From c4f4678ecf25bdf4e3e5cfe4d178569ec4a1ef2a Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 10 Apr 2016 18:50:12 -0700 Subject: [PATCH] CROSSTABS: Implement COUNT subcommand to round case or cell weights. Bug #47547. Requested by Harry Thijssen. --- NEWS | 3 +- doc/statistics.texi | 11 ++++ src/language/stats/crosstabs.q | 62 +++++++++++++++--- tests/language/stats/crosstabs.at | 105 ++++++++++++++++++++++++++++++ 4 files changed, 170 insertions(+), 11 deletions(-) diff --git a/NEWS b/NEWS index 3b37cd78cf..525083ead4 100644 --- a/NEWS +++ b/NEWS @@ -6,7 +6,8 @@ Please send PSPP bug reports to bug-gnu-pspp@gnu.org. Changes from 0.10.1 to 0.10.2: - (nothing yet) + * CROSSTABS implements a new COUNT subcommand to round case or cell + weights. Changes from 0.10.0 to 0.10.1: diff --git a/doc/statistics.texi b/doc/statistics.texi index 9774c514aa..34a7a3d987 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -597,6 +597,8 @@ CROSSTABS @{BOX,NOBOX@} /CELLS=@{COUNT,ROW,COLUMN,TOTAL,EXPECTED,RESIDUAL,SRESIDUAL, ASRESIDUAL,ALL,NONE@} + /COUNT=@{ASIS,CASE,CELL@} + @{ROUND,TRUNCATE@} /STATISTICS=@{CHISQ,PHI,CC,LAMBDA,UC,BTAU,CTAU,RISK,GAMMA,D, KAPPA,ETA,CORR,ALL,NONE@} /BARCHART @@ -696,6 +698,15 @@ Suppress cells entirely. If @subcmd{CELLS} is not specified at all then only @subcmd{COUNT} will be selected. +By default, crosstabulation and statistics use raw case weights, +without rounding. Use the @subcmd{/COUNT} subcommand to perform +rounding: CASE rounds the weights of individual cases as they are +read, CELL rounds the weights of cells within each crosstabulation +table after it has been constructed, and ASIS explicitly specifies the +default non-rounding behavior. When rounding is requested, ROUND, the +default, rounds to the nearest integer and TRUNCATE rounds toward +zero. 
+ The @subcmd{STATISTICS} subcommand selects statistics for computation: @table @asis diff --git a/src/language/stats/crosstabs.q b/src/language/stats/crosstabs.q index 7b0c6c345e..6e87c0260b 100644 --- a/src/language/stats/crosstabs.q +++ b/src/language/stats/crosstabs.q @@ -74,6 +74,8 @@ *^tables=custom; +variables=custom; missing=miss:!table/include/report; + count=roundwhat:asis/case/!cell, + roundhow:!round/truncate; +write[wr_]=none,cells,all; +format=val:!avalue/dvalue, indx:!noindex/index, @@ -182,6 +184,11 @@ struct crosstabs_proc unsigned int cells; /* Bit k is 1 if cell k is requested. */ int a_cells[CRS_CL_count]; /* 0...n_cells-1 are the requested cells. */ + /* Rounding of cells. */ + bool round_case_weights; /* Round case weights? */ + bool round_cells; /* If !round_case_weights, round cells? */ + bool round_down; /* Round down? (otherwise to nearest) */ + /* STATISTICS. */ unsigned int statistics; /* Bit k is 1 if statistic k is requested. */ @@ -200,6 +207,12 @@ static void tabulate_integer_case (struct pivot_table *, const struct ccase *, static void postcalc (struct crosstabs_proc *); static void submit (struct pivot_table *, struct tab_table *); +static double +round_weight (const struct crosstabs_proc *proc, double weight) +{ + return proc->round_down ? floor (weight) : floor (weight + 0.5); +} + /* Parses and executes the CROSSTABS procedure. */ int cmd_crosstabs (struct lexer *lexer, struct dataset *ds) @@ -236,6 +249,10 @@ cmd_crosstabs (struct lexer *lexer, struct dataset *ds) proc.descending = cmd.val == CRS_DVALUE; + proc.round_case_weights = cmd.sbc_count && cmd.roundwhat == CRS_CASE; + proc.round_cells = cmd.sbc_count && cmd.roundwhat == CRS_CELL; + proc.round_down = cmd.roundhow == CRS_TRUNCATE; + /* CELLS. 
*/ if (!cmd.sbc_cells) proc.cells = 1u << CRS_CL_COUNT; @@ -316,6 +333,12 @@ cmd_crosstabs (struct lexer *lexer, struct dataset *ds) { double weight = dict_get_case_weight (dataset_dict (ds), c, &proc.bad_warn); + if (cmd.roundwhat == CRS_CASE) + { + weight = round_weight (&proc, weight); + if (weight == 0.) + continue; + } if (should_tabulate_case (pt, c, proc.exclude)) { if (proc.mode == GENERAL) @@ -677,17 +700,36 @@ static bool find_crosstab (struct pivot_table *, size_t *row0p, size_t *row1p); static void postcalc (struct crosstabs_proc *proc) { - struct pivot_table *pt; + + /* Round hash table entries, if requested + + If this causes any of the cell counts to fall to zero, delete those + cells. */ + if (proc->round_cells) + for (struct pivot_table *pt = proc->pivots; + pt < &proc->pivots[proc->n_pivots]; pt++) + { + struct freq *e, *next; + HMAP_FOR_EACH_SAFE (e, next, struct freq, node, &pt->data) + { + e->count = round_weight (proc, e->count); + if (e->count == 0.0) + { + hmap_delete (&pt->data, &e->node); + free (e); + } + } + } /* Convert hash tables into sorted arrays of entries. */ - for (pt = &proc->pivots[0]; pt < &proc->pivots[proc->n_pivots]; pt++) + for (struct pivot_table *pt = proc->pivots; + pt < &proc->pivots[proc->n_pivots]; pt++) { struct freq *e; - size_t i; pt->n_entries = hmap_count (&pt->data); pt->entries = xnmalloc (pt->n_entries, sizeof *pt->entries); - i = 0; + size_t i = 0; HMAP_FOR_EACH (e, struct freq, node, &pt->data) pt->entries[i++] = e; hmap_destroy (&pt->data); @@ -701,7 +743,8 @@ postcalc (struct crosstabs_proc *proc) make_summary_table (proc); /* Output each pivot table. */ - for (pt = &proc->pivots[0]; pt < &proc->pivots[proc->n_pivots]; pt++) + for (struct pivot_table *pt = proc->pivots; + pt < &proc->pivots[proc->n_pivots]; pt++) { if (proc->pivot || pt->n_vars == 2) output_pivot_table (proc, pt); @@ -721,10 +764,9 @@ postcalc (struct crosstabs_proc *proc) } /* Free output and prepare for next split file. 
*/ - for (pt = &proc->pivots[0]; pt < &proc->pivots[proc->n_pivots]; pt++) + for (struct pivot_table *pt = proc->pivots; + pt < &proc->pivots[proc->n_pivots]; pt++) { - size_t i; - pt->missing = 0.0; /* Free the members that were allocated in this function(and the values @@ -734,7 +776,7 @@ postcalc (struct crosstabs_proc *proc) lower level (in output_pivot_table), or both allocated and destroyed at a higher level (in crs_custom_tables and free_proc, respectively). */ - for (i = 0; i < pt->n_vars; i++) + for (size_t i = 0; i < pt->n_vars; i++) { int width = var_get_width (pt->vars[i]); if (value_needs_init (width)) @@ -746,7 +788,7 @@ postcalc (struct crosstabs_proc *proc) } } - for (i = 0; i < pt->n_entries; i++) + for (size_t i = 0; i < pt->n_entries; i++) free (pt->entries[i]); free (pt->entries); } diff --git a/tests/language/stats/crosstabs.at b/tests/language/stats/crosstabs.at index f7fccb434d..9cd2a04f09 100644 --- a/tests/language/stats/crosstabs.at +++ b/tests/language/stats/crosstabs.at @@ -431,7 +431,112 @@ z,Category,Statistic,Type,Value,Asymp. Std. Error,Approx. T,Approx. Sig. ]]) AT_CLEANUP +AT_SETUP([CROSSTABS rounding weights with COUNT]) +AT_DATA([crosstabs.sps], + [[DATA LIST NOTABLE LIST /x y w. +BEGIN DATA. +1 1 1.4 +1 1 1.4 +1 2 1.6 +1 2 1.6 +2 1 1 +2 2 2 +END DATA. +WEIGHT BY w. + +* These should have the same effect (no rounding). +CROSSTABS /TABLES x BY y. +CROSSTABS /TABLES x BY y /COUNT ASIS. + +* Round input weights. +CROSSTABS /TABLES x BY y /COUNT CASE ROUND. +CROSSTABS /TABLES x BY y /COUNT CASE TRUNCATE. + +* Round cell weights. +CROSSTABS /TABLES x BY y /COUNT. +CROSSTABS /TABLES x BY y /COUNT TRUNCATE. +]]) + +AT_CHECK([pspp -O format=csv crosstabs.sps], [0], + [[Table: Summary. +,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x * y,9.00,100.0%,.00,0.0%,9.00,100.0% + +Table: x * y [count]. +,y,, +x,1.00,2.00,Total +1.00,2.80,3.20,6.00 +2.00,1.00,2.00,3.00 +Total,3.80,5.20,9.00 + +Table: Summary. 
+,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x * y,9.00,100.0%,.00,0.0%,9.00,100.0% + +Table: x * y [count]. +,y,, +x,1.00,2.00,Total +1.00,2.80,3.20,6.00 +2.00,1.00,2.00,3.00 +Total,3.80,5.20,9.00 + +Table: Summary. +,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x * y,9.00,100.0%,.00,0.0%,9.00,100.0% +Table: x * y [count]. +,y,, +x,1.00,2.00,Total +1.00,2.00,4.00,6.00 +2.00,1.00,2.00,3.00 +Total,3.00,6.00,9.00 + +Table: Summary. +,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x * y,7.00,100.0%,.00,0.0%,7.00,100.0% + +Table: x * y [count]. +,y,, +x,1.00,2.00,Total +1.00,2.00,2.00,4.00 +2.00,1.00,2.00,3.00 +Total,3.00,4.00,7.00 + +Table: Summary. +,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x * y,9.00,100.0%,.00,0.0%,9.00,100.0% + +Table: x * y [count]. +,y,, +x,1.00,2.00,Total +1.00,3.00,3.00,6.00 +2.00,1.00,2.00,3.00 +Total,4.00,5.00,9.00 + +Table: Summary. +,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x * y,8.00,100.0%,.00,0.0%,8.00,100.0% + +Table: x * y [count]. +,y,, +x,1.00,2.00,Total +1.00,2.00,3.00,5.00 +2.00,1.00,2.00,3.00 +Total,3.00,5.00,8.00 +]]) +AT_CLEANUP AT_SETUP([CROSSTABS descending sort order]) AT_DATA([crosstabs-descending.sps], -- 2.30.2