Bug #47547.
Requested by Harry Thijssen.
Changes from 0.10.1 to 0.10.2:
- (nothing yet)
+ * CROSSTABS implements a new COUNT subcommand to round case or cell
+ weights.
Changes from 0.10.0 to 0.10.1:
@{BOX,NOBOX@}
/CELLS=@{COUNT,ROW,COLUMN,TOTAL,EXPECTED,RESIDUAL,SRESIDUAL,
ASRESIDUAL,ALL,NONE@}
+ /COUNT=@{ASIS,CASE,CELL@}
+ @{ROUND,TRUNCATE@}
/STATISTICS=@{CHISQ,PHI,CC,LAMBDA,UC,BTAU,CTAU,RISK,GAMMA,D,
KAPPA,ETA,CORR,ALL,NONE@}
/BARCHART
If @subcmd{CELLS} is not specified at all then only @subcmd{COUNT}
will be selected.
+By default, crosstabulation and statistics use raw case weights,
+without rounding. Use the @subcmd{/COUNT} subcommand to perform
+rounding: CASE rounds the weights of individual cases as they are
+read, CELL rounds the weights of cells within each crosstabulation
+table after it has been constructed, and ASIS explicitly specifies the
+default non-rounding behavior. If @subcmd{/COUNT} is given without
+any of these keywords, CELL is assumed. When rounding is requested,
+ROUND, the default, rounds to the nearest integer and TRUNCATE rounds
+toward zero.
+
The @subcmd{STATISTICS} subcommand selects statistics for computation:
@table @asis
*^tables=custom;
+variables=custom;
missing=miss:!table/include/report;
+ count=roundwhat:asis/case/!cell,
+ roundhow:!round/truncate;
+write[wr_]=none,cells,all;
+format=val:!avalue/dvalue,
indx:!noindex/index,
unsigned int cells; /* Bit k is 1 if cell k is requested. */
int a_cells[CRS_CL_count]; /* 0...n_cells-1 are the requested cells. */
+ /* Rounding of cells. */
+ bool round_case_weights; /* Round case weights? */
+ bool round_cells; /* If !round_case_weights, round cells? */
+ bool round_down; /* Round down? (otherwise to nearest) */
+
/* STATISTICS. */
unsigned int statistics; /* Bit k is 1 if statistic k is requested. */
static void postcalc (struct crosstabs_proc *);
static void submit (struct pivot_table *, struct tab_table *);
+static double
+round_weight (const struct crosstabs_proc *proc, double weight)
+{ /* Returns WEIGHT rounded to an integer according to PROC: when
+     PROC->round_down is set the result is floor (WEIGHT); otherwise it
+     is the nearest integer, with exact halves rounded up.
+     NOTE(review): floor () equals truncation toward zero only for
+     nonnegative WEIGHT; presumably weights are never negative here --
+     confirm against the callers. */
+ return proc->round_down ? floor (weight) : floor (weight + 0.5);
+}
+
/* Parses and executes the CROSSTABS procedure. */
int
cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
proc.descending = cmd.val == CRS_DVALUE;
+ proc.round_case_weights = cmd.sbc_count && cmd.roundwhat == CRS_CASE;
+ proc.round_cells = cmd.sbc_count && cmd.roundwhat == CRS_CELL;
+ proc.round_down = cmd.roundhow == CRS_TRUNCATE;
+
/* CELLS. */
if (!cmd.sbc_cells)
proc.cells = 1u << CRS_CL_COUNT;
{
double weight = dict_get_case_weight (dataset_dict (ds), c,
&proc.bad_warn);
+ if (cmd.roundwhat == CRS_CASE)
+ {
+ weight = round_weight (&proc, weight);
+ if (weight == 0.)
+ continue;
+ }
if (should_tabulate_case (pt, c, proc.exclude))
{
if (proc.mode == GENERAL)
static void
postcalc (struct crosstabs_proc *proc)
{
- struct pivot_table *pt;
+
+ /* Round hash table entries, if requested.
+
+ If this causes any of the cell counts to fall to zero, delete those
+ cells. */
+ if (proc->round_cells)
+ for (struct pivot_table *pt = proc->pivots;
+ pt < &proc->pivots[proc->n_pivots]; pt++)
+ {
+ struct freq *e, *next;
+ HMAP_FOR_EACH_SAFE (e, next, struct freq, node, &pt->data)
+ {
+ e->count = round_weight (proc, e->count);
+ if (e->count == 0.0)
+ {
+ hmap_delete (&pt->data, &e->node);
+ free (e);
+ }
+ }
+ }
/* Convert hash tables into sorted arrays of entries. */
- for (pt = &proc->pivots[0]; pt < &proc->pivots[proc->n_pivots]; pt++)
+ for (struct pivot_table *pt = proc->pivots;
+ pt < &proc->pivots[proc->n_pivots]; pt++)
{
struct freq *e;
- size_t i;
pt->n_entries = hmap_count (&pt->data);
pt->entries = xnmalloc (pt->n_entries, sizeof *pt->entries);
- i = 0;
+ size_t i = 0;
HMAP_FOR_EACH (e, struct freq, node, &pt->data)
pt->entries[i++] = e;
hmap_destroy (&pt->data);
make_summary_table (proc);
/* Output each pivot table. */
- for (pt = &proc->pivots[0]; pt < &proc->pivots[proc->n_pivots]; pt++)
+ for (struct pivot_table *pt = proc->pivots;
+ pt < &proc->pivots[proc->n_pivots]; pt++)
{
if (proc->pivot || pt->n_vars == 2)
output_pivot_table (proc, pt);
}
/* Free output and prepare for next split file. */
- for (pt = &proc->pivots[0]; pt < &proc->pivots[proc->n_pivots]; pt++)
+ for (struct pivot_table *pt = proc->pivots;
+ pt < &proc->pivots[proc->n_pivots]; pt++)
{
- size_t i;
-
pt->missing = 0.0;
/* Free the members that were allocated in this function(and the values
lower level (in output_pivot_table), or both allocated and destroyed
at a higher level (in crs_custom_tables and free_proc,
respectively). */
- for (i = 0; i < pt->n_vars; i++)
+ for (size_t i = 0; i < pt->n_vars; i++)
{
int width = var_get_width (pt->vars[i]);
if (value_needs_init (width))
}
}
- for (i = 0; i < pt->n_entries; i++)
+ for (size_t i = 0; i < pt->n_entries; i++)
free (pt->entries[i]);
free (pt->entries);
}
]])
AT_CLEANUP
+AT_SETUP([CROSSTABS rounding weights with COUNT])
+AT_DATA([crosstabs.sps],
+ [[DATA LIST NOTABLE LIST /x y w.
+BEGIN DATA.
+1 1 1.4
+1 1 1.4
+1 2 1.6
+1 2 1.6
+2 1 1
+2 2 2
+END DATA.
+WEIGHT BY w.
+
+* These should have the same effect (no rounding).
+CROSSTABS /TABLES x BY y.
+CROSSTABS /TABLES x BY y /COUNT ASIS.
+
+* Round input weights.
+CROSSTABS /TABLES x BY y /COUNT CASE ROUND.
+CROSSTABS /TABLES x BY y /COUNT CASE TRUNCATE.
+
+* Round cell weights.
+CROSSTABS /TABLES x BY y /COUNT.
+CROSSTABS /TABLES x BY y /COUNT TRUNCATE.
+]])
+
+AT_CHECK([pspp -O format=csv crosstabs.sps], [0],
+ [[Table: Summary.
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+x * y,9.00,100.0%,.00,0.0%,9.00,100.0%
+
+Table: x * y [count].
+,y,,
+x,1.00,2.00,Total
+1.00,2.80,3.20,6.00
+2.00,1.00,2.00,3.00
+Total,3.80,5.20,9.00
+
+Table: Summary.
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+x * y,9.00,100.0%,.00,0.0%,9.00,100.0%
+
+Table: x * y [count].
+,y,,
+x,1.00,2.00,Total
+1.00,2.80,3.20,6.00
+2.00,1.00,2.00,3.00
+Total,3.80,5.20,9.00
+
+Table: Summary.
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+x * y,9.00,100.0%,.00,0.0%,9.00,100.0%
+
+Table: x * y [count].
+,y,,
+x,1.00,2.00,Total
+1.00,2.00,4.00,6.00
+2.00,1.00,2.00,3.00
+Total,3.00,6.00,9.00
+
+Table: Summary.
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+x * y,7.00,100.0%,.00,0.0%,7.00,100.0%
+
+Table: x * y [count].
+,y,,
+x,1.00,2.00,Total
+1.00,2.00,2.00,4.00
+2.00,1.00,2.00,3.00
+Total,3.00,4.00,7.00
+
+Table: Summary.
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+x * y,9.00,100.0%,.00,0.0%,9.00,100.0%
+
+Table: x * y [count].
+,y,,
+x,1.00,2.00,Total
+1.00,3.00,3.00,6.00
+2.00,1.00,2.00,3.00
+Total,4.00,5.00,9.00
+
+Table: Summary.
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+x * y,8.00,100.0%,.00,0.0%,8.00,100.0%
+
+Table: x * y [count].
+,y,,
+x,1.00,2.00,Total
+1.00,2.00,3.00,5.00
+2.00,1.00,2.00,3.00
+Total,3.00,5.00,8.00
+]])
+AT_CLEANUP
AT_SETUP([CROSSTABS descending sort order])
AT_DATA([crosstabs-descending.sps],