const struct ctables_cell *example;
double d_valid; /* Dictionary weight. */
- double d_missing;
+ double d_count;
+ double d_total;
double e_valid; /* Effective weight */
- double e_missing;
+ double e_count;
+ double e_total;
};
enum ctables_summary_variant
struct ctables_domain *domains[N_CTDTS];
bool hide;
+
+ /* Is at least one value missing, whether included or excluded? */
+ bool is_missing;
+
+ /* Is at least one value missing and excluded? */
+ bool excluded_missing;
+
bool postcompute;
enum ctables_summary_variant sv;
size_t n;
size_t allocated;
+ /* The variable to which the summary specs are applied. */
struct variable *var;
+
+ /* Whether the variable to which the summary specs are applied is a scale
+ variable for the purpose of summarization.
+
+ (VALIDN and TOTALN act differently for summarizing scale and categorical
+ variables.) */
+ bool is_scale;
};
static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
CCT_VALUE,
CCT_LABEL,
CCT_FUNCTION,
+
+ /* Placeholder for a missing value that matches no category, kept so that
+ the case still contributes to TOTALN. */
+ CCT_EXCLUDED_MISSING,
}
type;
};
/* Source location. This is null for CCT_TOTAL, CCT_VALUE, CCT_LABEL,
- CCT_FUNCTION. */
+ CCT_FUNCTION, and CCT_EXCLUDED_MISSING. */
struct msg_location *location;
};
case CCT_LABEL:
case CCT_FUNCTION:
break;
+
+ case CCT_EXCLUDED_MISSING:
+ break;
}
}
&& a->sort_function == b->sort_function
&& a->sort_var == b->sort_var
&& a->percentile == b->percentile);
+
+ case CCT_EXCLUDED_MISSING:
+ return true;
}
NOT_REACHED ();
.specs = specs,
.n = src->n,
.allocated = src->n,
- .var = src->var
+ .var = src->var,
+ .is_scale = src->is_scale,
};
}
break;
case CTFA_SCALE:
+#if 0
if (!axis->scale)
{
msg_at (SE, loc,
var_name);
return false;
}
+#endif
break;
case CTFA_ALL:
case CCT_VALUE:
case CCT_LABEL:
case CCT_FUNCTION:
+ case CCT_EXCLUDED_MISSING:
break;
}
}
{
ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
nest->specs[sv].var = a->var.var;
+ nest->specs[sv].is_scale = a->scale;
}
return (struct ctables_stack) { .nests = nest, .n = 1 };
union ctables_summary
{
/* COUNT, VALIDN, TOTALN. */
- struct
- {
- double valid;
- double missing;
- };
+ double count;
/* MINIMUM, MAXIMUM, RANGE. */
struct
case CTSF_ETOTALN:
case CTSF_VALIDN:
case CTSF_EVALIDN:
- s->missing = s->valid = 0;
+ s->count = 0;
break;
case CTSF_MAXIMUM:
}
static void
-ctables_summary_add (union ctables_summary *s,
+ctables_summary_add (const struct ctables_cell *cell, union ctables_summary *s,
const struct ctables_summary_spec *ss,
const struct variable *var, const union value *value,
+ bool is_scale, bool is_missing,
double d_weight, double e_weight)
{
/* To determine whether a case is included in a given table for a particular
*/
switch (ss->function)
{
- case CTSF_COUNT:
case CSTF_TOTALN:
+ s->count += d_weight;
+ break;
+
+ case CTSF_COUNT:
+ if (is_scale || !cell->excluded_missing)
+ s->count += d_weight;
+ break;
+
case CTSF_VALIDN:
- if (var_is_value_missing (var, value))
- s->missing += d_weight;
- else
- s->valid += d_weight;
+ if (is_scale
+ ? !var_is_value_missing (var, value)
+ : !is_missing)
+ s->count += d_weight;
+ break;
+
+ case CTSF_MISSING:
+ if (is_missing)
+ s->count += d_weight;
break;
case CTSF_ECOUNT:
case CTSF_LAYERPCT_TOTALN:
case CTSF_LAYERROWPCT_TOTALN:
case CTSF_LAYERCOLPCT_TOTALN:
- case CTSF_MISSING:
- case CTSF_ETOTALN:
+ s->count += d_weight;
+ break;
+
case CTSF_EVALIDN:
- if (var_is_value_missing (var, value))
- s->missing += e_weight;
- else
- s->valid += e_weight;
+ if (is_scale
+ ? !var_is_value_missing (var, value)
+ : !is_missing)
+ s->count += e_weight;
+ break;
+
+ case CTSF_ETOTALN:
+ s->count += e_weight;
break;
case CTSF_MAXIMUM:
{
case CTSF_COUNT:
case CTSF_ECOUNT:
- return s->valid;
+ return s->count;
case CTSF_ROWPCT_COUNT:
case CTSF_COLPCT_COUNT:
{
enum ctables_domain_type d = ctables_function_domain (ss->function);
return (cell->domains[d]->e_valid
- ? s->valid / cell->domains[d]->e_valid * 100
+ ? s->count / cell->domains[d]->e_valid * 100
: SYSMIS);
}
NOT_REACHED ();
case CTSF_MISSING:
- return s->missing;
+ return s->count;
case CSTF_TOTALN:
case CTSF_ETOTALN:
- return s->valid + s->missing;
+ return s->count;
case CTSF_VALIDN:
+ return s->count;
+
case CTSF_EVALIDN:
- return s->valid;
+ return s->count;
case CTSF_MAXIMUM:
return s->max;
case CCT_SUBTOTAL:
case CCT_TOTAL:
case CCT_POSTCOMPUTE:
+ case CCT_EXCLUDED_MISSING:
/* Must be equal. */
continue;
ctables_categories_match (const struct ctables_categories *c,
const union value *v, const struct variable *var)
{
+ if (var_is_numeric (var) && v->f == SYSMIS)
+ return NULL;
+
const struct ctables_category *othernm = NULL;
for (size_t i = c->n_cats; i-- > 0; )
{
case CCT_FUNCTION:
return (cat->include_missing || !var_is_value_missing (var, v) ? cat
: NULL);
+
+ case CCT_EXCLUDED_MISSING:
+ break;
}
}
cell = xmalloc (sizeof *cell);
cell->hide = false;
+ cell->is_missing = false;
+ cell->excluded_missing = false;
cell->sv = sv;
cell->contributes_to_domains = true;
cell->postcompute = false;
for (size_t i = 0; i < nest->n; i++)
{
const struct ctables_category *cat = cats[a][i];
+ const struct variable *var = nest->vars[i];
+ const union value *value = case_data (c, var);
if (i != nest->scale_idx)
{
const struct ctables_category *subtotal = cat->subtotal;
|| cat->type == CCT_SUBTOTAL
|| cat->type == CCT_POSTCOMPUTE)
cell->contributes_to_domains = false;
+ else if (var_is_value_missing (var, value))
+ cell->is_missing = true;
+ if (cat->type == CCT_EXCLUDED_MISSING)
+ cell->excluded_missing = true;
if (cat->type == CCT_POSTCOMPUTE)
cell->postcompute = true;
}
cell->axes[a].cvs[i].category = cat;
- value_clone (&cell->axes[a].cvs[i].value, case_data (c, nest->vars[i]),
- var_get_width (nest->vars[i]));
+ value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
}
}
static void
ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
const struct ctables_category *cats[PIVOT_N_AXES][10],
- double d_weight, double e_weight)
+ bool is_missing, double d_weight, double e_weight)
{
struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
const struct ctables_nest *ss = s->nests[s->table->summary_axis];
const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
for (size_t i = 0; i < specs->n; i++)
- ctables_summary_add (&cell->summaries[i], &specs->specs[i], specs->var,
- case_data (c, specs->var), d_weight, e_weight);
+ ctables_summary_add (cell, &cell->summaries[i], &specs->specs[i],
+ specs->var, case_data (c, specs->var), specs->is_scale,
+ is_missing, d_weight, e_weight);
if (cell->contributes_to_domains)
{
for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
{
- cell->domains[dt]->d_valid += d_weight;
- cell->domains[dt]->e_valid += e_weight;
+ struct ctables_domain *d = cell->domains[dt];
+ d->d_total += d_weight;
+ d->e_total += e_weight;
+ if (!cell->excluded_missing)
+ {
+ d->d_count += d_weight;
+ d->e_count += e_weight;
+ }
+ if (!cell->is_missing)
+ {
+ d->d_valid += d_weight;
+ d->e_valid += e_weight;
+ }
}
}
}
static void
recurse_totals (struct ctables_section *s, const struct ccase *c,
const struct ctables_category *cats[PIVOT_N_AXES][10],
- double d_weight, double e_weight,
+ bool is_missing, double d_weight, double e_weight,
enum pivot_axis_type start_axis, size_t start_nest)
{
for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
{
const struct ctables_category *save = cats[a][i];
cats[a][i] = total;
- ctables_cell_add__ (s, c, cats, d_weight, e_weight);
- recurse_totals (s, c, cats, d_weight, e_weight, a, i + 1);
+ ctables_cell_add__ (s, c, cats, is_missing, d_weight, e_weight);
+ recurse_totals (s, c, cats, is_missing,
+ d_weight, e_weight, a, i + 1);
cats[a][i] = save;
}
}
static void
recurse_subtotals (struct ctables_section *s, const struct ccase *c,
const struct ctables_category *cats[PIVOT_N_AXES][10],
- double d_weight, double e_weight,
+ bool is_missing, double d_weight, double e_weight,
enum pivot_axis_type start_axis, size_t start_nest)
{
for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
if (save->subtotal)
{
cats[a][i] = save->subtotal;
- ctables_cell_add__ (s, c, cats, d_weight, e_weight);
- recurse_subtotals (s, c, cats, d_weight, e_weight, a, i + 1);
+ ctables_cell_add__ (s, c, cats, is_missing, d_weight, e_weight);
+ recurse_subtotals (s, c, cats, is_missing,
+ d_weight, e_weight, a, i + 1);
cats[a][i] = save;
}
}
double d_weight, double e_weight)
{
const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
+ bool is_missing = false;
+ bool excluded_missing = false;
for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
{
const struct ctables_nest *nest = s->nests[a];
const struct variable *var = nest->vars[i];
const union value *value = case_data (c, var);
- if (var_is_numeric (var) && value->f == SYSMIS)
- return;
+ bool var_missing = var_is_value_missing (var, value) != 0;
+ if (var_missing)
+ is_missing = true;
cats[a][i] = ctables_categories_match (
s->table->categories[var_get_dict_index (var)], value, var);
if (!cats[a][i])
- return;
+ {
+ if (!is_missing)
+ return;
+
+ static const struct ctables_category cct_excluded_missing = {
+ .type = CCT_EXCLUDED_MISSING,
+ .hide = true,
+ };
+ cats[a][i] = &cct_excluded_missing;
+ excluded_missing = true;
+ }
}
}
- for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
- {
- const struct ctables_nest *nest = s->nests[a];
- for (size_t i = 0; i < nest->n; i++)
- if (i != nest->scale_idx)
- {
- const struct variable *var = nest->vars[i];
- const union value *value = case_data (c, var);
- ctables_add_occurrence (var, value, &s->occurrences[a][i]);
- }
- }
+ if (!excluded_missing)
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+ {
+ const struct ctables_nest *nest = s->nests[a];
+ for (size_t i = 0; i < nest->n; i++)
+ if (i != nest->scale_idx)
+ {
+ const struct variable *var = nest->vars[i];
+ const union value *value = case_data (c, var);
+ ctables_add_occurrence (var, value, &s->occurrences[a][i]);
+ }
+ }
- ctables_cell_add__ (s, c, cats, d_weight, e_weight);
+ ctables_cell_add__ (s, c, cats, is_missing, d_weight, e_weight);
- recurse_totals (s, c, cats, d_weight, e_weight, 0, 0);
- recurse_subtotals (s, c, cats, d_weight, e_weight, 0, 0);
+ if (!excluded_missing)
+ {
+ recurse_totals (s, c, cats, is_missing, d_weight, e_weight, 0, 0);
+ recurse_subtotals (s, c, cats, is_missing, d_weight, e_weight, 0, 0);
+ }
}
struct merge_item
specs->n = 1;
enum ctables_summary_function function
- = specs->var ? CTSF_MEAN : CTSF_COUNT;
+ = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
struct ctables_var var = { .is_mrset = false, .var = specs->var };
*specs->specs = (struct ctables_summary_spec) {
if (c->include_missing || !var_is_value_missing (var, &vl->value))
ctables_add_occurrence (var, &vl->value, occurrences);
break;
+
+ case CCT_EXCLUDED_MISSING:
+ break;
}
}
}
dnl * U-prefix for unweighted summaries.
dnl * .LCL and .UCL suffixes.
dnl * .SE suffixes.
+dnl * Why are summary functions for scale variables also available for totals and subtotals?
dnl - CATEGORIES:
dnl * String values
dnl * Date values
AT_CHECK([ln $top_srcdir/examples/nhtsa.sav . || cp $top_srcdir/examples/nhtsa.sav .])
AT_DATA([ctables.sps],
[[GET 'nhtsa.sav'.
-CTABLES /TABLE qnd1[COUNT, MEAN, STDDEV, MINIMUM, MAXIMUM].
+CTABLES /TABLE qnd1[COUNT, VALIDN, TOTALN, MEAN, STDDEV, MINIMUM, MAXIMUM].
CTABLES /TABLE BY qnd1.
CTABLES /TABLE BY BY qnd1.
]])
AT_CHECK([pspp ctables.sps -O box=unicode -O width=80], [0], [dnl
- Custom Tables
-╭──────────────────────────┬─────┬────┬─────────────┬───────┬───────╮
-│ │Count│Mean│Std Deviation│Minimum│Maximum│
-├──────────────────────────┼─────┼────┼─────────────┼───────┼───────┤
-│D1. AGE: What is your age?│ 6930│ 48│ 19│ 16│ 86│
-╰──────────────────────────┴─────┴────┴─────────────┴───────┴───────╯
+ Custom Tables
+╭──────────────────────┬─────┬───────┬───────┬────┬────────────┬───────┬───────╮
+│ │ │ │ │ │ Std │ │ │
+│ │Count│Valid N│Total N│Mean│ Deviation │Minimum│Maximum│
+├──────────────────────┼─────┼───────┼───────┼────┼────────────┼───────┼───────┤
+│D1. AGE: What is your │ 6999│ 6930│ 6999│ 48│ 19│ 16│ 86│
+│age? │ │ │ │ │ │ │ │
+╰──────────────────────┴─────┴───────┴───────┴────┴────────────┴───────┴───────╯
Custom Tables
╭──────────────────────────╮
[[GET 'nhtsa.sav'.
CTABLES /TABLE=qn17
/CATEGORIES VARIABLES=qn17 TOTAL=YES LABEL='Number responding'.
-CTABLES /TABLE=region > qn18 [MEAN, COUNT]
+DESCRIPTIVES qn18/STATISTICS=MEAN.
+CTABLES /TABLE=region > qn18 [MEAN, COUNT, VALIDN, TOTALN]
/CATEGORIES VARIABLES=region TOTAL=YES LABEL='All regions'.
]])
AT_CHECK([pspp ctables.sps -O box=unicode -O width=80], [0], [dnl
│ Number responding │ 4221│
╰────────────────────────────────────────────────────────────────────────┴─────╯
+ Descriptive Statistics
+╭────────────────────────────────────────────────────────────────────┬────┬────╮
+│ │ N │Mean│
+├────────────────────────────────────────────────────────────────────┼────┼────┤
+│18. When you drink ANSWERFROM(QN17R1), about how many │4218│4.62│
+│ANSWERFROM(QN17R2) do you usually drink per sitting? │ │ │
+│Valid N (listwise) │6999│ │
+│Missing N (listwise) │2781│ │
+╰────────────────────────────────────────────────────────────────────┴────┴────╯
+
Custom Tables
-╭───────────────────────────────────────────────────────────────────┬────┬─────╮
-│ │Mean│Count│
-├───────────────────────────────────────────────────────────────────┼────┼─────┤
-│Region NE 18. When you drink ANSWERFROM(QN17R1), about how │4.36│ 949│
-│ many ANSWERFROM(QN17R2) do you usually drink per │ │ │
-│ sitting? │ │ │
-│ ╶────────────────────────────────────────────────────────────┼────┼─────┤
-│ MW 18. When you drink ANSWERFROM(QN17R1), about how │4.67│ 1027│
-│ many ANSWERFROM(QN17R2) do you usually drink per │ │ │
-│ sitting? │ │ │
-│ ╶────────────────────────────────────────────────────────────┼────┼─────┤
-│ S 18. When you drink ANSWERFROM(QN17R1), about how │4.71│ 1287│
-│ many ANSWERFROM(QN17R2) do you usually drink per │ │ │
-│ sitting? │ │ │
-│ ╶────────────────────────────────────────────────────────────┼────┼─────┤
-│ W 18. When you drink ANSWERFROM(QN17R1), about how │4.69│ 955│
-│ many ANSWERFROM(QN17R2) do you usually drink per │ │ │
-│ sitting? │ │ │
-│ ╶────────────────────────────────────────────────────────────┼────┼─────┤
-│ All 18. When you drink ANSWERFROM(QN17R1), about how │4.62│ 4218│
-│ regions many ANSWERFROM(QN17R2) do you usually drink per │ │ │
-│ sitting? │ │ │
-╰───────────────────────────────────────────────────────────────────┴────┴─────╯
+╭──────────────────────────────────────────────────────┬────┬─────┬──────┬─────╮
+│ │ │ │ Valid│Total│
+│ │Mean│Count│ N │ N │
+├──────────────────────────────────────────────────────┼────┼─────┼──────┼─────┤
+│Region NE 18. When you drink ANSWERFROM(QN17R1),│4.36│ 1409│ 949│ 1409│
+│ about how many ANSWERFROM(QN17R2) do │ │ │ │ │
+│ you usually drink per sitting? │ │ │ │ │
+│ ╶───────────────────────────────────────────────┼────┼─────┼──────┼─────┤
+│ MW 18. When you drink ANSWERFROM(QN17R1),│4.67│ 1654│ 1027│ 1654│
+│ about how many ANSWERFROM(QN17R2) do │ │ │ │ │
+│ you usually drink per sitting? │ │ │ │ │
+│ ╶───────────────────────────────────────────────┼────┼─────┼──────┼─────┤
+│ S 18. When you drink ANSWERFROM(QN17R1),│4.71│ 2390│ 1287│ 2390│
+│ about how many ANSWERFROM(QN17R2) do │ │ │ │ │
+│ you usually drink per sitting? │ │ │ │ │
+│ ╶───────────────────────────────────────────────┼────┼─────┼──────┼─────┤
+│ W 18. When you drink ANSWERFROM(QN17R1),│4.69│ 1546│ 955│ 1546│
+│ about how many ANSWERFROM(QN17R2) do │ │ │ │ │
+│ you usually drink per sitting? │ │ │ │ │
+│ ╶───────────────────────────────────────────────┼────┼─────┼──────┼─────┤
+│ All 18. When you drink ANSWERFROM(QN17R1),│4.62│ 6999│ 4218│ 6999│
+│ regions about how many ANSWERFROM(QN17R2) do │ │ │ │ │
+│ you usually drink per sitting? │ │ │ │ │
+╰──────────────────────────────────────────────────────┴────┴─────┴──────┴─────╯
])
AT_CLEANUP
│ Female│ 943│
╰──────────────────────────────┴────────────╯
])
+AT_CLEANUP
+
+AT_SETUP([CTABLES missing values])
+AT_DATA([ctables.sps],
+[[DATA LIST LIST NOTABLE/x y.
+BEGIN DATA.
+1 1
+1 2
+1 3
+1 4
+1 5
+1 .
+2 1
+2 2
+2 3
+2 4
+2 5
+2 .
+3 1
+3 2
+3 3
+3 4
+3 5
+3 .
+4 1
+4 2
+4 3
+4 4
+4 5
+4 .
+5 1
+5 2
+5 3
+5 4
+5 5
+5 .
+. 1
+. 2
+. 3
+. 4
+. 5
+END DATA.
+MISSING VALUES x (1, 2) y (2, 3).
+VARIABLE LEVEL ALL (NOMINAL).
+
+CTABLES /TABLE x[COUNT,TOTALS[COUNT, VALIDN, TOTALN]]
+ /CATEGORIES VARIABLES=ALL TOTAL=YES.
+CTABLES /TABLE x[COUNT,TOTALS[COUNT, VALIDN, TOTALN]]
+ /CATEGORIES VARIABLES=ALL TOTAL=YES MISSING=INCLUDE.
+CTABLES /TABLE x BY y.
+CTABLES /TABLE x BY y /CATEGORIES VARIABLES=ALL MISSING=INCLUDE.
+CTABLES /TABLE y BY x /CATEGORIES VARIABLES=ALL MISSING=INCLUDE.
+]])
+AT_CHECK([pspp ctables.sps -O box=unicode -O width=120], [0], [])
AT_CLEANUP
\ No newline at end of file