X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fctables.c;h=12f9958c3c1dfc8ebdbcc282e87c60a6efbc53ac;hb=50e00137bfcc4eb3d4ae753a5e57e7a444194c96;hp=813bec9891482959018887cfadf571e14f875512;hpb=499552917fce3a3da9d0fa826e1589a1c62acf27;p=pspp diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index 813bec9891..12f9958c3c 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -63,18 +63,32 @@ enum ctables_vlabel CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH, }; +enum ctables_weighting + { + CTW_EFFECTIVE, + CTW_DICTIONARY, + CTW_UNWEIGHTED +#define N_CTWS 3 + }; + enum ctables_function_type { - /* A function that operates on data in a single cell. The function does - not have an unweighted version. */ + /* A function that operates on data in a single cell. It operates on + effective weights. It does not have an unweighted version. */ CTFT_CELL, - /* A function that operates on data in a single cell. The function has an - unweighted version. */ + /* A function that operates on data in a single cell. The function + operates on effective weights and has a U-prefixed unweighted + version. */ CTFT_UCELL, - /* A function that operates on an area of cells. The function has an - unweighted version. */ + /* A function that operates on data in a single cell. It operates on + dictionary weights, and has U-prefixed unweighted version and an + E-prefixed effective weight version. */ + CTFT_UECELL, + + /* A function that operates on an area of cells. It operates on effective + weights and has a U-prefixed unweighted version. */ CTFT_AREA, }; @@ -113,8 +127,9 @@ struct ctables_function_info enum ctables_format format; enum ctables_function_availability availability; - bool may_be_unweighted; - bool is_area; + bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */ + bool e_prefix; /* Accepts an 'E' prefix (for effective)? */ + bool is_area; /* Needs an area prefix. */ }; static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = { #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \ @@ -123,15 +138,14 @@ static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS .type = TYPE, \ .format = FORMAT, \ .availability = AVAILABILITY, \ - .may_be_unweighted = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_AREA, \ + .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \ + .e_prefix = (TYPE) == CTFT_UECELL, \ .is_area = (TYPE) == CTFT_AREA \ }, #include "ctables.inc" #undef S }; -static bool ctables_summary_function_is_count (enum ctables_summary_function); - enum ctables_area_type { /* Within a section, where stacked variables divide one section from @@ -169,22 +183,15 @@ struct ctables_area const struct ctables_cell *example; size_t sequence; - double d_valid; /* Dictionary weight. */ - double d_count; - double d_total; - double e_valid; /* Effective weight */ - double e_count; - double e_total; - double u_valid; /* Unweighted. */ - double u_count; - double u_total; + double count[N_CTWS]; + double valid[N_CTWS]; + double total[N_CTWS]; struct ctables_sum *sums; }; struct ctables_sum { - double e_sum; - double u_sum; + double sum[N_CTWS]; }; enum ctables_summary_variant @@ -373,6 +380,7 @@ struct ctables_nest struct variable **vars; size_t n; size_t scale_idx; + size_t summary_idx; size_t *areas[N_CTATS]; size_t n_areas[N_CTATS]; size_t group_head; @@ -542,7 +550,7 @@ struct ctables_category /* CCT_FUNCTION. */ enum ctables_summary_function sort_function; - bool weighted; + enum ctables_weighting weighting; enum ctables_area_type area; struct variable *sort_var; double percentile; @@ -745,7 +753,7 @@ struct ctables_summary_spec cell, it must be 0). For CTSF_PTILE only, 'percentile' is the percentile between 0 and 100 (for other functions it must be 0). */ enum ctables_summary_function function; - bool weighted; + enum ctables_weighting weighting; enum ctables_area_type calc_area; double percentile; /* CTSF_PTILE only. */ @@ -855,23 +863,18 @@ ctables_function_availability (enum ctables_summary_function f) return availability[f]; } -static bool -ctables_summary_function_is_count (enum ctables_summary_function f) -{ - return f == CTSF_COUNT || f == CTSF_ECOUNT; -} - static bool parse_ctables_summary_function (struct lexer *lexer, enum ctables_summary_function *function, - bool *weighted, + enum ctables_weighting *weighting, enum ctables_area_type *area) { if (!lex_force_id (lexer)) return false; struct substring name = lex_tokss (lexer); - *weighted = !(ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u')); + bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u'); + bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e')); bool has_area = false; *area = 0; @@ -885,6 +888,7 @@ parse_ctables_summary_function (struct lexer *lexer, { /* Special case where .COUNT suffix is omitted. */ *function = CTSF_areaPCT_COUNT; + *weighting = CTW_EFFECTIVE; lex_get (lexer); return true; } @@ -897,11 +901,13 @@ parse_ctables_summary_function (struct lexer *lexer, if (ss_equals_case (cfi->basename, name)) { *function = f; - if (!*weighted && !cfi->may_be_unweighted) - break; - if (has_area != cfi->is_area) + if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area)) break; + *weighting = (e ? CTW_EFFECTIVE + : u ? CTW_UNWEIGHTED + : cfi->e_prefix ? CTW_DICTIONARY + : CTW_EFFECTIVE); lex_get (lexer); return true; } @@ -985,15 +991,15 @@ ctables_summary_default_format (enum ctables_summary_function function, static const char * ctables_summary_label__ (const struct ctables_summary_spec *spec) { - bool w = spec->weighted; + bool w = spec->weighting != CTW_UNWEIGHTED; + bool d = spec->weighting == CTW_DICTIONARY; enum ctables_area_type a = spec->user_area; switch (spec->function) { case CTSF_COUNT: - return w ? N_("Count") : N_("Unweighted Count"); - - case CTSF_ECOUNT: - return N_("Adjusted Count"); + return (d ? N_("Count") + : w ? N_("Adjusted Count") + : N_("Unweighted Count")); case CTSF_areaPCT_COUNT: switch (a) @@ -1036,20 +1042,22 @@ ctables_summary_label__ (const struct ctables_summary_spec *spec) case CTSF_MAXIMUM: return N_("Maximum"); case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean"); - case CTSF_MEDIAN: return N_("Median"); + case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median"); case CTSF_MINIMUM: return N_("Minimum"); - case CTSF_MISSING: return N_("Missing"); - case CTSF_MODE: return N_("Mode"); + case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing"); + case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode"); case CTSF_PTILE: NOT_REACHED (); case CTSF_RANGE: return N_("Range"); - case CTSF_SEMEAN: return N_("Std Error of Mean"); - case CTSF_STDDEV: return N_("Std Deviation"); - case CTSF_SUM: return N_("Sum"); - case CTSF_TOTALN: return N_("Total N"); - case CTSF_ETOTALN: return N_("Adjusted Total N"); - case CTSF_VALIDN: return N_("Valid N"); - case CTSF_EVALIDN: return N_("Adjusted Valid N"); - case CTSF_VARIANCE: return N_("Variance"); + case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean"); + case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation"); + case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum"); + case CTSF_TOTALN: return (d ? N_("Total N") + : w ? N_("Adjusted Total N") + : N_("Unweighted Total N")); + case CTSF_VALIDN: return (d ? N_("Valid N") + : w ? N_("Adjusted Valid N") + : N_("Unweighted Valid N")); + case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance"); case CTSF_areaPCT_SUM: switch (a) { @@ -1089,7 +1097,7 @@ ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel) if (spec->function == CTSF_PTILE) { double p = spec->percentile; - char *s = (spec->weighted + char *s = (spec->weighting != CTW_UNWEIGHTED ? xasprintf (_("Percentile %.2f"), p) : xasprintf (_("Unweighted Percentile %.2f"), p)); return pivot_value_new_user_text_nocopy (s); @@ -1119,13 +1127,16 @@ ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel) static const char * ctables_summary_function_name (enum ctables_summary_function function, - bool weighted, + enum ctables_weighting weighting, enum ctables_area_type area, char *buffer, size_t bufsize) { const struct ctables_function_info *cfi = &ctables_function_info[function]; snprintf (buffer, bufsize, "%s%s%s", - weighted ? "" : "U", + (weighting == CTW_UNWEIGHTED ? "U" + : weighting == CTW_DICTIONARY ? "" + : cfi->e_prefix ? "E" + : ""), cfi->is_area ? ctables_area_type_name[area] : "", cfi->basename.string); return buffer; @@ -1133,7 +1144,8 @@ ctables_summary_function_name (enum ctables_summary_function function, static bool add_summary_spec (struct ctables_axis *axis, - enum ctables_summary_function function, bool weighted, + enum ctables_summary_function function, + enum ctables_weighting weighting, enum ctables_area_type area, double percentile, const char *label, const struct fmt_spec *format, bool is_ctables_format, const struct msg_location *loc, @@ -1142,7 +1154,7 @@ add_summary_spec (struct ctables_axis *axis, if (axis->op == CTAO_VAR) { char function_name[128]; - ctables_summary_function_name (function, weighted, area, + ctables_summary_function_name (function, weighting, area, function_name, sizeof function_name); const char *var_name = var_get_name (axis->var); switch (ctables_function_availability (function)) @@ -1180,7 +1192,7 @@ add_summary_spec (struct ctables_axis *axis, struct ctables_summary_spec *dst = &set->specs[set->n++]; *dst = (struct ctables_summary_spec) { .function = function, - .weighted = weighted, + .weighting = weighting, .calc_area = area, .user_area = area, .percentile = percentile, @@ -1194,7 +1206,7 @@ add_summary_spec (struct ctables_axis *axis, else { for (size_t i = 0; i < 2; i++) - if (!add_summary_spec (axis->subs[i], function, weighted, area, + if (!add_summary_spec (axis->subs[i], function, weighting, area, percentile, label, format, is_ctables_format, loc, sv)) return false; @@ -1312,9 +1324,9 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx) /* Parse function. */ enum ctables_summary_function function; - bool weighted; + enum ctables_weighting weighting; enum ctables_area_type area; - if (!parse_ctables_summary_function (ctx->lexer, &function, &weighted, + if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting, &area)) goto error; @@ -1356,7 +1368,7 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx) struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs, lex_ofs (ctx->lexer) - 1); - add_summary_spec (sub, function, weighted, area, percentile, label, + add_summary_spec (sub, function, weighting, area, percentile, label, formatp, is_ctables_format, loc, sv); free (label); msg_location_destroy (loc); @@ -2106,7 +2118,7 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict, { cat.type = CCT_FUNCTION; if (!parse_ctables_summary_function (lexer, &cat.sort_function, - &cat.weighted, &cat.area)) + &cat.weighting, &cat.area)) goto error; if (lex_match (lexer, T_LPAREN)) @@ -2421,6 +2433,9 @@ nest_fts (struct ctables_stack s0, struct ctables_stack s1) .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx : SIZE_MAX), + .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx + : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx + : SIZE_MAX), .n = n, }; for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) @@ -2455,13 +2470,15 @@ var_fts (const struct ctables_axis *a) struct variable **vars = xmalloc (sizeof *vars); *vars = a->var; + bool is_summary = a->specs[CSV_CELL].n || a->scale; struct ctables_nest *nest = xmalloc (sizeof *nest); *nest = (struct ctables_nest) { .vars = vars, .n = 1, .scale_idx = a->scale ? 0 : SIZE_MAX, + .summary_idx = is_summary ? 0 : SIZE_MAX, }; - if (a->specs[CSV_CELL].n || a->scale) + if (is_summary) for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) { ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]); @@ -2518,8 +2535,6 @@ union ctables_summary double ovalid; double ovalue; }; - - /* XXX multiple response */ }; static void @@ -2529,15 +2544,12 @@ ctables_summary_init (union ctables_summary *s, switch (ss->function) { case CTSF_COUNT: - case CTSF_ECOUNT: case CTSF_areaPCT_COUNT: case CTSF_areaPCT_VALIDN: case CTSF_areaPCT_TOTALN: case CTSF_MISSING: case CTSF_TOTALN: - case CTSF_ETOTALN: case CTSF_VALIDN: - case CTSF_EVALIDN: s->count = 0; break; @@ -2551,11 +2563,14 @@ ctables_summary_init (union ctables_summary *s, break; case CTSF_MEAN: + case CTSF_SUM: + case CTSF_areaPCT_SUM: + s->moments = moments1_create (MOMENT_MEAN); + break; + case CTSF_SEMEAN: case CTSF_STDDEV: - case CTSF_SUM: case CTSF_VARIANCE: - case CTSF_areaPCT_SUM: s->moments = moments1_create (MOMENT_VARIANCE); break; @@ -2587,15 +2602,12 @@ ctables_summary_uninit (union ctables_summary *s, switch (ss->function) { case CTSF_COUNT: - case CTSF_ECOUNT: case CTSF_areaPCT_COUNT: case CTSF_areaPCT_VALIDN: case CTSF_areaPCT_TOTALN: case CTSF_MISSING: case CTSF_TOTALN: - case CTSF_ETOTALN: case CTSF_VALIDN: - case CTSF_EVALIDN: break; case CTSF_areaID: @@ -2626,10 +2638,10 @@ ctables_summary_uninit (union ctables_summary *s, static void ctables_summary_add (union ctables_summary *s, const struct ctables_summary_spec *ss, - const struct variable *var, const union value *value, + const union value *value, bool is_scale, bool is_scale_missing, - bool is_missing, bool excluded_missing, - double d_weight, double e_weight) + bool is_missing, bool is_included, + double weight) { /* To determine whether a case is included in a given table for a particular kind of summary, consider the following charts for each variable in the @@ -2652,22 +2664,35 @@ ctables_summary_add (union ctables_summary *s, switch (ss->function) { case CTSF_TOTALN: + s->count += weight; + break; + case CTSF_areaPCT_TOTALN: - s->count += ss->weighted ? d_weight : 1.0; + s->count += weight; break; case CTSF_COUNT: + if (is_scale || is_included) + s->count += weight; + break; + case CTSF_areaPCT_COUNT: - if (is_scale || !excluded_missing) - s->count += ss->weighted ? d_weight : 1.0; + if (is_scale || is_included) + s->count += weight; break; case CTSF_VALIDN: + if (is_scale + ? !is_scale_missing + : !is_missing) + s->count += weight; + break; + case CTSF_areaPCT_VALIDN: if (is_scale ? !is_scale_missing : !is_missing) - s->count += ss->weighted ? d_weight : 1.0; + s->count += weight; break; case CTSF_areaID: @@ -2677,23 +2702,7 @@ ctables_summary_add (union ctables_summary *s, if (is_scale ? is_scale_missing : is_missing) - s->count += ss->weighted ? d_weight : 1.0; - break; - - case CTSF_ECOUNT: - if (is_scale || !excluded_missing) - s->count += e_weight; - break; - - case CTSF_EVALIDN: - if (is_scale - ? !is_scale_missing - : !is_missing) - s->count += e_weight; - break; - - case CTSF_ETOTALN: - s->count += e_weight; + s->count += weight; break; case CTSF_MAXIMUM: @@ -2701,7 +2710,6 @@ ctables_summary_add (union ctables_summary *s, case CTSF_RANGE: if (!is_scale_missing) { - assert (!var_is_alpha (var)); /* XXX? */ if (s->min == SYSMIS || value->f < s->min) s->min = value->f; if (s->max == SYSMIS || value->f > s->max) @@ -2714,9 +2722,13 @@ ctables_summary_add (union ctables_summary *s, case CTSF_STDDEV: case CTSF_SUM: case CTSF_VARIANCE: - case CTSF_areaPCT_SUM: if (!is_scale_missing) - moments1_add (s->moments, value->f, ss->weighted ? e_weight : 1.0); + moments1_add (s->moments, value->f, weight); + break; + + case CTSF_areaPCT_SUM: + if (!is_missing && !is_scale_missing) + moments1_add (s->moments, value->f, weight); break; case CTSF_MEDIAN: @@ -2724,12 +2736,11 @@ ctables_summary_add (union ctables_summary *s, case CTSF_PTILE: if (!is_scale_missing) { - double w = ss->weighted ? e_weight : 1.0; - s->ovalid += w; + s->ovalid += weight; struct ccase *c = case_create (casewriter_get_proto (s->writer)); *case_num_rw_idx (c, 0) = value->f; - *case_num_rw_idx (c, 1) = w; + *case_num_rw_idx (c, 1) = weight; casewriter_write (s->writer, c); } break; @@ -2744,7 +2755,6 @@ ctables_summary_value (const struct ctables_cell *cell, switch (ss->function) { case CTSF_COUNT: - case CTSF_ECOUNT: return s->count; case CTSF_areaID: @@ -2753,29 +2763,27 @@ ctables_summary_value (const struct ctables_cell *cell, case CTSF_areaPCT_COUNT: { const struct ctables_area *a = cell->areas[ss->calc_area]; - double a_count = ss->weighted ? a->e_count : a->u_count; + double a_count = a->count[ss->weighting]; return a_count ? s->count / a_count * 100 : SYSMIS; } case CTSF_areaPCT_VALIDN: { const struct ctables_area *a = cell->areas[ss->calc_area]; - double a_valid = ss->weighted ? a->e_valid : a->u_valid; + double a_valid = a->valid[ss->weighting]; return a_valid ? s->count / a_valid * 100 : SYSMIS; } case CTSF_areaPCT_TOTALN: { const struct ctables_area *a = cell->areas[ss->calc_area]; - double a_total = ss->weighted ? a->e_total : a->u_total; + double a_total = a->total[ss->weighting]; return a_total ? s->count / a_total * 100 : SYSMIS; } case CTSF_MISSING: case CTSF_TOTALN: - case CTSF_ETOTALN: case CTSF_VALIDN: - case CTSF_EVALIDN: return s->count; case CTSF_MAXIMUM: @@ -2831,7 +2839,7 @@ ctables_summary_value (const struct ctables_cell *cell, const struct ctables_area *a = cell->areas[ss->calc_area]; const struct ctables_sum *sum = &a->sums[ss->sum_var_idx]; - double denom = ss->weighted ? sum->e_sum : sum->u_sum; + double denom = sum->sum[ss->weighting]; return denom != 0 ? weight * mean / denom * 100 : SYSMIS; } @@ -2974,24 +2982,6 @@ ctables_cell_compare_leaf_3way (const void *a_, const void *b_, return 0; } -/* Algorithm: - - For each row: - For each ctables_table: - For each combination of row vars: - For each combination of column vars: - For each combination of layer vars: - Add entry - Make a table of row values: - Sort entries by row values - Assign a 0-based index to each actual value - Construct a dimension - Make a table of column values - Make a table of layer values - For each entry: - Fill the table entry using the indexes from before. - */ - static struct ctables_area * ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell, enum ctables_area_type area) @@ -3144,7 +3134,7 @@ ctables_categories_total (const struct ctables_categories *c) static struct ctables_cell * ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, - const struct ctables_category *cats[PIVOT_N_AXES][10]) + const struct ctables_category **cats[PIVOT_N_AXES]) { size_t hash = 0; enum ctables_summary_variant sv = CSV_CELL; @@ -3215,8 +3205,6 @@ ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, || cat->type == CCT_SUBTOTAL || cat->type == CCT_POSTCOMPUTE) { - /* XXX these should be more encompassing I think.*/ - switch (a) { case PIVOT_AXIS_COLUMN: @@ -3280,15 +3268,9 @@ ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, } static bool -is_scale_missing (const struct ctables_summary_spec_set *specs, - const struct ccase *c) +is_listwise_missing (const struct ctables_summary_spec_set *specs, + const struct ccase *c) { - if (!specs->is_scale) - return false; - - if (var_is_num_missing (specs->var, case_num (c, specs->var))) - return true; - for (size_t i = 0; i < specs->n_listwise_vars; i++) { const struct variable *var = specs->listwise_vars[i]; @@ -3299,63 +3281,59 @@ is_scale_missing (const struct ctables_summary_spec_set *specs, return false; } +static void +add_weight (double dst[N_CTWS], const double src[N_CTWS]) +{ + for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++) + dst[wt] += src[wt]; +} + static void ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, - const struct ctables_category *cats[PIVOT_N_AXES][10], - bool is_missing, bool excluded_missing, - double d_weight, double e_weight) + const struct ctables_category **cats[PIVOT_N_AXES], + bool is_included, double weight[N_CTWS]) { struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats); const struct ctables_nest *ss = s->nests[s->table->summary_axis]; const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv]; + const union value *value = case_data (c, specs->var); + bool is_missing = var_is_value_missing (specs->var, value); + bool scale_missing = specs->is_scale && (is_missing || is_listwise_missing (specs, c)); - bool scale_missing = is_scale_missing (specs, c); for (size_t i = 0; i < specs->n; i++) - ctables_summary_add (&cell->summaries[i], &specs->specs[i], - specs->var, case_data (c, specs->var), specs->is_scale, - scale_missing, is_missing, excluded_missing, - d_weight, e_weight); + ctables_summary_add (&cell->summaries[i], &specs->specs[i], value, + specs->is_scale, scale_missing, is_missing, + is_included, weight[specs->specs[i].weighting]); for (enum ctables_area_type at = 0; at < N_CTATS; at++) if (!(cell->omit_areas && (1u << at))) { struct ctables_area *a = cell->areas[at]; - a->d_total += d_weight; - a->e_total += e_weight; - a->u_total += 1.0; - if (!excluded_missing) - { - a->d_count += d_weight; - a->e_count += e_weight; - a->u_count += 1.0; - } + + add_weight (a->total, weight); + if (is_included) + add_weight (a->count, weight); if (!is_missing) { - a->d_valid += d_weight; - a->e_valid += e_weight; - a->u_count += 1.0; + add_weight (a->valid, weight); - for (size_t i = 0; i < s->table->n_sum_vars; i++) - { - /* XXX listwise_missing??? */ - const struct variable *var = s->table->sum_vars[i]; - double addend = case_num (c, var); - if (!var_is_num_missing (var, addend)) - { - struct ctables_sum *sum = &a->sums[i]; - sum->e_sum += addend * e_weight; - sum->u_sum += addend; - } - } + if (!scale_missing) + for (size_t i = 0; i < s->table->n_sum_vars; i++) + { + const struct variable *var = s->table->sum_vars[i]; + double addend = case_num (c, var); + if (!var_is_num_missing (var, addend)) + for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++) + a->sums[i].sum[wt] += addend * weight[wt]; + } } } } static void recurse_totals (struct ctables_section *s, const struct ccase *c, - const struct ctables_category *cats[PIVOT_N_AXES][10], - bool is_missing, bool excluded_missing, - double d_weight, double e_weight, + const struct ctables_category **cats[PIVOT_N_AXES], + bool is_included, double weight[N_CTWS], enum pivot_axis_type start_axis, size_t start_nest) { for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) @@ -3374,10 +3352,8 @@ recurse_totals (struct ctables_section *s, const struct ccase *c, { const struct ctables_category *save = cats[a][i]; cats[a][i] = total; - ctables_cell_add__ (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight); - recurse_totals (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight, a, i + 1); + ctables_cell_add__ (s, c, cats, is_included, weight); + recurse_totals (s, c, cats, is_included, weight, a, i + 1); cats[a][i] = save; } } @@ -3387,9 +3363,8 @@ recurse_totals (struct ctables_section *s, const struct ccase *c, static void recurse_subtotals (struct ctables_section *s, const struct ccase *c, - const struct ctables_category *cats[PIVOT_N_AXES][10], - bool is_missing, bool excluded_missing, - double d_weight, double e_weight, + const struct ctables_category **cats[PIVOT_N_AXES], + bool is_included, double weight[N_CTWS], enum pivot_axis_type start_axis, size_t start_nest) { for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) @@ -3404,10 +3379,8 @@ recurse_subtotals (struct ctables_section *s, const struct ccase *c, if (save->subtotal) { cats[a][i] = save->subtotal; - ctables_cell_add__ (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight); - recurse_subtotals (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight, a, i + 1); + ctables_cell_add__ (s, c, cats, is_included, weight); + recurse_subtotals (s, c, cats, is_included, weight, a, i + 1); cats[a][i] = save; } } @@ -3435,53 +3408,51 @@ ctables_add_occurrence (const struct variable *var, } static void -ctables_cell_insert (struct ctables_section *s, - const struct ccase *c, - double d_weight, double e_weight) +ctables_cell_insert (struct ctables_section *s, const struct ccase *c, + double weight[N_CTWS]) { - const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */ - - /* Does at least one categorical variable have a missing value in an included - or excluded category? */ - bool is_missing = false; + const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n]; + const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n]; + const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n]; + const struct ctables_category **cats[PIVOT_N_AXES] = + { + [PIVOT_AXIS_LAYER] = layer_cats, + [PIVOT_AXIS_ROW] = row_cats, + [PIVOT_AXIS_COLUMN] = column_cats, + }; - /* Does at least one categorical variable have a missing value in an excluded - category? */ - bool excluded_missing = false; + bool is_included = true; for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { const struct ctables_nest *nest = s->nests[a]; for (size_t i = 0; i < nest->n; i++) - { - if (i == nest->scale_idx) - continue; - - const struct variable *var = nest->vars[i]; - const union value *value = case_data (c, var); - - bool var_missing = var_is_value_missing (var, value) != 0; - if (var_missing) - is_missing = true; - - cats[a][i] = ctables_categories_match ( - s->table->categories[var_get_dict_index (var)], value, var); - if (!cats[a][i]) - { - if (!var_missing) - return; + if (i != nest->scale_idx) + { + const struct variable *var = nest->vars[i]; + const union value *value = case_data (c, var); - static const struct ctables_category cct_excluded_missing = { - .type = CCT_EXCLUDED_MISSING, - .hide = true, - }; - cats[a][i] = &cct_excluded_missing; - excluded_missing = true; - } + cats[a][i] = ctables_categories_match ( + s->table->categories[var_get_dict_index (var)], value, var); + if (!cats[a][i]) + { + if (i != nest->summary_idx) + return; + + if (!var_is_value_missing (var, value)) + return; + + static const struct ctables_category cct_excluded_missing = { + .type = CCT_EXCLUDED_MISSING, + .hide = true, + }; + cats[a][i] = &cct_excluded_missing; + is_included = false; + } } } - if (!excluded_missing) + if (is_included) for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { const struct ctables_nest *nest = s->nests[a]; @@ -3494,16 +3465,9 @@ ctables_cell_insert (struct ctables_section *s, } } - ctables_cell_add__ (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight); - - //if (!excluded_missing) - { - recurse_totals (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight, 0, 0); - recurse_subtotals (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight, 0, 0); - } + ctables_cell_add__ (s, c, cats, is_included, weight); + recurse_totals (s, c, cats, is_included, weight, 0, 0); + recurse_subtotals (s, c, cats, is_included, weight, 0, 0); } struct merge_item @@ -3519,8 +3483,8 @@ merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b) const struct ctables_summary_spec *bs = &b->set->specs[b->ofs]; if (as->function != bs->function) return as->function > bs->function ? 1 : -1; - else if (as->weighted != bs->weighted) - return as->weighted > bs->weighted ? 1 : -1; + else if (as->weighting != bs->weighting) + return as->weighting > bs->weighting ? 1 : -1; else if (as->calc_area != bs->calc_area) return as->calc_area > bs->calc_area ? 1 : -1; else if (as->percentile != bs->percentile) @@ -3726,8 +3690,8 @@ ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a, for (size_t i = 0; i < nest->n; i++) hmap_init (&s->occurrences[a][i]); } - for (size_t i = 0; i < N_CTATS; i++) - hmap_init (&s->areas[i]); + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + hmap_init (&s->areas[at]); } } @@ -4017,7 +3981,7 @@ ctables_cell_calculate_postcompute (const struct ctables_section *s, { const struct ctables_summary_spec *ss2 = &pc->specs->specs[i]; if (ss->function == ss2->function - && ss->weighted == ss2->weighted + && ss->weighting == ss2->weighting && ss->calc_area == ss2->calc_area && ss->percentile == ss2->percentile) { @@ -4465,7 +4429,7 @@ ctables_table_output (struct ctables *ct, struct ctables_table *t) struct pivot_value *value; if (ct->hide_threshold != 0 && d < ct->hide_threshold - && ctables_summary_function_is_count (ss->function)) + && ss->function == CTSF_COUNT) { value = pivot_value_new_user_text_nocopy ( xasprintf ("<%d", ct->hide_threshold)); @@ -4665,95 +4629,76 @@ ctables_prepare_table (struct ctables_table *t) nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]); nest->n_areas[at] = 0; - bool add_vars = (at == CTAT_LAYER ? a == PIVOT_AXIS_LAYER - : at == CTAT_LAYERROW ? a != PIVOT_AXIS_COLUMN - : at == CTAT_LAYERCOL ? a != PIVOT_AXIS_ROW - : at == CTAT_TABLE ? false - : true); - if (add_vars) - for (size_t k = 0; k < nest->n; k++) - { - if (k == nest->scale_idx) - continue; - nest->areas[at][nest->n_areas[at]++] = k; - } - else if ((at == CTAT_LAYERCOL && a == PIVOT_AXIS_ROW && t->label_axis[a] != a) - || (at == CTAT_LAYERROW && a == PIVOT_AXIS_COLUMN && t->label_axis[a] != a) - || (at == CTAT_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER)) - { - for (size_t k = nest->n - 1; k < nest->n; k--) - { - if (k == nest->scale_idx) - continue; - nest->areas[at][nest->n_areas[at]++] = k; - break; - } - continue; - } - - size_t n_drop; - bool drop_inner = false; - if (at == CTAT_SUBTABLE) + enum pivot_axis_type ata, atb; + if (at == CTAT_ROW || at == CTAT_LAYERROW) { - if (t->clabels_from_axis != PIVOT_AXIS_LAYER) - { - if (t->clabels_to_axis != PIVOT_AXIS_LAYER) - n_drop = a == t->clabels_from_axis ? 2 : 0; - else - { - drop_inner = a == t->clabels_from_axis; - n_drop = a != t->clabels_from_axis && a != PIVOT_AXIS_LAYER; - } - } - else - n_drop = a != PIVOT_AXIS_LAYER; + ata = PIVOT_AXIS_ROW; + atb = PIVOT_AXIS_COLUMN; } - else if (at == CTAT_ROW && a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN) - n_drop = 0; - else if (at == CTAT_ROW && a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW) + else if (at == CTAT_COL || at == CTAT_LAYERCOL) { - drop_inner = true; - n_drop = 0; + ata = PIVOT_AXIS_COLUMN; + atb = PIVOT_AXIS_ROW; } - else if (at == CTAT_ROW && a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_LAYER) - { - drop_inner = true; - n_drop = 0; - } - else if (at == CTAT_COL && a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN) - { - drop_inner = true; - n_drop = 0; - } - else if (at == CTAT_COL && a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW) - n_drop = 0; - else if (at == CTAT_COL && a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_LAYER) - { - drop_inner = true; - n_drop = 0; - } - else if (at == CTAT_LAYERROW) - n_drop = a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN; - else if (at == CTAT_LAYERCOL) - n_drop = a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW; - else if (at == CTAT_ROW) - { - n_drop = a == PIVOT_AXIS_COLUMN; - if (a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN) - n_drop++; - } - else if (at == CTAT_COL) + + if (at == CTAT_LAYER + ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER + : at == CTAT_LAYERCOL || at == CTAT_LAYERROW + ? a == atb && t->label_axis[a] != a + : false) { - n_drop = a == PIVOT_AXIS_ROW; - if (a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW) - n_drop++; + for (size_t k = nest->n - 1; k < nest->n; k--) + if (k != nest->scale_idx) + { + nest->areas[at][nest->n_areas[at]++] = k; + break; + } + continue; } - else + + if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER + : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb + : at == CTAT_TABLE ? true + : false) + continue; + + for (size_t k = 0; k < nest->n; k++) + if (k != nest->scale_idx) + nest->areas[at][nest->n_areas[at]++] = k; + + int n_drop; + switch (at) { + case CTAT_SUBTABLE: +#define L PIVOT_AXIS_LAYER + n_drop = (t->clabels_from_axis == L ? a != L + : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L) + : t->clabels_from_axis == a ? 2 + : 0); +#undef L + break; + + case CTAT_LAYERROW: + case CTAT_LAYERCOL: + n_drop = a == ata && t->label_axis[ata] == atb; + break; + + case CTAT_ROW: + case CTAT_COL: + n_drop = (a == ata ? t->label_axis[ata] == atb + : a != atb ? 0 + : t->clabels_from_axis == atb ? -1 + : t->clabels_to_axis != atb ? 1 + : 0); + break; + + case CTAT_LAYER: + case CTAT_TABLE: n_drop = 0; + break; } - if (drop_inner) + if (n_drop < 0) { size_t n = nest->n_areas[at]; if (n > 1) @@ -4762,16 +4707,23 @@ ctables_prepare_table (struct ctables_table *t) nest->n_areas[at]--; } } - for (size_t i = 0; i < n_drop; i++) - if (nest->n_areas[at] > 0) - nest->n_areas[at]--; + else + { + for (int i = 0; i < n_drop; i++) + if (nest->n_areas[at] > 0) + nest->n_areas[at]--; + } } } } else { struct ctables_nest *nest = xmalloc (sizeof *nest); - *nest = (struct ctables_nest) { .n = 0 }; + *nest = (struct ctables_nest) { + .n = 0, + .scale_idx = SIZE_MAX, + .summary_idx = SIZE_MAX + }; t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 }; /* There's no point in moving labels away from an axis that has no @@ -4785,20 +4737,23 @@ ctables_prepare_table (struct ctables_table *t) struct ctables_nest *nest = &stack->nests[i]; if (!nest->specs[CSV_CELL].n) { - struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL]; - specs->specs = xmalloc (sizeof *specs->specs); - specs->n = 1; + struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL]; + ss->specs = xmalloc (sizeof *ss->specs); + ss->n = 1; enum ctables_summary_function function - = specs->is_scale ? CTSF_MEAN : CTSF_COUNT; + = ss->is_scale ? CTSF_MEAN : CTSF_COUNT; - *specs->specs = (struct ctables_summary_spec) { + if (!ss->var) + { + nest->summary_idx = nest->n - 1; + ss->var = nest->vars[nest->summary_idx]; + } + *ss->specs = (struct ctables_summary_spec) { .function = function, - .weighted = true, - .format = ctables_summary_default_format (function, specs->var), + .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY, + .format = ctables_summary_default_format (function, ss->var), }; - if (!specs->var) - specs->var = nest->vars[0]; ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL], &nest->specs[CSV_CELL]); @@ -5066,7 +5021,7 @@ ctables_add_category_occurrences (const struct variable *var, static void ctables_section_recurse_add_empty_categories ( struct ctables_section *s, - const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c, + const struct ctables_category **cats[PIVOT_N_AXES], struct ccase *c, enum pivot_axis_type a, size_t a_idx) { if (a >= PIVOT_N_AXES) @@ -5124,7 +5079,15 @@ ctables_section_add_empty_categories (struct ctables_section *s) if (!show_empty) return; - const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */ + const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n]; + const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n]; + const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n]; + const struct ctables_category **cats[PIVOT_N_AXES] = + { + [PIVOT_AXIS_LAYER] = layer_cats, + [PIVOT_AXIS_ROW] = row_cats, + [PIVOT_AXIS_COLUMN] = column_cats, + }; struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict)); ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0); case_unref (c); @@ -5205,8 +5168,8 @@ ctables_section_uninit (struct ctables_section *s) } hmap_destroy (&s->cells); - for (size_t i = 0; i < N_CTATS; i++) - hmap_destroy (&s->areas[i]); + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + hmap_destroy (&s->areas[at]); } static void @@ -5273,19 +5236,24 @@ ctables_execute (struct dataset *ds, struct casereader *input, for (struct ccase *c = casereader_read (group); c; case_unref (c), c = casereader_read (group)) { - double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid); + double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid); double e_weight = (ct->e_weight ? var_force_valid_weight (ct->e_weight, case_num (c, ct->e_weight), &warn_on_invalid) : d_weight); + double weight[] = { + [CTW_DICTIONARY] = d_weight, + [CTW_EFFECTIVE] = e_weight, + [CTW_UNWEIGHTED] = 1.0, + }; for (size_t i = 0; i < ct->n_tables; i++) { struct ctables_table *t = ct->tables[i]; for (size_t j = 0; j < t->n_sections; j++) - ctables_cell_insert (&t->sections[j], c, d_weight, e_weight); + ctables_cell_insert (&t->sections[j], c, weight); for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) if (t->label_axis[a] != a) @@ -5783,9 +5751,9 @@ ctables_parse_pproperties_format (struct lexer *lexer, { /* Parse function. */ enum ctables_summary_function function; - bool weighted; + enum ctables_weighting weighting; enum ctables_area_type area; - if (!parse_ctables_summary_function (lexer, &function, &weighted, &area)) + if (!parse_ctables_summary_function (lexer, &function, &weighting, &area)) goto error; /* Parse percentile. */ @@ -5809,7 +5777,7 @@ ctables_parse_pproperties_format (struct lexer *lexer, sizeof *sss->specs); sss->specs[sss->n++] = (struct ctables_summary_spec) { .function = function, - .weighted = weighted, + .weighting = weighting, .calc_area = area, .user_area = area, .percentile = percentile,