X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fctables.c;h=68d3e852bada61724c5fcfd5f45696eca28c7903;hb=981adc6169ffe7227de286f92f70edf684d37a2b;hp=621b9bab50598b9c53b047b6da94d30d66b99dbc;hpb=b7f672dd5f58c5891d7845871a13f20240eb9edf;p=pspp diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index 621b9bab50..68d3e852ba 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -63,18 +63,32 @@ enum ctables_vlabel CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH, }; +enum ctables_weighting + { + CTW_EFFECTIVE, + CTW_DICTIONARY, + CTW_UNWEIGHTED +#define N_CTWS 3 + }; + enum ctables_function_type { - /* A function that operates on data in a single cell. The function does - not have an unweighted version. */ + /* A function that operates on data in a single cell. It operates on + effective weights. It does not have an unweighted version. */ CTFT_CELL, - /* A function that operates on data in a single cell. The function has an - unweighted version. */ + /* A function that operates on data in a single cell. The function + operates on effective weights and has a U-prefixed unweighted + version. */ CTFT_UCELL, - /* A function that operates on an area of cells. The function has an - unweighted version. */ + /* A function that operates on data in a single cell. It operates on + dictionary weights, and has U-prefixed unweighted version and an + E-prefixed effective weight version. */ + CTFT_UECELL, + + /* A function that operates on an area of cells. It operates on effective + weights and has a U-prefixed unweighted version. */ CTFT_AREA, }; @@ -113,8 +127,9 @@ struct ctables_function_info enum ctables_format format; enum ctables_function_availability availability; - bool may_be_unweighted; - bool is_area; + bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */ + bool e_prefix; /* Accepts an 'E' prefix (for effective)? */ + bool is_area; /* Needs an area prefix. */ }; static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = { #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \ @@ -123,15 +138,14 @@ static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS .type = TYPE, \ .format = FORMAT, \ .availability = AVAILABILITY, \ - .may_be_unweighted = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_AREA, \ + .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \ + .e_prefix = (TYPE) == CTFT_UECELL, \ .is_area = (TYPE) == CTFT_AREA \ }, #include "ctables.inc" #undef S }; -static bool ctables_summary_function_is_count (enum ctables_summary_function); - enum ctables_area_type { /* Within a section, where stacked variables divide one section from @@ -169,22 +183,15 @@ struct ctables_area const struct ctables_cell *example; size_t sequence; - double d_valid; /* Dictionary weight. */ - double d_count; - double d_total; - double e_valid; /* Effective weight */ - double e_count; - double e_total; - double u_valid; /* Unweighted. */ - double u_count; - double u_total; + double count[N_CTWS]; + double valid[N_CTWS]; + double total[N_CTWS]; struct ctables_sum *sums; }; struct ctables_sum { - double e_sum; - double u_sum; + double sum[N_CTWS]; }; enum ctables_summary_variant @@ -222,8 +229,6 @@ struct ctables_cell axes[PIVOT_N_AXES]; union ctables_summary *summaries; - - //char *name; }; struct ctables @@ -543,7 +548,7 @@ struct ctables_category /* CCT_FUNCTION. */ enum ctables_summary_function sort_function; - bool weighted; + enum ctables_weighting weighting; enum ctables_area_type area; struct variable *sort_var; double percentile; @@ -746,7 +751,7 @@ struct ctables_summary_spec cell, it must be 0). For CTSF_PTILE only, 'percentile' is the percentile between 0 and 100 (for other functions it must be 0). */ enum ctables_summary_function function; - bool weighted; + enum ctables_weighting weighting; enum ctables_area_type calc_area; double percentile; /* CTSF_PTILE only. */ @@ -856,23 +861,27 @@ ctables_function_availability (enum ctables_summary_function f) return availability[f]; } -static bool -ctables_summary_function_is_count (enum ctables_summary_function f) -{ - return f == CTSF_COUNT || f == CTSF_ECOUNT; -} - static bool parse_ctables_summary_function (struct lexer *lexer, enum ctables_summary_function *function, - bool *weighted, + enum ctables_weighting *weighting, enum ctables_area_type *area) { if (!lex_force_id (lexer)) return false; struct substring name = lex_tokss (lexer); - *weighted = !(ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u')); + if (ss_ends_with_case (name, ss_cstr (".LCL")) + || ss_ends_with_case (name, ss_cstr (".UCL")) + || ss_ends_with_case (name, ss_cstr (".SE"))) + { + lex_error (lexer, _("Support for LCL, UCL, and SE summary functions " + "is not yet implemented.")); + return false; + } + + bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u'); + bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e')); bool has_area = false; *area = 0; @@ -886,6 +895,7 @@ parse_ctables_summary_function (struct lexer *lexer, { /* Special case where .COUNT suffix is omitted. */ *function = CTSF_areaPCT_COUNT; + *weighting = CTW_EFFECTIVE; lex_get (lexer); return true; } @@ -898,11 +908,13 @@ parse_ctables_summary_function (struct lexer *lexer, if (ss_equals_case (cfi->basename, name)) { *function = f; - if (!*weighted && !cfi->may_be_unweighted) - break; - if (has_area != cfi->is_area) + if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area)) break; + *weighting = (e ? CTW_EFFECTIVE + : u ? CTW_UNWEIGHTED + : cfi->e_prefix ? CTW_DICTIONARY + : CTW_EFFECTIVE); lex_get (lexer); return true; } @@ -986,15 +998,15 @@ ctables_summary_default_format (enum ctables_summary_function function, static const char * ctables_summary_label__ (const struct ctables_summary_spec *spec) { - bool w = spec->weighted; + bool w = spec->weighting != CTW_UNWEIGHTED; + bool d = spec->weighting == CTW_DICTIONARY; enum ctables_area_type a = spec->user_area; switch (spec->function) { case CTSF_COUNT: - return w ? N_("Count") : N_("Unweighted Count"); - - case CTSF_ECOUNT: - return N_("Adjusted Count"); + return (d ? N_("Count") + : w ? N_("Adjusted Count") + : N_("Unweighted Count")); case CTSF_areaPCT_COUNT: switch (a) @@ -1037,20 +1049,22 @@ ctables_summary_label__ (const struct ctables_summary_spec *spec) case CTSF_MAXIMUM: return N_("Maximum"); case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean"); - case CTSF_MEDIAN: return N_("Median"); + case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median"); case CTSF_MINIMUM: return N_("Minimum"); - case CTSF_MISSING: return N_("Missing"); - case CTSF_MODE: return N_("Mode"); + case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing"); + case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode"); case CTSF_PTILE: NOT_REACHED (); case CTSF_RANGE: return N_("Range"); - case CTSF_SEMEAN: return N_("Std Error of Mean"); - case CTSF_STDDEV: return N_("Std Deviation"); - case CTSF_SUM: return N_("Sum"); - case CTSF_TOTALN: return N_("Total N"); - case CTSF_ETOTALN: return N_("Adjusted Total N"); - case CTSF_VALIDN: return N_("Valid N"); - case CTSF_EVALIDN: return N_("Adjusted Valid N"); - case CTSF_VARIANCE: return N_("Variance"); + case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean"); + case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation"); + case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum"); + case CTSF_TOTALN: return (d ? N_("Total N") + : w ? N_("Adjusted Total N") + : N_("Unweighted Total N")); + case CTSF_VALIDN: return (d ? N_("Valid N") + : w ? N_("Adjusted Valid N") + : N_("Unweighted Valid N")); + case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance"); case CTSF_areaPCT_SUM: switch (a) { @@ -1090,7 +1104,7 @@ ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel) if (spec->function == CTSF_PTILE) { double p = spec->percentile; - char *s = (spec->weighted + char *s = (spec->weighting != CTW_UNWEIGHTED ? xasprintf (_("Percentile %.2f"), p) : xasprintf (_("Unweighted Percentile %.2f"), p)); return pivot_value_new_user_text_nocopy (s); @@ -1120,13 +1134,16 @@ ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel) static const char * ctables_summary_function_name (enum ctables_summary_function function, - bool weighted, + enum ctables_weighting weighting, enum ctables_area_type area, char *buffer, size_t bufsize) { const struct ctables_function_info *cfi = &ctables_function_info[function]; snprintf (buffer, bufsize, "%s%s%s", - weighted ? "" : "U", + (weighting == CTW_UNWEIGHTED ? "U" + : weighting == CTW_DICTIONARY ? "" + : cfi->e_prefix ? "E" + : ""), cfi->is_area ? ctables_area_type_name[area] : "", cfi->basename.string); return buffer; @@ -1134,7 +1151,8 @@ ctables_summary_function_name (enum ctables_summary_function function, static bool add_summary_spec (struct ctables_axis *axis, - enum ctables_summary_function function, bool weighted, + enum ctables_summary_function function, + enum ctables_weighting weighting, enum ctables_area_type area, double percentile, const char *label, const struct fmt_spec *format, bool is_ctables_format, const struct msg_location *loc, @@ -1143,7 +1161,7 @@ add_summary_spec (struct ctables_axis *axis, if (axis->op == CTAO_VAR) { char function_name[128]; - ctables_summary_function_name (function, weighted, area, + ctables_summary_function_name (function, weighting, area, function_name, sizeof function_name); const char *var_name = var_get_name (axis->var); switch (ctables_function_availability (function)) @@ -1181,7 +1199,7 @@ add_summary_spec (struct ctables_axis *axis, struct ctables_summary_spec *dst = &set->specs[set->n++]; *dst = (struct ctables_summary_spec) { .function = function, - .weighted = weighted, + .weighting = weighting, .calc_area = area, .user_area = area, .percentile = percentile, @@ -1195,7 +1213,7 @@ add_summary_spec (struct ctables_axis *axis, else { for (size_t i = 0; i < 2; i++) - if (!add_summary_spec (axis->subs[i], function, weighted, area, + if (!add_summary_spec (axis->subs[i], function, weighting, area, percentile, label, format, is_ctables_format, loc, sv)) return false; @@ -1224,6 +1242,13 @@ ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx) if (!lex_force_id (ctx->lexer)) return NULL; + if (lex_tokcstr (ctx->lexer)[0] == '$') + { + lex_error (ctx->lexer, + _("Multiple response set support not implemented.")); + return NULL; + } + int start_ofs = lex_ofs (ctx->lexer); struct variable *var = parse_variable (ctx->lexer, ctx->dict); if (!var) @@ -1313,9 +1338,9 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx) /* Parse function. */ enum ctables_summary_function function; - bool weighted; + enum ctables_weighting weighting; enum ctables_area_type area; - if (!parse_ctables_summary_function (ctx->lexer, &function, &weighted, + if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting, &area)) goto error; @@ -1357,7 +1382,7 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx) struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs, lex_ofs (ctx->lexer) - 1); - add_summary_spec (sub, function, weighted, area, percentile, label, + add_summary_spec (sub, function, weighting, area, percentile, label, formatp, is_ctables_format, loc, sv); free (label); msg_location_destroy (loc); @@ -2098,6 +2123,7 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict, } else if (!c->n_cats && lex_match_id (lexer, "KEY")) { + int start_ofs = lex_ofs (lexer) - 1; lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "VALUE")) cat.type = CCT_VALUE; @@ -2107,7 +2133,7 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict, { cat.type = CCT_FUNCTION; if (!parse_ctables_summary_function (lexer, &cat.sort_function, - &cat.weighted, &cat.area)) + &cat.weighting, &cat.area)) goto error; if (lex_match (lexer, T_LPAREN)) @@ -2134,6 +2160,10 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict, bool UNUSED b = lex_force_match (lexer, T_LPAREN); goto error; } + + lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1, + _("Data-dependent sorting is not implemented.")); + goto error; } } else if (!c->n_cats && lex_match_id (lexer, "MISSING")) @@ -2524,8 +2554,6 @@ union ctables_summary double ovalid; double ovalue; }; - - /* XXX multiple response */ }; static void @@ -2535,15 +2563,12 @@ ctables_summary_init (union ctables_summary *s, switch (ss->function) { case CTSF_COUNT: - case CTSF_ECOUNT: case CTSF_areaPCT_COUNT: case CTSF_areaPCT_VALIDN: case CTSF_areaPCT_TOTALN: case CTSF_MISSING: case CTSF_TOTALN: - case CTSF_ETOTALN: case CTSF_VALIDN: - case CTSF_EVALIDN: s->count = 0; break; @@ -2596,15 +2621,12 @@ ctables_summary_uninit (union ctables_summary *s, switch (ss->function) { case CTSF_COUNT: - case CTSF_ECOUNT: case CTSF_areaPCT_COUNT: case CTSF_areaPCT_VALIDN: case CTSF_areaPCT_TOTALN: case CTSF_MISSING: case CTSF_TOTALN: - case CTSF_ETOTALN: case CTSF_VALIDN: - case CTSF_EVALIDN: break; case CTSF_areaID: @@ -2635,10 +2657,10 @@ ctables_summary_uninit (union ctables_summary *s, static void ctables_summary_add (union ctables_summary *s, const struct ctables_summary_spec *ss, - const struct variable *var, const union value *value, + const union value *value, bool is_scale, bool is_scale_missing, bool is_missing, bool is_included, - double d_weight, double e_weight) + double weight) { /* To determine whether a case is included in a given table for a particular kind of summary, consider the following charts for each variable in the @@ -2661,35 +2683,35 @@ ctables_summary_add (union ctables_summary *s, switch (ss->function) { case CTSF_TOTALN: - s->count += ss->weighted ? d_weight : 1.0; + s->count += weight; break; case CTSF_areaPCT_TOTALN: - s->count += ss->weighted ? e_weight : 1.0; + s->count += weight; break; case CTSF_COUNT: if (is_scale || is_included) - s->count += ss->weighted ? d_weight : 1.0; + s->count += weight; break; case CTSF_areaPCT_COUNT: if (is_scale || is_included) - s->count += ss->weighted ? e_weight : 1.0; + s->count += weight; break; case CTSF_VALIDN: if (is_scale ? !is_scale_missing : !is_missing) - s->count += ss->weighted ? d_weight : 1.0; + s->count += weight; break; case CTSF_areaPCT_VALIDN: if (is_scale ? !is_scale_missing : !is_missing) - s->count += ss->weighted ? e_weight : 1.0; + s->count += weight; break; case CTSF_areaID: @@ -2699,23 +2721,7 @@ ctables_summary_add (union ctables_summary *s, if (is_scale ? is_scale_missing : is_missing) - s->count += ss->weighted ? e_weight : 1.0; - break; - - case CTSF_ECOUNT: - if (is_scale || is_included) - s->count += e_weight; - break; - - case CTSF_EVALIDN: - if (is_scale - ? !is_scale_missing - : !is_missing) - s->count += e_weight; - break; - - case CTSF_ETOTALN: - s->count += e_weight; + s->count += weight; break; case CTSF_MAXIMUM: @@ -2723,7 +2729,6 @@ ctables_summary_add (union ctables_summary *s, case CTSF_RANGE: if (!is_scale_missing) { - assert (!var_is_alpha (var)); /* XXX? */ if (s->min == SYSMIS || value->f < s->min) s->min = value->f; if (s->max == SYSMIS || value->f > s->max) @@ -2737,12 +2742,12 @@ ctables_summary_add (union ctables_summary *s, case CTSF_SUM: case CTSF_VARIANCE: if (!is_scale_missing) - moments1_add (s->moments, value->f, ss->weighted ? e_weight : 1.0); + moments1_add (s->moments, value->f, weight); break; case CTSF_areaPCT_SUM: if (!is_missing && !is_scale_missing) - moments1_add (s->moments, value->f, ss->weighted ? e_weight : 1.0); + moments1_add (s->moments, value->f, weight); break; case CTSF_MEDIAN: @@ -2750,12 +2755,11 @@ ctables_summary_add (union ctables_summary *s, case CTSF_PTILE: if (!is_scale_missing) { - double w = ss->weighted ? e_weight : 1.0; - s->ovalid += w; + s->ovalid += weight; struct ccase *c = case_create (casewriter_get_proto (s->writer)); *case_num_rw_idx (c, 0) = value->f; - *case_num_rw_idx (c, 1) = w; + *case_num_rw_idx (c, 1) = weight; casewriter_write (s->writer, c); } break; @@ -2770,7 +2774,6 @@ ctables_summary_value (const struct ctables_cell *cell, switch (ss->function) { case CTSF_COUNT: - case CTSF_ECOUNT: return s->count; case CTSF_areaID: @@ -2779,29 +2782,27 @@ ctables_summary_value (const struct ctables_cell *cell, case CTSF_areaPCT_COUNT: { const struct ctables_area *a = cell->areas[ss->calc_area]; - double a_count = ss->weighted ? a->e_count : a->u_count; + double a_count = a->count[ss->weighting]; return a_count ? s->count / a_count * 100 : SYSMIS; } case CTSF_areaPCT_VALIDN: { const struct ctables_area *a = cell->areas[ss->calc_area]; - double a_valid = ss->weighted ? a->e_valid : a->u_valid; + double a_valid = a->valid[ss->weighting]; return a_valid ? s->count / a_valid * 100 : SYSMIS; } case CTSF_areaPCT_TOTALN: { const struct ctables_area *a = cell->areas[ss->calc_area]; - double a_total = ss->weighted ? a->e_total : a->u_total; + double a_total = a->total[ss->weighting]; return a_total ? s->count / a_total * 100 : SYSMIS; } case CTSF_MISSING: case CTSF_TOTALN: - case CTSF_ETOTALN: case CTSF_VALIDN: - case CTSF_EVALIDN: return s->count; case CTSF_MAXIMUM: @@ -2857,7 +2858,7 @@ ctables_summary_value (const struct ctables_cell *cell, const struct ctables_area *a = cell->areas[ss->calc_area]; const struct ctables_sum *sum = &a->sums[ss->sum_var_idx]; - double denom = ss->weighted ? sum->e_sum : sum->u_sum; + double denom = sum->sum[ss->weighting]; return denom != 0 ? weight * mean / denom * 100 : SYSMIS; } @@ -3000,24 +3001,6 @@ ctables_cell_compare_leaf_3way (const void *a_, const void *b_, return 0; } -/* Algorithm: - - For each row: - For each ctables_table: - For each combination of row vars: - For each combination of column vars: - For each combination of layer vars: - Add entry - Make a table of row values: - Sort entries by row values - Assign a 0-based index to each actual value - Construct a dimension - Make a table of column values - Make a table of layer values - For each entry: - Fill the table entry using the indexes from before. - */ - static struct ctables_area * ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell, enum ctables_area_type area) @@ -3170,7 +3153,7 @@ ctables_categories_total (const struct ctables_categories *c) static struct ctables_cell * ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, - const struct ctables_category *cats[PIVOT_N_AXES][10]) + const struct ctables_category **cats[PIVOT_N_AXES]) { size_t hash = 0; enum ctables_summary_variant sv = CSV_CELL; @@ -3219,7 +3202,6 @@ ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, cell->sv = sv; cell->omit_areas = 0; cell->postcompute = false; - //struct string name = DS_EMPTY_INITIALIZER; for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { const struct ctables_nest *nest = s->nests[a]; @@ -3241,8 +3223,6 @@ ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, || cat->type == CCT_SUBTOTAL || cat->type == CCT_POSTCOMPUTE) { - /* XXX these should be more encompassing I think.*/ - switch (a) { case PIVOT_AXIS_COLUMN: @@ -3271,28 +3251,8 @@ ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, cell->axes[a].cvs[i].category = cat; value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var)); - -#if 0 - if (i != nest->scale_idx) - { - if (!ds_is_empty (&name)) - ds_put_cstr (&name, ", "); - char *value_s = data_out (value, var_get_encoding (var), - var_get_print_format (var), - settings_get_fmt_settings ()); - if (cat->type == CCT_TOTAL - || cat->type == CCT_SUBTOTAL - || cat->type == CCT_POSTCOMPUTE) - ds_put_format (&name, "%s=total", var_get_name (var)); - else - ds_put_format (&name, "%s=%s", var_get_name (var), - value_s + strspn (value_s, " ")); - free (value_s); - } -#endif } } - //cell->name = ds_steal_cstr (&name); const struct ctables_nest *ss = s->nests[s->table->summary_axis]; const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv]; @@ -3319,10 +3279,17 @@ is_listwise_missing (const struct ctables_summary_spec_set *specs, return false; } +static void +add_weight (double dst[N_CTWS], const double src[N_CTWS]) +{ + for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++) + dst[wt] += src[wt]; +} + static void ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, - const struct ctables_category *cats[PIVOT_N_AXES][10], - double d_weight, double e_weight) + const struct ctables_category **cats[PIVOT_N_AXES], + bool is_included, double weight[N_CTWS]) { struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats); const struct ctables_nest *ss = s->nests[s->table->summary_axis]; @@ -3330,64 +3297,41 @@ ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv]; const union value *value = case_data (c, specs->var); bool is_missing = var_is_value_missing (specs->var, value); - bool is_included; - bool scale_missing; - if (specs->is_scale) - { - is_included = !is_missing; - scale_missing = is_missing || is_listwise_missing (specs, c); - } - else - { - is_included = (cats[s->table->summary_axis][ss->summary_idx]->type - != CCT_EXCLUDED_MISSING); - scale_missing = false; - } + bool scale_missing = specs->is_scale && (is_missing || is_listwise_missing (specs, c)); for (size_t i = 0; i < specs->n; i++) - ctables_summary_add (&cell->summaries[i], &specs->specs[i], - specs->var, value, specs->is_scale, - scale_missing, is_missing, is_included, - d_weight, e_weight); + ctables_summary_add (&cell->summaries[i], &specs->specs[i], value, + specs->is_scale, scale_missing, is_missing, + is_included, weight[specs->specs[i].weighting]); for (enum ctables_area_type at = 0; at < N_CTATS; at++) if (!(cell->omit_areas && (1u << at))) { struct ctables_area *a = cell->areas[at]; - a->d_total += d_weight; - a->e_total += e_weight; - a->u_total += 1.0; + + add_weight (a->total, weight); if (is_included) - { - a->d_count += d_weight; - a->e_count += e_weight; - a->u_count += 1.0; - } + add_weight (a->count, weight); if (!is_missing) { - a->d_valid += d_weight; - a->e_valid += e_weight; - a->u_count += 1.0; + add_weight (a->valid, weight); - for (size_t i = 0; i < s->table->n_sum_vars; i++) - { - /* XXX listwise_missing??? */ - const struct variable *var = s->table->sum_vars[i]; - double addend = case_num (c, var); - if (!var_is_num_missing (var, addend)) - { - struct ctables_sum *sum = &a->sums[i]; - sum->e_sum += addend * e_weight; - sum->u_sum += addend; - } - } + if (!scale_missing) + for (size_t i = 0; i < s->table->n_sum_vars; i++) + { + const struct variable *var = s->table->sum_vars[i]; + double addend = case_num (c, var); + if (!var_is_num_missing (var, addend)) + for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++) + a->sums[i].sum[wt] += addend * weight[wt]; + } } } } static void recurse_totals (struct ctables_section *s, const struct ccase *c, - const struct ctables_category *cats[PIVOT_N_AXES][10], - double d_weight, double e_weight, + const struct ctables_category **cats[PIVOT_N_AXES], + bool is_included, double weight[N_CTWS], enum pivot_axis_type start_axis, size_t start_nest) { for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) @@ -3405,17 +3349,10 @@ recurse_totals (struct ctables_section *s, const struct ccase *c, if (total) { const struct ctables_category *save = cats[a][i]; - if (save->type != CCT_EXCLUDED_MISSING) - /* XXX ^^ this shows why we need to keep track of - 'excluded_missing' (or 'is_included') at a high level, - because it gets replaced by a total category. So we need to - restore that and plumb it through again. */ - { - cats[a][i] = total; - ctables_cell_add__ (s, c, cats, d_weight, e_weight); - recurse_totals (s, c, cats, d_weight, e_weight, a, i + 1); - cats[a][i] = save; - } + cats[a][i] = total; + ctables_cell_add__ (s, c, cats, is_included, weight); + recurse_totals (s, c, cats, is_included, weight, a, i + 1); + cats[a][i] = save; } } start_nest = 0; @@ -3424,8 +3361,8 @@ recurse_totals (struct ctables_section *s, const struct ccase *c, static void recurse_subtotals (struct ctables_section *s, const struct ccase *c, - const struct ctables_category *cats[PIVOT_N_AXES][10], - double d_weight, double e_weight, + const struct ctables_category **cats[PIVOT_N_AXES], + bool is_included, double weight[N_CTWS], enum pivot_axis_type start_axis, size_t start_nest) { for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) @@ -3440,8 +3377,8 @@ recurse_subtotals (struct ctables_section *s, const struct ccase *c, if (save->subtotal) { cats[a][i] = save->subtotal; - ctables_cell_add__ (s, c, cats, d_weight, e_weight); - recurse_subtotals (s, c, cats, d_weight, e_weight, a, i + 1); + ctables_cell_add__ (s, c, cats, is_included, weight); + recurse_subtotals (s, c, cats, is_included, weight, a, i + 1); cats[a][i] = save; } } @@ -3469,13 +3406,20 @@ ctables_add_occurrence (const struct variable *var, } static void -ctables_cell_insert (struct ctables_section *s, - const struct ccase *c, - double d_weight, double e_weight) +ctables_cell_insert (struct ctables_section *s, const struct ccase *c, + double weight[N_CTWS]) { - const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */ + const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n]; + const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n]; + const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n]; + const struct ctables_category **cats[PIVOT_N_AXES] = + { + [PIVOT_AXIS_LAYER] = layer_cats, + [PIVOT_AXIS_ROW] = row_cats, + [PIVOT_AXIS_COLUMN] = column_cats, + }; - bool excluded_missing = false; + bool is_included = true; for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { @@ -3501,12 +3445,12 @@ ctables_cell_insert (struct ctables_section *s, .hide = true, }; cats[a][i] = &cct_excluded_missing; - excluded_missing = true; + is_included = false; } } } - if (!excluded_missing) + if (is_included) for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { const struct ctables_nest *nest = s->nests[a]; @@ -3519,9 +3463,9 @@ ctables_cell_insert (struct ctables_section *s, } } - ctables_cell_add__ (s, c, cats, d_weight, e_weight); - recurse_totals (s, c, cats, d_weight, e_weight, 0, 0); - recurse_subtotals (s, c, cats, d_weight, e_weight, 0, 0); + ctables_cell_add__ (s, c, cats, is_included, weight); + recurse_totals (s, c, cats, is_included, weight, 0, 0); + recurse_subtotals (s, c, cats, is_included, weight, 0, 0); } struct merge_item @@ -3537,8 +3481,8 @@ merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b) const struct ctables_summary_spec *bs = &b->set->specs[b->ofs]; if (as->function != bs->function) return as->function > bs->function ? 1 : -1; - else if (as->weighted != bs->weighted) - return as->weighted > bs->weighted ? 1 : -1; + else if (as->weighting != bs->weighting) + return as->weighting > bs->weighting ? 1 : -1; else if (as->calc_area != bs->calc_area) return as->calc_area > bs->calc_area ? 1 : -1; else if (as->percentile != bs->percentile) @@ -4035,7 +3979,7 @@ ctables_cell_calculate_postcompute (const struct ctables_section *s, { const struct ctables_summary_spec *ss2 = &pc->specs->specs[i]; if (ss->function == ss2->function - && ss->weighted == ss2->weighted + && ss->weighting == ss2->weighting && ss->calc_area == ss2->calc_area && ss->percentile == ss2->percentile) { @@ -4216,32 +4160,6 @@ ctables_table_output (struct ctables *ct, struct ctables_table *t) struct ctables_cell_sort_aux aux = { .nest = nest, .a = a }; sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_3way, &aux); -#if 0 - if (a == PIVOT_AXIS_ROW) - { - size_t ids[N_CTATS]; - memset (ids, 0, sizeof ids); - for (size_t j = 0; j < n_sorted; j++) - { - struct ctables_cell *cell = sorted[j]; - for (enum ctables_area_type at = 0; at < N_CTATS; at++) - { - struct ctables_area *area = cell->areas[at]; - if (!area->sequence) - area->sequence = ++ids[at]; - } - } - } -#endif - -#if 0 - for (size_t j = 0; j < n_sorted; j++) - { - printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_areas ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->areas[CTAT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->areas[CTAT_COL]->e_count * 100.0); - } - printf ("\n"); -#endif - struct ctables_level { enum ctables_level_type @@ -4483,7 +4401,7 @@ ctables_table_output (struct ctables *ct, struct ctables_table *t) struct pivot_value *value; if (ct->hide_threshold != 0 && d < ct->hide_threshold - && ctables_summary_function_is_count (ss->function)) + && ss->function == CTSF_COUNT) { value = pivot_value_new_user_text_nocopy ( xasprintf ("<%d", ct->hide_threshold)); @@ -4805,7 +4723,7 @@ ctables_prepare_table (struct ctables_table *t) } *ss->specs = (struct ctables_summary_spec) { .function = function, - .weighted = true, + .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY, .format = ctables_summary_default_format (function, ss->var), }; @@ -4901,24 +4819,6 @@ ctables_prepare_table (struct ctables_table *t) } free (items); -#if 0 - for (size_t j = 0; j < merged->n; j++) - printf ("%s\n", ctables_summary_function_name (merged->specs[j].function)); - - for (size_t j = 0; j < stack->n; j++) - { - const struct ctables_nest *nest = &stack->nests[j]; - for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) - { - const struct ctables_summary_spec_set *specs = &nest->specs[sv]; - for (size_t k = 0; k < specs->n; k++) - printf ("(%s, %zu) ", ctables_summary_function_name (specs->specs[k].function), - specs->specs[k].axis_idx); - printf ("\n"); - } - } -#endif - size_t allocated_sum_vars = 0; enumerate_sum_vars (t->axes[t->summary_axis], &t->sum_vars, &t->n_sum_vars, &allocated_sum_vars); @@ -5075,7 +4975,7 @@ ctables_add_category_occurrences (const struct variable *var, static void ctables_section_recurse_add_empty_categories ( struct ctables_section *s, - const struct ctables_category *cats[PIVOT_N_AXES][10], struct ccase *c, + const struct ctables_category **cats[PIVOT_N_AXES], struct ccase *c, enum pivot_axis_type a, size_t a_idx) { if (a >= PIVOT_N_AXES) @@ -5133,7 +5033,15 @@ ctables_section_add_empty_categories (struct ctables_section *s) if (!show_empty) return; - const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */ + const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n]; + const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n]; + const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n]; + const struct ctables_category **cats[PIVOT_N_AXES] = + { + [PIVOT_AXIS_LAYER] = layer_cats, + [PIVOT_AXIS_ROW] = row_cats, + [PIVOT_AXIS_COLUMN] = column_cats, + }; struct ccase *c = case_create (dict_get_proto (s->table->ctables->dict)); ctables_section_recurse_add_empty_categories (s, cats, c, 0, 0); case_unref (c); @@ -5288,13 +5196,18 @@ ctables_execute (struct dataset *ds, struct casereader *input, case_num (c, ct->e_weight), &warn_on_invalid) : d_weight); + double weight[] = { + [CTW_DICTIONARY] = d_weight, + [CTW_EFFECTIVE] = e_weight, + [CTW_UNWEIGHTED] = 1.0, + }; for (size_t i = 0; i < ct->n_tables; i++) { struct ctables_table *t = ct->tables[i]; for (size_t j = 0; j < t->n_sections; j++) - ctables_cell_insert (&t->sections[j], c, d_weight, e_weight); + ctables_cell_insert (&t->sections[j], c, weight); for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) if (t->label_axis[a] != a) @@ -5792,9 +5705,9 @@ ctables_parse_pproperties_format (struct lexer *lexer, { /* Parse function. */ enum ctables_summary_function function; - bool weighted; + enum ctables_weighting weighting; enum ctables_area_type area; - if (!parse_ctables_summary_function (lexer, &function, &weighted, &area)) + if (!parse_ctables_summary_function (lexer, &function, &weighting, &area)) goto error; /* Parse percentile. */ @@ -5818,7 +5731,7 @@ ctables_parse_pproperties_format (struct lexer *lexer, sizeof *sss->specs); sss->specs[sss->n++] = (struct ctables_summary_spec) { .function = function, - .weighted = weighted, + .weighting = weighting, .calc_area = area, .user_area = area, .percentile = percentile, @@ -6524,6 +6437,7 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "SIGTEST")) { + int start_ofs = lex_ofs (lexer) - 1; if (!t->chisq) { t->chisq = xmalloc (sizeof *t->chisq); @@ -6579,9 +6493,14 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) } while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD); + + lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1, + _("Support for SIGTEST not yet implemented.")); + goto error; } else if (lex_match_id (lexer, "COMPARETEST")) { + int start_ofs = lex_ofs (lexer); if (!t->pairwise) { t->pairwise = xmalloc (sizeof *t->pairwise); @@ -6721,6 +6640,10 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) } while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD); + + lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1, + _("Support for COMPARETEST not yet implemented.")); + goto error; } else {