X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fctables.c;h=7e899d1b668a640355ead9e7ccf83fd8c340490f;hb=c4bc3574d974d3aaf4d291097c995a31515a308a;hp=da511b69510f16bc7fb9d303392f7d719a1f4c55;hpb=15e323443edce619168aede1421aa195e7f7ecc2;p=pspp diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index da511b6951..7e899d1b66 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -63,69 +63,135 @@ enum ctables_vlabel CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH, }; -/* XXX: - - unweighted summaries (U*) - - lower confidence limits (*.LCL) - - upper confidence limits (*.UCL) - - standard error (*.SE) - */ +enum ctables_weighting + { + CTW_EFFECTIVE, + CTW_DICTIONARY, + CTW_UNWEIGHTED +#define N_CTWS 3 + }; + +enum ctables_function_type + { + /* A function that operates on data in a single cell. It operates on + effective weights. It does not have an unweighted version. */ + CTFT_CELL, + + /* A function that operates on data in a single cell. The function + operates on effective weights and has a U-prefixed unweighted + version. */ + CTFT_UCELL, + + /* A function that operates on data in a single cell. It operates on + dictionary weights, and has U-prefixed unweighted version and an + E-prefixed effective weight version. */ + CTFT_UECELL, + + /* A function that operates on an area of cells. It operates on effective + weights and has a U-prefixed unweighted version. */ + CTFT_AREA, + }; + +enum ctables_format + { + CTF_COUNT, + CTF_PERCENT, + CTF_GENERAL + }; + +enum ctables_function_availability + { + CTFA_ALL, /* Any variables. */ + CTFA_SCALE, /* Only scale variables, totals, and subtotals. */ + //CTFA_MRSETS, /* Only multiple-response sets */ + }; enum ctables_summary_function { -#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) ENUM, +#define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) ENUM, #include "ctables.inc" #undef S }; enum { -#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) +1 +#define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) +1 N_CTSF_FUNCTIONS = #include "ctables.inc" #undef S }; -static bool ctables_summary_function_is_count (enum ctables_summary_function); +struct ctables_function_info + { + struct substring basename; + enum ctables_function_type type; + enum ctables_format format; + enum ctables_function_availability availability; + + bool u_prefix; /* Accepts a 'U' prefix (for unweighted)? */ + bool e_prefix; /* Accepts an 'E' prefix (for effective)? */ + bool is_area; /* Needs an area prefix. */ + }; +static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = { +#define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) \ + [ENUM] = { \ + .basename = SS_LITERAL_INITIALIZER (NAME), \ + .type = TYPE, \ + .format = FORMAT, \ + .availability = AVAILABILITY, \ + .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \ + .e_prefix = (TYPE) == CTFT_UECELL, \ + .is_area = (TYPE) == CTFT_AREA \ + }, +#include "ctables.inc" +#undef S +}; -enum ctables_domain_type +enum ctables_area_type { /* Within a section, where stacked variables divide one section from - another. */ - CTDT_TABLE, /* All layers of a whole section. */ - CTDT_LAYER, /* One layer within a section. */ - CTDT_LAYERROW, /* Row in one layer within a section. */ - CTDT_LAYERCOL, /* Column in one layer within a section. */ + another. + + Keep CTAT_LAYER after CTAT_LAYERROW and CTAT_LAYERCOL so that + parse_ctables_summary_function() parses correctly. */ + CTAT_TABLE, /* All layers of a whole section. */ + CTAT_LAYERROW, /* Row in one layer within a section. */ + CTAT_LAYERCOL, /* Column in one layer within a section. */ + CTAT_LAYER, /* One layer within a section. */ /* Within a subtable, where a subtable pairs an innermost row variable with an innermost column variable within a single layer. */ - CTDT_SUBTABLE, /* Whole subtable. */ - CTDT_ROW, /* Row within a subtable. */ - CTDT_COL, /* Column within a subtable. */ -#define N_CTDTS 7 + CTAT_SUBTABLE, /* Whole subtable. */ + CTAT_ROW, /* Row within a subtable. */ + CTAT_COL, /* Column within a subtable. */ +#define N_CTATS 7 }; -struct ctables_domain +static const char *ctables_area_type_name[N_CTATS] = { + [CTAT_TABLE] = "TABLE", + [CTAT_LAYER] = "LAYER", + [CTAT_LAYERROW] = "LAYERROW", + [CTAT_LAYERCOL] = "LAYERCOL", + [CTAT_SUBTABLE] = "SUBTABLE", + [CTAT_ROW] = "ROW", + [CTAT_COL] = "COL", +}; + +struct ctables_area { struct hmap_node node; const struct ctables_cell *example; size_t sequence; - double d_valid; /* Dictionary weight. */ - double d_count; - double d_total; - double e_valid; /* Effective weight */ - double e_count; - double e_total; - double u_valid; /* Unweighted. */ - double u_count; - double u_total; + double count[N_CTWS]; + double valid[N_CTWS]; + double total[N_CTWS]; struct ctables_sum *sums; }; struct ctables_sum { - double e_sum; - double u_sum; + double sum[N_CTWS]; }; enum ctables_summary_variant @@ -141,9 +207,9 @@ struct ctables_cell all the axes (except the scalar variable, if any). */ struct hmap_node node; - /* The domains that contain this cell. */ - uint32_t omit_domains; - struct ctables_domain *domains[N_CTDTS]; + /* The areas that contain this cell. */ + uint32_t omit_areas; + struct ctables_area *areas[N_CTATS]; bool hide; @@ -314,8 +380,9 @@ struct ctables_nest struct variable **vars; size_t n; size_t scale_idx; - size_t *domains[N_CTDTS]; - size_t n_domains[N_CTDTS]; + size_t summary_idx; + size_t *areas[N_CTATS]; + size_t n_areas[N_CTATS]; size_t group_head; struct ctables_summary_spec_set specs[N_CSVS]; @@ -352,7 +419,7 @@ struct ctables_section /* Data. */ struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */ struct hmap cells; /* Contains "struct ctables_cell"s. */ - struct hmap domains[N_CTDTS]; /* Contains "struct ctables_domain"s. */ + struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */ }; static void ctables_section_uninit (struct ctables_section *); @@ -396,6 +463,7 @@ struct ctables_table */ enum pivot_axis_type label_axis[PIVOT_N_AXES]; enum pivot_axis_type clabels_from_axis; + enum pivot_axis_type clabels_to_axis; const struct variable *clabels_example; struct hmap clabels_values_map; struct ctables_value **clabels_values; @@ -482,6 +550,8 @@ struct ctables_category /* CCT_FUNCTION. */ enum ctables_summary_function sort_function; + enum ctables_weighting weighting; + enum ctables_area_type area; struct variable *sort_var; double percentile; }; @@ -672,26 +742,34 @@ struct ctables_axis static void ctables_axis_destroy (struct ctables_axis *); -enum ctables_format - { - CTF_COUNT, - CTF_PERCENT, - CTF_GENERAL - }; - -enum ctables_function_availability - { - CTFA_ALL, /* Any variables. */ - CTFA_SCALE, /* Only scale variables, totals, and subtotals. */ - //CTFA_MRSETS, /* Only multiple-response sets */ - }; - struct ctables_summary_spec { + /* The calculation to be performed. + + 'function' is the function to calculate. 'weighted' specifies whether + to use weighted or unweighted data (for functions that do not support a + choice, it must be true). 'calc_area' is the area over which the + calculation takes place (for functions that target only an individual + cell, it must be 0). For CTSF_PTILE only, 'percentile' is the + percentile between 0 and 100 (for other functions it must be 0). */ enum ctables_summary_function function; + enum ctables_weighting weighting; + enum ctables_area_type calc_area; double percentile; /* CTSF_PTILE only. */ - char *label; + /* How to display the result of the calculation. + + 'label' is a user-specified label, NULL if the user didn't specify + one. + + 'user_area' is usually the same as 'calc_area', but when category labels + are rotated from one axis to another it swaps rows and columns. + + 'format' is the format for displaying the output. If + 'is_ctables_format' is true, then 'format.type' is one of the special + CTEF_* formats instead of the standard ones. */ + char *label; + enum ctables_area_type user_area; struct fmt_spec format; bool is_ctables_format; /* Is 'format' one of CTEF_*? */ @@ -777,7 +855,7 @@ static enum ctables_function_availability ctables_function_availability (enum ctables_summary_function f) { static enum ctables_function_availability availability[] = { -#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY, +#define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = AVAILABILITY, #include "ctables.inc" #undef S }; @@ -785,47 +863,56 @@ ctables_function_availability (enum ctables_summary_function f) return availability[f]; } -static bool -ctables_summary_function_is_count (enum ctables_summary_function f) -{ - return f == CTSF_COUNT || f == CTSF_ECOUNT || f == CTSF_UCOUNT; -} - static bool parse_ctables_summary_function (struct lexer *lexer, - enum ctables_summary_function *f) + enum ctables_summary_function *function, + enum ctables_weighting *weighting, + enum ctables_area_type *area) { - struct pair - { - enum ctables_summary_function function; - struct substring name; - }; - static struct pair names[] = { -#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) \ - { ENUM, SS_LITERAL_INITIALIZER (NAME) }, -#include "ctables.inc" - /* The .COUNT suffix may be omitted. */ - S(CTSF_ROWPCT_COUNT, "ROWPCT", _, _, _) - S(CTSF_COLPCT_COUNT, "COLPCT", _, _, _) - S(CTSF_TABLEPCT_COUNT, "TABLEPCT", _, _, _) - S(CTSF_SUBTABLEPCT_COUNT, "SUBTABLEPCT", _, _, _) - S(CTSF_LAYERPCT_COUNT, "LAYERPCT", _, _, _) - S(CTSF_LAYERROWPCT_COUNT, "LAYERROWPCT", _, _, _) - S(CTSF_LAYERCOLPCT_COUNT, "LAYERCOLPCT", _, _, _) -#undef S - }; - if (!lex_force_id (lexer)) return false; - for (size_t i = 0; i < sizeof names / sizeof *names; i++) - if (ss_equals_case (names[i].name, lex_tokss (lexer))) + struct substring name = lex_tokss (lexer); + bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u'); + bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e')); + + bool has_area = false; + *area = 0; + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + if (ss_match_string_case (&name, ss_cstr (ctables_area_type_name[at]))) { - *f = names[i].function; - lex_get (lexer); - return true; + has_area = true; + *area = at; + + if (ss_equals_case (name, ss_cstr ("PCT"))) + { + /* Special case where .COUNT suffix is omitted. */ + *function = CTSF_areaPCT_COUNT; + *weighting = CTW_EFFECTIVE; + lex_get (lexer); + return true; + } + break; } + for (int f = 0; f < N_CTSF_FUNCTIONS; f++) + { + const struct ctables_function_info *cfi = &ctables_function_info[f]; + if (ss_equals_case (cfi->basename, name)) + { + *function = f; + if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area)) + break; + + *weighting = (e ? CTW_EFFECTIVE + : u ? CTW_UNWEIGHTED + : cfi->e_prefix ? CTW_DICTIONARY + : CTW_EFFECTIVE); + lex_get (lexer); + return true; + } + } + lex_error (lexer, _("Expecting summary function name.")); return false; } @@ -881,7 +968,7 @@ ctables_summary_default_format (enum ctables_summary_function function, const struct variable *var) { static const enum ctables_format default_formats[] = { -#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT, +#define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY) [ENUM] = FORMAT, #include "ctables.inc" #undef S }; @@ -901,21 +988,122 @@ ctables_summary_default_format (enum ctables_summary_function function, } } +static const char * +ctables_summary_label__ (const struct ctables_summary_spec *spec) +{ + bool w = spec->weighting != CTW_UNWEIGHTED; + bool d = spec->weighting == CTW_DICTIONARY; + enum ctables_area_type a = spec->user_area; + switch (spec->function) + { + case CTSF_COUNT: + return (d ? N_("Count") + : w ? N_("Adjusted Count") + : N_("Unweighted Count")); + + case CTSF_areaPCT_COUNT: + switch (a) + { + case CTAT_TABLE: return w ? N_("Table %") : N_("Unweighted Table %"); + case CTAT_LAYER: return w ? N_("Layer %") : N_("Unweighted Layer %"); + case CTAT_LAYERROW: return w ? N_("Layer Row %") : N_("Unweighted Layer Row %"); + case CTAT_LAYERCOL: return w ? N_("Layer Column %") : N_("Unweighted Layer Column %"); + case CTAT_SUBTABLE: return w ? N_("Subtable %") : N_("Unweighted Subtable %"); + case CTAT_ROW: return w ? N_("Row %") : N_("Unweighted Row %"); + case CTAT_COL: return w ? N_("Column %") : N_("Unweighted Column %"); + } + NOT_REACHED (); + + case CTSF_areaPCT_VALIDN: + switch (a) + { + case CTAT_TABLE: return w ? N_("Table Valid N %") : N_("Unweighted Table Valid N %"); + case CTAT_LAYER: return w ? N_("Layer Valid N %") : N_("Unweighted Layer Valid N %"); + case CTAT_LAYERROW: return w ? N_("Layer Row Valid N %") : N_("Unweighted Layer Row Valid N %"); + case CTAT_LAYERCOL: return w ? N_("Layer Column Valid N %") : N_("Unweighted Layer Column Valid N %"); + case CTAT_SUBTABLE: return w ? N_("Subtable Valid N %") : N_("Unweighted Subtable Valid N %"); + case CTAT_ROW: return w ? N_("Row Valid N %") : N_("Unweighted Row Valid N %"); + case CTAT_COL: return w ? N_("Column Valid N %") : N_("Unweighted Column Valid N %"); + } + NOT_REACHED (); + + case CTSF_areaPCT_TOTALN: + switch (a) + { + case CTAT_TABLE: return w ? N_("Table Total N %") : N_("Unweighted Table Total N %"); + case CTAT_LAYER: return w ? N_("Layer Total N %") : N_("Unweighted Layer Total N %"); + case CTAT_LAYERROW: return w ? N_("Layer Row Total N %") : N_("Unweighted Layer Row Total N %"); + case CTAT_LAYERCOL: return w ? N_("Layer Column Total N %") : N_("Unweighted Layer Column Total N %"); + case CTAT_SUBTABLE: return w ? N_("Subtable Total N %") : N_("Unweighted Subtable Total N %"); + case CTAT_ROW: return w ? N_("Row Total N %") : N_("Unweighted Row Total N %"); + case CTAT_COL: return w ? N_("Column Total N %") : N_("Unweighted Column Total N %"); + } + NOT_REACHED (); + + case CTSF_MAXIMUM: return N_("Maximum"); + case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean"); + case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median"); + case CTSF_MINIMUM: return N_("Minimum"); + case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing"); + case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode"); + case CTSF_PTILE: NOT_REACHED (); + case CTSF_RANGE: return N_("Range"); + case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean"); + case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation"); + case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum"); + case CTSF_TOTALN: return (d ? N_("Total N") + : w ? N_("Adjusted Total N") + : N_("Unweighted Total N")); + case CTSF_VALIDN: return (d ? N_("Valid N") + : w ? N_("Adjusted Valid N") + : N_("Unweighted Valid N")); + case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance"); + case CTSF_areaPCT_SUM: + switch (a) + { + case CTAT_TABLE: return w ? N_("Table Sum %") : N_("Unweighted Table Sum %"); + case CTAT_LAYER: return w ? N_("Layer Sum %") : N_("Unweighted Layer Sum %"); + case CTAT_LAYERROW: return w ? N_("Layer Row Sum %") : N_("Unweighted Layer Row Sum %"); + case CTAT_LAYERCOL: return w ? N_("Layer Column Sum %") : N_("Unweighted Layer Column Sum %"); + case CTAT_SUBTABLE: return w ? N_("Subtable Sum %") : N_("Unweighted Subtable Sum %"); + case CTAT_ROW: return w ? N_("Row Sum %") : N_("Unweighted Row Sum %"); + case CTAT_COL: return w ? N_("Column Sum %") : N_("Unweighted Column Sum %"); + } + NOT_REACHED (); + + case CTSF_areaID: + switch (a) + { + /* Don't bother translating these: they are for developers only. */ + case CTAT_TABLE: return "Table ID"; + case CTAT_LAYER: return "Layer ID"; + case CTAT_LAYERROW: return "Layer Row ID"; + case CTAT_LAYERCOL: return "Layer Column ID"; + case CTAT_SUBTABLE: return "Subtable ID"; + case CTAT_ROW: return "Row ID"; + case CTAT_COL: return "Column ID"; + } + NOT_REACHED (); + } + + NOT_REACHED (); +} + static struct pivot_value * ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel) { if (!spec->label) { - static const char *default_labels[] = { -#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL, -#include "ctables.inc" -#undef S - }; - - return (spec->function == CTSF_PTILE - ? pivot_value_new_text_format (N_("Percentile %.2f"), - spec->percentile) - : pivot_value_new_text (default_labels[spec->function])); + if (spec->function == CTSF_PTILE) + { + double p = spec->percentile; + char *s = (spec->weighting != CTW_UNWEIGHTED + ? xasprintf (_("Percentile %.2f"), p) + : xasprintf (_("Unweighted Percentile %.2f"), p)); + return pivot_value_new_user_text_nocopy (s); + } + else + return pivot_value_new_text (ctables_summary_label__ (spec)); } else { @@ -930,7 +1118,7 @@ ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel) ss_advance (&in, chunk); if (!in.length) return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out)); - + ss_advance (&in, target.length); ds_put_format (&out, "%g", cilevel); } @@ -938,26 +1126,36 @@ ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel) } static const char * -ctables_summary_function_name (enum ctables_summary_function function) +ctables_summary_function_name (enum ctables_summary_function function, + enum ctables_weighting weighting, + enum ctables_area_type area, + char *buffer, size_t bufsize) { - static const char *names[] = { -#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME, -#include "ctables.inc" -#undef S - }; - return names[function]; + const struct ctables_function_info *cfi = &ctables_function_info[function]; + snprintf (buffer, bufsize, "%s%s%s", + (weighting == CTW_UNWEIGHTED ? "U" + : weighting == CTW_DICTIONARY ? "" + : cfi->e_prefix ? "E" + : ""), + cfi->is_area ? ctables_area_type_name[area] : "", + cfi->basename.string); + return buffer; } static bool add_summary_spec (struct ctables_axis *axis, - enum ctables_summary_function function, double percentile, + enum ctables_summary_function function, + enum ctables_weighting weighting, + enum ctables_area_type area, double percentile, const char *label, const struct fmt_spec *format, bool is_ctables_format, const struct msg_location *loc, enum ctables_summary_variant sv) { if (axis->op == CTAO_VAR) { - const char *function_name = ctables_summary_function_name (function); + char function_name[128]; + ctables_summary_function_name (function, weighting, area, + function_name, sizeof function_name); const char *var_name = var_get_name (axis->var); switch (ctables_function_availability (function)) { @@ -994,6 +1192,9 @@ add_summary_spec (struct ctables_axis *axis, struct ctables_summary_spec *dst = &set->specs[set->n++]; *dst = (struct ctables_summary_spec) { .function = function, + .weighting = weighting, + .calc_area = area, + .user_area = area, .percentile = percentile, .label = xstrdup_if_nonnull (label), .format = (format ? *format @@ -1005,8 +1206,9 @@ add_summary_spec (struct ctables_axis *axis, else { for (size_t i = 0; i < 2; i++) - if (!add_summary_spec (axis->subs[i], function, percentile, label, - format, is_ctables_format, loc, sv)) + if (!add_summary_spec (axis->subs[i], function, weighting, area, + percentile, label, format, is_ctables_format, + loc, sv)) return false; return true; } @@ -1122,7 +1324,10 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx) /* Parse function. */ enum ctables_summary_function function; - if (!parse_ctables_summary_function (ctx->lexer, &function)) + enum ctables_weighting weighting; + enum ctables_area_type area; + if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting, + &area)) goto error; /* Parse percentile. */ @@ -1163,8 +1368,8 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx) struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs, lex_ofs (ctx->lexer) - 1); - add_summary_spec (sub, function, percentile, label, formatp, - is_ctables_format, loc, sv); + add_summary_spec (sub, function, weighting, area, percentile, label, + formatp, is_ctables_format, loc, sv); free (label); msg_location_destroy (loc); @@ -1395,6 +1600,7 @@ ctables_destroy (struct ctables *ct) hmap_delete (&ct->postcomputes, &pc->hmap_node); free (pc); } + hmap_destroy (&ct->postcomputes); fmt_settings_uninit (&ct->ctables_formats); pivot_table_look_unref (ct->look); @@ -1911,7 +2117,8 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict, else { cat.type = CCT_FUNCTION; - if (!parse_ctables_summary_function (lexer, &cat.sort_function)) + if (!parse_ctables_summary_function (lexer, &cat.sort_function, + &cat.weighting, &cat.area)) goto error; if (lex_match (lexer, T_LPAREN)) @@ -2173,8 +2380,8 @@ ctables_nest_uninit (struct ctables_nest *nest) free (nest->vars); for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) ctables_summary_spec_set_uninit (&nest->specs[sv]); - for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++) - free (nest->domains[dt]); + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + free (nest->areas[at]); } static void @@ -2226,6 +2433,9 @@ nest_fts (struct ctables_stack s0, struct ctables_stack s1) .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx : SIZE_MAX), + .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx + : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx + : SIZE_MAX), .n = n, }; for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) @@ -2260,13 +2470,15 @@ var_fts (const struct ctables_axis *a) struct variable **vars = xmalloc (sizeof *vars); *vars = a->var; + bool is_summary = a->specs[CSV_CELL].n || a->scale; struct ctables_nest *nest = xmalloc (sizeof *nest); *nest = (struct ctables_nest) { .vars = vars, .n = 1, .scale_idx = a->scale ? 0 : SIZE_MAX, + .summary_idx = is_summary ? 0 : SIZE_MAX, }; - if (a->specs[CSV_CELL].n || a->scale) + if (is_summary) for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) { ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]); @@ -2334,68 +2546,16 @@ ctables_summary_init (union ctables_summary *s, switch (ss->function) { case CTSF_COUNT: - case CTSF_ECOUNT: - case CTSF_ROWPCT_COUNT: - case CTSF_COLPCT_COUNT: - case CTSF_TABLEPCT_COUNT: - case CTSF_SUBTABLEPCT_COUNT: - case CTSF_LAYERPCT_COUNT: - case CTSF_LAYERROWPCT_COUNT: - case CTSF_LAYERCOLPCT_COUNT: - case CTSF_ROWPCT_VALIDN: - case CTSF_COLPCT_VALIDN: - case CTSF_TABLEPCT_VALIDN: - case CTSF_SUBTABLEPCT_VALIDN: - case CTSF_LAYERPCT_VALIDN: - case CTSF_LAYERROWPCT_VALIDN: - case CTSF_LAYERCOLPCT_VALIDN: - case CTSF_ROWPCT_TOTALN: - case CTSF_COLPCT_TOTALN: - case CTSF_TABLEPCT_TOTALN: - case CTSF_SUBTABLEPCT_TOTALN: - case CTSF_LAYERPCT_TOTALN: - case CTSF_LAYERROWPCT_TOTALN: - case CTSF_LAYERCOLPCT_TOTALN: + case CTSF_areaPCT_COUNT: + case CTSF_areaPCT_VALIDN: + case CTSF_areaPCT_TOTALN: case CTSF_MISSING: - case CSTF_TOTALN: - case CTSF_ETOTALN: + case CTSF_TOTALN: case CTSF_VALIDN: - case CTSF_EVALIDN: - case CTSF_UCOUNT: - case CTSF_UROWPCT_COUNT: - case CTSF_UCOLPCT_COUNT: - case CTSF_UTABLEPCT_COUNT: - case CTSF_USUBTABLEPCT_COUNT: - case CTSF_ULAYERPCT_COUNT: - case CTSF_ULAYERROWPCT_COUNT: - case CTSF_ULAYERCOLPCT_COUNT: - case CTSF_UROWPCT_VALIDN: - case CTSF_UCOLPCT_VALIDN: - case CTSF_UTABLEPCT_VALIDN: - case CTSF_USUBTABLEPCT_VALIDN: - case CTSF_ULAYERPCT_VALIDN: - case CTSF_ULAYERROWPCT_VALIDN: - case CTSF_ULAYERCOLPCT_VALIDN: - case CTSF_UROWPCT_TOTALN: - case CTSF_UCOLPCT_TOTALN: - case CTSF_UTABLEPCT_TOTALN: - case CTSF_USUBTABLEPCT_TOTALN: - case CTSF_ULAYERPCT_TOTALN: - case CTSF_ULAYERROWPCT_TOTALN: - case CTSF_ULAYERCOLPCT_TOTALN: - case CTSF_UMISSING: - case CSTF_UTOTALN: - case CTSF_UVALIDN: s->count = 0; break; - case CTSF_ROW_ID: - case CTSF_COL_ID: - case CTSF_TABLE_ID: - case CTSF_SUBTABLE_ID: - case CTSF_LAYER_ID: - case CTSF_LAYERROW_ID: - case CTSF_LAYERCOL_ID: + case CTSF_areaID: break; case CTSF_MAXIMUM: @@ -2405,38 +2565,20 @@ ctables_summary_init (union ctables_summary *s, break; case CTSF_MEAN: + case CTSF_SUM: + case CTSF_areaPCT_SUM: + s->moments = moments1_create (MOMENT_MEAN); + break; + case CTSF_SEMEAN: case CTSF_STDDEV: - case CTSF_SUM: case CTSF_VARIANCE: - case CTSF_ROWPCT_SUM: - case CTSF_COLPCT_SUM: - case CTSF_TABLEPCT_SUM: - case CTSF_SUBTABLEPCT_SUM: - case CTSF_LAYERPCT_SUM: - case CTSF_LAYERROWPCT_SUM: - case CTSF_LAYERCOLPCT_SUM: - case CTSF_UMEAN: - case CTSF_USEMEAN: - case CTSF_USTDDEV: - case CTSF_USUM: - case CTSF_UVARIANCE: - case CTSF_UROWPCT_SUM: - case CTSF_UCOLPCT_SUM: - case CTSF_UTABLEPCT_SUM: - case CTSF_USUBTABLEPCT_SUM: - case CTSF_ULAYERPCT_SUM: - case CTSF_ULAYERROWPCT_SUM: - case CTSF_ULAYERCOLPCT_SUM: s->moments = moments1_create (MOMENT_VARIANCE); break; case CTSF_MEDIAN: case CTSF_MODE: case CTSF_PTILE: - case CTSF_UMEDIAN: - case CTSF_UMODE: - case CTSF_UPTILE: { struct caseproto *proto = caseproto_create (); proto = caseproto_add_width (proto, 0); @@ -2462,67 +2604,15 @@ ctables_summary_uninit (union ctables_summary *s, switch (ss->function) { case CTSF_COUNT: - case CTSF_ECOUNT: - case CTSF_ROWPCT_COUNT: - case CTSF_COLPCT_COUNT: - case CTSF_TABLEPCT_COUNT: - case CTSF_SUBTABLEPCT_COUNT: - case CTSF_LAYERPCT_COUNT: - case CTSF_LAYERROWPCT_COUNT: - case CTSF_LAYERCOLPCT_COUNT: - case CTSF_ROWPCT_VALIDN: - case CTSF_COLPCT_VALIDN: - case CTSF_TABLEPCT_VALIDN: - case CTSF_SUBTABLEPCT_VALIDN: - case CTSF_LAYERPCT_VALIDN: - case CTSF_LAYERROWPCT_VALIDN: - case CTSF_LAYERCOLPCT_VALIDN: - case CTSF_ROWPCT_TOTALN: - case CTSF_COLPCT_TOTALN: - case CTSF_TABLEPCT_TOTALN: - case CTSF_SUBTABLEPCT_TOTALN: - case CTSF_LAYERPCT_TOTALN: - case CTSF_LAYERROWPCT_TOTALN: - case CTSF_LAYERCOLPCT_TOTALN: + case CTSF_areaPCT_COUNT: + case CTSF_areaPCT_VALIDN: + case CTSF_areaPCT_TOTALN: case CTSF_MISSING: - case CSTF_TOTALN: - case CTSF_ETOTALN: + case CTSF_TOTALN: case CTSF_VALIDN: - case CTSF_EVALIDN: - case CTSF_UCOUNT: - case CTSF_UROWPCT_COUNT: - case CTSF_UCOLPCT_COUNT: - case CTSF_UTABLEPCT_COUNT: - case CTSF_USUBTABLEPCT_COUNT: - case CTSF_ULAYERPCT_COUNT: - case CTSF_ULAYERROWPCT_COUNT: - case CTSF_ULAYERCOLPCT_COUNT: - case CTSF_UROWPCT_VALIDN: - case CTSF_UCOLPCT_VALIDN: - case CTSF_UTABLEPCT_VALIDN: - case CTSF_USUBTABLEPCT_VALIDN: - case CTSF_ULAYERPCT_VALIDN: - case CTSF_ULAYERROWPCT_VALIDN: - case CTSF_ULAYERCOLPCT_VALIDN: - case CTSF_UROWPCT_TOTALN: - case CTSF_UCOLPCT_TOTALN: - case CTSF_UTABLEPCT_TOTALN: - case CTSF_USUBTABLEPCT_TOTALN: - case CTSF_ULAYERPCT_TOTALN: - case CTSF_ULAYERROWPCT_TOTALN: - case CTSF_ULAYERCOLPCT_TOTALN: - case CTSF_UMISSING: - case CSTF_UTOTALN: - case CTSF_UVALIDN: break; - case CTSF_ROW_ID: - case CTSF_COL_ID: - case CTSF_TABLE_ID: - case CTSF_SUBTABLE_ID: - case CTSF_LAYER_ID: - case CTSF_LAYERROW_ID: - case CTSF_LAYERCOL_ID: + case CTSF_areaID: break; case CTSF_MAXIMUM: @@ -2535,34 +2625,13 @@ ctables_summary_uninit (union ctables_summary *s, case CTSF_STDDEV: case CTSF_SUM: case CTSF_VARIANCE: - case CTSF_ROWPCT_SUM: - case CTSF_COLPCT_SUM: - case CTSF_TABLEPCT_SUM: - case CTSF_SUBTABLEPCT_SUM: - case CTSF_LAYERPCT_SUM: - case CTSF_LAYERROWPCT_SUM: - case CTSF_LAYERCOLPCT_SUM: - case CTSF_UMEAN: - case CTSF_USEMEAN: - case CTSF_USTDDEV: - case CTSF_USUM: - case CTSF_UVARIANCE: - case CTSF_UROWPCT_SUM: - case CTSF_UCOLPCT_SUM: - case CTSF_UTABLEPCT_SUM: - case CTSF_USUBTABLEPCT_SUM: - case CTSF_ULAYERPCT_SUM: - case CTSF_ULAYERROWPCT_SUM: - case CTSF_ULAYERCOLPCT_SUM: + case CTSF_areaPCT_SUM: moments1_destroy (s->moments); break; case CTSF_MEDIAN: case CTSF_MODE: case CTSF_PTILE: - case CTSF_UMEDIAN: - case CTSF_UMODE: - case CTSF_UPTILE: casewriter_destroy (s->writer); break; } @@ -2573,8 +2642,8 @@ ctables_summary_add (union ctables_summary *s, const struct ctables_summary_spec *ss, const struct variable *var, const union value *value, bool is_scale, bool is_scale_missing, - bool is_missing, bool excluded_missing, - double d_weight, double e_weight) + bool is_missing, bool is_included, + double weight) { /* To determine whether a case is included in a given table for a particular kind of summary, consider the following charts for each variable in the @@ -2596,117 +2665,46 @@ ctables_summary_add (union ctables_summary *s, */ switch (ss->function) { - case CSTF_TOTALN: - case CTSF_ROWPCT_TOTALN: - case CTSF_COLPCT_TOTALN: - case CTSF_TABLEPCT_TOTALN: - case CTSF_SUBTABLEPCT_TOTALN: - case CTSF_LAYERPCT_TOTALN: - case CTSF_LAYERROWPCT_TOTALN: - case CTSF_LAYERCOLPCT_TOTALN: - s->count += d_weight; + case CTSF_TOTALN: + s->count += weight; break; - case CSTF_UTOTALN: - case CTSF_UROWPCT_TOTALN: - case CTSF_UCOLPCT_TOTALN: - case CTSF_UTABLEPCT_TOTALN: - case CTSF_USUBTABLEPCT_TOTALN: - case CTSF_ULAYERPCT_TOTALN: - case CTSF_ULAYERROWPCT_TOTALN: - case CTSF_ULAYERCOLPCT_TOTALN: - s->count += 1.0; + case CTSF_areaPCT_TOTALN: + s->count += weight; break; case CTSF_COUNT: - case CTSF_ROWPCT_COUNT: - case CTSF_COLPCT_COUNT: - case CTSF_TABLEPCT_COUNT: - case CTSF_SUBTABLEPCT_COUNT: - case CTSF_LAYERPCT_COUNT: - case CTSF_LAYERROWPCT_COUNT: - case CTSF_LAYERCOLPCT_COUNT: - if (is_scale || !excluded_missing) - s->count += d_weight; + if (is_scale || is_included) + s->count += weight; break; - case CTSF_UCOUNT: - case CTSF_UROWPCT_COUNT: - case CTSF_UCOLPCT_COUNT: - case CTSF_UTABLEPCT_COUNT: - case CTSF_USUBTABLEPCT_COUNT: - case CTSF_ULAYERPCT_COUNT: - case CTSF_ULAYERROWPCT_COUNT: - case CTSF_ULAYERCOLPCT_COUNT: - if (is_scale || !excluded_missing) - s->count += 1.0; + case CTSF_areaPCT_COUNT: + if (is_scale || is_included) + s->count += weight; break; case CTSF_VALIDN: - case CTSF_ROWPCT_VALIDN: - case CTSF_COLPCT_VALIDN: - case CTSF_TABLEPCT_VALIDN: - case CTSF_SUBTABLEPCT_VALIDN: - case CTSF_LAYERPCT_VALIDN: - case CTSF_LAYERROWPCT_VALIDN: - case CTSF_LAYERCOLPCT_VALIDN: if (is_scale ? !is_scale_missing : !is_missing) - s->count += d_weight; + s->count += weight; break; - case CTSF_UVALIDN: - case CTSF_UROWPCT_VALIDN: - case CTSF_UCOLPCT_VALIDN: - case CTSF_UTABLEPCT_VALIDN: - case CTSF_USUBTABLEPCT_VALIDN: - case CTSF_ULAYERPCT_VALIDN: - case CTSF_ULAYERROWPCT_VALIDN: - case CTSF_ULAYERCOLPCT_VALIDN: + case CTSF_areaPCT_VALIDN: if (is_scale ? !is_scale_missing : !is_missing) - s->count += 1.0; + s->count += weight; break; - case CTSF_ROW_ID: - case CTSF_COL_ID: - case CTSF_TABLE_ID: - case CTSF_SUBTABLE_ID: - case CTSF_LAYER_ID: - case CTSF_LAYERROW_ID: - case CTSF_LAYERCOL_ID: + case CTSF_areaID: break; case CTSF_MISSING: if (is_scale ? is_scale_missing : is_missing) - s->count += d_weight; - break; - - case CTSF_UMISSING: - if (is_scale - ? is_scale_missing - : is_missing) - s->count += 1.0; - break; - - case CTSF_ECOUNT: - if (is_scale || !excluded_missing) - s->count += e_weight; - break; - - case CTSF_EVALIDN: - if (is_scale - ? !is_scale_missing - : !is_missing) - s->count += e_weight; - break; - - case CTSF_ETOTALN: - s->count += e_weight; + s->count += weight; break; case CTSF_MAXIMUM: @@ -2727,278 +2725,31 @@ ctables_summary_add (union ctables_summary *s, case CTSF_STDDEV: case CTSF_SUM: case CTSF_VARIANCE: - case CTSF_ROWPCT_SUM: - case CTSF_COLPCT_SUM: - case CTSF_TABLEPCT_SUM: - case CTSF_SUBTABLEPCT_SUM: - case CTSF_LAYERPCT_SUM: - case CTSF_LAYERROWPCT_SUM: - case CTSF_LAYERCOLPCT_SUM: if (!is_scale_missing) - moments1_add (s->moments, value->f, e_weight); + moments1_add (s->moments, value->f, weight); break; - case CTSF_UMEAN: - case CTSF_USEMEAN: - case CTSF_USTDDEV: - case CTSF_USUM: - case CTSF_UVARIANCE: - case CTSF_UROWPCT_SUM: - case CTSF_UCOLPCT_SUM: - case CTSF_UTABLEPCT_SUM: - case CTSF_USUBTABLEPCT_SUM: - case CTSF_ULAYERPCT_SUM: - case CTSF_ULAYERROWPCT_SUM: - case CTSF_ULAYERCOLPCT_SUM: - if (!is_scale_missing) - moments1_add (s->moments, value->f, 1.0); + case CTSF_areaPCT_SUM: + if (!is_missing && !is_scale_missing) + moments1_add (s->moments, value->f, weight); break; - case CTSF_UMEDIAN: - case CTSF_UMODE: - case CTSF_UPTILE: - d_weight = e_weight = 1.0; - /* Fall through. */ case CTSF_MEDIAN: case CTSF_MODE: case CTSF_PTILE: if (!is_scale_missing) { - s->ovalid += e_weight; + s->ovalid += weight; struct ccase *c = case_create (casewriter_get_proto (s->writer)); *case_num_rw_idx (c, 0) = value->f; - *case_num_rw_idx (c, 1) = e_weight; + *case_num_rw_idx (c, 1) = weight; casewriter_write (s->writer, c); } break; } } -static enum ctables_domain_type -ctables_function_domain (enum ctables_summary_function function) -{ - switch (function) - { - case CTSF_COUNT: - case CTSF_ECOUNT: - case CTSF_MISSING: - case CSTF_TOTALN: - case CTSF_ETOTALN: - case CTSF_VALIDN: - case CTSF_EVALIDN: - case CTSF_MAXIMUM: - case CTSF_MINIMUM: - case CTSF_RANGE: - case CTSF_MEAN: - case CTSF_SEMEAN: - case CTSF_STDDEV: - case CTSF_SUM: - case CTSF_VARIANCE: - case CTSF_MEDIAN: - case CTSF_PTILE: - case CTSF_MODE: - case CTSF_UCOUNT: - case CTSF_UMISSING: - case CSTF_UTOTALN: - case CTSF_UVALIDN: - case CTSF_UMEAN: - case CTSF_USEMEAN: - case CTSF_USTDDEV: - case CTSF_USUM: - case CTSF_UVARIANCE: - case CTSF_UMEDIAN: - case CTSF_UPTILE: - case CTSF_UMODE: - NOT_REACHED (); - - case CTSF_COLPCT_COUNT: - case CTSF_COLPCT_SUM: - case CTSF_COLPCT_TOTALN: - case CTSF_COLPCT_VALIDN: - case CTSF_UCOLPCT_COUNT: - case CTSF_UCOLPCT_SUM: - case CTSF_UCOLPCT_TOTALN: - case CTSF_UCOLPCT_VALIDN: - case CTSF_COL_ID: - return CTDT_COL; - - case CTSF_LAYERCOLPCT_COUNT: - case CTSF_LAYERCOLPCT_SUM: - case CTSF_LAYERCOLPCT_TOTALN: - case CTSF_LAYERCOLPCT_VALIDN: - case CTSF_ULAYERCOLPCT_COUNT: - case CTSF_ULAYERCOLPCT_SUM: - case CTSF_ULAYERCOLPCT_TOTALN: - case CTSF_ULAYERCOLPCT_VALIDN: - case CTSF_LAYERCOL_ID: - return CTDT_LAYERCOL; - - case CTSF_LAYERPCT_COUNT: - case CTSF_LAYERPCT_SUM: - case CTSF_LAYERPCT_TOTALN: - case CTSF_LAYERPCT_VALIDN: - case CTSF_ULAYERPCT_COUNT: - case CTSF_ULAYERPCT_SUM: - case CTSF_ULAYERPCT_TOTALN: - case CTSF_ULAYERPCT_VALIDN: - case CTSF_LAYER_ID: - return CTDT_LAYER; - - case CTSF_LAYERROWPCT_COUNT: - case CTSF_LAYERROWPCT_SUM: - case CTSF_LAYERROWPCT_TOTALN: - case CTSF_LAYERROWPCT_VALIDN: - case CTSF_ULAYERROWPCT_COUNT: - case CTSF_ULAYERROWPCT_SUM: - case CTSF_ULAYERROWPCT_TOTALN: - case CTSF_ULAYERROWPCT_VALIDN: - case CTSF_LAYERROW_ID: - return CTDT_LAYERROW; - - case CTSF_ROWPCT_COUNT: - case CTSF_ROWPCT_SUM: - case CTSF_ROWPCT_TOTALN: - case CTSF_ROWPCT_VALIDN: - case CTSF_UROWPCT_COUNT: - case CTSF_UROWPCT_SUM: - case CTSF_UROWPCT_TOTALN: - case CTSF_UROWPCT_VALIDN: - case CTSF_ROW_ID: - return CTDT_ROW; - - case CTSF_SUBTABLEPCT_COUNT: - case CTSF_SUBTABLEPCT_SUM: - case CTSF_SUBTABLEPCT_TOTALN: - case CTSF_SUBTABLEPCT_VALIDN: - case CTSF_USUBTABLEPCT_COUNT: - case CTSF_USUBTABLEPCT_SUM: - case CTSF_USUBTABLEPCT_TOTALN: - case CTSF_USUBTABLEPCT_VALIDN: - case CTSF_SUBTABLE_ID: - return CTDT_SUBTABLE; - - case CTSF_TABLEPCT_COUNT: - case CTSF_TABLEPCT_SUM: - case CTSF_TABLEPCT_TOTALN: - case CTSF_TABLEPCT_VALIDN: - case CTSF_UTABLEPCT_COUNT: - case CTSF_UTABLEPCT_SUM: - case CTSF_UTABLEPCT_TOTALN: - case CTSF_UTABLEPCT_VALIDN: - case CTSF_TABLE_ID: - return CTDT_TABLE; - } - - NOT_REACHED (); -} - -static enum ctables_domain_type -ctables_function_is_pctsum (enum ctables_summary_function function) -{ - switch (function) - { - case CTSF_COUNT: - case CTSF_ECOUNT: - case CTSF_MISSING: - case CSTF_TOTALN: - case CTSF_ETOTALN: - case CTSF_VALIDN: - case CTSF_EVALIDN: - case CTSF_MAXIMUM: - case CTSF_MINIMUM: - case CTSF_RANGE: - case CTSF_MEAN: - case CTSF_SEMEAN: - case CTSF_STDDEV: - case CTSF_SUM: - case CTSF_VARIANCE: - case CTSF_MEDIAN: - case CTSF_PTILE: - case CTSF_MODE: - case CTSF_UCOUNT: - case CTSF_UMISSING: - case CSTF_UTOTALN: - case CTSF_UVALIDN: - case CTSF_UMEAN: - case CTSF_USEMEAN: - case CTSF_USTDDEV: - case CTSF_USUM: - case CTSF_UVARIANCE: - case CTSF_UMEDIAN: - case CTSF_UPTILE: - case CTSF_UMODE: - case CTSF_COLPCT_COUNT: - case CTSF_COLPCT_TOTALN: - case CTSF_COLPCT_VALIDN: - case CTSF_UCOLPCT_COUNT: - case CTSF_UCOLPCT_TOTALN: - case CTSF_UCOLPCT_VALIDN: - case CTSF_LAYERCOLPCT_COUNT: - case CTSF_LAYERCOLPCT_TOTALN: - case CTSF_LAYERCOLPCT_VALIDN: - case CTSF_ULAYERCOLPCT_COUNT: - case CTSF_ULAYERCOLPCT_TOTALN: - case CTSF_ULAYERCOLPCT_VALIDN: - case CTSF_LAYERPCT_COUNT: - case CTSF_LAYERPCT_TOTALN: - case CTSF_LAYERPCT_VALIDN: - case CTSF_ULAYERPCT_COUNT: - case CTSF_ULAYERPCT_TOTALN: - case CTSF_ULAYERPCT_VALIDN: - case CTSF_LAYERROWPCT_COUNT: - case CTSF_LAYERROWPCT_TOTALN: - case CTSF_LAYERROWPCT_VALIDN: - case CTSF_ULAYERROWPCT_COUNT: - case CTSF_ULAYERROWPCT_TOTALN: - case CTSF_ULAYERROWPCT_VALIDN: - case CTSF_ROWPCT_COUNT: - case CTSF_ROWPCT_TOTALN: - case CTSF_ROWPCT_VALIDN: - case CTSF_UROWPCT_COUNT: - case CTSF_UROWPCT_TOTALN: - case CTSF_UROWPCT_VALIDN: - case CTSF_SUBTABLEPCT_COUNT: - case CTSF_SUBTABLEPCT_TOTALN: - case CTSF_SUBTABLEPCT_VALIDN: - case CTSF_USUBTABLEPCT_COUNT: - case CTSF_USUBTABLEPCT_TOTALN: - case CTSF_USUBTABLEPCT_VALIDN: - case CTSF_TABLEPCT_COUNT: - case CTSF_TABLEPCT_TOTALN: - case CTSF_TABLEPCT_VALIDN: - case CTSF_UTABLEPCT_COUNT: - case CTSF_UTABLEPCT_TOTALN: - case CTSF_UTABLEPCT_VALIDN: - case CTSF_ROW_ID: - case CTSF_COL_ID: - case CTSF_TABLE_ID: - case CTSF_SUBTABLE_ID: - case CTSF_LAYER_ID: - case CTSF_LAYERROW_ID: - case CTSF_LAYERCOL_ID: - return false; - - case CTSF_COLPCT_SUM: - case CTSF_UCOLPCT_SUM: - case CTSF_LAYERCOLPCT_SUM: - case CTSF_ULAYERCOLPCT_SUM: - case CTSF_LAYERPCT_SUM: - case CTSF_ULAYERPCT_SUM: - case CTSF_LAYERROWPCT_SUM: - case CTSF_ULAYERROWPCT_SUM: - case CTSF_ROWPCT_SUM: - case CTSF_UROWPCT_SUM: - case CTSF_SUBTABLEPCT_SUM: - case CTSF_USUBTABLEPCT_SUM: - case CTSF_TABLEPCT_SUM: - case CTSF_UTABLEPCT_SUM: - return true; - } - - NOT_REACHED (); -} - static double ctables_summary_value (const struct ctables_cell *cell, union ctables_summary *s, @@ -3007,111 +2758,35 @@ ctables_summary_value (const struct ctables_cell *cell, switch (ss->function) { case CTSF_COUNT: - case CTSF_ECOUNT: - case CTSF_UCOUNT: return s->count; - case CTSF_ROW_ID: - case CTSF_COL_ID: - case CTSF_TABLE_ID: - case CTSF_SUBTABLE_ID: - case CTSF_LAYER_ID: - case CTSF_LAYERROW_ID: - case CTSF_LAYERCOL_ID: - return cell->domains[ctables_function_domain (ss->function)]->sequence; - - case CTSF_ROWPCT_COUNT: - case CTSF_COLPCT_COUNT: - case CTSF_TABLEPCT_COUNT: - case CTSF_SUBTABLEPCT_COUNT: - case CTSF_LAYERPCT_COUNT: - case CTSF_LAYERROWPCT_COUNT: - case CTSF_LAYERCOLPCT_COUNT: - { - enum ctables_domain_type d = ctables_function_domain (ss->function); - return (cell->domains[d]->e_count - ? s->count / cell->domains[d]->e_count * 100 - : SYSMIS); - } - - case CTSF_UROWPCT_COUNT: - case CTSF_UCOLPCT_COUNT: - case CTSF_UTABLEPCT_COUNT: - case CTSF_USUBTABLEPCT_COUNT: - case CTSF_ULAYERPCT_COUNT: - case CTSF_ULAYERROWPCT_COUNT: - case CTSF_ULAYERCOLPCT_COUNT: - { - enum ctables_domain_type d = ctables_function_domain (ss->function); - return (cell->domains[d]->u_count - ? s->count / cell->domains[d]->u_count * 100 - : SYSMIS); - } - - case CTSF_ROWPCT_VALIDN: - case CTSF_COLPCT_VALIDN: - case CTSF_TABLEPCT_VALIDN: - case CTSF_SUBTABLEPCT_VALIDN: - case CTSF_LAYERPCT_VALIDN: - case CTSF_LAYERROWPCT_VALIDN: - case CTSF_LAYERCOLPCT_VALIDN: - { - enum ctables_domain_type d = ctables_function_domain (ss->function); - return (cell->domains[d]->e_valid - ? s->count / cell->domains[d]->e_valid * 100 - : SYSMIS); - } + case CTSF_areaID: + return cell->areas[ss->calc_area]->sequence; - case CTSF_UROWPCT_VALIDN: - case CTSF_UCOLPCT_VALIDN: - case CTSF_UTABLEPCT_VALIDN: - case CTSF_USUBTABLEPCT_VALIDN: - case CTSF_ULAYERPCT_VALIDN: - case CTSF_ULAYERROWPCT_VALIDN: - case CTSF_ULAYERCOLPCT_VALIDN: + case CTSF_areaPCT_COUNT: { - enum ctables_domain_type d = ctables_function_domain (ss->function); - return (cell->domains[d]->u_valid - ? s->count / cell->domains[d]->u_valid * 100 - : SYSMIS); + const struct ctables_area *a = cell->areas[ss->calc_area]; + double a_count = a->count[ss->weighting]; + return a_count ? s->count / a_count * 100 : SYSMIS; } - case CTSF_ROWPCT_TOTALN: - case CTSF_COLPCT_TOTALN: - case CTSF_TABLEPCT_TOTALN: - case CTSF_SUBTABLEPCT_TOTALN: - case CTSF_LAYERPCT_TOTALN: - case CTSF_LAYERROWPCT_TOTALN: - case CTSF_LAYERCOLPCT_TOTALN: + case CTSF_areaPCT_VALIDN: { - enum ctables_domain_type d = ctables_function_domain (ss->function); - return (cell->domains[d]->e_total - ? s->count / cell->domains[d]->e_total * 100 - : SYSMIS); + const struct ctables_area *a = cell->areas[ss->calc_area]; + double a_valid = a->valid[ss->weighting]; + return a_valid ? s->count / a_valid * 100 : SYSMIS; } - case CTSF_UROWPCT_TOTALN: - case CTSF_UCOLPCT_TOTALN: - case CTSF_UTABLEPCT_TOTALN: - case CTSF_USUBTABLEPCT_TOTALN: - case CTSF_ULAYERPCT_TOTALN: - case CTSF_ULAYERROWPCT_TOTALN: - case CTSF_ULAYERCOLPCT_TOTALN: + case CTSF_areaPCT_TOTALN: { - enum ctables_domain_type d = ctables_function_domain (ss->function); - return (cell->domains[d]->u_total - ? s->count / cell->domains[d]->u_total * 100 - : SYSMIS); + const struct ctables_area *a = cell->areas[ss->calc_area]; + double a_total = a->total[ss->weighting]; + return a_total ? s->count / a_total * 100 : SYSMIS; } case CTSF_MISSING: - case CTSF_UMISSING: - case CSTF_TOTALN: - case CTSF_ETOTALN: - case CSTF_UTOTALN: + case CTSF_TOTALN: case CTSF_VALIDN: - case CTSF_UVALIDN: - case CTSF_EVALIDN: return s->count; case CTSF_MAXIMUM: @@ -3124,7 +2799,6 @@ ctables_summary_value (const struct ctables_cell *cell, return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS; case CTSF_MEAN: - case CTSF_UMEAN: { double mean; moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL); @@ -3132,7 +2806,6 @@ ctables_summary_value (const struct ctables_cell *cell, } case CTSF_SEMEAN: - case CTSF_USEMEAN: { double weight, variance; moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL); @@ -3140,7 +2813,6 @@ ctables_summary_value (const struct ctables_cell *cell, } case CTSF_STDDEV: - case CTSF_USTDDEV: { double variance; moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL); @@ -3148,7 +2820,6 @@ ctables_summary_value (const struct ctables_cell *cell, } case CTSF_SUM: - case CTSF_USUM: { double weight, mean; moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL); @@ -3156,52 +2827,27 @@ ctables_summary_value (const struct ctables_cell *cell, } case CTSF_VARIANCE: - case CTSF_UVARIANCE: { double variance; moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL); return variance; } - case CTSF_ROWPCT_SUM: - case CTSF_COLPCT_SUM: - case CTSF_TABLEPCT_SUM: - case CTSF_SUBTABLEPCT_SUM: - case CTSF_LAYERPCT_SUM: - case CTSF_LAYERROWPCT_SUM: - case CTSF_LAYERCOLPCT_SUM: + case CTSF_areaPCT_SUM: { double weight, mean; moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL); if (weight == SYSMIS || mean == SYSMIS) return SYSMIS; - enum ctables_domain_type d = ctables_function_domain (ss->function); - double num = weight * mean; - double denom = cell->domains[d]->sums[ss->sum_var_idx].e_sum; - return denom != 0 ? num / denom * 100 : SYSMIS; - } - case CTSF_UROWPCT_SUM: - case CTSF_UCOLPCT_SUM: - case CTSF_UTABLEPCT_SUM: - case CTSF_USUBTABLEPCT_SUM: - case CTSF_ULAYERPCT_SUM: - case CTSF_ULAYERROWPCT_SUM: - case CTSF_ULAYERCOLPCT_SUM: - { - double weight, mean; - moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL); - if (weight == SYSMIS || mean == SYSMIS) - return SYSMIS; - enum ctables_domain_type d = ctables_function_domain (ss->function); - double num = weight * mean; - double denom = cell->domains[d]->sums[ss->sum_var_idx].u_sum; - return denom != 0 ? num / denom * 100 : SYSMIS; + + const struct ctables_area *a = cell->areas[ss->calc_area]; + const struct ctables_sum *sum = &a->sums[ss->sum_var_idx]; + double denom = sum->sum[ss->weighting]; + return denom != 0 ? weight * mean / denom * 100 : SYSMIS; } case CTSF_MEDIAN: case CTSF_PTILE: - case CTSF_UMEDIAN: - case CTSF_UPTILE: if (s->writer) { struct casereader *reader = casewriter_make_reader (s->writer); @@ -3217,7 +2863,6 @@ ctables_summary_value (const struct ctables_cell *cell, return s->ovalue; case CTSF_MODE: - case CTSF_UMODE: if (s->writer) { struct casereader *reader = casewriter_make_reader (s->writer); @@ -3358,17 +3003,17 @@ ctables_cell_compare_leaf_3way (const void *a_, const void *b_, Fill the table entry using the indexes from before. */ -static struct ctables_domain * -ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell, - enum ctables_domain_type domain) +static struct ctables_area * +ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell, + enum ctables_area_type area) { size_t hash = 0; for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { const struct ctables_nest *nest = s->nests[a]; - for (size_t i = 0; i < nest->n_domains[domain]; i++) + for (size_t i = 0; i < nest->n_areas[area]; i++) { - size_t v_idx = nest->domains[domain][i]; + size_t v_idx = nest->areas[area][i]; struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx]; hash = hash_pointer (cv->category, hash); if (cv->category->type != CCT_TOTAL @@ -3379,16 +3024,16 @@ ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell, } } - struct ctables_domain *d; - HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &s->domains[domain]) + struct ctables_area *a; + HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area]) { - const struct ctables_cell *df = d->example; + const struct ctables_cell *df = a->example; for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { const struct ctables_nest *nest = s->nests[a]; - for (size_t i = 0; i < nest->n_domains[domain]; i++) + for (size_t i = 0; i < nest->n_areas[area]; i++) { - size_t v_idx = nest->domains[domain][i]; + size_t v_idx = nest->areas[area][i]; struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx]; struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx]; if (cv1->category != cv2->category @@ -3400,7 +3045,7 @@ ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell, goto not_equal; } } - return d; + return a; not_equal: ; } @@ -3409,10 +3054,10 @@ ctables_domain_insert (struct ctables_section *s, struct ctables_cell *cell, ? xzalloc (s->table->n_sum_vars * sizeof *sums) : NULL); - d = xmalloc (sizeof *d); - *d = (struct ctables_domain) { .example = cell, .sums = sums }; - hmap_insert (&s->domains[domain], &d->node, hash); - return d; + a = xmalloc (sizeof *a); + *a = (struct ctables_area) { .example = cell, .sums = sums }; + hmap_insert (&s->areas[area], &a->node, hash); + return a; } static struct substring @@ -3557,7 +3202,7 @@ ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, cell = xmalloc (sizeof *cell); cell->hide = false; cell->sv = sv; - cell->omit_domains = 0; + cell->omit_areas = 0; cell->postcompute = false; //struct string name = DS_EMPTY_INITIALIZER; for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) @@ -3586,22 +3231,22 @@ ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, switch (a) { case PIVOT_AXIS_COLUMN: - cell->omit_domains |= ((1u << CTDT_TABLE) | - (1u << CTDT_LAYER) | - (1u << CTDT_LAYERCOL) | - (1u << CTDT_SUBTABLE) | - (1u << CTDT_COL)); + cell->omit_areas |= ((1u << CTAT_TABLE) | + (1u << CTAT_LAYER) | + (1u << CTAT_LAYERCOL) | + (1u << CTAT_SUBTABLE) | + (1u << CTAT_COL)); break; case PIVOT_AXIS_ROW: - cell->omit_domains |= ((1u << CTDT_TABLE) | - (1u << CTDT_LAYER) | - (1u << CTDT_LAYERROW) | - (1u << CTDT_SUBTABLE) | - (1u << CTDT_ROW)); + cell->omit_areas |= ((1u << CTAT_TABLE) | + (1u << CTAT_LAYER) | + (1u << CTAT_LAYERROW) | + (1u << CTAT_SUBTABLE) | + (1u << CTAT_ROW)); break; case PIVOT_AXIS_LAYER: - cell->omit_domains |= ((1u << CTDT_TABLE) | - (1u << CTDT_LAYER)); + cell->omit_areas |= ((1u << CTAT_TABLE) | + (1u << CTAT_LAYER)); break; } } @@ -3639,22 +3284,16 @@ ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c, cell->summaries = xmalloc (specs->n * sizeof *cell->summaries); for (size_t i = 0; i < specs->n; i++) ctables_summary_init (&cell->summaries[i], &specs->specs[i]); - for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++) - cell->domains[dt] = ctables_domain_insert (s, cell, dt); + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + cell->areas[at] = ctables_area_insert (s, cell, at); hmap_insert (&s->cells, &cell->node, hash); return cell; } static bool -is_scale_missing (const struct ctables_summary_spec_set *specs, - const struct ccase *c) +is_listwise_missing (const struct ctables_summary_spec_set *specs, + const struct ccase *c) { - if (!specs->is_scale) - return false; - - if (var_is_num_missing (specs->var, case_num (c, specs->var))) - return true; - for (size_t i = 0; i < specs->n_listwise_vars; i++) { const struct variable *var = specs->listwise_vars[i]; @@ -3665,41 +3304,42 @@ is_scale_missing (const struct ctables_summary_spec_set *specs, return false; } +static void +add_weight (double dst[N_CTWS], const double src[N_CTWS]) +{ + for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++) + dst[wt] += src[wt]; +} + static void ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, const struct ctables_category *cats[PIVOT_N_AXES][10], - bool is_missing, bool excluded_missing, - double d_weight, double e_weight) + bool is_included, double weight[N_CTWS]) { struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats); const struct ctables_nest *ss = s->nests[s->table->summary_axis]; const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv]; + const union value *value = case_data (c, specs->var); + bool is_missing = var_is_value_missing (specs->var, value); + bool scale_missing = specs->is_scale && (is_missing || is_listwise_missing (specs, c)); - bool scale_missing = is_scale_missing (specs, c); for (size_t i = 0; i < specs->n; i++) - ctables_summary_add (&cell->summaries[i], &specs->specs[i], - specs->var, case_data (c, specs->var), specs->is_scale, - scale_missing, is_missing, excluded_missing, - d_weight, e_weight); - for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++) - if (!(cell->omit_domains && (1u << dt))) + ctables_summary_add (&cell->summaries[i], &specs->specs[i], + specs->var, value, specs->is_scale, + scale_missing, is_missing, is_included, + weight[specs->specs[i].weighting]); + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + if (!(cell->omit_areas && (1u << at))) { - struct ctables_domain *d = cell->domains[dt]; - d->d_total += d_weight; - d->e_total += e_weight; - d->u_total += 1.0; - if (!excluded_missing) - { - d->d_count += d_weight; - d->e_count += e_weight; - d->u_count += 1.0; - } + struct ctables_area *a = cell->areas[at]; + + add_weight (a->total, weight); + if (is_included) + add_weight (a->count, weight); if (!is_missing) { - d->d_valid += d_weight; - d->e_valid += e_weight; - d->u_count += 1.0; + add_weight (a->valid, weight); for (size_t i = 0; i < s->table->n_sum_vars; i++) { @@ -3708,9 +3348,9 @@ ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, double addend = case_num (c, var); if (!var_is_num_missing (var, addend)) { - struct ctables_sum *sum = &d->sums[i]; - sum->e_sum += addend * e_weight; - sum->u_sum += addend; + struct ctables_sum *sum = &a->sums[i]; + for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++) + sum->sum[wt] += addend * weight[wt]; } } } @@ -3720,8 +3360,7 @@ ctables_cell_add__ (struct ctables_section *s, const struct ccase *c, static void recurse_totals (struct ctables_section *s, const struct ccase *c, const struct ctables_category *cats[PIVOT_N_AXES][10], - bool is_missing, bool excluded_missing, - double d_weight, double e_weight, + bool is_included, double weight[N_CTWS], enum pivot_axis_type start_axis, size_t start_nest) { for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) @@ -3740,10 +3379,8 @@ recurse_totals (struct ctables_section *s, const struct ccase *c, { const struct ctables_category *save = cats[a][i]; cats[a][i] = total; - ctables_cell_add__ (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight); - recurse_totals (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight, a, i + 1); + ctables_cell_add__ (s, c, cats, is_included, weight); + recurse_totals (s, c, cats, is_included, weight, a, i + 1); cats[a][i] = save; } } @@ -3754,8 +3391,7 @@ recurse_totals (struct ctables_section *s, const struct ccase *c, static void recurse_subtotals (struct ctables_section *s, const struct ccase *c, const struct ctables_category *cats[PIVOT_N_AXES][10], - bool is_missing, bool excluded_missing, - double d_weight, double e_weight, + bool is_included, double weight[N_CTWS], enum pivot_axis_type start_axis, size_t start_nest) { for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++) @@ -3770,10 +3406,8 @@ recurse_subtotals (struct ctables_section *s, const struct ccase *c, if (save->subtotal) { cats[a][i] = save->subtotal; - ctables_cell_add__ (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight); - recurse_subtotals (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight, a, i + 1); + ctables_cell_add__ (s, c, cats, is_included, weight); + recurse_subtotals (s, c, cats, is_included, weight, a, i + 1); cats[a][i] = save; } } @@ -3801,53 +3435,43 @@ ctables_add_occurrence (const struct variable *var, } static void -ctables_cell_insert (struct ctables_section *s, - const struct ccase *c, - double d_weight, double e_weight) +ctables_cell_insert (struct ctables_section *s, const struct ccase *c, + double weight[N_CTWS]) { const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */ - /* Does at least one categorical variable have a missing value in an included - or excluded category? */ - bool is_missing = false; - - /* Does at least one categorical variable have a missing value in an excluded - category? */ - bool excluded_missing = false; + bool is_included = true; for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { const struct ctables_nest *nest = s->nests[a]; for (size_t i = 0; i < nest->n; i++) - { - if (i == nest->scale_idx) - continue; - - const struct variable *var = nest->vars[i]; - const union value *value = case_data (c, var); - - bool var_missing = var_is_value_missing (var, value) != 0; - if (var_missing) - is_missing = true; - - cats[a][i] = ctables_categories_match ( - s->table->categories[var_get_dict_index (var)], value, var); - if (!cats[a][i]) - { - if (!var_missing) - return; + if (i != nest->scale_idx) + { + const struct variable *var = nest->vars[i]; + const union value *value = case_data (c, var); - static const struct ctables_category cct_excluded_missing = { - .type = CCT_EXCLUDED_MISSING, - .hide = true, - }; - cats[a][i] = &cct_excluded_missing; - excluded_missing = true; - } + cats[a][i] = ctables_categories_match ( + s->table->categories[var_get_dict_index (var)], value, var); + if (!cats[a][i]) + { + if (i != nest->summary_idx) + return; + + if (!var_is_value_missing (var, value)) + return; + + static const struct ctables_category cct_excluded_missing = { + .type = CCT_EXCLUDED_MISSING, + .hide = true, + }; + cats[a][i] = &cct_excluded_missing; + is_included = false; + } } } - if (!excluded_missing) + if (is_included) for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { const struct ctables_nest *nest = s->nests[a]; @@ -3860,16 +3484,9 @@ ctables_cell_insert (struct ctables_section *s, } } - ctables_cell_add__ (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight); - - //if (!excluded_missing) - { - recurse_totals (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight, 0, 0); - recurse_subtotals (s, c, cats, is_missing, excluded_missing, - d_weight, e_weight, 0, 0); - } + ctables_cell_add__ (s, c, cats, is_included, weight); + recurse_totals (s, c, cats, is_included, weight, 0, 0); + recurse_subtotals (s, c, cats, is_included, weight, 0, 0); } struct merge_item @@ -3885,6 +3502,10 @@ merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b) const struct ctables_summary_spec *bs = &b->set->specs[b->ofs]; if (as->function != bs->function) return as->function > bs->function ? 1 : -1; + else if (as->weighting != bs->weighting) + return as->weighting > bs->weighting ? 1 : -1; + else if (as->calc_area != bs->calc_area) + return as->calc_area > bs->calc_area ? 1 : -1; else if (as->percentile != bs->percentile) return as->percentile < bs->percentile ? 1 : -1; @@ -3893,21 +3514,88 @@ merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b) return strcmp (as_label, bs_label); } -static struct pivot_value * -ctables_category_create_label__ (const struct ctables_category *cat, - const struct variable *var, - const union value *value) +static void +ctables_category_format_number (double number, const struct variable *var, + struct string *s) { - return (cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL - ? pivot_value_new_user_text (cat->total_label, SIZE_MAX) - : pivot_value_new_var_value (var, value)); + struct pivot_value *pv = pivot_value_new_var_value ( + var, &(union value) { .f = number }); + pivot_value_format (pv, NULL, s); + pivot_value_destroy (pv); +} + +static void +ctables_category_format_string (struct substring string, + const struct variable *var, struct string *out) +{ + int width = var_get_width (var); + char *s = xmalloc (width); + buf_copy_rpad (s, width, string.string, string.length, ' '); + struct pivot_value *pv = pivot_value_new_var_value ( + var, &(union value) { .s = CHAR_CAST (uint8_t *, s) }); + pivot_value_format (pv, NULL, out); + pivot_value_destroy (pv); + free (s); +} + +static bool +ctables_category_format_label (const struct ctables_category *cat, + const struct variable *var, + struct string *s) +{ + switch (cat->type) + { + case CCT_NUMBER: + ctables_category_format_number (cat->number, var, s); + return true; + + case CCT_STRING: + ctables_category_format_string (cat->string, var, s); + return true; + + case CCT_NRANGE: + ctables_category_format_number (cat->nrange[0], var, s); + ds_put_format (s, " THRU "); + ctables_category_format_number (cat->nrange[1], var, s); + return true; + + case CCT_SRANGE: + ctables_category_format_string (cat->srange[0], var, s); + ds_put_format (s, " THRU "); + ctables_category_format_string (cat->srange[1], var, s); + return true; + + case CCT_MISSING: + ds_put_cstr (s, "MISSING"); + return true; + + case CCT_OTHERNM: + ds_put_cstr (s, "OTHERNM"); + return true; + + case CCT_POSTCOMPUTE: + ds_put_format (s, "&%s", cat->pc->name); + return true; + + case CCT_TOTAL: + case CCT_SUBTOTAL: + ds_put_cstr (s, cat->total_label); + return true; + + case CCT_VALUE: + case CCT_LABEL: + case CCT_FUNCTION: + case CCT_EXCLUDED_MISSING: + return false; + } + + return false; } static struct pivot_value * ctables_postcompute_label (const struct ctables_categories *cats, const struct ctables_category *cat, - const struct variable *var, - const union value *value) + const struct variable *var) { struct substring in = ss_cstr (cat->pc->label); struct substring target = ss_cstr (")LABEL["); @@ -3939,12 +3627,8 @@ ctables_postcompute_label (const struct ctables_categories *cats, goto error; struct ctables_category *cat2 = &cats->cats[idx - 1]; - struct pivot_value *label2 - = ctables_category_create_label__ (cat2, var, value); - char *label2_s = pivot_value_to_string_defaults (label2); - ds_put_cstr (&out, label2_s); - free (label2_s); - pivot_value_destroy (label2); + if (!ctables_category_format_label (cat2, var, &out)) + goto error; } error: @@ -3953,14 +3637,16 @@ error: } static struct pivot_value * -ctables_category_create_label (const struct ctables_categories *cats, - const struct ctables_category *cat, - const struct variable *var, - const union value *value) +ctables_category_create_value_label (const struct ctables_categories *cats, + const struct ctables_category *cat, + const struct variable *var, + const union value *value) { return (cat->type == CCT_POSTCOMPUTE && cat->pc->label - ? ctables_postcompute_label (cats, cat, var, value) - : ctables_category_create_label__ (cat, var, value)); + ? ctables_postcompute_label (cats, cat, var) + : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL + ? pivot_value_new_user_text (cat->total_label, SIZE_MAX) + : pivot_value_new_var_value (var, value)); } static struct ctables_value * @@ -4023,8 +3709,8 @@ ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a, for (size_t i = 0; i < nest->n; i++) hmap_init (&s->occurrences[a][i]); } - for (size_t i = 0; i < N_CTDTS; i++) - hmap_init (&s->domains[i]); + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + hmap_init (&s->areas[at]); } } @@ -4174,6 +3860,8 @@ ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx, case CTPO_CAT_NRANGE: case CTPO_CAT_SRANGE: + case CTPO_CAT_MISSING: + case CTPO_CAT_OTHERNM: { struct ctables_cell_value cv = { .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e) @@ -4195,8 +3883,6 @@ ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx, } case CTPO_CAT_NUMBER: - case CTPO_CAT_MISSING: - case CTPO_CAT_OTHERNM: case CTPO_CAT_SUBTOTAL: case CTPO_CAT_TOTAL: { @@ -4217,11 +3903,21 @@ ctables_pcexpr_evaluate (const struct ctables_pcexpr_evaluate_ctx *ctx, s = xmalloc (width); buf_copy_rpad (s, width, e->string.string, e->string.length, ' '); } - struct ctables_cell_value cv = { - .category = ctables_find_category_for_postcompute (ctx->section->table->ctables->dict, ctx->cats, ctx->parse_format, e), - .value = { .s = CHAR_CAST (uint8_t *, s ? s : e->string.string) }, - }; - assert (cv.category != NULL); + + const struct ctables_category *category + = ctables_find_category_for_postcompute ( + ctx->section->table->ctables->dict, + ctx->cats, ctx->parse_format, e); + assert (category != NULL); + + struct ctables_cell_value cv = { .category = category }; + if (category->type == CCT_NUMBER) + cv.value.f = category->number; + else if (category->type == CCT_STRING) + cv.value.s = CHAR_CAST (uint8_t *, s ? s : e->string.string); + else + NOT_REACHED (); + double retval = ctables_pcexpr_evaluate_category (ctx, &cv); free (s); return retval; @@ -4304,6 +4000,8 @@ ctables_cell_calculate_postcompute (const struct ctables_section *s, { const struct ctables_summary_spec *ss2 = &pc->specs->specs[i]; if (ss->function == ss2->function + && ss->weighting == ss2->weighting + && ss->calc_area == ss2->calc_area && ss->percentile == ss2->percentile) { *format = ss2->format; @@ -4359,6 +4057,23 @@ ctables_format (double d, const struct fmt_spec *format, return s; } +static bool +all_hidden_vlabels (const struct ctables_table *t, enum pivot_axis_type a) +{ + for (size_t i = 0; i < t->stacks[a].n; i++) + { + struct ctables_nest *nest = &t->stacks[a].nests[i]; + if (nest->n != 1 || nest->scale_idx != 0) + return false; + + enum ctables_vlabel vlabel + = t->ctables->vlabels[var_get_dict_index (nest->vars[0])]; + if (vlabel != CTVL_NONE) + return false; + } + return true; +} + static void ctables_table_output (struct ctables *ct, struct ctables_table *t) { @@ -4404,9 +4119,10 @@ ctables_table_output (struct ctables *ct, struct ctables_table *t) const struct ctables_value *value = t->clabels_values[i]; const struct ctables_category *cat = ctables_categories_match (c, &value->value, var); assert (cat != NULL); - pivot_category_create_leaf (d->root, ctables_category_create_label ( - c, cat, t->clabels_example, - &value->value)); + pivot_category_create_leaf ( + d->root, ctables_category_create_value_label (c, cat, + t->clabels_example, + &value->value)); } } @@ -4468,16 +4184,16 @@ ctables_table_output (struct ctables *ct, struct ctables_table *t) #if 0 if (a == PIVOT_AXIS_ROW) { - size_t ids[N_CTDTS]; + size_t ids[N_CTATS]; memset (ids, 0, sizeof ids); for (size_t j = 0; j < n_sorted; j++) { struct ctables_cell *cell = sorted[j]; - for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++) + for (enum ctables_area_type at = 0; at < N_CTATS; at++) { - struct ctables_domain *domain = cell->domains[dt]; - if (!domain->sequence) - domain->sequence = ++ids[dt]; + struct ctables_area *area = cell->areas[at]; + if (!area->sequence) + area->sequence = ++ids[at]; } } } @@ -4486,7 +4202,7 @@ ctables_table_output (struct ctables *ct, struct ctables_table *t) #if 0 for (size_t j = 0; j < n_sorted; j++) { - printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_domains ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->domains[CTDT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->domains[CTDT_COL]->e_count * 100.0); + printf ("%s (%s): %f/%f = %.1f%%\n", sorted[j]->name, sorted[j]->contributes_to_areas ? "y" : "n", sorted[j]->summaries[0].count, sorted[j]->areas[CTAT_COL]->e_count, sorted[j]->summaries[0].count / sorted[j]->areas[CTAT_COL]->e_count * 100.0); } printf ("\n"); #endif @@ -4509,6 +4225,8 @@ ctables_table_output (struct ctables *ct, struct ctables_table *t) for (size_t k = 0; k < nest->n; k++) { enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (nest->vars[k])]; + if (vlabel == CTVL_NONE && nest->scale_idx == k) + vlabel = CTVL_NAME; if (vlabel != CTVL_NONE) { levels[n_levels++] = (struct ctables_level) { @@ -4617,7 +4335,7 @@ ctables_table_output (struct ctables *ct, struct ctables_table *t) else if (level->type == CTL_CATEGORY) { const struct ctables_cell_value *cv = &cell->axes[a].cvs[level->var_idx]; - label = ctables_category_create_label ( + label = ctables_category_create_value_label ( t->categories[var_get_dict_index (var)], cv->category, var, &cv->value); } @@ -4637,7 +4355,10 @@ ctables_table_output (struct ctables *ct, struct ctables_table *t) free (groups); free (levels); free (sections); + } + + d[a]->hide_all_labels = all_hidden_vlabels (t, a); } { @@ -4658,16 +4379,16 @@ ctables_table_output (struct ctables *ct, struct ctables_table *t) assert (n_sorted <= n_total_cells); sort (sorted, n_sorted, sizeof *sorted, ctables_cell_compare_leaf_3way, NULL); - size_t ids[N_CTDTS]; + size_t ids[N_CTATS]; memset (ids, 0, sizeof ids); for (size_t j = 0; j < n_sorted; j++) { struct ctables_cell *cell = sorted[j]; - for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++) + for (enum ctables_area_type at = 0; at < N_CTATS; at++) { - struct ctables_domain *domain = cell->domains[dt]; - if (!domain->sequence) - domain->sequence = ++ids[dt]; + struct ctables_area *area = cell->areas[at]; + if (!area->sequence) + area->sequence = ++ids[at]; } } @@ -4727,7 +4448,7 @@ ctables_table_output (struct ctables *ct, struct ctables_table *t) struct pivot_value *value; if (ct->hide_threshold != 0 && d < ct->hide_threshold - && ctables_summary_function_is_count (ss->function)) + && ss->function == CTSF_COUNT) { value = pivot_value_new_user_text_nocopy ( xasprintf ("<%d", ct->hide_threshold)); @@ -4760,8 +4481,6 @@ ctables_check_label_position (struct ctables_table *t, enum pivot_axis_type a) if (label_pos == a) return true; - t->clabels_from_axis = a; - const char *subcommand_name = a == PIVOT_AXIS_ROW ? "ROWLABELS" : "COLLABELS"; const char *pos_name = label_pos == PIVOT_AXIS_LAYER ? "LAYER" : "OPPOSITE"; @@ -4859,6 +4578,33 @@ add_sum_var (struct variable *var, return (*n)++; } +static enum ctables_area_type +rotate_area (enum ctables_area_type area) +{ + return area; + switch (area) + { + case CTAT_TABLE: + case CTAT_LAYER: + case CTAT_SUBTABLE: + return area; + + case CTAT_LAYERROW: + return CTAT_LAYERCOL; + + case CTAT_LAYERCOL: + return CTAT_LAYERROW; + + case CTAT_ROW: + return CTAT_COL; + + case CTAT_COL: + return CTAT_ROW; + } + + NOT_REACHED (); +} + static void enumerate_sum_vars (const struct ctables_axis *a, struct variable ***sum_vars, size_t *n, size_t *allocated) @@ -4873,7 +4619,7 @@ enumerate_sum_vars (const struct ctables_axis *a, for (size_t j = 0; j < a->specs[i].n; j++) { struct ctables_summary_spec *spec = &a->specs[i].specs[j]; - if (ctables_function_is_pctsum (spec->function)) + if (spec->function == CTSF_areaPCT_SUM) spec->sum_var_idx = add_sum_var (a->var, sum_vars, n, allocated); } break; @@ -4897,52 +4643,94 @@ ctables_prepare_table (struct ctables_table *t) for (size_t j = 0; j < t->stacks[a].n; j++) { struct ctables_nest *nest = &t->stacks[a].nests[j]; - for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++) + for (enum ctables_area_type at = 0; at < N_CTATS; at++) { - nest->domains[dt] = xmalloc (nest->n * sizeof *nest->domains[dt]); - nest->n_domains[dt] = 0; + nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]); + nest->n_areas[at] = 0; + + enum pivot_axis_type ata, atb; + if (at == CTAT_ROW || at == CTAT_LAYERROW) + { + ata = PIVOT_AXIS_ROW; + atb = PIVOT_AXIS_COLUMN; + } + else if (at == CTAT_COL || at == CTAT_LAYERCOL) + { + ata = PIVOT_AXIS_COLUMN; + atb = PIVOT_AXIS_ROW; + } + + if (at == CTAT_LAYER + ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER + : at == CTAT_LAYERCOL || at == CTAT_LAYERROW + ? a == atb && t->label_axis[a] != a + : false) + { + for (size_t k = nest->n - 1; k < nest->n; k--) + if (k != nest->scale_idx) + { + nest->areas[at][nest->n_areas[at]++] = k; + break; + } + continue; + } + + if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER + : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb + : at == CTAT_TABLE ? true + : false) + continue; for (size_t k = 0; k < nest->n; k++) + if (k != nest->scale_idx) + nest->areas[at][nest->n_areas[at]++] = k; + + int n_drop; + switch (at) { - if (k == nest->scale_idx) - continue; + case CTAT_SUBTABLE: +#define L PIVOT_AXIS_LAYER + n_drop = (t->clabels_from_axis == L ? a != L + : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L) + : t->clabels_from_axis == a ? 2 + : 0); +#undef L + break; + + case CTAT_LAYERROW: + case CTAT_LAYERCOL: + n_drop = a == ata && t->label_axis[ata] == atb; + break; + + case CTAT_ROW: + case CTAT_COL: + n_drop = (a == ata ? t->label_axis[ata] == atb + : a != atb ? 0 + : t->clabels_from_axis == atb ? -1 + : t->clabels_to_axis != atb ? 1 + : 0); + break; + + case CTAT_LAYER: + case CTAT_TABLE: + n_drop = 0; + break; + } - switch (dt) + if (n_drop < 0) + { + size_t n = nest->n_areas[at]; + if (n > 1) { - case CTDT_TABLE: - continue; - - case CTDT_LAYER: - if (a != PIVOT_AXIS_LAYER) - continue; - break; - - case CTDT_SUBTABLE: - case CTDT_ROW: - case CTDT_COL: - if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER - : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN - : a == PIVOT_AXIS_ROW) - { - if (k == nest->n - 1 - || (nest->scale_idx == nest->n - 1 - && k == nest->n - 2)) - continue; - } - break; - - case CTDT_LAYERROW: - if (a == PIVOT_AXIS_COLUMN) - continue; - break; - - case CTDT_LAYERCOL: - if (a == PIVOT_AXIS_ROW) - continue; - break; + nest->areas[at][n - 2] = nest->areas[at][n - 1]; + nest->n_areas[at]--; } - - nest->domains[dt][nest->n_domains[dt]++] = k; + } + else + { + for (int i = 0; i < n_drop; i++) + if (nest->n_areas[at] > 0) + nest->n_areas[at]--; } } } @@ -4950,7 +4738,11 @@ ctables_prepare_table (struct ctables_table *t) else { struct ctables_nest *nest = xmalloc (sizeof *nest); - *nest = (struct ctables_nest) { .n = 0 }; + *nest = (struct ctables_nest) { + .n = 0, + .scale_idx = SIZE_MAX, + .summary_idx = SIZE_MAX + }; t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 }; /* There's no point in moving labels away from an axis that has no @@ -4964,19 +4756,23 @@ ctables_prepare_table (struct ctables_table *t) struct ctables_nest *nest = &stack->nests[i]; if (!nest->specs[CSV_CELL].n) { - struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL]; - specs->specs = xmalloc (sizeof *specs->specs); - specs->n = 1; + struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL]; + ss->specs = xmalloc (sizeof *ss->specs); + ss->n = 1; enum ctables_summary_function function - = specs->is_scale ? CTSF_MEAN : CTSF_COUNT; + = ss->is_scale ? CTSF_MEAN : CTSF_COUNT; - *specs->specs = (struct ctables_summary_spec) { + if (!ss->var) + { + nest->summary_idx = nest->n - 1; + ss->var = nest->vars[nest->summary_idx]; + } + *ss->specs = (struct ctables_summary_spec) { .function = function, - .format = ctables_summary_default_format (function, specs->var), + .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY, + .format = ctables_summary_default_format (function, ss->var), }; - if (!specs->var) - specs->var = nest->vars[0]; ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL], &nest->specs[CSV_CELL]); @@ -4985,6 +4781,20 @@ ctables_prepare_table (struct ctables_table *t) ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL], &nest->specs[CSV_CELL]); + if (t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN + || t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW) + { + for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++) + for (size_t i = 0; i < nest->specs[sv].n; i++) + { + struct ctables_summary_spec *ss = &nest->specs[sv].specs[i]; + const struct ctables_function_info *cfi = + &ctables_function_info[ss->function]; + if (cfi->is_area) + ss->calc_area = rotate_area (ss->calc_area); + } + } + if (t->ctables->smissing_listwise) { struct variable **listwise_vars = NULL; @@ -5341,17 +5151,17 @@ ctables_section_clear (struct ctables_section *s) } hmap_shrink (&s->cells); - for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++) + for (enum ctables_area_type at = 0; at < N_CTATS; at++) { - struct ctables_domain *domain, *next_domain; - HMAP_FOR_EACH_SAFE (domain, next_domain, struct ctables_domain, node, - &s->domains[dt]) + struct ctables_area *area, *next_area; + HMAP_FOR_EACH_SAFE (area, next_area, struct ctables_area, node, + &s->areas[at]) { - free (domain->sums); - hmap_delete (&s->domains[dt], &domain->node); - free (domain); + free (area->sums); + hmap_delete (&s->areas[at], &area->node); + free (area); } - hmap_shrink (&s->domains[dt]); + hmap_shrink (&s->areas[at]); } } @@ -5369,8 +5179,8 @@ ctables_section_uninit (struct ctables_section *s) } hmap_destroy (&s->cells); - for (size_t i = 0; i < N_CTDTS; i++) - hmap_destroy (&s->domains[i]); + for (enum ctables_area_type at = 0; at < N_CTATS; at++) + hmap_destroy (&s->areas[at]); } static void @@ -5414,38 +5224,47 @@ ctables_execute (struct dataset *ds, struct casereader *input, } struct dictionary *dict = dataset_dict (ds); + + bool splitting = dict_get_split_type (dict) == SPLIT_SEPARATE; struct casegrouper *grouper - = (dict_get_split_type (dict) == SPLIT_SEPARATE + = (splitting ? casegrouper_create_splits (input, dict) : casegrouper_create_vars (input, NULL, 0)); struct casereader *group; while (casegrouper_get_next_group (grouper, &group)) { - /* Output SPLIT FILE variables. */ - struct ccase *c = casereader_peek (group, 0); - if (c != NULL) + if (splitting) { - output_split_file_values (ds, c); - case_unref (c); + struct ccase *c = casereader_peek (group, 0); + if (c != NULL) + { + output_split_file_values (ds, c); + case_unref (c); + } } bool warn_on_invalid = true; - for (c = casereader_read (group); c; + for (struct ccase *c = casereader_read (group); c; case_unref (c), c = casereader_read (group)) { - double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid); + double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid); double e_weight = (ct->e_weight ? var_force_valid_weight (ct->e_weight, case_num (c, ct->e_weight), &warn_on_invalid) : d_weight); + double weight[] = { + [CTW_DICTIONARY] = d_weight, + [CTW_EFFECTIVE] = e_weight, + [CTW_UNWEIGHTED] = 1.0, + }; for (size_t i = 0; i < ct->n_tables; i++) { struct ctables_table *t = ct->tables[i]; for (size_t j = 0; j < t->n_sections; j++) - ctables_cell_insert (&t->sections[j], c, d_weight, e_weight); + ctables_cell_insert (&t->sections[j], c, weight); for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) if (t->label_axis[a] != a) @@ -5943,7 +5762,9 @@ ctables_parse_pproperties_format (struct lexer *lexer, { /* Parse function. */ enum ctables_summary_function function; - if (!parse_ctables_summary_function (lexer, &function)) + enum ctables_weighting weighting; + enum ctables_area_type area; + if (!parse_ctables_summary_function (lexer, &function, &weighting, &area)) goto error; /* Parse percentile. */ @@ -5967,6 +5788,9 @@ ctables_parse_pproperties_format (struct lexer *lexer, sizeof *sss->specs); sss->specs[sss->n++] = (struct ctables_summary_spec) { .function = function, + .weighting = weighting, + .calc_area = area, + .user_area = area, .percentile = percentile, .format = format, .is_ctables_format = is_ctables_format, @@ -6441,6 +6265,7 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) [PIVOT_AXIS_LAYER] = PIVOT_AXIS_LAYER, }, .clabels_from_axis = PIVOT_AXIS_LAYER, + .clabels_to_axis = PIVOT_AXIS_LAYER, .categories = categories, .n_categories = n_vars, .cilevel = 95, @@ -6879,12 +6704,18 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) break; } - if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW - && t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN) + if (t->label_axis[PIVOT_AXIS_ROW] != PIVOT_AXIS_ROW) { - msg (SE, _("ROWLABELS and COLLABELS may not both be specified.")); - goto error; + t->clabels_from_axis = PIVOT_AXIS_ROW; + if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN) + { + msg (SE, _("ROWLABELS and COLLABELS may not both be specified.")); + goto error; + } } + else if (t->label_axis[PIVOT_AXIS_COLUMN] != PIVOT_AXIS_COLUMN) + t->clabels_from_axis = PIVOT_AXIS_COLUMN; + t->clabels_to_axis = t->label_axis[t->clabels_from_axis]; if (!ctables_prepare_table (t)) goto error;