X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fctables.c;h=7023c05adbedaf01e8ae62c8ad13041edebd3340;hb=5289fbc7d0f9896033f172844e84364aa3f27066;hp=7b289ed2c78c479d7d414b658e6b1eefab7d4783;hpb=b531990f665ed1ad4c0c46d7de174f9aaff5a697;p=pspp diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index 7b289ed2c7..7023c05adb 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -16,6 +16,9 @@ #include +#include + +#include "data/casereader.h" #include "data/dataset.h" #include "data/dictionary.h" #include "data/mrset.h" @@ -23,9 +26,12 @@ #include "language/lexer/format-parser.h" #include "language/lexer/lexer.h" #include "language/lexer/variable-parser.h" +#include "libpspp/array.h" #include "libpspp/assertion.h" #include "libpspp/hmap.h" #include "libpspp/message.h" +#include "libpspp/string-array.h" +#include "math/moments.h" #include "output/pivot-table.h" #include "gl/minmax.h" @@ -37,26 +43,11 @@ enum ctables_vlabel { - CTVL_DEFAULT = SETTINGS_VALUE_SHOW_DEFAULT, + CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT, CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE, CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL, CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH, - CTVL_NONE, }; -static void UNUSED -ctables_vlabel_unique (enum ctables_vlabel vlabel) -{ - /* This ensures that all of the values are unique. */ - switch (vlabel) - { - case CTVL_DEFAULT: - case CTVL_NAME: - case CTVL_LABEL: - case CTVL_BOTH: - case CTVL_NONE: - abort (); - } -} /* XXX: - unweighted summaries (U*) @@ -216,12 +207,7 @@ struct ctables_postcompute_expr /* CTPO_CAT_RANGE. XXX what about string ranges? */ - struct - { - double low; /* -DBL_MAX for LO. */ - double high; /* DBL_MAX for HIGH. */ - } - range; + double range[2]; /* CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW. */ struct ctables_postcompute_expr *subs[2]; @@ -257,6 +243,9 @@ struct ctables_table struct ctables_chisq *chisq; struct ctables_pairwise *pairwise; + + struct ctables_freqtab **fts; + size_t n_fts; }; struct ctables_var @@ -277,6 +266,12 @@ ctables_var_get_print_format (const struct ctables_var *var) : var_get_print_format (var->var)); } +static const char * +ctables_var_name (const struct ctables_var *var) +{ + return var->is_mrset ? var->mrset->name : var_get_name (var->var); +} + struct ctables_categories { size_t n_refs; @@ -408,8 +403,9 @@ struct ctables_axis { struct ctables_var var; bool scale; - struct ctables_summary *summaries; + struct ctables_summary_spec *summaries; size_t n_summaries; + size_t allocated_summaries; }; /* Nonterminals. */ @@ -435,7 +431,7 @@ enum ctables_function_availability CTFA_MRSETS, /* Only multiple-response sets */ }; -struct ctables_summary +struct ctables_summary_spec { enum ctables_summary_function function; double percentile; /* CTSF_PTILE only. */ @@ -444,7 +440,7 @@ struct ctables_summary }; static void -ctables_summary_uninit (struct ctables_summary *s) +ctables_summary_spec_uninit (struct ctables_summary_spec *s) { if (s) free (s->label); @@ -544,7 +540,7 @@ ctables_axis_destroy (struct ctables_axis *axis) { case CTAO_VAR: for (size_t i = 0; i < axis->n_summaries; i++) - ctables_summary_uninit (&axis->summaries[i]); + ctables_summary_spec_uninit (&axis->summaries[i]); free (axis->summaries); break; @@ -581,55 +577,104 @@ struct ctables_axis_parse_ctx struct ctables_table *t; }; -static struct ctables_summary * -add_summary (struct ctables_axis *axis, enum ctables_summary_function function, - double percentile, size_t *allocated_summaries) +static struct fmt_spec +ctables_summary_default_format (enum ctables_summary_function function, + const struct ctables_var *var) { - if (axis->n_summaries >= *allocated_summaries) - axis->summaries = x2nrealloc (axis->summaries, allocated_summaries, - sizeof *axis->summaries); - - static const char *default_labels[] = { -#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL, - SUMMARIES -#undef S - }; - char *label = (function == CTSF_PTILE - ? xasprintf (_("Percentile %.2f"), percentile) - : xstrdup (gettext (default_labels[function]))); - static const enum ctables_format default_formats[] = { #define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT, SUMMARIES #undef S }; - struct fmt_spec format; switch (default_formats[function]) { case CTF_COUNT: - format = (struct fmt_spec) { .type = FMT_F, .w = 40 }; - break; + return (struct fmt_spec) { .type = FMT_F, .w = 40 }; case CTF_PERCENT: - format = (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 }; - break; + return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 }; case CTF_GENERAL: - format = *ctables_var_get_print_format (&axis->var); - break; + return *ctables_var_get_print_format (var); default: NOT_REACHED (); } +} - struct ctables_summary *s = &axis->summaries[axis->n_summaries++]; - *s = (struct ctables_summary) { - .function = function, - .percentile = percentile, - .label = label, - .format = format, +static const char * +ctables_summary_function_name (enum ctables_summary_function function) +{ + static const char *names[] = { +#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME, + SUMMARIES +#undef S }; - return s; + return names[function]; +} + +static bool +add_summary_spec (struct ctables_axis *axis, + enum ctables_summary_function function, double percentile, + const char *label, const struct fmt_spec *format, + const struct msg_location *loc) +{ + if (axis->op == CTAO_VAR) + { + if (axis->n_summaries >= axis->allocated_summaries) + axis->summaries = x2nrealloc (axis->summaries, + &axis->allocated_summaries, + sizeof *axis->summaries); + + const char *function_name = ctables_summary_function_name (function); + const char *var_name = ctables_var_name (&axis->var); + switch (ctables_function_availability (function)) + { + case CTFA_MRSETS: + if (!axis->var.is_mrset) + { + msg_at (SE, loc, _("Summary function %s applies only to multiple " + "response sets."), function_name); + msg_at (SN, axis->loc, _("'%s' is not a multiple response set."), + var_name); + return false; + } + break; + + case CTFA_SCALE: + if (!axis->scale) + { + msg_at (SE, loc, + _("Summary function %s applies only to scale variables."), + function_name); + msg_at (SN, axis->loc, _("'%s' is not a scale variable."), + var_name); + return false; + } + break; + + case CTFA_ALL: + break; + } + + struct ctables_summary_spec *dst = &axis->summaries[axis->n_summaries++]; + *dst = (struct ctables_summary_spec) { + .function = function, + .percentile = percentile, + .label = xstrdup (label), + .format = (format ? *format + : ctables_summary_default_format (function, &axis->var)), + }; + return true; + } + else + { + for (size_t i = 0; i < 2; i++) + if (!add_summary_spec (axis->subs[i], function, percentile, label, + format, loc)) + return false; + return true; + } } static struct ctables_axis *ctables_axis_parse_stack ( @@ -697,48 +742,85 @@ ctables_axis_parse_primary (struct ctables_axis_parse_ctx *ctx) : var_get_measure (var.var) == MEASURE_SCALE); axis->loc = lex_ofs_location (ctx->lexer, start_ofs, lex_ofs (ctx->lexer) - 1); + return axis; +} - if (lex_match (ctx->lexer, T_LBRACK)) +static struct ctables_axis * +ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx) +{ + struct ctables_axis *sub = ctables_axis_parse_primary (ctx); + if (!sub || !lex_match (ctx->lexer, T_LBRACK)) + return sub; + + do { - size_t allocated_summaries = 0; - do + int start_ofs = lex_ofs (ctx->lexer); + + /* Parse function. */ + enum ctables_summary_function function; + if (!parse_ctables_summary_function (ctx->lexer, &function)) + goto error; + + /* Parse percentile. */ + double percentile = 0; + if (function == CTSF_PTILE) { - enum ctables_summary_function function; - if (!parse_ctables_summary_function (ctx->lexer, &function)) + if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100)) goto error; + percentile = lex_number (ctx->lexer); + lex_get (ctx->lexer); + } - double percentile = 0; - if (function == CTSF_PTILE) - { - if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100)) - goto error; - percentile = lex_number (ctx->lexer); - lex_get (ctx->lexer); - } + /* Parse label. */ + char *label; + if (lex_is_string (ctx->lexer)) + { + label = ss_xstrdup (lex_tokss (ctx->lexer)); + lex_get (ctx->lexer); + } + else if (function == CTSF_PTILE) + label = xasprintf (_("Percentile %.2f"), percentile); + else + { + static const char *default_labels[] = { +#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL, + SUMMARIES +#undef S + }; + label = xstrdup (gettext (default_labels[function])); + } - struct ctables_summary *s = add_summary (axis, function, percentile, - &allocated_summaries); - if (lex_is_string (ctx->lexer)) - { - free (s->label); - s->label = ss_xstrdup (lex_tokss (ctx->lexer)); - lex_get (ctx->lexer); - } - if (lex_token (ctx->lexer) == T_ID) + /* Parse format. */ + struct fmt_spec format; + const struct fmt_spec *formatp; + if (lex_token (ctx->lexer) == T_ID) + { + if (!parse_format_specifier (ctx->lexer, &format) + || !fmt_check_output (&format) + || !fmt_check_type_compat (&format, VAL_NUMERIC)) { - if (!parse_format_specifier (ctx->lexer, &s->format) - || !fmt_check_output (&s->format) - || !fmt_check_type_compat (&s->format, VAL_NUMERIC)) - goto error; + free (label); + goto error; } - lex_match (ctx->lexer, T_COMMA); + formatp = &format; } - while (!lex_match (ctx->lexer, T_RBRACK)); + else + formatp = NULL; + + struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs, + lex_ofs (ctx->lexer) - 1); + add_summary_spec (sub, function, percentile, label, formatp, loc); + free (label); + msg_location_destroy (loc); + + lex_match (ctx->lexer, T_COMMA); } - return axis; + while (!lex_match (ctx->lexer, T_RBRACK)); + + return sub; error: - ctables_axis_destroy (axis); + ctables_axis_destroy (sub); return NULL; } @@ -770,7 +852,7 @@ find_scale (const struct ctables_axis *axis) } static const struct ctables_axis * -find_categorical_summary (const struct ctables_axis *axis) +find_categorical_summary_spec (const struct ctables_axis *axis) { if (!axis) return NULL; @@ -781,7 +863,7 @@ find_categorical_summary (const struct ctables_axis *axis) for (size_t i = 0; i < 2; i++) { const struct ctables_axis *sum - = find_categorical_summary (axis->subs[i]); + = find_categorical_summary_spec (axis->subs[i]); if (sum) return sum; } @@ -793,13 +875,13 @@ static struct ctables_axis * ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx) { int start_ofs = lex_ofs (ctx->lexer); - struct ctables_axis *lhs = ctables_axis_parse_primary (ctx); + struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx); if (!lhs) return NULL; while (lex_match (ctx->lexer, T_GT)) { - struct ctables_axis *rhs = ctables_axis_parse_primary (ctx); + struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx); if (!rhs) return NULL; @@ -817,7 +899,7 @@ ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx) return NULL; } - const struct ctables_axis *outer_sum = find_categorical_summary (lhs); + const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs); if (outer_sum) { msg_at (SE, nest->loc, @@ -1159,13 +1241,864 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict, return true; } +struct var_array + { + const struct ctables_axis *summary; + struct variable **vars; + size_t n; + }; + +static void +var_array_uninit (struct var_array *va) +{ + if (va) + free (va->vars); +} + +struct var_array2 + { + struct var_array *vas; + size_t n; + }; + +static void +var_array2_uninit (struct var_array2 *vaa) +{ + if (vaa) + { + for (size_t i = 0; i < vaa->n; i++) + var_array_uninit (&vaa->vas[i]); + free (vaa->vas); + } +} + +static struct var_array2 +nest_fts (struct var_array2 va0, struct var_array2 va1) +{ + if (!va0.n) + return va1; + else if (!va1.n) + return va0; + + struct var_array2 vaa = { .vas = xnmalloc (va0.n, va1.n * sizeof *vaa.vas) }; + for (size_t i = 0; i < va0.n; i++) + for (size_t j = 0; j < va1.n; j++) + { + const struct var_array *a = &va0.vas[i]; + const struct var_array *b = &va1.vas[j]; + + size_t allocate = a->n + b->n; + struct variable **vars = xnmalloc (allocate, sizeof *vars); + size_t n = 0; + for (size_t k = 0; k < a->n; k++) + vars[n++] = a->vars[k]; + for (size_t k = 0; k < b->n; k++) + vars[n++] = b->vars[k]; + assert (n == allocate); + + assert (!(a->summary && b->summary)); + vaa.vas[vaa.n++] = (struct var_array) { + .summary = a->summary ? a->summary : b->summary, + .vars = vars, + .n = n + }; + } + var_array2_uninit (&va0); + var_array2_uninit (&va1); + return vaa; +} + +static struct var_array2 +stack_fts (struct var_array2 va0, struct var_array2 va1) +{ + struct var_array2 vaa = { .vas = xnmalloc (va0.n + va1.n, sizeof *vaa.vas) }; + for (size_t i = 0; i < va0.n; i++) + vaa.vas[vaa.n++] = va0.vas[i]; + for (size_t i = 0; i < va1.n; i++) + vaa.vas[vaa.n++] = va1.vas[i]; + assert (vaa.n == va0.n + va1.n); + free (va0.vas); + free (va1.vas); + return vaa; +} + +static struct var_array2 +enumerate_fts (const struct ctables_axis *a) +{ + if (!a) + return (struct var_array2) { .n = 0 }; + + switch (a->op) + { + case CTAO_VAR: + assert (!a->var.is_mrset); + struct var_array *va = xmalloc (sizeof *va); + if (a->scale) + *va = (struct var_array) { .n = 0 }; + else + { + struct variable **v = xmalloc (sizeof *v); + *v = a->var.var; + *va = (struct var_array) { .vars = v, .n = 1 }; + } + va->summary = a->scale || a->n_summaries ? a : NULL; + return (struct var_array2) { .vas = va, .n = 1 }; + + case CTAO_STACK: + return stack_fts (enumerate_fts (a->subs[0]), + enumerate_fts (a->subs[1])); + + case CTAO_NEST: + return nest_fts (enumerate_fts (a->subs[0]), + enumerate_fts (a->subs[1])); + } + + NOT_REACHED (); +} + +union ctables_summary + { + /* COUNT, VALIDN, TOTALN. */ + struct + { + double valid; + double missing; + }; + + /* MINIMUM, MAXIMUM, RANGE. */ + struct + { + double min; + double max; + }; + + /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */ + struct moments1 *moments; + + /* XXX percentiles, median, mode, multiple response */ + }; + +static void +ctables_summary_init (union ctables_summary *s, + const struct ctables_summary_spec *ss) +{ + switch (ss->function) + { + case CTSF_COUNT: + case CTSF_ECOUNT: + case CTSF_ROWPCT_COUNT: + case CTSF_COLPCT_COUNT: + case CTSF_TABLEPCT_COUNT: + case CTSF_SUBTABLEPCT_COUNT: + case CTSF_LAYERPCT_COUNT: + case CTSF_LAYERROWPCT_COUNT: + case CTSF_LAYERCOLPCT_COUNT: + case CTSF_ROWPCT_VALIDN: + case CTSF_COLPCT_VALIDN: + case CTSF_TABLEPCT_VALIDN: + case CTSF_SUBTABLEPCT_VALIDN: + case CTSF_LAYERPCT_VALIDN: + case CTSF_LAYERROWPCT_VALIDN: + case CTSF_LAYERCOLPCT_VALIDN: + case CTSF_ROWPCT_TOTALN: + case CTSF_COLPCT_TOTALN: + case CTSF_TABLEPCT_TOTALN: + case CTSF_SUBTABLEPCT_TOTALN: + case CTSF_LAYERPCT_TOTALN: + case CTSF_LAYERROWPCT_TOTALN: + case CTSF_LAYERCOLPCT_TOTALN: + case CSTF_TOTALN: + case CTSF_ETOTALN: + case CTSF_VALIDN: + case CTSF_EVALIDN: + s->missing = s->valid = 0; + break; + + case CTSF_MAXIMUM: + case CTSF_MINIMUM: + case CTSF_RANGE: + s->min = s->max = SYSMIS; + break; + + case CTSF_MEAN: + case CTSF_SEMEAN: + case CTSF_STDDEV: + case CTSF_SUM: + case CTSF_VARIANCE: + case CTSF_ROWPCT_SUM: + case CTSF_COLPCT_SUM: + case CTSF_TABLEPCT_SUM: + case CTSF_SUBTABLEPCT_SUM: + case CTSF_LAYERPCT_SUM: + case CTSF_LAYERROWPCT_SUM: + case CTSF_LAYERCOLPCT_SUM: + s->moments = moments1_create (MOMENT_VARIANCE); + break; + + case CTSF_MEDIAN: + case CTSF_MISSING: + case CTSF_MODE: + case CTSF_PTILE: + NOT_REACHED (); + + case CTSF_RESPONSES: + case CTSF_ROWPCT_RESPONSES: + case CTSF_COLPCT_RESPONSES: + case CTSF_TABLEPCT_RESPONSES: + case CTSF_SUBTABLEPCT_RESPONSES: + case CTSF_LAYERPCT_RESPONSES: + case CTSF_LAYERROWPCT_RESPONSES: + case CTSF_LAYERCOLPCT_RESPONSES: + case CTSF_ROWPCT_RESPONSES_COUNT: + case CTSF_COLPCT_RESPONSES_COUNT: + case CTSF_TABLEPCT_RESPONSES_COUNT: + case CTSF_SUBTABLEPCT_RESPONSES_COUNT: + case CTSF_LAYERPCT_RESPONSES_COUNT: + case CTSF_LAYERROWPCT_RESPONSES_COUNT: + case CTSF_LAYERCOLPCT_RESPONSES_COUNT: + case CTSF_ROWPCT_COUNT_RESPONSES: + case CTSF_COLPCT_COUNT_RESPONSES: + case CTSF_TABLEPCT_COUNT_RESPONSES: + case CTSF_SUBTABLEPCT_COUNT_RESPONSES: + case CTSF_LAYERPCT_COUNT_RESPONSES: + case CTSF_LAYERROWPCT_COUNT_RESPONSES: + case CTSF_LAYERCOLPCT_COUNT_RESPONSES: + NOT_REACHED (); + } +} + +static void +ctables_summary_uninit (union ctables_summary *s, + const struct ctables_summary_spec *ss) +{ + switch (ss->function) + { + case CTSF_COUNT: + case CTSF_ECOUNT: + case CTSF_ROWPCT_COUNT: + case CTSF_COLPCT_COUNT: + case CTSF_TABLEPCT_COUNT: + case CTSF_SUBTABLEPCT_COUNT: + case CTSF_LAYERPCT_COUNT: + case CTSF_LAYERROWPCT_COUNT: + case CTSF_LAYERCOLPCT_COUNT: + case CTSF_ROWPCT_VALIDN: + case CTSF_COLPCT_VALIDN: + case CTSF_TABLEPCT_VALIDN: + case CTSF_SUBTABLEPCT_VALIDN: + case CTSF_LAYERPCT_VALIDN: + case CTSF_LAYERROWPCT_VALIDN: + case CTSF_LAYERCOLPCT_VALIDN: + case CTSF_ROWPCT_TOTALN: + case CTSF_COLPCT_TOTALN: + case CTSF_TABLEPCT_TOTALN: + case CTSF_SUBTABLEPCT_TOTALN: + case CTSF_LAYERPCT_TOTALN: + case CTSF_LAYERROWPCT_TOTALN: + case CTSF_LAYERCOLPCT_TOTALN: + case CSTF_TOTALN: + case CTSF_ETOTALN: + case CTSF_VALIDN: + case CTSF_EVALIDN: + break; + + case CTSF_MAXIMUM: + case CTSF_MINIMUM: + case CTSF_RANGE: + break; + + case CTSF_MEAN: + case CTSF_SEMEAN: + case CTSF_STDDEV: + case CTSF_SUM: + case CTSF_VARIANCE: + case CTSF_ROWPCT_SUM: + case CTSF_COLPCT_SUM: + case CTSF_TABLEPCT_SUM: + case CTSF_SUBTABLEPCT_SUM: + case CTSF_LAYERPCT_SUM: + case CTSF_LAYERROWPCT_SUM: + case CTSF_LAYERCOLPCT_SUM: + moments1_destroy (s->moments); + break; + + case CTSF_MEDIAN: + case CTSF_MISSING: + case CTSF_MODE: + case CTSF_PTILE: + NOT_REACHED (); + + case CTSF_RESPONSES: + case CTSF_ROWPCT_RESPONSES: + case CTSF_COLPCT_RESPONSES: + case CTSF_TABLEPCT_RESPONSES: + case CTSF_SUBTABLEPCT_RESPONSES: + case CTSF_LAYERPCT_RESPONSES: + case CTSF_LAYERROWPCT_RESPONSES: + case CTSF_LAYERCOLPCT_RESPONSES: + case CTSF_ROWPCT_RESPONSES_COUNT: + case CTSF_COLPCT_RESPONSES_COUNT: + case CTSF_TABLEPCT_RESPONSES_COUNT: + case CTSF_SUBTABLEPCT_RESPONSES_COUNT: + case CTSF_LAYERPCT_RESPONSES_COUNT: + case CTSF_LAYERROWPCT_RESPONSES_COUNT: + case CTSF_LAYERCOLPCT_RESPONSES_COUNT: + case CTSF_ROWPCT_COUNT_RESPONSES: + case CTSF_COLPCT_COUNT_RESPONSES: + case CTSF_TABLEPCT_COUNT_RESPONSES: + case CTSF_SUBTABLEPCT_COUNT_RESPONSES: + case CTSF_LAYERPCT_COUNT_RESPONSES: + case CTSF_LAYERROWPCT_COUNT_RESPONSES: + case CTSF_LAYERCOLPCT_COUNT_RESPONSES: + NOT_REACHED (); + } +} + +static void +ctables_summary_add (union ctables_summary *s, + const struct ctables_summary_spec *ss, + const struct variable *var, const union value *value, + double weight) +{ + switch (ss->function) + { + case CTSF_COUNT: + case CTSF_ECOUNT: + case CTSF_ROWPCT_COUNT: + case CTSF_COLPCT_COUNT: + case CTSF_TABLEPCT_COUNT: + case CTSF_SUBTABLEPCT_COUNT: + case CTSF_LAYERPCT_COUNT: + case CTSF_LAYERROWPCT_COUNT: + case CTSF_LAYERCOLPCT_COUNT: + case CTSF_ROWPCT_VALIDN: + case CTSF_COLPCT_VALIDN: + case CTSF_TABLEPCT_VALIDN: + case CTSF_SUBTABLEPCT_VALIDN: + case CTSF_LAYERPCT_VALIDN: + case CTSF_LAYERROWPCT_VALIDN: + case CTSF_LAYERCOLPCT_VALIDN: + case CTSF_ROWPCT_TOTALN: + case CTSF_COLPCT_TOTALN: + case CTSF_TABLEPCT_TOTALN: + case CTSF_SUBTABLEPCT_TOTALN: + case CTSF_LAYERPCT_TOTALN: + case CTSF_LAYERROWPCT_TOTALN: + case CTSF_LAYERCOLPCT_TOTALN: + case CSTF_TOTALN: + case CTSF_ETOTALN: + case CTSF_VALIDN: + case CTSF_EVALIDN: + if (var_is_value_missing (var, value)) + s->missing += weight; + else + s->valid += weight; + break; + + case CTSF_MAXIMUM: + case CTSF_MINIMUM: + case CTSF_RANGE: + if (!var_is_value_missing (var, value)) + { + assert (!var_is_alpha (var)); /* XXX? */ + if (s->min == SYSMIS || value->f < s->min) + s->min = value->f; + if (s->max == SYSMIS || value->f > s->max) + s->max = value->f; + } + break; + + case CTSF_MEAN: + case CTSF_SEMEAN: + case CTSF_STDDEV: + case CTSF_SUM: + case CTSF_VARIANCE: + case CTSF_ROWPCT_SUM: + case CTSF_COLPCT_SUM: + case CTSF_TABLEPCT_SUM: + case CTSF_SUBTABLEPCT_SUM: + case CTSF_LAYERPCT_SUM: + case CTSF_LAYERROWPCT_SUM: + case CTSF_LAYERCOLPCT_SUM: + moments1_add (s->moments, value->f, weight); + break; + + case CTSF_MEDIAN: + case CTSF_MISSING: + case CTSF_MODE: + case CTSF_PTILE: + NOT_REACHED (); + + case CTSF_RESPONSES: + case CTSF_ROWPCT_RESPONSES: + case CTSF_COLPCT_RESPONSES: + case CTSF_TABLEPCT_RESPONSES: + case CTSF_SUBTABLEPCT_RESPONSES: + case CTSF_LAYERPCT_RESPONSES: + case CTSF_LAYERROWPCT_RESPONSES: + case CTSF_LAYERCOLPCT_RESPONSES: + case CTSF_ROWPCT_RESPONSES_COUNT: + case CTSF_COLPCT_RESPONSES_COUNT: + case CTSF_TABLEPCT_RESPONSES_COUNT: + case CTSF_SUBTABLEPCT_RESPONSES_COUNT: + case CTSF_LAYERPCT_RESPONSES_COUNT: + case CTSF_LAYERROWPCT_RESPONSES_COUNT: + case CTSF_LAYERCOLPCT_RESPONSES_COUNT: + case CTSF_ROWPCT_COUNT_RESPONSES: + case CTSF_COLPCT_COUNT_RESPONSES: + case CTSF_TABLEPCT_COUNT_RESPONSES: + case CTSF_SUBTABLEPCT_COUNT_RESPONSES: + case CTSF_LAYERPCT_COUNT_RESPONSES: + case CTSF_LAYERROWPCT_COUNT_RESPONSES: + case CTSF_LAYERCOLPCT_COUNT_RESPONSES: + NOT_REACHED (); + } +} + + +static double +ctables_summary_value (union ctables_summary *s, + const struct ctables_summary_spec *ss) +{ + switch (ss->function) + { + case CTSF_COUNT: + case CTSF_ECOUNT: + return s->valid; + + case CTSF_ROWPCT_COUNT: + case CTSF_COLPCT_COUNT: + case CTSF_TABLEPCT_COUNT: + case CTSF_SUBTABLEPCT_COUNT: + case CTSF_LAYERPCT_COUNT: + case CTSF_LAYERROWPCT_COUNT: + case CTSF_LAYERCOLPCT_COUNT: + case CTSF_ROWPCT_VALIDN: + case CTSF_COLPCT_VALIDN: + case CTSF_TABLEPCT_VALIDN: + case CTSF_SUBTABLEPCT_VALIDN: + case CTSF_LAYERPCT_VALIDN: + case CTSF_LAYERROWPCT_VALIDN: + case CTSF_LAYERCOLPCT_VALIDN: + case CTSF_ROWPCT_TOTALN: + case CTSF_COLPCT_TOTALN: + case CTSF_TABLEPCT_TOTALN: + case CTSF_SUBTABLEPCT_TOTALN: + case CTSF_LAYERPCT_TOTALN: + case CTSF_LAYERROWPCT_TOTALN: + case CTSF_LAYERCOLPCT_TOTALN: + NOT_REACHED (); + + case CSTF_TOTALN: + case CTSF_ETOTALN: + return s->valid + s->missing; + + case CTSF_VALIDN: + case CTSF_EVALIDN: + return s->valid; + + case CTSF_MAXIMUM: + return s->max; + + case CTSF_MINIMUM: + return s->min; + + case CTSF_RANGE: + return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS; + + case CTSF_MEAN: + { + double mean; + moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL); + return mean; + } + + case CTSF_SEMEAN: + { + double weight, variance; + moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL); + return calc_semean (variance, weight); + } + + case CTSF_STDDEV: + { + double variance; + moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL); + return variance != SYSMIS ? sqrt (variance) : SYSMIS; + } + + case CTSF_SUM: + { + double weight, mean; + moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL); + return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS; + } + + case CTSF_VARIANCE: + { + double variance; + moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL); + return variance; + } + + case CTSF_ROWPCT_SUM: + case CTSF_COLPCT_SUM: + case CTSF_TABLEPCT_SUM: + case CTSF_SUBTABLEPCT_SUM: + case CTSF_LAYERPCT_SUM: + case CTSF_LAYERROWPCT_SUM: + case CTSF_LAYERCOLPCT_SUM: + NOT_REACHED (); + + case CTSF_MEDIAN: + case CTSF_MISSING: + case CTSF_MODE: + case CTSF_PTILE: + NOT_REACHED (); + + case CTSF_RESPONSES: + case CTSF_ROWPCT_RESPONSES: + case CTSF_COLPCT_RESPONSES: + case CTSF_TABLEPCT_RESPONSES: + case CTSF_SUBTABLEPCT_RESPONSES: + case CTSF_LAYERPCT_RESPONSES: + case CTSF_LAYERROWPCT_RESPONSES: + case CTSF_LAYERCOLPCT_RESPONSES: + case CTSF_ROWPCT_RESPONSES_COUNT: + case CTSF_COLPCT_RESPONSES_COUNT: + case CTSF_TABLEPCT_RESPONSES_COUNT: + case CTSF_SUBTABLEPCT_RESPONSES_COUNT: + case CTSF_LAYERPCT_RESPONSES_COUNT: + case CTSF_LAYERROWPCT_RESPONSES_COUNT: + case CTSF_LAYERCOLPCT_RESPONSES_COUNT: + case CTSF_ROWPCT_COUNT_RESPONSES: + case CTSF_COLPCT_COUNT_RESPONSES: + case CTSF_TABLEPCT_COUNT_RESPONSES: + case CTSF_SUBTABLEPCT_COUNT_RESPONSES: + case CTSF_LAYERPCT_COUNT_RESPONSES: + case CTSF_LAYERROWPCT_COUNT_RESPONSES: + case CTSF_LAYERCOLPCT_COUNT_RESPONSES: + NOT_REACHED (); + } + + NOT_REACHED (); +} + +struct ctables_freq + { + struct hmap_node node; /* Element in hash table. */ + union ctables_summary *summaries; + union value values[]; /* The value. */ + }; + +struct ctables_freqtab + { + struct var_array vars; + struct hmap data; /* Contains "struct ctables_freq"s. */ + const struct ctables_summary_spec *summaries; + size_t n_summaries; + const struct variable *summary_var; + struct ctables_freq **sorted; + }; + +static struct ctables_freq * +ctables_freq_create (struct ctables_freqtab *ft) +{ + struct ctables_freq *f = xmalloc (sizeof *f + ft->vars.n * sizeof *f->values); + f->summaries = xmalloc (ft->n_summaries * sizeof *f->summaries); + for (size_t i = 0; i < ft->n_summaries; i++) + ctables_summary_init (&f->summaries[i], &ft->summaries[i]); + return f; +} + +static void +ctables_freq_add (struct ctables_freqtab *ft, struct ctables_freq *f, + const struct variable *var, const union value *value, + double weight) +{ + for (size_t i = 0; i < ft->n_summaries; i++) + ctables_summary_add (&f->summaries[i], &ft->summaries[i], + var, value, weight); +} + +static int +ctables_freq_compare_3way (const void *a_, const void *b_, const void *vars_) +{ + const struct var_array *vars = vars_; + struct ctables_freq *const *a = a_; + struct ctables_freq *const *b = b_; + + for (size_t i = 0; i < vars->n; i++) + { + int cmp = value_compare_3way (&(*a)->values[i], &(*b)->values[i], + var_get_width (vars->vars[i])); + if (cmp) + return cmp; + } + return 0; +} + +static bool +ctables_execute (struct dataset *ds, struct ctables *ct) +{ + for (size_t i = 0; i < ct->n_tables; i++) + { + size_t allocated_fts = 0; + + struct ctables_table *t = &ct->tables[i]; + struct var_array2 vaa = enumerate_fts (t->axes[PIVOT_AXIS_ROW]); + vaa = nest_fts (vaa, enumerate_fts (t->axes[PIVOT_AXIS_COLUMN])); + vaa = nest_fts (vaa, enumerate_fts (t->axes[PIVOT_AXIS_LAYER])); + for (size_t i = 0; i < vaa.n; i++) + { + for (size_t j = 0; j < vaa.vas[i].n; j++) + { + if (j) + fputs (", ", stdout); + fputs (var_get_name (vaa.vas[i].vars[j]), stdout); + } + putchar ('\n'); + } + + for (size_t j = 0; j < vaa.n; j++) + { + const struct var_array *va = &vaa.vas[j]; + const struct ctables_summary_spec *summaries; + size_t n_summaries; + const struct variable *summary_var; + if (!va->summary) + { + static const struct ctables_summary_spec count = { + .function = CTSF_COUNT, + .label = (char *) N_("Count"), + .format = { .type = FMT_F, .w = 40 }, + }; + summaries = &count; + n_summaries = 1; + summary_var = va->vars[0]; + } + else if (va->summary->n_summaries) + { + summaries = va->summary->summaries; + n_summaries = va->summary->n_summaries; + summary_var = va->summary->var.var; + } + else + { + static const struct ctables_summary_spec mean = { + .function = CTSF_MEAN, + .label = (char *) N_("Mean"), + .format = { .type = FMT_F, .w = 40, .d = 2}, /* XXX */ + }; + summaries = &mean; + n_summaries = 1; + summary_var = va->summary->var.var; + }; + + struct ctables_freqtab *ft = xmalloc (sizeof *ft); + *ft = (struct ctables_freqtab) { + .vars = *va, + .summaries = summaries, + .n_summaries = n_summaries, + .summary_var = summary_var, + .data = HMAP_INITIALIZER (ft->data), + }; + + if (t->n_fts >= allocated_fts) + t->fts = x2nrealloc (t->fts, &allocated_fts, sizeof *t->fts); + t->fts[t->n_fts++] = ft; + } + + free (vaa.vas); + } + + struct casereader *input = casereader_create_filter_weight (proc_open (ds), + dataset_dict (ds), + NULL, NULL); + bool warn_on_invalid = true; + for (struct ccase *c = casereader_read (input); c; + case_unref (c), c = casereader_read (input)) + { + double weight = dict_get_case_weight (dataset_dict (ds), c, + &warn_on_invalid); + + for (size_t i = 0; i < ct->n_tables; i++) + { + struct ctables_table *t = &ct->tables[i]; + + for (size_t j = 0; j < t->n_fts; j++) + { + struct ctables_freqtab *ft = t->fts[j]; + + for (size_t k = 0; k < ft->vars.n; k++) + { + const struct variable *var = ft->vars.vars[k]; + switch (var_is_value_missing (var, case_data (c, var))) + { + case MV_SYSTEM: + goto next_ft; + + case MV_USER: + if (!t->categories[var_get_dict_index (var)] + || !t->categories[var_get_dict_index (var)]->include_missing) + goto next_ft; + break; + } + } + size_t hash = 0; + for (size_t k = 0; k < ft->vars.n; k++) + { + const struct variable *var = ft->vars.vars[k]; + hash = value_hash (case_data (c, var), var_get_width (var), hash); + } + + struct ctables_freq *f; + HMAP_FOR_EACH_WITH_HASH (f, struct ctables_freq, node, hash, &ft->data) + { + for (size_t k = 0; k < ft->vars.n; k++) + { + const struct variable *var = ft->vars.vars[k]; + if (!value_equal (case_data (c, var), &f->values[k], + var_get_width (var))) + goto next_hash_node; + } + goto found; + + next_hash_node: ; + } + + f = ctables_freq_create (ft); + for (size_t k = 0; k < ft->vars.n; k++) + { + const struct variable *var = ft->vars.vars[k]; + value_clone (&f->values[k], case_data (c, var), + var_get_width (var)); + } + hmap_insert (&ft->data, &f->node, hash); + + found: + ctables_freq_add (ft, f, ft->summary_var, + case_data (c, ft->summary_var), weight); + + next_ft: ; + } + } + } + casereader_destroy (input); + + for (size_t i = 0; i < ct->n_tables; i++) + { + struct ctables_table *t = &ct->tables[i]; + + struct pivot_table *pt = pivot_table_create (N_("Custom Tables")); + struct pivot_dimension *d = pivot_dimension_create ( + pt, PIVOT_AXIS_ROW, N_("Rows")); + for (size_t j = 0; j < t->n_fts; j++) + { + struct ctables_freqtab *ft = t->fts[j]; + ft->sorted = xnmalloc (ft->data.count, sizeof *ft->sorted); + + struct ctables_freq *f; + size_t n = 0; + HMAP_FOR_EACH (f, struct ctables_freq, node, &ft->data) + ft->sorted[n++] = f; + assert (n == ft->data.count); + sort (ft->sorted, n, sizeof *ft->sorted, + ctables_freq_compare_3way, &ft->vars); + + struct pivot_category **groups = xnmalloc (ft->vars.n, + sizeof *groups); + for (size_t k = 0; k < n; k++) + { + struct ctables_freq *prev = k > 0 ? ft->sorted[k - 1] : NULL; + struct ctables_freq *f = ft->sorted[k]; + + size_t n_common = 0; + if (prev) + for (; n_common + 1 < ft->vars.n; n_common++) + if (!value_equal (&prev->values[n_common], + &f->values[n_common], + var_get_type (ft->vars.vars[n_common]))) + break; + + for (size_t m = n_common; m < ft->vars.n; m++) + { + struct pivot_category *parent = m > 0 ? groups[m - 1] : d->root; + const struct variable *var = ft->vars.vars[m]; + enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (var)]; + + if (vlabel != CTVL_NONE) + parent = pivot_category_create_group__ ( + parent, pivot_value_new_variable (ft->vars.vars[m])); + + if (m + 1 < ft->vars.n) + parent = pivot_category_create_group__ ( + parent, + pivot_value_new_var_value (ft->vars.vars[m], &f->values[m])); + groups[m] = parent; + + if (m == ft->vars.n - 1) + { + struct pivot_category *c = pivot_category_create_group__ ( + parent, + pivot_value_new_var_value (ft->vars.vars[ft->vars.n - 1], + &f->values[ft->vars.n - 1])); + for (size_t p = 0; p < ft->n_summaries; p++) + { + double value = ctables_summary_value ( + &f->summaries[p], &ft->summaries[p]); + int leaf = pivot_category_create_leaf ( + c, pivot_value_new_text (ft->summaries[p].label)); + pivot_table_put1 (pt, leaf, pivot_value_new_number (value)); + } + } + } + } + free (groups); + } + pivot_table_submit (pt); + } + + for (size_t i = 0; i < ct->n_tables; i++) + { + struct ctables_table *t = &ct->tables[i]; + + for (size_t j = 0; j < t->n_fts; j++) + { + struct ctables_freqtab *ft = t->fts[j]; + struct ctables_freq *f, *next; + HMAP_FOR_EACH_SAFE (f, next, struct ctables_freq, node, &ft->data) + { + hmap_delete (&ft->data, &f->node); + for (size_t k = 0; k < ft->n_summaries; k++) + ctables_summary_uninit (&f->summaries[k], &ft->summaries[k]); + free (f->summaries); + for (size_t k = 0; k < ft->vars.n; k++) + { + const struct variable *var = ft->vars.vars[k]; + value_destroy (&f->values[k], var_get_width (var)); + } + free (f); + } + hmap_destroy (&ft->data); + free (ft->sorted); + var_array_uninit (&ft->vars); + free (ft); + } + free (t->fts); + } + + return proc_commit (ds); +} + int cmd_ctables (struct lexer *lexer, struct dataset *ds) { size_t n_vars = dict_get_n_vars (dataset_dict (ds)); enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels); + enum settings_value_show tvars = settings_get_show_variables (); for (size_t i = 0; i < n_vars; i++) - vlabels[i] = CTVL_DEFAULT; + vlabels[i] = (enum ctables_vlabel) tvars; struct ctables *ct = xmalloc (sizeof *ct); *ct = (struct ctables) { @@ -1289,7 +2222,7 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) enum ctables_vlabel vlabel; if (lex_match_id (lexer, "DEFAULT")) - vlabel = CTVL_DEFAULT; + vlabel = (enum ctables_vlabel) settings_get_show_variables (); else if (lex_match_id (lexer, "NAME")) vlabel = CTVL_NAME; else if (lex_match_id (lexer, "LABEL")) @@ -1433,7 +2366,6 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) if (!lex_force_match (lexer, T_SLASH)) break; - /* XXX Validate axes. */ while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD) { if (lex_match_id (lexer, "SLABELS")) @@ -1764,10 +2696,19 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) goto error; } } + + if (t->row_labels != CTLP_NORMAL && t->col_labels != CTLP_NORMAL) + { + msg (SE, _("ROWLABELS and COLLABELS may not both be specified.")); + goto error; + } + } while (lex_token (lexer) != T_ENDCMD); + + bool ok = ctables_execute (ds, ct); ctables_destroy (ct); - return CMD_SUCCESS; + return ok ? CMD_SUCCESS : CMD_FAILURE; error: ctables_destroy (ct);