X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fctables.c;h=4f23b3607d46aab491e832c407bc1a00ad8a40cd;hb=75943041c08afda14867f3b6d14a416170825418;hp=1b8ba198f83cca7c882e2b3140e729a698a7d270;hpb=8ef3ef3a5f9ea8290aa4806e7b5dae1de4332866;p=pspp diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index 1b8ba198f8..4f23b3607d 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -16,6 +16,9 @@ #include +#include + +#include "data/casereader.h" #include "data/dataset.h" #include "data/dictionary.h" #include "data/mrset.h" @@ -23,9 +26,13 @@ #include "language/lexer/format-parser.h" #include "language/lexer/lexer.h" #include "language/lexer/variable-parser.h" +#include "libpspp/array.h" #include "libpspp/assertion.h" +#include "libpspp/hash-functions.h" #include "libpspp/hmap.h" #include "libpspp/message.h" +#include "libpspp/string-array.h" +#include "math/moments.h" #include "output/pivot-table.h" #include "gl/minmax.h" @@ -37,26 +44,11 @@ enum ctables_vlabel { - CTVL_DEFAULT = SETTINGS_VALUE_SHOW_DEFAULT, + CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT, CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE, CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL, CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH, - CTVL_NONE, }; -static void UNUSED -ctables_vlabel_unique (enum ctables_vlabel vlabel) -{ - /* This ensures that all of the values are unique. */ - switch (vlabel) - { - case CTVL_DEFAULT: - case CTVL_NAME: - case CTVL_LABEL: - case CTVL_BOTH: - case CTVL_NONE: - abort (); - } -} /* XXX: - unweighted summaries (U*) @@ -172,7 +164,7 @@ struct ctables struct variable *base_weight; /* WEIGHT. */ int hide_threshold; /* HIDESMALLCOUNTS. */ - struct ctables_table *tables; + struct ctables_table **tables; size_t n_tables; }; @@ -216,12 +208,7 @@ struct ctables_postcompute_expr /* CTPO_CAT_RANGE. XXX what about string ranges? */ - struct - { - double low; /* -DBL_MAX for LO. */ - double high; /* DBL_MAX for HIGH. */ - } - range; + double range[2]; /* CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW. */ struct ctables_postcompute_expr *subs[2]; @@ -235,9 +222,28 @@ enum ctables_label_position CTLP_LAYER, }; +struct var_array + { + struct variable **vars; + size_t n; + + struct ctables_summary_spec *summaries; + size_t n_summaries; + struct variable *summary_var; + }; + +struct var_array2 + { + struct var_array *vas; + size_t n; + }; + struct ctables_table { struct ctables_axis *axes[PIVOT_N_AXES]; + struct var_array2 vaas[PIVOT_N_AXES]; + enum pivot_axis_type summary_axis; + struct hmap ft; enum pivot_axis_type slabels_position; bool slabels_visible; @@ -257,6 +263,7 @@ struct ctables_table struct ctables_chisq *chisq; struct ctables_pairwise *pairwise; + }; struct ctables_var @@ -414,7 +421,7 @@ struct ctables_axis { struct ctables_var var; bool scale; - struct ctables_summary *summaries; + struct ctables_summary_spec *summaries; size_t n_summaries; size_t allocated_summaries; }; @@ -442,7 +449,7 @@ enum ctables_function_availability CTFA_MRSETS, /* Only multiple-response sets */ }; -struct ctables_summary +struct ctables_summary_spec { enum ctables_summary_function function; double percentile; /* CTSF_PTILE only. */ @@ -451,7 +458,7 @@ struct ctables_summary }; static void -ctables_summary_uninit (struct ctables_summary *s) +ctables_summary_spec_uninit (struct ctables_summary_spec *s) { if (s) free (s->label); @@ -551,7 +558,7 @@ ctables_axis_destroy (struct ctables_axis *axis) { case CTAO_VAR: for (size_t i = 0; i < axis->n_summaries; i++) - ctables_summary_uninit (&axis->summaries[i]); + ctables_summary_spec_uninit (&axis->summaries[i]); free (axis->summaries); break; @@ -613,6 +620,21 @@ ctables_summary_default_format (enum ctables_summary_function function, } } +static char * +ctables_summary_default_label (enum ctables_summary_function function, + double percentile) +{ + static const char *default_labels[] = { +#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL, + SUMMARIES +#undef S + }; + + return (function == CTSF_PTILE + ? xasprintf (_("Percentile %.2f"), percentile) + : xstrdup (gettext (default_labels[function]))); +} + static const char * ctables_summary_function_name (enum ctables_summary_function function) { @@ -625,10 +647,10 @@ ctables_summary_function_name (enum ctables_summary_function function) } static bool -add_summary (struct ctables_axis *axis, - enum ctables_summary_function function, double percentile, - const char *label, const struct fmt_spec *format, - const struct msg_location *loc) +add_summary_spec (struct ctables_axis *axis, + enum ctables_summary_function function, double percentile, + const char *label, const struct fmt_spec *format, + const struct msg_location *loc) { if (axis->op == CTAO_VAR) { @@ -668,8 +690,8 @@ add_summary (struct ctables_axis *axis, break; } - struct ctables_summary *dst = &axis->summaries[axis->n_summaries++]; - *dst = (struct ctables_summary) { + struct ctables_summary_spec *dst = &axis->summaries[axis->n_summaries++]; + *dst = (struct ctables_summary_spec) { .function = function, .percentile = percentile, .label = xstrdup (label), @@ -681,8 +703,8 @@ add_summary (struct ctables_axis *axis, else { for (size_t i = 0; i < 2; i++) - if (!add_summary (axis->subs[i], function, percentile, label, format, - loc)) + if (!add_summary_spec (axis->subs[i], function, percentile, label, + format, loc)) return false; return true; } @@ -789,17 +811,8 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx) label = ss_xstrdup (lex_tokss (ctx->lexer)); lex_get (ctx->lexer); } - else if (function == CTSF_PTILE) - label = xasprintf (_("Percentile %.2f"), percentile); else - { - static const char *default_labels[] = { -#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL, - SUMMARIES -#undef S - }; - label = xstrdup (gettext (default_labels[function])); - } + label = ctables_summary_default_label (function, percentile); /* Parse format. */ struct fmt_spec format; @@ -820,7 +833,7 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx) struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs, lex_ofs (ctx->lexer) - 1); - add_summary (sub, function, percentile, label, formatp, loc); + add_summary_spec (sub, function, percentile, label, formatp, loc); free (label); msg_location_destroy (loc); @@ -863,7 +876,7 @@ find_scale (const struct ctables_axis *axis) } static const struct ctables_axis * -find_categorical_summary (const struct ctables_axis *axis) +find_categorical_summary_spec (const struct ctables_axis *axis) { if (!axis) return NULL; @@ -874,7 +887,7 @@ find_categorical_summary (const struct ctables_axis *axis) for (size_t i = 0; i < 2; i++) { const struct ctables_axis *sum - = find_categorical_summary (axis->subs[i]); + = find_categorical_summary_spec (axis->subs[i]); if (sum) return sum; } @@ -910,7 +923,7 @@ ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx) return NULL; } - const struct ctables_axis *outer_sum = find_categorical_summary (lhs); + const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs); if (outer_sum) { msg_at (SE, nest->loc, @@ -982,7 +995,7 @@ ctables_pairwise_destroy (struct ctables_pairwise *pairwise) } static void -ctables_table_uninit (struct ctables_table *t) +ctables_table_destroy (struct ctables_table *t) { if (!t) return; @@ -999,6 +1012,7 @@ ctables_table_uninit (struct ctables_table *t) free (t->title); ctables_chisq_destroy (t->chisq); ctables_pairwise_destroy (t->pairwise); + free (t); } static void @@ -1012,7 +1026,7 @@ ctables_destroy (struct ctables *ct) free (ct->missing); free (ct->vlabels); for (size_t i = 0; i < ct->n_tables; i++) - ctables_table_uninit (&ct->tables[i]); + ctables_table_destroy (ct->tables[i]); free (ct->tables); free (ct); } @@ -1252,13 +1266,936 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict, return true; } +static void +var_array_uninit (struct var_array *va) +{ + if (va) + free (va->vars); +} + +static void +var_array2_uninit (struct var_array2 *vaa) +{ + if (vaa) + { + for (size_t i = 0; i < vaa->n; i++) + var_array_uninit (&vaa->vas[i]); + free (vaa->vas); + } +} + +static struct var_array2 +nest_fts (struct var_array2 va0, struct var_array2 va1) +{ + if (!va0.n) + return va1; + else if (!va1.n) + return va0; + + struct var_array2 vaa = { .vas = xnmalloc (va0.n, va1.n * sizeof *vaa.vas) }; + for (size_t i = 0; i < va0.n; i++) + for (size_t j = 0; j < va1.n; j++) + { + const struct var_array *a = &va0.vas[i]; + const struct var_array *b = &va1.vas[j]; + + size_t allocate = a->n + b->n; + struct variable **vars = xnmalloc (allocate, sizeof *vars); + enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes); + size_t n = 0; + for (size_t k = 0; k < a->n; k++) + vars[n++] = a->vars[k]; + for (size_t k = 0; k < b->n; k++) + vars[n++] = b->vars[k]; + assert (n == allocate); + + const struct var_array *summary_src; + if (!a->summary_var) + summary_src = b; + else if (!b->summary_var) + summary_src = a; + else + NOT_REACHED (); + vaa.vas[vaa.n++] = (struct var_array) { + .vars = vars, + .n = n, + .summaries = summary_src->summaries, + .n_summaries = summary_src->n_summaries, + .summary_var = summary_src->summary_var, + }; + } + var_array2_uninit (&va0); + var_array2_uninit (&va1); + return vaa; +} + +static struct var_array2 +stack_fts (struct var_array2 va0, struct var_array2 va1) +{ + struct var_array2 vaa = { .vas = xnmalloc (va0.n + va1.n, sizeof *vaa.vas) }; + for (size_t i = 0; i < va0.n; i++) + vaa.vas[vaa.n++] = va0.vas[i]; + for (size_t i = 0; i < va1.n; i++) + vaa.vas[vaa.n++] = va1.vas[i]; + assert (vaa.n == va0.n + va1.n); + free (va0.vas); + free (va1.vas); + return vaa; +} + +static struct var_array2 +enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a) +{ + if (!a) + return (struct var_array2) { .n = 0 }; + + switch (a->op) + { + case CTAO_VAR: + assert (!a->var.is_mrset); + struct var_array *va = xmalloc (sizeof *va); + if (a->scale) + *va = (struct var_array) { .n = 0 }; + else + { + struct variable **vars = xmalloc (sizeof *vars); + *vars = a->var.var; + enum pivot_axis_type *axes = xmalloc (sizeof *axes); + *axes = axis_type; + *va = (struct var_array) { .vars = vars, .n = 1 }; + } + if (a->n_summaries || a->scale) + { + va->summaries = a->summaries; + va->n_summaries = a->n_summaries; + va->summary_var = a->var.var; + } + return (struct var_array2) { .vas = va, .n = 1 }; + + case CTAO_STACK: + return stack_fts (enumerate_fts (axis_type, a->subs[0]), + enumerate_fts (axis_type, a->subs[1])); + + case CTAO_NEST: + return nest_fts (enumerate_fts (axis_type, a->subs[0]), + enumerate_fts (axis_type, a->subs[1])); + } + + NOT_REACHED (); +} + +union ctables_summary + { + /* COUNT, VALIDN, TOTALN. */ + struct + { + double valid; + double missing; + }; + + /* MINIMUM, MAXIMUM, RANGE. */ + struct + { + double min; + double max; + }; + + /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */ + struct moments1 *moments; + + /* XXX percentiles, median, mode, multiple response */ + }; + +#if 0 +static void +ctables_summary_init (union ctables_summary *s, + const struct ctables_summary_spec *ss) +{ + switch (ss->function) + { + case CTSF_COUNT: + case CTSF_ECOUNT: + case CTSF_ROWPCT_COUNT: + case CTSF_COLPCT_COUNT: + case CTSF_TABLEPCT_COUNT: + case CTSF_SUBTABLEPCT_COUNT: + case CTSF_LAYERPCT_COUNT: + case CTSF_LAYERROWPCT_COUNT: + case CTSF_LAYERCOLPCT_COUNT: + case CTSF_ROWPCT_VALIDN: + case CTSF_COLPCT_VALIDN: + case CTSF_TABLEPCT_VALIDN: + case CTSF_SUBTABLEPCT_VALIDN: + case CTSF_LAYERPCT_VALIDN: + case CTSF_LAYERROWPCT_VALIDN: + case CTSF_LAYERCOLPCT_VALIDN: + case CTSF_ROWPCT_TOTALN: + case CTSF_COLPCT_TOTALN: + case CTSF_TABLEPCT_TOTALN: + case CTSF_SUBTABLEPCT_TOTALN: + case CTSF_LAYERPCT_TOTALN: + case CTSF_LAYERROWPCT_TOTALN: + case CTSF_LAYERCOLPCT_TOTALN: + case CSTF_TOTALN: + case CTSF_ETOTALN: + case CTSF_VALIDN: + case CTSF_EVALIDN: + s->missing = s->valid = 0; + break; + + case CTSF_MAXIMUM: + case CTSF_MINIMUM: + case CTSF_RANGE: + s->min = s->max = SYSMIS; + break; + + case CTSF_MEAN: + case CTSF_SEMEAN: + case CTSF_STDDEV: + case CTSF_SUM: + case CTSF_VARIANCE: + case CTSF_ROWPCT_SUM: + case CTSF_COLPCT_SUM: + case CTSF_TABLEPCT_SUM: + case CTSF_SUBTABLEPCT_SUM: + case CTSF_LAYERPCT_SUM: + case CTSF_LAYERROWPCT_SUM: + case CTSF_LAYERCOLPCT_SUM: + s->moments = moments1_create (MOMENT_VARIANCE); + break; + + case CTSF_MEDIAN: + case CTSF_MISSING: + case CTSF_MODE: + case CTSF_PTILE: + NOT_REACHED (); + + case CTSF_RESPONSES: + case CTSF_ROWPCT_RESPONSES: + case CTSF_COLPCT_RESPONSES: + case CTSF_TABLEPCT_RESPONSES: + case CTSF_SUBTABLEPCT_RESPONSES: + case CTSF_LAYERPCT_RESPONSES: + case CTSF_LAYERROWPCT_RESPONSES: + case CTSF_LAYERCOLPCT_RESPONSES: + case CTSF_ROWPCT_RESPONSES_COUNT: + case CTSF_COLPCT_RESPONSES_COUNT: + case CTSF_TABLEPCT_RESPONSES_COUNT: + case CTSF_SUBTABLEPCT_RESPONSES_COUNT: + case CTSF_LAYERPCT_RESPONSES_COUNT: + case CTSF_LAYERROWPCT_RESPONSES_COUNT: + case CTSF_LAYERCOLPCT_RESPONSES_COUNT: + case CTSF_ROWPCT_COUNT_RESPONSES: + case CTSF_COLPCT_COUNT_RESPONSES: + case CTSF_TABLEPCT_COUNT_RESPONSES: + case CTSF_SUBTABLEPCT_COUNT_RESPONSES: + case CTSF_LAYERPCT_COUNT_RESPONSES: + case CTSF_LAYERROWPCT_COUNT_RESPONSES: + case CTSF_LAYERCOLPCT_COUNT_RESPONSES: + NOT_REACHED (); + } +} + +static void +ctables_summary_uninit (union ctables_summary *s, + const struct ctables_summary_spec *ss) +{ + switch (ss->function) + { + case CTSF_COUNT: + case CTSF_ECOUNT: + case CTSF_ROWPCT_COUNT: + case CTSF_COLPCT_COUNT: + case CTSF_TABLEPCT_COUNT: + case CTSF_SUBTABLEPCT_COUNT: + case CTSF_LAYERPCT_COUNT: + case CTSF_LAYERROWPCT_COUNT: + case CTSF_LAYERCOLPCT_COUNT: + case CTSF_ROWPCT_VALIDN: + case CTSF_COLPCT_VALIDN: + case CTSF_TABLEPCT_VALIDN: + case CTSF_SUBTABLEPCT_VALIDN: + case CTSF_LAYERPCT_VALIDN: + case CTSF_LAYERROWPCT_VALIDN: + case CTSF_LAYERCOLPCT_VALIDN: + case CTSF_ROWPCT_TOTALN: + case CTSF_COLPCT_TOTALN: + case CTSF_TABLEPCT_TOTALN: + case CTSF_SUBTABLEPCT_TOTALN: + case CTSF_LAYERPCT_TOTALN: + case CTSF_LAYERROWPCT_TOTALN: + case CTSF_LAYERCOLPCT_TOTALN: + case CSTF_TOTALN: + case CTSF_ETOTALN: + case CTSF_VALIDN: + case CTSF_EVALIDN: + break; + + case CTSF_MAXIMUM: + case CTSF_MINIMUM: + case CTSF_RANGE: + break; + + case CTSF_MEAN: + case CTSF_SEMEAN: + case CTSF_STDDEV: + case CTSF_SUM: + case CTSF_VARIANCE: + case CTSF_ROWPCT_SUM: + case CTSF_COLPCT_SUM: + case CTSF_TABLEPCT_SUM: + case CTSF_SUBTABLEPCT_SUM: + case CTSF_LAYERPCT_SUM: + case CTSF_LAYERROWPCT_SUM: + case CTSF_LAYERCOLPCT_SUM: + moments1_destroy (s->moments); + break; + + case CTSF_MEDIAN: + case CTSF_MISSING: + case CTSF_MODE: + case CTSF_PTILE: + NOT_REACHED (); + + case CTSF_RESPONSES: + case CTSF_ROWPCT_RESPONSES: + case CTSF_COLPCT_RESPONSES: + case CTSF_TABLEPCT_RESPONSES: + case CTSF_SUBTABLEPCT_RESPONSES: + case CTSF_LAYERPCT_RESPONSES: + case CTSF_LAYERROWPCT_RESPONSES: + case CTSF_LAYERCOLPCT_RESPONSES: + case CTSF_ROWPCT_RESPONSES_COUNT: + case CTSF_COLPCT_RESPONSES_COUNT: + case CTSF_TABLEPCT_RESPONSES_COUNT: + case CTSF_SUBTABLEPCT_RESPONSES_COUNT: + case CTSF_LAYERPCT_RESPONSES_COUNT: + case CTSF_LAYERROWPCT_RESPONSES_COUNT: + case CTSF_LAYERCOLPCT_RESPONSES_COUNT: + case CTSF_ROWPCT_COUNT_RESPONSES: + case CTSF_COLPCT_COUNT_RESPONSES: + case CTSF_TABLEPCT_COUNT_RESPONSES: + case CTSF_SUBTABLEPCT_COUNT_RESPONSES: + case CTSF_LAYERPCT_COUNT_RESPONSES: + case CTSF_LAYERROWPCT_COUNT_RESPONSES: + case CTSF_LAYERCOLPCT_COUNT_RESPONSES: + NOT_REACHED (); + } +} + +static void +ctables_summary_add (union ctables_summary *s, + const struct ctables_summary_spec *ss, + const struct variable *var, const union value *value, + double weight) +{ + switch (ss->function) + { + case CTSF_COUNT: + case CTSF_ECOUNT: + case CTSF_ROWPCT_COUNT: + case CTSF_COLPCT_COUNT: + case CTSF_TABLEPCT_COUNT: + case CTSF_SUBTABLEPCT_COUNT: + case CTSF_LAYERPCT_COUNT: + case CTSF_LAYERROWPCT_COUNT: + case CTSF_LAYERCOLPCT_COUNT: + case CTSF_ROWPCT_VALIDN: + case CTSF_COLPCT_VALIDN: + case CTSF_TABLEPCT_VALIDN: + case CTSF_SUBTABLEPCT_VALIDN: + case CTSF_LAYERPCT_VALIDN: + case CTSF_LAYERROWPCT_VALIDN: + case CTSF_LAYERCOLPCT_VALIDN: + case CTSF_ROWPCT_TOTALN: + case CTSF_COLPCT_TOTALN: + case CTSF_TABLEPCT_TOTALN: + case CTSF_SUBTABLEPCT_TOTALN: + case CTSF_LAYERPCT_TOTALN: + case CTSF_LAYERROWPCT_TOTALN: + case CTSF_LAYERCOLPCT_TOTALN: + case CSTF_TOTALN: + case CTSF_ETOTALN: + case CTSF_VALIDN: + case CTSF_EVALIDN: + if (var_is_value_missing (var, value)) + s->missing += weight; + else + s->valid += weight; + break; + + case CTSF_MAXIMUM: + case CTSF_MINIMUM: + case CTSF_RANGE: + if (!var_is_value_missing (var, value)) + { + assert (!var_is_alpha (var)); /* XXX? */ + if (s->min == SYSMIS || value->f < s->min) + s->min = value->f; + if (s->max == SYSMIS || value->f > s->max) + s->max = value->f; + } + break; + + case CTSF_MEAN: + case CTSF_SEMEAN: + case CTSF_STDDEV: + case CTSF_SUM: + case CTSF_VARIANCE: + case CTSF_ROWPCT_SUM: + case CTSF_COLPCT_SUM: + case CTSF_TABLEPCT_SUM: + case CTSF_SUBTABLEPCT_SUM: + case CTSF_LAYERPCT_SUM: + case CTSF_LAYERROWPCT_SUM: + case CTSF_LAYERCOLPCT_SUM: + moments1_add (s->moments, value->f, weight); + break; + + case CTSF_MEDIAN: + case CTSF_MISSING: + case CTSF_MODE: + case CTSF_PTILE: + NOT_REACHED (); + + case CTSF_RESPONSES: + case CTSF_ROWPCT_RESPONSES: + case CTSF_COLPCT_RESPONSES: + case CTSF_TABLEPCT_RESPONSES: + case CTSF_SUBTABLEPCT_RESPONSES: + case CTSF_LAYERPCT_RESPONSES: + case CTSF_LAYERROWPCT_RESPONSES: + case CTSF_LAYERCOLPCT_RESPONSES: + case CTSF_ROWPCT_RESPONSES_COUNT: + case CTSF_COLPCT_RESPONSES_COUNT: + case CTSF_TABLEPCT_RESPONSES_COUNT: + case CTSF_SUBTABLEPCT_RESPONSES_COUNT: + case CTSF_LAYERPCT_RESPONSES_COUNT: + case CTSF_LAYERROWPCT_RESPONSES_COUNT: + case CTSF_LAYERCOLPCT_RESPONSES_COUNT: + case CTSF_ROWPCT_COUNT_RESPONSES: + case CTSF_COLPCT_COUNT_RESPONSES: + case CTSF_TABLEPCT_COUNT_RESPONSES: + case CTSF_SUBTABLEPCT_COUNT_RESPONSES: + case CTSF_LAYERPCT_COUNT_RESPONSES: + case CTSF_LAYERROWPCT_COUNT_RESPONSES: + case CTSF_LAYERCOLPCT_COUNT_RESPONSES: + NOT_REACHED (); + } +} + +static double +ctables_summary_value (union ctables_summary *s, + const struct ctables_summary_spec *ss) +{ + switch (ss->function) + { + case CTSF_COUNT: + case CTSF_ECOUNT: + return s->valid; + + case CTSF_ROWPCT_COUNT: + case CTSF_COLPCT_COUNT: + case CTSF_TABLEPCT_COUNT: + case CTSF_SUBTABLEPCT_COUNT: + case CTSF_LAYERPCT_COUNT: + case CTSF_LAYERROWPCT_COUNT: + case CTSF_LAYERCOLPCT_COUNT: + case CTSF_ROWPCT_VALIDN: + case CTSF_COLPCT_VALIDN: + case CTSF_TABLEPCT_VALIDN: + case CTSF_SUBTABLEPCT_VALIDN: + case CTSF_LAYERPCT_VALIDN: + case CTSF_LAYERROWPCT_VALIDN: + case CTSF_LAYERCOLPCT_VALIDN: + case CTSF_ROWPCT_TOTALN: + case CTSF_COLPCT_TOTALN: + case CTSF_TABLEPCT_TOTALN: + case CTSF_SUBTABLEPCT_TOTALN: + case CTSF_LAYERPCT_TOTALN: + case CTSF_LAYERROWPCT_TOTALN: + case CTSF_LAYERCOLPCT_TOTALN: + NOT_REACHED (); + + case CSTF_TOTALN: + case CTSF_ETOTALN: + return s->valid + s->missing; + + case CTSF_VALIDN: + case CTSF_EVALIDN: + return s->valid; + + case CTSF_MAXIMUM: + return s->max; + + case CTSF_MINIMUM: + return s->min; + + case CTSF_RANGE: + return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS; + + case CTSF_MEAN: + { + double mean; + moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL); + return mean; + } + + case CTSF_SEMEAN: + { + double weight, variance; + moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL); + return calc_semean (variance, weight); + } + + case CTSF_STDDEV: + { + double variance; + moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL); + return variance != SYSMIS ? sqrt (variance) : SYSMIS; + } + + case CTSF_SUM: + { + double weight, mean; + moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL); + return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS; + } + + case CTSF_VARIANCE: + { + double variance; + moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL); + return variance; + } + + case CTSF_ROWPCT_SUM: + case CTSF_COLPCT_SUM: + case CTSF_TABLEPCT_SUM: + case CTSF_SUBTABLEPCT_SUM: + case CTSF_LAYERPCT_SUM: + case CTSF_LAYERROWPCT_SUM: + case CTSF_LAYERCOLPCT_SUM: + NOT_REACHED (); + + case CTSF_MEDIAN: + case CTSF_MISSING: + case CTSF_MODE: + case CTSF_PTILE: + NOT_REACHED (); + + case CTSF_RESPONSES: + case CTSF_ROWPCT_RESPONSES: + case CTSF_COLPCT_RESPONSES: + case CTSF_TABLEPCT_RESPONSES: + case CTSF_SUBTABLEPCT_RESPONSES: + case CTSF_LAYERPCT_RESPONSES: + case CTSF_LAYERROWPCT_RESPONSES: + case CTSF_LAYERCOLPCT_RESPONSES: + case CTSF_ROWPCT_RESPONSES_COUNT: + case CTSF_COLPCT_RESPONSES_COUNT: + case CTSF_TABLEPCT_RESPONSES_COUNT: + case CTSF_SUBTABLEPCT_RESPONSES_COUNT: + case CTSF_LAYERPCT_RESPONSES_COUNT: + case CTSF_LAYERROWPCT_RESPONSES_COUNT: + case CTSF_LAYERCOLPCT_RESPONSES_COUNT: + case CTSF_ROWPCT_COUNT_RESPONSES: + case CTSF_COLPCT_COUNT_RESPONSES: + case CTSF_TABLEPCT_COUNT_RESPONSES: + case CTSF_SUBTABLEPCT_COUNT_RESPONSES: + case CTSF_LAYERPCT_COUNT_RESPONSES: + case CTSF_LAYERROWPCT_COUNT_RESPONSES: + case CTSF_LAYERCOLPCT_COUNT_RESPONSES: + NOT_REACHED (); + } + + NOT_REACHED (); +} +#endif + +struct ctables_freq + { + struct hmap_node node; /* Element in hash table. */ + + struct + { + size_t vaa_idx; + union value *values; + int leaf; + } + axes[PIVOT_N_AXES]; + + //union ctables_summary *summaries; + double count; + }; + +#if 0 +static struct ctables_freq * +ctables_freq_create (struct ctables_freqtab *ft) +{ + struct ctables_freq *f = xmalloc (sizeof *f + ft->vars.n * sizeof *f->values); + f->summaries = xmalloc (ft->n_summaries * sizeof *f->summaries); + for (size_t i = 0; i < ft->n_summaries; i++) + ctables_summary_init (&f->summaries[i], &ft->summaries[i]); + return f; +} + +static void +ctables_freq_add (struct ctables_freqtab *ft, struct ctables_freq *f, + const struct variable *var, const union value *value, + double weight) +{ + for (size_t i = 0; i < ft->n_summaries; i++) + ctables_summary_add (&f->summaries[i], &ft->summaries[i], + var, value, weight); +} +#endif + +struct ctables_freq_sort_aux + { + const struct ctables_table *t; + enum pivot_axis_type a; + }; + +static int +ctables_freq_compare_3way (const void *a_, const void *b_, const void *aux_) +{ + const struct ctables_freq_sort_aux *aux = aux_; + struct ctables_freq *const *ap = a_; + struct ctables_freq *const *bp = b_; + const struct ctables_freq *a = *ap; + const struct ctables_freq *b = *bp; + + size_t a_idx = a->axes[aux->a].vaa_idx; + size_t b_idx = b->axes[aux->a].vaa_idx; + if (a_idx != b_idx) + return a_idx < b_idx ? -1 : 1; + + const struct var_array *va = &aux->t->vaas[aux->a].vas[a_idx]; + for (size_t i = 0; i < va->n; i++) + { + int cmp = value_compare_3way (&a->axes[aux->a].values[i], + &b->axes[aux->a].values[i], + var_get_width (va->vars[i])); + if (cmp) + return cmp; + } + return 0; +} + +/* Algorithm: + + For each row: + For each ctables_table: + For each combination of row vars: + For each combination of column vars: + For each combination of layer vars: + Add entry + Make a table of row values: + Sort entries by row values + Assign a 0-based index to each actual value + Construct a dimension + Make a table of column values + Make a table of layer values + For each entry: + Fill the table entry using the indexes from before. + */ + +static void +ctables_freqtab_insert (struct ctables_table *t, + const struct ccase *c, + size_t ir, size_t ic, size_t il, + double weight) +{ + size_t ix[PIVOT_N_AXES] = { + [PIVOT_AXIS_ROW] = ir, + [PIVOT_AXIS_COLUMN] = ic, + [PIVOT_AXIS_LAYER] = il, + }; + + size_t hash = 0; + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + { + const struct var_array *va = &t->vaas[a].vas[ix[a]]; + hash = hash_int (ix[a], hash); + for (size_t i = 0; i < va->n; i++) + hash = value_hash (case_data (c, va->vars[i]), + var_get_width (va->vars[i]), hash); + } + + struct ctables_freq *f; + HMAP_FOR_EACH_WITH_HASH (f, struct ctables_freq, node, hash, &t->ft) + { + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + { + const struct var_array *va = &t->vaas[a].vas[ix[a]]; + if (f->axes[a].vaa_idx != ix[a]) + goto not_equal; + for (size_t i = 0; i < va->n; i++) + if (!value_equal (case_data (c, va->vars[i]), + &f->axes[a].values[i], + var_get_width (va->vars[i]))) + goto not_equal; + } + + f->count += weight; + return; + + not_equal: ; + } + + f = xmalloc (sizeof *f); + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + { + const struct var_array *va = &t->vaas[a].vas[ix[a]]; + f->axes[a].vaa_idx = ix[a]; + f->axes[a].values = (va->n + ? xnmalloc (va->n, sizeof *f->axes[a].values) + : NULL); + for (size_t i = 0; i < va->n; i++) + value_clone (&f->axes[a].values[i], case_data (c, va->vars[i]), + var_get_width (va->vars[i])); + } + f->count = weight; + hmap_insert (&t->ft, &f->node, hash); +} + +static bool +ctables_execute (struct dataset *ds, struct ctables *ct) +{ + for (size_t i = 0; i < ct->n_tables; i++) + { + struct ctables_table *t = ct->tables[i]; + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + if (t->axes[a]) + t->vaas[a] = enumerate_fts (a, t->axes[a]); + else + { + struct var_array *va = xmalloc (sizeof *va); + *va = (struct var_array) { .n = 0 }; + t->vaas[a] = (struct var_array2) { .vas = va, .n = 1 }; + } + + for (size_t i = 0; i < t->vaas[t->summary_axis].n; i++) + { + struct var_array *va = &t->vaas[t->summary_axis].vas[i]; + if (!va->n_summaries) + { + va->summaries = xmalloc (sizeof *va->summaries); + va->n_summaries = 1; + + enum ctables_summary_function function + = va->summary_var ? CTSF_MEAN : CTSF_COUNT; + struct ctables_var var = { .is_mrset = false, .var = va->summary_var }; + + *va->summaries = (struct ctables_summary_spec) { + .function = function, + .format = ctables_summary_default_format (function, &var), + .label = ctables_summary_default_label (function, 0), + }; + if (!va->summary_var) + va->summary_var = va->vars[0]; + } + } + } + + struct casereader *input = casereader_create_filter_weight (proc_open (ds), + dataset_dict (ds), + NULL, NULL); + bool warn_on_invalid = true; + for (struct ccase *c = casereader_read (input); c; + case_unref (c), c = casereader_read (input)) + { + double weight = dict_get_case_weight (dataset_dict (ds), c, + &warn_on_invalid); + + for (size_t i = 0; i < ct->n_tables; i++) + { + struct ctables_table *t = ct->tables[i]; + + for (size_t ir = 0; ir < t->vaas[PIVOT_AXIS_ROW].n; ir++) + for (size_t ic = 0; ic < t->vaas[PIVOT_AXIS_COLUMN].n; ic++) + for (size_t il = 0; il < t->vaas[PIVOT_AXIS_LAYER].n; il++) + ctables_freqtab_insert (t, c, ir, ic, il, weight); + } + } + casereader_destroy (input); + + for (size_t i = 0; i < ct->n_tables; i++) + { + struct ctables_table *t = ct->tables[i]; + + struct pivot_table *pt = pivot_table_create (N_("Custom Tables")); + pivot_table_set_look (pt, ct->look); + struct pivot_dimension *d[PIVOT_N_AXES]; + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + { + static const char *names[] = { + [PIVOT_AXIS_ROW] = N_("Rows"), + [PIVOT_AXIS_COLUMN] = N_("Columns"), + [PIVOT_AXIS_LAYER] = N_("Layers"), + }; + d[a] = (t->axes[a] || a == t->summary_axis + ? pivot_dimension_create (pt, a, names[a]) + : NULL); + if (!d[a]) + continue; + + assert (t->axes[a]); + + struct ctables_freq **sorted = xnmalloc (t->ft.count, sizeof *sorted); + + struct ctables_freq *f; + size_t n = 0; + HMAP_FOR_EACH (f, struct ctables_freq, node, &t->ft) + sorted[n++] = f; + assert (n == t->ft.count); + + struct ctables_freq_sort_aux aux = { .t = t, .a = a }; + sort (sorted, n, sizeof *sorted, ctables_freq_compare_3way, &aux); + + size_t max_depth = 0; + for (size_t j = 0; j < t->vaas[a].n; j++) + if (t->vaas[a].vas[j].n > max_depth) + max_depth = t->vaas[a].vas[j].n; + + struct pivot_category **groups = xnmalloc (max_depth, sizeof *groups); + struct pivot_category *top = NULL; + int prev_leaf = 0; + for (size_t j = 0; j < n; j++) + { + struct ctables_freq *f = sorted[j]; + const struct var_array *va = &t->vaas[a].vas[f->axes[a].vaa_idx]; + + size_t n_common = 0; + bool new_subtable = false; + if (j > 0) + { + struct ctables_freq *prev = sorted[j - 1]; + if (prev->axes[a].vaa_idx == f->axes[a].vaa_idx) + { + for (; n_common < va->n; n_common++) + if (!value_equal (&prev->axes[a].values[n_common], + &f->axes[a].values[n_common], + var_get_type (va->vars[n_common]))) + break; + } + else + new_subtable = true; + } + else + new_subtable = true; + + if (new_subtable) + { + enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (va->vars[0])]; + top = d[a]->root; + if (vlabel != CTVL_NONE) + top = pivot_category_create_group__ ( + top, pivot_value_new_variable (va->vars[0])); + } + if (n_common == va->n) + { + f->axes[a].leaf = prev_leaf; + continue; + } + + for (size_t k = n_common; k < va->n; k++) + { + struct pivot_category *parent = k > 0 ? groups[k - 1] : top; + + struct pivot_value *label = pivot_value_new_var_value ( + va->vars[k], &f->axes[a].values[k]); + + if (k == va->n - 1) + { + if (a == t->summary_axis) + { + parent = pivot_category_create_group__ (parent, label); + for (size_t m = 0; m < va->n_summaries; m++) + { + int leaf = pivot_category_create_leaf ( + parent, pivot_value_new_text (va->summaries[m].label)); + if (m == 0) + prev_leaf = leaf; + } + } + else + prev_leaf = pivot_category_create_leaf (parent, label); + break; + } + + parent = pivot_category_create_group__ (parent, label); + + enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (va->vars[k + 1])]; + if (vlabel != CTVL_NONE) + parent = pivot_category_create_group__ ( + parent, pivot_value_new_variable (va->vars[k + 1])); + groups[k] = parent; + } + + f->axes[a].leaf = prev_leaf; + } + free (sorted); + free (groups); + } + struct ctables_freq *f; + HMAP_FOR_EACH (f, struct ctables_freq, node, &t->ft) + { + size_t dindexes[3]; + size_t n_dindexes = 0; + + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + if (d[a]) + dindexes[n_dindexes++] = f->axes[a].leaf; + pivot_table_put (pt, dindexes, n_dindexes, + pivot_value_new_number (f->count)); + } + + pivot_table_submit (pt); + } + +#if 0 + for (size_t i = 0; i < ct->n_tables; i++) + { + struct ctables_table *t = ct->tables[i]; + + for (size_t j = 0; j < t->n_fts; j++) + { + struct ctables_freqtab *ft = t->fts[j]; + struct ctables_freq *f, *next; + HMAP_FOR_EACH_SAFE (f, next, struct ctables_freq, node, &ft->data) + { + hmap_delete (&ft->data, &f->node); + for (size_t k = 0; k < ft->n_summaries; k++) + ctables_summary_uninit (&f->summaries[k], &ft->summaries[k]); + free (f->summaries); + for (size_t k = 0; k < ft->vars.n; k++) + { + const struct variable *var = ft->vars.vars[k]; + value_destroy (&f->values[k], var_get_width (var)); + } + free (f); + } + hmap_destroy (&ft->data); + var_array_uninit (&ft->vars); + free (ft); + } + free (t->fts); + } +#endif + + return proc_commit (ds); +} + int cmd_ctables (struct lexer *lexer, struct dataset *ds) { size_t n_vars = dict_get_n_vars (dataset_dict (ds)); enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels); + enum settings_value_show tvars = settings_get_show_variables (); for (size_t i = 0; i < n_vars; i++) - vlabels[i] = CTVL_DEFAULT; + vlabels[i] = (enum ctables_vlabel) tvars; struct ctables *ct = xmalloc (sizeof *ct); *ct = (struct ctables) { @@ -1267,6 +2204,7 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) .vlabels = vlabels, .hide_threshold = 5, }; + ct->look->omit_empty = false; if (!lex_force_match (lexer, T_SLASH)) goto error; @@ -1382,7 +2320,7 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) enum ctables_vlabel vlabel; if (lex_match_id (lexer, "DEFAULT")) - vlabel = CTVL_DEFAULT; + vlabel = (enum ctables_vlabel) settings_get_show_variables (); else if (lex_match_id (lexer, "NAME")) vlabel = CTVL_NAME; else if (lex_match_id (lexer, "LABEL")) @@ -1462,8 +2400,9 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) ct->tables = x2nrealloc (ct->tables, &allocated_tables, sizeof *ct->tables); - struct ctables_table *t = &ct->tables[ct->n_tables++]; + struct ctables_table *t = xmalloc (sizeof *t); *t = (struct ctables_table) { + .ft = HMAP_INITIALIZER (t->ft), .slabels_position = PIVOT_AXIS_COLUMN, .slabels_visible = true, .row_labels = CTLP_NORMAL, @@ -1473,6 +2412,7 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) .n_categories = dict_get_n_vars (dataset_dict (ds)), .cilevel = 95, }; + ct->tables[ct->n_tables++] = t; lex_match (lexer, T_EQUALS); if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW)) @@ -1500,33 +2440,63 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) const struct ctables_axis *scales[PIVOT_N_AXES]; size_t n_scales = 0; - for (size_t i = 0; i < 3; i++) + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) { - scales[i] = find_scale (t->axes[i]); - if (scales[i]) + scales[a] = find_scale (t->axes[a]); + if (scales[a]) n_scales++; } if (n_scales > 1) { - msg (SE, _("Scale variables may appear only on one dimension.")); + msg (SE, _("Scale variables may appear only on one axis.")); if (scales[PIVOT_AXIS_ROW]) msg_at (SN, scales[PIVOT_AXIS_ROW]->loc, - _("This scale variable appears in the rows dimension.")); + _("This scale variable appears on the rows axis.")); if (scales[PIVOT_AXIS_COLUMN]) msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc, - _("This scale variable appears in the columns dimension.")); + _("This scale variable appears on the columns axis.")); if (scales[PIVOT_AXIS_LAYER]) msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc, - _("This scale variable appears in the layer dimension.")); + _("This scale variable appears on the layer axis.")); goto error; } + const struct ctables_axis *summaries[PIVOT_N_AXES]; + size_t n_summaries = 0; + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + { + summaries[a] = (scales[a] + ? scales[a] + : find_categorical_summary_spec (t->axes[a])); + if (summaries[a]) + n_summaries++; + } + if (n_summaries > 1) + { + msg (SE, _("Summaries may appear only on one axis.")); + if (summaries[PIVOT_AXIS_ROW]) + msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc, + _("This variable on the rows axis has a summary.")); + if (summaries[PIVOT_AXIS_COLUMN]) + msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc, + _("This variable on the columns axis has a summary.")); + if (summaries[PIVOT_AXIS_LAYER]) + msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc, + _("This variable on the layers axis has a summary.")); + goto error; + } + for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) + if (n_summaries ? summaries[a] : t->axes[a]) + { + t->summary_axis = a; + break; + } + if (lex_token (lexer) == T_ENDCMD) break; if (!lex_force_match (lexer, T_SLASH)) break; - /* XXX Validate axes. */ while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD) { if (lex_match_id (lexer, "SLABELS")) @@ -1857,10 +2827,19 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) goto error; } } + + if (t->row_labels != CTLP_NORMAL && t->col_labels != CTLP_NORMAL) + { + msg (SE, _("ROWLABELS and COLLABELS may not both be specified.")); + goto error; + } + } while (lex_token (lexer) != T_ENDCMD); + + bool ok = ctables_execute (ds, ct); ctables_destroy (ct); - return CMD_SUCCESS; + return ok ? CMD_SUCCESS : CMD_FAILURE; error: ctables_destroy (ct);