\f
/* CTABLES postcompute expressions. */
+/* A computed category ("postcompute"), referred to as &NAME. */
+struct ctables_postcompute
+ {
+ struct hmap_node hmap_node; /* In struct ctables's 'postcomputes' hmap. */
+ char *name; /* Name, without leading &. */
+
+ struct msg_location *location; /* Location of definition. */
+ struct ctables_pcexpr *expr; /* The postcompute's expression. */
+ char *label; /* Label text, or NULL if none.  Each ")LABEL[n]" in it is
+ replaced by the label of the nth category. */
+ struct ctables_summary_spec_set *specs;
+ bool hide_source_cats; /* If true, categories that the expression
+ references are marked hidden. */
+ };
+
struct ctables_pcexpr
{
/* Precedence table:
}
return ctables_find_category_for_postcompute__ (cats, e);
}
-\f
-/* CTABLES variable nesting and stacking. */
-
-/* A nested sequence of variables, e.g. a > b > c. */
-struct ctables_nest
- {
- struct variable **vars;
- size_t n;
- size_t scale_idx;
- size_t summary_idx;
- size_t *areas[N_CTATS];
- size_t n_areas[N_CTATS];
- size_t group_head;
-
- struct ctables_summary_spec_set specs[N_CSVS];
- };
-
-/* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
-struct ctables_stack
- {
- struct ctables_nest *nests;
- size_t n;
- };
-static void
-ctables_nest_uninit (struct ctables_nest *nest)
+static struct substring
+rtrim_value (const union value *v, const struct variable *var)
{
- free (nest->vars);
- for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
- ctables_summary_spec_set_uninit (&nest->specs[sv]);
- for (enum ctables_area_type at = 0; at < N_CTATS; at++)
- free (nest->areas[at]);
+ struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
+ var_get_width (var));
+ ss_rtrim (&s, ss_cstr (" "));
+ return s;
}
-static void
-ctables_stack_uninit (struct ctables_stack *stack)
+static bool
+in_string_range (const union value *v, const struct variable *var,
+ const struct substring *srange)
{
- if (stack)
- {
- for (size_t i = 0; i < stack->n; i++)
- ctables_nest_uninit (&stack->nests[i]);
- free (stack->nests);
- }
+ struct substring s = rtrim_value (v, var);
+ return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
+ && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
}
-static struct ctables_stack
-nest_fts (struct ctables_stack s0, struct ctables_stack s1)
+static const struct ctables_category *
+ctables_categories_match (const struct ctables_categories *c,
+ const union value *v, const struct variable *var)
{
- if (!s0.n)
- return s1;
- else if (!s1.n)
- return s0;
-
- struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
- for (size_t i = 0; i < s0.n; i++)
- for (size_t j = 0; j < s1.n; j++)
- {
- const struct ctables_nest *a = &s0.nests[i];
- const struct ctables_nest *b = &s1.nests[j];
+ if (var_is_numeric (var) && v->f == SYSMIS)
+ return NULL;
- size_t allocate = a->n + b->n;
- struct variable **vars = xnmalloc (allocate, sizeof *vars);
- size_t n = 0;
- for (size_t k = 0; k < a->n; k++)
- vars[n++] = a->vars[k];
- for (size_t k = 0; k < b->n; k++)
- vars[n++] = b->vars[k];
- assert (n == allocate);
+ const struct ctables_category *othernm = NULL;
+ for (size_t i = c->n_cats; i-- > 0; )
+ {
+ const struct ctables_category *cat = &c->cats[i];
+ switch (cat->type)
+ {
+ case CCT_NUMBER:
+ if (cat->number == v->f)
+ return cat;
+ break;
- const struct ctables_nest *summary_src;
- if (!a->specs[CSV_CELL].var)
- summary_src = b;
- else if (!b->specs[CSV_CELL].var)
- summary_src = a;
- else
- NOT_REACHED ();
+ case CCT_STRING:
+ if (ss_equals (cat->string, rtrim_value (v, var)))
+ return cat;
+ break;
- struct ctables_nest *new = &stack.nests[stack.n++];
- *new = (struct ctables_nest) {
- .vars = vars,
- .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
- : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
- : SIZE_MAX),
- .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
- : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
- : SIZE_MAX),
- .n = n,
- };
- for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
- ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
- }
- ctables_stack_uninit (&s0);
- ctables_stack_uninit (&s1);
- return stack;
-}
+ case CCT_NRANGE:
+ if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
+ && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
+ return cat;
+ break;
-static struct ctables_stack
-stack_fts (struct ctables_stack s0, struct ctables_stack s1)
-{
- struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
- for (size_t i = 0; i < s0.n; i++)
- stack.nests[stack.n++] = s0.nests[i];
- for (size_t i = 0; i < s1.n; i++)
- {
- stack.nests[stack.n] = s1.nests[i];
- stack.nests[stack.n].group_head += s0.n;
- stack.n++;
- }
- assert (stack.n == s0.n + s1.n);
- free (s0.nests);
- free (s1.nests);
- return stack;
-}
+ case CCT_SRANGE:
+ if (in_string_range (v, var, cat->srange))
+ return cat;
+ break;
-static struct ctables_stack
-var_fts (const struct ctables_axis *a)
-{
- struct variable **vars = xmalloc (sizeof *vars);
- *vars = a->var;
+ case CCT_MISSING:
+ if (var_is_value_missing (var, v))
+ return cat;
+ break;
- bool is_summary = a->specs[CSV_CELL].n || a->scale;
- struct ctables_nest *nest = xmalloc (sizeof *nest);
- *nest = (struct ctables_nest) {
- .vars = vars,
- .n = 1,
- .scale_idx = a->scale ? 0 : SIZE_MAX,
- .summary_idx = is_summary ? 0 : SIZE_MAX,
- };
- if (is_summary)
- for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
- {
- ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
- nest->specs[sv].var = a->var;
- nest->specs[sv].is_scale = a->scale;
- }
- return (struct ctables_stack) { .nests = nest, .n = 1 };
-}
+ case CCT_POSTCOMPUTE:
+ break;
-static struct ctables_stack
-enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
-{
- if (!a)
- return (struct ctables_stack) { .n = 0 };
+ case CCT_OTHERNM:
+ if (!othernm)
+ othernm = cat;
+ break;
- switch (a->op)
- {
- case CTAO_VAR:
- return var_fts (a);
+ case CCT_SUBTOTAL:
+ case CCT_TOTAL:
+ break;
- case CTAO_STACK:
- return stack_fts (enumerate_fts (axis_type, a->subs[0]),
- enumerate_fts (axis_type, a->subs[1]));
+ case CCT_VALUE:
+ case CCT_LABEL:
+ case CCT_FUNCTION:
+ return (cat->include_missing || !var_is_value_missing (var, v) ? cat
+ : NULL);
- case CTAO_NEST:
- /* This should consider any of the scale variables found in the result to
- be linked to each other listwise for SMISSING=LISTWISE. */
- return nest_fts (enumerate_fts (axis_type, a->subs[0]),
- enumerate_fts (axis_type, a->subs[1]));
+ case CCT_EXCLUDED_MISSING:
+ break;
+ }
}
- NOT_REACHED ();
+ return var_is_value_missing (var, v) ? NULL : othernm;
}
-\f
-\f
-/* CTABLES summary calculation. */
-
-union ctables_summary
- {
- /* COUNT, VALIDN, TOTALN. */
- double count;
-
- /* MINIMUM, MAXIMUM, RANGE. */
- struct
- {
- double min;
- double max;
- };
-
- /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
- struct moments1 *moments;
- /* MEDIAN, MODE, PTILE. */
- struct
- {
- struct casewriter *writer;
- double ovalid;
- double ovalue;
- };
- };
+/* Returns the CCT_TOTAL category in C, or NULL if there is none.  Only the
+ first and last categories are examined.
+
+ An empty category list has no total, so that case is handled up front:
+ otherwise 'c->n_cats - 1' would not name a valid element of 'c->cats'. */
+static const struct ctables_category *
+ctables_categories_total (const struct ctables_categories *c)
+{
+ if (!c->n_cats)
+ return NULL;
+
+ const struct ctables_category *first = &c->cats[0];
+ const struct ctables_category *last = &c->cats[c->n_cats - 1];
+ return (first->type == CCT_TOTAL ? first
+ : last->type == CCT_TOTAL ? last
+ : NULL);
+}
static void
-ctables_summary_init (union ctables_summary *s,
- const struct ctables_summary_spec *ss)
+ctables_category_format_number (double number, const struct variable *var,
+ struct string *s)
{
- switch (ss->function)
- {
- case CTSF_COUNT:
- case CTSF_areaPCT_COUNT:
- case CTSF_areaPCT_VALIDN:
- case CTSF_areaPCT_TOTALN:
- case CTSF_MISSING:
- case CTSF_TOTALN:
- case CTSF_VALIDN:
- s->count = 0;
- break;
-
- case CTSF_areaID:
- break;
-
- case CTSF_MAXIMUM:
- case CTSF_MINIMUM:
- case CTSF_RANGE:
- s->min = s->max = SYSMIS;
- break;
-
- case CTSF_MEAN:
- case CTSF_SUM:
- case CTSF_areaPCT_SUM:
- s->moments = moments1_create (MOMENT_MEAN);
- break;
-
- case CTSF_SEMEAN:
- case CTSF_STDDEV:
- case CTSF_VARIANCE:
- s->moments = moments1_create (MOMENT_VARIANCE);
- break;
-
- case CTSF_MEDIAN:
- case CTSF_MODE:
- case CTSF_PTILE:
- {
- struct caseproto *proto = caseproto_create ();
- proto = caseproto_add_width (proto, 0);
- proto = caseproto_add_width (proto, 0);
-
- struct subcase ordering;
- subcase_init (&ordering, 0, 0, SC_ASCEND);
- s->writer = sort_create_writer (&ordering, proto);
- subcase_uninit (&ordering);
- caseproto_unref (proto);
-
- s->ovalid = 0;
- s->ovalue = SYSMIS;
- }
- break;
- }
+ struct pivot_value *pv = pivot_value_new_var_value (
+ var, &(union value) { .f = number });
+ pivot_value_format (pv, NULL, s);
+ pivot_value_destroy (pv);
}
static void
-ctables_summary_uninit (union ctables_summary *s,
- const struct ctables_summary_spec *ss)
+ctables_category_format_string (struct substring string,
+ const struct variable *var, struct string *out)
{
- switch (ss->function)
- {
- case CTSF_COUNT:
- case CTSF_areaPCT_COUNT:
- case CTSF_areaPCT_VALIDN:
- case CTSF_areaPCT_TOTALN:
- case CTSF_MISSING:
- case CTSF_TOTALN:
- case CTSF_VALIDN:
- break;
-
- case CTSF_areaID:
- break;
-
- case CTSF_MAXIMUM:
- case CTSF_MINIMUM:
- case CTSF_RANGE:
- break;
-
- case CTSF_MEAN:
- case CTSF_SEMEAN:
- case CTSF_STDDEV:
- case CTSF_SUM:
- case CTSF_VARIANCE:
- case CTSF_areaPCT_SUM:
- moments1_destroy (s->moments);
- break;
-
- case CTSF_MEDIAN:
- case CTSF_MODE:
- case CTSF_PTILE:
- casewriter_destroy (s->writer);
- break;
- }
+ int width = var_get_width (var);
+ char *s = xmalloc (width);
+ buf_copy_rpad (s, width, string.string, string.length, ' ');
+ struct pivot_value *pv = pivot_value_new_var_value (
+ var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
+ pivot_value_format (pv, NULL, out);
+ pivot_value_destroy (pv);
+ free (s);
}
-static void
-ctables_summary_add (union ctables_summary *s,
- const struct ctables_summary_spec *ss,
- const union value *value,
- bool is_missing, bool is_included,
- double weight)
+static bool
+ctables_category_format_label (const struct ctables_category *cat,
+ const struct variable *var,
+ struct string *s)
{
- /* To determine whether a case is included in a given table for a particular
- kind of summary, consider the following charts for the variable being
- summarized. Only if "yes" appears is the case counted.
-
- Categorical variables: VALIDN other TOTALN
- Valid values in included categories yes yes yes
- Missing values in included categories --- yes yes
- Missing values in excluded categories --- --- yes
- Valid values in excluded categories --- --- ---
-
- Scale variables: VALIDN other TOTALN
- Valid value yes yes yes
- Missing value --- yes yes
-
- Missing values include both user- and system-missing. (The system-missing
- value is always in an excluded category.)
-
- One way to interpret the above table is that scale variables are like
- categorical variables in which all values are in included categories.
- */
- switch (ss->function)
+ switch (cat->type)
{
- case CTSF_TOTALN:
- case CTSF_areaPCT_TOTALN:
- s->count += weight;
- break;
-
- case CTSF_COUNT:
- case CTSF_areaPCT_COUNT:
- if (is_included)
- s->count += weight;
- break;
+ case CCT_NUMBER:
+ ctables_category_format_number (cat->number, var, s);
+ return true;
- case CTSF_VALIDN:
- case CTSF_areaPCT_VALIDN:
- if (!is_missing)
- s->count += weight;
- break;
+ case CCT_STRING:
+ ctables_category_format_string (cat->string, var, s);
+ return true;
- case CTSF_areaID:
- break;
+ case CCT_NRANGE:
+ ctables_category_format_number (cat->nrange[0], var, s);
+ ds_put_format (s, " THRU ");
+ ctables_category_format_number (cat->nrange[1], var, s);
+ return true;
- case CTSF_MISSING:
- if (is_missing)
- s->count += weight;
- break;
+ case CCT_SRANGE:
+ ctables_category_format_string (cat->srange[0], var, s);
+ ds_put_format (s, " THRU ");
+ ctables_category_format_string (cat->srange[1], var, s);
+ return true;
- case CTSF_MAXIMUM:
- case CTSF_MINIMUM:
- case CTSF_RANGE:
- if (!is_missing)
- {
- if (s->min == SYSMIS || value->f < s->min)
- s->min = value->f;
- if (s->max == SYSMIS || value->f > s->max)
- s->max = value->f;
- }
- break;
+ case CCT_MISSING:
+ ds_put_cstr (s, "MISSING");
+ return true;
- case CTSF_MEAN:
- case CTSF_SEMEAN:
- case CTSF_STDDEV:
- case CTSF_SUM:
- case CTSF_VARIANCE:
- if (!is_missing)
- moments1_add (s->moments, value->f, weight);
- break;
+ case CCT_OTHERNM:
+ ds_put_cstr (s, "OTHERNM");
+ return true;
- case CTSF_areaPCT_SUM:
- if (!is_missing)
- moments1_add (s->moments, value->f, weight);
- break;
+ case CCT_POSTCOMPUTE:
+ ds_put_format (s, "&%s", cat->pc->name);
+ return true;
- case CTSF_MEDIAN:
- case CTSF_MODE:
- case CTSF_PTILE:
- if (!is_missing)
- {
- s->ovalid += weight;
+ case CCT_TOTAL:
+ case CCT_SUBTOTAL:
+ ds_put_cstr (s, cat->total_label);
+ return true;
- struct ccase *c = case_create (casewriter_get_proto (s->writer));
- *case_num_rw_idx (c, 0) = value->f;
- *case_num_rw_idx (c, 1) = weight;
- casewriter_write (s->writer, c);
- }
- break;
+ case CCT_VALUE:
+ case CCT_LABEL:
+ case CCT_FUNCTION:
+ case CCT_EXCLUDED_MISSING:
+ return false;
}
+
+ return false;
}
-static double
-ctables_summary_value (struct ctables_area *areas[N_CTATS],
- union ctables_summary *s,
- const struct ctables_summary_spec *ss)
+static bool
+ctables_recursive_check_postcompute (struct dictionary *dict,
+ const struct ctables_pcexpr *e,
+ struct ctables_category *pc_cat,
+ const struct ctables_categories *cats,
+ const struct msg_location *cats_location)
{
- switch (ss->function)
+ switch (e->op)
{
- case CTSF_COUNT:
- return s->count;
-
- case CTSF_areaID:
- return areas[ss->calc_area]->sequence;
-
- case CTSF_areaPCT_COUNT:
+ case CTPO_CAT_NUMBER:
+ case CTPO_CAT_STRING:
+ case CTPO_CAT_NRANGE:
+ case CTPO_CAT_SRANGE:
+ case CTPO_CAT_MISSING:
+ case CTPO_CAT_OTHERNM:
+ case CTPO_CAT_SUBTOTAL:
+ case CTPO_CAT_TOTAL:
{
- const struct ctables_area *a = areas[ss->calc_area];
- double a_count = a->count[ss->weighting];
- return a_count ? s->count / a_count * 100 : SYSMIS;
- }
-
- case CTSF_areaPCT_VALIDN:
- {
- const struct ctables_area *a = areas[ss->calc_area];
- double a_valid = a->valid[ss->weighting];
- return a_valid ? s->count / a_valid * 100 : SYSMIS;
- }
-
- case CTSF_areaPCT_TOTALN:
- {
- const struct ctables_area *a = areas[ss->calc_area];
- double a_total = a->total[ss->weighting];
- return a_total ? s->count / a_total * 100 : SYSMIS;
- }
-
- case CTSF_MISSING:
- case CTSF_TOTALN:
- case CTSF_VALIDN:
- return s->count;
-
- case CTSF_MAXIMUM:
- return s->max;
-
- case CTSF_MINIMUM:
- return s->min;
-
- case CTSF_RANGE:
- return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
-
- case CTSF_MEAN:
- {
- double mean;
- moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
- return mean;
- }
-
- case CTSF_SEMEAN:
- {
- double weight, variance;
- moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
- return calc_semean (variance, weight);
- }
+ struct ctables_category *cat = ctables_find_category_for_postcompute (
+ dict, cats, pc_cat->parse_format, e);
+ if (!cat)
+ {
+ /* A SUBTOTAL reference without a position is ambiguous when the
+ category list holds more than one subtotal, so report that case
+ with its own, more specific diagnostic. */
+ if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
+ {
+ size_t n_subtotals = 0;
+ for (size_t i = 0; i < cats->n_cats; i++)
+ n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
+ if (n_subtotals > 1)
+ {
+ msg_at (SE, cats_location,
+ ngettext ("These categories include %zu instance "
+ "of SUBTOTAL or HSUBTOTAL, so references "
+ "from computed categories must refer to "
+ "subtotals by position, "
+ "e.g. SUBTOTAL[1].",
+ "These categories include %zu instances "
+ "of SUBTOTAL or HSUBTOTAL, so references "
+ "from computed categories must refer to "
+ "subtotals by position, "
+ "e.g. SUBTOTAL[1].",
+ n_subtotals),
+ n_subtotals);
+ msg_at (SN, e->location,
+ _("This is the reference that lacks a position."));
+ /* The enclosing function returns bool; failure is 'false'. */
+ return false;
+ }
+ }
- case CTSF_STDDEV:
- {
- double variance;
- moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
- return variance != SYSMIS ? sqrt (variance) : SYSMIS;
+ msg_at (SE, pc_cat->location,
+ _("Computed category &%s references a category not included "
+ "in the category list."),
+ pc_cat->pc->name);
+ msg_at (SN, e->location, _("This is the missing category."));
+ if (e->op == CTPO_CAT_SUBTOTAL)
+ msg_at (SN, cats_location,
+ _("To fix the problem, add subtotals to the "
+ "list of categories here."));
+ else if (e->op == CTPO_CAT_TOTAL)
+ msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
+ "CATEGORIES specification."));
+ else
+ msg_at (SN, cats_location,
+ _("To fix the problem, add the missing category to the "
+ "list of categories here."));
+ return false;
+ }
+ if (pc_cat->pc->hide_source_cats)
+ cat->hide = true;
+ return true;
}
- case CTSF_SUM:
- {
- double weight, mean;
- moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
- return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
- }
+ case CTPO_CONSTANT:
+ return true;
- case CTSF_VARIANCE:
- {
- double variance;
- moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
- return variance;
- }
+ case CTPO_ADD:
+ case CTPO_SUB:
+ case CTPO_MUL:
+ case CTPO_DIV:
+ case CTPO_POW:
+ case CTPO_NEG:
+ for (size_t i = 0; i < 2; i++)
+ if (e->subs[i] && !ctables_recursive_check_postcompute (
+ dict, e->subs[i], pc_cat, cats, cats_location))
+ return false;
+ return true;
+ }
- case CTSF_areaPCT_SUM:
- {
- double weight, mean;
- moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
- if (weight == SYSMIS || mean == SYSMIS)
- return SYSMIS;
+ NOT_REACHED ();
+}
- const struct ctables_area *a = areas[ss->calc_area];
- const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
- double denom = sum->sum[ss->weighting];
- return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
- }
+static struct pivot_value *
+ctables_postcompute_label (const struct ctables_categories *cats,
+ const struct ctables_category *cat,
+ const struct variable *var)
+{
+ struct substring in = ss_cstr (cat->pc->label);
+ struct substring target = ss_cstr (")LABEL[");
- case CTSF_MEDIAN:
- case CTSF_PTILE:
- if (s->writer)
+ struct string out = DS_EMPTY_INITIALIZER;
+ for (;;)
+ {
+ size_t chunk = ss_find_substring (in, target);
+ if (chunk == SIZE_MAX)
{
- struct casereader *reader = casewriter_make_reader (s->writer);
- s->writer = NULL;
-
- struct percentile *ptile = percentile_create (
- ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
- struct order_stats *os = &ptile->parent;
- order_stats_accumulate_idx (&os, 1, reader, 1, 0);
- s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
- statistic_destroy (&ptile->parent.parent);
+ if (ds_is_empty (&out))
+ return pivot_value_new_user_text (in.string, in.length);
+ else
+ {
+ ds_put_substring (&out, in);
+ return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
+ }
}
- return s->ovalue;
- case CTSF_MODE:
- if (s->writer)
- {
- struct casereader *reader = casewriter_make_reader (s->writer);
- s->writer = NULL;
+ ds_put_substring (&out, ss_head (in, chunk));
+ ss_advance (&in, chunk + target.length);
- struct mode *mode = mode_create ();
- struct order_stats *os = &mode->parent;
- order_stats_accumulate_idx (&os, 1, reader, 1, 0);
- s->ovalue = mode->mode;
- statistic_destroy (&mode->parent.parent);
- }
- return s->ovalue;
+ struct substring idx_s;
+ if (!ss_get_until (&in, ']', &idx_s))
+ goto error;
+ char *tail;
+ long int idx = strtol (idx_s.string, &tail, 10);
+ if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
+ goto error;
+
+ struct ctables_category *cat2 = &cats->cats[idx - 1];
+ if (!ctables_category_format_label (cat2, var, &out))
+ goto error;
}
- NOT_REACHED ();
+error:
+ ds_destroy (&out);
+ return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
+}
+
+/* Creates and returns a new pivot_value to label category CAT, which is one
+ of the categories in CATS for variable VAR:
+
+ - For a postcompute category whose postcompute has a label, the result
+ comes from ctables_postcompute_label().
+
+ - For a total or subtotal category, the result is CAT's 'total_label' as
+ user text.
+
+ - Otherwise, the result is a variable value built from VAR and VALUE. */
+static struct pivot_value *
+ctables_category_create_value_label (const struct ctables_categories *cats,
+ const struct ctables_category *cat,
+ const struct variable *var,
+ const union value *value)
+{
+ return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
+ ? ctables_postcompute_label (cats, cat, var)
+ : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
+ ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
+ : pivot_value_new_var_value (var, value));
}
\f
-enum ctables_vlabel
- {
- CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
- CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
- CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
- CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
- };
+/* CTABLES variable nesting and stacking. */
-struct ctables_cell
+/* A nested sequence of variables, e.g. a > b > c. */
+struct ctables_nest
{
- /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
- all the axes (except the scalar variable, if any). */
- struct hmap_node node;
-
- /* The areas that contain this cell. */
- uint32_t omit_areas;
- struct ctables_area *areas[N_CTATS];
-
- bool hide;
-
- bool postcompute;
- enum ctables_summary_variant sv;
-
- struct ctables_cell_axis
- {
- struct ctables_cell_value
- {
- const struct ctables_category *category;
- union value value;
- }
- *cvs;
- int leaf;
- }
- axes[PIVOT_N_AXES];
+ struct variable **vars;
+ size_t n;
+ size_t scale_idx;
+ size_t summary_idx;
+ size_t *areas[N_CTATS];
+ size_t n_areas[N_CTATS];
+ size_t group_head;
- union ctables_summary *summaries;
+ struct ctables_summary_spec_set specs[N_CSVS];
};
-struct ctables_postcompute
+/* A stack of nestings, e.g. nest1 + nest2 + ... + nestN. */
+struct ctables_stack
{
- struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
- char *name; /* Name, without leading &. */
-
- struct msg_location *location; /* Location of definition. */
- struct ctables_pcexpr *expr;
- char *label;
- struct ctables_summary_spec_set *specs;
- bool hide_source_cats;
+ struct ctables_nest *nests;
+ size_t n;
};
-struct ctables
- {
- const struct dictionary *dict;
- struct pivot_table_look *look;
+/* Releases the storage that NEST owns: its 'vars' array, every summary
+ specification set in 'specs', and every array in 'areas'.  NEST itself is
+ not freed. */
+static void
+ctables_nest_uninit (struct ctables_nest *nest)
+{
+ free (nest->vars);
+ for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
+ ctables_summary_spec_set_uninit (&nest->specs[sv]);
+ for (enum ctables_area_type at = 0; at < N_CTATS; at++)
+ free (nest->areas[at]);
+}
- /* For CTEF_* formats. */
- struct fmt_settings ctables_formats;
+/* Uninitializes each of the nests in STACK and then frees the 'nests' array.
+ Does nothing if STACK is a null pointer.  STACK itself is not freed. */
+static void
+ctables_stack_uninit (struct ctables_stack *stack)
+{
+ if (stack)
+ {
+ for (size_t i = 0; i < stack->n; i++)
+ ctables_nest_uninit (&stack->nests[i]);
+ free (stack->nests);
+ }
+}
- /* If this is NULL, zeros are displayed using the normal print format.
- Otherwise, this string is displayed. */
- char *zero;
+static struct ctables_stack
+nest_fts (struct ctables_stack s0, struct ctables_stack s1)
+{
+ if (!s0.n)
+ return s1;
+ else if (!s1.n)
+ return s0;
- /* If this is NULL, missing values are displayed using the normal print
- format. Otherwise, this string is displayed. */
- char *missing;
+ struct ctables_stack stack = { .nests = xnmalloc (s0.n, s1.n * sizeof *stack.nests) };
+ for (size_t i = 0; i < s0.n; i++)
+ for (size_t j = 0; j < s1.n; j++)
+ {
+ const struct ctables_nest *a = &s0.nests[i];
+ const struct ctables_nest *b = &s1.nests[j];
- /* Indexed by variable dictionary index. */
- enum ctables_vlabel *vlabels;
+ size_t allocate = a->n + b->n;
+ struct variable **vars = xnmalloc (allocate, sizeof *vars);
+ size_t n = 0;
+ for (size_t k = 0; k < a->n; k++)
+ vars[n++] = a->vars[k];
+ for (size_t k = 0; k < b->n; k++)
+ vars[n++] = b->vars[k];
+ assert (n == allocate);
- struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
+ const struct ctables_nest *summary_src;
+ if (!a->specs[CSV_CELL].var)
+ summary_src = b;
+ else if (!b->specs[CSV_CELL].var)
+ summary_src = a;
+ else
+ NOT_REACHED ();
- bool mrsets_count_duplicates; /* MRSETS. */
- bool smissing_listwise; /* SMISSING. */
- struct variable *e_weight; /* WEIGHT. */
- int hide_threshold; /* HIDESMALLCOUNTS. */
+ struct ctables_nest *new = &stack.nests[stack.n++];
+ *new = (struct ctables_nest) {
+ .vars = vars,
+ .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
+ : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
+ : SIZE_MAX),
+ .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
+ : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
+ : SIZE_MAX),
+ .n = n,
+ };
+ for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
+ ctables_summary_spec_set_clone (&new->specs[sv], &summary_src->specs[sv]);
+ }
+ ctables_stack_uninit (&s0);
+ ctables_stack_uninit (&s1);
+ return stack;
+}
- struct ctables_table **tables;
- size_t n_tables;
- };
+/* Returns a new stack that holds the nests of S0 followed by the nests of S1.
+
+ The nests are copied by value into a newly allocated array, so whatever
+ they point to now belongs to the returned stack.  Only the 'nests' arrays
+ of S0 and S1 are freed; the nests in them are not uninitialized. */
+static struct ctables_stack
+stack_fts (struct ctables_stack s0, struct ctables_stack s1)
+{
+ struct ctables_stack stack = { .nests = xnmalloc (s0.n + s1.n, sizeof *stack.nests) };
+ for (size_t i = 0; i < s0.n; i++)
+ stack.nests[stack.n++] = s0.nests[i];
+ for (size_t i = 0; i < s1.n; i++)
+ {
+ stack.nests[stack.n] = s1.nests[i];
+ /* S1's nests now come after S0's s0.n nests, so shift 'group_head' by
+ that many positions. */
+ stack.nests[stack.n].group_head += s0.n;
+ stack.n++;
+ }
+ assert (stack.n == s0.n + s1.n);
+ free (s0.nests);
+ free (s1.nests);
+ return stack;
+}
-struct ctables_value
- {
- struct hmap_node node;
- union value value;
- int leaf;
- };
+static struct ctables_stack
+var_fts (const struct ctables_axis *a)
+{
+ struct variable **vars = xmalloc (sizeof *vars);
+ *vars = a->var;
-struct ctables_occurrence
- {
- struct hmap_node node;
- union value value;
+ bool is_summary = a->specs[CSV_CELL].n || a->scale;
+ struct ctables_nest *nest = xmalloc (sizeof *nest);
+ *nest = (struct ctables_nest) {
+ .vars = vars,
+ .n = 1,
+ .scale_idx = a->scale ? 0 : SIZE_MAX,
+ .summary_idx = is_summary ? 0 : SIZE_MAX,
};
+ if (is_summary)
+ for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
+ {
+ ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
+ nest->specs[sv].var = a->var;
+ nest->specs[sv].is_scale = a->scale;
+ }
+ return (struct ctables_stack) { .nests = nest, .n = 1 };
+}
-struct ctables_section
- {
- /* Settings. */
- struct ctables_table *table;
- struct ctables_nest *nests[PIVOT_N_AXES];
-
- /* Data. */
- struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
- struct hmap cells; /* Contains "struct ctables_cell"s. */
- struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
- };
+static struct ctables_stack
+enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
+{
+ if (!a)
+ return (struct ctables_stack) { .n = 0 };
-static void ctables_section_uninit (struct ctables_section *);
+ switch (a->op)
+ {
+ case CTAO_VAR:
+ return var_fts (a);
-struct ctables_table
- {
- struct ctables *ctables;
- struct ctables_axis *axes[PIVOT_N_AXES];
- struct ctables_stack stacks[PIVOT_N_AXES];
- struct ctables_section *sections;
- size_t n_sections;
- enum pivot_axis_type summary_axis;
- struct ctables_summary_spec_set summary_specs;
- struct variable **sum_vars;
- size_t n_sum_vars;
+ case CTAO_STACK:
+ return stack_fts (enumerate_fts (axis_type, a->subs[0]),
+ enumerate_fts (axis_type, a->subs[1]));
- enum pivot_axis_type slabels_axis;
- bool slabels_visible;
+ case CTAO_NEST:
+ /* This should consider any of the scale variables found in the result to
+ be linked to each other listwise for SMISSING=LISTWISE. */
+ return nest_fts (enumerate_fts (axis_type, a->subs[0]),
+ enumerate_fts (axis_type, a->subs[1]));
+ }
- /* The innermost category labels for axis 'a' appear on axis label_axis[a].
+ NOT_REACHED ();
+}
+\f
+/* CTABLES summary calculation. */
- Most commonly, label_axis[a] == a, and in particular we always have
- label_axis{PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
+union ctables_summary
+ {
+ /* COUNT, VALIDN, TOTALN. */
+ double count;
- If ROWLABELS or COLLABELS is specified, then one of
- label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
- opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
+ /* MINIMUM, MAXIMUM, RANGE. */
+ struct
+ {
+ double min;
+ double max;
+ };
- If any category labels are moved, then 'clabels_example' is one of the
- variables being moved (and it is otherwise NULL). All of the variables
- being moved have the same width, value labels, and categories, so this
- example variable can be used to find those out.
+ /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
+ struct moments1 *moments;
- The remaining members in this group are relevant only if category labels
- are moved.
+ /* MEDIAN, MODE, PTILE. */
+ struct
+ {
+ struct casewriter *writer;
+ double ovalid;
+ double ovalue;
+ };
+ };
- 'clabels_values_map' holds a "struct ctables_value" for all the values
- that appear in all of the variables in the moved categories. It is
- accumulated as the data is read. Once the data is fully read, its
- sorted values are put into 'clabels_values' and 'n_clabels_values'.
- */
- enum pivot_axis_type label_axis[PIVOT_N_AXES];
- enum pivot_axis_type clabels_from_axis;
- enum pivot_axis_type clabels_to_axis;
- const struct variable *clabels_example;
- struct hmap clabels_values_map;
- struct ctables_value **clabels_values;
- size_t n_clabels_values;
+static void
+ctables_summary_init (union ctables_summary *s,
+ const struct ctables_summary_spec *ss)
+{
+ switch (ss->function)
+ {
+ case CTSF_COUNT:
+ case CTSF_areaPCT_COUNT:
+ case CTSF_areaPCT_VALIDN:
+ case CTSF_areaPCT_TOTALN:
+ case CTSF_MISSING:
+ case CTSF_TOTALN:
+ case CTSF_VALIDN:
+ s->count = 0;
+ break;
- /* Indexed by variable dictionary index. */
- struct ctables_categories **categories;
- size_t n_categories;
+ case CTSF_areaID:
+ break;
- double cilevel;
-
- char *caption;
- char *corner;
- char *title;
-
- struct ctables_chisq *chisq;
- struct ctables_pairwise *pairwise;
- };
+ case CTSF_MAXIMUM:
+ case CTSF_MINIMUM:
+ case CTSF_RANGE:
+ s->min = s->max = SYSMIS;
+ break;
-/* Chi-square test (SIGTEST). */
-struct ctables_chisq
- {
- double alpha;
- bool include_mrsets;
- bool all_visible;
- };
+ case CTSF_MEAN:
+ case CTSF_SUM:
+ case CTSF_areaPCT_SUM:
+ s->moments = moments1_create (MOMENT_MEAN);
+ break;
-/* Pairwise comparison test (COMPARETEST). */
-struct ctables_pairwise
- {
- enum { PROP, MEAN } type;
- double alpha[2];
- bool include_mrsets;
- bool meansvariance_allcats;
- bool all_visible;
- enum { BONFERRONI = 1, BH } adjust;
- bool merge;
- bool apa_style;
- bool show_sig;
- };
+ case CTSF_SEMEAN:
+ case CTSF_STDDEV:
+ case CTSF_VARIANCE:
+ s->moments = moments1_create (MOMENT_VARIANCE);
+ break;
+ case CTSF_MEDIAN:
+ case CTSF_MODE:
+ case CTSF_PTILE:
+ {
+ struct caseproto *proto = caseproto_create ();
+ proto = caseproto_add_width (proto, 0);
+ proto = caseproto_add_width (proto, 0);
+ struct subcase ordering;
+ subcase_init (&ordering, 0, 0, SC_ASCEND);
+ s->writer = sort_create_writer (&ordering, proto);
+ subcase_uninit (&ordering);
+ caseproto_unref (proto);
-static bool
-parse_col_width (struct lexer *lexer, const char *name, double *width)
-{
- lex_match (lexer, T_EQUALS);
- if (lex_match_id (lexer, "DEFAULT"))
- *width = SYSMIS;
- else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
- {
- *width = lex_number (lexer);
- lex_get (lexer);
+ s->ovalid = 0;
+ s->ovalue = SYSMIS;
+ }
+ break;
}
- else
- return false;
-
- return true;
}
-static bool
-parse_bool (struct lexer *lexer, bool *b)
+static void
+ctables_summary_uninit (union ctables_summary *s,
+ const struct ctables_summary_spec *ss)
{
- if (lex_match_id (lexer, "NO"))
- *b = false;
- else if (lex_match_id (lexer, "YES"))
- *b = true;
- else
+ switch (ss->function)
{
- lex_error_expecting (lexer, "YES", "NO");
- return false;
- }
- return true;
-}
+ case CTSF_COUNT:
+ case CTSF_areaPCT_COUNT:
+ case CTSF_areaPCT_VALIDN:
+ case CTSF_areaPCT_TOTALN:
+ case CTSF_MISSING:
+ case CTSF_TOTALN:
+ case CTSF_VALIDN:
+ break;
-static void
-ctables_chisq_destroy (struct ctables_chisq *chisq)
-{
- free (chisq);
-}
+ case CTSF_areaID:
+ break;
-static void
-ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
-{
- free (pairwise);
+ case CTSF_MAXIMUM:
+ case CTSF_MINIMUM:
+ case CTSF_RANGE:
+ break;
+
+ case CTSF_MEAN:
+ case CTSF_SEMEAN:
+ case CTSF_STDDEV:
+ case CTSF_SUM:
+ case CTSF_VARIANCE:
+ case CTSF_areaPCT_SUM:
+ moments1_destroy (s->moments);
+ break;
+
+ case CTSF_MEDIAN:
+ case CTSF_MODE:
+ case CTSF_PTILE:
+ casewriter_destroy (s->writer);
+ break;
+ }
}
+/* Accumulates one case into summary S for the function SS->function.
+
+   VALUE is the case's value for the variable being summarized; only its
+   numeric member 'f' is read here.  IS_MISSING and IS_INCLUDED classify the
+   case as laid out in the chart inside the function.  WEIGHT is what gets
+   added to counts, and it accompanies VALUE into the moments and
+   order-statistic accumulators. */
static void
-ctables_table_destroy (struct ctables_table *t)
+ctables_summary_add (union ctables_summary *s,
+ const struct ctables_summary_spec *ss,
+ const union value *value,
+ bool is_missing, bool is_included,
+ double weight)
{
- if (!t)
- return;
+ /* To determine whether a case is included in a given table for a particular
+ kind of summary, consider the following charts for the variable being
+ summarized. Only if "yes" appears is the case counted.
- for (size_t i = 0; i < t->n_sections; i++)
- ctables_section_uninit (&t->sections[i]);
- free (t->sections);
+ Categorical variables: VALIDN other TOTALN
+ Valid values in included categories yes yes yes
+ Missing values in included categories --- yes yes
+ Missing values in excluded categories --- --- yes
+ Valid values in excluded categories --- --- ---
- for (size_t i = 0; i < t->n_categories; i++)
- ctables_categories_unref (t->categories[i]);
- free (t->categories);
+ Scale variables: VALIDN other TOTALN
+ Valid value yes yes yes
+ Missing value --- yes yes
- for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
- {
- ctables_axis_destroy (t->axes[a]);
- ctables_stack_uninit (&t->stacks[a]);
- }
- free (t->summary_specs.specs);
+ Missing values include both user- and system-missing. (The system-missing
+ value is always in an excluded category.)
- struct ctables_value *ctv, *next_ctv;
- HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
- &t->clabels_values_map)
+ One way to interpret the above table is that scale variables are like
+ categorical variables in which all values are in included categories.
+ */
+ switch (ss->function)
{
- value_destroy (&ctv->value, var_get_width (t->clabels_example));
- hmap_delete (&t->clabels_values_map, &ctv->node);
- free (ctv);
- }
- hmap_destroy (&t->clabels_values_map);
- free (t->clabels_values);
+ /* TOTALN counts every case that reaches this function. */
+ case CTSF_TOTALN:
+ case CTSF_areaPCT_TOTALN:
+ s->count += weight;
+ break;
- free (t->sum_vars);
- free (t->caption);
- free (t->corner);
- free (t->title);
- ctables_chisq_destroy (t->chisq);
- ctables_pairwise_destroy (t->pairwise);
- free (t);
-}
+ /* The "other" column of the chart: gated on IS_INCLUDED only. */
+ case CTSF_COUNT:
+ case CTSF_areaPCT_COUNT:
+ if (is_included)
+ s->count += weight;
+ break;
-static void
-ctables_destroy (struct ctables *ct)
-{
- if (!ct)
- return;
+ /* VALIDN: gated on the value not being missing. */
+ case CTSF_VALIDN:
+ case CTSF_areaPCT_VALIDN:
+ if (!is_missing)
+ s->count += weight;
+ break;
- struct ctables_postcompute *pc, *next_pc;
- HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
- &ct->postcomputes)
- {
- free (pc->name);
- msg_location_destroy (pc->location);
- ctables_pcexpr_destroy (pc->expr);
- free (pc->label);
- if (pc->specs)
+ /* Nothing is accumulated per case; ctables_summary_value() reads the
+ result from the area instead. */
+ case CTSF_areaID:
+ break;
+
+ case CTSF_MISSING:
+ if (is_missing)
+ s->count += weight;
+ break;
+
+ /* SYSMIS in s->min or s->max means "no value seen yet". */
+ case CTSF_MAXIMUM:
+ case CTSF_MINIMUM:
+ case CTSF_RANGE:
+ if (!is_missing)
{
- ctables_summary_spec_set_uninit (pc->specs);
- free (pc->specs);
+ if (s->min == SYSMIS || value->f < s->min)
+ s->min = value->f;
+ if (s->max == SYSMIS || value->f > s->max)
+ s->max = value->f;
}
- hmap_delete (&ct->postcomputes, &pc->hmap_node);
- free (pc);
- }
- hmap_destroy (&ct->postcomputes);
+ break;
- fmt_settings_uninit (&ct->ctables_formats);
- pivot_table_look_unref (ct->look);
- free (ct->zero);
- free (ct->missing);
- free (ct->vlabels);
- for (size_t i = 0; i < ct->n_tables; i++)
- ctables_table_destroy (ct->tables[i]);
- free (ct->tables);
- free (ct);
-}
+ case CTSF_MEAN:
+ case CTSF_SEMEAN:
+ case CTSF_STDDEV:
+ case CTSF_SUM:
+ case CTSF_VARIANCE:
+ if (!is_missing)
+ moments1_add (s->moments, value->f, weight);
+ break;
-static bool
-ctables_recursive_check_postcompute (struct dictionary *dict,
- const struct ctables_pcexpr *e,
- struct ctables_category *pc_cat,
- const struct ctables_categories *cats,
- const struct msg_location *cats_location)
+ /* Same accumulation as the moment-based functions above; only the final
+ computation in ctables_summary_value() differs. */
+ case CTSF_areaPCT_SUM:
+ if (!is_missing)
+ moments1_add (s->moments, value->f, weight);
+ break;
+
+ /* Order statistics: each non-missing case is written out as a two-column
+ case (value in column 0, weight in column 1) and s->ovalid tracks the
+ total valid weight. */
+ case CTSF_MEDIAN:
+ case CTSF_MODE:
+ case CTSF_PTILE:
+ if (!is_missing)
+ {
+ s->ovalid += weight;
+
+ struct ccase *c = case_create (casewriter_get_proto (s->writer));
+ *case_num_rw_idx (c, 0) = value->f;
+ *case_num_rw_idx (c, 1) = weight;
+ casewriter_write (s->writer, c);
+ }
+ break;
+ }
+}
+
+/* Returns the final value of summary S for the function SS->function.
+
+   AREAS holds the areas of the cell that S belongs to; it is consulted only
+   by the area-based functions (areaID and the areaPCT family), indexed by
+   SS->calc_area.
+
+   For MEDIAN, PTILE, and MODE the first call consumes s->writer, sets it to
+   NULL, and caches the answer in s->ovalue; later calls return the cached
+   value.  Results that cannot be computed come back as SYSMIS. */
+static double
+ctables_summary_value (struct ctables_area *areas[N_CTATS],
+ union ctables_summary *s,
+ const struct ctables_summary_spec *ss)
{
- switch (e->op)
+ switch (ss->function)
{
- case CTPO_CAT_NUMBER:
- case CTPO_CAT_STRING:
- case CTPO_CAT_NRANGE:
- case CTPO_CAT_SRANGE:
- case CTPO_CAT_MISSING:
- case CTPO_CAT_OTHERNM:
- case CTPO_CAT_SUBTOTAL:
- case CTPO_CAT_TOTAL:
+ case CTSF_COUNT:
+ return s->count;
+
+ case CTSF_areaID:
+ return areas[ss->calc_area]->sequence;
+
+ /* Percentages: the cell's count over the area's denominator, or SYSMIS
+ when the denominator is zero. */
+ case CTSF_areaPCT_COUNT:
{
- struct ctables_category *cat = ctables_find_category_for_postcompute (
- dict, cats, pc_cat->parse_format, e);
- if (!cat)
- {
- if (e->op == CTPO_CAT_SUBTOTAL && e->subtotal_index == 0)
- {
- size_t n_subtotals = 0;
- for (size_t i = 0; i < cats->n_cats; i++)
- n_subtotals += cats->cats[i].type == CCT_SUBTOTAL;
- if (n_subtotals > 1)
- {
- msg_at (SE, cats_location,
- ngettext ("These categories include %zu instance "
- "of SUBTOTAL or HSUBTOTAL, so references "
- "from computed categories must refer to "
- "subtotals by position, "
- "e.g. SUBTOTAL[1].",
- "These categories include %zu instances "
- "of SUBTOTAL or HSUBTOTAL, so references "
- "from computed categories must refer to "
- "subtotals by position, "
- "e.g. SUBTOTAL[1].",
- n_subtotals),
- n_subtotals);
- msg_at (SN, e->location,
- _("This is the reference that lacks a position."));
- return NULL;
- }
- }
+ const struct ctables_area *a = areas[ss->calc_area];
+ double a_count = a->count[ss->weighting];
+ return a_count ? s->count / a_count * 100 : SYSMIS;
+ }
- msg_at (SE, pc_cat->location,
- _("Computed category &%s references a category not included "
- "in the category list."),
- pc_cat->pc->name);
- msg_at (SN, e->location, _("This is the missing category."));
- if (e->op == CTPO_CAT_SUBTOTAL)
- msg_at (SN, cats_location,
- _("To fix the problem, add subtotals to the "
- "list of categories here."));
- else if (e->op == CTPO_CAT_TOTAL)
- msg (SN, _("To fix the problem, add TOTAL=YES to the variable's "
- "CATEGORIES specification."));
- else
- msg_at (SN, cats_location,
- _("To fix the problem, add the missing category to the "
- "list of categories here."));
- return false;
- }
- if (pc_cat->pc->hide_source_cats)
- cat->hide = true;
- return true;
+ case CTSF_areaPCT_VALIDN:
+ {
+ const struct ctables_area *a = areas[ss->calc_area];
+ double a_valid = a->valid[ss->weighting];
+ return a_valid ? s->count / a_valid * 100 : SYSMIS;
}
- case CTPO_CONSTANT:
- return true;
+ case CTSF_areaPCT_TOTALN:
+ {
+ const struct ctables_area *a = areas[ss->calc_area];
+ double a_total = a->total[ss->weighting];
+ return a_total ? s->count / a_total * 100 : SYSMIS;
+ }
- case CTPO_ADD:
- case CTPO_SUB:
- case CTPO_MUL:
- case CTPO_DIV:
- case CTPO_POW:
- case CTPO_NEG:
- for (size_t i = 0; i < 2; i++)
- if (e->subs[i] && !ctables_recursive_check_postcompute (
- dict, e->subs[i], pc_cat, cats, cats_location))
- return false;
- return true;
+ case CTSF_MISSING:
+ case CTSF_TOTALN:
+ case CTSF_VALIDN:
+ return s->count;
+
+ case CTSF_MAXIMUM:
+ return s->max;
+
+ case CTSF_MINIMUM:
+ return s->min;
+
+ case CTSF_RANGE:
+ return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
+
+ case CTSF_MEAN:
+ {
+ double mean;
+ moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
+ return mean;
+ }
+
+ case CTSF_SEMEAN:
+ {
+ double weight, variance;
+ moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
+ return calc_semean (variance, weight);
+ }
+
+ case CTSF_STDDEV:
+ {
+ double variance;
+ moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
+ return variance != SYSMIS ? sqrt (variance) : SYSMIS;
+ }
+
+ /* The sum is recovered as total weight times mean. */
+ case CTSF_SUM:
+ {
+ double weight, mean;
+ moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
+ return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
+ }
+
+ case CTSF_VARIANCE:
+ {
+ double variance;
+ moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
+ return variance;
+ }
+
+ /* The cell's sum (weight times mean) as a percentage of the area's sum
+ for the variable at index ss->sum_var_idx. */
+ case CTSF_areaPCT_SUM:
+ {
+ double weight, mean;
+ moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
+ if (weight == SYSMIS || mean == SYSMIS)
+ return SYSMIS;
+
+ const struct ctables_area *a = areas[ss->calc_area];
+ const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
+ double denom = sum->sum[ss->weighting];
+ return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
+ }
+
+ /* MEDIAN is computed as the 0.5 percentile.  A non-null s->writer means
+ the value has not been computed yet. */
+ case CTSF_MEDIAN:
+ case CTSF_PTILE:
+ if (s->writer)
+ {
+ struct casereader *reader = casewriter_make_reader (s->writer);
+ s->writer = NULL;
+
+ struct percentile *ptile = percentile_create (
+ ss->function == CTSF_PTILE ? ss->percentile : 0.5, s->ovalid);
+ struct order_stats *os = &ptile->parent;
+ order_stats_accumulate_idx (&os, 1, reader, 1, 0);
+ s->ovalue = percentile_calculate (ptile, PC_HAVERAGE);
+ statistic_destroy (&ptile->parent.parent);
+ }
+ return s->ovalue;
+
+ /* Same compute-once-then-cache scheme as MEDIAN and PTILE above. */
+ case CTSF_MODE:
+ if (s->writer)
+ {
+ struct casereader *reader = casewriter_make_reader (s->writer);
+ s->writer = NULL;
+
+ struct mode *mode = mode_create ();
+ struct order_stats *os = &mode->parent;
+ order_stats_accumulate_idx (&os, 1, reader, 1, 0);
+ s->ovalue = mode->mode;
+ statistic_destroy (&mode->parent.parent);
+ }
+ return s->ovalue;
}
NOT_REACHED ();
}
+\f
+/* CTABLES occurrences. */
-static bool
-all_strings (struct variable **vars, size_t n_vars,
- const struct ctables_category *cat)
+/* One distinct value recorded in an occurrences hmap by
+   ctables_add_occurrence(). */
+struct ctables_occurrence
+ {
+ struct hmap_node node; /* Hashed on 'value'. */
+ union value value; /* Private copy made with value_clone(). */
+ };
+
+/* Records VALUE, a value of VAR, in OCCURRENCES unless an equal value is
+   already there.  Equality and hashing both use VAR's width.  The stored
+   copy is cloned, so the caller keeps ownership of VALUE. */
+static void
+ctables_add_occurrence (const struct variable *var,
+ const union value *value,
+ struct hmap *occurrences)
{
- for (size_t j = 0; j < n_vars; j++)
- if (var_is_numeric (vars[j]))
+ int width = var_get_width (var);
+ unsigned int hash = value_hash (value, width, 0);
+
+ /* Already present?  Then there is nothing to do. */
+ struct ctables_occurrence *o;
+ HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
+ occurrences)
+ if (value_equal (value, &o->value, width))
+ return;
+
+ o = xmalloc (sizeof *o);
+ value_clone (&o->value, value, width);
+ hmap_insert (occurrences, &o->node, hash);
+}
+\f
+/* Variable label display choices.  Each enumerator is defined as the
+   SETTINGS_VALUE_SHOW_* constant with the corresponding meaning, so the two
+   sets of values are numerically interchangeable. */
+enum ctables_vlabel
+ {
+ CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
+ CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
+ CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
+ CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
+ };
+
+/* One cell of a section, created on demand by ctables_cell_insert__(). */
+struct ctables_cell
+ {
+ /* In struct ctables_section's 'cells' hmap. Indexed by all the values in
+ all the axes (except the scalar variable, if any). */
+ struct hmap_node node;
+
+ /* The areas that contain this cell. */
+ /* 'omit_areas' is a bitmask of (1u << CTAT_*) bits, set when one of the
+ cell's categories is a total, subtotal, or postcompute. */
+ uint32_t omit_areas;
+ struct ctables_area *areas[N_CTATS];
+
+ /* True if any of the cell's categories, or the subtotal covering one of
+ them, asks for the cell to be hidden. */
+ bool hide;
+
+ /* 'postcompute' is true if any category is CCT_POSTCOMPUTE.  'sv' is
+ CSV_TOTAL if any category is a total, subtotal, or postcompute, and
+ CSV_CELL otherwise. */
+ bool postcompute;
+ enum ctables_summary_variant sv;
+
+ struct ctables_cell_axis
{
- msg_at (SE, cat->location,
- _("This category specification may be applied only to string "
- "variables, but this subcommand tries to apply it to "
- "numeric variable %s."),
- var_get_name (vars[j]));
- return false;
+ /* 'cvs' has one element per variable in this axis's nest (NULL when the
+ nest has no variables). */
+ struct ctables_cell_value
+ {
+ const struct ctables_category *category;
+ union value value;
+ }
+ *cvs;
+ int leaf;
}
- return true;
+ axes[PIVOT_N_AXES];
+
+ /* One summary per spec in the summary axis's spec set for 'sv'. */
+ union ctables_summary *summaries;
+ };
+
+/* One combination of nests, one per axis, within a table, together with the
+   data accumulated for that combination. */
+struct ctables_section
+ {
+ /* Settings. */
+ struct ctables_table *table;
+ struct ctables_nest *nests[PIVOT_N_AXES];
+
+ /* Data. */
+ /* occurrences[a] is an array of hmaps indexed by position within
+ nests[a]; ctables_cell_insert() fills it via ctables_add_occurrence(). */
+ struct hmap *occurrences[PIVOT_N_AXES]; /* "struct ctables_occurrence"s. */
+ struct hmap cells; /* Contains "struct ctables_cell"s. */
+ struct hmap areas[N_CTATS]; /* Contains "struct ctables_area"s. */
+ };
+
+static void ctables_section_uninit (struct ctables_section *);
+
+/* State for a single table: its axes, sections, summary settings, category
+   specifications, and titles. */
+struct ctables_table
+ {
+ struct ctables *ctables;
+ struct ctables_axis *axes[PIVOT_N_AXES];
+ struct ctables_stack stacks[PIVOT_N_AXES];
+ struct ctables_section *sections;
+ size_t n_sections;
+ /* The axis whose nest supplies the summary specs for each cell. */
+ enum pivot_axis_type summary_axis;
+ struct ctables_summary_spec_set summary_specs;
+ /* Variables whose per-area sums are accumulated in each area's 'sums'
+ array, which has 'n_sum_vars' elements. */
+ struct variable **sum_vars;
+ size_t n_sum_vars;
+
+ enum pivot_axis_type slabels_axis;
+ bool slabels_visible;
+
+ /* The innermost category labels for axis 'a' appear on axis label_axis[a].
+
+ Most commonly, label_axis[a] == a, and in particular we always have
+ label_axis[PIVOT_AXIS_LAYER] == PIVOT_AXIS_LAYER.
+
+ If ROWLABELS or COLLABELS is specified, then one of
+ label_axis[PIVOT_AXIS_ROW] or label_axis[PIVOT_AXIS_COLUMN] can be the
+ opposite axis or PIVOT_AXIS_LAYER. Only one of them will differ.
+
+ If any category labels are moved, then 'clabels_example' is one of the
+ variables being moved (and it is otherwise NULL). All of the variables
+ being moved have the same width, value labels, and categories, so this
+ example variable can be used to find those out.
+
+ The remaining members in this group are relevant only if category labels
+ are moved.
+
+ 'clabels_values_map' holds a "struct ctables_value" for all the values
+ that appear in all of the variables in the moved categories. It is
+ accumulated as the data is read. Once the data is fully read, its
+ sorted values are put into 'clabels_values' and 'n_clabels_values'.
+ */
+ enum pivot_axis_type label_axis[PIVOT_N_AXES];
+ enum pivot_axis_type clabels_from_axis;
+ enum pivot_axis_type clabels_to_axis;
+ const struct variable *clabels_example;
+ struct hmap clabels_values_map;
+ struct ctables_value **clabels_values;
+ size_t n_clabels_values;
+
+ /* Indexed by variable dictionary index. */
+ struct ctables_categories **categories;
+ size_t n_categories;
+
+ double cilevel;
+
+ char *caption;
+ char *corner;
+ char *title;
+
+ struct ctables_chisq *chisq;
+ struct ctables_pairwise *pairwise;
+ };
+
+/* Auxiliary data for ctables_cell_compare_3way(). */
+struct ctables_cell_sort_aux
+ {
+ const struct ctables_nest *nest; /* Variables to compare on. */
+ enum pivot_axis_type a; /* Axis whose cell values are compared. */
+ };
+
+/* Three-way comparison of two cells for sorting along one axis.
+
+   A_ and B_ each point to a "struct ctables_cell *"; AUX_ points to a
+   struct ctables_cell_sort_aux naming the nest and the axis.  The nest's
+   variables are visited outermost first, skipping the scale variable.
+   Cells in different categories order by category address; cells in the
+   same category order by value or value label when the category type calls
+   for it, honoring the category's sort direction for CCT_VALUE and
+   CCT_LABEL.  Returns a negative, zero, or positive result. */
+static int
+ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
+{
+  const struct ctables_cell_sort_aux *aux = aux_;
+  const struct ctables_cell *a = *(struct ctables_cell *const *) a_;
+  const struct ctables_cell *b = *(struct ctables_cell *const *) b_;
+  const struct ctables_nest *nest = aux->nest;
+
+  for (size_t idx = 0; idx < nest->n; idx++)
+    {
+      if (idx == nest->scale_idx)
+        continue;
+
+      const struct ctables_cell_value *lhs = &a->axes[aux->a].cvs[idx];
+      const struct ctables_cell_value *rhs = &b->axes[aux->a].cvs[idx];
+      const struct ctables_category *cat = lhs->category;
+      if (cat != rhs->category)
+        return cat > rhs->category ? 1 : -1;
+
+      const struct variable *var = nest->vars[idx];
+      switch (cat->type)
+        {
+        case CCT_NUMBER:
+        case CCT_STRING:
+        case CCT_SUBTOTAL:
+        case CCT_TOTAL:
+        case CCT_POSTCOMPUTE:
+        case CCT_EXCLUDED_MISSING:
+          /* Same category implies same position: nothing to compare. */
+          break;
+
+        case CCT_NRANGE:
+        case CCT_SRANGE:
+        case CCT_MISSING:
+        case CCT_OTHERNM:
+          {
+            /* Always ascending by value. */
+            int order = value_compare_3way (&lhs->value, &rhs->value,
+                                            var_get_width (var));
+            if (order != 0)
+              return order;
+          }
+          break;
+
+        case CCT_VALUE:
+          {
+            int order = value_compare_3way (&lhs->value, &rhs->value,
+                                            var_get_width (var));
+            if (order != 0)
+              return cat->sort_ascending ? order : -order;
+          }
+          break;
+
+        case CCT_LABEL:
+          {
+            const char *lhs_label = var_lookup_value_label (var, &lhs->value);
+            const char *rhs_label = var_lookup_value_label (var, &rhs->value);
+
+            /* A labeled value always precedes an unlabeled one, regardless
+               of the sort direction. */
+            if (lhs_label && !rhs_label)
+              return -1;
+            if (!lhs_label && rhs_label)
+              return 1;
+
+            int order = (lhs_label
+                         ? strcmp (lhs_label, rhs_label)
+                         : value_compare_3way (&lhs->value, &rhs->value,
+                                               var_get_width (var)));
+            if (order != 0)
+              return cat->sort_ascending ? order : -order;
+          }
+          break;
+
+        case CCT_FUNCTION:
+          NOT_REACHED ();
+        }
+    }
+  return 0;
}
-static bool
-ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
- struct ctables *ct, struct ctables_table *t)
+/* Three-way comparison of two cells by their 'leaf' numbers, axis by axis
+   in pivot_axis_type order.  A_ and B_ each point to a
+   "struct ctables_cell *"; AUX is unused.  Returns -1, 0, or 1. */
+static int
+ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
+ const void *aux UNUSED)
{
- if (!lex_match_id (lexer, "VARIABLES"))
- return false;
- lex_match (lexer, T_EQUALS);
+ struct ctables_cell *const *ap = a_;
+ struct ctables_cell *const *bp = b_;
+ const struct ctables_cell *a = *ap;
+ const struct ctables_cell *b = *bp;
- struct variable **vars;
- size_t n_vars;
- if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
- return false;
+ /* The first axis on which the leaves differ decides the order. */
+ for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
+ {
+ int al = a->axes[axis].leaf;
+ int bl = b->axes[axis].leaf;
+ if (al != bl)
+ return al > bl ? 1 : -1;
+ }
+ return 0;
+}
+
+/* Returns the area of type AREA in section S that contains CELL, creating
+   it if it does not exist yet.
+
+   An area is identified by the categories of the variables listed in each
+   nest's areas[AREA] and, except for total, subtotal, and postcompute
+   categories, by their values as well.  A newly created area is
+   zero-initialized apart from 'example' (set to CELL) and 'sums' (a zeroed
+   array with one element per sum variable of the table, or NULL if there
+   are none). */
+static struct ctables_area *
+ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
+ enum ctables_area_type area)
+{
+ /* Hash the identifying categories and values. */
+ size_t hash = 0;
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+ {
+ const struct ctables_nest *nest = s->nests[a];
+ for (size_t i = 0; i < nest->n_areas[area]; i++)
+ {
+ size_t v_idx = nest->areas[area][i];
+ struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
+ hash = hash_pointer (cv->category, hash);
+ if (cv->category->type != CCT_TOTAL
+ && cv->category->type != CCT_SUBTOTAL
+ && cv->category->type != CCT_POSTCOMPUTE)
+ hash = value_hash (&cv->value,
+ var_get_width (nest->vars[v_idx]), hash);
+ }
+ }
- const struct fmt_spec *common_format = var_get_print_format (vars[0]);
- for (size_t i = 1; i < n_vars; i++)
+ /* Look for an existing area whose example cell matches CELL on every
+ identifying variable.  Note that the axis loop variable 'a' below
+ shadows the area pointer 'a' only inside that inner loop; 'return a'
+ sits outside it and returns the area. */
+ struct ctables_area *a;
+ HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
{
- const struct fmt_spec *f = var_get_print_format (vars[i]);
- if (f->type != common_format->type)
+ const struct ctables_cell *df = a->example;
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
{
- common_format = NULL;
- break;
+ const struct ctables_nest *nest = s->nests[a];
+ for (size_t i = 0; i < nest->n_areas[area]; i++)
+ {
+ size_t v_idx = nest->areas[area][i];
+ struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
+ struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
+ if (cv1->category != cv2->category
+ || (cv1->category->type != CCT_TOTAL
+ && cv1->category->type != CCT_SUBTOTAL
+ && cv1->category->type != CCT_POSTCOMPUTE
+ && !value_equal (&cv1->value, &cv2->value,
+ var_get_width (nest->vars[v_idx]))))
+ goto not_equal;
+ }
}
+ return a;
+
+ not_equal: ;
}
- bool parse_strings
- = (common_format
- && (fmt_get_category (common_format->type)
- & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
- struct ctables_categories *c = xmalloc (sizeof *c);
- *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
- for (size_t i = 0; i < n_vars; i++)
+ /* No match: create a new area with CELL as its example. */
+ struct ctables_sum *sums = (s->table->n_sum_vars
+ ? xzalloc (s->table->n_sum_vars * sizeof *sums)
+ : NULL);
+
+ a = xmalloc (sizeof *a);
+ *a = (struct ctables_area) { .example = cell, .sums = sums };
+ hmap_insert (&s->areas[area], &a->node, hash);
+ return a;
+}
+
+/* Returns the cell in section S for case C classified into categories CATS,
+   creating and initializing it if it does not exist yet.
+
+   CATS[a][i] is the category for variable i of axis a's nest.  A cell is
+   identified by those categories and, except for total, subtotal, and
+   postcompute categories, by the corresponding values in C.  The scale
+   variable, if any, takes no part in the identification.
+
+   A new cell gets its hide/postcompute flags, its 'omit_areas' mask,
+   copies of the values from C, freshly initialized summaries, and its
+   areas (created on demand), and is then inserted into S->cells. */
+static struct ctables_cell *
+ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
+ const struct ctables_category **cats[PIVOT_N_AXES])
+{
+ /* Hash the identifying categories and values.  Along the way, decide the
+ summary variant: CSV_TOTAL as soon as any category is a total, subtotal,
+ or postcompute. */
+ size_t hash = 0;
+ enum ctables_summary_variant sv = CSV_CELL;
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
{
- struct ctables_categories **cp
- = &t->categories[var_get_dict_index (vars[i])];
- ctables_categories_unref (*cp);
- *cp = c;
+ const struct ctables_nest *nest = s->nests[a];
+ for (size_t i = 0; i < nest->n; i++)
+ if (i != nest->scale_idx)
+ {
+ hash = hash_pointer (cats[a][i], hash);
+ if (cats[a][i]->type != CCT_TOTAL
+ && cats[a][i]->type != CCT_SUBTOTAL
+ && cats[a][i]->type != CCT_POSTCOMPUTE)
+ hash = value_hash (case_data (c, nest->vars[i]),
+ var_get_width (nest->vars[i]), hash);
+ else
+ sv = CSV_TOTAL;
+ }
}
- size_t allocated_cats = 0;
- int cats_start_ofs = -1;
- int cats_end_ofs = -1;
- if (lex_match (lexer, T_LBRACK))
+ /* Return the existing cell, if there is one. */
+ struct ctables_cell *cell;
+ HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
{
- cats_start_ofs = lex_ofs (lexer);
- do
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
{
- if (c->n_cats >= allocated_cats)
- c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
+ const struct ctables_nest *nest = s->nests[a];
+ for (size_t i = 0; i < nest->n; i++)
+ if (i != nest->scale_idx
+ && (cats[a][i] != cell->axes[a].cvs[i].category
+ || (cats[a][i]->type != CCT_TOTAL
+ && cats[a][i]->type != CCT_SUBTOTAL
+ && cats[a][i]->type != CCT_POSTCOMPUTE
+ && !value_equal (case_data (c, nest->vars[i]),
+ &cell->axes[a].cvs[i].value,
+ var_get_width (nest->vars[i])))))
+ goto not_equal;
+ }
- int start_ofs = lex_ofs (lexer);
- struct ctables_category *cat = &c->cats[c->n_cats];
- if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
- goto error;
- cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
- c->n_cats++;
+ return cell;
- lex_match (lexer, T_COMMA);
- }
- while (!lex_match (lexer, T_RBRACK));
- cats_end_ofs = lex_ofs (lexer) - 1;
+ not_equal: ;
}
- struct ctables_category cat = {
- .type = CCT_VALUE,
- .include_missing = false,
- .sort_ascending = true,
- };
- bool show_totals = false;
- char *total_label = NULL;
- bool totals_before = false;
- while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
+ /* Not found: build a new cell.  Only the members assigned below are
+ initialized here (xmalloc does not zero the memory); in particular
+ axes[a].leaf is left for later code to fill in. */
+ cell = xmalloc (sizeof *cell);
+ cell->hide = false;
+ cell->sv = sv;
+ cell->omit_areas = 0;
+ cell->postcompute = false;
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
{
- if (!c->n_cats && lex_match_id (lexer, "ORDER"))
- {
- lex_match (lexer, T_EQUALS);
- if (lex_match_id (lexer, "A"))
- cat.sort_ascending = true;
- else if (lex_match_id (lexer, "D"))
- cat.sort_ascending = false;
- else
- {
- lex_error_expecting (lexer, "A", "D");
- goto error;
- }
- }
- else if (!c->n_cats && lex_match_id (lexer, "KEY"))
+ const struct ctables_nest *nest = s->nests[a];
+ cell->axes[a].cvs = (nest->n
+ ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
+ : NULL);
+ for (size_t i = 0; i < nest->n; i++)
{
- int start_ofs = lex_ofs (lexer) - 1;
- lex_match (lexer, T_EQUALS);
- if (lex_match_id (lexer, "VALUE"))
- cat.type = CCT_VALUE;
- else if (lex_match_id (lexer, "LABEL"))
- cat.type = CCT_LABEL;
- else
+ const struct ctables_category *cat = cats[a][i];
+ const struct variable *var = nest->vars[i];
+ const union value *value = case_data (c, var);
+ if (i != nest->scale_idx)
{
- cat.type = CCT_FUNCTION;
- if (!parse_ctables_summary_function (lexer, &cat.sort_function,
- &cat.weighting, &cat.area))
- goto error;
+ const struct ctables_category *subtotal = cat->subtotal;
+ if (cat->hide || (subtotal && subtotal->hide_subcategories))
+ cell->hide = true;
- if (lex_match (lexer, T_LPAREN))
+ /* A total-like category on axis 'a' marks the areas listed for
+ that axis in the cell's 'omit_areas' mask. */
+ if (cat->type == CCT_TOTAL
+ || cat->type == CCT_SUBTOTAL
+ || cat->type == CCT_POSTCOMPUTE)
{
- cat.sort_var = parse_variable (lexer, dict);
- if (!cat.sort_var)
- goto error;
-
- if (cat.sort_function == CTSF_PTILE)
+ switch (a)
{
- lex_match (lexer, T_COMMA);
- if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
- goto error;
- cat.percentile = lex_number (lexer);
- lex_get (lexer);
+ case PIVOT_AXIS_COLUMN:
+ cell->omit_areas |= ((1u << CTAT_TABLE) |
+ (1u << CTAT_LAYER) |
+ (1u << CTAT_LAYERCOL) |
+ (1u << CTAT_SUBTABLE) |
+ (1u << CTAT_COL));
+ break;
+ case PIVOT_AXIS_ROW:
+ cell->omit_areas |= ((1u << CTAT_TABLE) |
+ (1u << CTAT_LAYER) |
+ (1u << CTAT_LAYERROW) |
+ (1u << CTAT_SUBTABLE) |
+ (1u << CTAT_ROW));
+ break;
+ case PIVOT_AXIS_LAYER:
+ cell->omit_areas |= ((1u << CTAT_TABLE) |
+ (1u << CTAT_LAYER));
+ break;
}
+ }
+ if (cat->type == CCT_POSTCOMPUTE)
+ cell->postcompute = true;
+ }
+
+ /* Stored for every variable, the scale variable included. */
+ cell->axes[a].cvs[i].category = cat;
+ value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
+ }
+ }
+
+ /* One summary per spec of the summary axis's nest, for this cell's
+ variant. */
+ const struct ctables_nest *ss = s->nests[s->table->summary_axis];
+ const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
+ cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
+ for (size_t i = 0; i < specs->n; i++)
+ ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
+ for (enum ctables_area_type at = 0; at < N_CTATS; at++)
+ cell->areas[at] = ctables_area_insert (s, cell, at);
+ hmap_insert (&s->cells, &cell->node, hash);
+ return cell;
+}
+
+/* Adds each of the N_CTWS weights in SRC to the matching element of DST. */
+static void
+add_weight (double dst[N_CTWS], const double src[N_CTWS])
+{
+  const double *const end = src + N_CTWS;
+  while (src < end)
+    *dst++ += *src++;
+}
+
+/* Adds case C, classified into categories CATS, to its cell in section S
+   (creating the cell if necessary) and to the areas containing that cell.
+
+   IS_INCLUDED is passed through to the summaries and gates the areas'
+   'count'.  WEIGHT holds one weight per ctables_weighting; each summary
+   uses the element selected by its spec's 'weighting'. */
+static void
+ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
+ const struct ctables_category **cats[PIVOT_N_AXES],
+ bool is_included, double weight[N_CTWS])
+{
+ struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
+ const struct ctables_nest *ss = s->nests[s->table->summary_axis];
+
+ /* 'is_missing' looks only at the summarized variable.  'is_scale_missing'
+ additionally applies the listwise check when the spec set is for a scale
+ variable. */
+ const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
+ const union value *value = case_data (c, specs->var);
+ bool is_missing = var_is_value_missing (specs->var, value);
+ bool is_scale_missing
+ = is_missing || (specs->is_scale && is_listwise_missing (specs, c));
+
+ for (size_t i = 0; i < specs->n; i++)
+ ctables_summary_add (&cell->summaries[i], &specs->specs[i], value,
+ is_scale_missing, is_included,
+ weight[specs->specs[i].weighting]);
+ /* NOTE(review): '&&' is a logical AND and (1u << at) is never zero, so
+ this test reduces to !cell->omit_areas: a cell with any bit set in
+ 'omit_areas' updates no area at all.  The per-area bits assembled in
+ ctables_cell_insert__() suggest that '&' was intended, but switching to
+ it would make such cells start contributing to the areas whose bits are
+ clear, which could count cases twice -- confirm the intended totals
+ before changing it. */
+ for (enum ctables_area_type at = 0; at < N_CTATS; at++)
+ if (!(cell->omit_areas && (1u << at)))
+ {
+ struct ctables_area *a = cell->areas[at];
+
+ add_weight (a->total, weight);
+ if (is_included)
+ add_weight (a->count, weight);
+ if (!is_missing)
+ {
+ add_weight (a->valid, weight);
- if (!lex_force_match (lexer, T_RPAREN))
- goto error;
- }
- else if (ctables_function_availability (cat.sort_function)
- == CTFA_SCALE)
+ /* Accumulate the weighted value of each sum variable, skipping
+ values that are missing for that variable. */
+ if (!is_scale_missing)
+ for (size_t i = 0; i < s->table->n_sum_vars; i++)
{
- bool UNUSED b = lex_force_match (lexer, T_LPAREN);
- goto error;
+ const struct variable *var = s->table->sum_vars[i];
+ double addend = case_num (c, var);
+ if (!var_is_num_missing (var, addend))
+ for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
+ a->sums[i].sum[wt] += addend * weight[wt];
}
+ }
+ }
+}
- lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
- _("Data-dependent sorting is not implemented."));
- goto error;
- }
- }
- else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
- {
- lex_match (lexer, T_EQUALS);
- if (lex_match_id (lexer, "INCLUDE"))
- cat.include_missing = true;
- else if (lex_match_id (lexer, "EXCLUDE"))
- cat.include_missing = false;
- else
- {
- lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
- goto error;
- }
- }
- else if (lex_match_id (lexer, "TOTAL"))
- {
- lex_match (lexer, T_EQUALS);
- if (!parse_bool (lexer, &show_totals))
- goto error;
- }
- else if (lex_match_id (lexer, "LABEL"))
- {
- lex_match (lexer, T_EQUALS);
- if (!lex_force_string (lexer))
- goto error;
- free (total_label);
- total_label = ss_xstrdup (lex_tokss (lexer));
- lex_get (lexer);
- }
- else if (lex_match_id (lexer, "POSITION"))
- {
- lex_match (lexer, T_EQUALS);
- if (lex_match_id (lexer, "BEFORE"))
- totals_before = true;
- else if (lex_match_id (lexer, "AFTER"))
- totals_before = false;
- else
- {
- lex_error_expecting (lexer, "BEFORE", "AFTER");
- goto error;
- }
- }
- else if (lex_match_id (lexer, "EMPTY"))
+/* Adds case C to every cell obtained by replacing one or more categories in
+   CATS by the total category of the corresponding variable.
+
+   Scanning starts at variable START_NEST of axis START_AXIS.  For each
+   variable (other than the scale variable) whose categories have a total,
+   the category is temporarily replaced by the total, the case is added to
+   the resulting cell, and the function recurses on the variables that
+   follow, so that combinations of totals are covered too.  CATS is restored
+   before returning. */
+static void
+recurse_totals (struct ctables_section *s, const struct ccase *c,
+ const struct ctables_category **cats[PIVOT_N_AXES],
+ bool is_included, double weight[N_CTWS],
+ enum pivot_axis_type start_axis, size_t start_nest)
+{
+ for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
+ {
+ const struct ctables_nest *nest = s->nests[a];
+ for (size_t i = start_nest; i < nest->n; i++)
{
- lex_match (lexer, T_EQUALS);
- if (lex_match_id (lexer, "INCLUDE"))
- c->show_empty = true;
- else if (lex_match_id (lexer, "EXCLUDE"))
- c->show_empty = false;
- else
+ if (i == nest->scale_idx)
+ continue;
+
+ const struct variable *var = nest->vars[i];
+
+ const struct ctables_category *total = ctables_categories_total (
+ s->table->categories[var_get_dict_index (var)]);
+ if (total)
{
- lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
- goto error;
+ const struct ctables_category *save = cats[a][i];
+ cats[a][i] = total;
+ ctables_cell_add__ (s, c, cats, is_included, weight);
+ recurse_totals (s, c, cats, is_included, weight, a, i + 1);
+ cats[a][i] = save;
}
}
- else
- {
- if (!c->n_cats)
- lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
- "TOTAL", "LABEL", "POSITION", "EMPTY");
- else
- lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
- goto error;
- }
- }
-
- if (!c->n_cats)
- {
- if (c->n_cats >= allocated_cats)
- c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
- c->cats[c->n_cats++] = cat;
+ /* START_NEST applies to the first axis only. */
+ start_nest = 0;
}
+}
- if (show_totals)
+/* Adds case C to every cell obtained by replacing one or more categories in
+   CATS by the subtotal that the category points to.
+
+   Works like recurse_totals(), except that the replacement for a category
+   is its own 'subtotal' member, when that is non-null.  CATS is restored
+   before returning. */
+static void
+recurse_subtotals (struct ctables_section *s, const struct ccase *c,
+ const struct ctables_category **cats[PIVOT_N_AXES],
+ bool is_included, double weight[N_CTWS],
+ enum pivot_axis_type start_axis, size_t start_nest)
+{
+ for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
{
- if (c->n_cats >= allocated_cats)
- c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
-
- struct ctables_category *totals;
- if (totals_before)
+ const struct ctables_nest *nest = s->nests[a];
+ for (size_t i = start_nest; i < nest->n; i++)
{
- insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
- totals = &c->cats[0];
- }
- else
- totals = &c->cats[c->n_cats];
- c->n_cats++;
+ if (i == nest->scale_idx)
+ continue;
- *totals = (struct ctables_category) {
- .type = CCT_TOTAL,
- .total_label = total_label ? total_label : xstrdup (_("Total")),
- };
+ const struct ctables_category *save = cats[a][i];
+ if (save->subtotal)
+ {
+ cats[a][i] = save->subtotal;
+ ctables_cell_add__ (s, c, cats, is_included, weight);
+ recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
+ cats[a][i] = save;
+ }
+ }
+ /* START_NEST applies to the first axis only. */
+ start_nest = 0;
}
+}
- struct ctables_category *subtotal = NULL;
- for (size_t i = totals_before ? 0 : c->n_cats;
- totals_before ? i < c->n_cats : i-- > 0;
- totals_before ? i++ : 0)
+/* Adds case C, with weights WEIGHT (one per ctables_weighting), to section
+   S.
+
+   Each variable in the section's nests, other than the scale variable, is
+   matched against its category specification.  A case with an unmatched
+   value is dropped, with one exception: a missing value of the summary
+   variable is kept under a hidden "excluded missing" category and flagged
+   as not included.  Included cases have their values recorded as
+   occurrences.  The case is then added to its own cell and to every total
+   and subtotal cell derived from it. */
+static void
+ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
+ double weight[N_CTWS])
+{
+ /* A nest may have no variables at all (ctables_cell_insert__() allows for
+ nest->n == 0), and a variable-length array with zero elements is
+ undefined behavior in ISO C, so each array gets one spare element. */
+ const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n + 1];
+ const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n + 1];
+ const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n + 1];
+ const struct ctables_category **cats[PIVOT_N_AXES] =
{
- struct ctables_category *cat = &c->cats[i];
- switch (cat->type)
- {
- case CCT_NUMBER:
- case CCT_STRING:
- case CCT_NRANGE:
- case CCT_SRANGE:
- case CCT_MISSING:
- case CCT_OTHERNM:
- cat->subtotal = subtotal;
- break;
-
- case CCT_POSTCOMPUTE:
- break;
-
- case CCT_SUBTOTAL:
- subtotal = cat;
- break;
+ [PIVOT_AXIS_LAYER] = layer_cats,
+ [PIVOT_AXIS_ROW] = row_cats,
+ [PIVOT_AXIS_COLUMN] = column_cats,
+ };
- case CCT_TOTAL:
- case CCT_VALUE:
- case CCT_LABEL:
- case CCT_FUNCTION:
- case CCT_EXCLUDED_MISSING:
- break;
- }
- }
+ bool is_included = true;
- if (cats_start_ofs != -1)
+ /* Classify the case on every non-scale variable. */
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
{
- for (size_t i = 0; i < c->n_cats; i++)
- {
- struct ctables_category *cat = &c->cats[i];
- switch (cat->type)
- {
- case CCT_POSTCOMPUTE:
- cat->parse_format = parse_strings ? common_format->type : FMT_F;
- struct msg_location *cats_location
- = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
- bool ok = ctables_recursive_check_postcompute (
- dict, cat->pc->expr, cat, c, cats_location);
- msg_location_destroy (cats_location);
- if (!ok)
- goto error;
- break;
+ const struct ctables_nest *nest = s->nests[a];
+ for (size_t i = 0; i < nest->n; i++)
+ if (i != nest->scale_idx)
+ {
+ const struct variable *var = nest->vars[i];
+ const union value *value = case_data (c, var);
- case CCT_NUMBER:
- case CCT_NRANGE:
- for (size_t j = 0; j < n_vars; j++)
- if (var_is_alpha (vars[j]))
- {
- msg_at (SE, cat->location,
- _("This category specification may be applied "
- "only to numeric variables, but this "
- "subcommand tries to apply it to string "
- "variable %s."),
- var_get_name (vars[j]));
- goto error;
- }
- break;
+ cats[a][i] = ctables_categories_match (
+ s->table->categories[var_get_dict_index (var)], value, var);
+ if (!cats[a][i])
+ {
+ /* No category matches.  Keep the case only if this is the
+ summary variable and its value is missing. */
+ if (i != nest->summary_idx)
+ return;
- case CCT_STRING:
- if (parse_strings)
- {
- double n;
- if (!parse_category_string (cat->location, cat->string, dict,
- common_format->type, &n))
- goto error;
+ if (!var_is_value_missing (var, value))
+ return;
+
+ static const struct ctables_category cct_excluded_missing = {
+ .type = CCT_EXCLUDED_MISSING,
+ .hide = true,
+ };
+ cats[a][i] = &cct_excluded_missing;
+ is_included = false;
+ }
+ }
+ }
- ss_dealloc (&cat->string);
+ /* Record which values actually occur, for included cases only. */
+ if (is_included)
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+ {
+ const struct ctables_nest *nest = s->nests[a];
+ for (size_t i = 0; i < nest->n; i++)
+ if (i != nest->scale_idx)
+ {
+ const struct variable *var = nest->vars[i];
+ const union value *value = case_data (c, var);
+ ctables_add_occurrence (var, value, &s->occurrences[a][i]);
+ }
+ }
- cat->type = CCT_NUMBER;
- cat->number = n;
- }
- else if (!all_strings (vars, n_vars, cat))
- goto error;
- break;
+ ctables_cell_add__ (s, c, cats, is_included, weight);
+ recurse_totals (s, c, cats, is_included, weight, 0, 0);
+ recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
+}
+\f
+struct ctables_value           /* One distinct value collected for a table's
+                                  'clabels_values_map'; the values are later
+                                  gathered into the sorted 'clabels_values'
+                                  array. */
+  {
+    struct hmap_node node;      /* In the table's 'clabels_values_map'. */
+    union value value;          /* Owned copy, made with value_clone(). */
+    int leaf;                   /* Index in the sorted 'clabels_values' array;
+                                   not set until the values are sorted. */
+  };
- case CCT_SRANGE:
- if (parse_strings)
- {
- double n[2];
+/* Looks in T's 'clabels_values_map' for an entry whose value equals VALUE,
+   where both are WIDTH wide.  Only entries stored under HASH are examined,
+   so HASH must be the hash that VALUE would have been inserted with.
+   Returns the matching entry, or NULL if there is none. */
+static struct ctables_value *
+ctables_value_find__ (struct ctables_table *t, const union value *value,
+                      int width, unsigned int hash)
+{
+  struct ctables_value *candidate;
+  HMAP_FOR_EACH_WITH_HASH (candidate, struct ctables_value, node,
+                           hash, &t->clabels_values_map)
+    {
+      if (value_equal (value, &candidate->value, width))
+        return candidate;
+    }
+
+  return NULL;
+}
- if (!cat->srange[0].string)
- n[0] = -DBL_MAX;
- else if (!parse_category_string (cat->location,
- cat->srange[0], dict,
- common_format->type, &n[0]))
- goto error;
+/* Adds a copy of VALUE (WIDTH wide) to T's 'clabels_values_map', unless an
+   equal value is already present, in which case nothing changes.  The new
+   entry's 'leaf' member is not assigned here. */
+static void
+ctables_value_insert (struct ctables_table *t, const union value *value,
+                      int width)
+{
+  unsigned int hash = value_hash (value, width, 0);
+  if (ctables_value_find__ (t, value, width, hash))
+    return;
+
+  struct ctables_value *entry = xmalloc (sizeof *entry);
+  value_clone (&entry->value, value, width);
+  hmap_insert (&t->clabels_values_map, &entry->node, hash);
+}
- if (!cat->srange[1].string)
- n[1] = DBL_MAX;
- else if (!parse_category_string (cat->location,
- cat->srange[1], dict,
- common_format->type, &n[1]))
- goto error;
+/* Returns the entry in T's 'clabels_values_map' whose value equals VALUE
+   (WIDTH wide), or NULL if there is none.  The hash is computed here the
+   same way ctables_value_insert() computes it. */
+static struct ctables_value *
+ctables_value_find (struct ctables_table *t,
+                    const union value *value, int width)
+{
+  unsigned int hash = value_hash (value, width, 0);
+  return ctables_value_find__ (t, value, width, hash);
+}
- ss_dealloc (&cat->srange[0]);
- ss_dealloc (&cat->srange[1]);
+/* Three-way comparison callback for sorting an array of pointers to
+   "struct ctables_value".  A_ and B_ each point to one array element (that
+   is, to a pointer), and WIDTH_ points to the int width of the values.
+   Returns the result of value_compare_3way() on the two values. */
+static int
+compare_ctables_values_3way (const void *a_, const void *b_, const void *width_)
+{
+  const struct ctables_value *a = *(const struct ctables_value *const *) a_;
+  const struct ctables_value *b = *(const struct ctables_value *const *) b_;
+  int width = *(const int *) width_;
+
+  return value_compare_3way (&a->value, &b->value, width);
+}
- cat->type = CCT_NRANGE;
- cat->nrange[0] = n[0];
- cat->nrange[1] = n[1];
- }
- else if (!all_strings (vars, n_vars, cat))
- goto error;
- break;
+static void
+ctables_sort_clabels_values (struct ctables_table *t)
+{
+ const struct variable *v0 = t->clabels_example;
+ int width = var_get_width (v0);
- case CCT_MISSING:
- case CCT_OTHERNM:
- case CCT_SUBTOTAL:
- case CCT_TOTAL:
- case CCT_VALUE:
- case CCT_LABEL:
- case CCT_FUNCTION:
- case CCT_EXCLUDED_MISSING:
- break;
- }
- }
+ struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
+ if (c0->show_empty)
+ {
+ const struct val_labs *val_labs = var_get_value_labels (v0);
+ for (const struct val_lab *vl = val_labs_first (val_labs); vl;
+ vl = val_labs_next (val_labs, vl))
+ if (ctables_categories_match (c0, &vl->value, v0))
+ ctables_value_insert (t, &vl->value, width);
}
- free (vars);
- return true;
+ size_t n = hmap_count (&t->clabels_values_map);
+ t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
-error:
- free (vars);
- return false;
+ struct ctables_value *clv;
+ size_t i = 0;
+ HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
+ t->clabels_values[i++] = clv;
+ t->n_clabels_values = n;
+ assert (i == n);
+
+ sort (t->clabels_values, n, sizeof *t->clabels_values,
+ compare_ctables_values_3way, &width);
+
+ for (size_t i = 0; i < n; i++)
+ t->clabels_values[i]->leaf = i;
}
\f
-struct ctables_cell_sort_aux
+struct ctables
{
- const struct ctables_nest *nest;
- enum pivot_axis_type a;
- };
+ const struct dictionary *dict;
+ struct pivot_table_look *look;
-static int
-ctables_cell_compare_3way (const void *a_, const void *b_, const void *aux_)
-{
- const struct ctables_cell_sort_aux *aux = aux_;
- struct ctables_cell *const *ap = a_;
- struct ctables_cell *const *bp = b_;
- const struct ctables_cell *a = *ap;
- const struct ctables_cell *b = *bp;
+ /* For CTEF_* formats. */
+ struct fmt_settings ctables_formats;
- const struct ctables_nest *nest = aux->nest;
- for (size_t i = 0; i < nest->n; i++)
- if (i != nest->scale_idx)
- {
- const struct variable *var = nest->vars[i];
- const struct ctables_cell_value *a_cv = &a->axes[aux->a].cvs[i];
- const struct ctables_cell_value *b_cv = &b->axes[aux->a].cvs[i];
- if (a_cv->category != b_cv->category)
- return a_cv->category > b_cv->category ? 1 : -1;
+ /* If this is NULL, zeros are displayed using the normal print format.
+ Otherwise, this string is displayed. */
+ char *zero;
- const union value *a_val = &a_cv->value;
- const union value *b_val = &b_cv->value;
- switch (a_cv->category->type)
- {
- case CCT_NUMBER:
- case CCT_STRING:
- case CCT_SUBTOTAL:
- case CCT_TOTAL:
- case CCT_POSTCOMPUTE:
- case CCT_EXCLUDED_MISSING:
- /* Must be equal. */
- continue;
+ /* If this is NULL, missing values are displayed using the normal print
+ format. Otherwise, this string is displayed. */
+ char *missing;
- case CCT_NRANGE:
- case CCT_SRANGE:
- case CCT_MISSING:
- case CCT_OTHERNM:
- {
- int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
- if (cmp)
- return cmp;
- }
- break;
+ /* Indexed by variable dictionary index. */
+ enum ctables_vlabel *vlabels;
- case CCT_VALUE:
- {
- int cmp = value_compare_3way (a_val, b_val, var_get_width (var));
- if (cmp)
- return a_cv->category->sort_ascending ? cmp : -cmp;
- }
- break;
+ struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
- case CCT_LABEL:
- {
- const char *a_label = var_lookup_value_label (var, a_val);
- const char *b_label = var_lookup_value_label (var, b_val);
- int cmp;
- if (a_label)
- {
- if (!b_label)
- return -1;
- cmp = strcmp (a_label, b_label);
- }
- else
- {
- if (b_label)
- return 1;
- cmp = value_compare_3way (a_val, b_val, var_get_width (var));
- }
- if (cmp)
- return a_cv->category->sort_ascending ? cmp : -cmp;
- }
- break;
+ bool mrsets_count_duplicates; /* MRSETS. */
+ bool smissing_listwise; /* SMISSING. */
+ struct variable *e_weight; /* WEIGHT. */
+ int hide_threshold; /* HIDESMALLCOUNTS. */
+
+ struct ctables_table **tables;
+ size_t n_tables;
+ };
+
+/* Chi-square test (SIGTEST).
+
+   A table refers to one of these through its 'chisq' member, and
+   ctables_chisq_destroy() releases it with a plain free(), so the structure
+   owns no other allocations. */
+struct ctables_chisq
+  {
+    double alpha;          /* NOTE(review): presumably the significance
+                              level -- confirm against the parser. */
+    bool include_mrsets;   /* NOTE(review): meaning not visible here. */
+    bool all_visible;      /* NOTE(review): meaning not visible here. */
+  };
+
+/* Pairwise comparison test (COMPARETEST).
+
+   A table refers to one of these through its 'pairwise' member, and
+   ctables_pairwise_destroy() releases it with a plain free(), so the
+   structure owns no other allocations. */
+struct ctables_pairwise
+  {
+    enum { PROP, MEAN } type;   /* PROP is 0, MEAN is 1. */
+    double alpha[2];            /* NOTE(review): presumably significance
+                                   levels -- confirm why there are two. */
+    bool include_mrsets;
+    bool meansvariance_allcats;
+    bool all_visible;
+    enum { BONFERRONI = 1, BH } adjust; /* BONFERRONI is 1 and BH is 2, so 0
+                                           matches neither enumerator;
+                                           presumably 0 means no adjustment
+                                           -- TODO confirm. */
+    bool merge;
+    bool apa_style;
+    bool show_sig;
+  };
- case CCT_FUNCTION:
- NOT_REACHED ();
- }
- }
- return 0;
-}
-static int
-ctables_cell_compare_leaf_3way (const void *a_, const void *b_,
- const void *aux UNUSED)
-{
- struct ctables_cell *const *ap = a_;
- struct ctables_cell *const *bp = b_;
- const struct ctables_cell *a = *ap;
- const struct ctables_cell *b = *bp;
- for (enum pivot_axis_type axis = 0; axis < PIVOT_N_AXES; axis++)
+static bool
+parse_col_width (struct lexer *lexer, const char *name, double *width)
+{
+ lex_match (lexer, T_EQUALS);
+ if (lex_match_id (lexer, "DEFAULT"))
+ *width = SYSMIS;
+ else if (lex_force_num_range_closed (lexer, name, 0, DBL_MAX))
{
- int al = a->axes[axis].leaf;
- int bl = b->axes[axis].leaf;
- if (al != bl)
- return al > bl ? 1 : -1;
+ *width = lex_number (lexer);
+ lex_get (lexer);
}
- return 0;
+ else
+ return false;
+
+ return true;
}
-static struct ctables_area *
-ctables_area_insert (struct ctables_section *s, struct ctables_cell *cell,
- enum ctables_area_type area)
+static bool
+parse_bool (struct lexer *lexer, bool *b)
{
- size_t hash = 0;
- for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
- {
- const struct ctables_nest *nest = s->nests[a];
- for (size_t i = 0; i < nest->n_areas[area]; i++)
- {
- size_t v_idx = nest->areas[area][i];
- struct ctables_cell_value *cv = &cell->axes[a].cvs[v_idx];
- hash = hash_pointer (cv->category, hash);
- if (cv->category->type != CCT_TOTAL
- && cv->category->type != CCT_SUBTOTAL
- && cv->category->type != CCT_POSTCOMPUTE)
- hash = value_hash (&cv->value,
- var_get_width (nest->vars[v_idx]), hash);
- }
- }
-
- struct ctables_area *a;
- HMAP_FOR_EACH_WITH_HASH (a, struct ctables_area, node, hash, &s->areas[area])
+ if (lex_match_id (lexer, "NO"))
+ *b = false;
+ else if (lex_match_id (lexer, "YES"))
+ *b = true;
+ else
{
- const struct ctables_cell *df = a->example;
- for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
- {
- const struct ctables_nest *nest = s->nests[a];
- for (size_t i = 0; i < nest->n_areas[area]; i++)
- {
- size_t v_idx = nest->areas[area][i];
- struct ctables_cell_value *cv1 = &df->axes[a].cvs[v_idx];
- struct ctables_cell_value *cv2 = &cell->axes[a].cvs[v_idx];
- if (cv1->category != cv2->category
- || (cv1->category->type != CCT_TOTAL
- && cv1->category->type != CCT_SUBTOTAL
- && cv1->category->type != CCT_POSTCOMPUTE
- && !value_equal (&cv1->value, &cv2->value,
- var_get_width (nest->vars[v_idx]))))
- goto not_equal;
- }
- }
- return a;
-
- not_equal: ;
+ lex_error_expecting (lexer, "YES", "NO");
+ return false;
}
-
- struct ctables_sum *sums = (s->table->n_sum_vars
- ? xzalloc (s->table->n_sum_vars * sizeof *sums)
- : NULL);
-
- a = xmalloc (sizeof *a);
- *a = (struct ctables_area) { .example = cell, .sums = sums };
- hmap_insert (&s->areas[area], &a->node, hash);
- return a;
+ return true;
}
-static struct substring
-rtrim_value (const union value *v, const struct variable *var)
+static void
+ctables_chisq_destroy (struct ctables_chisq *chisq)
{
- struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
- var_get_width (var));
- ss_rtrim (&s, ss_cstr (" "));
- return s;
+ free (chisq);
}
-static bool
-in_string_range (const union value *v, const struct variable *var,
- const struct substring *srange)
+static void
+ctables_pairwise_destroy (struct ctables_pairwise *pairwise)
{
- struct substring s = rtrim_value (v, var);
- return ((!srange[0].string || ss_compare (s, srange[0]) >= 0)
- && (!srange[1].string || ss_compare (s, srange[1]) <= 0));
+ free (pairwise);
}
-static const struct ctables_category *
-ctables_categories_match (const struct ctables_categories *c,
- const union value *v, const struct variable *var)
+static void
+ctables_table_destroy (struct ctables_table *t)
{
- if (var_is_numeric (var) && v->f == SYSMIS)
- return NULL;
+ if (!t)
+ return;
- const struct ctables_category *othernm = NULL;
- for (size_t i = c->n_cats; i-- > 0; )
+ for (size_t i = 0; i < t->n_sections; i++)
+ ctables_section_uninit (&t->sections[i]);
+ free (t->sections);
+
+ for (size_t i = 0; i < t->n_categories; i++)
+ ctables_categories_unref (t->categories[i]);
+ free (t->categories);
+
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
{
- const struct ctables_category *cat = &c->cats[i];
- switch (cat->type)
- {
- case CCT_NUMBER:
- if (cat->number == v->f)
- return cat;
- break;
+ ctables_axis_destroy (t->axes[a]);
+ ctables_stack_uninit (&t->stacks[a]);
+ }
+ free (t->summary_specs.specs);
- case CCT_STRING:
- if (ss_equals (cat->string, rtrim_value (v, var)))
- return cat;
- break;
+ struct ctables_value *ctv, *next_ctv;
+ HMAP_FOR_EACH_SAFE (ctv, next_ctv, struct ctables_value, node,
+ &t->clabels_values_map)
+ {
+ value_destroy (&ctv->value, var_get_width (t->clabels_example));
+ hmap_delete (&t->clabels_values_map, &ctv->node);
+ free (ctv);
+ }
+ hmap_destroy (&t->clabels_values_map);
+ free (t->clabels_values);
- case CCT_NRANGE:
- if ((cat->nrange[0] == -DBL_MAX || v->f >= cat->nrange[0])
- && (cat->nrange[1] == DBL_MAX || v->f <= cat->nrange[1]))
- return cat;
- break;
+ free (t->sum_vars);
+ free (t->caption);
+ free (t->corner);
+ free (t->title);
+ ctables_chisq_destroy (t->chisq);
+ ctables_pairwise_destroy (t->pairwise);
+ free (t);
+}
- case CCT_SRANGE:
- if (in_string_range (v, var, cat->srange))
- return cat;
- break;
+static void
+ctables_destroy (struct ctables *ct)
+{
+ if (!ct)
+ return;
- case CCT_MISSING:
- if (var_is_value_missing (var, v))
- return cat;
- break;
+ struct ctables_postcompute *pc, *next_pc;
+ HMAP_FOR_EACH_SAFE (pc, next_pc, struct ctables_postcompute, hmap_node,
+ &ct->postcomputes)
+ {
+ free (pc->name);
+ msg_location_destroy (pc->location);
+ ctables_pcexpr_destroy (pc->expr);
+ free (pc->label);
+ if (pc->specs)
+ {
+ ctables_summary_spec_set_uninit (pc->specs);
+ free (pc->specs);
+ }
+ hmap_delete (&ct->postcomputes, &pc->hmap_node);
+ free (pc);
+ }
+ hmap_destroy (&ct->postcomputes);
- case CCT_POSTCOMPUTE:
- break;
+ fmt_settings_uninit (&ct->ctables_formats);
+ pivot_table_look_unref (ct->look);
+ free (ct->zero);
+ free (ct->missing);
+ free (ct->vlabels);
+ for (size_t i = 0; i < ct->n_tables; i++)
+ ctables_table_destroy (ct->tables[i]);
+ free (ct->tables);
+ free (ct);
+}
- case CCT_OTHERNM:
- if (!othernm)
- othernm = cat;
- break;
+/* Checks that none of the N_VARS variables in VARS is numeric.  At the first
+   numeric variable, reports an error at CAT's location that names the
+   variable, and returns false without looking at the rest.  Returns true if
+   no variable is numeric, including when N_VARS is 0. */
+static bool
+all_strings (struct variable **vars, size_t n_vars,
+             const struct ctables_category *cat)
+{
+  for (size_t idx = 0; idx < n_vars; idx++)
+    {
+      const struct variable *var = vars[idx];
+      if (!var_is_numeric (var))
+        continue;
+
+      msg_at (SE, cat->location,
+              _("This category specification may be applied only to string "
+                "variables, but this subcommand tries to apply it to "
+                "numeric variable %s."),
+              var_get_name (var));
+      return false;
+    }
+
+  return true;
+}
- case CCT_SUBTOTAL:
- case CCT_TOTAL:
- break;
+static bool
+ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
+ struct ctables *ct, struct ctables_table *t)
+{
+ if (!lex_match_id (lexer, "VARIABLES"))
+ return false;
+ lex_match (lexer, T_EQUALS);
- case CCT_VALUE:
- case CCT_LABEL:
- case CCT_FUNCTION:
- return (cat->include_missing || !var_is_value_missing (var, v) ? cat
- : NULL);
+ struct variable **vars;
+ size_t n_vars;
+ if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
+ return false;
- case CCT_EXCLUDED_MISSING:
+ const struct fmt_spec *common_format = var_get_print_format (vars[0]);
+ for (size_t i = 1; i < n_vars; i++)
+ {
+ const struct fmt_spec *f = var_get_print_format (vars[i]);
+ if (f->type != common_format->type)
+ {
+ common_format = NULL;
break;
}
}
+ bool parse_strings
+ = (common_format
+ && (fmt_get_category (common_format->type)
+ & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
- return var_is_value_missing (var, v) ? NULL : othernm;
-}
-
-static const struct ctables_category *
-ctables_categories_total (const struct ctables_categories *c)
-{
- const struct ctables_category *first = &c->cats[0];
- const struct ctables_category *last = &c->cats[c->n_cats - 1];
- return (first->type == CCT_TOTAL ? first
- : last->type == CCT_TOTAL ? last
- : NULL);
-}
-
-static struct ctables_cell *
-ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
- const struct ctables_category **cats[PIVOT_N_AXES])
-{
- size_t hash = 0;
- enum ctables_summary_variant sv = CSV_CELL;
- for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+ struct ctables_categories *c = xmalloc (sizeof *c);
+ *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
+ for (size_t i = 0; i < n_vars; i++)
{
- const struct ctables_nest *nest = s->nests[a];
- for (size_t i = 0; i < nest->n; i++)
- if (i != nest->scale_idx)
- {
- hash = hash_pointer (cats[a][i], hash);
- if (cats[a][i]->type != CCT_TOTAL
- && cats[a][i]->type != CCT_SUBTOTAL
- && cats[a][i]->type != CCT_POSTCOMPUTE)
- hash = value_hash (case_data (c, nest->vars[i]),
- var_get_width (nest->vars[i]), hash);
- else
- sv = CSV_TOTAL;
- }
+ struct ctables_categories **cp
+ = &t->categories[var_get_dict_index (vars[i])];
+ ctables_categories_unref (*cp);
+ *cp = c;
}
- struct ctables_cell *cell;
- HMAP_FOR_EACH_WITH_HASH (cell, struct ctables_cell, node, hash, &s->cells)
+ size_t allocated_cats = 0;
+ int cats_start_ofs = -1;
+ int cats_end_ofs = -1;
+ if (lex_match (lexer, T_LBRACK))
{
- for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+ cats_start_ofs = lex_ofs (lexer);
+ do
{
- const struct ctables_nest *nest = s->nests[a];
- for (size_t i = 0; i < nest->n; i++)
- if (i != nest->scale_idx
- && (cats[a][i] != cell->axes[a].cvs[i].category
- || (cats[a][i]->type != CCT_TOTAL
- && cats[a][i]->type != CCT_SUBTOTAL
- && cats[a][i]->type != CCT_POSTCOMPUTE
- && !value_equal (case_data (c, nest->vars[i]),
- &cell->axes[a].cvs[i].value,
- var_get_width (nest->vars[i])))))
- goto not_equal;
- }
+ if (c->n_cats >= allocated_cats)
+ c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
- return cell;
+ int start_ofs = lex_ofs (lexer);
+ struct ctables_category *cat = &c->cats[c->n_cats];
+ if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
+ goto error;
+ cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
+ c->n_cats++;
- not_equal: ;
+ lex_match (lexer, T_COMMA);
+ }
+ while (!lex_match (lexer, T_RBRACK));
+ cats_end_ofs = lex_ofs (lexer) - 1;
}
- cell = xmalloc (sizeof *cell);
- cell->hide = false;
- cell->sv = sv;
- cell->omit_areas = 0;
- cell->postcompute = false;
- for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+ struct ctables_category cat = {
+ .type = CCT_VALUE,
+ .include_missing = false,
+ .sort_ascending = true,
+ };
+ bool show_totals = false;
+ char *total_label = NULL;
+ bool totals_before = false;
+ while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
{
- const struct ctables_nest *nest = s->nests[a];
- cell->axes[a].cvs = (nest->n
- ? xnmalloc (nest->n, sizeof *cell->axes[a].cvs)
- : NULL);
- for (size_t i = 0; i < nest->n; i++)
+ if (!c->n_cats && lex_match_id (lexer, "ORDER"))
{
- const struct ctables_category *cat = cats[a][i];
- const struct variable *var = nest->vars[i];
- const union value *value = case_data (c, var);
- if (i != nest->scale_idx)
+ lex_match (lexer, T_EQUALS);
+ if (lex_match_id (lexer, "A"))
+ cat.sort_ascending = true;
+ else if (lex_match_id (lexer, "D"))
+ cat.sort_ascending = false;
+ else
{
- const struct ctables_category *subtotal = cat->subtotal;
- if (cat->hide || (subtotal && subtotal->hide_subcategories))
- cell->hide = true;
+ lex_error_expecting (lexer, "A", "D");
+ goto error;
+ }
+ }
+ else if (!c->n_cats && lex_match_id (lexer, "KEY"))
+ {
+ int start_ofs = lex_ofs (lexer) - 1;
+ lex_match (lexer, T_EQUALS);
+ if (lex_match_id (lexer, "VALUE"))
+ cat.type = CCT_VALUE;
+ else if (lex_match_id (lexer, "LABEL"))
+ cat.type = CCT_LABEL;
+ else
+ {
+ cat.type = CCT_FUNCTION;
+ if (!parse_ctables_summary_function (lexer, &cat.sort_function,
+ &cat.weighting, &cat.area))
+ goto error;
- if (cat->type == CCT_TOTAL
- || cat->type == CCT_SUBTOTAL
- || cat->type == CCT_POSTCOMPUTE)
+ if (lex_match (lexer, T_LPAREN))
{
- switch (a)
+ cat.sort_var = parse_variable (lexer, dict);
+ if (!cat.sort_var)
+ goto error;
+
+ if (cat.sort_function == CTSF_PTILE)
{
- case PIVOT_AXIS_COLUMN:
- cell->omit_areas |= ((1u << CTAT_TABLE) |
- (1u << CTAT_LAYER) |
- (1u << CTAT_LAYERCOL) |
- (1u << CTAT_SUBTABLE) |
- (1u << CTAT_COL));
- break;
- case PIVOT_AXIS_ROW:
- cell->omit_areas |= ((1u << CTAT_TABLE) |
- (1u << CTAT_LAYER) |
- (1u << CTAT_LAYERROW) |
- (1u << CTAT_SUBTABLE) |
- (1u << CTAT_ROW));
- break;
- case PIVOT_AXIS_LAYER:
- cell->omit_areas |= ((1u << CTAT_TABLE) |
- (1u << CTAT_LAYER));
- break;
+ lex_match (lexer, T_COMMA);
+ if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
+ goto error;
+ cat.percentile = lex_number (lexer);
+ lex_get (lexer);
}
+
+ if (!lex_force_match (lexer, T_RPAREN))
+ goto error;
+ }
+ else if (ctables_function_availability (cat.sort_function)
+ == CTFA_SCALE)
+ {
+ bool UNUSED b = lex_force_match (lexer, T_LPAREN);
+ goto error;
}
- if (cat->type == CCT_POSTCOMPUTE)
- cell->postcompute = true;
- }
- cell->axes[a].cvs[i].category = cat;
- value_clone (&cell->axes[a].cvs[i].value, value, var_get_width (var));
+ lex_ofs_error (lexer, start_ofs, lex_ofs (lexer) - 1,
+ _("Data-dependent sorting is not implemented."));
+ goto error;
+ }
}
- }
-
- const struct ctables_nest *ss = s->nests[s->table->summary_axis];
- const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
- cell->summaries = xmalloc (specs->n * sizeof *cell->summaries);
- for (size_t i = 0; i < specs->n; i++)
- ctables_summary_init (&cell->summaries[i], &specs->specs[i]);
- for (enum ctables_area_type at = 0; at < N_CTATS; at++)
- cell->areas[at] = ctables_area_insert (s, cell, at);
- hmap_insert (&s->cells, &cell->node, hash);
- return cell;
-}
-
-static void
-add_weight (double dst[N_CTWS], const double src[N_CTWS])
-{
- for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
- dst[wt] += src[wt];
-}
-
-static void
-ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
- const struct ctables_category **cats[PIVOT_N_AXES],
- bool is_included, double weight[N_CTWS])
-{
- struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
- const struct ctables_nest *ss = s->nests[s->table->summary_axis];
+ else if (!c->n_cats && lex_match_id (lexer, "MISSING"))
+ {
+ lex_match (lexer, T_EQUALS);
+ if (lex_match_id (lexer, "INCLUDE"))
+ cat.include_missing = true;
+ else if (lex_match_id (lexer, "EXCLUDE"))
+ cat.include_missing = false;
+ else
+ {
+ lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
+ goto error;
+ }
+ }
+ else if (lex_match_id (lexer, "TOTAL"))
+ {
+ lex_match (lexer, T_EQUALS);
+ if (!parse_bool (lexer, &show_totals))
+ goto error;
+ }
+ else if (lex_match_id (lexer, "LABEL"))
+ {
+ lex_match (lexer, T_EQUALS);
+ if (!lex_force_string (lexer))
+ goto error;
+ free (total_label);
+ total_label = ss_xstrdup (lex_tokss (lexer));
+ lex_get (lexer);
+ }
+ else if (lex_match_id (lexer, "POSITION"))
+ {
+ lex_match (lexer, T_EQUALS);
+ if (lex_match_id (lexer, "BEFORE"))
+ totals_before = true;
+ else if (lex_match_id (lexer, "AFTER"))
+ totals_before = false;
+ else
+ {
+ lex_error_expecting (lexer, "BEFORE", "AFTER");
+ goto error;
+ }
+ }
+ else if (lex_match_id (lexer, "EMPTY"))
+ {
+ lex_match (lexer, T_EQUALS);
+ if (lex_match_id (lexer, "INCLUDE"))
+ c->show_empty = true;
+ else if (lex_match_id (lexer, "EXCLUDE"))
+ c->show_empty = false;
+ else
+ {
+ lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
+ goto error;
+ }
+ }
+ else
+ {
+ if (!c->n_cats)
+ lex_error_expecting (lexer, "ORDER", "KEY", "MISSING",
+ "TOTAL", "LABEL", "POSITION", "EMPTY");
+ else
+ lex_error_expecting (lexer, "TOTAL", "LABEL", "POSITION", "EMPTY");
+ goto error;
+ }
+ }
- const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
- const union value *value = case_data (c, specs->var);
- bool is_missing = var_is_value_missing (specs->var, value);
- bool is_scale_missing
- = is_missing || (specs->is_scale && is_listwise_missing (specs, c));
+ if (!c->n_cats)
+ {
+ if (c->n_cats >= allocated_cats)
+ c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
+ c->cats[c->n_cats++] = cat;
+ }
- for (size_t i = 0; i < specs->n; i++)
- ctables_summary_add (&cell->summaries[i], &specs->specs[i], value,
- is_scale_missing, is_included,
- weight[specs->specs[i].weighting]);
- for (enum ctables_area_type at = 0; at < N_CTATS; at++)
- if (!(cell->omit_areas && (1u << at)))
- {
- struct ctables_area *a = cell->areas[at];
+ if (show_totals)
+ {
+ if (c->n_cats >= allocated_cats)
+ c->cats = x2nrealloc (c->cats, &allocated_cats, sizeof *c->cats);
- add_weight (a->total, weight);
- if (is_included)
- add_weight (a->count, weight);
- if (!is_missing)
- {
- add_weight (a->valid, weight);
+ struct ctables_category *totals;
+ if (totals_before)
+ {
+ insert_element (c->cats, c->n_cats, sizeof *c->cats, 0);
+ totals = &c->cats[0];
+ }
+ else
+ totals = &c->cats[c->n_cats];
+ c->n_cats++;
- if (!is_scale_missing)
- for (size_t i = 0; i < s->table->n_sum_vars; i++)
- {
- const struct variable *var = s->table->sum_vars[i];
- double addend = case_num (c, var);
- if (!var_is_num_missing (var, addend))
- for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
- a->sums[i].sum[wt] += addend * weight[wt];
- }
- }
- }
-}
+ *totals = (struct ctables_category) {
+ .type = CCT_TOTAL,
+ .total_label = total_label ? total_label : xstrdup (_("Total")),
+ };
+ }
-static void
-recurse_totals (struct ctables_section *s, const struct ccase *c,
- const struct ctables_category **cats[PIVOT_N_AXES],
- bool is_included, double weight[N_CTWS],
- enum pivot_axis_type start_axis, size_t start_nest)
-{
- for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
+ struct ctables_category *subtotal = NULL;
+ for (size_t i = totals_before ? 0 : c->n_cats;
+ totals_before ? i < c->n_cats : i-- > 0;
+ totals_before ? i++ : 0)
{
- const struct ctables_nest *nest = s->nests[a];
- for (size_t i = start_nest; i < nest->n; i++)
+ struct ctables_category *cat = &c->cats[i];
+ switch (cat->type)
{
- if (i == nest->scale_idx)
- continue;
+ case CCT_NUMBER:
+ case CCT_STRING:
+ case CCT_NRANGE:
+ case CCT_SRANGE:
+ case CCT_MISSING:
+ case CCT_OTHERNM:
+ cat->subtotal = subtotal;
+ break;
- const struct variable *var = nest->vars[i];
+ case CCT_POSTCOMPUTE:
+ break;
- const struct ctables_category *total = ctables_categories_total (
- s->table->categories[var_get_dict_index (var)]);
- if (total)
- {
- const struct ctables_category *save = cats[a][i];
- cats[a][i] = total;
- ctables_cell_add__ (s, c, cats, is_included, weight);
- recurse_totals (s, c, cats, is_included, weight, a, i + 1);
- cats[a][i] = save;
- }
+ case CCT_SUBTOTAL:
+ subtotal = cat;
+ break;
+
+ case CCT_TOTAL:
+ case CCT_VALUE:
+ case CCT_LABEL:
+ case CCT_FUNCTION:
+ case CCT_EXCLUDED_MISSING:
+ break;
}
- start_nest = 0;
}
-}
-static void
-recurse_subtotals (struct ctables_section *s, const struct ccase *c,
- const struct ctables_category **cats[PIVOT_N_AXES],
- bool is_included, double weight[N_CTWS],
- enum pivot_axis_type start_axis, size_t start_nest)
-{
- for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
+ if (cats_start_ofs != -1)
{
- const struct ctables_nest *nest = s->nests[a];
- for (size_t i = start_nest; i < nest->n; i++)
+ for (size_t i = 0; i < c->n_cats; i++)
{
- if (i == nest->scale_idx)
- continue;
-
- const struct ctables_category *save = cats[a][i];
- if (save->subtotal)
+ struct ctables_category *cat = &c->cats[i];
+ switch (cat->type)
{
- cats[a][i] = save->subtotal;
- ctables_cell_add__ (s, c, cats, is_included, weight);
- recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
- cats[a][i] = save;
- }
- }
- start_nest = 0;
- }
-}
+ case CCT_POSTCOMPUTE:
+ cat->parse_format = parse_strings ? common_format->type : FMT_F;
+ struct msg_location *cats_location
+ = lex_ofs_location (lexer, cats_start_ofs, cats_end_ofs);
+ bool ok = ctables_recursive_check_postcompute (
+ dict, cat->pc->expr, cat, c, cats_location);
+ msg_location_destroy (cats_location);
+ if (!ok)
+ goto error;
+ break;
-static void
-ctables_add_occurrence (const struct variable *var,
- const union value *value,
- struct hmap *occurrences)
-{
- int width = var_get_width (var);
- unsigned int hash = value_hash (value, width, 0);
+ case CCT_NUMBER:
+ case CCT_NRANGE:
+ for (size_t j = 0; j < n_vars; j++)
+ if (var_is_alpha (vars[j]))
+ {
+ msg_at (SE, cat->location,
+ _("This category specification may be applied "
+ "only to numeric variables, but this "
+ "subcommand tries to apply it to string "
+ "variable %s."),
+ var_get_name (vars[j]));
+ goto error;
+ }
+ break;
- struct ctables_occurrence *o;
- HMAP_FOR_EACH_WITH_HASH (o, struct ctables_occurrence, node, hash,
- occurrences)
- if (value_equal (value, &o->value, width))
- return;
+ case CCT_STRING:
+ if (parse_strings)
+ {
+ double n;
+ if (!parse_category_string (cat->location, cat->string, dict,
+ common_format->type, &n))
+ goto error;
- o = xmalloc (sizeof *o);
- value_clone (&o->value, value, width);
- hmap_insert (occurrences, &o->node, hash);
-}
+ ss_dealloc (&cat->string);
-static void
-ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
- double weight[N_CTWS])
-{
- const struct ctables_category *layer_cats[s->nests[PIVOT_AXIS_LAYER]->n];
- const struct ctables_category *row_cats[s->nests[PIVOT_AXIS_ROW]->n];
- const struct ctables_category *column_cats[s->nests[PIVOT_AXIS_COLUMN]->n];
- const struct ctables_category **cats[PIVOT_N_AXES] =
- {
- [PIVOT_AXIS_LAYER] = layer_cats,
- [PIVOT_AXIS_ROW] = row_cats,
- [PIVOT_AXIS_COLUMN] = column_cats,
- };
+ cat->type = CCT_NUMBER;
+ cat->number = n;
+ }
+ else if (!all_strings (vars, n_vars, cat))
+ goto error;
+ break;
- bool is_included = true;
+ case CCT_SRANGE:
+ if (parse_strings)
+ {
+ double n[2];
- for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
- {
- const struct ctables_nest *nest = s->nests[a];
- for (size_t i = 0; i < nest->n; i++)
- if (i != nest->scale_idx)
- {
- const struct variable *var = nest->vars[i];
- const union value *value = case_data (c, var);
+ if (!cat->srange[0].string)
+ n[0] = -DBL_MAX;
+ else if (!parse_category_string (cat->location,
+ cat->srange[0], dict,
+ common_format->type, &n[0]))
+ goto error;
- cats[a][i] = ctables_categories_match (
- s->table->categories[var_get_dict_index (var)], value, var);
- if (!cats[a][i])
- {
- if (i != nest->summary_idx)
- return;
+ if (!cat->srange[1].string)
+ n[1] = DBL_MAX;
+ else if (!parse_category_string (cat->location,
+ cat->srange[1], dict,
+ common_format->type, &n[1]))
+ goto error;
- if (!var_is_value_missing (var, value))
- return;
+ ss_dealloc (&cat->srange[0]);
+ ss_dealloc (&cat->srange[1]);
+
+ cat->type = CCT_NRANGE;
+ cat->nrange[0] = n[0];
+ cat->nrange[1] = n[1];
+ }
+ else if (!all_strings (vars, n_vars, cat))
+ goto error;
+ break;
- static const struct ctables_category cct_excluded_missing = {
- .type = CCT_EXCLUDED_MISSING,
- .hide = true,
- };
- cats[a][i] = &cct_excluded_missing;
- is_included = false;
- }
+ case CCT_MISSING:
+ case CCT_OTHERNM:
+ case CCT_SUBTOTAL:
+ case CCT_TOTAL:
+ case CCT_VALUE:
+ case CCT_LABEL:
+ case CCT_FUNCTION:
+ case CCT_EXCLUDED_MISSING:
+ break;
+ }
}
}
- if (is_included)
- for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
- {
- const struct ctables_nest *nest = s->nests[a];
- for (size_t i = 0; i < nest->n; i++)
- if (i != nest->scale_idx)
- {
- const struct variable *var = nest->vars[i];
- const union value *value = case_data (c, var);
- ctables_add_occurrence (var, value, &s->occurrences[a][i]);
- }
- }
+ free (vars);
+ return true;
- ctables_cell_add__ (s, c, cats, is_included, weight);
- recurse_totals (s, c, cats, is_included, weight, 0, 0);
- recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
+error:
+ free (vars);
+ return false;
}
+\f
struct merge_item
{
return strcmp (as_label, bs_label);
}
-static void
-ctables_category_format_number (double number, const struct variable *var,
- struct string *s)
-{
- struct pivot_value *pv = pivot_value_new_var_value (
- var, &(union value) { .f = number });
- pivot_value_format (pv, NULL, s);
- pivot_value_destroy (pv);
-}
-
-static void
-ctables_category_format_string (struct substring string,
- const struct variable *var, struct string *out)
-{
- int width = var_get_width (var);
- char *s = xmalloc (width);
- buf_copy_rpad (s, width, string.string, string.length, ' ');
- struct pivot_value *pv = pivot_value_new_var_value (
- var, &(union value) { .s = CHAR_CAST (uint8_t *, s) });
- pivot_value_format (pv, NULL, out);
- pivot_value_destroy (pv);
- free (s);
-}
-
-static bool
-ctables_category_format_label (const struct ctables_category *cat,
- const struct variable *var,
- struct string *s)
-{
- switch (cat->type)
- {
- case CCT_NUMBER:
- ctables_category_format_number (cat->number, var, s);
- return true;
-
- case CCT_STRING:
- ctables_category_format_string (cat->string, var, s);
- return true;
-
- case CCT_NRANGE:
- ctables_category_format_number (cat->nrange[0], var, s);
- ds_put_format (s, " THRU ");
- ctables_category_format_number (cat->nrange[1], var, s);
- return true;
-
- case CCT_SRANGE:
- ctables_category_format_string (cat->srange[0], var, s);
- ds_put_format (s, " THRU ");
- ctables_category_format_string (cat->srange[1], var, s);
- return true;
-
- case CCT_MISSING:
- ds_put_cstr (s, "MISSING");
- return true;
-
- case CCT_OTHERNM:
- ds_put_cstr (s, "OTHERNM");
- return true;
-
- case CCT_POSTCOMPUTE:
- ds_put_format (s, "&%s", cat->pc->name);
- return true;
-
- case CCT_TOTAL:
- case CCT_SUBTOTAL:
- ds_put_cstr (s, cat->total_label);
- return true;
-
- case CCT_VALUE:
- case CCT_LABEL:
- case CCT_FUNCTION:
- case CCT_EXCLUDED_MISSING:
- return false;
- }
-
- return false;
-}
-
-static struct pivot_value *
-ctables_postcompute_label (const struct ctables_categories *cats,
- const struct ctables_category *cat,
- const struct variable *var)
-{
- struct substring in = ss_cstr (cat->pc->label);
- struct substring target = ss_cstr (")LABEL[");
-
- struct string out = DS_EMPTY_INITIALIZER;
- for (;;)
- {
- size_t chunk = ss_find_substring (in, target);
- if (chunk == SIZE_MAX)
- {
- if (ds_is_empty (&out))
- return pivot_value_new_user_text (in.string, in.length);
- else
- {
- ds_put_substring (&out, in);
- return pivot_value_new_user_text_nocopy (ds_steal_cstr (&out));
- }
- }
-
- ds_put_substring (&out, ss_head (in, chunk));
- ss_advance (&in, chunk + target.length);
-
- struct substring idx_s;
- if (!ss_get_until (&in, ']', &idx_s))
- goto error;
- char *tail;
- long int idx = strtol (idx_s.string, &tail, 10);
- if (idx < 1 || idx > cats->n_cats || tail != ss_end (idx_s))
- goto error;
-
- struct ctables_category *cat2 = &cats->cats[idx - 1];
- if (!ctables_category_format_label (cat2, var, &out))
- goto error;
- }
-
-error:
- ds_destroy (&out);
- return pivot_value_new_user_text (cat->pc->label, SIZE_MAX);
-}
-
-static struct pivot_value *
-ctables_category_create_value_label (const struct ctables_categories *cats,
- const struct ctables_category *cat,
- const struct variable *var,
- const union value *value)
-{
- return (cat->type == CCT_POSTCOMPUTE && cat->pc->label
- ? ctables_postcompute_label (cats, cat, var)
- : cat->type == CCT_TOTAL || cat->type == CCT_SUBTOTAL
- ? pivot_value_new_user_text (cat->total_label, SIZE_MAX)
- : pivot_value_new_var_value (var, value));
-}
-
-static struct ctables_value *
-ctables_value_find__ (struct ctables_table *t, const union value *value,
- int width, unsigned int hash)
-{
- struct ctables_value *clv;
- HMAP_FOR_EACH_WITH_HASH (clv, struct ctables_value, node,
- hash, &t->clabels_values_map)
- if (value_equal (value, &clv->value, width))
- return clv;
- return NULL;
-}
-
-static void
-ctables_value_insert (struct ctables_table *t, const union value *value,
- int width)
-{
- unsigned int hash = value_hash (value, width, 0);
- struct ctables_value *clv = ctables_value_find__ (t, value, width, hash);
- if (!clv)
- {
- clv = xmalloc (sizeof *clv);
- value_clone (&clv->value, value, width);
- hmap_insert (&t->clabels_values_map, &clv->node, hash);
- }
-}
-
-static struct ctables_value *
-ctables_value_find (struct ctables_table *t,
- const union value *value, int width)
-{
- return ctables_value_find__ (t, value, width,
- value_hash (value, width, 0));
-}
-
static void
ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
size_t ix[PIVOT_N_AXES])
}
}
-static int
-compare_clabels_values_3way (const void *a_, const void *b_, const void *width_)
-{
- const struct ctables_value *const *ap = a_;
- const struct ctables_value *const *bp = b_;
- const struct ctables_value *a = *ap;
- const struct ctables_value *b = *bp;
- const int *width = width_;
- return value_compare_3way (&a->value, &b->value, *width);
-}
-
-static void
-ctables_sort_clabels_values (struct ctables_table *t)
-{
- const struct variable *v0 = t->clabels_example;
- int width = var_get_width (v0);
-
- struct ctables_categories *c0 = t->categories[var_get_dict_index (v0)];
- if (c0->show_empty)
- {
- const struct val_labs *val_labs = var_get_value_labels (v0);
- for (const struct val_lab *vl = val_labs_first (val_labs); vl;
- vl = val_labs_next (val_labs, vl))
- if (ctables_categories_match (c0, &vl->value, v0))
- ctables_value_insert (t, &vl->value, width);
- }
-
- size_t n = hmap_count (&t->clabels_values_map);
- t->clabels_values = xnmalloc (n, sizeof *t->clabels_values);
-
- struct ctables_value *clv;
- size_t i = 0;
- HMAP_FOR_EACH (clv, struct ctables_value, node, &t->clabels_values_map)
- t->clabels_values[i++] = clv;
- t->n_clabels_values = n;
- assert (i == n);
-
- sort (t->clabels_values, n, sizeof *t->clabels_values,
- compare_clabels_values_3way, &width);
-
- for (size_t i = 0; i < n; i++)
- t->clabels_values[i]->leaf = i;
-}
-
static void
ctables_add_category_occurrences (const struct variable *var,
struct hmap *occurrences,