#include <config.h>
+#include "data/casereader.h"
#include "data/dataset.h"
#include "data/dictionary.h"
#include "data/mrset.h"
#include "language/lexer/format-parser.h"
#include "language/lexer/lexer.h"
#include "language/lexer/variable-parser.h"
+#include "libpspp/array.h"
#include "libpspp/assertion.h"
#include "libpspp/hmap.h"
#include "libpspp/message.h"
+#include "libpspp/string-array.h"
#include "output/pivot-table.h"
#include "gl/minmax.h"
enum ctables_vlabel
{
+ /* Every enumerator reuses a value from "enum settings_value_show", which
+ is what allows cmd_ctables to cast settings_get_show_variables()
+ directly to this type. CTVL_NONE occupies the
+ SETTINGS_VALUE_SHOW_DEFAULT slot. */
- CTVL_DEFAULT = SETTINGS_VALUE_SHOW_DEFAULT,
+ CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
- CTVL_NONE,
};
-static void UNUSED
-ctables_vlabel_unique (enum ctables_vlabel vlabel)
-{
- /* This ensures that all of the values are unique. */
- switch (vlabel)
- {
- case CTVL_DEFAULT:
- case CTVL_NAME:
- case CTVL_LABEL:
- case CTVL_BOTH:
- case CTVL_NONE:
- abort ();
- }
-}
/* XXX:
- unweighted summaries (U*)
/* CTPO_CAT_RANGE.
XXX what about string ranges? */
- struct
- {
- double low; /* -DBL_MAX for LO. */
- double high; /* DBL_MAX for HIGH. */
- }
- range;
+ double range[2];
/* CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW. */
struct ctables_postcompute_expr *subs[2];
struct ctables_chisq *chisq;
struct ctables_pairwise *pairwise;
+
+ struct ctables_freqtab **fts;
+ size_t n_fts;
};
struct ctables_var
: var_get_print_format (var->var));
}
+/* Returns the name that identifies VAR: the multiple response set's name when
+   VAR refers to an mrset, otherwise the name of the underlying variable. */
+static const char *
+ctables_var_name (const struct ctables_var *var)
+{
+  if (var->is_mrset)
+    return var->mrset->name;
+
+  return var_get_name (var->var);
+}
+
struct ctables_categories
{
size_t n_refs;
bool scale;
struct ctables_summary *summaries;
size_t n_summaries;
+ size_t allocated_summaries;
};
/* Nonterminals. */
struct ctables_table *t;
};
-static struct ctables_summary *
-add_summary (struct ctables_axis *axis, enum ctables_summary_function function,
- double percentile, size_t *allocated_summaries)
+/* Returns the output format used for summary FUNCTION when the user gives
+ none. The SUMMARIES table assigns each function a format class:
+ CTF_COUNT yields F40.0, CTF_PERCENT yields PCT40.1, and CTF_GENERAL
+ yields VAR's own print format. */
+static struct fmt_spec
+ctables_summary_default_format (enum ctables_summary_function function,
+ const struct ctables_var *var)
{
- if (axis->n_summaries >= *allocated_summaries)
- axis->summaries = x2nrealloc (axis->summaries, allocated_summaries,
- sizeof *axis->summaries);
-
- static const char *default_labels[] = {
-#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
- SUMMARIES
-#undef S
- };
- char *label = (function == CTSF_PTILE
- ? xasprintf (_("Percentile %.2f"), percentile)
- : xstrdup (gettext (default_labels[function])));
-
static const enum ctables_format default_formats[] = {
#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = FORMAT,
SUMMARIES
#undef S
};
- struct fmt_spec format;
switch (default_formats[function])
{
case CTF_COUNT:
- format = (struct fmt_spec) { .type = FMT_F, .w = 40 };
- break;
+ return (struct fmt_spec) { .type = FMT_F, .w = 40 };
case CTF_PERCENT:
- format = (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
- break;
+ return (struct fmt_spec) { .type = FMT_PCT, .w = 40, .d = 1 };
case CTF_GENERAL:
- format = *ctables_var_get_print_format (&axis->var);
- break;
+ return *ctables_var_get_print_format (var);
default:
NOT_REACHED ();
}
+}
- struct ctables_summary *s = &axis->summaries[axis->n_summaries++];
- *s = (struct ctables_summary) {
- .function = function,
- .percentile = percentile,
- .label = label,
- .format = format,
+/* Returns the NAME column of the SUMMARIES table for FUNCTION. The string is
+ statically allocated; add_summary uses it in error messages. */
+static const char *
+ctables_summary_function_name (enum ctables_summary_function function)
+{
+ static const char *names[] = {
+#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = NAME,
+ SUMMARIES
+#undef S
};
- return s;
+ return names[function];
+}
+
+/* Attaches a summary specification to every variable leaf at or below AXIS.
+
+   FUNCTION and PERCENTILE identify the summary, LABEL is copied, and FORMAT,
+   if nonnull, overrides the function's default format.  LOC is the syntax
+   location of the summary specification, used for error reporting.
+
+   Returns true on success.  Returns false, after issuing an error at LOC and
+   a note at the offending axis, if FUNCTION is not available for one of the
+   variables (for example a scale-only function on a non-scale variable). */
+static bool
+add_summary (struct ctables_axis *axis,
+             enum ctables_summary_function function, double percentile,
+             const char *label, const struct fmt_spec *format,
+             const struct msg_location *loc)
+{
+  if (axis->op != CTAO_VAR)
+    {
+      /* Interior node: apply to the left operand, then the right one,
+         stopping at the first failure. */
+      return (add_summary (axis->subs[0], function, percentile, label,
+                           format, loc)
+              && add_summary (axis->subs[1], function, percentile, label,
+                              format, loc));
+    }
+
+  /* Make room for one more summary before validating, as callers may rely
+     on the array having been grown. */
+  if (axis->n_summaries >= axis->allocated_summaries)
+    axis->summaries = x2nrealloc (axis->summaries,
+                                  &axis->allocated_summaries,
+                                  sizeof *axis->summaries);
+
+  const char *function_name = ctables_summary_function_name (function);
+  const char *var_name = ctables_var_name (&axis->var);
+  switch (ctables_function_availability (function))
+    {
+    case CTFA_ALL:
+      break;
+
+    case CTFA_MRSETS:
+      if (axis->var.is_mrset)
+        break;
+      msg_at (SE, loc, _("Summary function %s applies only to multiple "
+                         "response sets."), function_name);
+      msg_at (SN, axis->loc, _("'%s' is not a multiple response set."),
+              var_name);
+      return false;
+
+    case CTFA_SCALE:
+      if (axis->scale)
+        break;
+      msg_at (SE, loc,
+              _("Summary function %s applies only to scale variables."),
+              function_name);
+      msg_at (SN, axis->loc, _("'%s' is not a scale variable."),
+              var_name);
+      return false;
+    }
+
+  struct fmt_spec chosen_format
+    = format ? *format : ctables_summary_default_format (function, &axis->var);
+
+  axis->summaries[axis->n_summaries++] = (struct ctables_summary) {
+    .function = function,
+    .percentile = percentile,
+    .label = xstrdup (label),
+    .format = chosen_format,
+  };
+  return true;
}
static struct ctables_axis *ctables_axis_parse_stack (
: var_get_measure (var.var) == MEASURE_SCALE);
axis->loc = lex_ofs_location (ctx->lexer, start_ofs,
lex_ofs (ctx->lexer) - 1);
+ return axis;
+}
+
+/* Parses a primary axis expression optionally followed by a bracketed list
+ of summary specifications. Each specification is a summary function, a
+ percentile number if the function is PTILE, an optional string label, and
+ an optional output format; specifications may be separated by commas.
+
+ Returns the parsed axis. On a syntax error, or if a summary is rejected
+ by add_summary, destroys the axis and returns NULL. */
+static struct ctables_axis *
+ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
+{
+ struct ctables_axis *sub = ctables_axis_parse_primary (ctx);
+ if (!sub || !lex_match (ctx->lexer, T_LBRACK))
+ return sub;
- if (lex_match (ctx->lexer, T_LBRACK))
+ do
{
- size_t allocated_summaries = 0;
- do
+ int start_ofs = lex_ofs (ctx->lexer);
+
+ /* Parse function. */
+ enum ctables_summary_function function;
+ if (!parse_ctables_summary_function (ctx->lexer, &function))
+ goto error;
+
+ /* Parse percentile. */
+ double percentile = 0;
+ if (function == CTSF_PTILE)
{
- enum ctables_summary_function function;
- if (!parse_ctables_summary_function (ctx->lexer, &function))
+ if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
goto error;
+ percentile = lex_number (ctx->lexer);
+ lex_get (ctx->lexer);
+ }
- double percentile = 0;
- if (function == CTSF_PTILE)
- {
- if (!lex_force_num_range_closed (ctx->lexer, "PTILE", 0, 100))
- goto error;
- percentile = lex_number (ctx->lexer);
- lex_get (ctx->lexer);
- }
+ /* Parse label. */
+ char *label;
+ if (lex_is_string (ctx->lexer))
+ {
+ label = ss_xstrdup (lex_tokss (ctx->lexer));
+ lex_get (ctx->lexer);
+ }
+ else if (function == CTSF_PTILE)
+ label = xasprintf (_("Percentile %.2f"), percentile);
+ else
+ {
+ static const char *default_labels[] = {
+#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
+ SUMMARIES
+#undef S
+ };
+ label = xstrdup (gettext (default_labels[function]));
+ }
- struct ctables_summary *s = add_summary (axis, function, percentile,
- &allocated_summaries);
- if (lex_is_string (ctx->lexer))
- {
- free (s->label);
- s->label = ss_xstrdup (lex_tokss (ctx->lexer));
- lex_get (ctx->lexer);
- }
- if (lex_token (ctx->lexer) == T_ID)
+ /* Parse format. */
+ struct fmt_spec format;
+ const struct fmt_spec *formatp;
+ if (lex_token (ctx->lexer) == T_ID)
+ {
+ if (!parse_format_specifier (ctx->lexer, &format)
+ || !fmt_check_output (&format)
+ || !fmt_check_type_compat (&format, VAL_NUMERIC))
{
- if (!parse_format_specifier (ctx->lexer, &s->format)
- || !fmt_check_output (&s->format)
- || !fmt_check_type_compat (&s->format, VAL_NUMERIC))
- goto error;
+ free (label);
+ goto error;
}
- lex_match (ctx->lexer, T_COMMA);
+ formatp = &format;
}
- while (!lex_match (ctx->lexer, T_RBRACK));
+ else
+ formatp = NULL;
+
+ struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
+ lex_ofs (ctx->lexer) - 1);
+ /* add_summary reports its own error. A rejected summary must fail the
+ whole parse rather than yield an axis the user was told is invalid. */
+ bool ok = add_summary (sub, function, percentile, label, formatp, loc);
+ free (label);
+ msg_location_destroy (loc);
+ if (!ok)
+ goto error;
+
+ lex_match (ctx->lexer, T_COMMA);
}
- return axis;
+ while (!lex_match (ctx->lexer, T_RBRACK));
+
+ return sub;
error:
- ctables_axis_destroy (axis);
+ ctables_axis_destroy (sub);
return NULL;
}
ctables_axis_parse_nest (struct ctables_axis_parse_ctx *ctx)
{
int start_ofs = lex_ofs (ctx->lexer);
- struct ctables_axis *lhs = ctables_axis_parse_primary (ctx);
+ struct ctables_axis *lhs = ctables_axis_parse_postfix (ctx);
if (!lhs)
return NULL;
while (lex_match (ctx->lexer, T_GT))
{
- struct ctables_axis *rhs = ctables_axis_parse_primary (ctx);
+ struct ctables_axis *rhs = ctables_axis_parse_postfix (ctx);
if (!rhs)
return NULL;
return true;
}
+/* An array of N variable pointers.  The array itself is heap-allocated and
+   released by var_array_uninit(); the variables it points to are not. */
+struct var_array
+  {
+    struct variable **vars;
+    size_t n;
+  };
+
+/* Frees the pointer array held by VA.  Does nothing if VA is null.  VA itself
+   is not freed. */
+static void
+var_array_uninit (struct var_array *va)
+{
+  if (va == NULL)
+    return;
+
+  free (va->vars);
+}
+
+/* An array of N "struct var_array"s. */
+struct var_array2
+  {
+    struct var_array *vas;
+    size_t n;
+  };
+
+/* Releases every var_array in VAA and then the outer array.  Does nothing if
+   VAA is null.  VAA itself is not freed. */
+static void
+var_array2_uninit (struct var_array2 *vaa)
+{
+  if (vaa == NULL)
+    return;
+
+  struct var_array *end = vaa->vas + vaa->n;
+  for (struct var_array *va = vaa->vas; va < end; va++)
+    var_array_uninit (va);
+  free (vaa->vas);
+}
+
+/* Combines VA0 and VA1 by nesting: the result holds one variable list for
+   each pair (I, J), consisting of the variables of VA0's element I followed
+   by those of VA1's element J, ordered with I as the major index.
+
+   If either operand is empty, the other is returned unchanged.
+
+   Takes ownership of both arguments: their storage is either freed or
+   transferred to the returned value, which the caller must eventually
+   release. */
+static struct var_array2
+nest_fts (struct var_array2 va0, struct var_array2 va1)
+{
+  if (!va0.n)
+    {
+      /* Nothing to combine with; release the empty operand's (possibly
+         zero-length) array and hand back the other one as is. */
+      var_array2_uninit (&va0);
+      return va1;
+    }
+  else if (!va1.n)
+    {
+      var_array2_uninit (&va1);
+      return va0;
+    }
+
+  struct var_array2 vaa = { .vas = xnmalloc (va0.n, va1.n * sizeof *vaa.vas) };
+  for (size_t i = 0; i < va0.n; i++)
+    for (size_t j = 0; j < va1.n; j++)
+      {
+        size_t allocate = va0.vas[i].n + va1.vas[j].n;
+        struct variable **vars = xnmalloc (allocate, sizeof *vars);
+        size_t n = 0;
+        for (size_t k = 0; k < va0.vas[i].n; k++)
+          vars[n++] = va0.vas[i].vars[k];
+        for (size_t k = 0; k < va1.vas[j].n; k++)
+          vars[n++] = va1.vas[j].vars[k];
+        assert (n == allocate);
+
+        /* Both members are designated explicitly.  The previous "n = n" was
+           an assignment expression that only happened to initialize the
+           right member positionally. */
+        vaa.vas[vaa.n++] = (struct var_array) { .vars = vars, .n = n };
+      }
+  var_array2_uninit (&va0);
+  var_array2_uninit (&va1);
+  return vaa;
+}
+
+/* Combines VA0 and VA1 by stacking: the result contains all of VA0's variable
+   lists followed by all of VA1's.  The individual lists are moved, not
+   copied; only the two outer arrays are freed.  Takes ownership of both
+   arguments. */
+static struct var_array2
+stack_fts (struct var_array2 va0, struct var_array2 va1)
+{
+  size_t total = va0.n + va1.n;
+  struct var_array *merged = xnmalloc (total, sizeof *merged);
+
+  size_t n = 0;
+  for (size_t i = 0; i < va0.n; i++)
+    merged[n++] = va0.vas[i];
+  for (size_t i = 0; i < va1.n; i++)
+    merged[n++] = va1.vas[i];
+  assert (n == total);
+
+  free (va0.vas);
+  free (va1.vas);
+  return (struct var_array2) { .vas = merged, .n = n };
+}
+
+/* Returns the variable combinations described by axis expression A.  A null
+   axis yields an empty result, a variable yields a single one-element list,
+   and stack and nest nodes combine their operands' results with stack_fts()
+   and nest_fts() respectively.  Multiple response sets are not supported
+   here (asserted).  The caller owns the result. */
+static struct var_array2
+enumerate_fts (const struct ctables_axis *a)
+{
+  if (a == NULL)
+    return (struct var_array2) { .n = 0 };
+
+  if (a->op == CTAO_VAR)
+    {
+      assert (!a->var.is_mrset);
+
+      struct variable **single = xmalloc (sizeof *single);
+      single[0] = a->var.var;
+
+      struct var_array *list = xmalloc (sizeof *list);
+      list->vars = single;
+      list->n = 1;
+
+      return (struct var_array2) { .vas = list, .n = 1 };
+    }
+  else if (a->op == CTAO_STACK)
+    return stack_fts (enumerate_fts (a->subs[0]),
+                      enumerate_fts (a->subs[1]));
+  else if (a->op == CTAO_NEST)
+    return nest_fts (enumerate_fts (a->subs[0]),
+                     enumerate_fts (a->subs[1]));
+
+  NOT_REACHED ();
+}
+
+/* One cell of a frequency table: a combination of values and the total
+   weight of the cases that had it. */
+struct ctables_freq
+  {
+    struct hmap_node node;      /* Element in hash table. */
+    double count;               /* Sum of case weights for these values. */
+    union value values[];       /* One value per variable in the table. */
+  };
+
+/* Allocates and returns a "struct ctables_freq" with room for N_VALUES
+   values.  The contents are uninitialized. */
+static struct ctables_freq *
+ctables_freq_allocate (size_t n_values)
+{
+  size_t header_size = sizeof (struct ctables_freq);
+  size_t values_size = n_values * sizeof (union value);
+  return xmalloc (header_size + values_size);
+}
+
+/* A frequency table over one combination of variables. */
+struct ctables_freqtab
+  {
+    struct var_array vars;        /* Variables that key the table. */
+    struct hmap data;             /* Contains "struct ctables_freq"s. */
+    struct ctables_freq **sorted; /* Cells in sorted order, for output. */
+  };
+
+/* Comparison callback for sorting an array of "struct ctables_freq *".  A_ and
+   B_ point to array elements and VARS_ to the "struct var_array" that gives
+   the width of each value.  Values are compared variable by variable, and the
+   first difference decides the result. */
+static int
+ctables_freq_compare_3way (const void *a_, const void *b_, const void *vars_)
+{
+  const struct var_array *vars = vars_;
+  const struct ctables_freq *lhs = *(struct ctables_freq *const *) a_;
+  const struct ctables_freq *rhs = *(struct ctables_freq *const *) b_;
+
+  int cmp = 0;
+  for (size_t i = 0; cmp == 0 && i < vars->n; i++)
+    cmp = value_compare_3way (&lhs->values[i], &rhs->values[i],
+                              var_get_width (vars->vars[i]));
+  return cmp;
+}
+
+/* Runs the tables in CT against the active dataset of DS.
+
+   The work is done in four passes:
+
+     1. For each table, enumerate the variable combinations implied by its
+        row, column and layer axes and create one empty frequency table per
+        combination.
+
+     2. Read every case once, adding its weight to the matching cell of each
+        frequency table.  A case is skipped for a frequency table if any of
+        the table's variables is system-missing, or user-missing without the
+        variable's categories asking to include missing values.
+
+     3. For each table, sort the cells and submit a pivot table of counts.
+
+     4. Free the frequency tables.
+
+   Returns the result of proc_commit(). */
+static bool
+ctables_execute (struct dataset *ds, struct ctables *ct)
+{
+  /* Pass 1: set up frequency tables. */
+  for (size_t i = 0; i < ct->n_tables; i++)
+    {
+      size_t allocated_fts = 0;
+
+      struct ctables_table *t = &ct->tables[i];
+      struct var_array2 vaa = enumerate_fts (t->axes[PIVOT_AXIS_ROW]);
+      vaa = nest_fts (vaa, enumerate_fts (t->axes[PIVOT_AXIS_COLUMN]));
+      vaa = nest_fts (vaa, enumerate_fts (t->axes[PIVOT_AXIS_LAYER]));
+
+      /* Debugging output: one line per variable combination. */
+      for (size_t j = 0; j < vaa.n; j++)
+        {
+          for (size_t k = 0; k < vaa.vas[j].n; k++)
+            {
+              if (k)
+                fputs (", ", stdout);
+              fputs (var_get_name (vaa.vas[j].vars[k]), stdout);
+            }
+          putchar ('\n');
+        }
+
+      for (size_t j = 0; j < vaa.n; j++)
+        {
+          /* The frequency table takes over ownership of vaa.vas[j].vars. */
+          struct ctables_freqtab *ft = xmalloc (sizeof *ft);
+          *ft = (struct ctables_freqtab) {
+            .vars = vaa.vas[j],
+            .data = HMAP_INITIALIZER (ft->data),
+          };
+
+          if (t->n_fts >= allocated_fts)
+            t->fts = x2nrealloc (t->fts, &allocated_fts, sizeof *t->fts);
+          t->fts[t->n_fts++] = ft;
+        }
+
+      /* Only the outer array is freed; its elements were moved above. */
+      free (vaa.vas);
+    }
+
+  /* Pass 2: tally the data. */
+  struct casereader *input = casereader_create_filter_weight (proc_open (ds),
+                                                              dataset_dict (ds),
+                                                              NULL, NULL);
+  bool warn_on_invalid = true;
+  for (struct ccase *c = casereader_read (input); c;
+       case_unref (c), c = casereader_read (input))
+    {
+      double weight = dict_get_case_weight (dataset_dict (ds), c,
+                                            &warn_on_invalid);
+
+      for (size_t i = 0; i < ct->n_tables; i++)
+        {
+          struct ctables_table *t = &ct->tables[i];
+
+          for (size_t j = 0; j < t->n_fts; j++)
+            {
+              struct ctables_freqtab *ft = t->fts[j];
+
+              /* Skip the case if any variable has an excluded missing
+                 value. */
+              for (size_t k = 0; k < ft->vars.n; k++)
+                {
+                  const struct variable *var = ft->vars.vars[k];
+                  switch (var_is_value_missing (var, case_data (c, var)))
+                    {
+                    case MV_SYSTEM:
+                      goto next_ft;
+
+                    case MV_USER:
+                      if (!t->categories[var_get_dict_index (var)]
+                          || !t->categories[var_get_dict_index (var)]->include_missing)
+                        goto next_ft;
+                      break;
+                    }
+                }
+
+              /* Hash the combination of values. */
+              size_t hash = 0;
+              for (size_t k = 0; k < ft->vars.n; k++)
+                {
+                  const struct variable *var = ft->vars.vars[k];
+                  hash = value_hash (case_data (c, var), var_get_width (var), hash);
+                }
+
+              /* Add to an existing cell if there is one... */
+              struct ctables_freq *f;
+              HMAP_FOR_EACH_WITH_HASH (f, struct ctables_freq, node, hash, &ft->data)
+                {
+                  for (size_t k = 0; k < ft->vars.n; k++)
+                    {
+                      const struct variable *var = ft->vars.vars[k];
+                      if (!value_equal (case_data (c, var), &f->values[k],
+                                        var_get_width (var)))
+                        goto next_hash_node;
+                    }
+
+                  f->count += weight;
+                  goto next_ft;
+
+                next_hash_node: ;
+                }
+
+              /* ...otherwise create a new cell. */
+              f = ctables_freq_allocate (ft->vars.n);
+              f->count = weight;
+              for (size_t k = 0; k < ft->vars.n; k++)
+                {
+                  const struct variable *var = ft->vars.vars[k];
+                  value_clone (&f->values[k], case_data (c, var),
+                               var_get_width (var));
+                }
+              hmap_insert (&ft->data, &f->node, hash);
+
+            next_ft: ;
+            }
+        }
+    }
+  casereader_destroy (input);
+
+  /* Pass 3: output. */
+  for (size_t i = 0; i < ct->n_tables; i++)
+    {
+      struct ctables_table *t = &ct->tables[i];
+
+      struct pivot_table *pt = pivot_table_create (N_("Custom Tables"));
+      struct pivot_dimension *d = pivot_dimension_create (
+        pt, PIVOT_AXIS_ROW, N_("Rows"));
+      for (size_t j = 0; j < t->n_fts; j++)
+        {
+          struct ctables_freqtab *ft = t->fts[j];
+          ft->sorted = xnmalloc (ft->data.count, sizeof *ft->sorted);
+
+          struct ctables_freq *f;
+          size_t n = 0;
+          HMAP_FOR_EACH (f, struct ctables_freq, node, &ft->data)
+            ft->sorted[n++] = f;
+          assert (n == ft->data.count);
+          sort (ft->sorted, n, sizeof *ft->sorted,
+                ctables_freq_compare_3way, &ft->vars);
+
+          /* groups[m] is the innermost category group created so far for
+             variable M; rows that share leading values reuse it. */
+          struct pivot_category **groups = xnmalloc (ft->vars.n,
+                                                     sizeof *groups);
+          for (size_t k = 0; k < n; k++)
+            {
+              struct ctables_freq *prev = k > 0 ? ft->sorted[k - 1] : NULL;
+              struct ctables_freq *cur = ft->sorted[k];
+
+              /* Count how many leading values this row shares with the
+                 previous one.  value_equal() takes a value width, so pass
+                 the variable's width rather than its type. */
+              size_t n_common = 0;
+              if (prev)
+                for (; n_common + 1 < ft->vars.n; n_common++)
+                  if (!value_equal (&prev->values[n_common],
+                                    &cur->values[n_common],
+                                    var_get_width (ft->vars.vars[n_common])))
+                    break;
+
+              for (size_t m = n_common; m < ft->vars.n; m++)
+                {
+                  struct pivot_category *parent = m > 0 ? groups[m - 1] : d->root;
+                  const struct variable *var = ft->vars.vars[m];
+                  enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (var)];
+
+                  if (vlabel != CTVL_NONE)
+                    parent = pivot_category_create_group__ (
+                      parent, pivot_value_new_variable (ft->vars.vars[m]));
+
+                  if (m + 1 < ft->vars.n)
+                    parent = pivot_category_create_group__ (
+                      parent,
+                      pivot_value_new_var_value (ft->vars.vars[m], &cur->values[m]));
+                  groups[m] = parent;
+
+                  if (m == ft->vars.n - 1)
+                    {
+                      int leaf = pivot_category_create_leaf (
+                        parent,
+                        pivot_value_new_var_value (ft->vars.vars[ft->vars.n - 1],
+                                                   &cur->values[ft->vars.n - 1]));
+                      pivot_table_put1 (pt, leaf, pivot_value_new_number (cur->count));
+                    }
+                }
+            }
+          free (groups);
+        }
+      pivot_table_submit (pt);
+    }
+
+  /* Pass 4: clean up. */
+  for (size_t i = 0; i < ct->n_tables; i++)
+    {
+      struct ctables_table *t = &ct->tables[i];
+
+      for (size_t j = 0; j < t->n_fts; j++)
+        {
+          struct ctables_freqtab *ft = t->fts[j];
+          struct ctables_freq *f, *next;
+          HMAP_FOR_EACH_SAFE (f, next, struct ctables_freq, node, &ft->data)
+            {
+              hmap_delete (&ft->data, &f->node);
+              for (size_t k = 0; k < ft->vars.n; k++)
+                {
+                  const struct variable *var = ft->vars.vars[k];
+                  value_destroy (&f->values[k], var_get_width (var));
+                }
+              free (f);
+            }
+          hmap_destroy (&ft->data);
+          free (ft->sorted);
+          var_array_uninit (&ft->vars);
+          free (ft);
+        }
+      free (t->fts);
+
+      /* Do not leave a dangling array and stale count behind in the table. */
+      t->fts = NULL;
+      t->n_fts = 0;
+    }
+
+  return proc_commit (ds);
+}
+
int
cmd_ctables (struct lexer *lexer, struct dataset *ds)
{
size_t n_vars = dict_get_n_vars (dataset_dict (ds));
enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
+ enum settings_value_show tvars = settings_get_show_variables ();
for (size_t i = 0; i < n_vars; i++)
- vlabels[i] = CTVL_DEFAULT;
+ vlabels[i] = (enum ctables_vlabel) tvars;
struct ctables *ct = xmalloc (sizeof *ct);
*ct = (struct ctables) {
enum ctables_vlabel vlabel;
if (lex_match_id (lexer, "DEFAULT"))
- vlabel = CTVL_DEFAULT;
+ vlabel = (enum ctables_vlabel) settings_get_show_variables ();
else if (lex_match_id (lexer, "NAME"))
vlabel = CTVL_NAME;
else if (lex_match_id (lexer, "LABEL"))
if (!lex_force_match (lexer, T_SLASH))
break;
- /* XXX Validate axes. */
while (!lex_match_id (lexer, "TABLE") && lex_token (lexer) != T_ENDCMD)
{
if (lex_match_id (lexer, "SLABELS"))
goto error;
}
}
+
+ if (t->row_labels != CTLP_NORMAL && t->col_labels != CTLP_NORMAL)
+ {
+ msg (SE, _("ROWLABELS and COLLABELS may not both be specified."));
+ goto error;
+ }
+
}
while (lex_token (lexer) != T_ENDCMD);
+
+ bool ok = ctables_execute (ds, ct);
ctables_destroy (ct);
- return CMD_SUCCESS;
+ return ok ? CMD_SUCCESS : CMD_FAILURE;
error:
ctables_destroy (ct);