#include "language/lexer/variable-parser.h"
#include "libpspp/array.h"
#include "libpspp/assertion.h"
+#include "libpspp/hash-functions.h"
#include "libpspp/hmap.h"
#include "libpspp/message.h"
#include "libpspp/string-array.h"
struct variable *base_weight; /* WEIGHT. */
int hide_threshold; /* HIDESMALLCOUNTS. */
- struct ctables_table *tables;
+ struct ctables_table **tables;
size_t n_tables;
};
CTLP_LAYER,
};
+struct var_array /* A list of variables, optionally tied to the axis node that supplies summaries. */
+ {
+ const struct ctables_axis *summary; /* Axis node that has `scale' or `n_summaries' set, if any; otherwise NULL. */
+ struct variable **vars; /* Array of N variables. */
+ size_t n; /* Number of elements in VARS. */
+ };
+
+struct var_array2 /* An array of var_arrays. */
+ {
+ struct var_array *vas; /* Array of N var_arrays. */
+ size_t n; /* Number of elements in VAS. */
+ };
+
struct ctables_table
{
struct ctables_axis *axes[PIVOT_N_AXES];
+ struct var_array2 vaas[PIVOT_N_AXES]; /* Per-axis variable lists, filled in by ctables_execute(). */
+ struct hmap ft; /* Contains "struct ctables_freq"s. */
enum pivot_axis_type slabels_position;
bool slabels_visible;
struct ctables_chisq *chisq;
struct ctables_pairwise *pairwise;
- struct ctables_freqtab **fts;
- size_t n_fts;
};
struct ctables_var
}
static void
-ctables_table_uninit (struct ctables_table *t)
+ctables_table_destroy (struct ctables_table *t) /* Releases T's members and then T itself; does nothing if T is null. */
{
if (!t)
return;
free (t->title);
ctables_chisq_destroy (t->chisq);
ctables_pairwise_destroy (t->pairwise);
+ free (t); /* Tables are now allocated individually. NOTE(review): no release of t->vaas or of the entries in t->ft is visible here -- confirm. */
}
static void
free (ct->missing);
free (ct->vlabels);
for (size_t i = 0; i < ct->n_tables; i++)
- ctables_table_uninit (&ct->tables[i]);
+ ctables_table_destroy (ct->tables[i]);
free (ct->tables);
free (ct);
}
return true;
}
-struct var_array
- {
- const struct ctables_axis *summary;
- struct variable **vars;
- enum pivot_axis_type *axes;
- size_t n;
- };
-
static void
var_array_uninit (struct var_array *va)
{
if (va)
- {
- free (va->vars);
- free (va->axes);
- }
+ free (va->vars); /* VARS is the only member freed now that `axes' is gone; VA itself is not freed. */
}
-struct var_array2
- {
- struct var_array *vas;
- size_t n;
- };
-
static void
var_array2_uninit (struct var_array2 *vaa)
{
enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
size_t n = 0;
for (size_t k = 0; k < a->n; k++)
- {
- vars[n] = a->vars[k];
- axes[n] = a->axes[k];
- n++;
- }
+ vars[n++] = a->vars[k];
for (size_t k = 0; k < b->n; k++)
- {
- vars[n] = b->vars[k];
- axes[n] = b->axes[k];
- n++;
- }
+ vars[n++] = b->vars[k];
assert (n == allocate);
assert (!(a->summary && b->summary));
vaa.vas[vaa.n++] = (struct var_array) {
.summary = a->summary ? a->summary : b->summary,
.vars = vars,
- .axes = axes,
.n = n
};
}
*vars = a->var.var;
enum pivot_axis_type *axes = xmalloc (sizeof *axes);
*axes = axis_type;
- *va = (struct var_array) { .vars = vars, .axes = axes, .n = 1 };
+ *va = (struct var_array) { .vars = vars, .n = 1 };
}
va->summary = a->scale || a->n_summaries ? a : NULL;
return (struct var_array2) { .vas = va, .n = 1 };
/* XXX percentiles, median, mode, multiple response */
};
+#if 0
static void
ctables_summary_init (union ctables_summary *s,
const struct ctables_summary_spec *ss)
}
}
-
-static double UNUSED
+static double
ctables_summary_value (union ctables_summary *s,
const struct ctables_summary_spec *ss)
{
NOT_REACHED ();
}
+#endif
struct ctables_freq
{
struct hmap_node node; /* Element in hash table. */
- union ctables_summary *summaries;
- union value values[]; /* The value. */
- };
-struct ctables_freqtab
- {
- struct var_array vars;
- struct hmap data; /* Contains "struct ctables_freq"s. */
- const struct ctables_summary_spec *summaries;
- size_t n_summaries;
- const struct variable *summary_var;
+ struct
+ {
+ size_t vaa_idx; /* Index into the table's vaas[a].vas[] for this axis. */
+ union value *values; /* One value per variable in that var_array; NULL if it has no variables. */
+ }
+ axes[PIVOT_N_AXES]; /* Indexed by enum pivot_axis_type. */
+
+ //union ctables_summary *summaries;
+ double count; /* Sum of the weights of the cases added to this entry. */
};
+#if 0
static struct ctables_freq *
ctables_freq_create (struct ctables_freqtab *ft)
{
ctables_summary_add (&f->summaries[i], &ft->summaries[i],
var, value, weight);
}
+#endif
-struct ctables_axis_render
+struct ctables_freq_sort_aux /* Auxiliary data read by ctables_freq_compare_3way(). */
{
- size_t ofs;
- struct variable **vars;
- size_t n_vars;
-
- struct ctables_freq **freqs;
- size_t n_freqs;
+ const struct ctables_table *t; /* Table whose vaas[] describe the values being compared. */
+ enum pivot_axis_type a; /* Axis whose vaa_idx and values are compared. */
};
static int
-ctables_freq_compare_3way (const void *a_, const void *b_, const void *ar_)
+ctables_freq_compare_3way (const void *a_, const void *b_, const void *aux_) /* Compares two entries along axis AUX_->a: -1 or 1 if their var_array indexes differ, else the first nonzero value_compare_3way() result, else 0. */
{
- const struct ctables_axis_render *ar = ar_;
- struct ctables_freq *const *a = a_;
- struct ctables_freq *const *b = b_;
-
- for (size_t i = 0; i < ar->n_vars; i++)
+ const struct ctables_freq_sort_aux *aux = aux_;
+ struct ctables_freq *const *ap = a_; /* A_ and B_ point at array elements, which are themselves pointers. */
+ struct ctables_freq *const *bp = b_;
+ const struct ctables_freq *a = *ap;
+ const struct ctables_freq *b = *bp;
+
+ size_t a_idx = a->axes[aux->a].vaa_idx;
+ size_t b_idx = b->axes[aux->a].vaa_idx;
+ if (a_idx != b_idx) /* Entries from different var_arrays order by var_array index. */
+ return a_idx < b_idx ? -1 : 1;
+
+ const struct var_array *va = &aux->t->vaas[aux->a].vas[a_idx]; /* Same var_array for both, so both hold VA->n values. */
+ for (size_t i = 0; i < va->n; i++)
{
- int cmp = value_compare_3way (&(*a)->values[i + ar->ofs],
- &(*b)->values[i + ar->ofs],
- var_get_width (ar->vars[i]));
+ int cmp = value_compare_3way (&a->axes[aux->a].values[i],
+ &b->axes[aux->a].values[i],
+ var_get_width (va->vars[i]));
if (cmp)
return cmp;
}
return 0;
}
-static bool
-ctables_execute (struct dataset *ds, struct ctables *ct)
+/* Algorithm:
+
+ For each row:
+ For each ctables_table:
+ For each combination of row vars:
+ For each combination of column vars:
+ For each combination of layer vars:
+ Add entry
+ Make a table of row values:
+ Sort entries by row values
+ Assign a 0-based index to each actual value
+ Construct a dimension
+ Make a table of column values
+ Make a table of layer values
+ For each entry:
+ Fill the table entry using the indexes from before.
+ */
+
+static void /* Adds WEIGHT to the entry of T->ft that matches case C for var_arrays IR, IC, IL (row, column, layer), creating the entry if none matches. */
+ctables_freqtab_insert (struct ctables_table *t,
+ const struct ccase *c,
+ size_t ir, size_t ic, size_t il,
+ double weight)
{
- for (size_t i = 0; i < ct->n_tables; i++)
+ size_t ix[PIVOT_N_AXES] = {
+ [PIVOT_AXIS_ROW] = ir,
+ [PIVOT_AXIS_COLUMN] = ic,
+ [PIVOT_AXIS_LAYER] = il,
+ };
+
+ size_t hash = 0; /* Covers, for every axis, the var_array index and the case's values for its variables. */
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
{
- size_t allocated_fts = 0;
+ const struct var_array *va = &t->vaas[a].vas[ix[a]];
+ hash = hash_int (ix[a], hash);
+ for (size_t i = 0; i < va->n; i++)
+ hash = value_hash (case_data (c, va->vars[i]),
+ var_get_width (va->vars[i]), hash);
+ }
- struct ctables_table *t = &ct->tables[i];
- struct var_array2 vaa = { .n = 0 };
+ struct ctables_freq *f; /* Search the hash bucket for an entry with the same indexes and values on all axes. */
+ HMAP_FOR_EACH_WITH_HASH (f, struct ctables_freq, node, hash, &t->ft)
+ {
for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
- vaa = nest_fts (vaa, enumerate_fts (a, t->axes[a]));
- for (size_t i = 0; i < vaa.n; i++)
{
- for (size_t j = 0; j < vaa.vas[i].n; j++)
- {
- if (j)
- fputs (", ", stdout);
- printf ("%s (%c)", var_get_name (vaa.vas[i].vars[j]),
- vaa.vas[i].axes[j] == PIVOT_AXIS_ROW ? 'r'
- : vaa.vas[i].axes[j] == PIVOT_AXIS_COLUMN ? 'c'
- : vaa.vas[i].axes[j] == PIVOT_AXIS_LAYER ? 'l'
- : '?');
- }
- putchar ('\n');
+ const struct var_array *va = &t->vaas[a].vas[ix[a]];
+ if (f->axes[a].vaa_idx != ix[a])
+ goto not_equal;
+ for (size_t i = 0; i < va->n; i++)
+ if (!value_equal (case_data (c, va->vars[i]),
+ &f->axes[a].values[i],
+ var_get_width (va->vars[i])))
+ goto not_equal;
}
- for (size_t j = 0; j < vaa.n; j++)
- {
- const struct var_array *va = &vaa.vas[j];
- const struct ctables_summary_spec *summaries;
- size_t n_summaries;
- const struct variable *summary_var;
- if (!va->summary)
- {
- static const struct ctables_summary_spec count = {
- .function = CTSF_COUNT,
- .label = (char *) N_("Count"),
- .format = { .type = FMT_F, .w = 40 },
- };
- summaries = &count;
- n_summaries = 1;
- summary_var = va->vars[0];
- }
- else if (va->summary->n_summaries)
- {
- summaries = va->summary->summaries;
- n_summaries = va->summary->n_summaries;
- summary_var = va->summary->var.var;
- }
- else
- {
- static const struct ctables_summary_spec mean = {
- .function = CTSF_MEAN,
- .label = (char *) N_("Mean"),
- .format = { .type = FMT_F, .w = 40, .d = 2}, /* XXX */
- };
- summaries = &mean;
- n_summaries = 1;
- summary_var = va->summary->var.var;
- };
-
- struct ctables_freqtab *ft = xmalloc (sizeof *ft);
- *ft = (struct ctables_freqtab) {
- .vars = *va,
- .summaries = summaries,
- .n_summaries = n_summaries,
- .summary_var = summary_var,
- .data = HMAP_INITIALIZER (ft->data),
- };
+ f->count += weight; /* Every axis matched: accumulate into the existing entry. */
+ return;
- if (t->n_fts >= allocated_fts)
- t->fts = x2nrealloc (t->fts, &allocated_fts, sizeof *t->fts);
- t->fts[t->n_fts++] = ft;
- }
+ not_equal: ;
+ }
+
+ f = xmalloc (sizeof *f); /* No match: create an entry that holds its own copies of the case's values. */
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+ {
+ const struct var_array *va = &t->vaas[a].vas[ix[a]];
+ f->axes[a].vaa_idx = ix[a];
+ f->axes[a].values = (va->n
+ ? xnmalloc (va->n, sizeof *f->axes[a].values)
+ : NULL);
+ for (size_t i = 0; i < va->n; i++)
+ value_clone (&f->axes[a].values[i], case_data (c, va->vars[i]),
+ var_get_width (va->vars[i]));
+ }
+ f->count = weight;
+ hmap_insert (&t->ft, &f->node, hash);
+}
- free (vaa.vas);
+static bool /* Builds each table's per-axis variable lists, adds every input case to each table's `ft', then creates and submits one pivot table per ctables_table; returns the result of proc_commit(). */
+ctables_execute (struct dataset *ds, struct ctables *ct)
+{
+ for (size_t i = 0; i < ct->n_tables; i++)
+ {
+ struct ctables_table *t = ct->tables[i];
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) /* An axis with no specification gets a single empty var_array, so the per-axis loops below still iterate once. */
+ if (t->axes[a])
+ t->vaas[a] = enumerate_fts (a, t->axes[a]);
+ else
+ {
+ struct var_array *va = xmalloc (sizeof *va);
+ *va = (struct var_array) { .n = 0 };
+ t->vaas[a] = (struct var_array2) { .vas = va, .n = 1 };
+ }
}
struct casereader *input = casereader_create_filter_weight (proc_open (ds),
for (size_t i = 0; i < ct->n_tables; i++)
{
- struct ctables_table *t = &ct->tables[i];
+ struct ctables_table *t = ct->tables[i];
- for (size_t j = 0; j < t->n_fts; j++)
- {
- struct ctables_freqtab *ft = t->fts[j];
-
- for (size_t k = 0; k < ft->vars.n; k++)
- {
- const struct variable *var = ft->vars.vars[k];
- switch (var_is_value_missing (var, case_data (c, var)))
- {
- case MV_SYSTEM:
- goto next_ft;
-
- case MV_USER:
- if (!t->categories[var_get_dict_index (var)]
- || !t->categories[var_get_dict_index (var)]->include_missing)
- goto next_ft;
- break;
- }
- }
- size_t hash = 0;
- for (size_t k = 0; k < ft->vars.n; k++)
- {
- const struct variable *var = ft->vars.vars[k];
- hash = value_hash (case_data (c, var), var_get_width (var), hash);
- }
-
- struct ctables_freq *f;
- HMAP_FOR_EACH_WITH_HASH (f, struct ctables_freq, node, hash, &ft->data)
- {
- for (size_t k = 0; k < ft->vars.n; k++)
- {
- const struct variable *var = ft->vars.vars[k];
- if (!value_equal (case_data (c, var), &f->values[k],
- var_get_width (var)))
- goto next_hash_node;
- }
- goto found;
-
- next_hash_node: ;
- }
-
- f = ctables_freq_create (ft);
- for (size_t k = 0; k < ft->vars.n; k++)
- {
- const struct variable *var = ft->vars.vars[k];
- value_clone (&f->values[k], case_data (c, var),
- var_get_width (var));
- }
- hmap_insert (&ft->data, &f->node, hash);
-
- found:
- ctables_freq_add (ft, f, ft->summary_var,
- case_data (c, ft->summary_var), weight);
-
- next_ft: ;
- }
+ for (size_t ir = 0; ir < t->vaas[PIVOT_AXIS_ROW].n; ir++) /* One insertion per (row, column, layer) var_array combination. NOTE(review): the removed code skipped system-missing and non-included user-missing values; no such filtering is visible in the replacement. */
+ for (size_t ic = 0; ic < t->vaas[PIVOT_AXIS_COLUMN].n; ic++)
+ for (size_t il = 0; il < t->vaas[PIVOT_AXIS_LAYER].n; il++)
+ ctables_freqtab_insert (t, c, ir, ic, il, weight);
}
}
casereader_destroy (input);
for (size_t i = 0; i < ct->n_tables; i++)
{
- struct ctables_table *t = &ct->tables[i];
+ struct ctables_table *t = ct->tables[i];
struct pivot_table *pt = pivot_table_create (N_("Custom Tables"));
pivot_table_set_look (pt, ct->look);
[PIVOT_AXIS_COLUMN] = N_("Columns"),
[PIVOT_AXIS_LAYER] = N_("Layers"),
};
- d[a] = (t->axes[a] || a == t->slabels_position
+ d[a] = (t->axes[a]
? pivot_dimension_create (pt, a, names[a])
: NULL);
- }
- for (size_t j = 0; j < t->n_fts; j++)
- {
- struct ctables_freqtab *ft = t->fts[j];
+ if (!d[a]) /* No dimension for an axis without a specification. */
+ continue;
- struct ctables_axis_render axis_renders[PIVOT_N_AXES];
+ struct ctables_freq **sorted = xnmalloc (t->ft.count, sizeof *sorted); /* NOTE(review): no matching free (sorted) is visible in this loop body -- confirm. */
- for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
- {
- size_t ofs = 0;
- struct variable **vars = NULL;
- size_t n = 0;
- for (size_t k = 0; k < ft->vars.n; k++)
- if (ft->vars.axes[k] == a)
- {
- ofs = k;
- vars = &ft->vars.vars[k];
- for (n = 1; k + n < ft->vars.n; n++)
- if (ft->vars.axes[k + n] != a)
- break;
- break;
- }
-
- struct ctables_axis_render *ar = &axis_renders[a];
- *ar = (struct ctables_axis_render) {
- .ofs = ofs,
- .vars = vars,
- .n_vars = n
- };
- printf ("axis %s: %zu variables\n", pivot_axis_type_to_string (a), n);
- if (!n)
- continue;
-
- ar->freqs = xnmalloc (ft->data.count, sizeof *ar->freqs);
-
- struct ctables_freq *f;
- size_t n_freqs = 0;
- HMAP_FOR_EACH (f, struct ctables_freq, node, &ft->data)
- ar->freqs[n_freqs++] = f;
- assert (n_freqs == ft->data.count);
- ar->n_freqs = sort_unique (ar->freqs, n_freqs, sizeof *ar->freqs,
- ctables_freq_compare_3way, ar);
- }
+ struct ctables_freq *f;
+ size_t n = 0;
+ HMAP_FOR_EACH (f, struct ctables_freq, node, &t->ft)
+ sorted[n++] = f;
+ assert (n == t->ft.count);
+
+ struct ctables_freq_sort_aux aux = { .t = t, .a = a };
+ n = sort_unique (sorted, n, sizeof *sorted, ctables_freq_compare_3way, &aux); /* N becomes the count sort_unique() returns, presumably after dropping entries that compare equal on axis A. */
- for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+ size_t max_depth = 0; /* Largest var_array on this axis; sizes GROUPS. */
+ for (size_t j = 0; j < t->vaas[a].n; j++)
+ if (t->vaas[a].vas[j].n > max_depth)
+ max_depth = t->vaas[a].vas[j].n;
+
+ struct pivot_category **groups = xnmalloc (max_depth, sizeof *groups);
+ struct pivot_category *top = NULL;
+ for (size_t j = 0; j < n; j++)
{
- struct ctables_axis_render *ar = &axis_renders[a];
- if (!ar->n_vars && a != t->slabels_position)
- continue;
+ struct ctables_freq *f = sorted[j];
+ const struct var_array *va = &t->vaas[a].vas[f->axes[a].vaa_idx];
+
+ size_t n_common = 0; /* Number of leading values shared with the previous sorted entry. */
+ bool new_subtable = false;
+ if (j > 0)
+ {
+ struct ctables_freq *prev = sorted[j - 1];
+ if (prev->axes[a].vaa_idx == f->axes[a].vaa_idx)
+ {
+ for (; n_common < va->n; n_common++)
+ if (!value_equal (&prev->axes[a].values[n_common],
+ &f->axes[a].values[n_common],
+ var_get_type (va->vars[n_common]))) /* NOTE(review): other value_equal() calls in this file pass var_get_width(); var_get_type() here looks inconsistent -- confirm. */
+ break;
+ }
+ else
+ new_subtable = true;
+ }
+ else
+ new_subtable = true;
+ if (new_subtable) /* First entry, or first entry of a different var_array: start a new top-level group. */
+ top = pivot_category_create_group__ (
+ d[a]->root, pivot_value_new_variable (va->vars[0]));
+ printf ("n_common=%zu\n", n_common); /* NOTE(review): debugging output written to stdout. */
- struct pivot_category **groups = xnmalloc (ar->n_vars,
- sizeof *groups);
- for (size_t k = 0; k < ar->n_freqs; k++)
+ for (size_t k = n_common; k < va->n; k++)
{
- struct ctables_freq *prev = k > 0 ? ar->freqs[k - 1] : NULL;
- struct ctables_freq *f = ar->freqs[k];
-
- size_t n_common = 0;
- if (prev)
- for (; n_common + 1 < ar->n_vars; n_common++)
- if (!value_equal (&prev->values[ar->ofs + n_common],
- &f->values[ar->ofs + n_common],
- var_get_type (ar->vars[n_common])))
- break;
-
- for (size_t m = n_common; m < ar->n_vars; m++)
+ struct pivot_category *parent = k > 0 ? groups[k - 1] : top;
+
+ if (k == va->n - 1) /* The last variable's value becomes a leaf rather than a group. */
{
- struct pivot_category *parent = m > 0 ? groups[m - 1] : d[a]->root;
- const struct variable *var = ar->vars[m];
- enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (var)];
+ pivot_category_create_leaf (
+ parent,
+ pivot_value_new_var_value (va->vars[va->n - 1],
+ &f->axes[a].values[va->n - 1]));
+ break;
+ }
- if (vlabel != CTVL_NONE)
- parent = pivot_category_create_group__ (
- parent, pivot_value_new_variable (ar->vars[m]));
+ parent = pivot_category_create_group__ (
+ parent,
+ pivot_value_new_var_value (va->vars[k], &f->axes[a].values[k]));
- if (m + 1 < ar->n_vars)
- parent = pivot_category_create_group__ (
- parent,
- pivot_value_new_var_value (ar->vars[m], &f->values[m]));
- groups[m] = parent;
+ parent = pivot_category_create_group__ (
+ parent, pivot_value_new_variable (va->vars[k]));
+ groups[k] = parent;
- if (m == ar->n_vars - 1)
- {
- pivot_category_create_leaf (
- parent,
- pivot_value_new_var_value (ar->vars[ar->n_vars - 1],
- &f->values[ar->ofs + ar->n_vars - 1]));
#if 0
- for (size_t p = 0; p < ft->n_summaries; p++)
- {
- if (a == t->slabels_position)
- pivot_category_create_leaf (
- c, pivot_value_new_text (ft->summaries[p].label));
- //pivot_table_put1 (pt, leaf, pivot_value_new_number (value));
- }
-#endif
+ for (size_t p = 0; p < ft->n_summaries; p++)
+ {
+ if (a == t->slabels_position)
+ pivot_category_create_leaf (
+ c, pivot_value_new_text (ft->summaries[p].label));
+ //pivot_table_put1 (pt, leaf, pivot_value_new_number (value));
}
+#endif
}
}
- free (groups);
- }
+ free (groups);
}
pivot_table_submit (pt);
}
+#if 0 /* Disabled: this cleanup still refers to the removed `fts' and `n_fts' members. */
for (size_t i = 0; i < ct->n_tables; i++)
{
- struct ctables_table *t = &ct->tables[i];
+ struct ctables_table *t = ct->tables[i];
for (size_t j = 0; j < t->n_fts; j++)
{
}
free (t->fts);
}
-
+#endif
+
return proc_commit (ds);
}
ct->tables = x2nrealloc (ct->tables, &allocated_tables,
sizeof *ct->tables);
- struct ctables_table *t = &ct->tables[ct->n_tables++];
+ struct ctables_table *t = xmalloc (sizeof *t);
*t = (struct ctables_table) {
+ .ft = HMAP_INITIALIZER (t->ft),
.slabels_position = PIVOT_AXIS_COLUMN,
.slabels_visible = true,
.row_labels = CTLP_NORMAL,
.n_categories = dict_get_n_vars (dataset_dict (ds)),
.cilevel = 95,
};
+ ct->tables[ct->n_tables++] = t;
lex_match (lexer, T_EQUALS);
if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))