#include <config.h>
+#include "data/casereader.h"
#include "data/dataset.h"
#include "data/dictionary.h"
#include "data/mrset.h"
#include "language/lexer/format-parser.h"
#include "language/lexer/lexer.h"
#include "language/lexer/variable-parser.h"
+#include "language/stats/freq.h"
#include "libpspp/assertion.h"
#include "libpspp/hmap.h"
#include "libpspp/message.h"
return true;
}
-struct ctables_freqtab
+/* An array of variables.  The 'vars' array is released with free() by
+   var_array_uninit(). */
+struct var_array
{
struct variable **vars;
- size_t n_vars;
+ size_t n; /* Number of elements in 'vars'. */
+ };
- struct hmap data; /* Contains "struct freq"s. */
+/* Releases the 'vars' array that VA holds.  VA itself is not freed, so it
+   may live on the stack or inside another object.  Does nothing if VA is
+   null. */
+static void
+var_array_uninit (struct var_array *va)
+{
+  if (!va)
+    return;
+
+  free (va->vars);
+}
+
+/* An array of "struct var_array"s.  The 'vas' array and the 'vars' array
+   of each of its elements are released by var_array2_uninit(). */
+struct var_array2
+ {
+ struct var_array *vas;
+ size_t n; /* Number of elements in 'vas'. */
};
-static struct string_array
-nest_fts (struct string_array sa0, struct string_array sa1)
+/* Releases everything that VAA holds: the 'vars' array of each of its
+   elements, then the 'vas' array itself.  VAA itself is not freed.  Does
+   nothing if VAA is null. */
+static void
+var_array2_uninit (struct var_array2 *vaa)
{
- if (!sa0.n)
- return sa1;
- else if (!sa1.n)
- return sa0;
-
- struct string_array sa = STRING_ARRAY_INITIALIZER;
- for (size_t i = 0; i < sa0.n; i++)
- for (size_t j = 0; j < sa1.n; j++)
- string_array_append_nocopy (&sa, xasprintf ("%s, %s",
- sa0.strings[i],
- sa1.strings[j]));
- string_array_destroy (&sa0);
- string_array_destroy (&sa1);
- return sa;
+ if (vaa)
+ {
+ for (size_t i = 0; i < vaa->n; i++)
+ var_array_uninit (&vaa->vas[i]);
+ free (vaa->vas);
+ }
+}
+
+/* Returns the nesting of VA0 with VA1: one element for every pair (A, B)
+   with A taken from VA0 and B taken from VA1, holding A's variables
+   followed by B's.  Pairs are produced with VA0's index varying slowest.
+
+   Takes ownership of both arguments: the caller must not use or free
+   them afterward.  If either argument is empty, the other one is returned
+   as is. */
+static struct var_array2
+nest_fts (struct var_array2 va0, struct var_array2 va1)
+{
+  if (!va0.n)
+    {
+      /* We own VA0, so release its (empty) container rather than
+         dropping it.  free() accepts a null pointer. */
+      free (va0.vas);
+      return va1;
+    }
+  else if (!va1.n)
+    {
+      free (va1.vas);
+      return va0;
+    }
+
+  /* 'vaa.n' starts out at zero and counts the elements filled in so far. */
+  struct var_array2 vaa = { .vas = xnmalloc (va0.n, va1.n * sizeof *vaa.vas) };
+  for (size_t i = 0; i < va0.n; i++)
+    for (size_t j = 0; j < va1.n; j++)
+      {
+        size_t allocate = va0.vas[i].n + va1.vas[j].n;
+        struct variable **vars = xnmalloc (allocate, sizeof *vars);
+        size_t n = 0;
+        for (size_t k = 0; k < va0.vas[i].n; k++)
+          vars[n++] = va0.vas[i].vars[k];
+        for (size_t k = 0; k < va1.vas[j].n; k++)
+          vars[n++] = va1.vas[j].vars[k];
+        assert (n == allocate);
+
+        /* Name the member explicitly.  A bare "n = n" here would be an
+           assignment expression used as a positional initializer, which
+           only works by accident of member order. */
+        vaa.vas[vaa.n++] = (struct var_array) { .vars = vars, .n = n };
+      }
+
+  /* The combined arrays are fresh copies, so the inputs can go. */
+  var_array2_uninit (&va0);
+  var_array2_uninit (&va1);
+  return vaa;
}
-static struct string_array
+/* Returns the concatenation of VA0 and VA1: every element of VA0, in
+   order, followed by every element of VA1.
+
+   Takes ownership of both arguments.  Their elements are moved, not
+   copied, into the result, so only the two input containers are freed
+   here. */
+static struct var_array2
+stack_fts (struct var_array2 va0, struct var_array2 va1)
+{
+  size_t total = va0.n + va1.n;
+  struct var_array *merged = xnmalloc (total, sizeof *merged);
+
+  /* Indexes below VA0's length come from VA0, the rest from VA1. */
+  for (size_t k = 0; k < total; k++)
+    merged[k] = k < va0.n ? va0.vas[k] : va1.vas[k - va0.n];
+
+  free (va0.vas);
+  free (va1.vas);
+  return (struct var_array2) { .vas = merged, .n = total };
+}
+
+/* Returns the combinations of variables described by axis expression A,
+   one "struct var_array" per combination.  A null A yields an empty
+   result, a single variable yields one one-variable combination, and
+   stacking and nesting combine the results for A's two subexpressions
+   with stack_fts() and nest_fts(), respectively.
+
+   The result is freshly allocated; the caller is responsible for
+   releasing it, e.g. with var_array2_uninit(). */
+static struct var_array2
enumerate_fts (const struct ctables_axis *a)
{
- struct string_array sa = STRING_ARRAY_INITIALIZER;
if (!a)
- return sa;
+ return (struct var_array2) { .n = 0 };
switch (a->op)
{
case CTAO_VAR:
- string_array_append (&sa, ctables_var_name (&a->var));
- break;
+ /* Multiple response sets are not handled here. */
+ assert (!a->var.is_mrset);
+ struct variable **v = xmalloc (sizeof *v);
+ *v = a->var.var;
+ struct var_array *va = xmalloc (sizeof *va);
+ *va = (struct var_array) { .vars = v, .n = 1 };
+ return (struct var_array2) { .vas = va, .n = 1 };
case CTAO_STACK:
- sa = enumerate_fts (a->subs[0]);
- struct string_array sa2 = enumerate_fts (a->subs[1]);
- for (size_t i = 0; i < sa2.n; i++)
- string_array_append_nocopy (&sa, sa2.strings[i]);
- free (sa2.strings);
- break;
+ return stack_fts (enumerate_fts (a->subs[0]),
+ enumerate_fts (a->subs[1]));
case CTAO_NEST:
return nest_fts (enumerate_fts (a->subs[0]),
enumerate_fts (a->subs[1]));
}
- return sa;
+
+ /* Every case above returns. */
+ NOT_REACHED ();
}
-static void
-ctables_execute (struct ctables *ct)
+/* A frequency table for one combination of variables. */
+struct ctables_freqtab
+ {
+ struct var_array vars; /* Variables whose values identify an entry. */
+ struct hmap data; /* Contains "struct freq"s. */
+ };
+
+/* Runs the CTABLES procedure described by CT on the active dataset DS.
+
+   Builds one frequency table for each combination of variables obtained
+   by nesting the row, column, and layer axes of every table in CT, then
+   makes a single pass over the data, adding each case's weight to the
+   entry that matches the case's values in each frequency table.  As it
+   stands, the variable combinations are also printed on stdout and the
+   frequency tables are discarded once the pass is complete.
+
+   Returns true only if both casereader_destroy() on the input and
+   proc_commit() on DS report success. */
+static bool
+ctables_execute (struct dataset *ds, struct ctables *ct)
{
+  struct ctables_freqtab **fts = NULL;
+  size_t n_fts = 0;
+  size_t allocated_fts = 0;
for (size_t i = 0; i < ct->n_tables; i++)
{
struct ctables_table *t = &ct->tables[i];
- struct string_array sa = enumerate_fts (t->axes[PIVOT_AXIS_ROW]);
- sa = nest_fts (sa, enumerate_fts (t->axes[PIVOT_AXIS_COLUMN]));
- sa = nest_fts (sa, enumerate_fts (t->axes[PIVOT_AXIS_LAYER]));
- for (size_t i = 0; i < sa.n; i++)
- puts (sa.strings[i]);
- putc ('\n', stdout);
- string_array_destroy (&sa);
+      struct var_array2 vaa = enumerate_fts (t->axes[PIVOT_AXIS_ROW]);
+      vaa = nest_fts (vaa, enumerate_fts (t->axes[PIVOT_AXIS_COLUMN]));
+      vaa = nest_fts (vaa, enumerate_fts (t->axes[PIVOT_AXIS_LAYER]));
+
+      /* Print each combination as a comma-separated list of variable
+         names, one combination per line.  The indexes are named so that
+         they do not shadow the table index. */
+      for (size_t j = 0; j < vaa.n; j++)
+        {
+          for (size_t k = 0; k < vaa.vas[j].n; k++)
+            {
+              if (k)
+                fputs (", ", stdout);
+              fputs (var_get_name (vaa.vas[j].vars[k]), stdout);
+            }
+          putchar ('\n');
+        }
+
+      /* Create an empty frequency table for each combination. */
+      for (size_t j = 0; j < vaa.n; j++)
+        {
+          if (n_fts >= allocated_fts)
+            fts = x2nrealloc (fts, &allocated_fts, sizeof *fts);
+
+          struct ctables_freqtab *ft = xmalloc (sizeof *ft);
+          *ft = (struct ctables_freqtab) {
+            .vars = vaa.vas[j],
+            .data = HMAP_INITIALIZER (ft->data),
+          };
+          fts[n_fts++] = ft;
+        }
+
+      /* Every element was moved into a frequency table above, so free
+         only the container, not the elements' 'vars' arrays. */
+      free (vaa.vas);
}
+
+  struct casereader *input = casereader_create_filter_weight (proc_open (ds),
+                                                              dataset_dict (ds),
+                                                              NULL, NULL);
+  bool warn_on_invalid = true;
+  for (struct ccase *c = casereader_read (input); c;
+       case_unref (c), c = casereader_read (input))
+    {
+      double weight = dict_get_case_weight (dataset_dict (ds), c,
+                                            &warn_on_invalid);
+      for (size_t i = 0; i < n_fts; i++)
+        {
+          struct ctables_freqtab *ft = fts[i];
+
+          /* Hash the case's values for this table's variables. */
+          size_t hash = 0;
+
+          for (size_t j = 0; j < ft->vars.n; j++)
+            {
+              const struct variable *var = ft->vars.vars[j];
+              hash = value_hash (case_data (c, var), var_get_width (var), hash);
+            }
+
+          /* Look for an existing entry with exactly these values.  If
+             there is one, add this case's weight to it. */
+          struct freq *f;
+          HMAP_FOR_EACH_WITH_HASH (f, struct freq, node, hash, &ft->data)
+            {
+              for (size_t j = 0; j < ft->vars.n; j++)
+                {
+                  const struct variable *var = ft->vars.vars[j];
+                  if (!value_equal (case_data (c, var), &f->values[j],
+                                    var_get_width (var)))
+                    goto next_hash_node;
+                }
+
+              f->count += weight;
+              goto next_ft;
+
+            next_hash_node: ;
+            }
+
+          /* No match, so add a new entry that starts at this case's
+             weight. */
+          f = xmalloc (table_entry_size (ft->vars.n));
+          f->count = weight;
+          for (size_t j = 0; j < ft->vars.n; j++)
+            {
+              const struct variable *var = ft->vars.vars[j];
+              value_clone (&f->values[j], case_data (c, var),
+                           var_get_width (var));
+            }
+          hmap_insert (&ft->data, &f->node, hash);
+
+        next_ft: ;
+        }
+    }
+  /* Keep the reader's status so that a failure while reading the input
+     is reported to the caller instead of being dropped. */
+  bool ok = casereader_destroy (input);
+
+  /* Free every entry of every frequency table, then the tables. */
+  for (size_t i = 0; i < n_fts; i++)
+    {
+      struct ctables_freqtab *ft = fts[i];
+      struct freq *f, *next;
+      HMAP_FOR_EACH_SAFE (f, next, struct freq, node, &ft->data)
+        {
+          hmap_delete (&ft->data, &f->node);
+          for (size_t j = 0; j < ft->vars.n; j++)
+            {
+              const struct variable *var = ft->vars.vars[j];
+              value_destroy (&f->values[j], var_get_width (var));
+            }
+          free (f);
+        }
+      hmap_destroy (&ft->data);
+      var_array_uninit (&ft->vars);
+      free (ft);
+    }
+  free (fts);
+
+  /* proc_commit() is the left operand so that it always runs, even if
+     reading failed. */
+  return proc_commit (ds) && ok;
}
int
}
while (lex_token (lexer) != T_ENDCMD);
- ctables_execute (ct);
+ bool ok = ctables_execute (ds, ct);
ctables_destroy (ct);
- return CMD_SUCCESS;
+ return ok ? CMD_SUCCESS : CMD_FAILURE;
error:
ctables_destroy (ct);