From: Ben Pfaff
Date: Wed, 29 Dec 2021 22:46:47 +0000 (-0800)
Subject: now builds frequency tables
X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=d8bb2f81d40557f9cf3bc92fcc803d82a820da87

now builds frequency tables
---

Review notes (commentary only; everything between the "---" line above and
the first "diff --git" line is outside the patch proper):

* What the patch does: replaces the string-based enumeration of frequency
  tables with arrays of variables (struct var_array / struct var_array2),
  adds stack_fts() for CTAO_STACK, and makes ctables_execute() read the
  active dataset, accumulating one "struct freq" per distinct combination
  of values in a per-table hash map, then freeing everything and returning
  the result of proc_commit().

* Fix applied in nest_fts(): the compound literal read
      (struct var_array) { .vars = vars, n = n }
  where "n = n" is a self-assignment expression used as a positional
  initializer for the member that follows "vars".  It stored the right
  value only because "n" is declared immediately after "vars" in
  struct var_array.  It now uses the designator ".n = n".  This is a
  one-token change on an added line, so hunk line counts are unchanged,
  but the post-image no longer corresponds to blob 563abdf7f1 named on
  the "index" line.

* Ownership, as visible in the code below: nest_fts() copies the variable
  pointers into fresh arrays and then releases both inputs with
  var_array2_uninit(); stack_fts() moves the inner var_arrays into the
  result and frees only the two outer arrays; ctables_execute() moves each
  var_array into a ctables_freqtab and frees only the outer array.

* NOTE(review): the first context line of the first hunk was a bare
  "#include" in the copy under review; it is written here as
  "#include <config.h>" on the assumption that an angle-bracketed header
  name was stripped in transit -- confirm against the tree.

* NOTE(review): the loop that prints variable names to stdout in
  ctables_execute() looks like temporary debugging output, and its index
  "i" shadows the enclosing loop's "i" -- left as in the original commit.

diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c
index d71754d01e..563abdf7f1 100644
--- a/src/language/stats/ctables.c
+++ b/src/language/stats/ctables.c
@@ -16,6 +16,7 @@
 
 #include <config.h>
 
+#include "data/casereader.h"
 #include "data/dataset.h"
 #include "data/dictionary.h"
 #include "data/mrset.h"
@@ -23,6 +24,7 @@
 #include "language/lexer/format-parser.h"
 #include "language/lexer/lexer.h"
 #include "language/lexer/variable-parser.h"
+#include "language/stats/freq.h"
 #include "libpspp/assertion.h"
 #include "libpspp/hmap.h"
 #include "libpspp/message.h"
@@ -1248,75 +1250,225 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
   return true;
 }
 
-struct ctables_freqtab
+struct var_array
   {
     struct variable **vars;
-    size_t n_vars;
+    size_t n;
+  };
 
-    struct hmap data;           /* Contains "struct freq"s. */
+static void
+var_array_uninit (struct var_array *va)
+{
+  if (va)
+    free (va->vars);
+}
+
+struct var_array2
+  {
+    struct var_array *vas;
+    size_t n;
   };
 
-static struct string_array
-nest_fts (struct string_array sa0, struct string_array sa1)
+static void
+var_array2_uninit (struct var_array2 *vaa)
 {
-  if (!sa0.n)
-    return sa1;
-  else if (!sa1.n)
-    return sa0;
-
-  struct string_array sa = STRING_ARRAY_INITIALIZER;
-  for (size_t i = 0; i < sa0.n; i++)
-    for (size_t j = 0; j < sa1.n; j++)
-      string_array_append_nocopy (&sa, xasprintf ("%s, %s",
-                                                  sa0.strings[i],
-                                                  sa1.strings[j]));
-  string_array_destroy (&sa0);
-  string_array_destroy (&sa1);
-  return sa;
+  if (vaa)
+    {
+      for (size_t i = 0; i < vaa->n; i++)
+        var_array_uninit (&vaa->vas[i]);
+      free (vaa->vas);
+    }
+}
+
+static struct var_array2
+nest_fts (struct var_array2 va0, struct var_array2 va1)
+{
+  if (!va0.n)
+    return va1;
+  else if (!va1.n)
+    return va0;
+
+  struct var_array2 vaa = { .vas = xnmalloc (va0.n, va1.n * sizeof *vaa.vas) };
+  for (size_t i = 0; i < va0.n; i++)
+    for (size_t j = 0; j < va1.n; j++)
+      {
+        size_t allocate = va0.vas[i].n + va1.vas[j].n;
+        struct variable **vars = xnmalloc (allocate, sizeof *vars);
+        size_t n = 0;
+        for (size_t k = 0; k < va0.vas[i].n; k++)
+          vars[n++] = va0.vas[i].vars[k];
+        for (size_t k = 0; k < va1.vas[j].n; k++)
+          vars[n++] = va1.vas[j].vars[k];
+        assert (n == allocate);
+
+        vaa.vas[vaa.n++] = (struct var_array) { .vars = vars, .n = n };
+      }
+  var_array2_uninit (&va0);
+  var_array2_uninit (&va1);
+  return vaa;
 }
 
-static struct string_array
+static struct var_array2
+stack_fts (struct var_array2 va0, struct var_array2 va1)
+{
+  struct var_array2 vaa = { .vas = xnmalloc (va0.n + va1.n, sizeof *vaa.vas) };
+  for (size_t i = 0; i < va0.n; i++)
+    vaa.vas[vaa.n++] = va0.vas[i];
+  for (size_t i = 0; i < va1.n; i++)
+    vaa.vas[vaa.n++] = va1.vas[i];
+  assert (vaa.n == va0.n + va1.n);
+  free (va0.vas);
+  free (va1.vas);
+  return vaa;
+}
+
+static struct var_array2
 enumerate_fts (const struct ctables_axis *a)
 {
-  struct string_array sa = STRING_ARRAY_INITIALIZER;
   if (!a)
-    return sa;
+    return (struct var_array2) { .n = 0 };
 
   switch (a->op)
     {
     case CTAO_VAR:
-      string_array_append (&sa, ctables_var_name (&a->var));
-      break;
+      assert (!a->var.is_mrset);
+      struct variable **v = xmalloc (sizeof *v);
+      *v = a->var.var;
+      struct var_array *va = xmalloc (sizeof *va);
+      *va = (struct var_array) { .vars = v, .n = 1 };
+      return (struct var_array2) { .vas = va, .n = 1 };
 
     case CTAO_STACK:
-      sa = enumerate_fts (a->subs[0]);
-      struct string_array sa2 = enumerate_fts (a->subs[1]);
-      for (size_t i = 0; i < sa2.n; i++)
-        string_array_append_nocopy (&sa, sa2.strings[i]);
-      free (sa2.strings);
-      break;
+      return stack_fts (enumerate_fts (a->subs[0]),
+                        enumerate_fts (a->subs[1]));
 
     case CTAO_NEST:
       return nest_fts (enumerate_fts (a->subs[0]),
                        enumerate_fts (a->subs[1]));
     }
-  return sa;
+
+  NOT_REACHED ();
 }
 
-static void
-ctables_execute (struct ctables *ct)
+struct ctables_freqtab
+  {
+    struct var_array vars;
+    struct hmap data;           /* Contains "struct freq"s. */
+  };
+
+static bool
+ctables_execute (struct dataset *ds, struct ctables *ct)
 {
+  struct ctables_freqtab **fts = NULL;
+  size_t n_fts = 0;
+  size_t allocated_fts = 0;
   for (size_t i = 0; i < ct->n_tables; i++)
     {
       struct ctables_table *t = &ct->tables[i];
-      struct string_array sa = enumerate_fts (t->axes[PIVOT_AXIS_ROW]);
-      sa = nest_fts (sa, enumerate_fts (t->axes[PIVOT_AXIS_COLUMN]));
-      sa = nest_fts (sa, enumerate_fts (t->axes[PIVOT_AXIS_LAYER]));
-      for (size_t i = 0; i < sa.n; i++)
-        puts (sa.strings[i]);
-      putc ('\n', stdout);
-      string_array_destroy (&sa);
+      struct var_array2 vaa = enumerate_fts (t->axes[PIVOT_AXIS_ROW]);
+      vaa = nest_fts (vaa, enumerate_fts (t->axes[PIVOT_AXIS_COLUMN]));
+      vaa = nest_fts (vaa, enumerate_fts (t->axes[PIVOT_AXIS_LAYER]));
+      for (size_t i = 0; i < vaa.n; i++)
+        {
+          for (size_t j = 0; j < vaa.vas[i].n; j++)
+            {
+              if (j)
+                fputs (", ", stdout);
+              fputs (var_get_name (vaa.vas[i].vars[j]), stdout);
+            }
+          putchar ('\n');
+        }
+
+      for (size_t j = 0; j < vaa.n; j++)
+        {
+          if (n_fts >= allocated_fts)
+            fts = x2nrealloc (fts, &allocated_fts, sizeof *fts);
+
+          struct ctables_freqtab *ft = xmalloc (sizeof *ft);
+          *ft = (struct ctables_freqtab) {
+            .vars = vaa.vas[j],
+            .data = HMAP_INITIALIZER (ft->data),
+          };
+          fts[n_fts++] = ft;
+        }
+
+      free (vaa.vas);
     }
+
+  struct casereader *input = casereader_create_filter_weight (proc_open (ds),
+                                                              dataset_dict (ds),
+                                                              NULL, NULL);
+  bool warn_on_invalid = true;
+  for (struct ccase *c = casereader_read (input); c;
+       case_unref (c), c = casereader_read (input))
+    {
+      double weight = dict_get_case_weight (dataset_dict (ds), c,
+                                            &warn_on_invalid);
+      for (size_t i = 0; i < n_fts; i++)
+        {
+          struct ctables_freqtab *ft = fts[i];
+
+          size_t hash = 0;
+
+          for (size_t j = 0; j < ft->vars.n; j++)
+            {
+              const struct variable *var = ft->vars.vars[j];
+              hash = value_hash (case_data (c, var), var_get_width (var), hash);
+            }
+
+          struct freq *f;
+          HMAP_FOR_EACH_WITH_HASH (f, struct freq, node, hash, &ft->data)
+            {
+              for (size_t j = 0; j < ft->vars.n; j++)
+                {
+                  const struct variable *var = ft->vars.vars[j];
+                  if (!value_equal (case_data (c, var), &f->values[j],
+                                    var_get_width (var)))
+                    goto next_hash_node;
+                }
+
+              f->count += weight;
+              goto next_ft;
+
+            next_hash_node: ;
+            }
+
+          f = xmalloc (table_entry_size (ft->vars.n));
+          f->count = weight;
+          for (size_t j = 0; j < ft->vars.n; j++)
+            {
+              const struct variable *var = ft->vars.vars[j];
+              value_clone (&f->values[j], case_data (c, var),
+                           var_get_width (var));
+            }
+          hmap_insert (&ft->data, &f->node, hash);
+
+        next_ft: ;
+        }
+    }
+  casereader_destroy (input);
+
+  for (size_t i = 0; i < n_fts; i++)
+    {
+      struct ctables_freqtab *ft = fts[i];
+      struct freq *f, *next;
+      HMAP_FOR_EACH_SAFE (f, next, struct freq, node, &ft->data)
+        {
+          hmap_delete (&ft->data, &f->node);
+          for (size_t j = 0; j < ft->vars.n; j++)
+            {
+              const struct variable *var = ft->vars.vars[j];
+              value_destroy (&f->values[j], var_get_width (var));
+            }
+          free (f);
+        }
+      hmap_destroy (&ft->data);
+      var_array_uninit (&ft->vars);
+      free (ft);
+    }
+  free (fts);
+
+  return proc_commit (ds);
 }
 
 int
@@ -1933,9 +2085,9 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds)
     }
   while (lex_token (lexer) != T_ENDCMD);
 
-  ctables_execute (ct);
+  bool ok = ctables_execute (ds, ct);
   ctables_destroy (ct);
-  return CMD_SUCCESS;
+  return ok ? CMD_SUCCESS : CMD_FAILURE;
 
 error:
   ctables_destroy (ct);