From: Ben Pfaff Date: Sun, 7 Aug 2022 16:41:44 +0000 (-0700) Subject: Layered split file for FREQUENCIES works. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=53fe23e6cf0f599423080ff2b101df0216ae5480 Layered split file for FREQUENCIES works. --- diff --git a/src/language/stats/frequencies.c b/src/language/stats/frequencies.c index ce2b416e3b..cf40446a77 100644 --- a/src/language/stats/frequencies.c +++ b/src/language/stats/frequencies.c @@ -244,7 +244,8 @@ static void do_barchart(const struct frq_chart *bar, static struct frq_stats_table *frq_stats_table_submit ( struct frq_stats_table *, const struct frq_proc *, - const struct dictionary *, const struct variable *wv); + const struct dictionary *, const struct variable *wv, + const struct ccase *example); static void frq_stats_table_destroy (struct frq_stats_table *); static int @@ -492,7 +493,7 @@ output_splits_once (bool *need_splits, const struct dataset *ds, calculated. Displays statistics, percentiles, ... */ static struct frq_stats_table * postcalc (struct frq_proc *frq, const struct dataset *ds, - struct ccase *first, struct frq_stats_table *fst) + struct ccase *example, struct frq_stats_table *fst) { const struct dictionary *dict = dataset_dict (ds); const struct variable *wv = dict_get_weight (dict); @@ -509,8 +510,8 @@ postcalc (struct frq_proc *frq, const struct dataset *ds, if (frq->n_stats) { if (st != SPLIT_LAYERED) - output_splits_once (&need_splits, ds, first); - fst = frq_stats_table_submit (fst, frq, dict, wv); + output_splits_once (&need_splits, ds, example); + fst = frq_stats_table_submit (fst, frq, dict, wv, example); } for (size_t i = 0; i < frq->n_vars; i++) @@ -520,7 +521,7 @@ postcalc (struct frq_proc *frq, const struct dataset *ds, /* Frequencies tables. 
*/ if (vf->tab.n_valid + vf->tab.n_missing <= frq->max_categories) { - output_splits_once (&need_splits, ds, first); + output_splits_once (&need_splits, ds, example); dump_freq_table (vf, wv); } @@ -535,7 +536,7 @@ postcalc (struct frq_proc *frq, const struct dataset *ds, if (histogram) { - output_splits_once (&need_splits, ds, first); + output_splits_once (&need_splits, ds, example); chart_submit (histogram_chart_create ( histogram->gsl_hist, var_to_string(vf->var), vf->tab.valid_cases, @@ -549,13 +550,13 @@ postcalc (struct frq_proc *frq, const struct dataset *ds, if (frq->pie) { - output_splits_once (&need_splits, ds, first); + output_splits_once (&need_splits, ds, example); do_piechart(frq->pie, vf->var, &vf->tab); } if (frq->bar) { - output_splits_once (&need_splits, ds, first); + output_splits_once (&need_splits, ds, example); do_barchart(frq->bar, &vf->var, &vf->tab); } @@ -578,15 +579,15 @@ frq_run (struct frq_proc *frq, struct dataset *ds) for (size_t i = 0; i < frq->n_vars; i++) hmap_init (&frq->vars[i].tab.data); - struct ccase *first = casereader_peek (group, 0); + struct ccase *example = casereader_peek (group, 0); struct ccase *c; for (; (c = casereader_read (group)) != NULL; case_unref (c)) calc (frq, c, ds); - fst = postcalc (frq, ds, first, fst); + fst = postcalc (frq, ds, example, fst); casereader_destroy (group); - case_unref (first); + case_unref (example); } frq_stats_table_destroy (fst); casegrouper_destroy (grouper); @@ -1579,8 +1580,6 @@ frq_stats_table_create (const struct frq_proc *frq, struct pivot_table *table = pivot_table_create (N_("Statistics")); pivot_table_set_weight_var (table, wv); - struct pivot_splits *splits = pivot_splits_create (table, dict); - struct pivot_dimension *variables = pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Variables")); for (size_t i = 0; i < frq->n_vars; i++) @@ -1614,6 +1613,9 @@ frq_stats_table_create (const struct frq_proc *frq, pc->p * 100.0)); } + struct pivot_splits *splits = 
pivot_splits_create (table, PIVOT_AXIS_ROW, + dict); /* The split dimensions are created last so that their indexes follow the two indexes that pivot_splits_put2() passes first. */ + struct frq_stats_table *fst = xmalloc (sizeof *fst); *fst = (struct frq_stats_table) { .table = table, .splits = splits }; return fst; @@ -1623,7 +1625,8 @@ static struct frq_stats_table * frq_stats_table_submit (struct frq_stats_table *fst, const struct frq_proc *frq, const struct dictionary *dict, - const struct variable *wv) + const struct variable *wv, + const struct ccase *example) { if (!fst) { @@ -1631,6 +1634,7 @@ frq_stats_table_submit (struct frq_stats_table *fst, if (!fst) return NULL; } + pivot_splits_new_split (fst->splits, example); /* Cells put below go in the split whose values are taken from EXAMPLE. */ int var_idx = 0; for (size_t i = 0; i < frq->n_vars; i++) @@ -1642,9 +1646,9 @@ frq_stats_table_submit (struct frq_stats_table *fst, const struct freq_tab *ft = &vf->tab; int row = 0; - pivot_table_put2 (fst->table, var_idx, row++, + pivot_splits_put2 (fst->splits, fst->table, var_idx, row++, pivot_value_new_number (ft->valid_cases)); - pivot_table_put2 (fst->table, var_idx, row++, + pivot_splits_put2 (fst->splits, fst->table, var_idx, row++, pivot_value_new_number ( ft->total_cases - ft->valid_cases)); @@ -1660,7 +1664,7 @@ frq_stats_table_submit (struct frq_stats_table *fst, = (j == FRQ_ST_MODE || j == FRQ_ST_MINIMUM || j == FRQ_ST_MAXIMUM ? pivot_value_new_var_value (vf->var, &v) : pivot_value_new_number (v.f)); - pivot_table_put2 (fst->table, var_idx, row++, pv); + pivot_splits_put2 (fst->splits, fst->table, var_idx, row++, pv); } for (size_t j = 0; j < frq->n_percentiles; j++) @@ -1672,8 +1676,8 @@ frq_stats_table_submit (struct frq_stats_table *fst, union value v = { .f = vf->tab.n_valid ? 
vf->percentiles[j] : SYSMIS }; - pivot_table_put2 (fst->table, var_idx, row++, - pivot_value_new_var_value (vf->var, &v)); + pivot_splits_put2 (fst->splits, fst->table, var_idx, row++, + pivot_value_new_var_value (vf->var, &v)); } var_idx++; diff --git a/src/output/pivot-table.c b/src/output/pivot-table.c index 3e54d776be..1e7d2fbb5a 100644 --- a/src/output/pivot-table.c +++ b/src/output/pivot-table.c @@ -24,6 +24,7 @@ #include #include "data/data-out.h" +#include "data/dictionary.h" #include "data/settings.h" #include "data/value.h" #include "data/variable.h" @@ -1443,8 +1444,9 @@ pivot_table_insert_cell (struct pivot_table *table, const size_t *dindexes) } /* Puts VALUE in the cell in TABLE whose indexes are given by the N indexes in - DINDEXES. N must be the number of dimensions in TABLE. Takes ownership of - VALUE. + DINDEXES. The order of the indexes is the same as the order in which the + dimensions were created. N must be the number of dimensions in TABLE. + Takes ownership of VALUE. 
If VALUE is a numeric value without a specified format, this function checks each of the categories designated by DINDEXES[] and takes the format from @@ -3031,10 +3033,29 @@ pivot_value_ex_destroy (struct pivot_value_ex *ex) /* pivot_splits */ +struct pivot_splits_value /* One distinct value of a split variable. */ + { + struct hmap_node hmap_node; /* In the values map of struct pivot_splits_var. */ + union value value; /* The split variable value. */ + int leaf; /* Leaf index from pivot_category_create_leaf(). */ + }; + +struct pivot_splits_var /* One split variable and the dimension created for it. */ + { + struct pivot_dimension *dimension; /* Created by pivot_splits_create(). */ + int width; /* From var_get_width(). */ + size_t idx; /* From var_get_case_index(). */ + struct fmt_spec format; /* Print format of the variable. */ + struct hmap values; /* Contains struct pivot_splits_value. */ + }; + struct pivot_splits { - const struct dictionary *dict; - struct hmap splits[MAX_SPLITS]; + struct pivot_splits_var *vars; /* One element per split variable. */ + size_t n; /* Number of elements in vars. */ + char *encoding; /* Owned copy of the dictionary encoding. */ + + size_t dindexes[MAX_SPLITS]; /* Set by pivot_splits_new_split(); dindexes[0] is SIZE_MAX until the first call. */ }; struct pivot_splits * @@ -3045,13 +3066,162 @@ pivot_splits_create (struct pivot_table *pt, if (dict_get_split_type (dict) != SPLIT_LAYERED) return NULL; - struct pivot_splits *ps = xmalloc (sizeof *ps); - *ps = (struct pivot_splits) { .dict = dict }; - for (size_t i = 0; i < dict_get_n_splits (dict); i++) + size_t n = dict_get_n_splits (dict); + assert (n <= MAX_SPLITS); + + const struct variable *const *vars = dict_get_split_vars (dict); + struct pivot_splits_var *psvars = xnmalloc (n, sizeof *psvars); + for (size_t i = 0; i < n; i++) { - hmap_init (&ps->splits[i]); + const struct variable *var = vars[i]; + struct pivot_splits_var *psvar = &psvars[i]; + struct pivot_dimension *d = pivot_dimension_create__ ( - pt, axis, pivot_value_new_variable (dict_get_split_vars (dict)[i])); + pt, axis, pivot_value_new_variable (var)); + d->root->show_label = true; + + *psvar = (struct pivot_splits_var) { + .width = var_get_width (var), + .idx = var_get_case_index (var), + .format = *var_get_print_format (var), + .values = HMAP_INITIALIZER (psvar->values), + .dimension = d, + }; } + + struct pivot_splits *ps = xmalloc (sizeof *ps); + *ps = (struct pivot_splits) { + .vars = psvars, + .n = n, + .encoding = xstrdup (dict_get_encoding (dict)), + .dindexes = { [0] = SIZE_MAX }, + }; return ps; } +/* Frees PS along with its split variables, their recorded values, and its copy of the encoding. The dimensions are not freed here. Does nothing if PS is null. */ +void 
+pivot_splits_destroy (struct pivot_splits *ps) +{ + if (!ps) + return; + + for (size_t i = 0; i < ps->n; i++) + { + struct pivot_splits_var *psvar = &ps->vars[i]; + struct pivot_splits_value *psval, *next; + HMAP_FOR_EACH_SAFE (psval, next, struct pivot_splits_value, hmap_node, + &psvar->values) + { + value_destroy (&psval->value, psvar->width); + hmap_delete (&psvar->values, &psval->hmap_node); + free (psval); + } + hmap_destroy (&psvar->values); + } + free (ps->vars); + free (ps->encoding); + free (ps); +} +/* Returns the entry recorded in PSVAR for VALUE, or a null pointer if there is none. */ +static struct pivot_splits_value * +pivot_splits_value_find (struct pivot_splits_var *psvar, + const union value *value) +{ + struct pivot_splits_value *psval; + HMAP_FOR_EACH_WITH_HASH (psval, struct pivot_splits_value, hmap_node, + value_hash (value, psvar->width, 0), &psvar->values) + if (value_equal (&psval->value, value, psvar->width)) + return psval; + return NULL; +} +/* Makes the split variable values in case C the current split in PS, creating a leaf category in a split dimension for each value not seen before. Does nothing if PS is null. */ +void +pivot_splits_new_split (struct pivot_splits *ps, const struct ccase *c) +{ + if (!ps) + return; + + for (size_t i = ps->n - 1; i < ps->n; i--) /* Counts down; unsigned wraparound below 0 ends the loop. */ + { + struct pivot_splits_var *psvar = &ps->vars[i]; + const union value *value = case_data_idx (c, psvar->idx); + struct pivot_splits_value *psval = pivot_splits_value_find (psvar, value); + if (!psval) + { + psval = xmalloc (sizeof *psval); + hmap_insert (&psvar->values, &psval->hmap_node, + value_hash (value, psvar->width, 0)); + value_clone (&psval->value, value, psvar->width); + psval->leaf = pivot_category_create_leaf ( + psvar->dimension->root, + pivot_value_new_value (value, psvar->width, &psvar->format, + ps->encoding)); + } + + ps->dindexes[i] = psval->leaf; + } +} +/* Copies the current split indexes in PS into DINDEXES and returns how many were copied. Returns 0 without writing to DINDEXES if PS is null. Otherwise pivot_splits_new_split() must have been called first. */ +size_t +pivot_splits_get_dindexes (const struct pivot_splits *ps, size_t *dindexes) +{ + if (!ps) + return 0; + + assert (ps->dindexes[0] != SIZE_MAX); + for (size_t i = 0; i < ps->n; i++) + dindexes[i] = ps->dindexes[i]; + return ps->n; +} +/* Puts VALUE in TABLE in the cell indexed by IDX1 followed by the current split indexes in PS. If PS is null, only IDX1 is used. */ +void +pivot_splits_put1 (struct pivot_splits *ps, struct pivot_table *table, + size_t idx1, struct pivot_value *value) +{ + 
size_t dindexes[1 + MAX_SPLITS]; + size_t *p = dindexes; + *p++ = idx1; + p += pivot_splits_get_dindexes (ps, p); + pivot_table_put (table, dindexes, p - dindexes, value); +} +/* Puts VALUE in TABLE in the cell indexed by IDX1 and IDX2 followed by the current split indexes in PS, if PS is nonnull. */ +void +pivot_splits_put2 (struct pivot_splits *ps, struct pivot_table *table, + size_t idx1, size_t idx2, struct pivot_value *value) +{ + size_t dindexes[2 + MAX_SPLITS]; + size_t *p = dindexes; + *p++ = idx1; + *p++ = idx2; + p += pivot_splits_get_dindexes (ps, p); + pivot_table_put (table, dindexes, p - dindexes, value); +} +/* Puts VALUE in TABLE in the cell indexed by IDX1, IDX2, and IDX3 followed by the current split indexes in PS, if PS is nonnull. */ +void +pivot_splits_put3 (struct pivot_splits *ps, struct pivot_table *table, + size_t idx1, size_t idx2, size_t idx3, + struct pivot_value *value) +{ + size_t dindexes[3 + MAX_SPLITS]; + size_t *p = dindexes; + *p++ = idx1; + *p++ = idx2; + *p++ = idx3; + p += pivot_splits_get_dindexes (ps, p); + pivot_table_put (table, dindexes, p - dindexes, value); +} +/* Puts VALUE in TABLE in the cell indexed by IDX1 through IDX4 followed by the current split indexes in PS, if PS is nonnull. */ +void +pivot_splits_put4 (struct pivot_splits *ps, struct pivot_table *table, + size_t idx1, size_t idx2, size_t idx3, size_t idx4, + struct pivot_value *value) +{ + size_t dindexes[4 + MAX_SPLITS]; + size_t *p = dindexes; + *p++ = idx1; + *p++ = idx2; + *p++ = idx3; + *p++ = idx4; + p += pivot_splits_get_dindexes (ps, p); + pivot_table_put (table, dindexes, p - dindexes, value); +} diff --git a/src/output/pivot-table.h b/src/output/pivot-table.h index 55a7d9ed8c..2904fd4f35 100644 --- a/src/output/pivot-table.h +++ b/src/output/pivot-table.h @@ -25,6 +25,7 @@ #include "libpspp/hmap.h" #include "output/table.h" +struct ccase; struct dictionary; struct pivot_value; struct variable; @@ -282,9 +283,25 @@ void pivot_dimension_dump (const struct pivot_dimension *, const struct pivot_table *, int indentation); struct pivot_splits *pivot_splits_create (struct pivot_table *, + enum pivot_axis_type, const struct dictionary *); void pivot_splits_destroy (struct pivot_splits *); +void pivot_splits_new_split (struct pivot_splits *, const struct ccase *); +size_t pivot_splits_get_dindexes (const struct pivot_splits *, + size_t 
*dindexes); + +void pivot_splits_put1 (struct pivot_splits *, struct pivot_table *, + size_t idx1, struct pivot_value *); +void pivot_splits_put2 (struct pivot_splits *, struct pivot_table *, + size_t idx1, size_t idx2, struct pivot_value *); +void pivot_splits_put3 (struct pivot_splits *, struct pivot_table *, + size_t idx1, size_t idx2, size_t idx3, + struct pivot_value *); +void pivot_splits_put4 (struct pivot_splits *, struct pivot_table *, + size_t idx1, size_t idx2, size_t idx3, size_t idx4, + struct pivot_value *); + size_t pivot_splits_count (const struct pivot_splits *); /* A pivot_category is a leaf (a category) or a group: