const struct variable **var,
const struct freq_tab *frq_tab);
-static void dump_statistics (const struct frq_proc *frq,
- const struct variable *wv);
+static struct frq_stats_table *frq_stats_table_submit (
+ struct frq_stats_table *, const struct frq_proc *,
+ const struct dictionary *, const struct variable *wv,
+ const struct ccase *example);
+static void frq_stats_table_destroy (struct frq_stats_table *);
static int
compare_freq (const void *a_, const void *b_, const void *aux_)
}
-/* Frees the frequency table for variable V. */
-static void
-cleanup_freq_tab (struct var_freqs *vf)
-{
- free (vf->tab.valid);
- freq_hmap_destroy (&vf->tab.data, vf->width);
-}
-
/* Add data from case C to the frequency table. */
static void
calc (struct frq_proc *frq, const struct ccase *c, const struct dataset *ds)
}
}
-/* Prepares each variable that is the target of FREQUENCIES by setting
- up its hash table. */
static void
-precalc (struct frq_proc *frq, struct casereader *input, struct dataset *ds)
+output_splits_once (bool *need_splits, const struct dataset *ds,
+ const struct ccase *c)   /* Calls output_split_file_values() for C unless already done. */
{
- struct ccase *c;
- size_t i;
-
- c = casereader_peek (input, 0);
- if (c != NULL)
+ if (*need_splits)   /* The caller's flag starts out true and is cleared on first use. */
{
output_split_file_values (ds, c);
- case_unref (c);
+ *need_splits = false;   /* Every later call with the same flag does nothing. */
}
-
- for (i = 0; i < frq->n_vars; i++)
- hmap_init (&frq->vars[i].tab.data);
}
/* Finishes up with the variables after frequencies have been
calculated. Displays statistics, percentiles, ... */
-static void
-postcalc (struct frq_proc *frq, const struct dataset *ds)
+static struct frq_stats_table *
+postcalc (struct frq_proc *frq, const struct dataset *ds,
+ struct ccase *example, struct frq_stats_table *fst)
{
const struct dictionary *dict = dataset_dict (ds);
const struct variable *wv = dict_get_weight (dict);
- size_t i;
- for (i = 0; i < frq->n_vars; i++)
+ for (size_t i = 0; i < frq->n_vars; i++)
{
struct var_freqs *vf = &frq->vars[i];
postprocess_freq_tab (frq, vf);
calc_percentiles (frq, vf);
}
+ enum split_type st = dict_get_split_type (dict);
+ bool need_splits = true;
if (frq->n_stats)
- dump_statistics (frq, wv);
+ {
+ if (st != SPLIT_LAYERED)
+ output_splits_once (&need_splits, ds, example);
+ fst = frq_stats_table_submit (fst, frq, dict, wv, example);
+ }
- for (i = 0; i < frq->n_vars; i++)
+ for (size_t i = 0; i < frq->n_vars; i++)
{
struct var_freqs *vf = &frq->vars[i];
/* Frequencies tables. */
if (vf->tab.n_valid + vf->tab.n_missing <= frq->max_categories)
- dump_freq_table (vf, wv);
-
+ {
+ output_splits_once (&need_splits, ds, example);
+ dump_freq_table (vf, wv);
+ }
if (frq->hist && var_is_numeric (vf->var) && vf->tab.n_valid > 0)
{
if (histogram)
{
+ output_splits_once (&need_splits, ds, example);
chart_submit (histogram_chart_create (
histogram->gsl_hist, var_to_string(vf->var),
vf->tab.valid_cases,
}
if (frq->pie)
- do_piechart(frq->pie, vf->var, &vf->tab);
+ {
+ output_splits_once (&need_splits, ds, example);
+ do_piechart(frq->pie, vf->var, &vf->tab);
+ }
if (frq->bar)
- do_barchart(frq->bar, &vf->var, &vf->tab);
+ {
+ output_splits_once (&need_splits, ds, example);
+ do_barchart(frq->bar, &vf->var, &vf->tab);
+ }
+
+ free (vf->tab.valid);
+ freq_hmap_destroy (&vf->tab.data, vf->width);
+ }
+
+ return fst;
+}
+
+static void
+frq_run (struct frq_proc *frq, struct dataset *ds)   /* Tallies and reports each case group in turn. */
+{
+ struct frq_stats_table *fst = NULL;   /* Carried from group to group through postcalc(). */
+ struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds),
+ dataset_dict (ds));
+ struct casereader *group;
+ while (casegrouper_get_next_group (grouper, &group))
+ {
+ for (size_t i = 0; i < frq->n_vars; i++)
+ hmap_init (&frq->vars[i].tab.data);   /* Fresh table per group; postcalc() destroys it. */
+
+ struct ccase *example = casereader_peek (group, 0);   /* Kept for postcalc(); released below. */
+
+ struct ccase *c;
+ for (; (c = casereader_read (group)) != NULL; case_unref (c))
+ calc (frq, c, ds);
+ fst = postcalc (frq, ds, example, fst);
+ casereader_destroy (group);
- cleanup_freq_tab (vf);
+ case_unref (example);
}
+ frq_stats_table_destroy (fst);   /* No-op if FST is null; otherwise submits the table. */
+ casegrouper_destroy (grouper);
+ proc_commit (ds);
}
int
}
}
- {
- struct casegrouper *grouper;
- struct casereader *group;
- bool ok;
-
- grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
- while (casegrouper_get_next_group (grouper, &group))
- {
- struct ccase *c;
- precalc (&frq, group, ds);
-
- for (; (c = casereader_read (group)) != NULL; case_unref (c))
- calc (&frq, c, ds);
- postcalc (&frq, ds);
- casereader_destroy (group);
- }
- ok = casegrouper_destroy (grouper);
- ok = proc_commit (ds) && ok;
- }
+ frq_run (&frq, ds);
free (vars);
for (size_t i = 0; i < frq.n_vars; i++)
return true;
}
+\f
+struct frq_stats_table   /* State for the "Statistics" table while it is being filled in. */
+ {
+ struct pivot_table *table;   /* Table being built; submitted by frq_stats_table_destroy(). */
+ struct pivot_splits *splits;   /* From pivot_splits_create(); null unless splits are layered. */
+ };
/* Displays a table of all the statistics requested. */
-static void
-dump_statistics (const struct frq_proc *frq, const struct variable *wv)
+static struct frq_stats_table *
+frq_stats_table_create (const struct frq_proc *frq,
+ const struct dictionary *dict,
+ const struct variable *wv)
{
if (all_string_variables (frq))
- return;
+ return NULL;
struct pivot_table *table = pivot_table_create (N_("Statistics"));
pivot_table_set_weight_var (table, wv);
struct pivot_dimension *variables
= pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Variables"));
+ for (size_t i = 0; i < frq->n_vars; i++)
+ if (!var_is_alpha (frq->vars[i].var))
+ pivot_category_create_leaf (variables->root,
+ pivot_value_new_variable (frq->vars[i].var));
struct pivot_dimension *statistics = pivot_dimension_create (
table, PIVOT_AXIS_ROW, N_("Statistics"));
pc->p * 100.0));
}
+ struct pivot_splits *splits = pivot_splits_create (table, PIVOT_AXIS_ROW,
+ dict);
+
+ struct frq_stats_table *fst = xmalloc (sizeof *fst);
+ *fst = (struct frq_stats_table) { .table = table, .splits = splits };
+ return fst;
+}
+
+static struct frq_stats_table *
+frq_stats_table_submit (struct frq_stats_table *fst,
+ const struct frq_proc *frq,
+ const struct dictionary *dict,
+ const struct variable *wv,
+ const struct ccase *example)
+{
+ if (!fst)
+ {
+ fst = frq_stats_table_create (frq, dict, wv);
+ if (!fst)
+ return NULL;
+ }
+ pivot_splits_new_split (fst->splits, example);
+
+ int var_idx = 0;
for (size_t i = 0; i < frq->n_vars; i++)
{
struct var_freqs *vf = &frq->vars[i];
const struct freq_tab *ft = &vf->tab;
- int var_idx = pivot_category_create_leaf (
- variables->root, pivot_value_new_variable (vf->var));
-
int row = 0;
- pivot_table_put2 (table, var_idx, row++,
+ pivot_splits_put2 (fst->splits, fst->table, var_idx, row++,
pivot_value_new_number (ft->valid_cases));
- pivot_table_put2 (table, var_idx, row++,
+ pivot_splits_put2 (fst->splits, fst->table, var_idx, row++,
pivot_value_new_number (
ft->total_cases - ft->valid_cases));
= (j == FRQ_ST_MODE || j == FRQ_ST_MINIMUM || j == FRQ_ST_MAXIMUM
? pivot_value_new_var_value (vf->var, &v)
: pivot_value_new_number (v.f));
- pivot_table_put2 (table, var_idx, row++, pv);
+ pivot_splits_put2 (fst->splits, fst->table, var_idx, row++, pv);
}
for (size_t j = 0; j < frq->n_percentiles; j++)
union value v = {
.f = vf->tab.n_valid ? vf->percentiles[j] : SYSMIS
};
- pivot_table_put2 (table, var_idx, row++,
- pivot_value_new_var_value (vf->var, &v));
+ pivot_splits_put2 (fst->splits, fst->table, var_idx, row++,
+ pivot_value_new_var_value (vf->var, &v));
}
+
+ var_idx++;
}
- pivot_table_submit (table);
+ if (!fst->splits)
+ {
+ frq_stats_table_destroy (fst);
+ return NULL;
+ }
+ return fst;
+}
+
+static void
+frq_stats_table_destroy (struct frq_stats_table *fst)   /* Submits FST's table, then frees FST. */
+{
+ if (!fst)   /* No table was ever created, so there is nothing to submit. */
+ return;
+
+ pivot_table_submit (fst->table);   /* Submission is deferred until destruction. */
+ pivot_splits_destroy (fst->splits);   /* Accepts a null pointer. */
+ free (fst);
}
#include <stdlib.h>
#include "data/data-out.h"
+#include "data/dictionary.h"
#include "data/settings.h"
#include "data/value.h"
#include "data/variable.h"
}
/* Puts VALUE in the cell in TABLE whose indexes are given by the N indexes in
- DINDEXES. N must be the number of dimensions in TABLE. Takes ownership of
- VALUE.
+ DINDEXES. The order of the indexes is the same as the order in which the
+ dimensions were created. N must be the number of dimensions in TABLE.
+ Takes ownership of VALUE.
If VALUE is a numeric value without a specified format, this function checks
each of the categories designated by DINDEXES[] and takes the format from
free (ex);
}
}
+\f
+/* pivot_splits */
+
+struct pivot_splits_value   /* One distinct value seen so far for a split variable. */
+ {
+ struct hmap_node hmap_node;   /* In struct pivot_splits_var's 'values'. */
+ union value value;   /* Copy of the split value, cloned at the variable's width. */
+ int leaf;   /* Result of pivot_category_create_leaf() for this value. */
+ };
+
+struct pivot_splits_var   /* State kept for one split variable. */
+ {
+ struct pivot_dimension *dimension;   /* Dimension created for this variable. */
+ int width;   /* From var_get_width(). */
+ size_t idx;   /* From var_get_case_index(); passed to case_data_idx(). */
+ struct fmt_spec format;   /* Copy of the variable's print format. */
+ struct hmap values;   /* Contains struct pivot_splits_value, hashed on the value. */
+ };
+
+struct pivot_splits   /* Tracks the split-variable dimensions added to a pivot table. */
+ {
+ struct pivot_splits_var *vars;   /* Array of N elements, one per split variable. */
+ size_t n;   /* Number of split variables. */
+ char *encoding;   /* Copy of the dictionary's encoding string. */
+
+ size_t dindexes[MAX_SPLITS];   /* Leaf per variable for the current split; [0] is SIZE_MAX until pivot_splits_new_split() runs. */
+ };
+
+/* Creates and returns a pivot_splits for the split variables in DICT, adding
+   one dimension per split variable to PT along AXIS.  Returns a null pointer,
+   and adds nothing to PT, unless DICT's split type is SPLIT_LAYERED.
+
+   The caller releases a non-null result with pivot_splits_destroy(). */
+struct pivot_splits *
+pivot_splits_create (struct pivot_table *pt,
+                     enum pivot_axis_type axis,
+                     const struct dictionary *dict)
+{
+  if (dict_get_split_type (dict) != SPLIT_LAYERED)
+    return NULL;
+
+  size_t n_splits = dict_get_n_splits (dict);
+  assert (n_splits <= MAX_SPLITS);
+
+  const struct variable *const *split_vars = dict_get_split_vars (dict);
+  struct pivot_splits_var *psvars = xnmalloc (n_splits, sizeof *psvars);
+  for (size_t i = 0; i < n_splits; i++)
+    {
+      const struct variable *sv = split_vars[i];
+
+      /* Each split variable gets a dimension of its own, labeled with the
+         variable, and with that label made visible. */
+      struct pivot_dimension *dim = pivot_dimension_create__ (
+        pt, axis, pivot_value_new_variable (sv));
+      dim->root->show_label = true;
+
+      psvars[i] = (struct pivot_splits_var) {
+        .dimension = dim,
+        .width = var_get_width (sv),
+        .idx = var_get_case_index (sv),
+        .format = *var_get_print_format (sv),
+        .values = HMAP_INITIALIZER (psvars[i].values),
+      };
+    }
+
+  /* dindexes[0] starts out as SIZE_MAX so that pivot_splits_get_dindexes()
+     can assert that pivot_splits_new_split() has been called first. */
+  struct pivot_splits *result = xmalloc (sizeof *result);
+  *result = (struct pivot_splits) {
+    .vars = psvars,
+    .n = n_splits,
+    .encoding = xstrdup (dict_get_encoding (dict)),
+    .dindexes = { [0] = SIZE_MAX },
+  };
+  return result;
+}
+
+/* Frees PS along with every split value recorded for each of its variables.
+   Does nothing if PS is a null pointer. */
+void
+pivot_splits_destroy (struct pivot_splits *ps)
+{
+  if (ps == NULL)
+    return;
+
+  for (size_t i = 0; i < ps->n; i++)
+    {
+      struct hmap *values = &ps->vars[i].values;
+      const int width = ps->vars[i].width;
+
+      /* The "safe" iteration form is needed because each node is removed
+         and freed while the map is being walked. */
+      struct pivot_splits_value *v, *next_v;
+      HMAP_FOR_EACH_SAFE (v, next_v, struct pivot_splits_value, hmap_node,
+                          values)
+        {
+          hmap_delete (values, &v->hmap_node);
+          value_destroy (&v->value, width);
+          free (v);
+        }
+      hmap_destroy (values);
+    }
+
+  free (ps->encoding);
+  free (ps->vars);
+  free (ps);
+}
+
+/* Searches the split values recorded in PSVAR for one equal to VALUE,
+   comparing at PSVAR's width.  Returns the match, or a null pointer if VALUE
+   has not been recorded. */
+static struct pivot_splits_value *
+pivot_splits_value_find (struct pivot_splits_var *psvar,
+                         const union value *value)
+{
+  const int width = psvar->width;
+  const size_t hash = value_hash (value, width, 0);
+
+  struct pivot_splits_value *candidate;
+  HMAP_FOR_EACH_WITH_HASH (candidate, struct pivot_splits_value, hmap_node,
+                           hash, &psvar->values)
+    {
+      if (value_equal (&candidate->value, value, width))
+        return candidate;
+    }
+  return NULL;
+}
+
+/* Makes the split-variable values in case C the current split for PS.  Any
+   value not seen before for its variable is recorded and given a new leaf in
+   that variable's dimension.  Afterward PS's dindexes hold the leaf for each
+   variable's value in C.
+
+   Does nothing if PS is a null pointer. */
+void
+pivot_splits_new_split (struct pivot_splits *ps, const struct ccase *c)
+{
+  if (ps == NULL)
+    return;
+
+  /* Visit the variables from last to first. */
+  for (size_t i = ps->n; i-- > 0;)
+    {
+      struct pivot_splits_var *psvar = &ps->vars[i];
+      const union value *value = case_data_idx (c, psvar->idx);
+
+      struct pivot_splits_value *psval = pivot_splits_value_find (psvar,
+                                                                  value);
+      if (psval == NULL)
+        {
+          /* First time this value appears: remember it and give it a
+             leaf of its own. */
+          psval = xmalloc (sizeof *psval);
+          value_clone (&psval->value, value, psvar->width);
+          psval->leaf = pivot_category_create_leaf (
+            psvar->dimension->root,
+            pivot_value_new_value (value, psvar->width, &psvar->format,
+                                   ps->encoding));
+          hmap_insert (&psvar->values, &psval->hmap_node,
+                       value_hash (value, psvar->width, 0));
+        }
+
+      ps->dindexes[i] = psval->leaf;
+    }
+}
+
+/* Copies the indexes for PS's current split into DINDEXES, which must have
+   room for one element per split variable, and returns how many were copied.
+   Returns 0 without touching DINDEXES if PS is a null pointer.
+
+   Asserts that pivot_splits_new_split() has already set a current split. */
+size_t
+pivot_splits_get_dindexes (const struct pivot_splits *ps, size_t *dindexes)
+{
+  if (ps == NULL)
+    return 0;
+
+  assert (ps->dindexes[0] != SIZE_MAX);
+
+  const size_t n_splits = ps->n;
+  const size_t *current = ps->dindexes;
+  for (size_t i = 0; i < n_splits; i++)
+    dindexes[i] = current[i];
+  return n_splits;
+}
+
+/* Puts VALUE in TABLE at the cell whose indexes are IDX1 followed by the
+   indexes of PS's current split (none, if PS is a null pointer). */
+void
+pivot_splits_put1 (struct pivot_splits *ps, struct pivot_table *table,
+                   size_t idx1, struct pivot_value *value)
+{
+  size_t dindexes[1 + MAX_SPLITS] = { idx1 };
+  size_t n_splits = pivot_splits_get_dindexes (ps, &dindexes[1]);
+  pivot_table_put (table, dindexes, 1 + n_splits, value);
+}
+
+/* Puts VALUE in TABLE at the cell whose indexes are IDX1 and IDX2 followed by
+   the indexes of PS's current split (none, if PS is a null pointer). */
+void
+pivot_splits_put2 (struct pivot_splits *ps, struct pivot_table *table,
+                   size_t idx1, size_t idx2, struct pivot_value *value)
+{
+  size_t dindexes[2 + MAX_SPLITS] = { idx1, idx2 };
+  size_t n_splits = pivot_splits_get_dindexes (ps, &dindexes[2]);
+  pivot_table_put (table, dindexes, 2 + n_splits, value);
+}
+
+/* Puts VALUE in TABLE at the cell whose indexes are IDX1, IDX2, and IDX3
+   followed by the indexes of PS's current split (none, if PS is a null
+   pointer). */
+void
+pivot_splits_put3 (struct pivot_splits *ps, struct pivot_table *table,
+                   size_t idx1, size_t idx2, size_t idx3,
+                   struct pivot_value *value)
+{
+  size_t dindexes[3 + MAX_SPLITS] = { idx1, idx2, idx3 };
+  size_t n_splits = pivot_splits_get_dindexes (ps, &dindexes[3]);
+  pivot_table_put (table, dindexes, 3 + n_splits, value);
+}
+
+/* Puts VALUE in TABLE at the cell whose indexes are IDX1, IDX2, IDX3, and
+   IDX4 followed by the indexes of PS's current split (none, if PS is a null
+   pointer). */
+void
+pivot_splits_put4 (struct pivot_splits *ps, struct pivot_table *table,
+                   size_t idx1, size_t idx2, size_t idx3, size_t idx4,
+                   struct pivot_value *value)
+{
+  size_t dindexes[4 + MAX_SPLITS] = { idx1, idx2, idx3, idx4 };
+  size_t n_splits = pivot_splits_get_dindexes (ps, &dindexes[4]);
+  pivot_table_put (table, dindexes, 4 + n_splits, value);
+}