#include "libpspp/hmap.h"
#include "libpspp/message.h"
#include "libpspp/misc.h"
-#include "libpspp/pool.h"
#include "math/histogram.h"
#include "math/moments.h"
struct percentile
{
double p; /* the %ile to be calculated */
- double value; /* the %ile's value */
bool show; /* True to show this percentile in the statistics box. */
};
/* Statistics. */
double stat[FRQ_ST_count];
+ double *percentiles;
/* Variable attributes. */
int width;
struct frq_proc
{
- struct pool *pool;
-
struct var_freqs *vars;
size_t n_vars;
/* Percentiles to calculate and possibly display. */
struct percentile *percentiles;
- const struct percentile *median;
- int n_percentiles;
+ size_t median_idx;
+ size_t n_percentiles;
/* Frequency table display. */
long int max_categories; /* Maximum categories to show. */
const struct variable **var,
const struct freq_tab *frq_tab);
-static void dump_statistics (const struct frq_proc *frq,
- const struct variable *wv);
+static struct frq_stats_table *frq_stats_table_submit (
+ struct frq_stats_table *, const struct frq_proc *,
+ const struct dictionary *, const struct variable *wv,
+ const struct ccase *example);
+static void frq_stats_table_destroy (struct frq_stats_table *);
static int
compare_freq (const void *a_, const void *b_, const void *aux_)
}
/* Create a gsl_histogram from a freq_tab */
-static struct histogram *
-freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft,
- const struct variable *var);
+static struct histogram *freq_tab_to_hist (const struct frq_proc *,
+ const struct var_freqs *);
static void
put_freq_row (struct pivot_table *table, int var_idx,
/* Calculates all of the percentiles for VF within FRQ. */
static void
-calc_percentiles (const struct frq_proc *frq, const struct var_freqs *vf)
+calc_percentiles (const struct frq_proc *frq, struct var_freqs *vf)
{
+ if (!frq->n_percentiles)
+ return;
+
+ if (!vf->percentiles)
+ vf->percentiles = xnmalloc (frq->n_percentiles, sizeof *vf->percentiles);
+
const struct freq_tab *ft = &vf->tab;
- double W = ft->valid_cases;
- const struct freq *f;
- int percentile_idx = 0;
- double rank = 0;
+ const double W = ft->valid_cases;
+ size_t idx = 0;
- for (f = ft->valid; f < ft->missing; f++)
+ double rank = 0;
+ for (const struct freq *f = ft->valid; f < ft->missing; f++)
{
rank += f->count;
- for (; percentile_idx < frq->n_percentiles; percentile_idx++)
+ for (; idx < frq->n_percentiles; idx++)
{
- struct percentile *pc = &frq->percentiles[percentile_idx];
+ struct percentile *pc = &frq->percentiles[idx];
double tp;
tp = (settings_get_algorithm () == ENHANCED
break;
if (tp + 1 < rank || f + 1 >= ft->missing)
- pc->value = f->values[0].f;
+ vf->percentiles[idx] = f->values[0].f;
else
- pc->value = calc_percentile (pc->p, W, f->values[0].f, f[1].values[0].f);
+ vf->percentiles[idx] = calc_percentile (pc->p, W, f->values[0].f,
+ f[1].values[0].f);
}
}
- for (; percentile_idx < frq->n_percentiles; percentile_idx++)
- {
- struct percentile *pc = &frq->percentiles[percentile_idx];
- pc->value = (ft->n_valid > 0
- ? ft->valid[ft->n_valid - 1].values[0].f
- : SYSMIS);
- }
+ for (; idx < frq->n_percentiles; idx++)
+ vf->percentiles[idx] = (ft->n_valid > 0
+ ? ft->valid[ft->n_valid - 1].values[0].f
+ : SYSMIS);
}
/* Returns true iff the value in struct freq F is non-missing
}
-/* Frees the frequency table for variable V. */
-static void
-cleanup_freq_tab (struct var_freqs *vf)
-{
- free (vf->tab.valid);
- freq_hmap_destroy (&vf->tab.data, vf->width);
-}
-
/* Add data from case C to the frequency table. */
static void
calc (struct frq_proc *frq, const struct ccase *c, const struct dataset *ds)
}
}
-/* Prepares each variable that is the target of FREQUENCIES by setting
- up its hash table. */
static void
-precalc (struct frq_proc *frq, struct casereader *input, struct dataset *ds)
+output_splits_once (bool *need_splits, const struct dataset *ds,
+ const struct ccase *c) /* Calls output_split_file_values (DS, C) unless *NEED_SPLITS is already false. */
{
- struct ccase *c;
- size_t i;
-
- c = casereader_peek (input, 0);
- if (c != NULL)
+ if (*need_splits)
{
output_split_file_values (ds, c);
- case_unref (c);
+ *need_splits = false; /* Makes later calls with the same flag do nothing. */
}
-
- for (i = 0; i < frq->n_vars; i++)
- hmap_init (&frq->vars[i].tab.data);
}
/* Finishes up with the variables after frequencies have been
calculated. Displays statistics, percentiles, ... */
-static void
-postcalc (struct frq_proc *frq, const struct dataset *ds)
+static struct frq_stats_table *
+postcalc (struct frq_proc *frq, const struct dataset *ds,
+ struct ccase *example, struct frq_stats_table *fst)
{
const struct dictionary *dict = dataset_dict (ds);
const struct variable *wv = dict_get_weight (dict);
- size_t i;
- for (i = 0; i < frq->n_vars; i++)
+ for (size_t i = 0; i < frq->n_vars; i++)
{
struct var_freqs *vf = &frq->vars[i];
postprocess_freq_tab (frq, vf);
calc_percentiles (frq, vf);
}
+ enum split_type st = dict_get_split_type (dict);
+ bool need_splits = true;
if (frq->n_stats)
- dump_statistics (frq, wv);
+ {
+ if (st != SPLIT_LAYERED)
+ output_splits_once (&need_splits, ds, example);
+ fst = frq_stats_table_submit (fst, frq, dict, wv, example);
+ }
- for (i = 0; i < frq->n_vars; i++)
+ for (size_t i = 0; i < frq->n_vars; i++)
{
struct var_freqs *vf = &frq->vars[i];
/* Frequencies tables. */
if (vf->tab.n_valid + vf->tab.n_missing <= frq->max_categories)
- dump_freq_table (vf, wv);
-
+ {
+ output_splits_once (&need_splits, ds, example);
+ dump_freq_table (vf, wv);
+ }
if (frq->hist && var_is_numeric (vf->var) && vf->tab.n_valid > 0)
{
calc_stats (frq, vf, d);
- histogram = freq_tab_to_hist (frq, &vf->tab, vf->var);
+ histogram = freq_tab_to_hist (frq, vf);
if (histogram)
{
+ output_splits_once (&need_splits, ds, example);
chart_submit (histogram_chart_create (
histogram->gsl_hist, var_to_string(vf->var),
vf->tab.valid_cases,
}
if (frq->pie)
- do_piechart(frq->pie, vf->var, &vf->tab);
+ {
+ output_splits_once (&need_splits, ds, example);
+ do_piechart(frq->pie, vf->var, &vf->tab);
+ }
if (frq->bar)
- do_barchart(frq->bar, &vf->var, &vf->tab);
+ {
+ output_splits_once (&need_splits, ds, example);
+ do_barchart(frq->bar, &vf->var, &vf->tab);
+ }
+
+ free (vf->tab.valid);
+ freq_hmap_destroy (&vf->tab.data, vf->width);
+ }
+
+ return fst;
+}
+/* Runs the procedure described by FRQ over DS.  The cases are read one group
+   at a time from the grouper made by casegrouper_create_splits().  For each
+   group this initializes every variable's frequency hash map, passes each
+   case to calc(), and then calls postcalc().  Afterward it destroys the
+   statistics table and the grouper and commits the procedure. */
+static void
+frq_run (struct frq_proc *frq, struct dataset *ds)
+{
+ struct frq_stats_table *fst = NULL; /* Null until postcalc() returns a table; passed back in for each later group. */
+ struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds),
+ dataset_dict (ds));
+ struct casereader *group;
+ while (casegrouper_get_next_group (grouper, &group))
+ {
+ for (size_t i = 0; i < frq->n_vars; i++)
+ hmap_init (&frq->vars[i].tab.data);
+
+ struct ccase *example = casereader_peek (group, 0); /* Case at index 0 of the group; handed to postcalc() and unreferenced below. */
+
+ struct ccase *c;
+ for (; (c = casereader_read (group)) != NULL; case_unref (c))
+ calc (frq, c, ds);
+ fst = postcalc (frq, ds, example, fst);
+ casereader_destroy (group);
- cleanup_freq_tab (vf);
+ case_unref (example);
}
+ frq_stats_table_destroy (fst); /* Accepts null; submits the table if there is one. */
+ casegrouper_destroy (grouper); /* NOTE(review): this result and the one from proc_commit() are discarded here. */
+ proc_commit (ds);
}
int
int hi_pcnt = INT_MIN;
int hi_norm = FRQ_NONORMAL;
- frq.pool = pool_create ();
frq.sort = FRQ_AVALUE;
frq.vars = NULL;
frq.n_percentiles = o;
- frq.median = NULL;
+ frq.median_idx = SIZE_MAX;
for (i = 0; i < frq.n_percentiles; i++)
if (frq.percentiles[i].p == 0.5)
{
- frq.median = &frq.percentiles[i];
+ frq.median_idx = i;
break;
}
}
- {
- struct casegrouper *grouper;
- struct casereader *group;
- bool ok;
-
- grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
- while (casegrouper_get_next_group (grouper, &group))
- {
- struct ccase *c;
- precalc (&frq, group, ds);
-
- for (; (c = casereader_read (group)) != NULL; case_unref (c))
- calc (&frq, c, ds);
- postcalc (&frq, ds);
- casereader_destroy (group);
- }
- ok = casegrouper_destroy (grouper);
- ok = proc_commit (ds) && ok;
- }
-
+ frq_run (&frq, ds);
free (vars);
+ for (size_t i = 0; i < frq.n_vars; i++)
+ free (frq.vars[i].percentiles);
free (frq.vars);
free (frq.bar);
free (frq.pie);
free (frq.hist);
free (frq.percentiles);
- pool_destroy (frq.pool);
return CMD_SUCCESS;
error:
free (vars);
+ for (size_t i = 0; i < frq.n_vars; i++)
+ free (frq.vars[i].percentiles);
free (frq.vars);
free (frq.bar);
free (frq.pie);
free (frq.hist);
free (frq.percentiles);
- pool_destroy (frq.pool);
return CMD_FAILURE;
}
static double
-calculate_iqr (const struct frq_proc *frq)
+calculate_iqr (const struct frq_proc *frq, const struct var_freqs *vf)
{
double q1 = SYSMIS;
double q3 = SYSMIS;
struct percentile *pc = &frq->percentiles[i];
if (fabs (0.25 - pc->p) < DBL_EPSILON)
- q1 = pc->value;
+ q1 = vf->percentiles[i];
else if (fabs (0.75 - pc->p) < DBL_EPSILON)
- q3 = pc->value;
+ q3 = vf->percentiles[i];
}
return q1 == SYSMIS || q3 == SYSMIS ? SYSMIS : q3 - q1;
/* Create a gsl_histogram from a freq_tab */
static struct histogram *
-freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft,
- const struct variable *var)
+freq_tab_to_hist (const struct frq_proc *frq, const struct var_freqs *vf)
{
- double x_min, x_max, valid_freq;
- int i;
- double bin_width;
- struct histogram *histogram;
- double iqr;
-
/* Find out the extremes of the x value, within the range to be included in
the histogram, and sum the total frequency of those values. */
- x_min = DBL_MAX;
- x_max = -DBL_MAX;
- valid_freq = 0;
- for (i = 0; i < ft->n_valid; i++)
+ double x_min = DBL_MAX;
+ double x_max = -DBL_MAX;
+ double valid_freq = 0;
+ for (int i = 0; i < vf->tab.n_valid; i++)
{
- const struct freq *f = &ft->valid[i];
- if (chart_includes_value (frq->hist, var, f->values))
+ const struct freq *f = &vf->tab.valid[i];
+ if (chart_includes_value (frq->hist, vf->var, f->values))
{
x_min = MIN (x_min, f->values[0].f);
x_max = MAX (x_max, f->values[0].f);
if (valid_freq <= 0)
return NULL;
- iqr = calculate_iqr (frq);
-
- if (iqr > 0)
- /* Freedman-Diaconis' choice of bin width. */
- bin_width = 2 * iqr / pow (valid_freq, 1.0 / 3.0);
+ double iqr = calculate_iqr (frq, vf);
- else
- /* Sturges Rule */
- bin_width = (x_max - x_min) / (1 + log2 (valid_freq));
-
- histogram = histogram_create (bin_width, x_min, x_max);
+ double bin_width =
+ (iqr > 0
+ ? 2 * iqr / pow (valid_freq, 1.0 / 3.0) /* Freedman-Diaconis. */
+ : (x_max - x_min) / (1 + log2 (valid_freq))); /* Sturges */
+ struct histogram *histogram = histogram_create (bin_width, x_min, x_max);
if (histogram == NULL)
return NULL;
- for (i = 0; i < ft->n_valid; i++)
+ for (int i = 0; i < vf->tab.n_valid; i++)
{
- const struct freq *f = &ft->valid[i];
- if (chart_includes_value (frq->hist, var, f->values))
+ const struct freq *f = &vf->tab.valid[i];
+ if (chart_includes_value (frq->hist, vf->var, f->values))
histogram_add (histogram, f->values[0].f, f->count);
}
d[FRQ_ST_SEMEAN] = d[FRQ_ST_STDDEV] / sqrt (W);
d[FRQ_ST_SESKEWNESS] = calc_seskew (W);
d[FRQ_ST_SEKURTOSIS] = calc_sekurt (W);
- d[FRQ_ST_MEDIAN] = frq->median ? frq->median->value : SYSMIS;
+ d[FRQ_ST_MEDIAN] = (frq->median_idx != SIZE_MAX
+ ? vf->percentiles[frq->median_idx]
+ : SYSMIS);
}
static bool
return true;
}
+\f
+struct frq_stats_table /* Statistics table state that frq_stats_table_submit() receives and returns. */
+ {
+ struct pivot_table *table; /* Pivot table that the statistics values are put into. */
+ struct pivot_splits *splits; /* From pivot_splits_create(); frq_stats_table_submit() destroys the whole struct at once when this is null. */
+ };
/* Displays a table of all the statistics requested. */
-static void
-dump_statistics (const struct frq_proc *frq, const struct variable *wv)
+static struct frq_stats_table *
+frq_stats_table_create (const struct frq_proc *frq,
+ const struct dictionary *dict,
+ const struct variable *wv)
{
if (all_string_variables (frq))
- return;
+ return NULL;
struct pivot_table *table = pivot_table_create (N_("Statistics"));
pivot_table_set_weight_var (table, wv);
struct pivot_dimension *variables
= pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Variables"));
+ for (size_t i = 0; i < frq->n_vars; i++)
+ if (!var_is_alpha (frq->vars[i].var))
+ pivot_category_create_leaf (variables->root,
+ pivot_value_new_variable (frq->vars[i].var));
struct pivot_dimension *statistics = pivot_dimension_create (
table, PIVOT_AXIS_ROW, N_("Statistics"));
pc->p * 100.0));
}
+ struct pivot_splits *splits = pivot_splits_create (table, PIVOT_AXIS_ROW,
+ dict);
+
+ struct frq_stats_table *fst = xmalloc (sizeof *fst);
+ *fst = (struct frq_stats_table) { .table = table, .splits = splits };
+ return fst;
+}
+
+static struct frq_stats_table *
+frq_stats_table_submit (struct frq_stats_table *fst,
+ const struct frq_proc *frq,
+ const struct dictionary *dict,
+ const struct variable *wv,
+ const struct ccase *example)
+{
+ if (!fst)
+ {
+ fst = frq_stats_table_create (frq, dict, wv);
+ if (!fst)
+ return NULL;
+ }
+ pivot_splits_new_split (fst->splits, example);
+
+ int var_idx = 0;
for (size_t i = 0; i < frq->n_vars; i++)
{
struct var_freqs *vf = &frq->vars[i];
const struct freq_tab *ft = &vf->tab;
- int var_idx = pivot_category_create_leaf (
- variables->root, pivot_value_new_variable (vf->var));
-
int row = 0;
- pivot_table_put2 (table, var_idx, row++,
+ pivot_splits_put2 (fst->splits, fst->table, var_idx, row++,
pivot_value_new_number (ft->valid_cases));
- pivot_table_put2 (table, var_idx, row++,
+ pivot_splits_put2 (fst->splits, fst->table, var_idx, row++,
pivot_value_new_number (
ft->total_cases - ft->valid_cases));
= (j == FRQ_ST_MODE || j == FRQ_ST_MINIMUM || j == FRQ_ST_MAXIMUM
? pivot_value_new_var_value (vf->var, &v)
: pivot_value_new_number (v.f));
- pivot_table_put2 (table, var_idx, row++, pv);
+ pivot_splits_put2 (fst->splits, fst->table, var_idx, row++, pv);
}
for (size_t j = 0; j < frq->n_percentiles; j++)
if (!pc->show)
continue;
- union value v = { .f = vf->tab.n_valid ? pc->value : SYSMIS };
- pivot_table_put2 (table, var_idx, row++,
- pivot_value_new_var_value (vf->var, &v));
+ union value v = {
+ .f = vf->tab.n_valid ? vf->percentiles[j] : SYSMIS
+ };
+ pivot_splits_put2 (fst->splits, fst->table, var_idx, row++,
+ pivot_value_new_var_value (vf->var, &v));
}
+
+ var_idx++;
}
- pivot_table_submit (table);
+ if (!fst->splits)
+ {
+ frq_stats_table_destroy (fst);
+ return NULL;
+ }
+ return fst;
+}
+/* Submits the pivot table held by FST, destroys the splits state held by FST,
+   and frees FST itself.  Does nothing if FST is null. */
+static void
+frq_stats_table_destroy (struct frq_stats_table *fst)
+{
+ if (!fst)
+ return;
+
+ pivot_table_submit (fst->table); /* Despite the function name, this is where the table is handed off for output. */
+ pivot_splits_destroy (fst->splits);
+ free (fst);
}