#include <stdlib.h>
#include <gsl/gsl_histogram.h>
-#include <data/case.h>
-#include <data/casegrouper.h>
-#include <data/casereader.h>
-#include <data/dictionary.h>
-#include <data/format.h>
-#include <data/procedure.h>
-#include <data/settings.h>
-#include <data/value-labels.h>
-#include <data/variable.h>
-#include <language/command.h>
-#include <language/dictionary/split-file.h>
-#include <language/lexer/lexer.h>
-#include <libpspp/array.h>
-#include <libpspp/bit-vector.h>
-#include <libpspp/compiler.h>
-#include <libpspp/hash.h>
-#include <libpspp/message.h>
-#include <libpspp/misc.h>
-#include <libpspp/pool.h>
-#include <libpspp/str.h>
-#include <math/histogram.h>
-#include <math/moments.h>
-#include <output/chart-item.h>
-#include <output/charts/piechart.h>
-#include <output/charts/plot-hist.h>
-#include <output/tab.h>
-
-#include "freq.h"
-
-#include "minmax.h"
-#include "xalloc.h"
+#include "data/case.h"
+#include "data/casegrouper.h"
+#include "data/casereader.h"
+#include "data/dictionary.h"
+#include "data/format.h"
+#include "data/procedure.h"
+#include "data/settings.h"
+#include "data/value-labels.h"
+#include "data/variable.h"
+#include "language/command.h"
+#include "language/dictionary/split-file.h"
+#include "language/lexer/lexer.h"
+#include "language/stats/freq.h"
+#include "libpspp/array.h"
+#include "libpspp/bit-vector.h"
+#include "libpspp/compiler.h"
+#include "libpspp/hmap.h"
+#include "libpspp/message.h"
+#include "libpspp/misc.h"
+#include "libpspp/pool.h"
+#include "libpspp/str.h"
+#include "math/histogram.h"
+#include "math/moments.h"
+#include "output/chart-item.h"
+#include "output/charts/piechart.h"
+#include "output/charts/plot-hist.h"
+#include "output/tab.h"
+
+#include "gl/minmax.h"
+#include "gl/xalloc.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
/* Statistics. */
enum
{
- frq_mean = 0, frq_semean, frq_median, frq_mode, frq_stddev, frq_variance,
- frq_kurt, frq_sekurt, frq_skew, frq_seskew, frq_range, frq_min, frq_max,
- frq_sum, frq_n_stats
+ FRQ_MEAN, FRQ_SEMEAN, FRQ_MEDIAN, FRQ_MODE, FRQ_STDDEV, FRQ_VARIANCE,
+ FRQ_KURT, FRQ_SEKURT, FRQ_SKEW, FRQ_SESKEW, FRQ_RANGE, FRQ_MIN, FRQ_MAX,
+ FRQ_SUM, FRQ_N_STATS /* Enumerators are indices (passed to BIT_INDEX to form bits of `stats'); FRQ_N_STATS is their count and sizes the stat arrays. */
};
/* Description of a statistic. */
};
/* Table of statistics, indexed by dsc_*. */
-static const struct frq_info st_name[frq_n_stats + 1] =
+static const struct frq_info st_name[FRQ_N_STATS + 1] =
{
{FRQ_ST_MEAN, N_("Mean")},
{FRQ_ST_SEMEAN, N_("S.E. Mean")},
/* Groups of statistics. */
#define BI BIT_INDEX
-#define frq_default \
- (BI (frq_mean) | BI (frq_stddev) | BI (frq_min) | BI (frq_max))
-#define frq_all \
- (BI (frq_sum) | BI(frq_min) | BI(frq_max) \
- | BI(frq_mean) | BI(frq_semean) | BI(frq_stddev) \
- | BI(frq_variance) | BI(frq_kurt) | BI(frq_sekurt) \
- | BI(frq_skew) | BI(frq_seskew) | BI(frq_range) \
- | BI(frq_range) | BI(frq_mode) | BI(frq_median))
+/* Mask OR-ed into `stats' when STATISTICS is omitted or DEFAULT is
+ requested: mean, standard deviation, minimum and maximum. */
+#define FRQ_DEFAULT \
+ (BI (FRQ_MEAN) | BI (FRQ_STDDEV) | BI (FRQ_MIN) | BI (FRQ_MAX))
+/* Mask OR-ed into `stats' when ALL is requested: one bit for each of the
+ FRQ_N_STATS statistics, each listed exactly once. */
+#define FRQ_ALL \
+ (BI (FRQ_SUM) | BI (FRQ_MIN) | BI (FRQ_MAX) \
+ | BI (FRQ_MEAN) | BI (FRQ_SEMEAN) | BI (FRQ_STDDEV) \
+ | BI (FRQ_VARIANCE) | BI (FRQ_KURT) | BI (FRQ_SEKURT) \
+ | BI (FRQ_SKEW) | BI (FRQ_SESKEW) | BI (FRQ_RANGE) \
+ | BI (FRQ_MODE) | BI (FRQ_MEDIAN))
/* Statistics; number of statistics. */
static unsigned long stats;
static const struct variable **v_variables;
/* Pools. */
-static struct pool *data_pool; /* For per-SPLIT FILE group data. */
static struct pool *syntax_pool; /* For syntax-related data. */
/* Frequency tables. */
/* Entire frequency table. */
struct freq_tab
{
- struct hsh_table *data; /* Undifferentiated data. */
- struct freq_mutable *valid; /* Valid freqs. */
+ struct hmap data; /* Hash table for accumulating counts. */
+ struct freq *valid; /* Valid freqs. */
int n_valid; /* Number of total freqs. */
- const struct dictionary *dict; /* The dict from whence entries in the table
- come */
+ const struct dictionary *dict; /* Source of entries in the table. */
- struct freq_mutable *missing; /* Missing freqs. */
+ struct freq *missing; /* Missing freqs. */
int n_missing; /* Number of missing freqs. */
/* Statistics. */
double *groups; /* Groups. */
/* Statistics. */
- double stat[frq_n_stats];
+ double stat[FRQ_N_STATS];
/* Variable attributes. */
int width;
static void determine_charts (void);
-static void calc_stats (const struct variable *v, double d[frq_n_stats]);
+static void calc_stats (const struct variable *v, double d[FRQ_N_STATS]);
static void precalc (struct casereader *, struct dataset *);
static void calc (const struct ccase *, const struct dataset *);
static void dump_statistics (const struct variable *, const struct variable *);
static void cleanup_freq_tab (const struct variable *);
-static hsh_compare_func compare_value_numeric_a, compare_value_alpha_a;
-static hsh_compare_func compare_value_numeric_d, compare_value_alpha_d;
-static hsh_compare_func compare_freq_numeric_a, compare_freq_alpha_a;
-static hsh_compare_func compare_freq_numeric_d, compare_freq_alpha_d;
+static algo_compare_func compare_value_numeric_a, compare_value_alpha_a;
+static algo_compare_func compare_value_numeric_d, compare_value_alpha_d;
+static algo_compare_func compare_freq_numeric_a, compare_freq_alpha_a;
+static algo_compare_func compare_freq_numeric_d, compare_freq_alpha_d;
static void do_piechart(const struct variable *var,
result = internal_cmd_frequencies (lexer, ds);
pool_destroy (syntax_pool);
syntax_pool=0;
- pool_destroy (data_pool);
- data_pool=0;
free (v_variables);
v_variables=0;
return result;
/* Figure out statistics to calculate. */
stats = 0;
if (cmd.a_statistics[FRQ_ST_DEFAULT] || !cmd.sbc_statistics)
- stats |= frq_default;
+ stats |= FRQ_DEFAULT;
if (cmd.a_statistics[FRQ_ST_ALL])
- stats |= frq_all;
+ stats |= FRQ_ALL;
if (cmd.sort != FRQ_AVALUE && cmd.sort != FRQ_DVALUE)
- stats &= ~BIT_INDEX (frq_median);
- for (i = 0; i < frq_n_stats; i++)
+ stats &= ~BIT_INDEX (FRQ_MEDIAN);
+ for (i = 0; i < FRQ_N_STATS; i++)
if (cmd.a_statistics[st_name[i].st_indx])
stats |= BIT_INDEX (i);
- if (stats & frq_kurt)
- stats |= BIT_INDEX (frq_sekurt);
- if (stats & frq_skew)
- stats |= BIT_INDEX (frq_seskew);
+ if (stats & FRQ_KURT)
+ stats |= BIT_INDEX (FRQ_SEKURT);
+ if (stats & FRQ_SKEW)
+ stats |= BIT_INDEX (FRQ_SESKEW);
/* Calculate n_stats. */
n_stats = 0;
- for (i = 0; i < frq_n_stats; i++)
+ for (i = 0; i < FRQ_N_STATS; i++)
if ((stats & BIT_INDEX (i)))
n_stats++;
add_percentile (j / (double) cmd.n_ntiles[i], true);
}
}
- if (stats & BIT_INDEX (frq_median))
+ if (stats & BIT_INDEX (FRQ_MEDIAN))
{
/* Treat the median as the 50% percentile.
We output it in the percentiles table as "50 (Median)." */
add_percentile (0.5, true);
- stats &= ~BIT_INDEX (frq_median);
+ stats &= ~BIT_INDEX (FRQ_MEDIAN);
n_stats--;
}
if (cmd.sbc_histogram)
for (i = 0; i < n_variables; i++)
{
- const struct variable *v = v_variables[i];
- const union value *val = case_data (c, v);
- struct var_freqs *vf = get_var_freqs (v);
- struct freq_tab *ft = &vf->tab;
+ const struct variable *var = v_variables[i];
+ int width = var_get_width (var);
- struct freq_mutable target;
- struct freq_mutable **fpp;
+ const union value *value = case_data (c, var);
+ size_t hash = value_hash (value, width, 0);
- target.value = *val;
- fpp = (struct freq_mutable **) hsh_probe (ft->data, &target);
+ struct hmap *hmap = &get_var_freqs (var)->tab.data;
+ struct freq *f;
- if (*fpp != NULL)
- (*fpp)->count += weight;
- else
- {
- struct freq_mutable *fp = pool_alloc (data_pool, sizeof *fp);
- fp->count = weight;
- value_init_pool (data_pool, &fp->value, vf->width);
- value_copy (&fp->value, val, vf->width);
- *fpp = fp;
- }
+ f = freq_hmap_search (hmap, value, width, hash);
+ if (f == NULL)
+ f = freq_hmap_insert (hmap, value, width, hash);
+
+ f->count += weight;
}
}
case_unref (c);
}
- pool_destroy (data_pool);
- data_pool = pool_create ();
-
for (i = 0; i < n_variables; i++)
{
const struct variable *v = v_variables[i];
struct freq_tab *ft = &get_var_freqs (v)->tab;
- ft->data = hsh_create (16, compare_freq, hash_freq, NULL, v);
+ hmap_init (&ft->data);
}
}
if (cmd.sbc_histogram && var_is_numeric (v) && ft->n_valid > 0)
{
- double d[frq_n_stats];
+ double d[FRQ_N_STATS];
struct histogram *histogram;
calc_stats (v, d);
chart_item_submit (histogram_chart_create (
histogram->gsl_hist, var_to_string(v),
vf->tab.valid_cases,
- d[frq_mean],
- d[frq_stddev],
+ d[FRQ_MEAN],
+ d[FRQ_STDDEV],
hist.draw_normal));
statistic_destroy (&histogram->parent);
/* Returns the comparison function that should be used for
sorting a frequency table by FRQ_SORT using VAL_TYPE
values. */
-static hsh_compare_func *
+static algo_compare_func *
get_freq_comparator (int frq_sort, enum val_type val_type)
{
bool is_numeric = val_type == VAL_NUMERIC;
}
}
-/* Returns true iff the value in struct freq_mutable F is non-missing
+/* Returns true iff the value in struct freq F is non-missing
for variable V. */
static bool
not_missing (const void *f_, const void *v_)
{
- const struct freq_mutable *f = f_;
+ const struct freq *f = f_;
const struct variable *v = v_;
return !var_is_value_missing (v, &f->value, MV_ANY);
static void
postprocess_freq_tab (const struct variable *v)
{
- hsh_compare_func *compare;
+ algo_compare_func *compare;
struct freq_tab *ft;
size_t count;
- void *const *data;
- struct freq_mutable *freqs, *f;
+ struct freq *freqs, *f;
size_t i;
ft = &get_var_freqs (v)->tab;
compare = get_freq_comparator (cmd.sort, var_get_type (v));
/* Extract data from hash table. */
- count = hsh_count (ft->data);
- data = hsh_data (ft->data);
-
- /* Copy dereferenced data into freqs. */
- freqs = xnmalloc (count, sizeof *freqs);
- for (i = 0; i < count; i++)
- {
- struct freq_mutable *f = data[i];
- freqs[i] = *f;
- }
+ count = hmap_count (&ft->data);
+ freqs = freq_hmap_extract (&ft->data);
/* Put data into ft. */
ft->valid = freqs;
static void
cleanup_freq_tab (const struct variable *v)
{
- struct freq_tab *ft = &get_var_freqs (v)->tab;
- free (ft->valid);
- hsh_destroy (ft->data);
+ struct var_freqs *vf = get_var_freqs (v);
+ free (vf->tab.valid);
+ freq_hmap_destroy (&vf->tab.data, vf->width);
}
/* Parses the VARIABLES subcommand, adding to
static int
compare_value_numeric_a (const void *a_, const void *b_, const void *aux UNUSED)
{
- const struct freq_mutable *a = a_;
- const struct freq_mutable *b = b_;
+ const struct freq *a = a_;
+ const struct freq *b = b_;
if (a->value.f > b->value.f)
return 1;
static int
compare_value_alpha_a (const void *a_, const void *b_, const void *v_)
{
- const struct freq_mutable *a = a_;
- const struct freq_mutable *b = b_;
+ const struct freq *a = a_;
+ const struct freq *b = b_;
const struct variable *v = v_;
struct var_freqs *vf = get_var_freqs (v);
static int
compare_freq_numeric_a (const void *a_, const void *b_, const void *aux UNUSED)
{
- const struct freq_mutable *a = a_;
- const struct freq_mutable *b = b_;
+ const struct freq *a = a_;
+ const struct freq *b = b_;
if (a->count > b->count)
return 1;
static int
compare_freq_alpha_a (const void *a_, const void *b_, const void *v_)
{
- const struct freq_mutable *a = a_;
- const struct freq_mutable *b = b_;
+ const struct freq *a = a_;
+ const struct freq *b = b_;
const struct variable *v = v_;
struct var_freqs *vf = get_var_freqs (v);
static int
compare_freq_numeric_d (const void *a_, const void *b_, const void *aux UNUSED)
{
- const struct freq_mutable *a = a_;
- const struct freq_mutable *b = b_;
+ const struct freq *a = a_;
+ const struct freq *b = b_;
if (a->count > b->count)
return -1;
static int
compare_freq_alpha_d (const void *a_, const void *b_, const void *v_)
{
- const struct freq_mutable *a = a_;
- const struct freq_mutable *b = b_;
+ const struct freq *a = a_;
+ const struct freq *b = b_;
const struct variable *v = v_;
struct var_freqs *vf = get_var_freqs (v);
int n_categories;
struct var_freqs *vf;
struct freq_tab *ft;
- struct freq_mutable *f;
+ struct freq *f;
struct tab_table *t;
int r, x;
double cum_total = 0.0;
/* Calculates all the pertinent statistics for variable V, putting them in
array D[]. */
static void
-calc_stats (const struct variable *v, double d[frq_n_stats])
+calc_stats (const struct variable *v, double d[FRQ_N_STATS])
{
struct freq_tab *ft = &get_var_freqs (v)->tab;
double W = ft->valid_cases;
struct moments *m;
- struct freq_mutable *f=0;
+ struct freq *f=0;
int most_often;
double X_mode;
moments_pass_one (m, f->value.f, f->count);
for (f = ft->valid; f < ft->missing; f++)
moments_pass_two (m, f->value.f, f->count);
- moments_calculate (m, NULL, &d[frq_mean], &d[frq_variance],
- &d[frq_skew], &d[frq_kurt]);
+ moments_calculate (m, NULL, &d[FRQ_MEAN], &d[FRQ_VARIANCE],
+ &d[FRQ_SKEW], &d[FRQ_KURT]);
moments_destroy (m);
/* Formulas below are taken from _SPSS Statistical Algorithms_. */
- d[frq_min] = ft->valid[0].value.f;
- d[frq_max] = ft->valid[ft->n_valid - 1].value.f;
- d[frq_mode] = X_mode;
- d[frq_range] = d[frq_max] - d[frq_min];
- d[frq_sum] = d[frq_mean] * W;
- d[frq_stddev] = sqrt (d[frq_variance]);
- d[frq_semean] = d[frq_stddev] / sqrt (W);
- d[frq_seskew] = calc_seskew (W);
- d[frq_sekurt] = calc_sekurt (W);
+ d[FRQ_MIN] = ft->valid[0].value.f;
+ d[FRQ_MAX] = ft->valid[ft->n_valid - 1].value.f;
+ d[FRQ_MODE] = X_mode;
+ d[FRQ_RANGE] = d[FRQ_MAX] - d[FRQ_MIN];
+ d[FRQ_SUM] = d[FRQ_MEAN] * W;
+ d[FRQ_STDDEV] = sqrt (d[FRQ_VARIANCE]);
+ d[FRQ_SEMEAN] = d[FRQ_STDDEV] / sqrt (W);
+ d[FRQ_SESKEW] = calc_seskew (W);
+ d[FRQ_SEKURT] = calc_sekurt (W);
}
/* Displays a table of all the statistics requested for variable V. */
{
const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0;
struct freq_tab *ft;
- double stat_value[frq_n_stats];
+ double stat_value[FRQ_N_STATS];
struct tab_table *t;
int i, r;
r=2; /* N missing and N valid are always dumped */
- for (i = 0; i < frq_n_stats; i++)
+ for (i = 0; i < FRQ_N_STATS; i++)
if (stats & BIT_INDEX (i))
{
tab_text (t, 0, r, TAB_LEFT | TAT_TITLE,
valid_freq = 0;
for (i = 0; i < ft->n_valid; i++)
{
- const struct freq_mutable *frq = &ft->valid[i];
+ const struct freq *frq = &ft->valid[i];
if (chart_includes_value (&hist, var, &frq->value))
{
x_min = MIN (x_min, frq->value.f);
histogram = histogram_create (bins, x_min, x_max);
for (i = 0; i < ft->n_valid; i++)
{
- const struct freq_mutable *frq = &ft->valid[i];
+ const struct freq *frq = &ft->valid[i];
if (chart_includes_value (&hist, var, &frq->value))
histogram_add (histogram, frq->value.f, frq->count);
}
}
static int
-add_slice (const struct freq_mutable *freq, const struct variable *var,
+add_slice (const struct freq *freq, const struct variable *var,
struct slice *slice)
{
if (chart_includes_value (&pie, var, &freq->value))