#include <gsl/gsl_histogram.h>
#include <data/case.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/format.h>
#include <data/procedure.h>
#include <libpspp/hash.h>
#include <libpspp/magic.h>
#include <libpspp/message.h>
-#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <libpspp/pool.h>
#include <libpspp/str.h>
FREQUENCIES (frq_):
*+variables=custom;
+format=cond:condense/onepage(*n:onepage_limit,"%s>=0")/!standard,
- table:limit(n:limit,"%s>0")/notable/!table,
+ table:limit(n:limit,"%s>0")/notable/!table,
labels:!labels/nolabels,
sort:!avalue/dvalue/afreq/dfreq,
spaces:!single/double,
double value; /* the %ile's value */
double x1; /* The datum value <= the percentile */
double x2; /* The datum value >= the percentile */
- int flag;
+ int flag;
int flag2; /* Set to 1 if this percentile value has been found */
};
static struct percentile *percentiles;
static int n_percentiles;
-static int implicit_50th ;
+static int implicit_50th ;
/* Groups of statistics. */
#define BI BIT_INDEX
/* Variables for which to calculate statistics. */
static size_t n_variables;
-static struct variable **v_variables;
+static const struct variable **v_variables;
/* Arenas used to store semi-permanent storage. */
static struct pool *int_pool; /* Integer mode. */
static void determine_charts (void);
-static void calc_stats (struct variable *v, double d[frq_n_stats]);
+static void calc_stats (const struct variable *v, double d[frq_n_stats]);
-static void precalc (const struct ccase *, void *, const struct dataset *);
-static bool calc (const struct ccase *, void *, const struct dataset *);
-static bool postcalc (void *, const struct dataset *);
+static void precalc (struct casereader *, struct dataset *);
+static void calc (const struct ccase *, const struct dataset *);
+static void postcalc (void);
-static void postprocess_freq_tab (struct variable *);
-static void dump_full (struct variable *);
-static void dump_condensed (struct variable *);
-static void dump_statistics (struct variable *, int show_varname);
-static void cleanup_freq_tab (struct variable *);
+static void postprocess_freq_tab (const struct variable *);
+static void dump_full (const struct variable *);
+static void dump_condensed (const struct variable *);
+static void dump_statistics (const struct variable *, int show_varname);
+static void cleanup_freq_tab (const struct variable *);
static hsh_compare_func compare_value_numeric_a, compare_value_alpha_a;
static hsh_compare_func compare_value_numeric_d, compare_value_alpha_d;
static void do_piechart(const struct variable *var,
const struct freq_tab *frq_tab);
-gsl_histogram *
+gsl_histogram *
freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var);
static int
internal_cmd_frequencies (struct lexer *lexer, struct dataset *ds)
{
- int i;
+ struct casegrouper *grouper;
+ struct casereader *input, *group;
bool ok;
+ int i;
n_percentiles = 0;
percentiles = NULL;
cmd.sort = FRQ_AVALUE;
/* Work out what percentiles need to be calculated */
- if ( cmd.sbc_percentiles )
+ if ( cmd.sbc_percentiles )
{
- for ( i = 0 ; i < MAXLISTS ; ++i )
+ for ( i = 0 ; i < MAXLISTS ; ++i )
{
int pl;
subc_list_double *ptl_list = &cmd.dl_percentiles[i];
add_percentile (subc_list_double_at(ptl_list, pl) / 100.0 );
}
}
- if ( cmd.sbc_ntiles )
+ if ( cmd.sbc_ntiles )
{
- for ( i = 0 ; i < cmd.sbc_ntiles ; ++i )
+ for ( i = 0 ; i < cmd.sbc_ntiles ; ++i )
{
int j;
- for (j = 0; j <= cmd.n_ntiles[i]; ++j )
+ for (j = 0; j <= cmd.n_ntiles[i]; ++j )
add_percentile (j / (double) cmd.n_ntiles[i]);
}
}
-
+
/* Do it! */
- ok = procedure_with_splits (ds, precalc, calc, postcalc, NULL);
+ input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
+ NULL, NULL);
+ grouper = casegrouper_create_splits (input, dataset_dict (ds));
+ for (; casegrouper_get_next_group (grouper, &group);
+ casereader_destroy (group))
+ {
+ struct ccase c;
+
+ precalc (group, ds);
+ for (; casereader_read (group, &c); case_destroy (&c))
+ calc (&c, ds);
+ postcalc ();
+ }
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
free_frequencies(&cmd);
static void
determine_charts (void)
{
- int count = (!!cmd.sbc_histogram) + (!!cmd.sbc_barchart) +
+ int count = (!!cmd.sbc_histogram) + (!!cmd.sbc_barchart) +
(!!cmd.sbc_hbar) + (!!cmd.sbc_piechart);
if (!count)
}
/* Add data from case C to the frequency table. */
-static bool
-calc (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
+static void
+calc (const struct ccase *c, const struct dataset *ds)
{
- double weight;
+ double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
size_t i;
- bool bad_warn = true;
-
- weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
for (i = 0; i < n_variables; i++)
{
struct freq *fp = pool_alloc (gen_pool, sizeof *fp);
fp->count = weight;
fp->value = pool_clone (gen_pool,
- val, MAX (MAX_SHORT_STRING, vf->width));
+ val,
+ MAX (MAX_SHORT_STRING, vf->width));
*fpp = fp;
}
}
NOT_REACHED ();
}
}
- return true;
}
/* Prepares each variable that is the target of FREQUENCIES by setting
up its hash table. */
static void
-precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds)
+precalc (struct casereader *input, struct dataset *ds)
{
+ struct ccase c;
size_t i;
- output_split_file_values (ds, first);
+ if (!casereader_peek (input, 0, &c))
+ return;
+ output_split_file_values (ds, &c);
+ case_destroy (&c);
pool_destroy (gen_pool);
gen_pool = pool_create ();
-
+
for (i = 0; i < n_variables; i++)
{
- struct variable *v = v_variables[i];
+ const struct variable *v = v_variables[i];
struct freq_tab *ft = &get_var_freqs (v)->tab;
if (ft->mode == FRQM_GENERAL)
/* Finishes up with the variables after frequencies have been
calculated. Displays statistics, percentiles, ... */
-static bool
-postcalc (void *aux UNUSED, const struct dataset *ds UNUSED)
+static void
+postcalc (void)
{
size_t i;
for (i = 0; i < n_variables; i++)
{
- struct variable *v = v_variables[i];
+ const struct variable *v = v_variables[i];
struct var_freqs *vf = get_var_freqs (v);
struct freq_tab *ft = &vf->tab;
int n_categories;
- if ( chart == GFT_HIST)
+ if ( chart == GFT_HIST)
{
double d[frq_n_stats];
struct normal_curve norm;
}
- if ( chart == GFT_PIE)
+ if ( chart == GFT_PIE)
{
do_piechart(v_variables[i], ft);
}
cleanup_freq_tab (v);
}
-
- return true;
}
/* Returns the comparison function that should be used for
sorting a frequency table by FRQ_SORT using VAR_TYPE
variables. */
static hsh_compare_func *
-get_freq_comparator (int frq_sort, enum var_type var_type)
+get_freq_comparator (int frq_sort, enum var_type var_type)
{
bool is_numeric = var_type == VAR_NUMERIC;
switch (frq_sort)
/* Returns true iff the value in struct freq F is non-missing
for variable V. */
static bool
-not_missing (const void *f_, const void *v_)
+not_missing (const void *f_, const void *v_)
{
const struct freq *f = f_;
const struct variable *v = v_;
- return !var_is_value_missing (v, f->value);
+ return !var_is_value_missing (v, f->value, MV_ANY);
}
/* Summarizes the frequency table data for variable V. */
static void
-postprocess_freq_tab (struct variable *v)
+postprocess_freq_tab (const struct variable *v)
{
hsh_compare_func *compare;
struct freq_tab *ft;
/* Copy dereferenced data into freqs. */
freqs = xnmalloc (count, sizeof *freqs);
- for (i = 0; i < count; i++)
+ for (i = 0; i < count; i++)
{
struct freq *f = data[i];
- freqs[i] = *f;
+ freqs[i] = *f;
}
/* Put data into ft. */
/* Summary statistics. */
ft->valid_cases = 0.0;
- for(i = 0 ; i < ft->n_valid ; ++i )
+ for(i = 0 ; i < ft->n_valid ; ++i )
{
f = &ft->valid[i];
ft->valid_cases += f->count;
}
- ft->total_cases = ft->valid_cases ;
- for(i = 0 ; i < ft->n_missing ; ++i )
+ ft->total_cases = ft->valid_cases ;
+ for(i = 0 ; i < ft->n_missing ; ++i )
{
f = &ft->missing[i];
ft->total_cases += f->count;
/* Frees the frequency table for variable V. */
static void
-cleanup_freq_tab (struct variable *v)
+cleanup_freq_tab (const struct variable *v)
{
struct freq_tab *ft = &get_var_freqs (v)->tab;
assert (ft->mode == FRQM_GENERAL);
|| dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL))
return 2;
- if (!parse_variables (lexer, dataset_dict (ds), &v_variables, &n_variables,
+ if (!parse_variables_const (lexer, dataset_dict (ds), &v_variables, &n_variables,
PV_APPEND | PV_NO_SCRATCH))
return 0;
for (i = old_n_variables; i < n_variables; i++)
{
- struct variable *v = v_variables[i];
+ const struct variable *v = v_variables[i];
struct var_freqs *vf;
if (var_get_aux (v) != NULL)
vf->tab.vector = pool_nalloc (int_pool,
max - min + 1, sizeof *vf->tab.vector);
}
- else
+ else
vf->tab.vector = NULL;
vf->n_groups = 0;
vf->groups = NULL;
vf->width = var_get_width (v);
vf->print = *var_get_print_format (v);
- if (vf->width > MAX_SHORT_STRING && get_algorithm () == COMPATIBLE)
+ if (vf->width > MAX_SHORT_STRING && get_algorithm () == COMPATIBLE)
{
enum fmt_type type = var_get_print_format (v)->type;
vf->width = MAX_SHORT_STRING;
/* Variable list. */
size_t n;
- struct variable **v;
+ const struct variable **v;
- if (!parse_variables (lexer, dataset_dict (ds), &v, &n,
+ if (!parse_variables_const (lexer, dataset_dict (ds), &v, &n,
PV_NO_DUPLICATE | PV_NUMERIC))
return 0;
if (lex_match (lexer, '('))
return 0;
}
}
- else
+ else
{
nl = 0;
dl = NULL;
if (var_get_aux (v[i]) == NULL)
msg (SE, _("Variables %s specified on GROUPED but not on "
"VARIABLES."), var_get_name (v[i]));
- else
+ else
{
struct var_freqs *vf = get_var_freqs (v[i]);
-
+
if (vf->groups != NULL)
msg (SE, _("Variables %s specified multiple times on GROUPED "
"subcommand."), var_get_name (v[i]));
for (i = 0; i < n_percentiles; i++)
{
/* Do nothing if it's already in the list */
- if ( fabs(x - percentiles[i].p) < DBL_EPSILON )
+ if ( fabs(x - percentiles[i].p) < DBL_EPSILON )
return;
if (x < percentiles[i].p)
/* Displays a full frequency table for variable V. */
static void
-dump_full (struct variable *v)
+dump_full (const struct variable *v)
{
int n_categories;
struct var_freqs *vf;
/* Display condensed frequency table for variable V. */
static void
-dump_condensed (struct variable *v)
+dump_condensed (const struct variable *v)
{
int n_categories;
struct var_freqs *vf;
/* Calculates all the pertinent statistics for variable V, putting
them in array D[]. FIXME: This could be made much more optimal. */
static void
-calc_stats (struct variable *v, double d[frq_n_stats])
+calc_stats (const struct variable *v, double d[frq_n_stats])
{
struct freq_tab *ft = &get_var_freqs (v)->tab;
double W = ft->valid_cases;
struct moments *m;
- struct freq *f=0;
+ struct freq *f=0;
int most_often;
double X_mode;
/* Calculate percentiles. */
- /* If the 50th percentile was not explicitly requested then we must
+ /* If the 50th percentile was not explicitly requested then we must
calculate it anyway --- it's the median */
median_value = 0 ;
- for (i = 0; i < n_percentiles; i++)
+ for (i = 0; i < n_percentiles; i++)
{
if (percentiles[i].p == 0.5)
{
}
}
- if ( 0 == median_value )
+ if ( 0 == median_value )
{
add_percentile (0.5);
implicit_50th = 1;
}
- for (i = 0; i < n_percentiles; i++)
+ for (i = 0; i < n_percentiles; i++)
{
percentiles[i].flag = 0;
percentiles[i].flag2 = 0;
for (idx = 0; idx < ft->n_valid; ++idx)
{
static double prev_value = SYSMIS;
- f = &ft->valid[idx];
+ f = &ft->valid[idx];
rank += f->count ;
- for (i = 0; i < n_percentiles; i++)
+ for (i = 0; i < n_percentiles; i++)
{
double tp;
- if ( percentiles[i].flag2 ) continue ;
+ if ( percentiles[i].flag2 ) continue ;
- if ( get_algorithm() != COMPATIBLE )
- tp =
+ if ( get_algorithm() != COMPATIBLE )
+ tp =
(ft->valid_cases - 1) * percentiles[i].p;
else
- tp =
+ tp =
(ft->valid_cases + 1) * percentiles[i].p - 1;
- if ( percentiles[i].flag )
+ if ( percentiles[i].flag )
{
percentiles[i].x2 = f->value[0].f;
percentiles[i].x1 = prev_value;
continue;
}
- if (rank > tp )
+ if (rank > tp )
{
- if ( f->count > 1 && rank - (f->count - 1) > tp )
+ if ( f->count > 1 && rank - (f->count - 1) > tp )
{
percentiles[i].x2 = percentiles[i].x1 = f->value[0].f;
percentiles[i].flag2 = 1;
prev_value = f->value[0].f;
}
- for (i = 0; i < n_percentiles; i++)
+ for (i = 0; i < n_percentiles; i++)
{
/* Catches the case when p == 100% */
- if ( ! percentiles[i].flag2 )
+ if ( ! percentiles[i].flag2 )
percentiles[i].x1 = percentiles[i].x2 = f->value[0].f;
/*
*/
}
- for (i = 0; i < n_percentiles; i++)
+ for (i = 0; i < n_percentiles; i++)
{
struct freq_tab *ft = &get_var_freqs (v)->tab;
double s;
double dummy;
- if ( get_algorithm() != COMPATIBLE )
+ if ( get_algorithm() != COMPATIBLE )
{
s = modf((ft->valid_cases - 1) * percentiles[i].p , &dummy);
}
s = modf((ft->valid_cases + 1) * percentiles[i].p -1, &dummy);
}
- percentiles[i].value = percentiles[i].x1 +
- ( percentiles[i].x2 - percentiles[i].x1) * s ;
+ percentiles[i].value = percentiles[i].x1 +
+ ( percentiles[i].x2 - percentiles[i].x1) * s ;
- if ( percentiles[i].p == 0.50)
- median_value = &percentiles[i].value;
+ if ( percentiles[i].p == 0.50)
+ median_value = &percentiles[i].value;
}
X_mode = SYSMIS;
for (f = ft->valid; f < ft->missing; f++)
{
- if (most_often < f->count)
+ if (most_often < f->count)
{
most_often = f->count;
X_mode = f->value[0].f;
}
- else if (most_often == f->count)
+ else if (most_often == f->count)
{
/* A duplicate mode is undefined.
FIXME: keep track of *all* the modes. */
moments_calculate (m, NULL, &d[frq_mean], &d[frq_variance],
&d[frq_skew], &d[frq_kurt]);
moments_destroy (m);
-
+
/* Formulas below are taken from _SPSS Statistical Algorithms_. */
d[frq_min] = ft->valid[0].value[0].f;
d[frq_max] = ft->valid[ft->n_valid - 1].value[0].f;
/* Displays a table of all the statistics requested for variable V. */
static void
-dump_statistics (struct variable *v, int show_varname)
+dump_statistics (const struct variable *v, int show_varname)
{
struct freq_tab *ft;
double stat_value[frq_n_stats];
int n_explicit_percentiles = n_percentiles;
- if ( implicit_50th && n_percentiles > 0 )
+ if ( implicit_50th && n_percentiles > 0 )
--n_percentiles;
if (var_is_alpha (v))
tab_vline (t, TAL_1 , 2, 0, tab_nr(t) - 1);
tab_vline (t, TAL_GAP , 1, 0, tab_nr(t) - 1 ) ;
-
+
r=2; /* N missing and N valid are always dumped */
for (i = 0; i < frq_n_stats; i++)
tab_float(t, 2, 1, TAB_NONE, ft->total_cases - ft->valid_cases, 11, 0);
- for (i = 0; i < n_explicit_percentiles; i++, r++)
+ for (i = 0; i < n_explicit_percentiles; i++, r++)
{
- if ( i == 0 )
- {
+ if ( i == 0 )
+ {
tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Percentiles"));
}
struct freq *frq;
/* Find out the extremes of the x value */
- for ( frq = hsh_first(fh, &hi); frq != 0; frq = hsh_next(fh, &hi) )
+ for ( frq = hsh_first(fh, &hi); frq != 0; frq = hsh_next(fh, &hi) )
{
- if ( var_is_value_missing(var, frq->value))
+ if (var_is_value_missing(var, frq->value, MV_ANY))
continue;
if ( frq->value[0].f < x_min ) x_min = frq->value[0].f ;
hist = histogram_create(bins, x_min, x_max);
- for( i = 0 ; i < ft->n_valid ; ++i )
+ for( i = 0 ; i < ft->n_valid ; ++i )
{
frq = &ft->valid[i];
gsl_histogram_accumulate(hist, frq->value[0].f, frq->count);
static struct slice *
-freq_tab_to_slice_array(const struct freq_tab *frq_tab,
+freq_tab_to_slice_array(const struct freq_tab *frq_tab,
const struct variable *var,
int *n_slices);
The caller is responsible for freeing slices
*/
static struct slice *
-freq_tab_to_slice_array(const struct freq_tab *frq_tab,
+freq_tab_to_slice_array(const struct freq_tab *frq_tab,
const struct variable *var,
int *n_slices)
{
struct slice *slices;
*n_slices = frq_tab->n_valid;
-
+
slices = xnmalloc (*n_slices, sizeof *slices);
- for (i = 0 ; i < *n_slices ; ++i )
+ for (i = 0 ; i < *n_slices ; ++i )
{
const struct freq *frq = &frq_tab->valid[i];
}
-/*
+/*
Local Variables:
mode: c
End: