#include "libpspp/hmap.h"
#include "libpspp/message.h"
#include "libpspp/misc.h"
-#include "libpspp/pool.h"
#include "math/histogram.h"
#include "math/moments.h"
#include "math/chart-geometry.h"
-#include "output/chart-item.h"
#include "output/charts/barchart.h"
#include "output/charts/piechart.h"
#include "output/charts/plot-hist.h"
/* One percentile to calculate and possibly display.  P is stored as a
   proportion rather than a percentage: elsewhere in this file 0.5 is
   treated as the median and 0.25 and 0.75 as the quartiles. */
struct percentile
{
double p; /* The percentile to calculate, expressed as a proportion. */
- double value; /* the %ile's value */
bool show; /* True to show this percentile in the statistics box. */
};
/* Frequency table. */
struct freq_tab tab; /* Frequencies table to use. */
- /* Percentiles. */
- int n_groups; /* Number of groups. */
- double *groups; /* Groups. */
-
/* Statistics. */
double stat[FRQ_ST_count];
+ double *percentiles;
/* Variable attributes. */
int width;
struct frq_proc
{
- struct pool *pool;
-
struct var_freqs *vars;
size_t n_vars;
/* Percentiles to calculate and possibly display. */
struct percentile *percentiles;
- const struct percentile *median;
- int n_percentiles;
+ size_t median_idx;
+ size_t n_percentiles;
/* Frequency table display. */
long int max_categories; /* Maximum categories to show. */
/* Histogram and pie chart settings. */
struct frq_chart *hist, *pie, *bar;
+
+ bool warn;
};
}
/* Create a gsl_histogram from a freq_tab */
-static struct histogram *
-freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft,
- const struct variable *var);
+static struct histogram *freq_tab_to_hist (const struct frq_proc *,
+ const struct var_freqs *);
static void
put_freq_row (struct pivot_table *table, int var_idx,
const struct freq_tab *ft = &vf->tab;
struct pivot_table *table = pivot_table_create__ (pivot_value_new_variable (
- vf->var));
+ vf->var), "Frequencies");
pivot_table_set_weight_var (table, wv);
pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"),
/* Calculates all of the percentiles for VF within FRQ. */
static void
-calc_percentiles (const struct frq_proc *frq, const struct var_freqs *vf)
+calc_percentiles (const struct frq_proc *frq, struct var_freqs *vf)
{
+ if (!frq->n_percentiles)
+ return;
+
+ vf->percentiles = xnmalloc (frq->n_percentiles, sizeof *vf->percentiles);
+
const struct freq_tab *ft = &vf->tab;
- double W = ft->valid_cases;
- const struct freq *f;
- int percentile_idx = 0;
- double rank = 0;
+ const double W = ft->valid_cases;
+ size_t idx = 0;
- for (f = ft->valid; f < ft->missing; f++)
+ double rank = 0;
+ for (const struct freq *f = ft->valid; f < ft->missing; f++)
{
rank += f->count;
- for (; percentile_idx < frq->n_percentiles; percentile_idx++)
+ for (; idx < frq->n_percentiles; idx++)
{
- struct percentile *pc = &frq->percentiles[percentile_idx];
+ struct percentile *pc = &frq->percentiles[idx];
double tp;
tp = (settings_get_algorithm () == ENHANCED
break;
if (tp + 1 < rank || f + 1 >= ft->missing)
- pc->value = f->values[0].f;
+ vf->percentiles[idx] = f->values[0].f;
else
- pc->value = calc_percentile (pc->p, W, f->values[0].f, f[1].values[0].f);
+ vf->percentiles[idx] = calc_percentile (pc->p, W, f->values[0].f,
+ f[1].values[0].f);
}
}
- for (; percentile_idx < frq->n_percentiles; percentile_idx++)
- {
- struct percentile *pc = &frq->percentiles[percentile_idx];
- pc->value = (ft->n_valid > 0
- ? ft->valid[ft->n_valid - 1].values[0].f
- : SYSMIS);
- }
+ for (; idx < frq->n_percentiles; idx++)
+ vf->percentiles[idx] = (ft->n_valid > 0
+ ? ft->valid[ft->n_valid - 1].values[0].f
+ : SYSMIS);
}
/* Returns true iff the value in struct freq F is non-missing
const struct freq *f = f_;
const struct variable *v = v_;
- return !var_is_value_missing (v, f->values, MV_ANY);
+ return !var_is_value_missing (v, f->values);
}
/* Summary statistics. */
ft->valid_cases = 0.0;
- for(i = 0 ; i < ft->n_valid ; ++i )
+ for(i = 0 ; i < ft->n_valid ; ++i)
{
f = &ft->valid[i];
ft->valid_cases += f->count;
}
ft->total_cases = ft->valid_cases ;
- for(i = 0 ; i < ft->n_missing ; ++i )
+ for(i = 0 ; i < ft->n_missing ; ++i)
{
f = &ft->missing[i];
ft->total_cases += f->count;
static void
calc (struct frq_proc *frq, const struct ccase *c, const struct dataset *ds)
{
- double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
+ double weight = dict_get_case_weight (dataset_dict (ds), c, &frq->warn);
size_t i;
for (i = 0; i < frq->n_vars; i++)
calc_stats (frq, vf, d);
- histogram = freq_tab_to_hist (frq, &vf->tab, vf->var);
+ histogram = freq_tab_to_hist (frq, vf);
- if ( histogram)
+ if (histogram)
{
- chart_item_submit (histogram_chart_create (
- histogram->gsl_hist, var_to_string(vf->var),
- vf->tab.valid_cases,
- d[FRQ_ST_MEAN],
- d[FRQ_ST_STDDEV],
- frq->hist->draw_normal));
+ chart_submit (histogram_chart_create (
+ histogram->gsl_hist, var_to_string(vf->var),
+ vf->tab.valid_cases,
+ d[FRQ_ST_MEAN],
+ d[FRQ_ST_STDDEV],
+ frq->hist->draw_normal));
statistic_destroy (&histogram->parent);
}
int hi_pcnt = INT_MIN;
int hi_norm = FRQ_NONORMAL;
- frq.pool = pool_create ();
frq.sort = FRQ_AVALUE;
frq.vars = NULL;
frq.hist = NULL;
frq.pie = NULL;
frq.bar = NULL;
+ frq.warn = true;
/* Accept an optional, completely pointless "/VARIABLES=" */
lex_match (lexer, T_SLASH);
if (lex_match_id (lexer, "VARIABLES"))
{
- if (! lex_force_match (lexer, T_EQUALS) )
+ if (! lex_force_match (lexer, T_EQUALS))
goto error;
}
PV_NO_DUPLICATE))
goto error;
- frq.vars = xzalloc (frq.n_vars * sizeof (*frq.vars));
+ frq.vars = xcalloc (frq.n_vars, sizeof (*frq.vars));
for (i = 0; i < frq.n_vars; ++i)
{
frq.vars[i].var = vars[i];
else if (lex_match_id (lexer, "LIMIT"))
{
if (!lex_force_match (lexer, T_LPAREN)
- || !lex_force_int (lexer))
+ || !lex_force_int_range (lexer, "LIMIT", 0, INT_MAX))
goto error;
frq.max_categories = lex_integer (lexer);
{
lex_match (lexer, T_EQUALS);
- if (lex_force_int (lexer))
+ if (lex_force_int_range (lexer, "NTILES", 0, INT_MAX))
{
- int i;
int n = lex_integer (lexer);
lex_get (lexer);
- for (i = 0; i < n + 1; ++i)
+ for (int i = 0; i < n + 1; ++i)
{
frq.percentiles =
xrealloc (frq.percentiles,
hi_scale = FRQ_FREQ;
if (lex_match (lexer, T_LPAREN))
{
- if (lex_force_int (lexer))
+ if (lex_force_int_range (lexer, "FREQ", 1, INT_MAX))
{
hi_freq = lex_integer (lexer);
- if (hi_freq <= 0)
- {
- lex_error (lexer, _("Histogram frequency must be greater than zero."));
- }
lex_get (lexer);
if (! lex_force_match (lexer, T_RPAREN))
goto error;
hi_scale = FRQ_PERCENT;
if (lex_match (lexer, T_LPAREN))
{
- if (lex_force_int (lexer))
+ if (lex_force_int_range (lexer, "PERCENT", 1, INT_MAX))
{
hi_pcnt = lex_integer (lexer);
- if (hi_pcnt <= 0)
- {
- lex_error (lexer, _("Histogram percentage must be greater than zero."));
- }
lex_get (lexer);
if (! lex_force_match (lexer, T_RPAREN))
goto error;
}
else if (lex_match_id (lexer, "FREQ"))
{
- if ( lex_match (lexer, T_LPAREN))
+ if (lex_match (lexer, T_LPAREN))
{
if (lex_force_num (lexer))
{
}
else if (lex_match_id (lexer, "PERCENT"))
{
- if ( lex_match (lexer, T_LPAREN))
+ if (lex_match (lexer, T_LPAREN))
{
if (lex_force_num (lexer))
{
frq.n_percentiles = o;
- frq.median = NULL;
+ frq.median_idx = SIZE_MAX;
for (i = 0; i < frq.n_percentiles; i++)
if (frq.percentiles[i].p == 0.5)
{
- frq.median = &frq.percentiles[i];
+ frq.median_idx = i;
break;
}
}
ok = proc_commit (ds) && ok;
}
-
free (vars);
+ for (size_t i = 0; i < frq.n_vars; i++)
+ free (frq.vars[i].percentiles);
free (frq.vars);
free (frq.bar);
free (frq.pie);
free (frq.hist);
free (frq.percentiles);
- pool_destroy (frq.pool);
return CMD_SUCCESS;
free (vars);
free (frq.vars);
+ for (size_t i = 0; i < frq.n_vars; i++)
+ free (frq.vars[i].percentiles);
free (frq.bar);
free (frq.pie);
free (frq.hist);
free (frq.percentiles);
- pool_destroy (frq.pool);
return CMD_FAILURE;
}
static double
-calculate_iqr (const struct frq_proc *frq)
+calculate_iqr (const struct frq_proc *frq, const struct var_freqs *vf)
{
double q1 = SYSMIS;
double q3 = SYSMIS;
struct percentile *pc = &frq->percentiles[i];
if (fabs (0.25 - pc->p) < DBL_EPSILON)
- q1 = pc->value;
+ q1 = vf->percentiles[i];
else if (fabs (0.75 - pc->p) < DBL_EPSILON)
- q3 = pc->value;
+ q3 = vf->percentiles[i];
}
return q1 == SYSMIS || q3 == SYSMIS ? SYSMIS : q3 - q1;
const struct variable *var,
const union value *value)
{
- if (!chart->include_missing && var_is_value_missing (var, value, MV_ANY))
+ if (!chart->include_missing && var_is_value_missing (var, value))
return false;
if (var_is_numeric (var)
/* Create a gsl_histogram from a freq_tab */
static struct histogram *
-freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft,
- const struct variable *var)
+freq_tab_to_hist (const struct frq_proc *frq, const struct var_freqs *vf)
{
- double x_min, x_max, valid_freq;
- int i;
- double bin_width;
- struct histogram *histogram;
- double iqr;
-
/* Find out the extremes of the x value, within the range to be included in
the histogram, and sum the total frequency of those values. */
- x_min = DBL_MAX;
- x_max = -DBL_MAX;
- valid_freq = 0;
- for (i = 0; i < ft->n_valid; i++)
+ double x_min = DBL_MAX;
+ double x_max = -DBL_MAX;
+ double valid_freq = 0;
+ for (int i = 0; i < vf->tab.n_valid; i++)
{
- const struct freq *f = &ft->valid[i];
- if (chart_includes_value (frq->hist, var, f->values))
+ const struct freq *f = &vf->tab.valid[i];
+ if (chart_includes_value (frq->hist, vf->var, f->values))
{
x_min = MIN (x_min, f->values[0].f);
x_max = MAX (x_max, f->values[0].f);
if (valid_freq <= 0)
return NULL;
- iqr = calculate_iqr (frq);
-
- if (iqr > 0)
- /* Freedman-Diaconis' choice of bin width. */
- bin_width = 2 * iqr / pow (valid_freq, 1.0 / 3.0);
+ double iqr = calculate_iqr (frq, vf);
- else
- /* Sturges Rule */
- bin_width = (x_max - x_min) / (1 + log2 (valid_freq));
+ double bin_width =
+ (iqr > 0
+ ? 2 * iqr / pow (valid_freq, 1.0 / 3.0) /* Freedman-Diaconis. */
+ : (x_max - x_min) / (1 + log2 (valid_freq))); /* Sturges */
- histogram = histogram_create (bin_width, x_min, x_max);
-
- if ( histogram == NULL)
+ struct histogram *histogram = histogram_create (bin_width, x_min, x_max);
+ if (histogram == NULL)
return NULL;
- for (i = 0; i < ft->n_valid; i++)
+ for (int i = 0; i < vf->tab.n_valid; i++)
{
- const struct freq *f = &ft->valid[i];
- if (chart_includes_value (frq->hist, var, f->values))
+ const struct freq *f = &vf->tab.valid[i];
+ if (chart_includes_value (frq->hist, vf->var, f->values))
histogram_add (histogram, f->values[0].f, f->count);
}
msg (SW, _("Omitting pie chart for %s, which has over 50 unique values."),
var_get_name (var));
else
- chart_item_submit (piechart_create (var, slices, n_slices));
+ chart_submit (piechart_create (var, slices, n_slices));
free (slices);
}
int n_slices;
struct freq **slices = pick_cat_counts_ptr (bar, frq_tab, &n_slices);
- chart_item_submit (barchart_create (var, 1,
- (bar->y_scale == FRQ_FREQ) ? _("Count") : _("Percent"),
- (bar->y_scale == FRQ_PERCENT),
- slices, n_slices));
+ if (n_slices < 1)
+ msg (SW, _("Omitting bar chart, which has no values."));
+ else
+ chart_submit (barchart_create (var, 1,
+ (bar->y_scale == FRQ_FREQ) ? _("Count") : _("Percent"),
+ (bar->y_scale == FRQ_PERCENT),
+ slices, n_slices));
free (slices);
}
d[FRQ_ST_SEMEAN] = d[FRQ_ST_STDDEV] / sqrt (W);
d[FRQ_ST_SESKEWNESS] = calc_seskew (W);
d[FRQ_ST_SEKURTOSIS] = calc_sekurt (W);
- d[FRQ_ST_MEDIAN] = frq->median ? frq->median->value : SYSMIS;
+ d[FRQ_ST_MEDIAN] = (frq->median_idx != SIZE_MAX
+ ? vf->percentiles[frq->median_idx]
+ : SYSMIS);
}
static bool
if (!pc->show)
continue;
- union value v = { .f = vf->tab.n_valid ? pc->value : SYSMIS };
+ union value v = {
+ .f = vf->tab.n_valid ? vf->percentiles[j] : SYSMIS
+ };
pivot_table_put2 (table, var_idx, row++,
pivot_value_new_var_value (vf->var, &v));
}