/*
PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2007, 2009, 2010, 2011, 2014 Free Software Foundation, Inc.
-
+ Copyright (C) 1997-9, 2000, 2007, 2009, 2010, 2011, 2014, 2015 Free Software Foundation, Inc.
+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "libpspp/hmap.h"
#include "libpspp/message.h"
#include "libpspp/misc.h"
-#include "libpspp/pool.h"
#include "math/histogram.h"
#include "math/moments.h"
#include "math/chart-geometry.h"
-#include "output/chart-item.h"
+#include "output/charts/barchart.h"
#include "output/charts/piechart.h"
#include "output/charts/plot-hist.h"
-#include "output/tab.h"
+#include "output/pivot-table.h"
#include "gl/minmax.h"
#include "gl/xalloc.h"
struct percentile
{
double p; /* the %ile to be calculated */
- double value; /* the %ile's value */
bool show; /* True to show this percentile in the statistics box. */
};
if (p1->p < p2->p)
return -1;
+ if (p1->p == p2->p)
+ {
+ if (p1->show > p2->show)
+ return -1;
+
+ return (p1->show < p2->show);
+ }
+
return (p1->p > p2->p);
}
FRQ_PERCENT
};
-enum sortprops
+enum sortprops
{
FRQ_AFREQ,
FRQ_DFREQ,
/* Freqency table. */
struct freq_tab tab; /* Frequencies table to use. */
- /* Percentiles. */
- int n_groups; /* Number of groups. */
- double *groups; /* Groups. */
-
/* Statistics. */
double stat[FRQ_ST_count];
+ double *percentiles;
/* Variable attributes. */
int width;
struct frq_proc
{
- struct pool *pool;
-
struct var_freqs *vars;
size_t n_vars;
/* Percentiles to calculate and possibly display. */
struct percentile *percentiles;
- int n_percentiles, n_show_percentiles;
+ size_t median_idx;
+ size_t n_percentiles;
/* Frequency table display. */
- int max_categories; /* Maximum categories to show. */
+ long int max_categories; /* Maximum categories to show. */
int sort; /* FRQ_AVALUE or FRQ_DVALUE
or FRQ_AFREQ or FRQ_DFREQ. */
int n_stats;
/* Histogram and pie chart settings. */
- struct frq_chart *hist, *pie;
+ struct frq_chart *hist, *pie, *bar;
+
+ bool warn;
};
bool ascending_value;
};
-static void calc_stats (const struct var_freqs *vf, double d[FRQ_ST_count]);
+static void calc_stats (const struct frq_proc *,
+ const struct var_freqs *, double d[FRQ_ST_count]);
-static void do_piechart(const struct frq_chart *pie,
+static void do_piechart(const struct frq_chart *pie,
const struct variable *var,
const struct freq_tab *frq_tab);
-static void dump_statistics (const struct frq_proc *frq,
- const struct var_freqs *vf,
+static void do_barchart(const struct frq_chart *bar,
+ const struct variable **var,
+ const struct freq_tab *frq_tab);
+
+static void dump_statistics (const struct frq_proc *frq,
const struct variable *wv);
static int
}
else
{
- int cmp = value_compare_3way (&a->value, &b->value, aux->width);
+ int cmp = value_compare_3way (a->values, b->values, aux->width);
return aux->ascending_value ? cmp : -cmp;
}
}
/* Create a gsl_histogram from a freq_tab */
-static struct histogram *
-freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft,
- const struct variable *var);
+static struct histogram *freq_tab_to_hist (const struct frq_proc *,
+ const struct var_freqs *);
+/* Fills in one row of a frequency table: writes FREQUENCY, PERCENT,
+   VALID_PERCENT and CUM_PERCENT, in that order, into columns 0 through 3
+   of row VAR_IDX in TABLE.  A cell whose value is SYSMIS is left unset. */
+static void
+put_freq_row (struct pivot_table *table, int var_idx,
+              double frequency, double percent,
+              double valid_percent, double cum_percent)
+{
+  const double cells[] = { frequency, percent, valid_percent, cum_percent };
+  const size_t n_cells = sizeof cells / sizeof cells[0];
+
+  for (size_t col = 0; col < n_cells; col++)
+    {
+      const double x = cells[col];
+      if (x == SYSMIS)
+        continue;
+
+      pivot_table_put2 (table, col, var_idx, pivot_value_new_number (x));
+    }
+}
/* Displays a full frequency table for variable V. */
static void
dump_freq_table (const struct var_freqs *vf, const struct variable *wv)
{
- const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0;
const struct freq_tab *ft = &vf->tab;
- int n_categories;
- struct freq *f;
- struct tab_table *t;
- int r, x;
- double cum_total = 0.0;
- double cum_freq = 0.0;
- static const char *headings[] = {
- N_("Value Label"),
- N_("Value"),
- N_("Frequency"),
- N_("Percent"),
- N_("Valid Percent"),
- N_("Cum Percent")
- };
+ /* Create the output table for VF's variable and pass it the weight
+ variable WV. */
+ struct pivot_table *table = pivot_table_create__ (pivot_value_new_variable (
+ vf->var), "Frequencies");
+ pivot_table_set_weight_var (table, wv);
- n_categories = ft->n_valid + ft->n_missing;
- t = tab_create (6, n_categories + 2);
- tab_headers (t, 0, 0, 1, 0);
+ /* Column dimension: the four statistics reported for each row, in the
+ same order that put_freq_row() fills them. */
+ pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"),
+ N_("Frequency"), PIVOT_RC_COUNT,
+ N_("Percent"), PIVOT_RC_PERCENT,
+ N_("Valid Percent"), PIVOT_RC_PERCENT,
+ N_("Cumulative Percent"), PIVOT_RC_PERCENT);
- for (x = 0; x < 6; x++)
- tab_text (t, x, 0, TAB_CENTER | TAT_TITLE, gettext (headings[x]));
+ /* Row dimension, built from the variable; its categories are added by
+ the loops below. */
+ struct pivot_dimension *variable = pivot_dimension_create__ (
+ table, PIVOT_AXIS_ROW, pivot_value_new_variable (vf->var));
- r = 1;
- for (f = ft->valid; f < ft->missing; f++)
+ /* Valid categories.  CUM_FREQ and CUM_PERCENT are running totals; the
+ "Valid" group is created only when the first valid category is seen. */
+ double cum_freq = 0.0;
+ double cum_percent = 0.0;
+ struct pivot_category *valid = NULL;
+ for (const struct freq *f = ft->valid; f < ft->missing; f++)
{
- const char *label;
- double percent, valid_percent;
-
cum_freq += f->count;
-
- percent = f->count / ft->total_cases * 100.0;
- valid_percent = f->count / ft->valid_cases * 100.0;
- cum_total += valid_percent;
-
- label = var_lookup_value_label (vf->var, &f->value);
- if (label != NULL)
- tab_text (t, 0, r, TAB_LEFT, label);
-
- tab_value (t, 1, r, TAB_NONE, &f->value, vf->var, NULL);
- tab_double (t, 2, r, TAB_NONE, f->count, wfmt);
- tab_double (t, 3, r, TAB_NONE, percent, NULL);
- tab_double (t, 4, r, TAB_NONE, valid_percent, NULL);
- tab_double (t, 5, r, TAB_NONE, cum_total, NULL);
- r++;
+ double valid_percent = f->count / ft->valid_cases * 100.0;
+ cum_percent += valid_percent;
+
+ if (!valid)
+ valid = pivot_category_create_group (variable->root, N_("Valid"));
+ int var_idx = pivot_category_create_leaf (
+ valid, pivot_value_new_var_value (vf->var, &f->values[0]));
+ put_freq_row (table, var_idx, f->count,
+ f->count / ft->total_cases * 100.0,
+ valid_percent, cum_percent);
}
- for (; f < &ft->valid[n_categories]; f++)
- {
- const char *label;
+ /* Missing categories.  The loop runs from ft->missing to the end of the
+ combined array that starts at ft->valid.  These rows get a frequency
+ and a percent but no valid or cumulative percent. */
+ struct pivot_category *missing = NULL;
+ size_t n_categories = ft->n_valid + ft->n_missing;
+ for (const struct freq *f = ft->missing; f < &ft->valid[n_categories]; f++)
+ {
cum_freq += f->count;
- label = var_lookup_value_label (vf->var, &f->value);
- if (label != NULL)
- tab_text (t, 0, r, TAB_LEFT, label);
-
- tab_value (t, 1, r, TAB_NONE, &f->value, vf->var, NULL);
- tab_double (t, 2, r, TAB_NONE, f->count, wfmt);
- tab_double (t, 3, r, TAB_NONE,
- f->count / ft->total_cases * 100.0, NULL);
- tab_text (t, 4, r, TAB_NONE, _("Missing"));
- r++;
+ if (!missing)
+ missing = pivot_category_create_group (variable->root, N_("Missing"));
+ int var_idx = pivot_category_create_leaf (
+ missing, pivot_value_new_var_value (vf->var, &f->values[0]));
+ put_freq_row (table, var_idx, f->count,
+ f->count / ft->total_cases * 100.0, SYSMIS, SYSMIS);
}
- tab_box (t, TAL_1, TAL_1, -1, TAL_1, 0, 0, 5, r);
- tab_hline (t, TAL_2, 0, 5, 1);
- tab_hline (t, TAL_2, 0, 5, r);
- tab_joint_text (t, 0, r, 1, r, TAB_RIGHT | TAT_TITLE, _("Total"));
- tab_vline (t, TAL_0, 1, r, r);
- tab_double (t, 2, r, TAB_NONE, cum_freq, wfmt);
- tab_fixed (t, 3, r, TAB_NONE, 100.0, 5, 1);
- tab_fixed (t, 4, r, TAB_NONE, 100.0, 5, 1);
-
- tab_title (t, "%s", var_to_string (vf->var));
- tab_submit (t);
+ /* Total row: CUM_FREQ now covers valid and missing categories alike.
+ NOTE(review): the Percent cell receives CUM_PERCENT, which only the
+ valid loop accumulates, so a table with no valid categories shows 0
+ here rather than 100 -- confirm this is intended. */
+ int var_idx = pivot_category_create_leaf (
+ variable->root, pivot_value_new_text (N_("Total")));
+ put_freq_row (table, var_idx, cum_freq, cum_percent, SYSMIS, SYSMIS);
+
+ pivot_table_submit (table);
}
\f
/* Statistical display. */
/* Calculates all of the percentiles for VF within FRQ. */
static void
-calc_percentiles (const struct frq_proc *frq, const struct var_freqs *vf)
+calc_percentiles (const struct frq_proc *frq, struct var_freqs *vf)
{
- const struct freq_tab *ft = &vf->tab;
- double W = ft->valid_cases;
- const struct freq *f;
- int percentile_idx;
- double rank;
+ if (!frq->n_percentiles)
+ return;
- assert (ft->n_valid > 0);
+ vf->percentiles = xnmalloc (frq->n_percentiles, sizeof *vf->percentiles);
- rank = 0;
- percentile_idx = 0;
- for (f = ft->valid; f < ft->missing; f++)
+ const struct freq_tab *ft = &vf->tab;
+ const double W = ft->valid_cases;
+ size_t idx = 0;
+
+ double rank = 0;
+ for (const struct freq *f = ft->valid; f < ft->missing; f++)
{
rank += f->count;
- for (; percentile_idx < frq->n_percentiles; percentile_idx++)
+ for (; idx < frq->n_percentiles; idx++)
{
- struct percentile *pc = &frq->percentiles[percentile_idx];
+ struct percentile *pc = &frq->percentiles[idx];
double tp;
tp = (settings_get_algorithm () == ENHANCED
break;
if (tp + 1 < rank || f + 1 >= ft->missing)
- pc->value = f->value.f;
+ vf->percentiles[idx] = f->values[0].f;
else
- pc->value = calc_percentile (pc->p, W, f->value.f, f[1].value.f);
+ vf->percentiles[idx] = calc_percentile (pc->p, W, f->values[0].f,
+ f[1].values[0].f);
}
}
- for (; percentile_idx < frq->n_percentiles; percentile_idx++)
- {
- struct percentile *pc = &frq->percentiles[percentile_idx];
- pc->value = ft->valid[ft->n_valid - 1].value.f;
- }
+ for (; idx < frq->n_percentiles; idx++)
+ vf->percentiles[idx] = (ft->n_valid > 0
+ ? ft->valid[ft->n_valid - 1].values[0].f
+ : SYSMIS);
}
/* Returns true iff the value in struct freq F is non-missing
const struct freq *f = f_;
const struct variable *v = v_;
- return !var_is_value_missing (v, &f->value, MV_ANY);
+ return !var_is_value_missing (v, f->values);
}
/* Summary statistics. */
ft->valid_cases = 0.0;
- for(i = 0 ; i < ft->n_valid ; ++i )
+ for(i = 0 ; i < ft->n_valid ; ++i)
{
f = &ft->valid[i];
ft->valid_cases += f->count;
}
ft->total_cases = ft->valid_cases ;
- for(i = 0 ; i < ft->n_missing ; ++i )
+ for(i = 0 ; i < ft->n_missing ; ++i)
{
f = &ft->missing[i];
ft->total_cases += f->count;
static void
calc (struct frq_proc *frq, const struct ccase *c, const struct dataset *ds)
{
- double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
+ double weight = dict_get_case_weight (dataset_dict (ds), c, &frq->warn);
size_t i;
for (i = 0; i < frq->n_vars; i++)
for (i = 0; i < frq->n_vars; i++)
{
struct var_freqs *vf = &frq->vars[i];
-
postprocess_freq_tab (frq, vf);
+ calc_percentiles (frq, vf);
+ }
+
+ if (frq->n_stats)
+ dump_statistics (frq, wv);
+
+ for (i = 0; i < frq->n_vars; i++)
+ {
+ struct var_freqs *vf = &frq->vars[i];
/* Frequencies tables. */
if (vf->tab.n_valid + vf->tab.n_missing <= frq->max_categories)
dump_freq_table (vf, wv);
- calc_percentiles (frq, vf);
-
- /* Statistics. */
- if (frq->n_stats)
- dump_statistics (frq, vf, wv);
if (frq->hist && var_is_numeric (vf->var) && vf->tab.n_valid > 0)
{
double d[FRQ_ST_count];
struct histogram *histogram;
- calc_stats (vf, d);
+ calc_stats (frq, vf, d);
- histogram = freq_tab_to_hist (frq, &vf->tab, vf->var);
+ histogram = freq_tab_to_hist (frq, vf);
- if ( histogram)
+ if (histogram)
{
- chart_item_submit (histogram_chart_create (
- histogram->gsl_hist, var_to_string(vf->var),
- vf->tab.valid_cases,
- d[FRQ_ST_MEAN],
- d[FRQ_ST_STDDEV],
- frq->hist->draw_normal));
+ chart_submit (histogram_chart_create (
+ histogram->gsl_hist, var_to_string(vf->var),
+ vf->tab.valid_cases,
+ d[FRQ_ST_MEAN],
+ d[FRQ_ST_STDDEV],
+ frq->hist->draw_normal));
statistic_destroy (&histogram->parent);
}
if (frq->pie)
do_piechart(frq->pie, vf->var, &vf->tab);
+ if (frq->bar)
+ do_barchart(frq->bar, &vf->var, &vf->tab);
+
cleanup_freq_tab (vf);
}
}
{
int i;
struct frq_proc frq;
- const struct variable **vars;
+ const struct variable **vars = NULL;
bool sbc_barchart = false;
bool sbc_piechart = false;
double pie_min = -DBL_MAX;
double pie_max = DBL_MAX;
- bool pie_missing = false;
+ bool pie_missing = true;
+
+ double bar_min = -DBL_MAX;
+ double bar_max = DBL_MAX;
+ bool bar_freq = true;
double hi_min = -DBL_MAX;
double hi_max = DBL_MAX;
int hi_pcnt = INT_MIN;
int hi_norm = FRQ_NONORMAL;
- frq.pool = pool_create ();
frq.sort = FRQ_AVALUE;
frq.vars = NULL;
frq.n_vars = 0;
-
- frq.stats = BIT_INDEX (FRQ_ST_MEAN)
- | BIT_INDEX (FRQ_ST_STDDEV)
+
+ frq.stats = BIT_INDEX (FRQ_ST_MEAN)
+ | BIT_INDEX (FRQ_ST_STDDEV)
| BIT_INDEX (FRQ_ST_MINIMUM)
| BIT_INDEX (FRQ_ST_MAXIMUM);
frq.n_stats = 4;
- frq.max_categories = INT_MAX;
+ frq.max_categories = LONG_MAX;
frq.percentiles = NULL;
frq.n_percentiles = 0;
- frq.n_show_percentiles = 0;
frq.hist = NULL;
frq.pie = NULL;
+ frq.bar = NULL;
+ frq.warn = true;
/* Accept an optional, completely pointless "/VARIABLES=" */
lex_match (lexer, T_SLASH);
if (lex_match_id (lexer, "VARIABLES"))
{
- if (! lex_force_match (lexer, T_EQUALS) )
+ if (! lex_force_match (lexer, T_EQUALS))
goto error;
}
PV_NO_DUPLICATE))
goto error;
- frq.vars = xzalloc (frq.n_vars * sizeof (*frq.vars));
+ frq.vars = xcalloc (frq.n_vars, sizeof (*frq.vars));
for (i = 0; i < frq.n_vars; ++i)
{
frq.vars[i].var = vars[i];
+ frq.vars[i].width = var_get_width (vars[i]);
}
while (lex_token (lexer) != T_ENDCMD)
if (lex_match_id (lexer, "STATISTICS"))
{
- lex_match (lexer, T_EQUALS);
+ frq.stats = BIT_INDEX (FRQ_ST_MEAN)
+ | BIT_INDEX (FRQ_ST_STDDEV)
+ | BIT_INDEX (FRQ_ST_MINIMUM)
+ | BIT_INDEX (FRQ_ST_MAXIMUM);
+
+ frq.n_stats = 4;
- frq.stats = 0;
- frq.n_stats = 0;
+ if (lex_match (lexer, T_EQUALS))
+ {
+ frq.n_stats = 0;
+ frq.stats = 0;
+ }
while (lex_token (lexer) != T_ENDCMD
&& lex_token (lexer) != T_SLASH)
{
if (lex_match_id (lexer, "DEFAULT"))
{
- frq.stats = BIT_INDEX (FRQ_ST_MEAN)
- | BIT_INDEX (FRQ_ST_STDDEV)
+ frq.stats = BIT_INDEX (FRQ_ST_MEAN)
+ | BIT_INDEX (FRQ_ST_STDDEV)
| BIT_INDEX (FRQ_ST_MINIMUM)
| BIT_INDEX (FRQ_ST_MAXIMUM);
}
else if (lex_match_id (lexer, "VARIANCE"))
{
- frq.stats |= BIT_INDEX (FRQ_ST_MEAN);
+ frq.stats |= BIT_INDEX (FRQ_ST_VARIANCE);
frq.n_stats++;
}
else if (lex_match_id (lexer, "KURTOSIS"))
if (lex_force_num (lexer))
{
frq.percentiles =
- xrealloc (frq.percentiles,
+ xrealloc (frq.percentiles,
(frq.n_percentiles + 1)
* sizeof (*frq.percentiles));
frq.percentiles[frq.n_percentiles].p = lex_number (lexer) / 100.0;
frq.percentiles[frq.n_percentiles].show = true;
lex_get (lexer);
frq.n_percentiles++;
- frq.n_show_percentiles++;
}
else
{
lex_error (lexer, NULL);
goto error;
}
+ lex_match (lexer, T_COMMA);
}
}
else if (lex_match_id (lexer, "FORMAT"))
{
if (lex_match_id (lexer, "TABLE"))
{
-
}
else if (lex_match_id (lexer, "NOTABLE"))
{
frq.max_categories = 0;
}
+ else if (lex_match_id (lexer, "LIMIT"))
+ {
+ if (!lex_force_match (lexer, T_LPAREN)
+ || !lex_force_int_range (lexer, "LIMIT", 0, INT_MAX))
+ goto error;
+
+ frq.max_categories = lex_integer (lexer);
+ lex_get (lexer);
+
+ if (!lex_force_match (lexer, T_RPAREN))
+ goto error;
+ }
else if (lex_match_id (lexer, "AVALUE"))
{
frq.sort = FRQ_AVALUE;
{
lex_match (lexer, T_EQUALS);
- if (lex_force_int (lexer))
+ if (lex_force_int_range (lexer, "NTILES", 0, INT_MAX))
{
- int i;
int n = lex_integer (lexer);
lex_get (lexer);
- for (i = 0; i < n + 1; ++i)
+ for (int i = 0; i < n + 1; ++i)
{
frq.percentiles =
- xrealloc (frq.percentiles,
+ xrealloc (frq.percentiles,
(frq.n_percentiles + 1)
* sizeof (*frq.percentiles));
frq.percentiles[frq.n_percentiles].p =
frq.percentiles[frq.n_percentiles].show = true;
frq.n_percentiles++;
- frq.n_show_percentiles++;
}
}
else
hi_scale = FRQ_FREQ;
if (lex_match (lexer, T_LPAREN))
{
- if (lex_force_int (lexer))
+ if (lex_force_int_range (lexer, "FREQ", 1, INT_MAX))
{
hi_freq = lex_integer (lexer);
- if (hi_freq <= 0)
- {
- lex_error (lexer, _("Histogram frequency must be greater than zero."));
- }
lex_get (lexer);
- lex_force_match (lexer, T_RPAREN);
+ if (! lex_force_match (lexer, T_RPAREN))
+ goto error;
}
}
}
hi_scale = FRQ_PERCENT;
if (lex_match (lexer, T_LPAREN))
{
- if (lex_force_int (lexer))
+ if (lex_force_int_range (lexer, "PERCENT", 1, INT_MAX))
{
hi_pcnt = lex_integer (lexer);
- if (hi_pcnt <= 0)
- {
- lex_error (lexer, _("Histogram percentaage must be greater than zero."));
- }
lex_get (lexer);
- lex_force_match (lexer, T_RPAREN);
+ if (! lex_force_match (lexer, T_RPAREN))
+ goto error;
}
}
}
else if (lex_match_id (lexer, "MINIMUM"))
{
- lex_force_match (lexer, T_LPAREN);
+ if (! lex_force_match (lexer, T_LPAREN))
+ goto error;
if (lex_force_num (lexer))
{
hi_min = lex_number (lexer);
lex_get (lexer);
}
- lex_force_match (lexer, T_RPAREN);
+ if (! lex_force_match (lexer, T_RPAREN))
+ goto error;
}
else if (lex_match_id (lexer, "MAXIMUM"))
{
- lex_force_match (lexer, T_LPAREN);
+ if (! lex_force_match (lexer, T_LPAREN))
+ goto error;
if (lex_force_num (lexer))
{
hi_max = lex_number (lexer);
lex_get (lexer);
}
- lex_force_match (lexer, T_RPAREN);
+ if (! lex_force_match (lexer, T_RPAREN))
+ goto error;
}
else
{
{
if (lex_match_id (lexer, "MINIMUM"))
{
- lex_force_match (lexer, T_LPAREN);
+ if (! lex_force_match (lexer, T_LPAREN))
+ goto error;
if (lex_force_num (lexer))
{
pie_min = lex_number (lexer);
lex_get (lexer);
}
- lex_force_match (lexer, T_RPAREN);
+ if (! lex_force_match (lexer, T_RPAREN))
+ goto error;
}
else if (lex_match_id (lexer, "MAXIMUM"))
{
- lex_force_match (lexer, T_LPAREN);
+ if (! lex_force_match (lexer, T_LPAREN))
+ goto error;
if (lex_force_num (lexer))
{
pie_max = lex_number (lexer);
lex_get (lexer);
}
- lex_force_match (lexer, T_RPAREN);
+ if (! lex_force_match (lexer, T_RPAREN))
+ goto error;
}
else if (lex_match_id (lexer, "MISSING"))
{
}
sbc_piechart = true;
}
+ else if (lex_match_id (lexer, "BARCHART"))
+ {
+ lex_match (lexer, T_EQUALS);
+ while (lex_token (lexer) != T_ENDCMD
+ && lex_token (lexer) != T_SLASH)
+ {
+ if (lex_match_id (lexer, "MINIMUM"))
+ {
+ if (! lex_force_match (lexer, T_LPAREN))
+ goto error;
+ if (lex_force_num (lexer))
+ {
+ bar_min = lex_number (lexer);
+ lex_get (lexer);
+ }
+ if (! lex_force_match (lexer, T_RPAREN))
+ goto error;
+ }
+ else if (lex_match_id (lexer, "MAXIMUM"))
+ {
+ if (! lex_force_match (lexer, T_LPAREN))
+ goto error;
+ if (lex_force_num (lexer))
+ {
+ bar_max = lex_number (lexer);
+ lex_get (lexer);
+ }
+ if (! lex_force_match (lexer, T_RPAREN))
+ goto error;
+ }
+ else if (lex_match_id (lexer, "FREQ"))
+ {
+ if (lex_match (lexer, T_LPAREN))
+ {
+ if (lex_force_num (lexer))
+ {
+ lex_number (lexer);
+ lex_get (lexer);
+ }
+ if (! lex_force_match (lexer, T_RPAREN))
+ goto error;
+ }
+ bar_freq = true;
+ }
+ else if (lex_match_id (lexer, "PERCENT"))
+ {
+ if (lex_match (lexer, T_LPAREN))
+ {
+ if (lex_force_num (lexer))
+ {
+ lex_number (lexer);
+ lex_get (lexer);
+ }
+ if (! lex_force_match (lexer, T_RPAREN))
+ goto error;
+ }
+ bar_freq = false;
+ }
+ else
+ {
+ lex_error (lexer, NULL);
+ goto error;
+ }
+ }
+ sbc_barchart = true;
+ }
else if (lex_match_id (lexer, "MISSING"))
{
lex_match (lexer, T_EQUALS);
}
}
}
+ else if (lex_match_id (lexer, "ORDER"))
+ {
+ lex_match (lexer, T_EQUALS);
+ if (!lex_match_id (lexer, "ANALYSIS"))
+ lex_match_id (lexer, "VARIABLE");
+ }
else
{
lex_error (lexer, NULL);
}
}
- if (frq.stats & FRQ_ST_MEDIAN)
+ if (frq.stats & BIT_INDEX (FRQ_ST_MEDIAN))
{
frq.percentiles =
- xrealloc (frq.percentiles,
+ xrealloc (frq.percentiles,
(frq.n_percentiles + 1)
* sizeof (*frq.percentiles));
-
+
frq.percentiles[frq.n_percentiles].p = 0.50;
- frq.percentiles[frq.n_percentiles].show = true;
+ frq.percentiles[frq.n_percentiles].show = false;
frq.n_percentiles++;
}
/* Figure out which charts the user requested. */
{
- if (sbc_barchart)
- msg (SW, _("Bar charts are not implemented."));
-
if (sbc_histogram)
{
struct frq_chart *hist;
msg (SE, _("%s for histogram must be greater than or equal to %s, "
"but %s was specified as %.15g and %s as %.15g. "
"%s and %s will be ignored."),
- "MAX", "MIN",
- "MIN", hist->x_min,
+ "MAX", "MIN",
+ "MIN", hist->x_min,
"MAX", hist->x_max,
"MIN", "MAX");
hist->x_min = hist->x_max = SYSMIS;
}
frq.percentiles =
- xrealloc (frq.percentiles,
+ xrealloc (frq.percentiles,
(frq.n_percentiles + 2)
* sizeof (*frq.percentiles));
-
+
frq.percentiles[frq.n_percentiles].p = 0.25;
frq.percentiles[frq.n_percentiles].show = false;
frq.percentiles[frq.n_percentiles + 1].p = 0.75;
frq.percentiles[frq.n_percentiles + 1].show = false;
-
+
frq.n_percentiles+=2;
}
+ if (sbc_barchart)
+ {
+ frq.bar = xmalloc (sizeof *frq.bar);
+ frq.bar->x_min = bar_min;
+ frq.bar->x_max = bar_max;
+ frq.bar->include_missing = false;
+ frq.bar->y_scale = bar_freq ? FRQ_FREQ : FRQ_PERCENT;
+ }
+
if (sbc_piechart)
{
struct frq_chart *pie;
{
msg (SE, _("%s for pie chart must be greater than or equal to %s, "
"but %s was specified as %.15g and %s as %.15g. "
- "%s and %s will be ignored."),
- "MAX", "MIN",
+ "%s and %s will be ignored."),
+ "MAX", "MIN",
"MIN", pie->x_min,
"MAX", pie->x_max,
"MIN", "MAX");
int i,o;
double previous_p = -1;
qsort (frq.percentiles, frq.n_percentiles,
- sizeof (*frq.percentiles),
+ sizeof (*frq.percentiles),
ptile_3way);
- frq.n_show_percentiles = 0;
for (i = o = 0; i < frq.n_percentiles; ++i)
{
- frq.percentiles[o].p = frq.percentiles[i].p;
-
- if (frq.percentiles[i].show)
- frq.percentiles[o].show = true;
-
- if (frq.percentiles[i].p != previous_p)
- {
- if (frq.percentiles[i].show)
- frq.n_show_percentiles++;
-
- o++;
- }
-
+ if (frq.percentiles[i].p != previous_p)
+ {
+ frq.percentiles[o].p = frq.percentiles[i].p;
+ frq.percentiles[o].show = frq.percentiles[i].show;
+ o++;
+ }
+ else if (frq.percentiles[i].show &&
+ !frq.percentiles[o].show)
+ {
+ frq.percentiles[o].show = true;
+ }
previous_p = frq.percentiles[i].p;
}
frq.n_percentiles = o;
+
+ frq.median_idx = SIZE_MAX;
+ for (i = 0; i < frq.n_percentiles; i++)
+ if (frq.percentiles[i].p == 0.5)
+ {
+ frq.median_idx = i;
+ break;
+ }
}
{
{
struct ccase *c;
precalc (&frq, group, ds);
+
for (; (c = casereader_read (group)) != NULL; case_unref (c))
calc (&frq, c, ds);
postcalc (&frq, ds);
+ casereader_destroy (group);
}
ok = casegrouper_destroy (grouper);
ok = proc_commit (ds) && ok;
}
+ free (vars);
+ for (size_t i = 0; i < frq.n_vars; i++)
+ free (frq.vars[i].percentiles);
+ free (frq.vars);
+ free (frq.bar);
+ free (frq.pie);
+ free (frq.hist);
+ free (frq.percentiles);
return CMD_SUCCESS;
error:
+  free (vars);
+  /* Release each variable's percentile array BEFORE the array that holds
+     them: freeing frq.vars first would make the loop read freed memory.
+     frq.vars is still NULL if parsing failed before it was allocated. */
+  if (frq.vars != NULL)
+    for (size_t i = 0; i < frq.n_vars; i++)
+      free (frq.vars[i].percentiles);
+  free (frq.vars);
+  free (frq.bar);
+  free (frq.pie);
+  free (frq.hist);
+  free (frq.percentiles);
+
return CMD_FAILURE;
}
static double
-calculate_iqr (const struct frq_proc *frq)
+calculate_iqr (const struct frq_proc *frq, const struct var_freqs *vf)
{
double q1 = SYSMIS;
double q3 = SYSMIS;
struct percentile *pc = &frq->percentiles[i];
if (fabs (0.25 - pc->p) < DBL_EPSILON)
- q1 = pc->value;
+ q1 = vf->percentiles[i];
else if (fabs (0.75 - pc->p) < DBL_EPSILON)
- q3 = pc->value;
+ q3 = vf->percentiles[i];
}
return q1 == SYSMIS || q3 == SYSMIS ? SYSMIS : q3 - q1;
const struct variable *var,
const union value *value)
{
- if (!chart->include_missing && var_is_value_missing (var, value, MV_ANY))
+ if (!chart->include_missing && var_is_value_missing (var, value))
return false;
if (var_is_numeric (var)
/* Create a gsl_histogram from a freq_tab */
static struct histogram *
-freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft,
- const struct variable *var)
+freq_tab_to_hist (const struct frq_proc *frq, const struct var_freqs *vf)
{
- double x_min, x_max, valid_freq;
- int i;
- double bin_width;
- struct histogram *histogram;
- double iqr;
-
/* Find out the extremes of the x value, within the range to be included in
the histogram, and sum the total frequency of those values. */
- x_min = DBL_MAX;
- x_max = -DBL_MAX;
- valid_freq = 0;
- for (i = 0; i < ft->n_valid; i++)
+ double x_min = DBL_MAX;
+ double x_max = -DBL_MAX;
+ double valid_freq = 0;
+ for (int i = 0; i < vf->tab.n_valid; i++)
{
- const struct freq *f = &ft->valid[i];
- if (chart_includes_value (frq->hist, var, &f->value))
+ const struct freq *f = &vf->tab.valid[i];
+ if (chart_includes_value (frq->hist, vf->var, f->values))
{
- x_min = MIN (x_min, f->value.f);
- x_max = MAX (x_max, f->value.f);
+ x_min = MIN (x_min, f->values[0].f);
+ x_max = MAX (x_max, f->values[0].f);
valid_freq += f->count;
}
}
- /* Freedman-Diaconis' choice of bin width. */
- iqr = calculate_iqr (frq);
- bin_width = 2 * iqr / pow (valid_freq, 1.0 / 3.0);
+ /* No included value has positive total frequency, so there is nothing
+ to plot; the caller receives NULL. */
+ if (valid_freq <= 0)
+ return NULL;
- histogram = histogram_create (bin_width, x_min, x_max);
+ double iqr = calculate_iqr (frq, vf);
- if ( histogram == NULL)
+ /* Pick the bin width from the interquartile range when it is positive;
+ otherwise fall back to a width based on the value range.
+ NOTE(review): 1 + log2 (valid_freq) is zero or negative when
+ valid_freq <= 0.5 -- confirm histogram_create tolerates the resulting
+ bin width. */
+ double bin_width =
+ (iqr > 0
+ ? 2 * iqr / pow (valid_freq, 1.0 / 3.0) /* Freedman-Diaconis. */
+ : (x_max - x_min) / (1 + log2 (valid_freq))); /* Sturges */
+
+ struct histogram *histogram = histogram_create (bin_width, x_min, x_max);
+ if (histogram == NULL)
return NULL;
- for (i = 0; i < ft->n_valid; i++)
+ /* Second pass: add every included value to the histogram with its
+ count. */
+ for (int i = 0; i < vf->tab.n_valid; i++)
{
- const struct freq *f = &ft->valid[i];
- if (chart_includes_value (frq->hist, var, &f->value))
- histogram_add (histogram, f->value.f, f->count);
+ const struct freq *f = &vf->tab.valid[i];
+ if (chart_includes_value (frq->hist, vf->var, f->values))
+ histogram_add (histogram, f->values[0].f, f->count);
}
return histogram;
}
-static int
-add_slice (const struct frq_chart *pie, const struct freq *freq,
- const struct variable *var, struct slice *slice)
+
+/* Allocates an array of struct freq and fills it from the data in FRQ_TAB,
+   according to the parameters of CATCHART.  Valid categories whose count
+   is below CATCHART->x_min or above CATCHART->x_max are skipped.  If
+   CATCHART includes missing values and FRQ_TAB has any, all the missing
+   categories are merged into one final slice that carries the first
+   missing value and the sum of the missing counts.
+
+   Stores the number of slices in *N_SLICESP.  The caller is responsible
+   for freeing the returned array. */
+static struct freq *
+pick_cat_counts (const struct frq_chart *catchart,
+                 const struct freq_tab *frq_tab,
+                 int *n_slicesp)
{
- if (chart_includes_value (pie, var, &freq->value))
+  int n_slices = 0;
+  int i;
+  struct freq *slices = xnmalloc (frq_tab->n_valid + frq_tab->n_missing, sizeof *slices);
+
+  for (i = 0; i < frq_tab->n_valid; i++)
{
- ds_init_empty (&slice->label);
- var_append_value_name (var, &freq->value, &slice->label);
- slice->magnitude = freq->count;
- return 1;
+      const struct freq *f = &frq_tab->valid[i];
+      if (f->count > catchart->x_max)
+        continue;
+
+      if (f->count < catchart->x_min)
+        continue;
+
+      slices[n_slices] = *f;
+
+      n_slices++;
}
- else
- return 0;
+
+  if (catchart->include_missing && frq_tab->n_missing > 0)
+    {
+      /* The array comes from xnmalloc and is not cleared, so seed the
+         merged slice with a copy of the first missing category instead of
+         adding counts onto an uninitialized field. */
+      struct freq *merged = &slices[n_slices++];
+      *merged = frq_tab->missing[0];
+      for (i = 1; i < frq_tab->n_missing; i++)
+        merged->count += frq_tab->missing[i].count;
+    }
+
+  *n_slicesp = n_slices;
+  return slices;
}
-/* Allocate an array of slices and fill them from the data in frq_tab
- n_slices will contain the number of slices allocated.
+
+/* Allocates an array of pointers to struct freq and fills it from the data
+   in FRQ_TAB, according to the parameters of CATCHART.  Valid categories
+   are referenced in place (the pointers point into FRQ_TAB->valid); those
+   whose count is below CATCHART->x_min or above CATCHART->x_max are
+   skipped.  If CATCHART includes missing values and FRQ_TAB has any, the
+   missing categories are merged into one separately allocated struct freq
+   that is appended as the last element.
+   Stores the number of slices in *N_SLICESP.
+   NOTE(review): the merged element is heap-allocated and is not released
+   by freeing the array alone -- confirm who owns it.
The caller is responsible for freeing slices
*/
-static struct slice *
-freq_tab_to_slice_array(const struct frq_chart *pie,
- const struct freq_tab *frq_tab,
- const struct variable *var,
- int *n_slicesp)
+static struct freq **
+pick_cat_counts_ptr (const struct frq_chart *catchart,
+                     const struct freq_tab *frq_tab,
+                     int *n_slicesp)
{
- struct slice *slices;
- int n_slices;
+  int n_slices = 0;
int i;
-
- slices = xnmalloc (frq_tab->n_valid + frq_tab->n_missing, sizeof *slices);
- n_slices = 0;
+  struct freq **slices = xnmalloc (frq_tab->n_valid + frq_tab->n_missing, sizeof *slices);
for (i = 0; i < frq_tab->n_valid; i++)
- n_slices += add_slice (pie, &frq_tab->valid[i], var, &slices[n_slices]);
- for (i = 0; i < frq_tab->n_missing; i++)
- n_slices += add_slice (pie, &frq_tab->missing[i], var, &slices[n_slices]);
+    {
+      struct freq *f = &frq_tab->valid[i];
+      if (f->count > catchart->x_max)
+        continue;
+
+      if (f->count < catchart->x_min)
+        continue;
+
+      slices[n_slices] = f;
+
+      n_slices++;
+    }
+
+  if (catchart->include_missing && frq_tab->n_missing > 0)
+    {
+      /* Seed the merged entry with a copy of the first missing category so
+         that its count starts from a defined value, then add the rest.
+         The entry is counted in N_SLICES; previously it was allocated but
+         never reported to the caller. */
+      struct freq *merged = xmalloc (sizeof *merged);
+      *merged = frq_tab->missing[0];
+      for (i = 1; i < frq_tab->n_missing; i++)
+        merged->count += frq_tab->missing[i].count;
+
+      slices[n_slices++] = merged;
+    }
*n_slicesp = n_slices;
return slices;
}
+
static void
do_piechart(const struct frq_chart *pie, const struct variable *var,
const struct freq_tab *frq_tab)
{
- struct slice *slices;
- int n_slices, i;
-
- slices = freq_tab_to_slice_array (pie, frq_tab, var, &n_slices);
+ int n_slices;
+ struct freq *slices = pick_cat_counts (pie, frq_tab, &n_slices);
if (n_slices < 2)
msg (SW, _("Omitting pie chart for %s, which has only %d unique values."),
msg (SW, _("Omitting pie chart for %s, which has over 50 unique values."),
var_get_name (var));
else
- chart_item_submit (piechart_create (var_to_string(var), slices, n_slices));
+ chart_submit (piechart_create (var, slices, n_slices));
- for (i = 0; i < n_slices; i++)
- ds_destroy (&slices[i].label);
free (slices);
}
+
+/* Submits a bar chart of the categories that BAR selects from FRQ_TAB,
+   passing VAR and a count of 1 to barchart_create.  The y axis is labeled
+   "Count" or "Percent" according to BAR->y_scale.  Issues a warning
+   instead of a chart when no category is selected. */
+static void
+do_barchart(const struct frq_chart *bar, const struct variable **var,
+            const struct freq_tab *frq_tab)
+{
+  int n_bars;
+  struct freq **bars = pick_cat_counts_ptr (bar, frq_tab, &n_bars);
+
+  if (n_bars >= 1)
+    {
+      const bool percent = bar->y_scale == FRQ_PERCENT;
+
+      chart_submit (barchart_create (var, 1,
+                                     (bar->y_scale == FRQ_FREQ
+                                      ? _("Count") : _("Percent")),
+                                     percent, bars, n_bars));
+    }
+  else
+    msg (SW, _("Omitting bar chart, which has no values."));
+
+  /* Only the pointer array is released here. */
+  free (bars);
+}
+
+
/* Calculates all the pertinent statistics for VF, putting them in array
D[]. FRQ says which of VF's percentiles, if any, is the median.  With no
valid values the minimum, maximum and range are set to SYSMIS. */
static void
-calc_stats (const struct var_freqs *vf, double d[FRQ_ST_count])
+calc_stats (const struct frq_proc *frq, const struct var_freqs *vf,
+ double d[FRQ_ST_count])
{
const struct freq_tab *ft = &vf->tab;
double W = ft->valid_cases;
const struct freq *f;
struct moments *m;
- int most_often;
- double X_mode;
-
- assert (ft->n_valid > 0);
+ int most_often = -1; /* Highest count seen so far. */
+ double X_mode = SYSMIS; /* Stays SYSMIS if the loop below never runs. */
/* Calculate the mode. */
- most_often = -1;
- X_mode = SYSMIS;
for (f = ft->valid; f < ft->missing; f++)
{
if (most_often < f->count)
{
most_often = f->count;
- X_mode = f->value.f;
+ X_mode = f->values[0].f;
}
else if (most_often == f->count)
{
/* Calculate moments. */
m = moments_create (MOMENT_KURTOSIS);
for (f = ft->valid; f < ft->missing; f++)
- moments_pass_one (m, f->value.f, f->count);
+ moments_pass_one (m, f->values[0].f, f->count);
for (f = ft->valid; f < ft->missing; f++)
- moments_pass_two (m, f->value.f, f->count);
+ moments_pass_two (m, f->values[0].f, f->count);
moments_calculate (m, NULL, &d[FRQ_ST_MEAN], &d[FRQ_ST_VARIANCE],
&d[FRQ_ST_SKEWNESS], &d[FRQ_ST_KURTOSIS]);
moments_destroy (m);
- /* Formulas below are taken from _SPSS Statistical Algorithms_. */
- d[FRQ_ST_MINIMUM] = ft->valid[0].value.f;
- d[FRQ_ST_MAXIMUM] = ft->valid[ft->n_valid - 1].value.f;
+ /* Formulae below are taken from _SPSS Statistical Algorithms_. */
+ if (ft->n_valid > 0) /* valid[0] and valid[n_valid - 1] are read below, so the array must be non-empty. */
+ {
+ d[FRQ_ST_MINIMUM] = ft->valid[0].values[0].f;
+ d[FRQ_ST_MAXIMUM] = ft->valid[ft->n_valid - 1].values[0].f;
+ d[FRQ_ST_RANGE] = d[FRQ_ST_MAXIMUM] - d[FRQ_ST_MINIMUM];
+ }
+ else
+ {
+ d[FRQ_ST_MINIMUM] = SYSMIS;
+ d[FRQ_ST_MAXIMUM] = SYSMIS;
+ d[FRQ_ST_RANGE] = SYSMIS;
+ }
d[FRQ_ST_MODE] = X_mode;
- d[FRQ_ST_RANGE] = d[FRQ_ST_MAXIMUM] - d[FRQ_ST_MINIMUM];
d[FRQ_ST_SUM] = d[FRQ_ST_MEAN] * W;
d[FRQ_ST_STDDEV] = sqrt (d[FRQ_ST_VARIANCE]);
d[FRQ_ST_SEMEAN] = d[FRQ_ST_STDDEV] / sqrt (W);
d[FRQ_ST_SESKEWNESS] = calc_seskew (W);
d[FRQ_ST_SEKURTOSIS] = calc_sekurt (W);
+ d[FRQ_ST_MEDIAN] = (frq->median_idx != SIZE_MAX /* A median_idx of SIZE_MAX yields SYSMIS. */
+ ? vf->percentiles[frq->median_idx]
+ : SYSMIS);
}
-/* Displays a table of all the statistics requested for variable V. */
-static void
-dump_statistics (const struct frq_proc *frq, const struct var_freqs *vf,
- const struct variable *wv)
+static bool
+all_string_variables (const struct frq_proc *frq) /* Returns true if none of FRQ's variables is numeric. */
{
- const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0;
- const struct freq_tab *ft = &vf->tab;
- double stat_value[FRQ_ST_count];
- struct tab_table *t;
- int i, r;
+ for (size_t i = 0; i < frq->n_vars; i++)
+ if (var_is_numeric (frq->vars[i].var))
+ return false; /* One numeric variable is enough to decide. */
- if (var_is_alpha (vf->var))
+ return true;
+}
+
+/* Displays a table of all the statistics requested.  The table has one
+   column per non-string variable in FRQ and one row per statistic: the
+   valid and missing counts, each statistic whose bit is set in FRQ's stats,
+   and each percentile marked to be shown.  Nothing is displayed when every
+   variable is a string variable.  WV is set as the table's weight
+   variable. */
+static void
+dump_statistics (const struct frq_proc *frq, const struct variable *wv)
+{
+ if (all_string_variables (frq))
return;
- if (ft->n_valid == 0)
+ struct pivot_table *table = pivot_table_create (N_("Statistics"));
+ pivot_table_set_weight_var (table, wv);
+/* Variables run across the columns. */
+ struct pivot_dimension *variables
+ = pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Variables"));
+/* Statistics run down the rows, starting with the "N" group. */
+ struct pivot_dimension *statistics = pivot_dimension_create (
+ table, PIVOT_AXIS_ROW, N_("Statistics"));
+ struct pivot_category *n = pivot_category_create_group (
+ statistics->root, N_("N"));
+ pivot_category_create_leaves (n,
+ N_("Valid"), PIVOT_RC_COUNT,
+ N_("Missing"), PIVOT_RC_COUNT);
+ for (int i = 0; i < FRQ_ST_count; i++)
+ if (frq->stats & BIT_INDEX (i))
+ pivot_category_create_leaf (statistics->root,
+ pivot_value_new_text (st_name[i]));
+ struct pivot_category *percentiles = NULL;
+ for (size_t i = 0; i < frq->n_percentiles; i++)
{
- msg (SW, _("No valid data for variable %s; statistics not displayed."),
- var_get_name (vf->var));
- return;
- }
- calc_stats (vf, stat_value);
-
- t = tab_create (3, ((frq->stats & FRQ_ST_MEDIAN) ? frq->n_stats - 1 : frq->n_stats) + frq->n_show_percentiles + 2);
-
- tab_box (t, TAL_1, TAL_1, -1, -1 , 0 , 0 , 2, tab_nr(t) - 1) ;
-
-
- tab_vline (t, TAL_1 , 2, 0, tab_nr(t) - 1);
- tab_vline (t, TAL_GAP , 1, 0, tab_nr(t) - 1 ) ;
-
- r = 2; /* N missing and N valid are always dumped */
+ const struct percentile *pc = &frq->percentiles[i];
- for (i = 0; i < FRQ_ST_count; i++)
- {
- if (FRQ_ST_MEDIAN == i)
- continue;
+ if (!pc->show)
+ continue;
- if (frq->stats & BIT_INDEX (i))
- {
- tab_text (t, 0, r, TAB_LEFT | TAT_TITLE,
- gettext (st_name[i]));
- tab_double (t, 2, r, TAB_NONE, stat_value[i], NULL);
- r++;
- }
+ if (!percentiles) /* The group is created only once a shown percentile is found. */
+ percentiles = pivot_category_create_group (
+ statistics->root, N_("Percentiles"));
+ pivot_category_create_leaf (percentiles, pivot_value_new_integer (
+ pc->p * 100.0));
}
- tab_text (t, 0, 0, TAB_LEFT | TAT_TITLE, _("N"));
- tab_text (t, 1, 0, TAB_LEFT | TAT_TITLE, _("Valid"));
- tab_text (t, 1, 1, TAB_LEFT | TAT_TITLE, _("Missing"));
+ for (size_t i = 0; i < frq->n_vars; i++)
+ {
+ struct var_freqs *vf = &frq->vars[i];
+ if (var_is_alpha (vf->var))
+ continue;
- tab_double (t, 2, 0, TAB_NONE, ft->valid_cases, wfmt);
- tab_double (t, 2, 1, TAB_NONE, ft->total_cases - ft->valid_cases, wfmt);
+ const struct freq_tab *ft = &vf->tab;
- for (i = 0; i < frq->n_percentiles; i++)
- {
- const struct percentile *pc = &frq->percentiles[i];
+ int var_idx = pivot_category_create_leaf (
+ variables->root, pivot_value_new_variable (vf->var));
- if (!pc->show)
- continue;
+ int row = 0; /* Advances in the same order the row leaves were created above. */
+ pivot_table_put2 (table, var_idx, row++,
+ pivot_value_new_number (ft->valid_cases));
+ pivot_table_put2 (table, var_idx, row++,
+ pivot_value_new_number (
+ ft->total_cases - ft->valid_cases));
- if ( i == 0 )
- {
- tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Percentiles"));
- }
+ double stat_values[FRQ_ST_count];
+ calc_stats (frq, vf, stat_values);
+ for (int j = 0; j < FRQ_ST_count; j++)
+ {
+ if (!(frq->stats & BIT_INDEX (j)))
+ continue;
+
+ union value v = { .f = vf->tab.n_valid ? stat_values[j] : SYSMIS }; /* No valid values: every statistic is SYSMIS. */
+ struct pivot_value *pv
+ = (j == FRQ_ST_MODE || j == FRQ_ST_MINIMUM || j == FRQ_ST_MAXIMUM /* NOTE(review): presumably so these display like the variable's own values -- confirm. */
+ ? pivot_value_new_var_value (vf->var, &v)
+ : pivot_value_new_number (v.f));
+ pivot_table_put2 (table, var_idx, row++, pv);
+ }
- if (pc->p == 0.5)
- tab_text (t, 1, r, TAB_LEFT, _("50 (Median)"));
- else
- tab_fixed (t, 1, r, TAB_LEFT, pc->p * 100, 3, 0);
- tab_double (t, 2, r, TAB_NONE, pc->value,
- var_get_print_format (vf->var));
- r++;
+ for (size_t j = 0; j < frq->n_percentiles; j++)
+ {
+ const struct percentile *pc = &frq->percentiles[j];
+ if (!pc->show)
+ continue;
+
+ union value v = {
+ .f = vf->tab.n_valid ? vf->percentiles[j] : SYSMIS
+ };
+ pivot_table_put2 (table, var_idx, row++,
+ pivot_value_new_var_value (vf->var, &v));
+ }
}
- tab_title (t, "%s", var_to_string (vf->var));
-
- tab_submit (t);
+ pivot_table_submit (table);
}
-