/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2007, 2009 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2007, 2009, 2010 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <libpspp/str.h>
#include <math/histogram.h>
#include <math/moments.h>
-#include <output/chart.h>
+#include <output/chart-item.h>
#include <output/charts/piechart.h>
#include <output/charts/plot-hist.h>
-#include <output/manager.h>
-#include <output/output.h>
-#include <output/table.h>
+#include <output/tab.h>
#include "freq.h"
double x2; /* The datum value >= the percentile */
int flag;
int flag2; /* Set to 1 if this percentile value has been found */
+ bool show; /* True to show this percentile in the statistics box. */
};
-static void add_percentile (double x) ;
+static void add_percentile (double x, bool show);
static struct percentile *percentiles;
-static int n_percentiles;
+static int n_percentiles, n_show_percentiles;
/* Groups of statistics. */
#define BI BIT_INDEX
struct hsh_table *data; /* Undifferentiated data. */
struct freq_mutable *valid; /* Valid freqs. */
int n_valid; /* Number of total freqs. */
+ const struct dictionary *dict; /* The dict from whence entries in the table
+ come */
struct freq_mutable *missing; /* Missing freqs. */
int n_missing; /* Number of missing freqs. */
int i;
n_percentiles = 0;
+ n_show_percentiles = 0;
percentiles = NULL;
n_variables = 0;
int pl;
subc_list_double *ptl_list = &cmd.dl_percentiles[i];
for ( pl = 0 ; pl < subc_list_double_count(ptl_list); ++pl)
- add_percentile (subc_list_double_at(ptl_list, pl) / 100.0 );
+ add_percentile (subc_list_double_at(ptl_list, pl) / 100.0, true);
}
}
if ( cmd.sbc_ntiles )
{
int j;
for (j = 0; j <= cmd.n_ntiles[i]; ++j )
- add_percentile (j / (double) cmd.n_ntiles[i]);
+ add_percentile (j / (double) cmd.n_ntiles[i], true);
}
}
if (stats & BIT_INDEX (frq_median))
{
/* Treat the median as the 50% percentile.
We output it in the percentiles table as "50 (Median)." */
- add_percentile (0.5);
+ add_percentile (0.5, true);
stats &= ~BIT_INDEX (frq_median);
n_stats--;
}
+ if (chart == GFT_HIST)
+ {
+ add_percentile (0.25, false);
+ add_percentile (0.75, false);
+ }
/* Do it! */
input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
- if ( chart == GFT_HIST && var_is_numeric (v) )
+ if ( chart == GFT_HIST && var_is_numeric (v) && ft->n_valid > 0)
{
double d[frq_n_stats];
struct histogram *hist ;
hist = freq_tab_to_hist (ft,v);
- chart_submit (histogram_chart_create (
- hist, var_to_string(v),
- vf->tab.valid_cases,
- d[frq_mean],
- d[frq_stddev],
- normal));
+ chart_item_submit (histogram_chart_create (
+ hist->gsl_hist, var_to_string(v),
+ vf->tab.valid_cases,
+ d[frq_mean],
+ d[frq_stddev],
+ normal));
statistic_destroy (&hist->parent);
}
}
vf = var_attach_aux (v, xmalloc (sizeof *vf), var_dtor_free);
vf->tab.valid = vf->tab.missing = NULL;
+ vf->tab.dict = dataset_dict (ds);
vf->n_groups = 0;
vf->groups = NULL;
vf->width = var_get_width (v);
}
/* Adds X to the list of percentiles, keeping the list in proper
- order. */
+ order. If SHOW is true, the percentile will be shown in the statistics
+ box, otherwise it will be hidden. */
static void
-add_percentile (double x)
+add_percentile (double x, bool show)
{
int i;
{
/* Do nothing if it's already in the list */
if ( fabs(x - percentiles[i].p) < DBL_EPSILON )
- return;
+ {
+ if (show && !percentiles[i].show)
+ {
+ n_show_percentiles++;
+ percentiles[i].show = true;
+ }
+ return;
+ }
if (x < percentiles[i].p)
break;
n_percentiles + 1, sizeof *percentiles);
insert_element (percentiles, n_percentiles, sizeof *percentiles, i);
percentiles[i].p = x;
+ percentiles[i].show = show;
n_percentiles++;
+ if (show)
+ n_show_percentiles++;
}
}
\f
/* Frequency table display. */
-struct full_dim_aux
- {
- bool show_labels;
- };
-
-/* Sets the widths of all the columns and heights of all the rows in
- table T for driver D. */
-static void
-full_dim (struct tab_rendering *r, void *aux_)
-{
- const struct outp_driver *d = r->driver;
- const struct tab_table *t = r->table;
- const struct full_dim_aux *aux = aux_;
- int i;
-
- for (i = 0; i < tab_nc (t); i++)
- {
- r->w[i] = tab_natural_width (r, i);
- if (aux->show_labels && i == 0)
- r->w[i] = MIN (r->w[i], d->prop_em_width * 15);
- else
- r->w[i] = MAX (r->w[i], d->prop_em_width * 8);
- }
-
- for (i = 0; i < tab_nr (t); i++)
- r->h[i] = d->font_height;
-}
-
-static void
-full_dim_free (void *aux_)
-{
- struct full_dim_aux *aux = aux_;
- free (aux);
-}
-
/* Displays a full frequency table for variable V. */
static void
dump_full (const struct variable *v, const struct variable *wv)
struct freq_tab *ft;
struct freq_mutable *f;
struct tab_table *t;
- int r;
+ int r, x;
double cum_total = 0.0;
double cum_freq = 0.0;
- struct init
- {
- int c, r;
- const char *s;
- };
-
- const struct init *p;
-
- static const struct init vec[] =
- {
- {4, 0, N_("Valid")},
- {5, 0, N_("Cum")},
- {1, 1, N_("Value")},
- {2, 1, N_("Frequency")},
- {3, 1, N_("Percent")},
- {4, 1, N_("Percent")},
- {5, 1, N_("Percent")},
- {0, 0, NULL},
- {1, 0, NULL},
- {2, 0, NULL},
- {3, 0, NULL},
- {-1, -1, NULL},
+ static const char *headings[] = {
+ N_("Value"),
+ N_("Frequency"),
+ N_("Percent"),
+ N_("Valid Percent"),
+ N_("Cum Percent")
};
const bool lab = (cmd.labels == FRQ_LABELS);
- struct full_dim_aux *aux;
-
vf = get_var_freqs (v);
ft = &vf->tab;
n_categories = ft->n_valid + ft->n_missing;
- t = tab_create (5 + lab, n_categories + 3);
- tab_headers (t, 0, 0, 2, 0);
-
- aux = xmalloc (sizeof *aux);
- aux->show_labels = lab;
- tab_dim (t, full_dim, full_dim_free, aux);
+ t = tab_create (5 + lab, n_categories + 2);
+ tab_headers (t, 0, 0, 1, 0);
if (lab)
- tab_text (t, 0, 1, TAB_CENTER | TAT_TITLE, _("Value Label"));
+ tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Value Label"));
- for (p = vec; p->s; p++)
- tab_text (t, lab ? p->c : p->c - 1, p->r,
- TAB_CENTER | TAT_TITLE, gettext (p->s));
+ for (x = 0; x < 5; x++)
+ tab_text (t, lab + x, 0, TAB_CENTER | TAT_TITLE, gettext (headings[x]));
- r = 2;
+ r = 1;
for (f = ft->valid; f < ft->missing; f++)
{
double percent, valid_percent;
tab_text (t, 0, r, TAB_LEFT, label);
}
- tab_value (t, 0 + lab, r, TAB_NONE, &f->value, &vf->print);
+ tab_value (t, 0 + lab, r, TAB_NONE, &f->value, ft->dict, &vf->print);
tab_double (t, 1 + lab, r, TAB_NONE, f->count, wfmt);
tab_double (t, 2 + lab, r, TAB_NONE, percent, NULL);
tab_double (t, 3 + lab, r, TAB_NONE, valid_percent, NULL);
tab_text (t, 0, r, TAB_LEFT, label);
}
- tab_value (t, 0 + lab, r, TAB_NONE, &f->value, &vf->print);
+ tab_value (t, 0 + lab, r, TAB_NONE, &f->value, ft->dict, &vf->print);
tab_double (t, 1 + lab, r, TAB_NONE, f->count, wfmt);
tab_double (t, 2 + lab, r, TAB_NONE,
f->count / ft->total_cases * 100.0, NULL);
tab_box (t, TAL_1, TAL_1,
cmd.spaces == FRQ_SINGLE ? -1 : TAL_GAP, TAL_1,
0, 0, 4 + lab, r);
- tab_hline (t, TAL_2, 0, 4 + lab, 2);
+ tab_hline (t, TAL_2, 0, 4 + lab, 1);
tab_hline (t, TAL_2, 0, 4 + lab, r);
tab_joint_text (t, 0, r, 0 + lab, r, TAB_RIGHT | TAT_TITLE, _("Total"));
tab_vline (t, TAL_0, 1, r, r);
tab_submit (t);
}
-/* Sets the widths of all the columns and heights of all the rows in
- table T for driver D. */
-static void
-condensed_dim (struct tab_rendering *r, void *aux UNUSED)
-{
- struct outp_driver *d = r->driver;
- const struct tab_table *t = r->table;
-
- int cum_width = outp_string_width (d, _("Cum"), OUTP_PROPORTIONAL);
- int zeros_width = outp_string_width (d, "000", OUTP_PROPORTIONAL);
- int max_width = MAX (cum_width, zeros_width);
-
- int i;
-
- for (i = 0; i < 2; i++)
- {
- r->w[i] = tab_natural_width (r, i);
- r->w[i] = MAX (r->w[i], d->prop_em_width * 8);
- }
- for (i = 2; i < 4; i++)
- r->w[i] = max_width;
- for (i = 0; i < tab_nr (t); i++)
- r->h[i] = d->font_height;
-}
-
/* Display condensed frequency table for variable V. */
static void
dump_condensed (const struct variable *v, const struct variable *wv)
tab_text (t, 2, 1, TAB_CENTER | TAT_TITLE, _("Pct"));
tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Cum"));
tab_text (t, 3, 1, TAB_CENTER | TAT_TITLE, _("Pct"));
- tab_dim (t, condensed_dim, NULL, NULL);
r = 2;
for (f = ft->valid; f < ft->missing; f++)
percent = f->count / ft->total_cases * 100.0;
cum_total += f->count / ft->valid_cases * 100.0;
- tab_value (t, 0, r, TAB_NONE, &f->value, &vf->print);
+ tab_value (t, 0, r, TAB_NONE, &f->value, ft->dict, &vf->print);
tab_double (t, 1, r, TAB_NONE, f->count, wfmt);
tab_double (t, 2, r, TAB_NONE, percent, NULL);
tab_double (t, 3, r, TAB_NONE, cum_total, NULL);
}
for (; f < &ft->valid[n_categories]; f++)
{
- tab_value (t, 0, r, TAB_NONE, &f->value, &vf->print);
+ tab_value (t, 0, r, TAB_NONE, &f->value, ft->dict, &vf->print);
tab_double (t, 1, r, TAB_NONE, f->count, wfmt);
tab_double (t, 2, r, TAB_NONE,
f->count / ft->total_cases * 100.0, NULL);
0, 0, 3, r - 1);
tab_hline (t, TAL_2, 0, 3, 2);
tab_title (t, "%s", var_to_string (v));
- tab_columns (t, SOM_COL_DOWN);
tab_submit (t);
}
\f
/* Statistical display. */
-/* Calculates all the pertinent statistics for variable V, putting
- them in array D[]. FIXME: This could be made much more optimal. */
+/* Calculates all the pertinent statistics for variable V, putting them in
+ array D[]. */
static void
calc_stats (const struct variable *v, double d[frq_n_stats])
{
/* Calculate percentiles. */
+ assert (ft->n_valid > 0);
+
for (i = 0; i < n_percentiles; i++)
{
percentiles[i].flag = 0;
}
calc_stats (v, stat_value);
- t = tab_create (3, n_stats + n_percentiles + 2);
- tab_dim (t, tab_natural_dimensions, NULL, NULL);
+ t = tab_create (3, n_stats + n_show_percentiles + 2);
tab_box (t, TAL_1, TAL_1, -1, -1 , 0 , 0 , 2, tab_nr(t) - 1) ;
for (i = 0; i < n_percentiles; i++, r++)
{
+ if (!percentiles[i].show)
+ continue;
+
if ( i == 0 )
{
tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Percentiles"));
var_get_print_format (v));
}
- tab_columns (t, SOM_COL_DOWN);
if (show_varname)
tab_title (t, "%s", var_to_string (v));
- else
- tab_flags (t, SOMF_NO_TITLE);
tab_submit (t);
}
+static double
+calculate_iqr (void) /* Interquartile range taken from the percentile list, or SYSMIS. */
+{
+ double q1 = SYSMIS; /* Stays SYSMIS unless an entry with p of 0.25 is found. */
+ double q3 = SYSMIS; /* Stays SYSMIS unless an entry with p of 0.75 is found. */
+ int i;
+
+ for (i = 0; i < n_percentiles; i++)
+ {
+ if (fabs (0.25 - percentiles[i].p) < DBL_EPSILON) /* Match p within DBL_EPSILON rather than exactly. */
+ q1 = percentiles[i].value;
+ else if (fabs (0.75 - percentiles[i].p) < DBL_EPSILON)
+ q3 = percentiles[i].value;
+ }
+
+ return q1 == SYSMIS || q3 == SYSMIS ? SYSMIS : q3 - q1; /* Q3 - Q1 only when neither quartile is SYSMIS. */
+}
/* Create a gsl_histogram from a freq_tab */
struct histogram *
double x_max = -DBL_MAX;
struct histogram *hist;
- const double bins = 11;
+ double iqr;
+ int bins;
struct hsh_iterator hi;
struct hsh_table *fh = ft->data;
if ( frq->value.f > x_max ) x_max = frq->value.f ;
}
+ /* Freedman-Diaconis' choice of bin width. */
+ iqr = calculate_iqr ();
+ if (iqr != SYSMIS)
+ {
+ double bin_width = 2 * iqr / pow (ft->valid_cases, 1.0 / 3.0);
+ bins = (x_max - x_min) / bin_width;
+ if (bins < 5)
+ bins = 5;
+ else if (bins > 400)
+ bins = 400;
+ }
+ else
+ bins = 5;
+
hist = histogram_create (bins, x_min, x_max);
for( i = 0 ; i < ft->n_valid ; ++i )
slices = freq_tab_to_slice_array(frq_tab, var, &n_slices);
- chart_submit (piechart_create (var_to_string(var), slices, n_slices));
+ chart_item_submit (piechart_create (var_to_string(var), slices, n_slices));
for (i = 0 ; i < n_slices ; ++i )
ds_destroy (&slices[i].label);