/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2007, 2009 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2007, 2009, 2010 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
double x2; /* The datum value >= the percentile */
int flag;
int flag2; /* Set to 1 if this percentile value has been found */
+ bool show; /* True to show this percentile in the statistics box. */
};
-static void add_percentile (double x) ;
+static void add_percentile (double x, bool show);
static struct percentile *percentiles;
-static int n_percentiles;
+static int n_percentiles, n_show_percentiles;
/* Groups of statistics. */
#define BI BIT_INDEX
int i;
n_percentiles = 0;
+ n_show_percentiles = 0;
percentiles = NULL;
n_variables = 0;
int pl;
subc_list_double *ptl_list = &cmd.dl_percentiles[i];
for ( pl = 0 ; pl < subc_list_double_count(ptl_list); ++pl)
- add_percentile (subc_list_double_at(ptl_list, pl) / 100.0 );
+ add_percentile (subc_list_double_at(ptl_list, pl) / 100.0, true);
}
}
if ( cmd.sbc_ntiles )
{
int j;
for (j = 0; j <= cmd.n_ntiles[i]; ++j )
- add_percentile (j / (double) cmd.n_ntiles[i]);
+ add_percentile (j / (double) cmd.n_ntiles[i], true);
}
}
if (stats & BIT_INDEX (frq_median))
{
/* Treat the median as the 50% percentile.
We output it in the percentiles table as "50 (Median)." */
- add_percentile (0.5);
+ add_percentile (0.5, true);
stats &= ~BIT_INDEX (frq_median);
n_stats--;
}
+ if (chart == GFT_HIST)
+ {
+ add_percentile (0.25, false);
+ add_percentile (0.75, false);
+ }
/* Do it! */
input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
- if ( chart == GFT_HIST && var_is_numeric (v) )
+ if ( chart == GFT_HIST && var_is_numeric (v) && ft->n_valid > 0)
{
double d[frq_n_stats];
struct histogram *hist ;
}
/* Adds X to the list of percentiles, keeping the list in proper
- order. */
+ order. If SHOW is true, the percentile will be shown in the statistics
+ box, otherwise it will be hidden. */
static void
-add_percentile (double x)
+add_percentile (double x, bool show)
{
int i;
{
/* Do nothing if it's already in the list */
if ( fabs(x - percentiles[i].p) < DBL_EPSILON )
- return;
+ {
+ if (show && !percentiles[i].show)
+ {
+ n_show_percentiles++;
+ percentiles[i].show = true;
+ }
+ return;
+ }
if (x < percentiles[i].p)
break;
n_percentiles + 1, sizeof *percentiles);
insert_element (percentiles, n_percentiles, sizeof *percentiles, i);
percentiles[i].p = x;
+ percentiles[i].show = show;
n_percentiles++;
+ if (show)
+ n_show_percentiles++;
}
}
\f
/* Statistical display. */
-/* Calculates all the pertinent statistics for variable V, putting
- them in array D[]. FIXME: This could be made much more optimal. */
+/* Calculates all the pertinent statistics for variable V, putting them in
+ array D[]. */
static void
calc_stats (const struct variable *v, double d[frq_n_stats])
{
/* Calculate percentiles. */
+ assert (ft->n_valid > 0);
+
for (i = 0; i < n_percentiles; i++)
{
percentiles[i].flag = 0;
}
calc_stats (v, stat_value);
- t = tab_create (3, n_stats + n_percentiles + 2);
+ t = tab_create (3, n_stats + n_show_percentiles + 2);
tab_box (t, TAL_1, TAL_1, -1, -1 , 0 , 0 , 2, tab_nr(t) - 1) ;
for (i = 0; i < n_percentiles; i++, r++)
{
+ if (!percentiles[i].show)
+ continue;
+
if ( i == 0 )
{
tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Percentiles"));
tab_submit (t);
}
+/* Returns the interquartile range, that is, the value stored for the
+   0.75 percentile minus the value stored for the 0.25 percentile in
+   the file-scope PERCENTILES list.
+
+   Returns SYSMIS if either quartile is absent from the list, if either
+   quartile's stored value is SYSMIS, or if the spread is not positive.
+   A zero-width spread is deliberately reported as SYSMIS: the
+   Freedman-Diaconis bin width derived from this result is used as a
+   divisor, so returning 0 would yield an infinite or NaN bin count,
+   and converting that to an int is undefined behavior. */
+static double
+calculate_iqr (void)
+{
+  double q1 = SYSMIS;
+  double q3 = SYSMIS;
+  int i;
+
+  /* Percentile fractions are matched with a tolerance rather than with
+     ==, the same way add_percentile detects duplicates. */
+  for (i = 0; i < n_percentiles; i++)
+    {
+      if (fabs (0.25 - percentiles[i].p) < DBL_EPSILON)
+        q1 = percentiles[i].value;
+      else if (fabs (0.75 - percentiles[i].p) < DBL_EPSILON)
+        q3 = percentiles[i].value;
+    }
+
+  if (q1 == SYSMIS || q3 == SYSMIS)
+    return SYSMIS;
+
+  /* Treat a degenerate (zero or negative) spread as "unknown" so that
+     the caller falls back to its default instead of dividing by zero. */
+  return q3 > q1 ? q3 - q1 : SYSMIS;
+}
/* Create a gsl_histogram from a freq_tab */
struct histogram *
double x_max = -DBL_MAX;
struct histogram *hist;
- const double bins = 11;
+ double iqr;
+ int bins;
struct hsh_iterator hi;
struct hsh_table *fh = ft->data;
if ( frq->value.f > x_max ) x_max = frq->value.f ;
}
+ /* Freedman-Diaconis' choice of bin width. */
+ iqr = calculate_iqr ();
+ if (iqr != SYSMIS)
+ {
+ double bin_width = 2 * iqr / pow (ft->valid_cases, 1.0 / 3.0);
+ bins = (x_max - x_min) / bin_width;
+ if (bins < 5)
+ bins = 5;
+ else if (bins > 400)
+ bins = 400;
+ }
+ else
+ bins = 5;
+
hist = histogram_create (bins, x_min, x_max);
for( i = 0 ; i < ft->n_valid ; ++i )