From: John Darrington Date: Sat, 26 Apr 2014 05:33:08 +0000 (+0200) Subject: FREQUENCIES: Fixed crash showing histograms of limited range. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=8e98d2ec1b1620280d66820e323d82b00975374e FREQUENCIES: Fixed crash showing histograms of limited range. We had been using the Freedman-Diaconis rule to select the bin width. According to the literature, this is better than Sturges' rule. However, it cannot work when the interquartile range (IQR) is zero, which will happen for datasets with a small range. This change uses the Freedman-Diaconis rule and falls back to Sturges' rule when the IQR is zero. --- diff --git a/src/language/stats/frequencies.c b/src/language/stats/frequencies.c index 1f8290cf2c..7bda6a13e0 100644 --- a/src/language/stats/frequencies.c +++ b/src/language/stats/frequencies.c @@ -1195,9 +1195,16 @@ freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft, } } - /* Freedman-Diaconis' choice of bin width. */ + iqr = calculate_iqr (frq); - bin_width = 2 * iqr / pow (valid_freq, 1.0 / 3.0); + + if (iqr > 0) + /* Freedman-Diaconis' choice of bin width. */ + bin_width = 2 * iqr / pow (valid_freq, 1.0 / 3.0); + + else + /* Sturges Rule */ + bin_width = (x_max - x_min) / (1 + log2 (valid_freq)); histogram = histogram_create (bin_width, x_min, x_max); @@ -1355,7 +1362,8 @@ dump_statistics (const struct frq_proc *frq, const struct var_freqs *vf, } calc_stats (vf, stat_value); - t = tab_create (3, ((frq->stats & FRQ_ST_MEDIAN) ? frq->n_stats - 1 : frq->n_stats) + frq->n_show_percentiles + 2); + t = tab_create (3, ((frq->stats & FRQ_ST_MEDIAN) ? 
frq->n_stats - 1 : frq->n_stats) + + frq->n_show_percentiles + 2); tab_box (t, TAL_1, TAL_1, -1, -1 , 0 , 0 , 2, tab_nr(t) - 1) ; diff --git a/tests/language/stats/frequencies.at b/tests/language/stats/frequencies.at index a2545b0f4c..469ba82d18 100644 --- a/tests/language/stats/frequencies.at +++ b/tests/language/stats/frequencies.at @@ -516,6 +516,7 @@ FREQUENCIES VAR=x /PERCENTILES = 0 25 50 75 100. ]) + AT_CHECK([pspp -O format=csv frequencies.sps], [0], [Table: X Value Label,Value,Frequency,Percent,Valid Percent,Cum Percent @@ -540,3 +541,36 @@ Percentiles,0,1.00 ,100,5.00 ]) AT_CLEANUP + +AT_SETUP([FREQUENCIES dichotomous histogram]) +AT_DATA([frequencies.sps], [dnl +data list notable list /d4 *. +begin data. +0 +0 +0 +1 +0 +0 +0 +0 +1 +0 +0 +0 +0 +0 +1 +2 +0 +end data. + +FREQUENCIES + /VARIABLES = d4 + /FORMAT=AVALUE TABLE + /HISTOGRAM=NORMAL + . +]) + +AT_CHECK([pspp frequencies.sps], [0], [ignore]) +AT_CLEANUP