From c919bc2cf709771292738b0a64a07bd04b3417ae Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 9 Mar 2010 20:52:48 -0800 Subject: [PATCH] FREQUENCIES: Choose number of bins for histogram based on valid cases. Until now, histograms have always had exactly 11 bins. There is no "correct" number of bins, but a fixed number of bins also seems less than ideal. Use Sturges' formula, instead, to choose the number of bins. Reported by Erik Frebold. --- src/language/stats/frequencies.q | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/language/stats/frequencies.q b/src/language/stats/frequencies.q index d00c90c907..523345be51 100644 --- a/src/language/stats/frequencies.q +++ b/src/language/stats/frequencies.q @@ -1374,7 +1374,7 @@ freq_tab_to_hist (const struct freq_tab *ft, const struct variable *var) double x_max = -DBL_MAX; struct histogram *hist; - const double bins = 11; + int bins; struct hsh_iterator hi; struct hsh_table *fh = ft->data; @@ -1390,6 +1390,11 @@ freq_tab_to_hist (const struct freq_tab *ft, const struct variable *var) if ( frq->value.f > x_max ) x_max = frq->value.f ; } + /* Sturges' formula. */ + bins = ceil (log (ft->valid_cases) / log (2) + 1); + if (bins < 5) + bins = 5; + hist = histogram_create (bins, x_min, x_max); for( i = 0 ; i < ft->n_valid ; ++i ) -- 2.30.2