From bb75ec3b2fe601c6b942f5cda283b82925dd745d Mon Sep 17 00:00:00 2001 From: John Darrington Date: Tue, 20 Mar 2012 19:03:09 +0100 Subject: [PATCH] histogram.c: histogram_create now takes bin width instead of the number of bins --- src/language/stats/examine.c | 10 +++++++++- src/language/stats/frequencies.q | 20 ++++++-------------- src/math/histogram.c | 25 ++++++++++++++----------- src/math/histogram.h | 2 +- 4 files changed, 30 insertions(+), 27 deletions(-) diff --git a/src/language/stats/examine.c b/src/language/stats/examine.c index 62c99afce1..5d9f642641 100644 --- a/src/language/stats/examine.c +++ b/src/language/stats/examine.c @@ -40,6 +40,7 @@ #include "math/interaction.h" #include "math/box-whisker.h" #include "math/categoricals.h" +#include "math/chart-geometry.h" #include "math/histogram.h" #include "math/moments.h" #include "math/np.h" @@ -1516,8 +1517,15 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) if (examine->histogram) { + /* Sturges Rule */ + double bin_width = abs (es[v].minimum - es[v].maximum) + / (1 + log2 (es[v].cc)) + ; + + bin_width = chart_rounded_tick (bin_width); + es[v].histogram = - histogram_create (10, es[v].minimum, es[v].maximum); + histogram_create (bin_width, es[v].minimum, es[v].maximum); } es[v].sorted_reader = casewriter_make_reader (es[v].sorted_writer); diff --git a/src/language/stats/frequencies.q b/src/language/stats/frequencies.q index a2ffe91c1c..d7754220fa 100644 --- a/src/language/stats/frequencies.q +++ b/src/language/stats/frequencies.q @@ -43,6 +43,8 @@ #include "libpspp/str.h" #include "math/histogram.h" #include "math/moments.h" +#include "math/chart-geometry.h" + #include "output/chart-item.h" #include "output/charts/piechart.h" #include "output/charts/plot-hist.h" @@ -1114,10 +1116,9 @@ freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft, { double x_min, x_max, valid_freq; int i; - + double bin_width; struct histogram *histogram; double iqr; - int bins; /* Find out the extremes of the x value, within the range to be included in the histogram, and sum the total frequency of those values. */ @@ -1137,19 +1138,10 @@ freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft, /* Freedman-Diaconis' choice of bin width. */ iqr = calculate_iqr (frq); - if (iqr != SYSMIS) - { - double bin_width = 2 * iqr / pow (valid_freq, 1.0 / 3.0); - bins = (x_max - x_min) / bin_width; - if (bins < 5) - bins = 5; - else if (bins > 400) - bins = 400; - } - else - bins = 5; + bin_width = 2 * iqr / pow (valid_freq, 1.0 / 3.0); + bin_width = chart_rounded_tick (bin_width); - histogram = histogram_create (bins, x_min, x_max); + histogram = histogram_create (bin_width, x_min, x_max); for (i = 0; i < ft->n_valid; i++) { const struct freq *f = &ft->valid[i]; diff --git a/src/math/histogram.c b/src/math/histogram.c index 1b55d204b8..89619bcca5 100644 --- a/src/math/histogram.c +++ b/src/math/histogram.c @@ -55,27 +55,30 @@ destroy (struct statistic *s) struct histogram * -histogram_create (int bins, double min, double max) +histogram_create (double bin_width, double min, double max) { + int bins; struct histogram *h = xmalloc (sizeof *h); struct statistic *stat = &h->parent; - double upper_limit, lower_limit; - - double bin_width = chart_rounded_tick ((max - min) / (double) bins); - double bin_width_2 = bin_width / 2.0; + const short max_sign = max >= 0; + const short min_sign = min >= 0; - int n = ceil (max / (bin_width_2) ) ; + double upper_limit, lower_limit; assert (max >= min); - if ( ! (n % 2 ) ) n++; - upper_limit = n * bin_width_2; + lower_limit = trunc (2 * abs (min) / bin_width) - 1; + lower_limit *= bin_width / 2; + lower_limit *= min_sign; - n = floor (min / (bin_width_2) ) ; - if ( ! (n % 2 ) ) n--; - lower_limit = n * bin_width_2; + upper_limit = trunc (2 * abs(max) / bin_width) + 1; + upper_limit *= bin_width / 2; + upper_limit *= max_sign; + + bins = (upper_limit - lower_limit) / bin_width; h->gsl_hist = gsl_histogram_alloc (bins); + gsl_histogram_set_ranges_uniform (h->gsl_hist, lower_limit, upper_limit); stat->accumulate = acc; diff --git a/src/math/histogram.h b/src/math/histogram.h index 4c0e54fb04..f03e767c5e 100644 --- a/src/math/histogram.h +++ b/src/math/histogram.h @@ -30,7 +30,7 @@ struct histogram gsl_histogram *gsl_hist; }; -struct histogram * histogram_create (int bins, double max, double min); +struct histogram * histogram_create (double bin_width, double max, double min); void histogram_add (struct histogram *h, double y, double c); -- 2.30.2