X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=src%2Fmath%2Fhistogram.c;h=9158590dd75c3a140244292004087a773eddec3c;hb=90f32f11361dafbe2193ddd6d4e7c41dd294f6b5;hp=d14eb45e3423a55077c84d8b93a370d70b6c383b;hpb=478954c1ed028744ab870f0e6a5befcc8f643758;p=pspp diff --git a/src/math/histogram.c b/src/math/histogram.c index d14eb45e34..9158590dd7 100644 --- a/src/math/histogram.c +++ b/src/math/histogram.c @@ -21,6 +21,7 @@ #include #include +#include "data/settings.h" #include "libpspp/message.h" #include "libpspp/assertion.h" #include "libpspp/cast.h" @@ -57,79 +58,96 @@ destroy (struct statistic *s) } -struct histogram * -histogram_create (double bin_width, double min, double max) -{ - int bins; - struct histogram *h = xmalloc (sizeof *h); - struct statistic *stat = &h->parent; +/* Find a bin width which is adapted to the scaling of the x axis +In the example here, the binwidth is half of the tick interval. - const double half_bin_width = bin_width / 2.0; + binwidth + > < + |....+....+....+. .+....| + LOWER 1 2 3 N_TICKS + ^LOWDBL ^HIGHDBL - /* The lower and upper limits of the histogram, in units of half - bin widths */ - int lower_limit, upper_limit; +This only works, when the min and max value for the histogram are adapted +such that (max-min) is a multiple of the binwidth. Then the location of the +first bin has to be aligned to the ticks. +*/ +static int +hist_find_pretty_no_of_bins(double bin_width_in, double min, double max, + double *adjusted_min, double *adjusted_max) +{ + double lower, interval; + int n_ticks; + double binwidth; + int nbins; - /* -1 if the lower end of the range contains more unused space - than the upper end. - +1 otherwise. */ - short sparse_end = 0; + chart_get_scale (max, min, &lower, &interval, &n_ticks); - if (max == min) + if (bin_width_in >= 2 * interval) { - msg (MW, _("Not creating histogram because the data contains less than 2 distinct values")); - free (h); - return NULL; + binwidth = floor(bin_width_in/interval) * interval; + *adjusted_min = lower; } - - assert (max > min); - - { - double ul, ll; - double lower_tail = modf (min / half_bin_width, &ll); - double upper_tail = modf (max / half_bin_width, &ul); - lower_limit = ll - 1; - upper_limit = ul + 1; - - sparse_end = lower_tail < upper_tail ? -1 : +1; - } - - /* The range must be an EVEN number of half bin_widths */ - if ( (upper_limit - lower_limit) % 2) + else if (bin_width_in >= 1.5 * interval) { - /* Extend the range at the end which gives the least unused space */ - if (sparse_end == +1) - lower_limit--; + binwidth = 1.5 * interval; + if (min < (lower + 0.5 * interval)) + *adjusted_min = lower; else - upper_limit++; - - /* Now the other end has more space */ - sparse_end *= -1; + *adjusted_min = lower + 0.5 * interval; } + else if (bin_width_in >= interval) + { + binwidth = interval; + *adjusted_min = lower; + } + else if (bin_width_in >= (2.0/3.0 * interval)) + { + binwidth = (2.0/3.0 * interval); + if (min >= lower + binwidth) + *adjusted_min = lower + binwidth; + else + *adjusted_min = lower; + } + else + { + int i; + for(i = 2; bin_width_in < interval/i; i++); + binwidth = interval/i; + *adjusted_min = floor((min - lower)/binwidth)*binwidth + lower; + } + + nbins = ceil((max-*adjusted_min)/binwidth); + *adjusted_max = nbins*binwidth + *adjusted_min; + + return nbins; +} + - /* But the range should be aligned to an ODD number of - half bin widths, so that the labels are aesthetically pleasing ones. */ - if ( lower_limit % 2 == 0) +struct histogram * +histogram_create (double bin_width_in, double min, double max) +{ + struct histogram *h; + struct statistic *stat; + int bins; + double adjusted_min, adjusted_max; + + if (max == min) { - lower_limit += -sparse_end ; - upper_limit += -sparse_end ; + msg (MW, _("Not creating histogram because the data contains less than 2 distinct values")); + return NULL; } - bins = (upper_limit - lower_limit) / 2.0; + assert (bin_width_in > 0); - /* Force the number of bins to lie in a sensible range */ - if (bins > 25) - bins = 25; + bins = hist_find_pretty_no_of_bins(bin_width_in, min, max, &adjusted_min, &adjusted_max); - if (bins < 1) - bins = 1; + h = xmalloc (sizeof *h); h->gsl_hist = gsl_histogram_alloc (bins); - gsl_histogram_set_ranges_uniform (h->gsl_hist, - lower_limit * half_bin_width, - upper_limit * half_bin_width); + gsl_histogram_set_ranges_uniform (h->gsl_hist, adjusted_min, adjusted_max); + stat = &h->parent; stat->accumulate = acc; stat->destroy = destroy;