X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Fhistogram.c;h=d14eb45e3423a55077c84d8b93a370d70b6c383b;hb=478954c1ed028744ab870f0e6a5befcc8f643758;hp=3c88c3858f61c6b1c2230843a7b9151a47902956;hpb=d0b91eae59319ab2756d0d43b9cb15eb9cd3c234;p=pspp diff --git a/src/math/histogram.c b/src/math/histogram.c index 3c88c3858f..d14eb45e34 100644 --- a/src/math/histogram.c +++ b/src/math/histogram.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2004, 2008, 2009 Free Software Foundation, Inc. + Copyright (C) 2004, 2008, 2009, 2011, 2012 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,16 +15,23 @@ along with this program. If not, see . */ #include -#include "histogram.h" -#include -#include -#include +#include "math/histogram.h" #include -#include "chart-geometry.h" #include +#include "libpspp/message.h" +#include "libpspp/assertion.h" +#include "libpspp/cast.h" +#include "math/chart-geometry.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) +#define N_(msgid) msgid + + +#include "gl/xalloc.h" void histogram_add (struct histogram *h, double y, double c) @@ -33,8 +40,6 @@ histogram_add (struct histogram *h, double y, double c) stat->accumulate (stat, NULL, c, 0, y); } - - static void acc (struct statistic *s, const struct ccase *cx UNUSED, double c, double cc UNUSED, double y) { @@ -43,7 +48,6 @@ acc (struct statistic *s, const struct ccase *cx UNUSED, double c, double cc UNU gsl_histogram_accumulate (hist->gsl_hist, y, c); } - static void destroy (struct statistic *s) { @@ -54,28 +58,77 @@ destroy (struct statistic *s) struct histogram * -histogram_create (int bins, double min, double max) +histogram_create (double bin_width, double min, double max) { + int bins; struct histogram *h = xmalloc (sizeof *h); struct statistic *stat = &h->parent; - double upper_limit, lower_limit; - double bin_width = chart_rounded_tick ((max - min) / (double) bins); - double bin_width_2 = bin_width / 2.0; + const double half_bin_width = bin_width / 2.0; - int n = ceil (max / (bin_width_2) ) ; + /* The lower and upper limits of the histogram, in units of half + bin widths */ + int lower_limit, upper_limit; - assert (max > min); + /* -1 if the lower end of the range contains more unused space + than the upper end. + +1 otherwise. */ + short sparse_end = 0; - if ( ! (n % 2 ) ) n++; - upper_limit = n * bin_width_2; + if (max == min) + { + msg (MW, _("Not creating histogram because the data contains less than 2 distinct values")); + free (h); + return NULL; + } - n = floor (min / (bin_width_2) ) ; - if ( ! (n % 2 ) ) n--; - lower_limit = n * bin_width_2; + assert (max > min); + + { + double ul, ll; + double lower_tail = modf (min / half_bin_width, &ll); + double upper_tail = modf (max / half_bin_width, &ul); + lower_limit = ll - 1; + upper_limit = ul + 1; + + sparse_end = lower_tail < upper_tail ? -1 : +1; + } + + /* The range must be an EVEN number of half bin_widths */ + if ( (upper_limit - lower_limit) % 2) + { + /* Extend the range at the end which gives the least unused space */ + if (sparse_end == +1) + lower_limit--; + else + upper_limit++; + + /* Now the other end has more space */ + sparse_end *= -1; + } + + /* But the range should be aligned to an ODD number of + half bin widths, so that the labels are aesthetically pleasing ones. */ + if ( lower_limit % 2 == 0) + { + lower_limit += -sparse_end ; + upper_limit += -sparse_end ; + } + + bins = (upper_limit - lower_limit) / 2.0; + + /* Force the number of bins to lie in a sensible range */ + if (bins > 25) + bins = 25; + + if (bins < 1) + bins = 1; h->gsl_hist = gsl_histogram_alloc (bins); - gsl_histogram_set_ranges_uniform (h->gsl_hist, lower_limit, upper_limit); + + gsl_histogram_set_ranges_uniform (h->gsl_hist, + lower_limit * half_bin_width, + upper_limit * half_bin_width); stat->accumulate = acc; stat->destroy = destroy;