X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Fhistogram.c;h=5441df6bbc50b22068df11cc0977bb83bde6f599;hb=d532977937c51fd8d08fdabe57e06af4b05a8ed0;hp=c41bdc08508bc8be9e9f3d0b8b55f33fdf3cc79b;hpb=b46b794dfb9f0758aafec83f50993d1930894099;p=pspp
diff --git a/src/math/histogram.c b/src/math/histogram.c
index c41bdc0850..5441df6bbc 100644
--- a/src/math/histogram.c
+++ b/src/math/histogram.c
@@ -1,5 +1,5 @@
/* PSPP - a program for statistical analysis.
- Copyright (C) 2004, 2008, 2009 Free Software Foundation, Inc.
+ Copyright (C) 2004, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -15,15 +15,23 @@
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include
-#include "histogram.h"
-#include
-#include
+#include "math/histogram.h"
#include
-#include "chart-geometry.h"
#include
+#include "libpspp/message.h"
+#include "libpspp/assertion.h"
+#include "libpspp/cast.h"
+#include "math/chart-geometry.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+#define N_(msgid) msgid
+
+
+#include "gl/xalloc.h"
void
histogram_add (struct histogram *h, double y, double c)
@@ -32,48 +40,89 @@ histogram_add (struct histogram *h, double y, double c)
stat->accumulate (stat, NULL, c, 0, y);
}
-
-
static void
acc (struct statistic *s, const struct ccase *cx UNUSED, double c, double cc UNUSED, double y)
{
- struct histogram *hist = (struct histogram *) s;
+ struct histogram *hist = UP_CAST (s, struct histogram, parent); /* S is presumably the `parent' member embedded in a struct histogram (histogram_create hands out &h->parent); get the containing histogram instead of relying on a plain pointer cast. */
gsl_histogram_accumulate (hist->gsl_hist, y, c);
}
-
static void
destroy (struct statistic *s)
{
- struct histogram *h = (struct histogram *) s;
+ struct histogram *h = UP_CAST (s, struct histogram, parent); /* Containing histogram of the embedded statistic S. */
gsl_histogram_free (h->gsl_hist);
- free (s);
+ free (h); /* Free the pointer that was allocated (the histogram itself), so this stays correct even if `parent' is not the first member. */
}
struct histogram *
-histogram_create (int bins, double min, double max)
+histogram_create (double bin_width, double min, double max)
{
+ int bins;
struct histogram *h = xmalloc (sizeof *h);
struct statistic *stat = &h->parent;
double upper_limit, lower_limit;
+ const double half_bin_width = bin_width / 2.0;
- double bin_width = chart_rounded_tick ((max - min) / (double) bins);
- double bin_width_2 = bin_width / 2.0;
+ /* -1 if the lower end of the range contains more unused space
+ than the upper end.
+ +1 otherwise. */
+ short sparse_end = 0;
- int n = ceil (max / (bin_width_2) ) ;
+ if (max == min)
+ {
+ msg (MW, _("Not creating histogram because the data contains less than 2 distinct values"));
+ free (h);
+ return NULL;
+ }
assert (max > min);
- if ( ! (n % 2 ) ) n++;
- upper_limit = n * bin_width_2;
-
- n = floor (min / (bin_width_2) ) ;
- if ( ! (n % 2 ) ) n--;
- lower_limit = n * bin_width_2;
+ lower_limit = floor (min / half_bin_width) - 1;
+ upper_limit = floor (max / half_bin_width) + 1;
+
+ if (remainder (min, half_bin_width) > remainder (max, half_bin_width)) /* Compare the two remainders. The first call's closing parenthesis was misplaced, so its divisor was the 0/1 result of `half_bin_width > remainder (...)'. NOTE(review): confirm this comparison matches the "unused space" rule described for sparse_end. */
+ sparse_end = -1;
+ else
+ sparse_end = +1;
+
+ /* The range must be an EVEN number of half bin_widths */
+ if ( (int)(upper_limit - lower_limit) % 2)
+ {
+ /* Extend the range at the end which gives the least unused space */
+ if (sparse_end == +1)
+ lower_limit --;
+ else
+ upper_limit ++;
+
+ /* Now the other end has more space */
+ sparse_end *= -1;
+ }
+
+ /* But the range should be aligned to an ODD number of
+ half bin widths, so that the labels are aesthetically pleasing ones. */
+ if ( (int)lower_limit % 2 == 0)
+ {
+ lower_limit += -sparse_end ;
+ upper_limit += -sparse_end ;
+ }
+
+ bins = (upper_limit - lower_limit) / 2.0;
+
+ /* Force the number of bins to lie in a sensible range */
+ if (bins > 25)
+ bins = 25;
+
+ if (bins < 1)
+ bins = 1;
+
+ upper_limit *= half_bin_width;
+ lower_limit *= half_bin_width;
h->gsl_hist = gsl_histogram_alloc (bins);
+
gsl_histogram_set_ranges_uniform (h->gsl_hist, lower_limit, upper_limit);
stat->accumulate = acc;