1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2004, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "math/histogram.h"
20 #include "math/decimal.h"
22 #include <gsl/gsl_histogram.h>
25 #include "data/settings.h"
26 #include "libpspp/message.h"
27 #include "libpspp/assertion.h"
28 #include "libpspp/cast.h"
29 #include "math/chart-geometry.h"
32 #define _(msgid) gettext (msgid)
33 #define N_(msgid) msgid
36 #include "gl/xalloc.h"
39 histogram_add (struct histogram *h, double y, double c)
41 struct statistic *stat = &h->parent;
42 stat->accumulate (stat, NULL, c, 0, y);
46 acc (struct statistic *s, const struct ccase *cx UNUSED, double c, double cc UNUSED, double y)
48 struct histogram *hist = UP_CAST (s, struct histogram, parent);
50 gsl_histogram_accumulate (hist->gsl_hist, y, c);
54 destroy (struct statistic *s)
56 struct histogram *h = UP_CAST (s, struct histogram, parent);
57 gsl_histogram_free (h->gsl_hist);
63 double get_slack (double limit, double half_bin_width, int *n_half_bins)
65 double ipart, remainder;
67 assert (half_bin_width > 0);
69 remainder = modf (limit / half_bin_width, &ipart);
71 /* In C modf and % behave in an unexpected (to me at any rate) manner
72 when presented with a negative value
74 For example, modf (-7.0 / 3.0) returns -2.0 R -0.3333
80 return remainder * half_bin_width;
84 /* This functions adjusts the upper and lower range of the histogram to make them fit BIN_WIDTH
85 MIN and MAX are the lowest and highest data to be plotted in the histogram.
86 ADJ_MIN and ADJ_MAX are locations of the adjusted values of MIN and MAX (the range will
87 always be equal or slightly larger).
88 Returns the number of bins.
90 The "testing_assert" expressions in this function should be algebraically correct.
91 However, due to floating point rounding they could fail, especially when small numbers
92 are involved. In normal use, therefore, testing_assert does nothing.
95 adjust_bin_ranges (double bin_width, double min, double max, double *adj_min, double *adj_max)
97 const double half_bin_width = bin_width / 2.0;
99 /* The lower and upper limits of the histogram, in units of half
101 int lower_limit, upper_limit;
103 double lower_slack = get_slack (min, half_bin_width, &lower_limit);
104 double upper_slack = -get_slack (max, half_bin_width, &upper_limit);
106 testing_assert (max > min);
108 /* If min is negative, then lower_slack may be less than zero.
109 In this case, the lower bound must be extended in the negative direction
110 so that it is less than OR EQUAL to min.
115 lower_slack += half_bin_width;
117 testing_assert (lower_limit * half_bin_width <= min);
119 /* However, the upper bound must be extended regardless, because histogram bins
120 span the range [lower, upper). In other words, the upper bound must be
124 upper_slack += half_bin_width;
125 testing_assert (upper_limit * half_bin_width > max);
127 /* The range must be an EVEN number of half bin_widths */
128 if ( (upper_limit - lower_limit) % 2)
130 /* Extend the range at the end which gives the least unused space */
131 if (upper_slack > lower_slack)
134 lower_slack += half_bin_width;
139 upper_slack += half_bin_width;
143 /* But the range should be aligned to an ODD number of
144 half bin widths, so that the labels are aesthetically pleasing ones.
145 Otherwise we are likely to get labels such as -3 -1 1 3 instead of -2 0 2 4
147 if ( lower_limit % 2 == 0)
149 /* If there is not enough slack at either end to perform a shift,
150 then we must extend the range so that there is. We must extend
151 by two half bin widths in order to preserve the EVEN condition
152 established above. Also, we extend on the end with the least
153 slack, in order to keep things as balanced as possible. */
154 if ( upper_slack > lower_slack && upper_slack <= half_bin_width)
157 lower_slack += 2 * half_bin_width;
160 if (lower_slack > upper_slack && lower_slack < half_bin_width)
163 upper_slack += 2 * half_bin_width;
166 if (upper_slack > lower_slack)
168 testing_assert (upper_slack > half_bin_width);
170 /* Adjust the range to the left */
173 upper_slack -= half_bin_width;
174 lower_slack += half_bin_width;
178 testing_assert (lower_slack >= half_bin_width);
180 /* Adjust the range to the right */
183 lower_slack -= half_bin_width;
184 upper_slack += half_bin_width;
188 /* If there are any completely empty bins, then remove them,
189 since empty bins don't really add much information to the histogram.
191 if (upper_slack > 2 * half_bin_width)
193 upper_slack -= 2 * half_bin_width;
197 if (lower_slack >= 2 * half_bin_width)
199 lower_slack -= 2 * half_bin_width;
203 *adj_min = lower_limit * half_bin_width;
204 *adj_max = upper_limit * half_bin_width;
206 testing_assert (*adj_max > max);
207 testing_assert (*adj_min <= min);
209 return (upper_limit - lower_limit) / 2.0;
215 histogram_create (double bin_width_in, double min, double max)
217 struct decimal bin_width;
218 const int MAX_BINS = 25;
220 struct statistic *stat;
222 double adjusted_min, adjusted_max;
226 msg (MW, _("Not creating histogram because the data contains less than 2 distinct values"));
230 assert (bin_width_in > 0);
232 chart_rounded_tick (bin_width_in, &bin_width);
233 bins = adjust_bin_ranges (decimal_to_double (&bin_width),
234 min, max, &adjusted_min, &adjusted_max);
236 /* Force the number of bins to lie in a sensible range. */
239 chart_rounded_tick ((max - min) / (double) (MAX_BINS - 1), &bin_width);
240 bins = adjust_bin_ranges (decimal_to_double (&bin_width),
241 min, max, &adjusted_min, &adjusted_max);
244 /* Can this ever happen? */
248 h = xmalloc (sizeof *h);
250 h->gsl_hist = gsl_histogram_alloc (bins);
252 gsl_histogram_set_ranges_uniform (h->gsl_hist, adjusted_min, adjusted_max);
255 stat->accumulate = acc;
256 stat->destroy = destroy;