1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2004, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "math/histogram.h"
21 #include <gsl/gsl_histogram.h>
24 #include "libpspp/message.h"
25 #include "libpspp/assertion.h"
26 #include "libpspp/cast.h"
27 #include "math/chart-geometry.h"
30 #define _(msgid) gettext (msgid)
31 #define N_(msgid) msgid
34 #include "gl/xalloc.h"
37 histogram_add (struct histogram *h, double y, double c)
39 struct statistic *stat = &h->parent;
40 stat->accumulate (stat, NULL, c, 0, y);
44 acc (struct statistic *s, const struct ccase *cx UNUSED, double c, double cc UNUSED, double y)
46 struct histogram *hist = UP_CAST (s, struct histogram, parent);
48 gsl_histogram_accumulate (hist->gsl_hist, y, c);
52 destroy (struct statistic *s)
54 struct histogram *h = UP_CAST (s, struct histogram, parent);
55 gsl_histogram_free (h->gsl_hist);
61 double get_slack (double limit, double half_bin_width, int *n_half_bins)
63 double ipart, remainder;
65 assert (half_bin_width > 0);
67 remainder = modf (limit / half_bin_width, &ipart);
69 /* In C modf and % behave in an unexpected (to me at any rate) manner
70 when presented with a negative value
72 For example, modf (-7.0 / 3.0) returns -2.0 R -0.3333
78 return remainder * half_bin_width;
82 /* This functions adjusts the upper and lower range of the histogram to make them fit BIN_WIDTH
83 MIN and MAX are the lowest and highest data to be plotted in the histogram.
84 ADJ_MIN and ADJ_MAX are locations of the adjusted values of MIN and MAX (the range will
85 always be equal or slightly larger).
86 Returns the number of bins.
88 The "testing_assert" expressions in this function should be algebraically correct.
89 However, due to floating point rounding they could fail, especially when small numbers
90 are involved. In normal use, therefore, testing_assert does nothing.
93 adjust_bin_ranges (double bin_width, double min, double max, double *adj_min, double *adj_max)
95 const double half_bin_width = bin_width / 2.0;
97 /* The lower and upper limits of the histogram, in units of half
99 int lower_limit, upper_limit;
101 double lower_slack = get_slack (min, half_bin_width, &lower_limit);
102 double upper_slack = -get_slack (max, half_bin_width, &upper_limit);
104 testing_assert (max > min);
106 /* If min is negative, then lower_slack may be less than zero.
107 In this case, the lower bound must be extended in the negative direction
108 so that it is less than OR EQUAL to min.
113 lower_slack += half_bin_width;
115 testing_assert (lower_limit * half_bin_width <= min);
117 /* However, the upper bound must be extended regardless, because histogram bins
118 span the range [lower, upper). In other words, the upper bound must be
122 upper_slack += half_bin_width;
123 testing_assert (upper_limit * half_bin_width > max);
125 /* The range must be an EVEN number of half bin_widths */
126 if ( (upper_limit - lower_limit) % 2)
128 /* Extend the range at the end which gives the least unused space */
129 if (upper_slack > lower_slack)
132 lower_slack += half_bin_width;
137 upper_slack += half_bin_width;
141 /* But the range should be aligned to an ODD number of
142 half bin widths, so that the labels are aesthetically pleasing ones.
143 Otherwise we are likely to get labels such as -3 -1 1 3 instead of -2 0 2 4
145 if ( lower_limit % 2 == 0)
147 /* If there is not enough slack at either end to perform a shift,
148 then we must extend the range so that there is. We must extend
149 by two half bin widths in order to preserve the EVEN condition
150 established above. Also, we extend on the end with the least
151 slack, in order to keep things as balanced as possible. */
152 if ( upper_slack > lower_slack && upper_slack <= half_bin_width)
155 lower_slack += 2 * half_bin_width;
158 if (lower_slack > upper_slack && lower_slack < half_bin_width)
161 upper_slack += 2 * half_bin_width;
164 if (upper_slack > lower_slack)
166 testing_assert (upper_slack > half_bin_width);
168 /* Adjust the range to the left */
171 upper_slack -= half_bin_width;
172 lower_slack += half_bin_width;
176 testing_assert (lower_slack >= half_bin_width);
178 /* Adjust the range to the right */
181 lower_slack -= half_bin_width;
182 upper_slack += half_bin_width;
186 /* If there are any completely empty bins, then remove them,
187 since empty bins don't really add much information to the histogram.
189 if (upper_slack > 2 * half_bin_width)
191 upper_slack -= 2 * half_bin_width;
195 if (lower_slack >= 2 * half_bin_width)
197 lower_slack -= 2 * half_bin_width;
201 *adj_min = lower_limit * half_bin_width;
202 *adj_max = upper_limit * half_bin_width;
204 testing_assert (*adj_max > max);
205 testing_assert (*adj_min <= min);
207 return (upper_limit - lower_limit) / 2.0;
213 histogram_create (double bin_width, double min, double max)
215 const int MAX_BINS = 25;
217 struct statistic *stat;
219 double adjusted_min, adjusted_max;
223 msg (MW, _("Not creating histogram because the data contains less than 2 distinct values"));
227 assert (bin_width > 0);
229 bin_width = chart_rounded_tick (bin_width);
230 bins = adjust_bin_ranges (bin_width, min, max, &adjusted_min, &adjusted_max);
232 /* Force the number of bins to lie in a sensible range. */
235 bin_width = chart_rounded_tick ((max - min) / (double) (MAX_BINS - 1));
236 bins = adjust_bin_ranges (bin_width,
237 min, max, &adjusted_min, &adjusted_max);
240 /* Can this ever happen? */
244 h = xmalloc (sizeof *h);
246 h->gsl_hist = gsl_histogram_alloc (bins);
248 gsl_histogram_set_ranges_uniform (h->gsl_hist, adjusted_min, adjusted_max);
251 stat->accumulate = acc;
252 stat->destroy = destroy;