1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2004, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "math/histogram.h"
21 #include <gsl/gsl_histogram.h>
24 #include "data/settings.h"
25 #include "libpspp/message.h"
26 #include "libpspp/assertion.h"
27 #include "libpspp/cast.h"
28 #include "math/chart-geometry.h"
31 #define _(msgid) gettext (msgid)
32 #define N_(msgid) msgid
35 #include "gl/xalloc.h"
38 histogram_add (struct histogram *h, double y, double c)
40 struct statistic *stat = &h->parent;
41 stat->accumulate (stat, NULL, c, 0, y);
45 acc (struct statistic *s, const struct ccase *cx UNUSED, double c, double cc UNUSED, double y)
47 struct histogram *hist = UP_CAST (s, struct histogram, parent);
49 gsl_histogram_accumulate (hist->gsl_hist, y, c);
53 destroy (struct statistic *s)
55 struct histogram *h = UP_CAST (s, struct histogram, parent);
56 gsl_histogram_free (h->gsl_hist);
62 double get_slack (double limit, double half_bin_width, int *n_half_bins)
64 double ipart, remainder;
66 assert (half_bin_width > 0);
68 remainder = modf (limit / half_bin_width, &ipart);
70 /* In C modf and % behave in an unexpected (to me at any rate) manner
71 when presented with a negative value
73 For example, modf (-7.0 / 3.0) returns -2.0 R -0.3333
79 return remainder * half_bin_width;
83 /* This functions adjusts the upper and lower range of the histogram to make them fit BIN_WIDTH
84 MIN and MAX are the lowest and highest data to be plotted in the histogram.
85 ADJ_MIN and ADJ_MAX are locations of the adjusted values of MIN and MAX (the range will
86 always be equal or slightly larger).
87 Returns the number of bins.
89 The "testing_assert" expressions in this function should be algebraically correct.
90 However, due to floating point rounding they could fail, especially when small numbers
91 are involved. In normal use, therefore, testing_assert does nothing.
94 adjust_bin_ranges (double bin_width, double min, double max, double *adj_min, double *adj_max)
96 const double half_bin_width = bin_width / 2.0;
98 /* The lower and upper limits of the histogram, in units of half
100 int lower_limit, upper_limit;
102 double lower_slack = get_slack (min, half_bin_width, &lower_limit);
103 double upper_slack = -get_slack (max, half_bin_width, &upper_limit);
105 testing_assert (max > min);
107 /* If min is negative, then lower_slack may be less than zero.
108 In this case, the lower bound must be extended in the negative direction
109 so that it is less than OR EQUAL to min.
114 lower_slack += half_bin_width;
116 testing_assert (lower_limit * half_bin_width <= min);
118 /* However, the upper bound must be extended regardless, because histogram bins
119 span the range [lower, upper). In other words, the upper bound must be
123 upper_slack += half_bin_width;
124 testing_assert (upper_limit * half_bin_width > max);
126 /* The range must be an EVEN number of half bin_widths */
127 if ( (upper_limit - lower_limit) % 2)
129 /* Extend the range at the end which gives the least unused space */
130 if (upper_slack > lower_slack)
133 lower_slack += half_bin_width;
138 upper_slack += half_bin_width;
142 /* But the range should be aligned to an ODD number of
143 half bin widths, so that the labels are aesthetically pleasing ones.
144 Otherwise we are likely to get labels such as -3 -1 1 3 instead of -2 0 2 4
146 if ( lower_limit % 2 == 0)
148 /* If there is not enough slack at either end to perform a shift,
149 then we must extend the range so that there is. We must extend
150 by two half bin widths in order to preserve the EVEN condition
151 established above. Also, we extend on the end with the least
152 slack, in order to keep things as balanced as possible. */
153 if ( upper_slack > lower_slack && upper_slack <= half_bin_width)
156 lower_slack += 2 * half_bin_width;
159 if (lower_slack > upper_slack && lower_slack < half_bin_width)
162 upper_slack += 2 * half_bin_width;
165 if (upper_slack > lower_slack)
167 testing_assert (upper_slack > half_bin_width);
169 /* Adjust the range to the left */
172 upper_slack -= half_bin_width;
173 lower_slack += half_bin_width;
177 testing_assert (lower_slack >= half_bin_width);
179 /* Adjust the range to the right */
182 lower_slack -= half_bin_width;
183 upper_slack += half_bin_width;
187 /* If there are any completely empty bins, then remove them,
188 since empty bins don't really add much information to the histogram.
190 if (upper_slack > 2 * half_bin_width)
192 upper_slack -= 2 * half_bin_width;
196 if (lower_slack >= 2 * half_bin_width)
198 lower_slack -= 2 * half_bin_width;
202 *adj_min = lower_limit * half_bin_width;
203 *adj_max = upper_limit * half_bin_width;
205 testing_assert (*adj_max > max);
206 testing_assert (*adj_min <= min);
208 return (upper_limit - lower_limit) / 2.0;
214 histogram_create (double bin_width, double min, double max)
216 const int MAX_BINS = 25;
218 struct statistic *stat;
220 double adjusted_min, adjusted_max;
224 msg (MW, _("Not creating histogram because the data contains less than 2 distinct values"));
228 assert (bin_width > 0);
230 bin_width = chart_rounded_tick (bin_width);
231 bins = adjust_bin_ranges (bin_width, min, max, &adjusted_min, &adjusted_max);
233 /* Force the number of bins to lie in a sensible range. */
236 bin_width = chart_rounded_tick ((max - min) / (double) (MAX_BINS - 1));
237 bins = adjust_bin_ranges (bin_width,
238 min, max, &adjusted_min, &adjusted_max);
241 /* Can this ever happen? */
245 h = xmalloc (sizeof *h);
247 h->gsl_hist = gsl_histogram_alloc (bins);
249 gsl_histogram_set_ranges_uniform (h->gsl_hist, adjusted_min, adjusted_max);
252 stat->accumulate = acc;
253 stat->destroy = destroy;