1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2004, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "math/histogram.h"
21 #include <gsl/gsl_histogram.h>
24 #include "libpspp/message.h"
25 #include "libpspp/assertion.h"
26 #include "libpspp/cast.h"
27 #include "math/chart-geometry.h"
30 #define _(msgid) gettext (msgid)
31 #define N_(msgid) msgid
34 #include "gl/xalloc.h"
37 histogram_add (struct histogram *h, double y, double c)
39 struct statistic *stat = &h->parent;
40 stat->accumulate (stat, NULL, c, 0, y);
44 acc (struct statistic *s, const struct ccase *cx UNUSED, double c, double cc UNUSED, double y)
46 struct histogram *hist = UP_CAST (s, struct histogram, parent);
48 gsl_histogram_accumulate (hist->gsl_hist, y, c);
52 destroy (struct statistic *s)
54 struct histogram *h = UP_CAST (s, struct histogram, parent);
55 gsl_histogram_free (h->gsl_hist);
61 double get_slack (double limit, double half_bin_width, int *n_half_bins)
63 double ipart, remainder;
65 assert (half_bin_width > 0);
67 remainder = modf (limit / half_bin_width, &ipart);
69 /* In C modf and % behave in an unexpected (to me at any rate) manner
70 when presented with a negative value
72 For example, modf (-7.0 / 3.0) returns -2.0 R -0.3333
78 return remainder * half_bin_width;
82 /* This functions adjusts the upper and lower range of the histogram to make them fit BIN_WIDTH
83 MIN and MAX are the lowest and highest data to be plotted in the histogram.
84 ADJ_MIN and ADJ_MAX are locations of the adjusted values of MIN and MAX (the range will
85 always be equal or slightly larger).
86 Returns the number of bins.
89 adjust_bin_ranges (double bin_width, double min, double max, double *adj_min, double *adj_max)
91 const double half_bin_width = bin_width / 2.0;
93 /* The lower and upper limits of the histogram, in units of half
95 int lower_limit, upper_limit;
97 double lower_slack = get_slack (min, half_bin_width, &lower_limit);
98 double upper_slack = -get_slack (max, half_bin_width, &upper_limit);
102 /* If min is negative, then lower_slack may be less than zero.
103 In this case, the lower bound must be extended in the negative direction
104 so that it is less than OR EQUAL to min.
109 lower_slack += half_bin_width;
111 assert (lower_limit * half_bin_width <= min);
113 /* However, the upper bound must be extended regardless, because histogram bins
114 span the range [lower, upper). In other words, the upper bound must be
118 upper_slack += half_bin_width;
119 assert (upper_limit * half_bin_width > max);
121 /* The range must be an EVEN number of half bin_widths */
122 if ( (upper_limit - lower_limit) % 2)
124 /* Extend the range at the end which gives the least unused space */
125 if (upper_slack > lower_slack)
128 lower_slack += half_bin_width;
133 upper_slack += half_bin_width;
137 /* But the range should be aligned to an ODD number of
138 half bin widths, so that the labels are aesthetically pleasing ones.
139 Otherwise we are likely to get labels such as -3 -1 1 3 instead of -2 0 2 4
141 if ( lower_limit % 2 == 0)
143 /* If there is not enough slack at either end to perform a shift,
144 then we must extend the range so that there is. We must extend
145 by two half bin widths in order to preserve the EVEN condition
146 established above. Also, we extend on the end with the least
147 slack, in order to keep things as balanced as possible. */
148 if ( upper_slack > lower_slack && upper_slack <= half_bin_width)
151 lower_slack += 2 * half_bin_width;
154 if (lower_slack > upper_slack && lower_slack < half_bin_width)
157 upper_slack += 2 * half_bin_width;
160 if (upper_slack > lower_slack)
162 assert (upper_slack > half_bin_width);
164 /* Adjust the range to the left */
167 upper_slack -= half_bin_width;
168 lower_slack += half_bin_width;
172 assert (lower_slack >= half_bin_width);
174 /* Adjust the range to the right */
177 lower_slack -= half_bin_width;
178 upper_slack += half_bin_width;
182 /* If there are any completely empty bins, then remove them,
183 since empty bins don't really add much information to the histogram.
185 if (upper_slack > 2 * half_bin_width)
187 upper_slack -= 2 * half_bin_width;
191 if (lower_slack >= 2 * half_bin_width)
193 lower_slack -= 2 * half_bin_width;
197 *adj_min = lower_limit * half_bin_width;
198 *adj_max = upper_limit * half_bin_width;
200 assert (*adj_max > max);
201 assert (*adj_min <= min);
203 return (upper_limit - lower_limit) / 2.0;
209 histogram_create (double bin_width, double min, double max)
211 const int MAX_BINS = 25;
213 struct statistic *stat;
215 double adjusted_min, adjusted_max;
219 msg (MW, _("Not creating histogram because the data contains less than 2 distinct values"));
223 assert (bin_width > 0);
225 bin_width = chart_rounded_tick (bin_width);
226 bins = adjust_bin_ranges (bin_width, min, max, &adjusted_min, &adjusted_max);
228 /* Force the number of bins to lie in a sensible range. */
231 bin_width = chart_rounded_tick ((max - min) / (double) (MAX_BINS - 1));
232 bins = adjust_bin_ranges (bin_width,
233 min, max, &adjusted_min, &adjusted_max);
236 /* Can this ever happen? */
240 h = xmalloc (sizeof *h);
242 h->gsl_hist = gsl_histogram_alloc (bins);
244 gsl_histogram_set_ranges_uniform (h->gsl_hist, adjusted_min, adjusted_max);
247 stat->accumulate = acc;
248 stat->destroy = destroy;