1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2004, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "math/histogram.h"
21 #include <gsl/gsl_histogram.h>
24 #include "libpspp/message.h"
25 #include "libpspp/assertion.h"
26 #include "libpspp/cast.h"
27 #include "math/chart-geometry.h"
30 #define _(msgid) gettext (msgid)
31 #define N_(msgid) msgid
34 #include "gl/xalloc.h"
37 histogram_add (struct histogram *h, double y, double c)
39 struct statistic *stat = &h->parent;
40 stat->accumulate (stat, NULL, c, 0, y);
44 acc (struct statistic *s, const struct ccase *cx UNUSED, double c, double cc UNUSED, double y)
46 struct histogram *hist = UP_CAST (s, struct histogram, parent);
48 gsl_histogram_accumulate (hist->gsl_hist, y, c);
52 destroy (struct statistic *s)
54 struct histogram *h = UP_CAST (s, struct histogram, parent);
55 gsl_histogram_free (h->gsl_hist);
61 double get_slack (double limit, double half_bin_width, int *n_half_bins)
63 double ipart, remainder;
65 assert (half_bin_width > 0);
67 remainder = modf (limit / half_bin_width, &ipart);
69 /* In C modf and % behave in an unexpected (to me at any rate) manner
70 when presented with a negative value
72 For example, modf (-7.0 / 3.0) returns -2.0 R -0.3333
78 return remainder * half_bin_width;
82 /* This functions adjusts the upper and lower range of the histogram to make them fit BIN_WIDTH
83 MIN and MAX are the lowest and highest data to be plotted in the histogram.
84 ADJ_MIN and ADJ_MAX are locations of the adjusted values of MIN and MAX (the range will
85 always be equal or slightly larger).
86 Returns the number of bins.
89 adjust_bin_ranges (double bin_width, double min, double max, double *adj_min, double *adj_max)
91 const double half_bin_width = bin_width / 2.0;
93 /* The lower and upper limits of the histogram, in units of half
95 int lower_limit, upper_limit;
97 double lower_slack = get_slack (min, half_bin_width, &lower_limit);
98 double upper_slack = -get_slack (max, half_bin_width, &upper_limit);
102 /* If min is negative, then lower_slack may be less than zero.
103 In this case, the lower bound must be extended in the negative direction
104 so that it is less than OR EQUAL to min.
109 lower_slack += half_bin_width;
111 assert (lower_limit * half_bin_width <= min);
113 /* However, the upper bound must be extended regardless, because histogram bins
114 span the range [lower, upper). In other words, the upper bound must be
118 upper_slack += half_bin_width;
119 assert (upper_limit * half_bin_width > max);
121 /* The range must be an EVEN number of half bin_widths */
122 if ( (upper_limit - lower_limit) % 2)
124 /* Extend the range at the end which gives the least unused space */
125 if (upper_slack > lower_slack)
128 lower_slack += half_bin_width;
133 upper_slack += half_bin_width;
137 /* But the range should be aligned to an ODD number of
138 half bin widths, so that the labels are aesthetically pleasing ones.
139 Otherwise we are likely to get labels such as -3 -1 1 3 instead of -2 0 2 4
141 if ( lower_limit % 2 == 0)
143 if (upper_slack > lower_slack && upper_slack > half_bin_width)
145 /* Adjust the range to the left */
148 upper_slack -= half_bin_width;
149 lower_slack += half_bin_width;
151 else if (lower_slack > upper_slack && lower_slack >= half_bin_width)
153 /* Adjust the range to the right */
156 lower_slack -= half_bin_width;
157 upper_slack += half_bin_width;
161 /* In this case, we cannot adjust in either direction.
162 To get the most pleasing alignment, we would have to change
163 the bin width (which would have other visual disadvantages).
168 /* If there are any completely empty bins, then remove them,
169 since empty bins don't really add much information to the histogram.
171 if (upper_slack > 2 * half_bin_width)
173 upper_slack -= 2 * half_bin_width;
177 if (lower_slack >= 2 * half_bin_width)
179 lower_slack -= 2 * half_bin_width;
183 *adj_min = lower_limit * half_bin_width;
184 *adj_max = upper_limit * half_bin_width;
186 assert (*adj_max > max);
187 assert (*adj_min <= min);
189 return (upper_limit - lower_limit) / 2.0;
195 histogram_create (double bin_width, double min, double max)
197 const int MAX_BINS = 25;
199 struct statistic *stat;
201 double adjusted_min, adjusted_max;
205 msg (MW, _("Not creating histogram because the data contains less than 2 distinct values"));
209 assert (bin_width > 0);
211 bins = adjust_bin_ranges (bin_width, min, max, &adjusted_min, &adjusted_max);
213 /* Force the number of bins to lie in a sensible range. */
216 bins = adjust_bin_ranges ((max - min) / (double) (MAX_BINS - 1),
217 min, max, &adjusted_min, &adjusted_max);
220 /* Can this ever happen? */
224 h = xmalloc (sizeof *h);
226 h->gsl_hist = gsl_histogram_alloc (bins);
228 gsl_histogram_set_ranges_uniform (h->gsl_hist, adjusted_min, adjusted_max);
231 stat->accumulate = acc;
232 stat->destroy = destroy;