1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2004, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "math/histogram.h"
21 #include <gsl/gsl_histogram.h>
24 #include "libpspp/message.h"
25 #include "libpspp/assertion.h"
26 #include "libpspp/cast.h"
27 #include "math/chart-geometry.h"
30 #define _(msgid) gettext (msgid)
31 #define N_(msgid) msgid
34 #include "gl/xalloc.h"
37 histogram_add (struct histogram *h, double y, double c)
39 struct statistic *stat = &h->parent;
40 stat->accumulate (stat, NULL, c, 0, y);
44 acc (struct statistic *s, const struct ccase *cx UNUSED, double c, double cc UNUSED, double y)
46 struct histogram *hist = UP_CAST (s, struct histogram, parent);
48 gsl_histogram_accumulate (hist->gsl_hist, y, c);
52 destroy (struct statistic *s)
54 struct histogram *h = UP_CAST (s, struct histogram, parent);
55 gsl_histogram_free (h->gsl_hist);
61 histogram_create (double bin_width, double min, double max)
64 struct histogram *h = xmalloc (sizeof *h);
65 struct statistic *stat = &h->parent;
67 const double half_bin_width = bin_width / 2.0;
69 /* The lower and upper limits of the histogram, in units of half
71 int lower_limit, upper_limit;
73 /* -1 if the lower end of the range contains more unused space
79 double lower_remainder = fabs (modf (min / half_bin_width, &ll));
80 double upper_remainder = fabs (modf (max / half_bin_width, &ul));
85 msg (MW, _("Not creating histogram because the data contains less than 2 distinct values"));
94 /* If the minimum value is not aligned on a half bin width,
95 then the lower bound must be extended so that the histogram range includes it. */
96 if (lower_remainder > 0)
99 /* However, the upper bound must be extended regardless, because histogram bins
100 span the range [lower, upper) */
101 upper_limit = ul + 1;
103 /* So, in the case of the maximum value coinciding with a half bin width,
104 the upper end will be the sparse end (because is got extended by a complete
105 half bin width). In other cases, it depends which got the bigger extension. */
106 if (upper_remainder == 0)
109 sparse_end = lower_remainder < upper_remainder ? -1 : +1;
111 /* The range must be an EVEN number of half bin_widths */
112 if ( (upper_limit - lower_limit) % 2)
114 /* Extend the range at the end which gives the least unused space */
115 if (sparse_end == +1)
120 /* Now the other end has more space */
124 /* But the range should be aligned to an ODD number of
125 half bin widths, so that the labels are aesthetically pleasing ones.
126 Otherwise we are likely to get labels such as -3 -1 1 3 instead of -2 0 2 4
128 if ( lower_limit % 2 == 0)
130 /* Shift the range away from the sparse end, EXCEPT if that is the upper end,
131 and it was extended to prevent the maximum value from getting lost */
132 if (sparse_end == +1 && upper_remainder > 0)
143 bins = (upper_limit - lower_limit) / 2.0;
145 /* Force the number of bins to lie in a sensible range.
146 FIXME: We should redo the above calculations if this happens! */
153 h->gsl_hist = gsl_histogram_alloc (bins);
155 gsl_histogram_set_ranges_uniform (h->gsl_hist,
156 lower_limit * half_bin_width,
157 upper_limit * half_bin_width);
159 stat->accumulate = acc;
160 stat->destroy = destroy;