1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2004, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "math/histogram.h"
21 #include <gsl/gsl_histogram.h>
24 #include "libpspp/message.h"
25 #include "libpspp/assertion.h"
26 #include "libpspp/cast.h"
27 #include "math/chart-geometry.h"
30 #define _(msgid) gettext (msgid)
31 #define N_(msgid) msgid
34 #include "gl/xalloc.h"
37 histogram_add (struct histogram *h, double y, double c)
39 struct statistic *stat = &h->parent;
40 stat->accumulate (stat, NULL, c, 0, y);
44 acc (struct statistic *s, const struct ccase *cx UNUSED, double c, double cc UNUSED, double y)
46 struct histogram *hist = UP_CAST (s, struct histogram, parent);
48 gsl_histogram_accumulate (hist->gsl_hist, y, c);
52 destroy (struct statistic *s)
54 struct histogram *h = UP_CAST (s, struct histogram, parent);
55 gsl_histogram_free (h->gsl_hist);
60 /* This functions adjusts the upper and lower range of the histogram to make them fit BIN_WIDTH
61 MIN and MAX are the lowest and highest data to be plotted in the histogram.
62 ADJ_MIN and ADJ_MAX are locations of the adjusted values of MIN and MAX (the range will always be
63 equal or slightly larger).
64 Returns the number of bins.
67 adjust_bin_ranges (double bin_width, double min, double max, double *adj_min, double *adj_max)
69 const double half_bin_width = bin_width / 2.0;
71 /* The lower and upper limits of the histogram, in units of half
73 int lower_limit, upper_limit;
75 /* -1 if the lower end of the range contains more unused space
81 double lower_remainder = fabs (modf (min / half_bin_width, &ll));
82 double upper_remainder = fabs (modf (max / half_bin_width, &ul));
89 /* If the minimum value is not aligned on a half bin width,
90 then the lower bound must be extended so that the histogram range includes it. */
91 if (lower_remainder > 0)
94 /* However, the upper bound must be extended regardless, because histogram bins
95 span the range [lower, upper) */
98 /* So, in the case of the maximum value coinciding with a half bin width,
99 the upper end will be the sparse end (because is got extended by a complete
100 half bin width). In other cases, it depends which got the bigger extension. */
101 if (upper_remainder == 0)
104 sparse_end = lower_remainder < upper_remainder ? -1 : +1;
106 /* The range must be an EVEN number of half bin_widths */
107 if ( (upper_limit - lower_limit) % 2)
109 /* Extend the range at the end which gives the least unused space */
110 if (sparse_end == +1)
115 /* Now the other end has more space */
119 /* But the range should be aligned to an ODD number of
120 half bin widths, so that the labels are aesthetically pleasing ones.
121 Otherwise we are likely to get labels such as -3 -1 1 3 instead of -2 0 2 4
123 if ( lower_limit % 2 == 0)
125 /* Shift the range away from the sparse end, EXCEPT if that is the upper end,
126 and it was extended to prevent the maximum value from getting lost */
127 if (sparse_end == +1 && upper_remainder > 0)
139 *adj_min = lower_limit * half_bin_width;
140 *adj_max = upper_limit * half_bin_width;
142 assert (*adj_max >= max);
143 assert (*adj_min <= min);
145 return (upper_limit - lower_limit) / 2.0;
151 histogram_create (double bin_width, double min, double max)
153 const int MAX_BINS = 25;
155 struct statistic *stat;
157 double adjusted_min, adjusted_max;
159 assert (bin_width > 0);
163 msg (MW, _("Not creating histogram because the data contains less than 2 distinct values"));
167 bins = adjust_bin_ranges (bin_width, min, max, &adjusted_min, &adjusted_max);
169 /* Force the number of bins to lie in a sensible range. */
172 bins = adjust_bin_ranges ((max - min) / (double) (MAX_BINS - 1),
173 min, max, &adjusted_min, &adjusted_max);
176 /* Can this ever happen? */
180 h = xmalloc (sizeof *h);
182 h->gsl_hist = gsl_histogram_alloc (bins);
184 gsl_histogram_set_ranges_uniform (h->gsl_hist, adjusted_min, adjusted_max);
187 stat->accumulate = acc;
188 stat->destroy = destroy;