1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2004, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "math/histogram.h"
21 #include <gsl/gsl_histogram.h>
24 #include "data/settings.h"
25 #include "libpspp/message.h"
26 #include "libpspp/assertion.h"
27 #include "libpspp/cast.h"
28 #include "math/chart-geometry.h"
31 #define _(msgid) gettext (msgid)
32 #define N_(msgid) msgid
35 #include "gl/xalloc.h"
38 histogram_add (struct histogram *h, double y, double c)
40 gsl_histogram_accumulate (h->gsl_hist, y, c);
44 destroy (struct statistic *s)
46 struct histogram *h = UP_CAST (s, struct histogram, parent);
47 gsl_histogram_free (h->gsl_hist);
52 /* Find a bin width which is adapted to the scaling of the x axis
53 In the example here, the binwidth is half of the tick interval.
57 |....+....+....+. .+....|
61 This only works, when the min and max value for the histogram are adapted
62 such that (max-min) is a multiple of the binwidth. Then the location of the
63 first bin has to be aligned to the ticks.
66 hist_find_pretty_no_of_bins(double bin_width_in, double min, double max,
67 double *adjusted_min, double *adjusted_max)
69 double lower, interval;
74 chart_get_scale (max, min, &lower, &interval, &n_ticks);
76 if (bin_width_in >= 2 * interval)
78 binwidth = floor(bin_width_in/interval) * interval;
79 *adjusted_min = lower;
81 else if (bin_width_in >= 1.5 * interval)
83 binwidth = 1.5 * interval;
84 if (min < (lower + 0.5 * interval))
85 *adjusted_min = lower;
87 *adjusted_min = lower + 0.5 * interval;
89 else if (bin_width_in >= interval)
92 *adjusted_min = lower;
94 else if (bin_width_in >= (2.0/3.0 * interval))
96 binwidth = (2.0/3.0 * interval);
97 if (min >= lower + binwidth)
98 *adjusted_min = lower + binwidth;
100 *adjusted_min = lower;
105 for(i = 2; bin_width_in < interval/i; i++);
106 binwidth = interval/i;
107 *adjusted_min = floor((min - lower)/binwidth)*binwidth + lower;
110 nbins = ceil((max-*adjusted_min)/binwidth);
111 *adjusted_max = nbins*binwidth + *adjusted_min;
113 /* adjusted_max should never be smaller than max but if it is equal
114 then the gsl_histogram will not add the cases which have max value */
115 if (*adjusted_max <= max)
117 *adjusted_max += binwidth;
120 assert (*adjusted_min <= min);
127 histogram_create (double bin_width_in, double min, double max)
130 struct statistic *stat;
132 double adjusted_min, adjusted_max;
136 msg (MW, _("Not creating histogram because the data contains less than 2 distinct values"));
140 assert (bin_width_in > 0);
142 bins = hist_find_pretty_no_of_bins(bin_width_in, min, max, &adjusted_min, &adjusted_max);
144 h = xmalloc (sizeof *h);
146 h->gsl_hist = gsl_histogram_alloc (bins);
148 /* The bin ranges could be computed with gsl_histogram_set_ranges_uniform,
149 but the number of bins is adapted to the ticks of the axis such that for example
150 data ranging from 1.0 to 7.0 with 6 bins will have bin limits at
151 2.0,3.0,4.0 and 5.0. Due to numerical accuracy the computed bin limits might
152 be 4.99999999 for a value which is expected to be 5.0. But the limits of
153 the histogram bins should be that what is expected from the displayed ticks.
154 Therefore the bin limits are computed from the rounded values which is similar
155 to the procedure at the chart_get_ticks_format. Actual bin limits should be
156 exactly what is displayed at the ticks.
157 But... I cannot reproduce the problem that I see with gsl_histogram_set_ranges_uniform
158 with the code below without rounding...
161 double *ranges = xmalloc (sizeof (double) * (bins + 1));
162 double interval = (adjusted_max - adjusted_min) / bins;
163 for (int i = 0; i < bins; i++)
164 ranges[i] = adjusted_min + interval * i;
165 ranges[bins] = adjusted_max;
166 gsl_histogram_set_ranges (h->gsl_hist, ranges, bins + 1);
171 stat->destroy = destroy;