#include <gsl/gsl_histogram.h>
#include <math.h>
+#include "data/settings.h"
#include "libpspp/message.h"
#include "libpspp/assertion.h"
#include "libpspp/cast.h"
}
-struct histogram *
-histogram_create (double bin_width, double min, double max)
-{
- int bins;
- struct histogram *h = xmalloc (sizeof *h);
- struct statistic *stat = &h->parent;
-
- const double half_bin_width = bin_width / 2.0;
+/* Find a bin width which is adapted to the scaling of the x axis
+In the example here, the binwidth is half of the tick interval.
- /* The lower and upper limits of the histogram, in units of half
- bin widths */
- int lower_limit, upper_limit;
+ binwidth
+ > <
+ |....+....+....+. .+....|
+ LOWER 1 2 3 N_TICKS
+ ^LOWDBL ^HIGHDBL
- /* -1 if the lower end of the range contains more unused space
- than the upper end.
- +1 otherwise. */
- short sparse_end = 0;
-
- double ul, ll;
- double lower_remainder = fabs (modf (min / half_bin_width, &ll));
- double upper_remainder = fabs (modf (max / half_bin_width, &ul));
+This only works, when the min and max value for the histogram are adapted
+such that (max-min) is a multiple of the binwidth. Then the location of the
+first bin has to be aligned to the ticks.
+*/
+static int
+hist_find_pretty_no_of_bins(double bin_width_in, double min, double max,
+ double *adjusted_min, double *adjusted_max)
+{
+ double lower, interval;
+ int n_ticks;
+ double binwidth;
+ int nbins;
+ chart_get_scale (max, min, &lower, &interval, &n_ticks);
- if (max == min)
+ if (bin_width_in >= 2 * interval)
{
- msg (MW, _("Not creating histogram because the data contains less than 2 distinct values"));
- free (h);
- return NULL;
+ binwidth = floor(bin_width_in/interval) * interval;
+ *adjusted_min = lower;
+ }
+ else if (bin_width_in >= 1.5 * interval)
+ {
+ binwidth = 1.5 * interval;
+ if (min < (lower + 0.5 * interval))
+ *adjusted_min = lower;
+ else
+ *adjusted_min = lower + 0.5 * interval;
+ }
+ else if (bin_width_in >= interval)
+ {
+ binwidth = interval;
+ *adjusted_min = lower;
+ }
+ else if (bin_width_in >= (2.0/3.0 * interval))
+ {
+ binwidth = (2.0/3.0 * interval);
+ if (min >= lower + binwidth)
+ *adjusted_min = lower + binwidth;
+ else
+ *adjusted_min = lower;
+ }
+ else
+ {
+ int i;
+ for(i = 2; bin_width_in < interval/i; i++);
+ binwidth = interval/i;
+ *adjusted_min = floor((min - lower)/binwidth)*binwidth + lower;
}
- assert (max > min);
-
- lower_limit = ll;
+ nbins = ceil((max-*adjusted_min)/binwidth);
+ *adjusted_max = nbins*binwidth + *adjusted_min;
- /* If the minimum value is not aligned on a half bin width,
- then the lower bound must be extended so that the histogram range includes it. */
- if (lower_remainder > 0)
- lower_limit--;
+ return nbins;
+}
- /* However, the upper bound must be extended regardless, because histogram bins
- span the range [lower, upper) */
- upper_limit = ul + 1;
- /* So, in the case of the maximum value coinciding with a half bin width,
- the upper end will be the sparse end (because is got extended by a complete
- half bin width). In other cases, it depends which got the bigger extension. */
- if (upper_remainder == 0)
- sparse_end = +1;
- else
- sparse_end = lower_remainder < upper_remainder ? -1 : +1;
+struct histogram *
+histogram_create (double bin_width_in, double min, double max)
+{
+ struct histogram *h;
+ struct statistic *stat;
+ int bins;
+ double adjusted_min, adjusted_max;
- /* The range must be an EVEN number of half bin_widths */
- if ( (upper_limit - lower_limit) % 2)
+ if (max == min)
{
- /* Extend the range at the end which gives the least unused space */
- if (sparse_end == +1)
- lower_limit--;
- else
- upper_limit++;
-
- /* Now the other end has more space */
- sparse_end *= -1;
+ msg (MW, _("Not creating histogram because the data contains less than 2 distinct values"));
+ return NULL;
}
- /* But the range should be aligned to an ODD number of
- half bin widths, so that the labels are aesthetically pleasing ones.
- Otherwise we are likely to get labels such as -3 -1 1 3 instead of -2 0 2 4
- */
- if ( lower_limit % 2 == 0)
- {
- /* Shift the range away from the sparse end, EXCEPT if that is the upper end,
- and it was extended to prevent the maximum value from getting lost */
- if (sparse_end == +1 && upper_remainder > 0)
- {
- lower_limit --;
- upper_limit --;
- }
- else
- {
- lower_limit ++;
- upper_limit ++;
- }
- }
- bins = (upper_limit - lower_limit) / 2.0;
+ assert (bin_width_in > 0);
- /* Force the number of bins to lie in a sensible range.
- FIXME: We should redo the above calculations if this happens! */
- if (bins > 25)
- bins = 25;
+ bins = hist_find_pretty_no_of_bins(bin_width_in, min, max, &adjusted_min, &adjusted_max);
- if (bins < 1)
- bins = 1;
+ h = xmalloc (sizeof *h);
h->gsl_hist = gsl_histogram_alloc (bins);
- gsl_histogram_set_ranges_uniform (h->gsl_hist,
- lower_limit * half_bin_width,
- upper_limit * half_bin_width);
+ gsl_histogram_set_ranges_uniform (h->gsl_hist, adjusted_min, adjusted_max);
+ stat = &h->parent;
stat->accumulate = acc;
stat->destroy = destroy;