X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Fhistogram.c;h=9158590dd75c3a140244292004087a773eddec3c;hb=50c7b5b389eb39accf65c8716f6a3f73b5b30aa6;hp=c50e9980df71d6349263b36817259bafce892b5b;hpb=164d1274fcb70c54897f2a03fc7c27152ed4821a;p=pspp

diff --git a/src/math/histogram.c b/src/math/histogram.c
index c50e9980df..9158590dd7 100644
--- a/src/math/histogram.c
+++ b/src/math/histogram.c
@@ -1,52 +1,156 @@
-/* PSPP - computes sample statistics.
-   Copyright (C) 2004 Free Software Foundation, Inc.
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2004, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
 
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
 
-   This program is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
 
    You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA. */
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
 
 #include <config.h>
-#include <math.h>
+
+#include "math/histogram.h"
+
 #include <gsl/gsl_histogram.h>
-#include <assert.h>
-#include "histogram.h"
-#include "chart-geometry.h"
+#include <math.h>
+
+#include "data/settings.h"
+#include "libpspp/message.h"
+#include "libpspp/assertion.h"
+#include "libpspp/cast.h"
+#include "math/chart-geometry.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+#define N_(msgid) msgid
+
+
+#include "gl/xalloc.h"
 
+void
+histogram_add (struct histogram *h, double y, double c)
+{
+  struct statistic *stat = &h->parent;
+  stat->accumulate (stat, NULL, c, 0, y);
+}
+
+static void
+acc (struct statistic *s, const struct ccase *cx UNUSED, double c, double cc UNUSED, double y)
+{
+  struct histogram *hist = UP_CAST (s, struct histogram, parent);
 
-gsl_histogram *
-histogram_create(double bins, double x_min, double x_max)
+  gsl_histogram_accumulate (hist->gsl_hist, y, c);
+}
+
+static void
+destroy (struct statistic *s)
 {
-  int n;
-  double bin_width ;
-  double bin_width_2 ;
-  double upper_limit, lower_limit;
+  struct histogram *h = UP_CAST (s, struct histogram, parent);
+  gsl_histogram_free (h->gsl_hist);
+  free (s);
+}
+
+
+/* Find a bin width which is adapted to the scaling of the x axis
+In the example here, the binwidth is half of the tick interval.
+
+        binwidth
+         >  <
+     |....+....+....+.      .+....|
+   LOWER  1    2    3     N_TICKS
+        ^LOWDBL                 ^HIGHDBL
+
+This only works, when the min and max value for the histogram are adapted
+such that (max-min) is a multiple of the binwidth. Then the location of the
+first bin has to be aligned to the ticks.
+*/
+static int
+hist_find_pretty_no_of_bins(double bin_width_in, double min, double max,
+                            double *adjusted_min, double *adjusted_max)
+{
+  double lower, interval;
+  int n_ticks;
+  double binwidth;
+  int nbins;
+
+  chart_get_scale (max, min, &lower, &interval, &n_ticks);
+
+  if (bin_width_in >= 2 * interval)
+    {
+      binwidth = floor(bin_width_in/interval) * interval;
+      *adjusted_min = lower;
+    }
+  else if (bin_width_in >= 1.5 * interval)
+    {
+      binwidth = 1.5 * interval;
+      if (min < (lower + 0.5 * interval))
+        *adjusted_min = lower;
+      else
+        *adjusted_min = lower + 0.5 * interval;
+    }
+  else if (bin_width_in >= interval)
+    {
+      binwidth = interval;
+      *adjusted_min = lower;
+    }
+  else if (bin_width_in >= (2.0/3.0 * interval))
+    {
+      binwidth = (2.0/3.0 * interval);
+      if (min >= lower + binwidth)
+        *adjusted_min = lower + binwidth;
+      else
+        *adjusted_min = lower;
+    }
+  else
+    {
+      int i;
+      for(i = 2; bin_width_in < interval/i; i++);
+      binwidth = interval/i;
+      *adjusted_min = floor((min - lower)/binwidth)*binwidth + lower;
+    }
+
+  nbins = ceil((max-*adjusted_min)/binwidth);
+  *adjusted_max = nbins*binwidth + *adjusted_min;
+
+  return nbins;
+}
+
+
+struct histogram *
+histogram_create (double bin_width_in, double min, double max)
+{
+  struct histogram *h;
+  struct statistic *stat;
+  int bins;
+  double adjusted_min, adjusted_max;
+
+  if (max == min)
+    {
+      msg (MW, _("Not creating histogram because the data contains less than 2 distinct values"));
+      return NULL;
+    }
+
+  assert (bin_width_in > 0);
+
+  bins = hist_find_pretty_no_of_bins(bin_width_in, min, max, &adjusted_min, &adjusted_max);
 
-  gsl_histogram *hist = gsl_histogram_alloc(bins);
+  h = xmalloc (sizeof *h);
 
-  bin_width = chart_rounded_tick((x_max - x_min)/ bins);
-  bin_width_2 = bin_width / 2.0;
-
-  n = ceil( x_max / (bin_width_2) ) ;
-  if ( ! (n % 2 ) ) n++;
-  upper_limit = n * bin_width_2;
+  h->gsl_hist = gsl_histogram_alloc (bins);
 
-  n = floor( x_min / (bin_width_2) ) ;
-  if ( ! (n % 2 ) ) n--;
-  lower_limit = n * bin_width_2;
+  gsl_histogram_set_ranges_uniform (h->gsl_hist, adjusted_min, adjusted_max);
 
-  gsl_histogram_set_ranges_uniform(hist, lower_limit, upper_limit);
+  stat = &h->parent;
+  stat->accumulate = acc;
+  stat->destroy = destroy;
 
-  return hist;
+  return h;
 }
 