X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Fhistogram.c;h=264b0785351b99c9da8c3397294a0ef1bb1f716f;hb=f7870951c8cbadda63214d99cd2bbcf8abd58662;hp=e46986d2e0dff4ef6ba4a445cbc9c12aa34b7d0b;hpb=ca4012bcf0f8790ceb8539b55bbc296d0802d5d7;p=pspp diff --git a/src/math/histogram.c b/src/math/histogram.c index e46986d2e0..264b078535 100644 --- a/src/math/histogram.c +++ b/src/math/histogram.c @@ -37,25 +37,7 @@ void histogram_add (struct histogram *h, double y, double c) { - struct statistic *stat = &h->parent; - stat->accumulate (stat, NULL, c, 0, y); -} - -static void -acc (struct statistic *s, const struct ccase *cx UNUSED, double c, double cc UNUSED, double y) -{ - struct histogram *hist = UP_CAST (s, struct histogram, parent); - gsl_histogram *gslh = hist->gsl_hist; - - /* Include cases which are just on the boundary */ - if (y == gsl_histogram_max (gslh)) - { - double lower, upper; - gsl_histogram_get_range (gslh, gsl_histogram_bins (gslh)-1, &lower, &upper); - gsl_histogram_accumulate (gslh, lower + (upper - lower)/2.0, c); - } - else - gsl_histogram_accumulate (hist->gsl_hist, y, c); + gsl_histogram_accumulate (h->gsl_hist, y, c); } static void @@ -128,6 +110,15 @@ hist_find_pretty_no_of_bins(double bin_width_in, double min, double max, nbins = ceil((max-*adjusted_min)/binwidth); *adjusted_max = nbins*binwidth + *adjusted_min; + /* adjusted_max should never be smaller than max but if it is equal + then the gsl_histogram will not add the cases which have max value */ + if (*adjusted_max <= max) + { + *adjusted_max += binwidth; + nbins++; + } + assert (*adjusted_min <= min); + return nbins; } @@ -154,10 +145,29 @@ histogram_create (double bin_width_in, double min, double max) h->gsl_hist = gsl_histogram_alloc (bins); - gsl_histogram_set_ranges_uniform (h->gsl_hist, adjusted_min, adjusted_max); + /* The bin ranges could be computed with gsl_histogram_set_ranges_uniform, + but the number of bins is adapted to the ticks of the axis such that for example + data ranging from 1.0 to 7.0 with 6 bins will have bin limits at + 2.0,3.0,4.0 and 5.0. Due to numerical accuracy the computed bin limits might + be 4.99999999 for a value which is expected to be 5.0. But the limits of + the histogram bins should be that what is expected from the displayed ticks. + Therefore the bin limits are computed from the rounded values which is similar + to the procedure at the chart_get_ticks_format. Actual bin limits should be + exactly what is displayed at the ticks. + But... I cannot reproduce the problem that I see with gsl_histogram_set_ranges_uniform + with the code below without rounding... + */ + { + double *ranges = xmalloc (sizeof (double) * (bins + 1)); + double interval = (adjusted_max - adjusted_min) / bins; + for (int i = 0; i < bins; i++) + ranges[i] = adjusted_min + interval * i; + ranges[bins] = adjusted_max; + gsl_histogram_set_ranges (h->gsl_hist, ranges, bins + 1); + free (ranges); + } stat = &h->parent; - stat->accumulate = acc; stat->destroy = destroy; return h;