From 379aeac5640a20a122990640fdd33bdfa592a3c1 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Sat, 4 Dec 2004 09:20:59 +0000 Subject: [PATCH] Added code to calculate the histogram limits --- po/pspp.pot | 2 +- src/ChangeLog | 6 ++++ src/chart.c | 5 +--- src/chart.h | 11 +++++++ src/factor_stats.c | 7 ++--- src/frequencies.q | 42 +++++++++++++-------------- src/histogram.c | 72 +++++++++++++++++++++++++--------------------- 7 files changed, 82 insertions(+), 63 deletions(-) diff --git a/po/pspp.pot b/po/pspp.pot index 0f5739fe..a74ef27a 100644 --- a/po/pspp.pot +++ b/po/pspp.pot @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: pspp-dev@gnu.org\n" -"POT-Creation-Date: 2004-12-02 13:38+0800\n" +"POT-Creation-Date: 2004-12-02 19:27+0800\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" diff --git a/src/ChangeLog b/src/ChangeLog index 3ff99082..8ccbc7cc 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,9 @@ +Sat Dec 4 17:14:45 WST 2004 John Darrington + + * histogram.c chart.[ch] factor_stats.c frequencies.q + + Added code to calculate sensible histogram ranges and limits. + Thu Dec 2 13:37:43 WST 2004 John Darrington * chart.h Updated to reflect many API changes. diff --git a/src/chart.c b/src/chart.c index 3068f3e3..07111ad9 100644 --- a/src/chart.c +++ b/src/chart.c @@ -183,12 +183,9 @@ chart_finalise(struct chart *chart) -static double chart_rounded_tick(double tick); - - /* Adjust tick to be a sensible value ie: ... 0.1,0.2,0.5, 1,2,5, 10,20,50 ... 
*/ -static double +double chart_rounded_tick(double tick) { diff --git a/src/chart.h b/src/chart.h index d22c394f..f2c9e501 100644 --- a/src/chart.h +++ b/src/chart.h @@ -77,6 +77,7 @@ int chart_initialise(struct chart *ch); void chart_finalise(struct chart *ch); +double chart_rounded_tick(double tick); void chart_write_xlabel(struct chart *ch, const char *label); void chart_write_ylabel(struct chart *ch, const char *label); @@ -118,10 +119,19 @@ struct normal_curve void histogram_write_legend(struct chart *ch, const struct normal_curve *norm); +/* Plot a gsl_histogram */ void histogram_plot(const gsl_histogram *hist, const char *factorname, const struct normal_curve *norm, short show_normal); +/* Create a gsl_histogram and set it's parameters based upon + x_min, x_max and bins. + The caller is responsible for freeing the histogram. +*/ +gsl_histogram * histogram_create(double bins, double x_min, double x_max) ; + + + struct slice { @@ -158,6 +168,7 @@ void chart_datum(struct chart *ch, int dataset, double x, double y); + enum CHART_DIM { CHART_DIM_X, diff --git a/src/factor_stats.c b/src/factor_stats.c index 2b9c48a6..2ba785a7 100644 --- a/src/factor_stats.c +++ b/src/factor_stats.c @@ -30,7 +30,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include #include #include - +#include void @@ -52,7 +52,7 @@ metrics_precalc(struct metrics *m) (hsh_free_func *) weighted_value_free, (void *) 0); - m->histogram = gsl_histogram_alloc(10); + } @@ -191,8 +191,7 @@ metrics_postcalc(struct metrics *m) m->trimmed_mean += (m->wvp[k1 + 1]->cc - tc) * m->wvp[k1 + 1]->v.f ; m->trimmed_mean /= 0.9 * m->n ; - - gsl_histogram_set_ranges_uniform(m->histogram, m->min, m->max); + m->histogram = histogram_create(10, m->min, m->max); for ( i = 0 ; i < m->n_data ; ++i ) { diff --git a/src/frequencies.q b/src/frequencies.q index 1484776a..acfeed86 100644 --- a/src/frequencies.q +++ b/src/frequencies.q @@ -279,7 +279,9 @@ static hsh_compare_func compare_freq_numeric_d, 
compare_freq_alpha_d; static void do_piechart(const struct variable *var, const struct freq_tab *frq_tab); -void freq_tab_to_hist(const struct freq_tab *ft, gsl_histogram *hist); +gsl_histogram * +freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var); + /* Parser and outline. */ @@ -629,16 +631,16 @@ postcalc (void *aux UNUSED) { double d[frq_n_stats]; struct normal_curve norm; + gsl_histogram *hist ; - gsl_histogram *hist = gsl_histogram_alloc(7); - norm.N = vf->tab.total_cases; + norm.N = vf->tab.valid_cases; calc_stats(v,d); norm.mean = d[frq_mean]; norm.stddev = d[frq_stddev]; - freq_tab_to_hist(ft, hist); + hist = freq_tab_to_hist(ft,v); histogram_plot(hist, var_to_string(v), &norm, normal); @@ -1537,33 +1539,32 @@ dump_statistics (struct variable *v, int show_varname) } - -/* Populate a gsl_histogram from a freq_tab */ -void -freq_tab_to_hist(const struct freq_tab *ft, gsl_histogram *hist) +/* Create a gsl_histogram from a freq_tab */ +gsl_histogram * +freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var) { int i; double x_min = DBL_MAX; double x_max = -DBL_MAX; - + + gsl_histogram *hist; + const double bins = 11; + struct hsh_iterator hi; struct hsh_table *fh = ft->data; struct freq *frq; - gsl_histogram_reset(hist); - /* Find out the extremes of the x value */ - - for ( frq = hsh_first(fh, &hi); - frq != 0; - frq = hsh_next(fh, &hi) ) + for ( frq = hsh_first(fh, &hi); frq != 0; frq = hsh_next(fh, &hi) ) { + if ( is_missing(&frq->v, var)) + continue; + if ( frq->v.f < x_min ) x_min = frq->v.f ; if ( frq->v.f > x_max ) x_max = frq->v.f ; } - - gsl_histogram_set_ranges_uniform(hist, x_min, x_max); + hist = histogram_create(bins, x_min, x_max); for( i = 0 ; i < ft->n_valid ; ++i ) { @@ -1571,8 +1572,10 @@ freq_tab_to_hist(const struct freq_tab *ft, gsl_histogram *hist) gsl_histogram_accumulate(hist, frq->v.f, frq->c); } + return hist; } + static struct slice * freq_tab_to_slice_array(const struct freq_tab *frq_tab, const 
struct variable *var, @@ -1602,10 +1605,8 @@ freq_tab_to_slice_array(const struct freq_tab *frq_tab, slices[i].label = value_to_string(&frq->v, var); slices[i].magnetude = frq->c; - } - return slices; } @@ -1622,11 +1623,8 @@ do_piechart(const struct variable *var, const struct freq_tab *frq_tab) piechart_plot(var_to_string(var), slices, n_slices); free(slices); - } - - /* Local Variables: mode: c diff --git a/src/histogram.c b/src/histogram.c index 0d90edd5..9a3b9264 100644 --- a/src/histogram.c +++ b/src/histogram.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "hash.h" @@ -30,29 +31,6 @@ #include "var.h" #include "chart.h" -/* Number of bins in which to divide data */ -#define BINS 7 - -/* The approximate no of ticks on the y axis */ -#define YTICKS 10 - -#ifndef M_PI -#define M_PI ( 22.0 / 7.0 ) -#endif - - -static double gaussian(double x, double mu, double sigma ) ; - - -static double -gaussian(double x, double mu, double sigma ) -{ - return (exp( - (( x - mu )* (x - mu) / (2.0 * sigma * sigma) )) - / ( sigma * sqrt( M_PI * 2.0) )) ; -} - - - /* Write the legend of the chart */ void histogram_write_legend(struct chart *ch, const struct normal_curve *norm) @@ -116,7 +94,6 @@ hist_draw_bar(struct chart *ch, const gsl_histogram *hist, int bar) x_pos + width / 2.0, buf); } - } @@ -155,11 +132,13 @@ histogram_plot(const gsl_histogram *hist, double x_min, x_max, not_used ; double abscissa_scale ; double ordinate_scale ; - + double range ; gsl_histogram_get_range(hist, 0, &x_min, &not_used); - gsl_histogram_get_range(hist, bins - 1, &x_max, &not_used); - + range = not_used - x_min; + gsl_histogram_get_range(hist, bins - 1, &not_used, &x_max); + assert(range == x_max - not_used); + abscissa_scale = (ch.data_right - ch.data_left) / (x_max - x_min); ordinate_scale = (ch.data_top - ch.data_bottom) / gsl_histogram_max_val(hist) ; @@ -170,11 +149,11 @@ histogram_plot(const gsl_histogram *hist, d += (ch.data_right - ch.data_left) / 100.0) { const double x
= (d - ch.data_left) / abscissa_scale + x_min ; - - pl_fcont_r(ch.lp, d, - ch.data_bottom + norm->N * ordinate_scale * - gaussian(x, norm->mean, norm->stddev) - ); + const double y = norm->N * range * + gsl_ran_gaussian_pdf(x - norm->mean, norm->stddev); + + pl_fcont_r(ch.lp, d, ch.data_bottom + y * ordinate_scale); + } pl_endpath_r(ch.lp); @@ -182,3 +161,32 @@ histogram_plot(const gsl_histogram *hist, chart_finalise(&ch); } + +gsl_histogram * +histogram_create(double bins, double x_min, double x_max) +{ + int n; + double bin_width ; + double bin_width_2 ; + double upper_limit, lower_limit; + + gsl_histogram *hist = gsl_histogram_alloc(bins); + + + bin_width = chart_rounded_tick((x_max - x_min)/ bins); + bin_width_2 = bin_width / 2.0; + + n = ceil( x_max / (bin_width_2) ) ; + if ( ! (n % 2 ) ) n++; + upper_limit = n * bin_width_2; + + n = floor( x_min / (bin_width_2) ) ; + if ( ! (n % 2 ) ) n--; + lower_limit = n * bin_width_2; + + gsl_histogram_set_ranges_uniform(hist, lower_limit, upper_limit); + + + return hist; +} + -- 2.30.2