X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Ffrequencies.q;h=972898fb28541aaaf3055e323930235e4acbd244;hb=69b9f985eb77186153b7e1cb220fa1785dff582b;hp=9fcd9a18283ac5bed3cda2f7903fa5eb893d7a20;hpb=1fc57714adfb36009ad22e2a164263b90548bf11;p=pspp diff --git a/src/language/stats/frequencies.q b/src/language/stats/frequencies.q index 9fcd9a1828..972898fb28 100644 --- a/src/language/stats/frequencies.q +++ b/src/language/stats/frequencies.q @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2007, 2009 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2007, 2009, 2010 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -48,12 +48,10 @@ #include #include #include -#include +#include #include #include -#include -#include -#include +#include #include "freq.h" @@ -147,13 +145,14 @@ struct percentile double x2; /* The datum value >= the percentile */ int flag; int flag2; /* Set to 1 if this percentile value has been found */ + bool show; /* True to show this percentile in the statistics box. */ }; -static void add_percentile (double x) ; +static void add_percentile (double x, bool show); static struct percentile *percentiles; -static int n_percentiles; +static int n_percentiles, n_show_percentiles; /* Groups of statistics. */ #define BI BIT_INDEX @@ -207,6 +206,8 @@ struct freq_tab struct hsh_table *data; /* Undifferentiated data. */ struct freq_mutable *valid; /* Valid freqs. */ int n_valid; /* Number of total freqs. */ + const struct dictionary *dict; /* The dict from whence entries in the table + come */ struct freq_mutable *missing; /* Missing freqs. */ int n_missing; /* Number of missing freqs. */ @@ -298,6 +299,7 @@ internal_cmd_frequencies (struct lexer *lexer, struct dataset *ds) int i; n_percentiles = 0; + n_show_percentiles = 0; percentiles = NULL; n_variables = 0; @@ -344,7 +346,7 @@ internal_cmd_frequencies (struct lexer *lexer, struct dataset *ds) int pl; subc_list_double *ptl_list = &cmd.dl_percentiles[i]; for ( pl = 0 ; pl < subc_list_double_count(ptl_list); ++pl) - add_percentile (subc_list_double_at(ptl_list, pl) / 100.0 ); + add_percentile (subc_list_double_at(ptl_list, pl) / 100.0, true); } } if ( cmd.sbc_ntiles ) @@ -353,14 +355,14 @@ internal_cmd_frequencies (struct lexer *lexer, struct dataset *ds) { int j; for (j = 0; j <= cmd.n_ntiles[i]; ++j ) - add_percentile (j / (double) cmd.n_ntiles[i]); + add_percentile (j / (double) cmd.n_ntiles[i], true); } } if (stats & BIT_INDEX (frq_median)) { /* Treat the median as the 50% percentile. We output it in the percentiles table as "50 (Median)." */ - add_percentile (0.5); + add_percentile (0.5, true); stats &= ~BIT_INDEX (frq_median); n_stats--; } @@ -601,7 +603,7 @@ postcalc (const struct dataset *ds) - if ( chart == GFT_HIST && var_is_numeric (v) ) + if ( chart == GFT_HIST && var_is_numeric (v) && ft->n_valid > 0) { double d[frq_n_stats]; struct histogram *hist ; @@ -610,14 +612,14 @@ postcalc (const struct dataset *ds) hist = freq_tab_to_hist (ft,v); - chart_submit (histogram_chart_create ( - hist, var_to_string(v), - vf->tab.valid_cases, - d[frq_mean], - d[frq_stddev], - normal)); + chart_item_submit (histogram_chart_create ( + hist->gsl_hist, var_to_string(v), + vf->tab.valid_cases, + d[frq_mean], + d[frq_stddev], + normal)); - statistic_destroy ((struct statistic *)hist); + statistic_destroy (&hist->parent); } if ( chart == GFT_PIE) @@ -756,6 +758,7 @@ frq_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_freque } vf = var_attach_aux (v, xmalloc (sizeof *vf), var_dtor_free); vf->tab.valid = vf->tab.missing = NULL; + vf->tab.dict = dataset_dict (ds); vf->n_groups = 0; vf->groups = NULL; vf->width = var_get_width (v); @@ -849,9 +852,10 @@ frq_custom_grouped (struct lexer *lexer, struct dataset *ds, struct cmd_frequenc } /* Adds X to the list of percentiles, keeping the list in proper - order. */ + order. If SHOW is true, the percentile will be shown in the statistics + box, otherwise it will be hidden. */ static void -add_percentile (double x) +add_percentile (double x, bool show) { int i; @@ -859,7 +863,14 @@ add_percentile (double x) { /* Do nothing if it's already in the list */ if ( fabs(x - percentiles[i].p) < DBL_EPSILON ) - return; + { + if (show && !percentiles[i].show) + { + n_show_percentiles++; + percentiles[i].show = true; + } + return; + } if (x < percentiles[i].p) break; @@ -871,7 +882,10 @@ add_percentile (double x) n_percentiles + 1, sizeof *percentiles); insert_element (percentiles, n_percentiles, sizeof *percentiles, i); percentiles[i].p = x; + percentiles[i].show = show; n_percentiles++; + if (show) + n_show_percentiles++; } } @@ -998,41 +1012,6 @@ compare_freq_alpha_d (const void *a_, const void *b_, const void *v_) /* Frequency table display. */ -struct full_dim_aux - { - bool show_labels; - }; - -/* Sets the widths of all the columns and heights of all the rows in - table T for driver D. */ -static void -full_dim (struct tab_rendering *r, void *aux_) -{ - const struct outp_driver *d = r->driver; - const struct tab_table *t = r->table; - const struct full_dim_aux *aux = aux_; - int i; - - for (i = 0; i < tab_nc (t); i++) - { - r->w[i] = tab_natural_width (r, i); - if (aux->show_labels && i == 0) - r->w[i] = MIN (r->w[i], d->prop_em_width * 15); - else - r->w[i] = MAX (r->w[i], d->prop_em_width * 8); - } - - for (i = 0; i < tab_nr (t); i++) - r->h[i] = d->font_height; -} - -static void -full_dim_free (void *aux_) -{ - struct full_dim_aux *aux = aux_; - free (aux); -} - /* Displays a full frequency table for variable V. */ static void dump_full (const struct variable *v, const struct variable *wv) @@ -1043,56 +1022,33 @@ dump_full (const struct variable *v, const struct variable *wv) struct freq_tab *ft; struct freq_mutable *f; struct tab_table *t; - int r; + int r, x; double cum_total = 0.0; double cum_freq = 0.0; - struct init - { - int c, r; - const char *s; - }; - - const struct init *p; - - static const struct init vec[] = - { - {4, 0, N_("Valid")}, - {5, 0, N_("Cum")}, - {1, 1, N_("Value")}, - {2, 1, N_("Frequency")}, - {3, 1, N_("Percent")}, - {4, 1, N_("Percent")}, - {5, 1, N_("Percent")}, - {0, 0, NULL}, - {1, 0, NULL}, - {2, 0, NULL}, - {3, 0, NULL}, - {-1, -1, NULL}, + static const char *headings[] = { + N_("Value"), + N_("Frequency"), + N_("Percent"), + N_("Valid Percent"), + N_("Cum Percent") }; const bool lab = (cmd.labels == FRQ_LABELS); - struct full_dim_aux *aux; - vf = get_var_freqs (v); ft = &vf->tab; n_categories = ft->n_valid + ft->n_missing; - t = tab_create (5 + lab, n_categories + 3, 0); - tab_headers (t, 0, 0, 2, 0); - - aux = xmalloc (sizeof *aux); - aux->show_labels = lab; - tab_dim (t, full_dim, full_dim_free, aux); + t = tab_create (5 + lab, n_categories + 2); + tab_headers (t, 0, 0, 1, 0); if (lab) - tab_text (t, 0, 1, TAB_CENTER | TAT_TITLE, _("Value Label")); + tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Value Label")); - for (p = vec; p->s; p++) - tab_text (t, lab ? p->c : p->c - 1, p->r, - TAB_CENTER | TAT_TITLE, gettext (p->s)); + for (x = 0; x < 5; x++) + tab_text (t, lab + x, 0, TAB_CENTER | TAT_TITLE, gettext (headings[x])); - r = 2; + r = 1; for (f = ft->valid; f < ft->missing; f++) { double percent, valid_percent; @@ -1110,7 +1066,7 @@ dump_full (const struct variable *v, const struct variable *wv) tab_text (t, 0, r, TAB_LEFT, label); } - tab_value (t, 0 + lab, r, TAB_NONE, &f->value, &vf->print); + tab_value (t, 0 + lab, r, TAB_NONE, &f->value, ft->dict, &vf->print); tab_double (t, 1 + lab, r, TAB_NONE, f->count, wfmt); tab_double (t, 2 + lab, r, TAB_NONE, percent, NULL); tab_double (t, 3 + lab, r, TAB_NONE, valid_percent, NULL); @@ -1128,7 +1084,7 @@ dump_full (const struct variable *v, const struct variable *wv) tab_text (t, 0, r, TAB_LEFT, label); } - tab_value (t, 0 + lab, r, TAB_NONE, &f->value, &vf->print); + tab_value (t, 0 + lab, r, TAB_NONE, &f->value, ft->dict, &vf->print); tab_double (t, 1 + lab, r, TAB_NONE, f->count, wfmt); tab_double (t, 2 + lab, r, TAB_NONE, f->count / ft->total_cases * 100.0, NULL); @@ -1139,7 +1095,7 @@ dump_full (const struct variable *v, const struct variable *wv) tab_box (t, TAL_1, TAL_1, cmd.spaces == FRQ_SINGLE ? -1 : TAL_GAP, TAL_1, 0, 0, 4 + lab, r); - tab_hline (t, TAL_2, 0, 4 + lab, 2); + tab_hline (t, TAL_2, 0, 4 + lab, 1); tab_hline (t, TAL_2, 0, 4 + lab, r); tab_joint_text (t, 0, r, 0 + lab, r, TAB_RIGHT | TAT_TITLE, _("Total")); tab_vline (t, TAL_0, 1, r, r); @@ -1151,31 +1107,6 @@ dump_full (const struct variable *v, const struct variable *wv) tab_submit (t); } -/* Sets the widths of all the columns and heights of all the rows in - table T for driver D. */ -static void -condensed_dim (struct tab_rendering *r, void *aux UNUSED) -{ - struct outp_driver *d = r->driver; - const struct tab_table *t = r->table; - - int cum_width = outp_string_width (d, _("Cum"), OUTP_PROPORTIONAL); - int zeros_width = outp_string_width (d, "000", OUTP_PROPORTIONAL); - int max_width = MAX (cum_width, zeros_width); - - int i; - - for (i = 0; i < 2; i++) - { - r->w[i] = tab_natural_width (r, i); - r->w[i] = MAX (r->w[i], d->prop_em_width * 8); - } - for (i = 2; i < 4; i++) - r->w[i] = max_width; - for (i = 0; i < tab_nr (t); i++) - r->h[i] = d->font_height; -} - /* Display condensed frequency table for variable V. */ static void dump_condensed (const struct variable *v, const struct variable *wv) @@ -1192,7 +1123,7 @@ dump_condensed (const struct variable *v, const struct variable *wv) vf = get_var_freqs (v); ft = &vf->tab; n_categories = ft->n_valid + ft->n_missing; - t = tab_create (4, n_categories + 2, 0); + t = tab_create (4, n_categories + 2); tab_headers (t, 0, 0, 2, 0); tab_text (t, 0, 1, TAB_CENTER | TAT_TITLE, _("Value")); @@ -1200,7 +1131,6 @@ dump_condensed (const struct variable *v, const struct variable *wv) tab_text (t, 2, 1, TAB_CENTER | TAT_TITLE, _("Pct")); tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Cum")); tab_text (t, 3, 1, TAB_CENTER | TAT_TITLE, _("Pct")); - tab_dim (t, condensed_dim, NULL, NULL); r = 2; for (f = ft->valid; f < ft->missing; f++) @@ -1210,7 +1140,7 @@ dump_condensed (const struct variable *v, const struct variable *wv) percent = f->count / ft->total_cases * 100.0; cum_total += f->count / ft->valid_cases * 100.0; - tab_value (t, 0, r, TAB_NONE, &f->value, &vf->print); + tab_value (t, 0, r, TAB_NONE, &f->value, ft->dict, &vf->print); tab_double (t, 1, r, TAB_NONE, f->count, wfmt); tab_double (t, 2, r, TAB_NONE, percent, NULL); tab_double (t, 3, r, TAB_NONE, cum_total, NULL); @@ -1218,7 +1148,7 @@ dump_condensed (const struct variable *v, const struct variable *wv) } for (; f < &ft->valid[n_categories]; f++) { - tab_value (t, 0, r, TAB_NONE, &f->value, &vf->print); + tab_value (t, 0, r, TAB_NONE, &f->value, ft->dict, &vf->print); tab_double (t, 1, r, TAB_NONE, f->count, wfmt); tab_double (t, 2, r, TAB_NONE, f->count / ft->total_cases * 100.0, NULL); @@ -1230,14 +1160,13 @@ dump_condensed (const struct variable *v, const struct variable *wv) 0, 0, 3, r - 1); tab_hline (t, TAL_2, 0, 3, 2); tab_title (t, "%s", var_to_string (v)); - tab_columns (t, SOM_COL_DOWN); tab_submit (t); } /* Statistical display. */ -/* Calculates all the pertinent statistics for variable V, putting - them in array D[]. FIXME: This could be made much more optimal. */ +/* Calculates all the pertinent statistics for variable V, putting them in + array D[]. */ static void calc_stats (const struct variable *v, double d[frq_n_stats]) { @@ -1254,6 +1183,8 @@ calc_stats (const struct variable *v, double d[frq_n_stats]) /* Calculate percentiles. */ + assert (ft->n_valid > 0); + for (i = 0; i < n_percentiles; i++) { percentiles[i].flag = 0; @@ -1398,8 +1329,7 @@ dump_statistics (const struct variable *v, bool show_varname, } calc_stats (v, stat_value); - t = tab_create (3, n_stats + n_percentiles + 2, 0); - tab_dim (t, tab_natural_dimensions, NULL, NULL); + t = tab_create (3, n_stats + n_show_percentiles + 2); tab_box (t, TAL_1, TAL_1, -1, -1 , 0 , 0 , 2, tab_nr(t) - 1) ; @@ -1427,6 +1357,9 @@ dump_statistics (const struct variable *v, bool show_varname, for (i = 0; i < n_percentiles; i++, r++) { + if (!percentiles[i].show) + continue; + if ( i == 0 ) { tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Percentiles")); @@ -1440,11 +1373,8 @@ dump_statistics (const struct variable *v, bool show_varname, var_get_print_format (v)); } - tab_columns (t, SOM_COL_DOWN); if (show_varname) tab_title (t, "%s", var_to_string (v)); - else - tab_flags (t, SOMF_NO_TITLE); tab_submit (t); @@ -1459,8 +1389,8 @@ freq_tab_to_hist (const struct freq_tab *ft, const struct variable *var) double x_min = DBL_MAX; double x_max = -DBL_MAX; - struct statistic *hist; - const double bins = 11; + struct histogram *hist; + int bins; struct hsh_iterator hi; struct hsh_table *fh = ft->data; @@ -1476,15 +1406,20 @@ freq_tab_to_hist (const struct freq_tab *ft, const struct variable *var) if ( frq->value.f > x_max ) x_max = frq->value.f ; } + /* Sturges' formula. */ + bins = ceil (log (ft->valid_cases) / log (2) + 1); + if (bins < 5) + bins = 5; + hist = histogram_create (bins, x_min, x_max); for( i = 0 ; i < ft->n_valid ; ++i ) { frq = &ft->valid[i]; - histogram_add ((struct histogram *)hist, frq->value.f, frq->count); + histogram_add (hist, frq->value.f, frq->count); } - return (struct histogram *)hist; + return hist; } @@ -1533,7 +1468,7 @@ do_piechart(const struct variable *var, const struct freq_tab *frq_tab) slices = freq_tab_to_slice_array(frq_tab, var, &n_slices); - chart_submit (piechart_create (var_to_string(var), slices, n_slices)); + chart_item_submit (piechart_create (var_to_string(var), slices, n_slices)); for (i = 0 ; i < n_slices ; ++i ) ds_destroy (&slices[i].label);