X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Ffrequencies.c;h=736b6ae55599f4c6340fbce94e10d8e74c4206cf;hb=29917c4f5908454803e663d2ad78bca4bc35e805;hp=dc6eede8809629b61cfeb1d2551708b0a4365308;hpb=18e3610ace8a5286d301e2f8ebd62bb579b2ea96;p=pspp diff --git a/src/language/stats/frequencies.c b/src/language/stats/frequencies.c index dc6eede880..736b6ae555 100644 --- a/src/language/stats/frequencies.c +++ b/src/language/stats/frequencies.c @@ -1,7 +1,7 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2007, 2009, 2010, 2011, 2014 Free Software Foundation, Inc. - + Copyright (C) 1997-9, 2000, 2007, 2009, 2010, 2011, 2014, 2015 Free Software Foundation, Inc. + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or @@ -11,7 +11,7 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program. If not, see . */ @@ -52,10 +52,10 @@ #include "math/chart-geometry.h" -#include "output/chart-item.h" +#include "output/charts/barchart.h" #include "output/charts/piechart.h" #include "output/charts/plot-hist.h" -#include "output/tab.h" +#include "output/pivot-table.h" #include "gl/minmax.h" #include "gl/xalloc.h" @@ -82,6 +82,14 @@ ptile_3way (const void *_p1, const void *_p2) if (p1->p < p2->p) return -1; + if (p1->p == p2->p) + { + if (p1->show > p2->show) + return -1; + + return (p1->show < p2->show); + } + return (p1->p > p2->p); } @@ -98,7 +106,7 @@ enum FRQ_PERCENT }; -enum sortprops +enum sortprops { FRQ_AFREQ, FRQ_DFREQ, @@ -202,10 +210,11 @@ struct frq_proc /* Percentiles to calculate and possibly display. */ struct percentile *percentiles; - int n_percentiles, n_show_percentiles; + const struct percentile *median; + int n_percentiles; /* Frequency table display. */ - int max_categories; /* Maximum categories to show. */ + long int max_categories; /* Maximum categories to show. */ int sort; /* FRQ_AVALUE or FRQ_DVALUE or FRQ_AFREQ or FRQ_DFREQ. */ @@ -214,7 +223,9 @@ struct frq_proc int n_stats; /* Histogram and pie chart settings. */ - struct frq_chart *hist, *pie; + struct frq_chart *hist, *pie, *bar; + + bool warn; }; @@ -227,14 +238,18 @@ struct freq_compare_aux bool ascending_value; }; -static void calc_stats (const struct var_freqs *vf, double d[FRQ_ST_count]); +static void calc_stats (const struct frq_proc *, + const struct var_freqs *, double d[FRQ_ST_count]); -static void do_piechart(const struct frq_chart *pie, +static void do_piechart(const struct frq_chart *pie, const struct variable *var, const struct freq_tab *frq_tab); -static void dump_statistics (const struct frq_proc *frq, - const struct var_freqs *vf, +static void do_barchart(const struct frq_chart *bar, + const struct variable **var, + const struct freq_tab *frq_tab); + +static void dump_statistics (const struct frq_proc *frq, const struct variable *wv); static int @@ -251,7 +266,7 @@ compare_freq (const void *a_, const void *b_, const void *aux_) } else { - int cmp = value_compare_3way (&a->value, &b->value, aux->width); + int cmp = value_compare_3way (a->values, b->values, aux->width); return aux->ascending_value ? cmp : -cmp; } } @@ -261,89 +276,74 @@ static struct histogram * freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft, const struct variable *var); +static void +put_freq_row (struct pivot_table *table, int var_idx, + double frequency, double percent, + double valid_percent, double cum_percent) +{ + double entries[] = { frequency, percent, valid_percent, cum_percent }; + for (size_t i = 0; i < sizeof entries / sizeof *entries; i++) + if (entries[i] != SYSMIS) + pivot_table_put2 (table, i, var_idx, + pivot_value_new_number (entries[i])); +} /* Displays a full frequency table for variable V. */ static void dump_freq_table (const struct var_freqs *vf, const struct variable *wv) { - const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; const struct freq_tab *ft = &vf->tab; - int n_categories; - struct freq *f; - struct tab_table *t; - int r, x; - double cum_total = 0.0; - double cum_freq = 0.0; - static const char *headings[] = { - N_("Value Label"), - N_("Value"), - N_("Frequency"), - N_("Percent"), - N_("Valid Percent"), - N_("Cum Percent") - }; + struct pivot_table *table = pivot_table_create__ (pivot_value_new_variable ( + vf->var), "Frequencies"); + pivot_table_set_weight_var (table, wv); - n_categories = ft->n_valid + ft->n_missing; - t = tab_create (6, n_categories + 2); - tab_set_format (t, RC_WEIGHT, wfmt); - tab_headers (t, 0, 0, 1, 0); + pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), + N_("Frequency"), PIVOT_RC_COUNT, + N_("Percent"), PIVOT_RC_PERCENT, + N_("Valid Percent"), PIVOT_RC_PERCENT, + N_("Cumulative Percent"), PIVOT_RC_PERCENT); - for (x = 0; x < 6; x++) - tab_text (t, x, 0, TAB_CENTER | TAT_TITLE, gettext (headings[x])); + struct pivot_dimension *variable = pivot_dimension_create__ ( + table, PIVOT_AXIS_ROW, pivot_value_new_variable (vf->var)); - r = 1; - for (f = ft->valid; f < ft->missing; f++) + double cum_freq = 0.0; + double cum_percent = 0.0; + struct pivot_category *valid = NULL; + for (const struct freq *f = ft->valid; f < ft->missing; f++) { - const char *label; - double percent, valid_percent; - cum_freq += f->count; - - percent = f->count / ft->total_cases * 100.0; - valid_percent = f->count / ft->valid_cases * 100.0; - cum_total += valid_percent; - - label = var_lookup_value_label (vf->var, &f->value); - if (label != NULL) - tab_text (t, 0, r, TAB_LEFT, label); - - tab_value (t, 1, r, TAB_NONE, &f->value, vf->var, NULL); - tab_double (t, 2, r, TAB_NONE, f->count, NULL, RC_WEIGHT); - tab_double (t, 3, r, TAB_NONE, percent, NULL, RC_OTHER); - tab_double (t, 4, r, TAB_NONE, valid_percent, NULL, RC_OTHER); - tab_double (t, 5, r, TAB_NONE, cum_total, NULL, RC_OTHER); - r++; + double valid_percent = f->count / ft->valid_cases * 100.0; + cum_percent += valid_percent; + + if (!valid) + valid = pivot_category_create_group (variable->root, N_("Valid")); + int var_idx = pivot_category_create_leaf ( + valid, pivot_value_new_var_value (vf->var, &f->values[0])); + put_freq_row (table, var_idx, f->count, + f->count / ft->total_cases * 100.0, + valid_percent, cum_percent); } - for (; f < &ft->valid[n_categories]; f++) - { - const char *label; + struct pivot_category *missing = NULL; + size_t n_categories = ft->n_valid + ft->n_missing; + for (const struct freq *f = ft->missing; f < &ft->valid[n_categories]; f++) + { cum_freq += f->count; - label = var_lookup_value_label (vf->var, &f->value); - if (label != NULL) - tab_text (t, 0, r, TAB_LEFT, label); - - tab_value (t, 1, r, TAB_NONE, &f->value, vf->var, NULL); - tab_double (t, 2, r, TAB_NONE, f->count, NULL, RC_WEIGHT); - tab_double (t, 3, r, TAB_NONE, - f->count / ft->total_cases * 100.0, NULL, RC_OTHER); - tab_text (t, 4, r, TAB_NONE, _("Missing")); - r++; + if (!missing) + missing = pivot_category_create_group (variable->root, N_("Missing")); + int var_idx = pivot_category_create_leaf ( + missing, pivot_value_new_var_value (vf->var, &f->values[0])); + put_freq_row (table, var_idx, f->count, + f->count / ft->total_cases * 100.0, SYSMIS, SYSMIS); } - tab_box (t, TAL_1, TAL_1, -1, TAL_1, 0, 0, 5, r); - tab_hline (t, TAL_2, 0, 5, 1); - tab_hline (t, TAL_2, 0, 5, r); - tab_joint_text (t, 0, r, 1, r, TAB_RIGHT | TAT_TITLE, _("Total")); - tab_vline (t, TAL_0, 1, r, r); - tab_double (t, 2, r, TAB_NONE, cum_freq, NULL, RC_WEIGHT); - tab_double (t, 3, r, TAB_NONE, 100.0, &F_5_1, RC_OTHER); - tab_double (t, 4, r, TAB_NONE, 100.0, &F_5_1, RC_OTHER); - - tab_title (t, "%s", var_to_string (vf->var)); - tab_submit (t); + int var_idx = pivot_category_create_leaf ( + variable->root, pivot_value_new_text (N_("Total"))); + put_freq_row (table, var_idx, cum_freq, cum_percent, SYSMIS, SYSMIS); + + pivot_table_submit (table); } /* Statistical display. */ @@ -367,13 +367,9 @@ calc_percentiles (const struct frq_proc *frq, const struct var_freqs *vf) const struct freq_tab *ft = &vf->tab; double W = ft->valid_cases; const struct freq *f; - int percentile_idx; - double rank; + int percentile_idx = 0; + double rank = 0; - assert (ft->n_valid > 0); - - rank = 0; - percentile_idx = 0; for (f = ft->valid; f < ft->missing; f++) { rank += f->count; @@ -390,15 +386,17 @@ calc_percentiles (const struct frq_proc *frq, const struct var_freqs *vf) break; if (tp + 1 < rank || f + 1 >= ft->missing) - pc->value = f->value.f; + pc->value = f->values[0].f; else - pc->value = calc_percentile (pc->p, W, f->value.f, f[1].value.f); + pc->value = calc_percentile (pc->p, W, f->values[0].f, f[1].values[0].f); } } for (; percentile_idx < frq->n_percentiles; percentile_idx++) { struct percentile *pc = &frq->percentiles[percentile_idx]; - pc->value = ft->valid[ft->n_valid - 1].value.f; + pc->value = (ft->n_valid > 0 + ? ft->valid[ft->n_valid - 1].values[0].f + : SYSMIS); } } @@ -410,7 +408,7 @@ not_missing (const void *f_, const void *v_) const struct freq *f = f_; const struct variable *v = v_; - return !var_is_value_missing (v, &f->value, MV_ANY); + return !var_is_value_missing (v, f->values, MV_ANY); } @@ -444,7 +442,7 @@ postprocess_freq_tab (const struct frq_proc *frq, struct var_freqs *vf) /* Summary statistics. */ ft->valid_cases = 0.0; - for(i = 0 ; i < ft->n_valid ; ++i ) + for(i = 0 ; i < ft->n_valid ; ++i) { f = &ft->valid[i]; ft->valid_cases += f->count; @@ -452,7 +450,7 @@ postprocess_freq_tab (const struct frq_proc *frq, struct var_freqs *vf) } ft->total_cases = ft->valid_cases ; - for(i = 0 ; i < ft->n_missing ; ++i ) + for(i = 0 ; i < ft->n_missing ; ++i) { f = &ft->missing[i]; ft->total_cases += f->count; @@ -472,7 +470,7 @@ cleanup_freq_tab (struct var_freqs *vf) static void calc (struct frq_proc *frq, const struct ccase *c, const struct dataset *ds) { - double weight = dict_get_case_weight (dataset_dict (ds), c, NULL); + double weight = dict_get_case_weight (dataset_dict (ds), c, &frq->warn); size_t i; for (i = 0; i < frq->n_vars; i++) @@ -521,36 +519,39 @@ postcalc (struct frq_proc *frq, const struct dataset *ds) for (i = 0; i < frq->n_vars; i++) { struct var_freqs *vf = &frq->vars[i]; - postprocess_freq_tab (frq, vf); + calc_percentiles (frq, vf); + } + + if (frq->n_stats) + dump_statistics (frq, wv); + + for (i = 0; i < frq->n_vars; i++) + { + struct var_freqs *vf = &frq->vars[i]; /* Frequencies tables. */ if (vf->tab.n_valid + vf->tab.n_missing <= frq->max_categories) dump_freq_table (vf, wv); - calc_percentiles (frq, vf); - - /* Statistics. */ - if (frq->n_stats) - dump_statistics (frq, vf, wv); if (frq->hist && var_is_numeric (vf->var) && vf->tab.n_valid > 0) { double d[FRQ_ST_count]; struct histogram *histogram; - calc_stats (vf, d); + calc_stats (frq, vf, d); histogram = freq_tab_to_hist (frq, &vf->tab, vf->var); - if ( histogram) + if (histogram) { - chart_item_submit (histogram_chart_create ( - histogram->gsl_hist, var_to_string(vf->var), - vf->tab.valid_cases, - d[FRQ_ST_MEAN], - d[FRQ_ST_STDDEV], - frq->hist->draw_normal)); + chart_submit (histogram_chart_create ( + histogram->gsl_hist, var_to_string(vf->var), + vf->tab.valid_cases, + d[FRQ_ST_MEAN], + d[FRQ_ST_STDDEV], + frq->hist->draw_normal)); statistic_destroy (&histogram->parent); } @@ -559,6 +560,9 @@ postcalc (struct frq_proc *frq, const struct dataset *ds) if (frq->pie) do_piechart(frq->pie, vf->var, &vf->tab); + if (frq->bar) + do_barchart(frq->bar, &vf->var, &vf->tab); + cleanup_freq_tab (vf); } } @@ -568,7 +572,7 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) { int i; struct frq_proc frq; - const struct variable **vars; + const struct variable **vars = NULL; bool sbc_barchart = false; bool sbc_piechart = false; @@ -576,7 +580,11 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) double pie_min = -DBL_MAX; double pie_max = DBL_MAX; - bool pie_missing = false; + bool pie_missing = true; + + double bar_min = -DBL_MAX; + double bar_max = DBL_MAX; + bool bar_freq = true; double hi_min = -DBL_MAX; double hi_max = DBL_MAX; @@ -590,29 +598,30 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) frq.vars = NULL; frq.n_vars = 0; - - frq.stats = BIT_INDEX (FRQ_ST_MEAN) - | BIT_INDEX (FRQ_ST_STDDEV) + + frq.stats = BIT_INDEX (FRQ_ST_MEAN) + | BIT_INDEX (FRQ_ST_STDDEV) | BIT_INDEX (FRQ_ST_MINIMUM) | BIT_INDEX (FRQ_ST_MAXIMUM); frq.n_stats = 4; - frq.max_categories = INT_MAX; + frq.max_categories = LONG_MAX; frq.percentiles = NULL; frq.n_percentiles = 0; - frq.n_show_percentiles = 0; frq.hist = NULL; frq.pie = NULL; + frq.bar = NULL; + frq.warn = true; /* Accept an optional, completely pointless "/VARIABLES=" */ lex_match (lexer, T_SLASH); if (lex_match_id (lexer, "VARIABLES")) { - if (! lex_force_match (lexer, T_EQUALS) ) + if (! lex_force_match (lexer, T_EQUALS)) goto error; } @@ -635,18 +644,26 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) if (lex_match_id (lexer, "STATISTICS")) { - lex_match (lexer, T_EQUALS); + frq.stats = BIT_INDEX (FRQ_ST_MEAN) + | BIT_INDEX (FRQ_ST_STDDEV) + | BIT_INDEX (FRQ_ST_MINIMUM) + | BIT_INDEX (FRQ_ST_MAXIMUM); - frq.stats = 0; - frq.n_stats = 0; + frq.n_stats = 4; + + if (lex_match (lexer, T_EQUALS)) + { + frq.n_stats = 0; + frq.stats = 0; + } while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) { if (lex_match_id (lexer, "DEFAULT")) { - frq.stats = BIT_INDEX (FRQ_ST_MEAN) - | BIT_INDEX (FRQ_ST_STDDEV) + frq.stats = BIT_INDEX (FRQ_ST_MEAN) + | BIT_INDEX (FRQ_ST_STDDEV) | BIT_INDEX (FRQ_ST_MINIMUM) | BIT_INDEX (FRQ_ST_MAXIMUM); @@ -679,7 +696,7 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "VARIANCE")) { - frq.stats |= BIT_INDEX (FRQ_ST_MEAN); + frq.stats |= BIT_INDEX (FRQ_ST_VARIANCE); frq.n_stats++; } else if (lex_match_id (lexer, "KURTOSIS")) @@ -748,20 +765,20 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) if (lex_force_num (lexer)) { frq.percentiles = - xrealloc (frq.percentiles, + xrealloc (frq.percentiles, (frq.n_percentiles + 1) * sizeof (*frq.percentiles)); frq.percentiles[frq.n_percentiles].p = lex_number (lexer) / 100.0; frq.percentiles[frq.n_percentiles].show = true; lex_get (lexer); frq.n_percentiles++; - frq.n_show_percentiles++; } else { lex_error (lexer, NULL); goto error; } + lex_match (lexer, T_COMMA); } } else if (lex_match_id (lexer, "FORMAT")) @@ -772,12 +789,23 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) { if (lex_match_id (lexer, "TABLE")) { - } else if (lex_match_id (lexer, "NOTABLE")) { frq.max_categories = 0; } + else if (lex_match_id (lexer, "LIMIT")) + { + if (!lex_force_match (lexer, T_LPAREN) + || !lex_force_int (lexer)) + goto error; + + frq.max_categories = lex_integer (lexer); + lex_get (lexer); + + if (!lex_force_match (lexer, T_RPAREN)) + goto error; + } else if (lex_match_id (lexer, "AVALUE")) { frq.sort = FRQ_AVALUE; @@ -813,7 +841,7 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) for (i = 0; i < n + 1; ++i) { frq.percentiles = - xrealloc (frq.percentiles, + xrealloc (frq.percentiles, (frq.n_percentiles + 1) * sizeof (*frq.percentiles)); frq.percentiles[frq.n_percentiles].p = @@ -821,7 +849,6 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) frq.percentiles[frq.n_percentiles].show = true; frq.n_percentiles++; - frq.n_show_percentiles++; } } else @@ -877,7 +904,8 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) lex_error (lexer, _("Histogram frequency must be greater than zero.")); } lex_get (lexer); - lex_force_match (lexer, T_RPAREN); + if (! lex_force_match (lexer, T_RPAREN)) + goto error; } } } @@ -894,29 +922,34 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) lex_error (lexer, _("Histogram percentage must be greater than zero.")); } lex_get (lexer); - lex_force_match (lexer, T_RPAREN); + if (! lex_force_match (lexer, T_RPAREN)) + goto error; } } } else if (lex_match_id (lexer, "MINIMUM")) { - lex_force_match (lexer, T_LPAREN); + if (! lex_force_match (lexer, T_LPAREN)) + goto error; if (lex_force_num (lexer)) { hi_min = lex_number (lexer); lex_get (lexer); } - lex_force_match (lexer, T_RPAREN); + if (! lex_force_match (lexer, T_RPAREN)) + goto error; } else if (lex_match_id (lexer, "MAXIMUM")) { - lex_force_match (lexer, T_LPAREN); + if (! lex_force_match (lexer, T_LPAREN)) + goto error; if (lex_force_num (lexer)) { hi_max = lex_number (lexer); lex_get (lexer); } - lex_force_match (lexer, T_RPAREN); + if (! lex_force_match (lexer, T_RPAREN)) + goto error; } else { @@ -933,23 +966,27 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) { if (lex_match_id (lexer, "MINIMUM")) { - lex_force_match (lexer, T_LPAREN); + if (! lex_force_match (lexer, T_LPAREN)) + goto error; if (lex_force_num (lexer)) { pie_min = lex_number (lexer); lex_get (lexer); } - lex_force_match (lexer, T_RPAREN); + if (! lex_force_match (lexer, T_RPAREN)) + goto error; } else if (lex_match_id (lexer, "MAXIMUM")) { - lex_force_match (lexer, T_LPAREN); + if (! lex_force_match (lexer, T_LPAREN)) + goto error; if (lex_force_num (lexer)) { pie_max = lex_number (lexer); lex_get (lexer); } - lex_force_match (lexer, T_RPAREN); + if (! lex_force_match (lexer, T_RPAREN)) + goto error; } else if (lex_match_id (lexer, "MISSING")) { @@ -967,6 +1004,72 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) } sbc_piechart = true; } + else if (lex_match_id (lexer, "BARCHART")) + { + lex_match (lexer, T_EQUALS); + while (lex_token (lexer) != T_ENDCMD + && lex_token (lexer) != T_SLASH) + { + if (lex_match_id (lexer, "MINIMUM")) + { + if (! lex_force_match (lexer, T_LPAREN)) + goto error; + if (lex_force_num (lexer)) + { + bar_min = lex_number (lexer); + lex_get (lexer); + } + if (! lex_force_match (lexer, T_RPAREN)) + goto error; + } + else if (lex_match_id (lexer, "MAXIMUM")) + { + if (! lex_force_match (lexer, T_LPAREN)) + goto error; + if (lex_force_num (lexer)) + { + bar_max = lex_number (lexer); + lex_get (lexer); + } + if (! lex_force_match (lexer, T_RPAREN)) + goto error; + } + else if (lex_match_id (lexer, "FREQ")) + { + if (lex_match (lexer, T_LPAREN)) + { + if (lex_force_num (lexer)) + { + lex_number (lexer); + lex_get (lexer); + } + if (! lex_force_match (lexer, T_RPAREN)) + goto error; + } + bar_freq = true; + } + else if (lex_match_id (lexer, "PERCENT")) + { + if (lex_match (lexer, T_LPAREN)) + { + if (lex_force_num (lexer)) + { + lex_number (lexer); + lex_get (lexer); + } + if (! lex_force_match (lexer, T_RPAREN)) + goto error; + } + bar_freq = false; + } + else + { + lex_error (lexer, NULL); + goto error; + } + } + sbc_barchart = true; + } else if (lex_match_id (lexer, "MISSING")) { lex_match (lexer, T_EQUALS); @@ -987,6 +1090,12 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) } } } + else if (lex_match_id (lexer, "ORDER")) + { + lex_match (lexer, T_EQUALS); + if (!lex_match_id (lexer, "ANALYSIS")) + lex_match_id (lexer, "VARIABLE"); + } else { lex_error (lexer, NULL); @@ -997,12 +1106,12 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) if (frq.stats & BIT_INDEX (FRQ_ST_MEDIAN)) { frq.percentiles = - xrealloc (frq.percentiles, + xrealloc (frq.percentiles, (frq.n_percentiles + 1) * sizeof (*frq.percentiles)); - + frq.percentiles[frq.n_percentiles].p = 0.50; - frq.percentiles[frq.n_percentiles].show = true; + frq.percentiles[frq.n_percentiles].show = false; frq.n_percentiles++; } @@ -1011,9 +1120,6 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) /* Figure out which charts the user requested. */ { - if (sbc_barchart) - msg (SW, _("Bar charts are not implemented.")); - if (sbc_histogram) { struct frq_chart *hist; @@ -1032,27 +1138,36 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) msg (SE, _("%s for histogram must be greater than or equal to %s, " "but %s was specified as %.15g and %s as %.15g. " "%s and %s will be ignored."), - "MAX", "MIN", - "MIN", hist->x_min, + "MAX", "MIN", + "MIN", hist->x_min, "MAX", hist->x_max, "MIN", "MAX"); hist->x_min = hist->x_max = SYSMIS; } frq.percentiles = - xrealloc (frq.percentiles, + xrealloc (frq.percentiles, (frq.n_percentiles + 2) * sizeof (*frq.percentiles)); - + frq.percentiles[frq.n_percentiles].p = 0.25; frq.percentiles[frq.n_percentiles].show = false; frq.percentiles[frq.n_percentiles + 1].p = 0.75; frq.percentiles[frq.n_percentiles + 1].show = false; - + frq.n_percentiles+=2; } + if (sbc_barchart) + { + frq.bar = xmalloc (sizeof *frq.bar); + frq.bar->x_min = bar_min; + frq.bar->x_max = bar_max; + frq.bar->include_missing = false; + frq.bar->y_scale = bar_freq ? FRQ_FREQ : FRQ_PERCENT; + } + if (sbc_piechart) { struct frq_chart *pie; @@ -1067,8 +1182,8 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) { msg (SE, _("%s for pie chart must be greater than or equal to %s, " "but %s was specified as %.15g and %s as %.15g. " - "%s and %s will be ignored."), - "MAX", "MIN", + "%s and %s will be ignored."), + "MAX", "MIN", "MIN", pie->x_min, "MAX", pie->x_max, "MIN", "MAX"); @@ -1081,29 +1196,34 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) int i,o; double previous_p = -1; qsort (frq.percentiles, frq.n_percentiles, - sizeof (*frq.percentiles), + sizeof (*frq.percentiles), ptile_3way); - frq.n_show_percentiles = 0; for (i = o = 0; i < frq.n_percentiles; ++i) { - frq.percentiles[o].p = frq.percentiles[i].p; - - if (frq.percentiles[i].show) - frq.percentiles[o].show = true; - - if (frq.percentiles[i].p != previous_p) - { - if (frq.percentiles[i].show) - frq.n_show_percentiles++; - - o++; - } - + if (frq.percentiles[i].p != previous_p) + { + frq.percentiles[o].p = frq.percentiles[i].p; + frq.percentiles[o].show = frq.percentiles[i].show; + o++; + } + else if (frq.percentiles[i].show && + !frq.percentiles[o].show) + { + frq.percentiles[o].show = true; + } previous_p = frq.percentiles[i].p; } frq.n_percentiles = o; + + frq.median = NULL; + for (i = 0; i < frq.n_percentiles; i++) + if (frq.percentiles[i].p == 0.5) + { + frq.median = &frq.percentiles[i]; + break; + } } { @@ -1116,19 +1236,37 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) { struct ccase *c; precalc (&frq, group, ds); + for (; (c = casereader_read (group)) != NULL; case_unref (c)) calc (&frq, c, ds); postcalc (&frq, ds); + casereader_destroy (group); } ok = casegrouper_destroy (grouper); ok = proc_commit (ds) && ok; } + free (vars); + free (frq.vars); + free (frq.bar); + free (frq.pie); + free (frq.hist); + free (frq.percentiles); + pool_destroy (frq.pool); + return CMD_SUCCESS; error: + free (vars); + free (frq.vars); + free (frq.bar); + free (frq.pie); + free (frq.hist); + free (frq.percentiles); + pool_destroy (frq.pool); + return CMD_FAILURE; } @@ -1189,14 +1327,16 @@ freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft, for (i = 0; i < ft->n_valid; i++) { const struct freq *f = &ft->valid[i]; - if (chart_includes_value (frq->hist, var, &f->value)) + if (chart_includes_value (frq->hist, var, f->values)) { - x_min = MIN (x_min, f->value.f); - x_max = MAX (x_max, f->value.f); + x_min = MIN (x_min, f->values[0].f); + x_max = MAX (x_max, f->values[0].f); valid_freq += f->count; } } + if (valid_freq <= 0) + return NULL; iqr = calculate_iqr (frq); @@ -1210,69 +1350,124 @@ freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft, histogram = histogram_create (bin_width, x_min, x_max); - if ( histogram == NULL) + if (histogram == NULL) return NULL; for (i = 0; i < ft->n_valid; i++) { const struct freq *f = &ft->valid[i]; - if (chart_includes_value (frq->hist, var, &f->value)) - histogram_add (histogram, f->value.f, f->count); + if (chart_includes_value (frq->hist, var, f->values)) + histogram_add (histogram, f->values[0].f, f->count); } return histogram; } -static int -add_slice (const struct frq_chart *pie, const struct freq *freq, - const struct variable *var, struct slice *slice) + +/* Allocate an array of struct freqs and fill them from the data in FRQ_TAB, + according to the parameters of CATCHART + N_SLICES will contain the number of slices allocated. + The caller is responsible for freeing slices +*/ +static struct freq * +pick_cat_counts (const struct frq_chart *catchart, + const struct freq_tab *frq_tab, + int *n_slicesp) { - if (chart_includes_value (pie, var, &freq->value)) + int n_slices = 0; + int i; + struct freq *slices = xnmalloc (frq_tab->n_valid + frq_tab->n_missing, sizeof *slices); + + for (i = 0; i < frq_tab->n_valid; i++) { - ds_init_empty (&slice->label); - var_append_value_name (var, &freq->value, &slice->label); - slice->magnitude = freq->count; - return 1; + const struct freq *f = &frq_tab->valid[i]; + if (f->count > catchart->x_max) + continue; + + if (f->count < catchart->x_min) + continue; + + slices[n_slices] = *f; + + n_slices++; } - else - return 0; + + if (catchart->include_missing) + { + for (i = 0; i < frq_tab->n_missing; i++) + { + const struct freq *f = &frq_tab->missing[i]; + slices[n_slices].count += f->count; + + if (i == 0) + slices[n_slices].values[0] = f->values[0]; + } + + if (frq_tab->n_missing > 0) + n_slices++; + } + + *n_slicesp = n_slices; + return slices; } -/* Allocate an array of slices and fill them from the data in frq_tab - n_slices will contain the number of slices allocated. + +/* Allocate an array of struct freqs and fill them from the data in FRQ_TAB, + according to the parameters of CATCHART + N_SLICES will contain the number of slices allocated. The caller is responsible for freeing slices */ -static struct slice * -freq_tab_to_slice_array(const struct frq_chart *pie, - const struct freq_tab *frq_tab, - const struct variable *var, - int *n_slicesp) +static struct freq ** +pick_cat_counts_ptr (const struct frq_chart *catchart, + const struct freq_tab *frq_tab, + int *n_slicesp) { - struct slice *slices; - int n_slices; + int n_slices = 0; int i; - - slices = xnmalloc (frq_tab->n_valid + frq_tab->n_missing, sizeof *slices); - n_slices = 0; + struct freq **slices = xnmalloc (frq_tab->n_valid + frq_tab->n_missing, sizeof *slices); for (i = 0; i < frq_tab->n_valid; i++) - n_slices += add_slice (pie, &frq_tab->valid[i], var, &slices[n_slices]); - for (i = 0; i < frq_tab->n_missing; i++) - n_slices += add_slice (pie, &frq_tab->missing[i], var, &slices[n_slices]); + { + struct freq *f = &frq_tab->valid[i]; + if (f->count > catchart->x_max) + continue; + + if (f->count < catchart->x_min) + continue; + + slices[n_slices] = f; + + n_slices++; + } + + if (catchart->include_missing) + { + for (i = 0; i < frq_tab->n_missing; i++) + { + const struct freq *f = &frq_tab->missing[i]; + if (i == 0) + { + slices[n_slices] = xmalloc (sizeof (struct freq)); + slices[n_slices]->values[0] = f->values[0]; + } + + slices[n_slices]->count += f->count; + + } + } *n_slicesp = n_slices; return slices; } + static void do_piechart(const struct frq_chart *pie, const struct variable *var, const struct freq_tab *frq_tab) { - struct slice *slices; - int n_slices, i; - - slices = freq_tab_to_slice_array (pie, frq_tab, var, &n_slices); + int n_slices; + struct freq *slices = pick_cat_counts (pie, frq_tab, &n_slices); if (n_slices < 2) msg (SW, _("Omitting pie chart for %s, which has only %d unique values."), @@ -1281,36 +1476,47 @@ do_piechart(const struct frq_chart *pie, const struct variable *var, msg (SW, _("Omitting pie chart for %s, which has over 50 unique values."), var_get_name (var)); else - chart_item_submit (piechart_create (var_to_string(var), slices, n_slices)); + chart_submit (piechart_create (var, slices, n_slices)); + + free (slices); +} + + +static void +do_barchart(const struct frq_chart *bar, const struct variable **var, + const struct freq_tab *frq_tab) +{ + int n_slices; + struct freq **slices = pick_cat_counts_ptr (bar, frq_tab, &n_slices); - for (i = 0; i < n_slices; i++) - ds_destroy (&slices[i].label); + chart_submit (barchart_create (var, 1, + (bar->y_scale == FRQ_FREQ) ? _("Count") : _("Percent"), + (bar->y_scale == FRQ_PERCENT), + slices, n_slices)); free (slices); } + /* Calculates all the pertinent statistics for VF, putting them in array D[]. */ static void -calc_stats (const struct var_freqs *vf, double d[FRQ_ST_count]) +calc_stats (const struct frq_proc *frq, const struct var_freqs *vf, + double d[FRQ_ST_count]) { const struct freq_tab *ft = &vf->tab; double W = ft->valid_cases; const struct freq *f; struct moments *m; - int most_often; - double X_mode; - - assert (ft->n_valid > 0); + int most_often = -1; + double X_mode = SYSMIS; /* Calculate the mode. */ - most_often = -1; - X_mode = SYSMIS; for (f = ft->valid; f < ft->missing; f++) { if (most_often < f->count) { most_often = f->count; - X_mode = f->value.f; + X_mode = f->values[0].f; } else if (most_often == f->count) { @@ -1323,102 +1529,128 @@ calc_stats (const struct var_freqs *vf, double d[FRQ_ST_count]) /* Calculate moments. */ m = moments_create (MOMENT_KURTOSIS); for (f = ft->valid; f < ft->missing; f++) - moments_pass_one (m, f->value.f, f->count); + moments_pass_one (m, f->values[0].f, f->count); for (f = ft->valid; f < ft->missing; f++) - moments_pass_two (m, f->value.f, f->count); + moments_pass_two (m, f->values[0].f, f->count); moments_calculate (m, NULL, &d[FRQ_ST_MEAN], &d[FRQ_ST_VARIANCE], &d[FRQ_ST_SKEWNESS], &d[FRQ_ST_KURTOSIS]); moments_destroy (m); - /* Formulas below are taken from _SPSS Statistical Algorithms_. */ - d[FRQ_ST_MINIMUM] = ft->valid[0].value.f; - d[FRQ_ST_MAXIMUM] = ft->valid[ft->n_valid - 1].value.f; + /* Formulae below are taken from _SPSS Statistical Algorithms_. */ + if (ft->n_valid > 0) + { + d[FRQ_ST_MINIMUM] = ft->valid[0].values[0].f; + d[FRQ_ST_MAXIMUM] = ft->valid[ft->n_valid - 1].values[0].f; + d[FRQ_ST_RANGE] = d[FRQ_ST_MAXIMUM] - d[FRQ_ST_MINIMUM]; + } + else + { + d[FRQ_ST_MINIMUM] = SYSMIS; + d[FRQ_ST_MAXIMUM] = SYSMIS; + d[FRQ_ST_RANGE] = SYSMIS; + } d[FRQ_ST_MODE] = X_mode; - d[FRQ_ST_RANGE] = d[FRQ_ST_MAXIMUM] - d[FRQ_ST_MINIMUM]; d[FRQ_ST_SUM] = d[FRQ_ST_MEAN] * W; d[FRQ_ST_STDDEV] = sqrt (d[FRQ_ST_VARIANCE]); d[FRQ_ST_SEMEAN] = d[FRQ_ST_STDDEV] / sqrt (W); d[FRQ_ST_SESKEWNESS] = calc_seskew (W); d[FRQ_ST_SEKURTOSIS] = calc_sekurt (W); + d[FRQ_ST_MEDIAN] = frq->median ? frq->median->value : SYSMIS; } -/* Displays a table of all the statistics requested for variable V. */ -static void -dump_statistics (const struct frq_proc *frq, const struct var_freqs *vf, - const struct variable *wv) +static bool +all_string_variables (const struct frq_proc *frq) { - const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; - const struct freq_tab *ft = &vf->tab; - double stat_value[FRQ_ST_count]; - struct tab_table *t; - int i, r; + for (size_t i = 0; i < frq->n_vars; i++) + if (var_is_numeric (frq->vars[i].var)) + return false; - if (var_is_alpha (vf->var)) + return true; +} + +/* Displays a table of all the statistics requested. */ +static void +dump_statistics (const struct frq_proc *frq, const struct variable *wv) +{ + if (all_string_variables (frq)) return; - if (ft->n_valid == 0) + struct pivot_table *table = pivot_table_create (N_("Statistics")); + pivot_table_set_weight_var (table, wv); + + struct pivot_dimension *variables + = pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Variables")); + + struct pivot_dimension *statistics = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Statistics")); + struct pivot_category *n = pivot_category_create_group ( + statistics->root, N_("N")); + pivot_category_create_leaves (n, + N_("Valid"), PIVOT_RC_COUNT, + N_("Missing"), PIVOT_RC_COUNT); + for (int i = 0; i < FRQ_ST_count; i++) + if (frq->stats & BIT_INDEX (i)) + pivot_category_create_leaf (statistics->root, + pivot_value_new_text (st_name[i])); + struct pivot_category *percentiles = NULL; + for (size_t i = 0; i < frq->n_percentiles; i++) { - msg (SW, _("No valid data for variable %s; statistics not displayed."), - var_get_name (vf->var)); - return; - } - calc_stats (vf, stat_value); - - t = tab_create (3, ((frq->stats & BIT_INDEX (FRQ_ST_MEDIAN)) ? frq->n_stats - 1 : frq->n_stats) - + frq->n_show_percentiles + 2); - tab_set_format (t, RC_WEIGHT, wfmt); - tab_box (t, TAL_1, TAL_1, -1, -1 , 0 , 0 , 2, tab_nr(t) - 1) ; - + const struct percentile *pc = &frq->percentiles[i]; - tab_vline (t, TAL_1 , 2, 0, tab_nr(t) - 1); - tab_vline (t, TAL_GAP , 1, 0, tab_nr(t) - 1 ) ; + if (!pc->show) + continue; - r = 2; /* N missing and N valid are always dumped */ + if (!percentiles) + percentiles = pivot_category_create_group ( + statistics->root, N_("Percentiles")); + pivot_category_create_leaf (percentiles, pivot_value_new_integer ( + pc->p * 100.0)); + } - for (i = 0; i < FRQ_ST_count; i++) + for (size_t i = 0; i < frq->n_vars; i++) { - if (FRQ_ST_MEDIAN == i) - continue; - - if (frq->stats & BIT_INDEX (i)) - { - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, - gettext (st_name[i])); - tab_double (t, 2, r, TAB_NONE, stat_value[i], NULL, RC_OTHER); - r++; - } - } + struct var_freqs *vf = &frq->vars[i]; + if (var_is_alpha (vf->var)) + continue; - tab_text (t, 0, 0, TAB_LEFT | TAT_TITLE, _("N")); - tab_text (t, 1, 0, TAB_LEFT | TAT_TITLE, _("Valid")); - tab_text (t, 1, 1, TAB_LEFT | TAT_TITLE, _("Missing")); + const struct freq_tab *ft = &vf->tab; - tab_double (t, 2, 0, TAB_NONE, ft->valid_cases, NULL, RC_WEIGHT); - tab_double (t, 2, 1, TAB_NONE, ft->total_cases - ft->valid_cases, NULL, RC_WEIGHT); + int var_idx = pivot_category_create_leaf ( + variables->root, pivot_value_new_variable (vf->var)); - for (i = 0; i < frq->n_percentiles; i++) - { - const struct percentile *pc = &frq->percentiles[i]; + int row = 0; + pivot_table_put2 (table, var_idx, row++, + pivot_value_new_number (ft->valid_cases)); + pivot_table_put2 (table, var_idx, row++, + pivot_value_new_number ( + ft->total_cases - ft->valid_cases)); - if (!pc->show) - continue; + double stat_values[FRQ_ST_count]; + calc_stats (frq, vf, stat_values); + for (int j = 0; j < FRQ_ST_count; j++) + { + if (!(frq->stats & BIT_INDEX (j))) + continue; + + union value v = { .f = vf->tab.n_valid ? stat_values[j] : SYSMIS }; + struct pivot_value *pv + = (j == FRQ_ST_MODE || j == FRQ_ST_MINIMUM || j == FRQ_ST_MAXIMUM + ? pivot_value_new_var_value (vf->var, &v) + : pivot_value_new_number (v.f)); + pivot_table_put2 (table, var_idx, row++, pv); + } - if ( i == 0 ) - { - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Percentiles")); - } + for (size_t j = 0; j < frq->n_percentiles; j++) + { + const struct percentile *pc = &frq->percentiles[j]; + if (!pc->show) + continue; - if (pc->p == 0.5) - tab_text (t, 1, r, TAB_LEFT, _("50 (Median)")); - else - tab_double (t, 1, r, TAB_LEFT, pc->p * 100, NULL, RC_INTEGER); - tab_double (t, 2, r, TAB_NONE, pc->value, - var_get_print_format (vf->var), RC_OTHER); - r++; + union value v = { .f = vf->tab.n_valid ? pc->value : SYSMIS }; + pivot_table_put2 (table, var_idx, row++, + pivot_value_new_var_value (vf->var, &v)); + } } - tab_title (t, "%s", var_to_string (vf->var)); - - tab_submit (t); + pivot_table_submit (table); } -