X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Ffrequencies.c;h=8fb96b0d57e2316d59ecda71099d317d189e10db;hb=3368d7a7d2e5fd6c284781aa327daff250922756;hp=da1a3828d1d13a2ca4f42768537258b67fc08eff;hpb=e1335ea67114822554e0d51bd5e0b49401b17dc4;p=pspp diff --git a/src/language/stats/frequencies.c b/src/language/stats/frequencies.c index da1a3828d1..8fb96b0d57 100644 --- a/src/language/stats/frequencies.c +++ b/src/language/stats/frequencies.c @@ -1,6 +1,6 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2007, 2009, 2010, 2011, 2014 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2007, 2009, 2010, 2011, 2014, 2015 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -53,6 +53,7 @@ #include "output/chart-item.h" +#include "output/charts/barchart.h" #include "output/charts/piechart.h" #include "output/charts/plot-hist.h" #include "output/tab.h" @@ -82,6 +83,14 @@ ptile_3way (const void *_p1, const void *_p2) if (p1->p < p2->p) return -1; + if (p1->p == p2->p) + { + if (p1->show > p2->show) + return -1; + + return (p1->show < p2->show); + } + return (p1->p > p2->p); } @@ -205,7 +214,7 @@ struct frq_proc int n_percentiles, n_show_percentiles; /* Frequency table display. */ - int max_categories; /* Maximum categories to show. */ + long int max_categories; /* Maximum categories to show. */ int sort; /* FRQ_AVALUE or FRQ_DVALUE or FRQ_AFREQ or FRQ_DFREQ. */ @@ -214,7 +223,7 @@ struct frq_proc int n_stats; /* Histogram and pie chart settings. 
*/ - struct frq_chart *hist, *pie; + struct frq_chart *hist, *pie, *bar; }; @@ -233,6 +242,10 @@ static void do_piechart(const struct frq_chart *pie, const struct variable *var, const struct freq_tab *frq_tab); +static void do_barchart(const struct frq_chart *bar, + const struct variable **var, + const struct freq_tab *frq_tab); + static void dump_statistics (const struct frq_proc *frq, const struct var_freqs *vf, const struct variable *wv); @@ -251,7 +264,7 @@ compare_freq (const void *a_, const void *b_, const void *aux_) } else { - int cmp = value_compare_3way (&a->value, &b->value, aux->width); + int cmp = value_compare_3way (a->values, b->values, aux->width); return aux->ascending_value ? cmp : -cmp; } } @@ -304,11 +317,11 @@ dump_freq_table (const struct var_freqs *vf, const struct variable *wv) valid_percent = f->count / ft->valid_cases * 100.0; cum_total += valid_percent; - label = var_lookup_value_label (vf->var, &f->value); + label = var_lookup_value_label (vf->var, f->values); if (label != NULL) tab_text (t, 0, r, TAB_LEFT, label); - tab_value (t, 1, r, TAB_NONE, &f->value, vf->var, NULL); + tab_value (t, 1, r, TAB_NONE, f->values, vf->var, NULL); tab_double (t, 2, r, TAB_NONE, f->count, NULL, RC_WEIGHT); tab_double (t, 3, r, TAB_NONE, percent, NULL, RC_OTHER); tab_double (t, 4, r, TAB_NONE, valid_percent, NULL, RC_OTHER); @@ -321,11 +334,11 @@ dump_freq_table (const struct var_freqs *vf, const struct variable *wv) cum_freq += f->count; - label = var_lookup_value_label (vf->var, &f->value); + label = var_lookup_value_label (vf->var, f->values); if (label != NULL) tab_text (t, 0, r, TAB_LEFT, label); - tab_value (t, 1, r, TAB_NONE, &f->value, vf->var, NULL); + tab_value (t, 1, r, TAB_NONE, f->values, vf->var, NULL); tab_double (t, 2, r, TAB_NONE, f->count, NULL, RC_WEIGHT); tab_double (t, 3, r, TAB_NONE, f->count / ft->total_cases * 100.0, NULL, RC_OTHER); @@ -367,13 +380,9 @@ calc_percentiles (const struct frq_proc *frq, const struct var_freqs *vf) const 
struct freq_tab *ft = &vf->tab; double W = ft->valid_cases; const struct freq *f; - int percentile_idx; - double rank; - - assert (ft->n_valid > 0); + int percentile_idx = 0; + double rank = 0; - rank = 0; - percentile_idx = 0; for (f = ft->valid; f < ft->missing; f++) { rank += f->count; @@ -390,15 +399,15 @@ calc_percentiles (const struct frq_proc *frq, const struct var_freqs *vf) break; if (tp + 1 < rank || f + 1 >= ft->missing) - pc->value = f->value.f; + pc->value = f->values[0].f; else - pc->value = calc_percentile (pc->p, W, f->value.f, f[1].value.f); + pc->value = calc_percentile (pc->p, W, f->values[0].f, f[1].values[0].f); } } for (; percentile_idx < frq->n_percentiles; percentile_idx++) { struct percentile *pc = &frq->percentiles[percentile_idx]; - pc->value = ft->valid[ft->n_valid - 1].value.f; + pc->value = ft->valid[ft->n_valid - 1].values[0].f; } } @@ -410,7 +419,7 @@ not_missing (const void *f_, const void *v_) const struct freq *f = f_; const struct variable *v = v_; - return !var_is_value_missing (v, &f->value, MV_ANY); + return !var_is_value_missing (v, f->values, MV_ANY); } @@ -559,6 +568,9 @@ postcalc (struct frq_proc *frq, const struct dataset *ds) if (frq->pie) do_piechart(frq->pie, vf->var, &vf->tab); + if (frq->bar) + do_barchart(frq->bar, &vf->var, &vf->tab); + cleanup_freq_tab (vf); } } @@ -576,7 +588,11 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) double pie_min = -DBL_MAX; double pie_max = DBL_MAX; - bool pie_missing = false; + bool pie_missing = true; + + double bar_min = -DBL_MAX; + double bar_max = DBL_MAX; + bool bar_freq = true; double hi_min = -DBL_MAX; double hi_max = DBL_MAX; @@ -598,7 +614,7 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) frq.n_stats = 4; - frq.max_categories = INT_MAX; + frq.max_categories = LONG_MAX; frq.percentiles = NULL; frq.n_percentiles = 0; @@ -606,6 +622,7 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) frq.hist = NULL; frq.pie = NULL; + frq.bar = NULL; /* 
Accept an optional, completely pointless "/VARIABLES=" */ @@ -770,6 +787,7 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) lex_error (lexer, NULL); goto error; } + lex_match (lexer, T_COMMA); } } else if (lex_match_id (lexer, "FORMAT")) @@ -780,12 +798,23 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) { if (lex_match_id (lexer, "TABLE")) { - } else if (lex_match_id (lexer, "NOTABLE")) { frq.max_categories = 0; } + else if (lex_match_id (lexer, "LIMIT")) + { + if (!lex_force_match (lexer, T_LPAREN) + || !lex_force_int (lexer)) + goto error; + + frq.max_categories = lex_integer (lexer); + lex_get (lexer); + + if (!lex_force_match (lexer, T_RPAREN)) + goto error; + } else if (lex_match_id (lexer, "AVALUE")) { frq.sort = FRQ_AVALUE; @@ -975,6 +1004,66 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) } sbc_piechart = true; } + else if (lex_match_id (lexer, "BARCHART")) + { + lex_match (lexer, T_EQUALS); + while (lex_token (lexer) != T_ENDCMD + && lex_token (lexer) != T_SLASH) + { + if (lex_match_id (lexer, "MINIMUM")) + { + lex_force_match (lexer, T_LPAREN); + if (lex_force_num (lexer)) + { + bar_min = lex_number (lexer); + lex_get (lexer); + } + lex_force_match (lexer, T_RPAREN); + } + else if (lex_match_id (lexer, "MAXIMUM")) + { + lex_force_match (lexer, T_LPAREN); + if (lex_force_num (lexer)) + { + bar_max = lex_number (lexer); + lex_get (lexer); + } + lex_force_match (lexer, T_RPAREN); + } + else if (lex_match_id (lexer, "FREQ")) + { + if ( lex_match (lexer, T_LPAREN)) + { + if (lex_force_num (lexer)) + { + lex_number (lexer); + lex_get (lexer); + } + lex_force_match (lexer, T_RPAREN); + } + bar_freq = true; + } + else if (lex_match_id (lexer, "PERCENT")) + { + if ( lex_match (lexer, T_LPAREN)) + { + if (lex_force_num (lexer)) + { + lex_number (lexer); + lex_get (lexer); + } + lex_force_match (lexer, T_RPAREN); + } + bar_freq = false; + } + else + { + lex_error (lexer, NULL); + goto error; + } + } + sbc_barchart = true; + } 
else if (lex_match_id (lexer, "MISSING")) { lex_match (lexer, T_EQUALS); @@ -995,6 +1084,12 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) } } } + else if (lex_match_id (lexer, "ORDER")) + { + lex_match (lexer, T_EQUALS); + if (!lex_match_id (lexer, "ANALYSIS")) + lex_match_id (lexer, "VARIABLE"); + } else { lex_error (lexer, NULL); @@ -1019,9 +1114,6 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) /* Figure out which charts the user requested. */ { - if (sbc_barchart) - msg (SW, _("Bar charts are not implemented.")); - if (sbc_histogram) { struct frq_chart *hist; @@ -1061,6 +1153,15 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) frq.n_percentiles+=2; } + if (sbc_barchart) + { + frq.bar = xmalloc (sizeof *frq.bar); + frq.bar->x_min = bar_min; + frq.bar->x_max = bar_max; + frq.bar->include_missing = false; + frq.bar->y_scale = bar_freq ? FRQ_FREQ : FRQ_PERCENT; + } + if (sbc_piechart) { struct frq_chart *pie; @@ -1197,10 +1298,10 @@ freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft, for (i = 0; i < ft->n_valid; i++) { const struct freq *f = &ft->valid[i]; - if (chart_includes_value (frq->hist, var, &f->value)) + if (chart_includes_value (frq->hist, var, f->values)) { - x_min = MIN (x_min, f->value.f); - x_max = MAX (x_max, f->value.f); + x_min = MIN (x_min, f->values[0].f); + x_max = MAX (x_max, f->values[0].f); valid_freq += f->count; } } @@ -1224,63 +1325,118 @@ freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft, for (i = 0; i < ft->n_valid; i++) { const struct freq *f = &ft->valid[i]; - if (chart_includes_value (frq->hist, var, &f->value)) - histogram_add (histogram, f->value.f, f->count); + if (chart_includes_value (frq->hist, var, f->values)) + histogram_add (histogram, f->values[0].f, f->count); } return histogram; } -static int -add_slice (const struct frq_chart *pie, const struct freq *freq, - const struct variable *var, struct slice *slice) + +/* Allocate an array of struct 
freqs and fill them from the data in FRQ_TAB, + according to the parameters of CATCHART + N_SLICES will contain the number of slices allocated. + The caller is responsible for freeing slices +*/ +static struct freq * +pick_cat_counts (const struct frq_chart *catchart, + const struct freq_tab *frq_tab, + int *n_slicesp) { - if (chart_includes_value (pie, var, &freq->value)) + int n_slices = 0; + int i; + struct freq *slices = xnmalloc (frq_tab->n_valid + frq_tab->n_missing, sizeof *slices); + + for (i = 0; i < frq_tab->n_valid; i++) { - ds_init_empty (&slice->label); - var_append_value_name (var, &freq->value, &slice->label); - slice->magnitude = freq->count; - return 1; + const struct freq *f = &frq_tab->valid[i]; + if (f->count > catchart->x_max) + continue; + + if (f->count < catchart->x_min) + continue; + + slices[n_slices] = *f; + + n_slices++; } - else - return 0; + + if (catchart->include_missing) + { + for (i = 0; i < frq_tab->n_missing; i++) + { + const struct freq *f = &frq_tab->missing[i]; + if (i == 0) + slices[n_slices] = *f; /* Seed the slot: it is uninitialized until now. */ + else + slices[n_slices].count += f->count; /* Fold later missing values into it. */ + } + + if (frq_tab->n_missing > 0) + n_slices++; + } + + *n_slicesp = n_slices; + return slices; } -/* Allocate an array of slices and fill them from the data in frq_tab - n_slices will contain the number of slices allocated. + +/* Allocate an array of pointers to struct freq and fill it from the data in FRQ_TAB, + according to the parameters of CATCHART. + *N_SLICESP will contain the number of pointers stored.
The caller is responsible for freeing slices */ -static struct slice * -freq_tab_to_slice_array(const struct frq_chart *pie, - const struct freq_tab *frq_tab, - const struct variable *var, - int *n_slicesp) +static struct freq ** +pick_cat_counts_ptr (const struct frq_chart *catchart, + const struct freq_tab *frq_tab, + int *n_slicesp) { - struct slice *slices; - int n_slices; + int n_slices = 0; int i; + struct freq **slices = xnmalloc (frq_tab->n_valid + frq_tab->n_missing, sizeof *slices); + + for (i = 0; i < frq_tab->n_valid; i++) + { + struct freq *f = &frq_tab->valid[i]; + if (f->count > catchart->x_max) + continue; - slices = xnmalloc (frq_tab->n_valid + frq_tab->n_missing, sizeof *slices); - n_slices = 0; + if (f->count < catchart->x_min) + continue; + + slices[n_slices] = f; + + n_slices++; + } - for (i = 0; i < frq_tab->n_valid; i++) - n_slices += add_slice (pie, &frq_tab->valid[i], var, &slices[n_slices]); - for (i = 0; i < frq_tab->n_missing; i++) - n_slices += add_slice (pie, &frq_tab->missing[i], var, &slices[n_slices]); + if (catchart->include_missing) + { + for (i = 0; i < frq_tab->n_missing; i++) + { + const struct freq *f = &frq_tab->missing[i]; + if (i == 0) + { + slices[n_slices] = xmalloc (sizeof (struct freq)); + slices[n_slices]->values[0] = f->values[0]; + slices[n_slices]->count = 0; /* Start the running total from zero before accumulating. */ + } + slices[n_slices]->count += f->count; + + } + } *n_slicesp = n_slices; return slices; } + static void do_piechart(const struct frq_chart *pie, const struct variable *var, const struct freq_tab *frq_tab) { - struct slice *slices; - int n_slices, i; - - slices = freq_tab_to_slice_array (pie, frq_tab, var, &n_slices); + int n_slices; + struct freq *slices = pick_cat_counts (pie, frq_tab, &n_slices); if (n_slices < 2) msg (SW, _("Omitting pie chart for %s, which has only %d unique values."), @@ -1289,13 +1445,26 @@ do_piechart(const struct frq_chart *pie, const struct variable *var, msg (SW, _("Omitting pie chart for %s, which has over 50 unique values."), var_get_name (var)); else -
chart_item_submit (piechart_create (var_to_string(var), slices, n_slices)); + chart_item_submit (piechart_create (var, slices, n_slices)); + + free (slices); +} + + +static void +do_barchart(const struct frq_chart *bar, const struct variable **var, + const struct freq_tab *frq_tab) +{ + int n_slices; + struct freq **slices = pick_cat_counts_ptr (bar, frq_tab, &n_slices); - for (i = 0; i < n_slices; i++) - ds_destroy (&slices[i].label); + chart_item_submit (barchart_create (var, 1, + (bar->y_scale == FRQ_FREQ) ? _("Count") : _("Percent"), + slices, n_slices)); free (slices); } + /* Calculates all the pertinent statistics for VF, putting them in array D[]. */ static void @@ -1305,20 +1474,16 @@ calc_stats (const struct var_freqs *vf, double d[FRQ_ST_count]) double W = ft->valid_cases; const struct freq *f; struct moments *m; - int most_often; - double X_mode; - - assert (ft->n_valid > 0); + int most_often = -1; + double X_mode = SYSMIS; /* Calculate the mode. */ - most_often = -1; - X_mode = SYSMIS; for (f = ft->valid; f < ft->missing; f++) { if (most_often < f->count) { most_often = f->count; - X_mode = f->value.f; + X_mode = f->values[0].f; } else if (most_often == f->count) { @@ -1331,16 +1496,16 @@ calc_stats (const struct var_freqs *vf, double d[FRQ_ST_count]) /* Calculate moments. */ m = moments_create (MOMENT_KURTOSIS); for (f = ft->valid; f < ft->missing; f++) - moments_pass_one (m, f->value.f, f->count); + moments_pass_one (m, f->values[0].f, f->count); for (f = ft->valid; f < ft->missing; f++) - moments_pass_two (m, f->value.f, f->count); + moments_pass_two (m, f->values[0].f, f->count); moments_calculate (m, NULL, &d[FRQ_ST_MEAN], &d[FRQ_ST_VARIANCE], &d[FRQ_ST_SKEWNESS], &d[FRQ_ST_KURTOSIS]); moments_destroy (m); - /* Formulas below are taken from _SPSS Statistical Algorithms_. */ - d[FRQ_ST_MINIMUM] = ft->valid[0].value.f; - d[FRQ_ST_MAXIMUM] = ft->valid[ft->n_valid - 1].value.f; + /* Formulae below are taken from _SPSS Statistical Algorithms_. 
*/ + d[FRQ_ST_MINIMUM] = ft->valid[0].values[0].f; + d[FRQ_ST_MAXIMUM] = ft->valid[ft->n_valid - 1].values[0].f; d[FRQ_ST_MODE] = X_mode; d[FRQ_ST_RANGE] = d[FRQ_ST_MAXIMUM] - d[FRQ_ST_MINIMUM]; d[FRQ_ST_SUM] = d[FRQ_ST_MEAN] * W; @@ -1359,30 +1524,22 @@ dump_statistics (const struct frq_proc *frq, const struct var_freqs *vf, const struct freq_tab *ft = &vf->tab; double stat_value[FRQ_ST_count]; struct tab_table *t; - int i, r; + int i, r = 2; /* N missing and N valid are always dumped */ if (var_is_alpha (vf->var)) return; - if (ft->n_valid == 0) - { - msg (SW, _("No valid data for variable %s; statistics not displayed."), - var_get_name (vf->var)); - return; - } calc_stats (vf, stat_value); t = tab_create (3, ((frq->stats & BIT_INDEX (FRQ_ST_MEDIAN)) ? frq->n_stats - 1 : frq->n_stats) - + frq->n_show_percentiles + 2); + + frq->n_show_percentiles + 2); + tab_set_format (t, RC_WEIGHT, wfmt); tab_box (t, TAL_1, TAL_1, -1, -1 , 0 , 0 , 2, tab_nr(t) - 1) ; - tab_vline (t, TAL_1 , 2, 0, tab_nr(t) - 1); tab_vline (t, TAL_GAP , 1, 0, tab_nr(t) - 1 ) ; - r = 2; /* N missing and N valid are always dumped */ - for (i = 0; i < FRQ_ST_count; i++) { if (FRQ_ST_MEDIAN == i) @@ -1392,7 +1549,11 @@ dump_statistics (const struct frq_proc *frq, const struct var_freqs *vf, { tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, gettext (st_name[i])); - tab_double (t, 2, r, TAB_NONE, stat_value[i], NULL, RC_OTHER); + + if (vf->tab.n_valid <= 0 && r >= 2) + tab_text (t, 2, r, 0, "."); + else + tab_double (t, 2, r, TAB_NONE, stat_value[i], NULL, RC_OTHER); r++; } } @@ -1416,13 +1577,21 @@ dump_statistics (const struct frq_proc *frq, const struct var_freqs *vf, tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Percentiles")); } + if (vf->tab.n_valid <= 0) + { + tab_text (t, 2, r, 0, "."); + ++r; + continue; + } + if (pc->p == 0.5) tab_text (t, 1, r, TAB_LEFT, _("50 (Median)")); else tab_double (t, 1, r, TAB_LEFT, pc->p * 100, NULL, RC_INTEGER); tab_double (t, 2, r, TAB_NONE, pc->value, 
var_get_print_format (vf->var), RC_OTHER); - r++; + + ++r; } tab_title (t, "%s", var_to_string (vf->var));