X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Ffrequencies.c;h=00f6633cc2b182ba24418977d0d164b4d60aab51;hb=3b7a1d755ef39bb2fb78e94cba2d33a443bec624;hp=e5462d083d2fd44d6e24e2e23960d360c6ad8f6f;hpb=2ba9563cb1fddad9430be0c415dc81456f006281;p=pspp diff --git a/src/language/stats/frequencies.c b/src/language/stats/frequencies.c index e5462d083d..00f6633cc2 100644 --- a/src/language/stats/frequencies.c +++ b/src/language/stats/frequencies.c @@ -20,7 +20,6 @@ #include #include - #include "data/case.h" #include "data/casegrouper.h" #include "data/casereader.h" @@ -45,7 +44,6 @@ #include "libpspp/hmap.h" #include "libpspp/message.h" #include "libpspp/misc.h" -#include "libpspp/pool.h" #include "math/histogram.h" #include "math/moments.h" @@ -67,39 +65,20 @@ /* Percentiles to calculate. */ struct percentile -{ - double p; /* the %ile to be calculated */ - double value; /* the %ile's value */ - bool show; /* True to show this percentile in the statistics box. */ -}; + { + double p; /* The percentile to calculate, between 0 and 1. */ + bool show; /* True to show this percentile in the statistics box. */ + }; static int -ptile_3way (const void *_p1, const void *_p2) +percentile_compare_3way (const void *a_, const void *b_) { - const struct percentile *p1 = _p1; - const struct percentile *p2 = _p2; - - if (p1->p < p2->p) - return -1; - - if (p1->p == p2->p) - { - if (p1->show > p2->show) - return -1; - - return (p1->show < p2->show); - } + const struct percentile *a = a_; + const struct percentile *b = b_; - return (p1->p > p2->p); + return a->p < b->p ? -1 : a->p > b->p; } - -enum - { - FRQ_NONORMAL, - FRQ_NORMAL - }; - enum { FRQ_FREQ, @@ -114,54 +93,56 @@ enum sortprops FRQ_DVALUE }; -/* Array indices for STATISTICS subcommand. */ -enum +#define STATISTICS \ + S(FRQ_ST_MEAN, "MEAN", N_("Mean")) \ + S(FRQ_ST_SEMEAN, "SEMEAN", N_("S.E. Mean")) \ + S(FRQ_ST_MEDIAN, "MEDIAN", N_("Median")) \ + S(FRQ_ST_MODE, "MODE", N_("Mode")) \ + S(FRQ_ST_STDDEV, "STDDEV", N_("Std Dev")) \ + S(FRQ_ST_VARIANCE, "VARIANCE", N_("Variance")) \ + S(FRQ_ST_KURTOSIS, "KURTOSIS", N_("Kurtosis")) \ + S(FRQ_ST_SEKURTOSIS, "SEKURTOSIS",N_("S.E. Kurt")) \ + S(FRQ_ST_SKEWNESS, "SKEWNESS", N_("Skewness")) \ + S(FRQ_ST_SESKEWNESS, "SESKEWNESS",N_("S.E. Skew")) \ + S(FRQ_ST_RANGE, "RANGE", N_("Range")) \ + S(FRQ_ST_MINIMUM, "MINIMUM", N_("Minimum")) \ + S(FRQ_ST_MAXIMUM, "MAXIMUM", N_("Maximum")) \ + S(FRQ_ST_SUM, "SUM", N_("Sum")) + +enum frq_statistic { - FRQ_ST_MEAN, - FRQ_ST_SEMEAN, - FRQ_ST_MEDIAN, - FRQ_ST_MODE, - FRQ_ST_STDDEV, - FRQ_ST_VARIANCE, - FRQ_ST_KURTOSIS, - FRQ_ST_SEKURTOSIS, - FRQ_ST_SKEWNESS, - FRQ_ST_SESKEWNESS, - FRQ_ST_RANGE, - FRQ_ST_MINIMUM, - FRQ_ST_MAXIMUM, - FRQ_ST_SUM, - FRQ_ST_count +#define S(ENUM, KEYWORD, NAME) ENUM, +STATISTICS +#undef S }; -/* Description of statistics. */ -static const char *st_name[FRQ_ST_count] = -{ - N_("Mean"), - N_("S.E. Mean"), - N_("Median"), - N_("Mode"), - N_("Std Dev"), - N_("Variance"), - N_("Kurtosis"), - N_("S.E. Kurt"), - N_("Skewness"), - N_("S.E. Skew"), - N_("Range"), - N_("Minimum"), - N_("Maximum"), - N_("Sum") +enum { +#define S(ENUM, KEYWORD, NAME) +1 + FRQ_ST_count = STATISTICS, +#undef S +}; + +static const char *st_keywords[FRQ_ST_count] = { +#define S(ENUM, KEYWORD, NAME) KEYWORD, + STATISTICS +#undef S +}; + +static const char *st_names[FRQ_ST_count] = { +#define S(ENUM, KEYWORD, NAME) NAME, + STATISTICS +#undef S }; struct freq_tab { struct hmap data; /* Hash table for accumulating counts. */ struct freq *valid; /* Valid freqs. */ - int n_valid; /* Number of total freqs. */ + size_t n_valid; /* Number of total freqs. */ const struct dictionary *dict; /* Source of entries in the table. */ struct freq *missing; /* Missing freqs. */ - int n_missing; /* Number of missing freqs. */ + size_t n_missing; /* Number of missing freqs. */ /* Statistics. */ double total_cases; /* Sum of weights of all cases. */ @@ -190,12 +171,9 @@ struct var_freqs /* Freqency table. */ struct freq_tab tab; /* Frequencies table to use. */ - /* Percentiles. */ - int n_groups; /* Number of groups. */ - double *groups; /* Groups. */ - /* Statistics. */ double stat[FRQ_ST_count]; + double *percentiles; /* Variable attributes. */ int width; @@ -203,24 +181,21 @@ struct var_freqs struct frq_proc { - struct pool *pool; - struct var_freqs *vars; size_t n_vars; /* Percentiles to calculate and possibly display. */ struct percentile *percentiles; - const struct percentile *median; - int n_percentiles; + size_t median_idx; + size_t n_percentiles; /* Frequency table display. */ long int max_categories; /* Maximum categories to show. */ int sort; /* FRQ_AVALUE or FRQ_DVALUE or FRQ_AFREQ or FRQ_DFREQ. */ - /* Statistics; number of statistics. */ + /* Statistics. */ unsigned long stats; - int n_stats; /* Histogram and pie chart settings. */ struct frq_chart *hist, *pie, *bar; @@ -249,8 +224,11 @@ static void do_barchart(const struct frq_chart *bar, const struct variable **var, const struct freq_tab *frq_tab); -static void dump_statistics (const struct frq_proc *frq, - const struct variable *wv); +static struct frq_stats_table *frq_stats_table_submit ( + struct frq_stats_table *, const struct frq_proc *, + const struct dictionary *, const struct variable *wv, + const struct ccase *example); +static void frq_stats_table_destroy (struct frq_stats_table *); static int compare_freq (const void *a_, const void *b_, const void *aux_) @@ -272,9 +250,8 @@ compare_freq (const void *a_, const void *b_, const void *aux_) } /* Create a gsl_histogram from a freq_tab */ -static struct histogram * -freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft, - const struct variable *var); +static struct histogram *freq_tab_to_hist (const struct frq_proc *, + const struct var_freqs *); static void put_freq_row (struct pivot_table *table, int var_idx, @@ -362,20 +339,25 @@ calc_percentile (double p, double valid_cases, double x1, double x2) /* Calculates all of the percentiles for VF within FRQ. */ static void -calc_percentiles (const struct frq_proc *frq, const struct var_freqs *vf) +calc_percentiles (const struct frq_proc *frq, struct var_freqs *vf) { + if (!frq->n_percentiles) + return; + + if (!vf->percentiles) + vf->percentiles = xnmalloc (frq->n_percentiles, sizeof *vf->percentiles); + const struct freq_tab *ft = &vf->tab; - double W = ft->valid_cases; - const struct freq *f; - int percentile_idx = 0; - double rank = 0; + const double W = ft->valid_cases; + size_t idx = 0; - for (f = ft->valid; f < ft->missing; f++) + double rank = 0; + for (const struct freq *f = ft->valid; f < ft->missing; f++) { rank += f->count; - for (; percentile_idx < frq->n_percentiles; percentile_idx++) + for (; idx < frq->n_percentiles; idx++) { - struct percentile *pc = &frq->percentiles[percentile_idx]; + struct percentile *pc = &frq->percentiles[idx]; double tp; tp = (settings_get_algorithm () == ENHANCED @@ -386,18 +368,16 @@ calc_percentiles (const struct frq_proc *frq, const struct var_freqs *vf) break; if (tp + 1 < rank || f + 1 >= ft->missing) - pc->value = f->values[0].f; + vf->percentiles[idx] = f->values[0].f; else - pc->value = calc_percentile (pc->p, W, f->values[0].f, f[1].values[0].f); + vf->percentiles[idx] = calc_percentile (pc->p, W, f->values[0].f, + f[1].values[0].f); } } - for (; percentile_idx < frq->n_percentiles; percentile_idx++) - { - struct percentile *pc = &frq->percentiles[percentile_idx]; - pc->value = (ft->n_valid > 0 - ? ft->valid[ft->n_valid - 1].values[0].f - : SYSMIS); - } + for (; idx < frq->n_percentiles; idx++) + vf->percentiles[idx] = (ft->n_valid > 0 + ? ft->valid[ft->n_valid - 1].values[0].f + : SYSMIS); } /* Returns true iff the value in struct freq F is non-missing @@ -408,23 +388,18 @@ not_missing (const void *f_, const void *v_) const struct freq *f = f_; const struct variable *v = v_; - return !var_is_value_missing (v, f->values, MV_ANY); + return !var_is_value_missing (v, f->values); } - /* Summarizes the frequency table data for variable V. */ static void postprocess_freq_tab (const struct frq_proc *frq, struct var_freqs *vf) { struct freq_tab *ft = &vf->tab; - struct freq_compare_aux aux; - size_t count; - struct freq *freqs, *f; - size_t i; /* Extract data from hash table. */ - count = hmap_count (&ft->data); - freqs = freq_hmap_extract (&ft->data); + size_t count = hmap_count (&ft->data); + struct freq *freqs = freq_hmap_extract (&ft->data); /* Put data into ft. */ ft->valid = freqs; @@ -433,37 +408,23 @@ postprocess_freq_tab (const struct frq_proc *frq, struct var_freqs *vf) ft->n_missing = count - ft->n_valid; /* Sort data. */ - aux.by_freq = frq->sort == FRQ_AFREQ || frq->sort == FRQ_DFREQ; - aux.ascending_freq = frq->sort != FRQ_DFREQ; - aux.width = vf->width; - aux.ascending_value = frq->sort != FRQ_DVALUE; + struct freq_compare_aux aux = { + .by_freq = frq->sort == FRQ_AFREQ || frq->sort == FRQ_DFREQ, + .ascending_freq = frq->sort != FRQ_DFREQ, + .width = vf->width, + .ascending_value = frq->sort != FRQ_DVALUE, + }; sort (ft->valid, ft->n_valid, sizeof *ft->valid, compare_freq, &aux); sort (ft->missing, ft->n_missing, sizeof *ft->missing, compare_freq, &aux); /* Summary statistics. */ ft->valid_cases = 0.0; - for(i = 0 ; i < ft->n_valid ; ++i) - { - f = &ft->valid[i]; - ft->valid_cases += f->count; + for (size_t i = 0; i < ft->n_valid; ++i) + ft->valid_cases += ft->valid[i].count; - } - - ft->total_cases = ft->valid_cases ; - for(i = 0 ; i < ft->n_missing ; ++i) - { - f = &ft->missing[i]; - ft->total_cases += f->count; - } - -} - -/* Frees the frequency table for variable V. */ -static void -cleanup_freq_tab (struct var_freqs *vf) -{ - free (vf->tab.valid); - freq_hmap_destroy (&vf->tab.data, vf->width); + ft->total_cases = ft->valid_cases; + for (size_t i = 0; i < ft->n_missing; ++i) + ft->total_cases += ft->missing[i].count; } /* Add data from case C to the frequency table. */ @@ -471,9 +432,7 @@ static void calc (struct frq_proc *frq, const struct ccase *c, const struct dataset *ds) { double weight = dict_get_case_weight (dataset_dict (ds), c, &frq->warn); - size_t i; - - for (i = 0; i < frq->n_vars; i++) + for (size_t i = 0; i < frq->n_vars; i++) { struct var_freqs *vf = &frq->vars[i]; const union value *value = case_data (c, vf->var); @@ -488,52 +447,52 @@ calc (struct frq_proc *frq, const struct ccase *c, const struct dataset *ds) } } -/* Prepares each variable that is the target of FREQUENCIES by setting - up its hash table. */ static void -precalc (struct frq_proc *frq, struct casereader *input, struct dataset *ds) +output_splits_once (bool *need_splits, const struct dataset *ds, + const struct ccase *c) { - struct ccase *c; - size_t i; - - c = casereader_peek (input, 0); - if (c != NULL) + if (*need_splits) { output_split_file_values (ds, c); - case_unref (c); + *need_splits = false; } - - for (i = 0; i < frq->n_vars; i++) - hmap_init (&frq->vars[i].tab.data); } /* Finishes up with the variables after frequencies have been calculated. Displays statistics, percentiles, ... */ -static void -postcalc (struct frq_proc *frq, const struct dataset *ds) +static struct frq_stats_table * +postcalc (struct frq_proc *frq, const struct dataset *ds, + struct ccase *example, struct frq_stats_table *fst) { const struct dictionary *dict = dataset_dict (ds); const struct variable *wv = dict_get_weight (dict); - size_t i; - for (i = 0; i < frq->n_vars; i++) + for (size_t i = 0; i < frq->n_vars; i++) { struct var_freqs *vf = &frq->vars[i]; postprocess_freq_tab (frq, vf); calc_percentiles (frq, vf); } - if (frq->n_stats) - dump_statistics (frq, wv); + enum split_type st = dict_get_split_type (dict); + bool need_splits = true; + if (frq->stats) + { + if (st != SPLIT_LAYERED) + output_splits_once (&need_splits, ds, example); + fst = frq_stats_table_submit (fst, frq, dict, wv, example); + } - for (i = 0; i < frq->n_vars; i++) + for (size_t i = 0; i < frq->n_vars; i++) { struct var_freqs *vf = &frq->vars[i]; /* Frequencies tables. */ if (vf->tab.n_valid + vf->tab.n_missing <= frq->max_categories) - dump_freq_table (vf, wv); - + { + output_splits_once (&need_splits, ds, example); + dump_freq_table (vf, wv); + } if (frq->hist && var_is_numeric (vf->var) && vf->tab.n_valid > 0) { @@ -542,10 +501,11 @@ postcalc (struct frq_proc *frq, const struct dataset *ds) calc_stats (frq, vf, d); - histogram = freq_tab_to_hist (frq, &vf->tab, vf->var); + histogram = freq_tab_to_hist (frq, vf); if (histogram) { + output_splits_once (&need_splits, ds, example); chart_submit (histogram_chart_create ( histogram->gsl_hist, var_to_string(vf->var), vf->tab.valid_cases, @@ -558,81 +518,94 @@ postcalc (struct frq_proc *frq, const struct dataset *ds) } if (frq->pie) - do_piechart(frq->pie, vf->var, &vf->tab); + { + output_splits_once (&need_splits, ds, example); + do_piechart(frq->pie, vf->var, &vf->tab); + } if (frq->bar) - do_barchart(frq->bar, &vf->var, &vf->tab); + { + output_splits_once (&need_splits, ds, example); + do_barchart(frq->bar, &vf->var, &vf->tab); + } - cleanup_freq_tab (vf); + free (vf->tab.valid); + freq_hmap_destroy (&vf->tab.data, vf->width); } + + return fst; } -int -cmd_frequencies (struct lexer *lexer, struct dataset *ds) +static void +frq_run (struct frq_proc *frq, struct dataset *ds) { - int i; - struct frq_proc frq; - const struct variable **vars = NULL; - - bool sbc_barchart = false; - bool sbc_piechart = false; - bool sbc_histogram = false; - - double pie_min = -DBL_MAX; - double pie_max = DBL_MAX; - bool pie_missing = true; - - double bar_min = -DBL_MAX; - double bar_max = DBL_MAX; - bool bar_freq = true; - - double hi_min = -DBL_MAX; - double hi_max = DBL_MAX; - int hi_scale = FRQ_FREQ; - int hi_freq = INT_MIN; - int hi_pcnt = INT_MIN; - int hi_norm = FRQ_NONORMAL; - - frq.pool = pool_create (); - frq.sort = FRQ_AVALUE; - - frq.vars = NULL; - frq.n_vars = 0; + struct frq_stats_table *fst = NULL; + struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), + dataset_dict (ds)); + struct casereader *group; + while (casegrouper_get_next_group (grouper, &group)) + { + for (size_t i = 0; i < frq->n_vars; i++) + hmap_init (&frq->vars[i].tab.data); - frq.stats = BIT_INDEX (FRQ_ST_MEAN) - | BIT_INDEX (FRQ_ST_STDDEV) - | BIT_INDEX (FRQ_ST_MINIMUM) - | BIT_INDEX (FRQ_ST_MAXIMUM); + struct ccase *example = casereader_peek (group, 0); - frq.n_stats = 4; + struct ccase *c; + for (; (c = casereader_read (group)) != NULL; case_unref (c)) + calc (frq, c, ds); + fst = postcalc (frq, ds, example, fst); + casereader_destroy (group); - frq.max_categories = LONG_MAX; + case_unref (example); + } + frq_stats_table_destroy (fst); + casegrouper_destroy (grouper); + proc_commit (ds); +} - frq.percentiles = NULL; - frq.n_percentiles = 0; +static void +add_percentile (struct frq_proc *frq, double p, bool show, + size_t *allocated_percentiles) +{ + if (frq->n_percentiles >= *allocated_percentiles) + frq->percentiles = x2nrealloc (frq->percentiles, allocated_percentiles, + sizeof *frq->percentiles); + frq->percentiles[frq->n_percentiles++] = (struct percentile) { + .p = p, + .show = show, + }; +} - frq.hist = NULL; - frq.pie = NULL; - frq.bar = NULL; - frq.warn = true; +int +cmd_frequencies (struct lexer *lexer, struct dataset *ds) +{ + bool ok = false; + const struct variable **vars = NULL; + size_t allocated_percentiles = 0; + + const unsigned long DEFAULT_STATS = (BIT_INDEX (FRQ_ST_MEAN) + | BIT_INDEX (FRQ_ST_STDDEV) + | BIT_INDEX (FRQ_ST_MINIMUM) + | BIT_INDEX (FRQ_ST_MAXIMUM)); + struct frq_proc frq = { + .sort = FRQ_AVALUE, + .stats = DEFAULT_STATS, + .max_categories = LONG_MAX, + .median_idx = SIZE_MAX, + .warn = true, + }; - /* Accept an optional, completely pointless "/VARIABLES=" */ lex_match (lexer, T_SLASH); - if (lex_match_id (lexer, "VARIABLES")) - { - if (! lex_force_match (lexer, T_EQUALS)) - goto error; - } + if (lex_match_id (lexer, "VARIABLES") && !lex_force_match (lexer, T_EQUALS)) + goto done; if (!parse_variables_const (lexer, dataset_dict (ds), - &vars, - &frq.n_vars, - PV_NO_DUPLICATE)) - goto error; + &vars, &frq.n_vars, PV_NO_DUPLICATE)) + goto done; - frq.vars = xcalloc (frq.n_vars, sizeof (*frq.vars)); - for (i = 0; i < frq.n_vars; ++i) + frq.vars = xcalloc (frq.n_vars, sizeof *frq.vars); + for (size_t i = 0; i < frq.n_vars; ++i) { frq.vars[i].var = vars[i]; frq.vars[i].width = var_get_width (vars[i]); @@ -644,117 +617,41 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) if (lex_match_id (lexer, "STATISTICS")) { - frq.stats = BIT_INDEX (FRQ_ST_MEAN) - | BIT_INDEX (FRQ_ST_STDDEV) - | BIT_INDEX (FRQ_ST_MINIMUM) - | BIT_INDEX (FRQ_ST_MAXIMUM); - - frq.n_stats = 4; - - if (lex_match (lexer, T_EQUALS)) - { - frq.n_stats = 0; - frq.stats = 0; - } + lex_match (lexer, T_EQUALS); + frq.stats = 0; + int ofs = lex_ofs (lexer); while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) { - if (lex_match_id (lexer, "DEFAULT")) - { - frq.stats = BIT_INDEX (FRQ_ST_MEAN) - | BIT_INDEX (FRQ_ST_STDDEV) - | BIT_INDEX (FRQ_ST_MINIMUM) - | BIT_INDEX (FRQ_ST_MAXIMUM); + for (int s = 0; s < FRQ_ST_count; s++) + if (lex_match_id (lexer, st_keywords[s])) + { + frq.stats |= 1 << s; + goto next; + } - frq.n_stats = 4; - } - else if (lex_match_id (lexer, "MEAN")) - { - frq.stats |= BIT_INDEX (FRQ_ST_MEAN); - frq.n_stats++; - } - else if (lex_match_id (lexer, "SEMEAN")) - { - frq.stats |= BIT_INDEX (FRQ_ST_SEMEAN); - frq.n_stats++; - } - else if (lex_match_id (lexer, "MEDIAN")) - { - frq.stats |= BIT_INDEX (FRQ_ST_MEDIAN); - frq.n_stats++; - } - else if (lex_match_id (lexer, "MODE")) - { - frq.stats |= BIT_INDEX (FRQ_ST_MODE); - frq.n_stats++; - } - else if (lex_match_id (lexer, "STDDEV")) - { - frq.stats |= BIT_INDEX (FRQ_ST_STDDEV); - frq.n_stats++; - } - else if (lex_match_id (lexer, "VARIANCE")) - { - frq.stats |= BIT_INDEX (FRQ_ST_VARIANCE); - frq.n_stats++; - } - else if (lex_match_id (lexer, "KURTOSIS")) - { - frq.stats |= BIT_INDEX (FRQ_ST_KURTOSIS); - frq.n_stats++; - } - else if (lex_match_id (lexer, "SKEWNESS")) - { - frq.stats |= BIT_INDEX (FRQ_ST_SKEWNESS); - frq.n_stats++; - } - else if (lex_match_id (lexer, "RANGE")) - { - frq.stats |= BIT_INDEX (FRQ_ST_RANGE); - frq.n_stats++; - } - else if (lex_match_id (lexer, "MINIMUM")) - { - frq.stats |= BIT_INDEX (FRQ_ST_MINIMUM); - frq.n_stats++; - } - else if (lex_match_id (lexer, "MAXIMUM")) - { - frq.stats |= BIT_INDEX (FRQ_ST_MAXIMUM); - frq.n_stats++; - } - else if (lex_match_id (lexer, "SUM")) - { - frq.stats |= BIT_INDEX (FRQ_ST_SUM); - frq.n_stats++; - } - else if (lex_match_id (lexer, "SESKEWNESS")) - { - frq.stats |= BIT_INDEX (FRQ_ST_SESKEWNESS); - frq.n_stats++; - } - else if (lex_match_id (lexer, "SEKURTOSIS")) - { - frq.stats |= BIT_INDEX (FRQ_ST_SEKURTOSIS); - frq.n_stats++; - } - else if (lex_match_id (lexer, "NONE")) - { - frq.stats = 0; - frq.n_stats = 0; - } + if (lex_match_id (lexer, "DEFAULT")) + frq.stats = DEFAULT_STATS; else if (lex_match (lexer, T_ALL)) - { - frq.stats = ~0; - frq.n_stats = FRQ_ST_count; - } + frq.stats = (1 << FRQ_ST_count) - 1; + else if (lex_match_id (lexer, "NONE")) + frq.stats = 0; else { - lex_error (lexer, NULL); - goto error; +#define S(ENUM, KEYWORD, NAME) KEYWORD, + lex_error_expecting (lexer, + STATISTICS + "DEFAULT", "ALL", "NONE"); +#undef S + goto done; } + + next:; } + + if (lex_ofs (lexer) == ofs) + frq.stats = DEFAULT_STATS; } else if (lex_match_id (lexer, "PERCENTILES")) { @@ -762,22 +659,11 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) { - if (lex_force_num (lexer)) - { - frq.percentiles = - xrealloc (frq.percentiles, - (frq.n_percentiles + 1) - * sizeof (*frq.percentiles)); - frq.percentiles[frq.n_percentiles].p = lex_number (lexer) / 100.0; - frq.percentiles[frq.n_percentiles].show = true; - lex_get (lexer); - frq.n_percentiles++; - } - else - { - lex_error (lexer, NULL); - goto error; - } + if (!lex_force_num_range_closed (lexer, "PERCENTILES", 0, 100)) + goto done; + add_percentile (&frq, lex_number (lexer) / 100.0, true, + &allocated_percentiles); + lex_get (lexer); lex_match (lexer, T_COMMA); } } @@ -791,41 +677,33 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) { } else if (lex_match_id (lexer, "NOTABLE")) - { - frq.max_categories = 0; - } + frq.max_categories = 0; else if (lex_match_id (lexer, "LIMIT")) { if (!lex_force_match (lexer, T_LPAREN) || !lex_force_int_range (lexer, "LIMIT", 0, INT_MAX)) - goto error; + goto done; frq.max_categories = lex_integer (lexer); lex_get (lexer); if (!lex_force_match (lexer, T_RPAREN)) - goto error; + goto done; } else if (lex_match_id (lexer, "AVALUE")) - { - frq.sort = FRQ_AVALUE; - } + frq.sort = FRQ_AVALUE; else if (lex_match_id (lexer, "DVALUE")) - { - frq.sort = FRQ_DVALUE; - } + frq.sort = FRQ_DVALUE; else if (lex_match_id (lexer, "AFREQ")) - { - frq.sort = FRQ_AFREQ; - } + frq.sort = FRQ_AFREQ; else if (lex_match_id (lexer, "DFREQ")) - { - frq.sort = FRQ_DFREQ; - } + frq.sort = FRQ_DFREQ; else { - lex_error (lexer, NULL); - goto error; + lex_error_expecting (lexer, "TABLE", "NOTABLE", + "LIMIT", "AVALUE", "DVALUE", + "AFREQ", "DFREQ"); + goto done; } } } @@ -833,75 +711,57 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) { lex_match (lexer, T_EQUALS); - if (lex_force_int_range (lexer, "NTILES", 0, INT_MAX)) - { - int n = lex_integer (lexer); - lex_get (lexer); - for (int i = 0; i < n + 1; ++i) - { - frq.percentiles = - xrealloc (frq.percentiles, - (frq.n_percentiles + 1) - * sizeof (*frq.percentiles)); - frq.percentiles[frq.n_percentiles].p = - i / (double) n ; - frq.percentiles[frq.n_percentiles].show = true; - - frq.n_percentiles++; - } - } - else - { - lex_error (lexer, NULL); - goto error; - } + if (!lex_force_int_range (lexer, "NTILES", 0, INT_MAX)) + goto done; + + int n = lex_integer (lexer); + lex_get (lexer); + for (int i = 0; i < n + 1; ++i) + add_percentile (&frq, i / (double) n, true, &allocated_percentiles); } else if (lex_match_id (lexer, "ALGORITHM")) { lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "COMPATIBLE")) - { - settings_set_cmd_algorithm (COMPATIBLE); - } + settings_set_cmd_algorithm (COMPATIBLE); else if (lex_match_id (lexer, "ENHANCED")) - { - settings_set_cmd_algorithm (ENHANCED); - } + settings_set_cmd_algorithm (ENHANCED); else { - lex_error (lexer, NULL); - goto error; + lex_error_expecting (lexer, "COMPATIBLE", "ENHANCED"); + goto done; } } else if (lex_match_id (lexer, "HISTOGRAM")) { + double hi_min = -DBL_MAX; + double hi_max = DBL_MAX; + int hi_scale = FRQ_FREQ; + int hi_freq = INT_MIN; + int hi_pcnt = INT_MIN; + bool hi_draw_normal = false; + lex_match (lexer, T_EQUALS); - sbc_histogram = true; while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) { if (lex_match_id (lexer, "NORMAL")) - { - hi_norm = FRQ_NORMAL; - } + hi_draw_normal = true; else if (lex_match_id (lexer, "NONORMAL")) - { - hi_norm = FRQ_NONORMAL; - } + hi_draw_normal = false; else if (lex_match_id (lexer, "FREQ")) { hi_scale = FRQ_FREQ; if (lex_match (lexer, T_LPAREN)) { - if (lex_force_int_range (lexer, "FREQ", 1, INT_MAX)) - { - hi_freq = lex_integer (lexer); - lex_get (lexer); - if (! lex_force_match (lexer, T_RPAREN)) - goto error; - } + if (!lex_force_int_range (lexer, "FREQ", 1, INT_MAX)) + goto done; + hi_freq = lex_integer (lexer); + lex_get (lexer); + if (!lex_force_match (lexer, T_RPAREN)) + goto done; } } else if (lex_match_id (lexer, "PERCENT")) @@ -909,133 +769,152 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) hi_scale = FRQ_PERCENT; if (lex_match (lexer, T_LPAREN)) { - if (lex_force_int_range (lexer, "PERCENT", 1, INT_MAX)) - { - hi_pcnt = lex_integer (lexer); - lex_get (lexer); - if (! lex_force_match (lexer, T_RPAREN)) - goto error; - } + if (!lex_force_int_range (lexer, "PERCENT", 1, INT_MAX)) + goto done; + hi_pcnt = lex_integer (lexer); + lex_get (lexer); + if (!lex_force_match (lexer, T_RPAREN)) + goto done; } } else if (lex_match_id (lexer, "MINIMUM")) { - if (! lex_force_match (lexer, T_LPAREN)) - goto error; - if (lex_force_num (lexer)) - { - hi_min = lex_number (lexer); - lex_get (lexer); - } - if (! lex_force_match (lexer, T_RPAREN)) - goto error; + if (!lex_force_match (lexer, T_LPAREN) + || !lex_force_num_range_closed (lexer, "MINIMUM", + -DBL_MAX, hi_max)) + goto done; + hi_min = lex_number (lexer); + lex_get (lexer); + if (!lex_force_match (lexer, T_RPAREN)) + goto done; } else if (lex_match_id (lexer, "MAXIMUM")) { - if (! lex_force_match (lexer, T_LPAREN)) - goto error; - if (lex_force_num (lexer)) - { - hi_max = lex_number (lexer); - lex_get (lexer); - } - if (! lex_force_match (lexer, T_RPAREN)) - goto error; + if (!lex_force_match (lexer, T_LPAREN) + || !lex_force_num_range_closed (lexer, "MAXIMUM", + hi_min, DBL_MAX)) + goto done; + hi_max = lex_number (lexer); + lex_get (lexer); + if (!lex_force_match (lexer, T_RPAREN)) + goto done; } else { - lex_error (lexer, NULL); - goto error; + lex_error_expecting (lexer, "NORMAL", "NONORMAL", + "FREQ", "PERCENT", "MINIMUM", "MAXIMUM"); + goto done; } } + + free (frq.hist); + frq.hist = xmalloc (sizeof *frq.hist); + *frq.hist = (struct frq_chart) { + .x_min = hi_min, + .x_max = hi_max, + .y_scale = hi_scale, + .y_max = hi_scale == FRQ_FREQ ? hi_freq : hi_pcnt, + .draw_normal = hi_draw_normal, + .include_missing = false, + }; + + add_percentile (&frq, .25, false, &allocated_percentiles); + add_percentile (&frq, .75, false, &allocated_percentiles); } else if (lex_match_id (lexer, "PIECHART")) { + double pie_min = -DBL_MAX; + double pie_max = DBL_MAX; + bool pie_missing = true; + lex_match (lexer, T_EQUALS); while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) { if (lex_match_id (lexer, "MINIMUM")) { - if (! lex_force_match (lexer, T_LPAREN)) - goto error; - if (lex_force_num (lexer)) - { - pie_min = lex_number (lexer); - lex_get (lexer); - } - if (! lex_force_match (lexer, T_RPAREN)) - goto error; + if (!lex_force_match (lexer, T_LPAREN) + || !lex_force_num_range_closed (lexer, "MINIMUM", + -DBL_MAX, pie_max)) + goto done; + pie_min = lex_number (lexer); + lex_get (lexer); + if (!lex_force_match (lexer, T_RPAREN)) + goto done; } else if (lex_match_id (lexer, "MAXIMUM")) { - if (! lex_force_match (lexer, T_LPAREN)) - goto error; - if (lex_force_num (lexer)) - { - pie_max = lex_number (lexer); - lex_get (lexer); - } - if (! lex_force_match (lexer, T_RPAREN)) - goto error; + if (!lex_force_match (lexer, T_LPAREN) + || !lex_force_num_range_closed (lexer, "MAXIMUM", + pie_min, DBL_MAX)) + goto done; + pie_max = lex_number (lexer); + lex_get (lexer); + if (!lex_force_match (lexer, T_RPAREN)) + goto done; } else if (lex_match_id (lexer, "MISSING")) - { - pie_missing = true; - } + pie_missing = true; else if (lex_match_id (lexer, "NOMISSING")) - { - pie_missing = false; - } + pie_missing = false; else { - lex_error (lexer, NULL); - goto error; + lex_error_expecting (lexer, "MINIMUM", "MAXIMUM", + "MISSING", "NOMISSING"); + goto done; } } - sbc_piechart = true; - } + + free (frq.pie); + frq.pie = xmalloc (sizeof *frq.pie); + *frq.pie = (struct frq_chart) { + .x_min = pie_min, + .x_max = pie_max, + .include_missing = pie_missing, + }; + } else if (lex_match_id (lexer, "BARCHART")) { + double bar_min = -DBL_MAX; + double bar_max = DBL_MAX; + bool bar_freq = true; + lex_match (lexer, T_EQUALS); while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) { if (lex_match_id (lexer, "MINIMUM")) { - if (! lex_force_match (lexer, T_LPAREN)) - goto error; - if (lex_force_num (lexer)) - { - bar_min = lex_number (lexer); - lex_get (lexer); - } - if (! lex_force_match (lexer, T_RPAREN)) - goto error; + if (!lex_force_match (lexer, T_LPAREN) + || !lex_force_num_range_closed (lexer, "MINIMUM", + -DBL_MAX, bar_max)) + goto done; + bar_min = lex_number (lexer); + lex_get (lexer); + if (!lex_force_match (lexer, T_RPAREN)) + goto done; } else if (lex_match_id (lexer, "MAXIMUM")) { - if (! lex_force_match (lexer, T_LPAREN)) - goto error; - if (lex_force_num (lexer)) - { - bar_max = lex_number (lexer); - lex_get (lexer); - } - if (! lex_force_match (lexer, T_RPAREN)) - goto error; + if (!lex_force_match (lexer, T_LPAREN) + || !lex_force_num_range_closed (lexer, "MAXIMUM", + bar_min, DBL_MAX)) + goto done; + bar_max = lex_number (lexer); + lex_get (lexer); + if (!lex_force_match (lexer, T_RPAREN)) + goto done; } else if (lex_match_id (lexer, "FREQ")) { if (lex_match (lexer, T_LPAREN)) { - if (lex_force_num (lexer)) - { - lex_number (lexer); - lex_get (lexer); - } - if (! lex_force_match (lexer, T_RPAREN)) - goto error; + if (!lex_force_num_range_open (lexer, "FREQ", 0, DBL_MAX)) + goto done; + /* XXX TODO */ + lex_get (lexer); + if (!lex_force_match (lexer, T_RPAREN)) + goto done; } bar_freq = true; } @@ -1043,23 +922,32 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) { if (lex_match (lexer, T_LPAREN)) { - if (lex_force_num (lexer)) - { - lex_number (lexer); - lex_get (lexer); - } - if (! lex_force_match (lexer, T_RPAREN)) - goto error; + if (!lex_force_num_range_open (lexer, "PERCENT", + 0, DBL_MAX)) + goto done; + /* XXX TODO */ + lex_get (lexer); + if (!lex_force_match (lexer, T_RPAREN)) + goto done; } bar_freq = false; } else { - lex_error (lexer, NULL); - goto error; + lex_error_expecting (lexer, "MINIMUM", "MAXIMUM", + "FREQ", "PERCENT"); + goto done; } } - sbc_barchart = true; + + free (frq.bar); + frq.bar = xmalloc (sizeof *frq.bar); + *frq.bar = (struct frq_chart) { + .x_min = bar_min, + .x_max = bar_max, + .include_missing = false, + .y_scale = bar_freq ? FRQ_FREQ : FRQ_PERCENT, + }; } else if (lex_match_id (lexer, "MISSING")) { @@ -1070,214 +958,100 @@ cmd_frequencies (struct lexer *lexer, struct dataset *ds) { if (lex_match_id (lexer, "EXCLUDE")) { + /* XXX TODO */ } else if (lex_match_id (lexer, "INCLUDE")) { + /* XXX TODO */ } else { - lex_error (lexer, NULL); - goto error; + lex_error_expecting (lexer, "EXCLUDE", "INCLUDE"); + goto done; } } } else if (lex_match_id (lexer, "ORDER")) { lex_match (lexer, T_EQUALS); - if (!lex_match_id (lexer, "ANALYSIS")) - lex_match_id (lexer, "VARIABLE"); + /* XXX TODO */ + if (!lex_match_id (lexer, "ANALYSIS") + && !lex_match_id (lexer, "VARIABLE")) + { + lex_error_expecting (lexer, "ANALYSIS", "VARIABLE"); + goto done; + } } else { - lex_error (lexer, NULL); - goto error; + lex_error_expecting (lexer, "STATISTICS", "PERCENTILES", "FORMAT", + "NTILES", "ALGORITHM", "HISTOGRAM", "PIECHART", + "BARCHART", "MISSING", "ORDER"); + goto done; } } if (frq.stats & BIT_INDEX (FRQ_ST_MEDIAN)) - { - frq.percentiles = - xrealloc (frq.percentiles, - (frq.n_percentiles + 1) - * sizeof (*frq.percentiles)); - - frq.percentiles[frq.n_percentiles].p = 0.50; - frq.percentiles[frq.n_percentiles].show = false; - - frq.n_percentiles++; - } - - -/* Figure out which charts the user requested. */ - - { - if (sbc_histogram) - { - struct frq_chart *hist; - - hist = frq.hist = xmalloc (sizeof *frq.hist); - hist->x_min = hi_min; - hist->x_max = hi_max; - hist->y_scale = hi_scale; - hist->y_max = hi_scale == FRQ_FREQ ? hi_freq : hi_pcnt; - hist->draw_normal = hi_norm != FRQ_NONORMAL; - hist->include_missing = false; - - if (hist->x_min != SYSMIS && hist->x_max != SYSMIS - && hist->x_min >= hist->x_max) - { - msg (SE, _("%s for histogram must be greater than or equal to %s, " - "but %s was specified as %.15g and %s as %.15g. " - "%s and %s will be ignored."), - "MAX", "MIN", - "MIN", hist->x_min, - "MAX", hist->x_max, - "MIN", "MAX"); - hist->x_min = hist->x_max = SYSMIS; - } - - frq.percentiles = - xrealloc (frq.percentiles, - (frq.n_percentiles + 2) - * sizeof (*frq.percentiles)); - - frq.percentiles[frq.n_percentiles].p = 0.25; - frq.percentiles[frq.n_percentiles].show = false; - - frq.percentiles[frq.n_percentiles + 1].p = 0.75; - frq.percentiles[frq.n_percentiles + 1].show = false; - - frq.n_percentiles+=2; - } - - if (sbc_barchart) - { - frq.bar = xmalloc (sizeof *frq.bar); - frq.bar->x_min = bar_min; - frq.bar->x_max = bar_max; - frq.bar->include_missing = false; - frq.bar->y_scale = bar_freq ? FRQ_FREQ : FRQ_PERCENT; - } - - if (sbc_piechart) - { - struct frq_chart *pie; - - pie = frq.pie = xmalloc (sizeof *frq.pie); - pie->x_min = pie_min; - pie->x_max = pie_max; - pie->include_missing = pie_missing; - - if (pie->x_min != SYSMIS && pie->x_max != SYSMIS - && pie->x_min >= pie->x_max) - { - msg (SE, _("%s for pie chart must be greater than or equal to %s, " - "but %s was specified as %.15g and %s as %.15g. " - "%s and %s will be ignored."), - "MAX", "MIN", - "MIN", pie->x_min, - "MAX", pie->x_max, - "MIN", "MAX"); - pie->x_min = pie->x_max = SYSMIS; - } - } - } - - { - int i,o; - double previous_p = -1; - qsort (frq.percentiles, frq.n_percentiles, - sizeof (*frq.percentiles), - ptile_3way); - - for (i = o = 0; i < frq.n_percentiles; ++i) - { - if (frq.percentiles[i].p != previous_p) - { - frq.percentiles[o].p = frq.percentiles[i].p; - frq.percentiles[o].show = frq.percentiles[i].show; - o++; - } - else if (frq.percentiles[i].show && - !frq.percentiles[o].show) - { - frq.percentiles[o].show = true; - } - previous_p = frq.percentiles[i].p; - } + add_percentile (&frq, .5, false, &allocated_percentiles); - frq.n_percentiles = o; + if (frq.n_percentiles > 0) + { + qsort (frq.percentiles, frq.n_percentiles, sizeof *frq.percentiles, + percentile_compare_3way); - frq.median = NULL; - for (i = 0; i < frq.n_percentiles; i++) - if (frq.percentiles[i].p == 0.5) + /* Combine equal percentiles. */ + size_t o = 1; + for (int i = 1; i < frq.n_percentiles; ++i) { - frq.median = &frq.percentiles[i]; - break; + struct percentile *prev = &frq.percentiles[o - 1]; + struct percentile *this = &frq.percentiles[i]; + if (this->p != prev->p) + frq.percentiles[o++] = *this; + else if (this->show) + prev->show = true; } - } + frq.n_percentiles = o; - { - struct casegrouper *grouper; - struct casereader *group; - bool ok; - - grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds)); - while (casegrouper_get_next_group (grouper, &group)) - { - struct ccase *c; - precalc (&frq, group, ds); - - for (; (c = casereader_read (group)) != NULL; case_unref (c)) - calc (&frq, c, ds); - postcalc (&frq, ds); - casereader_destroy (group); - } - ok = casegrouper_destroy (grouper); - ok = proc_commit (ds) && ok; - } - - - free (vars); - free (frq.vars); - free (frq.bar); - free (frq.pie); - free (frq.hist); - free (frq.percentiles); - pool_destroy (frq.pool); - - return CMD_SUCCESS; + for (size_t i = 0; i < frq.n_percentiles; i++) + if (frq.percentiles[i].p == 0.5) + { + frq.median_idx = i; + break; + } + } - error: + frq_run (&frq, ds); + ok = true; +done: free (vars); + for (size_t i = 0; i < frq.n_vars; i++) + free (frq.vars[i].percentiles); free (frq.vars); free (frq.bar); free (frq.pie); free (frq.hist); free (frq.percentiles); - pool_destroy (frq.pool); - return CMD_FAILURE; + return ok ? CMD_SUCCESS : CMD_FAILURE; } static double -calculate_iqr (const struct frq_proc *frq) +calculate_iqr (const struct frq_proc *frq, const struct var_freqs *vf) { double q1 = SYSMIS; double q3 = SYSMIS; - int i; /* This cannot work unless the 25th and 75th percentile are calculated */ assert (frq->n_percentiles >= 2); - for (i = 0; i < frq->n_percentiles; i++) + for (int i = 0; i < frq->n_percentiles; i++) { struct percentile *pc = &frq->percentiles[i]; if (fabs (0.25 - pc->p) < DBL_EPSILON) - q1 = pc->value; + q1 = vf->percentiles[i]; else if (fabs (0.75 - pc->p) < DBL_EPSILON) - q3 = pc->value; + q3 = vf->percentiles[i]; } return q1 == SYSMIS || q3 == SYSMIS ? SYSMIS : q3 - q1; @@ -1288,7 +1062,7 @@ chart_includes_value (const struct frq_chart *chart, const struct variable *var, const union value *value) { - if (!chart->include_missing && var_is_value_missing (var, value, MV_ANY)) + if (!chart->include_missing && var_is_value_missing (var, value)) return false; if (var_is_numeric (var) @@ -1301,24 +1075,17 @@ chart_includes_value (const struct frq_chart *chart, /* Create a gsl_histogram from a freq_tab */ static struct histogram * -freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft, - const struct variable *var) +freq_tab_to_hist (const struct frq_proc *frq, const struct var_freqs *vf) { - double x_min, x_max, valid_freq; - int i; - double bin_width; - struct histogram *histogram; - double iqr; - /* Find out the extremes of the x value, within the range to be included in the histogram, and sum the total frequency of those values. */ - x_min = DBL_MAX; - x_max = -DBL_MAX; - valid_freq = 0; - for (i = 0; i < ft->n_valid; i++) + double x_min = DBL_MAX; + double x_max = -DBL_MAX; + double valid_freq = 0; + for (size_t i = 0; i < vf->tab.n_valid; i++) { - const struct freq *f = &ft->valid[i]; - if (chart_includes_value (frq->hist, var, f->values)) + const struct freq *f = &vf->tab.valid[i]; + if (chart_includes_value (frq->hist, vf->var, f->values)) { x_min = MIN (x_min, f->values[0].f); x_max = MAX (x_max, f->values[0].f); @@ -1329,25 +1096,21 @@ freq_tab_to_hist (const struct frq_proc *frq, const struct freq_tab *ft, if (valid_freq <= 0) return NULL; - iqr = calculate_iqr (frq); - - if (iqr > 0) - /* Freedman-Diaconis' choice of bin width. */ - bin_width = 2 * iqr / pow (valid_freq, 1.0 / 3.0); - - else - /* Sturges Rule */ - bin_width = (x_max - x_min) / (1 + log2 (valid_freq)); + double iqr = calculate_iqr (frq, vf); - histogram = histogram_create (bin_width, x_min, x_max); + double bin_width = + (iqr > 0 + ? 2 * iqr / pow (valid_freq, 1.0 / 3.0) /* Freedman-Diaconis. */ + : (x_max - x_min) / (1 + log2 (valid_freq))); /* Sturges */ + struct histogram *histogram = histogram_create (bin_width, x_min, x_max); if (histogram == NULL) return NULL; - for (i = 0; i < ft->n_valid; i++) + for (size_t i = 0; i < vf->tab.n_valid; i++) { - const struct freq *f = &ft->valid[i]; - if (chart_includes_value (frq->hist, var, f->values)) + const struct freq *f = &vf->tab.valid[i]; + if (chart_includes_value (frq->hist, vf->var, f->values)) histogram_add (histogram, f->values[0].f, f->count); } @@ -1366,26 +1129,19 @@ pick_cat_counts (const struct frq_chart *catchart, int *n_slicesp) { int n_slices = 0; - int i; struct freq *slices = xnmalloc (frq_tab->n_valid + frq_tab->n_missing, sizeof *slices); - for (i = 0; i < frq_tab->n_valid; i++) + for (size_t i = 0; i < frq_tab->n_valid; i++) { - const struct freq *f = &frq_tab->valid[i]; - if (f->count > catchart->x_max) - continue; - - if (f->count < catchart->x_min) - continue; - - slices[n_slices] = *f; - - n_slices++; + struct freq *f = &frq_tab->valid[i]; + if (f->count >= catchart->x_min && f->count <= catchart->x_max) + slices[n_slices++] = *f; } + if (catchart->include_missing) { - for (i = 0; i < frq_tab->n_missing; i++) + for (size_t i = 0; i < frq_tab->n_missing; i++) { const struct freq *f = &frq_tab->missing[i]; slices[n_slices].count += f->count; @@ -1414,48 +1170,35 @@ pick_cat_counts_ptr (const struct frq_chart *catchart, int *n_slicesp) { int n_slices = 0; - int i; struct freq **slices = xnmalloc (frq_tab->n_valid + frq_tab->n_missing, sizeof *slices); - for (i = 0; i < frq_tab->n_valid; i++) + for (size_t i = 0; i < frq_tab->n_valid; i++) { struct freq *f = &frq_tab->valid[i]; - if (f->count > catchart->x_max) - continue; - - if (f->count < catchart->x_min) - continue; - - slices[n_slices] = f; - - n_slices++; + if (f->count >= catchart->x_min && f->count <= catchart->x_max) + slices[n_slices++] = f; } if (catchart->include_missing) - { - for (i = 0; i < frq_tab->n_missing; i++) - { - const struct freq *f = &frq_tab->missing[i]; - if (i == 0) - { - slices[n_slices] = xmalloc (sizeof (struct freq)); - slices[n_slices]->values[0] = f->values[0]; - } - - slices[n_slices]->count += f->count; + for (size_t i = 0; i < frq_tab->n_missing; i++) + { + const struct freq *f = &frq_tab->missing[i]; + if (i == 0) + { + slices[n_slices] = xmalloc (sizeof *slices[n_slices]); + slices[n_slices]->values[0] = f->values[0]; + } - } - } + slices[n_slices]->count += f->count; + } *n_slicesp = n_slices; return slices; } - - static void -do_piechart(const struct frq_chart *pie, const struct variable *var, - const struct freq_tab *frq_tab) +do_piechart (const struct frq_chart *pie, const struct variable *var, + const struct freq_tab *frq_tab) { int n_slices; struct freq *slices = pick_cat_counts (pie, frq_tab, &n_slices); @@ -1472,10 +1215,9 @@ do_piechart(const struct frq_chart *pie, const struct variable *var, free (slices); } - static void -do_barchart(const struct frq_chart *bar, const struct variable **var, - const struct freq_tab *frq_tab) +do_barchart (const struct frq_chart *bar, const struct variable **var, + const struct freq_tab *frq_tab) { int n_slices; struct freq **slices = pick_cat_counts_ptr (bar, frq_tab, &n_slices); @@ -1483,14 +1225,14 @@ do_barchart(const struct frq_chart *bar, const struct variable **var, if (n_slices < 1) msg (SW, _("Omitting bar chart, which has no values.")); else - chart_submit (barchart_create (var, 1, - (bar->y_scale == FRQ_FREQ) ? _("Count") : _("Percent"), - (bar->y_scale == FRQ_PERCENT), - slices, n_slices)); + chart_submit (barchart_create ( + var, 1, + bar->y_scale == FRQ_FREQ ? _("Count") : _("Percent"), + bar->y_scale == FRQ_PERCENT, + slices, n_slices)); free (slices); } - /* Calculates all the pertinent statistics for VF, putting them in array D[]. */ static void @@ -1498,39 +1240,30 @@ calc_stats (const struct frq_proc *frq, const struct var_freqs *vf, double d[FRQ_ST_count]) { const struct freq_tab *ft = &vf->tab; - double W = ft->valid_cases; - const struct freq *f; - struct moments *m; + + /* Calculate the mode. If there is more than one mode, we take the + smallest. */ int most_often = -1; double X_mode = SYSMIS; - - /* Calculate the mode. */ - for (f = ft->valid; f < ft->missing; f++) - { - if (most_often < f->count) - { - most_often = f->count; - X_mode = f->values[0].f; - } - else if (most_often == f->count) - { - /* A duplicate mode is undefined. - FIXME: keep track of *all* the modes. */ - X_mode = SYSMIS; - } - } + for (const struct freq *f = ft->valid; f < ft->missing; f++) + if (most_often < f->count) + { + most_often = f->count; + X_mode = f->values[0].f; + } /* Calculate moments. */ - m = moments_create (MOMENT_KURTOSIS); - for (f = ft->valid; f < ft->missing; f++) + struct moments *m = moments_create (MOMENT_KURTOSIS); + for (const struct freq *f = ft->valid; f < ft->missing; f++) moments_pass_one (m, f->values[0].f, f->count); - for (f = ft->valid; f < ft->missing; f++) + for (const struct freq *f = ft->valid; f < ft->missing; f++) moments_pass_two (m, f->values[0].f, f->count); moments_calculate (m, NULL, &d[FRQ_ST_MEAN], &d[FRQ_ST_VARIANCE], &d[FRQ_ST_SKEWNESS], &d[FRQ_ST_KURTOSIS]); moments_destroy (m); /* Formulae below are taken from _SPSS Statistical Algorithms_. */ + double W = ft->valid_cases; if (ft->n_valid > 0) { d[FRQ_ST_MINIMUM] = ft->valid[0].values[0].f; @@ -1549,7 +1282,9 @@ calc_stats (const struct frq_proc *frq, const struct var_freqs *vf, d[FRQ_ST_SEMEAN] = d[FRQ_ST_STDDEV] / sqrt (W); d[FRQ_ST_SESKEWNESS] = calc_seskew (W); d[FRQ_ST_SEKURTOSIS] = calc_sekurt (W); - d[FRQ_ST_MEDIAN] = frq->median ? frq->median->value : SYSMIS; + d[FRQ_ST_MEDIAN] = (frq->median_idx != SIZE_MAX + ? vf->percentiles[frq->median_idx] + : SYSMIS); } static bool @@ -1561,19 +1296,31 @@ all_string_variables (const struct frq_proc *frq) return true; } + +struct frq_stats_table + { + struct pivot_table *table; + struct pivot_splits *splits; + }; /* Displays a table of all the statistics requested. */ -static void -dump_statistics (const struct frq_proc *frq, const struct variable *wv) +static struct frq_stats_table * +frq_stats_table_create (const struct frq_proc *frq, + const struct dictionary *dict, + const struct variable *wv) { if (all_string_variables (frq)) - return; + return NULL; struct pivot_table *table = pivot_table_create (N_("Statistics")); pivot_table_set_weight_var (table, wv); struct pivot_dimension *variables = pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Variables")); + for (size_t i = 0; i < frq->n_vars; i++) + if (!var_is_alpha (frq->vars[i].var)) + pivot_category_create_leaf (variables->root, + pivot_value_new_variable (frq->vars[i].var)); struct pivot_dimension *statistics = pivot_dimension_create ( table, PIVOT_AXIS_ROW, N_("Statistics")); @@ -1585,7 +1332,7 @@ dump_statistics (const struct frq_proc *frq, const struct variable *wv) for (int i = 0; i < FRQ_ST_count; i++) if (frq->stats & BIT_INDEX (i)) pivot_category_create_leaf (statistics->root, - pivot_value_new_text (st_name[i])); + pivot_value_new_text (st_names[i])); struct pivot_category *percentiles = NULL; for (size_t i = 0; i < frq->n_percentiles; i++) { @@ -1601,6 +1348,30 @@ dump_statistics (const struct frq_proc *frq, const struct variable *wv) pc->p * 100.0)); } + struct pivot_splits *splits = pivot_splits_create (table, PIVOT_AXIS_COLUMN, + dict); + + struct frq_stats_table *fst = xmalloc (sizeof *fst); + *fst = (struct frq_stats_table) { .table = table, .splits = splits }; + return fst; +} + +static struct frq_stats_table * +frq_stats_table_submit (struct frq_stats_table *fst, + const struct frq_proc *frq, + const struct dictionary *dict, + const struct variable *wv, + const struct ccase *example) +{ + if (!fst) + { + fst = frq_stats_table_create (frq, dict, wv); + if (!fst) + return NULL; + } + pivot_splits_new_split (fst->splits, example); + + int var_idx = 0; for (size_t i = 0; i < frq->n_vars; i++) { struct var_freqs *vf = &frq->vars[i]; @@ -1609,13 +1380,10 @@ dump_statistics (const struct frq_proc *frq, const struct variable *wv) const struct freq_tab *ft = &vf->tab; - int var_idx = pivot_category_create_leaf ( - variables->root, pivot_value_new_variable (vf->var)); - int row = 0; - pivot_table_put2 (table, var_idx, row++, + pivot_splits_put2 (fst->splits, fst->table, var_idx, row++, pivot_value_new_number (ft->valid_cases)); - pivot_table_put2 (table, var_idx, row++, + pivot_splits_put2 (fst->splits, fst->table, var_idx, row++, pivot_value_new_number ( ft->total_cases - ft->valid_cases)); @@ -1631,7 +1399,7 @@ dump_statistics (const struct frq_proc *frq, const struct variable *wv) = (j == FRQ_ST_MODE || j == FRQ_ST_MINIMUM || j == FRQ_ST_MAXIMUM ? pivot_value_new_var_value (vf->var, &v) : pivot_value_new_number (v.f)); - pivot_table_put2 (table, var_idx, row++, pv); + pivot_splits_put2 (fst->splits, fst->table, var_idx, row++, pv); } for (size_t j = 0; j < frq->n_percentiles; j++) @@ -1640,11 +1408,31 @@ dump_statistics (const struct frq_proc *frq, const struct variable *wv) if (!pc->show) continue; - union value v = { .f = vf->tab.n_valid ? pc->value : SYSMIS }; - pivot_table_put2 (table, var_idx, row++, - pivot_value_new_var_value (vf->var, &v)); + union value v = { + .f = vf->tab.n_valid ? vf->percentiles[j] : SYSMIS + }; + pivot_splits_put2 (fst->splits, fst->table, var_idx, row++, + pivot_value_new_var_value (vf->var, &v)); } + + var_idx++; } - pivot_table_submit (table); + if (!fst->splits) + { + frq_stats_table_destroy (fst); + return NULL; + } + return fst; +} + +static void +frq_stats_table_destroy (struct frq_stats_table *fst) +{ + if (!fst) + return; + + pivot_table_submit (fst->table); + pivot_splits_destroy (fst->splits); + free (fst); }