X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Ffrequencies.q;h=535e69070f8e2761d09c7ef245f6475a68d5af57;hb=08e4a9535a7e1de9cc4b6e619e70805ab951b323;hp=929f987aa093a2c6609f1c5e3fe769feaa08305e;hpb=3816248a008a4af75aac6319d0c9929cb7ff679e;p=pspp-builds.git diff --git a/src/language/stats/frequencies.q b/src/language/stats/frequencies.q index 929f987a..535e6907 100644 --- a/src/language/stats/frequencies.q +++ b/src/language/stats/frequencies.q @@ -1,6 +1,5 @@ /* PSPP - computes sample statistics. Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. - Written by Ben Pfaff . This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -31,6 +30,7 @@ #include #include +#include #include #include #include @@ -58,6 +58,8 @@ #include #include +#include "freq.h" + #include "minmax.h" #include "gettext.h" @@ -203,13 +205,6 @@ static struct pool *gen_pool; /* General mode. */ /* Frequency tables. */ -/* Frequency table entry. */ -struct freq - { - union value *v; /* The value. */ - double c; /* The number of occurrences of the value. */ - }; - /* Types of frequency tables. */ enum { @@ -269,9 +264,7 @@ struct var_freqs static inline struct var_freqs * get_var_freqs (const struct variable *v) { - assert (v != NULL); - assert (v->aux != NULL); - return v->aux; + return var_get_aux (v); } static void determine_charts (void); @@ -288,7 +281,6 @@ static void dump_condensed (struct variable *); static void dump_statistics (struct variable *, int show_varname); static void cleanup_freq_tab (struct variable *); -static hsh_hash_func hash_value_numeric, hash_value_alpha; static hsh_compare_func compare_value_numeric_a, compare_value_alpha_a; static hsh_compare_func compare_value_numeric_d, compare_value_alpha_d; static hsh_compare_func compare_freq_numeric_a, compare_freq_alpha_a; @@ -516,7 +508,7 @@ calc (const struct ccase *c, void *aux UNUSED, const struct dataset *ds) for (i = 0; i < n_variables; i++) { const struct variable *v = v_variables[i]; - const union value *val = case_data (c, v->fv); + const union value *val = case_data (c, v); struct var_freqs *vf = get_var_freqs (v); struct freq_tab *ft = &vf->tab; @@ -528,16 +520,16 @@ calc (const struct ccase *c, void *aux UNUSED, const struct dataset *ds) struct freq target; struct freq **fpp; - target.v = (union value *) val; + target.value = (union value *) val; fpp = (struct freq **) hsh_probe (ft->data, &target); if (*fpp != NULL) - (*fpp)->c += weight; + (*fpp)->count += weight; else { struct freq *fp = pool_alloc (gen_pool, sizeof *fp); - fp->c = weight; - fp->v = pool_clone (gen_pool, + fp->count = weight; + fp->value = pool_clone (gen_pool, val, MAX (MAX_SHORT_STRING, vf->width)); *fpp = fp; } @@ -582,20 +574,7 @@ precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds) if (ft->mode == FRQM_GENERAL) { - hsh_hash_func *hash; - hsh_compare_func *compare; - - if (v->type == NUMERIC) - { - hash = hash_value_numeric; - compare = compare_value_numeric_a; - } - else - { - hash = hash_value_alpha; - compare = compare_value_alpha_a; - } - ft->data = hsh_create (16, compare, hash, NULL, v); + ft->data = hsh_create (16, compare_freq, hash_freq, NULL, v); } else { @@ -695,23 +674,22 @@ postcalc (void *aux UNUSED, const struct dataset *ds UNUSED) sorting a frequency table by FRQ_SORT using VAR_TYPE variables. */ static hsh_compare_func * -get_freq_comparator (int frq_sort, int var_type) +get_freq_comparator (int frq_sort, enum var_type var_type) { - /* Note that q2c generates tags beginning with 1000. */ - switch (frq_sort | (var_type << 16)) + bool is_numeric = var_type == VAR_NUMERIC; + switch (frq_sort) { - case FRQ_AVALUE | (NUMERIC << 16): return compare_value_numeric_a; - case FRQ_AVALUE | (ALPHA << 16): return compare_value_alpha_a; - case FRQ_DVALUE | (NUMERIC << 16): return compare_value_numeric_d; - case FRQ_DVALUE | (ALPHA << 16): return compare_value_alpha_d; - case FRQ_AFREQ | (NUMERIC << 16): return compare_freq_numeric_a; - case FRQ_AFREQ | (ALPHA << 16): return compare_freq_alpha_a; - case FRQ_DFREQ | (NUMERIC << 16): return compare_freq_numeric_d; - case FRQ_DFREQ | (ALPHA << 16): return compare_freq_alpha_d; - default: NOT_REACHED (); + case FRQ_AVALUE: + return is_numeric ? compare_value_numeric_a : compare_value_alpha_a; + case FRQ_DVALUE: + return is_numeric ? compare_value_numeric_d : compare_value_alpha_d; + case FRQ_AFREQ: + return is_numeric ? compare_freq_numeric_a : compare_freq_alpha_a; + case FRQ_DFREQ: + return is_numeric ? compare_freq_numeric_d : compare_freq_alpha_d; + default: + NOT_REACHED (); } - - return 0; } /* Returns true iff the value in struct freq F is non-missing @@ -722,7 +700,7 @@ not_missing (const void *f_, const void *v_) const struct freq *f = f_; const struct variable *v = v_; - return !mv_is_value_missing (&v->miss, f->v); + return !var_is_value_missing (v, f->value); } /* Summarizes the frequency table data for variable V. */ @@ -738,7 +716,7 @@ postprocess_freq_tab (struct variable *v) ft = &get_var_freqs (v)->tab; assert (ft->mode == FRQM_GENERAL); - compare = get_freq_comparator (cmd.sort, v->type); + compare = get_freq_comparator (cmd.sort, var_get_type (v)); /* Extract data from hash table. */ count = hsh_count (ft->data); @@ -767,7 +745,7 @@ postprocess_freq_tab (struct variable *v) for(i = 0 ; i < ft->n_valid ; ++i ) { f = &ft->valid[i]; - ft->valid_cases += f->c; + ft->valid_cases += f->count; } @@ -775,7 +753,7 @@ postprocess_freq_tab (struct variable *v) for(i = 0 ; i < ft->n_missing ; ++i ) { f = &ft->missing[i]; - ft->total_cases += f->c; + ft->total_cases += f->count; } } @@ -840,16 +818,16 @@ frq_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_freque struct variable *v = v_variables[i]; struct var_freqs *vf; - if (v->aux != NULL) + if (var_get_aux (v) != NULL) { msg (SE, _("Variable %s specified multiple times on VARIABLES " - "subcommand."), v->name); + "subcommand."), var_get_name (v)); return 0; } - if (mode == FRQM_INTEGER && v->type != NUMERIC) + if (mode == FRQM_INTEGER && !var_is_numeric (v)) { msg (SE, _("Integer mode specified, but %s is not a numeric " - "variable."), v->name); + "variable."), var_get_name (v)); return 0; } @@ -867,12 +845,13 @@ frq_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_freque vf->tab.vector = NULL; vf->n_groups = 0; vf->groups = NULL; - vf->width = v->width; - vf->print = v->print; + vf->width = var_get_width (v); + vf->print = *var_get_print_format (v); if (vf->width > MAX_SHORT_STRING && get_algorithm () == COMPATIBLE) { + enum fmt_type type = var_get_print_format (v)->type; vf->width = MAX_SHORT_STRING; - vf->print.w = MAX_SHORT_STRING * (v->print.type == FMT_AHEX ? 2 : 1); + vf->print.w = MAX_SHORT_STRING * (type == FMT_AHEX ? 2 : 1); } } return 1; @@ -932,16 +911,16 @@ frq_custom_grouped (struct lexer *lexer, struct dataset *ds, struct cmd_frequenc } for (i = 0; i < n; i++) - if (v[i]->aux == NULL) + if (var_get_aux (v[i]) == NULL) msg (SE, _("Variables %s specified on GROUPED but not on " - "VARIABLES."), v[i]->name); + "VARIABLES."), var_get_name (v[i])); else { struct var_freqs *vf = get_var_freqs (v[i]); if (vf->groups != NULL) msg (SE, _("Variables %s specified multiple times on GROUPED " - "subcommand."), v[i]->name); + "subcommand."), var_get_name (v[i])); else { vf->n_groups = nl; @@ -995,25 +974,6 @@ add_percentile (double x) /* Comparison functions. */ -/* Hash of numeric values. */ -static unsigned -hash_value_numeric (const void *value_, const void *aux UNUSED) -{ - const struct freq *value = value_; - return hsh_hash_double (value->v[0].f); -} - -/* Hash of string values. */ -static unsigned -hash_value_alpha (const void *value_, const void *v_) -{ - const struct freq *value = value_; - const struct variable *v = v_; - struct var_freqs *vf = get_var_freqs (v); - - return hsh_hash_bytes (value->v[0].s, vf->width); -} - /* Ascending numeric compare of values. */ static int compare_value_numeric_a (const void *a_, const void *b_, const void *aux UNUSED) @@ -1021,9 +981,9 @@ compare_value_numeric_a (const void *a_, const void *b_, const void *aux UNUSED) const struct freq *a = a_; const struct freq *b = b_; - if (a->v[0].f > b->v[0].f) + if (a->value[0].f > b->value[0].f) return 1; - else if (a->v[0].f < b->v[0].f) + else if (a->value[0].f < b->value[0].f) return -1; else return 0; @@ -1038,7 +998,7 @@ compare_value_alpha_a (const void *a_, const void *b_, const void *v_) const struct variable *v = v_; struct var_freqs *vf = get_var_freqs (v); - return memcmp (a->v[0].s, b->v[0].s, vf->width); + return memcmp (a->value[0].s, b->value[0].s, vf->width); } /* Descending numeric compare of values. */ @@ -1063,14 +1023,14 @@ compare_freq_numeric_a (const void *a_, const void *b_, const void *aux UNUSED) const struct freq *a = a_; const struct freq *b = b_; - if (a->c > b->c) + if (a->count > b->count) return 1; - else if (a->c < b->c) + else if (a->count < b->count) return -1; - if (a->v[0].f > b->v[0].f) + if (a->value[0].f > b->value[0].f) return 1; - else if (a->v[0].f < b->v[0].f) + else if (a->value[0].f < b->value[0].f) return -1; else return 0; @@ -1086,12 +1046,12 @@ compare_freq_alpha_a (const void *a_, const void *b_, const void *v_) const struct variable *v = v_; struct var_freqs *vf = get_var_freqs (v); - if (a->c > b->c) + if (a->count > b->count) return 1; - else if (a->c < b->c) + else if (a->count < b->count) return -1; else - return memcmp (a->v[0].s, b->v[0].s, vf->width); + return memcmp (a->value[0].s, b->value[0].s, vf->width); } /* Descending numeric compare of frequency; @@ -1102,14 +1062,14 @@ compare_freq_numeric_d (const void *a_, const void *b_, const void *aux UNUSED) const struct freq *a = a_; const struct freq *b = b_; - if (a->c > b->c) + if (a->count > b->count) return -1; - else if (a->c < b->c) + else if (a->count < b->count) return 1; - if (a->v[0].f > b->v[0].f) + if (a->value[0].f > b->value[0].f) return 1; - else if (a->v[0].f < b->v[0].f) + else if (a->value[0].f < b->value[0].f) return -1; else return 0; @@ -1125,12 +1085,12 @@ compare_freq_alpha_d (const void *a_, const void *b_, const void *v_) const struct variable *v = v_; struct var_freqs *vf = get_var_freqs (v); - if (a->c > b->c) + if (a->count > b->count) return -1; - else if (a->c < b->c) + else if (a->count < b->count) return 1; else - return memcmp (a->v[0].s, b->v[0].s, vf->width); + return memcmp (a->value[0].s, b->value[0].s, vf->width); } /* Frequency table display. */ @@ -1144,9 +1104,9 @@ full_dim (struct tab_table *t, struct outp_driver *d) int i; if (lab) - t->w[0] = min (tab_natural_width (t, d, 0), d->prop_em_width * 15); + t->w[0] = MIN (tab_natural_width (t, d, 0), d->prop_em_width * 15); for (i = lab; i < lab + 5; i++) - t->w[i] = max (tab_natural_width (t, d, i), d->prop_em_width * 8); + t->w[i] = MAX (tab_natural_width (t, d, i), d->prop_em_width * 8); for (i = 0; i < t->nr; i++) t->h[i] = d->font_height; } @@ -1208,21 +1168,26 @@ dump_full (struct variable *v) { double percent, valid_percent; - cum_freq += f->c; + cum_freq += f->count; - percent = f->c / ft->total_cases * 100.0; - valid_percent = f->c / ft->valid_cases * 100.0; + percent = f->count / ft->total_cases * 100.0; + valid_percent = f->count / ft->valid_cases * 100.0; cum_total += valid_percent; if (lab) { - const char *label = val_labs_find (v->val_labs, f->v[0]); +#if 0 +<<<<<<< frequencies.q + const char *label = val_labs_find (v->val_labs, f->value[0]); +======= +#endif + const char *label = var_lookup_value_label (v, &f->value[0]); if (label != NULL) tab_text (t, 0, r, TAB_LEFT, label); } - tab_value (t, 0 + lab, r, TAB_NONE, f->v, &vf->print); - tab_float (t, 1 + lab, r, TAB_NONE, f->c, 8, 0); + tab_value (t, 0 + lab, r, TAB_NONE, f->value, &vf->print); + tab_float (t, 1 + lab, r, TAB_NONE, f->count, 8, 0); tab_float (t, 2 + lab, r, TAB_NONE, percent, 5, 1); tab_float (t, 3 + lab, r, TAB_NONE, valid_percent, 5, 1); tab_float (t, 4 + lab, r, TAB_NONE, cum_total, 5, 1); @@ -1230,19 +1195,19 @@ dump_full (struct variable *v) } for (; f < &ft->valid[n_categories]; f++) { - cum_freq += f->c; + cum_freq += f->count; if (lab) { - const char *label = val_labs_find (v->val_labs, f->v[0]); + const char *label = var_lookup_value_label (v, &f->value[0]); if (label != NULL) tab_text (t, 0, r, TAB_LEFT, label); } - tab_value (t, 0 + lab, r, TAB_NONE, f->v, &vf->print); - tab_float (t, 1 + lab, r, TAB_NONE, f->c, 8, 0); + tab_value (t, 0 + lab, r, TAB_NONE, f->value, &vf->print); + tab_float (t, 1 + lab, r, TAB_NONE, f->count, 8, 0); tab_float (t, 2 + lab, r, TAB_NONE, - f->c / ft->total_cases * 100.0, 5, 1); + f->count / ft->total_cases * 100.0, 5, 1); tab_text (t, 3 + lab, r, TAB_NONE, _("Missing")); r++; } @@ -1258,9 +1223,8 @@ dump_full (struct variable *v) tab_float (t, 2 + lab, r, TAB_NONE, 100.0, 5, 1); tab_float (t, 3 + lab, r, TAB_NONE, 100.0, 5, 1); - tab_title (t, "%s: %s", v->name, v->label ? v->label : ""); + tab_title (t, "%s", var_to_string (v)); tab_submit (t); - } /* Sets the widths of all the columns and heights of all the rows in @@ -1268,14 +1232,14 @@ dump_full (struct variable *v) static void condensed_dim (struct tab_table *t, struct outp_driver *d) { - int cum_w = max (outp_string_width (d, _("Cum"), OUTP_PROPORTIONAL), - max (outp_string_width (d, _("Cum"), OUTP_PROPORTIONAL), + int cum_w = MAX (outp_string_width (d, _("Cum"), OUTP_PROPORTIONAL), + MAX (outp_string_width (d, _("Cum"), OUTP_PROPORTIONAL), outp_string_width (d, "000", OUTP_PROPORTIONAL))); int i; for (i = 0; i < 2; i++) - t->w[i] = max (tab_natural_width (t, d, i), d->prop_em_width * 8); + t->w[i] = MAX (tab_natural_width (t, d, i), d->prop_em_width * 8); for (i = 2; i < 4; i++) t->w[i] = cum_w; for (i = 0; i < t->nr; i++) @@ -1312,21 +1276,21 @@ dump_condensed (struct variable *v) { double percent; - percent = f->c / ft->total_cases * 100.0; - cum_total += f->c / ft->valid_cases * 100.0; + percent = f->count / ft->total_cases * 100.0; + cum_total += f->count / ft->valid_cases * 100.0; - tab_value (t, 0, r, TAB_NONE, f->v, &vf->print); - tab_float (t, 1, r, TAB_NONE, f->c, 8, 0); + tab_value (t, 0, r, TAB_NONE, f->value, &vf->print); + tab_float (t, 1, r, TAB_NONE, f->count, 8, 0); tab_float (t, 2, r, TAB_NONE, percent, 3, 0); tab_float (t, 3, r, TAB_NONE, cum_total, 3, 0); r++; } for (; f < &ft->valid[n_categories]; f++) { - tab_value (t, 0, r, TAB_NONE, f->v, &vf->print); - tab_float (t, 1, r, TAB_NONE, f->c, 8, 0); + tab_value (t, 0, r, TAB_NONE, f->value, &vf->print); + tab_float (t, 1, r, TAB_NONE, f->count, 8, 0); tab_float (t, 2, r, TAB_NONE, - f->c / ft->total_cases * 100.0, 3, 0); + f->count / ft->total_cases * 100.0, 3, 0); r++; } @@ -1334,7 +1298,7 @@ dump_condensed (struct variable *v) cmd.spaces == FRQ_SINGLE ? -1 : TAL_GAP, TAL_1, 0, 0, 3, r - 1); tab_hline (t, TAL_2, 0, 3, 2); - tab_title (t, "%s: %s", v->name, v->label ? v->label : ""); + tab_title (t, "%s", var_to_string (v)); tab_columns (t, SOM_COL_DOWN, 1); tab_submit (t); } @@ -1389,7 +1353,7 @@ calc_stats (struct variable *v, double d[frq_n_stats]) { static double prev_value = SYSMIS; f = &ft->valid[idx]; - rank += f->c ; + rank += f->count ; for (i = 0; i < n_percentiles; i++) { double tp; @@ -1404,7 +1368,7 @@ calc_stats (struct variable *v, double d[frq_n_stats]) if ( percentiles[i].flag ) { - percentiles[i].x2 = f->v[0].f; + percentiles[i].x2 = f->value[0].f; percentiles[i].x1 = prev_value; percentiles[i].flag2 = 1; continue; @@ -1412,9 +1376,9 @@ calc_stats (struct variable *v, double d[frq_n_stats]) if (rank > tp ) { - if ( f->c > 1 && rank - (f->c - 1) > tp ) + if ( f->count > 1 && rank - (f->count - 1) > tp ) { - percentiles[i].x2 = percentiles[i].x1 = f->v[0].f; + percentiles[i].x2 = percentiles[i].x1 = f->value[0].f; percentiles[i].flag2 = 1; } else @@ -1425,14 +1389,14 @@ calc_stats (struct variable *v, double d[frq_n_stats]) continue; } } - prev_value = f->v[0].f; + prev_value = f->value[0].f; } for (i = 0; i < n_percentiles; i++) { /* Catches the case when p == 100% */ if ( ! percentiles[i].flag2 ) - percentiles[i].x1 = percentiles[i].x2 = f->v[0].f; + percentiles[i].x1 = percentiles[i].x2 = f->value[0].f; /* printf("percentile %d (p==%.2f); X1 = %g; X2 = %g\n", @@ -1468,12 +1432,12 @@ calc_stats (struct variable *v, double d[frq_n_stats]) X_mode = SYSMIS; for (f = ft->valid; f < ft->missing; f++) { - if (most_often < f->c) + if (most_often < f->count) { - most_often = f->c; - X_mode = f->v[0].f; + most_often = f->count; + X_mode = f->value[0].f; } - else if (most_often == f->c) + else if (most_often == f->count) { /* A duplicate mode is undefined. FIXME: keep track of *all* the modes. */ @@ -1484,16 +1448,16 @@ calc_stats (struct variable *v, double d[frq_n_stats]) /* Calculate moments. */ m = moments_create (MOMENT_KURTOSIS); for (f = ft->valid; f < ft->missing; f++) - moments_pass_one (m, f->v[0].f, f->c); + moments_pass_one (m, f->value[0].f, f->count); for (f = ft->valid; f < ft->missing; f++) - moments_pass_two (m, f->v[0].f, f->c); + moments_pass_two (m, f->value[0].f, f->count); moments_calculate (m, NULL, &d[frq_mean], &d[frq_variance], &d[frq_skew], &d[frq_kurt]); moments_destroy (m); /* Formulas below are taken from _SPSS Statistical Algorithms_. */ - d[frq_min] = ft->valid[0].v[0].f; - d[frq_max] = ft->valid[ft->n_valid - 1].v[0].f; + d[frq_min] = ft->valid[0].value[0].f; + d[frq_max] = ft->valid[ft->n_valid - 1].value[0].f; d[frq_mode] = X_mode; d[frq_range] = d[frq_max] - d[frq_min]; d[frq_median] = *median_value; @@ -1518,13 +1482,13 @@ dump_statistics (struct variable *v, int show_varname) if ( implicit_50th && n_percentiles > 0 ) --n_percentiles; - if (v->type == ALPHA) + if (var_is_alpha (v)) return; ft = &get_var_freqs (v)->tab; if (ft->n_valid == 0) { msg (SW, _("No valid data for variable %s; statistics not displayed."), - v->name); + var_get_name (v)); return; } calc_stats (v, stat_value); @@ -1571,12 +1535,7 @@ dump_statistics (struct variable *v, int show_varname) tab_columns (t, SOM_COL_DOWN, 1); if (show_varname) - { - if (v->label) - tab_title (t, "%s: %s", v->name, v->label); - else - tab_title (t, "%s", v->name); - } + tab_title (t, "%s", var_to_string (v)); else tab_flags (t, SOMF_NO_TITLE); @@ -1603,11 +1562,11 @@ freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var) /* Find out the extremes of the x value */ for ( frq = hsh_first(fh, &hi); frq != 0; frq = hsh_next(fh, &hi) ) { - if ( mv_is_value_missing(&var->miss, frq->v)) + if ( var_is_value_missing(var, frq->value)) continue; - if ( frq->v[0].f < x_min ) x_min = frq->v[0].f ; - if ( frq->v[0].f > x_max ) x_max = frq->v[0].f ; + if ( frq->value[0].f < x_min ) x_min = frq->value[0].f ; + if ( frq->value[0].f > x_max ) x_max = frq->value[0].f ; } hist = histogram_create(bins, x_min, x_max); @@ -1615,7 +1574,7 @@ freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var) for( i = 0 ; i < ft->n_valid ; ++i ) { frq = &ft->valid[i]; - gsl_histogram_accumulate(hist, frq->v[0].f, frq->c); + gsl_histogram_accumulate(hist, frq->value[0].f, frq->count); } return hist; @@ -1648,9 +1607,8 @@ freq_tab_to_slice_array(const struct freq_tab *frq_tab, { const struct freq *frq = &frq_tab->valid[i]; - slices[i].label = value_to_string(frq->v, var); - - slices[i].magnetude = frq->c; + slices[i].label = var_get_value_name (var, frq->value); + slices[i].magnetude = frq->count; } return slices;