X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Ffrequencies.q;h=34223e104fea78361dbfec63e378dcc2ffed4c17;hb=13e7983c183d6448d09cd8ed9644351b676af4d3;hp=8256f3d8a450f3261ab741cfc0d4417475fd1a85;hpb=888d0f91d57e0c3c5a4206c30ac71eb87bf44227;p=pspp diff --git a/src/language/stats/frequencies.q b/src/language/stats/frequencies.q index 8256f3d8a4..34223e104f 100644 --- a/src/language/stats/frequencies.q +++ b/src/language/stats/frequencies.q @@ -1,6 +1,5 @@ /* PSPP - computes sample statistics. Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. - Written by Ben Pfaff . This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -31,6 +30,7 @@ #include #include +#include #include #include #include @@ -58,6 +58,8 @@ #include #include +#include "freq.h" + #include "minmax.h" #include "gettext.h" @@ -203,13 +205,6 @@ static struct pool *gen_pool; /* General mode. */ /* Frequency tables. */ -/* Frequency table entry. */ -struct freq - { - union value *v; /* The value. */ - double c; /* The number of occurrences of the value. */ - }; - /* Types of frequency tables. */ enum { @@ -269,9 +264,7 @@ struct var_freqs static inline struct var_freqs * get_var_freqs (const struct variable *v) { - assert (v != NULL); - assert (v->aux != NULL); - return v->aux; + return var_get_aux (v); } static void determine_charts (void); @@ -288,7 +281,6 @@ static void dump_condensed (struct variable *); static void dump_statistics (struct variable *, int show_varname); static void cleanup_freq_tab (struct variable *); -static hsh_hash_func hash_value_numeric, hash_value_alpha; static hsh_compare_func compare_value_numeric_a, compare_value_alpha_a; static hsh_compare_func compare_value_numeric_d, compare_value_alpha_d; static hsh_compare_func compare_freq_numeric_a, compare_freq_alpha_a; @@ -516,7 +508,7 @@ calc (const struct ccase *c, void *aux UNUSED, const struct dataset *ds) for (i = 0; i < n_variables; i++) { const struct variable *v = v_variables[i]; - const union value *val = case_data (c, v->fv); + const union value *val = case_data (c, v); struct var_freqs *vf = get_var_freqs (v); struct freq_tab *ft = &vf->tab; @@ -528,16 +520,16 @@ calc (const struct ccase *c, void *aux UNUSED, const struct dataset *ds) struct freq target; struct freq **fpp; - target.v = (union value *) val; + target.value = (union value *) val; fpp = (struct freq **) hsh_probe (ft->data, &target); if (*fpp != NULL) - (*fpp)->c += weight; + (*fpp)->count += weight; else { struct freq *fp = pool_alloc (gen_pool, sizeof *fp); - fp->c = weight; - fp->v = pool_clone (gen_pool, + fp->count = weight; + fp->value = pool_clone (gen_pool, val, MAX (MAX_SHORT_STRING, vf->width)); *fpp = fp; } @@ -582,20 +574,7 @@ precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds) if (ft->mode == FRQM_GENERAL) { - hsh_hash_func *hash; - hsh_compare_func *compare; - - if (var_is_numeric (v)) - { - hash = hash_value_numeric; - compare = compare_value_numeric_a; - } - else - { - hash = hash_value_alpha; - compare = compare_value_alpha_a; - } - ft->data = hsh_create (16, compare, hash, NULL, v); + ft->data = hsh_create (16, compare_freq, hash_freq, NULL, v); } else { @@ -695,23 +674,22 @@ postcalc (void *aux UNUSED, const struct dataset *ds UNUSED) sorting a frequency table by FRQ_SORT using VAR_TYPE variables. */ static hsh_compare_func * -get_freq_comparator (int frq_sort, int var_type) +get_freq_comparator (int frq_sort, enum var_type var_type) { - /* Note that q2c generates tags beginning with 1000. */ - switch (frq_sort | (var_type << 16)) + bool is_numeric = var_type == VAR_NUMERIC; + switch (frq_sort) { - case FRQ_AVALUE | (NUMERIC << 16): return compare_value_numeric_a; - case FRQ_AVALUE | (ALPHA << 16): return compare_value_alpha_a; - case FRQ_DVALUE | (NUMERIC << 16): return compare_value_numeric_d; - case FRQ_DVALUE | (ALPHA << 16): return compare_value_alpha_d; - case FRQ_AFREQ | (NUMERIC << 16): return compare_freq_numeric_a; - case FRQ_AFREQ | (ALPHA << 16): return compare_freq_alpha_a; - case FRQ_DFREQ | (NUMERIC << 16): return compare_freq_numeric_d; - case FRQ_DFREQ | (ALPHA << 16): return compare_freq_alpha_d; - default: NOT_REACHED (); + case FRQ_AVALUE: + return is_numeric ? compare_value_numeric_a : compare_value_alpha_a; + case FRQ_DVALUE: + return is_numeric ? compare_value_numeric_d : compare_value_alpha_d; + case FRQ_AFREQ: + return is_numeric ? compare_freq_numeric_a : compare_freq_alpha_a; + case FRQ_DFREQ: + return is_numeric ? compare_freq_numeric_d : compare_freq_alpha_d; + default: + NOT_REACHED (); } - - return 0; } /* Returns true iff the value in struct freq F is non-missing @@ -722,7 +700,7 @@ not_missing (const void *f_, const void *v_) const struct freq *f = f_; const struct variable *v = v_; - return !var_is_value_missing (v, f->v); + return !var_is_value_missing (v, f->value, MV_ANY); } /* Summarizes the frequency table data for variable V. */ @@ -767,7 +745,7 @@ postprocess_freq_tab (struct variable *v) for(i = 0 ; i < ft->n_valid ; ++i ) { f = &ft->valid[i]; - ft->valid_cases += f->c; + ft->valid_cases += f->count; } @@ -775,7 +753,7 @@ postprocess_freq_tab (struct variable *v) for(i = 0 ; i < ft->n_missing ; ++i ) { f = &ft->missing[i]; - ft->total_cases += f->c; + ft->total_cases += f->count; } } @@ -840,7 +818,7 @@ frq_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_freque struct variable *v = v_variables[i]; struct var_freqs *vf; - if (v->aux != NULL) + if (var_get_aux (v) != NULL) { msg (SE, _("Variable %s specified multiple times on VARIABLES " "subcommand."), var_get_name (v)); @@ -933,7 +911,7 @@ frq_custom_grouped (struct lexer *lexer, struct dataset *ds, struct cmd_frequenc } for (i = 0; i < n; i++) - if (v[i]->aux == NULL) + if (var_get_aux (v[i]) == NULL) msg (SE, _("Variables %s specified on GROUPED but not on " "VARIABLES."), var_get_name (v[i])); else @@ -996,25 +974,6 @@ add_percentile (double x) /* Comparison functions. */ -/* Hash of numeric values. */ -static unsigned -hash_value_numeric (const void *value_, const void *aux UNUSED) -{ - const struct freq *value = value_; - return hsh_hash_double (value->v[0].f); -} - -/* Hash of string values. */ -static unsigned -hash_value_alpha (const void *value_, const void *v_) -{ - const struct freq *value = value_; - const struct variable *v = v_; - struct var_freqs *vf = get_var_freqs (v); - - return hsh_hash_bytes (value->v[0].s, vf->width); -} - /* Ascending numeric compare of values. */ static int compare_value_numeric_a (const void *a_, const void *b_, const void *aux UNUSED) @@ -1022,9 +981,9 @@ compare_value_numeric_a (const void *a_, const void *b_, const void *aux UNUSED) const struct freq *a = a_; const struct freq *b = b_; - if (a->v[0].f > b->v[0].f) + if (a->value[0].f > b->value[0].f) return 1; - else if (a->v[0].f < b->v[0].f) + else if (a->value[0].f < b->value[0].f) return -1; else return 0; @@ -1039,7 +998,7 @@ compare_value_alpha_a (const void *a_, const void *b_, const void *v_) const struct variable *v = v_; struct var_freqs *vf = get_var_freqs (v); - return memcmp (a->v[0].s, b->v[0].s, vf->width); + return memcmp (a->value[0].s, b->value[0].s, vf->width); } /* Descending numeric compare of values. */ @@ -1064,14 +1023,14 @@ compare_freq_numeric_a (const void *a_, const void *b_, const void *aux UNUSED) const struct freq *a = a_; const struct freq *b = b_; - if (a->c > b->c) + if (a->count > b->count) return 1; - else if (a->c < b->c) + else if (a->count < b->count) return -1; - if (a->v[0].f > b->v[0].f) + if (a->value[0].f > b->value[0].f) return 1; - else if (a->v[0].f < b->v[0].f) + else if (a->value[0].f < b->value[0].f) return -1; else return 0; @@ -1087,12 +1046,12 @@ compare_freq_alpha_a (const void *a_, const void *b_, const void *v_) const struct variable *v = v_; struct var_freqs *vf = get_var_freqs (v); - if (a->c > b->c) + if (a->count > b->count) return 1; - else if (a->c < b->c) + else if (a->count < b->count) return -1; else - return memcmp (a->v[0].s, b->v[0].s, vf->width); + return memcmp (a->value[0].s, b->value[0].s, vf->width); } /* Descending numeric compare of frequency; @@ -1103,14 +1062,14 @@ compare_freq_numeric_d (const void *a_, const void *b_, const void *aux UNUSED) const struct freq *a = a_; const struct freq *b = b_; - if (a->c > b->c) + if (a->count > b->count) return -1; - else if (a->c < b->c) + else if (a->count < b->count) return 1; - if (a->v[0].f > b->v[0].f) + if (a->value[0].f > b->value[0].f) return 1; - else if (a->v[0].f < b->v[0].f) + else if (a->value[0].f < b->value[0].f) return -1; else return 0; @@ -1126,12 +1085,12 @@ compare_freq_alpha_d (const void *a_, const void *b_, const void *v_) const struct variable *v = v_; struct var_freqs *vf = get_var_freqs (v); - if (a->c > b->c) + if (a->count > b->count) return -1; - else if (a->c < b->c) + else if (a->count < b->count) return 1; else - return memcmp (a->v[0].s, b->v[0].s, vf->width); + return memcmp (a->value[0].s, b->value[0].s, vf->width); } /* Frequency table display. */ @@ -1141,13 +1100,19 @@ compare_freq_alpha_d (const void *a_, const void *b_, const void *v_) static void full_dim (struct tab_table *t, struct outp_driver *d) { - int lab = cmd.labels == FRQ_LABELS; - int i; + int i = 0; + int columns = 5; - if (lab) + if (cmd.labels == FRQ_LABELS) + { t->w[0] = MIN (tab_natural_width (t, d, 0), d->prop_em_width * 15); - for (i = lab; i < lab + 5; i++) + i = 1; + columns ++; + } + + for (;i < columns; i++) t->w[i] = MAX (tab_natural_width (t, d, i), d->prop_em_width * 8); + for (i = 0; i < t->nr; i++) t->h[i] = d->font_height; } @@ -1189,7 +1154,7 @@ dump_full (struct variable *v) {-1, -1, NULL}, }; - int lab = cmd.labels == FRQ_LABELS; + const bool lab = (cmd.labels == FRQ_LABELS); vf = get_var_freqs (v); ft = &vf->tab; @@ -1200,8 +1165,9 @@ dump_full (struct variable *v) if (lab) tab_text (t, 0, 1, TAB_CENTER | TAT_TITLE, _("Value Label")); + for (p = vec; p->s; p++) - tab_text (t, p->c - (p->r ? !lab : 0), p->r, + tab_text (t, lab ? p->c : p->c - 1, p->r, TAB_CENTER | TAT_TITLE, gettext (p->s)); r = 2; @@ -1209,21 +1175,21 @@ dump_full (struct variable *v) { double percent, valid_percent; - cum_freq += f->c; + cum_freq += f->count; - percent = f->c / ft->total_cases * 100.0; - valid_percent = f->c / ft->valid_cases * 100.0; + percent = f->count / ft->total_cases * 100.0; + valid_percent = f->count / ft->valid_cases * 100.0; cum_total += valid_percent; if (lab) { - const char *label = val_labs_find (v->val_labs, f->v[0]); + const char *label = var_lookup_value_label (v, &f->value[0]); if (label != NULL) tab_text (t, 0, r, TAB_LEFT, label); } - tab_value (t, 0 + lab, r, TAB_NONE, f->v, &vf->print); - tab_float (t, 1 + lab, r, TAB_NONE, f->c, 8, 0); + tab_value (t, 0 + lab, r, TAB_NONE, f->value, &vf->print); + tab_float (t, 1 + lab, r, TAB_NONE, f->count, 8, 0); tab_float (t, 2 + lab, r, TAB_NONE, percent, 5, 1); tab_float (t, 3 + lab, r, TAB_NONE, valid_percent, 5, 1); tab_float (t, 4 + lab, r, TAB_NONE, cum_total, 5, 1); @@ -1231,19 +1197,19 @@ dump_full (struct variable *v) } for (; f < &ft->valid[n_categories]; f++) { - cum_freq += f->c; + cum_freq += f->count; if (lab) { - const char *label = val_labs_find (v->val_labs, f->v[0]); + const char *label = var_lookup_value_label (v, &f->value[0]); if (label != NULL) tab_text (t, 0, r, TAB_LEFT, label); } - tab_value (t, 0 + lab, r, TAB_NONE, f->v, &vf->print); - tab_float (t, 1 + lab, r, TAB_NONE, f->c, 8, 0); + tab_value (t, 0 + lab, r, TAB_NONE, f->value, &vf->print); + tab_float (t, 1 + lab, r, TAB_NONE, f->count, 8, 0); tab_float (t, 2 + lab, r, TAB_NONE, - f->c / ft->total_cases * 100.0, 5, 1); + f->count / ft->total_cases * 100.0, 5, 1); tab_text (t, 3 + lab, r, TAB_NONE, _("Missing")); r++; } @@ -1312,21 +1278,21 @@ dump_condensed (struct variable *v) { double percent; - percent = f->c / ft->total_cases * 100.0; - cum_total += f->c / ft->valid_cases * 100.0; + percent = f->count / ft->total_cases * 100.0; + cum_total += f->count / ft->valid_cases * 100.0; - tab_value (t, 0, r, TAB_NONE, f->v, &vf->print); - tab_float (t, 1, r, TAB_NONE, f->c, 8, 0); + tab_value (t, 0, r, TAB_NONE, f->value, &vf->print); + tab_float (t, 1, r, TAB_NONE, f->count, 8, 0); tab_float (t, 2, r, TAB_NONE, percent, 3, 0); tab_float (t, 3, r, TAB_NONE, cum_total, 3, 0); r++; } for (; f < &ft->valid[n_categories]; f++) { - tab_value (t, 0, r, TAB_NONE, f->v, &vf->print); - tab_float (t, 1, r, TAB_NONE, f->c, 8, 0); + tab_value (t, 0, r, TAB_NONE, f->value, &vf->print); + tab_float (t, 1, r, TAB_NONE, f->count, 8, 0); tab_float (t, 2, r, TAB_NONE, - f->c / ft->total_cases * 100.0, 3, 0); + f->count / ft->total_cases * 100.0, 3, 0); r++; } @@ -1389,7 +1355,7 @@ calc_stats (struct variable *v, double d[frq_n_stats]) { static double prev_value = SYSMIS; f = &ft->valid[idx]; - rank += f->c ; + rank += f->count ; for (i = 0; i < n_percentiles; i++) { double tp; @@ -1404,7 +1370,7 @@ calc_stats (struct variable *v, double d[frq_n_stats]) if ( percentiles[i].flag ) { - percentiles[i].x2 = f->v[0].f; + percentiles[i].x2 = f->value[0].f; percentiles[i].x1 = prev_value; percentiles[i].flag2 = 1; continue; @@ -1412,9 +1378,9 @@ calc_stats (struct variable *v, double d[frq_n_stats]) if (rank > tp ) { - if ( f->c > 1 && rank - (f->c - 1) > tp ) + if ( f->count > 1 && rank - (f->count - 1) > tp ) { - percentiles[i].x2 = percentiles[i].x1 = f->v[0].f; + percentiles[i].x2 = percentiles[i].x1 = f->value[0].f; percentiles[i].flag2 = 1; } else @@ -1425,14 +1391,14 @@ calc_stats (struct variable *v, double d[frq_n_stats]) continue; } } - prev_value = f->v[0].f; + prev_value = f->value[0].f; } for (i = 0; i < n_percentiles; i++) { /* Catches the case when p == 100% */ if ( ! percentiles[i].flag2 ) - percentiles[i].x1 = percentiles[i].x2 = f->v[0].f; + percentiles[i].x1 = percentiles[i].x2 = f->value[0].f; /* printf("percentile %d (p==%.2f); X1 = %g; X2 = %g\n", @@ -1468,12 +1434,12 @@ calc_stats (struct variable *v, double d[frq_n_stats]) X_mode = SYSMIS; for (f = ft->valid; f < ft->missing; f++) { - if (most_often < f->c) + if (most_often < f->count) { - most_often = f->c; - X_mode = f->v[0].f; + most_often = f->count; + X_mode = f->value[0].f; } - else if (most_often == f->c) + else if (most_often == f->count) { /* A duplicate mode is undefined. FIXME: keep track of *all* the modes. */ @@ -1484,16 +1450,16 @@ calc_stats (struct variable *v, double d[frq_n_stats]) /* Calculate moments. */ m = moments_create (MOMENT_KURTOSIS); for (f = ft->valid; f < ft->missing; f++) - moments_pass_one (m, f->v[0].f, f->c); + moments_pass_one (m, f->value[0].f, f->count); for (f = ft->valid; f < ft->missing; f++) - moments_pass_two (m, f->v[0].f, f->c); + moments_pass_two (m, f->value[0].f, f->count); moments_calculate (m, NULL, &d[frq_mean], &d[frq_variance], &d[frq_skew], &d[frq_kurt]); moments_destroy (m); /* Formulas below are taken from _SPSS Statistical Algorithms_. */ - d[frq_min] = ft->valid[0].v[0].f; - d[frq_max] = ft->valid[ft->n_valid - 1].v[0].f; + d[frq_min] = ft->valid[0].value[0].f; + d[frq_max] = ft->valid[ft->n_valid - 1].value[0].f; d[frq_mode] = X_mode; d[frq_range] = d[frq_max] - d[frq_min]; d[frq_median] = *median_value; @@ -1598,11 +1564,11 @@ freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var) /* Find out the extremes of the x value */ for ( frq = hsh_first(fh, &hi); frq != 0; frq = hsh_next(fh, &hi) ) { - if ( var_is_value_missing(var, frq->v)) + if (var_is_value_missing(var, frq->value, MV_ANY)) continue; - if ( frq->v[0].f < x_min ) x_min = frq->v[0].f ; - if ( frq->v[0].f > x_max ) x_max = frq->v[0].f ; + if ( frq->value[0].f < x_min ) x_min = frq->value[0].f ; + if ( frq->value[0].f > x_max ) x_max = frq->value[0].f ; } hist = histogram_create(bins, x_min, x_max); @@ -1610,7 +1576,7 @@ freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var) for( i = 0 ; i < ft->n_valid ; ++i ) { frq = &ft->valid[i]; - gsl_histogram_accumulate(hist, frq->v[0].f, frq->c); + gsl_histogram_accumulate(hist, frq->value[0].f, frq->count); } return hist; @@ -1643,9 +1609,8 @@ freq_tab_to_slice_array(const struct freq_tab *frq_tab, { const struct freq *frq = &frq_tab->valid[i]; - slices[i].label = value_to_string(frq->v, var); - - slices[i].magnetude = frq->c; + slices[i].label = var_get_value_name (var, frq->value); + slices[i].magnetude = frq->count; } return slices;