X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Ffrequencies.q;h=535e69070f8e2761d09c7ef245f6475a68d5af57;hb=08e4a9535a7e1de9cc4b6e619e70805ab951b323;hp=9b338a83535de77aa11863706114efd0b6fa8943;hpb=a19b858e0ac3c69e4a28c0ca6d8674427268a863;p=pspp-builds.git diff --git a/src/language/stats/frequencies.q b/src/language/stats/frequencies.q index 9b338a83..535e6907 100644 --- a/src/language/stats/frequencies.q +++ b/src/language/stats/frequencies.q @@ -1,6 +1,5 @@ /* PSPP - computes sample statistics. Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. - Written by Ben Pfaff . This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -24,36 +23,44 @@ */ #include -#include + #include #include #include -#include -#include #include -#include #include -#include -#include +#include +#include +#include +#include +#include #include +#include #include -#include -#include +#include #include +#include +#include +#include #include +#include +#include #include -#include -#include +#include #include -#include -#include -#include -#include -#include -#include +#include +#include #include +#include #include +#include +#include +#include + +#include "freq.h" + +#include "minmax.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -61,17 +68,15 @@ /* (headers) */ -#include - /* (specification) FREQUENCIES (frq_): - *variables=custom; - format=cond:condense/onepage(*n:onepage_limit,"%s>=0")/!standard, - table:limit(n:limit,"%s>0")/notable/!table, - labels:!labels/nolabels, - sort:!avalue/dvalue/afreq/dfreq, - spaces:!single/double, - paging:newpage/!oldpage; + *+variables=custom; + +format=cond:condense/onepage(*n:onepage_limit,"%s>=0")/!standard, + table:limit(n:limit,"%s>0")/notable/!table, + labels:!labels/nolabels, + sort:!avalue/dvalue/afreq/dfreq, + spaces:!single/double, + paging:newpage/!oldpage; missing=miss:include/!exclude; barchart(ba_)=:minimum(d:min), :maximum(d:max), @@ -89,12 +94,12 @@ scale:freq(*n:freq,"%s>0")/percent(*n:pcnt,"%s>0"), norm:!nonormal/normal, incr:increment(d:inc,"%s>0"); - grouped=custom; - ntiles=integer; + +grouped=custom; + +ntiles=integer; +percentiles = double list; - statistics[st_]=1|mean,2|semean,3|median,4|mode,5|stddev,6|variance, - 7|kurtosis,8|skewness,9|range,10|minimum,11|maximum,12|sum, - 13|default,14|seskewness,15|sekurtosis,all,none. + +statistics[st_]=1|mean,2|semean,3|median,4|mode,5|stddev,6|variance, + 7|kurtosis,8|skewness,9|range,10|minimum,11|maximum,12|sum, + 13|default,14|seskewness,15|sekurtosis,all,none. */ /* (declarations) */ /* (functions) */ @@ -115,7 +120,7 @@ struct frq_info }; /* Table of statistics, indexed by dsc_*. */ -static struct frq_info st_name[frq_n_stats + 1] = +static const struct frq_info st_name[frq_n_stats + 1] = { {FRQ_ST_MEAN, N_("Mean")}, {FRQ_ST_SEMEAN, N_("S.E. Mean")}, @@ -200,13 +205,6 @@ static struct pool *gen_pool; /* General mode. */ /* Frequency tables. */ -/* Frequency table entry. */ -struct freq - { - union value v; /* The value. */ - double c; /* The number of occurrences of the value. */ - }; - /* Types of frequency tables. */ enum { @@ -253,23 +251,29 @@ struct var_freqs /* Statistics. */ double stat[frq_n_stats]; + + /* Width and format for analysis and display. + This is normally the same as "width" and "print" in struct + variable, but in SPSS-compatible mode only the first + MAX_SHORT_STRING bytes of long string variables are + included. */ + int width; + struct fmt_spec print; }; static inline struct var_freqs * -get_var_freqs (struct variable *v) +get_var_freqs (const struct variable *v) { - assert (v != NULL); - assert (v->aux != NULL); - return v->aux; + return var_get_aux (v); } static void determine_charts (void); static void calc_stats (struct variable *v, double d[frq_n_stats]); -static void precalc (void *); -static bool calc (struct ccase *, void *); -static void postcalc (void *); +static void precalc (const struct ccase *, void *, const struct dataset *); +static bool calc (const struct ccase *, void *, const struct dataset *); +static bool postcalc (void *, const struct dataset *); static void postprocess_freq_tab (struct variable *); static void dump_full (struct variable *); @@ -277,7 +281,6 @@ static void dump_condensed (struct variable *); static void dump_statistics (struct variable *, int show_varname); static void cleanup_freq_tab (struct variable *); -static hsh_hash_func hash_value_numeric, hash_value_alpha; static hsh_compare_func compare_value_numeric_a, compare_value_alpha_a; static hsh_compare_func compare_value_numeric_d, compare_value_alpha_d; static hsh_compare_func compare_freq_numeric_a, compare_freq_alpha_a; @@ -294,15 +297,15 @@ freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var); /* Parser and outline. */ -static int internal_cmd_frequencies (void); +static int internal_cmd_frequencies (struct lexer *lexer, struct dataset *ds); int -cmd_frequencies (void) +cmd_frequencies (struct lexer *lexer, struct dataset *ds) { int result; int_pool = pool_create (); - result = internal_cmd_frequencies (); + result = internal_cmd_frequencies (lexer, ds); pool_destroy (int_pool); int_pool=0; pool_destroy (gen_pool); @@ -313,7 +316,7 @@ cmd_frequencies (void) } static int -internal_cmd_frequencies (void) +internal_cmd_frequencies (struct lexer *lexer, struct dataset *ds) { int i; bool ok; @@ -324,7 +327,7 @@ internal_cmd_frequencies (void) n_variables = 0; v_variables = NULL; - if (!parse_frequencies (&cmd)) + if (!parse_frequencies (lexer, ds, &cmd, NULL)) return CMD_FAILURE; if (cmd.onepage_limit == NOT_LONG) @@ -365,7 +368,7 @@ internal_cmd_frequencies (void) int pl; subc_list_double *ptl_list = &cmd.dl_percentiles[i]; for ( pl = 0 ; pl < subc_list_double_count(ptl_list); ++pl) - add_percentile(subc_list_double_at(ptl_list,pl) / 100.0 ); + add_percentile (subc_list_double_at(ptl_list, pl) / 100.0 ); } } if ( cmd.sbc_ntiles ) @@ -374,13 +377,13 @@ internal_cmd_frequencies (void) { int j; for (j = 0; j <= cmd.n_ntiles[i]; ++j ) - add_percentile(j / (double) cmd.n_ntiles[i]); + add_percentile (j / (double) cmd.n_ntiles[i]); } } /* Do it! */ - ok = procedure_with_splits (precalc, calc, postcalc, NULL); + ok = procedure_with_splits (ds, precalc, calc, postcalc, NULL); free_frequencies(&cmd); @@ -494,35 +497,41 @@ determine_charts (void) /* Add data from case C to the frequency table. */ static bool -calc (struct ccase *c, void *aux UNUSED) +calc (const struct ccase *c, void *aux UNUSED, const struct dataset *ds) { double weight; size_t i; - int bad_warn = 1; + bool bad_warn = true; - weight = dict_get_case_weight (default_dict, c, &bad_warn); + weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn); for (i = 0; i < n_variables; i++) { - struct variable *v = v_variables[i]; - const union value *val = case_data (c, v->fv); - struct freq_tab *ft = &get_var_freqs (v)->tab; + const struct variable *v = v_variables[i]; + const union value *val = case_data (c, v); + struct var_freqs *vf = get_var_freqs (v); + struct freq_tab *ft = &vf->tab; switch (ft->mode) { case FRQM_GENERAL: { - /* General mode. */ - struct freq **fpp = (struct freq **) hsh_probe (ft->data, val); + struct freq target; + struct freq **fpp; + + target.value = (union value *) val; + fpp = (struct freq **) hsh_probe (ft->data, &target); if (*fpp != NULL) - (*fpp)->c += weight; + (*fpp)->count += weight; else { - struct freq *fp = *fpp = pool_alloc (gen_pool, sizeof *fp); - fp->v = *val; - fp->c = weight; + struct freq *fp = pool_alloc (gen_pool, sizeof *fp); + fp->count = weight; + fp->value = pool_clone (gen_pool, + val, MAX (MAX_SHORT_STRING, vf->width)); + *fpp = fp; } } break; @@ -540,7 +549,7 @@ calc (struct ccase *c, void *aux UNUSED) ft->out_of_range += weight; break; default: - assert (0); + NOT_REACHED (); } } return true; @@ -549,10 +558,12 @@ calc (struct ccase *c, void *aux UNUSED) /* Prepares each variable that is the target of FREQUENCIES by setting up its hash table. */ static void -precalc (void *aux UNUSED) +precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds) { size_t i; + output_split_file_values (ds, first); + pool_destroy (gen_pool); gen_pool = pool_create (); @@ -563,20 +574,7 @@ precalc (void *aux UNUSED) if (ft->mode == FRQM_GENERAL) { - hsh_hash_func *hash; - hsh_compare_func *compare; - - if (v->type == NUMERIC) - { - hash = hash_value_numeric; - compare = compare_value_numeric_a; - } - else - { - hash = hash_value_alpha; - compare = compare_value_alpha_a; - } - ft->data = hsh_create (16, compare, hash, NULL, v); + ft->data = hsh_create (16, compare_freq, hash_freq, NULL, v); } else { @@ -592,8 +590,8 @@ precalc (void *aux UNUSED) /* Finishes up with the variables after frequencies have been calculated. Displays statistics, percentiles, ... */ -static void -postcalc (void *aux UNUSED) +static bool +postcalc (void *aux UNUSED, const struct dataset *ds UNUSED) { size_t i; @@ -626,7 +624,7 @@ postcalc (void *aux UNUSED) dump_full (v); break; default: - assert (0); + NOT_REACHED (); } else dumped_freq_tab = 0; @@ -646,7 +644,7 @@ postcalc (void *aux UNUSED) norm.N = vf->tab.valid_cases; - calc_stats(v,d); + calc_stats (v, d); norm.mean = d[frq_mean]; norm.stddev = d[frq_stddev]; @@ -668,40 +666,41 @@ postcalc (void *aux UNUSED) cleanup_freq_tab (v); } + + return true; } /* Returns the comparison function that should be used for sorting a frequency table by FRQ_SORT using VAR_TYPE variables. */ static hsh_compare_func * -get_freq_comparator (int frq_sort, int var_type) +get_freq_comparator (int frq_sort, enum var_type var_type) { - /* Note that q2c generates tags beginning with 1000. */ - switch (frq_sort | (var_type << 16)) + bool is_numeric = var_type == VAR_NUMERIC; + switch (frq_sort) { - case FRQ_AVALUE | (NUMERIC << 16): return compare_value_numeric_a; - case FRQ_AVALUE | (ALPHA << 16): return compare_value_alpha_a; - case FRQ_DVALUE | (NUMERIC << 16): return compare_value_numeric_d; - case FRQ_DVALUE | (ALPHA << 16): return compare_value_alpha_d; - case FRQ_AFREQ | (NUMERIC << 16): return compare_freq_numeric_a; - case FRQ_AFREQ | (ALPHA << 16): return compare_freq_alpha_a; - case FRQ_DFREQ | (NUMERIC << 16): return compare_freq_numeric_d; - case FRQ_DFREQ | (ALPHA << 16): return compare_freq_alpha_d; - default: assert (0); + case FRQ_AVALUE: + return is_numeric ? compare_value_numeric_a : compare_value_alpha_a; + case FRQ_DVALUE: + return is_numeric ? compare_value_numeric_d : compare_value_alpha_d; + case FRQ_AFREQ: + return is_numeric ? compare_freq_numeric_a : compare_freq_alpha_a; + case FRQ_DFREQ: + return is_numeric ? compare_freq_numeric_d : compare_freq_alpha_d; + default: + NOT_REACHED (); } - - return 0; } -/* Returns nonzero iff the value in struct freq F is non-missing +/* Returns true iff the value in struct freq F is non-missing for variable V. */ -static int -not_missing (const void *f_, void *v_) +static bool +not_missing (const void *f_, const void *v_) { const struct freq *f = f_; - struct variable *v = v_; + const struct variable *v = v_; - return !mv_is_value_missing (&v->miss, &f->v); + return !var_is_value_missing (v, f->value); } /* Summarizes the frequency table data for variable V. */ @@ -717,7 +716,7 @@ postprocess_freq_tab (struct variable *v) ft = &get_var_freqs (v)->tab; assert (ft->mode == FRQM_GENERAL); - compare = get_freq_comparator (cmd.sort, v->type); + compare = get_freq_comparator (cmd.sort, var_get_type (v)); /* Extract data from hash table. */ count = hsh_count (ft->data); @@ -746,7 +745,7 @@ postprocess_freq_tab (struct variable *v) for(i = 0 ; i < ft->n_valid ; ++i ) { f = &ft->valid[i]; - ft->valid_cases += f->c; + ft->valid_cases += f->count; } @@ -754,7 +753,7 @@ postprocess_freq_tab (struct variable *v) for(i = 0 ; i < ft->n_missing ; ++i ) { f = &ft->missing[i]; - ft->total_cases += f->c; + ft->total_cases += f->count; } } @@ -772,7 +771,7 @@ cleanup_freq_tab (struct variable *v) /* Parses the VARIABLES subcommand, adding to {n_variables,v_variables}. */ static int -frq_custom_variables (struct cmd_frequencies *cmd UNUSED) +frq_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_frequencies *cmd UNUSED, void *aux UNUSED) { int mode; int min = 0, max = 0; @@ -780,31 +779,31 @@ frq_custom_variables (struct cmd_frequencies *cmd UNUSED) size_t old_n_variables = n_variables; size_t i; - lex_match ('='); - if (token != T_ALL && (token != T_ID - || dict_lookup_var (default_dict, tokid) == NULL)) + lex_match (lexer, '='); + if (lex_token (lexer) != T_ALL && (lex_token (lexer) != T_ID + || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)) return 2; - if (!parse_variables (default_dict, &v_variables, &n_variables, + if (!parse_variables (lexer, dataset_dict (ds), &v_variables, &n_variables, PV_APPEND | PV_NO_SCRATCH)) return 0; - if (!lex_match ('(')) + if (!lex_match (lexer, '(')) mode = FRQM_GENERAL; else { mode = FRQM_INTEGER; - if (!lex_force_int ()) + if (!lex_force_int (lexer)) return 0; - min = lex_integer (); - lex_get (); - if (!lex_force_match (',')) + min = lex_integer (lexer); + lex_get (lexer); + if (!lex_force_match (lexer, ',')) return 0; - if (!lex_force_int ()) + if (!lex_force_int (lexer)) return 0; - max = lex_integer (); - lex_get (); - if (!lex_force_match (')')) + max = lex_integer (lexer); + lex_get (lexer); + if (!lex_force_match (lexer, ')')) return 0; if (max < min) { @@ -819,16 +818,16 @@ frq_custom_variables (struct cmd_frequencies *cmd UNUSED) struct variable *v = v_variables[i]; struct var_freqs *vf; - if (v->aux != NULL) + if (var_get_aux (v) != NULL) { msg (SE, _("Variable %s specified multiple times on VARIABLES " - "subcommand."), v->name); + "subcommand."), var_get_name (v)); return 0; } - if (mode == FRQM_INTEGER && v->type != NUMERIC) + if (mode == FRQM_INTEGER && !var_is_numeric (v)) { msg (SE, _("Integer mode specified, but %s is not a numeric " - "variable."), v->name); + "variable."), var_get_name (v)); return 0; } @@ -842,10 +841,18 @@ frq_custom_variables (struct cmd_frequencies *cmd UNUSED) vf->tab.vector = pool_nalloc (int_pool, max - min + 1, sizeof *vf->tab.vector); } - else - vf->tab.vector = NULL; + else + vf->tab.vector = NULL; vf->n_groups = 0; vf->groups = NULL; + vf->width = var_get_width (v); + vf->print = *var_get_print_format (v); + if (vf->width > MAX_SHORT_STRING && get_algorithm () == COMPATIBLE) + { + enum fmt_type type = var_get_print_format (v)->type; + vf->width = MAX_SHORT_STRING; + vf->print.w = MAX_SHORT_STRING * (type == FMT_AHEX ? 2 : 1); + } } return 1; } @@ -853,11 +860,11 @@ frq_custom_variables (struct cmd_frequencies *cmd UNUSED) /* Parses the GROUPED subcommand, setting the n_grouped, grouped fields of specified variables. */ static int -frq_custom_grouped (struct cmd_frequencies *cmd UNUSED) +frq_custom_grouped (struct lexer *lexer, struct dataset *ds, struct cmd_frequencies *cmd UNUSED, void *aux UNUSED) { - lex_match ('='); - if ((token == T_ID && dict_lookup_var (default_dict, tokid) != NULL) - || token == T_ID) + lex_match (lexer, '='); + if ((lex_token (lexer) == T_ID && dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) != NULL) + || lex_token (lexer) == T_ID) for (;;) { size_t i; @@ -870,27 +877,27 @@ frq_custom_grouped (struct cmd_frequencies *cmd UNUSED) size_t n; struct variable **v; - if (!parse_variables (default_dict, &v, &n, + if (!parse_variables (lexer, dataset_dict (ds), &v, &n, PV_NO_DUPLICATE | PV_NUMERIC)) return 0; - if (lex_match ('(')) + if (lex_match (lexer, '(')) { nl = ml = 0; dl = NULL; - while (lex_integer ()) + while (lex_integer (lexer)) { if (nl >= ml) { ml += 16; dl = pool_nrealloc (int_pool, dl, ml, sizeof *dl); } - dl[nl++] = tokval; - lex_get (); - lex_match (','); + dl[nl++] = lex_tokval (lexer); + lex_get (lexer); + lex_match (lexer, ','); } /* Note that nl might still be 0 and dl might still be NULL. That's okay. */ - if (!lex_match (')')) + if (!lex_match (lexer, ')')) { free (v); msg (SE, _("`)' expected after GROUPED interval list.")); @@ -904,16 +911,16 @@ frq_custom_grouped (struct cmd_frequencies *cmd UNUSED) } for (i = 0; i < n; i++) - if (v[i]->aux == NULL) + if (var_get_aux (v[i]) == NULL) msg (SE, _("Variables %s specified on GROUPED but not on " - "VARIABLES."), v[i]->name); + "VARIABLES."), var_get_name (v[i])); else { struct var_freqs *vf = get_var_freqs (v[i]); if (vf->groups != NULL) msg (SE, _("Variables %s specified multiple times on GROUPED " - "subcommand."), v[i]->name); + "subcommand."), var_get_name (v[i])); else { vf->n_groups = nl; @@ -921,12 +928,12 @@ frq_custom_grouped (struct cmd_frequencies *cmd UNUSED) } } free (v); - if (!lex_match ('/')) + if (!lex_match (lexer, '/')) break; - if ((token != T_ID || dict_lookup_var (default_dict, tokid) != NULL) - && token != T_ALL) + if ((lex_token (lexer) != T_ID || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) != NULL) + && lex_token (lexer) != T_ALL) { - lex_put_back ('/'); + lex_put_back (lexer, '/'); break; } } @@ -951,7 +958,7 @@ add_percentile (double x) break; } - if (i >= n_percentiles || tokval != percentiles[i].p) + if (i >= n_percentiles || x != percentiles[i].p) { percentiles = pool_nrealloc (int_pool, percentiles, n_percentiles + 1, sizeof *percentiles); @@ -967,34 +974,16 @@ add_percentile (double x) /* Comparison functions. */ -/* Hash of numeric values. */ -static unsigned -hash_value_numeric (const void *value_, void *foo UNUSED) -{ - const struct freq *value = value_; - return hsh_hash_double (value->v.f); -} - -/* Hash of string values. */ -static unsigned -hash_value_alpha (const void *value_, void *v_) -{ - const struct freq *value = value_; - struct variable *v = v_; - - return hsh_hash_bytes (value->v.s, v->width); -} - /* Ascending numeric compare of values. */ static int -compare_value_numeric_a (const void *a_, const void *b_, void *foo UNUSED) +compare_value_numeric_a (const void *a_, const void *b_, const void *aux UNUSED) { const struct freq *a = a_; const struct freq *b = b_; - if (a->v.f > b->v.f) + if (a->value[0].f > b->value[0].f) return 1; - else if (a->v.f < b->v.f) + else if (a->value[0].f < b->value[0].f) return -1; else return 0; @@ -1002,25 +991,26 @@ compare_value_numeric_a (const void *a_, const void *b_, void *foo UNUSED) /* Ascending string compare of values. */ static int -compare_value_alpha_a (const void *a_, const void *b_, void *v_) +compare_value_alpha_a (const void *a_, const void *b_, const void *v_) { const struct freq *a = a_; const struct freq *b = b_; const struct variable *v = v_; + struct var_freqs *vf = get_var_freqs (v); - return memcmp (a->v.s, b->v.s, v->width); + return memcmp (a->value[0].s, b->value[0].s, vf->width); } /* Descending numeric compare of values. */ static int -compare_value_numeric_d (const void *a, const void *b, void *foo UNUSED) +compare_value_numeric_d (const void *a, const void *b, const void *aux UNUSED) { - return -compare_value_numeric_a (a, b, foo); + return -compare_value_numeric_a (a, b, aux); } /* Descending string compare of values. */ static int -compare_value_alpha_d (const void *a, const void *b, void *v) +compare_value_alpha_d (const void *a, const void *b, const void *v) { return -compare_value_alpha_a (a, b, v); } @@ -1028,19 +1018,19 @@ compare_value_alpha_d (const void *a, const void *b, void *v) /* Ascending numeric compare of frequency; secondary key on ascending numeric value. */ static int -compare_freq_numeric_a (const void *a_, const void *b_, void *foo UNUSED) +compare_freq_numeric_a (const void *a_, const void *b_, const void *aux UNUSED) { const struct freq *a = a_; const struct freq *b = b_; - if (a->c > b->c) + if (a->count > b->count) return 1; - else if (a->c < b->c) + else if (a->count < b->count) return -1; - if (a->v.f > b->v.f) + if (a->value[0].f > b->value[0].f) return 1; - else if (a->v.f < b->v.f) + else if (a->value[0].f < b->value[0].f) return -1; else return 0; @@ -1049,36 +1039,37 @@ compare_freq_numeric_a (const void *a_, const void *b_, void *foo UNUSED) /* Ascending numeric compare of frequency; secondary key on ascending string value. */ static int -compare_freq_alpha_a (const void *a_, const void *b_, void *v_) +compare_freq_alpha_a (const void *a_, const void *b_, const void *v_) { const struct freq *a = a_; const struct freq *b = b_; const struct variable *v = v_; + struct var_freqs *vf = get_var_freqs (v); - if (a->c > b->c) + if (a->count > b->count) return 1; - else if (a->c < b->c) + else if (a->count < b->count) return -1; else - return memcmp (a->v.s, b->v.s, v->width); + return memcmp (a->value[0].s, b->value[0].s, vf->width); } /* Descending numeric compare of frequency; secondary key on ascending numeric value. */ static int -compare_freq_numeric_d (const void *a_, const void *b_, void *foo UNUSED) +compare_freq_numeric_d (const void *a_, const void *b_, const void *aux UNUSED) { const struct freq *a = a_; const struct freq *b = b_; - if (a->c > b->c) + if (a->count > b->count) return -1; - else if (a->c < b->c) + else if (a->count < b->count) return 1; - if (a->v.f > b->v.f) + if (a->value[0].f > b->value[0].f) return 1; - else if (a->v.f < b->v.f) + else if (a->value[0].f < b->value[0].f) return -1; else return 0; @@ -1087,18 +1078,19 @@ compare_freq_numeric_d (const void *a_, const void *b_, void *foo UNUSED) /* Descending numeric compare of frequency; secondary key on ascending string value. */ static int -compare_freq_alpha_d (const void *a_, const void *b_, void *v_) +compare_freq_alpha_d (const void *a_, const void *b_, const void *v_) { const struct freq *a = a_; const struct freq *b = b_; const struct variable *v = v_; + struct var_freqs *vf = get_var_freqs (v); - if (a->c > b->c) + if (a->count > b->count) return -1; - else if (a->c < b->c) + else if (a->count < b->count) return 1; else - return memcmp (a->v.s, b->v.s, v->width); + return memcmp (a->value[0].s, b->value[0].s, vf->width); } /* Frequency table display. */ @@ -1112,9 +1104,9 @@ full_dim (struct tab_table *t, struct outp_driver *d) int i; if (lab) - t->w[0] = min (tab_natural_width (t, d, 0), d->prop_em_width * 15); + t->w[0] = MIN (tab_natural_width (t, d, 0), d->prop_em_width * 15); for (i = lab; i < lab + 5; i++) - t->w[i] = max (tab_natural_width (t, d, i), d->prop_em_width * 8); + t->w[i] = MAX (tab_natural_width (t, d, i), d->prop_em_width * 8); for (i = 0; i < t->nr; i++) t->h[i] = d->font_height; } @@ -1124,6 +1116,7 @@ static void dump_full (struct variable *v) { int n_categories; + struct var_freqs *vf; struct freq_tab *ft; struct freq *f; struct tab_table *t; @@ -1137,9 +1130,9 @@ dump_full (struct variable *v) const char *s; }; - struct init *p; + const struct init *p; - static struct init vec[] = + static const struct init vec[] = { {4, 0, N_("Valid")}, {5, 0, N_("Cum")}, @@ -1157,7 +1150,8 @@ dump_full (struct variable *v) int lab = cmd.labels == FRQ_LABELS; - ft = &get_var_freqs (v)->tab; + vf = get_var_freqs (v); + ft = &vf->tab; n_categories = ft->n_valid + ft->n_missing; t = tab_create (5 + lab, n_categories + 3, 0); tab_headers (t, 0, 0, 2, 0); @@ -1174,21 +1168,26 @@ dump_full (struct variable *v) { double percent, valid_percent; - cum_freq += f->c; + cum_freq += f->count; - percent = f->c / ft->total_cases * 100.0; - valid_percent = f->c / ft->valid_cases * 100.0; + percent = f->count / ft->total_cases * 100.0; + valid_percent = f->count / ft->valid_cases * 100.0; cum_total += valid_percent; if (lab) { - const char *label = val_labs_find (v->val_labs, f->v); +#if 0 +<<<<<<< frequencies.q + const char *label = val_labs_find (v->val_labs, f->value[0]); +======= +#endif + const char *label = var_lookup_value_label (v, &f->value[0]); if (label != NULL) tab_text (t, 0, r, TAB_LEFT, label); } - tab_value (t, 0 + lab, r, TAB_NONE, &f->v, &v->print); - tab_float (t, 1 + lab, r, TAB_NONE, f->c, 8, 0); + tab_value (t, 0 + lab, r, TAB_NONE, f->value, &vf->print); + tab_float (t, 1 + lab, r, TAB_NONE, f->count, 8, 0); tab_float (t, 2 + lab, r, TAB_NONE, percent, 5, 1); tab_float (t, 3 + lab, r, TAB_NONE, valid_percent, 5, 1); tab_float (t, 4 + lab, r, TAB_NONE, cum_total, 5, 1); @@ -1196,25 +1195,25 @@ dump_full (struct variable *v) } for (; f < &ft->valid[n_categories]; f++) { - cum_freq += f->c; + cum_freq += f->count; if (lab) { - const char *label = val_labs_find (v->val_labs, f->v); + const char *label = var_lookup_value_label (v, &f->value[0]); if (label != NULL) tab_text (t, 0, r, TAB_LEFT, label); } - tab_value (t, 0 + lab, r, TAB_NONE, &f->v, &v->print); - tab_float (t, 1 + lab, r, TAB_NONE, f->c, 8, 0); + tab_value (t, 0 + lab, r, TAB_NONE, f->value, &vf->print); + tab_float (t, 1 + lab, r, TAB_NONE, f->count, 8, 0); tab_float (t, 2 + lab, r, TAB_NONE, - f->c / ft->total_cases * 100.0, 5, 1); + f->count / ft->total_cases * 100.0, 5, 1); tab_text (t, 3 + lab, r, TAB_NONE, _("Missing")); r++; } tab_box (t, TAL_1, TAL_1, - cmd.spaces == FRQ_SINGLE ? -1 : (TAL_1 | TAL_SPACING), TAL_1, + cmd.spaces == FRQ_SINGLE ? -1 : TAL_GAP, TAL_1, 0, 0, 4 + lab, r); tab_hline (t, TAL_2, 0, 4 + lab, 2); tab_hline (t, TAL_2, 0, 4 + lab, r); @@ -1224,9 +1223,8 @@ dump_full (struct variable *v) tab_float (t, 2 + lab, r, TAB_NONE, 100.0, 5, 1); tab_float (t, 3 + lab, r, TAB_NONE, 100.0, 5, 1); - tab_title (t, 1, "%s: %s", v->name, v->label ? v->label : ""); + tab_title (t, "%s", var_to_string (v)); tab_submit (t); - } /* Sets the widths of all the columns and heights of all the rows in @@ -1234,14 +1232,14 @@ dump_full (struct variable *v) static void condensed_dim (struct tab_table *t, struct outp_driver *d) { - int cum_w = max (outp_string_width (d, _("Cum")), - max (outp_string_width (d, _("Cum")), - outp_string_width (d, "000"))); + int cum_w = MAX (outp_string_width (d, _("Cum"), OUTP_PROPORTIONAL), + MAX (outp_string_width (d, _("Cum"), OUTP_PROPORTIONAL), + outp_string_width (d, "000", OUTP_PROPORTIONAL))); int i; for (i = 0; i < 2; i++) - t->w[i] = max (tab_natural_width (t, d, i), d->prop_em_width * 8); + t->w[i] = MAX (tab_natural_width (t, d, i), d->prop_em_width * 8); for (i = 2; i < 4; i++) t->w[i] = cum_w; for (i = 0; i < t->nr; i++) @@ -1253,13 +1251,15 @@ static void dump_condensed (struct variable *v) { int n_categories; + struct var_freqs *vf; struct freq_tab *ft; struct freq *f; struct tab_table *t; int r; double cum_total = 0.0; - ft = &get_var_freqs (v)->tab; + vf = get_var_freqs (v); + ft = &vf->tab; n_categories = ft->n_valid + ft->n_missing; t = tab_create (4, n_categories + 2, 0); @@ -1276,29 +1276,29 @@ dump_condensed (struct variable *v) { double percent; - percent = f->c / ft->total_cases * 100.0; - cum_total += f->c / ft->valid_cases * 100.0; + percent = f->count / ft->total_cases * 100.0; + cum_total += f->count / ft->valid_cases * 100.0; - tab_value (t, 0, r, TAB_NONE, &f->v, &v->print); - tab_float (t, 1, r, TAB_NONE, f->c, 8, 0); + tab_value (t, 0, r, TAB_NONE, f->value, &vf->print); + tab_float (t, 1, r, TAB_NONE, f->count, 8, 0); tab_float (t, 2, r, TAB_NONE, percent, 3, 0); tab_float (t, 3, r, TAB_NONE, cum_total, 3, 0); r++; } for (; f < &ft->valid[n_categories]; f++) { - tab_value (t, 0, r, TAB_NONE, &f->v, &v->print); - tab_float (t, 1, r, TAB_NONE, f->c, 8, 0); + tab_value (t, 0, r, TAB_NONE, f->value, &vf->print); + tab_float (t, 1, r, TAB_NONE, f->count, 8, 0); tab_float (t, 2, r, TAB_NONE, - f->c / ft->total_cases * 100.0, 3, 0); + f->count / ft->total_cases * 100.0, 3, 0); r++; } tab_box (t, TAL_1, TAL_1, - cmd.spaces == FRQ_SINGLE ? -1 : (TAL_1 | TAL_SPACING), TAL_1, + cmd.spaces == FRQ_SINGLE ? -1 : TAL_GAP, TAL_1, 0, 0, 3, r - 1); tab_hline (t, TAL_2, 0, 3, 2); - tab_title (t, 1, "%s: %s", v->name, v->label ? v->label : ""); + tab_title (t, "%s", var_to_string (v)); tab_columns (t, SOM_COL_DOWN, 1); tab_submit (t); } @@ -1353,7 +1353,7 @@ calc_stats (struct variable *v, double d[frq_n_stats]) { static double prev_value = SYSMIS; f = &ft->valid[idx]; - rank += f->c ; + rank += f->count ; for (i = 0; i < n_percentiles; i++) { double tp; @@ -1368,7 +1368,7 @@ calc_stats (struct variable *v, double d[frq_n_stats]) if ( percentiles[i].flag ) { - percentiles[i].x2 = f->v.f; + percentiles[i].x2 = f->value[0].f; percentiles[i].x1 = prev_value; percentiles[i].flag2 = 1; continue; @@ -1376,9 +1376,9 @@ calc_stats (struct variable *v, double d[frq_n_stats]) if (rank > tp ) { - if ( f->c > 1 && rank - (f->c - 1) > tp ) + if ( f->count > 1 && rank - (f->count - 1) > tp ) { - percentiles[i].x2 = percentiles[i].x1 = f->v.f; + percentiles[i].x2 = percentiles[i].x1 = f->value[0].f; percentiles[i].flag2 = 1; } else @@ -1389,14 +1389,14 @@ calc_stats (struct variable *v, double d[frq_n_stats]) continue; } } - prev_value = f->v.f; + prev_value = f->value[0].f; } for (i = 0; i < n_percentiles; i++) { /* Catches the case when p == 100% */ if ( ! percentiles[i].flag2 ) - percentiles[i].x1 = percentiles[i].x2 = f->v.f; + percentiles[i].x1 = percentiles[i].x2 = f->value[0].f; /* printf("percentile %d (p==%.2f); X1 = %g; X2 = %g\n", @@ -1432,12 +1432,12 @@ calc_stats (struct variable *v, double d[frq_n_stats]) X_mode = SYSMIS; for (f = ft->valid; f < ft->missing; f++) { - if (most_often < f->c) + if (most_often < f->count) { - most_often = f->c; - X_mode = f->v.f; + most_often = f->count; + X_mode = f->value[0].f; } - else if (most_often == f->c) + else if (most_often == f->count) { /* A duplicate mode is undefined. FIXME: keep track of *all* the modes. */ @@ -1448,16 +1448,16 @@ calc_stats (struct variable *v, double d[frq_n_stats]) /* Calculate moments. */ m = moments_create (MOMENT_KURTOSIS); for (f = ft->valid; f < ft->missing; f++) - moments_pass_one (m, f->v.f, f->c); + moments_pass_one (m, f->value[0].f, f->count); for (f = ft->valid; f < ft->missing; f++) - moments_pass_two (m, f->v.f, f->c); + moments_pass_two (m, f->value[0].f, f->count); moments_calculate (m, NULL, &d[frq_mean], &d[frq_variance], &d[frq_skew], &d[frq_kurt]); moments_destroy (m); /* Formulas below are taken from _SPSS Statistical Algorithms_. */ - d[frq_min] = ft->valid[0].v.f; - d[frq_max] = ft->valid[ft->n_valid - 1].v.f; + d[frq_min] = ft->valid[0].value[0].f; + d[frq_max] = ft->valid[ft->n_valid - 1].value[0].f; d[frq_mode] = X_mode; d[frq_range] = d[frq_max] - d[frq_min]; d[frq_median] = *median_value; @@ -1482,13 +1482,13 @@ dump_statistics (struct variable *v, int show_varname) if ( implicit_50th && n_percentiles > 0 ) --n_percentiles; - if (v->type == ALPHA) + if (var_is_alpha (v)) return; ft = &get_var_freqs (v)->tab; if (ft->n_valid == 0) { msg (SW, _("No valid data for variable %s; statistics not displayed."), - v->name); + var_get_name (v)); return; } calc_stats (v, stat_value); @@ -1500,7 +1500,7 @@ dump_statistics (struct variable *v, int show_varname) tab_vline (t, TAL_1 , 2, 0, tab_nr(t) - 1); - tab_vline (t, TAL_1 | TAL_SPACING , 1, 0, tab_nr(t) - 1 ) ; + tab_vline (t, TAL_GAP , 1, 0, tab_nr(t) - 1 ) ; r=2; /* N missing and N valid are always dumped */ @@ -1535,12 +1535,7 @@ dump_statistics (struct variable *v, int show_varname) tab_columns (t, SOM_COL_DOWN, 1); if (show_varname) - { - if (v->label) - tab_title (t, 1, "%s: %s", v->name, v->label); - else - tab_title (t, 0, v->name); - } + tab_title (t, "%s", var_to_string (v)); else tab_flags (t, SOMF_NO_TITLE); @@ -1567,11 +1562,11 @@ freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var) /* Find out the extremes of the x value */ for ( frq = hsh_first(fh, &hi); frq != 0; frq = hsh_next(fh, &hi) ) { - if ( mv_is_value_missing(&var->miss, &frq->v)) + if ( var_is_value_missing(var, frq->value)) continue; - if ( frq->v.f < x_min ) x_min = frq->v.f ; - if ( frq->v.f > x_max ) x_max = frq->v.f ; + if ( frq->value[0].f < x_min ) x_min = frq->value[0].f ; + if ( frq->value[0].f > x_max ) x_max = frq->value[0].f ; } hist = histogram_create(bins, x_min, x_max); @@ -1579,7 +1574,7 @@ freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var) for( i = 0 ; i < ft->n_valid ; ++i ) { frq = &ft->valid[i]; - gsl_histogram_accumulate(hist, frq->v.f, frq->c); + gsl_histogram_accumulate(hist, frq->value[0].f, frq->count); } return hist; @@ -1612,9 +1607,8 @@ freq_tab_to_slice_array(const struct freq_tab *frq_tab, { const struct freq *frq = &frq_tab->valid[i]; - slices[i].label = value_to_string(&frq->v, var); - - slices[i].magnetude = frq->c; + slices[i].label = var_get_value_name (var, frq->value); + slices[i].magnetude = frq->count; } return slices;