X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Ffrequencies.q;h=83b864c9f695c4494c5b5f2204b9d4ecd0054c46;hb=44c2b035a7526fdf366271de915fc774df302f96;hp=5c3de42cac15096a53f9e1eb7b7e1b5cd8bd114b;hpb=82a9572a7a0ec2f7fc572cc9807bc5205a5e8a8d;p=pspp-builds.git diff --git a/src/language/stats/frequencies.q b/src/language/stats/frequencies.q index 5c3de42c..83b864c9 100644 --- a/src/language/stats/frequencies.q +++ b/src/language/stats/frequencies.q @@ -1,21 +1,18 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. - Written by Ben Pfaff . +/* PSPP - a program for statistical analysis. + Copyright (C) 1997-9, 2000, 2007, 2009 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ /* TODO: @@ -30,7 +27,10 @@ #include #include +#include +#include #include +#include #include #include #include @@ -38,13 +38,10 @@ #include #include #include -#include #include #include #include #include -#include -#include #include #include #include @@ -58,7 +55,10 @@ #include #include +#include "freq.h" + #include "minmax.h" +#include "xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -70,7 +70,7 @@ FREQUENCIES (frq_): *+variables=custom; +format=cond:condense/onepage(*n:onepage_limit,"%s>=0")/!standard, - table:limit(n:limit,"%s>0")/notable/!table, + table:limit(n:limit,"%s>0")/notable/!table, labels:!labels/nolabels, sort:!avalue/dvalue/afreq/dfreq, spaces:!single/double, @@ -118,7 +118,7 @@ struct frq_info }; /* Table of statistics, indexed by dsc_*. */ -static struct frq_info st_name[frq_n_stats + 1] = +static const struct frq_info st_name[frq_n_stats + 1] = { {FRQ_ST_MEAN, N_("Mean")}, {FRQ_ST_SEMEAN, N_("S.E. Mean")}, @@ -145,7 +145,7 @@ struct percentile double value; /* the %ile's value */ double x1; /* The datum value <= the percentile */ double x2; /* The datum value >= the percentile */ - int flag; + int flag; int flag2; /* Set to 1 if this percentile value has been found */ }; @@ -155,8 +155,6 @@ static void add_percentile (double x) ; static struct percentile *percentiles; static int n_percentiles; -static int implicit_50th ; - /* Groups of statistics. */ #define BI BIT_INDEX #define frq_default \ @@ -195,47 +193,24 @@ static int normal; /* FIXME */ /* Variables for which to calculate statistics. */ static size_t n_variables; -static struct variable **v_variables; +static const struct variable **v_variables; -/* Arenas used to store semi-permanent storage. */ -static struct pool *int_pool; /* Integer mode. */ -static struct pool *gen_pool; /* General mode. */ +/* Pools. */ +static struct pool *data_pool; /* For per-SPLIT FILE group data. */ +static struct pool *syntax_pool; /* For syntax-related data. */ /* Frequency tables. */ -/* Frequency table entry. */ -struct freq - { - union value *v; /* The value. */ - double c; /* The number of occurrences of the value. */ - }; - -/* Types of frequency tables. */ -enum - { - FRQM_GENERAL, - FRQM_INTEGER - }; - /* Entire frequency table. */ struct freq_tab { - int mode; /* FRQM_GENERAL or FRQM_INTEGER. */ - - /* General mode. */ struct hsh_table *data; /* Undifferentiated data. */ - - /* Integer mode. */ - double *vector; /* Frequencies proper. */ - int min, max; /* The boundaries of the table. */ - double out_of_range; /* Sum of weights of out-of-range values. */ - double sysmis; /* Sum of weights of SYSMIS values. */ - - /* All modes. */ - struct freq *valid; /* Valid freqs. */ + struct freq_mutable *valid; /* Valid freqs. */ int n_valid; /* Number of total freqs. */ + const struct dictionary *dict; /* The dict from whence entries in the table + come */ - struct freq *missing; /* Missing freqs. */ + struct freq_mutable *missing; /* Missing freqs. */ int n_missing; /* Number of missing freqs. */ /* Statistics. */ @@ -257,38 +232,31 @@ struct var_freqs /* Statistics. */ double stat[frq_n_stats]; - /* Width and format for analysis and display. - This is normally the same as "width" and "print" in struct - variable, but in SPSS-compatible mode only the first - MAX_SHORT_STRING bytes of long string variables are - included. */ + /* Variable attributes. */ int width; struct fmt_spec print; }; static inline struct var_freqs * -get_var_freqs (struct variable *v) +get_var_freqs (const struct variable *v) { - assert (v != NULL); - assert (v->aux != NULL); - return v->aux; + return var_get_aux (v); } static void determine_charts (void); -static void calc_stats (struct variable *v, double d[frq_n_stats]); +static void calc_stats (const struct variable *v, double d[frq_n_stats]); -static void precalc (const struct ccase *, void *); -static bool calc (const struct ccase *, void *); -static void postcalc (void *); +static void precalc (struct casereader *, struct dataset *); +static void calc (const struct ccase *, const struct dataset *); +static void postcalc (const struct dataset *); -static void postprocess_freq_tab (struct variable *); -static void dump_full (struct variable *); -static void dump_condensed (struct variable *); -static void dump_statistics (struct variable *, int show_varname); -static void cleanup_freq_tab (struct variable *); +static void postprocess_freq_tab (const struct variable *); +static void dump_full ( const struct variable *, const struct variable *); +static void dump_condensed (const struct variable *, const struct variable *); +static void dump_statistics (const struct variable *, bool show_varname, const struct variable *); +static void cleanup_freq_tab (const struct variable *); -static hsh_hash_func hash_value_numeric, hash_value_alpha; static hsh_compare_func compare_value_numeric_a, compare_value_alpha_a; static hsh_compare_func compare_value_numeric_d, compare_value_alpha_d; static hsh_compare_func compare_freq_numeric_a, compare_freq_alpha_a; @@ -298,36 +266,38 @@ static hsh_compare_func compare_freq_numeric_d, compare_freq_alpha_d; static void do_piechart(const struct variable *var, const struct freq_tab *frq_tab); -gsl_histogram * +struct histogram * freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var); /* Parser and outline. */ -static int internal_cmd_frequencies (void); +static int internal_cmd_frequencies (struct lexer *lexer, struct dataset *ds); int -cmd_frequencies (void) +cmd_frequencies (struct lexer *lexer, struct dataset *ds) { int result; - int_pool = pool_create (); - result = internal_cmd_frequencies (); - pool_destroy (int_pool); - int_pool=0; - pool_destroy (gen_pool); - gen_pool=0; + syntax_pool = pool_create (); + result = internal_cmd_frequencies (lexer, ds); + pool_destroy (syntax_pool); + syntax_pool=0; + pool_destroy (data_pool); + data_pool=0; free (v_variables); v_variables=0; return result; } static int -internal_cmd_frequencies (void) +internal_cmd_frequencies (struct lexer *lexer, struct dataset *ds) { - int i; + struct casegrouper *grouper; + struct casereader *input, *group; bool ok; + int i; n_percentiles = 0; percentiles = NULL; @@ -335,10 +305,10 @@ internal_cmd_frequencies (void) n_variables = 0; v_variables = NULL; - if (!parse_frequencies (&cmd, NULL)) + if (!parse_frequencies (lexer, ds, &cmd, NULL)) return CMD_FAILURE; - if (cmd.onepage_limit == NOT_LONG) + if (cmd.onepage_limit == LONG_MIN) cmd.onepage_limit = 50; /* Figure out statistics to calculate. */ @@ -348,14 +318,14 @@ internal_cmd_frequencies (void) if (cmd.a_statistics[FRQ_ST_ALL]) stats |= frq_all; if (cmd.sort != FRQ_AVALUE && cmd.sort != FRQ_DVALUE) - stats &= ~frq_median; + stats &= ~BIT_INDEX (frq_median); for (i = 0; i < frq_n_stats; i++) if (cmd.a_statistics[st_name[i].st_indx]) stats |= BIT_INDEX (i); if (stats & frq_kurt) - stats |= frq_sekurt; + stats |= BIT_INDEX (frq_sekurt); if (stats & frq_skew) - stats |= frq_seskew; + stats |= BIT_INDEX (frq_seskew); /* Calculate n_stats. */ n_stats = 0; @@ -369,29 +339,50 @@ internal_cmd_frequencies (void) cmd.sort = FRQ_AVALUE; /* Work out what percentiles need to be calculated */ - if ( cmd.sbc_percentiles ) + if ( cmd.sbc_percentiles ) { - for ( i = 0 ; i < MAXLISTS ; ++i ) + for ( i = 0 ; i < MAXLISTS ; ++i ) { int pl; subc_list_double *ptl_list = &cmd.dl_percentiles[i]; for ( pl = 0 ; pl < subc_list_double_count(ptl_list); ++pl) - add_percentile(subc_list_double_at(ptl_list,pl) / 100.0 ); + add_percentile (subc_list_double_at(ptl_list, pl) / 100.0 ); } } - if ( cmd.sbc_ntiles ) + if ( cmd.sbc_ntiles ) { - for ( i = 0 ; i < cmd.sbc_ntiles ; ++i ) + for ( i = 0 ; i < cmd.sbc_ntiles ; ++i ) { int j; - for (j = 0; j <= cmd.n_ntiles[i]; ++j ) - add_percentile(j / (double) cmd.n_ntiles[i]); + for (j = 0; j <= cmd.n_ntiles[i]; ++j ) + add_percentile (j / (double) cmd.n_ntiles[i]); } } - + if (stats & BIT_INDEX (frq_median)) + { + /* Treat the median as the 50% percentile. + We output it in the percentiles table as "50 (Median)." */ + add_percentile (0.5); + stats &= ~BIT_INDEX (frq_median); + n_stats--; + } /* Do it! */ - ok = procedure_with_splits (precalc, calc, postcalc, NULL); + input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds), + NULL, NULL); + grouper = casegrouper_create_splits (input, dataset_dict (ds)); + for (; casegrouper_get_next_group (grouper, &group); + casereader_destroy (group)) + { + struct ccase *c; + + precalc (group, ds); + for (; (c = casereader_read (group)) != NULL; case_unref (c)) + calc (c, ds); + postcalc (ds); + } + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; free_frequencies(&cmd); @@ -402,7 +393,7 @@ internal_cmd_frequencies (void) static void determine_charts (void) { - int count = (!!cmd.sbc_histogram) + (!!cmd.sbc_barchart) + + int count = (!!cmd.sbc_histogram) + (!!cmd.sbc_barchart) + (!!cmd.sbc_hbar) + (!!cmd.sbc_piechart); if (!count) @@ -504,121 +495,77 @@ determine_charts (void) } /* Add data from case C to the frequency table. */ -static bool -calc (const struct ccase *c, void *aux UNUSED) +static void +calc (const struct ccase *c, const struct dataset *ds) { - double weight; + double weight = dict_get_case_weight (dataset_dict (ds), c, NULL); size_t i; - int bad_warn = 1; - - weight = dict_get_case_weight (default_dict, c, &bad_warn); for (i = 0; i < n_variables; i++) { - struct variable *v = v_variables[i]; - const union value *val = case_data (c, v->fv); + const struct variable *v = v_variables[i]; + const union value *val = case_data (c, v); struct var_freqs *vf = get_var_freqs (v); struct freq_tab *ft = &vf->tab; - switch (ft->mode) - { - case FRQM_GENERAL: - { - /* General mode. */ - struct freq target; - struct freq **fpp; - - target.v = (union value *) val; - fpp = (struct freq **) hsh_probe (ft->data, &target); - - if (*fpp != NULL) - (*fpp)->c += weight; - else - { - struct freq *fp = pool_alloc (gen_pool, sizeof *fp); - fp->c = weight; - fp->v = pool_clone (gen_pool, - val, MAX (MAX_SHORT_STRING, vf->width)); - *fpp = fp; - } - } - break; - case FRQM_INTEGER: - /* Integer mode. */ - if (val->f == SYSMIS) - ft->sysmis += weight; - else if (val->f > INT_MIN+1 && val->f < INT_MAX-1) - { - int i = val->f; - if (i >= ft->min && i <= ft->max) - ft->vector[i - ft->min] += weight; - } - else - ft->out_of_range += weight; - break; - default: - assert (0); - } + struct freq_mutable target; + struct freq_mutable **fpp; + + target.value = *val; + fpp = (struct freq_mutable **) hsh_probe (ft->data, &target); + + if (*fpp != NULL) + (*fpp)->count += weight; + else + { + struct freq_mutable *fp = pool_alloc (data_pool, sizeof *fp); + fp->count = weight; + value_init_pool (data_pool, &fp->value, vf->width); + value_copy (&fp->value, val, vf->width); + *fpp = fp; + } } - return true; } /* Prepares each variable that is the target of FREQUENCIES by setting up its hash table. */ static void -precalc (const struct ccase *first, void *aux UNUSED) +precalc (struct casereader *input, struct dataset *ds) { + struct ccase *c; size_t i; - output_split_file_values (first); + c = casereader_peek (input, 0); + if (c != NULL) + { + output_split_file_values (ds, c); + case_unref (c); + } + + pool_destroy (data_pool); + data_pool = pool_create (); - pool_destroy (gen_pool); - gen_pool = pool_create (); - for (i = 0; i < n_variables; i++) { - struct variable *v = v_variables[i]; + const struct variable *v = v_variables[i]; struct freq_tab *ft = &get_var_freqs (v)->tab; - if (ft->mode == FRQM_GENERAL) - { - hsh_hash_func *hash; - hsh_compare_func *compare; - - if (v->type == NUMERIC) - { - hash = hash_value_numeric; - compare = compare_value_numeric_a; - } - else - { - hash = hash_value_alpha; - compare = compare_value_alpha_a; - } - ft->data = hsh_create (16, compare, hash, NULL, v); - } - else - { - int j; - - for (j = (ft->max - ft->min); j >= 0; j--) - ft->vector[j] = 0.0; - ft->out_of_range = 0.0; - ft->sysmis = 0.0; - } + ft->data = hsh_create (16, compare_freq, hash_freq, NULL, v); } } /* Finishes up with the variables after frequencies have been calculated. Displays statistics, percentiles, ... */ static void -postcalc (void *aux UNUSED) +postcalc (const struct dataset *ds) { + const struct dictionary *dict = dataset_dict (ds); + const struct variable *wv = dict_get_weight (dict); size_t i; for (i = 0; i < n_variables; i++) { - struct variable *v = v_variables[i]; + const struct variable *v = v_variables[i]; struct var_freqs *vf = get_var_freqs (v); struct freq_tab *ft = &vf->tab; int n_categories; @@ -633,110 +580,103 @@ postcalc (void *aux UNUSED) switch (cmd.cond) { case FRQ_CONDENSE: - dump_condensed (v); + dump_condensed (v, wv); break; case FRQ_STANDARD: - dump_full (v); + dump_full (v, wv); break; case FRQ_ONEPAGE: if (n_categories > cmd.onepage_limit) - dump_condensed (v); + dump_condensed (v, wv); else - dump_full (v); + dump_full (v, wv); break; default: - assert (0); + NOT_REACHED (); } else dumped_freq_tab = 0; /* Statistics. */ if (n_stats) - dump_statistics (v, !dumped_freq_tab); + dump_statistics (v, !dumped_freq_tab, wv); - if ( chart == GFT_HIST) + if ( chart == GFT_HIST && var_is_numeric (v) ) { double d[frq_n_stats]; - struct normal_curve norm; - gsl_histogram *hist ; + struct histogram *hist ; + calc_stats (v, d); - norm.N = vf->tab.valid_cases; + hist = freq_tab_to_hist (ft,v); - calc_stats(v,d); - norm.mean = d[frq_mean]; - norm.stddev = d[frq_stddev]; + histogram_plot_n (hist, var_to_string(v), + vf->tab.valid_cases, + d[frq_mean], + d[frq_stddev], + normal); - hist = freq_tab_to_hist(ft,v); - - histogram_plot(hist, var_to_string(v), &norm, normal); - - gsl_histogram_free(hist); + statistic_destroy ((struct statistic *)hist); } - - if ( chart == GFT_PIE) + if ( chart == GFT_PIE) { do_piechart(v_variables[i], ft); } - - cleanup_freq_tab (v); } } /* Returns the comparison function that should be used for - sorting a frequency table by FRQ_SORT using VAR_TYPE - variables. */ + sorting a frequency table by FRQ_SORT using VAL_TYPE + values. */ static hsh_compare_func * -get_freq_comparator (int frq_sort, int var_type) +get_freq_comparator (int frq_sort, enum val_type val_type) { - /* Note that q2c generates tags beginning with 1000. */ - switch (frq_sort | (var_type << 16)) + bool is_numeric = val_type == VAL_NUMERIC; + switch (frq_sort) { - case FRQ_AVALUE | (NUMERIC << 16): return compare_value_numeric_a; - case FRQ_AVALUE | (ALPHA << 16): return compare_value_alpha_a; - case FRQ_DVALUE | (NUMERIC << 16): return compare_value_numeric_d; - case FRQ_DVALUE | (ALPHA << 16): return compare_value_alpha_d; - case FRQ_AFREQ | (NUMERIC << 16): return compare_freq_numeric_a; - case FRQ_AFREQ | (ALPHA << 16): return compare_freq_alpha_a; - case FRQ_DFREQ | (NUMERIC << 16): return compare_freq_numeric_d; - case FRQ_DFREQ | (ALPHA << 16): return compare_freq_alpha_d; - default: assert (0); + case FRQ_AVALUE: + return is_numeric ? compare_value_numeric_a : compare_value_alpha_a; + case FRQ_DVALUE: + return is_numeric ? compare_value_numeric_d : compare_value_alpha_d; + case FRQ_AFREQ: + return is_numeric ? compare_freq_numeric_a : compare_freq_alpha_a; + case FRQ_DFREQ: + return is_numeric ? compare_freq_numeric_d : compare_freq_alpha_d; + default: + NOT_REACHED (); } - - return 0; } -/* Returns nonzero iff the value in struct freq F is non-missing +/* Returns true iff the value in struct freq_mutable F is non-missing for variable V. */ -static int -not_missing (const void *f_, void *v_) +static bool +not_missing (const void *f_, const void *v_) { - const struct freq *f = f_; - struct variable *v = v_; + const struct freq_mutable *f = f_; + const struct variable *v = v_; - return !mv_is_value_missing (&v->miss, f->v); + return !var_is_value_missing (v, &f->value, MV_ANY); } /* Summarizes the frequency table data for variable V. */ static void -postprocess_freq_tab (struct variable *v) +postprocess_freq_tab (const struct variable *v) { hsh_compare_func *compare; struct freq_tab *ft; size_t count; void *const *data; - struct freq *freqs, *f; + struct freq_mutable *freqs, *f; size_t i; ft = &get_var_freqs (v)->tab; - assert (ft->mode == FRQM_GENERAL); - compare = get_freq_comparator (cmd.sort, v->type); + compare = get_freq_comparator (cmd.sort, var_get_type (v)); /* Extract data from hash table. */ count = hsh_count (ft->data); @@ -744,10 +684,10 @@ postprocess_freq_tab (struct variable *v) /* Copy dereferenced data into freqs. */ freqs = xnmalloc (count, sizeof *freqs); - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { - struct freq *f = data[i]; - freqs[i] = *f; + struct freq_mutable *f = data[i]; + freqs[i] = *f; } /* Put data into ft. */ @@ -762,28 +702,27 @@ postprocess_freq_tab (struct variable *v) /* Summary statistics. */ ft->valid_cases = 0.0; - for(i = 0 ; i < ft->n_valid ; ++i ) + for(i = 0 ; i < ft->n_valid ; ++i ) { f = &ft->valid[i]; - ft->valid_cases += f->c; + ft->valid_cases += f->count; } - ft->total_cases = ft->valid_cases ; - for(i = 0 ; i < ft->n_missing ; ++i ) + ft->total_cases = ft->valid_cases ; + for(i = 0 ; i < ft->n_missing ; ++i ) { f = &ft->missing[i]; - ft->total_cases += f->c; + ft->total_cases += f->count; } } /* Frees the frequency table for variable V. */ static void -cleanup_freq_tab (struct variable *v) +cleanup_freq_tab (const struct variable *v) { struct freq_tab *ft = &get_var_freqs (v)->tab; - assert (ft->mode == FRQM_GENERAL); free (ft->valid); hsh_destroy (ft->data); } @@ -791,87 +730,38 @@ cleanup_freq_tab (struct variable *v) /* Parses the VARIABLES subcommand, adding to {n_variables,v_variables}. */ static int -frq_custom_variables (struct cmd_frequencies *cmd UNUSED, void *aux UNUSED) +frq_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_frequencies *cmd UNUSED, void *aux UNUSED) { - int mode; - int min = 0, max = 0; - size_t old_n_variables = n_variables; size_t i; - lex_match ('='); - if (token != T_ALL && (token != T_ID - || dict_lookup_var (default_dict, tokid) == NULL)) + lex_match (lexer, '='); + if (lex_token (lexer) != T_ALL && (lex_token (lexer) != T_ID + || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)) return 2; - if (!parse_variables (default_dict, &v_variables, &n_variables, + if (!parse_variables_const (lexer, dataset_dict (ds), &v_variables, &n_variables, PV_APPEND | PV_NO_SCRATCH)) return 0; - if (!lex_match ('(')) - mode = FRQM_GENERAL; - else - { - mode = FRQM_INTEGER; - if (!lex_force_int ()) - return 0; - min = lex_integer (); - lex_get (); - if (!lex_force_match (',')) - return 0; - if (!lex_force_int ()) - return 0; - max = lex_integer (); - lex_get (); - if (!lex_force_match (')')) - return 0; - if (max < min) - { - msg (SE, _("Upper limit of integer mode value range must be " - "greater than lower limit.")); - return 0; - } - } - for (i = old_n_variables; i < n_variables; i++) { - struct variable *v = v_variables[i]; + const struct variable *v = v_variables[i]; struct var_freqs *vf; - if (v->aux != NULL) + if (var_get_aux (v) != NULL) { msg (SE, _("Variable %s specified multiple times on VARIABLES " - "subcommand."), v->name); + "subcommand."), var_get_name (v)); return 0; } - if (mode == FRQM_INTEGER && v->type != NUMERIC) - { - msg (SE, _("Integer mode specified, but %s is not a numeric " - "variable."), v->name); - return 0; - } - vf = var_attach_aux (v, xmalloc (sizeof *vf), var_dtor_free); - vf->tab.mode = mode; vf->tab.valid = vf->tab.missing = NULL; - if (mode == FRQM_INTEGER) - { - vf->tab.min = min; - vf->tab.max = max; - vf->tab.vector = pool_nalloc (int_pool, - max - min + 1, sizeof *vf->tab.vector); - } - else - vf->tab.vector = NULL; + vf->tab.dict = dataset_dict (ds); vf->n_groups = 0; vf->groups = NULL; - vf->width = v->width; - vf->print = v->print; - if (vf->width > MAX_SHORT_STRING && get_algorithm () == COMPATIBLE) - { - vf->width = MAX_SHORT_STRING; - vf->print.w = MAX_SHORT_STRING * (v->print.type == FMT_AHEX ? 2 : 1); - } + vf->width = var_get_width (v); + vf->print = *var_get_print_format (v); } return 1; } @@ -879,11 +769,11 @@ frq_custom_variables (struct cmd_frequencies *cmd UNUSED, void *aux UNUSED) /* Parses the GROUPED subcommand, setting the n_grouped, grouped fields of specified variables. */ static int -frq_custom_grouped (struct cmd_frequencies *cmd UNUSED, void *aux UNUSED) +frq_custom_grouped (struct lexer *lexer, struct dataset *ds, struct cmd_frequencies *cmd UNUSED, void *aux UNUSED) { - lex_match ('='); - if ((token == T_ID && dict_lookup_var (default_dict, tokid) != NULL) - || token == T_ID) + lex_match (lexer, '='); + if ((lex_token (lexer) == T_ID && dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) != NULL) + || lex_token (lexer) == T_ID) for (;;) { size_t i; @@ -894,52 +784,52 @@ frq_custom_grouped (struct cmd_frequencies *cmd UNUSED, void *aux UNUSED) /* Variable list. */ size_t n; - struct variable **v; + const struct variable **v; - if (!parse_variables (default_dict, &v, &n, + if (!parse_variables_const (lexer, dataset_dict (ds), &v, &n, PV_NO_DUPLICATE | PV_NUMERIC)) return 0; - if (lex_match ('(')) + if (lex_match (lexer, '(')) { nl = ml = 0; dl = NULL; - while (lex_integer ()) + while (lex_integer (lexer)) { if (nl >= ml) { ml += 16; - dl = pool_nrealloc (int_pool, dl, ml, sizeof *dl); + dl = pool_nrealloc (syntax_pool, dl, ml, sizeof *dl); } - dl[nl++] = tokval; - lex_get (); - lex_match (','); + dl[nl++] = lex_tokval (lexer); + lex_get (lexer); + lex_match (lexer, ','); } /* Note that nl might still be 0 and dl might still be NULL. That's okay. */ - if (!lex_match (')')) + if (!lex_match (lexer, ')')) { free (v); msg (SE, _("`)' expected after GROUPED interval list.")); return 0; } } - else + else { nl = 0; dl = NULL; } for (i = 0; i < n; i++) - if (v[i]->aux == NULL) + if (var_get_aux (v[i]) == NULL) msg (SE, _("Variables %s specified on GROUPED but not on " - "VARIABLES."), v[i]->name); - else + "VARIABLES."), var_get_name (v[i])); + else { struct var_freqs *vf = get_var_freqs (v[i]); - + if (vf->groups != NULL) msg (SE, _("Variables %s specified multiple times on GROUPED " - "subcommand."), v[i]->name); + "subcommand."), var_get_name (v[i])); else { vf->n_groups = nl; @@ -947,12 +837,12 @@ frq_custom_grouped (struct cmd_frequencies *cmd UNUSED, void *aux UNUSED) } } free (v); - if (!lex_match ('/')) + if (!lex_match (lexer, '/')) break; - if ((token != T_ID || dict_lookup_var (default_dict, tokid) != NULL) - && token != T_ALL) + if ((lex_token (lexer) != T_ID || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) != NULL) + && lex_token (lexer) != T_ALL) { - lex_put_back ('/'); + lex_put_back (lexer, '/'); break; } } @@ -970,22 +860,18 @@ add_percentile (double x) for (i = 0; i < n_percentiles; i++) { /* Do nothing if it's already in the list */ - if ( fabs(x - percentiles[i].p) < DBL_EPSILON ) + if ( fabs(x - percentiles[i].p) < DBL_EPSILON ) return; if (x < percentiles[i].p) break; } - if (i >= n_percentiles || tokval != percentiles[i].p) + if (i >= n_percentiles || x != percentiles[i].p) { - percentiles = pool_nrealloc (int_pool, percentiles, + percentiles = pool_nrealloc (syntax_pool, percentiles, n_percentiles + 1, sizeof *percentiles); - - if (i < n_percentiles) - memmove (&percentiles[i + 1], &percentiles[i], - (n_percentiles - i) * sizeof (struct percentile) ); - + insert_element (percentiles, n_percentiles, sizeof *percentiles, i); percentiles[i].p = x; n_percentiles++; } @@ -993,35 +879,16 @@ add_percentile (double x) /* Comparison functions. */ -/* Hash of numeric values. */ -static unsigned -hash_value_numeric (const void *value_, void *foo UNUSED) -{ - const struct freq *value = value_; - return hsh_hash_double (value->v[0].f); -} - -/* Hash of string values. */ -static unsigned -hash_value_alpha (const void *value_, void *v_) -{ - const struct freq *value = value_; - struct variable *v = v_; - struct var_freqs *vf = get_var_freqs (v); - - return hsh_hash_bytes (value->v[0].s, vf->width); -} - /* Ascending numeric compare of values. */ static int -compare_value_numeric_a (const void *a_, const void *b_, void *foo UNUSED) +compare_value_numeric_a (const void *a_, const void *b_, const void *aux UNUSED) { - const struct freq *a = a_; - const struct freq *b = b_; + const struct freq_mutable *a = a_; + const struct freq_mutable *b = b_; - if (a->v[0].f > b->v[0].f) + if (a->value.f > b->value.f) return 1; - else if (a->v[0].f < b->v[0].f) + else if (a->value.f < b->value.f) return -1; else return 0; @@ -1029,26 +896,26 @@ compare_value_numeric_a (const void *a_, const void *b_, void *foo UNUSED) /* Ascending string compare of values. */ static int -compare_value_alpha_a (const void *a_, const void *b_, void *v_) +compare_value_alpha_a (const void *a_, const void *b_, const void *v_) { - const struct freq *a = a_; - const struct freq *b = b_; - struct variable *v = v_; + const struct freq_mutable *a = a_; + const struct freq_mutable *b = b_; + const struct variable *v = v_; struct var_freqs *vf = get_var_freqs (v); - return memcmp (a->v[0].s, b->v[0].s, vf->width); + return value_compare_3way (&a->value, &b->value, vf->width); } /* Descending numeric compare of values. */ static int -compare_value_numeric_d (const void *a, const void *b, void *foo UNUSED) +compare_value_numeric_d (const void *a, const void *b, const void *aux UNUSED) { - return -compare_value_numeric_a (a, b, foo); + return -compare_value_numeric_a (a, b, aux); } /* Descending string compare of values. */ static int -compare_value_alpha_d (const void *a, const void *b, void *v) +compare_value_alpha_d (const void *a, const void *b, const void *v) { return -compare_value_alpha_a (a, b, v); } @@ -1056,19 +923,19 @@ compare_value_alpha_d (const void *a, const void *b, void *v) /* Ascending numeric compare of frequency; secondary key on ascending numeric value. */ static int -compare_freq_numeric_a (const void *a_, const void *b_, void *foo UNUSED) +compare_freq_numeric_a (const void *a_, const void *b_, const void *aux UNUSED) { - const struct freq *a = a_; - const struct freq *b = b_; + const struct freq_mutable *a = a_; + const struct freq_mutable *b = b_; - if (a->c > b->c) + if (a->count > b->count) return 1; - else if (a->c < b->c) + else if (a->count < b->count) return -1; - if (a->v[0].f > b->v[0].f) + if (a->value.f > b->value.f) return 1; - else if (a->v[0].f < b->v[0].f) + else if (a->value.f < b->value.f) return -1; else return 0; @@ -1077,37 +944,37 @@ compare_freq_numeric_a (const void *a_, const void *b_, void *foo UNUSED) /* Ascending numeric compare of frequency; secondary key on ascending string value. */ static int -compare_freq_alpha_a (const void *a_, const void *b_, void *v_) +compare_freq_alpha_a (const void *a_, const void *b_, const void *v_) { - const struct freq *a = a_; - const struct freq *b = b_; - struct variable *v = v_; + const struct freq_mutable *a = a_; + const struct freq_mutable *b = b_; + const struct variable *v = v_; struct var_freqs *vf = get_var_freqs (v); - if (a->c > b->c) + if (a->count > b->count) return 1; - else if (a->c < b->c) + else if (a->count < b->count) return -1; else - return memcmp (a->v[0].s, b->v[0].s, vf->width); + return value_compare_3way (&a->value, &b->value, vf->width); } /* Descending numeric compare of frequency; secondary key on ascending numeric value. */ static int -compare_freq_numeric_d (const void *a_, const void *b_, void *foo UNUSED) +compare_freq_numeric_d (const void *a_, const void *b_, const void *aux UNUSED) { - const struct freq *a = a_; - const struct freq *b = b_; + const struct freq_mutable *a = a_; + const struct freq_mutable *b = b_; - if (a->c > b->c) + if (a->count > b->count) return -1; - else if (a->c < b->c) + else if (a->count < b->count) return 1; - if (a->v[0].f > b->v[0].f) + if (a->value.f > b->value.f) return 1; - else if (a->v[0].f < b->v[0].f) + else if (a->value.f < b->value.f) return -1; else return 0; @@ -1116,19 +983,19 @@ compare_freq_numeric_d (const void *a_, const void *b_, void *foo UNUSED) /* Descending numeric compare of frequency; secondary key on ascending string value. */ static int -compare_freq_alpha_d (const void *a_, const void *b_, void *v_) +compare_freq_alpha_d (const void *a_, const void *b_, const void *v_) { - const struct freq *a = a_; - const struct freq *b = b_; - struct variable *v = v_; + const struct freq_mutable *a = a_; + const struct freq_mutable *b = b_; + const struct variable *v = v_; struct var_freqs *vf = get_var_freqs (v); - if (a->c > b->c) + if (a->count > b->count) return -1; - else if (a->c < b->c) + else if (a->count < b->count) return 1; else - return memcmp (a->v[0].s, b->v[0].s, vf->width); + return value_compare_3way (&a->value, &b->value, vf->width); } /* Frequency table display. */ @@ -1136,27 +1003,34 @@ compare_freq_alpha_d (const void *a_, const void *b_, void *v_) /* Sets the widths of all the columns and heights of all the rows in table T for driver D. */ static void -full_dim (struct tab_table *t, struct outp_driver *d) +full_dim (struct tab_table *t, struct outp_driver *d, void *aux UNUSED) { - int lab = cmd.labels == FRQ_LABELS; - int i; + int i = 0; + int columns = 5; + + if (cmd.labels == FRQ_LABELS) + { + t->w[0] = MIN (tab_natural_width (t, d, 0), d->prop_em_width * 15); + i = 1; + columns ++; + } + + for (;i < columns; i++) + t->w[i] = MAX (tab_natural_width (t, d, i), d->prop_em_width * 8); - if (lab) - t->w[0] = min (tab_natural_width (t, d, 0), d->prop_em_width * 15); - for (i = lab; i < lab + 5; i++) - t->w[i] = max (tab_natural_width (t, d, i), d->prop_em_width * 8); for (i = 0; i < t->nr; i++) t->h[i] = d->font_height; } /* Displays a full frequency table for variable V. */ static void -dump_full (struct variable *v) +dump_full (const struct variable *v, const struct variable *wv) { + const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; int n_categories; struct var_freqs *vf; struct freq_tab *ft; - struct freq *f; + struct freq_mutable *f; struct tab_table *t; int r; double cum_total = 0.0; @@ -1168,9 +1042,9 @@ dump_full (struct variable *v) const char *s; }; - struct init *p; + const struct init *p; - static struct init vec[] = + static const struct init vec[] = { {4, 0, N_("Valid")}, {5, 0, N_("Cum")}, @@ -1186,19 +1060,20 @@ dump_full (struct variable *v) {-1, -1, NULL}, }; - int lab = cmd.labels == FRQ_LABELS; + const bool lab = (cmd.labels == FRQ_LABELS); vf = get_var_freqs (v); ft = &vf->tab; n_categories = ft->n_valid + ft->n_missing; t = tab_create (5 + lab, n_categories + 3, 0); tab_headers (t, 0, 0, 2, 0); - tab_dim (t, full_dim); + tab_dim (t, full_dim, NULL); if (lab) tab_text (t, 0, 1, TAB_CENTER | TAT_TITLE, _("Value Label")); + for (p = vec; p->s; p++) - tab_text (t, p->c - (p->r ? !lab : 0), p->r, + tab_text (t, lab ? p->c : p->c - 1, p->r, TAB_CENTER | TAT_TITLE, gettext (p->s)); r = 2; @@ -1206,41 +1081,41 @@ dump_full (struct variable *v) { double percent, valid_percent; - cum_freq += f->c; + cum_freq += f->count; - percent = f->c / ft->total_cases * 100.0; - valid_percent = f->c / ft->valid_cases * 100.0; + percent = f->count / ft->total_cases * 100.0; + valid_percent = f->count / ft->valid_cases * 100.0; cum_total += valid_percent; if (lab) { - const char *label = val_labs_find (v->val_labs, f->v[0]); + const char *label = var_lookup_value_label (v, &f->value); if (label != NULL) tab_text (t, 0, r, TAB_LEFT, label); } - tab_value (t, 0 + lab, r, TAB_NONE, f->v, &vf->print); - tab_float (t, 1 + lab, r, TAB_NONE, f->c, 8, 0); - tab_float (t, 2 + lab, r, TAB_NONE, percent, 5, 1); - tab_float (t, 3 + lab, r, TAB_NONE, valid_percent, 5, 1); - tab_float (t, 4 + lab, r, TAB_NONE, cum_total, 5, 1); + tab_value (t, 0 + lab, r, TAB_NONE, &f->value, ft->dict, &vf->print); + tab_double (t, 1 + lab, r, TAB_NONE, f->count, wfmt); + tab_double (t, 2 + lab, r, TAB_NONE, percent, NULL); + tab_double (t, 3 + lab, r, TAB_NONE, valid_percent, NULL); + tab_double (t, 4 + lab, r, TAB_NONE, cum_total, NULL); r++; } for (; f < &ft->valid[n_categories]; f++) { - cum_freq += f->c; + cum_freq += f->count; if (lab) { - const char *label = val_labs_find (v->val_labs, f->v[0]); + const char *label = var_lookup_value_label (v, &f->value); if (label != NULL) tab_text (t, 0, r, TAB_LEFT, label); } - tab_value (t, 0 + lab, r, TAB_NONE, f->v, &vf->print); - tab_float (t, 1 + lab, r, TAB_NONE, f->c, 8, 0); - tab_float (t, 2 + lab, r, TAB_NONE, - f->c / ft->total_cases * 100.0, 5, 1); + tab_value (t, 0 + lab, r, TAB_NONE, &f->value, ft->dict, &vf->print); + tab_double (t, 1 + lab, r, TAB_NONE, f->count, wfmt); + tab_double (t, 2 + lab, r, TAB_NONE, + f->count / ft->total_cases * 100.0, NULL); tab_text (t, 3 + lab, r, TAB_NONE, _("Missing")); r++; } @@ -1252,28 +1127,27 @@ dump_full (struct variable *v) tab_hline (t, TAL_2, 0, 4 + lab, r); tab_joint_text (t, 0, r, 0 + lab, r, TAB_RIGHT | TAT_TITLE, _("Total")); tab_vline (t, TAL_0, 1, r, r); - tab_float (t, 1 + lab, r, TAB_NONE, cum_freq, 8, 0); - tab_float (t, 2 + lab, r, TAB_NONE, 100.0, 5, 1); - tab_float (t, 3 + lab, r, TAB_NONE, 100.0, 5, 1); + tab_double (t, 1 + lab, r, TAB_NONE, cum_freq, wfmt); + tab_fixed (t, 2 + lab, r, TAB_NONE, 100.0, 5, 1); + tab_fixed (t, 3 + lab, r, TAB_NONE, 100.0, 5, 1); - tab_title (t, "%s: %s", v->name, v->label ? v->label : ""); + tab_title (t, "%s", var_to_string (v)); tab_submit (t); - } /* Sets the widths of all the columns and heights of all the rows in table T for driver D. */ static void -condensed_dim (struct tab_table *t, struct outp_driver *d) +condensed_dim (struct tab_table *t, struct outp_driver *d, void *aux UNUSED) { - int cum_w = max (outp_string_width (d, _("Cum"), OUTP_PROPORTIONAL), - max (outp_string_width (d, _("Cum"), OUTP_PROPORTIONAL), + int cum_w = MAX (outp_string_width (d, _("Cum"), OUTP_PROPORTIONAL), + MAX (outp_string_width (d, _("Cum"), OUTP_PROPORTIONAL), outp_string_width (d, "000", OUTP_PROPORTIONAL))); int i; for (i = 0; i < 2; i++) - t->w[i] = max (tab_natural_width (t, d, i), d->prop_em_width * 8); + t->w[i] = MAX (tab_natural_width (t, d, i), d->prop_em_width * 8); for (i = 2; i < 4; i++) t->w[i] = cum_w; for (i = 0; i < t->nr; i++) @@ -1282,12 +1156,13 @@ condensed_dim (struct tab_table *t, struct outp_driver *d) /* Display condensed frequency table for variable V. */ static void -dump_condensed (struct variable *v) +dump_condensed (const struct variable *v, const struct variable *wv) { + const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; int n_categories; struct var_freqs *vf; struct freq_tab *ft; - struct freq *f; + struct freq_mutable *f; struct tab_table *t; int r; double cum_total = 0.0; @@ -1303,28 +1178,28 @@ dump_condensed (struct variable *v) tab_text (t, 2, 1, TAB_CENTER | TAT_TITLE, _("Pct")); tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Cum")); tab_text (t, 3, 1, TAB_CENTER | TAT_TITLE, _("Pct")); - tab_dim (t, condensed_dim); + tab_dim (t, condensed_dim, NULL); r = 2; for (f = ft->valid; f < ft->missing; f++) { double percent; - percent = f->c / ft->total_cases * 100.0; - cum_total += f->c / ft->valid_cases * 100.0; + percent = f->count / ft->total_cases * 100.0; + cum_total += f->count / ft->valid_cases * 100.0; - tab_value (t, 0, r, TAB_NONE, f->v, &vf->print); - tab_float (t, 1, r, TAB_NONE, f->c, 8, 0); - tab_float (t, 2, r, TAB_NONE, percent, 3, 0); - tab_float (t, 3, r, TAB_NONE, cum_total, 3, 0); + tab_value (t, 0, r, TAB_NONE, &f->value, ft->dict, &vf->print); + tab_double (t, 1, r, TAB_NONE, f->count, wfmt); + tab_double (t, 2, r, TAB_NONE, percent, NULL); + tab_double (t, 3, r, TAB_NONE, cum_total, NULL); r++; } for (; f < &ft->valid[n_categories]; f++) { - tab_value (t, 0, r, TAB_NONE, f->v, &vf->print); - tab_float (t, 1, r, TAB_NONE, f->c, 8, 0); - tab_float (t, 2, r, TAB_NONE, - f->c / ft->total_cases * 100.0, 3, 0); + tab_value (t, 0, r, TAB_NONE, &f->value, ft->dict, &vf->print); + tab_double (t, 1, r, TAB_NONE, f->count, wfmt); + tab_double (t, 2, r, TAB_NONE, + f->count / ft->total_cases * 100.0, NULL); r++; } @@ -1332,7 +1207,7 @@ dump_condensed (struct variable *v) cmd.spaces == FRQ_SINGLE ? -1 : TAL_GAP, TAL_1, 0, 0, 3, r - 1); tab_hline (t, TAL_2, 0, 3, 2); - tab_title (t, "%s: %s", v->name, v->label ? v->label : ""); + tab_title (t, "%s", var_to_string (v)); tab_columns (t, SOM_COL_DOWN, 1); tab_submit (t); } @@ -1342,41 +1217,22 @@ dump_condensed (struct variable *v) /* Calculates all the pertinent statistics for variable V, putting them in array D[]. FIXME: This could be made much more optimal. */ static void -calc_stats (struct variable *v, double d[frq_n_stats]) +calc_stats (const struct variable *v, double d[frq_n_stats]) { struct freq_tab *ft = &get_var_freqs (v)->tab; double W = ft->valid_cases; struct moments *m; - struct freq *f=0; + struct freq_mutable *f=0; int most_often; double X_mode; double rank; int i = 0; int idx; - double *median_value; /* Calculate percentiles. */ - /* If the 50th percentile was not explicitly requested then we must - calculate it anyway --- it's the median */ - median_value = 0 ; - for (i = 0; i < n_percentiles; i++) - { - if (percentiles[i].p == 0.5) - { - median_value = &percentiles[i].value; - break; - } - } - - if ( 0 == median_value ) - { - add_percentile (0.5); - implicit_50th = 1; - } - - for (i = 0; i < n_percentiles; i++) + for (i = 0; i < n_percentiles; i++) { percentiles[i].flag = 0; percentiles[i].flag2 = 0; @@ -1386,33 +1242,33 @@ calc_stats (struct variable *v, double d[frq_n_stats]) for (idx = 0; idx < ft->n_valid; ++idx) { static double prev_value = SYSMIS; - f = &ft->valid[idx]; - rank += f->c ; - for (i = 0; i < n_percentiles; i++) + f = &ft->valid[idx]; + rank += f->count ; + for (i = 0; i < n_percentiles; i++) { double tp; - if ( percentiles[i].flag2 ) continue ; + if ( percentiles[i].flag2 ) continue ; - if ( get_algorithm() != COMPATIBLE ) - tp = + if ( settings_get_algorithm () != COMPATIBLE ) + tp = (ft->valid_cases - 1) * percentiles[i].p; else - tp = + tp = (ft->valid_cases + 1) * percentiles[i].p - 1; - if ( percentiles[i].flag ) + if ( percentiles[i].flag ) { - percentiles[i].x2 = f->v[0].f; + percentiles[i].x2 = f->value.f; percentiles[i].x1 = prev_value; percentiles[i].flag2 = 1; continue; } - if (rank > tp ) + if (rank > tp ) { - if ( f->c > 1 && rank - (f->c - 1) > tp ) + if ( f->count > 1 && rank - (f->count - 1) > tp ) { - percentiles[i].x2 = percentiles[i].x1 = f->v[0].f; + percentiles[i].x2 = percentiles[i].x1 = f->value.f; percentiles[i].flag2 = 1; } else @@ -1423,14 +1279,14 @@ calc_stats (struct variable *v, double d[frq_n_stats]) continue; } } - prev_value = f->v[0].f; + prev_value = f->value.f; } - for (i = 0; i < n_percentiles; i++) + for (i = 0; i < n_percentiles; i++) { /* Catches the case when p == 100% */ - if ( ! percentiles[i].flag2 ) - percentiles[i].x1 = percentiles[i].x2 = f->v[0].f; + if ( ! percentiles[i].flag2 ) + percentiles[i].x1 = percentiles[i].x2 = f->value.f; /* printf("percentile %d (p==%.2f); X1 = %g; X2 = %g\n", @@ -1438,13 +1294,13 @@ calc_stats (struct variable *v, double d[frq_n_stats]) */ } - for (i = 0; i < n_percentiles; i++) + for (i = 0; i < n_percentiles; i++) { struct freq_tab *ft = &get_var_freqs (v)->tab; double s; double dummy; - if ( get_algorithm() != COMPATIBLE ) + if ( settings_get_algorithm () != COMPATIBLE ) { s = modf((ft->valid_cases - 1) * percentiles[i].p , &dummy); } @@ -1453,11 +1309,8 @@ calc_stats (struct variable *v, double d[frq_n_stats]) s = modf((ft->valid_cases + 1) * percentiles[i].p -1, &dummy); } - percentiles[i].value = percentiles[i].x1 + - ( percentiles[i].x2 - percentiles[i].x1) * s ; - - if ( percentiles[i].p == 0.50) - median_value = &percentiles[i].value; + percentiles[i].value = percentiles[i].x1 + + ( percentiles[i].x2 - percentiles[i].x1) * s ; } @@ -1466,12 +1319,12 @@ calc_stats (struct variable *v, double d[frq_n_stats]) X_mode = SYSMIS; for (f = ft->valid; f < ft->missing; f++) { - if (most_often < f->c) + if (most_often < f->count) { - most_often = f->c; - X_mode = f->v[0].f; + most_often = f->count; + X_mode = f->value.f; } - else if (most_often == f->c) + else if (most_often == f->count) { /* A duplicate mode is undefined. FIXME: keep track of *all* the modes. */ @@ -1482,19 +1335,18 @@ calc_stats (struct variable *v, double d[frq_n_stats]) /* Calculate moments. */ m = moments_create (MOMENT_KURTOSIS); for (f = ft->valid; f < ft->missing; f++) - moments_pass_one (m, f->v[0].f, f->c); + moments_pass_one (m, f->value.f, f->count); for (f = ft->valid; f < ft->missing; f++) - moments_pass_two (m, f->v[0].f, f->c); + moments_pass_two (m, f->value.f, f->count); moments_calculate (m, NULL, &d[frq_mean], &d[frq_variance], &d[frq_skew], &d[frq_kurt]); moments_destroy (m); - + /* Formulas below are taken from _SPSS Statistical Algorithms_. */ - d[frq_min] = ft->valid[0].v[0].f; - d[frq_max] = ft->valid[ft->n_valid - 1].v[0].f; + d[frq_min] = ft->valid[0].value.f; + d[frq_max] = ft->valid[ft->n_valid - 1].value.f; d[frq_mode] = X_mode; d[frq_range] = d[frq_max] - d[frq_min]; - d[frq_median] = *median_value; d[frq_sum] = d[frq_mean] * W; d[frq_stddev] = sqrt (d[frq_variance]); d[frq_semean] = d[frq_stddev] / sqrt (W); @@ -1504,38 +1356,35 @@ calc_stats (struct variable *v, double d[frq_n_stats]) /* Displays a table of all the statistics requested for variable V. */ static void -dump_statistics (struct variable *v, int show_varname) +dump_statistics (const struct variable *v, bool show_varname, + const struct variable *wv) { + const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; struct freq_tab *ft; double stat_value[frq_n_stats]; struct tab_table *t; int i, r; - int n_explicit_percentiles = n_percentiles; - - if ( implicit_50th && n_percentiles > 0 ) - --n_percentiles; - - if (v->type == ALPHA) + if (var_is_alpha (v)) return; ft = &get_var_freqs (v)->tab; if (ft->n_valid == 0) { msg (SW, _("No valid data for variable %s; statistics not displayed."), - v->name); + var_get_name (v)); return; } calc_stats (v, stat_value); - t = tab_create (3, n_stats + n_explicit_percentiles + 2, 0); - tab_dim (t, tab_natural_dimensions); + t = tab_create (3, n_stats + n_percentiles + 2, 0); + tab_dim (t, tab_natural_dimensions, NULL); tab_box (t, TAL_1, TAL_1, -1, -1 , 0 , 0 , 2, tab_nr(t) - 1) ; tab_vline (t, TAL_1 , 2, 0, tab_nr(t) - 1); tab_vline (t, TAL_GAP , 1, 0, tab_nr(t) - 1 ) ; - + r=2; /* N missing and N valid are always dumped */ for (i = 0; i < frq_n_stats; i++) @@ -1543,7 +1392,7 @@ dump_statistics (struct variable *v, int show_varname) { tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, gettext (st_name[i].s10)); - tab_float (t, 2, r, TAB_NONE, stat_value[i], 11, 3); + tab_double (t, 2, r, TAB_NONE, stat_value[i], NULL); r++; } @@ -1551,30 +1400,27 @@ dump_statistics (struct variable *v, int show_varname) tab_text (t, 1, 0, TAB_LEFT | TAT_TITLE, _("Valid")); tab_text (t, 1, 1, TAB_LEFT | TAT_TITLE, _("Missing")); - tab_float(t, 2, 0, TAB_NONE, ft->valid_cases, 11, 0); - tab_float(t, 2, 1, TAB_NONE, ft->total_cases - ft->valid_cases, 11, 0); + tab_double (t, 2, 0, TAB_NONE, ft->valid_cases, wfmt); + tab_double (t, 2, 1, TAB_NONE, ft->total_cases - ft->valid_cases, wfmt); - - for (i = 0; i < n_explicit_percentiles; i++, r++) + for (i = 0; i < n_percentiles; i++, r++) { - if ( i == 0 ) - { + if ( i == 0 ) + { tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Percentiles")); } - tab_float (t, 1, r, TAB_LEFT, percentiles[i].p * 100, 3, 0 ); - tab_float (t, 2, r, TAB_NONE, percentiles[i].value, 11, 3); - + if (percentiles[i].p == 0.5) + tab_text (t, 1, r, TAB_LEFT, _("50 (Median)")); + else + tab_fixed (t, 1, r, TAB_LEFT, percentiles[i].p * 100, 3, 0); + tab_double (t, 2, r, TAB_NONE, percentiles[i].value, + var_get_print_format (v)); } tab_columns (t, SOM_COL_DOWN, 1); if (show_varname) - { - if (v->label) - tab_title (t, "%s: %s", v->name, v->label); - else - tab_title (t, "%s", v->name); - } + tab_title (t, "%s", var_to_string (v)); else tab_flags (t, SOMF_NO_TITLE); @@ -1584,44 +1430,44 @@ dump_statistics (struct variable *v, int show_varname) /* Create a gsl_histogram from a freq_tab */ -gsl_histogram * -freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var) +struct histogram * +freq_tab_to_hist (const struct freq_tab *ft, const struct variable *var) { int i; double x_min = DBL_MAX; double x_max = -DBL_MAX; - gsl_histogram *hist; + struct statistic *hist; const double bins = 11; struct hsh_iterator hi; struct hsh_table *fh = ft->data; - struct freq *frq; + struct freq_mutable *frq; /* Find out the extremes of the x value */ - for ( frq = hsh_first(fh, &hi); frq != 0; frq = hsh_next(fh, &hi) ) + for ( frq = hsh_first(fh, &hi); frq != 0; frq = hsh_next(fh, &hi) ) { - if ( mv_is_value_missing(&var->miss, frq->v)) + if (var_is_value_missing(var, &frq->value, MV_ANY)) continue; - if ( frq->v[0].f < x_min ) x_min = frq->v[0].f ; - if ( frq->v[0].f > x_max ) x_max = frq->v[0].f ; + if ( frq->value.f < x_min ) x_min = frq->value.f ; + if ( frq->value.f > x_max ) x_max = frq->value.f ; } - hist = histogram_create(bins, x_min, x_max); + hist = histogram_create (bins, x_min, x_max); - for( i = 0 ; i < ft->n_valid ; ++i ) + for( i = 0 ; i < ft->n_valid ; ++i ) { frq = &ft->valid[i]; - gsl_histogram_accumulate(hist, frq->v[0].f, frq->c); + histogram_add ((struct histogram *)hist, frq->value.f, frq->count); } - return hist; + return (struct histogram *)hist; } static struct slice * -freq_tab_to_slice_array(const struct freq_tab *frq_tab, +freq_tab_to_slice_array(const struct freq_tab *frq_tab, const struct variable *var, int *n_slices); @@ -1631,7 +1477,7 @@ freq_tab_to_slice_array(const struct freq_tab *frq_tab, The caller is responsible for freeing slices */ static struct slice * -freq_tab_to_slice_array(const struct freq_tab *frq_tab, +freq_tab_to_slice_array(const struct freq_tab *frq_tab, const struct variable *var, int *n_slices) { @@ -1639,16 +1485,16 @@ freq_tab_to_slice_array(const struct freq_tab *frq_tab, struct slice *slices; *n_slices = frq_tab->n_valid; - + slices = xnmalloc (*n_slices, sizeof *slices); - for (i = 0 ; i < *n_slices ; ++i ) + for (i = 0 ; i < *n_slices ; ++i ) { - const struct freq *frq = &frq_tab->valid[i]; - - slices[i].label = value_to_string(frq->v, var); + const struct freq_mutable *frq = &frq_tab->valid[i]; - slices[i].magnetude = frq->c; + ds_init_empty (&slices[i].label); + var_append_value_name (var, &frq->value, &slices[i].label); + slices[i].magnetude = frq->count; } return slices; @@ -1661,17 +1507,22 @@ static void do_piechart(const struct variable *var, const struct freq_tab *frq_tab) { struct slice *slices; - int n_slices; + int n_slices, i; slices = freq_tab_to_slice_array(frq_tab, var, &n_slices); piechart_plot(var_to_string(var), slices, n_slices); + for (i = 0 ; i < n_slices ; ++i ) + { + ds_destroy (&slices[i].label); + } + free(slices); } -/* +/* Local Variables: mode: c End: