X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fcrosstabs.q;h=00ec61369cfc04edc9e48846d04fb1152027a21d;hb=19d0debdc5b72e1bb6c79956403a4d3bc054f300;hp=d11d2d8d79d658e9b201e723d9d989523e17a44a;hpb=cb05567731adc7c890d3146102ff01068ba796dd;p=pspp-builds.git diff --git a/src/language/stats/crosstabs.q b/src/language/stats/crosstabs.q index d11d2d8d..00ec6136 100644 --- a/src/language/stats/crosstabs.q +++ b/src/language/stats/crosstabs.q @@ -1,5 +1,5 @@ /* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. Written by Ben Pfaff . This program is free software; you can redistribute it and/or @@ -30,29 +30,37 @@ */ #include -#include + #include +#include #include #include -#include -#include -#include + #include +#include #include -#include -#include +#include +#include +#include #include -#include +#include #include -#include +#include +#include +#include +#include +#include +#include #include +#include +#include #include -#include +#include #include +#include #include -#include -#include -#include + +#include "minmax.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -60,13 +68,11 @@ /* (headers) */ -#include - /* (specification) crosstabs (crs_): *^tables=custom; +variables=custom; - +missing=miss:!table/include/report; + missing=miss:!table/include/report; +write[wr_]=none,cells,all; +format=fmt:!labels/nolabels/novallabs, val:!avalue/dvalue, @@ -172,11 +178,11 @@ static struct cmd_crosstabs cmd; static struct pool *pl_tc; /* For table cells. */ static struct pool *pl_col; /* For column data. */ -static int internal_cmd_crosstabs (void); -static void precalc (void *); -static bool calc_general (struct ccase *, void *); -static bool calc_integer (struct ccase *, void *); -static void postcalc (void *); +static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds); +static void precalc (const struct ccase *, void *, const struct dataset *); +static bool calc_general (const struct ccase *, void *, const struct dataset *); +static bool calc_integer (const struct ccase *, void *, const struct dataset *); +static bool postcalc (void *, const struct dataset *); static void submit (struct tab_table *); static void format_short (char *s, const struct fmt_spec *fp, @@ -184,9 +190,9 @@ static void format_short (char *s, const struct fmt_spec *fp, /* Parse and execute CROSSTABS, then clean up. */ int -cmd_crosstabs (void) +cmd_crosstabs (struct lexer *lexer, struct dataset *ds) { - int result = internal_cmd_crosstabs (); + int result = internal_cmd_crosstabs (lexer, ds); free (variables); pool_destroy (pl_tc); @@ -197,7 +203,7 @@ cmd_crosstabs (void) /* Parses and executes the CROSSTABS procedure. */ static int -internal_cmd_crosstabs (void) +internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds) { int i; bool ok; @@ -209,7 +215,7 @@ internal_cmd_crosstabs (void) pl_tc = pool_create (); pl_col = pool_create (); - if (!parse_crosstabs (&cmd)) + if (!parse_crosstabs (lexer, ds, &cmd, NULL)) return CMD_FAILURE; mode = variables ? INTEGER : GENERAL; @@ -290,7 +296,7 @@ internal_cmd_crosstabs (void) else write = CRS_WR_NONE; - ok = procedure_with_splits (precalc, + ok = procedure_with_splits (ds, precalc, mode == GENERAL ? calc_general : calc_integer, postcalc, NULL); @@ -299,7 +305,7 @@ internal_cmd_crosstabs (void) /* Parses the TABLES subcommand. */ static int -crs_custom_tables (struct cmd_crosstabs *cmd UNUSED) +crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED) { struct var_set *var_set; int n_by; @@ -309,23 +315,23 @@ crs_custom_tables (struct cmd_crosstabs *cmd UNUSED) int success = 0; /* Ensure that this is a TABLES subcommand. */ - if (!lex_match_id ("TABLES") - && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) - && token != T_ALL) + if (!lex_match_id (lexer, "TABLES") + && (lex_token (lexer) != T_ID || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL) + && lex_token (lexer) != T_ALL) return 2; - lex_match ('='); + lex_match (lexer, '='); if (variables != NULL) var_set = var_set_create_from_array (variables, variables_cnt); else - var_set = var_set_create_from_dict (default_dict); + var_set = var_set_create_from_dict (dataset_dict (ds)); assert (var_set != NULL); for (n_by = 0; ;) { by = xnrealloc (by, n_by + 1, sizeof *by); by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar); - if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by], + if (!parse_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by], PV_NO_DUPLICATE | PV_NO_SCRATCH)) goto done; if (xalloc_oversized (nx, by_nvar[n_by])) @@ -336,11 +342,11 @@ crs_custom_tables (struct cmd_crosstabs *cmd UNUSED) nx *= by_nvar[n_by]; n_by++; - if (!lex_match (T_BY)) + if (!lex_match (lexer, T_BY)) { if (n_by < 2) { - lex_error (_("expecting BY")); + lex_error (lexer, _("expecting BY")); goto done; } else @@ -403,7 +409,7 @@ crs_custom_tables (struct cmd_crosstabs *cmd UNUSED) /* Parses the VARIABLES subcommand. */ static int -crs_custom_variables (struct cmd_crosstabs *cmd UNUSED) +crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED) { if (nxtab) { @@ -411,7 +417,7 @@ crs_custom_variables (struct cmd_crosstabs *cmd UNUSED) return 0; } - lex_match ('='); + lex_match (lexer, '='); for (;;) { @@ -420,42 +426,43 @@ crs_custom_variables (struct cmd_crosstabs *cmd UNUSED) long min, max; - if (!parse_variables (default_dict, &variables, &variables_cnt, + if (!parse_variables (lexer, dataset_dict (ds), + &variables, &variables_cnt, (PV_APPEND | PV_NUMERIC | PV_NO_DUPLICATE | PV_NO_SCRATCH))) return 0; - if (token != '(') + if (lex_token (lexer) != '(') { - lex_error ("expecting `('"); + lex_error (lexer, "expecting `('"); goto lossage; } - lex_get (); + lex_get (lexer); - if (!lex_force_int ()) + if (!lex_force_int (lexer)) goto lossage; - min = lex_integer (); - lex_get (); + min = lex_integer (lexer); + lex_get (lexer); - lex_match (','); + lex_match (lexer, ','); - if (!lex_force_int ()) + if (!lex_force_int (lexer)) goto lossage; - max = lex_integer (); + max = lex_integer (lexer); if (max < min) { msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."), max, min); goto lossage; } - lex_get (); + lex_get (lexer); - if (token != ')') + if (lex_token (lexer) != ')') { - lex_error ("expecting `)'"); + lex_error (lexer, "expecting `)'"); goto lossage; } - lex_get (); + lex_get (lexer); for (i = orig_nv; i < variables_cnt; i++) { @@ -466,7 +473,7 @@ crs_custom_variables (struct cmd_crosstabs *cmd UNUSED) var_attach_aux (variables[i], vr, var_dtor_free); } - if (token == '/') + if (lex_token (lexer) == '/') break; } @@ -480,13 +487,14 @@ crs_custom_variables (struct cmd_crosstabs *cmd UNUSED) /* Data file processing. */ -static int compare_table_entry (const void *, const void *, void *); -static unsigned hash_table_entry (const void *, void *); +static int compare_table_entry (const void *, const void *, const void *); +static unsigned hash_table_entry (const void *, const void *); /* Set up the crosstabulation tables for processing. */ -static void -precalc (void *aux UNUSED) +static void +precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds) { + output_split_file_values (ds, first); if (mode == GENERAL) { gen_tab = hsh_create (512, compare_table_entry, hash_table_entry, @@ -554,16 +562,17 @@ precalc (void *aux UNUSED) n_sorted_tab + 1, sizeof *sorted_tab); sorted_tab[n_sorted_tab] = NULL; } + } /* Form crosstabulations for general mode. */ static bool -calc_general (struct ccase *c, void *aux UNUSED) +calc_general (const struct ccase *c, void *aux UNUSED, const struct dataset *ds) { - int bad_warn = 1; + bool bad_warn = true; /* Case weight. */ - double weight = dict_get_case_weight (default_dict, c, &bad_warn); + double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn); /* Flattened current table index. */ int t; @@ -632,12 +641,12 @@ calc_general (struct ccase *c, void *aux UNUSED) } static bool -calc_integer (struct ccase *c, void *aux UNUSED) +calc_integer (const struct ccase *c, void *aux UNUSED, const struct dataset *ds) { - int bad_warn = 1; + bool bad_warn = true; /* Case weight. */ - double weight = dict_get_case_weight (default_dict, c, &bad_warn); + double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn); /* Flattened current table index. */ int t; @@ -692,7 +701,7 @@ calc_integer (struct ccase *c, void *aux UNUSED) /* Compare the table_entry's at A and B and return a strcmp()-type result. */ static int -compare_table_entry (const void *a_, const void *b_, void *foo UNUSED) +compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED) { const struct table_entry *a = a_; const struct table_entry *b = b_; @@ -732,7 +741,7 @@ compare_table_entry (const void *a_, const void *b_, void *foo UNUSED) /* Calculate a hash value from table_entry A. */ static unsigned -hash_table_entry (const void *a_, void *foo UNUSED) +hash_table_entry (const void *a_, const void *aux UNUSED) { const struct table_entry *a = a_; unsigned long hash; @@ -757,8 +766,8 @@ static void output_pivot_table (struct table_entry **, struct table_entry **, int *, int *, int *); static void make_summary_table (void); -static void -postcalc (void *aux UNUSED) +static bool +postcalc (void *aux UNUSED, const struct dataset *ds UNUSED) { if (mode == GENERAL) { @@ -794,6 +803,8 @@ postcalc (void *aux UNUSED) } hsh_destroy (gen_tab); + + return true; } static void insert_summary (struct tab_table *, int tab_index, double valid); @@ -809,7 +820,7 @@ make_summary_table (void) int cur_tab = 0; summary = tab_create (7, 3 + nxtab, 1); - tab_title (summary, 0, _("Summary.")); + tab_title (summary, _("Summary.")); tab_headers (summary, 1, 0, 3, 0); tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases")); tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid")); @@ -945,7 +956,7 @@ static int n_rows; static int ns_cols, ns_rows; /* Crosstabulation. */ -static struct crosstab *x; +static const struct crosstab *x; /* Number of variables from the crosstabulation to consider. This is either x->nvar, if pivoting is on, or 2, if pivoting is off. */ @@ -1088,7 +1099,7 @@ output_pivot_table (struct table_entry **pb, struct table_entry **pe, } strcpy (cp, "]."); - tab_title (table, 0, title); + tab_title (table, "%s", title); local_free (title); } @@ -1104,7 +1115,7 @@ output_pivot_table (struct table_entry **pb, struct table_entry **pe, (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1); tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0); - tab_title (chisq, 0, "Chi-square tests."); + tab_title (chisq, _("Chi-square tests.")); tab_offset (chisq, nvar - 2, 0); tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic")); @@ -1130,7 +1141,7 @@ output_pivot_table (struct table_entry **pb, struct table_entry **pe, { sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1); tab_headers (sym, 2 + (nvar - 2), 0, 1, 0); - tab_title (sym, 0, "Symmetric measures."); + tab_title (sym, _("Symmetric measures.")); tab_offset (sym, nvar - 2, 0); tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category")); @@ -1149,7 +1160,7 @@ output_pivot_table (struct table_entry **pb, struct table_entry **pe, { risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1); tab_headers (risk, 1 + nvar - 2, 0, 2, 0); - tab_title (risk, 0, "Risk estimate."); + tab_title (risk, _("Risk estimate.")); tab_offset (risk, nvar - 2, 0); tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF, @@ -1171,7 +1182,7 @@ output_pivot_table (struct table_entry **pb, struct table_entry **pe, { direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1); tab_headers (direct, 3 + (nvar - 2), 0, 1, 0); - tab_title (direct, 0, "Directional measures."); + tab_title (direct, _("Directional measures.")); tab_offset (direct, nvar - 2, 0); tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category")); @@ -1213,7 +1224,7 @@ output_pivot_table (struct table_entry **pb, struct table_entry **pe, /* Allocate table space for the matrix. */ if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table)) tab_realloc (table, -1, - max (tab_nr (table) + (n_rows + 1) * num_cells, + MAX (tab_nr (table) + (n_rows + 1) * num_cells, tab_nr (table) * (pe - pb) / (te - tb))); if (mode == GENERAL) @@ -1466,7 +1477,7 @@ submit (struct tab_table *t) tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1); tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1, tab_nr (t) - 1); - tab_box (t, -1, -1, -1, TAL_1 | TAL_SPACING, 0, tab_t (t), tab_l (t) - 1, + tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1, tab_nr (t) - 1); tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1); tab_dim (t, crosstabs_dim); @@ -1481,14 +1492,20 @@ crosstabs_dim (struct tab_table *t, struct outp_driver *d) int i; /* Width of a numerical column. */ - int c = outp_string_width (d, "0.000000"); + int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL); if (cmd.miss == CRS_REPORT) - c += outp_string_width (d, "M"); + c += outp_string_width (d, "M", OUTP_PROPORTIONAL); /* Set width for header columns. */ if (t->l != 0) { - int w = (d->width - t->vr_tot - c * (t->nc - t->l)) / t->l; + size_t i; + int w; + + w = d->width - c * (t->nc - t->l); + for (i = 0; i <= t->nc; i++) + w -= t->wrv[i]; + w /= t->l; if (w < d->prop_em_width * 8) w = d->prop_em_width * 8; @@ -1597,7 +1614,7 @@ find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot) result. WIDTH_ points to an int which is either 0 for a numeric value or a string width for a string value. */ static int -compare_value (const void *a_, const void *b_, void *width_) +compare_value (const void *a_, const void *b_, const void *width_) { const union value *a = a_; const union value *b = b_; @@ -1653,7 +1670,7 @@ static void table_value_missing (struct tab_table *table, int c, int r, unsigned char opt, const union value *v, const struct variable *var) { - struct fixed_string s; + struct substring s; const char *label = val_labs_find (var->val_labs, *v); if (label) @@ -1692,20 +1709,20 @@ display_dimensions (struct tab_table *table, int first_difference, struct table_ } /* Put VALUE into cell (C,R) of TABLE, suffixed with character - SUFFIX if nonzero. If MARK_MISSING is nonzero the entry is + SUFFIX if nonzero. If MARK_MISSING is true the entry is additionally suffixed with a letter `M'. */ static void format_cell_entry (struct tab_table *table, int c, int r, double value, - char suffix, int mark_missing) + char suffix, bool mark_missing) { const struct fmt_spec f = {FMT_F, 10, 1}; union value v; - struct fixed_string s; + struct substring s; s.length = 10; s.string = tab_alloc (table, 16); v.f = value; - data_out (s.string, &f, &v); + data_out (&v, &f, s.string); while (*s.string == ' ') { s.length--; @@ -1745,13 +1762,13 @@ display_crosstabulation (void) tab_hline (table, TAL_1, -1, n_cols, 0); for (c = 0; c < n_cols; c++) { - int mark_missing = 0; + bool mark_missing = false; double expected_value = row_tot[r] * col_tot[c] / W; if (cmd.miss == CRS_REPORT && (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f) || mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))) - mark_missing = 1; + mark_missing = true; for (i = 0; i < num_cells; i++) { double v; @@ -1790,8 +1807,7 @@ display_crosstabulation (void) * (1. - col_tot[c] / W))); break; default: - assert (0); - abort (); + NOT_REACHED (); } format_cell_entry (table, c, i, v, suffix, mark_missing); @@ -1812,11 +1828,11 @@ display_crosstabulation (void) for (r = 0; r < n_rows; r++) { char suffix = 0; - int mark_missing = 0; + bool mark_missing = false; if (cmd.miss == CRS_REPORT && mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f)) - mark_missing = 1; + mark_missing = true; for (i = 0; i < num_cells; i++) { @@ -1846,8 +1862,7 @@ display_crosstabulation (void) v = 0.; break; default: - assert (0); - abort (); + NOT_REACHED (); } format_cell_entry (table, n_cols, 0, v, suffix, mark_missing); @@ -1866,13 +1881,13 @@ display_crosstabulation (void) for (c = 0; c <= n_cols; c++) { double ct = c < n_cols ? col_tot[c] : W; - int mark_missing = 0; + bool mark_missing = false; char suffix = 0; int i; if (cmd.miss == CRS_REPORT && c < n_cols && mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)) - mark_missing = 1; + mark_missing = true; for (i = 0; i < num_cells; i++) { @@ -1902,8 +1917,7 @@ display_crosstabulation (void) case CRS_CL_ASRESIDUAL: continue; default: - assert (0); - abort (); + NOT_REACHED (); } format_cell_entry (table, c, i, v, suffix, mark_missing); @@ -2265,9 +2279,9 @@ calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2) { int x; - if (min (c, d) < min (a, b)) + if (MIN (c, d) < MIN (a, b)) swap (&a, &c), swap (&b, &d); - if (min (b, d) < min (a, c)) + if (MIN (b, d) < MIN (a, c)) swap (&a, &b), swap (&c, &d); if (b * c < a * d) { @@ -2459,7 +2473,7 @@ static int calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC], double t[N_SYMMETRIC]) { - int q = min (ns_rows, ns_cols); + int q = MIN (ns_rows, ns_cols); if (q <= 1) return 0; @@ -3176,22 +3190,22 @@ format_short (char *s, const struct fmt_spec *fp, const union value *v) struct fmt_spec fmt_subst; /* Limit to short string width. */ - if (formats[fp->type].cat & FCAT_STRING) + if (fmt_is_string (fp->type)) { fmt_subst = *fp; assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX); if (fmt_subst.type == FMT_A) - fmt_subst.w = min (8, fmt_subst.w); + fmt_subst.w = MIN (8, fmt_subst.w); else - fmt_subst.w = min (16, fmt_subst.w); + fmt_subst.w = MIN (16, fmt_subst.w); fp = &fmt_subst; } /* Format. */ - data_out (s, fp, v); - + data_out (v, fp, s); + /* Null terminate. */ s[fp->w] = '\0'; }