X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fcrosstabs.q;h=e5f174f00773788b18025a8d735d877939f3c5eb;hb=29917c4f5908454803e663d2ad78bca4bc35e805;hp=8347bb5f09f81637bbceba245463c0397388bb78;hpb=9e9ae654181e7f0cb71946db5a0bd95c9a70a689;p=pspp diff --git a/src/language/stats/crosstabs.q b/src/language/stats/crosstabs.q index 8347bb5f09..e5f174f007 100644 --- a/src/language/stats/crosstabs.q +++ b/src/language/stats/crosstabs.q @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2016 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,52 +16,51 @@ /* FIXME: - - Pearson's R (but not Spearman!) is off a little. - - T values for Spearman's R and Pearson's R are wrong. - - How to calculate significance of symmetric and directional measures? - - Asymmetric ASEs and T values for lambda are wrong. - - ASE of Goodman and Kruskal's tau is not calculated. - - ASE of symmetric somers' d is wrong. - - Approx. T of uncertainty coefficient is wrong. + - How to calculate significance of some symmetric and directional measures? + - How to calculate ASE for symmetric Somers ' d? + - How to calculate ASE for Goodman and Kruskal's tau? + - How to calculate approx. T of symmetric uncertainty coefficient? */ #include #include +#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "minmax.h" -#include "xalloc.h" -#include "xsize.h" +#include "data/case.h" +#include "data/casegrouper.h" +#include "data/casereader.h" +#include "data/data-out.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/value-labels.h" +#include "data/variable.h" +#include "language/command.h" +#include "language/stats/freq.h" +#include "language/dictionary/split-file.h" +#include "language/lexer/lexer.h" +#include "language/lexer/variable-parser.h" +#include "libpspp/array.h" +#include "libpspp/assertion.h" +#include "libpspp/compiler.h" +#include "libpspp/hash-functions.h" +#include "libpspp/hmap.h" +#include "libpspp/hmapx.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "libpspp/pool.h" +#include "libpspp/str.h" +#include "output/pivot-table.h" +#include "output/charts/barchart.h" + +#include "gl/minmax.h" +#include "gl/xalloc.h" +#include "gl/xsize.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -74,13 +73,15 @@ *^tables=custom; +variables=custom; missing=miss:!table/include/report; + count=roundwhat:asis/case/!cell, + roundhow:!round/truncate; +write[wr_]=none,cells,all; - +format=fmt:!labels/nolabels/novallabs, - val:!avalue/dvalue, + +format=val:!avalue/dvalue, indx:!noindex/index, tabl:!tables/notables, box:!box/nobox, pivot:!pivot/nopivot; + +barchart=; +cells[cl_]=count,expected,row,column,total,residual,sresidual, asresidual,all,none; +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d, @@ -98,22 +99,8 @@ /* Number of directional statistics. */ #define N_DIRECTIONAL 13 -/* A single table entry for general mode. */ -struct table_entry - { - struct hmap_node node; /* Entry in hash table. */ - double freq; /* Frequency count. */ - union value values[1]; /* Values. */ - }; -static size_t -table_entry_size (size_t n_values) -{ - return (offsetof (struct table_entry, values) - + n_values * sizeof (union value)); -} - -/* Indexes into the 'vars' member of struct pivot_table and +/* Indexes into the 'vars' member of struct crosstabulation and struct crosstab member. */ enum { @@ -122,34 +109,34 @@ enum /* Higher indexes cause multiple tables to be output. */ }; +struct xtab_var + { + const struct variable *var; + union value *values; + size_t n_values; + }; + /* A crosstabulation of 2 or more variables. */ -struct pivot_table +struct crosstabulation { + struct crosstabs_proc *proc; struct fmt_spec weight_format; /* Format for weight variable. */ double missing; /* Weight of missing cases. */ /* Variables (2 or more). */ int n_vars; - const struct variable **vars; + struct xtab_var *vars; /* Constants (0 or more). */ int n_consts; - const struct variable **const_vars; - union value *const_values; + struct xtab_var *const_vars; + size_t *const_indexes; /* Data. */ struct hmap data; - struct table_entry **entries; + struct freq **entries; size_t n_entries; - /* Column values, number of columns. */ - union value *cols; - int n_cols; - - /* Row values, number of rows. */ - union value *rows; - int n_rows; - /* Number of statistically interesting columns/rows (columns/rows with data in them). */ int ns_cols, ns_rows; @@ -164,31 +151,30 @@ struct pivot_table /* Integer mode variable info. */ struct var_range { + struct hmap_node hmap_node; /* In struct crosstabs_proc var_ranges map. */ + const struct variable *var; /* The variable. */ int min; /* Minimum value. */ int max; /* Maximum value + 1. */ int count; /* max - min. */ }; -static inline struct var_range * -get_var_range (const struct variable *v) -{ - return var_get_aux (v); -} - struct crosstabs_proc { + const struct dictionary *dict; enum { INTEGER, GENERAL } mode; enum mv_class exclude; bool pivot; + bool barchart; bool bad_warn; struct fmt_spec weight_format; /* Variables specifies on VARIABLES. */ const struct variable **variables; size_t n_variables; + struct hmap var_ranges; /* TABLES. */ - struct pivot_table *pivots; + struct crosstabulation *pivots; int n_pivots; /* CELLS. */ @@ -196,139 +182,154 @@ struct crosstabs_proc unsigned int cells; /* Bit k is 1 if cell k is requested. */ int a_cells[CRS_CL_count]; /* 0...n_cells-1 are the requested cells. */ + /* Rounding of cells. */ + bool round_case_weights; /* Round case weights? */ + bool round_cells; /* If !round_case_weights, round cells? */ + bool round_down; /* Round down? (otherwise to nearest) */ + /* STATISTICS. */ unsigned int statistics; /* Bit k is 1 if statistic k is requested. */ - }; -static void -init_proc (struct crosstabs_proc *proc, struct dataset *ds) -{ - const struct variable *wv = dict_get_weight (dataset_dict (ds)); - proc->bad_warn = true; - proc->variables = NULL; - proc->n_variables = 0; - proc->pivots = NULL; - proc->n_pivots = 0; - proc->weight_format = wv ? *var_get_print_format (wv) : F_8_0; -} - -static void -free_proc (struct crosstabs_proc *proc) -{ - struct pivot_table *pt; + bool descending; /* True if descending sort order is requested. */ + }; - free (proc->variables); - for (pt = &proc->pivots[0]; pt < &proc->pivots[proc->n_pivots]; pt++) - { - free (pt->vars); - free (pt->const_vars); - /* We must not call value_destroy on const_values because - it is a wild pointer; it never pointed to anything owned - by the pivot_table. - - The rest of the data was allocated and destroyed at a - lower level already. */ - free (pt); - } -} +const struct var_range *get_var_range (const struct crosstabs_proc *, + const struct variable *); -static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds, - struct crosstabs_proc *); -static bool should_tabulate_case (const struct pivot_table *, +static bool should_tabulate_case (const struct crosstabulation *, const struct ccase *, enum mv_class exclude); -static void tabulate_general_case (struct pivot_table *, const struct ccase *, +static void tabulate_general_case (struct crosstabulation *, const struct ccase *, double weight); -static void tabulate_integer_case (struct pivot_table *, const struct ccase *, +static void tabulate_integer_case (struct crosstabulation *, const struct ccase *, double weight); static void postcalc (struct crosstabs_proc *); -static void submit (struct crosstabs_proc *, struct pivot_table *, - struct tab_table *); -/* Parse and execute CROSSTABS, then clean up. */ -int -cmd_crosstabs (struct lexer *lexer, struct dataset *ds) +static double +round_weight (const struct crosstabs_proc *proc, double weight) { - struct crosstabs_proc proc; - int result; + return proc->round_down ? floor (weight) : floor (weight + 0.5); +} - init_proc (&proc, ds); - result = internal_cmd_crosstabs (lexer, ds, &proc); - free_proc (&proc); +#define FOR_EACH_POPULATED_COLUMN(C, XT) \ + for (int C = next_populated_column (0, XT); \ + C < (XT)->vars[COL_VAR].n_values; \ + C = next_populated_column (C + 1, XT)) +static int +next_populated_column (int c, const struct crosstabulation *xt) +{ + int n_columns = xt->vars[COL_VAR].n_values; + for (; c < n_columns; c++) + if (xt->col_tot[c]) + break; + return c; +} - return result; +#define FOR_EACH_POPULATED_ROW(R, XT) \ + for (int R = next_populated_row (0, XT); R < (XT)->vars[ROW_VAR].n_values; \ + R = next_populated_row (R + 1, XT)) +static int +next_populated_row (int r, const struct crosstabulation *xt) +{ + int n_rows = xt->vars[ROW_VAR].n_values; + for (; r < n_rows; r++) + if (xt->row_tot[r]) + break; + return r; } /* Parses and executes the CROSSTABS procedure. */ -static int -internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds, - struct crosstabs_proc *proc) +int +cmd_crosstabs (struct lexer *lexer, struct dataset *ds) { + struct var_range *range, *next_range; + struct crosstabs_proc proc; struct casegrouper *grouper; struct casereader *input, *group; struct cmd_crosstabs cmd; - struct pivot_table *pt; + struct crosstabulation *xt; + int result; bool ok; int i; - if (!parse_crosstabs (lexer, ds, &cmd, proc)) - return CMD_FAILURE; + proc.dict = dataset_dict (ds); + proc.bad_warn = true; + proc.variables = NULL; + proc.n_variables = 0; + hmap_init (&proc.var_ranges); + proc.pivots = NULL; + proc.n_pivots = 0; + proc.descending = false; + proc.weight_format = *dict_get_weight_format (dataset_dict (ds)); - proc->mode = proc->n_variables ? INTEGER : GENERAL; + if (!parse_crosstabs (lexer, ds, &cmd, &proc)) + { + result = CMD_FAILURE; + goto exit; + } + + proc.mode = proc.n_variables ? INTEGER : GENERAL; + proc.barchart = cmd.sbc_barchart > 0; + + proc.descending = cmd.val == CRS_DVALUE; + + proc.round_case_weights = cmd.sbc_count && cmd.roundwhat == CRS_CASE; + proc.round_cells = cmd.sbc_count && cmd.roundwhat == CRS_CELL; + proc.round_down = cmd.roundhow == CRS_TRUNCATE; /* CELLS. */ if (!cmd.sbc_cells) - proc->cells = 1u << CRS_CL_COUNT; + proc.cells = 1u << CRS_CL_COUNT; else if (cmd.a_cells[CRS_CL_ALL]) - proc->cells = UINT_MAX; + proc.cells = UINT_MAX; else { - proc->cells = 0; + proc.cells = 0; for (i = 0; i < CRS_CL_count; i++) if (cmd.a_cells[i]) - proc->cells |= 1u << i; - if (proc->cells == 0) - proc->cells = ((1u << CRS_CL_COUNT) + proc.cells |= 1u << i; + if (proc.cells == 0) + proc.cells = ((1u << CRS_CL_COUNT) | (1u << CRS_CL_ROW) | (1u << CRS_CL_COLUMN) | (1u << CRS_CL_TOTAL)); } - proc->cells &= ((1u << CRS_CL_count) - 1); - proc->cells &= ~((1u << CRS_CL_NONE) | (1u << CRS_CL_ALL)); - proc->n_cells = 0; + proc.cells &= ((1u << CRS_CL_count) - 1); + proc.cells &= ~((1u << CRS_CL_NONE) | (1u << CRS_CL_ALL)); + proc.n_cells = 0; for (i = 0; i < CRS_CL_count; i++) - if (proc->cells & (1u << i)) - proc->a_cells[proc->n_cells++] = i; + if (proc.cells & (1u << i)) + proc.a_cells[proc.n_cells++] = i; /* STATISTICS. */ if (cmd.a_statistics[CRS_ST_ALL]) - proc->statistics = UINT_MAX; + proc.statistics = UINT_MAX; else if (cmd.sbc_statistics) { int i; - proc->statistics = 0; + proc.statistics = 0; for (i = 0; i < CRS_ST_count; i++) if (cmd.a_statistics[i]) - proc->statistics |= 1u << i; - if (proc->statistics == 0) - proc->statistics |= 1u << CRS_ST_CHISQ; + proc.statistics |= 1u << i; + if (proc.statistics == 0) + proc.statistics |= 1u << CRS_ST_CHISQ; } else - proc->statistics = 0; + proc.statistics = 0; /* MISSING. */ - proc->exclude = (cmd.miss == CRS_TABLE ? MV_ANY + proc.exclude = (cmd.miss == CRS_TABLE ? MV_ANY : cmd.miss == CRS_INCLUDE ? MV_SYSTEM : MV_NEVER); - if (proc->mode == GENERAL && proc->mode == MV_NEVER) + if (proc.mode == GENERAL && proc.exclude == MV_NEVER) { - msg (SE, _("Missing mode REPORT not allowed in general mode. " - "Assuming MISSING=TABLE.")); - proc->mode = MV_ANY; + msg (SE, _("Missing mode %s not allowed in general mode. " + "Assuming %s."), "REPORT", "MISSING=TABLE"); + proc.exclude = MV_ANY; } /* PIVOT. */ - proc->pivot = cmd.pivot == CRS_PIVOT; + proc.pivot = cmd.pivot == CRS_PIVOT; input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds), NULL, NULL); @@ -345,31 +346,59 @@ internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds, case_unref (c); } + /* Initialize hash tables. */ + for (xt = &proc.pivots[0]; xt < &proc.pivots[proc.n_pivots]; xt++) + hmap_init (&xt->data); + /* Tabulate. */ for (; (c = casereader_read (group)) != NULL; case_unref (c)) - for (pt = &proc->pivots[0]; pt < &proc->pivots[proc->n_pivots]; pt++) + for (xt = &proc.pivots[0]; xt < &proc.pivots[proc.n_pivots]; xt++) { double weight = dict_get_case_weight (dataset_dict (ds), c, - &proc->bad_warn); - if (should_tabulate_case (pt, c, proc->exclude)) + &proc.bad_warn); + if (cmd.roundwhat == CRS_CASE) { - if (proc->mode == GENERAL) - tabulate_general_case (pt, c, weight); + weight = round_weight (&proc, weight); + if (weight == 0.) + continue; + } + if (should_tabulate_case (xt, c, proc.exclude)) + { + if (proc.mode == GENERAL) + tabulate_general_case (xt, c, weight); else - tabulate_integer_case (pt, c, weight); + tabulate_integer_case (xt, c, weight); } else - pt->missing += weight; + xt->missing += weight; } casereader_destroy (group); /* Output. */ - postcalc (proc); + postcalc (&proc); } ok = casegrouper_destroy (grouper); ok = proc_commit (ds) && ok; - return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; + result = ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; + +exit: + free (proc.variables); + HMAP_FOR_EACH_SAFE (range, next_range, struct var_range, hmap_node, + &proc.var_ranges) + { + hmap_delete (&proc.var_ranges, &range->hmap_node); + free (range); + } + for (xt = &proc.pivots[0]; xt < &proc.pivots[proc.n_pivots]; xt++) + { + free (xt->vars); + free (xt->const_vars); + free (xt->const_indexes); + } + free (proc.pivots); + + return result; } /* Parses the TABLES subcommand. */ @@ -390,10 +419,10 @@ crs_custom_tables (struct lexer *lexer, struct dataset *ds, /* Ensure that this is a TABLES subcommand. */ if (!lex_match_id (lexer, "TABLES") && (lex_token (lexer) != T_ID || - dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL) + dict_lookup_var (dataset_dict (ds), lex_tokcstr (lexer)) == NULL) && lex_token (lexer) != T_ALL) return 2; - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); if (proc->variables != NULL) var_set = const_var_set_create_from_array (proc->variables, @@ -420,10 +449,7 @@ crs_custom_tables (struct lexer *lexer, struct dataset *ds, if (!lex_match (lexer, T_BY)) { if (n_by < 2) - { - lex_error (lexer, _("expecting BY")); - goto done; - } + goto done; else break; } @@ -434,22 +460,20 @@ crs_custom_tables (struct lexer *lexer, struct dataset *ds, proc->n_pivots + nx, sizeof *proc->pivots); for (i = 0; i < nx; i++) { - struct pivot_table *pt = &proc->pivots[proc->n_pivots++]; + struct crosstabulation *xt = &proc->pivots[proc->n_pivots++]; int j; - pt->weight_format = proc->weight_format; - pt->missing = 0.; - pt->n_vars = n_by; - pt->vars = xmalloc (n_by * sizeof *pt->vars); - pt->n_consts = 0; - pt->const_vars = NULL; - pt->const_values = NULL; - hmap_init (&pt->data); - pt->entries = NULL; - pt->n_entries = 0; + xt->proc = proc; + xt->weight_format = proc->weight_format; + xt->missing = 0.; + xt->n_vars = n_by; + xt->vars = xcalloc (n_by, sizeof *xt->vars); + xt->n_consts = 0; + xt->const_vars = NULL; + xt->const_indexes = NULL; for (j = 0; j < n_by; j++) - pt->vars[j] = by[j][by_iter[j]]; + xt->vars[j].var = by[j][by_iter[j]]; for (j = n_by - 1; j >= 0; j--) { @@ -481,11 +505,11 @@ crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct crosstabs_proc *proc = proc_; if (proc->n_pivots) { - msg (SE, _("VARIABLES must be specified before TABLES.")); + msg (SE, _("%s must be specified before %s."), "VARIABLES", "TABLES"); return 0; } - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); for (;;) { @@ -500,19 +524,15 @@ crs_custom_variables (struct lexer *lexer, struct dataset *ds, | PV_NO_DUPLICATE | PV_NO_SCRATCH))) return 0; - if (lex_token (lexer) != '(') - { - lex_error (lexer, "expecting `('"); + if (!lex_force_match (lexer, T_LPAREN)) goto lossage; - } - lex_get (lexer); if (!lex_force_int (lexer)) goto lossage; min = lex_integer (lexer); lex_get (lexer); - lex_match (lexer, ','); + lex_match (lexer, T_COMMA); if (!lex_force_int (lexer)) goto lossage; @@ -525,23 +545,23 @@ crs_custom_variables (struct lexer *lexer, struct dataset *ds, } lex_get (lexer); - if (lex_token (lexer) != ')') - { - lex_error (lexer, "expecting `)'"); - goto lossage; - } - lex_get (lexer); + if (!lex_force_match (lexer, T_RPAREN)) + goto lossage; for (i = orig_nv; i < proc->n_variables; i++) { + const struct variable *var = proc->variables[i]; struct var_range *vr = xmalloc (sizeof *vr); + + vr->var = var; vr->min = min; - vr->max = max + 1.; + vr->max = max; vr->count = max - min + 1; - var_attach_aux (proc->variables[i], vr, var_dtor_free); + hmap_insert (&proc->var_ranges, &vr->hmap_node, + hash_pointer (var, 0)); } - if (lex_token (lexer) == '/') + if (lex_token (lexer) == T_SLASH) break; } @@ -556,15 +576,31 @@ crs_custom_variables (struct lexer *lexer, struct dataset *ds, /* Data file processing. */ +const struct var_range * +get_var_range (const struct crosstabs_proc *proc, const struct variable *var) +{ + if (!hmap_is_empty (&proc->var_ranges)) + { + const struct var_range *range; + + HMAP_FOR_EACH_IN_BUCKET (range, struct var_range, hmap_node, + hash_pointer (var, 0), &proc->var_ranges) + if (range->var == var) + return range; + } + + return NULL; +} + static bool -should_tabulate_case (const struct pivot_table *pt, const struct ccase *c, +should_tabulate_case (const struct crosstabulation *xt, const struct ccase *c, enum mv_class exclude) { int j; - for (j = 0; j < pt->n_vars; j++) + for (j = 0; j < xt->n_vars; j++) { - const struct variable *var = pt->vars[j]; - struct var_range *range = get_var_range (var); + const struct variable *var = xt->vars[j].var; + const struct var_range *range = get_var_range (xt->proc, var); if (var_is_value_missing (var, case_data (c, var), exclude)) return false; @@ -572,7 +608,7 @@ should_tabulate_case (const struct pivot_table *pt, const struct ccase *c, if (range != NULL) { double num = case_num (c, var); - if (num < range->min || num > range->max) + if (num < range->min || num >= range->max + 1.) return false; } } @@ -580,514 +616,552 @@ should_tabulate_case (const struct pivot_table *pt, const struct ccase *c, } static void -tabulate_integer_case (struct pivot_table *pt, const struct ccase *c, +tabulate_integer_case (struct crosstabulation *xt, const struct ccase *c, double weight) { - struct table_entry *te; + struct freq *te; size_t hash; int j; hash = 0; - for (j = 0; j < pt->n_vars; j++) + for (j = 0; j < xt->n_vars; j++) { /* Throw away fractional parts of values. */ - hash = hash_int (case_num (c, pt->vars[j]), hash); + hash = hash_int (case_num (c, xt->vars[j].var), hash); } - HMAP_FOR_EACH_WITH_HASH (te, struct table_entry, node, hash, &pt->data) + HMAP_FOR_EACH_WITH_HASH (te, struct freq, node, hash, &xt->data) { - for (j = 0; j < pt->n_vars; j++) - if ((int) case_num (c, pt->vars[j]) != (int) te->values[j].f) + for (j = 0; j < xt->n_vars; j++) + if ((int) case_num (c, xt->vars[j].var) != (int) te->values[j].f) goto no_match; /* Found an existing entry. */ - te->freq += weight; + te->count += weight; return; no_match: ; } /* No existing entry. Create a new one. */ - te = xmalloc (table_entry_size (pt->n_vars)); - te->freq = weight; - for (j = 0; j < pt->n_vars; j++) - te->values[j].f = (int) case_num (c, pt->vars[j]); - hmap_insert (&pt->data, &te->node, hash); + te = xmalloc (table_entry_size (xt->n_vars)); + te->count = weight; + for (j = 0; j < xt->n_vars; j++) + te->values[j].f = (int) case_num (c, xt->vars[j].var); + hmap_insert (&xt->data, &te->node, hash); } static void -tabulate_general_case (struct pivot_table *pt, const struct ccase *c, +tabulate_general_case (struct crosstabulation *xt, const struct ccase *c, double weight) { - struct table_entry *te; + struct freq *te; size_t hash; int j; hash = 0; - for (j = 0; j < pt->n_vars; j++) + for (j = 0; j < xt->n_vars; j++) { - const struct variable *var = pt->vars[j]; + const struct variable *var = xt->vars[j].var; hash = value_hash (case_data (c, var), var_get_width (var), hash); } - HMAP_FOR_EACH_WITH_HASH (te, struct table_entry, node, hash, &pt->data) + HMAP_FOR_EACH_WITH_HASH (te, struct freq, node, hash, &xt->data) { - for (j = 0; j < pt->n_vars; j++) + for (j = 0; j < xt->n_vars; j++) { - const struct variable *var = pt->vars[j]; + const struct variable *var = xt->vars[j].var; if (!value_equal (case_data (c, var), &te->values[j], var_get_width (var))) goto no_match; } /* Found an existing entry. */ - te->freq += weight; + te->count += weight; return; no_match: ; } /* No existing entry. Create a new one. */ - te = xmalloc (table_entry_size (pt->n_vars)); - te->freq = weight; - for (j = 0; j < pt->n_vars; j++) + te = xmalloc (table_entry_size (xt->n_vars)); + te->count = weight; + for (j = 0; j < xt->n_vars; j++) { - const struct variable *var = pt->vars[j]; - int width = var_get_width (var); - value_init (&te->values[j], width); - value_copy (&te->values[j], case_data (c, var), width); + const struct variable *var = xt->vars[j].var; + value_clone (&te->values[j], case_data (c, var), var_get_width (var)); } - hmap_insert (&pt->data, &te->node, hash); + hmap_insert (&xt->data, &te->node, hash); } /* Post-data reading calculations. */ -static int compare_table_entry_vars_3way (const struct table_entry *a, - const struct table_entry *b, - const struct pivot_table *pt, +static int compare_table_entry_vars_3way (const struct freq *a, + const struct freq *b, + const struct crosstabulation *xt, int idx0, int idx1); static int compare_table_entry_3way (const void *ap_, const void *bp_, - const void *pt_); -static void enum_var_values (const struct pivot_table *, int var_idx, - union value **valuesp, int *n_values); -static void output_pivot_table (struct crosstabs_proc *, - struct pivot_table *); -static void make_pivot_table_subset (struct pivot_table *pt, + const void *xt_); +static int compare_table_entry_3way_inv (const void *ap_, const void *bp_, + const void *xt_); + +static void enum_var_values (const struct crosstabulation *, int var_idx, + bool descending); +static void free_var_values (const struct crosstabulation *, int var_idx); +static void output_crosstabulation (struct crosstabs_proc *, + struct crosstabulation *); +static void make_crosstabulation_subset (struct crosstabulation *xt, size_t row0, size_t row1, - struct pivot_table *subset); + struct crosstabulation *subset); static void make_summary_table (struct crosstabs_proc *); -static bool find_crosstab (struct pivot_table *, size_t *row0p, size_t *row1p); +static bool find_crosstab (struct crosstabulation *, size_t *row0p, + size_t *row1p); static void postcalc (struct crosstabs_proc *proc) { - struct pivot_table *pt; + + /* Round hash table entries, if requested + + If this causes any of the cell counts to fall to zero, delete those + cells. */ + if (proc->round_cells) + for (struct crosstabulation *xt = proc->pivots; + xt < &proc->pivots[proc->n_pivots]; xt++) + { + struct freq *e, *next; + HMAP_FOR_EACH_SAFE (e, next, struct freq, node, &xt->data) + { + e->count = round_weight (proc, e->count); + if (e->count == 0.0) + { + hmap_delete (&xt->data, &e->node); + free (e); + } + } + } /* Convert hash tables into sorted arrays of entries. */ - for (pt = &proc->pivots[0]; pt < &proc->pivots[proc->n_pivots]; pt++) + for (struct crosstabulation *xt = proc->pivots; + xt < &proc->pivots[proc->n_pivots]; xt++) { - struct table_entry *e; - size_t i; + struct freq *e; + + xt->n_entries = hmap_count (&xt->data); + xt->entries = xnmalloc (xt->n_entries, sizeof *xt->entries); + size_t i = 0; + HMAP_FOR_EACH (e, struct freq, node, &xt->data) + xt->entries[i++] = e; + hmap_destroy (&xt->data); - pt->n_entries = hmap_count (&pt->data); - pt->entries = xnmalloc (pt->n_entries, sizeof *pt->entries); - i = 0; - HMAP_FOR_EACH (e, struct table_entry, node, &pt->data) - pt->entries[i++] = e; - hmap_destroy (&pt->data); + sort (xt->entries, xt->n_entries, sizeof *xt->entries, + proc->descending ? compare_table_entry_3way_inv : compare_table_entry_3way, + xt); - sort (pt->entries, pt->n_entries, sizeof *pt->entries, - compare_table_entry_3way, pt); } make_summary_table (proc); /* Output each pivot table. */ - for (pt = &proc->pivots[0]; pt < &proc->pivots[proc->n_pivots]; pt++) + for (struct crosstabulation *xt = proc->pivots; + xt < &proc->pivots[proc->n_pivots]; xt++) { - if (proc->pivot || pt->n_vars == 2) - output_pivot_table (proc, pt); + if (proc->pivot || xt->n_vars == 2) + output_crosstabulation (proc, xt); else { size_t row0 = 0, row1 = 0; - while (find_crosstab (pt, &row0, &row1)) + while (find_crosstab (xt, &row0, &row1)) { - struct pivot_table subset; - make_pivot_table_subset (pt, row0, row1, &subset); - output_pivot_table (proc, &subset); + struct crosstabulation subset; + make_crosstabulation_subset (xt, row0, row1, &subset); + output_crosstabulation (proc, &subset); + free (subset.const_indexes); } } + if (proc->barchart) + { + int n_vars = (xt->n_vars > 2 ? 2 : xt->n_vars); + const struct variable **vars = xcalloc (n_vars, sizeof *vars); + for (size_t i = 0; i < n_vars; i++) + vars[i] = xt->vars[i].var; + chart_submit (barchart_create (vars, n_vars, _("Count"), + false, + xt->entries, xt->n_entries)); + free (vars); + } } /* Free output and prepare for next split file. */ - for (pt = &proc->pivots[0]; pt < &proc->pivots[proc->n_pivots]; pt++) + for (struct crosstabulation *xt = proc->pivots; + xt < &proc->pivots[proc->n_pivots]; xt++) { - size_t i; + xt->missing = 0.0; - pt->missing = 0.0; + /* Free the members that were allocated in this function(and the values + owned by the entries. - /* Free only the members that were allocated in this - function. The other pointer members are either both - allocated and destroyed at a lower level (in - output_pivot_table), or both allocated and destroyed at - a higher level (in crs_custom_tables and free_proc, + The other pointer members are either both allocated and destroyed at a + lower level (in output_crosstabulation), or both allocated and + destroyed at a higher level (in crs_custom_tables and free_proc, respectively). */ - for (i = 0; i < pt->n_entries; i++) - free (pt->entries[i]); - free (pt->entries); + for (size_t i = 0; i < xt->n_vars; i++) + { + int width = var_get_width (xt->vars[i].var); + if (value_needs_init (width)) + { + size_t j; + + for (j = 0; j < xt->n_entries; j++) + value_destroy (&xt->entries[j]->values[i], width); + } + } + + for (size_t i = 0; i < xt->n_entries; i++) + free (xt->entries[i]); + free (xt->entries); } } static void -make_pivot_table_subset (struct pivot_table *pt, size_t row0, size_t row1, - struct pivot_table *subset) +make_crosstabulation_subset (struct crosstabulation *xt, size_t row0, + size_t row1, struct crosstabulation *subset) { - *subset = *pt; - if (pt->n_vars > 2) + *subset = *xt; + if (xt->n_vars > 2) { - assert (pt->n_consts == 0); - subset->missing = pt->missing; + assert (xt->n_consts == 0); subset->n_vars = 2; - subset->vars = pt->vars; - subset->n_consts = pt->n_vars - 2; - subset->const_vars = pt->vars + 2; - subset->const_values = &pt->entries[row0]->values[2]; + subset->vars = xt->vars; + + subset->n_consts = xt->n_vars - 2; + subset->const_vars = xt->vars + 2; + subset->const_indexes = xcalloc (subset->n_consts, + sizeof *subset->const_indexes); + for (size_t i = 0; i < subset->n_consts; i++) + { + const union value *value = &xt->entries[row0]->values[2 + i]; + + for (size_t j = 0; j < xt->vars[2 + i].n_values; j++) + if (value_equal (&xt->vars[2 + i].values[j], value, + var_get_width (xt->vars[2 + i].var))) + { + subset->const_indexes[i] = j; + goto found; + } + NOT_REACHED (); + found: ; + } } - subset->entries = &pt->entries[row0]; + subset->entries = &xt->entries[row0]; subset->n_entries = row1 - row0; } static int -compare_table_entry_var_3way (const struct table_entry *a, - const struct table_entry *b, - const struct pivot_table *pt, +compare_table_entry_var_3way (const struct freq *a, + const struct freq *b, + const struct crosstabulation *xt, int idx) { return value_compare_3way (&a->values[idx], &b->values[idx], - var_get_width (pt->vars[idx])); + var_get_width (xt->vars[idx].var)); } static int -compare_table_entry_vars_3way (const struct table_entry *a, - const struct table_entry *b, - const struct pivot_table *pt, +compare_table_entry_vars_3way (const struct freq *a, + const struct freq *b, + const struct crosstabulation *xt, int idx0, int idx1) { int i; for (i = idx1 - 1; i >= idx0; i--) { - int cmp = compare_table_entry_var_3way (a, b, pt, i); + int cmp = compare_table_entry_var_3way (a, b, xt, i); if (cmp != 0) return cmp; } return 0; } -/* Compare the struct table_entry at *AP to the one at *BP and +/* Compare the struct freq at *AP to the one at *BP and return a strcmp()-type result. */ static int -compare_table_entry_3way (const void *ap_, const void *bp_, const void *pt_) +compare_table_entry_3way (const void *ap_, const void *bp_, const void *xt_) { - const struct table_entry *const *ap = ap_; - const struct table_entry *const *bp = bp_; - const struct table_entry *a = *ap; - const struct table_entry *b = *bp; - const struct pivot_table *pt = pt_; + const struct freq *const *ap = ap_; + const struct freq *const *bp = bp_; + const struct freq *a = *ap; + const struct freq *b = *bp; + const struct crosstabulation *xt = xt_; int cmp; - cmp = compare_table_entry_vars_3way (a, b, pt, 2, pt->n_vars); + cmp = compare_table_entry_vars_3way (a, b, xt, 2, xt->n_vars); if (cmp != 0) return cmp; - cmp = compare_table_entry_var_3way (a, b, pt, ROW_VAR); + cmp = compare_table_entry_var_3way (a, b, xt, ROW_VAR); if (cmp != 0) return cmp; - return compare_table_entry_var_3way (a, b, pt, COL_VAR); + return compare_table_entry_var_3way (a, b, xt, COL_VAR); } +/* Inverted version of compare_table_entry_3way */ static int -find_first_difference (const struct pivot_table *pt, size_t row) +compare_table_entry_3way_inv (const void *ap_, const void *bp_, const void *xt_) { - if (row == 0) - return pt->n_vars - 1; - else - { - const struct table_entry *a = pt->entries[row]; - const struct table_entry *b = pt->entries[row - 1]; - int col; - - for (col = pt->n_vars - 1; col >= 0; col--) - if (compare_table_entry_var_3way (a, b, pt, col)) - return col; - NOT_REACHED (); - } + return -compare_table_entry_3way (ap_, bp_, xt_); } /* Output a table summarizing the cases processed. */ static void make_summary_table (struct crosstabs_proc *proc) { - struct tab_table *summary; - struct pivot_table *pt; - struct string name; - int i; + struct pivot_table *table = pivot_table_create (N_("Summary")); + pivot_table_set_weight_var (table, dict_get_weight (proc->dict)); - summary = tab_create (7, 3 + proc->n_pivots, 1); - tab_title (summary, _("Summary.")); - tab_headers (summary, 1, 0, 3, 0); - tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases")); - tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid")); - tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing")); - tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total")); - tab_hline (summary, TAL_1, 1, 6, 1); - tab_hline (summary, TAL_1, 1, 6, 2); - tab_vline (summary, TAL_1, 3, 1, 1); - tab_vline (summary, TAL_1, 5, 1, 1); - for (i = 0; i < 3; i++) - { - tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N")); - tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent")); - } - tab_offset (summary, 0, 3); + pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), + N_("N"), PIVOT_RC_COUNT, + N_("Percent"), PIVOT_RC_PERCENT); - ds_init_empty (&name); - for (pt = &proc->pivots[0]; pt < &proc->pivots[proc->n_pivots]; pt++) - { - double valid; - double n[3]; - size_t i; - - tab_hline (summary, TAL_1, 0, 6, 0); + struct pivot_dimension *cases = pivot_dimension_create ( + table, PIVOT_AXIS_COLUMN, N_("Cases"), + N_("Valid"), N_("Missing"), N_("Total")); + cases->root->show_label = true; - ds_clear (&name); - for (i = 0; i < pt->n_vars; i++) + struct pivot_dimension *tables = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Crosstabulation")); + for (struct crosstabulation *xt = &proc->pivots[0]; + xt < &proc->pivots[proc->n_pivots]; xt++) + { + struct string name = DS_EMPTY_INITIALIZER; + for (size_t i = 0; i < xt->n_vars; i++) { if (i > 0) - ds_put_cstr (&name, " * "); - ds_put_cstr (&name, var_to_string (pt->vars[i])); + ds_put_cstr (&name, " × "); + ds_put_cstr (&name, var_to_string (xt->vars[i].var)); } - tab_text (summary, 0, 0, TAB_LEFT, ds_cstr (&name)); - valid = 0.; - for (i = 0; i < pt->n_entries; i++) - valid += pt->entries[i]->freq; + int row = pivot_category_create_leaf ( + tables->root, + pivot_value_new_user_text_nocopy (ds_steal_cstr (&name))); + + double valid = 0.; + for (size_t i = 0; i < xt->n_entries; i++) + valid += xt->entries[i]->count; + double n[3]; n[0] = valid; - n[1] = pt->missing; + n[1] = xt->missing; n[2] = n[0] + n[1]; - for (i = 0; i < 3; i++) + for (int i = 0; i < 3; i++) { - tab_double (summary, i * 2 + 1, 0, TAB_RIGHT, n[i], - &proc->weight_format); - tab_text_format (summary, i * 2 + 2, 0, TAB_RIGHT, "%.1f%%", - n[i] / n[2] * 100.); + pivot_table_put3 (table, 0, i, row, pivot_value_new_number (n[i])); + pivot_table_put3 (table, 1, i, row, + pivot_value_new_number (n[i] / n[2] * 100.0)); } - - tab_next_row (summary); } - ds_destroy (&name); - submit (proc, NULL, summary); + pivot_table_submit (table); } /* Output. */ -static struct tab_table *create_crosstab_table (struct crosstabs_proc *, - struct pivot_table *); -static struct tab_table *create_chisq_table (struct pivot_table *); -static struct tab_table *create_sym_table (struct pivot_table *); -static struct tab_table *create_risk_table (struct pivot_table *); -static struct tab_table *create_direct_table (struct pivot_table *); -static void display_dimensions (struct crosstabs_proc *, struct pivot_table *, - struct tab_table *, int first_difference); +static struct pivot_table *create_crosstab_table ( + struct crosstabs_proc *, struct crosstabulation *, + size_t crs_leaves[CRS_CL_count]); +static struct pivot_table *create_chisq_table (struct crosstabulation *); +static struct pivot_table *create_sym_table (struct crosstabulation *); +static struct pivot_table *create_risk_table ( + struct crosstabulation *, struct pivot_dimension **risk_statistics); +static struct pivot_table *create_direct_table (struct crosstabulation *); static void display_crosstabulation (struct crosstabs_proc *, + struct crosstabulation *, struct pivot_table *, - struct tab_table *); -static void display_chisq (struct pivot_table *, struct tab_table *, - bool *showed_fisher); -static void display_symmetric (struct crosstabs_proc *, struct pivot_table *, - struct tab_table *); -static void display_risk (struct pivot_table *, struct tab_table *); -static void display_directional (struct crosstabs_proc *, struct pivot_table *, - struct tab_table *); -static void crosstabs_dim (struct tab_table *, struct outp_driver *, - void *proc); -static void table_value_missing (struct crosstabs_proc *proc, - struct tab_table *table, int c, int r, - unsigned char opt, const union value *v, - const struct variable *var); -static void delete_missing (struct pivot_table *); -static void build_matrix (struct pivot_table *); - -/* Output pivot table beginning at PB and continuing until PE, - exclusive. For efficiency, *MATP is a pointer to a matrix that can - hold *MAXROWS entries. */ + size_t crs_leaves[CRS_CL_count]); +static void display_chisq (struct crosstabulation *, struct pivot_table *); +static void display_symmetric (struct crosstabs_proc *, + struct crosstabulation *, struct pivot_table *); +static void display_risk (struct crosstabulation *, struct pivot_table *, + struct pivot_dimension *risk_statistics); +static void display_directional (struct crosstabs_proc *, + struct crosstabulation *, + struct pivot_table *); +static void delete_missing (struct crosstabulation *); +static void build_matrix (struct crosstabulation *); + +/* Output pivot table XT in the context of PROC. */ static void -output_pivot_table (struct crosstabs_proc *proc, struct pivot_table *pt) +output_crosstabulation (struct crosstabs_proc *proc, struct crosstabulation *xt) { - struct tab_table *table = NULL; /* Crosstabulation table. */ - struct tab_table *chisq = NULL; /* Chi-square table. */ - bool showed_fisher = false; - struct tab_table *sym = NULL; /* Symmetric measures table. */ - struct tab_table *risk = NULL; /* Risk estimate table. */ - struct tab_table *direct = NULL; /* Directional measures table. */ - size_t row0, row1; - - enum_var_values (pt, COL_VAR, &pt->cols, &pt->n_cols); - - if (proc->cells) - table = create_crosstab_table (proc, pt); - if (proc->statistics & (1u << CRS_ST_CHISQ)) - chisq = create_chisq_table (pt); - if (proc->statistics & ((1u << CRS_ST_PHI) | (1u << CRS_ST_CC) - | (1u << CRS_ST_BTAU) | (1u << CRS_ST_CTAU) - | (1u << CRS_ST_GAMMA) | (1u << CRS_ST_CORR) - | (1u << CRS_ST_KAPPA))) - sym = create_sym_table (pt); - if (proc->statistics & (1u << CRS_ST_RISK)) - risk = create_risk_table (pt); - if (proc->statistics & ((1u << CRS_ST_LAMBDA) | (1u << CRS_ST_UC) - | (1u << CRS_ST_D) | (1u << CRS_ST_ETA))) - direct = create_direct_table (pt); - - row0 = row1 = 0; - while (find_crosstab (pt, &row0, &row1)) + for (size_t i = 0; i < xt->n_vars; i++) + enum_var_values (xt, i, proc->descending); + + if (xt->vars[COL_VAR].n_values == 0) { - struct pivot_table x; - int first_difference; + struct string vars; + int i; - make_pivot_table_subset (pt, row0, row1, &x); + ds_init_cstr (&vars, var_to_string (xt->vars[0].var)); + for (i = 1; i < xt->n_vars; i++) + ds_put_format (&vars, " × %s", var_to_string (xt->vars[i].var)); - /* Find all the row variable values. */ - enum_var_values (&x, ROW_VAR, &x.rows, &x.n_rows); + /* TRANSLATORS: The %s here describes a crosstabulation. It takes the + form "var1 * var2 * var3 * ...". */ + msg (SW, _("Crosstabulation %s contained no non-missing cases."), + ds_cstr (&vars)); - if (size_overflow_p (xtimes (xtimes (x.n_rows, x.n_cols), - sizeof (double)))) - xalloc_die (); - x.row_tot = xmalloc (x.n_rows * sizeof *x.row_tot); - x.col_tot = xmalloc (x.n_cols * sizeof *x.col_tot); - x.mat = xmalloc (x.n_rows * x.n_cols * sizeof *x.mat); + ds_destroy (&vars); + for (size_t i = 0; i < xt->n_vars; i++) + free_var_values (xt, i); + return; + } - /* Allocate table space for the matrix. */ - if (table - && tab_row (table) + (x.n_rows + 1) * proc->n_cells > tab_nr (table)) - tab_realloc (table, -1, - MAX (tab_nr (table) + (x.n_rows + 1) * proc->n_cells, - tab_nr (table) * pt->n_entries / x.n_entries)); + size_t crs_leaves[CRS_CL_count]; + struct pivot_table *table = (proc->cells + ? create_crosstab_table (proc, xt, crs_leaves) + : NULL); + struct pivot_table *chisq = (proc->statistics & (1u << CRS_ST_CHISQ) + ? create_chisq_table (xt) + : NULL); + struct pivot_table *sym + = (proc->statistics & ((1u << CRS_ST_PHI) | (1u << CRS_ST_CC) + | (1u << CRS_ST_BTAU) | (1u << CRS_ST_CTAU) + | (1u << CRS_ST_GAMMA) | (1u << CRS_ST_CORR) + | (1u << CRS_ST_KAPPA)) + ? create_sym_table (xt) + : NULL); + struct pivot_dimension *risk_statistics = NULL; + struct pivot_table *risk = (proc->statistics & (1u << CRS_ST_RISK) + ? create_risk_table (xt, &risk_statistics) + : NULL); + struct pivot_table *direct + = (proc->statistics & ((1u << CRS_ST_LAMBDA) | (1u << CRS_ST_UC) + | (1u << CRS_ST_D) | (1u << CRS_ST_ETA)) + ? create_direct_table (xt) + : NULL); + + size_t row0 = 0; + size_t row1 = 0; + while (find_crosstab (xt, &row0, &row1)) + { + struct crosstabulation x; + + make_crosstabulation_subset (xt, row0, row1, &x); + + size_t n_rows = x.vars[ROW_VAR].n_values; + size_t n_cols = x.vars[COL_VAR].n_values; + if (size_overflow_p (xtimes (xtimes (n_rows, n_cols), sizeof (double)))) + xalloc_die (); + x.row_tot = xmalloc (n_rows * sizeof *x.row_tot); + x.col_tot = xmalloc (n_cols * sizeof *x.col_tot); + x.mat = xmalloc (n_rows * n_cols * sizeof *x.mat); build_matrix (&x); /* Find the first variable that differs from the last subtable. */ - first_difference = find_first_difference (pt, row0); if (table) - { - display_dimensions (proc, &x, table, first_difference); - display_crosstabulation (proc, &x, table); - } + display_crosstabulation (proc, &x, table, crs_leaves); if (proc->exclude == MV_NEVER) delete_missing (&x); if (chisq) - { - display_dimensions (proc, &x, chisq, first_difference); - display_chisq (&x, chisq, &showed_fisher); - } + display_chisq (&x, chisq); + if (sym) - { - display_dimensions (proc, &x, sym, first_difference); - display_symmetric (proc, &x, sym); - } + display_symmetric (proc, &x, sym); if (risk) - { - display_dimensions (proc, &x, risk, first_difference); - display_risk (&x, risk); - } + display_risk (&x, risk, risk_statistics); if (direct) - { - display_dimensions (proc, &x, direct, first_difference); - display_directional (proc, &x, direct); - } - - /* Free the parts of x that are not owned by pt. In - particular we must not free x.cols, which is the same as - pt->cols, which is freed at the end of this function. */ - free (x.rows); + display_directional (proc, &x, direct); free (x.mat); free (x.row_tot); free (x.col_tot); + free (x.const_indexes); } - submit (proc, NULL, table); + if (table) + pivot_table_submit (table); if (chisq) + pivot_table_submit (chisq); + + if (sym) + pivot_table_submit (sym); + + if (risk) { - if (!showed_fisher) - tab_resize (chisq, 4 + (pt->n_vars - 2), -1); - submit (proc, pt, chisq); + if (!pivot_table_is_empty (risk)) + pivot_table_submit (risk); + else + pivot_table_unref (risk); } - submit (proc, pt, sym); - submit (proc, pt, risk); - submit (proc, pt, direct); + if (direct) + pivot_table_submit (direct); - free (pt->cols); + for (size_t i = 0; i < xt->n_vars; i++) + free_var_values (xt, i); } static void -build_matrix (struct pivot_table *x) +build_matrix (struct crosstabulation *x) { - const int col_var_width = var_get_width (x->vars[COL_VAR]); - const int row_var_width = var_get_width (x->vars[ROW_VAR]); + const int col_var_width = var_get_width (x->vars[COL_VAR].var); + const int row_var_width = var_get_width (x->vars[ROW_VAR].var); + size_t n_rows = x->vars[ROW_VAR].n_values; + size_t n_cols = x->vars[COL_VAR].n_values; int col, row; double *mp; - struct table_entry **p; + struct freq **p; mp = x->mat; col = row = 0; for (p = x->entries; p < &x->entries[x->n_entries]; p++) { - const struct table_entry *te = *p; + const struct freq *te = *p; - while (!value_equal (&x->rows[row], &te->values[ROW_VAR], row_var_width)) + while (!value_equal (&x->vars[ROW_VAR].values[row], + &te->values[ROW_VAR], row_var_width)) { - for (; col < x->n_cols; col++) + for (; col < n_cols; col++) *mp++ = 0.0; col = 0; row++; } - while (!value_equal (&x->cols[col], &te->values[COL_VAR], col_var_width)) + while (!value_equal (&x->vars[COL_VAR].values[col], + &te->values[COL_VAR], col_var_width)) { *mp++ = 0.0; col++; } - *mp++ = te->freq; - if (++col >= x->n_cols) + *mp++ = te->count; + if (++col >= n_cols) { col = 0; row++; } } - while (mp < &x->mat[x->n_cols * x->n_rows]) + while (mp < &x->mat[n_cols * n_rows]) *mp++ = 0.0; - assert (mp == &x->mat[x->n_cols * x->n_rows]); + assert (mp == &x->mat[n_cols * n_rows]); /* Column totals, row totals, ns_rows. */ mp = x->mat; - for (col = 0; col < x->n_cols; col++) + for (col = 0; col < n_cols; col++) x->col_tot[col] = 0.0; - for (row = 0; row < x->n_rows; row++) + for (row = 0; row < n_rows; row++) x->row_tot[row] = 0.0; x->ns_rows = 0; - for (row = 0; row < x->n_rows; row++) + for (row = 0; row < n_rows; row++) { bool row_is_empty = true; - for (col = 0; col < x->n_cols; col++) + for (col = 0; col < n_cols; col++) { if (*mp != 0.0) { @@ -1100,13 +1174,13 @@ build_matrix (struct pivot_table *x) if (!row_is_empty) x->ns_rows++; } - assert (mp == &x->mat[x->n_cols * x->n_rows]); + assert (mp == &x->mat[n_cols * n_rows]); /* ns_cols. */ x->ns_cols = 0; - for (col = 0; col < x->n_cols; col++) - for (row = 0; row < x->n_rows; row++) - if (x->mat[col + row * x->n_cols] != 0.0) + for (col = 0; col < n_cols; col++) + for (row = 0; row < n_rows; row++) + if (x->mat[col + row * n_cols] != 0.0) { x->ns_cols++; break; @@ -1114,317 +1188,290 @@ build_matrix (struct pivot_table *x) /* Grand total. */ x->total = 0.0; - for (col = 0; col < x->n_cols; col++) + for (col = 0; col < n_cols; col++) x->total += x->col_tot[col]; } -static struct tab_table * -create_crosstab_table (struct crosstabs_proc *proc, struct pivot_table *pt) +static void +add_var_dimension (struct pivot_table *table, const struct xtab_var *var, + enum pivot_axis_type axis_type, bool total) { - struct tuple - { - int value; - const char *name; - }; - static const struct tuple names[] = - { - {CRS_CL_COUNT, N_("count")}, - {CRS_CL_ROW, N_("row %")}, - {CRS_CL_COLUMN, N_("column %")}, - {CRS_CL_TOTAL, N_("total %")}, - {CRS_CL_EXPECTED, N_("expected")}, - {CRS_CL_RESIDUAL, N_("residual")}, - {CRS_CL_SRESIDUAL, N_("std. resid.")}, - {CRS_CL_ASRESIDUAL, N_("adj. resid.")}, - }; - const int n_names = sizeof names / sizeof *names; - const struct tuple *t; + struct pivot_dimension *d = pivot_dimension_create__ ( + table, axis_type, pivot_value_new_variable (var->var)); - struct tab_table *table; - struct string title; - int i; + struct pivot_footnote *missing_footnote = pivot_table_create_footnote ( + table, pivot_value_new_text (N_("Missing value"))); - table = tab_create (pt->n_consts + 1 + pt->n_cols + 1, - (pt->n_entries / pt->n_cols) * 3 / 2 * proc->n_cells + 10, - true); - tab_headers (table, pt->n_consts + 1, 0, 2, 0); - - /* First header line. */ - tab_joint_text (table, pt->n_consts + 1, 0, - (pt->n_consts + 1) + (pt->n_cols - 1), 0, - TAB_CENTER | TAT_TITLE, var_get_name (pt->vars[COL_VAR])); - - tab_hline (table, TAL_1, pt->n_consts + 1, - pt->n_consts + 2 + pt->n_cols - 2, 1); - - /* Second header line. */ - for (i = 2; i < pt->n_consts + 2; i++) - tab_joint_text (table, pt->n_consts + 2 - i - 1, 0, - pt->n_consts + 2 - i - 1, 1, - TAB_RIGHT | TAT_TITLE, var_to_string (pt->vars[i])); - tab_text (table, pt->n_consts + 2 - 2, 1, TAB_RIGHT | TAT_TITLE, - var_get_name (pt->vars[ROW_VAR])); - for (i = 0; i < pt->n_cols; i++) - table_value_missing (proc, table, pt->n_consts + 2 + i - 1, 1, TAB_RIGHT, - &pt->cols[i], pt->vars[COL_VAR]); - tab_text (table, pt->n_consts + 2 + pt->n_cols - 1, 1, TAB_CENTER, _("Total")); - - tab_hline (table, TAL_1, 0, pt->n_consts + 2 + pt->n_cols - 1, 2); - tab_vline (table, TAL_1, pt->n_consts + 2 + pt->n_cols - 1, 0, 1); + struct pivot_category *group = pivot_category_create_group__ ( + d->root, pivot_value_new_variable (var->var)); + for (size_t j = 0; j < var->n_values; j++) + { + struct pivot_value *value = pivot_value_new_var_value ( + var->var, &var->values[j]); + if (var_is_value_missing (var->var, &var->values[j], MV_ANY)) + pivot_value_add_footnote (value, missing_footnote); + pivot_category_create_leaf (group, value); + } + if (total) + pivot_category_create_leaf (d->root, pivot_value_new_text (N_("Total"))); +} + +static struct pivot_table * +create_crosstab_table (struct crosstabs_proc *proc, struct crosstabulation *xt, + size_t crs_leaves[CRS_CL_count]) +{ /* Title. */ - ds_init_empty (&title); - for (i = 0; i < pt->n_consts + 2; i++) + struct string title = DS_EMPTY_INITIALIZER; + for (size_t i = 0; i < xt->n_vars; i++) { if (i) - ds_put_cstr (&title, " * "); - ds_put_cstr (&title, var_get_name (pt->vars[i])); + ds_put_cstr (&title, " × "); + ds_put_cstr (&title, var_to_string (xt->vars[i].var)); } - for (i = 0; i < pt->n_consts; i++) + for (size_t i = 0; i < xt->n_consts; i++) { - const struct variable *var = pt->const_vars[i]; - size_t ofs; - - ds_put_format (&title, ", %s=", var_get_name (var)); - - /* Insert the formatted value of the variable, then trim - leading spaces in what was just inserted. */ - ofs = ds_length (&title); - data_out (&pt->const_values[i], var_get_print_format (var), - ds_put_uninit (&title, var_get_width (var))); - ds_remove (&title, ofs, ss_cspan (ds_substr (&title, ofs, SIZE_MAX), - ss_cstr (" "))); + const struct variable *var = xt->const_vars[i].var; + const union value *value = &xt->entries[0]->values[2 + i]; + char *s; + + ds_put_format (&title, ", %s=", var_to_string (var)); + + /* Insert the formatted value of VAR without any leading spaces. */ + s = data_out (value, var_get_encoding (var), var_get_print_format (var), + settings_get_fmt_settings ()); + ds_put_cstr (&title, s + strspn (s, " ")); + free (s); } + struct pivot_table *table = pivot_table_create__ ( + pivot_value_new_user_text_nocopy (ds_steal_cstr (&title)), + "Crosstabulation"); + pivot_table_set_weight_format (table, &proc->weight_format); - ds_put_cstr (&title, " ["); - i = 0; - for (t = names; t < &names[n_names]; t++) - if (proc->cells & (1u << t->value)) - { - if (i++) - ds_put_cstr (&title, ", "); - ds_put_cstr (&title, gettext (t->name)); - } - ds_put_cstr (&title, "]."); + struct pivot_dimension *statistics = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Statistics")); - tab_title (table, "%s", ds_cstr (&title)); - ds_destroy (&title); + struct statistic + { + const char *label; + const char *rc; + }; + static const struct statistic stats[CRS_CL_count] = + { + [CRS_CL_COUNT] = { N_("Count"), PIVOT_RC_COUNT }, + [CRS_CL_ROW] = { N_("Row %"), PIVOT_RC_PERCENT }, + [CRS_CL_COLUMN] = { N_("Column %"), PIVOT_RC_PERCENT }, + [CRS_CL_TOTAL] = { N_("Total %"), PIVOT_RC_PERCENT }, + [CRS_CL_EXPECTED] = { N_("Expected"), PIVOT_RC_OTHER }, + [CRS_CL_RESIDUAL] = { N_("Residual"), PIVOT_RC_RESIDUAL }, + [CRS_CL_SRESIDUAL] = { N_("Std. Residual"), PIVOT_RC_RESIDUAL }, + [CRS_CL_ASRESIDUAL] = { N_("Adjusted Residual"), PIVOT_RC_RESIDUAL }, + }; + for (size_t i = 0; i < CRS_CL_count; i++) + if (proc->cells & (1u << i) && stats[i].label) + crs_leaves[i] = pivot_category_create_leaf_rc ( + statistics->root, pivot_value_new_text (stats[i].label), + stats[i].rc); + + for (size_t i = 0; i < xt->n_vars; i++) + add_var_dimension (table, &xt->vars[i], + i == COL_VAR ? PIVOT_AXIS_COLUMN : PIVOT_AXIS_ROW, + true); - tab_offset (table, 0, 2); return table; } -static struct tab_table * -create_chisq_table (struct pivot_table *pt) +static struct pivot_table * +create_chisq_table (struct crosstabulation *xt) { - struct tab_table *chisq; - - chisq = tab_create (6 + (pt->n_vars - 2), - pt->n_entries / pt->n_cols * 3 / 2 * N_CHISQ + 10, - 1); - tab_headers (chisq, 1 + (pt->n_vars - 2), 0, 1, 0); - - tab_title (chisq, _("Chi-square tests.")); - - tab_offset (chisq, pt->n_vars - 2, 0); - tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic")); - tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value")); - tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df")); - tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE, - _("Asymp. Sig. (2-sided)")); - tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE, - _("Exact Sig. (2-sided)")); - tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE, - _("Exact Sig. (1-sided)")); - tab_offset (chisq, 0, 1); + struct pivot_table *chisq = pivot_table_create (N_("Chi-Square Tests")); + pivot_table_set_weight_format (chisq, &xt->weight_format); + + pivot_dimension_create ( + chisq, PIVOT_AXIS_ROW, N_("Statistics"), + N_("Pearson Chi-Square"), + N_("Likelihood Ratio"), + N_("Fisher's Exact Test"), + N_("Continuity Correction"), + N_("Linear-by-Linear Association"), + N_("N of Valid Cases"), PIVOT_RC_COUNT); + + pivot_dimension_create ( + chisq, PIVOT_AXIS_COLUMN, N_("Statistics"), + N_("Value"), PIVOT_RC_OTHER, + N_("df"), PIVOT_RC_COUNT, + N_("Asymptotic Sig. (2-tailed)"), PIVOT_RC_SIGNIFICANCE, + N_("Exact Sig. (2-tailed)"), PIVOT_RC_SIGNIFICANCE, + N_("Exact Sig. (1-tailed)"), PIVOT_RC_SIGNIFICANCE); + + for (size_t i = 2; i < xt->n_vars; i++) + add_var_dimension (chisq, &xt->vars[i], PIVOT_AXIS_ROW, false); return chisq; } /* Symmetric measures. */ -static struct tab_table * -create_sym_table (struct pivot_table *pt) +static struct pivot_table * +create_sym_table (struct crosstabulation *xt) { - struct tab_table *sym; - - sym = tab_create (6 + (pt->n_vars - 2), - pt->n_entries / pt->n_cols * 7 + 10, 1); - tab_headers (sym, 2 + (pt->n_vars - 2), 0, 1, 0); - tab_title (sym, _("Symmetric measures.")); - - tab_offset (sym, pt->n_vars - 2, 0); - tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category")); - tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic")); - tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value")); - tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error")); - tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T")); - tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig.")); - tab_offset (sym, 0, 1); + struct pivot_table *sym = pivot_table_create (N_("Symmetric Measures")); + pivot_table_set_weight_format (sym, &xt->weight_format); + + pivot_dimension_create ( + sym, PIVOT_AXIS_COLUMN, N_("Values"), + N_("Value"), PIVOT_RC_OTHER, + N_("Asymp. Std. Error"), PIVOT_RC_OTHER, + N_("Approx. T"), PIVOT_RC_OTHER, + N_("Approx. Sig."), PIVOT_RC_SIGNIFICANCE); + + struct pivot_dimension *statistics = pivot_dimension_create ( + sym, PIVOT_AXIS_ROW, N_("Statistics")); + pivot_category_create_group ( + statistics->root, N_("Nominal by Nominal"), + N_("Phi"), N_("Cramer's V"), N_("Contingency Coefficient")); + pivot_category_create_group ( + statistics->root, N_("Ordinal by Ordinal"), + N_("Kendall's tau-b"), N_("Kendall's tau-c"), + N_("Gamma"), N_("Spearman Correlation")); + pivot_category_create_group ( + statistics->root, N_("Interval by Interval"), + N_("Pearson's R")); + pivot_category_create_group ( + statistics->root, N_("Measure of Agreement"), + N_("Kappa")); + pivot_category_create_leaves (statistics->root, N_("N of Valid Cases"), + PIVOT_RC_COUNT); + + for (size_t i = 2; i < xt->n_vars; i++) + add_var_dimension (sym, &xt->vars[i], PIVOT_AXIS_ROW, false); return sym; } /* Risk estimate. */ -static struct tab_table * -create_risk_table (struct pivot_table *pt) +static struct pivot_table * +create_risk_table (struct crosstabulation *xt, + struct pivot_dimension **risk_statistics) { - struct tab_table *risk; - - risk = tab_create (4 + (pt->n_vars - 2), pt->n_entries / pt->n_cols * 4 + 10, - 1); - tab_headers (risk, 1 + pt->n_vars - 2, 0, 2, 0); - tab_title (risk, _("Risk estimate.")); - - tab_offset (risk, pt->n_vars - 2, 0); - tab_joint_text_format (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE, - _("95%% Confidence Interval")); - tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic")); - tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value")); - tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower")); - tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper")); - tab_hline (risk, TAL_1, 2, 3, 1); - tab_vline (risk, TAL_1, 2, 0, 1); - tab_offset (risk, 0, 2); + struct pivot_table *risk = pivot_table_create (N_("Risk Estimate")); + pivot_table_set_weight_format (risk, &xt->weight_format); + + struct pivot_dimension *values = pivot_dimension_create ( + risk, PIVOT_AXIS_COLUMN, N_("Values"), + N_("Value"), PIVOT_RC_OTHER); + pivot_category_create_group ( + /* xgettext:no-c-format */ + values->root, N_("95% Confidence Interval"), + N_("Lower"), PIVOT_RC_OTHER, + N_("Upper"), PIVOT_RC_OTHER); + + *risk_statistics = pivot_dimension_create ( + risk, PIVOT_AXIS_ROW, N_("Statistics")); + + for (size_t i = 2; i < xt->n_vars; i++) + add_var_dimension (risk, &xt->vars[i], PIVOT_AXIS_ROW, false); return risk; } +static void +create_direct_stat (struct pivot_category *parent, + const struct crosstabulation *xt, + const char *name, bool symmetric) +{ + struct pivot_category *group = pivot_category_create_group ( + parent, name); + if (symmetric) + pivot_category_create_leaf (group, pivot_value_new_text (N_("Symmetric"))); + + char *row_label = xasprintf (_("%s Dependent"), + var_to_string (xt->vars[ROW_VAR].var)); + pivot_category_create_leaf (group, pivot_value_new_user_text_nocopy ( + row_label)); + + char *col_label = xasprintf (_("%s Dependent"), + var_to_string (xt->vars[COL_VAR].var)); + pivot_category_create_leaf (group, pivot_value_new_user_text_nocopy ( + col_label)); +} + /* Directional measures. */ -static struct tab_table * -create_direct_table (struct pivot_table *pt) +static struct pivot_table * +create_direct_table (struct crosstabulation *xt) { - struct tab_table *direct; - - direct = tab_create (7 + (pt->n_vars - 2), - pt->n_entries / pt->n_cols * 7 + 10, 1); - tab_headers (direct, 3 + (pt->n_vars - 2), 0, 1, 0); - tab_title (direct, _("Directional measures.")); - - tab_offset (direct, pt->n_vars - 2, 0); - tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category")); - tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic")); - tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type")); - tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value")); - tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error")); - tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T")); - tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig.")); - tab_offset (direct, 0, 1); + struct pivot_table *direct = pivot_table_create (N_("Directional Measures")); + pivot_table_set_weight_format (direct, &xt->weight_format); + + pivot_dimension_create ( + direct, PIVOT_AXIS_COLUMN, N_("Values"), + N_("Value"), PIVOT_RC_OTHER, + N_("Asymp. Std. Error"), PIVOT_RC_OTHER, + N_("Approx. T"), PIVOT_RC_OTHER, + N_("Approx. Sig."), PIVOT_RC_SIGNIFICANCE); + + struct pivot_dimension *statistics = pivot_dimension_create ( + direct, PIVOT_AXIS_ROW, N_("Statistics")); + struct pivot_category *nn = pivot_category_create_group ( + statistics->root, N_("Nominal by Nominal")); + create_direct_stat (nn, xt, N_("Lambda"), true); + create_direct_stat (nn, xt, N_("Goodman and Kruskal tau"), false); + create_direct_stat (nn, xt, N_("Uncertainty Coefficient"), true); + struct pivot_category *oo = pivot_category_create_group ( + statistics->root, N_("Ordinal by Ordinal")); + create_direct_stat (oo, xt, N_("Somers' d"), true); + struct pivot_category *ni = pivot_category_create_group ( + statistics->root, N_("Nominal by Interval")); + create_direct_stat (ni, xt, N_("Eta"), false); + + for (size_t i = 2; i < xt->n_vars; i++) + add_var_dimension (direct, &xt->vars[i], PIVOT_AXIS_ROW, false); return direct; } - /* Delete missing rows and columns for statistical analysis when /MISSING=REPORT. */ static void -delete_missing (struct pivot_table *pt) +delete_missing (struct crosstabulation *xt) { + size_t n_rows = xt->vars[ROW_VAR].n_values; + size_t n_cols = xt->vars[COL_VAR].n_values; int r, c; - for (r = 0; r < pt->n_rows; r++) - if (var_is_num_missing (pt->vars[ROW_VAR], pt->rows[r].f, MV_USER)) + for (r = 0; r < n_rows; r++) + if (var_is_num_missing (xt->vars[ROW_VAR].var, + xt->vars[ROW_VAR].values[r].f, MV_USER)) { - for (c = 0; c < pt->n_cols; c++) - pt->mat[c + r * pt->n_cols] = 0.; - pt->ns_rows--; + for (c = 0; c < n_cols; c++) + xt->mat[c + r * n_cols] = 0.; + xt->ns_rows--; } - for (c = 0; c < pt->n_cols; c++) - if (var_is_num_missing (pt->vars[COL_VAR], pt->cols[c].f, MV_USER)) + for (c = 0; c < n_cols; c++) + if (var_is_num_missing (xt->vars[COL_VAR].var, + xt->vars[COL_VAR].values[c].f, MV_USER)) { - for (r = 0; r < pt->n_rows; r++) - pt->mat[c + r * pt->n_cols] = 0.; - pt->ns_cols--; + for (r = 0; r < n_rows; r++) + xt->mat[c + r * n_cols] = 0.; + xt->ns_cols--; } } -/* Prepare table T for submission, and submit it. */ -static void -submit (struct crosstabs_proc *proc, struct pivot_table *pt, - struct tab_table *t) -{ - int i; - - if (t == NULL) - return; - - tab_resize (t, -1, 0); - if (tab_nr (t) == tab_t (t)) - { - tab_destroy (t); - return; - } - tab_offset (t, 0, 0); - if (pt != NULL) - for (i = 2; i < pt->n_vars; i++) - tab_text (t, pt->n_vars - i - 1, 0, TAB_RIGHT | TAT_TITLE, - var_to_string (pt->vars[i])); - tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1); - tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1, - tab_nr (t) - 1); - tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1, - tab_nr (t) - 1); - tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1); - tab_dim (t, crosstabs_dim, proc); - tab_submit (t); -} - -/* Sets the widths of all the columns and heights of all the rows in - table T for driver D. */ -static void -crosstabs_dim (struct tab_table *t, struct outp_driver *d, void *proc_) -{ - struct crosstabs_proc *proc = proc_; - int i; - - /* Width of a numerical column. */ - int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL); - if (proc->exclude == MV_NEVER) - c += outp_string_width (d, "M", OUTP_PROPORTIONAL); - - /* Set width for header columns. */ - if (t->l != 0) - { - size_t i; - int w; - - w = d->width - c * (t->nc - t->l); - for (i = 0; i <= t->nc; i++) - w -= t->wrv[i]; - w /= t->l; - - if (w < d->prop_em_width * 8) - w = d->prop_em_width * 8; - - if (w > d->prop_em_width * 15) - w = d->prop_em_width * 15; - - for (i = 0; i < t->l; i++) - t->w[i] = w; - } - - for (i = t->l; i < t->nc; i++) - t->w[i] = c; - - for (i = 0; i < t->nr; i++) - t->h[i] = tab_natural_height (t, d, i); -} - static bool -find_crosstab (struct pivot_table *pt, size_t *row0p, size_t *row1p) +find_crosstab (struct crosstabulation *xt, size_t *row0p, size_t *row1p) { size_t row0 = *row1p; size_t row1; - if (row0 >= pt->n_entries) + if (row0 >= xt->n_entries) return false; - for (row1 = row0 + 1; row1 < pt->n_entries; row1++) + for (row1 = row0 + 1; row1 < xt->n_entries; row1++) { - struct table_entry *a = pt->entries[row0]; - struct table_entry *b = pt->entries[row1]; - if (compare_table_entry_vars_3way (a, b, pt, 2, pt->n_vars) != 0) + struct freq *a = xt->entries[row0]; + struct freq *b = xt->entries[row1]; + if (compare_table_entry_vars_3way (a, b, xt, 2, xt->n_vars) != 0) break; } *row0p = row0; @@ -1445,39 +1492,43 @@ compare_value_3way (const void *a_, const void *b_, const void *width_) return value_compare_3way (a, b, *width); } +/* Inverted version of the above */ +static int +compare_value_3way_inv (const void *a_, const void *b_, const void *width_) +{ + return -compare_value_3way (a_, b_, width_); +} + + /* Given an array of ENTRY_CNT table_entry structures starting at ENTRIES, creates a sorted list of the values that the variable - with index VAR_IDX takes on. The values are returned as a - malloc()'d array stored in *VALUES, with the number of values - stored in *VALUE_CNT. - */ + with index VAR_IDX takes on. Stores the array of the values in + XT->values and the number of values in XT->n_values. */ static void -enum_var_values (const struct pivot_table *pt, int var_idx, - union value **valuesp, int *n_values) +enum_var_values (const struct crosstabulation *xt, int var_idx, + bool descending) { - const struct variable *var = pt->vars[var_idx]; - struct var_range *range = get_var_range (var); - union value *values; - size_t i; + struct xtab_var *xv = &xt->vars[var_idx]; + const struct var_range *range = get_var_range (xt->proc, xv->var); if (range) { - values = *valuesp = xnmalloc (range->count, sizeof *values); - *n_values = range->count; - for (i = 0; i < range->count; i++) - values[i].f = range->min + i; + xv->values = xnmalloc (range->count, sizeof *xv->values); + xv->n_values = range->count; + for (size_t i = 0; i < range->count; i++) + xv->values[i].f = range->min + i; } else { - int width = var_get_width (var); + int width = var_get_width (xv->var); struct hmapx_node *node; const union value *iter; struct hmapx set; hmapx_init (&set); - for (i = 0; i < pt->n_entries; i++) + for (size_t i = 0; i < xt->n_entries; i++) { - const struct table_entry *te = pt->entries[i]; + const struct freq *te = xt->entries[i]; const union value *value = &te->values[var_idx]; size_t hash = value_hash (value, width, 0); @@ -1490,606 +1541,374 @@ enum_var_values (const struct pivot_table *pt, int var_idx, next_entry: ; } - *n_values = hmapx_count (&set); - values = *valuesp = xnmalloc (*n_values, sizeof *values); - i = 0; + xv->n_values = hmapx_count (&set); + xv->values = xnmalloc (xv->n_values, sizeof *xv->values); + size_t i = 0; HMAPX_FOR_EACH (iter, node, &set) - values[i++] = *iter; + xv->values[i++] = *iter; hmapx_destroy (&set); - sort (values, *n_values, sizeof *values, compare_value_3way, &width); + sort (xv->values, xv->n_values, sizeof *xv->values, + descending ? compare_value_3way_inv : compare_value_3way, + &width); } } -/* Sets cell (C,R) in TABLE, with options OPT, to have a value taken - from V, displayed with print format spec from variable VAR. When - in REPORT missing-value mode, missing values have an M appended. */ static void -table_value_missing (struct crosstabs_proc *proc, - struct tab_table *table, int c, int r, unsigned char opt, - const union value *v, const struct variable *var) +free_var_values (const struct crosstabulation *xt, int var_idx) { - struct substring s; - const struct fmt_spec *print = var_get_print_format (var); - - const char *label = var_lookup_value_label (var, v); - if (label) - { - tab_text (table, c, r, TAB_LEFT, label); - return; - } - - s.string = tab_alloc (table, print->w); - data_out (v, print, s.string); - s.length = print->w; - if (proc->exclude == MV_NEVER && var_is_num_missing (var, v->f, MV_USER)) - s.string[s.length++] = 'M'; - while (s.length && *s.string == ' ') - { - s.length--; - s.string++; - } - tab_raw (table, c, r, opt, &s); -} - -/* Draws a line across TABLE at the current row to indicate the most - major dimension variable with index FIRST_DIFFERENCE out of N_VARS - that changed, and puts the values that changed into the table. TB - and PT must be the corresponding table_entry and crosstab, - respectively. */ -static void -display_dimensions (struct crosstabs_proc *proc, struct pivot_table *pt, - struct tab_table *table, int first_difference) -{ - tab_hline (table, TAL_1, pt->n_vars - first_difference - 1, tab_nc (table) - 1, 0); - - for (; first_difference >= 2; first_difference--) - table_value_missing (proc, table, pt->n_vars - first_difference - 1, 0, - TAB_RIGHT, &pt->entries[0]->values[first_difference], - pt->vars[first_difference]); -} - -/* Put VALUE into cell (C,R) of TABLE, suffixed with character - SUFFIX if nonzero. If MARK_MISSING is true the entry is - additionally suffixed with a letter `M'. */ -static void -format_cell_entry (struct tab_table *table, int c, int r, double value, - char suffix, bool mark_missing) -{ - const struct fmt_spec f = {FMT_F, 10, 1}; - union value v; - struct substring s; - - s.length = 10; - s.string = tab_alloc (table, 16); - v.f = value; - data_out (&v, &f, s.string); - while (*s.string == ' ') - { - s.length--; - s.string++; - } - if (suffix != 0) - s.string[s.length++] = suffix; - if (mark_missing) - s.string[s.length++] = 'M'; - - tab_raw (table, c, r, TAB_RIGHT, &s); + struct xtab_var *xv = &xt->vars[var_idx]; + free (xv->values); + xv->values = NULL; + xv->n_values = 0; } /* Displays the crosstabulation table. */ static void -display_crosstabulation (struct crosstabs_proc *proc, struct pivot_table *pt, - struct tab_table *table) +display_crosstabulation (struct crosstabs_proc *proc, + struct crosstabulation *xt, struct pivot_table *table, + size_t crs_leaves[CRS_CL_count]) { - int last_row; - int r, c, i; - double *mp; - - for (r = 0; r < pt->n_rows; r++) - table_value_missing (proc, table, pt->n_vars - 2, r * proc->n_cells, - TAB_RIGHT, &pt->rows[r], pt->vars[ROW_VAR]); + size_t n_rows = xt->vars[ROW_VAR].n_values; + size_t n_cols = xt->vars[COL_VAR].n_values; - tab_text (table, pt->n_vars - 2, pt->n_rows * proc->n_cells, - TAB_LEFT, _("Total")); + size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes); + assert (xt->n_vars == 2); + for (size_t i = 0; i < xt->n_consts; i++) + indexes[i + 3] = xt->const_indexes[i]; /* Put in the actual cells. */ - mp = pt->mat; - tab_offset (table, pt->n_vars - 1, -1); - for (r = 0; r < pt->n_rows; r++) + double *mp = xt->mat; + for (size_t r = 0; r < n_rows; r++) { - if (proc->n_cells > 1) - tab_hline (table, TAL_1, -1, pt->n_cols, 0); - for (c = 0; c < pt->n_cols; c++) + if (!xt->row_tot[r] && proc->mode != INTEGER) + continue; + + indexes[ROW_VAR + 1] = r; + for (size_t c = 0; c < n_cols; c++) { - bool mark_missing = false; - double expected_value = pt->row_tot[r] * pt->col_tot[c] / pt->total; - if (proc->exclude == MV_NEVER - && (var_is_num_missing (pt->vars[COL_VAR], pt->cols[c].f, MV_USER) - || var_is_num_missing (pt->vars[ROW_VAR], pt->rows[r].f, - MV_USER))) - mark_missing = true; - for (i = 0; i < proc->n_cells; i++) + if (!xt->col_tot[c] && proc->mode != INTEGER) + continue; + + indexes[COL_VAR + 1] = c; + + double expected_value = xt->row_tot[r] * xt->col_tot[c] / xt->total; + double residual = *mp - expected_value; + double sresidual = residual / sqrt (expected_value); + double asresidual = (sresidual + * (1. - xt->row_tot[r] / xt->total) + * (1. - xt->col_tot[c] / xt->total)); + double entries[] = { + [CRS_CL_COUNT] = *mp, + [CRS_CL_ROW] = *mp / xt->row_tot[r] * 100., + [CRS_CL_COLUMN] = *mp / xt->col_tot[c] * 100., + [CRS_CL_TOTAL] = *mp / xt->total * 100., + [CRS_CL_EXPECTED] = expected_value, + [CRS_CL_RESIDUAL] = residual, + [CRS_CL_SRESIDUAL] = sresidual, + [CRS_CL_ASRESIDUAL] = asresidual, + }; + for (size_t i = 0; i < proc->n_cells; i++) { - double v; - int suffix = 0; - - switch (proc->a_cells[i]) - { - case CRS_CL_COUNT: - v = *mp; - break; - case CRS_CL_ROW: - v = *mp / pt->row_tot[r] * 100.; - suffix = '%'; - break; - case CRS_CL_COLUMN: - v = *mp / pt->col_tot[c] * 100.; - suffix = '%'; - break; - case CRS_CL_TOTAL: - v = *mp / pt->total * 100.; - suffix = '%'; - break; - case CRS_CL_EXPECTED: - v = expected_value; - break; - case CRS_CL_RESIDUAL: - v = *mp - expected_value; - break; - case CRS_CL_SRESIDUAL: - v = (*mp - expected_value) / sqrt (expected_value); - break; - case CRS_CL_ASRESIDUAL: - v = ((*mp - expected_value) - / sqrt (expected_value - * (1. - pt->row_tot[r] / pt->total) - * (1. - pt->col_tot[c] / pt->total))); - break; - default: - NOT_REACHED (); - } - format_cell_entry (table, c, i, v, suffix, mark_missing); + int cell = proc->a_cells[i]; + indexes[0] = crs_leaves[cell]; + pivot_table_put (table, indexes, table->n_dimensions, + pivot_value_new_number (entries[cell])); } mp++; } - - tab_offset (table, -1, tab_row (table) + proc->n_cells); } /* Row totals. */ - tab_offset (table, -1, tab_row (table) - proc->n_cells * pt->n_rows); - for (r = 0; r < pt->n_rows; r++) - { - bool mark_missing = false; - - if (proc->exclude == MV_NEVER - && var_is_num_missing (pt->vars[ROW_VAR], pt->rows[r].f, MV_USER)) - mark_missing = true; - - for (i = 0; i < proc->n_cells; i++) + for (size_t r = 0; r < n_rows; r++) + { + if (!xt->row_tot[r] && proc->mode != INTEGER) + continue; + + double expected_value = xt->row_tot[r] / xt->total; + double entries[] = { + [CRS_CL_COUNT] = xt->row_tot[r], + [CRS_CL_ROW] = 100.0, + [CRS_CL_COLUMN] = expected_value * 100., + [CRS_CL_TOTAL] = expected_value * 100., + [CRS_CL_EXPECTED] = expected_value, + [CRS_CL_RESIDUAL] = SYSMIS, + [CRS_CL_SRESIDUAL] = SYSMIS, + [CRS_CL_ASRESIDUAL] = SYSMIS, + }; + for (size_t i = 0; i < proc->n_cells; i++) { - char suffix = 0; - double v; - - switch (proc->a_cells[i]) + int cell = proc->a_cells[i]; + double entry = entries[cell]; + if (entry != SYSMIS) { - case CRS_CL_COUNT: - v = pt->row_tot[r]; - break; - case CRS_CL_ROW: - v = 100.0; - suffix = '%'; - break; - case CRS_CL_COLUMN: - v = pt->row_tot[r] / pt->total * 100.; - suffix = '%'; - break; - case CRS_CL_TOTAL: - v = pt->row_tot[r] / pt->total * 100.; - suffix = '%'; - break; - case CRS_CL_EXPECTED: - case CRS_CL_RESIDUAL: - case CRS_CL_SRESIDUAL: - case CRS_CL_ASRESIDUAL: - v = 0.; - break; - default: - NOT_REACHED (); + indexes[ROW_VAR + 1] = r; + indexes[COL_VAR + 1] = n_cols; + indexes[0] = crs_leaves[cell]; + pivot_table_put (table, indexes, table->n_dimensions, + pivot_value_new_number (entry)); } - - format_cell_entry (table, pt->n_cols, 0, v, suffix, mark_missing); - tab_next_row (table); } } - /* Column totals, grand total. */ - last_row = 0; - if (proc->n_cells > 1) - tab_hline (table, TAL_1, -1, pt->n_cols, 0); - for (c = 0; c <= pt->n_cols; c++) - { - double ct = c < pt->n_cols ? pt->col_tot[c] : pt->total; - bool mark_missing = false; - int i; - - if (proc->exclude == MV_NEVER && c < pt->n_cols - && var_is_num_missing (pt->vars[COL_VAR], pt->cols[c].f, MV_USER)) - mark_missing = true; - - for (i = 0; i < proc->n_cells; i++) + for (size_t c = 0; c <= n_cols; c++) + { + if (c < n_cols && !xt->col_tot[c] && proc->mode != INTEGER) + continue; + + double ct = c < n_cols ? xt->col_tot[c] : xt->total; + double expected_value = ct / xt->total; + double entries[] = { + [CRS_CL_COUNT] = ct, + [CRS_CL_ROW] = expected_value * 100.0, + [CRS_CL_COLUMN] = 100.0, + [CRS_CL_TOTAL] = expected_value * 100., + [CRS_CL_EXPECTED] = expected_value, + [CRS_CL_RESIDUAL] = SYSMIS, + [CRS_CL_SRESIDUAL] = SYSMIS, + [CRS_CL_ASRESIDUAL] = SYSMIS, + }; + for (size_t i = 0; i < proc->n_cells; i++) { - char suffix = 0; - double v; - - switch (proc->a_cells[i]) + int cell = proc->a_cells[i]; + double entry = entries[cell]; + if (entry != SYSMIS) { - case CRS_CL_COUNT: - v = ct; - break; - case CRS_CL_ROW: - v = ct / pt->total * 100.; - suffix = '%'; - break; - case CRS_CL_COLUMN: - v = 100.; - suffix = '%'; - break; - case CRS_CL_TOTAL: - v = ct / pt->total * 100.; - suffix = '%'; - break; - case CRS_CL_EXPECTED: - case CRS_CL_RESIDUAL: - case CRS_CL_SRESIDUAL: - case CRS_CL_ASRESIDUAL: - continue; - default: - NOT_REACHED (); + indexes[ROW_VAR + 1] = n_rows; + indexes[COL_VAR + 1] = c; + indexes[0] = crs_leaves[cell]; + pivot_table_put (table, indexes, table->n_dimensions, + pivot_value_new_number (entry)); } - - format_cell_entry (table, c, i, v, suffix, mark_missing); } - last_row = i; } - tab_offset (table, -1, tab_row (table) + last_row); - tab_offset (table, 0, -1); + free (indexes); } -static void calc_r (struct pivot_table *, - double *PT, double *Y, double *, double *, double *); -static void calc_chisq (struct pivot_table *, +static void calc_r (struct crosstabulation *, + double *XT, double *Y, double *, double *, double *); +static void calc_chisq (struct crosstabulation *, double[N_CHISQ], int[N_CHISQ], double *, double *); /* Display chi-square statistics. */ static void -display_chisq (struct pivot_table *pt, struct tab_table *chisq, - bool *showed_fisher) +display_chisq (struct crosstabulation *xt, struct pivot_table *chisq) { - static const char *chisq_stats[N_CHISQ] = - { - N_("Pearson Chi-Square"), - N_("Likelihood Ratio"), - N_("Fisher's Exact Test"), - N_("Continuity Correction"), - N_("Linear-by-Linear Association"), - }; double chisq_v[N_CHISQ]; double fisher1, fisher2; int df[N_CHISQ]; + calc_chisq (xt, chisq_v, df, &fisher1, &fisher2); - int i; - - calc_chisq (pt, chisq_v, df, &fisher1, &fisher2); - - tab_offset (chisq, pt->n_vars - 2, -1); - - for (i = 0; i < N_CHISQ; i++) + size_t *indexes = xnmalloc (chisq->n_dimensions, sizeof *indexes); + assert (xt->n_vars == 2); + for (size_t i = 0; i < xt->n_consts; i++) + indexes[i + 2] = xt->const_indexes[i]; + for (int i = 0; i < N_CHISQ; i++) { - if ((i != 2 && chisq_v[i] == SYSMIS) - || (i == 2 && fisher1 == SYSMIS)) - continue; + indexes[0] = i; - tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i])); - if (i != 2) - { - tab_double (chisq, 1, 0, TAB_RIGHT, chisq_v[i], NULL); - tab_double (chisq, 2, 0, TAB_RIGHT, df[i], &pt->weight_format); - tab_double (chisq, 3, 0, TAB_RIGHT, - gsl_cdf_chisq_Q (chisq_v[i], df[i]), NULL); - } - else - { - *showed_fisher = true; - tab_double (chisq, 4, 0, TAB_RIGHT, fisher2, NULL); - tab_double (chisq, 5, 0, TAB_RIGHT, fisher1, NULL); - } - tab_next_row (chisq); + double entries[5] = { SYSMIS, SYSMIS, SYSMIS, SYSMIS, SYSMIS }; + if (i == 2) + { + entries[3] = fisher2; + entries[4] = fisher1; + } + else if (chisq_v[i] != SYSMIS) + { + entries[0] = chisq_v[i]; + entries[1] = df[i]; + entries[2] = gsl_cdf_chisq_Q (chisq_v[i], df[i]); + } + + for (size_t j = 0; j < sizeof entries / sizeof *entries; j++) + if (entries[j] != SYSMIS) + { + indexes[1] = j; + pivot_table_put (chisq, indexes, chisq->n_dimensions, + pivot_value_new_number (entries[j])); + } } - tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases")); - tab_double (chisq, 1, 0, TAB_RIGHT, pt->total, &pt->weight_format); - tab_next_row (chisq); + indexes[0] = 5; + indexes[1] = 0; + pivot_table_put (chisq, indexes, chisq->n_dimensions, + pivot_value_new_number (xt->total)); - tab_offset (chisq, 0, -1); + free (indexes); } -static int calc_symmetric (struct crosstabs_proc *, struct pivot_table *, +static int calc_symmetric (struct crosstabs_proc *, struct crosstabulation *, double[N_SYMMETRIC], double[N_SYMMETRIC], double[N_SYMMETRIC], double[3], double[3], double[3]); /* Display symmetric measures. */ static void -display_symmetric (struct crosstabs_proc *proc, struct pivot_table *pt, - struct tab_table *sym) +display_symmetric (struct crosstabs_proc *proc, struct crosstabulation *xt, + struct pivot_table *sym) { - static const char *categories[] = - { - N_("Nominal by Nominal"), - N_("Ordinal by Ordinal"), - N_("Interval by Interval"), - N_("Measure of Agreement"), - }; - - static const char *stats[N_SYMMETRIC] = - { - N_("Phi"), - N_("Cramer's V"), - N_("Contingency Coefficient"), - N_("Kendall's tau-b"), - N_("Kendall's tau-c"), - N_("Gamma"), - N_("Spearman Correlation"), - N_("Pearson's R"), - N_("Kappa"), - }; - - static const int stats_categories[N_SYMMETRIC] = - { - 0, 0, 0, 1, 1, 1, 1, 2, 3, - }; - - int last_cat = -1; double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC]; double somers_d_v[3], somers_d_ase[3], somers_d_t[3]; - int i; - if (!calc_symmetric (proc, pt, sym_v, sym_ase, sym_t, + if (!calc_symmetric (proc, xt, sym_v, sym_ase, sym_t, somers_d_v, somers_d_ase, somers_d_t)) return; - tab_offset (sym, pt->n_vars - 2, -1); + size_t *indexes = xnmalloc (sym->n_dimensions, sizeof *indexes); + assert (xt->n_vars == 2); + for (size_t i = 0; i < xt->n_consts; i++) + indexes[i + 2] = xt->const_indexes[i]; - for (i = 0; i < N_SYMMETRIC; i++) + for (int i = 0; i < N_SYMMETRIC; i++) { if (sym_v[i] == SYSMIS) continue; - if (stats_categories[i] != last_cat) - { - last_cat = stats_categories[i]; - tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat])); - } + indexes[1] = i; - tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i])); - tab_double (sym, 2, 0, TAB_RIGHT, sym_v[i], NULL); - if (sym_ase[i] != SYSMIS) - tab_double (sym, 3, 0, TAB_RIGHT, sym_ase[i], NULL); - if (sym_t[i] != SYSMIS) - tab_double (sym, 4, 0, TAB_RIGHT, sym_t[i], NULL); - /*tab_double (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), NULL);*/ - tab_next_row (sym); + double entries[] = { sym_v[i], sym_ase[i], sym_t[i] }; + for (size_t j = 0; j < sizeof entries / sizeof *entries; j++) + if (entries[j] != SYSMIS) + { + indexes[0] = j; + pivot_table_put (sym, indexes, sym->n_dimensions, + pivot_value_new_number (entries[j])); + } } - tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases")); - tab_double (sym, 2, 0, TAB_RIGHT, pt->total, &pt->weight_format); - tab_next_row (sym); + indexes[1] = N_SYMMETRIC; + indexes[0] = 0; + struct pivot_value *total = pivot_value_new_number (xt->total); + pivot_value_set_rc (sym, total, PIVOT_RC_COUNT); + pivot_table_put (sym, indexes, sym->n_dimensions, total); - tab_offset (sym, 0, -1); + free (indexes); } -static int calc_risk (struct pivot_table *, - double[], double[], double[], union value *); +static bool calc_risk (struct crosstabulation *, + double[], double[], double[], union value *, + double *); /* Display risk estimate. */ static void -display_risk (struct pivot_table *pt, struct tab_table *risk) +display_risk (struct crosstabulation *xt, struct pivot_table *risk, + struct pivot_dimension *risk_statistics) { - char buf[256]; - double risk_v[3], lower[3], upper[3]; + double risk_v[3], lower[3], upper[3], n_valid; union value c[2]; - int i; - - if (!calc_risk (pt, risk_v, upper, lower, c)) + if (!calc_risk (xt, risk_v, upper, lower, c, &n_valid)) return; - tab_offset (risk, pt->n_vars - 2, -1); + size_t *indexes = xnmalloc (risk->n_dimensions, sizeof *indexes); + assert (xt->n_vars == 2); + for (size_t i = 0; i < xt->n_consts; i++) + indexes[i + 2] = xt->const_indexes[i]; - for (i = 0; i < 3; i++) + for (int i = 0; i < 3; i++) { - const struct variable *cv = pt->vars[COL_VAR]; - const struct variable *rv = pt->vars[ROW_VAR]; - int cvw = var_get_width (cv); - int rvw = var_get_width (rv); + const struct variable *cv = xt->vars[COL_VAR].var; + const struct variable *rv = xt->vars[ROW_VAR].var; if (risk_v[i] == SYSMIS) continue; + struct string label = DS_EMPTY_INITIALIZER; switch (i) { case 0: - if (var_is_numeric (cv)) - sprintf (buf, _("Odds Ratio for %s (%g / %g)"), - var_get_name (cv), c[0].f, c[1].f); - else - sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"), - var_get_name (cv), - cvw, value_str (&c[0], cvw), - cvw, value_str (&c[1], cvw)); + ds_put_format (&label, _("Odds Ratio for %s"), var_to_string (rv)); + ds_put_cstr (&label, " ("); + var_append_value_name (rv, &c[0], &label); + ds_put_cstr (&label, " / "); + var_append_value_name (rv, &c[1], &label); + ds_put_cstr (&label, ")"); break; case 1: case 2: - if (var_is_numeric (rv)) - sprintf (buf, _("For cohort %s = %g"), - var_get_name (rv), pt->rows[i - 1].f); - else - sprintf (buf, _("For cohort %s = %.*s"), - var_get_name (rv), - rvw, value_str (&pt->rows[i - 1], rvw)); + ds_put_format (&label, _("For cohort %s = "), var_to_string (cv)); + var_append_value_name (cv, &xt->vars[ROW_VAR].values[i - 1], &label); break; } - tab_text (risk, 0, 0, TAB_LEFT, buf); - tab_double (risk, 1, 0, TAB_RIGHT, risk_v[i], NULL); - tab_double (risk, 2, 0, TAB_RIGHT, lower[i], NULL); - tab_double (risk, 3, 0, TAB_RIGHT, upper[i], NULL); - tab_next_row (risk); - } - - tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases")); - tab_double (risk, 1, 0, TAB_RIGHT, pt->total, &pt->weight_format); - tab_next_row (risk); + indexes[1] = pivot_category_create_leaf ( + risk_statistics->root, + pivot_value_new_user_text_nocopy (ds_steal_cstr (&label))); - tab_offset (risk, 0, -1); + double entries[] = { risk_v[i], lower[i], upper[i] }; + for (size_t j = 0; j < sizeof entries / sizeof *entries; j++) + { + indexes[0] = j; + pivot_table_put (risk, indexes, risk->n_dimensions, + pivot_value_new_number (entries[i])); + } + } + indexes[1] = pivot_category_create_leaf ( + risk_statistics->root, + pivot_value_new_text (N_("N of Valid Cases"))); + indexes[0] = 0; + pivot_table_put (risk, indexes, risk->n_dimensions, + pivot_value_new_number (n_valid)); + free (indexes); } -static int calc_directional (struct crosstabs_proc *, struct pivot_table *, +static int calc_directional (struct crosstabs_proc *, struct crosstabulation *, double[N_DIRECTIONAL], double[N_DIRECTIONAL], - double[N_DIRECTIONAL]); + double[N_DIRECTIONAL], double[N_DIRECTIONAL]); /* Display directional measures. */ static void -display_directional (struct crosstabs_proc *proc, struct pivot_table *pt, - struct tab_table *direct) +display_directional (struct crosstabs_proc *proc, + struct crosstabulation *xt, struct pivot_table *direct) { - static const char *categories[] = - { - N_("Nominal by Nominal"), - N_("Ordinal by Ordinal"), - N_("Nominal by Interval"), - }; - - static const char *stats[] = - { - N_("Lambda"), - N_("Goodman and Kruskal tau"), - N_("Uncertainty Coefficient"), - N_("Somers' d"), - N_("Eta"), - }; - - static const char *types[] = - { - N_("Symmetric"), - N_("%s Dependent"), - N_("%s Dependent"), - }; - - static const int stats_categories[N_DIRECTIONAL] = - { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, - }; - - static const int stats_stats[N_DIRECTIONAL] = - { - 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, - }; - - static const int stats_types[N_DIRECTIONAL] = - { - 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2, - }; - - static const int *stats_lookup[] = - { - stats_categories, - stats_stats, - stats_types, - }; - - static const char **stats_names[] = - { - categories, - stats, - types, - }; - - int last[3] = - { - -1, -1, -1, - }; - double direct_v[N_DIRECTIONAL]; double direct_ase[N_DIRECTIONAL]; double direct_t[N_DIRECTIONAL]; - - int i; - - if (!calc_directional (proc, pt, direct_v, direct_ase, direct_t)) + double sig[N_DIRECTIONAL]; + if (!calc_directional (proc, xt, direct_v, direct_ase, direct_t, sig)) return; - tab_offset (direct, pt->n_vars - 2, -1); + size_t *indexes = xnmalloc (direct->n_dimensions, sizeof *indexes); + assert (xt->n_vars == 2); + for (size_t i = 0; i < xt->n_consts; i++) + indexes[i + 2] = xt->const_indexes[i]; - for (i = 0; i < N_DIRECTIONAL; i++) + for (int i = 0; i < N_DIRECTIONAL; i++) { if (direct_v[i] == SYSMIS) continue; - { - int j; - - for (j = 0; j < 3; j++) - if (last[j] != stats_lookup[j][i]) - { - if (j < 2) - tab_hline (direct, TAL_1, j, 6, 0); - - for (; j < 3; j++) - { - const char *string; - int k = last[j] = stats_lookup[j][i]; - - if (k == 0) - string = NULL; - else if (k == 1) - string = var_get_name (pt->vars[0]); - else - string = var_get_name (pt->vars[1]); - - tab_text_format (direct, j, 0, TAB_LEFT, - gettext (stats_names[j][k]), string); - } - } - } + indexes[1] = i; - tab_double (direct, 3, 0, TAB_RIGHT, direct_v[i], NULL); - if (direct_ase[i] != SYSMIS) - tab_double (direct, 4, 0, TAB_RIGHT, direct_ase[i], NULL); - if (direct_t[i] != SYSMIS) - tab_double (direct, 5, 0, TAB_RIGHT, direct_t[i], NULL); - /*tab_double (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), NULL);*/ - tab_next_row (direct); + double entries[] = { + direct_v[i], direct_ase[i], direct_t[i], sig[i], + }; + for (size_t j = 0; j < sizeof entries / sizeof *entries; j++) + if (entries[j] != SYSMIS) + { + indexes[0] = j; + pivot_table_put (direct, indexes, direct->n_dimensions, + pivot_value_new_number (entries[j])); + } } - tab_offset (direct, 0, -1); + free (indexes); } /* Statistical calculations. */ -/* Returns the value of the gamma (factorial) function for an integer - argument PT. */ +/* Returns the value of the logarithm of gamma (factorial) function for an integer + argument XT. */ static double -gamma_int (double pt) +log_gamma_int (double xt) { - double r = 1; + double r = 0; int i; - for (i = 2; i < pt; i++) - r *= i; + for (i = 2; i < xt; i++) + r += log(i); + return r; } @@ -2098,11 +1917,11 @@ gamma_int (double pt) static inline double Pr (int a, int b, int c, int d) { - return (gamma_int (a + b + 1.) / gamma_int (a + 1.) - * gamma_int (c + d + 1.) / gamma_int (b + 1.) - * gamma_int (a + c + 1.) / gamma_int (c + 1.) - * gamma_int (b + d + 1.) / gamma_int (d + 1.) - / gamma_int (a + b + c + d + 1.)); + return exp (log_gamma_int (a + b + 1.) - log_gamma_int (a + 1.) + + log_gamma_int (c + d + 1.) - log_gamma_int (b + 1.) + + log_gamma_int (a + c + 1.) - log_gamma_int (c + 1.) + + log_gamma_int (b + d + 1.) - log_gamma_int (d + 1.) + - log_gamma_int (a + b + c + d + 1.)); } /* Swap the contents of A and B. */ @@ -2119,7 +1938,8 @@ swap (int *a, int *b) static void calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2) { - int pt; + int xt; + double pn1; if (MIN (c, d) < MIN (a, b)) swap (&a, &c), swap (&b, &d); @@ -2133,47 +1953,54 @@ calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2) swap (&a, &c), swap (&b, &d); } - *fisher1 = 0.; - for (pt = 0; pt <= a; pt++) - *fisher1 += Pr (a - pt, b + pt, c + pt, d - pt); + pn1 = Pr (a, b, c, d); + *fisher1 = pn1; + for (xt = 1; xt <= a; xt++) + { + *fisher1 += Pr (a - xt, b + xt, c + xt, d - xt); + } *fisher2 = *fisher1; - for (pt = 1; pt <= b; pt++) - *fisher2 += Pr (a + pt, b - pt, c - pt, d + pt); + + for (xt = 1; xt <= b; xt++) + { + double p = Pr (a + xt, b - xt, c - xt, d + xt); + if (p < pn1) + *fisher2 += p; + } } /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS columns with values COLS and N_ROWS rows with values ROWS. Values - in the matrix sum to pt->total. */ + in the matrix sum to xt->total. */ static void -calc_chisq (struct pivot_table *pt, +calc_chisq (struct crosstabulation *xt, double chisq[N_CHISQ], int df[N_CHISQ], double *fisher1, double *fisher2) { - int r, c; - chisq[0] = chisq[1] = 0.; chisq[2] = chisq[3] = chisq[4] = SYSMIS; *fisher1 = *fisher2 = SYSMIS; - df[0] = df[1] = (pt->ns_cols - 1) * (pt->ns_rows - 1); + df[0] = df[1] = (xt->ns_cols - 1) * (xt->ns_rows - 1); - if (pt->ns_rows <= 1 || pt->ns_cols <= 1) + if (xt->ns_rows <= 1 || xt->ns_cols <= 1) { chisq[0] = chisq[1] = SYSMIS; return; } - for (r = 0; r < pt->n_rows; r++) - for (c = 0; c < pt->n_cols; c++) + size_t n_cols = xt->vars[COL_VAR].n_values; + FOR_EACH_POPULATED_ROW (r, xt) + FOR_EACH_POPULATED_COLUMN (c, xt) { - const double expected = pt->row_tot[r] * pt->col_tot[c] / pt->total; - const double freq = pt->mat[pt->n_cols * r + c]; - const double residual = freq - expected; + const double expected = xt->row_tot[r] * xt->col_tot[c] / xt->total; + const double freq = xt->mat[n_cols * r + c]; + const double residual = freq - expected; chisq[0] += residual * residual / expected; - if (freq) - chisq[1] += freq * log (expected / freq); + if (freq) + chisq[1] += freq * log (expected / freq); } if (chisq[0] == 0.) @@ -2185,36 +2012,34 @@ calc_chisq (struct pivot_table *pt, chisq[1] = SYSMIS; /* Calculate Yates and Fisher exact test. */ - if (pt->ns_cols == 2 && pt->ns_rows == 2) + if (xt->ns_cols == 2 && xt->ns_rows == 2) { double f11, f12, f21, f22; { int nz_cols[2]; - int i, j; - - for (i = j = 0; i < pt->n_cols; i++) - if (pt->col_tot[i] != 0.) - { - nz_cols[j++] = i; - if (j == 2) - break; - } + int j = 0; + FOR_EACH_POPULATED_COLUMN (c, xt) + { + nz_cols[j++] = c; + if (j == 2) + break; + } assert (j == 2); - f11 = pt->mat[nz_cols[0]]; - f12 = pt->mat[nz_cols[1]]; - f21 = pt->mat[nz_cols[0] + pt->n_cols]; - f22 = pt->mat[nz_cols[1] + pt->n_cols]; + f11 = xt->mat[nz_cols[0]]; + f12 = xt->mat[nz_cols[1]]; + f21 = xt->mat[nz_cols[0] + n_cols]; + f22 = xt->mat[nz_cols[1] + n_cols]; } /* Yates. */ { - const double pt_ = fabs (f11 * f22 - f12 * f21) - 0.5 * pt->total; + const double xt_ = fabs (f11 * f22 - f12 * f21) - 0.5 * xt->total; - if (pt_ > 0.) - chisq[3] = (pt->total * pow2 (pt_) + if (xt_ > 0.) + chisq[3] = (xt->total * pow2 (xt_) / (f11 + f12) / (f21 + f22) / (f11 + f21) / (f12 + f22)); else @@ -2224,28 +2049,32 @@ calc_chisq (struct pivot_table *pt, } /* Fisher. */ - if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.) - calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2); + calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2); } /* Calculate Mantel-Haenszel. */ - if (var_is_numeric (pt->vars[ROW_VAR]) && var_is_numeric (pt->vars[COL_VAR])) + if (var_is_numeric (xt->vars[ROW_VAR].var) + && var_is_numeric (xt->vars[COL_VAR].var)) { double r, ase_0, ase_1; - calc_r (pt, (double *) pt->rows, (double *) pt->cols, &r, &ase_0, &ase_1); + calc_r (xt, (double *) xt->vars[ROW_VAR].values, + (double *) xt->vars[COL_VAR].values, + &r, &ase_0, &ase_1); - chisq[4] = (pt->total - 1.) * r * r; + chisq[4] = (xt->total - 1.) * r * r; df[4] = 1; } } -/* Calculate the value of Pearson's r. r is stored into R, ase_1 into - ASE_1, and ase_0 into ASE_0. The row and column values must be - passed in PT and Y. */ +/* Calculate the value of Pearson's r. r is stored into R, its T value into + T, and standard error into ERROR. The row and column values must be + passed in XT and Y. */ static void -calc_r (struct pivot_table *pt, - double *PT, double *Y, double *r, double *ase_0, double *ase_1) +calc_r (struct crosstabulation *xt, + double *XT, double *Y, double *r, double *t, double *error) { + size_t n_rows = xt->vars[ROW_VAR].n_values; + size_t n_cols = xt->vars[COL_VAR].n_values; double SX, SY, S, T; double Xbar, Ybar; double sum_XYf, sum_X2Y2f; @@ -2253,72 +2082,74 @@ calc_r (struct pivot_table *pt, double sum_Yc, sum_Y2c; int i, j; - for (sum_X2Y2f = sum_XYf = 0., i = 0; i < pt->n_rows; i++) - for (j = 0; j < pt->n_cols; j++) + for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++) + for (j = 0; j < n_cols; j++) { - double fij = pt->mat[j + i * pt->n_cols]; - double product = PT[i] * Y[j]; + double fij = xt->mat[j + i * n_cols]; + double product = XT[i] * Y[j]; double temp = fij * product; sum_XYf += temp; sum_X2Y2f += temp * product; } - for (sum_Xr = sum_X2r = 0., i = 0; i < pt->n_rows; i++) + for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++) { - sum_Xr += PT[i] * pt->row_tot[i]; - sum_X2r += pow2 (PT[i]) * pt->row_tot[i]; + sum_Xr += XT[i] * xt->row_tot[i]; + sum_X2r += pow2 (XT[i]) * xt->row_tot[i]; } - Xbar = sum_Xr / pt->total; + Xbar = sum_Xr / xt->total; - for (sum_Yc = sum_Y2c = 0., i = 0; i < pt->n_cols; i++) + for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++) { - sum_Yc += Y[i] * pt->col_tot[i]; - sum_Y2c += Y[i] * Y[i] * pt->col_tot[i]; + sum_Yc += Y[i] * xt->col_tot[i]; + sum_Y2c += Y[i] * Y[i] * xt->col_tot[i]; } - Ybar = sum_Yc / pt->total; + Ybar = sum_Yc / xt->total; - S = sum_XYf - sum_Xr * sum_Yc / pt->total; - SX = sum_X2r - pow2 (sum_Xr) / pt->total; - SY = sum_Y2c - pow2 (sum_Yc) / pt->total; + S = sum_XYf - sum_Xr * sum_Yc / xt->total; + SX = sum_X2r - pow2 (sum_Xr) / xt->total; + SY = sum_Y2c - pow2 (sum_Yc) / xt->total; T = sqrt (SX * SY); *r = S / T; - *ase_0 = sqrt ((sum_X2Y2f - pow2 (sum_XYf) / pt->total) / (sum_X2r * sum_Y2c)); + *t = *r / sqrt (1 - pow2 (*r)) * sqrt (xt->total - 2); { double s, c, y, t; - for (s = c = 0., i = 0; i < pt->n_rows; i++) - for (j = 0; j < pt->n_cols; j++) + for (s = c = 0., i = 0; i < n_rows; i++) + for (j = 0; j < n_cols; j++) { double Xresid, Yresid; double temp; - Xresid = PT[i] - Xbar; + Xresid = XT[i] - Xbar; Yresid = Y[j] - Ybar; temp = (T * Xresid * Yresid - ((S / (2. * T)) * (Xresid * Xresid * SY + Yresid * Yresid * SX))); - y = pt->mat[j + i * pt->n_cols] * temp * temp - c; + y = xt->mat[j + i * n_cols] * temp * temp - c; t = s + y; c = (t - s) - y; s = t; } - *ase_1 = sqrt (s) / (T * T); + *error = sqrt (s) / (T * T); } } /* Calculate symmetric statistics and their asymptotic standard errors. Returns 0 if none could be calculated. */ static int -calc_symmetric (struct crosstabs_proc *proc, struct pivot_table *pt, +calc_symmetric (struct crosstabs_proc *proc, struct crosstabulation *xt, double v[N_SYMMETRIC], double ase[N_SYMMETRIC], double t[N_SYMMETRIC], double somers_d_v[3], double somers_d_ase[3], double somers_d_t[3]) { + size_t n_rows = xt->vars[ROW_VAR].n_values; + size_t n_cols = xt->vars[COL_VAR].n_values; int q, i; - q = MIN (pt->ns_rows, pt->ns_cols); + q = MIN (xt->ns_rows, xt->ns_cols); if (q <= 1) return 0; @@ -2329,25 +2160,24 @@ calc_symmetric (struct crosstabs_proc *proc, struct pivot_table *pt, if (proc->statistics & ((1u << CRS_ST_PHI) | (1u << CRS_ST_CC))) { double Xp = 0.; /* Pearson chi-square. */ - int r, c; - for (r = 0; r < pt->n_rows; r++) - for (c = 0; c < pt->n_cols; c++) + FOR_EACH_POPULATED_ROW (r, xt) + FOR_EACH_POPULATED_COLUMN (c, xt) { - const double expected = pt->row_tot[r] * pt->col_tot[c] / pt->total; - const double freq = pt->mat[pt->n_cols * r + c]; - const double residual = freq - expected; + double expected = xt->row_tot[r] * xt->col_tot[c] / xt->total; + double freq = xt->mat[n_cols * r + c]; + double residual = freq - expected; Xp += residual * residual / expected; } if (proc->statistics & (1u << CRS_ST_PHI)) { - v[0] = sqrt (Xp / pt->total); - v[1] = sqrt (Xp / (pt->total * (q - 1))); + v[0] = sqrt (Xp / xt->total); + v[1] = sqrt (Xp / (xt->total * (q - 1))); } if (proc->statistics & (1u << CRS_ST_CC)) - v[2] = sqrt (Xp / (Xp + pt->total)); + v[2] = sqrt (Xp / (Xp + xt->total)); } if (proc->statistics & ((1u << CRS_ST_BTAU) | (1u << CRS_ST_CTAU) @@ -2360,19 +2190,19 @@ calc_symmetric (struct crosstabs_proc *proc, struct pivot_table *pt, double btau_var; int r, c; - Dr = Dc = pow2 (pt->total); - for (r = 0; r < pt->n_rows; r++) - Dr -= pow2 (pt->row_tot[r]); - for (c = 0; c < pt->n_cols; c++) - Dc -= pow2 (pt->col_tot[c]); + Dr = Dc = pow2 (xt->total); + for (r = 0; r < n_rows; r++) + Dr -= pow2 (xt->row_tot[r]); + for (c = 0; c < n_cols; c++) + Dc -= pow2 (xt->col_tot[c]); - cum = xnmalloc (pt->n_cols * pt->n_rows, sizeof *cum); - for (c = 0; c < pt->n_cols; c++) + cum = xnmalloc (n_cols * n_rows, sizeof *cum); + for (c = 0; c < n_cols; c++) { double ct = 0.; - for (r = 0; r < pt->n_rows; r++) - cum[c + r * pt->n_cols] = ct += pt->mat[c + r * pt->n_cols]; + for (r = 0; r < n_rows; r++) + cum[c + r * n_cols] = ct += xt->mat[c + r * n_cols]; } /* P and Q. */ @@ -2381,34 +2211,34 @@ calc_symmetric (struct crosstabs_proc *proc, struct pivot_table *pt, double Cij, Dij; P = Q = 0.; - for (i = 0; i < pt->n_rows; i++) + for (i = 0; i < n_rows; i++) { Cij = Dij = 0.; - for (j = 1; j < pt->n_cols; j++) - Cij += pt->col_tot[j] - cum[j + i * pt->n_cols]; + for (j = 1; j < n_cols; j++) + Cij += xt->col_tot[j] - cum[j + i * n_cols]; if (i > 0) - for (j = 1; j < pt->n_cols; j++) - Dij += cum[j + (i - 1) * pt->n_cols]; + for (j = 1; j < n_cols; j++) + Dij += cum[j + (i - 1) * n_cols]; for (j = 0;;) { - double fij = pt->mat[j + i * pt->n_cols]; + double fij = xt->mat[j + i * n_cols]; P += fij * Cij; Q += fij * Dij; - if (++j == pt->n_cols) + if (++j == n_cols) break; - assert (j < pt->n_cols); + assert (j < n_cols); - Cij -= pt->col_tot[j] - cum[j + i * pt->n_cols]; - Dij += pt->col_tot[j - 1] - cum[j - 1 + i * pt->n_cols]; + Cij -= xt->col_tot[j] - cum[j + i * n_cols]; + Dij += xt->col_tot[j - 1] - cum[j - 1 + i * n_cols]; if (i > 0) { - Cij += cum[j - 1 + (i - 1) * pt->n_cols]; - Dij -= cum[j + (i - 1) * pt->n_cols]; + Cij += cum[j - 1 + (i - 1) * n_cols]; + Dij -= cum[j + (i - 1) * n_cols]; } } } @@ -2417,7 +2247,7 @@ calc_symmetric (struct crosstabs_proc *proc, struct pivot_table *pt, if (proc->statistics & (1u << CRS_ST_BTAU)) v[3] = (P - Q) / sqrt (Dr * Dc); if (proc->statistics & (1u << CRS_ST_CTAU)) - v[4] = (q * (P - Q)) / (pow2 (pt->total) * (q - 1)); + v[4] = (q * (P - Q)) / (pow2 (xt->total) * (q - 1)); if (proc->statistics & (1u << CRS_ST_GAMMA)) v[5] = (P - Q) / (P + Q); @@ -2428,26 +2258,26 @@ calc_symmetric (struct crosstabs_proc *proc, struct pivot_table *pt, double Cij, Dij; btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.; - for (i = 0; i < pt->n_rows; i++) + for (i = 0; i < n_rows; i++) { Cij = Dij = 0.; - for (j = 1; j < pt->n_cols; j++) - Cij += pt->col_tot[j] - cum[j + i * pt->n_cols]; + for (j = 1; j < n_cols; j++) + Cij += xt->col_tot[j] - cum[j + i * n_cols]; if (i > 0) - for (j = 1; j < pt->n_cols; j++) - Dij += cum[j + (i - 1) * pt->n_cols]; + for (j = 1; j < n_cols; j++) + Dij += cum[j + (i - 1) * n_cols]; for (j = 0;;) { - double fij = pt->mat[j + i * pt->n_cols]; + double fij = xt->mat[j + i * n_cols]; if (proc->statistics & (1u << CRS_ST_BTAU)) { const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij) - + v[3] * (pt->row_tot[i] * Dc - + pt->col_tot[j] * Dr)); + + v[3] * (xt->row_tot[i] * Dc + + xt->col_tot[j] * Dr)); btau_cum += fij * temp * temp; } @@ -2465,65 +2295,65 @@ calc_symmetric (struct crosstabs_proc *proc, struct pivot_table *pt, if (proc->statistics & (1u << CRS_ST_D)) { d_yx_cum += fij * pow2 (Dr * (Cij - Dij) - - (P - Q) * (pt->total - pt->row_tot[i])); + - (P - Q) * (xt->total - xt->row_tot[i])); d_xy_cum += fij * pow2 (Dc * (Dij - Cij) - - (Q - P) * (pt->total - pt->col_tot[j])); + - (Q - P) * (xt->total - xt->col_tot[j])); } - if (++j == pt->n_cols) + if (++j == n_cols) break; - assert (j < pt->n_cols); + assert (j < n_cols); - Cij -= pt->col_tot[j] - cum[j + i * pt->n_cols]; - Dij += pt->col_tot[j - 1] - cum[j - 1 + i * pt->n_cols]; + Cij -= xt->col_tot[j] - cum[j + i * n_cols]; + Dij += xt->col_tot[j - 1] - cum[j - 1 + i * n_cols]; if (i > 0) { - Cij += cum[j - 1 + (i - 1) * pt->n_cols]; - Dij -= cum[j + (i - 1) * pt->n_cols]; + Cij += cum[j - 1 + (i - 1) * n_cols]; + Dij -= cum[j + (i - 1) * n_cols]; } } } } btau_var = ((btau_cum - - (pt->total * pow2 (pt->total * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc)))) + - (xt->total * pow2 (xt->total * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc)))) / pow2 (Dr * Dc)); if (proc->statistics & (1u << CRS_ST_BTAU)) { ase[3] = sqrt (btau_var); - t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / pt->total) + t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / xt->total) / (Dr * Dc))); } if (proc->statistics & (1u << CRS_ST_CTAU)) { - ase[4] = ((2 * q / ((q - 1) * pow2 (pt->total))) - * sqrt (ctau_cum - (P - Q) * (P - Q) / pt->total)); + ase[4] = ((2 * q / ((q - 1) * pow2 (xt->total))) + * sqrt (ctau_cum - (P - Q) * (P - Q) / xt->total)); t[4] = v[4] / ase[4]; } if (proc->statistics & (1u << CRS_ST_GAMMA)) { ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum)); t[5] = v[5] / (2. / (P + Q) - * sqrt (ctau_cum - (P - Q) * (P - Q) / pt->total)); + * sqrt (ctau_cum - (P - Q) * (P - Q) / xt->total)); } if (proc->statistics & (1u << CRS_ST_D)) { somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr)); - somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc); + somers_d_ase[0] = SYSMIS; somers_d_t[0] = (somers_d_v[0] / (4 / (Dc + Dr) - * sqrt (ctau_cum - pow2 (P - Q) / pt->total))); + * sqrt (ctau_cum - pow2 (P - Q) / xt->total))); somers_d_v[1] = (P - Q) / Dc; somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum); somers_d_t[1] = (somers_d_v[1] / (2. / Dc - * sqrt (ctau_cum - pow2 (P - Q) / pt->total))); + * sqrt (ctau_cum - pow2 (P - Q) / xt->total))); somers_d_v[2] = (P - Q) / Dr; somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum); somers_d_t[2] = (somers_d_v[2] / (2. / Dr - * sqrt (ctau_cum - pow2 (P - Q) / pt->total))); + * sqrt (ctau_cum - pow2 (P - Q) / xt->total))); } free (cum); @@ -2532,8 +2362,8 @@ calc_symmetric (struct crosstabs_proc *proc, struct pivot_table *pt, /* Spearman correlation, Pearson's r. */ if (proc->statistics & (1u << CRS_ST_CORR)) { - double *R = xmalloc (sizeof *R * pt->n_rows); - double *C = xmalloc (sizeof *C * pt->n_cols); + double *R = xmalloc (sizeof *R * n_rows); + double *C = xmalloc (sizeof *C * n_cols); { double y, t, c = 0., s = 0.; @@ -2541,14 +2371,14 @@ calc_symmetric (struct crosstabs_proc *proc, struct pivot_table *pt, for (;;) { - R[i] = s + (pt->row_tot[i] + 1.) / 2.; - y = pt->row_tot[i] - c; + R[i] = s + (xt->row_tot[i] + 1.) / 2.; + y = xt->row_tot[i] - c; t = s + y; c = (t - s) - y; s = t; - if (++i == pt->n_rows) + if (++i == n_rows) break; - assert (i < pt->n_rows); + assert (i < n_rows); } } @@ -2558,120 +2388,120 @@ calc_symmetric (struct crosstabs_proc *proc, struct pivot_table *pt, for (;;) { - C[j] = s + (pt->col_tot[j] + 1.) / 2; - y = pt->col_tot[j] - c; + C[j] = s + (xt->col_tot[j] + 1.) / 2; + y = xt->col_tot[j] - c; t = s + y; c = (t - s) - y; s = t; - if (++j == pt->n_cols) + if (++j == n_cols) break; - assert (j < pt->n_cols); + assert (j < n_cols); } } - calc_r (pt, R, C, &v[6], &t[6], &ase[6]); - t[6] = v[6] / t[6]; + calc_r (xt, R, C, &v[6], &t[6], &ase[6]); free (R); free (C); - calc_r (pt, (double *) pt->rows, (double *) pt->cols, &v[7], &t[7], &ase[7]); - t[7] = v[7] / t[7]; + calc_r (xt, (double *) xt->vars[ROW_VAR].values, + (double *) xt->vars[COL_VAR].values, + &v[7], &t[7], &ase[7]); } /* Cohen's kappa. */ - if (proc->statistics & (1u << CRS_ST_KAPPA) && pt->ns_rows == pt->ns_cols) + if (proc->statistics & (1u << CRS_ST_KAPPA) && xt->ns_rows == xt->ns_cols) { + double ase_under_h0; double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci; int i, j; for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0; - i < pt->ns_rows; i++, j++) + i < xt->ns_rows; i++, j++) { double prod, sum; - while (pt->col_tot[j] == 0.) + while (xt->col_tot[j] == 0.) j++; - prod = pt->row_tot[i] * pt->col_tot[j]; - sum = pt->row_tot[i] + pt->col_tot[j]; + prod = xt->row_tot[i] * xt->col_tot[j]; + sum = xt->row_tot[i] + xt->col_tot[j]; - sum_fii += pt->mat[j + i * pt->n_cols]; + sum_fii += xt->mat[j + i * n_cols]; sum_rici += prod; - sum_fiiri_ci += pt->mat[j + i * pt->n_cols] * sum; + sum_fiiri_ci += xt->mat[j + i * n_cols] * sum; sum_riciri_ci += prod * sum; } - for (sum_fijri_ci2 = 0., i = 0; i < pt->ns_rows; i++) - for (j = 0; j < pt->ns_cols; j++) + for (sum_fijri_ci2 = 0., i = 0; i < xt->ns_rows; i++) + for (j = 0; j < xt->ns_cols; j++) { - double sum = pt->row_tot[i] + pt->col_tot[j]; - sum_fijri_ci2 += pt->mat[j + i * pt->n_cols] * sum * sum; + double sum = xt->row_tot[i] + xt->col_tot[j]; + sum_fijri_ci2 += xt->mat[j + i * n_cols] * sum * sum; } - v[8] = (pt->total * sum_fii - sum_rici) / (pow2 (pt->total) - sum_rici); + v[8] = (xt->total * sum_fii - sum_rici) / (pow2 (xt->total) - sum_rici); + + ase_under_h0 = sqrt ((pow2 (xt->total) * sum_rici + + sum_rici * sum_rici + - xt->total * sum_riciri_ci) + / (xt->total * (pow2 (xt->total) - sum_rici) * (pow2 (xt->total) - sum_rici))); - ase[8] = sqrt ((pow2 (pt->total) * sum_rici - + sum_rici * sum_rici - - pt->total * sum_riciri_ci) - / (pt->total * (pow2 (pt->total) - sum_rici) * (pow2 (pt->total) - sum_rici))); -#if 0 - t[8] = v[8] / sqrt (pt->total * (((sum_fii * (pt->total - sum_fii)) - / pow2 (pow2 (pt->total) - sum_rici)) - + ((2. * (pt->total - sum_fii) + ase[8] = sqrt (xt->total * (((sum_fii * (xt->total - sum_fii)) + / pow2 (pow2 (xt->total) - sum_rici)) + + ((2. * (xt->total - sum_fii) * (2. * sum_fii * sum_rici - - pt->total * sum_fiiri_ci)) - / cube (pow2 (pt->total) - sum_rici)) - + (pow2 (pt->total - sum_fii) - * (pt->total * sum_fijri_ci2 - 4. + - xt->total * sum_fiiri_ci)) + / pow3 (pow2 (xt->total) - sum_rici)) + + (pow2 (xt->total - sum_fii) + * (xt->total * sum_fijri_ci2 - 4. * sum_rici * sum_rici) - / pow4 (pow2 (pt->total) - sum_rici)))); -#else - t[8] = v[8] / ase[8]; -#endif + / pow4 (pow2 (xt->total) - sum_rici)))); + + t[8] = v[8] / ase_under_h0; } return 1; } /* Calculate risk estimate. */ -static int -calc_risk (struct pivot_table *pt, - double *value, double *upper, double *lower, union value *c) +static bool +calc_risk (struct crosstabulation *xt, + double *value, double *upper, double *lower, union value *c, + double *n_valid) { + size_t n_cols = xt->vars[COL_VAR].n_values; double f11, f12, f21, f22; double v; - { - int i; - - for (i = 0; i < 3; i++) - value[i] = upper[i] = lower[i] = SYSMIS; - } + for (int i = 0; i < 3; i++) + value[i] = upper[i] = lower[i] = SYSMIS; - if (pt->ns_rows != 2 || pt->ns_cols != 2) - return 0; + if (xt->ns_rows != 2 || xt->ns_cols != 2) + return false; { + /* Find populated columns. */ int nz_cols[2]; - int i, j; - - for (i = j = 0; i < pt->n_cols; i++) - if (pt->col_tot[i] != 0.) - { - nz_cols[j++] = i; - if (j == 2) - break; - } - - assert (j == 2); - - f11 = pt->mat[nz_cols[0]]; - f12 = pt->mat[nz_cols[1]]; - f21 = pt->mat[nz_cols[0] + pt->n_cols]; - f22 = pt->mat[nz_cols[1] + pt->n_cols]; - - c[0] = pt->cols[nz_cols[0]]; - c[1] = pt->cols[nz_cols[1]]; + int n = 0; + FOR_EACH_POPULATED_COLUMN (c, xt) + nz_cols[n++] = c; + assert (n == 2); + + /* Find populated rows. */ + int nz_rows[2]; + n = 0; + FOR_EACH_POPULATED_ROW (r, xt) + nz_rows[n++] = r; + assert (n == 2); + + f11 = xt->mat[nz_cols[0] + n_cols * nz_rows[0]]; + f12 = xt->mat[nz_cols[1] + n_cols * nz_rows[0]]; + f21 = xt->mat[nz_cols[0] + n_cols * nz_rows[1]]; + f22 = xt->mat[nz_cols[1] + n_cols * nz_rows[1]]; + *n_valid = f11 + f12 + f21 + f22; + + c[0] = xt->vars[COL_VAR].values[nz_cols[0]]; + c[1] = xt->vars[COL_VAR].values[nz_cols[1]]; } value[0] = (f11 * f22) / (f12 * f21); @@ -2691,250 +2521,235 @@ calc_risk (struct pivot_table *pt, lower[2] = value[2] * exp (-1.960 * v); upper[2] = value[2] * exp (1.960 * v); - return 1; + return true; } /* Calculate directional measures. */ static int -calc_directional (struct crosstabs_proc *proc, struct pivot_table *pt, +calc_directional (struct crosstabs_proc *proc, struct crosstabulation *xt, double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL], - double t[N_DIRECTIONAL]) + double t[N_DIRECTIONAL], double sig[N_DIRECTIONAL]) { - { - int i; - - for (i = 0; i < N_DIRECTIONAL; i++) - v[i] = ase[i] = t[i] = SYSMIS; - } + size_t n_rows = xt->vars[ROW_VAR].n_values; + size_t n_cols = xt->vars[COL_VAR].n_values; + for (int i = 0; i < N_DIRECTIONAL; i++) + v[i] = ase[i] = t[i] = sig[i] = SYSMIS; /* Lambda. */ if (proc->statistics & (1u << CRS_ST_LAMBDA)) { - double *fim = xnmalloc (pt->n_rows, sizeof *fim); - int *fim_index = xnmalloc (pt->n_rows, sizeof *fim_index); - double *fmj = xnmalloc (pt->n_cols, sizeof *fmj); - int *fmj_index = xnmalloc (pt->n_cols, sizeof *fmj_index); - double sum_fim, sum_fmj; - double rm, cm; - int rm_index, cm_index; - int i, j; - /* Find maximum for each row and their sum. */ - for (sum_fim = 0., i = 0; i < pt->n_rows; i++) + double *fim = xnmalloc (n_rows, sizeof *fim); + int *fim_index = xnmalloc (n_rows, sizeof *fim_index); + double sum_fim = 0.0; + for (int i = 0; i < n_rows; i++) { - double max = pt->mat[i * pt->n_cols]; + double max = xt->mat[i * n_cols]; int index = 0; - for (j = 1; j < pt->n_cols; j++) - if (pt->mat[j + i * pt->n_cols] > max) + for (int j = 1; j < n_cols; j++) + if (xt->mat[j + i * n_cols] > max) { - max = pt->mat[j + i * pt->n_cols]; + max = xt->mat[j + i * n_cols]; index = j; } - sum_fim += fim[i] = max; + fim[i] = max; + sum_fim += max; fim_index[i] = index; } /* Find maximum for each column. */ - for (sum_fmj = 0., j = 0; j < pt->n_cols; j++) + double *fmj = xnmalloc (n_cols, sizeof *fmj); + int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index); + double sum_fmj = 0.0; + for (int j = 0; j < n_cols; j++) { - double max = pt->mat[j]; + double max = xt->mat[j]; int index = 0; - for (i = 1; i < pt->n_rows; i++) - if (pt->mat[j + i * pt->n_cols] > max) + for (int i = 1; i < n_rows; i++) + if (xt->mat[j + i * n_cols] > max) { - max = pt->mat[j + i * pt->n_cols]; + max = xt->mat[j + i * n_cols]; index = i; } - sum_fmj += fmj[j] = max; + fmj[j] = max; + sum_fmj += max; fmj_index[j] = index; } /* Find maximum row total. */ - rm = pt->row_tot[0]; - rm_index = 0; - for (i = 1; i < pt->n_rows; i++) - if (pt->row_tot[i] > rm) + double rm = xt->row_tot[0]; + int rm_index = 0; + for (int i = 1; i < n_rows; i++) + if (xt->row_tot[i] > rm) { - rm = pt->row_tot[i]; + rm = xt->row_tot[i]; rm_index = i; } /* Find maximum column total. */ - cm = pt->col_tot[0]; - cm_index = 0; - for (j = 1; j < pt->n_cols; j++) - if (pt->col_tot[j] > cm) + double cm = xt->col_tot[0]; + int cm_index = 0; + for (int j = 1; j < n_cols; j++) + if (xt->col_tot[j] > cm) { - cm = pt->col_tot[j]; + cm = xt->col_tot[j]; cm_index = j; } - v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * pt->total - rm - cm); - v[1] = (sum_fmj - rm) / (pt->total - rm); - v[2] = (sum_fim - cm) / (pt->total - cm); + v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * xt->total - rm - cm); + v[1] = (sum_fmj - rm) / (xt->total - rm); + v[2] = (sum_fim - cm) / (xt->total - cm); - /* ASE1 for Y given PT. */ + /* ASE1 for Y given XT. */ { - double accum; - - for (accum = 0., i = 0; i < pt->n_rows; i++) - for (j = 0; j < pt->n_cols; j++) - { - const int deltaj = j == cm_index; - accum += (pt->mat[j + i * pt->n_cols] - * pow2 ((j == fim_index[i]) - - deltaj - + v[0] * deltaj)); - } - - ase[2] = sqrt (accum - pt->total * v[0]) / (pt->total - cm); + double accum = 0.0; + for (int i = 0; i < n_rows; i++) + if (cm_index == fim_index[i]) + accum += fim[i]; + ase[2] = sqrt ((xt->total - sum_fim) * (sum_fim + cm - 2. * accum) + / pow3 (xt->total - cm)); } - /* ASE0 for Y given PT. */ + /* ASE0 for Y given XT. */ { - double accum; - - for (accum = 0., i = 0; i < pt->n_rows; i++) + double accum = 0.0; + for (int i = 0; i < n_rows; i++) if (cm_index != fim_index[i]) - accum += (pt->mat[i * pt->n_cols + fim_index[i]] - + pt->mat[i * pt->n_cols + cm_index]); - t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / pt->total) / (pt->total - cm)); + accum += (xt->mat[i * n_cols + fim_index[i]] + + xt->mat[i * n_cols + cm_index]); + t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / xt->total) / (xt->total - cm)); } - /* ASE1 for PT given Y. */ + /* ASE1 for XT given Y. */ { - double accum; - - for (accum = 0., i = 0; i < pt->n_rows; i++) - for (j = 0; j < pt->n_cols; j++) - { - const int deltaj = i == rm_index; - accum += (pt->mat[j + i * pt->n_cols] - * pow2 ((i == fmj_index[j]) - - deltaj - + v[0] * deltaj)); - } - - ase[1] = sqrt (accum - pt->total * v[0]) / (pt->total - rm); + double accum = 0.0; + for (int j = 0; j < n_cols; j++) + if (rm_index == fmj_index[j]) + accum += fmj[j]; + ase[1] = sqrt ((xt->total - sum_fmj) * (sum_fmj + rm - 2. * accum) + / pow3 (xt->total - rm)); } - /* ASE0 for PT given Y. */ + /* ASE0 for XT given Y. */ { - double accum; - - for (accum = 0., j = 0; j < pt->n_cols; j++) + double accum = 0.0; + for (int j = 0; j < n_cols; j++) if (rm_index != fmj_index[j]) - accum += (pt->mat[j + pt->n_cols * fmj_index[j]] - + pt->mat[j + pt->n_cols * rm_index]); - t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / pt->total) / (pt->total - rm)); + accum += (xt->mat[j + n_cols * fmj_index[j]] + + xt->mat[j + n_cols * rm_index]); + t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / xt->total) / (xt->total - rm)); } /* Symmetric ASE0 and ASE1. */ { - double accum0; - double accum1; - - for (accum0 = accum1 = 0., i = 0; i < pt->n_rows; i++) - for (j = 0; j < pt->n_cols; j++) + double accum0 = 0.0; + double accum1 = 0.0; + for (int i = 0; i < n_rows; i++) + for (int j = 0; j < n_cols; j++) { int temp0 = (fmj_index[j] == i) + (fim_index[i] == j); int temp1 = (i == rm_index) + (j == cm_index); - accum0 += pt->mat[j + i * pt->n_cols] * pow2 (temp0 - temp1); - accum1 += (pt->mat[j + i * pt->n_cols] + accum0 += xt->mat[j + i * n_cols] * pow2 (temp0 - temp1); + accum1 += (xt->mat[j + i * n_cols] * pow2 (temp0 + (v[0] - 1.) * temp1)); } - ase[0] = sqrt (accum1 - 4. * pt->total * v[0] * v[0]) / (2. * pt->total - rm - cm); - t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / pt->total)) - / (2. * pt->total - rm - cm)); + ase[0] = sqrt (accum1 - 4. * xt->total * v[0] * v[0]) / (2. * xt->total - rm - cm); + t[0] = v[0] / (sqrt (accum0 - pow2 (sum_fim + sum_fmj - cm - rm) / xt->total) + / (2. * xt->total - rm - cm)); } + for (int i = 0; i < 3; i++) + sig[i] = 2 * gsl_cdf_ugaussian_Q (t[i]); + free (fim); free (fim_index); free (fmj); free (fmj_index); + /* Tau. */ { - double sum_fij2_ri, sum_fij2_ci; - double sum_ri2, sum_cj2; - - for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < pt->n_rows; i++) - for (j = 0; j < pt->n_cols; j++) + double sum_fij2_ri = 0.0; + double sum_fij2_ci = 0.0; + FOR_EACH_POPULATED_ROW (i, xt) + FOR_EACH_POPULATED_COLUMN (j, xt) { - double temp = pow2 (pt->mat[j + i * pt->n_cols]); - sum_fij2_ri += temp / pt->row_tot[i]; - sum_fij2_ci += temp / pt->col_tot[j]; + double temp = pow2 (xt->mat[j + i * n_cols]); + sum_fij2_ri += temp / xt->row_tot[i]; + sum_fij2_ci += temp / xt->col_tot[j]; } - for (sum_ri2 = 0., i = 0; i < pt->n_rows; i++) - sum_ri2 += pow2 (pt->row_tot[i]); + double sum_ri2 = 0.0; + for (int i = 0; i < n_rows; i++) + sum_ri2 += pow2 (xt->row_tot[i]); - for (sum_cj2 = 0., j = 0; j < pt->n_cols; j++) - sum_cj2 += pow2 (pt->col_tot[j]); + double sum_cj2 = 0.0; + for (int j = 0; j < n_cols; j++) + sum_cj2 += pow2 (xt->col_tot[j]); - v[3] = (pt->total * sum_fij2_ci - sum_ri2) / (pow2 (pt->total) - sum_ri2); - v[4] = (pt->total * sum_fij2_ri - sum_cj2) / (pow2 (pt->total) - sum_cj2); + v[3] = (xt->total * sum_fij2_ci - sum_ri2) / (pow2 (xt->total) - sum_ri2); + v[4] = (xt->total * sum_fij2_ri - sum_cj2) / (pow2 (xt->total) - sum_cj2); } } if (proc->statistics & (1u << CRS_ST_UC)) { - double UX, UY, UXY, P; - double ase1_yx, ase1_xy, ase1_sym; - int i, j; + double UX = 0.0; + FOR_EACH_POPULATED_ROW (i, xt) + UX -= xt->row_tot[i] / xt->total * log (xt->row_tot[i] / xt->total); - for (UX = 0., i = 0; i < pt->n_rows; i++) - if (pt->row_tot[i] > 0.) - UX -= pt->row_tot[i] / pt->total * log (pt->row_tot[i] / pt->total); + double UY = 0.0; + FOR_EACH_POPULATED_COLUMN (j, xt) + UY -= xt->col_tot[j] / xt->total * log (xt->col_tot[j] / xt->total); - for (UY = 0., j = 0; j < pt->n_cols; j++) - if (pt->col_tot[j] > 0.) - UY -= pt->col_tot[j] / pt->total * log (pt->col_tot[j] / pt->total); - - for (UXY = P = 0., i = 0; i < pt->n_rows; i++) - for (j = 0; j < pt->n_cols; j++) + double UXY = 0.0; + double P = 0.0; + for (int i = 0; i < n_rows; i++) + for (int j = 0; j < n_cols; j++) { - double entry = pt->mat[j + i * pt->n_cols]; + double entry = xt->mat[j + i * n_cols]; if (entry <= 0.) continue; - P += entry * pow2 (log (pt->col_tot[j] * pt->row_tot[i] / (pt->total * entry))); - UXY -= entry / pt->total * log (entry / pt->total); + P += entry * pow2 (log (xt->col_tot[j] * xt->row_tot[i] / (xt->total * entry))); + UXY -= entry / xt->total * log (entry / xt->total); } - for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < pt->n_rows; i++) - for (j = 0; j < pt->n_cols; j++) + double ase1_yx = 0.0; + double ase1_xy = 0.0; + double ase1_sym = 0.0; + for (int i = 0; i < n_rows; i++) + for (int j = 0; j < n_cols; j++) { - double entry = pt->mat[j + i * pt->n_cols]; + double entry = xt->mat[j + i * n_cols]; if (entry <= 0.) continue; - ase1_yx += entry * pow2 (UY * log (entry / pt->row_tot[i]) - + (UX - UXY) * log (pt->col_tot[j] / pt->total)); - ase1_xy += entry * pow2 (UX * log (entry / pt->col_tot[j]) - + (UY - UXY) * log (pt->row_tot[i] / pt->total)); + ase1_yx += entry * pow2 (UY * log (entry / xt->row_tot[i]) + + (UX - UXY) * log (xt->col_tot[j] / xt->total)); + ase1_xy += entry * pow2 (UX * log (entry / xt->col_tot[j]) + + (UY - UXY) * log (xt->row_tot[i] / xt->total)); ase1_sym += entry * pow2 ((UXY - * log (pt->row_tot[i] * pt->col_tot[j] / pow2 (pt->total))) - - (UX + UY) * log (entry / pt->total)); + * log (xt->row_tot[i] * xt->col_tot[j] / pow2 (xt->total))) + - (UX + UY) * log (entry / xt->total)); } v[5] = 2. * ((UX + UY - UXY) / (UX + UY)); - ase[5] = (2. / (pt->total * pow2 (UX + UY))) * sqrt (ase1_sym); - t[5] = v[5] / ((2. / (pt->total * (UX + UY))) - * sqrt (P - pow2 (UX + UY - UXY) / pt->total)); + ase[5] = (2. / (xt->total * pow2 (UX + UY))) * sqrt (ase1_sym); + t[5] = SYSMIS; v[6] = (UX + UY - UXY) / UX; - ase[6] = sqrt (ase1_xy) / (pt->total * UX * UX); - t[6] = v[6] / (sqrt (P - pt->total * pow2 (UX + UY - UXY)) / (pt->total * UX)); + ase[6] = sqrt (ase1_xy) / (xt->total * UX * UX); + t[6] = v[6] / (sqrt (P - xt->total * pow2 (UX + UY - UXY)) / (xt->total * UX)); v[7] = (UX + UY - UXY) / UY; - ase[7] = sqrt (ase1_yx) / (pt->total * UY * UY); - t[7] = v[7] / (sqrt (P - pt->total * pow2 (UX + UY - UXY)) / (pt->total * UY)); + ase[7] = sqrt (ase1_yx) / (xt->total * UY * UY); + t[7] = v[7] / (sqrt (P - xt->total * pow2 (UX + UY - UXY)) / (xt->total * UY)); } /* Somers' D. */ @@ -2947,15 +2762,15 @@ calc_directional (struct crosstabs_proc *proc, struct pivot_table *pt, double somers_d_ase[3]; double somers_d_t[3]; - if (calc_symmetric (proc, pt, v_dummy, ase_dummy, t_dummy, + if (calc_symmetric (proc, xt, v_dummy, ase_dummy, t_dummy, somers_d_v, somers_d_ase, somers_d_t)) { - int i; - for (i = 0; i < 3; i++) + for (int i = 0; i < 3; i++) { v[8 + i] = somers_d_v[i]; ase[8 + i] = somers_d_ase[i]; t[8 + i] = somers_d_t[i]; + sig[8 + i] = 2 * gsl_cdf_ugaussian_Q (fabs (somers_d_t[i])); } } } @@ -2963,59 +2778,58 @@ calc_directional (struct crosstabs_proc *proc, struct pivot_table *pt, /* Eta. */ if (proc->statistics & (1u << CRS_ST_ETA)) { - { - double sum_Xr, sum_X2r; - double SX, SXW; - int i, j; - - for (sum_Xr = sum_X2r = 0., i = 0; i < pt->n_rows; i++) - { - sum_Xr += pt->rows[i].f * pt->row_tot[i]; - sum_X2r += pow2 (pt->rows[i].f) * pt->row_tot[i]; - } - SX = sum_X2r - pow2 (sum_Xr) / pt->total; - - for (SXW = 0., j = 0; j < pt->n_cols; j++) - { - double cum; - - for (cum = 0., i = 0; i < pt->n_rows; i++) - { - SXW += pow2 (pt->rows[i].f) * pt->mat[j + i * pt->n_cols]; - cum += pt->rows[i].f * pt->mat[j + i * pt->n_cols]; - } + /* X dependent. */ + double sum_Xr = 0.0; + double sum_X2r = 0.0; + for (int i = 0; i < n_rows; i++) + { + sum_Xr += xt->vars[ROW_VAR].values[i].f * xt->row_tot[i]; + sum_X2r += pow2 (xt->vars[ROW_VAR].values[i].f) * xt->row_tot[i]; + } + double SX = sum_X2r - pow2 (sum_Xr) / xt->total; - SXW -= cum * cum / pt->col_tot[j]; - } - v[11] = sqrt (1. - SXW / SX); - } + double SXW = 0.0; + FOR_EACH_POPULATED_COLUMN (j, xt) + { + double cum = 0.0; - { - double sum_Yc, sum_Y2c; - double SY, SYW; - int i, j; + for (int i = 0; i < n_rows; i++) + { + SXW += (pow2 (xt->vars[ROW_VAR].values[i].f) + * xt->mat[j + i * n_cols]); + cum += (xt->vars[ROW_VAR].values[i].f + * xt->mat[j + i * n_cols]); + } - for (sum_Yc = sum_Y2c = 0., i = 0; i < pt->n_cols; i++) - { - sum_Yc += pt->cols[i].f * pt->col_tot[i]; - sum_Y2c += pow2 (pt->cols[i].f) * pt->col_tot[i]; - } - SY = sum_Y2c - sum_Yc * sum_Yc / pt->total; + SXW -= cum * cum / xt->col_tot[j]; + } + v[11] = sqrt (1. - SXW / SX); - for (SYW = 0., i = 0; i < pt->n_rows; i++) - { - double cum; + /* Y dependent. */ + double sum_Yc = 0.0; + double sum_Y2c = 0.0; + for (int i = 0; i < n_cols; i++) + { + sum_Yc += xt->vars[COL_VAR].values[i].f * xt->col_tot[i]; + sum_Y2c += pow2 (xt->vars[COL_VAR].values[i].f) * xt->col_tot[i]; + } + double SY = sum_Y2c - pow2 (sum_Yc) / xt->total; - for (cum = 0., j = 0; j < pt->n_cols; j++) - { - SYW += pow2 (pt->cols[j].f) * pt->mat[j + i * pt->n_cols]; - cum += pt->cols[j].f * pt->mat[j + i * pt->n_cols]; - } + double SYW = 0.0; + FOR_EACH_POPULATED_ROW (i, xt) + { + double cum = 0.0; + for (int j = 0; j < n_cols; j++) + { + SYW += (pow2 (xt->vars[COL_VAR].values[j].f) + * xt->mat[j + i * n_cols]); + cum += (xt->vars[COL_VAR].values[j].f + * xt->mat[j + i * n_cols]); + } - SYW -= cum * cum / pt->row_tot[i]; - } - v[12] = sqrt (1. - SYW / SY); - } + SYW -= cum * cum / xt->row_tot[i]; + } + v[12] = sqrt (1. - SYW / SY); } return 1;