1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
35 #include <gsl/gsl_cdf.h>
39 #include <data/case.h>
40 #include <data/data-out.h>
41 #include <data/dictionary.h>
42 #include <data/procedure.h>
43 #include <data/value-labels.h>
44 #include <data/variable.h>
45 #include <language/command.h>
46 #include <language/dictionary/split-file.h>
47 #include <language/lexer/lexer.h>
48 #include <language/lexer/variable-parser.h>
49 #include <libpspp/alloc.h>
50 #include <libpspp/array.h>
51 #include <libpspp/assertion.h>
52 #include <libpspp/compiler.h>
53 #include <libpspp/hash.h>
54 #include <libpspp/magic.h>
55 #include <libpspp/message.h>
56 #include <libpspp/message.h>
57 #include <libpspp/misc.h>
58 #include <libpspp/pool.h>
59 #include <libpspp/str.h>
60 #include <output/output.h>
61 #include <output/table.h>
66 #define _(msgid) gettext (msgid)
67 #define N_(msgid) msgid
75 missing=miss:!table/include/report;
76 +write[wr_]=none,cells,all;
77 +format=fmt:!labels/nolabels/novallabs,
80 tabl:!tables/notables,
83 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
85 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
91 /* Number of chi-square statistics. */
94 /* Number of symmetric statistics. */
97 /* Number of directional statistics. */
98 #define N_DIRECTIONAL 13
100 /* A single table entry for general mode. */
103 int table; /* Flattened table number. */
106 double freq; /* Frequency count. */
107 double *data; /* Crosstabulation table for integer mode. */
110 union value values[1]; /* Values. */
113 /* A crosstabulation. */
116 int nvar; /* Number of variables. */
117 double missing; /* Missing cases count. */
118 int ofs; /* Integer mode: Offset into sorted_tab[]. */
119 struct variable *vars[2]; /* At least two variables; sorted by
120 larger indices first. */
123 /* Integer mode variable info. */
126 int min; /* Minimum value. */
127 int max; /* Maximum value + 1. */
128 int count; /* max - min. */
131 static inline struct var_range *
132 get_var_range (struct variable *v)
135 assert (v->aux != NULL);
139 /* Indexes into crosstab.v. */
146 /* General mode crosstabulation table. */
147 static struct hsh_table *gen_tab; /* Hash table. */
148 static int n_sorted_tab; /* Number of entries in sorted_tab. */
149 static struct table_entry **sorted_tab; /* Sorted table. */
151 /* Variables specifies on VARIABLES. */
152 static struct variable **variables;
153 static size_t variables_cnt;
156 static struct crosstab **xtab;
159 /* Integer or general mode? */
168 static int num_cells; /* Number of cells requested. */
169 static int cells[8]; /* Cells requested. */
172 static int write; /* One of WR_* that specifies the WRITE style. */
174 /* Command parsing info. */
175 static struct cmd_crosstabs cmd;
178 static struct pool *pl_tc; /* For table cells. */
179 static struct pool *pl_col; /* For column data. */
181 static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
182 static void precalc (const struct ccase *, void *, const struct dataset *);
183 static bool calc_general (const struct ccase *, void *, const struct dataset *);
184 static bool calc_integer (const struct ccase *, void *, const struct dataset *);
185 static bool postcalc (void *, const struct dataset *);
186 static void submit (struct tab_table *);
188 static void format_short (char *s, const struct fmt_spec *fp,
189 const union value *v);
191 /* Parse and execute CROSSTABS, then clean up. */
193 cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
195 int result = internal_cmd_crosstabs (lexer, ds);
198 pool_destroy (pl_tc);
199 pool_destroy (pl_col);
204 /* Parses and executes the CROSSTABS procedure. */
206 internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
215 pl_tc = pool_create ();
216 pl_col = pool_create ();
218 if (!parse_crosstabs (lexer, ds, &cmd, NULL))
221 mode = variables ? INTEGER : GENERAL;
226 cmd.a_cells[CRS_CL_COUNT] = 1;
232 for (i = 0; i < CRS_CL_count; i++)
237 cmd.a_cells[CRS_CL_COUNT] = 1;
238 cmd.a_cells[CRS_CL_ROW] = 1;
239 cmd.a_cells[CRS_CL_COLUMN] = 1;
240 cmd.a_cells[CRS_CL_TOTAL] = 1;
242 if (cmd.a_cells[CRS_CL_ALL])
244 for (i = 0; i < CRS_CL_count; i++)
246 cmd.a_cells[CRS_CL_ALL] = 0;
248 cmd.a_cells[CRS_CL_NONE] = 0;
250 for (num_cells = i = 0; i < CRS_CL_count; i++)
252 cells[num_cells++] = i;
255 if (cmd.sbc_statistics)
260 for (i = 0; i < CRS_ST_count; i++)
261 if (cmd.a_statistics[i])
264 cmd.a_statistics[CRS_ST_CHISQ] = 1;
265 if (cmd.a_statistics[CRS_ST_ALL])
266 for (i = 0; i < CRS_ST_count; i++)
267 cmd.a_statistics[i] = 1;
271 if (cmd.miss == CRS_REPORT && mode == GENERAL)
273 msg (SE, _("Missing mode REPORT not allowed in general mode. "
274 "Assuming MISSING=TABLE."));
275 cmd.miss = CRS_TABLE;
279 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
280 cmd.a_write[CRS_WR_ALL] = 0;
281 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
283 msg (SE, _("Write mode ALL not allowed in general mode. "
284 "Assuming WRITE=CELLS."));
285 cmd.a_write[CRS_WR_CELLS] = 1;
288 && (cmd.a_write[CRS_WR_NONE]
289 + cmd.a_write[CRS_WR_ALL]
290 + cmd.a_write[CRS_WR_CELLS] == 0))
291 cmd.a_write[CRS_WR_CELLS] = 1;
292 if (cmd.a_write[CRS_WR_CELLS])
293 write = CRS_WR_CELLS;
294 else if (cmd.a_write[CRS_WR_ALL])
299 ok = procedure_with_splits (ds, precalc,
300 mode == GENERAL ? calc_general : calc_integer,
303 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
306 /* Parses the TABLES subcommand. */
308 crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
310 struct var_set *var_set;
312 struct variable ***by = NULL;
313 size_t *by_nvar = NULL;
317 /* Ensure that this is a TABLES subcommand. */
318 if (!lex_match_id (lexer, "TABLES")
319 && (lex_token (lexer) != T_ID || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
320 && lex_token (lexer) != T_ALL)
322 lex_match (lexer, '=');
324 if (variables != NULL)
325 var_set = var_set_create_from_array (variables, variables_cnt);
327 var_set = var_set_create_from_dict (dataset_dict (ds));
328 assert (var_set != NULL);
332 by = xnrealloc (by, n_by + 1, sizeof *by);
333 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
334 if (!parse_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
335 PV_NO_DUPLICATE | PV_NO_SCRATCH))
337 if (xalloc_oversized (nx, by_nvar[n_by]))
339 msg (SE, _("Too many crosstabulation variables or dimensions."));
345 if (!lex_match (lexer, T_BY))
349 lex_error (lexer, _("expecting BY"));
358 int *by_iter = xcalloc (n_by, sizeof *by_iter);
361 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
362 for (i = 0; i < nx; i++)
366 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
373 for (i = 0; i < n_by; i++)
374 x->vars[i] = by[i][by_iter[i]];
380 for (i = n_by - 1; i >= 0; i--)
382 if (++by_iter[i] < by_nvar[i])
395 /* All return paths lead here. */
399 for (i = 0; i < n_by; i++)
405 var_set_destroy (var_set);
410 /* Parses the VARIABLES subcommand. */
412 crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
416 msg (SE, _("VARIABLES must be specified before TABLES."));
420 lex_match (lexer, '=');
424 size_t orig_nv = variables_cnt;
429 if (!parse_variables (lexer, dataset_dict (ds),
430 &variables, &variables_cnt,
431 (PV_APPEND | PV_NUMERIC
432 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
435 if (lex_token (lexer) != '(')
437 lex_error (lexer, "expecting `('");
442 if (!lex_force_int (lexer))
444 min = lex_integer (lexer);
447 lex_match (lexer, ',');
449 if (!lex_force_int (lexer))
451 max = lex_integer (lexer);
454 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
460 if (lex_token (lexer) != ')')
462 lex_error (lexer, "expecting `)'");
467 for (i = orig_nv; i < variables_cnt; i++)
469 struct var_range *vr = xmalloc (sizeof *vr);
472 vr->count = max - min + 1;
473 var_attach_aux (variables[i], vr, var_dtor_free);
476 if (lex_token (lexer) == '/')
488 /* Data file processing. */
490 static int compare_table_entry (const void *, const void *, const void *);
491 static unsigned hash_table_entry (const void *, const void *);
493 /* Set up the crosstabulation tables for processing. */
495 precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds)
497 output_split_file_values (ds, first);
500 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
510 for (i = 0; i < nxtab; i++)
512 struct crosstab *x = xtab[i];
517 x->ofs = n_sorted_tab;
519 for (j = 2; j < x->nvar; j++)
520 count *= get_var_range (x->vars[j - 2])->count;
522 sorted_tab = xnrealloc (sorted_tab,
523 n_sorted_tab + count, sizeof *sorted_tab);
524 v = local_alloc (sizeof *v * x->nvar);
525 for (j = 2; j < x->nvar; j++)
526 v[j] = get_var_range (x->vars[j])->min;
527 for (j = 0; j < count; j++)
529 struct table_entry *te;
532 te = sorted_tab[n_sorted_tab++]
533 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
537 int row_cnt = get_var_range (x->vars[0])->count;
538 int col_cnt = get_var_range (x->vars[1])->count;
539 const int mat_size = row_cnt * col_cnt;
542 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
543 for (m = 0; m < mat_size; m++)
547 for (k = 2; k < x->nvar; k++)
548 te->values[k].f = v[k];
549 for (k = 2; k < x->nvar; k++)
551 struct var_range *vr = get_var_range (x->vars[k]);
552 if (++v[k] >= vr->max)
561 sorted_tab = xnrealloc (sorted_tab,
562 n_sorted_tab + 1, sizeof *sorted_tab);
563 sorted_tab[n_sorted_tab] = NULL;
568 /* Form crosstabulations for general mode. */
570 calc_general (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
572 bool bad_warn = true;
575 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
577 /* Flattened current table index. */
580 for (t = 0; t < nxtab; t++)
582 struct crosstab *x = xtab[t];
583 const size_t entry_size = (sizeof (struct table_entry)
584 + sizeof (union value) * (x->nvar - 1));
585 struct table_entry *te = local_alloc (entry_size);
587 /* Construct table entry for the current record and table. */
593 for (j = 0; j < x->nvar; j++)
595 const union value *v = case_data (c, x->vars[j]->fv);
596 if ((cmd.miss == CRS_TABLE && var_is_value_missing (x->vars[j], v))
597 || (cmd.miss == CRS_INCLUDE
598 && var_is_value_system_missing (x->vars[j], v)))
600 x->missing += weight;
604 if (var_is_numeric (x->vars[j]))
605 te->values[j].f = case_num (c, x->vars[j]->fv);
608 memcpy (te->values[j].s, case_str (c, x->vars[j]->fv),
609 var_get_width (x->vars[j]));
611 /* Necessary in order to simplify comparisons. */
612 memset (&te->values[j].s[var_get_width (x->vars[j])], 0,
613 sizeof (union value) - var_get_width (x->vars[j]));
618 /* Add record to hash table. */
620 struct table_entry **tepp
621 = (struct table_entry **) hsh_probe (gen_tab, te);
624 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
627 memcpy (tep, te, entry_size);
632 (*tepp)->u.freq += weight;
643 calc_integer (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
645 bool bad_warn = true;
648 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
650 /* Flattened current table index. */
653 for (t = 0; t < nxtab; t++)
655 struct crosstab *x = xtab[t];
660 for (i = 0; i < x->nvar; i++)
662 struct variable *const v = x->vars[i];
663 struct var_range *vr = get_var_range (v);
664 double value = case_num (c, v->fv);
666 /* Note that the first test also rules out SYSMIS. */
667 if ((value < vr->min || value >= vr->max)
668 || (cmd.miss == CRS_TABLE && var_is_num_user_missing (v, value)))
670 x->missing += weight;
676 ofs += fact * ((int) value - vr->min);
682 struct variable *row_var = x->vars[ROW_VAR];
683 const int row = case_num (c, row_var->fv) - get_var_range (row_var)->min;
685 struct variable *col_var = x->vars[COL_VAR];
686 const int col = case_num (c, col_var->fv) - get_var_range (col_var)->min;
688 const int col_dim = get_var_range (col_var)->count;
690 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
699 /* Compare the table_entry's at A and B and return a strcmp()-type
702 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
704 const struct table_entry *a = a_;
705 const struct table_entry *b = b_;
707 if (a->table > b->table)
709 else if (a->table < b->table)
713 const struct crosstab *x = xtab[a->table];
716 for (i = x->nvar - 1; i >= 0; i--)
717 if (var_is_numeric (x->vars[i]))
719 const double diffnum = a->values[i].f - b->values[i].f;
722 else if (diffnum > 0)
727 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
728 var_get_width (x->vars[i]));
737 /* Calculate a hash value from table_entry A. */
739 hash_table_entry (const void *a_, const void *aux UNUSED)
741 const struct table_entry *a = a_;
746 for (i = 0; i < xtab[a->table]->nvar; i++)
747 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
752 /* Post-data reading calculations. */
754 static struct table_entry **find_pivot_extent (struct table_entry **,
755 int *cnt, int pivot);
756 static void enum_var_values (struct table_entry **entries, int entry_cnt,
758 union value **values, int *value_cnt);
759 static void output_pivot_table (struct table_entry **, struct table_entry **,
760 double **, double **, double **,
761 int *, int *, int *);
762 static void make_summary_table (void);
765 postcalc (void *aux UNUSED, const struct dataset *ds UNUSED)
769 n_sorted_tab = hsh_count (gen_tab);
770 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
773 make_summary_table ();
775 /* Identify all the individual crosstabulation tables, and deal with
778 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
779 int pc = n_sorted_tab; /* Pivot count. */
781 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
782 int maxrows = 0, maxcols = 0, maxcells = 0;
786 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
790 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
791 &maxrows, &maxcols, &maxcells);
800 hsh_destroy (gen_tab);
805 static void insert_summary (struct tab_table *, int tab_index, double valid);
807 /* Output a table summarizing the cases processed. */
809 make_summary_table (void)
811 struct tab_table *summary;
813 struct table_entry **pb = sorted_tab, **pe;
814 int pc = n_sorted_tab;
817 summary = tab_create (7, 3 + nxtab, 1);
818 tab_title (summary, _("Summary."));
819 tab_headers (summary, 1, 0, 3, 0);
820 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
821 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
822 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
823 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
824 tab_hline (summary, TAL_1, 1, 6, 1);
825 tab_hline (summary, TAL_1, 1, 6, 2);
826 tab_vline (summary, TAL_1, 3, 1, 1);
827 tab_vline (summary, TAL_1, 5, 1, 1);
831 for (i = 0; i < 3; i++)
833 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
834 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
837 tab_offset (summary, 0, 3);
843 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
847 while (cur_tab < (*pb)->table)
848 insert_summary (summary, cur_tab++, 0.);
851 for (valid = 0.; pb < pe; pb++)
852 valid += (*pb)->u.freq;
855 const struct crosstab *const x = xtab[(*pb)->table];
856 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
857 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
858 const int count = n_cols * n_rows;
860 for (valid = 0.; pb < pe; pb++)
862 const double *data = (*pb)->u.data;
865 for (i = 0; i < count; i++)
869 insert_summary (summary, cur_tab++, valid);
874 while (cur_tab < nxtab)
875 insert_summary (summary, cur_tab++, 0.);
880 /* Inserts a line into T describing the crosstabulation at index
881 TAB_INDEX, which has VALID valid observations. */
883 insert_summary (struct tab_table *t, int tab_index, double valid)
885 struct crosstab *x = xtab[tab_index];
887 tab_hline (t, TAL_1, 0, 6, 0);
889 /* Crosstabulation name. */
891 char *buf = local_alloc (128 * x->nvar);
895 for (i = 0; i < x->nvar; i++)
898 cp = stpcpy (cp, " * ");
900 cp = stpcpy (cp, var_to_string (x->vars[i]));
902 tab_text (t, 0, 0, TAB_LEFT, buf);
907 /* Counts and percentages. */
917 for (i = 0; i < 3; i++)
919 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
920 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
931 static struct tab_table *table; /* Crosstabulation table. */
932 static struct tab_table *chisq; /* Chi-square table. */
933 static struct tab_table *sym; /* Symmetric measures table. */
934 static struct tab_table *risk; /* Risk estimate table. */
935 static struct tab_table *direct; /* Directional measures table. */
938 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
940 /* Column values, number of columns. */
941 static union value *cols;
944 /* Row values, number of rows. */
945 static union value *rows;
948 /* Number of statistically interesting columns/rows (columns/rows with
950 static int ns_cols, ns_rows;
952 /* Crosstabulation. */
953 static const struct crosstab *x;
955 /* Number of variables from the crosstabulation to consider. This is
956 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
959 /* Matrix contents. */
960 static double *mat; /* Matrix proper. */
961 static double *row_tot; /* Row totals. */
962 static double *col_tot; /* Column totals. */
963 static double W; /* Grand total. */
965 static void display_dimensions (struct tab_table *, int first_difference,
966 struct table_entry *);
967 static void display_crosstabulation (void);
968 static void display_chisq (void);
969 static void display_symmetric (void);
970 static void display_risk (void);
971 static void display_directional (void);
972 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
973 static void table_value_missing (struct tab_table *table, int c, int r,
974 unsigned char opt, const union value *v,
975 const struct variable *var);
976 static void delete_missing (void);
978 /* Output pivot table beginning at PB and continuing until PE,
979 exclusive. For efficiency, *MATP is a pointer to a matrix that can
980 hold *MAXROWS entries. */
982 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
983 double **matp, double **row_totp, double **col_totp,
984 int *maxrows, int *maxcols, int *maxcells)
987 struct table_entry **tb = pb, **te; /* Table begin, table end. */
988 int tc = pe - pb; /* Table count. */
990 /* Table entry for header comparison. */
991 struct table_entry *cmp = NULL;
993 x = xtab[(*pb)->table];
994 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
996 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
998 /* Crosstabulation table initialization. */
1001 table = tab_create (nvar + n_cols,
1002 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1003 tab_headers (table, nvar - 1, 0, 2, 0);
1005 /* First header line. */
1006 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1007 TAB_CENTER | TAT_TITLE, var_get_name (x->vars[COL_VAR]));
1009 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1011 /* Second header line. */
1015 for (i = 2; i < nvar; i++)
1016 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1017 TAB_RIGHT | TAT_TITLE, var_to_string (x->vars[i]));
1018 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1019 var_get_name (x->vars[ROW_VAR]));
1020 for (i = 0; i < n_cols; i++)
1021 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1023 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1026 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1027 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1031 char *title = local_alloc (x->nvar * 64 + 128);
1035 if (cmd.pivot == CRS_PIVOT)
1036 for (i = 0; i < nvar; i++)
1039 cp = stpcpy (cp, " by ");
1040 cp = stpcpy (cp, var_get_name (x->vars[i]));
1044 cp = spprintf (cp, "%s by %s for",
1045 var_get_name (x->vars[0]),
1046 var_get_name (x->vars[1]));
1047 for (i = 2; i < nvar; i++)
1049 char buf[64], *bufp;
1054 cp = stpcpy (cp, var_get_name (x->vars[i]));
1056 format_short (buf, var_get_print_format (x->vars[i]),
1058 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1060 cp = stpcpy (cp, bufp);
1064 cp = stpcpy (cp, " [");
1065 for (i = 0; i < num_cells; i++)
1073 static const struct tuple cell_names[] =
1075 {CRS_CL_COUNT, N_("count")},
1076 {CRS_CL_ROW, N_("row %")},
1077 {CRS_CL_COLUMN, N_("column %")},
1078 {CRS_CL_TOTAL, N_("total %")},
1079 {CRS_CL_EXPECTED, N_("expected")},
1080 {CRS_CL_RESIDUAL, N_("residual")},
1081 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1082 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1086 const struct tuple *t;
1088 for (t = cell_names; t->value != cells[i]; t++)
1089 assert (t->value != -1);
1091 cp = stpcpy (cp, ", ");
1092 cp = stpcpy (cp, gettext (t->name));
1096 tab_title (table, "%s", title);
1100 tab_offset (table, 0, 2);
1105 /* Chi-square table initialization. */
1106 if (cmd.a_statistics[CRS_ST_CHISQ])
1108 chisq = tab_create (6 + (nvar - 2),
1109 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1110 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1112 tab_title (chisq, _("Chi-square tests."));
1114 tab_offset (chisq, nvar - 2, 0);
1115 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1116 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1117 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1118 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1119 _("Asymp. Sig. (2-sided)"));
1120 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1121 _("Exact. Sig. (2-sided)"));
1122 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1123 _("Exact. Sig. (1-sided)"));
1125 tab_offset (chisq, 0, 1);
1130 /* Symmetric measures. */
1131 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1132 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1133 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1134 || cmd.a_statistics[CRS_ST_KAPPA])
1136 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1137 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1138 tab_title (sym, _("Symmetric measures."));
1140 tab_offset (sym, nvar - 2, 0);
1141 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1142 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1143 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1144 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1145 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1146 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1147 tab_offset (sym, 0, 1);
1152 /* Risk estimate. */
1153 if (cmd.a_statistics[CRS_ST_RISK])
1155 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1156 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1157 tab_title (risk, _("Risk estimate."));
1159 tab_offset (risk, nvar - 2, 0);
1160 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1161 _("95%% Confidence Interval"));
1162 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1163 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1164 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1165 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1166 tab_hline (risk, TAL_1, 2, 3, 1);
1167 tab_vline (risk, TAL_1, 2, 0, 1);
1168 tab_offset (risk, 0, 2);
1173 /* Directional measures. */
1174 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1175 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1177 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1178 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1179 tab_title (direct, _("Directional measures."));
1181 tab_offset (direct, nvar - 2, 0);
1182 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1183 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1184 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1185 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1186 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1187 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1188 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1189 tab_offset (direct, 0, 1);
1196 /* Find pivot subtable if applicable. */
1197 te = find_pivot_extent (tb, &tc, 0);
1201 /* Find all the row variable values. */
1202 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1204 /* Allocate memory space for the column and row totals. */
1205 if (n_rows > *maxrows)
1207 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1208 row_tot = *row_totp;
1211 if (n_cols > *maxcols)
1213 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1214 col_tot = *col_totp;
1218 /* Allocate table space for the matrix. */
1219 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1220 tab_realloc (table, -1,
1221 MAX (tab_nr (table) + (n_rows + 1) * num_cells,
1222 tab_nr (table) * (pe - pb) / (te - tb)));
1224 if (mode == GENERAL)
1226 /* Allocate memory space for the matrix. */
1227 if (n_cols * n_rows > *maxcells)
1229 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1230 *maxcells = n_cols * n_rows;
1235 /* Build the matrix and calculate column totals. */
1237 union value *cur_col = cols;
1238 union value *cur_row = rows;
1240 double *cp = col_tot;
1241 struct table_entry **p;
1244 for (p = &tb[0]; p < te; p++)
1246 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1250 for (; cur_row < &rows[n_rows]; cur_row++)
1256 mp = &mat[cur_col - cols];
1259 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1266 *cp += *mp = (*p)->u.freq;
1271 /* Zero out the rest of the matrix. */
1272 for (; cur_row < &rows[n_rows]; cur_row++)
1278 if (cur_col < &cols[n_cols])
1280 const int rem_cols = n_cols - (cur_col - cols);
1283 for (c = 0; c < rem_cols; c++)
1285 mp = &mat[cur_col - cols];
1286 for (r = 0; r < n_rows; r++)
1288 for (c = 0; c < rem_cols; c++)
1290 mp += n_cols - rem_cols;
1298 double *tp = col_tot;
1300 assert (mode == INTEGER);
1301 mat = (*tb)->u.data;
1304 /* Calculate column totals. */
1305 for (c = 0; c < n_cols; c++)
1308 double *cp = &mat[c];
1310 for (r = 0; r < n_rows; r++)
1311 cum += cp[r * n_cols];
1319 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1320 ns_cols += *cp != 0.;
1323 /* Calculate row totals. */
1326 double *rp = row_tot;
1329 for (ns_rows = 0, r = n_rows; r--; )
1332 for (c = n_cols; c--; )
1340 /* Calculate grand total. */
1346 if (n_rows < n_cols)
1347 tp = row_tot, n = n_rows;
1349 tp = col_tot, n = n_cols;
1355 /* Find the first variable that differs from the last subtable,
1356 then display the values of the dimensioning variables for
1357 each table that needs it. */
1359 int first_difference = nvar - 1;
1362 for (; ; first_difference--)
1364 assert (first_difference >= 2);
1365 if (memcmp (&cmp->values[first_difference],
1366 &(*tb)->values[first_difference],
1367 sizeof *cmp->values))
1373 display_dimensions (table, first_difference, *tb);
1375 display_dimensions (chisq, first_difference, *tb);
1377 display_dimensions (sym, first_difference, *tb);
1379 display_dimensions (risk, first_difference, *tb);
1381 display_dimensions (direct, first_difference, *tb);
1385 display_crosstabulation ();
1386 if (cmd.miss == CRS_REPORT)
1391 display_symmetric ();
1395 display_directional ();
1406 tab_resize (chisq, 4 + (nvar - 2), -1);
1417 /* Delete missing rows and columns for statistical analysis when
1420 delete_missing (void)
1425 for (r = 0; r < n_rows; r++)
1426 if (var_is_num_user_missing (x->vars[ROW_VAR], rows[r].f))
1430 for (c = 0; c < n_cols; c++)
1431 mat[c + r * n_cols] = 0.;
1439 for (c = 0; c < n_cols; c++)
1440 if (var_is_num_user_missing (x->vars[COL_VAR], cols[c].f))
1444 for (r = 0; r < n_rows; r++)
1445 mat[c + r * n_cols] = 0.;
1451 /* Prepare table T for submission, and submit it. */
1453 submit (struct tab_table *t)
1460 tab_resize (t, -1, 0);
1461 if (tab_nr (t) == tab_t (t))
1466 tab_offset (t, 0, 0);
1468 for (i = 2; i < nvar; i++)
1469 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1470 var_to_string (x->vars[i]));
1471 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1472 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1474 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1476 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1477 tab_dim (t, crosstabs_dim);
1481 /* Sets the widths of all the columns and heights of all the rows in
1482 table T for driver D. */
1484 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1488 /* Width of a numerical column. */
1489 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1490 if (cmd.miss == CRS_REPORT)
1491 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1493 /* Set width for header columns. */
1499 w = d->width - c * (t->nc - t->l);
1500 for (i = 0; i <= t->nc; i++)
1504 if (w < d->prop_em_width * 8)
1505 w = d->prop_em_width * 8;
1507 if (w > d->prop_em_width * 15)
1508 w = d->prop_em_width * 15;
1510 for (i = 0; i < t->l; i++)
1514 for (i = t->l; i < t->nc; i++)
1517 for (i = 0; i < t->nr; i++)
1518 t->h[i] = tab_natural_height (t, d, i);
1521 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1522 int *cnt, int pivot);
1523 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1524 int *cnt, int pivot);
1526 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1528 static struct table_entry **
1529 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1531 return (mode == GENERAL
1532 ? find_pivot_extent_general (tp, cnt, pivot)
1533 : find_pivot_extent_integer (tp, cnt, pivot));
1536 /* Find the extent of a region in TP that contains one table. If
1537 PIVOT != 0 that means a set of table entries with identical table
1538 number; otherwise they also have to have the same values for every
1539 dimension after the row and column dimensions. The table that is
1540 searched starts at TP and has length CNT. Returns the first entry
1541 after the last one in the table; sets *CNT to the number of
1542 remaining values. If there are no entries in TP at all, returns
1543 NULL. A yucky interface, admittedly, but it works. */
1544 static struct table_entry **
1545 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1547 struct table_entry *fp = *tp;
1552 x = xtab[(*tp)->table];
1560 if ((*tp)->table != fp->table)
1565 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1572 /* Integer mode correspondent to find_pivot_extent_general(). This
1573 could be optimized somewhat, but I just don't give a crap about
1574 CROSSTABS performance in integer mode, which is just a
1575 CROSSTABS wart as far as I'm concerned.
1577 That said, feel free to send optimization patches to me. */
1578 static struct table_entry **
1579 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1581 struct table_entry *fp = *tp;
1586 x = xtab[(*tp)->table];
1594 if ((*tp)->table != fp->table)
1599 if (memcmp (&(*tp)->values[2], &fp->values[2],
1600 sizeof (union value) * (x->nvar - 2)))
1607 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1608 result. WIDTH_ points to an int which is either 0 for a
1609 numeric value or a string width for a string value. */
1611 compare_value (const void *a_, const void *b_, const void *width_)
1613 const union value *a = a_;
1614 const union value *b = b_;
1615 const int *pwidth = width_;
1616 const int width = *pwidth;
1619 return (a->f < b->f) ? -1 : (a->f > b->f);
1621 return strncmp (a->s, b->s, width);
1624 /* Given an array of ENTRY_CNT table_entry structures starting at
1625 ENTRIES, creates a sorted list of the values that the variable
1626 with index VAR_IDX takes on. The values are returned as a
1627 malloc()'darray stored in *VALUES, with the number of values
1628 stored in *VALUE_CNT.
1631 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1632 union value **values, int *value_cnt)
1634 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1636 if (mode == GENERAL)
1638 int width = var_get_width (v);
1641 *values = xnmalloc (entry_cnt, sizeof **values);
1642 for (i = 0; i < entry_cnt; i++)
1643 (*values)[i] = entries[i]->values[var_idx];
1644 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1645 compare_value, &width);
1649 struct var_range *vr = get_var_range (v);
1652 assert (mode == INTEGER);
1653 *values = xnmalloc (vr->count, sizeof **values);
1654 for (i = 0; i < vr->count; i++)
1655 (*values)[i].f = i + vr->min;
1656 *value_cnt = vr->count;
1660 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1661 from V, displayed with print format spec from variable VAR. When
1662 in REPORT missing-value mode, missing values have an M appended. */
1664 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1665 const union value *v, const struct variable *var)
1668 const struct fmt_spec *print = var_get_print_format (var);
1670 const char *label = val_labs_find (var->val_labs, *v);
1673 tab_text (table, c, r, TAB_LEFT, label);
1677 s.string = tab_alloc (table, print->w);
1678 format_short (s.string, print, v);
1679 s.length = strlen (s.string);
1680 if (cmd.miss == CRS_REPORT && var_is_num_user_missing (var, v->f))
1681 s.string[s.length++] = 'M';
1682 while (s.length && *s.string == ' ')
1687 tab_raw (table, c, r, opt, &s);
1690 /* Draws a line across TABLE at the current row to indicate the most
1691 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1692 that changed, and puts the values that changed into the table. TB
1693 and X must be the corresponding table_entry and crosstab,
1696 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1698 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1700 for (; first_difference >= 2; first_difference--)
1701 table_value_missing (table, nvar - first_difference - 1, 0,
1702 TAB_RIGHT, &tb->values[first_difference],
1703 x->vars[first_difference]);
1706 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1707 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1708 additionally suffixed with a letter `M'. */
1710 format_cell_entry (struct tab_table *table, int c, int r, double value,
1711 char suffix, bool mark_missing)
1713 const struct fmt_spec f = {FMT_F, 10, 1};
1718 s.string = tab_alloc (table, 16);
1720 data_out (&v, &f, s.string);
1721 while (*s.string == ' ')
1727 s.string[s.length++] = suffix;
1729 s.string[s.length++] = 'M';
1731 tab_raw (table, c, r, TAB_RIGHT, &s);
1734 /* Displays the crosstabulation table. */
1736 display_crosstabulation (void)
1741 for (r = 0; r < n_rows; r++)
1742 table_value_missing (table, nvar - 2, r * num_cells,
1743 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1745 tab_text (table, nvar - 2, n_rows * num_cells,
1746 TAB_LEFT, _("Total"));
1748 /* Put in the actual cells. */
1753 tab_offset (table, nvar - 1, -1);
1754 for (r = 0; r < n_rows; r++)
1757 tab_hline (table, TAL_1, -1, n_cols, 0);
1758 for (c = 0; c < n_cols; c++)
1760 bool mark_missing = false;
1761 double expected_value = row_tot[r] * col_tot[c] / W;
1762 if (cmd.miss == CRS_REPORT
1763 && (var_is_num_user_missing (x->vars[COL_VAR], cols[c].f)
1764 || var_is_num_user_missing (x->vars[ROW_VAR], rows[r].f)))
1765 mark_missing = true;
1766 for (i = 0; i < num_cells; i++)
1777 v = *mp / row_tot[r] * 100.;
1781 v = *mp / col_tot[c] * 100.;
1788 case CRS_CL_EXPECTED:
1791 case CRS_CL_RESIDUAL:
1792 v = *mp - expected_value;
1794 case CRS_CL_SRESIDUAL:
1795 v = (*mp - expected_value) / sqrt (expected_value);
1797 case CRS_CL_ASRESIDUAL:
1798 v = ((*mp - expected_value)
1799 / sqrt (expected_value
1800 * (1. - row_tot[r] / W)
1801 * (1. - col_tot[c] / W)));
1807 format_cell_entry (table, c, i, v, suffix, mark_missing);
1813 tab_offset (table, -1, tab_row (table) + num_cells);
1821 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1822 for (r = 0; r < n_rows; r++)
1825 bool mark_missing = false;
1827 if (cmd.miss == CRS_REPORT
1828 && var_is_num_user_missing (x->vars[ROW_VAR], rows[r].f))
1829 mark_missing = true;
1831 for (i = 0; i < num_cells; i++)
1845 v = row_tot[r] / W * 100.;
1849 v = row_tot[r] / W * 100.;
1852 case CRS_CL_EXPECTED:
1853 case CRS_CL_RESIDUAL:
1854 case CRS_CL_SRESIDUAL:
1855 case CRS_CL_ASRESIDUAL:
1862 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1863 tab_next_row (table);
1868 /* Column totals, grand total. */
1874 tab_hline (table, TAL_1, -1, n_cols, 0);
1875 for (c = 0; c <= n_cols; c++)
1877 double ct = c < n_cols ? col_tot[c] : W;
1878 bool mark_missing = false;
1882 if (cmd.miss == CRS_REPORT && c < n_cols
1883 && var_is_num_user_missing (x->vars[COL_VAR], cols[c].f))
1884 mark_missing = true;
1886 for (i = 0; i < num_cells; i++)
1908 case CRS_CL_EXPECTED:
1909 case CRS_CL_RESIDUAL:
1910 case CRS_CL_SRESIDUAL:
1911 case CRS_CL_ASRESIDUAL:
1917 format_cell_entry (table, c, i, v, suffix, mark_missing);
1922 tab_offset (table, -1, tab_row (table) + last_row);
1925 tab_offset (table, 0, -1);
1928 static void calc_r (double *X, double *Y, double *, double *, double *);
1929 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1931 /* Display chi-square statistics. */
1933 display_chisq (void)
1935 static const char *chisq_stats[N_CHISQ] =
1937 N_("Pearson Chi-Square"),
1938 N_("Likelihood Ratio"),
1939 N_("Fisher's Exact Test"),
1940 N_("Continuity Correction"),
1941 N_("Linear-by-Linear Association"),
1943 double chisq_v[N_CHISQ];
1944 double fisher1, fisher2;
1950 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1952 tab_offset (chisq, nvar - 2, -1);
1954 for (i = 0; i < N_CHISQ; i++)
1956 if ((i != 2 && chisq_v[i] == SYSMIS)
1957 || (i == 2 && fisher1 == SYSMIS))
1961 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1964 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1965 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1966 tab_float (chisq, 3, 0, TAB_RIGHT,
1967 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1972 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1973 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1975 tab_next_row (chisq);
1978 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1979 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1980 tab_next_row (chisq);
1982 tab_offset (chisq, 0, -1);
1985 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1986 double[N_SYMMETRIC]);
1988 /* Display symmetric measures. */
1990 display_symmetric (void)
1992 static const char *categories[] =
1994 N_("Nominal by Nominal"),
1995 N_("Ordinal by Ordinal"),
1996 N_("Interval by Interval"),
1997 N_("Measure of Agreement"),
2000 static const char *stats[N_SYMMETRIC] =
2004 N_("Contingency Coefficient"),
2005 N_("Kendall's tau-b"),
2006 N_("Kendall's tau-c"),
2008 N_("Spearman Correlation"),
2013 static const int stats_categories[N_SYMMETRIC] =
2015 0, 0, 0, 1, 1, 1, 1, 2, 3,
2019 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2022 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2025 tab_offset (sym, nvar - 2, -1);
2027 for (i = 0; i < N_SYMMETRIC; i++)
2029 if (sym_v[i] == SYSMIS)
2032 if (stats_categories[i] != last_cat)
2034 last_cat = stats_categories[i];
2035 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2038 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2039 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2040 if (sym_ase[i] != SYSMIS)
2041 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2042 if (sym_t[i] != SYSMIS)
2043 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2044 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2048 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2049 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2052 tab_offset (sym, 0, -1);
2055 static int calc_risk (double[], double[], double[], union value *);
2057 /* Display risk estimate. */
2062 double risk_v[3], lower[3], upper[3];
2066 if (!calc_risk (risk_v, upper, lower, c))
2069 tab_offset (risk, nvar - 2, -1);
2071 for (i = 0; i < 3; i++)
2073 if (risk_v[i] == SYSMIS)
2079 if (var_is_numeric (x->vars[COL_VAR]))
2080 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2081 var_get_name (x->vars[COL_VAR]), c[0].f, c[1].f);
2083 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2084 var_get_name (x->vars[COL_VAR]),
2085 var_get_width (x->vars[COL_VAR]), c[0].s,
2086 var_get_width (x->vars[COL_VAR]), c[1].s);
2090 if (var_is_numeric (x->vars[ROW_VAR]))
2091 sprintf (buf, _("For cohort %s = %g"),
2092 var_get_name (x->vars[ROW_VAR]), rows[i - 1].f);
2094 sprintf (buf, _("For cohort %s = %.*s"),
2095 var_get_name (x->vars[ROW_VAR]),
2096 var_get_width (x->vars[ROW_VAR]), rows[i - 1].s);
2100 tab_text (risk, 0, 0, TAB_LEFT, buf);
2101 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2102 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2103 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2104 tab_next_row (risk);
2107 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2108 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2109 tab_next_row (risk);
2111 tab_offset (risk, 0, -1);
2114 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2115 double[N_DIRECTIONAL]);
2117 /* Display directional measures. */
2119 display_directional (void)
2121 static const char *categories[] =
2123 N_("Nominal by Nominal"),
2124 N_("Ordinal by Ordinal"),
2125 N_("Nominal by Interval"),
2128 static const char *stats[] =
2131 N_("Goodman and Kruskal tau"),
2132 N_("Uncertainty Coefficient"),
2137 static const char *types[] =
2144 static const int stats_categories[N_DIRECTIONAL] =
2146 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2149 static const int stats_stats[N_DIRECTIONAL] =
2151 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2154 static const int stats_types[N_DIRECTIONAL] =
2156 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2159 static const int *stats_lookup[] =
2166 static const char **stats_names[] =
2178 double direct_v[N_DIRECTIONAL];
2179 double direct_ase[N_DIRECTIONAL];
2180 double direct_t[N_DIRECTIONAL];
2184 if (!calc_directional (direct_v, direct_ase, direct_t))
2187 tab_offset (direct, nvar - 2, -1);
2189 for (i = 0; i < N_DIRECTIONAL; i++)
2191 if (direct_v[i] == SYSMIS)
2197 for (j = 0; j < 3; j++)
2198 if (last[j] != stats_lookup[j][i])
2201 tab_hline (direct, TAL_1, j, 6, 0);
2206 int k = last[j] = stats_lookup[j][i];
2211 string = var_get_name (x->vars[0]);
2213 string = var_get_name (x->vars[1]);
2215 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2216 gettext (stats_names[j][k]), string);
2221 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2222 if (direct_ase[i] != SYSMIS)
2223 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2224 if (direct_t[i] != SYSMIS)
2225 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2226 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2227 tab_next_row (direct);
2230 tab_offset (direct, 0, -1);
2233 /* Statistical calculations. */
2235 /* Returns the value of the gamma (factorial) function for an integer
2238 gamma_int (double x)
2243 for (i = 2; i < x; i++)
2248 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2250 static inline double
2251 Pr (int a, int b, int c, int d)
2253 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2254 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2255 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2256 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2257 / gamma_int (a + b + c + d + 1.));
2260 /* Swap the contents of A and B. */
2262 swap (int *a, int *b)
2269 /* Calculate significance for Fisher's exact test as specified in
2270 _SPSS Statistical Algorithms_, Appendix 5. */
2272 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2276 if (MIN (c, d) < MIN (a, b))
2277 swap (&a, &c), swap (&b, &d);
2278 if (MIN (b, d) < MIN (a, c))
2279 swap (&a, &b), swap (&c, &d);
2283 swap (&a, &b), swap (&c, &d);
2285 swap (&a, &c), swap (&b, &d);
2289 for (x = 0; x <= a; x++)
2290 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2292 *fisher2 = *fisher1;
2293 for (x = 1; x <= b; x++)
2294 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2297 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2298 columns with values COLS and N_ROWS rows with values ROWS. Values
2299 in the matrix sum to W. */
2301 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2302 double *fisher1, double *fisher2)
2306 chisq[0] = chisq[1] = 0.;
2307 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2308 *fisher1 = *fisher2 = SYSMIS;
2310 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2312 if (ns_rows <= 1 || ns_cols <= 1)
2314 chisq[0] = chisq[1] = SYSMIS;
2318 for (r = 0; r < n_rows; r++)
2319 for (c = 0; c < n_cols; c++)
2321 const double expected = row_tot[r] * col_tot[c] / W;
2322 const double freq = mat[n_cols * r + c];
2323 const double residual = freq - expected;
2325 chisq[0] += residual * residual / expected;
2327 chisq[1] += freq * log (expected / freq);
2338 /* Calculate Yates and Fisher exact test. */
2339 if (ns_cols == 2 && ns_rows == 2)
2341 double f11, f12, f21, f22;
2347 for (i = j = 0; i < n_cols; i++)
2348 if (col_tot[i] != 0.)
2357 f11 = mat[nz_cols[0]];
2358 f12 = mat[nz_cols[1]];
2359 f21 = mat[nz_cols[0] + n_cols];
2360 f22 = mat[nz_cols[1] + n_cols];
2365 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2368 chisq[3] = (W * x * x
2369 / (f11 + f12) / (f21 + f22)
2370 / (f11 + f21) / (f12 + f22));
2378 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2379 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2382 /* Calculate Mantel-Haenszel. */
2383 if (var_is_numeric (x->vars[ROW_VAR]) && var_is_numeric (x->vars[COL_VAR]))
2385 double r, ase_0, ase_1;
2386 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2388 chisq[4] = (W - 1.) * r * r;
2393 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2394 ASE_1, and ase_0 into ASE_0. The row and column values must be
2395 passed in X and Y. */
2397 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2399 double SX, SY, S, T;
2401 double sum_XYf, sum_X2Y2f;
2402 double sum_Xr, sum_X2r;
2403 double sum_Yc, sum_Y2c;
2406 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2407 for (j = 0; j < n_cols; j++)
2409 double fij = mat[j + i * n_cols];
2410 double product = X[i] * Y[j];
2411 double temp = fij * product;
2413 sum_X2Y2f += temp * product;
2416 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2418 sum_Xr += X[i] * row_tot[i];
2419 sum_X2r += X[i] * X[i] * row_tot[i];
2423 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2425 sum_Yc += Y[i] * col_tot[i];
2426 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2430 S = sum_XYf - sum_Xr * sum_Yc / W;
2431 SX = sum_X2r - sum_Xr * sum_Xr / W;
2432 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2435 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2440 for (s = c = 0., i = 0; i < n_rows; i++)
2441 for (j = 0; j < n_cols; j++)
2443 double Xresid, Yresid;
2446 Xresid = X[i] - Xbar;
2447 Yresid = Y[j] - Ybar;
2448 temp = (T * Xresid * Yresid
2450 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2451 y = mat[j + i * n_cols] * temp * temp - c;
2456 *ase_1 = sqrt (s) / (T * T);
2460 static double somers_d_v[3];
2461 static double somers_d_ase[3];
2462 static double somers_d_t[3];
2464 /* Calculate symmetric statistics and their asymptotic standard
2465 errors. Returns 0 if none could be calculated. */
2467 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2468 double t[N_SYMMETRIC])
2470 int q = MIN (ns_rows, ns_cols);
2479 for (i = 0; i < N_SYMMETRIC; i++)
2480 v[i] = ase[i] = t[i] = SYSMIS;
2483 /* Phi, Cramer's V, contingency coefficient. */
2484 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2486 double Xp = 0.; /* Pearson chi-square. */
2491 for (r = 0; r < n_rows; r++)
2492 for (c = 0; c < n_cols; c++)
2494 const double expected = row_tot[r] * col_tot[c] / W;
2495 const double freq = mat[n_cols * r + c];
2496 const double residual = freq - expected;
2498 Xp += residual * residual / expected;
2502 if (cmd.a_statistics[CRS_ST_PHI])
2504 v[0] = sqrt (Xp / W);
2505 v[1] = sqrt (Xp / (W * (q - 1)));
2507 if (cmd.a_statistics[CRS_ST_CC])
2508 v[2] = sqrt (Xp / (Xp + W));
2511 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2512 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2517 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2524 for (r = 0; r < n_rows; r++)
2525 Dr -= row_tot[r] * row_tot[r];
2526 for (c = 0; c < n_cols; c++)
2527 Dc -= col_tot[c] * col_tot[c];
2533 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2534 for (c = 0; c < n_cols; c++)
2538 for (r = 0; r < n_rows; r++)
2539 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2549 for (i = 0; i < n_rows; i++)
2553 for (j = 1; j < n_cols; j++)
2554 Cij += col_tot[j] - cum[j + i * n_cols];
2557 for (j = 1; j < n_cols; j++)
2558 Dij += cum[j + (i - 1) * n_cols];
2562 double fij = mat[j + i * n_cols];
2568 assert (j < n_cols);
2570 Cij -= col_tot[j] - cum[j + i * n_cols];
2571 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2575 Cij += cum[j - 1 + (i - 1) * n_cols];
2576 Dij -= cum[j + (i - 1) * n_cols];
2582 if (cmd.a_statistics[CRS_ST_BTAU])
2583 v[3] = (P - Q) / sqrt (Dr * Dc);
2584 if (cmd.a_statistics[CRS_ST_CTAU])
2585 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2586 if (cmd.a_statistics[CRS_ST_GAMMA])
2587 v[5] = (P - Q) / (P + Q);
2589 /* ASE for tau-b, tau-c, gamma. Calculations could be
2590 eliminated here, at expense of memory. */
2595 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2596 for (i = 0; i < n_rows; i++)
2600 for (j = 1; j < n_cols; j++)
2601 Cij += col_tot[j] - cum[j + i * n_cols];
2604 for (j = 1; j < n_cols; j++)
2605 Dij += cum[j + (i - 1) * n_cols];
2609 double fij = mat[j + i * n_cols];
2611 if (cmd.a_statistics[CRS_ST_BTAU])
2613 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2614 + v[3] * (row_tot[i] * Dc
2615 + col_tot[j] * Dr));
2616 btau_cum += fij * temp * temp;
2620 const double temp = Cij - Dij;
2621 ctau_cum += fij * temp * temp;
2624 if (cmd.a_statistics[CRS_ST_GAMMA])
2626 const double temp = Q * Cij - P * Dij;
2627 gamma_cum += fij * temp * temp;
2630 if (cmd.a_statistics[CRS_ST_D])
2632 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2633 - (P - Q) * (W - row_tot[i]));
2634 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2635 - (Q - P) * (W - col_tot[j]));
2640 assert (j < n_cols);
2642 Cij -= col_tot[j] - cum[j + i * n_cols];
2643 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2647 Cij += cum[j - 1 + (i - 1) * n_cols];
2648 Dij -= cum[j + (i - 1) * n_cols];
2654 btau_var = ((btau_cum
2655 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2657 if (cmd.a_statistics[CRS_ST_BTAU])
2659 ase[3] = sqrt (btau_var);
2660 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2663 if (cmd.a_statistics[CRS_ST_CTAU])
2665 ase[4] = ((2 * q / ((q - 1) * W * W))
2666 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2667 t[4] = v[4] / ase[4];
2669 if (cmd.a_statistics[CRS_ST_GAMMA])
2671 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2672 t[5] = v[5] / (2. / (P + Q)
2673 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2675 if (cmd.a_statistics[CRS_ST_D])
2677 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2678 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2679 somers_d_t[0] = (somers_d_v[0]
2681 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2682 somers_d_v[1] = (P - Q) / Dc;
2683 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2684 somers_d_t[1] = (somers_d_v[1]
2686 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2687 somers_d_v[2] = (P - Q) / Dr;
2688 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2689 somers_d_t[2] = (somers_d_v[2]
2691 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2697 /* Spearman correlation, Pearson's r. */
2698 if (cmd.a_statistics[CRS_ST_CORR])
2700 double *R = local_alloc (sizeof *R * n_rows);
2701 double *C = local_alloc (sizeof *C * n_cols);
2704 double y, t, c = 0., s = 0.;
2709 R[i] = s + (row_tot[i] + 1.) / 2.;
2716 assert (i < n_rows);
2721 double y, t, c = 0., s = 0.;
2726 C[j] = s + (col_tot[j] + 1.) / 2;
2733 assert (j < n_cols);
2737 calc_r (R, C, &v[6], &t[6], &ase[6]);
2743 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2747 /* Cohen's kappa. */
2748 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2750 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2753 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2754 i < ns_rows; i++, j++)
2758 while (col_tot[j] == 0.)
2761 prod = row_tot[i] * col_tot[j];
2762 sum = row_tot[i] + col_tot[j];
2764 sum_fii += mat[j + i * n_cols];
2766 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2767 sum_riciri_ci += prod * sum;
2769 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2770 for (j = 0; j < ns_cols; j++)
2772 double sum = row_tot[i] + col_tot[j];
2773 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2776 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2778 ase[8] = sqrt ((W * W * sum_rici
2779 + sum_rici * sum_rici
2780 - W * sum_riciri_ci)
2781 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2783 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2784 / pow2 (W * W - sum_rici))
2785 + ((2. * (W - sum_fii)
2786 * (2. * sum_fii * sum_rici
2787 - W * sum_fiiri_ci))
2788 / cube (W * W - sum_rici))
2789 + (pow2 (W - sum_fii)
2790 * (W * sum_fijri_ci2 - 4.
2791 * sum_rici * sum_rici)
2792 / pow4 (W * W - sum_rici))));
2794 t[8] = v[8] / ase[8];
2801 /* Calculate risk estimate. */
2803 calc_risk (double *value, double *upper, double *lower, union value *c)
2805 double f11, f12, f21, f22;
2811 for (i = 0; i < 3; i++)
2812 value[i] = upper[i] = lower[i] = SYSMIS;
2815 if (ns_rows != 2 || ns_cols != 2)
2822 for (i = j = 0; i < n_cols; i++)
2823 if (col_tot[i] != 0.)
2832 f11 = mat[nz_cols[0]];
2833 f12 = mat[nz_cols[1]];
2834 f21 = mat[nz_cols[0] + n_cols];
2835 f22 = mat[nz_cols[1] + n_cols];
2837 c[0] = cols[nz_cols[0]];
2838 c[1] = cols[nz_cols[1]];
2841 value[0] = (f11 * f22) / (f12 * f21);
2842 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2843 lower[0] = value[0] * exp (-1.960 * v);
2844 upper[0] = value[0] * exp (1.960 * v);
2846 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2847 v = sqrt ((f12 / (f11 * (f11 + f12)))
2848 + (f22 / (f21 * (f21 + f22))));
2849 lower[1] = value[1] * exp (-1.960 * v);
2850 upper[1] = value[1] * exp (1.960 * v);
2852 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2853 v = sqrt ((f11 / (f12 * (f11 + f12)))
2854 + (f21 / (f22 * (f21 + f22))));
2855 lower[2] = value[2] * exp (-1.960 * v);
2856 upper[2] = value[2] * exp (1.960 * v);
2861 /* Calculate directional measures. */
2863 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2864 double t[N_DIRECTIONAL])
2869 for (i = 0; i < N_DIRECTIONAL; i++)
2870 v[i] = ase[i] = t[i] = SYSMIS;
2874 if (cmd.a_statistics[CRS_ST_LAMBDA])
2876 double *fim = xnmalloc (n_rows, sizeof *fim);
2877 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2878 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2879 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2880 double sum_fim, sum_fmj;
2882 int rm_index, cm_index;
2885 /* Find maximum for each row and their sum. */
2886 for (sum_fim = 0., i = 0; i < n_rows; i++)
2888 double max = mat[i * n_cols];
2891 for (j = 1; j < n_cols; j++)
2892 if (mat[j + i * n_cols] > max)
2894 max = mat[j + i * n_cols];
2898 sum_fim += fim[i] = max;
2899 fim_index[i] = index;
2902 /* Find maximum for each column. */
2903 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2905 double max = mat[j];
2908 for (i = 1; i < n_rows; i++)
2909 if (mat[j + i * n_cols] > max)
2911 max = mat[j + i * n_cols];
2915 sum_fmj += fmj[j] = max;
2916 fmj_index[j] = index;
2919 /* Find maximum row total. */
2922 for (i = 1; i < n_rows; i++)
2923 if (row_tot[i] > rm)
2929 /* Find maximum column total. */
2932 for (j = 1; j < n_cols; j++)
2933 if (col_tot[j] > cm)
2939 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2940 v[1] = (sum_fmj - rm) / (W - rm);
2941 v[2] = (sum_fim - cm) / (W - cm);
2943 /* ASE1 for Y given X. */
2947 for (accum = 0., i = 0; i < n_rows; i++)
2948 for (j = 0; j < n_cols; j++)
2950 const int deltaj = j == cm_index;
2951 accum += (mat[j + i * n_cols]
2952 * pow2 ((j == fim_index[i])
2957 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2960 /* ASE0 for Y given X. */
2964 for (accum = 0., i = 0; i < n_rows; i++)
2965 if (cm_index != fim_index[i])
2966 accum += (mat[i * n_cols + fim_index[i]]
2967 + mat[i * n_cols + cm_index]);
2968 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2971 /* ASE1 for X given Y. */
2975 for (accum = 0., i = 0; i < n_rows; i++)
2976 for (j = 0; j < n_cols; j++)
2978 const int deltaj = i == rm_index;
2979 accum += (mat[j + i * n_cols]
2980 * pow2 ((i == fmj_index[j])
2985 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2988 /* ASE0 for X given Y. */
2992 for (accum = 0., j = 0; j < n_cols; j++)
2993 if (rm_index != fmj_index[j])
2994 accum += (mat[j + n_cols * fmj_index[j]]
2995 + mat[j + n_cols * rm_index]);
2996 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
2999 /* Symmetric ASE0 and ASE1. */
3004 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3005 for (j = 0; j < n_cols; j++)
3007 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3008 int temp1 = (i == rm_index) + (j == cm_index);
3009 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3010 accum1 += (mat[j + i * n_cols]
3011 * pow2 (temp0 + (v[0] - 1.) * temp1));
3013 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3014 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3015 / (2. * W - rm - cm));
3024 double sum_fij2_ri, sum_fij2_ci;
3025 double sum_ri2, sum_cj2;
3027 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3028 for (j = 0; j < n_cols; j++)
3030 double temp = pow2 (mat[j + i * n_cols]);
3031 sum_fij2_ri += temp / row_tot[i];
3032 sum_fij2_ci += temp / col_tot[j];
3035 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3036 sum_ri2 += row_tot[i] * row_tot[i];
3038 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3039 sum_cj2 += col_tot[j] * col_tot[j];
3041 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3042 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3046 if (cmd.a_statistics[CRS_ST_UC])
3048 double UX, UY, UXY, P;
3049 double ase1_yx, ase1_xy, ase1_sym;
3052 for (UX = 0., i = 0; i < n_rows; i++)
3053 if (row_tot[i] > 0.)
3054 UX -= row_tot[i] / W * log (row_tot[i] / W);
3056 for (UY = 0., j = 0; j < n_cols; j++)
3057 if (col_tot[j] > 0.)
3058 UY -= col_tot[j] / W * log (col_tot[j] / W);
3060 for (UXY = P = 0., i = 0; i < n_rows; i++)
3061 for (j = 0; j < n_cols; j++)
3063 double entry = mat[j + i * n_cols];
3068 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3069 UXY -= entry / W * log (entry / W);
3072 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3073 for (j = 0; j < n_cols; j++)
3075 double entry = mat[j + i * n_cols];
3080 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3081 + (UX - UXY) * log (col_tot[j] / W));
3082 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3083 + (UY - UXY) * log (row_tot[i] / W));
3084 ase1_sym += entry * pow2 ((UXY
3085 * log (row_tot[i] * col_tot[j] / (W * W)))
3086 - (UX + UY) * log (entry / W));
3089 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3090 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3091 t[5] = v[5] / ((2. / (W * (UX + UY)))
3092 * sqrt (P - pow2 (UX + UY - UXY) / W));
3094 v[6] = (UX + UY - UXY) / UX;
3095 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3096 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3098 v[7] = (UX + UY - UXY) / UY;
3099 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3100 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3104 if (cmd.a_statistics[CRS_ST_D])
3109 calc_symmetric (NULL, NULL, NULL);
3110 for (i = 0; i < 3; i++)
3112 v[8 + i] = somers_d_v[i];
3113 ase[8 + i] = somers_d_ase[i];
3114 t[8 + i] = somers_d_t[i];
3119 if (cmd.a_statistics[CRS_ST_ETA])
3122 double sum_Xr, sum_X2r;
3126 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3128 sum_Xr += rows[i].f * row_tot[i];
3129 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3131 SX = sum_X2r - sum_Xr * sum_Xr / W;
3133 for (SXW = 0., j = 0; j < n_cols; j++)
3137 for (cum = 0., i = 0; i < n_rows; i++)
3139 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3140 cum += rows[i].f * mat[j + i * n_cols];
3143 SXW -= cum * cum / col_tot[j];
3145 v[11] = sqrt (1. - SXW / SX);
3149 double sum_Yc, sum_Y2c;
3153 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3155 sum_Yc += cols[i].f * col_tot[i];
3156 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3158 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3160 for (SYW = 0., i = 0; i < n_rows; i++)
3164 for (cum = 0., j = 0; j < n_cols; j++)
3166 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3167 cum += cols[j].f * mat[j + i * n_cols];
3170 SYW -= cum * cum / row_tot[i];
3172 v[12] = sqrt (1. - SYW / SY);
3179 /* A wrapper around data_out() that limits string output to short
3180 string width and null terminates the result. */
3182 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3184 struct fmt_spec fmt_subst;
3186 /* Limit to short string width. */
3187 if (fmt_is_string (fp->type))
3191 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3192 if (fmt_subst.type == FMT_A)
3193 fmt_subst.w = MIN (8, fmt_subst.w);
3195 fmt_subst.w = MIN (16, fmt_subst.w);
3201 data_out (v, fp, s);
3203 /* Null terminate. */