1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
35 #include <gsl/gsl_cdf.h>
39 #include <data/case.h>
40 #include <data/data-out.h>
41 #include <data/dictionary.h>
42 #include <data/procedure.h>
43 #include <data/value-labels.h>
44 #include <data/variable.h>
45 #include <language/command.h>
46 #include <language/dictionary/split-file.h>
47 #include <language/lexer/lexer.h>
48 #include <language/lexer/variable-parser.h>
49 #include <libpspp/alloc.h>
50 #include <libpspp/array.h>
51 #include <libpspp/assertion.h>
52 #include <libpspp/compiler.h>
53 #include <libpspp/hash.h>
54 #include <libpspp/magic.h>
55 #include <libpspp/message.h>
56 #include <libpspp/message.h>
57 #include <libpspp/misc.h>
58 #include <libpspp/pool.h>
59 #include <libpspp/str.h>
60 #include <output/output.h>
61 #include <output/table.h>
66 #define _(msgid) gettext (msgid)
67 #define N_(msgid) msgid
75 missing=miss:!table/include/report;
76 +write[wr_]=none,cells,all;
77 +format=fmt:!labels/nolabels/novallabs,
80 tabl:!tables/notables,
83 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
85 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
91 /* Number of chi-square statistics. */
94 /* Number of symmetric statistics. */
97 /* Number of directional statistics. */
98 #define N_DIRECTIONAL 13
100 /* A single table entry for general mode. */
103 int table; /* Flattened table number. */
106 double freq; /* Frequency count. */
107 double *data; /* Crosstabulation table for integer mode. */
110 union value values[1]; /* Values. */
113 /* A crosstabulation. */
116 int nvar; /* Number of variables. */
117 double missing; /* Missing cases count. */
118 int ofs; /* Integer mode: Offset into sorted_tab[]. */
119 struct variable *vars[2]; /* At least two variables; sorted by
120 larger indices first. */
123 /* Integer mode variable info. */
126 int min; /* Minimum value. */
127 int max; /* Maximum value + 1. */
128 int count; /* max - min. */
131 static inline struct var_range *
132 get_var_range (struct variable *v)
135 assert (v->aux != NULL);
139 /* Indexes into crosstab.v. */
146 /* General mode crosstabulation table. */
147 static struct hsh_table *gen_tab; /* Hash table. */
148 static int n_sorted_tab; /* Number of entries in sorted_tab. */
149 static struct table_entry **sorted_tab; /* Sorted table. */
151 /* Variables specifies on VARIABLES. */
152 static struct variable **variables;
153 static size_t variables_cnt;
156 static struct crosstab **xtab;
159 /* Integer or general mode? */
168 static int num_cells; /* Number of cells requested. */
169 static int cells[8]; /* Cells requested. */
172 static int write; /* One of WR_* that specifies the WRITE style. */
174 /* Command parsing info. */
175 static struct cmd_crosstabs cmd;
178 static struct pool *pl_tc; /* For table cells. */
179 static struct pool *pl_col; /* For column data. */
181 static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
182 static void precalc (const struct ccase *, void *, const struct dataset *);
183 static bool calc_general (const struct ccase *, void *, const struct dataset *);
184 static bool calc_integer (const struct ccase *, void *, const struct dataset *);
185 static bool postcalc (void *, const struct dataset *);
186 static void submit (struct tab_table *);
188 static void format_short (char *s, const struct fmt_spec *fp,
189 const union value *v);
191 /* Parse and execute CROSSTABS, then clean up. */
193 cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
195 int result = internal_cmd_crosstabs (lexer, ds);
198 pool_destroy (pl_tc);
199 pool_destroy (pl_col);
204 /* Parses and executes the CROSSTABS procedure. */
206 internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
215 pl_tc = pool_create ();
216 pl_col = pool_create ();
218 if (!parse_crosstabs (lexer, ds, &cmd, NULL))
221 mode = variables ? INTEGER : GENERAL;
226 cmd.a_cells[CRS_CL_COUNT] = 1;
232 for (i = 0; i < CRS_CL_count; i++)
237 cmd.a_cells[CRS_CL_COUNT] = 1;
238 cmd.a_cells[CRS_CL_ROW] = 1;
239 cmd.a_cells[CRS_CL_COLUMN] = 1;
240 cmd.a_cells[CRS_CL_TOTAL] = 1;
242 if (cmd.a_cells[CRS_CL_ALL])
244 for (i = 0; i < CRS_CL_count; i++)
246 cmd.a_cells[CRS_CL_ALL] = 0;
248 cmd.a_cells[CRS_CL_NONE] = 0;
250 for (num_cells = i = 0; i < CRS_CL_count; i++)
252 cells[num_cells++] = i;
255 if (cmd.sbc_statistics)
260 for (i = 0; i < CRS_ST_count; i++)
261 if (cmd.a_statistics[i])
264 cmd.a_statistics[CRS_ST_CHISQ] = 1;
265 if (cmd.a_statistics[CRS_ST_ALL])
266 for (i = 0; i < CRS_ST_count; i++)
267 cmd.a_statistics[i] = 1;
271 if (cmd.miss == CRS_REPORT && mode == GENERAL)
273 msg (SE, _("Missing mode REPORT not allowed in general mode. "
274 "Assuming MISSING=TABLE."));
275 cmd.miss = CRS_TABLE;
279 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
280 cmd.a_write[CRS_WR_ALL] = 0;
281 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
283 msg (SE, _("Write mode ALL not allowed in general mode. "
284 "Assuming WRITE=CELLS."));
285 cmd.a_write[CRS_WR_CELLS] = 1;
288 && (cmd.a_write[CRS_WR_NONE]
289 + cmd.a_write[CRS_WR_ALL]
290 + cmd.a_write[CRS_WR_CELLS] == 0))
291 cmd.a_write[CRS_WR_CELLS] = 1;
292 if (cmd.a_write[CRS_WR_CELLS])
293 write = CRS_WR_CELLS;
294 else if (cmd.a_write[CRS_WR_ALL])
299 ok = procedure_with_splits (ds, precalc,
300 mode == GENERAL ? calc_general : calc_integer,
303 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
306 /* Parses the TABLES subcommand. */
308 crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
310 struct var_set *var_set;
312 struct variable ***by = NULL;
313 size_t *by_nvar = NULL;
317 /* Ensure that this is a TABLES subcommand. */
318 if (!lex_match_id (lexer, "TABLES")
319 && (lex_token (lexer) != T_ID || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
320 && lex_token (lexer) != T_ALL)
322 lex_match (lexer, '=');
324 if (variables != NULL)
325 var_set = var_set_create_from_array (variables, variables_cnt);
327 var_set = var_set_create_from_dict (dataset_dict (ds));
328 assert (var_set != NULL);
332 by = xnrealloc (by, n_by + 1, sizeof *by);
333 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
334 if (!parse_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
335 PV_NO_DUPLICATE | PV_NO_SCRATCH))
337 if (xalloc_oversized (nx, by_nvar[n_by]))
339 msg (SE, _("Too many crosstabulation variables or dimensions."));
345 if (!lex_match (lexer, T_BY))
349 lex_error (lexer, _("expecting BY"));
358 int *by_iter = xcalloc (n_by, sizeof *by_iter);
361 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
362 for (i = 0; i < nx; i++)
366 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
373 for (i = 0; i < n_by; i++)
374 x->vars[i] = by[i][by_iter[i]];
380 for (i = n_by - 1; i >= 0; i--)
382 if (++by_iter[i] < by_nvar[i])
395 /* All return paths lead here. */
399 for (i = 0; i < n_by; i++)
405 var_set_destroy (var_set);
410 /* Parses the VARIABLES subcommand. */
412 crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
416 msg (SE, _("VARIABLES must be specified before TABLES."));
420 lex_match (lexer, '=');
424 size_t orig_nv = variables_cnt;
429 if (!parse_variables (lexer, dataset_dict (ds),
430 &variables, &variables_cnt,
431 (PV_APPEND | PV_NUMERIC
432 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
435 if (lex_token (lexer) != '(')
437 lex_error (lexer, "expecting `('");
442 if (!lex_force_int (lexer))
444 min = lex_integer (lexer);
447 lex_match (lexer, ',');
449 if (!lex_force_int (lexer))
451 max = lex_integer (lexer);
454 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
460 if (lex_token (lexer) != ')')
462 lex_error (lexer, "expecting `)'");
467 for (i = orig_nv; i < variables_cnt; i++)
469 struct var_range *vr = xmalloc (sizeof *vr);
472 vr->count = max - min + 1;
473 var_attach_aux (variables[i], vr, var_dtor_free);
476 if (lex_token (lexer) == '/')
488 /* Data file processing. */
490 static int compare_table_entry (const void *, const void *, const void *);
491 static unsigned hash_table_entry (const void *, const void *);
493 /* Set up the crosstabulation tables for processing. */
495 precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds)
497 output_split_file_values (ds, first);
500 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
510 for (i = 0; i < nxtab; i++)
512 struct crosstab *x = xtab[i];
517 x->ofs = n_sorted_tab;
519 for (j = 2; j < x->nvar; j++)
520 count *= get_var_range (x->vars[j - 2])->count;
522 sorted_tab = xnrealloc (sorted_tab,
523 n_sorted_tab + count, sizeof *sorted_tab);
524 v = local_alloc (sizeof *v * x->nvar);
525 for (j = 2; j < x->nvar; j++)
526 v[j] = get_var_range (x->vars[j])->min;
527 for (j = 0; j < count; j++)
529 struct table_entry *te;
532 te = sorted_tab[n_sorted_tab++]
533 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
537 int row_cnt = get_var_range (x->vars[0])->count;
538 int col_cnt = get_var_range (x->vars[1])->count;
539 const int mat_size = row_cnt * col_cnt;
542 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
543 for (m = 0; m < mat_size; m++)
547 for (k = 2; k < x->nvar; k++)
548 te->values[k].f = v[k];
549 for (k = 2; k < x->nvar; k++)
551 struct var_range *vr = get_var_range (x->vars[k]);
552 if (++v[k] >= vr->max)
561 sorted_tab = xnrealloc (sorted_tab,
562 n_sorted_tab + 1, sizeof *sorted_tab);
563 sorted_tab[n_sorted_tab] = NULL;
568 /* Form crosstabulations for general mode. */
570 calc_general (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
572 bool bad_warn = true;
575 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
577 /* Flattened current table index. */
580 for (t = 0; t < nxtab; t++)
582 struct crosstab *x = xtab[t];
583 const size_t entry_size = (sizeof (struct table_entry)
584 + sizeof (union value) * (x->nvar - 1));
585 struct table_entry *te = local_alloc (entry_size);
587 /* Construct table entry for the current record and table. */
593 for (j = 0; j < x->nvar; j++)
595 const union value *v = case_data (c, x->vars[j]->fv);
596 const struct missing_values *mv = &x->vars[j]->miss;
597 if ((cmd.miss == CRS_TABLE && mv_is_value_missing (mv, v))
598 || (cmd.miss == CRS_INCLUDE
599 && mv_is_value_system_missing (mv, v)))
601 x->missing += weight;
605 if (x->vars[j]->type == NUMERIC)
606 te->values[j].f = case_num (c, x->vars[j]->fv);
609 memcpy (te->values[j].s, case_str (c, x->vars[j]->fv),
612 /* Necessary in order to simplify comparisons. */
613 memset (&te->values[j].s[x->vars[j]->width], 0,
614 sizeof (union value) - x->vars[j]->width);
619 /* Add record to hash table. */
621 struct table_entry **tepp
622 = (struct table_entry **) hsh_probe (gen_tab, te);
625 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
628 memcpy (tep, te, entry_size);
633 (*tepp)->u.freq += weight;
644 calc_integer (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
646 bool bad_warn = true;
649 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
651 /* Flattened current table index. */
654 for (t = 0; t < nxtab; t++)
656 struct crosstab *x = xtab[t];
661 for (i = 0; i < x->nvar; i++)
663 struct variable *const v = x->vars[i];
664 struct var_range *vr = get_var_range (v);
665 double value = case_num (c, v->fv);
667 /* Note that the first test also rules out SYSMIS. */
668 if ((value < vr->min || value >= vr->max)
669 || (cmd.miss == CRS_TABLE
670 && mv_is_num_user_missing (&v->miss, value)))
672 x->missing += weight;
678 ofs += fact * ((int) value - vr->min);
684 struct variable *row_var = x->vars[ROW_VAR];
685 const int row = case_num (c, row_var->fv) - get_var_range (row_var)->min;
687 struct variable *col_var = x->vars[COL_VAR];
688 const int col = case_num (c, col_var->fv) - get_var_range (col_var)->min;
690 const int col_dim = get_var_range (col_var)->count;
692 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
701 /* Compare the table_entry's at A and B and return a strcmp()-type
704 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
706 const struct table_entry *a = a_;
707 const struct table_entry *b = b_;
709 if (a->table > b->table)
711 else if (a->table < b->table)
715 const struct crosstab *x = xtab[a->table];
718 for (i = x->nvar - 1; i >= 0; i--)
719 if (x->vars[i]->type == NUMERIC)
721 const double diffnum = a->values[i].f - b->values[i].f;
724 else if (diffnum > 0)
729 assert (x->vars[i]->type == ALPHA);
731 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
742 /* Calculate a hash value from table_entry A. */
744 hash_table_entry (const void *a_, const void *aux UNUSED)
746 const struct table_entry *a = a_;
751 for (i = 0; i < xtab[a->table]->nvar; i++)
752 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
757 /* Post-data reading calculations. */
759 static struct table_entry **find_pivot_extent (struct table_entry **,
760 int *cnt, int pivot);
761 static void enum_var_values (struct table_entry **entries, int entry_cnt,
763 union value **values, int *value_cnt);
764 static void output_pivot_table (struct table_entry **, struct table_entry **,
765 double **, double **, double **,
766 int *, int *, int *);
767 static void make_summary_table (void);
770 postcalc (void *aux UNUSED, const struct dataset *ds UNUSED)
774 n_sorted_tab = hsh_count (gen_tab);
775 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
778 make_summary_table ();
780 /* Identify all the individual crosstabulation tables, and deal with
783 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
784 int pc = n_sorted_tab; /* Pivot count. */
786 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
787 int maxrows = 0, maxcols = 0, maxcells = 0;
791 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
795 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
796 &maxrows, &maxcols, &maxcells);
805 hsh_destroy (gen_tab);
810 static void insert_summary (struct tab_table *, int tab_index, double valid);
812 /* Output a table summarizing the cases processed. */
814 make_summary_table (void)
816 struct tab_table *summary;
818 struct table_entry **pb = sorted_tab, **pe;
819 int pc = n_sorted_tab;
822 summary = tab_create (7, 3 + nxtab, 1);
823 tab_title (summary, _("Summary."));
824 tab_headers (summary, 1, 0, 3, 0);
825 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
826 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
827 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
828 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
829 tab_hline (summary, TAL_1, 1, 6, 1);
830 tab_hline (summary, TAL_1, 1, 6, 2);
831 tab_vline (summary, TAL_1, 3, 1, 1);
832 tab_vline (summary, TAL_1, 5, 1, 1);
836 for (i = 0; i < 3; i++)
838 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
839 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
842 tab_offset (summary, 0, 3);
848 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
852 while (cur_tab < (*pb)->table)
853 insert_summary (summary, cur_tab++, 0.);
856 for (valid = 0.; pb < pe; pb++)
857 valid += (*pb)->u.freq;
860 const struct crosstab *const x = xtab[(*pb)->table];
861 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
862 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
863 const int count = n_cols * n_rows;
865 for (valid = 0.; pb < pe; pb++)
867 const double *data = (*pb)->u.data;
870 for (i = 0; i < count; i++)
874 insert_summary (summary, cur_tab++, valid);
879 while (cur_tab < nxtab)
880 insert_summary (summary, cur_tab++, 0.);
885 /* Inserts a line into T describing the crosstabulation at index
886 TAB_INDEX, which has VALID valid observations. */
888 insert_summary (struct tab_table *t, int tab_index, double valid)
890 struct crosstab *x = xtab[tab_index];
892 tab_hline (t, TAL_1, 0, 6, 0);
894 /* Crosstabulation name. */
896 char *buf = local_alloc (128 * x->nvar);
900 for (i = 0; i < x->nvar; i++)
903 cp = stpcpy (cp, " * ");
906 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
908 tab_text (t, 0, 0, TAB_LEFT, buf);
913 /* Counts and percentages. */
923 for (i = 0; i < 3; i++)
925 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
926 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
937 static struct tab_table *table; /* Crosstabulation table. */
938 static struct tab_table *chisq; /* Chi-square table. */
939 static struct tab_table *sym; /* Symmetric measures table. */
940 static struct tab_table *risk; /* Risk estimate table. */
941 static struct tab_table *direct; /* Directional measures table. */
944 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
946 /* Column values, number of columns. */
947 static union value *cols;
950 /* Row values, number of rows. */
951 static union value *rows;
954 /* Number of statistically interesting columns/rows (columns/rows with
956 static int ns_cols, ns_rows;
958 /* Crosstabulation. */
959 static const struct crosstab *x;
961 /* Number of variables from the crosstabulation to consider. This is
962 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
965 /* Matrix contents. */
966 static double *mat; /* Matrix proper. */
967 static double *row_tot; /* Row totals. */
968 static double *col_tot; /* Column totals. */
969 static double W; /* Grand total. */
971 static void display_dimensions (struct tab_table *, int first_difference,
972 struct table_entry *);
973 static void display_crosstabulation (void);
974 static void display_chisq (void);
975 static void display_symmetric (void);
976 static void display_risk (void);
977 static void display_directional (void);
978 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
979 static void table_value_missing (struct tab_table *table, int c, int r,
980 unsigned char opt, const union value *v,
981 const struct variable *var);
982 static void delete_missing (void);
984 /* Output pivot table beginning at PB and continuing until PE,
985 exclusive. For efficiency, *MATP is a pointer to a matrix that can
986 hold *MAXROWS entries. */
988 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
989 double **matp, double **row_totp, double **col_totp,
990 int *maxrows, int *maxcols, int *maxcells)
993 struct table_entry **tb = pb, **te; /* Table begin, table end. */
994 int tc = pe - pb; /* Table count. */
996 /* Table entry for header comparison. */
997 struct table_entry *cmp = NULL;
999 x = xtab[(*pb)->table];
1000 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1002 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1004 /* Crosstabulation table initialization. */
1007 table = tab_create (nvar + n_cols,
1008 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1009 tab_headers (table, nvar - 1, 0, 2, 0);
1011 /* First header line. */
1012 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1013 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
1015 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1017 /* Second header line. */
1021 for (i = 2; i < nvar; i++)
1022 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1023 TAB_RIGHT | TAT_TITLE,
1025 ? x->vars[i]->label : x->vars[i]->name));
1026 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1027 x->vars[ROW_VAR]->name);
1028 for (i = 0; i < n_cols; i++)
1029 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1031 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1034 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1035 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1039 char *title = local_alloc (x->nvar * 64 + 128);
1043 if (cmd.pivot == CRS_PIVOT)
1044 for (i = 0; i < nvar; i++)
1047 cp = stpcpy (cp, " by ");
1048 cp = stpcpy (cp, x->vars[i]->name);
1052 cp = spprintf (cp, "%s by %s for",
1053 x->vars[0]->name, x->vars[1]->name);
1054 for (i = 2; i < nvar; i++)
1056 char buf[64], *bufp;
1061 cp = stpcpy (cp, x->vars[i]->name);
1063 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1064 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1066 cp = stpcpy (cp, bufp);
1070 cp = stpcpy (cp, " [");
1071 for (i = 0; i < num_cells; i++)
1079 static const struct tuple cell_names[] =
1081 {CRS_CL_COUNT, N_("count")},
1082 {CRS_CL_ROW, N_("row %")},
1083 {CRS_CL_COLUMN, N_("column %")},
1084 {CRS_CL_TOTAL, N_("total %")},
1085 {CRS_CL_EXPECTED, N_("expected")},
1086 {CRS_CL_RESIDUAL, N_("residual")},
1087 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1088 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1092 const struct tuple *t;
1094 for (t = cell_names; t->value != cells[i]; t++)
1095 assert (t->value != -1);
1097 cp = stpcpy (cp, ", ");
1098 cp = stpcpy (cp, gettext (t->name));
1102 tab_title (table, "%s", title);
1106 tab_offset (table, 0, 2);
1111 /* Chi-square table initialization. */
1112 if (cmd.a_statistics[CRS_ST_CHISQ])
1114 chisq = tab_create (6 + (nvar - 2),
1115 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1116 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1118 tab_title (chisq, _("Chi-square tests."));
1120 tab_offset (chisq, nvar - 2, 0);
1121 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1122 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1123 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1124 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1125 _("Asymp. Sig. (2-sided)"));
1126 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1127 _("Exact. Sig. (2-sided)"));
1128 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1129 _("Exact. Sig. (1-sided)"));
1131 tab_offset (chisq, 0, 1);
1136 /* Symmetric measures. */
1137 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1138 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1139 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1140 || cmd.a_statistics[CRS_ST_KAPPA])
1142 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1143 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1144 tab_title (sym, _("Symmetric measures."));
1146 tab_offset (sym, nvar - 2, 0);
1147 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1148 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1149 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1150 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1151 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1152 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1153 tab_offset (sym, 0, 1);
1158 /* Risk estimate. */
1159 if (cmd.a_statistics[CRS_ST_RISK])
1161 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1162 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1163 tab_title (risk, _("Risk estimate."));
1165 tab_offset (risk, nvar - 2, 0);
1166 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1167 _("95%% Confidence Interval"));
1168 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1169 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1170 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1171 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1172 tab_hline (risk, TAL_1, 2, 3, 1);
1173 tab_vline (risk, TAL_1, 2, 0, 1);
1174 tab_offset (risk, 0, 2);
1179 /* Directional measures. */
1180 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1181 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1183 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1184 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1185 tab_title (direct, _("Directional measures."));
1187 tab_offset (direct, nvar - 2, 0);
1188 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1189 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1190 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1191 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1192 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1193 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1194 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1195 tab_offset (direct, 0, 1);
1202 /* Find pivot subtable if applicable. */
1203 te = find_pivot_extent (tb, &tc, 0);
1207 /* Find all the row variable values. */
1208 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1210 /* Allocate memory space for the column and row totals. */
1211 if (n_rows > *maxrows)
1213 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1214 row_tot = *row_totp;
1217 if (n_cols > *maxcols)
1219 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1220 col_tot = *col_totp;
1224 /* Allocate table space for the matrix. */
1225 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1226 tab_realloc (table, -1,
1227 MAX (tab_nr (table) + (n_rows + 1) * num_cells,
1228 tab_nr (table) * (pe - pb) / (te - tb)));
1230 if (mode == GENERAL)
1232 /* Allocate memory space for the matrix. */
1233 if (n_cols * n_rows > *maxcells)
1235 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1236 *maxcells = n_cols * n_rows;
1241 /* Build the matrix and calculate column totals. */
1243 union value *cur_col = cols;
1244 union value *cur_row = rows;
1246 double *cp = col_tot;
1247 struct table_entry **p;
1250 for (p = &tb[0]; p < te; p++)
1252 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1256 for (; cur_row < &rows[n_rows]; cur_row++)
1262 mp = &mat[cur_col - cols];
1265 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1272 *cp += *mp = (*p)->u.freq;
1277 /* Zero out the rest of the matrix. */
1278 for (; cur_row < &rows[n_rows]; cur_row++)
1284 if (cur_col < &cols[n_cols])
1286 const int rem_cols = n_cols - (cur_col - cols);
1289 for (c = 0; c < rem_cols; c++)
1291 mp = &mat[cur_col - cols];
1292 for (r = 0; r < n_rows; r++)
1294 for (c = 0; c < rem_cols; c++)
1296 mp += n_cols - rem_cols;
1304 double *tp = col_tot;
1306 assert (mode == INTEGER);
1307 mat = (*tb)->u.data;
1310 /* Calculate column totals. */
1311 for (c = 0; c < n_cols; c++)
1314 double *cp = &mat[c];
1316 for (r = 0; r < n_rows; r++)
1317 cum += cp[r * n_cols];
1325 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1326 ns_cols += *cp != 0.;
1329 /* Calculate row totals. */
1332 double *rp = row_tot;
1335 for (ns_rows = 0, r = n_rows; r--; )
1338 for (c = n_cols; c--; )
1346 /* Calculate grand total. */
1352 if (n_rows < n_cols)
1353 tp = row_tot, n = n_rows;
1355 tp = col_tot, n = n_cols;
1361 /* Find the first variable that differs from the last subtable,
1362 then display the values of the dimensioning variables for
1363 each table that needs it. */
1365 int first_difference = nvar - 1;
1368 for (; ; first_difference--)
1370 assert (first_difference >= 2);
1371 if (memcmp (&cmp->values[first_difference],
1372 &(*tb)->values[first_difference],
1373 sizeof *cmp->values))
1379 display_dimensions (table, first_difference, *tb);
1381 display_dimensions (chisq, first_difference, *tb);
1383 display_dimensions (sym, first_difference, *tb);
1385 display_dimensions (risk, first_difference, *tb);
1387 display_dimensions (direct, first_difference, *tb);
1391 display_crosstabulation ();
1392 if (cmd.miss == CRS_REPORT)
1397 display_symmetric ();
1401 display_directional ();
1412 tab_resize (chisq, 4 + (nvar - 2), -1);
1423 /* Delete missing rows and columns for statistical analysis when
1426 delete_missing (void)
1431 for (r = 0; r < n_rows; r++)
1432 if (mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1436 for (c = 0; c < n_cols; c++)
1437 mat[c + r * n_cols] = 0.;
1445 for (c = 0; c < n_cols; c++)
1446 if (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1450 for (r = 0; r < n_rows; r++)
1451 mat[c + r * n_cols] = 0.;
1457 /* Prepare table T for submission, and submit it. */
1459 submit (struct tab_table *t)
1466 tab_resize (t, -1, 0);
1467 if (tab_nr (t) == tab_t (t))
1472 tab_offset (t, 0, 0);
1474 for (i = 2; i < nvar; i++)
1475 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1476 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1477 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1478 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1480 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1482 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1483 tab_dim (t, crosstabs_dim);
1487 /* Sets the widths of all the columns and heights of all the rows in
1488 table T for driver D. */
1490 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1494 /* Width of a numerical column. */
1495 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1496 if (cmd.miss == CRS_REPORT)
1497 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1499 /* Set width for header columns. */
1505 w = d->width - c * (t->nc - t->l);
1506 for (i = 0; i <= t->nc; i++)
1510 if (w < d->prop_em_width * 8)
1511 w = d->prop_em_width * 8;
1513 if (w > d->prop_em_width * 15)
1514 w = d->prop_em_width * 15;
1516 for (i = 0; i < t->l; i++)
1520 for (i = t->l; i < t->nc; i++)
1523 for (i = 0; i < t->nr; i++)
1524 t->h[i] = tab_natural_height (t, d, i);
1527 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1528 int *cnt, int pivot);
1529 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1530 int *cnt, int pivot);
1532 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1534 static struct table_entry **
1535 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1537 return (mode == GENERAL
1538 ? find_pivot_extent_general (tp, cnt, pivot)
1539 : find_pivot_extent_integer (tp, cnt, pivot));
1542 /* Find the extent of a region in TP that contains one table. If
1543 PIVOT != 0 that means a set of table entries with identical table
1544 number; otherwise they also have to have the same values for every
1545 dimension after the row and column dimensions. The table that is
1546 searched starts at TP and has length CNT. Returns the first entry
1547 after the last one in the table; sets *CNT to the number of
1548 remaining values. If there are no entries in TP at all, returns
1549 NULL. A yucky interface, admittedly, but it works. */
1550 static struct table_entry **
1551 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1553 struct table_entry *fp = *tp;
1558 x = xtab[(*tp)->table];
1566 if ((*tp)->table != fp->table)
1571 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1578 /* Integer mode correspondent to find_pivot_extent_general(). This
1579 could be optimized somewhat, but I just don't give a crap about
1580 CROSSTABS performance in integer mode, which is just a
1581 CROSSTABS wart as far as I'm concerned.
1583 That said, feel free to send optimization patches to me. */
1584 static struct table_entry **
1585 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1587 struct table_entry *fp = *tp;
1592 x = xtab[(*tp)->table];
1600 if ((*tp)->table != fp->table)
1605 if (memcmp (&(*tp)->values[2], &fp->values[2],
1606 sizeof (union value) * (x->nvar - 2)))
1613 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1614 result. WIDTH_ points to an int which is either 0 for a
1615 numeric value or a string width for a string value. */
1617 compare_value (const void *a_, const void *b_, const void *width_)
1619 const union value *a = a_;
1620 const union value *b = b_;
1621 const int *pwidth = width_;
1622 const int width = *pwidth;
1625 return (a->f < b->f) ? -1 : (a->f > b->f);
1627 return strncmp (a->s, b->s, width);
1630 /* Given an array of ENTRY_CNT table_entry structures starting at
1631 ENTRIES, creates a sorted list of the values that the variable
1632 with index VAR_IDX takes on. The values are returned as a
1633 malloc()'darray stored in *VALUES, with the number of values
1634 stored in *VALUE_CNT.
1637 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1638 union value **values, int *value_cnt)
1640 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1642 if (mode == GENERAL)
1644 int width = v->width;
1647 *values = xnmalloc (entry_cnt, sizeof **values);
1648 for (i = 0; i < entry_cnt; i++)
1649 (*values)[i] = entries[i]->values[var_idx];
1650 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1651 compare_value, &width);
1655 struct var_range *vr = get_var_range (v);
1658 assert (mode == INTEGER);
1659 *values = xnmalloc (vr->count, sizeof **values);
1660 for (i = 0; i < vr->count; i++)
1661 (*values)[i].f = i + vr->min;
1662 *value_cnt = vr->count;
1666 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1667 from V, displayed with print format spec from variable VAR. When
1668 in REPORT missing-value mode, missing values have an M appended. */
1670 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1671 const union value *v, const struct variable *var)
1675 const char *label = val_labs_find (var->val_labs, *v);
1678 tab_text (table, c, r, TAB_LEFT, label);
1682 s.string = tab_alloc (table, var->print.w);
1683 format_short (s.string, &var->print, v);
1684 s.length = strlen (s.string);
1685 if (cmd.miss == CRS_REPORT && mv_is_num_user_missing (&var->miss, v->f))
1686 s.string[s.length++] = 'M';
1687 while (s.length && *s.string == ' ')
1692 tab_raw (table, c, r, opt, &s);
1695 /* Draws a line across TABLE at the current row to indicate the most
1696 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1697 that changed, and puts the values that changed into the table. TB
1698 and X must be the corresponding table_entry and crosstab,
1701 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1703 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1705 for (; first_difference >= 2; first_difference--)
1706 table_value_missing (table, nvar - first_difference - 1, 0,
1707 TAB_RIGHT, &tb->values[first_difference],
1708 x->vars[first_difference]);
1711 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1712 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1713 additionally suffixed with a letter `M'. */
1715 format_cell_entry (struct tab_table *table, int c, int r, double value,
1716 char suffix, bool mark_missing)
1718 const struct fmt_spec f = {FMT_F, 10, 1};
1723 s.string = tab_alloc (table, 16);
1725 data_out (&v, &f, s.string);
1726 while (*s.string == ' ')
1732 s.string[s.length++] = suffix;
1734 s.string[s.length++] = 'M';
1736 tab_raw (table, c, r, TAB_RIGHT, &s);
1739 /* Displays the crosstabulation table. */
1741 display_crosstabulation (void)
1746 for (r = 0; r < n_rows; r++)
1747 table_value_missing (table, nvar - 2, r * num_cells,
1748 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1750 tab_text (table, nvar - 2, n_rows * num_cells,
1751 TAB_LEFT, _("Total"));
1753 /* Put in the actual cells. */
1758 tab_offset (table, nvar - 1, -1);
1759 for (r = 0; r < n_rows; r++)
1762 tab_hline (table, TAL_1, -1, n_cols, 0);
1763 for (c = 0; c < n_cols; c++)
1765 bool mark_missing = false;
1766 double expected_value = row_tot[r] * col_tot[c] / W;
1767 if (cmd.miss == CRS_REPORT
1768 && (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)
1769 || mv_is_num_user_missing (&x->vars[ROW_VAR]->miss,
1771 mark_missing = true;
1772 for (i = 0; i < num_cells; i++)
1783 v = *mp / row_tot[r] * 100.;
1787 v = *mp / col_tot[c] * 100.;
1794 case CRS_CL_EXPECTED:
1797 case CRS_CL_RESIDUAL:
1798 v = *mp - expected_value;
1800 case CRS_CL_SRESIDUAL:
1801 v = (*mp - expected_value) / sqrt (expected_value);
1803 case CRS_CL_ASRESIDUAL:
1804 v = ((*mp - expected_value)
1805 / sqrt (expected_value
1806 * (1. - row_tot[r] / W)
1807 * (1. - col_tot[c] / W)));
1813 format_cell_entry (table, c, i, v, suffix, mark_missing);
1819 tab_offset (table, -1, tab_row (table) + num_cells);
1827 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1828 for (r = 0; r < n_rows; r++)
1831 bool mark_missing = false;
1833 if (cmd.miss == CRS_REPORT
1834 && mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1835 mark_missing = true;
1837 for (i = 0; i < num_cells; i++)
1851 v = row_tot[r] / W * 100.;
1855 v = row_tot[r] / W * 100.;
1858 case CRS_CL_EXPECTED:
1859 case CRS_CL_RESIDUAL:
1860 case CRS_CL_SRESIDUAL:
1861 case CRS_CL_ASRESIDUAL:
1868 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1869 tab_next_row (table);
1874 /* Column totals, grand total. */
1880 tab_hline (table, TAL_1, -1, n_cols, 0);
1881 for (c = 0; c <= n_cols; c++)
1883 double ct = c < n_cols ? col_tot[c] : W;
1884 bool mark_missing = false;
1888 if (cmd.miss == CRS_REPORT && c < n_cols
1889 && mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1890 mark_missing = true;
1892 for (i = 0; i < num_cells; i++)
1914 case CRS_CL_EXPECTED:
1915 case CRS_CL_RESIDUAL:
1916 case CRS_CL_SRESIDUAL:
1917 case CRS_CL_ASRESIDUAL:
1923 format_cell_entry (table, c, i, v, suffix, mark_missing);
1928 tab_offset (table, -1, tab_row (table) + last_row);
1931 tab_offset (table, 0, -1);
1934 static void calc_r (double *X, double *Y, double *, double *, double *);
1935 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1937 /* Display chi-square statistics. */
1939 display_chisq (void)
1941 static const char *chisq_stats[N_CHISQ] =
1943 N_("Pearson Chi-Square"),
1944 N_("Likelihood Ratio"),
1945 N_("Fisher's Exact Test"),
1946 N_("Continuity Correction"),
1947 N_("Linear-by-Linear Association"),
1949 double chisq_v[N_CHISQ];
1950 double fisher1, fisher2;
1956 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1958 tab_offset (chisq, nvar - 2, -1);
1960 for (i = 0; i < N_CHISQ; i++)
1962 if ((i != 2 && chisq_v[i] == SYSMIS)
1963 || (i == 2 && fisher1 == SYSMIS))
1967 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1970 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1971 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1972 tab_float (chisq, 3, 0, TAB_RIGHT,
1973 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1978 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1979 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1981 tab_next_row (chisq);
1984 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1985 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1986 tab_next_row (chisq);
1988 tab_offset (chisq, 0, -1);
1991 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1992 double[N_SYMMETRIC]);
1994 /* Display symmetric measures. */
1996 display_symmetric (void)
1998 static const char *categories[] =
2000 N_("Nominal by Nominal"),
2001 N_("Ordinal by Ordinal"),
2002 N_("Interval by Interval"),
2003 N_("Measure of Agreement"),
2006 static const char *stats[N_SYMMETRIC] =
2010 N_("Contingency Coefficient"),
2011 N_("Kendall's tau-b"),
2012 N_("Kendall's tau-c"),
2014 N_("Spearman Correlation"),
2019 static const int stats_categories[N_SYMMETRIC] =
2021 0, 0, 0, 1, 1, 1, 1, 2, 3,
2025 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2028 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2031 tab_offset (sym, nvar - 2, -1);
2033 for (i = 0; i < N_SYMMETRIC; i++)
2035 if (sym_v[i] == SYSMIS)
2038 if (stats_categories[i] != last_cat)
2040 last_cat = stats_categories[i];
2041 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2044 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2045 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2046 if (sym_ase[i] != SYSMIS)
2047 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2048 if (sym_t[i] != SYSMIS)
2049 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2050 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2054 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2055 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2058 tab_offset (sym, 0, -1);
2061 static int calc_risk (double[], double[], double[], union value *);
2063 /* Display risk estimate. */
2068 double risk_v[3], lower[3], upper[3];
2072 if (!calc_risk (risk_v, upper, lower, c))
2075 tab_offset (risk, nvar - 2, -1);
2077 for (i = 0; i < 3; i++)
2079 if (risk_v[i] == SYSMIS)
2085 if (x->vars[COL_VAR]->type == NUMERIC)
2086 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2087 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2089 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2090 x->vars[COL_VAR]->name,
2091 x->vars[COL_VAR]->width, c[0].s,
2092 x->vars[COL_VAR]->width, c[1].s);
2096 if (x->vars[ROW_VAR]->type == NUMERIC)
2097 sprintf (buf, _("For cohort %s = %g"),
2098 x->vars[ROW_VAR]->name, rows[i - 1].f);
2100 sprintf (buf, _("For cohort %s = %.*s"),
2101 x->vars[ROW_VAR]->name,
2102 x->vars[ROW_VAR]->width, rows[i - 1].s);
2106 tab_text (risk, 0, 0, TAB_LEFT, buf);
2107 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2108 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2109 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2110 tab_next_row (risk);
2113 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2114 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2115 tab_next_row (risk);
2117 tab_offset (risk, 0, -1);
2120 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2121 double[N_DIRECTIONAL]);
2123 /* Display directional measures. */
2125 display_directional (void)
2127 static const char *categories[] =
2129 N_("Nominal by Nominal"),
2130 N_("Ordinal by Ordinal"),
2131 N_("Nominal by Interval"),
2134 static const char *stats[] =
2137 N_("Goodman and Kruskal tau"),
2138 N_("Uncertainty Coefficient"),
2143 static const char *types[] =
2150 static const int stats_categories[N_DIRECTIONAL] =
2152 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2155 static const int stats_stats[N_DIRECTIONAL] =
2157 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2160 static const int stats_types[N_DIRECTIONAL] =
2162 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2165 static const int *stats_lookup[] =
2172 static const char **stats_names[] =
2184 double direct_v[N_DIRECTIONAL];
2185 double direct_ase[N_DIRECTIONAL];
2186 double direct_t[N_DIRECTIONAL];
2190 if (!calc_directional (direct_v, direct_ase, direct_t))
2193 tab_offset (direct, nvar - 2, -1);
2195 for (i = 0; i < N_DIRECTIONAL; i++)
2197 if (direct_v[i] == SYSMIS)
2203 for (j = 0; j < 3; j++)
2204 if (last[j] != stats_lookup[j][i])
2207 tab_hline (direct, TAL_1, j, 6, 0);
2212 int k = last[j] = stats_lookup[j][i];
2217 string = x->vars[0]->name;
2219 string = x->vars[1]->name;
2221 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2222 gettext (stats_names[j][k]), string);
2227 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2228 if (direct_ase[i] != SYSMIS)
2229 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2230 if (direct_t[i] != SYSMIS)
2231 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2232 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2233 tab_next_row (direct);
2236 tab_offset (direct, 0, -1);
2239 /* Statistical calculations. */
2241 /* Returns the value of the gamma (factorial) function for an integer
2244 gamma_int (double x)
2249 for (i = 2; i < x; i++)
2254 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2256 static inline double
2257 Pr (int a, int b, int c, int d)
2259 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2260 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2261 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2262 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2263 / gamma_int (a + b + c + d + 1.));
2266 /* Swap the contents of A and B. */
2268 swap (int *a, int *b)
2275 /* Calculate significance for Fisher's exact test as specified in
2276 _SPSS Statistical Algorithms_, Appendix 5. */
2278 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2282 if (MIN (c, d) < MIN (a, b))
2283 swap (&a, &c), swap (&b, &d);
2284 if (MIN (b, d) < MIN (a, c))
2285 swap (&a, &b), swap (&c, &d);
2289 swap (&a, &b), swap (&c, &d);
2291 swap (&a, &c), swap (&b, &d);
2295 for (x = 0; x <= a; x++)
2296 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2298 *fisher2 = *fisher1;
2299 for (x = 1; x <= b; x++)
2300 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2303 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2304 columns with values COLS and N_ROWS rows with values ROWS. Values
2305 in the matrix sum to W. */
2307 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2308 double *fisher1, double *fisher2)
2312 chisq[0] = chisq[1] = 0.;
2313 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2314 *fisher1 = *fisher2 = SYSMIS;
2316 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2318 if (ns_rows <= 1 || ns_cols <= 1)
2320 chisq[0] = chisq[1] = SYSMIS;
2324 for (r = 0; r < n_rows; r++)
2325 for (c = 0; c < n_cols; c++)
2327 const double expected = row_tot[r] * col_tot[c] / W;
2328 const double freq = mat[n_cols * r + c];
2329 const double residual = freq - expected;
2331 chisq[0] += residual * residual / expected;
2333 chisq[1] += freq * log (expected / freq);
2344 /* Calculate Yates and Fisher exact test. */
2345 if (ns_cols == 2 && ns_rows == 2)
2347 double f11, f12, f21, f22;
2353 for (i = j = 0; i < n_cols; i++)
2354 if (col_tot[i] != 0.)
2363 f11 = mat[nz_cols[0]];
2364 f12 = mat[nz_cols[1]];
2365 f21 = mat[nz_cols[0] + n_cols];
2366 f22 = mat[nz_cols[1] + n_cols];
2371 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2374 chisq[3] = (W * x * x
2375 / (f11 + f12) / (f21 + f22)
2376 / (f11 + f21) / (f12 + f22));
2384 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2385 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2388 /* Calculate Mantel-Haenszel. */
2389 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2391 double r, ase_0, ase_1;
2392 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2394 chisq[4] = (W - 1.) * r * r;
2399 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2400 ASE_1, and ase_0 into ASE_0. The row and column values must be
2401 passed in X and Y. */
2403 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2405 double SX, SY, S, T;
2407 double sum_XYf, sum_X2Y2f;
2408 double sum_Xr, sum_X2r;
2409 double sum_Yc, sum_Y2c;
2412 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2413 for (j = 0; j < n_cols; j++)
2415 double fij = mat[j + i * n_cols];
2416 double product = X[i] * Y[j];
2417 double temp = fij * product;
2419 sum_X2Y2f += temp * product;
2422 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2424 sum_Xr += X[i] * row_tot[i];
2425 sum_X2r += X[i] * X[i] * row_tot[i];
2429 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2431 sum_Yc += Y[i] * col_tot[i];
2432 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2436 S = sum_XYf - sum_Xr * sum_Yc / W;
2437 SX = sum_X2r - sum_Xr * sum_Xr / W;
2438 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2441 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2446 for (s = c = 0., i = 0; i < n_rows; i++)
2447 for (j = 0; j < n_cols; j++)
2449 double Xresid, Yresid;
2452 Xresid = X[i] - Xbar;
2453 Yresid = Y[j] - Ybar;
2454 temp = (T * Xresid * Yresid
2456 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2457 y = mat[j + i * n_cols] * temp * temp - c;
2462 *ase_1 = sqrt (s) / (T * T);
2466 static double somers_d_v[3];
2467 static double somers_d_ase[3];
2468 static double somers_d_t[3];
2470 /* Calculate symmetric statistics and their asymptotic standard
2471 errors. Returns 0 if none could be calculated. */
2473 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2474 double t[N_SYMMETRIC])
2476 int q = MIN (ns_rows, ns_cols);
2485 for (i = 0; i < N_SYMMETRIC; i++)
2486 v[i] = ase[i] = t[i] = SYSMIS;
2489 /* Phi, Cramer's V, contingency coefficient. */
2490 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2492 double Xp = 0.; /* Pearson chi-square. */
2497 for (r = 0; r < n_rows; r++)
2498 for (c = 0; c < n_cols; c++)
2500 const double expected = row_tot[r] * col_tot[c] / W;
2501 const double freq = mat[n_cols * r + c];
2502 const double residual = freq - expected;
2504 Xp += residual * residual / expected;
2508 if (cmd.a_statistics[CRS_ST_PHI])
2510 v[0] = sqrt (Xp / W);
2511 v[1] = sqrt (Xp / (W * (q - 1)));
2513 if (cmd.a_statistics[CRS_ST_CC])
2514 v[2] = sqrt (Xp / (Xp + W));
2517 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2518 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2523 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2530 for (r = 0; r < n_rows; r++)
2531 Dr -= row_tot[r] * row_tot[r];
2532 for (c = 0; c < n_cols; c++)
2533 Dc -= col_tot[c] * col_tot[c];
2539 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2540 for (c = 0; c < n_cols; c++)
2544 for (r = 0; r < n_rows; r++)
2545 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2555 for (i = 0; i < n_rows; i++)
2559 for (j = 1; j < n_cols; j++)
2560 Cij += col_tot[j] - cum[j + i * n_cols];
2563 for (j = 1; j < n_cols; j++)
2564 Dij += cum[j + (i - 1) * n_cols];
2568 double fij = mat[j + i * n_cols];
2574 assert (j < n_cols);
2576 Cij -= col_tot[j] - cum[j + i * n_cols];
2577 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2581 Cij += cum[j - 1 + (i - 1) * n_cols];
2582 Dij -= cum[j + (i - 1) * n_cols];
2588 if (cmd.a_statistics[CRS_ST_BTAU])
2589 v[3] = (P - Q) / sqrt (Dr * Dc);
2590 if (cmd.a_statistics[CRS_ST_CTAU])
2591 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2592 if (cmd.a_statistics[CRS_ST_GAMMA])
2593 v[5] = (P - Q) / (P + Q);
2595 /* ASE for tau-b, tau-c, gamma. Calculations could be
2596 eliminated here, at expense of memory. */
2601 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2602 for (i = 0; i < n_rows; i++)
2606 for (j = 1; j < n_cols; j++)
2607 Cij += col_tot[j] - cum[j + i * n_cols];
2610 for (j = 1; j < n_cols; j++)
2611 Dij += cum[j + (i - 1) * n_cols];
2615 double fij = mat[j + i * n_cols];
2617 if (cmd.a_statistics[CRS_ST_BTAU])
2619 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2620 + v[3] * (row_tot[i] * Dc
2621 + col_tot[j] * Dr));
2622 btau_cum += fij * temp * temp;
2626 const double temp = Cij - Dij;
2627 ctau_cum += fij * temp * temp;
2630 if (cmd.a_statistics[CRS_ST_GAMMA])
2632 const double temp = Q * Cij - P * Dij;
2633 gamma_cum += fij * temp * temp;
2636 if (cmd.a_statistics[CRS_ST_D])
2638 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2639 - (P - Q) * (W - row_tot[i]));
2640 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2641 - (Q - P) * (W - col_tot[j]));
2646 assert (j < n_cols);
2648 Cij -= col_tot[j] - cum[j + i * n_cols];
2649 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2653 Cij += cum[j - 1 + (i - 1) * n_cols];
2654 Dij -= cum[j + (i - 1) * n_cols];
2660 btau_var = ((btau_cum
2661 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2663 if (cmd.a_statistics[CRS_ST_BTAU])
2665 ase[3] = sqrt (btau_var);
2666 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2669 if (cmd.a_statistics[CRS_ST_CTAU])
2671 ase[4] = ((2 * q / ((q - 1) * W * W))
2672 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2673 t[4] = v[4] / ase[4];
2675 if (cmd.a_statistics[CRS_ST_GAMMA])
2677 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2678 t[5] = v[5] / (2. / (P + Q)
2679 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2681 if (cmd.a_statistics[CRS_ST_D])
2683 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2684 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2685 somers_d_t[0] = (somers_d_v[0]
2687 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2688 somers_d_v[1] = (P - Q) / Dc;
2689 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2690 somers_d_t[1] = (somers_d_v[1]
2692 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2693 somers_d_v[2] = (P - Q) / Dr;
2694 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2695 somers_d_t[2] = (somers_d_v[2]
2697 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2703 /* Spearman correlation, Pearson's r. */
2704 if (cmd.a_statistics[CRS_ST_CORR])
2706 double *R = local_alloc (sizeof *R * n_rows);
2707 double *C = local_alloc (sizeof *C * n_cols);
2710 double y, t, c = 0., s = 0.;
2715 R[i] = s + (row_tot[i] + 1.) / 2.;
2722 assert (i < n_rows);
2727 double y, t, c = 0., s = 0.;
2732 C[j] = s + (col_tot[j] + 1.) / 2;
2739 assert (j < n_cols);
2743 calc_r (R, C, &v[6], &t[6], &ase[6]);
2749 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2753 /* Cohen's kappa. */
2754 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2756 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2759 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2760 i < ns_rows; i++, j++)
2764 while (col_tot[j] == 0.)
2767 prod = row_tot[i] * col_tot[j];
2768 sum = row_tot[i] + col_tot[j];
2770 sum_fii += mat[j + i * n_cols];
2772 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2773 sum_riciri_ci += prod * sum;
2775 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2776 for (j = 0; j < ns_cols; j++)
2778 double sum = row_tot[i] + col_tot[j];
2779 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2782 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2784 ase[8] = sqrt ((W * W * sum_rici
2785 + sum_rici * sum_rici
2786 - W * sum_riciri_ci)
2787 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2789 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2790 / pow2 (W * W - sum_rici))
2791 + ((2. * (W - sum_fii)
2792 * (2. * sum_fii * sum_rici
2793 - W * sum_fiiri_ci))
2794 / cube (W * W - sum_rici))
2795 + (pow2 (W - sum_fii)
2796 * (W * sum_fijri_ci2 - 4.
2797 * sum_rici * sum_rici)
2798 / pow4 (W * W - sum_rici))));
2800 t[8] = v[8] / ase[8];
2807 /* Calculate risk estimate. */
2809 calc_risk (double *value, double *upper, double *lower, union value *c)
2811 double f11, f12, f21, f22;
2817 for (i = 0; i < 3; i++)
2818 value[i] = upper[i] = lower[i] = SYSMIS;
2821 if (ns_rows != 2 || ns_cols != 2)
2828 for (i = j = 0; i < n_cols; i++)
2829 if (col_tot[i] != 0.)
2838 f11 = mat[nz_cols[0]];
2839 f12 = mat[nz_cols[1]];
2840 f21 = mat[nz_cols[0] + n_cols];
2841 f22 = mat[nz_cols[1] + n_cols];
2843 c[0] = cols[nz_cols[0]];
2844 c[1] = cols[nz_cols[1]];
2847 value[0] = (f11 * f22) / (f12 * f21);
2848 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2849 lower[0] = value[0] * exp (-1.960 * v);
2850 upper[0] = value[0] * exp (1.960 * v);
2852 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2853 v = sqrt ((f12 / (f11 * (f11 + f12)))
2854 + (f22 / (f21 * (f21 + f22))));
2855 lower[1] = value[1] * exp (-1.960 * v);
2856 upper[1] = value[1] * exp (1.960 * v);
2858 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2859 v = sqrt ((f11 / (f12 * (f11 + f12)))
2860 + (f21 / (f22 * (f21 + f22))));
2861 lower[2] = value[2] * exp (-1.960 * v);
2862 upper[2] = value[2] * exp (1.960 * v);
2867 /* Calculate directional measures. */
2869 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2870 double t[N_DIRECTIONAL])
2875 for (i = 0; i < N_DIRECTIONAL; i++)
2876 v[i] = ase[i] = t[i] = SYSMIS;
2880 if (cmd.a_statistics[CRS_ST_LAMBDA])
2882 double *fim = xnmalloc (n_rows, sizeof *fim);
2883 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2884 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2885 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2886 double sum_fim, sum_fmj;
2888 int rm_index, cm_index;
2891 /* Find maximum for each row and their sum. */
2892 for (sum_fim = 0., i = 0; i < n_rows; i++)
2894 double max = mat[i * n_cols];
2897 for (j = 1; j < n_cols; j++)
2898 if (mat[j + i * n_cols] > max)
2900 max = mat[j + i * n_cols];
2904 sum_fim += fim[i] = max;
2905 fim_index[i] = index;
2908 /* Find maximum for each column. */
2909 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2911 double max = mat[j];
2914 for (i = 1; i < n_rows; i++)
2915 if (mat[j + i * n_cols] > max)
2917 max = mat[j + i * n_cols];
2921 sum_fmj += fmj[j] = max;
2922 fmj_index[j] = index;
2925 /* Find maximum row total. */
2928 for (i = 1; i < n_rows; i++)
2929 if (row_tot[i] > rm)
2935 /* Find maximum column total. */
2938 for (j = 1; j < n_cols; j++)
2939 if (col_tot[j] > cm)
2945 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2946 v[1] = (sum_fmj - rm) / (W - rm);
2947 v[2] = (sum_fim - cm) / (W - cm);
2949 /* ASE1 for Y given X. */
2953 for (accum = 0., i = 0; i < n_rows; i++)
2954 for (j = 0; j < n_cols; j++)
2956 const int deltaj = j == cm_index;
2957 accum += (mat[j + i * n_cols]
2958 * pow2 ((j == fim_index[i])
2963 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2966 /* ASE0 for Y given X. */
2970 for (accum = 0., i = 0; i < n_rows; i++)
2971 if (cm_index != fim_index[i])
2972 accum += (mat[i * n_cols + fim_index[i]]
2973 + mat[i * n_cols + cm_index]);
2974 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2977 /* ASE1 for X given Y. */
2981 for (accum = 0., i = 0; i < n_rows; i++)
2982 for (j = 0; j < n_cols; j++)
2984 const int deltaj = i == rm_index;
2985 accum += (mat[j + i * n_cols]
2986 * pow2 ((i == fmj_index[j])
2991 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2994 /* ASE0 for X given Y. */
2998 for (accum = 0., j = 0; j < n_cols; j++)
2999 if (rm_index != fmj_index[j])
3000 accum += (mat[j + n_cols * fmj_index[j]]
3001 + mat[j + n_cols * rm_index]);
3002 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3005 /* Symmetric ASE0 and ASE1. */
3010 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3011 for (j = 0; j < n_cols; j++)
3013 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3014 int temp1 = (i == rm_index) + (j == cm_index);
3015 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3016 accum1 += (mat[j + i * n_cols]
3017 * pow2 (temp0 + (v[0] - 1.) * temp1));
3019 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3020 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3021 / (2. * W - rm - cm));
3030 double sum_fij2_ri, sum_fij2_ci;
3031 double sum_ri2, sum_cj2;
3033 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3034 for (j = 0; j < n_cols; j++)
3036 double temp = pow2 (mat[j + i * n_cols]);
3037 sum_fij2_ri += temp / row_tot[i];
3038 sum_fij2_ci += temp / col_tot[j];
3041 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3042 sum_ri2 += row_tot[i] * row_tot[i];
3044 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3045 sum_cj2 += col_tot[j] * col_tot[j];
3047 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3048 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3052 if (cmd.a_statistics[CRS_ST_UC])
3054 double UX, UY, UXY, P;
3055 double ase1_yx, ase1_xy, ase1_sym;
3058 for (UX = 0., i = 0; i < n_rows; i++)
3059 if (row_tot[i] > 0.)
3060 UX -= row_tot[i] / W * log (row_tot[i] / W);
3062 for (UY = 0., j = 0; j < n_cols; j++)
3063 if (col_tot[j] > 0.)
3064 UY -= col_tot[j] / W * log (col_tot[j] / W);
3066 for (UXY = P = 0., i = 0; i < n_rows; i++)
3067 for (j = 0; j < n_cols; j++)
3069 double entry = mat[j + i * n_cols];
3074 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3075 UXY -= entry / W * log (entry / W);
3078 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3079 for (j = 0; j < n_cols; j++)
3081 double entry = mat[j + i * n_cols];
3086 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3087 + (UX - UXY) * log (col_tot[j] / W));
3088 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3089 + (UY - UXY) * log (row_tot[i] / W));
3090 ase1_sym += entry * pow2 ((UXY
3091 * log (row_tot[i] * col_tot[j] / (W * W)))
3092 - (UX + UY) * log (entry / W));
3095 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3096 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3097 t[5] = v[5] / ((2. / (W * (UX + UY)))
3098 * sqrt (P - pow2 (UX + UY - UXY) / W));
3100 v[6] = (UX + UY - UXY) / UX;
3101 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3102 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3104 v[7] = (UX + UY - UXY) / UY;
3105 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3106 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3110 if (cmd.a_statistics[CRS_ST_D])
3115 calc_symmetric (NULL, NULL, NULL);
3116 for (i = 0; i < 3; i++)
3118 v[8 + i] = somers_d_v[i];
3119 ase[8 + i] = somers_d_ase[i];
3120 t[8 + i] = somers_d_t[i];
3125 if (cmd.a_statistics[CRS_ST_ETA])
3128 double sum_Xr, sum_X2r;
3132 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3134 sum_Xr += rows[i].f * row_tot[i];
3135 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3137 SX = sum_X2r - sum_Xr * sum_Xr / W;
3139 for (SXW = 0., j = 0; j < n_cols; j++)
3143 for (cum = 0., i = 0; i < n_rows; i++)
3145 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3146 cum += rows[i].f * mat[j + i * n_cols];
3149 SXW -= cum * cum / col_tot[j];
3151 v[11] = sqrt (1. - SXW / SX);
3155 double sum_Yc, sum_Y2c;
3159 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3161 sum_Yc += cols[i].f * col_tot[i];
3162 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3164 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3166 for (SYW = 0., i = 0; i < n_rows; i++)
3170 for (cum = 0., j = 0; j < n_cols; j++)
3172 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3173 cum += cols[j].f * mat[j + i * n_cols];
3176 SYW -= cum * cum / row_tot[i];
3178 v[12] = sqrt (1. - SYW / SY);
3185 /* A wrapper around data_out() that limits string output to short
3186 string width and null terminates the result. */
3188 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3190 struct fmt_spec fmt_subst;
3192 /* Limit to short string width. */
3193 if (fmt_is_string (fp->type))
3197 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3198 if (fmt_subst.type == FMT_A)
3199 fmt_subst.w = MIN (8, fmt_subst.w);
3201 fmt_subst.w = MIN (16, fmt_subst.w);
3207 data_out (v, fp, s);
3209 /* Null terminate. */