1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
35 #include <gsl/gsl_cdf.h>
39 #include <data/case.h>
40 #include <data/dictionary.h>
41 #include <data/procedure.h>
42 #include <data/value-labels.h>
43 #include <data/variable.h>
44 #include <language/command.h>
45 #include <language/dictionary/split-file.h>
46 #include <language/lexer/lexer.h>
47 #include <language/lexer/variable-parser.h>
48 #include <libpspp/alloc.h>
49 #include <libpspp/array.h>
50 #include <libpspp/assertion.h>
51 #include <libpspp/compiler.h>
52 #include <libpspp/hash.h>
53 #include <libpspp/magic.h>
54 #include <libpspp/message.h>
55 #include <libpspp/message.h>
56 #include <libpspp/misc.h>
57 #include <libpspp/pool.h>
58 #include <libpspp/str.h>
59 #include <output/output.h>
60 #include <output/table.h>
63 #define _(msgid) gettext (msgid)
64 #define N_(msgid) msgid
72 missing=miss:!table/include/report;
73 +write[wr_]=none,cells,all;
74 +format=fmt:!labels/nolabels/novallabs,
77 tabl:!tables/notables,
80 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
82 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
88 /* Number of chi-square statistics. */
91 /* Number of symmetric statistics. */
94 /* Number of directional statistics. */
95 #define N_DIRECTIONAL 13
97 /* A single table entry for general mode. */
100 int table; /* Flattened table number. */
103 double freq; /* Frequency count. */
104 double *data; /* Crosstabulation table for integer mode. */
107 union value values[1]; /* Values. */
110 /* A crosstabulation. */
113 int nvar; /* Number of variables. */
114 double missing; /* Missing cases count. */
115 int ofs; /* Integer mode: Offset into sorted_tab[]. */
116 struct variable *vars[2]; /* At least two variables; sorted by
117 larger indices first. */
120 /* Integer mode variable info. */
123 int min; /* Minimum value. */
124 int max; /* Maximum value + 1. */
125 int count; /* max - min. */
128 static inline struct var_range *
129 get_var_range (struct variable *v)
132 assert (v->aux != NULL);
136 /* Indexes into crosstab.v. */
143 /* General mode crosstabulation table. */
144 static struct hsh_table *gen_tab; /* Hash table. */
145 static int n_sorted_tab; /* Number of entries in sorted_tab. */
146 static struct table_entry **sorted_tab; /* Sorted table. */
148 /* Variables specifies on VARIABLES. */
149 static struct variable **variables;
150 static size_t variables_cnt;
153 static struct crosstab **xtab;
156 /* Integer or general mode? */
165 static int num_cells; /* Number of cells requested. */
166 static int cells[8]; /* Cells requested. */
169 static int write; /* One of WR_* that specifies the WRITE style. */
171 /* Command parsing info. */
172 static struct cmd_crosstabs cmd;
175 static struct pool *pl_tc; /* For table cells. */
176 static struct pool *pl_col; /* For column data. */
178 static int internal_cmd_crosstabs (void);
179 static void precalc (const struct ccase *, void *);
180 static bool calc_general (const struct ccase *, void *);
181 static bool calc_integer (const struct ccase *, void *);
182 static void postcalc (void *);
183 static void submit (struct tab_table *);
185 static void format_short (char *s, const struct fmt_spec *fp,
186 const union value *v);
188 /* Parse and execute CROSSTABS, then clean up. */
192 int result = internal_cmd_crosstabs ();
195 pool_destroy (pl_tc);
196 pool_destroy (pl_col);
201 /* Parses and executes the CROSSTABS procedure. */
203 internal_cmd_crosstabs (void)
212 pl_tc = pool_create ();
213 pl_col = pool_create ();
215 if (!parse_crosstabs (&cmd, NULL))
218 mode = variables ? INTEGER : GENERAL;
223 cmd.a_cells[CRS_CL_COUNT] = 1;
229 for (i = 0; i < CRS_CL_count; i++)
234 cmd.a_cells[CRS_CL_COUNT] = 1;
235 cmd.a_cells[CRS_CL_ROW] = 1;
236 cmd.a_cells[CRS_CL_COLUMN] = 1;
237 cmd.a_cells[CRS_CL_TOTAL] = 1;
239 if (cmd.a_cells[CRS_CL_ALL])
241 for (i = 0; i < CRS_CL_count; i++)
243 cmd.a_cells[CRS_CL_ALL] = 0;
245 cmd.a_cells[CRS_CL_NONE] = 0;
247 for (num_cells = i = 0; i < CRS_CL_count; i++)
249 cells[num_cells++] = i;
252 if (cmd.sbc_statistics)
257 for (i = 0; i < CRS_ST_count; i++)
258 if (cmd.a_statistics[i])
261 cmd.a_statistics[CRS_ST_CHISQ] = 1;
262 if (cmd.a_statistics[CRS_ST_ALL])
263 for (i = 0; i < CRS_ST_count; i++)
264 cmd.a_statistics[i] = 1;
268 if (cmd.miss == CRS_REPORT && mode == GENERAL)
270 msg (SE, _("Missing mode REPORT not allowed in general mode. "
271 "Assuming MISSING=TABLE."));
272 cmd.miss = CRS_TABLE;
276 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
277 cmd.a_write[CRS_WR_ALL] = 0;
278 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
280 msg (SE, _("Write mode ALL not allowed in general mode. "
281 "Assuming WRITE=CELLS."));
282 cmd.a_write[CRS_WR_CELLS] = 1;
285 && (cmd.a_write[CRS_WR_NONE]
286 + cmd.a_write[CRS_WR_ALL]
287 + cmd.a_write[CRS_WR_CELLS] == 0))
288 cmd.a_write[CRS_WR_CELLS] = 1;
289 if (cmd.a_write[CRS_WR_CELLS])
290 write = CRS_WR_CELLS;
291 else if (cmd.a_write[CRS_WR_ALL])
296 ok = procedure_with_splits (precalc,
297 mode == GENERAL ? calc_general : calc_integer,
300 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
303 /* Parses the TABLES subcommand. */
305 crs_custom_tables (struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
307 struct var_set *var_set;
309 struct variable ***by = NULL;
310 size_t *by_nvar = NULL;
314 /* Ensure that this is a TABLES subcommand. */
315 if (!lex_match_id ("TABLES")
316 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
321 if (variables != NULL)
322 var_set = var_set_create_from_array (variables, variables_cnt);
324 var_set = var_set_create_from_dict (default_dict);
325 assert (var_set != NULL);
329 by = xnrealloc (by, n_by + 1, sizeof *by);
330 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
331 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
332 PV_NO_DUPLICATE | PV_NO_SCRATCH))
334 if (xalloc_oversized (nx, by_nvar[n_by]))
336 msg (SE, _("Too many crosstabulation variables or dimensions."));
342 if (!lex_match (T_BY))
346 lex_error (_("expecting BY"));
355 int *by_iter = xcalloc (n_by, sizeof *by_iter);
358 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
359 for (i = 0; i < nx; i++)
363 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
370 for (i = 0; i < n_by; i++)
371 x->vars[i] = by[i][by_iter[i]];
377 for (i = n_by - 1; i >= 0; i--)
379 if (++by_iter[i] < by_nvar[i])
392 /* All return paths lead here. */
396 for (i = 0; i < n_by; i++)
402 var_set_destroy (var_set);
407 /* Parses the VARIABLES subcommand. */
409 crs_custom_variables (struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
413 msg (SE, _("VARIABLES must be specified before TABLES."));
421 size_t orig_nv = variables_cnt;
426 if (!parse_variables (default_dict, &variables, &variables_cnt,
427 (PV_APPEND | PV_NUMERIC
428 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
433 lex_error ("expecting `('");
438 if (!lex_force_int ())
440 min = lex_integer ();
445 if (!lex_force_int ())
447 max = lex_integer ();
450 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
458 lex_error ("expecting `)'");
463 for (i = orig_nv; i < variables_cnt; i++)
465 struct var_range *vr = xmalloc (sizeof *vr);
468 vr->count = max - min + 1;
469 var_attach_aux (variables[i], vr, var_dtor_free);
484 /* Data file processing. */
486 static int compare_table_entry (const void *, const void *, void *);
487 static unsigned hash_table_entry (const void *, void *);
489 /* Set up the crosstabulation tables for processing. */
491 precalc (const struct ccase *first, void *aux UNUSED)
493 output_split_file_values (first);
496 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
506 for (i = 0; i < nxtab; i++)
508 struct crosstab *x = xtab[i];
513 x->ofs = n_sorted_tab;
515 for (j = 2; j < x->nvar; j++)
516 count *= get_var_range (x->vars[j - 2])->count;
518 sorted_tab = xnrealloc (sorted_tab,
519 n_sorted_tab + count, sizeof *sorted_tab);
520 v = local_alloc (sizeof *v * x->nvar);
521 for (j = 2; j < x->nvar; j++)
522 v[j] = get_var_range (x->vars[j])->min;
523 for (j = 0; j < count; j++)
525 struct table_entry *te;
528 te = sorted_tab[n_sorted_tab++]
529 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
533 int row_cnt = get_var_range (x->vars[0])->count;
534 int col_cnt = get_var_range (x->vars[1])->count;
535 const int mat_size = row_cnt * col_cnt;
538 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
539 for (m = 0; m < mat_size; m++)
543 for (k = 2; k < x->nvar; k++)
544 te->values[k].f = v[k];
545 for (k = 2; k < x->nvar; k++)
547 struct var_range *vr = get_var_range (x->vars[k]);
548 if (++v[k] >= vr->max)
557 sorted_tab = xnrealloc (sorted_tab,
558 n_sorted_tab + 1, sizeof *sorted_tab);
559 sorted_tab[n_sorted_tab] = NULL;
563 /* Form crosstabulations for general mode. */
565 calc_general (const struct ccase *c, void *aux UNUSED)
570 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
572 /* Flattened current table index. */
575 for (t = 0; t < nxtab; t++)
577 struct crosstab *x = xtab[t];
578 const size_t entry_size = (sizeof (struct table_entry)
579 + sizeof (union value) * (x->nvar - 1));
580 struct table_entry *te = local_alloc (entry_size);
582 /* Construct table entry for the current record and table. */
588 for (j = 0; j < x->nvar; j++)
590 const union value *v = case_data (c, x->vars[j]->fv);
591 const struct missing_values *mv = &x->vars[j]->miss;
592 if ((cmd.miss == CRS_TABLE && mv_is_value_missing (mv, v))
593 || (cmd.miss == CRS_INCLUDE
594 && mv_is_value_system_missing (mv, v)))
596 x->missing += weight;
600 if (x->vars[j]->type == NUMERIC)
601 te->values[j].f = case_num (c, x->vars[j]->fv);
604 memcpy (te->values[j].s, case_str (c, x->vars[j]->fv),
607 /* Necessary in order to simplify comparisons. */
608 memset (&te->values[j].s[x->vars[j]->width], 0,
609 sizeof (union value) - x->vars[j]->width);
614 /* Add record to hash table. */
616 struct table_entry **tepp
617 = (struct table_entry **) hsh_probe (gen_tab, te);
620 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
623 memcpy (tep, te, entry_size);
628 (*tepp)->u.freq += weight;
639 calc_integer (const struct ccase *c, void *aux UNUSED)
644 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
646 /* Flattened current table index. */
649 for (t = 0; t < nxtab; t++)
651 struct crosstab *x = xtab[t];
656 for (i = 0; i < x->nvar; i++)
658 struct variable *const v = x->vars[i];
659 struct var_range *vr = get_var_range (v);
660 double value = case_num (c, v->fv);
662 /* Note that the first test also rules out SYSMIS. */
663 if ((value < vr->min || value >= vr->max)
664 || (cmd.miss == CRS_TABLE
665 && mv_is_num_user_missing (&v->miss, value)))
667 x->missing += weight;
673 ofs += fact * ((int) value - vr->min);
679 struct variable *row_var = x->vars[ROW_VAR];
680 const int row = case_num (c, row_var->fv) - get_var_range (row_var)->min;
682 struct variable *col_var = x->vars[COL_VAR];
683 const int col = case_num (c, col_var->fv) - get_var_range (col_var)->min;
685 const int col_dim = get_var_range (col_var)->count;
687 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
696 /* Compare the table_entry's at A and B and return a strcmp()-type
699 compare_table_entry (const void *a_, const void *b_, void *foo UNUSED)
701 const struct table_entry *a = a_;
702 const struct table_entry *b = b_;
704 if (a->table > b->table)
706 else if (a->table < b->table)
710 const struct crosstab *x = xtab[a->table];
713 for (i = x->nvar - 1; i >= 0; i--)
714 if (x->vars[i]->type == NUMERIC)
716 const double diffnum = a->values[i].f - b->values[i].f;
719 else if (diffnum > 0)
724 assert (x->vars[i]->type == ALPHA);
726 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
737 /* Calculate a hash value from table_entry A. */
739 hash_table_entry (const void *a_, void *foo UNUSED)
741 const struct table_entry *a = a_;
746 for (i = 0; i < xtab[a->table]->nvar; i++)
747 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
752 /* Post-data reading calculations. */
754 static struct table_entry **find_pivot_extent (struct table_entry **,
755 int *cnt, int pivot);
756 static void enum_var_values (struct table_entry **entries, int entry_cnt,
758 union value **values, int *value_cnt);
759 static void output_pivot_table (struct table_entry **, struct table_entry **,
760 double **, double **, double **,
761 int *, int *, int *);
762 static void make_summary_table (void);
765 postcalc (void *aux UNUSED)
769 n_sorted_tab = hsh_count (gen_tab);
770 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
773 make_summary_table ();
775 /* Identify all the individual crosstabulation tables, and deal with
778 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
779 int pc = n_sorted_tab; /* Pivot count. */
781 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
782 int maxrows = 0, maxcols = 0, maxcells = 0;
786 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
790 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
791 &maxrows, &maxcols, &maxcells);
800 hsh_destroy (gen_tab);
803 static void insert_summary (struct tab_table *, int tab_index, double valid);
805 /* Output a table summarizing the cases processed. */
807 make_summary_table (void)
809 struct tab_table *summary;
811 struct table_entry **pb = sorted_tab, **pe;
812 int pc = n_sorted_tab;
815 summary = tab_create (7, 3 + nxtab, 1);
816 tab_title (summary, _("Summary."));
817 tab_headers (summary, 1, 0, 3, 0);
818 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
819 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
820 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
821 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
822 tab_hline (summary, TAL_1, 1, 6, 1);
823 tab_hline (summary, TAL_1, 1, 6, 2);
824 tab_vline (summary, TAL_1, 3, 1, 1);
825 tab_vline (summary, TAL_1, 5, 1, 1);
829 for (i = 0; i < 3; i++)
831 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
832 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
835 tab_offset (summary, 0, 3);
841 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
845 while (cur_tab < (*pb)->table)
846 insert_summary (summary, cur_tab++, 0.);
849 for (valid = 0.; pb < pe; pb++)
850 valid += (*pb)->u.freq;
853 const struct crosstab *const x = xtab[(*pb)->table];
854 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
855 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
856 const int count = n_cols * n_rows;
858 for (valid = 0.; pb < pe; pb++)
860 const double *data = (*pb)->u.data;
863 for (i = 0; i < count; i++)
867 insert_summary (summary, cur_tab++, valid);
872 while (cur_tab < nxtab)
873 insert_summary (summary, cur_tab++, 0.);
878 /* Inserts a line into T describing the crosstabulation at index
879 TAB_INDEX, which has VALID valid observations. */
881 insert_summary (struct tab_table *t, int tab_index, double valid)
883 struct crosstab *x = xtab[tab_index];
885 tab_hline (t, TAL_1, 0, 6, 0);
887 /* Crosstabulation name. */
889 char *buf = local_alloc (128 * x->nvar);
893 for (i = 0; i < x->nvar; i++)
896 cp = stpcpy (cp, " * ");
899 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
901 tab_text (t, 0, 0, TAB_LEFT, buf);
906 /* Counts and percentages. */
916 for (i = 0; i < 3; i++)
918 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
919 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
930 static struct tab_table *table; /* Crosstabulation table. */
931 static struct tab_table *chisq; /* Chi-square table. */
932 static struct tab_table *sym; /* Symmetric measures table. */
933 static struct tab_table *risk; /* Risk estimate table. */
934 static struct tab_table *direct; /* Directional measures table. */
937 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
939 /* Column values, number of columns. */
940 static union value *cols;
943 /* Row values, number of rows. */
944 static union value *rows;
947 /* Number of statistically interesting columns/rows (columns/rows with
949 static int ns_cols, ns_rows;
951 /* Crosstabulation. */
952 static struct crosstab *x;
954 /* Number of variables from the crosstabulation to consider. This is
955 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
958 /* Matrix contents. */
959 static double *mat; /* Matrix proper. */
960 static double *row_tot; /* Row totals. */
961 static double *col_tot; /* Column totals. */
962 static double W; /* Grand total. */
964 static void display_dimensions (struct tab_table *, int first_difference,
965 struct table_entry *);
966 static void display_crosstabulation (void);
967 static void display_chisq (void);
968 static void display_symmetric (void);
969 static void display_risk (void);
970 static void display_directional (void);
971 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
972 static void table_value_missing (struct tab_table *table, int c, int r,
973 unsigned char opt, const union value *v,
974 const struct variable *var);
975 static void delete_missing (void);
977 /* Output pivot table beginning at PB and continuing until PE,
978 exclusive. For efficiency, *MATP is a pointer to a matrix that can
979 hold *MAXROWS entries. */
981 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
982 double **matp, double **row_totp, double **col_totp,
983 int *maxrows, int *maxcols, int *maxcells)
986 struct table_entry **tb = pb, **te; /* Table begin, table end. */
987 int tc = pe - pb; /* Table count. */
989 /* Table entry for header comparison. */
990 struct table_entry *cmp = NULL;
992 x = xtab[(*pb)->table];
993 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
995 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
997 /* Crosstabulation table initialization. */
1000 table = tab_create (nvar + n_cols,
1001 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1002 tab_headers (table, nvar - 1, 0, 2, 0);
1004 /* First header line. */
1005 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1006 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
1008 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1010 /* Second header line. */
1014 for (i = 2; i < nvar; i++)
1015 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1016 TAB_RIGHT | TAT_TITLE,
1018 ? x->vars[i]->label : x->vars[i]->name));
1019 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1020 x->vars[ROW_VAR]->name);
1021 for (i = 0; i < n_cols; i++)
1022 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1024 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1027 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1028 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1032 char *title = local_alloc (x->nvar * 64 + 128);
1036 if (cmd.pivot == CRS_PIVOT)
1037 for (i = 0; i < nvar; i++)
1040 cp = stpcpy (cp, " by ");
1041 cp = stpcpy (cp, x->vars[i]->name);
1045 cp = spprintf (cp, "%s by %s for",
1046 x->vars[0]->name, x->vars[1]->name);
1047 for (i = 2; i < nvar; i++)
1049 char buf[64], *bufp;
1054 cp = stpcpy (cp, x->vars[i]->name);
1056 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1057 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1059 cp = stpcpy (cp, bufp);
1063 cp = stpcpy (cp, " [");
1064 for (i = 0; i < num_cells; i++)
1072 static const struct tuple cell_names[] =
1074 {CRS_CL_COUNT, N_("count")},
1075 {CRS_CL_ROW, N_("row %")},
1076 {CRS_CL_COLUMN, N_("column %")},
1077 {CRS_CL_TOTAL, N_("total %")},
1078 {CRS_CL_EXPECTED, N_("expected")},
1079 {CRS_CL_RESIDUAL, N_("residual")},
1080 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1081 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1085 const struct tuple *t;
1087 for (t = cell_names; t->value != cells[i]; t++)
1088 assert (t->value != -1);
1090 cp = stpcpy (cp, ", ");
1091 cp = stpcpy (cp, gettext (t->name));
1095 tab_title (table, "%s", title);
1099 tab_offset (table, 0, 2);
1104 /* Chi-square table initialization. */
1105 if (cmd.a_statistics[CRS_ST_CHISQ])
1107 chisq = tab_create (6 + (nvar - 2),
1108 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1109 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1111 tab_title (chisq, _("Chi-square tests."));
1113 tab_offset (chisq, nvar - 2, 0);
1114 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1115 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1116 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1117 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1118 _("Asymp. Sig. (2-sided)"));
1119 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1120 _("Exact. Sig. (2-sided)"));
1121 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1122 _("Exact. Sig. (1-sided)"));
1124 tab_offset (chisq, 0, 1);
1129 /* Symmetric measures. */
1130 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1131 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1132 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1133 || cmd.a_statistics[CRS_ST_KAPPA])
1135 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1136 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1137 tab_title (sym, _("Symmetric measures."));
1139 tab_offset (sym, nvar - 2, 0);
1140 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1141 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1142 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1143 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1144 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1145 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1146 tab_offset (sym, 0, 1);
1151 /* Risk estimate. */
1152 if (cmd.a_statistics[CRS_ST_RISK])
1154 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1155 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1156 tab_title (risk, _("Risk estimate."));
1158 tab_offset (risk, nvar - 2, 0);
1159 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1160 _("95%% Confidence Interval"));
1161 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1162 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1163 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1164 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1165 tab_hline (risk, TAL_1, 2, 3, 1);
1166 tab_vline (risk, TAL_1, 2, 0, 1);
1167 tab_offset (risk, 0, 2);
1172 /* Directional measures. */
1173 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1174 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1176 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1177 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1178 tab_title (direct, _("Directional measures."));
1180 tab_offset (direct, nvar - 2, 0);
1181 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1182 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1183 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1184 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1185 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1186 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1187 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1188 tab_offset (direct, 0, 1);
1195 /* Find pivot subtable if applicable. */
1196 te = find_pivot_extent (tb, &tc, 0);
1200 /* Find all the row variable values. */
1201 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1203 /* Allocate memory space for the column and row totals. */
1204 if (n_rows > *maxrows)
1206 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1207 row_tot = *row_totp;
1210 if (n_cols > *maxcols)
1212 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1213 col_tot = *col_totp;
1217 /* Allocate table space for the matrix. */
1218 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1219 tab_realloc (table, -1,
1220 max (tab_nr (table) + (n_rows + 1) * num_cells,
1221 tab_nr (table) * (pe - pb) / (te - tb)));
1223 if (mode == GENERAL)
1225 /* Allocate memory space for the matrix. */
1226 if (n_cols * n_rows > *maxcells)
1228 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1229 *maxcells = n_cols * n_rows;
1234 /* Build the matrix and calculate column totals. */
1236 union value *cur_col = cols;
1237 union value *cur_row = rows;
1239 double *cp = col_tot;
1240 struct table_entry **p;
1243 for (p = &tb[0]; p < te; p++)
1245 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1249 for (; cur_row < &rows[n_rows]; cur_row++)
1255 mp = &mat[cur_col - cols];
1258 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1265 *cp += *mp = (*p)->u.freq;
1270 /* Zero out the rest of the matrix. */
1271 for (; cur_row < &rows[n_rows]; cur_row++)
1277 if (cur_col < &cols[n_cols])
1279 const int rem_cols = n_cols - (cur_col - cols);
1282 for (c = 0; c < rem_cols; c++)
1284 mp = &mat[cur_col - cols];
1285 for (r = 0; r < n_rows; r++)
1287 for (c = 0; c < rem_cols; c++)
1289 mp += n_cols - rem_cols;
1297 double *tp = col_tot;
1299 assert (mode == INTEGER);
1300 mat = (*tb)->u.data;
1303 /* Calculate column totals. */
1304 for (c = 0; c < n_cols; c++)
1307 double *cp = &mat[c];
1309 for (r = 0; r < n_rows; r++)
1310 cum += cp[r * n_cols];
1318 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1319 ns_cols += *cp != 0.;
1322 /* Calculate row totals. */
1325 double *rp = row_tot;
1328 for (ns_rows = 0, r = n_rows; r--; )
1331 for (c = n_cols; c--; )
1339 /* Calculate grand total. */
1345 if (n_rows < n_cols)
1346 tp = row_tot, n = n_rows;
1348 tp = col_tot, n = n_cols;
1354 /* Find the first variable that differs from the last subtable,
1355 then display the values of the dimensioning variables for
1356 each table that needs it. */
1358 int first_difference = nvar - 1;
1361 for (; ; first_difference--)
1363 assert (first_difference >= 2);
1364 if (memcmp (&cmp->values[first_difference],
1365 &(*tb)->values[first_difference],
1366 sizeof *cmp->values))
1372 display_dimensions (table, first_difference, *tb);
1374 display_dimensions (chisq, first_difference, *tb);
1376 display_dimensions (sym, first_difference, *tb);
1378 display_dimensions (risk, first_difference, *tb);
1380 display_dimensions (direct, first_difference, *tb);
1384 display_crosstabulation ();
1385 if (cmd.miss == CRS_REPORT)
1390 display_symmetric ();
1394 display_directional ();
1405 tab_resize (chisq, 4 + (nvar - 2), -1);
1416 /* Delete missing rows and columns for statistical analysis when
1419 delete_missing (void)
1424 for (r = 0; r < n_rows; r++)
1425 if (mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1429 for (c = 0; c < n_cols; c++)
1430 mat[c + r * n_cols] = 0.;
1438 for (c = 0; c < n_cols; c++)
1439 if (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1443 for (r = 0; r < n_rows; r++)
1444 mat[c + r * n_cols] = 0.;
1450 /* Prepare table T for submission, and submit it. */
1452 submit (struct tab_table *t)
1459 tab_resize (t, -1, 0);
1460 if (tab_nr (t) == tab_t (t))
1465 tab_offset (t, 0, 0);
1467 for (i = 2; i < nvar; i++)
1468 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1469 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1470 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1471 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1473 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1475 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1476 tab_dim (t, crosstabs_dim);
1480 /* Sets the widths of all the columns and heights of all the rows in
1481 table T for driver D. */
1483 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1487 /* Width of a numerical column. */
1488 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1489 if (cmd.miss == CRS_REPORT)
1490 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1492 /* Set width for header columns. */
1498 w = d->width - c * (t->nc - t->l);
1499 for (i = 0; i <= t->nc; i++)
1503 if (w < d->prop_em_width * 8)
1504 w = d->prop_em_width * 8;
1506 if (w > d->prop_em_width * 15)
1507 w = d->prop_em_width * 15;
1509 for (i = 0; i < t->l; i++)
1513 for (i = t->l; i < t->nc; i++)
1516 for (i = 0; i < t->nr; i++)
1517 t->h[i] = tab_natural_height (t, d, i);
1520 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1521 int *cnt, int pivot);
1522 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1523 int *cnt, int pivot);
1525 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1527 static struct table_entry **
1528 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1530 return (mode == GENERAL
1531 ? find_pivot_extent_general (tp, cnt, pivot)
1532 : find_pivot_extent_integer (tp, cnt, pivot));
1535 /* Find the extent of a region in TP that contains one table. If
1536 PIVOT != 0 that means a set of table entries with identical table
1537 number; otherwise they also have to have the same values for every
1538 dimension after the row and column dimensions. The table that is
1539 searched starts at TP and has length CNT. Returns the first entry
1540 after the last one in the table; sets *CNT to the number of
1541 remaining values. If there are no entries in TP at all, returns
1542 NULL. A yucky interface, admittedly, but it works. */
1543 static struct table_entry **
1544 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1546 struct table_entry *fp = *tp;
1551 x = xtab[(*tp)->table];
1559 if ((*tp)->table != fp->table)
1564 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1571 /* Integer mode correspondent to find_pivot_extent_general(). This
1572 could be optimized somewhat, but I just don't give a crap about
1573 CROSSTABS performance in integer mode, which is just a
1574 CROSSTABS wart as far as I'm concerned.
1576 That said, feel free to send optimization patches to me. */
1577 static struct table_entry **
1578 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1580 struct table_entry *fp = *tp;
1585 x = xtab[(*tp)->table];
1593 if ((*tp)->table != fp->table)
1598 if (memcmp (&(*tp)->values[2], &fp->values[2],
1599 sizeof (union value) * (x->nvar - 2)))
1606 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1607 result. WIDTH_ points to an int which is either 0 for a
1608 numeric value or a string width for a string value. */
1610 compare_value (const void *a_, const void *b_, void *width_)
1612 const union value *a = a_;
1613 const union value *b = b_;
1614 const int *pwidth = width_;
1615 const int width = *pwidth;
1618 return (a->f < b->f) ? -1 : (a->f > b->f);
1620 return strncmp (a->s, b->s, width);
1623 /* Given an array of ENTRY_CNT table_entry structures starting at
1624 ENTRIES, creates a sorted list of the values that the variable
1625 with index VAR_IDX takes on. The values are returned as a
1626 malloc()'darray stored in *VALUES, with the number of values
1627 stored in *VALUE_CNT.
1630 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1631 union value **values, int *value_cnt)
1633 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1635 if (mode == GENERAL)
1637 int width = v->width;
1640 *values = xnmalloc (entry_cnt, sizeof **values);
1641 for (i = 0; i < entry_cnt; i++)
1642 (*values)[i] = entries[i]->values[var_idx];
1643 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1644 compare_value, &width);
1648 struct var_range *vr = get_var_range (v);
1651 assert (mode == INTEGER);
1652 *values = xnmalloc (vr->count, sizeof **values);
1653 for (i = 0; i < vr->count; i++)
1654 (*values)[i].f = i + vr->min;
1655 *value_cnt = vr->count;
1659 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1660 from V, displayed with print format spec from variable VAR. When
1661 in REPORT missing-value mode, missing values have an M appended. */
1663 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1664 const union value *v, const struct variable *var)
1668 const char *label = val_labs_find (var->val_labs, *v);
1671 tab_text (table, c, r, TAB_LEFT, label);
1675 s.string = tab_alloc (table, var->print.w);
1676 format_short (s.string, &var->print, v);
1677 s.length = strlen (s.string);
1678 if (cmd.miss == CRS_REPORT && mv_is_num_user_missing (&var->miss, v->f))
1679 s.string[s.length++] = 'M';
1680 while (s.length && *s.string == ' ')
1685 tab_raw (table, c, r, opt, &s);
1688 /* Draws a line across TABLE at the current row to indicate the most
1689 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1690 that changed, and puts the values that changed into the table. TB
1691 and X must be the corresponding table_entry and crosstab,
1694 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1696 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1698 for (; first_difference >= 2; first_difference--)
1699 table_value_missing (table, nvar - first_difference - 1, 0,
1700 TAB_RIGHT, &tb->values[first_difference],
1701 x->vars[first_difference]);
1704 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1705 SUFFIX if nonzero. If MARK_MISSING is nonzero the entry is
1706 additionally suffixed with a letter `M'. */
1708 format_cell_entry (struct tab_table *table, int c, int r, double value,
1709 char suffix, int mark_missing)
1711 const struct fmt_spec f = {FMT_F, 10, 1};
1716 s.string = tab_alloc (table, 16);
1718 data_out (s.string, &f, &v);
1719 while (*s.string == ' ')
1725 s.string[s.length++] = suffix;
1727 s.string[s.length++] = 'M';
1729 tab_raw (table, c, r, TAB_RIGHT, &s);
1732 /* Displays the crosstabulation table. */
1734 display_crosstabulation (void)
1739 for (r = 0; r < n_rows; r++)
1740 table_value_missing (table, nvar - 2, r * num_cells,
1741 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1743 tab_text (table, nvar - 2, n_rows * num_cells,
1744 TAB_LEFT, _("Total"));
1746 /* Put in the actual cells. */
1751 tab_offset (table, nvar - 1, -1);
1752 for (r = 0; r < n_rows; r++)
1755 tab_hline (table, TAL_1, -1, n_cols, 0);
1756 for (c = 0; c < n_cols; c++)
1758 int mark_missing = 0;
1759 double expected_value = row_tot[r] * col_tot[c] / W;
1760 if (cmd.miss == CRS_REPORT
1761 && (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)
1762 || mv_is_num_user_missing (&x->vars[ROW_VAR]->miss,
1765 for (i = 0; i < num_cells; i++)
1776 v = *mp / row_tot[r] * 100.;
1780 v = *mp / col_tot[c] * 100.;
1787 case CRS_CL_EXPECTED:
1790 case CRS_CL_RESIDUAL:
1791 v = *mp - expected_value;
1793 case CRS_CL_SRESIDUAL:
1794 v = (*mp - expected_value) / sqrt (expected_value);
1796 case CRS_CL_ASRESIDUAL:
1797 v = ((*mp - expected_value)
1798 / sqrt (expected_value
1799 * (1. - row_tot[r] / W)
1800 * (1. - col_tot[c] / W)));
1806 format_cell_entry (table, c, i, v, suffix, mark_missing);
1812 tab_offset (table, -1, tab_row (table) + num_cells);
1820 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1821 for (r = 0; r < n_rows; r++)
1824 int mark_missing = 0;
1826 if (cmd.miss == CRS_REPORT
1827 && mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1830 for (i = 0; i < num_cells; i++)
1844 v = row_tot[r] / W * 100.;
1848 v = row_tot[r] / W * 100.;
1851 case CRS_CL_EXPECTED:
1852 case CRS_CL_RESIDUAL:
1853 case CRS_CL_SRESIDUAL:
1854 case CRS_CL_ASRESIDUAL:
1861 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1862 tab_next_row (table);
1867 /* Column totals, grand total. */
1873 tab_hline (table, TAL_1, -1, n_cols, 0);
1874 for (c = 0; c <= n_cols; c++)
1876 double ct = c < n_cols ? col_tot[c] : W;
1877 int mark_missing = 0;
1881 if (cmd.miss == CRS_REPORT && c < n_cols
1882 && mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1885 for (i = 0; i < num_cells; i++)
1907 case CRS_CL_EXPECTED:
1908 case CRS_CL_RESIDUAL:
1909 case CRS_CL_SRESIDUAL:
1910 case CRS_CL_ASRESIDUAL:
1916 format_cell_entry (table, c, i, v, suffix, mark_missing);
1921 tab_offset (table, -1, tab_row (table) + last_row);
1924 tab_offset (table, 0, -1);
1927 static void calc_r (double *X, double *Y, double *, double *, double *);
1928 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1930 /* Display chi-square statistics. */
1932 display_chisq (void)
1934 static const char *chisq_stats[N_CHISQ] =
1936 N_("Pearson Chi-Square"),
1937 N_("Likelihood Ratio"),
1938 N_("Fisher's Exact Test"),
1939 N_("Continuity Correction"),
1940 N_("Linear-by-Linear Association"),
1942 double chisq_v[N_CHISQ];
1943 double fisher1, fisher2;
1949 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1951 tab_offset (chisq, nvar - 2, -1);
1953 for (i = 0; i < N_CHISQ; i++)
1955 if ((i != 2 && chisq_v[i] == SYSMIS)
1956 || (i == 2 && fisher1 == SYSMIS))
1960 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1963 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1964 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1965 tab_float (chisq, 3, 0, TAB_RIGHT,
1966 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1971 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1972 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1974 tab_next_row (chisq);
1977 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1978 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1979 tab_next_row (chisq);
1981 tab_offset (chisq, 0, -1);
1984 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1985 double[N_SYMMETRIC]);
1987 /* Display symmetric measures. */
1989 display_symmetric (void)
1991 static const char *categories[] =
1993 N_("Nominal by Nominal"),
1994 N_("Ordinal by Ordinal"),
1995 N_("Interval by Interval"),
1996 N_("Measure of Agreement"),
1999 static const char *stats[N_SYMMETRIC] =
2003 N_("Contingency Coefficient"),
2004 N_("Kendall's tau-b"),
2005 N_("Kendall's tau-c"),
2007 N_("Spearman Correlation"),
2012 static const int stats_categories[N_SYMMETRIC] =
2014 0, 0, 0, 1, 1, 1, 1, 2, 3,
2018 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2021 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2024 tab_offset (sym, nvar - 2, -1);
2026 for (i = 0; i < N_SYMMETRIC; i++)
2028 if (sym_v[i] == SYSMIS)
2031 if (stats_categories[i] != last_cat)
2033 last_cat = stats_categories[i];
2034 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2037 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2038 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2039 if (sym_ase[i] != SYSMIS)
2040 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2041 if (sym_t[i] != SYSMIS)
2042 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2043 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2047 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2048 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2051 tab_offset (sym, 0, -1);
2054 static int calc_risk (double[], double[], double[], union value *);
2056 /* Display risk estimate. */
2061 double risk_v[3], lower[3], upper[3];
2065 if (!calc_risk (risk_v, upper, lower, c))
2068 tab_offset (risk, nvar - 2, -1);
2070 for (i = 0; i < 3; i++)
2072 if (risk_v[i] == SYSMIS)
2078 if (x->vars[COL_VAR]->type == NUMERIC)
2079 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2080 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2082 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2083 x->vars[COL_VAR]->name,
2084 x->vars[COL_VAR]->width, c[0].s,
2085 x->vars[COL_VAR]->width, c[1].s);
2089 if (x->vars[ROW_VAR]->type == NUMERIC)
2090 sprintf (buf, _("For cohort %s = %g"),
2091 x->vars[ROW_VAR]->name, rows[i - 1].f);
2093 sprintf (buf, _("For cohort %s = %.*s"),
2094 x->vars[ROW_VAR]->name,
2095 x->vars[ROW_VAR]->width, rows[i - 1].s);
2099 tab_text (risk, 0, 0, TAB_LEFT, buf);
2100 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2101 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2102 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2103 tab_next_row (risk);
2106 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2107 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2108 tab_next_row (risk);
2110 tab_offset (risk, 0, -1);
2113 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2114 double[N_DIRECTIONAL]);
2116 /* Display directional measures. */
2118 display_directional (void)
2120 static const char *categories[] =
2122 N_("Nominal by Nominal"),
2123 N_("Ordinal by Ordinal"),
2124 N_("Nominal by Interval"),
2127 static const char *stats[] =
2130 N_("Goodman and Kruskal tau"),
2131 N_("Uncertainty Coefficient"),
2136 static const char *types[] =
2143 static const int stats_categories[N_DIRECTIONAL] =
2145 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2148 static const int stats_stats[N_DIRECTIONAL] =
2150 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2153 static const int stats_types[N_DIRECTIONAL] =
2155 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2158 static const int *stats_lookup[] =
2165 static const char **stats_names[] =
2177 double direct_v[N_DIRECTIONAL];
2178 double direct_ase[N_DIRECTIONAL];
2179 double direct_t[N_DIRECTIONAL];
2183 if (!calc_directional (direct_v, direct_ase, direct_t))
2186 tab_offset (direct, nvar - 2, -1);
2188 for (i = 0; i < N_DIRECTIONAL; i++)
2190 if (direct_v[i] == SYSMIS)
2196 for (j = 0; j < 3; j++)
2197 if (last[j] != stats_lookup[j][i])
2200 tab_hline (direct, TAL_1, j, 6, 0);
2205 int k = last[j] = stats_lookup[j][i];
2210 string = x->vars[0]->name;
2212 string = x->vars[1]->name;
2214 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2215 gettext (stats_names[j][k]), string);
2220 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2221 if (direct_ase[i] != SYSMIS)
2222 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2223 if (direct_t[i] != SYSMIS)
2224 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2225 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2226 tab_next_row (direct);
2229 tab_offset (direct, 0, -1);
2232 /* Statistical calculations. */
2234 /* Returns the value of the gamma (factorial) function for an integer
2237 gamma_int (double x)
2242 for (i = 2; i < x; i++)
2247 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2249 static inline double
2250 Pr (int a, int b, int c, int d)
2252 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2253 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2254 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2255 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2256 / gamma_int (a + b + c + d + 1.));
2259 /* Swap the contents of A and B. */
2261 swap (int *a, int *b)
2268 /* Calculate significance for Fisher's exact test as specified in
2269 _SPSS Statistical Algorithms_, Appendix 5. */
2271 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2275 if (min (c, d) < min (a, b))
2276 swap (&a, &c), swap (&b, &d);
2277 if (min (b, d) < min (a, c))
2278 swap (&a, &b), swap (&c, &d);
2282 swap (&a, &b), swap (&c, &d);
2284 swap (&a, &c), swap (&b, &d);
2288 for (x = 0; x <= a; x++)
2289 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2291 *fisher2 = *fisher1;
2292 for (x = 1; x <= b; x++)
2293 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2296 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2297 columns with values COLS and N_ROWS rows with values ROWS. Values
2298 in the matrix sum to W. */
2300 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2301 double *fisher1, double *fisher2)
2305 chisq[0] = chisq[1] = 0.;
2306 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2307 *fisher1 = *fisher2 = SYSMIS;
2309 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2311 if (ns_rows <= 1 || ns_cols <= 1)
2313 chisq[0] = chisq[1] = SYSMIS;
2317 for (r = 0; r < n_rows; r++)
2318 for (c = 0; c < n_cols; c++)
2320 const double expected = row_tot[r] * col_tot[c] / W;
2321 const double freq = mat[n_cols * r + c];
2322 const double residual = freq - expected;
2324 chisq[0] += residual * residual / expected;
2326 chisq[1] += freq * log (expected / freq);
2337 /* Calculate Yates and Fisher exact test. */
2338 if (ns_cols == 2 && ns_rows == 2)
2340 double f11, f12, f21, f22;
2346 for (i = j = 0; i < n_cols; i++)
2347 if (col_tot[i] != 0.)
2356 f11 = mat[nz_cols[0]];
2357 f12 = mat[nz_cols[1]];
2358 f21 = mat[nz_cols[0] + n_cols];
2359 f22 = mat[nz_cols[1] + n_cols];
2364 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2367 chisq[3] = (W * x * x
2368 / (f11 + f12) / (f21 + f22)
2369 / (f11 + f21) / (f12 + f22));
2377 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2378 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2381 /* Calculate Mantel-Haenszel. */
2382 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2384 double r, ase_0, ase_1;
2385 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2387 chisq[4] = (W - 1.) * r * r;
2392 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2393 ASE_1, and ase_0 into ASE_0. The row and column values must be
2394 passed in X and Y. */
2396 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2398 double SX, SY, S, T;
2400 double sum_XYf, sum_X2Y2f;
2401 double sum_Xr, sum_X2r;
2402 double sum_Yc, sum_Y2c;
2405 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2406 for (j = 0; j < n_cols; j++)
2408 double fij = mat[j + i * n_cols];
2409 double product = X[i] * Y[j];
2410 double temp = fij * product;
2412 sum_X2Y2f += temp * product;
2415 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2417 sum_Xr += X[i] * row_tot[i];
2418 sum_X2r += X[i] * X[i] * row_tot[i];
2422 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2424 sum_Yc += Y[i] * col_tot[i];
2425 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2429 S = sum_XYf - sum_Xr * sum_Yc / W;
2430 SX = sum_X2r - sum_Xr * sum_Xr / W;
2431 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2434 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2439 for (s = c = 0., i = 0; i < n_rows; i++)
2440 for (j = 0; j < n_cols; j++)
2442 double Xresid, Yresid;
2445 Xresid = X[i] - Xbar;
2446 Yresid = Y[j] - Ybar;
2447 temp = (T * Xresid * Yresid
2449 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2450 y = mat[j + i * n_cols] * temp * temp - c;
2455 *ase_1 = sqrt (s) / (T * T);
2459 static double somers_d_v[3];
2460 static double somers_d_ase[3];
2461 static double somers_d_t[3];
2463 /* Calculate symmetric statistics and their asymptotic standard
2464 errors. Returns 0 if none could be calculated. */
2466 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2467 double t[N_SYMMETRIC])
2469 int q = min (ns_rows, ns_cols);
2478 for (i = 0; i < N_SYMMETRIC; i++)
2479 v[i] = ase[i] = t[i] = SYSMIS;
2482 /* Phi, Cramer's V, contingency coefficient. */
2483 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2485 double Xp = 0.; /* Pearson chi-square. */
2490 for (r = 0; r < n_rows; r++)
2491 for (c = 0; c < n_cols; c++)
2493 const double expected = row_tot[r] * col_tot[c] / W;
2494 const double freq = mat[n_cols * r + c];
2495 const double residual = freq - expected;
2497 Xp += residual * residual / expected;
2501 if (cmd.a_statistics[CRS_ST_PHI])
2503 v[0] = sqrt (Xp / W);
2504 v[1] = sqrt (Xp / (W * (q - 1)));
2506 if (cmd.a_statistics[CRS_ST_CC])
2507 v[2] = sqrt (Xp / (Xp + W));
2510 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2511 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2516 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2523 for (r = 0; r < n_rows; r++)
2524 Dr -= row_tot[r] * row_tot[r];
2525 for (c = 0; c < n_cols; c++)
2526 Dc -= col_tot[c] * col_tot[c];
2532 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2533 for (c = 0; c < n_cols; c++)
2537 for (r = 0; r < n_rows; r++)
2538 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2548 for (i = 0; i < n_rows; i++)
2552 for (j = 1; j < n_cols; j++)
2553 Cij += col_tot[j] - cum[j + i * n_cols];
2556 for (j = 1; j < n_cols; j++)
2557 Dij += cum[j + (i - 1) * n_cols];
2561 double fij = mat[j + i * n_cols];
2567 assert (j < n_cols);
2569 Cij -= col_tot[j] - cum[j + i * n_cols];
2570 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2574 Cij += cum[j - 1 + (i - 1) * n_cols];
2575 Dij -= cum[j + (i - 1) * n_cols];
2581 if (cmd.a_statistics[CRS_ST_BTAU])
2582 v[3] = (P - Q) / sqrt (Dr * Dc);
2583 if (cmd.a_statistics[CRS_ST_CTAU])
2584 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2585 if (cmd.a_statistics[CRS_ST_GAMMA])
2586 v[5] = (P - Q) / (P + Q);
2588 /* ASE for tau-b, tau-c, gamma. Calculations could be
2589 eliminated here, at expense of memory. */
2594 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2595 for (i = 0; i < n_rows; i++)
2599 for (j = 1; j < n_cols; j++)
2600 Cij += col_tot[j] - cum[j + i * n_cols];
2603 for (j = 1; j < n_cols; j++)
2604 Dij += cum[j + (i - 1) * n_cols];
2608 double fij = mat[j + i * n_cols];
2610 if (cmd.a_statistics[CRS_ST_BTAU])
2612 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2613 + v[3] * (row_tot[i] * Dc
2614 + col_tot[j] * Dr));
2615 btau_cum += fij * temp * temp;
2619 const double temp = Cij - Dij;
2620 ctau_cum += fij * temp * temp;
2623 if (cmd.a_statistics[CRS_ST_GAMMA])
2625 const double temp = Q * Cij - P * Dij;
2626 gamma_cum += fij * temp * temp;
2629 if (cmd.a_statistics[CRS_ST_D])
2631 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2632 - (P - Q) * (W - row_tot[i]));
2633 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2634 - (Q - P) * (W - col_tot[j]));
2639 assert (j < n_cols);
2641 Cij -= col_tot[j] - cum[j + i * n_cols];
2642 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2646 Cij += cum[j - 1 + (i - 1) * n_cols];
2647 Dij -= cum[j + (i - 1) * n_cols];
2653 btau_var = ((btau_cum
2654 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2656 if (cmd.a_statistics[CRS_ST_BTAU])
2658 ase[3] = sqrt (btau_var);
2659 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2662 if (cmd.a_statistics[CRS_ST_CTAU])
2664 ase[4] = ((2 * q / ((q - 1) * W * W))
2665 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2666 t[4] = v[4] / ase[4];
2668 if (cmd.a_statistics[CRS_ST_GAMMA])
2670 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2671 t[5] = v[5] / (2. / (P + Q)
2672 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2674 if (cmd.a_statistics[CRS_ST_D])
2676 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2677 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2678 somers_d_t[0] = (somers_d_v[0]
2680 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2681 somers_d_v[1] = (P - Q) / Dc;
2682 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2683 somers_d_t[1] = (somers_d_v[1]
2685 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2686 somers_d_v[2] = (P - Q) / Dr;
2687 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2688 somers_d_t[2] = (somers_d_v[2]
2690 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2696 /* Spearman correlation, Pearson's r. */
2697 if (cmd.a_statistics[CRS_ST_CORR])
2699 double *R = local_alloc (sizeof *R * n_rows);
2700 double *C = local_alloc (sizeof *C * n_cols);
2703 double y, t, c = 0., s = 0.;
2708 R[i] = s + (row_tot[i] + 1.) / 2.;
2715 assert (i < n_rows);
2720 double y, t, c = 0., s = 0.;
2725 C[j] = s + (col_tot[j] + 1.) / 2;
2732 assert (j < n_cols);
2736 calc_r (R, C, &v[6], &t[6], &ase[6]);
2742 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2746 /* Cohen's kappa. */
2747 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2749 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2752 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2753 i < ns_rows; i++, j++)
2757 while (col_tot[j] == 0.)
2760 prod = row_tot[i] * col_tot[j];
2761 sum = row_tot[i] + col_tot[j];
2763 sum_fii += mat[j + i * n_cols];
2765 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2766 sum_riciri_ci += prod * sum;
2768 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2769 for (j = 0; j < ns_cols; j++)
2771 double sum = row_tot[i] + col_tot[j];
2772 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2775 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2777 ase[8] = sqrt ((W * W * sum_rici
2778 + sum_rici * sum_rici
2779 - W * sum_riciri_ci)
2780 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2782 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2783 / pow2 (W * W - sum_rici))
2784 + ((2. * (W - sum_fii)
2785 * (2. * sum_fii * sum_rici
2786 - W * sum_fiiri_ci))
2787 / cube (W * W - sum_rici))
2788 + (pow2 (W - sum_fii)
2789 * (W * sum_fijri_ci2 - 4.
2790 * sum_rici * sum_rici)
2791 / pow4 (W * W - sum_rici))));
2793 t[8] = v[8] / ase[8];
2800 /* Calculate risk estimate. */
2802 calc_risk (double *value, double *upper, double *lower, union value *c)
2804 double f11, f12, f21, f22;
2810 for (i = 0; i < 3; i++)
2811 value[i] = upper[i] = lower[i] = SYSMIS;
2814 if (ns_rows != 2 || ns_cols != 2)
2821 for (i = j = 0; i < n_cols; i++)
2822 if (col_tot[i] != 0.)
2831 f11 = mat[nz_cols[0]];
2832 f12 = mat[nz_cols[1]];
2833 f21 = mat[nz_cols[0] + n_cols];
2834 f22 = mat[nz_cols[1] + n_cols];
2836 c[0] = cols[nz_cols[0]];
2837 c[1] = cols[nz_cols[1]];
2840 value[0] = (f11 * f22) / (f12 * f21);
2841 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2842 lower[0] = value[0] * exp (-1.960 * v);
2843 upper[0] = value[0] * exp (1.960 * v);
2845 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2846 v = sqrt ((f12 / (f11 * (f11 + f12)))
2847 + (f22 / (f21 * (f21 + f22))));
2848 lower[1] = value[1] * exp (-1.960 * v);
2849 upper[1] = value[1] * exp (1.960 * v);
2851 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2852 v = sqrt ((f11 / (f12 * (f11 + f12)))
2853 + (f21 / (f22 * (f21 + f22))));
2854 lower[2] = value[2] * exp (-1.960 * v);
2855 upper[2] = value[2] * exp (1.960 * v);
2860 /* Calculate directional measures. */
2862 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2863 double t[N_DIRECTIONAL])
2868 for (i = 0; i < N_DIRECTIONAL; i++)
2869 v[i] = ase[i] = t[i] = SYSMIS;
2873 if (cmd.a_statistics[CRS_ST_LAMBDA])
2875 double *fim = xnmalloc (n_rows, sizeof *fim);
2876 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2877 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2878 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2879 double sum_fim, sum_fmj;
2881 int rm_index, cm_index;
2884 /* Find maximum for each row and their sum. */
2885 for (sum_fim = 0., i = 0; i < n_rows; i++)
2887 double max = mat[i * n_cols];
2890 for (j = 1; j < n_cols; j++)
2891 if (mat[j + i * n_cols] > max)
2893 max = mat[j + i * n_cols];
2897 sum_fim += fim[i] = max;
2898 fim_index[i] = index;
2901 /* Find maximum for each column. */
2902 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2904 double max = mat[j];
2907 for (i = 1; i < n_rows; i++)
2908 if (mat[j + i * n_cols] > max)
2910 max = mat[j + i * n_cols];
2914 sum_fmj += fmj[j] = max;
2915 fmj_index[j] = index;
2918 /* Find maximum row total. */
2921 for (i = 1; i < n_rows; i++)
2922 if (row_tot[i] > rm)
2928 /* Find maximum column total. */
2931 for (j = 1; j < n_cols; j++)
2932 if (col_tot[j] > cm)
2938 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2939 v[1] = (sum_fmj - rm) / (W - rm);
2940 v[2] = (sum_fim - cm) / (W - cm);
2942 /* ASE1 for Y given X. */
2946 for (accum = 0., i = 0; i < n_rows; i++)
2947 for (j = 0; j < n_cols; j++)
2949 const int deltaj = j == cm_index;
2950 accum += (mat[j + i * n_cols]
2951 * pow2 ((j == fim_index[i])
2956 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2959 /* ASE0 for Y given X. */
2963 for (accum = 0., i = 0; i < n_rows; i++)
2964 if (cm_index != fim_index[i])
2965 accum += (mat[i * n_cols + fim_index[i]]
2966 + mat[i * n_cols + cm_index]);
2967 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2970 /* ASE1 for X given Y. */
2974 for (accum = 0., i = 0; i < n_rows; i++)
2975 for (j = 0; j < n_cols; j++)
2977 const int deltaj = i == rm_index;
2978 accum += (mat[j + i * n_cols]
2979 * pow2 ((i == fmj_index[j])
2984 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2987 /* ASE0 for X given Y. */
2991 for (accum = 0., j = 0; j < n_cols; j++)
2992 if (rm_index != fmj_index[j])
2993 accum += (mat[j + n_cols * fmj_index[j]]
2994 + mat[j + n_cols * rm_index]);
2995 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
2998 /* Symmetric ASE0 and ASE1. */
3003 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3004 for (j = 0; j < n_cols; j++)
3006 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3007 int temp1 = (i == rm_index) + (j == cm_index);
3008 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3009 accum1 += (mat[j + i * n_cols]
3010 * pow2 (temp0 + (v[0] - 1.) * temp1));
3012 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3013 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3014 / (2. * W - rm - cm));
3023 double sum_fij2_ri, sum_fij2_ci;
3024 double sum_ri2, sum_cj2;
3026 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3027 for (j = 0; j < n_cols; j++)
3029 double temp = pow2 (mat[j + i * n_cols]);
3030 sum_fij2_ri += temp / row_tot[i];
3031 sum_fij2_ci += temp / col_tot[j];
3034 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3035 sum_ri2 += row_tot[i] * row_tot[i];
3037 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3038 sum_cj2 += col_tot[j] * col_tot[j];
3040 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3041 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3045 if (cmd.a_statistics[CRS_ST_UC])
3047 double UX, UY, UXY, P;
3048 double ase1_yx, ase1_xy, ase1_sym;
3051 for (UX = 0., i = 0; i < n_rows; i++)
3052 if (row_tot[i] > 0.)
3053 UX -= row_tot[i] / W * log (row_tot[i] / W);
3055 for (UY = 0., j = 0; j < n_cols; j++)
3056 if (col_tot[j] > 0.)
3057 UY -= col_tot[j] / W * log (col_tot[j] / W);
3059 for (UXY = P = 0., i = 0; i < n_rows; i++)
3060 for (j = 0; j < n_cols; j++)
3062 double entry = mat[j + i * n_cols];
3067 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3068 UXY -= entry / W * log (entry / W);
3071 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3072 for (j = 0; j < n_cols; j++)
3074 double entry = mat[j + i * n_cols];
3079 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3080 + (UX - UXY) * log (col_tot[j] / W));
3081 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3082 + (UY - UXY) * log (row_tot[i] / W));
3083 ase1_sym += entry * pow2 ((UXY
3084 * log (row_tot[i] * col_tot[j] / (W * W)))
3085 - (UX + UY) * log (entry / W));
3088 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3089 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3090 t[5] = v[5] / ((2. / (W * (UX + UY)))
3091 * sqrt (P - pow2 (UX + UY - UXY) / W));
3093 v[6] = (UX + UY - UXY) / UX;
3094 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3095 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3097 v[7] = (UX + UY - UXY) / UY;
3098 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3099 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3103 if (cmd.a_statistics[CRS_ST_D])
3108 calc_symmetric (NULL, NULL, NULL);
3109 for (i = 0; i < 3; i++)
3111 v[8 + i] = somers_d_v[i];
3112 ase[8 + i] = somers_d_ase[i];
3113 t[8 + i] = somers_d_t[i];
3118 if (cmd.a_statistics[CRS_ST_ETA])
3121 double sum_Xr, sum_X2r;
3125 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3127 sum_Xr += rows[i].f * row_tot[i];
3128 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3130 SX = sum_X2r - sum_Xr * sum_Xr / W;
3132 for (SXW = 0., j = 0; j < n_cols; j++)
3136 for (cum = 0., i = 0; i < n_rows; i++)
3138 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3139 cum += rows[i].f * mat[j + i * n_cols];
3142 SXW -= cum * cum / col_tot[j];
3144 v[11] = sqrt (1. - SXW / SX);
3148 double sum_Yc, sum_Y2c;
3152 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3154 sum_Yc += cols[i].f * col_tot[i];
3155 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3157 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3159 for (SYW = 0., i = 0; i < n_rows; i++)
3163 for (cum = 0., j = 0; j < n_cols; j++)
3165 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3166 cum += cols[j].f * mat[j + i * n_cols];
3169 SYW -= cum * cum / row_tot[i];
3171 v[12] = sqrt (1. - SYW / SY);
3178 /* A wrapper around data_out() that limits string output to short
3179 string width and null terminates the result. */
3181 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3183 struct fmt_spec fmt_subst;
3185 /* Limit to short string width. */
3186 if (formats[fp->type].cat & FCAT_STRING)
3190 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3191 if (fmt_subst.type == FMT_A)
3192 fmt_subst.w = min (8, fmt_subst.w);
3194 fmt_subst.w = min (16, fmt_subst.w);
3200 data_out (s, fp, v);
3202 /* Null terminate. */