1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
35 #include <gsl/gsl_cdf.h>
39 #include <data/case.h>
40 #include <data/dictionary.h>
41 #include <data/procedure.h>
42 #include <data/value-labels.h>
43 #include <data/variable.h>
44 #include <language/command.h>
45 #include <language/dictionary/split-file.h>
46 #include <language/lexer/lexer.h>
47 #include <language/lexer/variable-parser.h>
48 #include <libpspp/alloc.h>
49 #include <libpspp/array.h>
50 #include <libpspp/assertion.h>
51 #include <libpspp/compiler.h>
52 #include <libpspp/hash.h>
53 #include <libpspp/magic.h>
54 #include <libpspp/message.h>
55 #include <libpspp/message.h>
56 #include <libpspp/misc.h>
57 #include <libpspp/pool.h>
58 #include <libpspp/str.h>
59 #include <output/output.h>
60 #include <output/table.h>
63 #define _(msgid) gettext (msgid)
64 #define N_(msgid) msgid
72 missing=miss:!table/include/report;
73 +write[wr_]=none,cells,all;
74 +format=fmt:!labels/nolabels/novallabs,
77 tabl:!tables/notables,
80 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
82 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
88 /* Number of chi-square statistics. */
91 /* Number of symmetric statistics. */
94 /* Number of directional statistics. */
95 #define N_DIRECTIONAL 13
97 /* A single table entry for general mode. */
100 int table; /* Flattened table number. */
103 double freq; /* Frequency count. */
104 double *data; /* Crosstabulation table for integer mode. */
107 union value values[1]; /* Values. */
110 /* A crosstabulation. */
113 int nvar; /* Number of variables. */
114 double missing; /* Missing cases count. */
115 int ofs; /* Integer mode: Offset into sorted_tab[]. */
116 struct variable *vars[2]; /* At least two variables; sorted by
117 larger indices first. */
120 /* Integer mode variable info. */
123 int min; /* Minimum value. */
124 int max; /* Maximum value + 1. */
125 int count; /* max - min. */
128 static inline struct var_range *
129 get_var_range (struct variable *v)
132 assert (v->aux != NULL);
136 /* Indexes into crosstab.v. */
143 /* General mode crosstabulation table. */
144 static struct hsh_table *gen_tab; /* Hash table. */
145 static int n_sorted_tab; /* Number of entries in sorted_tab. */
146 static struct table_entry **sorted_tab; /* Sorted table. */
148 /* Variables specifies on VARIABLES. */
149 static struct variable **variables;
150 static size_t variables_cnt;
153 static struct crosstab **xtab;
156 /* Integer or general mode? */
165 static int num_cells; /* Number of cells requested. */
166 static int cells[8]; /* Cells requested. */
169 static int write; /* One of WR_* that specifies the WRITE style. */
171 /* Command parsing info. */
172 static struct cmd_crosstabs cmd;
175 static struct pool *pl_tc; /* For table cells. */
176 static struct pool *pl_col; /* For column data. */
178 static int internal_cmd_crosstabs (void);
179 static void precalc (const struct ccase *, void *);
180 static bool calc_general (const struct ccase *, void *);
181 static bool calc_integer (const struct ccase *, void *);
182 static void postcalc (void *);
183 static void submit (struct tab_table *);
185 static void format_short (char *s, const struct fmt_spec *fp,
186 const union value *v);
188 /* Parse and execute CROSSTABS, then clean up. */
192 int result = internal_cmd_crosstabs ();
195 pool_destroy (pl_tc);
196 pool_destroy (pl_col);
201 /* Parses and executes the CROSSTABS procedure. */
203 internal_cmd_crosstabs (void)
212 pl_tc = pool_create ();
213 pl_col = pool_create ();
215 if (!parse_crosstabs (&cmd, NULL))
218 mode = variables ? INTEGER : GENERAL;
223 cmd.a_cells[CRS_CL_COUNT] = 1;
229 for (i = 0; i < CRS_CL_count; i++)
234 cmd.a_cells[CRS_CL_COUNT] = 1;
235 cmd.a_cells[CRS_CL_ROW] = 1;
236 cmd.a_cells[CRS_CL_COLUMN] = 1;
237 cmd.a_cells[CRS_CL_TOTAL] = 1;
239 if (cmd.a_cells[CRS_CL_ALL])
241 for (i = 0; i < CRS_CL_count; i++)
243 cmd.a_cells[CRS_CL_ALL] = 0;
245 cmd.a_cells[CRS_CL_NONE] = 0;
247 for (num_cells = i = 0; i < CRS_CL_count; i++)
249 cells[num_cells++] = i;
252 if (cmd.sbc_statistics)
257 for (i = 0; i < CRS_ST_count; i++)
258 if (cmd.a_statistics[i])
261 cmd.a_statistics[CRS_ST_CHISQ] = 1;
262 if (cmd.a_statistics[CRS_ST_ALL])
263 for (i = 0; i < CRS_ST_count; i++)
264 cmd.a_statistics[i] = 1;
268 if (cmd.miss == CRS_REPORT && mode == GENERAL)
270 msg (SE, _("Missing mode REPORT not allowed in general mode. "
271 "Assuming MISSING=TABLE."));
272 cmd.miss = CRS_TABLE;
276 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
277 cmd.a_write[CRS_WR_ALL] = 0;
278 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
280 msg (SE, _("Write mode ALL not allowed in general mode. "
281 "Assuming WRITE=CELLS."));
282 cmd.a_write[CRS_WR_CELLS] = 1;
285 && (cmd.a_write[CRS_WR_NONE]
286 + cmd.a_write[CRS_WR_ALL]
287 + cmd.a_write[CRS_WR_CELLS] == 0))
288 cmd.a_write[CRS_WR_CELLS] = 1;
289 if (cmd.a_write[CRS_WR_CELLS])
290 write = CRS_WR_CELLS;
291 else if (cmd.a_write[CRS_WR_ALL])
296 ok = procedure_with_splits (current_dataset, precalc,
297 mode == GENERAL ? calc_general : calc_integer,
300 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
303 /* Parses the TABLES subcommand. */
305 crs_custom_tables (struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
307 struct var_set *var_set;
309 struct variable ***by = NULL;
310 size_t *by_nvar = NULL;
314 /* Ensure that this is a TABLES subcommand. */
315 if (!lex_match_id ("TABLES")
316 && (token != T_ID || dict_lookup_var (dataset_dict (current_dataset), tokid) == NULL)
321 if (variables != NULL)
322 var_set = var_set_create_from_array (variables, variables_cnt);
324 var_set = var_set_create_from_dict (dataset_dict (current_dataset));
325 assert (var_set != NULL);
329 by = xnrealloc (by, n_by + 1, sizeof *by);
330 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
331 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
332 PV_NO_DUPLICATE | PV_NO_SCRATCH))
334 if (xalloc_oversized (nx, by_nvar[n_by]))
336 msg (SE, _("Too many crosstabulation variables or dimensions."));
342 if (!lex_match (T_BY))
346 lex_error (_("expecting BY"));
355 int *by_iter = xcalloc (n_by, sizeof *by_iter);
358 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
359 for (i = 0; i < nx; i++)
363 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
370 for (i = 0; i < n_by; i++)
371 x->vars[i] = by[i][by_iter[i]];
377 for (i = n_by - 1; i >= 0; i--)
379 if (++by_iter[i] < by_nvar[i])
392 /* All return paths lead here. */
396 for (i = 0; i < n_by; i++)
402 var_set_destroy (var_set);
407 /* Parses the VARIABLES subcommand. */
409 crs_custom_variables (struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
413 msg (SE, _("VARIABLES must be specified before TABLES."));
421 size_t orig_nv = variables_cnt;
426 if (!parse_variables (dataset_dict (current_dataset), &variables, &variables_cnt,
427 (PV_APPEND | PV_NUMERIC
428 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
433 lex_error ("expecting `('");
438 if (!lex_force_int ())
440 min = lex_integer ();
445 if (!lex_force_int ())
447 max = lex_integer ();
450 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
458 lex_error ("expecting `)'");
463 for (i = orig_nv; i < variables_cnt; i++)
465 struct var_range *vr = xmalloc (sizeof *vr);
468 vr->count = max - min + 1;
469 var_attach_aux (variables[i], vr, var_dtor_free);
484 /* Data file processing. */
486 static int compare_table_entry (const void *, const void *, void *);
487 static unsigned hash_table_entry (const void *, void *);
489 /* Set up the crosstabulation tables for processing. */
491 precalc (const struct ccase *first, void *aux UNUSED)
493 output_split_file_values (first);
496 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
506 for (i = 0; i < nxtab; i++)
508 struct crosstab *x = xtab[i];
513 x->ofs = n_sorted_tab;
515 for (j = 2; j < x->nvar; j++)
516 count *= get_var_range (x->vars[j - 2])->count;
518 sorted_tab = xnrealloc (sorted_tab,
519 n_sorted_tab + count, sizeof *sorted_tab);
520 v = local_alloc (sizeof *v * x->nvar);
521 for (j = 2; j < x->nvar; j++)
522 v[j] = get_var_range (x->vars[j])->min;
523 for (j = 0; j < count; j++)
525 struct table_entry *te;
528 te = sorted_tab[n_sorted_tab++]
529 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
533 int row_cnt = get_var_range (x->vars[0])->count;
534 int col_cnt = get_var_range (x->vars[1])->count;
535 const int mat_size = row_cnt * col_cnt;
538 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
539 for (m = 0; m < mat_size; m++)
543 for (k = 2; k < x->nvar; k++)
544 te->values[k].f = v[k];
545 for (k = 2; k < x->nvar; k++)
547 struct var_range *vr = get_var_range (x->vars[k]);
548 if (++v[k] >= vr->max)
557 sorted_tab = xnrealloc (sorted_tab,
558 n_sorted_tab + 1, sizeof *sorted_tab);
559 sorted_tab[n_sorted_tab] = NULL;
564 /* Form crosstabulations for general mode. */
566 calc_general (const struct ccase *c, void *aux UNUSED)
568 bool bad_warn = true;
571 double weight = dict_get_case_weight (dataset_dict (current_dataset), c, &bad_warn);
573 /* Flattened current table index. */
576 for (t = 0; t < nxtab; t++)
578 struct crosstab *x = xtab[t];
579 const size_t entry_size = (sizeof (struct table_entry)
580 + sizeof (union value) * (x->nvar - 1));
581 struct table_entry *te = local_alloc (entry_size);
583 /* Construct table entry for the current record and table. */
589 for (j = 0; j < x->nvar; j++)
591 const union value *v = case_data (c, x->vars[j]->fv);
592 const struct missing_values *mv = &x->vars[j]->miss;
593 if ((cmd.miss == CRS_TABLE && mv_is_value_missing (mv, v))
594 || (cmd.miss == CRS_INCLUDE
595 && mv_is_value_system_missing (mv, v)))
597 x->missing += weight;
601 if (x->vars[j]->type == NUMERIC)
602 te->values[j].f = case_num (c, x->vars[j]->fv);
605 memcpy (te->values[j].s, case_str (c, x->vars[j]->fv),
608 /* Necessary in order to simplify comparisons. */
609 memset (&te->values[j].s[x->vars[j]->width], 0,
610 sizeof (union value) - x->vars[j]->width);
615 /* Add record to hash table. */
617 struct table_entry **tepp
618 = (struct table_entry **) hsh_probe (gen_tab, te);
621 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
624 memcpy (tep, te, entry_size);
629 (*tepp)->u.freq += weight;
640 calc_integer (const struct ccase *c, void *aux UNUSED)
642 bool bad_warn = true;
645 double weight = dict_get_case_weight (dataset_dict (current_dataset), c, &bad_warn);
647 /* Flattened current table index. */
650 for (t = 0; t < nxtab; t++)
652 struct crosstab *x = xtab[t];
657 for (i = 0; i < x->nvar; i++)
659 struct variable *const v = x->vars[i];
660 struct var_range *vr = get_var_range (v);
661 double value = case_num (c, v->fv);
663 /* Note that the first test also rules out SYSMIS. */
664 if ((value < vr->min || value >= vr->max)
665 || (cmd.miss == CRS_TABLE
666 && mv_is_num_user_missing (&v->miss, value)))
668 x->missing += weight;
674 ofs += fact * ((int) value - vr->min);
680 struct variable *row_var = x->vars[ROW_VAR];
681 const int row = case_num (c, row_var->fv) - get_var_range (row_var)->min;
683 struct variable *col_var = x->vars[COL_VAR];
684 const int col = case_num (c, col_var->fv) - get_var_range (col_var)->min;
686 const int col_dim = get_var_range (col_var)->count;
688 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
697 /* Compare the table_entry's at A and B and return a strcmp()-type
700 compare_table_entry (const void *a_, const void *b_, void *foo UNUSED)
702 const struct table_entry *a = a_;
703 const struct table_entry *b = b_;
705 if (a->table > b->table)
707 else if (a->table < b->table)
711 const struct crosstab *x = xtab[a->table];
714 for (i = x->nvar - 1; i >= 0; i--)
715 if (x->vars[i]->type == NUMERIC)
717 const double diffnum = a->values[i].f - b->values[i].f;
720 else if (diffnum > 0)
725 assert (x->vars[i]->type == ALPHA);
727 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
738 /* Calculate a hash value from table_entry A. */
740 hash_table_entry (const void *a_, void *foo UNUSED)
742 const struct table_entry *a = a_;
747 for (i = 0; i < xtab[a->table]->nvar; i++)
748 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
753 /* Post-data reading calculations. */
755 static struct table_entry **find_pivot_extent (struct table_entry **,
756 int *cnt, int pivot);
757 static void enum_var_values (struct table_entry **entries, int entry_cnt,
759 union value **values, int *value_cnt);
760 static void output_pivot_table (struct table_entry **, struct table_entry **,
761 double **, double **, double **,
762 int *, int *, int *);
763 static void make_summary_table (void);
766 postcalc (void *aux UNUSED)
770 n_sorted_tab = hsh_count (gen_tab);
771 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
774 make_summary_table ();
776 /* Identify all the individual crosstabulation tables, and deal with
779 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
780 int pc = n_sorted_tab; /* Pivot count. */
782 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
783 int maxrows = 0, maxcols = 0, maxcells = 0;
787 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
791 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
792 &maxrows, &maxcols, &maxcells);
801 hsh_destroy (gen_tab);
804 static void insert_summary (struct tab_table *, int tab_index, double valid);
806 /* Output a table summarizing the cases processed. */
808 make_summary_table (void)
810 struct tab_table *summary;
812 struct table_entry **pb = sorted_tab, **pe;
813 int pc = n_sorted_tab;
816 summary = tab_create (7, 3 + nxtab, 1);
817 tab_title (summary, _("Summary."));
818 tab_headers (summary, 1, 0, 3, 0);
819 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
820 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
821 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
822 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
823 tab_hline (summary, TAL_1, 1, 6, 1);
824 tab_hline (summary, TAL_1, 1, 6, 2);
825 tab_vline (summary, TAL_1, 3, 1, 1);
826 tab_vline (summary, TAL_1, 5, 1, 1);
830 for (i = 0; i < 3; i++)
832 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
833 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
836 tab_offset (summary, 0, 3);
842 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
846 while (cur_tab < (*pb)->table)
847 insert_summary (summary, cur_tab++, 0.);
850 for (valid = 0.; pb < pe; pb++)
851 valid += (*pb)->u.freq;
854 const struct crosstab *const x = xtab[(*pb)->table];
855 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
856 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
857 const int count = n_cols * n_rows;
859 for (valid = 0.; pb < pe; pb++)
861 const double *data = (*pb)->u.data;
864 for (i = 0; i < count; i++)
868 insert_summary (summary, cur_tab++, valid);
873 while (cur_tab < nxtab)
874 insert_summary (summary, cur_tab++, 0.);
879 /* Inserts a line into T describing the crosstabulation at index
880 TAB_INDEX, which has VALID valid observations. */
882 insert_summary (struct tab_table *t, int tab_index, double valid)
884 struct crosstab *x = xtab[tab_index];
886 tab_hline (t, TAL_1, 0, 6, 0);
888 /* Crosstabulation name. */
890 char *buf = local_alloc (128 * x->nvar);
894 for (i = 0; i < x->nvar; i++)
897 cp = stpcpy (cp, " * ");
900 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
902 tab_text (t, 0, 0, TAB_LEFT, buf);
907 /* Counts and percentages. */
917 for (i = 0; i < 3; i++)
919 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
920 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
931 static struct tab_table *table; /* Crosstabulation table. */
932 static struct tab_table *chisq; /* Chi-square table. */
933 static struct tab_table *sym; /* Symmetric measures table. */
934 static struct tab_table *risk; /* Risk estimate table. */
935 static struct tab_table *direct; /* Directional measures table. */
938 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
940 /* Column values, number of columns. */
941 static union value *cols;
944 /* Row values, number of rows. */
945 static union value *rows;
948 /* Number of statistically interesting columns/rows (columns/rows with
950 static int ns_cols, ns_rows;
952 /* Crosstabulation. */
953 static const struct crosstab *x;
955 /* Number of variables from the crosstabulation to consider. This is
956 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
959 /* Matrix contents. */
960 static double *mat; /* Matrix proper. */
961 static double *row_tot; /* Row totals. */
962 static double *col_tot; /* Column totals. */
963 static double W; /* Grand total. */
965 static void display_dimensions (struct tab_table *, int first_difference,
966 struct table_entry *);
967 static void display_crosstabulation (void);
968 static void display_chisq (void);
969 static void display_symmetric (void);
970 static void display_risk (void);
971 static void display_directional (void);
972 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
973 static void table_value_missing (struct tab_table *table, int c, int r,
974 unsigned char opt, const union value *v,
975 const struct variable *var);
976 static void delete_missing (void);
978 /* Output pivot table beginning at PB and continuing until PE,
979 exclusive. For efficiency, *MATP is a pointer to a matrix that can
980 hold *MAXROWS entries. */
982 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
983 double **matp, double **row_totp, double **col_totp,
984 int *maxrows, int *maxcols, int *maxcells)
987 struct table_entry **tb = pb, **te; /* Table begin, table end. */
988 int tc = pe - pb; /* Table count. */
990 /* Table entry for header comparison. */
991 struct table_entry *cmp = NULL;
993 x = xtab[(*pb)->table];
994 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
996 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
998 /* Crosstabulation table initialization. */
1001 table = tab_create (nvar + n_cols,
1002 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1003 tab_headers (table, nvar - 1, 0, 2, 0);
1005 /* First header line. */
1006 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1007 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
1009 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1011 /* Second header line. */
1015 for (i = 2; i < nvar; i++)
1016 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1017 TAB_RIGHT | TAT_TITLE,
1019 ? x->vars[i]->label : x->vars[i]->name));
1020 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1021 x->vars[ROW_VAR]->name);
1022 for (i = 0; i < n_cols; i++)
1023 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1025 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1028 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1029 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1033 char *title = local_alloc (x->nvar * 64 + 128);
1037 if (cmd.pivot == CRS_PIVOT)
1038 for (i = 0; i < nvar; i++)
1041 cp = stpcpy (cp, " by ");
1042 cp = stpcpy (cp, x->vars[i]->name);
1046 cp = spprintf (cp, "%s by %s for",
1047 x->vars[0]->name, x->vars[1]->name);
1048 for (i = 2; i < nvar; i++)
1050 char buf[64], *bufp;
1055 cp = stpcpy (cp, x->vars[i]->name);
1057 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1058 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1060 cp = stpcpy (cp, bufp);
1064 cp = stpcpy (cp, " [");
1065 for (i = 0; i < num_cells; i++)
1073 static const struct tuple cell_names[] =
1075 {CRS_CL_COUNT, N_("count")},
1076 {CRS_CL_ROW, N_("row %")},
1077 {CRS_CL_COLUMN, N_("column %")},
1078 {CRS_CL_TOTAL, N_("total %")},
1079 {CRS_CL_EXPECTED, N_("expected")},
1080 {CRS_CL_RESIDUAL, N_("residual")},
1081 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1082 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1086 const struct tuple *t;
1088 for (t = cell_names; t->value != cells[i]; t++)
1089 assert (t->value != -1);
1091 cp = stpcpy (cp, ", ");
1092 cp = stpcpy (cp, gettext (t->name));
1096 tab_title (table, "%s", title);
1100 tab_offset (table, 0, 2);
1105 /* Chi-square table initialization. */
1106 if (cmd.a_statistics[CRS_ST_CHISQ])
1108 chisq = tab_create (6 + (nvar - 2),
1109 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1110 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1112 tab_title (chisq, _("Chi-square tests."));
1114 tab_offset (chisq, nvar - 2, 0);
1115 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1116 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1117 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1118 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1119 _("Asymp. Sig. (2-sided)"));
1120 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1121 _("Exact. Sig. (2-sided)"));
1122 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1123 _("Exact. Sig. (1-sided)"));
1125 tab_offset (chisq, 0, 1);
1130 /* Symmetric measures. */
1131 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1132 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1133 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1134 || cmd.a_statistics[CRS_ST_KAPPA])
1136 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1137 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1138 tab_title (sym, _("Symmetric measures."));
1140 tab_offset (sym, nvar - 2, 0);
1141 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1142 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1143 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1144 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1145 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1146 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1147 tab_offset (sym, 0, 1);
1152 /* Risk estimate. */
1153 if (cmd.a_statistics[CRS_ST_RISK])
1155 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1156 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1157 tab_title (risk, _("Risk estimate."));
1159 tab_offset (risk, nvar - 2, 0);
1160 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1161 _("95%% Confidence Interval"));
1162 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1163 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1164 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1165 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1166 tab_hline (risk, TAL_1, 2, 3, 1);
1167 tab_vline (risk, TAL_1, 2, 0, 1);
1168 tab_offset (risk, 0, 2);
1173 /* Directional measures. */
1174 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1175 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1177 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1178 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1179 tab_title (direct, _("Directional measures."));
1181 tab_offset (direct, nvar - 2, 0);
1182 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1183 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1184 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1185 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1186 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1187 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1188 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1189 tab_offset (direct, 0, 1);
1196 /* Find pivot subtable if applicable. */
1197 te = find_pivot_extent (tb, &tc, 0);
1201 /* Find all the row variable values. */
1202 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1204 /* Allocate memory space for the column and row totals. */
1205 if (n_rows > *maxrows)
1207 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1208 row_tot = *row_totp;
1211 if (n_cols > *maxcols)
1213 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1214 col_tot = *col_totp;
1218 /* Allocate table space for the matrix. */
1219 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1220 tab_realloc (table, -1,
1221 max (tab_nr (table) + (n_rows + 1) * num_cells,
1222 tab_nr (table) * (pe - pb) / (te - tb)));
1224 if (mode == GENERAL)
1226 /* Allocate memory space for the matrix. */
1227 if (n_cols * n_rows > *maxcells)
1229 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1230 *maxcells = n_cols * n_rows;
1235 /* Build the matrix and calculate column totals. */
1237 union value *cur_col = cols;
1238 union value *cur_row = rows;
1240 double *cp = col_tot;
1241 struct table_entry **p;
1244 for (p = &tb[0]; p < te; p++)
1246 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1250 for (; cur_row < &rows[n_rows]; cur_row++)
1256 mp = &mat[cur_col - cols];
1259 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1266 *cp += *mp = (*p)->u.freq;
1271 /* Zero out the rest of the matrix. */
1272 for (; cur_row < &rows[n_rows]; cur_row++)
1278 if (cur_col < &cols[n_cols])
1280 const int rem_cols = n_cols - (cur_col - cols);
1283 for (c = 0; c < rem_cols; c++)
1285 mp = &mat[cur_col - cols];
1286 for (r = 0; r < n_rows; r++)
1288 for (c = 0; c < rem_cols; c++)
1290 mp += n_cols - rem_cols;
1298 double *tp = col_tot;
1300 assert (mode == INTEGER);
1301 mat = (*tb)->u.data;
1304 /* Calculate column totals. */
1305 for (c = 0; c < n_cols; c++)
1308 double *cp = &mat[c];
1310 for (r = 0; r < n_rows; r++)
1311 cum += cp[r * n_cols];
1319 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1320 ns_cols += *cp != 0.;
1323 /* Calculate row totals. */
1326 double *rp = row_tot;
1329 for (ns_rows = 0, r = n_rows; r--; )
1332 for (c = n_cols; c--; )
1340 /* Calculate grand total. */
1346 if (n_rows < n_cols)
1347 tp = row_tot, n = n_rows;
1349 tp = col_tot, n = n_cols;
1355 /* Find the first variable that differs from the last subtable,
1356 then display the values of the dimensioning variables for
1357 each table that needs it. */
1359 int first_difference = nvar - 1;
1362 for (; ; first_difference--)
1364 assert (first_difference >= 2);
1365 if (memcmp (&cmp->values[first_difference],
1366 &(*tb)->values[first_difference],
1367 sizeof *cmp->values))
1373 display_dimensions (table, first_difference, *tb);
1375 display_dimensions (chisq, first_difference, *tb);
1377 display_dimensions (sym, first_difference, *tb);
1379 display_dimensions (risk, first_difference, *tb);
1381 display_dimensions (direct, first_difference, *tb);
1385 display_crosstabulation ();
1386 if (cmd.miss == CRS_REPORT)
1391 display_symmetric ();
1395 display_directional ();
1406 tab_resize (chisq, 4 + (nvar - 2), -1);
1417 /* Delete missing rows and columns for statistical analysis when
1420 delete_missing (void)
1425 for (r = 0; r < n_rows; r++)
1426 if (mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1430 for (c = 0; c < n_cols; c++)
1431 mat[c + r * n_cols] = 0.;
1439 for (c = 0; c < n_cols; c++)
1440 if (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1444 for (r = 0; r < n_rows; r++)
1445 mat[c + r * n_cols] = 0.;
1451 /* Prepare table T for submission, and submit it. */
1453 submit (struct tab_table *t)
1460 tab_resize (t, -1, 0);
1461 if (tab_nr (t) == tab_t (t))
1466 tab_offset (t, 0, 0);
1468 for (i = 2; i < nvar; i++)
1469 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1470 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1471 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1472 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1474 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1476 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1477 tab_dim (t, crosstabs_dim);
1481 /* Sets the widths of all the columns and heights of all the rows in
1482 table T for driver D. */
1484 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1488 /* Width of a numerical column. */
1489 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1490 if (cmd.miss == CRS_REPORT)
1491 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1493 /* Set width for header columns. */
1499 w = d->width - c * (t->nc - t->l);
1500 for (i = 0; i <= t->nc; i++)
1504 if (w < d->prop_em_width * 8)
1505 w = d->prop_em_width * 8;
1507 if (w > d->prop_em_width * 15)
1508 w = d->prop_em_width * 15;
1510 for (i = 0; i < t->l; i++)
1514 for (i = t->l; i < t->nc; i++)
1517 for (i = 0; i < t->nr; i++)
1518 t->h[i] = tab_natural_height (t, d, i);
1521 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1522 int *cnt, int pivot);
1523 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1524 int *cnt, int pivot);
1526 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1528 static struct table_entry **
1529 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1531 return (mode == GENERAL
1532 ? find_pivot_extent_general (tp, cnt, pivot)
1533 : find_pivot_extent_integer (tp, cnt, pivot));
1536 /* Find the extent of a region in TP that contains one table. If
1537 PIVOT != 0 that means a set of table entries with identical table
1538 number; otherwise they also have to have the same values for every
1539 dimension after the row and column dimensions. The table that is
1540 searched starts at TP and has length CNT. Returns the first entry
1541 after the last one in the table; sets *CNT to the number of
1542 remaining values. If there are no entries in TP at all, returns
1543 NULL. A yucky interface, admittedly, but it works. */
1544 static struct table_entry **
1545 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1547 struct table_entry *fp = *tp;
1552 x = xtab[(*tp)->table];
1560 if ((*tp)->table != fp->table)
1565 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1572 /* Integer mode correspondent to find_pivot_extent_general(). This
1573 could be optimized somewhat, but I just don't give a crap about
1574 CROSSTABS performance in integer mode, which is just a
1575 CROSSTABS wart as far as I'm concerned.
1577 That said, feel free to send optimization patches to me. */
1578 static struct table_entry **
1579 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1581 struct table_entry *fp = *tp;
1586 x = xtab[(*tp)->table];
1594 if ((*tp)->table != fp->table)
1599 if (memcmp (&(*tp)->values[2], &fp->values[2],
1600 sizeof (union value) * (x->nvar - 2)))
1607 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1608 result. WIDTH_ points to an int which is either 0 for a
1609 numeric value or a string width for a string value. */
1611 compare_value (const void *a_, const void *b_, void *width_)
1613 const union value *a = a_;
1614 const union value *b = b_;
1615 const int *pwidth = width_;
1616 const int width = *pwidth;
1619 return (a->f < b->f) ? -1 : (a->f > b->f);
1621 return strncmp (a->s, b->s, width);
1624 /* Given an array of ENTRY_CNT table_entry structures starting at
1625 ENTRIES, creates a sorted list of the values that the variable
1626 with index VAR_IDX takes on. The values are returned as a
1627 malloc()'darray stored in *VALUES, with the number of values
1628 stored in *VALUE_CNT.
1631 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1632 union value **values, int *value_cnt)
1634 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1636 if (mode == GENERAL)
1638 int width = v->width;
1641 *values = xnmalloc (entry_cnt, sizeof **values);
1642 for (i = 0; i < entry_cnt; i++)
1643 (*values)[i] = entries[i]->values[var_idx];
1644 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1645 compare_value, &width);
1649 struct var_range *vr = get_var_range (v);
1652 assert (mode == INTEGER);
1653 *values = xnmalloc (vr->count, sizeof **values);
1654 for (i = 0; i < vr->count; i++)
1655 (*values)[i].f = i + vr->min;
1656 *value_cnt = vr->count;
1660 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1661 from V, displayed with print format spec from variable VAR. When
1662 in REPORT missing-value mode, missing values have an M appended. */
1664 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1665 const union value *v, const struct variable *var)
1669 const char *label = val_labs_find (var->val_labs, *v);
1672 tab_text (table, c, r, TAB_LEFT, label);
1676 s.string = tab_alloc (table, var->print.w);
1677 format_short (s.string, &var->print, v);
1678 s.length = strlen (s.string);
1679 if (cmd.miss == CRS_REPORT && mv_is_num_user_missing (&var->miss, v->f))
1680 s.string[s.length++] = 'M';
1681 while (s.length && *s.string == ' ')
1686 tab_raw (table, c, r, opt, &s);
1689 /* Draws a line across TABLE at the current row to indicate the most
1690 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1691 that changed, and puts the values that changed into the table. TB
1692 and X must be the corresponding table_entry and crosstab,
1695 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1697 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1699 for (; first_difference >= 2; first_difference--)
1700 table_value_missing (table, nvar - first_difference - 1, 0,
1701 TAB_RIGHT, &tb->values[first_difference],
1702 x->vars[first_difference]);
1705 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1706 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1707 additionally suffixed with a letter `M'. */
1709 format_cell_entry (struct tab_table *table, int c, int r, double value,
1710 char suffix, bool mark_missing)
1712 const struct fmt_spec f = {FMT_F, 10, 1};
1717 s.string = tab_alloc (table, 16);
1719 data_out (s.string, &f, &v);
1720 while (*s.string == ' ')
1726 s.string[s.length++] = suffix;
1728 s.string[s.length++] = 'M';
1730 tab_raw (table, c, r, TAB_RIGHT, &s);
1733 /* Displays the crosstabulation table. */
1735 display_crosstabulation (void)
1740 for (r = 0; r < n_rows; r++)
1741 table_value_missing (table, nvar - 2, r * num_cells,
1742 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1744 tab_text (table, nvar - 2, n_rows * num_cells,
1745 TAB_LEFT, _("Total"));
1747 /* Put in the actual cells. */
1752 tab_offset (table, nvar - 1, -1);
1753 for (r = 0; r < n_rows; r++)
1756 tab_hline (table, TAL_1, -1, n_cols, 0);
1757 for (c = 0; c < n_cols; c++)
1759 bool mark_missing = false;
1760 double expected_value = row_tot[r] * col_tot[c] / W;
1761 if (cmd.miss == CRS_REPORT
1762 && (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)
1763 || mv_is_num_user_missing (&x->vars[ROW_VAR]->miss,
1765 mark_missing = true;
1766 for (i = 0; i < num_cells; i++)
1777 v = *mp / row_tot[r] * 100.;
1781 v = *mp / col_tot[c] * 100.;
1788 case CRS_CL_EXPECTED:
1791 case CRS_CL_RESIDUAL:
1792 v = *mp - expected_value;
1794 case CRS_CL_SRESIDUAL:
1795 v = (*mp - expected_value) / sqrt (expected_value);
1797 case CRS_CL_ASRESIDUAL:
1798 v = ((*mp - expected_value)
1799 / sqrt (expected_value
1800 * (1. - row_tot[r] / W)
1801 * (1. - col_tot[c] / W)));
1807 format_cell_entry (table, c, i, v, suffix, mark_missing);
1813 tab_offset (table, -1, tab_row (table) + num_cells);
1821 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1822 for (r = 0; r < n_rows; r++)
1825 bool mark_missing = false;
1827 if (cmd.miss == CRS_REPORT
1828 && mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1829 mark_missing = true;
1831 for (i = 0; i < num_cells; i++)
1845 v = row_tot[r] / W * 100.;
1849 v = row_tot[r] / W * 100.;
1852 case CRS_CL_EXPECTED:
1853 case CRS_CL_RESIDUAL:
1854 case CRS_CL_SRESIDUAL:
1855 case CRS_CL_ASRESIDUAL:
1862 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1863 tab_next_row (table);
1868 /* Column totals, grand total. */
1874 tab_hline (table, TAL_1, -1, n_cols, 0);
1875 for (c = 0; c <= n_cols; c++)
1877 double ct = c < n_cols ? col_tot[c] : W;
1878 bool mark_missing = false;
1882 if (cmd.miss == CRS_REPORT && c < n_cols
1883 && mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1884 mark_missing = true;
1886 for (i = 0; i < num_cells; i++)
1908 case CRS_CL_EXPECTED:
1909 case CRS_CL_RESIDUAL:
1910 case CRS_CL_SRESIDUAL:
1911 case CRS_CL_ASRESIDUAL:
1917 format_cell_entry (table, c, i, v, suffix, mark_missing);
1922 tab_offset (table, -1, tab_row (table) + last_row);
1925 tab_offset (table, 0, -1);
1928 static void calc_r (double *X, double *Y, double *, double *, double *);
1929 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1931 /* Display chi-square statistics. */
1933 display_chisq (void)
1935 static const char *chisq_stats[N_CHISQ] =
1937 N_("Pearson Chi-Square"),
1938 N_("Likelihood Ratio"),
1939 N_("Fisher's Exact Test"),
1940 N_("Continuity Correction"),
1941 N_("Linear-by-Linear Association"),
1943 double chisq_v[N_CHISQ];
1944 double fisher1, fisher2;
1950 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1952 tab_offset (chisq, nvar - 2, -1);
1954 for (i = 0; i < N_CHISQ; i++)
1956 if ((i != 2 && chisq_v[i] == SYSMIS)
1957 || (i == 2 && fisher1 == SYSMIS))
1961 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1964 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1965 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1966 tab_float (chisq, 3, 0, TAB_RIGHT,
1967 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1972 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1973 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1975 tab_next_row (chisq);
1978 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1979 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1980 tab_next_row (chisq);
1982 tab_offset (chisq, 0, -1);
1985 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1986 double[N_SYMMETRIC]);
1988 /* Display symmetric measures. */
1990 display_symmetric (void)
1992 static const char *categories[] =
1994 N_("Nominal by Nominal"),
1995 N_("Ordinal by Ordinal"),
1996 N_("Interval by Interval"),
1997 N_("Measure of Agreement"),
2000 static const char *stats[N_SYMMETRIC] =
2004 N_("Contingency Coefficient"),
2005 N_("Kendall's tau-b"),
2006 N_("Kendall's tau-c"),
2008 N_("Spearman Correlation"),
2013 static const int stats_categories[N_SYMMETRIC] =
2015 0, 0, 0, 1, 1, 1, 1, 2, 3,
2019 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2022 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2025 tab_offset (sym, nvar - 2, -1);
2027 for (i = 0; i < N_SYMMETRIC; i++)
2029 if (sym_v[i] == SYSMIS)
2032 if (stats_categories[i] != last_cat)
2034 last_cat = stats_categories[i];
2035 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2038 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2039 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2040 if (sym_ase[i] != SYSMIS)
2041 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2042 if (sym_t[i] != SYSMIS)
2043 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2044 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2048 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2049 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2052 tab_offset (sym, 0, -1);
2055 static int calc_risk (double[], double[], double[], union value *);
2057 /* Display risk estimate. */
2062 double risk_v[3], lower[3], upper[3];
2066 if (!calc_risk (risk_v, upper, lower, c))
2069 tab_offset (risk, nvar - 2, -1);
2071 for (i = 0; i < 3; i++)
2073 if (risk_v[i] == SYSMIS)
2079 if (x->vars[COL_VAR]->type == NUMERIC)
2080 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2081 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2083 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2084 x->vars[COL_VAR]->name,
2085 x->vars[COL_VAR]->width, c[0].s,
2086 x->vars[COL_VAR]->width, c[1].s);
2090 if (x->vars[ROW_VAR]->type == NUMERIC)
2091 sprintf (buf, _("For cohort %s = %g"),
2092 x->vars[ROW_VAR]->name, rows[i - 1].f);
2094 sprintf (buf, _("For cohort %s = %.*s"),
2095 x->vars[ROW_VAR]->name,
2096 x->vars[ROW_VAR]->width, rows[i - 1].s);
2100 tab_text (risk, 0, 0, TAB_LEFT, buf);
2101 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2102 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2103 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2104 tab_next_row (risk);
2107 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2108 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2109 tab_next_row (risk);
2111 tab_offset (risk, 0, -1);
2114 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2115 double[N_DIRECTIONAL]);
2117 /* Display directional measures. */
2119 display_directional (void)
2121 static const char *categories[] =
2123 N_("Nominal by Nominal"),
2124 N_("Ordinal by Ordinal"),
2125 N_("Nominal by Interval"),
2128 static const char *stats[] =
2131 N_("Goodman and Kruskal tau"),
2132 N_("Uncertainty Coefficient"),
2137 static const char *types[] =
2144 static const int stats_categories[N_DIRECTIONAL] =
2146 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2149 static const int stats_stats[N_DIRECTIONAL] =
2151 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2154 static const int stats_types[N_DIRECTIONAL] =
2156 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2159 static const int *stats_lookup[] =
2166 static const char **stats_names[] =
2178 double direct_v[N_DIRECTIONAL];
2179 double direct_ase[N_DIRECTIONAL];
2180 double direct_t[N_DIRECTIONAL];
2184 if (!calc_directional (direct_v, direct_ase, direct_t))
2187 tab_offset (direct, nvar - 2, -1);
2189 for (i = 0; i < N_DIRECTIONAL; i++)
2191 if (direct_v[i] == SYSMIS)
2197 for (j = 0; j < 3; j++)
2198 if (last[j] != stats_lookup[j][i])
2201 tab_hline (direct, TAL_1, j, 6, 0);
2206 int k = last[j] = stats_lookup[j][i];
2211 string = x->vars[0]->name;
2213 string = x->vars[1]->name;
2215 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2216 gettext (stats_names[j][k]), string);
2221 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2222 if (direct_ase[i] != SYSMIS)
2223 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2224 if (direct_t[i] != SYSMIS)
2225 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2226 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2227 tab_next_row (direct);
2230 tab_offset (direct, 0, -1);
2233 /* Statistical calculations. */
2235 /* Returns the value of the gamma (factorial) function for an integer
2238 gamma_int (double x)
2243 for (i = 2; i < x; i++)
2248 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2250 static inline double
2251 Pr (int a, int b, int c, int d)
2253 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2254 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2255 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2256 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2257 / gamma_int (a + b + c + d + 1.));
2260 /* Swap the contents of A and B. */
2262 swap (int *a, int *b)
2269 /* Calculate significance for Fisher's exact test as specified in
2270 _SPSS Statistical Algorithms_, Appendix 5. */
2272 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2276 if (min (c, d) < min (a, b))
2277 swap (&a, &c), swap (&b, &d);
2278 if (min (b, d) < min (a, c))
2279 swap (&a, &b), swap (&c, &d);
2283 swap (&a, &b), swap (&c, &d);
2285 swap (&a, &c), swap (&b, &d);
2289 for (x = 0; x <= a; x++)
2290 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2292 *fisher2 = *fisher1;
2293 for (x = 1; x <= b; x++)
2294 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2297 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2298 columns with values COLS and N_ROWS rows with values ROWS. Values
2299 in the matrix sum to W. */
2301 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2302 double *fisher1, double *fisher2)
2306 chisq[0] = chisq[1] = 0.;
2307 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2308 *fisher1 = *fisher2 = SYSMIS;
2310 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2312 if (ns_rows <= 1 || ns_cols <= 1)
2314 chisq[0] = chisq[1] = SYSMIS;
2318 for (r = 0; r < n_rows; r++)
2319 for (c = 0; c < n_cols; c++)
2321 const double expected = row_tot[r] * col_tot[c] / W;
2322 const double freq = mat[n_cols * r + c];
2323 const double residual = freq - expected;
2325 chisq[0] += residual * residual / expected;
2327 chisq[1] += freq * log (expected / freq);
2338 /* Calculate Yates and Fisher exact test. */
2339 if (ns_cols == 2 && ns_rows == 2)
2341 double f11, f12, f21, f22;
2347 for (i = j = 0; i < n_cols; i++)
2348 if (col_tot[i] != 0.)
2357 f11 = mat[nz_cols[0]];
2358 f12 = mat[nz_cols[1]];
2359 f21 = mat[nz_cols[0] + n_cols];
2360 f22 = mat[nz_cols[1] + n_cols];
2365 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2368 chisq[3] = (W * x * x
2369 / (f11 + f12) / (f21 + f22)
2370 / (f11 + f21) / (f12 + f22));
2378 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2379 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2382 /* Calculate Mantel-Haenszel. */
2383 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2385 double r, ase_0, ase_1;
2386 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2388 chisq[4] = (W - 1.) * r * r;
2393 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2394 ASE_1, and ase_0 into ASE_0. The row and column values must be
2395 passed in X and Y. */
2397 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2399 double SX, SY, S, T;
2401 double sum_XYf, sum_X2Y2f;
2402 double sum_Xr, sum_X2r;
2403 double sum_Yc, sum_Y2c;
2406 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2407 for (j = 0; j < n_cols; j++)
2409 double fij = mat[j + i * n_cols];
2410 double product = X[i] * Y[j];
2411 double temp = fij * product;
2413 sum_X2Y2f += temp * product;
2416 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2418 sum_Xr += X[i] * row_tot[i];
2419 sum_X2r += X[i] * X[i] * row_tot[i];
2423 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2425 sum_Yc += Y[i] * col_tot[i];
2426 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2430 S = sum_XYf - sum_Xr * sum_Yc / W;
2431 SX = sum_X2r - sum_Xr * sum_Xr / W;
2432 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2435 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2440 for (s = c = 0., i = 0; i < n_rows; i++)
2441 for (j = 0; j < n_cols; j++)
2443 double Xresid, Yresid;
2446 Xresid = X[i] - Xbar;
2447 Yresid = Y[j] - Ybar;
2448 temp = (T * Xresid * Yresid
2450 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2451 y = mat[j + i * n_cols] * temp * temp - c;
2456 *ase_1 = sqrt (s) / (T * T);
2460 static double somers_d_v[3];
2461 static double somers_d_ase[3];
2462 static double somers_d_t[3];
2464 /* Calculate symmetric statistics and their asymptotic standard
2465 errors. Returns 0 if none could be calculated. */
2467 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2468 double t[N_SYMMETRIC])
2470 int q = min (ns_rows, ns_cols);
2479 for (i = 0; i < N_SYMMETRIC; i++)
2480 v[i] = ase[i] = t[i] = SYSMIS;
2483 /* Phi, Cramer's V, contingency coefficient. */
2484 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2486 double Xp = 0.; /* Pearson chi-square. */
2491 for (r = 0; r < n_rows; r++)
2492 for (c = 0; c < n_cols; c++)
2494 const double expected = row_tot[r] * col_tot[c] / W;
2495 const double freq = mat[n_cols * r + c];
2496 const double residual = freq - expected;
2498 Xp += residual * residual / expected;
2502 if (cmd.a_statistics[CRS_ST_PHI])
2504 v[0] = sqrt (Xp / W);
2505 v[1] = sqrt (Xp / (W * (q - 1)));
2507 if (cmd.a_statistics[CRS_ST_CC])
2508 v[2] = sqrt (Xp / (Xp + W));
2511 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2512 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2517 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2524 for (r = 0; r < n_rows; r++)
2525 Dr -= row_tot[r] * row_tot[r];
2526 for (c = 0; c < n_cols; c++)
2527 Dc -= col_tot[c] * col_tot[c];
2533 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2534 for (c = 0; c < n_cols; c++)
2538 for (r = 0; r < n_rows; r++)
2539 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2549 for (i = 0; i < n_rows; i++)
2553 for (j = 1; j < n_cols; j++)
2554 Cij += col_tot[j] - cum[j + i * n_cols];
2557 for (j = 1; j < n_cols; j++)
2558 Dij += cum[j + (i - 1) * n_cols];
2562 double fij = mat[j + i * n_cols];
2568 assert (j < n_cols);
2570 Cij -= col_tot[j] - cum[j + i * n_cols];
2571 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2575 Cij += cum[j - 1 + (i - 1) * n_cols];
2576 Dij -= cum[j + (i - 1) * n_cols];
2582 if (cmd.a_statistics[CRS_ST_BTAU])
2583 v[3] = (P - Q) / sqrt (Dr * Dc);
2584 if (cmd.a_statistics[CRS_ST_CTAU])
2585 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2586 if (cmd.a_statistics[CRS_ST_GAMMA])
2587 v[5] = (P - Q) / (P + Q);
2589 /* ASE for tau-b, tau-c, gamma. Calculations could be
2590 eliminated here, at expense of memory. */
2595 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2596 for (i = 0; i < n_rows; i++)
2600 for (j = 1; j < n_cols; j++)
2601 Cij += col_tot[j] - cum[j + i * n_cols];
2604 for (j = 1; j < n_cols; j++)
2605 Dij += cum[j + (i - 1) * n_cols];
2609 double fij = mat[j + i * n_cols];
2611 if (cmd.a_statistics[CRS_ST_BTAU])
2613 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2614 + v[3] * (row_tot[i] * Dc
2615 + col_tot[j] * Dr));
2616 btau_cum += fij * temp * temp;
2620 const double temp = Cij - Dij;
2621 ctau_cum += fij * temp * temp;
2624 if (cmd.a_statistics[CRS_ST_GAMMA])
2626 const double temp = Q * Cij - P * Dij;
2627 gamma_cum += fij * temp * temp;
2630 if (cmd.a_statistics[CRS_ST_D])
2632 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2633 - (P - Q) * (W - row_tot[i]));
2634 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2635 - (Q - P) * (W - col_tot[j]));
2640 assert (j < n_cols);
2642 Cij -= col_tot[j] - cum[j + i * n_cols];
2643 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2647 Cij += cum[j - 1 + (i - 1) * n_cols];
2648 Dij -= cum[j + (i - 1) * n_cols];
2654 btau_var = ((btau_cum
2655 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2657 if (cmd.a_statistics[CRS_ST_BTAU])
2659 ase[3] = sqrt (btau_var);
2660 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2663 if (cmd.a_statistics[CRS_ST_CTAU])
2665 ase[4] = ((2 * q / ((q - 1) * W * W))
2666 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2667 t[4] = v[4] / ase[4];
2669 if (cmd.a_statistics[CRS_ST_GAMMA])
2671 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2672 t[5] = v[5] / (2. / (P + Q)
2673 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2675 if (cmd.a_statistics[CRS_ST_D])
2677 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2678 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2679 somers_d_t[0] = (somers_d_v[0]
2681 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2682 somers_d_v[1] = (P - Q) / Dc;
2683 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2684 somers_d_t[1] = (somers_d_v[1]
2686 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2687 somers_d_v[2] = (P - Q) / Dr;
2688 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2689 somers_d_t[2] = (somers_d_v[2]
2691 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2697 /* Spearman correlation, Pearson's r. */
2698 if (cmd.a_statistics[CRS_ST_CORR])
2700 double *R = local_alloc (sizeof *R * n_rows);
2701 double *C = local_alloc (sizeof *C * n_cols);
2704 double y, t, c = 0., s = 0.;
2709 R[i] = s + (row_tot[i] + 1.) / 2.;
2716 assert (i < n_rows);
2721 double y, t, c = 0., s = 0.;
2726 C[j] = s + (col_tot[j] + 1.) / 2;
2733 assert (j < n_cols);
2737 calc_r (R, C, &v[6], &t[6], &ase[6]);
2743 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2747 /* Cohen's kappa. */
2748 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2750 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2753 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2754 i < ns_rows; i++, j++)
2758 while (col_tot[j] == 0.)
2761 prod = row_tot[i] * col_tot[j];
2762 sum = row_tot[i] + col_tot[j];
2764 sum_fii += mat[j + i * n_cols];
2766 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2767 sum_riciri_ci += prod * sum;
2769 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2770 for (j = 0; j < ns_cols; j++)
2772 double sum = row_tot[i] + col_tot[j];
2773 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2776 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2778 ase[8] = sqrt ((W * W * sum_rici
2779 + sum_rici * sum_rici
2780 - W * sum_riciri_ci)
2781 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2783 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2784 / pow2 (W * W - sum_rici))
2785 + ((2. * (W - sum_fii)
2786 * (2. * sum_fii * sum_rici
2787 - W * sum_fiiri_ci))
2788 / cube (W * W - sum_rici))
2789 + (pow2 (W - sum_fii)
2790 * (W * sum_fijri_ci2 - 4.
2791 * sum_rici * sum_rici)
2792 / pow4 (W * W - sum_rici))));
2794 t[8] = v[8] / ase[8];
2801 /* Calculate risk estimate. */
2803 calc_risk (double *value, double *upper, double *lower, union value *c)
2805 double f11, f12, f21, f22;
2811 for (i = 0; i < 3; i++)
2812 value[i] = upper[i] = lower[i] = SYSMIS;
2815 if (ns_rows != 2 || ns_cols != 2)
2822 for (i = j = 0; i < n_cols; i++)
2823 if (col_tot[i] != 0.)
2832 f11 = mat[nz_cols[0]];
2833 f12 = mat[nz_cols[1]];
2834 f21 = mat[nz_cols[0] + n_cols];
2835 f22 = mat[nz_cols[1] + n_cols];
2837 c[0] = cols[nz_cols[0]];
2838 c[1] = cols[nz_cols[1]];
2841 value[0] = (f11 * f22) / (f12 * f21);
2842 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2843 lower[0] = value[0] * exp (-1.960 * v);
2844 upper[0] = value[0] * exp (1.960 * v);
2846 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2847 v = sqrt ((f12 / (f11 * (f11 + f12)))
2848 + (f22 / (f21 * (f21 + f22))));
2849 lower[1] = value[1] * exp (-1.960 * v);
2850 upper[1] = value[1] * exp (1.960 * v);
2852 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2853 v = sqrt ((f11 / (f12 * (f11 + f12)))
2854 + (f21 / (f22 * (f21 + f22))));
2855 lower[2] = value[2] * exp (-1.960 * v);
2856 upper[2] = value[2] * exp (1.960 * v);
2861 /* Calculate directional measures. */
2863 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2864 double t[N_DIRECTIONAL])
2869 for (i = 0; i < N_DIRECTIONAL; i++)
2870 v[i] = ase[i] = t[i] = SYSMIS;
2874 if (cmd.a_statistics[CRS_ST_LAMBDA])
2876 double *fim = xnmalloc (n_rows, sizeof *fim);
2877 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2878 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2879 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2880 double sum_fim, sum_fmj;
2882 int rm_index, cm_index;
2885 /* Find maximum for each row and their sum. */
2886 for (sum_fim = 0., i = 0; i < n_rows; i++)
2888 double max = mat[i * n_cols];
2891 for (j = 1; j < n_cols; j++)
2892 if (mat[j + i * n_cols] > max)
2894 max = mat[j + i * n_cols];
2898 sum_fim += fim[i] = max;
2899 fim_index[i] = index;
2902 /* Find maximum for each column. */
2903 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2905 double max = mat[j];
2908 for (i = 1; i < n_rows; i++)
2909 if (mat[j + i * n_cols] > max)
2911 max = mat[j + i * n_cols];
2915 sum_fmj += fmj[j] = max;
2916 fmj_index[j] = index;
2919 /* Find maximum row total. */
2922 for (i = 1; i < n_rows; i++)
2923 if (row_tot[i] > rm)
2929 /* Find maximum column total. */
2932 for (j = 1; j < n_cols; j++)
2933 if (col_tot[j] > cm)
2939 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2940 v[1] = (sum_fmj - rm) / (W - rm);
2941 v[2] = (sum_fim - cm) / (W - cm);
2943 /* ASE1 for Y given X. */
2947 for (accum = 0., i = 0; i < n_rows; i++)
2948 for (j = 0; j < n_cols; j++)
2950 const int deltaj = j == cm_index;
2951 accum += (mat[j + i * n_cols]
2952 * pow2 ((j == fim_index[i])
2957 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2960 /* ASE0 for Y given X. */
2964 for (accum = 0., i = 0; i < n_rows; i++)
2965 if (cm_index != fim_index[i])
2966 accum += (mat[i * n_cols + fim_index[i]]
2967 + mat[i * n_cols + cm_index]);
2968 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2971 /* ASE1 for X given Y. */
2975 for (accum = 0., i = 0; i < n_rows; i++)
2976 for (j = 0; j < n_cols; j++)
2978 const int deltaj = i == rm_index;
2979 accum += (mat[j + i * n_cols]
2980 * pow2 ((i == fmj_index[j])
2985 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2988 /* ASE0 for X given Y. */
2992 for (accum = 0., j = 0; j < n_cols; j++)
2993 if (rm_index != fmj_index[j])
2994 accum += (mat[j + n_cols * fmj_index[j]]
2995 + mat[j + n_cols * rm_index]);
2996 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
2999 /* Symmetric ASE0 and ASE1. */
3004 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3005 for (j = 0; j < n_cols; j++)
3007 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3008 int temp1 = (i == rm_index) + (j == cm_index);
3009 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3010 accum1 += (mat[j + i * n_cols]
3011 * pow2 (temp0 + (v[0] - 1.) * temp1));
3013 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3014 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3015 / (2. * W - rm - cm));
3024 double sum_fij2_ri, sum_fij2_ci;
3025 double sum_ri2, sum_cj2;
3027 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3028 for (j = 0; j < n_cols; j++)
3030 double temp = pow2 (mat[j + i * n_cols]);
3031 sum_fij2_ri += temp / row_tot[i];
3032 sum_fij2_ci += temp / col_tot[j];
3035 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3036 sum_ri2 += row_tot[i] * row_tot[i];
3038 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3039 sum_cj2 += col_tot[j] * col_tot[j];
3041 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3042 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3046 if (cmd.a_statistics[CRS_ST_UC])
3048 double UX, UY, UXY, P;
3049 double ase1_yx, ase1_xy, ase1_sym;
3052 for (UX = 0., i = 0; i < n_rows; i++)
3053 if (row_tot[i] > 0.)
3054 UX -= row_tot[i] / W * log (row_tot[i] / W);
3056 for (UY = 0., j = 0; j < n_cols; j++)
3057 if (col_tot[j] > 0.)
3058 UY -= col_tot[j] / W * log (col_tot[j] / W);
3060 for (UXY = P = 0., i = 0; i < n_rows; i++)
3061 for (j = 0; j < n_cols; j++)
3063 double entry = mat[j + i * n_cols];
3068 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3069 UXY -= entry / W * log (entry / W);
3072 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3073 for (j = 0; j < n_cols; j++)
3075 double entry = mat[j + i * n_cols];
3080 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3081 + (UX - UXY) * log (col_tot[j] / W));
3082 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3083 + (UY - UXY) * log (row_tot[i] / W));
3084 ase1_sym += entry * pow2 ((UXY
3085 * log (row_tot[i] * col_tot[j] / (W * W)))
3086 - (UX + UY) * log (entry / W));
3089 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3090 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3091 t[5] = v[5] / ((2. / (W * (UX + UY)))
3092 * sqrt (P - pow2 (UX + UY - UXY) / W));
3094 v[6] = (UX + UY - UXY) / UX;
3095 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3096 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3098 v[7] = (UX + UY - UXY) / UY;
3099 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3100 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3104 if (cmd.a_statistics[CRS_ST_D])
3109 calc_symmetric (NULL, NULL, NULL);
3110 for (i = 0; i < 3; i++)
3112 v[8 + i] = somers_d_v[i];
3113 ase[8 + i] = somers_d_ase[i];
3114 t[8 + i] = somers_d_t[i];
3119 if (cmd.a_statistics[CRS_ST_ETA])
3122 double sum_Xr, sum_X2r;
3126 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3128 sum_Xr += rows[i].f * row_tot[i];
3129 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3131 SX = sum_X2r - sum_Xr * sum_Xr / W;
3133 for (SXW = 0., j = 0; j < n_cols; j++)
3137 for (cum = 0., i = 0; i < n_rows; i++)
3139 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3140 cum += rows[i].f * mat[j + i * n_cols];
3143 SXW -= cum * cum / col_tot[j];
3145 v[11] = sqrt (1. - SXW / SX);
3149 double sum_Yc, sum_Y2c;
3153 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3155 sum_Yc += cols[i].f * col_tot[i];
3156 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3158 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3160 for (SYW = 0., i = 0; i < n_rows; i++)
3164 for (cum = 0., j = 0; j < n_cols; j++)
3166 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3167 cum += cols[j].f * mat[j + i * n_cols];
3170 SYW -= cum * cum / row_tot[i];
3172 v[12] = sqrt (1. - SYW / SY);
3179 /* A wrapper around data_out() that limits string output to short
3180 string width and null terminates the result. */
3182 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3184 struct fmt_spec fmt_subst;
3186 /* Limit to short string width. */
3187 if (formats[fp->type].cat & FCAT_STRING)
3191 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3192 if (fmt_subst.type == FMT_A)
3193 fmt_subst.w = min (8, fmt_subst.w);
3195 fmt_subst.w = min (16, fmt_subst.w);
3201 data_out (s, fp, v);
3203 /* Null terminate. */