1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 - Pearson's R (but not Spearman!) is off a little.
20 - T values for Spearman's R and Pearson's R are wrong.
21 - How to calculate significance of symmetric and directional measures?
22 - Asymmetric ASEs and T values for lambda are wrong.
23 - ASE of Goodman and Kruskal's tau is not calculated.
24 - ASE of symmetric somers' d is wrong.
25 - Approx. T of uncertainty coefficient is wrong.
32 #include <gsl/gsl_cdf.h>
36 #include <data/case.h>
37 #include <data/casegrouper.h>
38 #include <data/casereader.h>
39 #include <data/data-out.h>
40 #include <data/dictionary.h>
41 #include <data/format.h>
42 #include <data/procedure.h>
43 #include <data/value-labels.h>
44 #include <data/variable.h>
45 #include <language/command.h>
46 #include <language/dictionary/split-file.h>
47 #include <language/lexer/lexer.h>
48 #include <language/lexer/variable-parser.h>
49 #include <libpspp/alloc.h>
50 #include <libpspp/array.h>
51 #include <libpspp/assertion.h>
52 #include <libpspp/compiler.h>
53 #include <libpspp/hash.h>
54 #include <libpspp/magic.h>
55 #include <libpspp/message.h>
56 #include <libpspp/message.h>
57 #include <libpspp/misc.h>
58 #include <libpspp/pool.h>
59 #include <libpspp/str.h>
60 #include <output/output.h>
61 #include <output/table.h>
66 #define _(msgid) gettext (msgid)
67 #define N_(msgid) msgid
75 missing=miss:!table/include/report;
76 +write[wr_]=none,cells,all;
77 +format=fmt:!labels/nolabels/novallabs,
80 tabl:!tables/notables,
83 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
85 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
91 /* Number of chi-square statistics. */
94 /* Number of symmetric statistics. */
97 /* Number of directional statistics. */
98 #define N_DIRECTIONAL 13
100 /* A single table entry for general mode. */
103 int table; /* Flattened table number. */
106 double freq; /* Frequency count. */
107 double *data; /* Crosstabulation table for integer mode. */
110 union value values[1]; /* Values. */
113 /* A crosstabulation. */
116 int nvar; /* Number of variables. */
117 double missing; /* Missing cases count. */
118 int ofs; /* Integer mode: Offset into sorted_tab[]. */
119 const struct variable *vars[2]; /* At least two variables; sorted by
120 larger indices first. */
123 /* Integer mode variable info. */
126 int min; /* Minimum value. */
127 int max; /* Maximum value + 1. */
128 int count; /* max - min. */
131 static inline struct var_range *
132 get_var_range (const struct variable *v)
134 return var_get_aux (v);
137 /* Indexes into crosstab.v. */
144 /* General mode crosstabulation table. */
145 static struct hsh_table *gen_tab; /* Hash table. */
146 static int n_sorted_tab; /* Number of entries in sorted_tab. */
147 static struct table_entry **sorted_tab; /* Sorted table. */
149 /* Variables specifies on VARIABLES. */
150 static const struct variable **variables;
151 static size_t variables_cnt;
154 static struct crosstab **xtab;
157 /* Integer or general mode? */
166 static int num_cells; /* Number of cells requested. */
167 static int cells[8]; /* Cells requested. */
170 static int write_style; /* One of WR_* that specifies the WRITE style. */
172 /* Command parsing info. */
173 static struct cmd_crosstabs cmd;
176 static struct pool *pl_tc; /* For table cells. */
177 static struct pool *pl_col; /* For column data. */
179 static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
180 static void precalc (struct casereader *, const struct dataset *);
181 static void calc_general (struct ccase *, const struct dataset *);
182 static void calc_integer (struct ccase *, const struct dataset *);
183 static void postcalc (void);
184 static void submit (struct tab_table *);
186 static void format_short (char *s, const struct fmt_spec *fp,
187 const union value *v);
189 /* Parse and execute CROSSTABS, then clean up. */
191 cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
193 int result = internal_cmd_crosstabs (lexer, ds);
196 pool_destroy (pl_tc);
197 pool_destroy (pl_col);
202 /* Parses and executes the CROSSTABS procedure. */
204 internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
206 struct casegrouper *grouper;
207 struct casereader *input, *group;
215 pl_tc = pool_create ();
216 pl_col = pool_create ();
218 if (!parse_crosstabs (lexer, ds, &cmd, NULL))
221 mode = variables ? INTEGER : GENERAL;
226 cmd.a_cells[CRS_CL_COUNT] = 1;
232 for (i = 0; i < CRS_CL_count; i++)
237 cmd.a_cells[CRS_CL_COUNT] = 1;
238 cmd.a_cells[CRS_CL_ROW] = 1;
239 cmd.a_cells[CRS_CL_COLUMN] = 1;
240 cmd.a_cells[CRS_CL_TOTAL] = 1;
242 if (cmd.a_cells[CRS_CL_ALL])
244 for (i = 0; i < CRS_CL_count; i++)
246 cmd.a_cells[CRS_CL_ALL] = 0;
248 cmd.a_cells[CRS_CL_NONE] = 0;
250 for (num_cells = i = 0; i < CRS_CL_count; i++)
252 cells[num_cells++] = i;
255 if (cmd.sbc_statistics)
260 for (i = 0; i < CRS_ST_count; i++)
261 if (cmd.a_statistics[i])
264 cmd.a_statistics[CRS_ST_CHISQ] = 1;
265 if (cmd.a_statistics[CRS_ST_ALL])
266 for (i = 0; i < CRS_ST_count; i++)
267 cmd.a_statistics[i] = 1;
271 if (cmd.miss == CRS_REPORT && mode == GENERAL)
273 msg (SE, _("Missing mode REPORT not allowed in general mode. "
274 "Assuming MISSING=TABLE."));
275 cmd.miss = CRS_TABLE;
279 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
280 cmd.a_write[CRS_WR_ALL] = 0;
281 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
283 msg (SE, _("Write mode ALL not allowed in general mode. "
284 "Assuming WRITE=CELLS."));
285 cmd.a_write[CRS_WR_CELLS] = 1;
288 && (cmd.a_write[CRS_WR_NONE]
289 + cmd.a_write[CRS_WR_ALL]
290 + cmd.a_write[CRS_WR_CELLS] == 0))
291 cmd.a_write[CRS_WR_CELLS] = 1;
292 if (cmd.a_write[CRS_WR_CELLS])
293 write_style = CRS_WR_CELLS;
294 else if (cmd.a_write[CRS_WR_ALL])
295 write_style = CRS_WR_ALL;
297 write_style = CRS_WR_NONE;
299 input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
301 grouper = casegrouper_create_splits (input, dataset_dict (ds));
302 while (casegrouper_get_next_group (grouper, &group))
308 for (; casereader_read (group, &c); case_destroy (&c))
311 calc_general (&c, ds);
313 calc_integer (&c, ds);
315 casereader_destroy (group);
319 ok = casegrouper_destroy (grouper);
320 ok = proc_commit (ds) && ok;
322 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
325 /* Parses the TABLES subcommand. */
327 crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
329 struct const_var_set *var_set;
331 const struct variable ***by = NULL;
332 size_t *by_nvar = NULL;
336 /* Ensure that this is a TABLES subcommand. */
337 if (!lex_match_id (lexer, "TABLES")
338 && (lex_token (lexer) != T_ID ||
339 dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
340 && lex_token (lexer) != T_ALL)
342 lex_match (lexer, '=');
344 if (variables != NULL)
345 var_set = const_var_set_create_from_array (variables, variables_cnt);
347 var_set = const_var_set_create_from_dict (dataset_dict (ds));
348 assert (var_set != NULL);
352 by = xnrealloc (by, n_by + 1, sizeof *by);
353 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
354 if (!parse_const_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
355 PV_NO_DUPLICATE | PV_NO_SCRATCH))
357 if (xalloc_oversized (nx, by_nvar[n_by]))
359 msg (SE, _("Too many crosstabulation variables or dimensions."));
365 if (!lex_match (lexer, T_BY))
369 lex_error (lexer, _("expecting BY"));
378 int *by_iter = xcalloc (n_by, sizeof *by_iter);
381 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
382 for (i = 0; i < nx; i++)
386 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
393 for (i = 0; i < n_by; i++)
394 x->vars[i] = by[i][by_iter[i]];
400 for (i = n_by - 1; i >= 0; i--)
402 if (++by_iter[i] < by_nvar[i])
415 /* All return paths lead here. */
419 for (i = 0; i < n_by; i++)
425 const_var_set_destroy (var_set);
430 /* Parses the VARIABLES subcommand. */
432 crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
436 msg (SE, _("VARIABLES must be specified before TABLES."));
440 lex_match (lexer, '=');
444 size_t orig_nv = variables_cnt;
449 if (!parse_variables_const (lexer, dataset_dict (ds),
450 &variables, &variables_cnt,
451 (PV_APPEND | PV_NUMERIC
452 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
455 if (lex_token (lexer) != '(')
457 lex_error (lexer, "expecting `('");
462 if (!lex_force_int (lexer))
464 min = lex_integer (lexer);
467 lex_match (lexer, ',');
469 if (!lex_force_int (lexer))
471 max = lex_integer (lexer);
474 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
480 if (lex_token (lexer) != ')')
482 lex_error (lexer, "expecting `)'");
487 for (i = orig_nv; i < variables_cnt; i++)
489 struct var_range *vr = xmalloc (sizeof *vr);
492 vr->count = max - min + 1;
493 var_attach_aux (variables[i], vr, var_dtor_free);
496 if (lex_token (lexer) == '/')
508 /* Data file processing. */
510 static int compare_table_entry (const void *, const void *, const void *);
511 static unsigned hash_table_entry (const void *, const void *);
513 /* Set up the crosstabulation tables for processing. */
515 precalc (struct casereader *input, const struct dataset *ds)
519 if (!casereader_peek (input, 0, &c))
521 output_split_file_values (ds, &c);
526 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
536 for (i = 0; i < nxtab; i++)
538 struct crosstab *x = xtab[i];
543 x->ofs = n_sorted_tab;
545 for (j = 2; j < x->nvar; j++)
546 count *= get_var_range (x->vars[j - 2])->count;
548 sorted_tab = xnrealloc (sorted_tab,
549 n_sorted_tab + count, sizeof *sorted_tab);
550 v = local_alloc (sizeof *v * x->nvar);
551 for (j = 2; j < x->nvar; j++)
552 v[j] = get_var_range (x->vars[j])->min;
553 for (j = 0; j < count; j++)
555 struct table_entry *te;
558 te = sorted_tab[n_sorted_tab++]
559 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
563 int row_cnt = get_var_range (x->vars[0])->count;
564 int col_cnt = get_var_range (x->vars[1])->count;
565 const int mat_size = row_cnt * col_cnt;
568 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
569 for (m = 0; m < mat_size; m++)
573 for (k = 2; k < x->nvar; k++)
574 te->values[k].f = v[k];
575 for (k = 2; k < x->nvar; k++)
577 struct var_range *vr = get_var_range (x->vars[k]);
578 if (++v[k] >= vr->max)
587 sorted_tab = xnrealloc (sorted_tab,
588 n_sorted_tab + 1, sizeof *sorted_tab);
589 sorted_tab[n_sorted_tab] = NULL;
594 /* Form crosstabulations for general mode. */
596 calc_general (struct ccase *c, const struct dataset *ds)
598 /* Missing values to exclude. */
599 enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY
600 : cmd.miss == CRS_INCLUDE ? MV_SYSTEM
604 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
606 /* Flattened current table index. */
609 for (t = 0; t < nxtab; t++)
611 struct crosstab *x = xtab[t];
612 const size_t entry_size = (sizeof (struct table_entry)
613 + sizeof (union value) * (x->nvar - 1));
614 struct table_entry *te = local_alloc (entry_size);
616 /* Construct table entry for the current record and table. */
622 for (j = 0; j < x->nvar; j++)
624 const union value *v = case_data (c, x->vars[j]);
625 if (var_is_value_missing (x->vars[j], v, exclude))
627 x->missing += weight;
631 if (var_is_numeric (x->vars[j]))
632 te->values[j].f = case_num (c, x->vars[j]);
635 memcpy (te->values[j].s, case_str (c, x->vars[j]),
636 var_get_width (x->vars[j]));
638 /* Necessary in order to simplify comparisons. */
639 memset (&te->values[j].s[var_get_width (x->vars[j])], 0,
640 sizeof (union value) - var_get_width (x->vars[j]));
645 /* Add record to hash table. */
647 struct table_entry **tepp
648 = (struct table_entry **) hsh_probe (gen_tab, te);
651 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
654 memcpy (tep, te, entry_size);
659 (*tepp)->u.freq += weight;
668 calc_integer (struct ccase *c, const struct dataset *ds)
670 bool bad_warn = true;
673 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
675 /* Flattened current table index. */
678 for (t = 0; t < nxtab; t++)
680 struct crosstab *x = xtab[t];
685 for (i = 0; i < x->nvar; i++)
687 const struct variable *const v = x->vars[i];
688 struct var_range *vr = get_var_range (v);
689 double value = case_num (c, v);
691 /* Note that the first test also rules out SYSMIS. */
692 if ((value < vr->min || value >= vr->max)
693 || (cmd.miss == CRS_TABLE
694 && var_is_num_missing (v, value, MV_USER)))
696 x->missing += weight;
702 ofs += fact * ((int) value - vr->min);
708 const struct variable *row_var = x->vars[ROW_VAR];
709 const int row = case_num (c, row_var) - get_var_range (row_var)->min;
711 const struct variable *col_var = x->vars[COL_VAR];
712 const int col = case_num (c, col_var) - get_var_range (col_var)->min;
714 const int col_dim = get_var_range (col_var)->count;
716 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
723 /* Compare the table_entry's at A and B and return a strcmp()-type
726 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
728 const struct table_entry *a = a_;
729 const struct table_entry *b = b_;
731 if (a->table > b->table)
733 else if (a->table < b->table)
737 const struct crosstab *x = xtab[a->table];
740 for (i = x->nvar - 1; i >= 0; i--)
741 if (var_is_numeric (x->vars[i]))
743 const double diffnum = a->values[i].f - b->values[i].f;
746 else if (diffnum > 0)
751 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
752 var_get_width (x->vars[i]));
761 /* Calculate a hash value from table_entry A. */
763 hash_table_entry (const void *a_, const void *aux UNUSED)
765 const struct table_entry *a = a_;
770 for (i = 0; i < xtab[a->table]->nvar; i++)
771 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
776 /* Post-data reading calculations. */
778 static struct table_entry **find_pivot_extent (struct table_entry **,
779 int *cnt, int pivot);
780 static void enum_var_values (struct table_entry **entries, int entry_cnt,
782 union value **values, int *value_cnt);
783 static void output_pivot_table (struct table_entry **, struct table_entry **,
784 double **, double **, double **,
785 int *, int *, int *);
786 static void make_summary_table (void);
793 n_sorted_tab = hsh_count (gen_tab);
794 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
797 make_summary_table ();
799 /* Identify all the individual crosstabulation tables, and deal with
802 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
803 int pc = n_sorted_tab; /* Pivot count. */
805 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
806 int maxrows = 0, maxcols = 0, maxcells = 0;
810 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
814 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
815 &maxrows, &maxcols, &maxcells);
824 hsh_destroy (gen_tab);
827 static void insert_summary (struct tab_table *, int tab_index, double valid);
829 /* Output a table summarizing the cases processed. */
831 make_summary_table (void)
833 struct tab_table *summary;
835 struct table_entry **pb = sorted_tab, **pe;
836 int pc = n_sorted_tab;
839 summary = tab_create (7, 3 + nxtab, 1);
840 tab_title (summary, _("Summary."));
841 tab_headers (summary, 1, 0, 3, 0);
842 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
843 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
844 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
845 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
846 tab_hline (summary, TAL_1, 1, 6, 1);
847 tab_hline (summary, TAL_1, 1, 6, 2);
848 tab_vline (summary, TAL_1, 3, 1, 1);
849 tab_vline (summary, TAL_1, 5, 1, 1);
853 for (i = 0; i < 3; i++)
855 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
856 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
859 tab_offset (summary, 0, 3);
865 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
869 while (cur_tab < (*pb)->table)
870 insert_summary (summary, cur_tab++, 0.);
873 for (valid = 0.; pb < pe; pb++)
874 valid += (*pb)->u.freq;
877 const struct crosstab *const x = xtab[(*pb)->table];
878 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
879 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
880 const int count = n_cols * n_rows;
882 for (valid = 0.; pb < pe; pb++)
884 const double *data = (*pb)->u.data;
887 for (i = 0; i < count; i++)
891 insert_summary (summary, cur_tab++, valid);
896 while (cur_tab < nxtab)
897 insert_summary (summary, cur_tab++, 0.);
902 /* Inserts a line into T describing the crosstabulation at index
903 TAB_INDEX, which has VALID valid observations. */
905 insert_summary (struct tab_table *t, int tab_index, double valid)
907 struct crosstab *x = xtab[tab_index];
909 tab_hline (t, TAL_1, 0, 6, 0);
911 /* Crosstabulation name. */
913 char *buf = local_alloc (128 * x->nvar);
917 for (i = 0; i < x->nvar; i++)
920 cp = stpcpy (cp, " * ");
922 cp = stpcpy (cp, var_to_string (x->vars[i]));
924 tab_text (t, 0, 0, TAB_LEFT, buf);
929 /* Counts and percentages. */
939 for (i = 0; i < 3; i++)
941 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
942 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
953 static struct tab_table *table; /* Crosstabulation table. */
954 static struct tab_table *chisq; /* Chi-square table. */
955 static struct tab_table *sym; /* Symmetric measures table. */
956 static struct tab_table *risk; /* Risk estimate table. */
957 static struct tab_table *direct; /* Directional measures table. */
960 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
962 /* Column values, number of columns. */
963 static union value *cols;
966 /* Row values, number of rows. */
967 static union value *rows;
970 /* Number of statistically interesting columns/rows (columns/rows with
972 static int ns_cols, ns_rows;
974 /* Crosstabulation. */
975 static const struct crosstab *x;
977 /* Number of variables from the crosstabulation to consider. This is
978 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
981 /* Matrix contents. */
982 static double *mat; /* Matrix proper. */
983 static double *row_tot; /* Row totals. */
984 static double *col_tot; /* Column totals. */
985 static double W; /* Grand total. */
987 static void display_dimensions (struct tab_table *, int first_difference,
988 struct table_entry *);
989 static void display_crosstabulation (void);
990 static void display_chisq (void);
991 static void display_symmetric (void);
992 static void display_risk (void);
993 static void display_directional (void);
994 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
995 static void table_value_missing (struct tab_table *table, int c, int r,
996 unsigned char opt, const union value *v,
997 const struct variable *var);
998 static void delete_missing (void);
1000 /* Output pivot table beginning at PB and continuing until PE,
1001 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1002 hold *MAXROWS entries. */
1004 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1005 double **matp, double **row_totp, double **col_totp,
1006 int *maxrows, int *maxcols, int *maxcells)
1009 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1010 int tc = pe - pb; /* Table count. */
1012 /* Table entry for header comparison. */
1013 struct table_entry *cmp = NULL;
1015 x = xtab[(*pb)->table];
1016 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1018 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1020 /* Crosstabulation table initialization. */
1023 table = tab_create (nvar + n_cols,
1024 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1025 tab_headers (table, nvar - 1, 0, 2, 0);
1027 /* First header line. */
1028 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1029 TAB_CENTER | TAT_TITLE, var_get_name (x->vars[COL_VAR]));
1031 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1033 /* Second header line. */
1037 for (i = 2; i < nvar; i++)
1038 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1039 TAB_RIGHT | TAT_TITLE, var_to_string (x->vars[i]));
1040 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1041 var_get_name (x->vars[ROW_VAR]));
1042 for (i = 0; i < n_cols; i++)
1043 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1045 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1048 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1049 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1053 char *title = local_alloc (x->nvar * 64 + 128);
1057 if (cmd.pivot == CRS_PIVOT)
1058 for (i = 0; i < nvar; i++)
1061 cp = stpcpy (cp, " by ");
1062 cp = stpcpy (cp, var_get_name (x->vars[i]));
1066 cp = spprintf (cp, "%s by %s for",
1067 var_get_name (x->vars[0]),
1068 var_get_name (x->vars[1]));
1069 for (i = 2; i < nvar; i++)
1071 char buf[64], *bufp;
1076 cp = stpcpy (cp, var_get_name (x->vars[i]));
1078 format_short (buf, var_get_print_format (x->vars[i]),
1080 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1082 cp = stpcpy (cp, bufp);
1086 cp = stpcpy (cp, " [");
1087 for (i = 0; i < num_cells; i++)
1095 static const struct tuple cell_names[] =
1097 {CRS_CL_COUNT, N_("count")},
1098 {CRS_CL_ROW, N_("row %")},
1099 {CRS_CL_COLUMN, N_("column %")},
1100 {CRS_CL_TOTAL, N_("total %")},
1101 {CRS_CL_EXPECTED, N_("expected")},
1102 {CRS_CL_RESIDUAL, N_("residual")},
1103 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1104 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1108 const struct tuple *t;
1110 for (t = cell_names; t->value != cells[i]; t++)
1111 assert (t->value != -1);
1113 cp = stpcpy (cp, ", ");
1114 cp = stpcpy (cp, gettext (t->name));
1118 tab_title (table, "%s", title);
1122 tab_offset (table, 0, 2);
1127 /* Chi-square table initialization. */
1128 if (cmd.a_statistics[CRS_ST_CHISQ])
1130 chisq = tab_create (6 + (nvar - 2),
1131 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1132 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1134 tab_title (chisq, _("Chi-square tests."));
1136 tab_offset (chisq, nvar - 2, 0);
1137 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1138 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1139 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1140 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1141 _("Asymp. Sig. (2-sided)"));
1142 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1143 _("Exact. Sig. (2-sided)"));
1144 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1145 _("Exact. Sig. (1-sided)"));
1147 tab_offset (chisq, 0, 1);
1152 /* Symmetric measures. */
1153 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1154 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1155 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1156 || cmd.a_statistics[CRS_ST_KAPPA])
1158 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1159 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1160 tab_title (sym, _("Symmetric measures."));
1162 tab_offset (sym, nvar - 2, 0);
1163 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1164 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1165 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1166 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1167 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1168 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1169 tab_offset (sym, 0, 1);
1174 /* Risk estimate. */
1175 if (cmd.a_statistics[CRS_ST_RISK])
1177 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1178 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1179 tab_title (risk, _("Risk estimate."));
1181 tab_offset (risk, nvar - 2, 0);
1182 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1183 _("95%% Confidence Interval"));
1184 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1185 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1186 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1187 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1188 tab_hline (risk, TAL_1, 2, 3, 1);
1189 tab_vline (risk, TAL_1, 2, 0, 1);
1190 tab_offset (risk, 0, 2);
1195 /* Directional measures. */
1196 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1197 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1199 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1200 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1201 tab_title (direct, _("Directional measures."));
1203 tab_offset (direct, nvar - 2, 0);
1204 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1205 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1206 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1207 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1208 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1209 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1210 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1211 tab_offset (direct, 0, 1);
1218 /* Find pivot subtable if applicable. */
1219 te = find_pivot_extent (tb, &tc, 0);
1223 /* Find all the row variable values. */
1224 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1226 /* Allocate memory space for the column and row totals. */
1227 if (n_rows > *maxrows)
1229 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1230 row_tot = *row_totp;
1233 if (n_cols > *maxcols)
1235 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1236 col_tot = *col_totp;
1240 /* Allocate table space for the matrix. */
1241 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1242 tab_realloc (table, -1,
1243 MAX (tab_nr (table) + (n_rows + 1) * num_cells,
1244 tab_nr (table) * (pe - pb) / (te - tb)));
1246 if (mode == GENERAL)
1248 /* Allocate memory space for the matrix. */
1249 if (n_cols * n_rows > *maxcells)
1251 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1252 *maxcells = n_cols * n_rows;
1257 /* Build the matrix and calculate column totals. */
1259 union value *cur_col = cols;
1260 union value *cur_row = rows;
1262 double *cp = col_tot;
1263 struct table_entry **p;
1266 for (p = &tb[0]; p < te; p++)
1268 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1272 for (; cur_row < &rows[n_rows]; cur_row++)
1278 mp = &mat[cur_col - cols];
1281 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1288 *cp += *mp = (*p)->u.freq;
1293 /* Zero out the rest of the matrix. */
1294 for (; cur_row < &rows[n_rows]; cur_row++)
1300 if (cur_col < &cols[n_cols])
1302 const int rem_cols = n_cols - (cur_col - cols);
1305 for (c = 0; c < rem_cols; c++)
1307 mp = &mat[cur_col - cols];
1308 for (r = 0; r < n_rows; r++)
1310 for (c = 0; c < rem_cols; c++)
1312 mp += n_cols - rem_cols;
1320 double *tp = col_tot;
1322 assert (mode == INTEGER);
1323 mat = (*tb)->u.data;
1326 /* Calculate column totals. */
1327 for (c = 0; c < n_cols; c++)
1330 double *cp = &mat[c];
1332 for (r = 0; r < n_rows; r++)
1333 cum += cp[r * n_cols];
1341 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1342 ns_cols += *cp != 0.;
1345 /* Calculate row totals. */
1348 double *rp = row_tot;
1351 for (ns_rows = 0, r = n_rows; r--; )
1354 for (c = n_cols; c--; )
1362 /* Calculate grand total. */
1368 if (n_rows < n_cols)
1369 tp = row_tot, n = n_rows;
1371 tp = col_tot, n = n_cols;
1377 /* Find the first variable that differs from the last subtable,
1378 then display the values of the dimensioning variables for
1379 each table that needs it. */
1381 int first_difference = nvar - 1;
1384 for (; ; first_difference--)
1386 assert (first_difference >= 2);
1387 if (memcmp (&cmp->values[first_difference],
1388 &(*tb)->values[first_difference],
1389 sizeof *cmp->values))
1395 display_dimensions (table, first_difference, *tb);
1397 display_dimensions (chisq, first_difference, *tb);
1399 display_dimensions (sym, first_difference, *tb);
1401 display_dimensions (risk, first_difference, *tb);
1403 display_dimensions (direct, first_difference, *tb);
1407 display_crosstabulation ();
1408 if (cmd.miss == CRS_REPORT)
1413 display_symmetric ();
1417 display_directional ();
1428 tab_resize (chisq, 4 + (nvar - 2), -1);
1439 /* Delete missing rows and columns for statistical analysis when
1442 delete_missing (void)
1447 for (r = 0; r < n_rows; r++)
1448 if (var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1452 for (c = 0; c < n_cols; c++)
1453 mat[c + r * n_cols] = 0.;
1461 for (c = 0; c < n_cols; c++)
1462 if (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1466 for (r = 0; r < n_rows; r++)
1467 mat[c + r * n_cols] = 0.;
1473 /* Prepare table T for submission, and submit it. */
1475 submit (struct tab_table *t)
1482 tab_resize (t, -1, 0);
1483 if (tab_nr (t) == tab_t (t))
1488 tab_offset (t, 0, 0);
1490 for (i = 2; i < nvar; i++)
1491 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1492 var_to_string (x->vars[i]));
1493 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1494 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1496 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1498 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1499 tab_dim (t, crosstabs_dim);
1503 /* Sets the widths of all the columns and heights of all the rows in
1504 table T for driver D. */
1506 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1510 /* Width of a numerical column. */
1511 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1512 if (cmd.miss == CRS_REPORT)
1513 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1515 /* Set width for header columns. */
1521 w = d->width - c * (t->nc - t->l);
1522 for (i = 0; i <= t->nc; i++)
1526 if (w < d->prop_em_width * 8)
1527 w = d->prop_em_width * 8;
1529 if (w > d->prop_em_width * 15)
1530 w = d->prop_em_width * 15;
1532 for (i = 0; i < t->l; i++)
1536 for (i = t->l; i < t->nc; i++)
1539 for (i = 0; i < t->nr; i++)
1540 t->h[i] = tab_natural_height (t, d, i);
1543 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1544 int *cnt, int pivot);
1545 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1546 int *cnt, int pivot);
1548 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1550 static struct table_entry **
1551 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1553 return (mode == GENERAL
1554 ? find_pivot_extent_general (tp, cnt, pivot)
1555 : find_pivot_extent_integer (tp, cnt, pivot));
1558 /* Find the extent of a region in TP that contains one table. If
1559 PIVOT != 0 that means a set of table entries with identical table
1560 number; otherwise they also have to have the same values for every
1561 dimension after the row and column dimensions. The table that is
1562 searched starts at TP and has length CNT. Returns the first entry
1563 after the last one in the table; sets *CNT to the number of
1564 remaining values. If there are no entries in TP at all, returns
1565 NULL. A yucky interface, admittedly, but it works. */
1566 static struct table_entry **
1567 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1569 struct table_entry *fp = *tp;
1574 x = xtab[(*tp)->table];
1582 if ((*tp)->table != fp->table)
1587 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1594 /* Integer mode correspondent to find_pivot_extent_general(). This
1595 could be optimized somewhat, but I just don't give a crap about
1596 CROSSTABS performance in integer mode, which is just a
1597 CROSSTABS wart as far as I'm concerned.
1599 That said, feel free to send optimization patches to me. */
1600 static struct table_entry **
1601 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1603 struct table_entry *fp = *tp;
1608 x = xtab[(*tp)->table];
1616 if ((*tp)->table != fp->table)
1621 if (memcmp (&(*tp)->values[2], &fp->values[2],
1622 sizeof (union value) * (x->nvar - 2)))
1629 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1630 result. WIDTH_ points to an int which is either 0 for a
1631 numeric value or a string width for a string value. */
1633 compare_value (const void *a_, const void *b_, const void *width_)
1635 const union value *a = a_;
1636 const union value *b = b_;
1637 const int *pwidth = width_;
1638 const int width = *pwidth;
1641 return (a->f < b->f) ? -1 : (a->f > b->f);
1643 return strncmp (a->s, b->s, width);
1646 /* Given an array of ENTRY_CNT table_entry structures starting at
1647 ENTRIES, creates a sorted list of the values that the variable
1648 with index VAR_IDX takes on. The values are returned as a
1649 malloc()'darray stored in *VALUES, with the number of values
1650 stored in *VALUE_CNT.
1653 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1654 union value **values, int *value_cnt)
1656 const struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1658 if (mode == GENERAL)
1660 int width = var_get_width (v);
1663 *values = xnmalloc (entry_cnt, sizeof **values);
1664 for (i = 0; i < entry_cnt; i++)
1665 (*values)[i] = entries[i]->values[var_idx];
1666 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1667 compare_value, &width);
1671 struct var_range *vr = get_var_range (v);
1674 assert (mode == INTEGER);
1675 *values = xnmalloc (vr->count, sizeof **values);
1676 for (i = 0; i < vr->count; i++)
1677 (*values)[i].f = i + vr->min;
1678 *value_cnt = vr->count;
1682 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1683 from V, displayed with print format spec from variable VAR. When
1684 in REPORT missing-value mode, missing values have an M appended. */
1686 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1687 const union value *v, const struct variable *var)
1690 const struct fmt_spec *print = var_get_print_format (var);
1692 const char *label = var_lookup_value_label (var, v);
1695 tab_text (table, c, r, TAB_LEFT, label);
1699 s.string = tab_alloc (table, print->w);
1700 format_short (s.string, print, v);
1701 s.length = strlen (s.string);
1702 if (cmd.miss == CRS_REPORT && var_is_num_missing (var, v->f, MV_USER))
1703 s.string[s.length++] = 'M';
1704 while (s.length && *s.string == ' ')
1709 tab_raw (table, c, r, opt, &s);
1712 /* Draws a line across TABLE at the current row to indicate the most
1713 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1714 that changed, and puts the values that changed into the table. TB
1715 and X must be the corresponding table_entry and crosstab,
1718 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1720 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1722 for (; first_difference >= 2; first_difference--)
1723 table_value_missing (table, nvar - first_difference - 1, 0,
1724 TAB_RIGHT, &tb->values[first_difference],
1725 x->vars[first_difference]);
1728 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1729 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1730 additionally suffixed with a letter `M'. */
1732 format_cell_entry (struct tab_table *table, int c, int r, double value,
1733 char suffix, bool mark_missing)
1735 const struct fmt_spec f = {FMT_F, 10, 1};
1740 s.string = tab_alloc (table, 16);
1742 data_out (&v, &f, s.string);
1743 while (*s.string == ' ')
1749 s.string[s.length++] = suffix;
1751 s.string[s.length++] = 'M';
1753 tab_raw (table, c, r, TAB_RIGHT, &s);
1756 /* Displays the crosstabulation table. */
1758 display_crosstabulation (void)
1763 for (r = 0; r < n_rows; r++)
1764 table_value_missing (table, nvar - 2, r * num_cells,
1765 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1767 tab_text (table, nvar - 2, n_rows * num_cells,
1768 TAB_LEFT, _("Total"));
1770 /* Put in the actual cells. */
1775 tab_offset (table, nvar - 1, -1);
1776 for (r = 0; r < n_rows; r++)
1779 tab_hline (table, TAL_1, -1, n_cols, 0);
1780 for (c = 0; c < n_cols; c++)
1782 bool mark_missing = false;
1783 double expected_value = row_tot[r] * col_tot[c] / W;
1784 if (cmd.miss == CRS_REPORT
1785 && (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER)
1786 || var_is_num_missing (x->vars[ROW_VAR], rows[r].f,
1788 mark_missing = true;
1789 for (i = 0; i < num_cells; i++)
1800 v = *mp / row_tot[r] * 100.;
1804 v = *mp / col_tot[c] * 100.;
1811 case CRS_CL_EXPECTED:
1814 case CRS_CL_RESIDUAL:
1815 v = *mp - expected_value;
1817 case CRS_CL_SRESIDUAL:
1818 v = (*mp - expected_value) / sqrt (expected_value);
1820 case CRS_CL_ASRESIDUAL:
1821 v = ((*mp - expected_value)
1822 / sqrt (expected_value
1823 * (1. - row_tot[r] / W)
1824 * (1. - col_tot[c] / W)));
1830 format_cell_entry (table, c, i, v, suffix, mark_missing);
1836 tab_offset (table, -1, tab_row (table) + num_cells);
1844 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1845 for (r = 0; r < n_rows; r++)
1848 bool mark_missing = false;
1850 if (cmd.miss == CRS_REPORT
1851 && var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1852 mark_missing = true;
1854 for (i = 0; i < num_cells; i++)
1868 v = row_tot[r] / W * 100.;
1872 v = row_tot[r] / W * 100.;
1875 case CRS_CL_EXPECTED:
1876 case CRS_CL_RESIDUAL:
1877 case CRS_CL_SRESIDUAL:
1878 case CRS_CL_ASRESIDUAL:
1885 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1886 tab_next_row (table);
1891 /* Column totals, grand total. */
1897 tab_hline (table, TAL_1, -1, n_cols, 0);
1898 for (c = 0; c <= n_cols; c++)
1900 double ct = c < n_cols ? col_tot[c] : W;
1901 bool mark_missing = false;
1905 if (cmd.miss == CRS_REPORT && c < n_cols
1906 && var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1907 mark_missing = true;
1909 for (i = 0; i < num_cells; i++)
1931 case CRS_CL_EXPECTED:
1932 case CRS_CL_RESIDUAL:
1933 case CRS_CL_SRESIDUAL:
1934 case CRS_CL_ASRESIDUAL:
1940 format_cell_entry (table, c, i, v, suffix, mark_missing);
1945 tab_offset (table, -1, tab_row (table) + last_row);
1948 tab_offset (table, 0, -1);
1951 static void calc_r (double *X, double *Y, double *, double *, double *);
1952 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1954 /* Display chi-square statistics. */
1956 display_chisq (void)
1958 static const char *chisq_stats[N_CHISQ] =
1960 N_("Pearson Chi-Square"),
1961 N_("Likelihood Ratio"),
1962 N_("Fisher's Exact Test"),
1963 N_("Continuity Correction"),
1964 N_("Linear-by-Linear Association"),
1966 double chisq_v[N_CHISQ];
1967 double fisher1, fisher2;
1973 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1975 tab_offset (chisq, nvar - 2, -1);
1977 for (i = 0; i < N_CHISQ; i++)
1979 if ((i != 2 && chisq_v[i] == SYSMIS)
1980 || (i == 2 && fisher1 == SYSMIS))
1984 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1987 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1988 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1989 tab_float (chisq, 3, 0, TAB_RIGHT,
1990 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1995 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1996 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1998 tab_next_row (chisq);
2001 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2002 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2003 tab_next_row (chisq);
2005 tab_offset (chisq, 0, -1);
2008 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2009 double[N_SYMMETRIC]);
2011 /* Display symmetric measures. */
2013 display_symmetric (void)
2015 static const char *categories[] =
2017 N_("Nominal by Nominal"),
2018 N_("Ordinal by Ordinal"),
2019 N_("Interval by Interval"),
2020 N_("Measure of Agreement"),
2023 static const char *stats[N_SYMMETRIC] =
2027 N_("Contingency Coefficient"),
2028 N_("Kendall's tau-b"),
2029 N_("Kendall's tau-c"),
2031 N_("Spearman Correlation"),
2036 static const int stats_categories[N_SYMMETRIC] =
2038 0, 0, 0, 1, 1, 1, 1, 2, 3,
2042 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2045 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2048 tab_offset (sym, nvar - 2, -1);
2050 for (i = 0; i < N_SYMMETRIC; i++)
2052 if (sym_v[i] == SYSMIS)
2055 if (stats_categories[i] != last_cat)
2057 last_cat = stats_categories[i];
2058 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2061 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2062 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2063 if (sym_ase[i] != SYSMIS)
2064 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2065 if (sym_t[i] != SYSMIS)
2066 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2067 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2071 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2072 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2075 tab_offset (sym, 0, -1);
2078 static int calc_risk (double[], double[], double[], union value *);
2080 /* Display risk estimate. */
2085 double risk_v[3], lower[3], upper[3];
2089 if (!calc_risk (risk_v, upper, lower, c))
2092 tab_offset (risk, nvar - 2, -1);
2094 for (i = 0; i < 3; i++)
2096 if (risk_v[i] == SYSMIS)
2102 if (var_is_numeric (x->vars[COL_VAR]))
2103 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2104 var_get_name (x->vars[COL_VAR]), c[0].f, c[1].f);
2106 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2107 var_get_name (x->vars[COL_VAR]),
2108 var_get_width (x->vars[COL_VAR]), c[0].s,
2109 var_get_width (x->vars[COL_VAR]), c[1].s);
2113 if (var_is_numeric (x->vars[ROW_VAR]))
2114 sprintf (buf, _("For cohort %s = %g"),
2115 var_get_name (x->vars[ROW_VAR]), rows[i - 1].f);
2117 sprintf (buf, _("For cohort %s = %.*s"),
2118 var_get_name (x->vars[ROW_VAR]),
2119 var_get_width (x->vars[ROW_VAR]), rows[i - 1].s);
2123 tab_text (risk, 0, 0, TAB_LEFT, buf);
2124 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2125 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2126 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2127 tab_next_row (risk);
2130 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2131 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2132 tab_next_row (risk);
2134 tab_offset (risk, 0, -1);
2137 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2138 double[N_DIRECTIONAL]);
2140 /* Display directional measures. */
2142 display_directional (void)
2144 static const char *categories[] =
2146 N_("Nominal by Nominal"),
2147 N_("Ordinal by Ordinal"),
2148 N_("Nominal by Interval"),
2151 static const char *stats[] =
2154 N_("Goodman and Kruskal tau"),
2155 N_("Uncertainty Coefficient"),
2160 static const char *types[] =
2167 static const int stats_categories[N_DIRECTIONAL] =
2169 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2172 static const int stats_stats[N_DIRECTIONAL] =
2174 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2177 static const int stats_types[N_DIRECTIONAL] =
2179 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2182 static const int *stats_lookup[] =
2189 static const char **stats_names[] =
2201 double direct_v[N_DIRECTIONAL];
2202 double direct_ase[N_DIRECTIONAL];
2203 double direct_t[N_DIRECTIONAL];
2207 if (!calc_directional (direct_v, direct_ase, direct_t))
2210 tab_offset (direct, nvar - 2, -1);
2212 for (i = 0; i < N_DIRECTIONAL; i++)
2214 if (direct_v[i] == SYSMIS)
2220 for (j = 0; j < 3; j++)
2221 if (last[j] != stats_lookup[j][i])
2224 tab_hline (direct, TAL_1, j, 6, 0);
2229 int k = last[j] = stats_lookup[j][i];
2234 string = var_get_name (x->vars[0]);
2236 string = var_get_name (x->vars[1]);
2238 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2239 gettext (stats_names[j][k]), string);
2244 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2245 if (direct_ase[i] != SYSMIS)
2246 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2247 if (direct_t[i] != SYSMIS)
2248 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2249 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2250 tab_next_row (direct);
2253 tab_offset (direct, 0, -1);
2256 /* Statistical calculations. */
2258 /* Returns the value of the gamma (factorial) function for an integer
2261 gamma_int (double x)
2266 for (i = 2; i < x; i++)
2271 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2273 static inline double
2274 Pr (int a, int b, int c, int d)
2276 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2277 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2278 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2279 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2280 / gamma_int (a + b + c + d + 1.));
2283 /* Swap the contents of A and B. */
2285 swap (int *a, int *b)
2292 /* Calculate significance for Fisher's exact test as specified in
2293 _SPSS Statistical Algorithms_, Appendix 5. */
2295 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2299 if (MIN (c, d) < MIN (a, b))
2300 swap (&a, &c), swap (&b, &d);
2301 if (MIN (b, d) < MIN (a, c))
2302 swap (&a, &b), swap (&c, &d);
2306 swap (&a, &b), swap (&c, &d);
2308 swap (&a, &c), swap (&b, &d);
2312 for (x = 0; x <= a; x++)
2313 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2315 *fisher2 = *fisher1;
2316 for (x = 1; x <= b; x++)
2317 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2320 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2321 columns with values COLS and N_ROWS rows with values ROWS. Values
2322 in the matrix sum to W. */
2324 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2325 double *fisher1, double *fisher2)
2329 chisq[0] = chisq[1] = 0.;
2330 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2331 *fisher1 = *fisher2 = SYSMIS;
2333 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2335 if (ns_rows <= 1 || ns_cols <= 1)
2337 chisq[0] = chisq[1] = SYSMIS;
2341 for (r = 0; r < n_rows; r++)
2342 for (c = 0; c < n_cols; c++)
2344 const double expected = row_tot[r] * col_tot[c] / W;
2345 const double freq = mat[n_cols * r + c];
2346 const double residual = freq - expected;
2348 chisq[0] += residual * residual / expected;
2350 chisq[1] += freq * log (expected / freq);
2361 /* Calculate Yates and Fisher exact test. */
2362 if (ns_cols == 2 && ns_rows == 2)
2364 double f11, f12, f21, f22;
2370 for (i = j = 0; i < n_cols; i++)
2371 if (col_tot[i] != 0.)
2380 f11 = mat[nz_cols[0]];
2381 f12 = mat[nz_cols[1]];
2382 f21 = mat[nz_cols[0] + n_cols];
2383 f22 = mat[nz_cols[1] + n_cols];
2388 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2391 chisq[3] = (W * x * x
2392 / (f11 + f12) / (f21 + f22)
2393 / (f11 + f21) / (f12 + f22));
2401 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2402 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2405 /* Calculate Mantel-Haenszel. */
2406 if (var_is_numeric (x->vars[ROW_VAR]) && var_is_numeric (x->vars[COL_VAR]))
2408 double r, ase_0, ase_1;
2409 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2411 chisq[4] = (W - 1.) * r * r;
2416 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2417 ASE_1, and ase_0 into ASE_0. The row and column values must be
2418 passed in X and Y. */
2420 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2422 double SX, SY, S, T;
2424 double sum_XYf, sum_X2Y2f;
2425 double sum_Xr, sum_X2r;
2426 double sum_Yc, sum_Y2c;
2429 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2430 for (j = 0; j < n_cols; j++)
2432 double fij = mat[j + i * n_cols];
2433 double product = X[i] * Y[j];
2434 double temp = fij * product;
2436 sum_X2Y2f += temp * product;
2439 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2441 sum_Xr += X[i] * row_tot[i];
2442 sum_X2r += X[i] * X[i] * row_tot[i];
2446 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2448 sum_Yc += Y[i] * col_tot[i];
2449 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2453 S = sum_XYf - sum_Xr * sum_Yc / W;
2454 SX = sum_X2r - sum_Xr * sum_Xr / W;
2455 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2458 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2463 for (s = c = 0., i = 0; i < n_rows; i++)
2464 for (j = 0; j < n_cols; j++)
2466 double Xresid, Yresid;
2469 Xresid = X[i] - Xbar;
2470 Yresid = Y[j] - Ybar;
2471 temp = (T * Xresid * Yresid
2473 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2474 y = mat[j + i * n_cols] * temp * temp - c;
2479 *ase_1 = sqrt (s) / (T * T);
2483 static double somers_d_v[3];
2484 static double somers_d_ase[3];
2485 static double somers_d_t[3];
2487 /* Calculate symmetric statistics and their asymptotic standard
2488 errors. Returns 0 if none could be calculated. */
2490 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2491 double t[N_SYMMETRIC])
2493 int q = MIN (ns_rows, ns_cols);
2502 for (i = 0; i < N_SYMMETRIC; i++)
2503 v[i] = ase[i] = t[i] = SYSMIS;
2506 /* Phi, Cramer's V, contingency coefficient. */
2507 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2509 double Xp = 0.; /* Pearson chi-square. */
2514 for (r = 0; r < n_rows; r++)
2515 for (c = 0; c < n_cols; c++)
2517 const double expected = row_tot[r] * col_tot[c] / W;
2518 const double freq = mat[n_cols * r + c];
2519 const double residual = freq - expected;
2521 Xp += residual * residual / expected;
2525 if (cmd.a_statistics[CRS_ST_PHI])
2527 v[0] = sqrt (Xp / W);
2528 v[1] = sqrt (Xp / (W * (q - 1)));
2530 if (cmd.a_statistics[CRS_ST_CC])
2531 v[2] = sqrt (Xp / (Xp + W));
2534 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2535 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2540 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2547 for (r = 0; r < n_rows; r++)
2548 Dr -= row_tot[r] * row_tot[r];
2549 for (c = 0; c < n_cols; c++)
2550 Dc -= col_tot[c] * col_tot[c];
2556 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2557 for (c = 0; c < n_cols; c++)
2561 for (r = 0; r < n_rows; r++)
2562 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2572 for (i = 0; i < n_rows; i++)
2576 for (j = 1; j < n_cols; j++)
2577 Cij += col_tot[j] - cum[j + i * n_cols];
2580 for (j = 1; j < n_cols; j++)
2581 Dij += cum[j + (i - 1) * n_cols];
2585 double fij = mat[j + i * n_cols];
2591 assert (j < n_cols);
2593 Cij -= col_tot[j] - cum[j + i * n_cols];
2594 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2598 Cij += cum[j - 1 + (i - 1) * n_cols];
2599 Dij -= cum[j + (i - 1) * n_cols];
2605 if (cmd.a_statistics[CRS_ST_BTAU])
2606 v[3] = (P - Q) / sqrt (Dr * Dc);
2607 if (cmd.a_statistics[CRS_ST_CTAU])
2608 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2609 if (cmd.a_statistics[CRS_ST_GAMMA])
2610 v[5] = (P - Q) / (P + Q);
2612 /* ASE for tau-b, tau-c, gamma. Calculations could be
2613 eliminated here, at expense of memory. */
2618 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2619 for (i = 0; i < n_rows; i++)
2623 for (j = 1; j < n_cols; j++)
2624 Cij += col_tot[j] - cum[j + i * n_cols];
2627 for (j = 1; j < n_cols; j++)
2628 Dij += cum[j + (i - 1) * n_cols];
2632 double fij = mat[j + i * n_cols];
2634 if (cmd.a_statistics[CRS_ST_BTAU])
2636 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2637 + v[3] * (row_tot[i] * Dc
2638 + col_tot[j] * Dr));
2639 btau_cum += fij * temp * temp;
2643 const double temp = Cij - Dij;
2644 ctau_cum += fij * temp * temp;
2647 if (cmd.a_statistics[CRS_ST_GAMMA])
2649 const double temp = Q * Cij - P * Dij;
2650 gamma_cum += fij * temp * temp;
2653 if (cmd.a_statistics[CRS_ST_D])
2655 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2656 - (P - Q) * (W - row_tot[i]));
2657 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2658 - (Q - P) * (W - col_tot[j]));
2663 assert (j < n_cols);
2665 Cij -= col_tot[j] - cum[j + i * n_cols];
2666 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2670 Cij += cum[j - 1 + (i - 1) * n_cols];
2671 Dij -= cum[j + (i - 1) * n_cols];
2677 btau_var = ((btau_cum
2678 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2680 if (cmd.a_statistics[CRS_ST_BTAU])
2682 ase[3] = sqrt (btau_var);
2683 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2686 if (cmd.a_statistics[CRS_ST_CTAU])
2688 ase[4] = ((2 * q / ((q - 1) * W * W))
2689 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2690 t[4] = v[4] / ase[4];
2692 if (cmd.a_statistics[CRS_ST_GAMMA])
2694 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2695 t[5] = v[5] / (2. / (P + Q)
2696 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2698 if (cmd.a_statistics[CRS_ST_D])
2700 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2701 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2702 somers_d_t[0] = (somers_d_v[0]
2704 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2705 somers_d_v[1] = (P - Q) / Dc;
2706 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2707 somers_d_t[1] = (somers_d_v[1]
2709 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2710 somers_d_v[2] = (P - Q) / Dr;
2711 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2712 somers_d_t[2] = (somers_d_v[2]
2714 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2720 /* Spearman correlation, Pearson's r. */
2721 if (cmd.a_statistics[CRS_ST_CORR])
2723 double *R = local_alloc (sizeof *R * n_rows);
2724 double *C = local_alloc (sizeof *C * n_cols);
2727 double y, t, c = 0., s = 0.;
2732 R[i] = s + (row_tot[i] + 1.) / 2.;
2739 assert (i < n_rows);
2744 double y, t, c = 0., s = 0.;
2749 C[j] = s + (col_tot[j] + 1.) / 2;
2756 assert (j < n_cols);
2760 calc_r (R, C, &v[6], &t[6], &ase[6]);
2766 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2770 /* Cohen's kappa. */
2771 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2773 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2776 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2777 i < ns_rows; i++, j++)
2781 while (col_tot[j] == 0.)
2784 prod = row_tot[i] * col_tot[j];
2785 sum = row_tot[i] + col_tot[j];
2787 sum_fii += mat[j + i * n_cols];
2789 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2790 sum_riciri_ci += prod * sum;
2792 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2793 for (j = 0; j < ns_cols; j++)
2795 double sum = row_tot[i] + col_tot[j];
2796 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2799 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2801 ase[8] = sqrt ((W * W * sum_rici
2802 + sum_rici * sum_rici
2803 - W * sum_riciri_ci)
2804 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2806 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2807 / pow2 (W * W - sum_rici))
2808 + ((2. * (W - sum_fii)
2809 * (2. * sum_fii * sum_rici
2810 - W * sum_fiiri_ci))
2811 / cube (W * W - sum_rici))
2812 + (pow2 (W - sum_fii)
2813 * (W * sum_fijri_ci2 - 4.
2814 * sum_rici * sum_rici)
2815 / pow4 (W * W - sum_rici))));
2817 t[8] = v[8] / ase[8];
2824 /* Calculate risk estimate. */
2826 calc_risk (double *value, double *upper, double *lower, union value *c)
2828 double f11, f12, f21, f22;
2834 for (i = 0; i < 3; i++)
2835 value[i] = upper[i] = lower[i] = SYSMIS;
2838 if (ns_rows != 2 || ns_cols != 2)
2845 for (i = j = 0; i < n_cols; i++)
2846 if (col_tot[i] != 0.)
2855 f11 = mat[nz_cols[0]];
2856 f12 = mat[nz_cols[1]];
2857 f21 = mat[nz_cols[0] + n_cols];
2858 f22 = mat[nz_cols[1] + n_cols];
2860 c[0] = cols[nz_cols[0]];
2861 c[1] = cols[nz_cols[1]];
2864 value[0] = (f11 * f22) / (f12 * f21);
2865 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2866 lower[0] = value[0] * exp (-1.960 * v);
2867 upper[0] = value[0] * exp (1.960 * v);
2869 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2870 v = sqrt ((f12 / (f11 * (f11 + f12)))
2871 + (f22 / (f21 * (f21 + f22))));
2872 lower[1] = value[1] * exp (-1.960 * v);
2873 upper[1] = value[1] * exp (1.960 * v);
2875 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2876 v = sqrt ((f11 / (f12 * (f11 + f12)))
2877 + (f21 / (f22 * (f21 + f22))));
2878 lower[2] = value[2] * exp (-1.960 * v);
2879 upper[2] = value[2] * exp (1.960 * v);
2884 /* Calculate directional measures. */
2886 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2887 double t[N_DIRECTIONAL])
2892 for (i = 0; i < N_DIRECTIONAL; i++)
2893 v[i] = ase[i] = t[i] = SYSMIS;
2897 if (cmd.a_statistics[CRS_ST_LAMBDA])
2899 double *fim = xnmalloc (n_rows, sizeof *fim);
2900 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2901 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2902 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2903 double sum_fim, sum_fmj;
2905 int rm_index, cm_index;
2908 /* Find maximum for each row and their sum. */
2909 for (sum_fim = 0., i = 0; i < n_rows; i++)
2911 double max = mat[i * n_cols];
2914 for (j = 1; j < n_cols; j++)
2915 if (mat[j + i * n_cols] > max)
2917 max = mat[j + i * n_cols];
2921 sum_fim += fim[i] = max;
2922 fim_index[i] = index;
2925 /* Find maximum for each column. */
2926 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2928 double max = mat[j];
2931 for (i = 1; i < n_rows; i++)
2932 if (mat[j + i * n_cols] > max)
2934 max = mat[j + i * n_cols];
2938 sum_fmj += fmj[j] = max;
2939 fmj_index[j] = index;
2942 /* Find maximum row total. */
2945 for (i = 1; i < n_rows; i++)
2946 if (row_tot[i] > rm)
2952 /* Find maximum column total. */
2955 for (j = 1; j < n_cols; j++)
2956 if (col_tot[j] > cm)
2962 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2963 v[1] = (sum_fmj - rm) / (W - rm);
2964 v[2] = (sum_fim - cm) / (W - cm);
2966 /* ASE1 for Y given X. */
2970 for (accum = 0., i = 0; i < n_rows; i++)
2971 for (j = 0; j < n_cols; j++)
2973 const int deltaj = j == cm_index;
2974 accum += (mat[j + i * n_cols]
2975 * pow2 ((j == fim_index[i])
2980 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2983 /* ASE0 for Y given X. */
2987 for (accum = 0., i = 0; i < n_rows; i++)
2988 if (cm_index != fim_index[i])
2989 accum += (mat[i * n_cols + fim_index[i]]
2990 + mat[i * n_cols + cm_index]);
2991 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2994 /* ASE1 for X given Y. */
2998 for (accum = 0., i = 0; i < n_rows; i++)
2999 for (j = 0; j < n_cols; j++)
3001 const int deltaj = i == rm_index;
3002 accum += (mat[j + i * n_cols]
3003 * pow2 ((i == fmj_index[j])
3008 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3011 /* ASE0 for X given Y. */
3015 for (accum = 0., j = 0; j < n_cols; j++)
3016 if (rm_index != fmj_index[j])
3017 accum += (mat[j + n_cols * fmj_index[j]]
3018 + mat[j + n_cols * rm_index]);
3019 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3022 /* Symmetric ASE0 and ASE1. */
3027 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3028 for (j = 0; j < n_cols; j++)
3030 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3031 int temp1 = (i == rm_index) + (j == cm_index);
3032 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3033 accum1 += (mat[j + i * n_cols]
3034 * pow2 (temp0 + (v[0] - 1.) * temp1));
3036 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3037 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3038 / (2. * W - rm - cm));
3047 double sum_fij2_ri, sum_fij2_ci;
3048 double sum_ri2, sum_cj2;
3050 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3051 for (j = 0; j < n_cols; j++)
3053 double temp = pow2 (mat[j + i * n_cols]);
3054 sum_fij2_ri += temp / row_tot[i];
3055 sum_fij2_ci += temp / col_tot[j];
3058 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3059 sum_ri2 += row_tot[i] * row_tot[i];
3061 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3062 sum_cj2 += col_tot[j] * col_tot[j];
3064 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3065 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3069 if (cmd.a_statistics[CRS_ST_UC])
3071 double UX, UY, UXY, P;
3072 double ase1_yx, ase1_xy, ase1_sym;
3075 for (UX = 0., i = 0; i < n_rows; i++)
3076 if (row_tot[i] > 0.)
3077 UX -= row_tot[i] / W * log (row_tot[i] / W);
3079 for (UY = 0., j = 0; j < n_cols; j++)
3080 if (col_tot[j] > 0.)
3081 UY -= col_tot[j] / W * log (col_tot[j] / W);
3083 for (UXY = P = 0., i = 0; i < n_rows; i++)
3084 for (j = 0; j < n_cols; j++)
3086 double entry = mat[j + i * n_cols];
3091 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3092 UXY -= entry / W * log (entry / W);
3095 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3096 for (j = 0; j < n_cols; j++)
3098 double entry = mat[j + i * n_cols];
3103 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3104 + (UX - UXY) * log (col_tot[j] / W));
3105 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3106 + (UY - UXY) * log (row_tot[i] / W));
3107 ase1_sym += entry * pow2 ((UXY
3108 * log (row_tot[i] * col_tot[j] / (W * W)))
3109 - (UX + UY) * log (entry / W));
3112 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3113 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3114 t[5] = v[5] / ((2. / (W * (UX + UY)))
3115 * sqrt (P - pow2 (UX + UY - UXY) / W));
3117 v[6] = (UX + UY - UXY) / UX;
3118 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3119 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3121 v[7] = (UX + UY - UXY) / UY;
3122 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3123 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3127 if (cmd.a_statistics[CRS_ST_D])
3132 calc_symmetric (NULL, NULL, NULL);
3133 for (i = 0; i < 3; i++)
3135 v[8 + i] = somers_d_v[i];
3136 ase[8 + i] = somers_d_ase[i];
3137 t[8 + i] = somers_d_t[i];
3142 if (cmd.a_statistics[CRS_ST_ETA])
3145 double sum_Xr, sum_X2r;
3149 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3151 sum_Xr += rows[i].f * row_tot[i];
3152 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3154 SX = sum_X2r - sum_Xr * sum_Xr / W;
3156 for (SXW = 0., j = 0; j < n_cols; j++)
3160 for (cum = 0., i = 0; i < n_rows; i++)
3162 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3163 cum += rows[i].f * mat[j + i * n_cols];
3166 SXW -= cum * cum / col_tot[j];
3168 v[11] = sqrt (1. - SXW / SX);
3172 double sum_Yc, sum_Y2c;
3176 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3178 sum_Yc += cols[i].f * col_tot[i];
3179 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3181 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3183 for (SYW = 0., i = 0; i < n_rows; i++)
3187 for (cum = 0., j = 0; j < n_cols; j++)
3189 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3190 cum += cols[j].f * mat[j + i * n_cols];
3193 SYW -= cum * cum / row_tot[i];
3195 v[12] = sqrt (1. - SYW / SY);
3202 /* A wrapper around data_out() that limits string output to short
3203 string width and null terminates the result. */
3205 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3207 struct fmt_spec fmt_subst;
3209 /* Limit to short string width. */
3210 if (fmt_is_string (fp->type))
3214 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3215 if (fmt_subst.type == FMT_A)
3216 fmt_subst.w = MIN (8, fmt_subst.w);
3218 fmt_subst.w = MIN (16, fmt_subst.w);
3224 data_out (v, fp, s);
3226 /* Null terminate. */