1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 - Pearson's R (but not Spearman!) is off a little.
20 - T values for Spearman's R and Pearson's R are wrong.
21 - How to calculate significance of symmetric and directional measures?
22 - Asymmetric ASEs and T values for lambda are wrong.
23 - ASE of Goodman and Kruskal's tau is not calculated.
24 - ASE of symmetric somers' d is wrong.
25 - Approx. T of uncertainty coefficient is wrong.
32 #include <gsl/gsl_cdf.h>
36 #include <data/case.h>
37 #include <data/casegrouper.h>
38 #include <data/casereader.h>
39 #include <data/data-out.h>
40 #include <data/dictionary.h>
41 #include <data/format.h>
42 #include <data/procedure.h>
43 #include <data/value-labels.h>
44 #include <data/variable.h>
45 #include <language/command.h>
46 #include <language/dictionary/split-file.h>
47 #include <language/lexer/lexer.h>
48 #include <language/lexer/variable-parser.h>
49 #include <libpspp/alloc.h>
50 #include <libpspp/array.h>
51 #include <libpspp/assertion.h>
52 #include <libpspp/compiler.h>
53 #include <libpspp/hash.h>
54 #include <libpspp/magic.h>
55 #include <libpspp/message.h>
56 #include <libpspp/message.h>
57 #include <libpspp/misc.h>
58 #include <libpspp/pool.h>
59 #include <libpspp/str.h>
60 #include <output/output.h>
61 #include <output/table.h>
66 #define _(msgid) gettext (msgid)
67 #define N_(msgid) msgid
75 missing=miss:!table/include/report;
76 +write[wr_]=none,cells,all;
77 +format=fmt:!labels/nolabels/novallabs,
80 tabl:!tables/notables,
83 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
85 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
91 /* Number of chi-square statistics. */
94 /* Number of symmetric statistics. */
97 /* Number of directional statistics. */
98 #define N_DIRECTIONAL 13
100 /* A single table entry for general mode. */
103 int table; /* Flattened table number. */
106 double freq; /* Frequency count. */
107 double *data; /* Crosstabulation table for integer mode. */
110 union value values[1]; /* Values. */
113 /* A crosstabulation. */
116 int nvar; /* Number of variables. */
117 double missing; /* Missing cases count. */
118 int ofs; /* Integer mode: Offset into sorted_tab[]. */
119 const struct variable *vars[2]; /* At least two variables; sorted by
120 larger indices first. */
123 /* Integer mode variable info. */
126 int min; /* Minimum value. */
127 int max; /* Maximum value + 1. */
128 int count; /* max - min. */
131 static inline struct var_range *
132 get_var_range (const struct variable *v)
134 return var_get_aux (v);
137 /* Indexes into crosstab.v. */
144 /* General mode crosstabulation table. */
145 static struct hsh_table *gen_tab; /* Hash table. */
146 static int n_sorted_tab; /* Number of entries in sorted_tab. */
147 static struct table_entry **sorted_tab; /* Sorted table. */
149 /* Variables specifies on VARIABLES. */
150 static const struct variable **variables;
151 static size_t variables_cnt;
154 static struct crosstab **xtab;
157 /* Integer or general mode? */
166 static int num_cells; /* Number of cells requested. */
167 static int cells[8]; /* Cells requested. */
170 static int write_style; /* One of WR_* that specifies the WRITE style. */
172 /* Command parsing info. */
173 static struct cmd_crosstabs cmd;
176 static struct pool *pl_tc; /* For table cells. */
177 static struct pool *pl_col; /* For column data. */
179 static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
180 static void precalc (struct casereader *, const struct dataset *);
181 static void calc_general (struct ccase *, const struct dataset *);
182 static void calc_integer (struct ccase *, const struct dataset *);
183 static void postcalc (void);
184 static void submit (struct tab_table *);
186 static void format_short (char *s, const struct fmt_spec *fp,
187 const union value *v);
189 /* Parse and execute CROSSTABS, then clean up. */
191 cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
193 int result = internal_cmd_crosstabs (lexer, ds);
197 pool_destroy (pl_tc);
198 pool_destroy (pl_col);
200 for (i = 0; i < nxtab; i++)
207 /* Parses and executes the CROSSTABS procedure. */
209 internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
211 struct casegrouper *grouper;
212 struct casereader *input, *group;
220 pl_tc = pool_create ();
221 pl_col = pool_create ();
223 if (!parse_crosstabs (lexer, ds, &cmd, NULL))
226 mode = variables ? INTEGER : GENERAL;
231 cmd.a_cells[CRS_CL_COUNT] = 1;
237 for (i = 0; i < CRS_CL_count; i++)
242 cmd.a_cells[CRS_CL_COUNT] = 1;
243 cmd.a_cells[CRS_CL_ROW] = 1;
244 cmd.a_cells[CRS_CL_COLUMN] = 1;
245 cmd.a_cells[CRS_CL_TOTAL] = 1;
247 if (cmd.a_cells[CRS_CL_ALL])
249 for (i = 0; i < CRS_CL_count; i++)
251 cmd.a_cells[CRS_CL_ALL] = 0;
253 cmd.a_cells[CRS_CL_NONE] = 0;
255 for (num_cells = i = 0; i < CRS_CL_count; i++)
257 cells[num_cells++] = i;
260 if (cmd.sbc_statistics)
265 for (i = 0; i < CRS_ST_count; i++)
266 if (cmd.a_statistics[i])
269 cmd.a_statistics[CRS_ST_CHISQ] = 1;
270 if (cmd.a_statistics[CRS_ST_ALL])
271 for (i = 0; i < CRS_ST_count; i++)
272 cmd.a_statistics[i] = 1;
276 if (cmd.miss == CRS_REPORT && mode == GENERAL)
278 msg (SE, _("Missing mode REPORT not allowed in general mode. "
279 "Assuming MISSING=TABLE."));
280 cmd.miss = CRS_TABLE;
284 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
285 cmd.a_write[CRS_WR_ALL] = 0;
286 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
288 msg (SE, _("Write mode ALL not allowed in general mode. "
289 "Assuming WRITE=CELLS."));
290 cmd.a_write[CRS_WR_CELLS] = 1;
293 && (cmd.a_write[CRS_WR_NONE]
294 + cmd.a_write[CRS_WR_ALL]
295 + cmd.a_write[CRS_WR_CELLS] == 0))
296 cmd.a_write[CRS_WR_CELLS] = 1;
297 if (cmd.a_write[CRS_WR_CELLS])
298 write_style = CRS_WR_CELLS;
299 else if (cmd.a_write[CRS_WR_ALL])
300 write_style = CRS_WR_ALL;
302 write_style = CRS_WR_NONE;
304 input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
306 grouper = casegrouper_create_splits (input, dataset_dict (ds));
307 while (casegrouper_get_next_group (grouper, &group))
313 for (; casereader_read (group, &c); case_destroy (&c))
316 calc_general (&c, ds);
318 calc_integer (&c, ds);
320 casereader_destroy (group);
324 ok = casegrouper_destroy (grouper);
325 ok = proc_commit (ds) && ok;
327 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
330 /* Parses the TABLES subcommand. */
332 crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
334 struct const_var_set *var_set;
336 const struct variable ***by = NULL;
337 size_t *by_nvar = NULL;
341 /* Ensure that this is a TABLES subcommand. */
342 if (!lex_match_id (lexer, "TABLES")
343 && (lex_token (lexer) != T_ID ||
344 dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
345 && lex_token (lexer) != T_ALL)
347 lex_match (lexer, '=');
349 if (variables != NULL)
350 var_set = const_var_set_create_from_array (variables, variables_cnt);
352 var_set = const_var_set_create_from_dict (dataset_dict (ds));
353 assert (var_set != NULL);
357 by = xnrealloc (by, n_by + 1, sizeof *by);
358 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
359 if (!parse_const_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
360 PV_NO_DUPLICATE | PV_NO_SCRATCH))
362 if (xalloc_oversized (nx, by_nvar[n_by]))
364 msg (SE, _("Too many crosstabulation variables or dimensions."));
370 if (!lex_match (lexer, T_BY))
374 lex_error (lexer, _("expecting BY"));
383 int *by_iter = xcalloc (n_by, sizeof *by_iter);
386 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
387 for (i = 0; i < nx; i++)
391 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
398 for (i = 0; i < n_by; i++)
399 x->vars[i] = by[i][by_iter[i]];
405 for (i = n_by - 1; i >= 0; i--)
407 if (++by_iter[i] < by_nvar[i])
420 /* All return paths lead here. */
424 for (i = 0; i < n_by; i++)
430 const_var_set_destroy (var_set);
435 /* Parses the VARIABLES subcommand. */
437 crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
441 msg (SE, _("VARIABLES must be specified before TABLES."));
445 lex_match (lexer, '=');
449 size_t orig_nv = variables_cnt;
454 if (!parse_variables_const (lexer, dataset_dict (ds),
455 &variables, &variables_cnt,
456 (PV_APPEND | PV_NUMERIC
457 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
460 if (lex_token (lexer) != '(')
462 lex_error (lexer, "expecting `('");
467 if (!lex_force_int (lexer))
469 min = lex_integer (lexer);
472 lex_match (lexer, ',');
474 if (!lex_force_int (lexer))
476 max = lex_integer (lexer);
479 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
485 if (lex_token (lexer) != ')')
487 lex_error (lexer, "expecting `)'");
492 for (i = orig_nv; i < variables_cnt; i++)
494 struct var_range *vr = xmalloc (sizeof *vr);
497 vr->count = max - min + 1;
498 var_attach_aux (variables[i], vr, var_dtor_free);
501 if (lex_token (lexer) == '/')
513 /* Data file processing. */
515 static int compare_table_entry (const void *, const void *, const void *);
516 static unsigned hash_table_entry (const void *, const void *);
518 /* Set up the crosstabulation tables for processing. */
520 precalc (struct casereader *input, const struct dataset *ds)
524 if (!casereader_peek (input, 0, &c))
526 output_split_file_values (ds, &c);
531 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
541 for (i = 0; i < nxtab; i++)
543 struct crosstab *x = xtab[i];
548 x->ofs = n_sorted_tab;
550 for (j = 2; j < x->nvar; j++)
551 count *= get_var_range (x->vars[j - 2])->count;
553 sorted_tab = xnrealloc (sorted_tab,
554 n_sorted_tab + count, sizeof *sorted_tab);
555 v = local_alloc (sizeof *v * x->nvar);
556 for (j = 2; j < x->nvar; j++)
557 v[j] = get_var_range (x->vars[j])->min;
558 for (j = 0; j < count; j++)
560 struct table_entry *te;
563 te = sorted_tab[n_sorted_tab++]
564 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
568 int row_cnt = get_var_range (x->vars[0])->count;
569 int col_cnt = get_var_range (x->vars[1])->count;
570 const int mat_size = row_cnt * col_cnt;
573 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
574 for (m = 0; m < mat_size; m++)
578 for (k = 2; k < x->nvar; k++)
579 te->values[k].f = v[k];
580 for (k = 2; k < x->nvar; k++)
582 struct var_range *vr = get_var_range (x->vars[k]);
583 if (++v[k] >= vr->max)
592 sorted_tab = xnrealloc (sorted_tab,
593 n_sorted_tab + 1, sizeof *sorted_tab);
594 sorted_tab[n_sorted_tab] = NULL;
599 /* Form crosstabulations for general mode. */
601 calc_general (struct ccase *c, const struct dataset *ds)
603 /* Missing values to exclude. */
604 enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY
605 : cmd.miss == CRS_INCLUDE ? MV_SYSTEM
609 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
611 /* Flattened current table index. */
614 for (t = 0; t < nxtab; t++)
616 struct crosstab *x = xtab[t];
617 const size_t entry_size = (sizeof (struct table_entry)
618 + sizeof (union value) * (x->nvar - 1));
619 struct table_entry *te = local_alloc (entry_size);
621 /* Construct table entry for the current record and table. */
627 for (j = 0; j < x->nvar; j++)
629 const union value *v = case_data (c, x->vars[j]);
630 if (var_is_value_missing (x->vars[j], v, exclude))
632 x->missing += weight;
636 if (var_is_numeric (x->vars[j]))
637 te->values[j].f = case_num (c, x->vars[j]);
640 memcpy (te->values[j].s, case_str (c, x->vars[j]),
641 var_get_width (x->vars[j]));
643 /* Necessary in order to simplify comparisons. */
644 memset (&te->values[j].s[var_get_width (x->vars[j])], 0,
645 sizeof (union value) - var_get_width (x->vars[j]));
650 /* Add record to hash table. */
652 struct table_entry **tepp
653 = (struct table_entry **) hsh_probe (gen_tab, te);
656 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
659 memcpy (tep, te, entry_size);
664 (*tepp)->u.freq += weight;
673 calc_integer (struct ccase *c, const struct dataset *ds)
675 bool bad_warn = true;
678 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
680 /* Flattened current table index. */
683 for (t = 0; t < nxtab; t++)
685 struct crosstab *x = xtab[t];
690 for (i = 0; i < x->nvar; i++)
692 const struct variable *const v = x->vars[i];
693 struct var_range *vr = get_var_range (v);
694 double value = case_num (c, v);
696 /* Note that the first test also rules out SYSMIS. */
697 if ((value < vr->min || value >= vr->max)
698 || (cmd.miss == CRS_TABLE
699 && var_is_num_missing (v, value, MV_USER)))
701 x->missing += weight;
707 ofs += fact * ((int) value - vr->min);
713 const struct variable *row_var = x->vars[ROW_VAR];
714 const int row = case_num (c, row_var) - get_var_range (row_var)->min;
716 const struct variable *col_var = x->vars[COL_VAR];
717 const int col = case_num (c, col_var) - get_var_range (col_var)->min;
719 const int col_dim = get_var_range (col_var)->count;
721 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
728 /* Compare the table_entry's at A and B and return a strcmp()-type
731 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
733 const struct table_entry *a = a_;
734 const struct table_entry *b = b_;
736 if (a->table > b->table)
738 else if (a->table < b->table)
742 const struct crosstab *x = xtab[a->table];
745 for (i = x->nvar - 1; i >= 0; i--)
746 if (var_is_numeric (x->vars[i]))
748 const double diffnum = a->values[i].f - b->values[i].f;
751 else if (diffnum > 0)
756 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
757 var_get_width (x->vars[i]));
766 /* Calculate a hash value from table_entry A. */
768 hash_table_entry (const void *a_, const void *aux UNUSED)
770 const struct table_entry *a = a_;
775 for (i = 0; i < xtab[a->table]->nvar; i++)
776 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
781 /* Post-data reading calculations. */
783 static struct table_entry **find_pivot_extent (struct table_entry **,
784 int *cnt, int pivot);
785 static void enum_var_values (struct table_entry **entries, int entry_cnt,
787 union value **values, int *value_cnt);
788 static void output_pivot_table (struct table_entry **, struct table_entry **,
789 double **, double **, double **,
790 int *, int *, int *);
791 static void make_summary_table (void);
798 n_sorted_tab = hsh_count (gen_tab);
799 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
802 make_summary_table ();
804 /* Identify all the individual crosstabulation tables, and deal with
807 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
808 int pc = n_sorted_tab; /* Pivot count. */
810 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
811 int maxrows = 0, maxcols = 0, maxcells = 0;
815 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
819 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
820 &maxrows, &maxcols, &maxcells);
829 hsh_destroy (gen_tab);
832 static void insert_summary (struct tab_table *, int tab_index, double valid);
834 /* Output a table summarizing the cases processed. */
836 make_summary_table (void)
838 struct tab_table *summary;
840 struct table_entry **pb = sorted_tab, **pe;
841 int pc = n_sorted_tab;
844 summary = tab_create (7, 3 + nxtab, 1);
845 tab_title (summary, _("Summary."));
846 tab_headers (summary, 1, 0, 3, 0);
847 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
848 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
849 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
850 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
851 tab_hline (summary, TAL_1, 1, 6, 1);
852 tab_hline (summary, TAL_1, 1, 6, 2);
853 tab_vline (summary, TAL_1, 3, 1, 1);
854 tab_vline (summary, TAL_1, 5, 1, 1);
858 for (i = 0; i < 3; i++)
860 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
861 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
864 tab_offset (summary, 0, 3);
870 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
874 while (cur_tab < (*pb)->table)
875 insert_summary (summary, cur_tab++, 0.);
878 for (valid = 0.; pb < pe; pb++)
879 valid += (*pb)->u.freq;
882 const struct crosstab *const x = xtab[(*pb)->table];
883 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
884 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
885 const int count = n_cols * n_rows;
887 for (valid = 0.; pb < pe; pb++)
889 const double *data = (*pb)->u.data;
892 for (i = 0; i < count; i++)
896 insert_summary (summary, cur_tab++, valid);
901 while (cur_tab < nxtab)
902 insert_summary (summary, cur_tab++, 0.);
907 /* Inserts a line into T describing the crosstabulation at index
908 TAB_INDEX, which has VALID valid observations. */
910 insert_summary (struct tab_table *t, int tab_index, double valid)
912 struct crosstab *x = xtab[tab_index];
914 tab_hline (t, TAL_1, 0, 6, 0);
916 /* Crosstabulation name. */
918 char *buf = local_alloc (128 * x->nvar);
922 for (i = 0; i < x->nvar; i++)
925 cp = stpcpy (cp, " * ");
927 cp = stpcpy (cp, var_to_string (x->vars[i]));
929 tab_text (t, 0, 0, TAB_LEFT, buf);
934 /* Counts and percentages. */
944 for (i = 0; i < 3; i++)
946 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
947 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
958 static struct tab_table *table; /* Crosstabulation table. */
959 static struct tab_table *chisq; /* Chi-square table. */
960 static struct tab_table *sym; /* Symmetric measures table. */
961 static struct tab_table *risk; /* Risk estimate table. */
962 static struct tab_table *direct; /* Directional measures table. */
965 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
967 /* Column values, number of columns. */
968 static union value *cols;
971 /* Row values, number of rows. */
972 static union value *rows;
975 /* Number of statistically interesting columns/rows (columns/rows with
977 static int ns_cols, ns_rows;
979 /* Crosstabulation. */
980 static const struct crosstab *x;
982 /* Number of variables from the crosstabulation to consider. This is
983 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
986 /* Matrix contents. */
987 static double *mat; /* Matrix proper. */
988 static double *row_tot; /* Row totals. */
989 static double *col_tot; /* Column totals. */
990 static double W; /* Grand total. */
992 static void display_dimensions (struct tab_table *, int first_difference,
993 struct table_entry *);
994 static void display_crosstabulation (void);
995 static void display_chisq (void);
996 static void display_symmetric (void);
997 static void display_risk (void);
998 static void display_directional (void);
999 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1000 static void table_value_missing (struct tab_table *table, int c, int r,
1001 unsigned char opt, const union value *v,
1002 const struct variable *var);
1003 static void delete_missing (void);
1005 /* Output pivot table beginning at PB and continuing until PE,
1006 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1007 hold *MAXROWS entries. */
1009 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1010 double **matp, double **row_totp, double **col_totp,
1011 int *maxrows, int *maxcols, int *maxcells)
1014 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1015 int tc = pe - pb; /* Table count. */
1017 /* Table entry for header comparison. */
1018 struct table_entry *cmp = NULL;
1020 x = xtab[(*pb)->table];
1021 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1023 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1025 /* Crosstabulation table initialization. */
1028 table = tab_create (nvar + n_cols,
1029 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1030 tab_headers (table, nvar - 1, 0, 2, 0);
1032 /* First header line. */
1033 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1034 TAB_CENTER | TAT_TITLE, var_get_name (x->vars[COL_VAR]));
1036 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1038 /* Second header line. */
1042 for (i = 2; i < nvar; i++)
1043 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1044 TAB_RIGHT | TAT_TITLE, var_to_string (x->vars[i]));
1045 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1046 var_get_name (x->vars[ROW_VAR]));
1047 for (i = 0; i < n_cols; i++)
1048 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1050 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1053 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1054 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1058 char *title = local_alloc (x->nvar * 64 + 128);
1062 if (cmd.pivot == CRS_PIVOT)
1063 for (i = 0; i < nvar; i++)
1066 cp = stpcpy (cp, " by ");
1067 cp = stpcpy (cp, var_get_name (x->vars[i]));
1071 cp = spprintf (cp, "%s by %s for",
1072 var_get_name (x->vars[0]),
1073 var_get_name (x->vars[1]));
1074 for (i = 2; i < nvar; i++)
1076 char buf[64], *bufp;
1081 cp = stpcpy (cp, var_get_name (x->vars[i]));
1083 format_short (buf, var_get_print_format (x->vars[i]),
1085 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1087 cp = stpcpy (cp, bufp);
1091 cp = stpcpy (cp, " [");
1092 for (i = 0; i < num_cells; i++)
1100 static const struct tuple cell_names[] =
1102 {CRS_CL_COUNT, N_("count")},
1103 {CRS_CL_ROW, N_("row %")},
1104 {CRS_CL_COLUMN, N_("column %")},
1105 {CRS_CL_TOTAL, N_("total %")},
1106 {CRS_CL_EXPECTED, N_("expected")},
1107 {CRS_CL_RESIDUAL, N_("residual")},
1108 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1109 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1113 const struct tuple *t;
1115 for (t = cell_names; t->value != cells[i]; t++)
1116 assert (t->value != -1);
1118 cp = stpcpy (cp, ", ");
1119 cp = stpcpy (cp, gettext (t->name));
1123 tab_title (table, "%s", title);
1127 tab_offset (table, 0, 2);
1132 /* Chi-square table initialization. */
1133 if (cmd.a_statistics[CRS_ST_CHISQ])
1135 chisq = tab_create (6 + (nvar - 2),
1136 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1137 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1139 tab_title (chisq, _("Chi-square tests."));
1141 tab_offset (chisq, nvar - 2, 0);
1142 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1143 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1144 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1145 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1146 _("Asymp. Sig. (2-sided)"));
1147 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1148 _("Exact. Sig. (2-sided)"));
1149 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1150 _("Exact. Sig. (1-sided)"));
1152 tab_offset (chisq, 0, 1);
1157 /* Symmetric measures. */
1158 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1159 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1160 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1161 || cmd.a_statistics[CRS_ST_KAPPA])
1163 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1164 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1165 tab_title (sym, _("Symmetric measures."));
1167 tab_offset (sym, nvar - 2, 0);
1168 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1169 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1170 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1171 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1172 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1173 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1174 tab_offset (sym, 0, 1);
1179 /* Risk estimate. */
1180 if (cmd.a_statistics[CRS_ST_RISK])
1182 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1183 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1184 tab_title (risk, _("Risk estimate."));
1186 tab_offset (risk, nvar - 2, 0);
1187 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1188 _("95%% Confidence Interval"));
1189 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1190 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1191 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1192 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1193 tab_hline (risk, TAL_1, 2, 3, 1);
1194 tab_vline (risk, TAL_1, 2, 0, 1);
1195 tab_offset (risk, 0, 2);
1200 /* Directional measures. */
1201 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1202 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1204 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1205 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1206 tab_title (direct, _("Directional measures."));
1208 tab_offset (direct, nvar - 2, 0);
1209 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1210 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1211 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1212 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1213 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1214 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1215 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1216 tab_offset (direct, 0, 1);
1223 /* Find pivot subtable if applicable. */
1224 te = find_pivot_extent (tb, &tc, 0);
1228 /* Find all the row variable values. */
1229 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1231 /* Allocate memory space for the column and row totals. */
1232 if (n_rows > *maxrows)
1234 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1235 row_tot = *row_totp;
1238 if (n_cols > *maxcols)
1240 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1241 col_tot = *col_totp;
1245 /* Allocate table space for the matrix. */
1246 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1247 tab_realloc (table, -1,
1248 MAX (tab_nr (table) + (n_rows + 1) * num_cells,
1249 tab_nr (table) * (pe - pb) / (te - tb)));
1251 if (mode == GENERAL)
1253 /* Allocate memory space for the matrix. */
1254 if (n_cols * n_rows > *maxcells)
1256 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1257 *maxcells = n_cols * n_rows;
1262 /* Build the matrix and calculate column totals. */
1264 union value *cur_col = cols;
1265 union value *cur_row = rows;
1267 double *cp = col_tot;
1268 struct table_entry **p;
1271 for (p = &tb[0]; p < te; p++)
1273 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1277 for (; cur_row < &rows[n_rows]; cur_row++)
1283 mp = &mat[cur_col - cols];
1286 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1293 *cp += *mp = (*p)->u.freq;
1298 /* Zero out the rest of the matrix. */
1299 for (; cur_row < &rows[n_rows]; cur_row++)
1305 if (cur_col < &cols[n_cols])
1307 const int rem_cols = n_cols - (cur_col - cols);
1310 for (c = 0; c < rem_cols; c++)
1312 mp = &mat[cur_col - cols];
1313 for (r = 0; r < n_rows; r++)
1315 for (c = 0; c < rem_cols; c++)
1317 mp += n_cols - rem_cols;
1325 double *tp = col_tot;
1327 assert (mode == INTEGER);
1328 mat = (*tb)->u.data;
1331 /* Calculate column totals. */
1332 for (c = 0; c < n_cols; c++)
1335 double *cp = &mat[c];
1337 for (r = 0; r < n_rows; r++)
1338 cum += cp[r * n_cols];
1346 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1347 ns_cols += *cp != 0.;
1350 /* Calculate row totals. */
1353 double *rp = row_tot;
1356 for (ns_rows = 0, r = n_rows; r--; )
1359 for (c = n_cols; c--; )
1367 /* Calculate grand total. */
1373 if (n_rows < n_cols)
1374 tp = row_tot, n = n_rows;
1376 tp = col_tot, n = n_cols;
1382 /* Find the first variable that differs from the last subtable,
1383 then display the values of the dimensioning variables for
1384 each table that needs it. */
1386 int first_difference = nvar - 1;
1389 for (; ; first_difference--)
1391 assert (first_difference >= 2);
1392 if (memcmp (&cmp->values[first_difference],
1393 &(*tb)->values[first_difference],
1394 sizeof *cmp->values))
1400 display_dimensions (table, first_difference, *tb);
1402 display_dimensions (chisq, first_difference, *tb);
1404 display_dimensions (sym, first_difference, *tb);
1406 display_dimensions (risk, first_difference, *tb);
1408 display_dimensions (direct, first_difference, *tb);
1412 display_crosstabulation ();
1413 if (cmd.miss == CRS_REPORT)
1418 display_symmetric ();
1422 display_directional ();
1433 tab_resize (chisq, 4 + (nvar - 2), -1);
1444 /* Delete missing rows and columns for statistical analysis when
1447 delete_missing (void)
1452 for (r = 0; r < n_rows; r++)
1453 if (var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1457 for (c = 0; c < n_cols; c++)
1458 mat[c + r * n_cols] = 0.;
1466 for (c = 0; c < n_cols; c++)
1467 if (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1471 for (r = 0; r < n_rows; r++)
1472 mat[c + r * n_cols] = 0.;
1478 /* Prepare table T for submission, and submit it. */
1480 submit (struct tab_table *t)
1487 tab_resize (t, -1, 0);
1488 if (tab_nr (t) == tab_t (t))
1493 tab_offset (t, 0, 0);
1495 for (i = 2; i < nvar; i++)
1496 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1497 var_to_string (x->vars[i]));
1498 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1499 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1501 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1503 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1504 tab_dim (t, crosstabs_dim);
1508 /* Sets the widths of all the columns and heights of all the rows in
1509 table T for driver D. */
1511 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1515 /* Width of a numerical column. */
1516 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1517 if (cmd.miss == CRS_REPORT)
1518 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1520 /* Set width for header columns. */
1526 w = d->width - c * (t->nc - t->l);
1527 for (i = 0; i <= t->nc; i++)
1531 if (w < d->prop_em_width * 8)
1532 w = d->prop_em_width * 8;
1534 if (w > d->prop_em_width * 15)
1535 w = d->prop_em_width * 15;
1537 for (i = 0; i < t->l; i++)
1541 for (i = t->l; i < t->nc; i++)
1544 for (i = 0; i < t->nr; i++)
1545 t->h[i] = tab_natural_height (t, d, i);
1548 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1549 int *cnt, int pivot);
1550 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1551 int *cnt, int pivot);
1553 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1555 static struct table_entry **
1556 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1558 return (mode == GENERAL
1559 ? find_pivot_extent_general (tp, cnt, pivot)
1560 : find_pivot_extent_integer (tp, cnt, pivot));
1563 /* Find the extent of a region in TP that contains one table. If
1564 PIVOT != 0 that means a set of table entries with identical table
1565 number; otherwise they also have to have the same values for every
1566 dimension after the row and column dimensions. The table that is
1567 searched starts at TP and has length CNT. Returns the first entry
1568 after the last one in the table; sets *CNT to the number of
1569 remaining values. If there are no entries in TP at all, returns
1570 NULL. A yucky interface, admittedly, but it works. */
1571 static struct table_entry **
1572 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1574 struct table_entry *fp = *tp;
1579 x = xtab[(*tp)->table];
1587 if ((*tp)->table != fp->table)
1592 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1599 /* Integer mode correspondent to find_pivot_extent_general(). This
1600 could be optimized somewhat, but I just don't give a crap about
1601 CROSSTABS performance in integer mode, which is just a
1602 CROSSTABS wart as far as I'm concerned.
1604 That said, feel free to send optimization patches to me. */
1605 static struct table_entry **
1606 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1608 struct table_entry *fp = *tp;
1613 x = xtab[(*tp)->table];
1621 if ((*tp)->table != fp->table)
1626 if (memcmp (&(*tp)->values[2], &fp->values[2],
1627 sizeof (union value) * (x->nvar - 2)))
1634 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1635 result. WIDTH_ points to an int which is either 0 for a
1636 numeric value or a string width for a string value. */
1638 compare_value (const void *a_, const void *b_, const void *width_)
1640 const union value *a = a_;
1641 const union value *b = b_;
1642 const int *pwidth = width_;
1643 const int width = *pwidth;
1646 return (a->f < b->f) ? -1 : (a->f > b->f);
1648 return strncmp (a->s, b->s, width);
1651 /* Given an array of ENTRY_CNT table_entry structures starting at
1652 ENTRIES, creates a sorted list of the values that the variable
1653 with index VAR_IDX takes on. The values are returned as a
1654 malloc()'darray stored in *VALUES, with the number of values
1655 stored in *VALUE_CNT.
1658 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1659 union value **values, int *value_cnt)
1661 const struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1663 if (mode == GENERAL)
1665 int width = var_get_width (v);
1668 *values = xnmalloc (entry_cnt, sizeof **values);
1669 for (i = 0; i < entry_cnt; i++)
1670 (*values)[i] = entries[i]->values[var_idx];
1671 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1672 compare_value, &width);
1676 struct var_range *vr = get_var_range (v);
1679 assert (mode == INTEGER);
1680 *values = xnmalloc (vr->count, sizeof **values);
1681 for (i = 0; i < vr->count; i++)
1682 (*values)[i].f = i + vr->min;
1683 *value_cnt = vr->count;
1687 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1688 from V, displayed with print format spec from variable VAR. When
1689 in REPORT missing-value mode, missing values have an M appended. */
1691 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1692 const union value *v, const struct variable *var)
1695 const struct fmt_spec *print = var_get_print_format (var);
1697 const char *label = var_lookup_value_label (var, v);
1700 tab_text (table, c, r, TAB_LEFT, label);
1704 s.string = tab_alloc (table, print->w);
1705 format_short (s.string, print, v);
1706 s.length = strlen (s.string);
1707 if (cmd.miss == CRS_REPORT && var_is_num_missing (var, v->f, MV_USER))
1708 s.string[s.length++] = 'M';
1709 while (s.length && *s.string == ' ')
1714 tab_raw (table, c, r, opt, &s);
1717 /* Draws a line across TABLE at the current row to indicate the most
1718 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1719 that changed, and puts the values that changed into the table. TB
1720 and X must be the corresponding table_entry and crosstab,
1723 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1725 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1727 for (; first_difference >= 2; first_difference--)
1728 table_value_missing (table, nvar - first_difference - 1, 0,
1729 TAB_RIGHT, &tb->values[first_difference],
1730 x->vars[first_difference]);
1733 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1734 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1735 additionally suffixed with a letter `M'. */
1737 format_cell_entry (struct tab_table *table, int c, int r, double value,
1738 char suffix, bool mark_missing)
1740 const struct fmt_spec f = {FMT_F, 10, 1};
1745 s.string = tab_alloc (table, 16);
1747 data_out (&v, &f, s.string);
1748 while (*s.string == ' ')
1754 s.string[s.length++] = suffix;
1756 s.string[s.length++] = 'M';
1758 tab_raw (table, c, r, TAB_RIGHT, &s);
1761 /* Displays the crosstabulation table. */
1763 display_crosstabulation (void)
1768 for (r = 0; r < n_rows; r++)
1769 table_value_missing (table, nvar - 2, r * num_cells,
1770 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1772 tab_text (table, nvar - 2, n_rows * num_cells,
1773 TAB_LEFT, _("Total"));
1775 /* Put in the actual cells. */
1780 tab_offset (table, nvar - 1, -1);
1781 for (r = 0; r < n_rows; r++)
1784 tab_hline (table, TAL_1, -1, n_cols, 0);
1785 for (c = 0; c < n_cols; c++)
1787 bool mark_missing = false;
1788 double expected_value = row_tot[r] * col_tot[c] / W;
1789 if (cmd.miss == CRS_REPORT
1790 && (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER)
1791 || var_is_num_missing (x->vars[ROW_VAR], rows[r].f,
1793 mark_missing = true;
1794 for (i = 0; i < num_cells; i++)
1805 v = *mp / row_tot[r] * 100.;
1809 v = *mp / col_tot[c] * 100.;
1816 case CRS_CL_EXPECTED:
1819 case CRS_CL_RESIDUAL:
1820 v = *mp - expected_value;
1822 case CRS_CL_SRESIDUAL:
1823 v = (*mp - expected_value) / sqrt (expected_value);
1825 case CRS_CL_ASRESIDUAL:
1826 v = ((*mp - expected_value)
1827 / sqrt (expected_value
1828 * (1. - row_tot[r] / W)
1829 * (1. - col_tot[c] / W)));
1835 format_cell_entry (table, c, i, v, suffix, mark_missing);
1841 tab_offset (table, -1, tab_row (table) + num_cells);
1849 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1850 for (r = 0; r < n_rows; r++)
1853 bool mark_missing = false;
1855 if (cmd.miss == CRS_REPORT
1856 && var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1857 mark_missing = true;
1859 for (i = 0; i < num_cells; i++)
1873 v = row_tot[r] / W * 100.;
1877 v = row_tot[r] / W * 100.;
1880 case CRS_CL_EXPECTED:
1881 case CRS_CL_RESIDUAL:
1882 case CRS_CL_SRESIDUAL:
1883 case CRS_CL_ASRESIDUAL:
1890 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1891 tab_next_row (table);
1896 /* Column totals, grand total. */
1902 tab_hline (table, TAL_1, -1, n_cols, 0);
1903 for (c = 0; c <= n_cols; c++)
1905 double ct = c < n_cols ? col_tot[c] : W;
1906 bool mark_missing = false;
1910 if (cmd.miss == CRS_REPORT && c < n_cols
1911 && var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1912 mark_missing = true;
1914 for (i = 0; i < num_cells; i++)
1936 case CRS_CL_EXPECTED:
1937 case CRS_CL_RESIDUAL:
1938 case CRS_CL_SRESIDUAL:
1939 case CRS_CL_ASRESIDUAL:
1945 format_cell_entry (table, c, i, v, suffix, mark_missing);
1950 tab_offset (table, -1, tab_row (table) + last_row);
1953 tab_offset (table, 0, -1);
1956 static void calc_r (double *X, double *Y, double *, double *, double *);
1957 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1959 /* Display chi-square statistics. */
1961 display_chisq (void)
1963 static const char *chisq_stats[N_CHISQ] =
1965 N_("Pearson Chi-Square"),
1966 N_("Likelihood Ratio"),
1967 N_("Fisher's Exact Test"),
1968 N_("Continuity Correction"),
1969 N_("Linear-by-Linear Association"),
1971 double chisq_v[N_CHISQ];
1972 double fisher1, fisher2;
1978 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1980 tab_offset (chisq, nvar - 2, -1);
1982 for (i = 0; i < N_CHISQ; i++)
1984 if ((i != 2 && chisq_v[i] == SYSMIS)
1985 || (i == 2 && fisher1 == SYSMIS))
1989 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1992 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1993 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1994 tab_float (chisq, 3, 0, TAB_RIGHT,
1995 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
2000 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
2001 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
2003 tab_next_row (chisq);
2006 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2007 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2008 tab_next_row (chisq);
2010 tab_offset (chisq, 0, -1);
2013 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2014 double[N_SYMMETRIC]);
2016 /* Display symmetric measures. */
2018 display_symmetric (void)
2020 static const char *categories[] =
2022 N_("Nominal by Nominal"),
2023 N_("Ordinal by Ordinal"),
2024 N_("Interval by Interval"),
2025 N_("Measure of Agreement"),
2028 static const char *stats[N_SYMMETRIC] =
2032 N_("Contingency Coefficient"),
2033 N_("Kendall's tau-b"),
2034 N_("Kendall's tau-c"),
2036 N_("Spearman Correlation"),
2041 static const int stats_categories[N_SYMMETRIC] =
2043 0, 0, 0, 1, 1, 1, 1, 2, 3,
2047 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2050 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2053 tab_offset (sym, nvar - 2, -1);
2055 for (i = 0; i < N_SYMMETRIC; i++)
2057 if (sym_v[i] == SYSMIS)
2060 if (stats_categories[i] != last_cat)
2062 last_cat = stats_categories[i];
2063 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2066 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2067 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2068 if (sym_ase[i] != SYSMIS)
2069 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2070 if (sym_t[i] != SYSMIS)
2071 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2072 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2076 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2077 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2080 tab_offset (sym, 0, -1);
2083 static int calc_risk (double[], double[], double[], union value *);
2085 /* Display risk estimate. */
2090 double risk_v[3], lower[3], upper[3];
2094 if (!calc_risk (risk_v, upper, lower, c))
2097 tab_offset (risk, nvar - 2, -1);
2099 for (i = 0; i < 3; i++)
2101 if (risk_v[i] == SYSMIS)
2107 if (var_is_numeric (x->vars[COL_VAR]))
2108 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2109 var_get_name (x->vars[COL_VAR]), c[0].f, c[1].f);
2111 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2112 var_get_name (x->vars[COL_VAR]),
2113 var_get_width (x->vars[COL_VAR]), c[0].s,
2114 var_get_width (x->vars[COL_VAR]), c[1].s);
2118 if (var_is_numeric (x->vars[ROW_VAR]))
2119 sprintf (buf, _("For cohort %s = %g"),
2120 var_get_name (x->vars[ROW_VAR]), rows[i - 1].f);
2122 sprintf (buf, _("For cohort %s = %.*s"),
2123 var_get_name (x->vars[ROW_VAR]),
2124 var_get_width (x->vars[ROW_VAR]), rows[i - 1].s);
2128 tab_text (risk, 0, 0, TAB_LEFT, buf);
2129 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2130 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2131 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2132 tab_next_row (risk);
2135 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2136 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2137 tab_next_row (risk);
2139 tab_offset (risk, 0, -1);
2142 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2143 double[N_DIRECTIONAL]);
2145 /* Display directional measures. */
2147 display_directional (void)
2149 static const char *categories[] =
2151 N_("Nominal by Nominal"),
2152 N_("Ordinal by Ordinal"),
2153 N_("Nominal by Interval"),
2156 static const char *stats[] =
2159 N_("Goodman and Kruskal tau"),
2160 N_("Uncertainty Coefficient"),
2165 static const char *types[] =
2172 static const int stats_categories[N_DIRECTIONAL] =
2174 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2177 static const int stats_stats[N_DIRECTIONAL] =
2179 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2182 static const int stats_types[N_DIRECTIONAL] =
2184 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2187 static const int *stats_lookup[] =
2194 static const char **stats_names[] =
2206 double direct_v[N_DIRECTIONAL];
2207 double direct_ase[N_DIRECTIONAL];
2208 double direct_t[N_DIRECTIONAL];
2212 if (!calc_directional (direct_v, direct_ase, direct_t))
2215 tab_offset (direct, nvar - 2, -1);
2217 for (i = 0; i < N_DIRECTIONAL; i++)
2219 if (direct_v[i] == SYSMIS)
2225 for (j = 0; j < 3; j++)
2226 if (last[j] != stats_lookup[j][i])
2229 tab_hline (direct, TAL_1, j, 6, 0);
2234 int k = last[j] = stats_lookup[j][i];
2239 string = var_get_name (x->vars[0]);
2241 string = var_get_name (x->vars[1]);
2243 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2244 gettext (stats_names[j][k]), string);
2249 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2250 if (direct_ase[i] != SYSMIS)
2251 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2252 if (direct_t[i] != SYSMIS)
2253 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2254 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2255 tab_next_row (direct);
2258 tab_offset (direct, 0, -1);
2261 /* Statistical calculations. */
2263 /* Returns the value of the gamma (factorial) function for an integer
2266 gamma_int (double x)
2271 for (i = 2; i < x; i++)
2276 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2278 static inline double
2279 Pr (int a, int b, int c, int d)
2281 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2282 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2283 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2284 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2285 / gamma_int (a + b + c + d + 1.));
2288 /* Swap the contents of A and B. */
2290 swap (int *a, int *b)
2297 /* Calculate significance for Fisher's exact test as specified in
2298 _SPSS Statistical Algorithms_, Appendix 5. */
2300 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2304 if (MIN (c, d) < MIN (a, b))
2305 swap (&a, &c), swap (&b, &d);
2306 if (MIN (b, d) < MIN (a, c))
2307 swap (&a, &b), swap (&c, &d);
2311 swap (&a, &b), swap (&c, &d);
2313 swap (&a, &c), swap (&b, &d);
2317 for (x = 0; x <= a; x++)
2318 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2320 *fisher2 = *fisher1;
2321 for (x = 1; x <= b; x++)
2322 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2325 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2326 columns with values COLS and N_ROWS rows with values ROWS. Values
2327 in the matrix sum to W. */
2329 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2330 double *fisher1, double *fisher2)
2334 chisq[0] = chisq[1] = 0.;
2335 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2336 *fisher1 = *fisher2 = SYSMIS;
2338 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2340 if (ns_rows <= 1 || ns_cols <= 1)
2342 chisq[0] = chisq[1] = SYSMIS;
2346 for (r = 0; r < n_rows; r++)
2347 for (c = 0; c < n_cols; c++)
2349 const double expected = row_tot[r] * col_tot[c] / W;
2350 const double freq = mat[n_cols * r + c];
2351 const double residual = freq - expected;
2353 chisq[0] += residual * residual / expected;
2355 chisq[1] += freq * log (expected / freq);
2366 /* Calculate Yates and Fisher exact test. */
2367 if (ns_cols == 2 && ns_rows == 2)
2369 double f11, f12, f21, f22;
2375 for (i = j = 0; i < n_cols; i++)
2376 if (col_tot[i] != 0.)
2385 f11 = mat[nz_cols[0]];
2386 f12 = mat[nz_cols[1]];
2387 f21 = mat[nz_cols[0] + n_cols];
2388 f22 = mat[nz_cols[1] + n_cols];
2393 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2396 chisq[3] = (W * x * x
2397 / (f11 + f12) / (f21 + f22)
2398 / (f11 + f21) / (f12 + f22));
2406 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2407 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2410 /* Calculate Mantel-Haenszel. */
2411 if (var_is_numeric (x->vars[ROW_VAR]) && var_is_numeric (x->vars[COL_VAR]))
2413 double r, ase_0, ase_1;
2414 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2416 chisq[4] = (W - 1.) * r * r;
2421 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2422 ASE_1, and ase_0 into ASE_0. The row and column values must be
2423 passed in X and Y. */
2425 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2427 double SX, SY, S, T;
2429 double sum_XYf, sum_X2Y2f;
2430 double sum_Xr, sum_X2r;
2431 double sum_Yc, sum_Y2c;
2434 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2435 for (j = 0; j < n_cols; j++)
2437 double fij = mat[j + i * n_cols];
2438 double product = X[i] * Y[j];
2439 double temp = fij * product;
2441 sum_X2Y2f += temp * product;
2444 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2446 sum_Xr += X[i] * row_tot[i];
2447 sum_X2r += X[i] * X[i] * row_tot[i];
2451 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2453 sum_Yc += Y[i] * col_tot[i];
2454 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2458 S = sum_XYf - sum_Xr * sum_Yc / W;
2459 SX = sum_X2r - sum_Xr * sum_Xr / W;
2460 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2463 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2468 for (s = c = 0., i = 0; i < n_rows; i++)
2469 for (j = 0; j < n_cols; j++)
2471 double Xresid, Yresid;
2474 Xresid = X[i] - Xbar;
2475 Yresid = Y[j] - Ybar;
2476 temp = (T * Xresid * Yresid
2478 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2479 y = mat[j + i * n_cols] * temp * temp - c;
2484 *ase_1 = sqrt (s) / (T * T);
2488 static double somers_d_v[3];
2489 static double somers_d_ase[3];
2490 static double somers_d_t[3];
2492 /* Calculate symmetric statistics and their asymptotic standard
2493 errors. Returns 0 if none could be calculated. */
2495 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2496 double t[N_SYMMETRIC])
2498 int q = MIN (ns_rows, ns_cols);
2507 for (i = 0; i < N_SYMMETRIC; i++)
2508 v[i] = ase[i] = t[i] = SYSMIS;
2511 /* Phi, Cramer's V, contingency coefficient. */
2512 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2514 double Xp = 0.; /* Pearson chi-square. */
2519 for (r = 0; r < n_rows; r++)
2520 for (c = 0; c < n_cols; c++)
2522 const double expected = row_tot[r] * col_tot[c] / W;
2523 const double freq = mat[n_cols * r + c];
2524 const double residual = freq - expected;
2526 Xp += residual * residual / expected;
2530 if (cmd.a_statistics[CRS_ST_PHI])
2532 v[0] = sqrt (Xp / W);
2533 v[1] = sqrt (Xp / (W * (q - 1)));
2535 if (cmd.a_statistics[CRS_ST_CC])
2536 v[2] = sqrt (Xp / (Xp + W));
2539 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2540 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2545 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2552 for (r = 0; r < n_rows; r++)
2553 Dr -= row_tot[r] * row_tot[r];
2554 for (c = 0; c < n_cols; c++)
2555 Dc -= col_tot[c] * col_tot[c];
2561 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2562 for (c = 0; c < n_cols; c++)
2566 for (r = 0; r < n_rows; r++)
2567 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2577 for (i = 0; i < n_rows; i++)
2581 for (j = 1; j < n_cols; j++)
2582 Cij += col_tot[j] - cum[j + i * n_cols];
2585 for (j = 1; j < n_cols; j++)
2586 Dij += cum[j + (i - 1) * n_cols];
2590 double fij = mat[j + i * n_cols];
2596 assert (j < n_cols);
2598 Cij -= col_tot[j] - cum[j + i * n_cols];
2599 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2603 Cij += cum[j - 1 + (i - 1) * n_cols];
2604 Dij -= cum[j + (i - 1) * n_cols];
2610 if (cmd.a_statistics[CRS_ST_BTAU])
2611 v[3] = (P - Q) / sqrt (Dr * Dc);
2612 if (cmd.a_statistics[CRS_ST_CTAU])
2613 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2614 if (cmd.a_statistics[CRS_ST_GAMMA])
2615 v[5] = (P - Q) / (P + Q);
2617 /* ASE for tau-b, tau-c, gamma. Calculations could be
2618 eliminated here, at expense of memory. */
2623 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2624 for (i = 0; i < n_rows; i++)
2628 for (j = 1; j < n_cols; j++)
2629 Cij += col_tot[j] - cum[j + i * n_cols];
2632 for (j = 1; j < n_cols; j++)
2633 Dij += cum[j + (i - 1) * n_cols];
2637 double fij = mat[j + i * n_cols];
2639 if (cmd.a_statistics[CRS_ST_BTAU])
2641 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2642 + v[3] * (row_tot[i] * Dc
2643 + col_tot[j] * Dr));
2644 btau_cum += fij * temp * temp;
2648 const double temp = Cij - Dij;
2649 ctau_cum += fij * temp * temp;
2652 if (cmd.a_statistics[CRS_ST_GAMMA])
2654 const double temp = Q * Cij - P * Dij;
2655 gamma_cum += fij * temp * temp;
2658 if (cmd.a_statistics[CRS_ST_D])
2660 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2661 - (P - Q) * (W - row_tot[i]));
2662 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2663 - (Q - P) * (W - col_tot[j]));
2668 assert (j < n_cols);
2670 Cij -= col_tot[j] - cum[j + i * n_cols];
2671 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2675 Cij += cum[j - 1 + (i - 1) * n_cols];
2676 Dij -= cum[j + (i - 1) * n_cols];
2682 btau_var = ((btau_cum
2683 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2685 if (cmd.a_statistics[CRS_ST_BTAU])
2687 ase[3] = sqrt (btau_var);
2688 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2691 if (cmd.a_statistics[CRS_ST_CTAU])
2693 ase[4] = ((2 * q / ((q - 1) * W * W))
2694 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2695 t[4] = v[4] / ase[4];
2697 if (cmd.a_statistics[CRS_ST_GAMMA])
2699 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2700 t[5] = v[5] / (2. / (P + Q)
2701 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2703 if (cmd.a_statistics[CRS_ST_D])
2705 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2706 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2707 somers_d_t[0] = (somers_d_v[0]
2709 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2710 somers_d_v[1] = (P - Q) / Dc;
2711 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2712 somers_d_t[1] = (somers_d_v[1]
2714 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2715 somers_d_v[2] = (P - Q) / Dr;
2716 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2717 somers_d_t[2] = (somers_d_v[2]
2719 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2725 /* Spearman correlation, Pearson's r. */
2726 if (cmd.a_statistics[CRS_ST_CORR])
2728 double *R = local_alloc (sizeof *R * n_rows);
2729 double *C = local_alloc (sizeof *C * n_cols);
2732 double y, t, c = 0., s = 0.;
2737 R[i] = s + (row_tot[i] + 1.) / 2.;
2744 assert (i < n_rows);
2749 double y, t, c = 0., s = 0.;
2754 C[j] = s + (col_tot[j] + 1.) / 2;
2761 assert (j < n_cols);
2765 calc_r (R, C, &v[6], &t[6], &ase[6]);
2771 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2775 /* Cohen's kappa. */
2776 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2778 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2781 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2782 i < ns_rows; i++, j++)
2786 while (col_tot[j] == 0.)
2789 prod = row_tot[i] * col_tot[j];
2790 sum = row_tot[i] + col_tot[j];
2792 sum_fii += mat[j + i * n_cols];
2794 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2795 sum_riciri_ci += prod * sum;
2797 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2798 for (j = 0; j < ns_cols; j++)
2800 double sum = row_tot[i] + col_tot[j];
2801 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2804 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2806 ase[8] = sqrt ((W * W * sum_rici
2807 + sum_rici * sum_rici
2808 - W * sum_riciri_ci)
2809 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2811 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2812 / pow2 (W * W - sum_rici))
2813 + ((2. * (W - sum_fii)
2814 * (2. * sum_fii * sum_rici
2815 - W * sum_fiiri_ci))
2816 / cube (W * W - sum_rici))
2817 + (pow2 (W - sum_fii)
2818 * (W * sum_fijri_ci2 - 4.
2819 * sum_rici * sum_rici)
2820 / pow4 (W * W - sum_rici))));
2822 t[8] = v[8] / ase[8];
2829 /* Calculate risk estimate. */
2831 calc_risk (double *value, double *upper, double *lower, union value *c)
2833 double f11, f12, f21, f22;
2839 for (i = 0; i < 3; i++)
2840 value[i] = upper[i] = lower[i] = SYSMIS;
2843 if (ns_rows != 2 || ns_cols != 2)
2850 for (i = j = 0; i < n_cols; i++)
2851 if (col_tot[i] != 0.)
2860 f11 = mat[nz_cols[0]];
2861 f12 = mat[nz_cols[1]];
2862 f21 = mat[nz_cols[0] + n_cols];
2863 f22 = mat[nz_cols[1] + n_cols];
2865 c[0] = cols[nz_cols[0]];
2866 c[1] = cols[nz_cols[1]];
2869 value[0] = (f11 * f22) / (f12 * f21);
2870 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2871 lower[0] = value[0] * exp (-1.960 * v);
2872 upper[0] = value[0] * exp (1.960 * v);
2874 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2875 v = sqrt ((f12 / (f11 * (f11 + f12)))
2876 + (f22 / (f21 * (f21 + f22))));
2877 lower[1] = value[1] * exp (-1.960 * v);
2878 upper[1] = value[1] * exp (1.960 * v);
2880 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2881 v = sqrt ((f11 / (f12 * (f11 + f12)))
2882 + (f21 / (f22 * (f21 + f22))));
2883 lower[2] = value[2] * exp (-1.960 * v);
2884 upper[2] = value[2] * exp (1.960 * v);
2889 /* Calculate directional measures. */
2891 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2892 double t[N_DIRECTIONAL])
2897 for (i = 0; i < N_DIRECTIONAL; i++)
2898 v[i] = ase[i] = t[i] = SYSMIS;
2902 if (cmd.a_statistics[CRS_ST_LAMBDA])
2904 double *fim = xnmalloc (n_rows, sizeof *fim);
2905 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2906 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2907 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2908 double sum_fim, sum_fmj;
2910 int rm_index, cm_index;
2913 /* Find maximum for each row and their sum. */
2914 for (sum_fim = 0., i = 0; i < n_rows; i++)
2916 double max = mat[i * n_cols];
2919 for (j = 1; j < n_cols; j++)
2920 if (mat[j + i * n_cols] > max)
2922 max = mat[j + i * n_cols];
2926 sum_fim += fim[i] = max;
2927 fim_index[i] = index;
2930 /* Find maximum for each column. */
2931 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2933 double max = mat[j];
2936 for (i = 1; i < n_rows; i++)
2937 if (mat[j + i * n_cols] > max)
2939 max = mat[j + i * n_cols];
2943 sum_fmj += fmj[j] = max;
2944 fmj_index[j] = index;
2947 /* Find maximum row total. */
2950 for (i = 1; i < n_rows; i++)
2951 if (row_tot[i] > rm)
2957 /* Find maximum column total. */
2960 for (j = 1; j < n_cols; j++)
2961 if (col_tot[j] > cm)
2967 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2968 v[1] = (sum_fmj - rm) / (W - rm);
2969 v[2] = (sum_fim - cm) / (W - cm);
2971 /* ASE1 for Y given X. */
2975 for (accum = 0., i = 0; i < n_rows; i++)
2976 for (j = 0; j < n_cols; j++)
2978 const int deltaj = j == cm_index;
2979 accum += (mat[j + i * n_cols]
2980 * pow2 ((j == fim_index[i])
2985 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2988 /* ASE0 for Y given X. */
2992 for (accum = 0., i = 0; i < n_rows; i++)
2993 if (cm_index != fim_index[i])
2994 accum += (mat[i * n_cols + fim_index[i]]
2995 + mat[i * n_cols + cm_index]);
2996 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2999 /* ASE1 for X given Y. */
3003 for (accum = 0., i = 0; i < n_rows; i++)
3004 for (j = 0; j < n_cols; j++)
3006 const int deltaj = i == rm_index;
3007 accum += (mat[j + i * n_cols]
3008 * pow2 ((i == fmj_index[j])
3013 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3016 /* ASE0 for X given Y. */
3020 for (accum = 0., j = 0; j < n_cols; j++)
3021 if (rm_index != fmj_index[j])
3022 accum += (mat[j + n_cols * fmj_index[j]]
3023 + mat[j + n_cols * rm_index]);
3024 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3027 /* Symmetric ASE0 and ASE1. */
3032 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3033 for (j = 0; j < n_cols; j++)
3035 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3036 int temp1 = (i == rm_index) + (j == cm_index);
3037 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3038 accum1 += (mat[j + i * n_cols]
3039 * pow2 (temp0 + (v[0] - 1.) * temp1));
3041 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3042 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3043 / (2. * W - rm - cm));
3052 double sum_fij2_ri, sum_fij2_ci;
3053 double sum_ri2, sum_cj2;
3055 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3056 for (j = 0; j < n_cols; j++)
3058 double temp = pow2 (mat[j + i * n_cols]);
3059 sum_fij2_ri += temp / row_tot[i];
3060 sum_fij2_ci += temp / col_tot[j];
3063 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3064 sum_ri2 += row_tot[i] * row_tot[i];
3066 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3067 sum_cj2 += col_tot[j] * col_tot[j];
3069 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3070 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3074 if (cmd.a_statistics[CRS_ST_UC])
3076 double UX, UY, UXY, P;
3077 double ase1_yx, ase1_xy, ase1_sym;
3080 for (UX = 0., i = 0; i < n_rows; i++)
3081 if (row_tot[i] > 0.)
3082 UX -= row_tot[i] / W * log (row_tot[i] / W);
3084 for (UY = 0., j = 0; j < n_cols; j++)
3085 if (col_tot[j] > 0.)
3086 UY -= col_tot[j] / W * log (col_tot[j] / W);
3088 for (UXY = P = 0., i = 0; i < n_rows; i++)
3089 for (j = 0; j < n_cols; j++)
3091 double entry = mat[j + i * n_cols];
3096 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3097 UXY -= entry / W * log (entry / W);
3100 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3101 for (j = 0; j < n_cols; j++)
3103 double entry = mat[j + i * n_cols];
3108 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3109 + (UX - UXY) * log (col_tot[j] / W));
3110 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3111 + (UY - UXY) * log (row_tot[i] / W));
3112 ase1_sym += entry * pow2 ((UXY
3113 * log (row_tot[i] * col_tot[j] / (W * W)))
3114 - (UX + UY) * log (entry / W));
3117 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3118 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3119 t[5] = v[5] / ((2. / (W * (UX + UY)))
3120 * sqrt (P - pow2 (UX + UY - UXY) / W));
3122 v[6] = (UX + UY - UXY) / UX;
3123 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3124 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3126 v[7] = (UX + UY - UXY) / UY;
3127 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3128 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3132 if (cmd.a_statistics[CRS_ST_D])
3137 calc_symmetric (NULL, NULL, NULL);
3138 for (i = 0; i < 3; i++)
3140 v[8 + i] = somers_d_v[i];
3141 ase[8 + i] = somers_d_ase[i];
3142 t[8 + i] = somers_d_t[i];
3147 if (cmd.a_statistics[CRS_ST_ETA])
3150 double sum_Xr, sum_X2r;
3154 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3156 sum_Xr += rows[i].f * row_tot[i];
3157 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3159 SX = sum_X2r - sum_Xr * sum_Xr / W;
3161 for (SXW = 0., j = 0; j < n_cols; j++)
3165 for (cum = 0., i = 0; i < n_rows; i++)
3167 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3168 cum += rows[i].f * mat[j + i * n_cols];
3171 SXW -= cum * cum / col_tot[j];
3173 v[11] = sqrt (1. - SXW / SX);
3177 double sum_Yc, sum_Y2c;
3181 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3183 sum_Yc += cols[i].f * col_tot[i];
3184 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3186 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3188 for (SYW = 0., i = 0; i < n_rows; i++)
3192 for (cum = 0., j = 0; j < n_cols; j++)
3194 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3195 cum += cols[j].f * mat[j + i * n_cols];
3198 SYW -= cum * cum / row_tot[i];
3200 v[12] = sqrt (1. - SYW / SY);
3207 /* A wrapper around data_out() that limits string output to short
3208 string width and null terminates the result. */
3210 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3212 struct fmt_spec fmt_subst;
3214 /* Limit to short string width. */
3215 if (fmt_is_string (fp->type))
3219 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3220 if (fmt_subst.type == FMT_A)
3221 fmt_subst.w = MIN (8, fmt_subst.w);
3223 fmt_subst.w = MIN (16, fmt_subst.w);
3229 data_out (v, fp, s);
3231 /* Null terminate. */