1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 - Pearson's R (but not Spearman!) is off a little.
20 - T values for Spearman's R and Pearson's R are wrong.
21 - How to calculate significance of symmetric and directional measures?
22 - Asymmetric ASEs and T values for lambda are wrong.
23 - ASE of Goodman and Kruskal's tau is not calculated.
24 - ASE of symmetric somers' d is wrong.
25 - Approx. T of uncertainty coefficient is wrong.
32 #include <gsl/gsl_cdf.h>
36 #include <data/case.h>
37 #include <data/casegrouper.h>
38 #include <data/casereader.h>
39 #include <data/data-out.h>
40 #include <data/dictionary.h>
41 #include <data/format.h>
42 #include <data/procedure.h>
43 #include <data/value-labels.h>
44 #include <data/variable.h>
45 #include <language/command.h>
46 #include <language/dictionary/split-file.h>
47 #include <language/lexer/lexer.h>
48 #include <language/lexer/variable-parser.h>
49 #include <libpspp/array.h>
50 #include <libpspp/assertion.h>
51 #include <libpspp/compiler.h>
52 #include <libpspp/hash.h>
53 #include <libpspp/message.h>
54 #include <libpspp/misc.h>
55 #include <libpspp/pool.h>
56 #include <libpspp/str.h>
57 #include <output/output.h>
58 #include <output/table.h>
65 #define _(msgid) gettext (msgid)
66 #define N_(msgid) msgid
74 missing=miss:!table/include/report;
75 +write[wr_]=none,cells,all;
76 +format=fmt:!labels/nolabels/novallabs,
79 tabl:!tables/notables,
82 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
84 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
90 /* Number of chi-square statistics. */
93 /* Number of symmetric statistics. */
96 /* Number of directional statistics. */
97 #define N_DIRECTIONAL 13
99 /* A single table entry for general mode. */
102 int table; /* Flattened table number. */
105 double freq; /* Frequency count. */
106 double *data; /* Crosstabulation table for integer mode. */
109 union value values[1]; /* Values. */
112 /* A crosstabulation. */
115 int nvar; /* Number of variables. */
116 double missing; /* Missing cases count. */
117 int ofs; /* Integer mode: Offset into sorted_tab[]. */
118 const struct variable *vars[2]; /* At least two variables; sorted by
119 larger indices first. */
122 /* Integer mode variable info. */
125 int min; /* Minimum value. */
126 int max; /* Maximum value + 1. */
127 int count; /* max - min. */
130 static inline struct var_range *
131 get_var_range (const struct variable *v)
133 return var_get_aux (v);
136 /* Indexes into crosstab.v. */
143 /* General mode crosstabulation table. */
144 static struct hsh_table *gen_tab; /* Hash table. */
145 static int n_sorted_tab; /* Number of entries in sorted_tab. */
146 static struct table_entry **sorted_tab; /* Sorted table. */
148 /* Variables specifies on VARIABLES. */
149 static const struct variable **variables;
150 static size_t variables_cnt;
153 static struct crosstab **xtab;
156 /* Integer or general mode? */
165 static int num_cells; /* Number of cells requested. */
166 static int cells[8]; /* Cells requested. */
169 static int write_style; /* One of WR_* that specifies the WRITE style. */
171 /* Command parsing info. */
172 static struct cmd_crosstabs cmd;
175 static struct pool *pl_tc; /* For table cells. */
176 static struct pool *pl_col; /* For column data. */
178 static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
179 static void precalc (struct casereader *, const struct dataset *);
180 static void calc_general (struct ccase *, const struct dataset *);
181 static void calc_integer (struct ccase *, const struct dataset *);
182 static void postcalc (void);
183 static void submit (struct tab_table *);
185 static void format_short (char *s, const struct fmt_spec *fp,
186 const union value *v);
188 /* Parse and execute CROSSTABS, then clean up. */
190 cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
192 int result = internal_cmd_crosstabs (lexer, ds);
196 pool_destroy (pl_tc);
197 pool_destroy (pl_col);
199 for (i = 0; i < nxtab; i++)
206 /* Parses and executes the CROSSTABS procedure. */
208 internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
210 struct casegrouper *grouper;
211 struct casereader *input, *group;
219 pl_tc = pool_create ();
220 pl_col = pool_create ();
222 if (!parse_crosstabs (lexer, ds, &cmd, NULL))
225 mode = variables ? INTEGER : GENERAL;
230 cmd.a_cells[CRS_CL_COUNT] = 1;
236 for (i = 0; i < CRS_CL_count; i++)
241 cmd.a_cells[CRS_CL_COUNT] = 1;
242 cmd.a_cells[CRS_CL_ROW] = 1;
243 cmd.a_cells[CRS_CL_COLUMN] = 1;
244 cmd.a_cells[CRS_CL_TOTAL] = 1;
246 if (cmd.a_cells[CRS_CL_ALL])
248 for (i = 0; i < CRS_CL_count; i++)
250 cmd.a_cells[CRS_CL_ALL] = 0;
252 cmd.a_cells[CRS_CL_NONE] = 0;
254 for (num_cells = i = 0; i < CRS_CL_count; i++)
256 cells[num_cells++] = i;
259 if (cmd.sbc_statistics)
264 for (i = 0; i < CRS_ST_count; i++)
265 if (cmd.a_statistics[i])
268 cmd.a_statistics[CRS_ST_CHISQ] = 1;
269 if (cmd.a_statistics[CRS_ST_ALL])
270 for (i = 0; i < CRS_ST_count; i++)
271 cmd.a_statistics[i] = 1;
275 if (cmd.miss == CRS_REPORT && mode == GENERAL)
277 msg (SE, _("Missing mode REPORT not allowed in general mode. "
278 "Assuming MISSING=TABLE."));
279 cmd.miss = CRS_TABLE;
283 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
284 cmd.a_write[CRS_WR_ALL] = 0;
285 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
287 msg (SE, _("Write mode ALL not allowed in general mode. "
288 "Assuming WRITE=CELLS."));
289 cmd.a_write[CRS_WR_CELLS] = 1;
292 && (cmd.a_write[CRS_WR_NONE]
293 + cmd.a_write[CRS_WR_ALL]
294 + cmd.a_write[CRS_WR_CELLS] == 0))
295 cmd.a_write[CRS_WR_CELLS] = 1;
296 if (cmd.a_write[CRS_WR_CELLS])
297 write_style = CRS_WR_CELLS;
298 else if (cmd.a_write[CRS_WR_ALL])
299 write_style = CRS_WR_ALL;
301 write_style = CRS_WR_NONE;
303 input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
305 grouper = casegrouper_create_splits (input, dataset_dict (ds));
306 while (casegrouper_get_next_group (grouper, &group))
312 for (; casereader_read (group, &c); case_destroy (&c))
315 calc_general (&c, ds);
317 calc_integer (&c, ds);
319 casereader_destroy (group);
323 ok = casegrouper_destroy (grouper);
324 ok = proc_commit (ds) && ok;
326 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
329 /* Parses the TABLES subcommand. */
331 crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
333 struct const_var_set *var_set;
335 const struct variable ***by = NULL;
336 size_t *by_nvar = NULL;
340 /* Ensure that this is a TABLES subcommand. */
341 if (!lex_match_id (lexer, "TABLES")
342 && (lex_token (lexer) != T_ID ||
343 dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
344 && lex_token (lexer) != T_ALL)
346 lex_match (lexer, '=');
348 if (variables != NULL)
349 var_set = const_var_set_create_from_array (variables, variables_cnt);
351 var_set = const_var_set_create_from_dict (dataset_dict (ds));
352 assert (var_set != NULL);
356 by = xnrealloc (by, n_by + 1, sizeof *by);
357 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
358 if (!parse_const_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
359 PV_NO_DUPLICATE | PV_NO_SCRATCH))
361 if (xalloc_oversized (nx, by_nvar[n_by]))
363 msg (SE, _("Too many cross-tabulation variables or dimensions."));
369 if (!lex_match (lexer, T_BY))
373 lex_error (lexer, _("expecting BY"));
382 int *by_iter = xcalloc (n_by, sizeof *by_iter);
385 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
386 for (i = 0; i < nx; i++)
390 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
397 for (i = 0; i < n_by; i++)
398 x->vars[i] = by[i][by_iter[i]];
404 for (i = n_by - 1; i >= 0; i--)
406 if (++by_iter[i] < by_nvar[i])
419 /* All return paths lead here. */
423 for (i = 0; i < n_by; i++)
429 const_var_set_destroy (var_set);
434 /* Parses the VARIABLES subcommand. */
436 crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
440 msg (SE, _("VARIABLES must be specified before TABLES."));
444 lex_match (lexer, '=');
448 size_t orig_nv = variables_cnt;
453 if (!parse_variables_const (lexer, dataset_dict (ds),
454 &variables, &variables_cnt,
455 (PV_APPEND | PV_NUMERIC
456 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
459 if (lex_token (lexer) != '(')
461 lex_error (lexer, "expecting `('");
466 if (!lex_force_int (lexer))
468 min = lex_integer (lexer);
471 lex_match (lexer, ',');
473 if (!lex_force_int (lexer))
475 max = lex_integer (lexer);
478 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
484 if (lex_token (lexer) != ')')
486 lex_error (lexer, "expecting `)'");
491 for (i = orig_nv; i < variables_cnt; i++)
493 struct var_range *vr = xmalloc (sizeof *vr);
496 vr->count = max - min + 1;
497 var_attach_aux (variables[i], vr, var_dtor_free);
500 if (lex_token (lexer) == '/')
512 /* Data file processing. */
514 static int compare_table_entry (const void *, const void *, const void *);
515 static unsigned hash_table_entry (const void *, const void *);
517 /* Set up the crosstabulation tables for processing. */
519 precalc (struct casereader *input, const struct dataset *ds)
523 if (casereader_peek (input, 0, &c))
525 output_split_file_values (ds, &c);
531 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
541 for (i = 0; i < nxtab; i++)
543 struct crosstab *x = xtab[i];
548 x->ofs = n_sorted_tab;
550 for (j = 2; j < x->nvar; j++)
551 count *= get_var_range (x->vars[j - 2])->count;
553 sorted_tab = xnrealloc (sorted_tab,
554 n_sorted_tab + count, sizeof *sorted_tab);
555 v = xmalloca (sizeof *v * x->nvar);
556 for (j = 2; j < x->nvar; j++)
557 v[j] = get_var_range (x->vars[j])->min;
558 for (j = 0; j < count; j++)
560 struct table_entry *te;
563 te = sorted_tab[n_sorted_tab++]
564 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
568 int row_cnt = get_var_range (x->vars[0])->count;
569 int col_cnt = get_var_range (x->vars[1])->count;
570 const int mat_size = row_cnt * col_cnt;
573 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
574 for (m = 0; m < mat_size; m++)
578 for (k = 2; k < x->nvar; k++)
579 te->values[k].f = v[k];
580 for (k = 2; k < x->nvar; k++)
582 struct var_range *vr = get_var_range (x->vars[k]);
583 if (++v[k] >= vr->max)
592 sorted_tab = xnrealloc (sorted_tab,
593 n_sorted_tab + 1, sizeof *sorted_tab);
594 sorted_tab[n_sorted_tab] = NULL;
599 /* Form crosstabulations for general mode. */
601 calc_general (struct ccase *c, const struct dataset *ds)
603 /* Missing values to exclude. */
604 enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY
605 : cmd.miss == CRS_INCLUDE ? MV_SYSTEM
609 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
611 /* Flattened current table index. */
614 for (t = 0; t < nxtab; t++)
616 struct crosstab *x = xtab[t];
617 const size_t entry_size = (sizeof (struct table_entry)
618 + sizeof (union value) * (x->nvar - 1));
619 struct table_entry *te = xmalloca (entry_size);
621 /* Construct table entry for the current record and table. */
627 for (j = 0; j < x->nvar; j++)
629 const union value *v = case_data (c, x->vars[j]);
630 if (var_is_value_missing (x->vars[j], v, exclude))
632 x->missing += weight;
636 if (var_is_numeric (x->vars[j]))
637 te->values[j].f = case_num (c, x->vars[j]);
640 size_t n = var_get_width (x->vars[j]);
641 if (n > MAX_SHORT_STRING)
642 n = MAX_SHORT_STRING;
643 memcpy (te->values[j].s, case_str (c, x->vars[j]), n);
645 /* Necessary in order to simplify comparisons. */
646 memset (&te->values[j].s[var_get_width (x->vars[j])], 0,
647 sizeof (union value) - n);
652 /* Add record to hash table. */
654 struct table_entry **tepp
655 = (struct table_entry **) hsh_probe (gen_tab, te);
658 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
661 memcpy (tep, te, entry_size);
666 (*tepp)->u.freq += weight;
675 calc_integer (struct ccase *c, const struct dataset *ds)
677 bool bad_warn = true;
680 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
682 /* Flattened current table index. */
685 for (t = 0; t < nxtab; t++)
687 struct crosstab *x = xtab[t];
692 for (i = 0; i < x->nvar; i++)
694 const struct variable *const v = x->vars[i];
695 struct var_range *vr = get_var_range (v);
696 double value = case_num (c, v);
698 /* Note that the first test also rules out SYSMIS. */
699 if ((value < vr->min || value >= vr->max)
700 || (cmd.miss == CRS_TABLE
701 && var_is_num_missing (v, value, MV_USER)))
703 x->missing += weight;
709 ofs += fact * ((int) value - vr->min);
715 const struct variable *row_var = x->vars[ROW_VAR];
716 const int row = case_num (c, row_var) - get_var_range (row_var)->min;
718 const struct variable *col_var = x->vars[COL_VAR];
719 const int col = case_num (c, col_var) - get_var_range (col_var)->min;
721 const int col_dim = get_var_range (col_var)->count;
723 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
730 /* Compare the table_entry's at A and B and return a strcmp()-type
733 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
735 const struct table_entry *a = a_;
736 const struct table_entry *b = b_;
738 if (a->table > b->table)
740 else if (a->table < b->table)
744 const struct crosstab *x = xtab[a->table];
747 for (i = x->nvar - 1; i >= 0; i--)
748 if (var_is_numeric (x->vars[i]))
750 const double diffnum = a->values[i].f - b->values[i].f;
753 else if (diffnum > 0)
758 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
759 var_get_width (x->vars[i]));
768 /* Calculate a hash value from table_entry A. */
770 hash_table_entry (const void *a_, const void *aux UNUSED)
772 const struct table_entry *a = a_;
777 for (i = 0; i < xtab[a->table]->nvar; i++)
778 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
783 /* Post-data reading calculations. */
785 static struct table_entry **find_pivot_extent (struct table_entry **,
786 int *cnt, int pivot);
787 static void enum_var_values (struct table_entry **entries, int entry_cnt,
789 union value **values, int *value_cnt);
790 static void output_pivot_table (struct table_entry **, struct table_entry **,
791 double **, double **, double **,
792 int *, int *, int *);
793 static void make_summary_table (void);
800 n_sorted_tab = hsh_count (gen_tab);
801 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
804 make_summary_table ();
806 /* Identify all the individual crosstabulation tables, and deal with
809 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
810 int pc = n_sorted_tab; /* Pivot count. */
812 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
813 int maxrows = 0, maxcols = 0, maxcells = 0;
817 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
821 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
822 &maxrows, &maxcols, &maxcells);
831 hsh_destroy (gen_tab);
835 for (i = 0; i < n_sorted_tab; i++)
837 free (sorted_tab[i]->u.data);
838 free (sorted_tab[i]);
844 static void insert_summary (struct tab_table *, int tab_index, double valid);
846 /* Output a table summarizing the cases processed. */
848 make_summary_table (void)
850 struct tab_table *summary;
852 struct table_entry **pb = sorted_tab, **pe;
853 int pc = n_sorted_tab;
856 summary = tab_create (7, 3 + nxtab, 1);
857 tab_title (summary, _("Summary."));
858 tab_headers (summary, 1, 0, 3, 0);
859 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
860 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
861 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
862 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
863 tab_hline (summary, TAL_1, 1, 6, 1);
864 tab_hline (summary, TAL_1, 1, 6, 2);
865 tab_vline (summary, TAL_1, 3, 1, 1);
866 tab_vline (summary, TAL_1, 5, 1, 1);
870 for (i = 0; i < 3; i++)
872 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
873 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
876 tab_offset (summary, 0, 3);
882 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
886 while (cur_tab < (*pb)->table)
887 insert_summary (summary, cur_tab++, 0.);
890 for (valid = 0.; pb < pe; pb++)
891 valid += (*pb)->u.freq;
894 const struct crosstab *const x = xtab[(*pb)->table];
895 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
896 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
897 const int count = n_cols * n_rows;
899 for (valid = 0.; pb < pe; pb++)
901 const double *data = (*pb)->u.data;
904 for (i = 0; i < count; i++)
908 insert_summary (summary, cur_tab++, valid);
913 while (cur_tab < nxtab)
914 insert_summary (summary, cur_tab++, 0.);
919 /* Inserts a line into T describing the crosstabulation at index
920 TAB_INDEX, which has VALID valid observations. */
922 insert_summary (struct tab_table *t, int tab_index, double valid)
924 struct crosstab *x = xtab[tab_index];
926 tab_hline (t, TAL_1, 0, 6, 0);
928 /* Crosstabulation name. */
930 char *buf = xmalloca (128 * x->nvar);
934 for (i = 0; i < x->nvar; i++)
937 cp = stpcpy (cp, " * ");
939 cp = stpcpy (cp, var_to_string (x->vars[i]));
941 tab_text (t, 0, 0, TAB_LEFT, buf);
946 /* Counts and percentages. */
956 for (i = 0; i < 3; i++)
958 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
959 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
970 static struct tab_table *table; /* Crosstabulation table. */
971 static struct tab_table *chisq; /* Chi-square table. */
972 static struct tab_table *sym; /* Symmetric measures table. */
973 static struct tab_table *risk; /* Risk estimate table. */
974 static struct tab_table *direct; /* Directional measures table. */
977 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
979 /* Column values, number of columns. */
980 static union value *cols;
983 /* Row values, number of rows. */
984 static union value *rows;
987 /* Number of statistically interesting columns/rows (columns/rows with
989 static int ns_cols, ns_rows;
991 /* Crosstabulation. */
992 static const struct crosstab *x;
994 /* Number of variables from the crosstabulation to consider. This is
995 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
998 /* Matrix contents. */
999 static double *mat; /* Matrix proper. */
1000 static double *row_tot; /* Row totals. */
1001 static double *col_tot; /* Column totals. */
1002 static double W; /* Grand total. */
1004 static void display_dimensions (struct tab_table *, int first_difference,
1005 struct table_entry *);
1006 static void display_crosstabulation (void);
1007 static void display_chisq (void);
1008 static void display_symmetric (void);
1009 static void display_risk (void);
1010 static void display_directional (void);
1011 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1012 static void table_value_missing (struct tab_table *table, int c, int r,
1013 unsigned char opt, const union value *v,
1014 const struct variable *var);
1015 static void delete_missing (void);
1017 /* Output pivot table beginning at PB and continuing until PE,
1018 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1019 hold *MAXROWS entries. */
1021 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1022 double **matp, double **row_totp, double **col_totp,
1023 int *maxrows, int *maxcols, int *maxcells)
1026 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1027 int tc = pe - pb; /* Table count. */
1029 /* Table entry for header comparison. */
1030 struct table_entry *cmp = NULL;
1032 x = xtab[(*pb)->table];
1033 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1035 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1037 /* Crosstabulation table initialization. */
1040 table = tab_create (nvar + n_cols,
1041 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1042 tab_headers (table, nvar - 1, 0, 2, 0);
1044 /* First header line. */
1045 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1046 TAB_CENTER | TAT_TITLE, var_get_name (x->vars[COL_VAR]));
1048 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1050 /* Second header line. */
1054 for (i = 2; i < nvar; i++)
1055 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1056 TAB_RIGHT | TAT_TITLE, var_to_string (x->vars[i]));
1057 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1058 var_get_name (x->vars[ROW_VAR]));
1059 for (i = 0; i < n_cols; i++)
1060 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1062 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1065 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1066 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1070 char *title = xmalloca (x->nvar * 64 + 128);
1074 if (cmd.pivot == CRS_PIVOT)
1075 for (i = 0; i < nvar; i++)
1078 cp = stpcpy (cp, " by ");
1079 cp = stpcpy (cp, var_get_name (x->vars[i]));
1083 cp = spprintf (cp, "%s by %s for",
1084 var_get_name (x->vars[0]),
1085 var_get_name (x->vars[1]));
1086 for (i = 2; i < nvar; i++)
1088 char buf[64], *bufp;
1093 cp = stpcpy (cp, var_get_name (x->vars[i]));
1095 format_short (buf, var_get_print_format (x->vars[i]),
1097 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1099 cp = stpcpy (cp, bufp);
1103 cp = stpcpy (cp, " [");
1104 for (i = 0; i < num_cells; i++)
1112 static const struct tuple cell_names[] =
1114 {CRS_CL_COUNT, N_("count")},
1115 {CRS_CL_ROW, N_("row %")},
1116 {CRS_CL_COLUMN, N_("column %")},
1117 {CRS_CL_TOTAL, N_("total %")},
1118 {CRS_CL_EXPECTED, N_("expected")},
1119 {CRS_CL_RESIDUAL, N_("residual")},
1120 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1121 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1125 const struct tuple *t;
1127 for (t = cell_names; t->value != cells[i]; t++)
1128 assert (t->value != -1);
1130 cp = stpcpy (cp, ", ");
1131 cp = stpcpy (cp, gettext (t->name));
1135 tab_title (table, "%s", title);
1139 tab_offset (table, 0, 2);
1144 /* Chi-square table initialization. */
1145 if (cmd.a_statistics[CRS_ST_CHISQ])
1147 chisq = tab_create (6 + (nvar - 2),
1148 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1149 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1151 tab_title (chisq, _("Chi-square tests."));
1153 tab_offset (chisq, nvar - 2, 0);
1154 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1155 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1156 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1157 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1158 _("Asymp. Sig. (2-sided)"));
1159 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1160 _("Exact. Sig. (2-sided)"));
1161 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1162 _("Exact. Sig. (1-sided)"));
1164 tab_offset (chisq, 0, 1);
1169 /* Symmetric measures. */
1170 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1171 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1172 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1173 || cmd.a_statistics[CRS_ST_KAPPA])
1175 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1176 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1177 tab_title (sym, _("Symmetric measures."));
1179 tab_offset (sym, nvar - 2, 0);
1180 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1181 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1182 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1183 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1184 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1185 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1186 tab_offset (sym, 0, 1);
1191 /* Risk estimate. */
1192 if (cmd.a_statistics[CRS_ST_RISK])
1194 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1195 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1196 tab_title (risk, _("Risk estimate."));
1198 tab_offset (risk, nvar - 2, 0);
1199 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1200 _("95%% Confidence Interval"));
1201 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1202 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1203 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1204 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1205 tab_hline (risk, TAL_1, 2, 3, 1);
1206 tab_vline (risk, TAL_1, 2, 0, 1);
1207 tab_offset (risk, 0, 2);
1212 /* Directional measures. */
1213 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1214 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1216 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1217 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1218 tab_title (direct, _("Directional measures."));
1220 tab_offset (direct, nvar - 2, 0);
1221 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1222 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1223 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1224 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1225 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1226 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1227 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1228 tab_offset (direct, 0, 1);
1235 /* Find pivot subtable if applicable. */
1236 te = find_pivot_extent (tb, &tc, 0);
1240 /* Find all the row variable values. */
1241 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1243 /* Allocate memory space for the column and row totals. */
1244 if (n_rows > *maxrows)
1246 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1247 row_tot = *row_totp;
1250 if (n_cols > *maxcols)
1252 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1253 col_tot = *col_totp;
1257 /* Allocate table space for the matrix. */
1258 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1259 tab_realloc (table, -1,
1260 MAX (tab_nr (table) + (n_rows + 1) * num_cells,
1261 tab_nr (table) * (pe - pb) / (te - tb)));
1263 if (mode == GENERAL)
1265 /* Allocate memory space for the matrix. */
1266 if (n_cols * n_rows > *maxcells)
1268 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1269 *maxcells = n_cols * n_rows;
1274 /* Build the matrix and calculate column totals. */
1276 union value *cur_col = cols;
1277 union value *cur_row = rows;
1279 double *cp = col_tot;
1280 struct table_entry **p;
1283 for (p = &tb[0]; p < te; p++)
1285 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1289 for (; cur_row < &rows[n_rows]; cur_row++)
1295 mp = &mat[cur_col - cols];
1298 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1305 *cp += *mp = (*p)->u.freq;
1310 /* Zero out the rest of the matrix. */
1311 for (; cur_row < &rows[n_rows]; cur_row++)
1317 if (cur_col < &cols[n_cols])
1319 const int rem_cols = n_cols - (cur_col - cols);
1322 for (c = 0; c < rem_cols; c++)
1324 mp = &mat[cur_col - cols];
1325 for (r = 0; r < n_rows; r++)
1327 for (c = 0; c < rem_cols; c++)
1329 mp += n_cols - rem_cols;
1337 double *tp = col_tot;
1339 assert (mode == INTEGER);
1340 mat = (*tb)->u.data;
1343 /* Calculate column totals. */
1344 for (c = 0; c < n_cols; c++)
1347 double *cp = &mat[c];
1349 for (r = 0; r < n_rows; r++)
1350 cum += cp[r * n_cols];
1358 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1359 ns_cols += *cp != 0.;
1362 /* Calculate row totals. */
1365 double *rp = row_tot;
1368 for (ns_rows = 0, r = n_rows; r--; )
1371 for (c = n_cols; c--; )
1379 /* Calculate grand total. */
1385 if (n_rows < n_cols)
1386 tp = row_tot, n = n_rows;
1388 tp = col_tot, n = n_cols;
1394 /* Find the first variable that differs from the last subtable,
1395 then display the values of the dimensioning variables for
1396 each table that needs it. */
1398 int first_difference = nvar - 1;
1401 for (; ; first_difference--)
1403 assert (first_difference >= 2);
1404 if (memcmp (&cmp->values[first_difference],
1405 &(*tb)->values[first_difference],
1406 sizeof *cmp->values))
1412 display_dimensions (table, first_difference, *tb);
1414 display_dimensions (chisq, first_difference, *tb);
1416 display_dimensions (sym, first_difference, *tb);
1418 display_dimensions (risk, first_difference, *tb);
1420 display_dimensions (direct, first_difference, *tb);
1424 display_crosstabulation ();
1425 if (cmd.miss == CRS_REPORT)
1430 display_symmetric ();
1434 display_directional ();
1445 tab_resize (chisq, 4 + (nvar - 2), -1);
1456 /* Delete missing rows and columns for statistical analysis when
1459 delete_missing (void)
1464 for (r = 0; r < n_rows; r++)
1465 if (var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1469 for (c = 0; c < n_cols; c++)
1470 mat[c + r * n_cols] = 0.;
1478 for (c = 0; c < n_cols; c++)
1479 if (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1483 for (r = 0; r < n_rows; r++)
1484 mat[c + r * n_cols] = 0.;
1490 /* Prepare table T for submission, and submit it. */
1492 submit (struct tab_table *t)
1499 tab_resize (t, -1, 0);
1500 if (tab_nr (t) == tab_t (t))
1505 tab_offset (t, 0, 0);
1507 for (i = 2; i < nvar; i++)
1508 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1509 var_to_string (x->vars[i]));
1510 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1511 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1513 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1515 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1516 tab_dim (t, crosstabs_dim);
1520 /* Sets the widths of all the columns and heights of all the rows in
1521 table T for driver D. */
1523 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1527 /* Width of a numerical column. */
1528 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1529 if (cmd.miss == CRS_REPORT)
1530 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1532 /* Set width for header columns. */
1538 w = d->width - c * (t->nc - t->l);
1539 for (i = 0; i <= t->nc; i++)
1543 if (w < d->prop_em_width * 8)
1544 w = d->prop_em_width * 8;
1546 if (w > d->prop_em_width * 15)
1547 w = d->prop_em_width * 15;
1549 for (i = 0; i < t->l; i++)
1553 for (i = t->l; i < t->nc; i++)
1556 for (i = 0; i < t->nr; i++)
1557 t->h[i] = tab_natural_height (t, d, i);
1560 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1561 int *cnt, int pivot);
1562 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1563 int *cnt, int pivot);
1565 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1567 static struct table_entry **
1568 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1570 return (mode == GENERAL
1571 ? find_pivot_extent_general (tp, cnt, pivot)
1572 : find_pivot_extent_integer (tp, cnt, pivot));
1575 /* Find the extent of a region in TP that contains one table. If
1576 PIVOT != 0 that means a set of table entries with identical table
1577 number; otherwise they also have to have the same values for every
1578 dimension after the row and column dimensions. The table that is
1579 searched starts at TP and has length CNT. Returns the first entry
1580 after the last one in the table; sets *CNT to the number of
1581 remaining values. If there are no entries in TP at all, returns
1582 NULL. A yucky interface, admittedly, but it works. */
1583 static struct table_entry **
1584 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1586 struct table_entry *fp = *tp;
1591 x = xtab[(*tp)->table];
1599 if ((*tp)->table != fp->table)
1604 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1611 /* Integer mode correspondent to find_pivot_extent_general(). This
1612 could be optimized somewhat, but I just don't give a crap about
1613 CROSSTABS performance in integer mode, which is just a
1614 CROSSTABS wart as far as I'm concerned.
1616 That said, feel free to send optimization patches to me. */
1617 static struct table_entry **
1618 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1620 struct table_entry *fp = *tp;
1625 x = xtab[(*tp)->table];
1633 if ((*tp)->table != fp->table)
1638 if (memcmp (&(*tp)->values[2], &fp->values[2],
1639 sizeof (union value) * (x->nvar - 2)))
1646 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1647 result. WIDTH_ points to an int which is either 0 for a
1648 numeric value or a string width for a string value. */
1650 compare_value (const void *a_, const void *b_, const void *width_)
1652 const union value *a = a_;
1653 const union value *b = b_;
1654 const int *pwidth = width_;
1655 const int width = *pwidth;
1658 return (a->f < b->f) ? -1 : (a->f > b->f);
1660 return strncmp (a->s, b->s, width);
1663 /* Given an array of ENTRY_CNT table_entry structures starting at
1664 ENTRIES, creates a sorted list of the values that the variable
1665 with index VAR_IDX takes on. The values are returned as a
1666 malloc()'darray stored in *VALUES, with the number of values
1667 stored in *VALUE_CNT.
1670 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1671 union value **values, int *value_cnt)
1673 const struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1675 if (mode == GENERAL)
1677 int width = var_get_width (v);
1680 *values = xnmalloc (entry_cnt, sizeof **values);
1681 for (i = 0; i < entry_cnt; i++)
1682 (*values)[i] = entries[i]->values[var_idx];
1683 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1684 compare_value, &width);
1688 struct var_range *vr = get_var_range (v);
1691 assert (mode == INTEGER);
1692 *values = xnmalloc (vr->count, sizeof **values);
1693 for (i = 0; i < vr->count; i++)
1694 (*values)[i].f = i + vr->min;
1695 *value_cnt = vr->count;
1699 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1700 from V, displayed with print format spec from variable VAR. When
1701 in REPORT missing-value mode, missing values have an M appended. */
1703 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1704 const union value *v, const struct variable *var)
1707 const struct fmt_spec *print = var_get_print_format (var);
1709 const char *label = var_lookup_value_label (var, v);
1712 tab_text (table, c, r, TAB_LEFT, label);
1716 s.string = tab_alloc (table, print->w);
1717 format_short (s.string, print, v);
1718 s.length = strlen (s.string);
1719 if (cmd.miss == CRS_REPORT && var_is_num_missing (var, v->f, MV_USER))
1720 s.string[s.length++] = 'M';
1721 while (s.length && *s.string == ' ')
1726 tab_raw (table, c, r, opt, &s);
1729 /* Draws a line across TABLE at the current row to indicate the most
1730 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1731 that changed, and puts the values that changed into the table. TB
1732 and X must be the corresponding table_entry and crosstab,
1735 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1737 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1739 for (; first_difference >= 2; first_difference--)
1740 table_value_missing (table, nvar - first_difference - 1, 0,
1741 TAB_RIGHT, &tb->values[first_difference],
1742 x->vars[first_difference]);
1745 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1746 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1747 additionally suffixed with a letter `M'. */
1749 format_cell_entry (struct tab_table *table, int c, int r, double value,
1750 char suffix, bool mark_missing)
1752 const struct fmt_spec f = {FMT_F, 10, 1};
1757 s.string = tab_alloc (table, 16);
1759 data_out (&v, &f, s.string);
1760 while (*s.string == ' ')
1766 s.string[s.length++] = suffix;
1768 s.string[s.length++] = 'M';
1770 tab_raw (table, c, r, TAB_RIGHT, &s);
1773 /* Displays the crosstabulation table. */
1775 display_crosstabulation (void)
1780 for (r = 0; r < n_rows; r++)
1781 table_value_missing (table, nvar - 2, r * num_cells,
1782 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1784 tab_text (table, nvar - 2, n_rows * num_cells,
1785 TAB_LEFT, _("Total"));
1787 /* Put in the actual cells. */
1792 tab_offset (table, nvar - 1, -1);
1793 for (r = 0; r < n_rows; r++)
1796 tab_hline (table, TAL_1, -1, n_cols, 0);
1797 for (c = 0; c < n_cols; c++)
1799 bool mark_missing = false;
1800 double expected_value = row_tot[r] * col_tot[c] / W;
1801 if (cmd.miss == CRS_REPORT
1802 && (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER)
1803 || var_is_num_missing (x->vars[ROW_VAR], rows[r].f,
1805 mark_missing = true;
1806 for (i = 0; i < num_cells; i++)
1817 v = *mp / row_tot[r] * 100.;
1821 v = *mp / col_tot[c] * 100.;
1828 case CRS_CL_EXPECTED:
1831 case CRS_CL_RESIDUAL:
1832 v = *mp - expected_value;
1834 case CRS_CL_SRESIDUAL:
1835 v = (*mp - expected_value) / sqrt (expected_value);
1837 case CRS_CL_ASRESIDUAL:
1838 v = ((*mp - expected_value)
1839 / sqrt (expected_value
1840 * (1. - row_tot[r] / W)
1841 * (1. - col_tot[c] / W)));
1847 format_cell_entry (table, c, i, v, suffix, mark_missing);
1853 tab_offset (table, -1, tab_row (table) + num_cells);
1861 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1862 for (r = 0; r < n_rows; r++)
1864 bool mark_missing = false;
1866 if (cmd.miss == CRS_REPORT
1867 && var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1868 mark_missing = true;
1870 for (i = 0; i < num_cells; i++)
1885 v = row_tot[r] / W * 100.;
1889 v = row_tot[r] / W * 100.;
1892 case CRS_CL_EXPECTED:
1893 case CRS_CL_RESIDUAL:
1894 case CRS_CL_SRESIDUAL:
1895 case CRS_CL_ASRESIDUAL:
1902 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1903 tab_next_row (table);
1908 /* Column totals, grand total. */
1914 tab_hline (table, TAL_1, -1, n_cols, 0);
1915 for (c = 0; c <= n_cols; c++)
1917 double ct = c < n_cols ? col_tot[c] : W;
1918 bool mark_missing = false;
1921 if (cmd.miss == CRS_REPORT && c < n_cols
1922 && var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1923 mark_missing = true;
1925 for (i = 0; i < num_cells; i++)
1947 case CRS_CL_EXPECTED:
1948 case CRS_CL_RESIDUAL:
1949 case CRS_CL_SRESIDUAL:
1950 case CRS_CL_ASRESIDUAL:
1956 format_cell_entry (table, c, i, v, suffix, mark_missing);
1961 tab_offset (table, -1, tab_row (table) + last_row);
1964 tab_offset (table, 0, -1);
1967 static void calc_r (double *X, double *Y, double *, double *, double *);
1968 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1970 /* Display chi-square statistics. */
1972 display_chisq (void)
1974 static const char *chisq_stats[N_CHISQ] =
1976 N_("Pearson Chi-Square"),
1977 N_("Likelihood Ratio"),
1978 N_("Fisher's Exact Test"),
1979 N_("Continuity Correction"),
1980 N_("Linear-by-Linear Association"),
1982 double chisq_v[N_CHISQ];
1983 double fisher1, fisher2;
1989 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1991 tab_offset (chisq, nvar - 2, -1);
1993 for (i = 0; i < N_CHISQ; i++)
1995 if ((i != 2 && chisq_v[i] == SYSMIS)
1996 || (i == 2 && fisher1 == SYSMIS))
2000 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2003 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
2004 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
2005 tab_float (chisq, 3, 0, TAB_RIGHT,
2006 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
2011 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
2012 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
2014 tab_next_row (chisq);
2017 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2018 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2019 tab_next_row (chisq);
2021 tab_offset (chisq, 0, -1);
2024 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2025 double[N_SYMMETRIC]);
2027 /* Display symmetric measures. */
2029 display_symmetric (void)
2031 static const char *categories[] =
2033 N_("Nominal by Nominal"),
2034 N_("Ordinal by Ordinal"),
2035 N_("Interval by Interval"),
2036 N_("Measure of Agreement"),
2039 static const char *stats[N_SYMMETRIC] =
2043 N_("Contingency Coefficient"),
2044 N_("Kendall's tau-b"),
2045 N_("Kendall's tau-c"),
2047 N_("Spearman Correlation"),
2052 static const int stats_categories[N_SYMMETRIC] =
2054 0, 0, 0, 1, 1, 1, 1, 2, 3,
2058 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2061 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2064 tab_offset (sym, nvar - 2, -1);
2066 for (i = 0; i < N_SYMMETRIC; i++)
2068 if (sym_v[i] == SYSMIS)
2071 if (stats_categories[i] != last_cat)
2073 last_cat = stats_categories[i];
2074 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2077 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2078 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2079 if (sym_ase[i] != SYSMIS)
2080 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2081 if (sym_t[i] != SYSMIS)
2082 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2083 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2087 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2088 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2091 tab_offset (sym, 0, -1);
2094 static int calc_risk (double[], double[], double[], union value *);
2096 /* Display risk estimate. */
2101 double risk_v[3], lower[3], upper[3];
2105 if (!calc_risk (risk_v, upper, lower, c))
2108 tab_offset (risk, nvar - 2, -1);
2110 for (i = 0; i < 3; i++)
2112 if (risk_v[i] == SYSMIS)
2118 if (var_is_numeric (x->vars[COL_VAR]))
2119 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2120 var_get_name (x->vars[COL_VAR]), c[0].f, c[1].f);
2122 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2123 var_get_name (x->vars[COL_VAR]),
2124 var_get_width (x->vars[COL_VAR]), c[0].s,
2125 var_get_width (x->vars[COL_VAR]), c[1].s);
2129 if (var_is_numeric (x->vars[ROW_VAR]))
2130 sprintf (buf, _("For cohort %s = %g"),
2131 var_get_name (x->vars[ROW_VAR]), rows[i - 1].f);
2133 sprintf (buf, _("For cohort %s = %.*s"),
2134 var_get_name (x->vars[ROW_VAR]),
2135 var_get_width (x->vars[ROW_VAR]), rows[i - 1].s);
2139 tab_text (risk, 0, 0, TAB_LEFT, buf);
2140 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2141 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2142 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2143 tab_next_row (risk);
2146 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2147 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2148 tab_next_row (risk);
2150 tab_offset (risk, 0, -1);
2153 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2154 double[N_DIRECTIONAL]);
2156 /* Display directional measures. */
2158 display_directional (void)
2160 static const char *categories[] =
2162 N_("Nominal by Nominal"),
2163 N_("Ordinal by Ordinal"),
2164 N_("Nominal by Interval"),
2167 static const char *stats[] =
2170 N_("Goodman and Kruskal tau"),
2171 N_("Uncertainty Coefficient"),
2176 static const char *types[] =
2183 static const int stats_categories[N_DIRECTIONAL] =
2185 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2188 static const int stats_stats[N_DIRECTIONAL] =
2190 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2193 static const int stats_types[N_DIRECTIONAL] =
2195 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2198 static const int *stats_lookup[] =
2205 static const char **stats_names[] =
2217 double direct_v[N_DIRECTIONAL];
2218 double direct_ase[N_DIRECTIONAL];
2219 double direct_t[N_DIRECTIONAL];
2223 if (!calc_directional (direct_v, direct_ase, direct_t))
2226 tab_offset (direct, nvar - 2, -1);
2228 for (i = 0; i < N_DIRECTIONAL; i++)
2230 if (direct_v[i] == SYSMIS)
2236 for (j = 0; j < 3; j++)
2237 if (last[j] != stats_lookup[j][i])
2240 tab_hline (direct, TAL_1, j, 6, 0);
2245 int k = last[j] = stats_lookup[j][i];
2250 string = var_get_name (x->vars[0]);
2252 string = var_get_name (x->vars[1]);
2254 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2255 gettext (stats_names[j][k]), string);
2260 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2261 if (direct_ase[i] != SYSMIS)
2262 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2263 if (direct_t[i] != SYSMIS)
2264 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2265 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2266 tab_next_row (direct);
2269 tab_offset (direct, 0, -1);
2272 /* Statistical calculations. */
2274 /* Returns the value of the gamma (factorial) function for an integer
2277 gamma_int (double x)
2282 for (i = 2; i < x; i++)
2287 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2289 static inline double
2290 Pr (int a, int b, int c, int d)
2292 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2293 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2294 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2295 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2296 / gamma_int (a + b + c + d + 1.));
2299 /* Swap the contents of A and B. */
2301 swap (int *a, int *b)
2308 /* Calculate significance for Fisher's exact test as specified in
2309 _SPSS Statistical Algorithms_, Appendix 5. */
2311 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2315 if (MIN (c, d) < MIN (a, b))
2316 swap (&a, &c), swap (&b, &d);
2317 if (MIN (b, d) < MIN (a, c))
2318 swap (&a, &b), swap (&c, &d);
2322 swap (&a, &b), swap (&c, &d);
2324 swap (&a, &c), swap (&b, &d);
2328 for (x = 0; x <= a; x++)
2329 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2331 *fisher2 = *fisher1;
2332 for (x = 1; x <= b; x++)
2333 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2336 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2337 columns with values COLS and N_ROWS rows with values ROWS. Values
2338 in the matrix sum to W. */
2340 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2341 double *fisher1, double *fisher2)
2345 chisq[0] = chisq[1] = 0.;
2346 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2347 *fisher1 = *fisher2 = SYSMIS;
2349 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2351 if (ns_rows <= 1 || ns_cols <= 1)
2353 chisq[0] = chisq[1] = SYSMIS;
2357 for (r = 0; r < n_rows; r++)
2358 for (c = 0; c < n_cols; c++)
2360 const double expected = row_tot[r] * col_tot[c] / W;
2361 const double freq = mat[n_cols * r + c];
2362 const double residual = freq - expected;
2364 chisq[0] += residual * residual / expected;
2366 chisq[1] += freq * log (expected / freq);
2377 /* Calculate Yates and Fisher exact test. */
2378 if (ns_cols == 2 && ns_rows == 2)
2380 double f11, f12, f21, f22;
2386 for (i = j = 0; i < n_cols; i++)
2387 if (col_tot[i] != 0.)
2396 f11 = mat[nz_cols[0]];
2397 f12 = mat[nz_cols[1]];
2398 f21 = mat[nz_cols[0] + n_cols];
2399 f22 = mat[nz_cols[1] + n_cols];
2404 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2407 chisq[3] = (W * x * x
2408 / (f11 + f12) / (f21 + f22)
2409 / (f11 + f21) / (f12 + f22));
2417 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2418 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2421 /* Calculate Mantel-Haenszel. */
2422 if (var_is_numeric (x->vars[ROW_VAR]) && var_is_numeric (x->vars[COL_VAR]))
2424 double r, ase_0, ase_1;
2425 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2427 chisq[4] = (W - 1.) * r * r;
2432 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2433 ASE_1, and ase_0 into ASE_0. The row and column values must be
2434 passed in X and Y. */
2436 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2438 double SX, SY, S, T;
2440 double sum_XYf, sum_X2Y2f;
2441 double sum_Xr, sum_X2r;
2442 double sum_Yc, sum_Y2c;
2445 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2446 for (j = 0; j < n_cols; j++)
2448 double fij = mat[j + i * n_cols];
2449 double product = X[i] * Y[j];
2450 double temp = fij * product;
2452 sum_X2Y2f += temp * product;
2455 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2457 sum_Xr += X[i] * row_tot[i];
2458 sum_X2r += pow2 (X[i]) * row_tot[i];
2462 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2464 sum_Yc += Y[i] * col_tot[i];
2465 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2469 S = sum_XYf - sum_Xr * sum_Yc / W;
2470 SX = sum_X2r - pow2 (sum_Xr) / W;
2471 SY = sum_Y2c - pow2 (sum_Yc) / W;
2474 *ase_0 = sqrt ((sum_X2Y2f - pow2 (sum_XYf) / W) / (sum_X2r * sum_Y2c));
2479 for (s = c = 0., i = 0; i < n_rows; i++)
2480 for (j = 0; j < n_cols; j++)
2482 double Xresid, Yresid;
2485 Xresid = X[i] - Xbar;
2486 Yresid = Y[j] - Ybar;
2487 temp = (T * Xresid * Yresid
2489 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2490 y = mat[j + i * n_cols] * temp * temp - c;
2495 *ase_1 = sqrt (s) / (T * T);
2499 static double somers_d_v[3];
2500 static double somers_d_ase[3];
2501 static double somers_d_t[3];
2503 /* Calculate symmetric statistics and their asymptotic standard
2504 errors. Returns 0 if none could be calculated. */
2506 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2507 double t[N_SYMMETRIC])
2509 int q = MIN (ns_rows, ns_cols);
2518 for (i = 0; i < N_SYMMETRIC; i++)
2519 v[i] = ase[i] = t[i] = SYSMIS;
2522 /* Phi, Cramer's V, contingency coefficient. */
2523 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2525 double Xp = 0.; /* Pearson chi-square. */
2530 for (r = 0; r < n_rows; r++)
2531 for (c = 0; c < n_cols; c++)
2533 const double expected = row_tot[r] * col_tot[c] / W;
2534 const double freq = mat[n_cols * r + c];
2535 const double residual = freq - expected;
2537 Xp += residual * residual / expected;
2541 if (cmd.a_statistics[CRS_ST_PHI])
2543 v[0] = sqrt (Xp / W);
2544 v[1] = sqrt (Xp / (W * (q - 1)));
2546 if (cmd.a_statistics[CRS_ST_CC])
2547 v[2] = sqrt (Xp / (Xp + W));
2550 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2551 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2556 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2563 for (r = 0; r < n_rows; r++)
2564 Dr -= pow2 (row_tot[r]);
2565 for (c = 0; c < n_cols; c++)
2566 Dc -= pow2 (col_tot[c]);
2572 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2573 for (c = 0; c < n_cols; c++)
2577 for (r = 0; r < n_rows; r++)
2578 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2588 for (i = 0; i < n_rows; i++)
2592 for (j = 1; j < n_cols; j++)
2593 Cij += col_tot[j] - cum[j + i * n_cols];
2596 for (j = 1; j < n_cols; j++)
2597 Dij += cum[j + (i - 1) * n_cols];
2601 double fij = mat[j + i * n_cols];
2607 assert (j < n_cols);
2609 Cij -= col_tot[j] - cum[j + i * n_cols];
2610 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2614 Cij += cum[j - 1 + (i - 1) * n_cols];
2615 Dij -= cum[j + (i - 1) * n_cols];
2621 if (cmd.a_statistics[CRS_ST_BTAU])
2622 v[3] = (P - Q) / sqrt (Dr * Dc);
2623 if (cmd.a_statistics[CRS_ST_CTAU])
2624 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2625 if (cmd.a_statistics[CRS_ST_GAMMA])
2626 v[5] = (P - Q) / (P + Q);
2628 /* ASE for tau-b, tau-c, gamma. Calculations could be
2629 eliminated here, at expense of memory. */
2634 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2635 for (i = 0; i < n_rows; i++)
2639 for (j = 1; j < n_cols; j++)
2640 Cij += col_tot[j] - cum[j + i * n_cols];
2643 for (j = 1; j < n_cols; j++)
2644 Dij += cum[j + (i - 1) * n_cols];
2648 double fij = mat[j + i * n_cols];
2650 if (cmd.a_statistics[CRS_ST_BTAU])
2652 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2653 + v[3] * (row_tot[i] * Dc
2654 + col_tot[j] * Dr));
2655 btau_cum += fij * temp * temp;
2659 const double temp = Cij - Dij;
2660 ctau_cum += fij * temp * temp;
2663 if (cmd.a_statistics[CRS_ST_GAMMA])
2665 const double temp = Q * Cij - P * Dij;
2666 gamma_cum += fij * temp * temp;
2669 if (cmd.a_statistics[CRS_ST_D])
2671 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2672 - (P - Q) * (W - row_tot[i]));
2673 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2674 - (Q - P) * (W - col_tot[j]));
2679 assert (j < n_cols);
2681 Cij -= col_tot[j] - cum[j + i * n_cols];
2682 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2686 Cij += cum[j - 1 + (i - 1) * n_cols];
2687 Dij -= cum[j + (i - 1) * n_cols];
2693 btau_var = ((btau_cum
2694 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2696 if (cmd.a_statistics[CRS_ST_BTAU])
2698 ase[3] = sqrt (btau_var);
2699 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2702 if (cmd.a_statistics[CRS_ST_CTAU])
2704 ase[4] = ((2 * q / ((q - 1) * W * W))
2705 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2706 t[4] = v[4] / ase[4];
2708 if (cmd.a_statistics[CRS_ST_GAMMA])
2710 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2711 t[5] = v[5] / (2. / (P + Q)
2712 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2714 if (cmd.a_statistics[CRS_ST_D])
2716 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2717 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2718 somers_d_t[0] = (somers_d_v[0]
2720 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2721 somers_d_v[1] = (P - Q) / Dc;
2722 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2723 somers_d_t[1] = (somers_d_v[1]
2725 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2726 somers_d_v[2] = (P - Q) / Dr;
2727 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2728 somers_d_t[2] = (somers_d_v[2]
2730 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2736 /* Spearman correlation, Pearson's r. */
2737 if (cmd.a_statistics[CRS_ST_CORR])
2739 double *R = xmalloca (sizeof *R * n_rows);
2740 double *C = xmalloca (sizeof *C * n_cols);
2743 double y, t, c = 0., s = 0.;
2748 R[i] = s + (row_tot[i] + 1.) / 2.;
2755 assert (i < n_rows);
2760 double y, t, c = 0., s = 0.;
2765 C[j] = s + (col_tot[j] + 1.) / 2;
2772 assert (j < n_cols);
2776 calc_r (R, C, &v[6], &t[6], &ase[6]);
2782 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2786 /* Cohen's kappa. */
2787 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2789 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2792 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2793 i < ns_rows; i++, j++)
2797 while (col_tot[j] == 0.)
2800 prod = row_tot[i] * col_tot[j];
2801 sum = row_tot[i] + col_tot[j];
2803 sum_fii += mat[j + i * n_cols];
2805 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2806 sum_riciri_ci += prod * sum;
2808 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2809 for (j = 0; j < ns_cols; j++)
2811 double sum = row_tot[i] + col_tot[j];
2812 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2815 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2817 ase[8] = sqrt ((W * W * sum_rici
2818 + sum_rici * sum_rici
2819 - W * sum_riciri_ci)
2820 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2822 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2823 / pow2 (W * W - sum_rici))
2824 + ((2. * (W - sum_fii)
2825 * (2. * sum_fii * sum_rici
2826 - W * sum_fiiri_ci))
2827 / cube (W * W - sum_rici))
2828 + (pow2 (W - sum_fii)
2829 * (W * sum_fijri_ci2 - 4.
2830 * sum_rici * sum_rici)
2831 / pow4 (W * W - sum_rici))));
2833 t[8] = v[8] / ase[8];
2840 /* Calculate risk estimate. */
2842 calc_risk (double *value, double *upper, double *lower, union value *c)
2844 double f11, f12, f21, f22;
2850 for (i = 0; i < 3; i++)
2851 value[i] = upper[i] = lower[i] = SYSMIS;
2854 if (ns_rows != 2 || ns_cols != 2)
2861 for (i = j = 0; i < n_cols; i++)
2862 if (col_tot[i] != 0.)
2871 f11 = mat[nz_cols[0]];
2872 f12 = mat[nz_cols[1]];
2873 f21 = mat[nz_cols[0] + n_cols];
2874 f22 = mat[nz_cols[1] + n_cols];
2876 c[0] = cols[nz_cols[0]];
2877 c[1] = cols[nz_cols[1]];
2880 value[0] = (f11 * f22) / (f12 * f21);
2881 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2882 lower[0] = value[0] * exp (-1.960 * v);
2883 upper[0] = value[0] * exp (1.960 * v);
2885 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2886 v = sqrt ((f12 / (f11 * (f11 + f12)))
2887 + (f22 / (f21 * (f21 + f22))));
2888 lower[1] = value[1] * exp (-1.960 * v);
2889 upper[1] = value[1] * exp (1.960 * v);
2891 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2892 v = sqrt ((f11 / (f12 * (f11 + f12)))
2893 + (f21 / (f22 * (f21 + f22))));
2894 lower[2] = value[2] * exp (-1.960 * v);
2895 upper[2] = value[2] * exp (1.960 * v);
2900 /* Calculate directional measures. */
2902 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2903 double t[N_DIRECTIONAL])
2908 for (i = 0; i < N_DIRECTIONAL; i++)
2909 v[i] = ase[i] = t[i] = SYSMIS;
2913 if (cmd.a_statistics[CRS_ST_LAMBDA])
2915 double *fim = xnmalloc (n_rows, sizeof *fim);
2916 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2917 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2918 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2919 double sum_fim, sum_fmj;
2921 int rm_index, cm_index;
2924 /* Find maximum for each row and their sum. */
2925 for (sum_fim = 0., i = 0; i < n_rows; i++)
2927 double max = mat[i * n_cols];
2930 for (j = 1; j < n_cols; j++)
2931 if (mat[j + i * n_cols] > max)
2933 max = mat[j + i * n_cols];
2937 sum_fim += fim[i] = max;
2938 fim_index[i] = index;
2941 /* Find maximum for each column. */
2942 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2944 double max = mat[j];
2947 for (i = 1; i < n_rows; i++)
2948 if (mat[j + i * n_cols] > max)
2950 max = mat[j + i * n_cols];
2954 sum_fmj += fmj[j] = max;
2955 fmj_index[j] = index;
2958 /* Find maximum row total. */
2961 for (i = 1; i < n_rows; i++)
2962 if (row_tot[i] > rm)
2968 /* Find maximum column total. */
2971 for (j = 1; j < n_cols; j++)
2972 if (col_tot[j] > cm)
2978 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2979 v[1] = (sum_fmj - rm) / (W - rm);
2980 v[2] = (sum_fim - cm) / (W - cm);
2982 /* ASE1 for Y given X. */
2986 for (accum = 0., i = 0; i < n_rows; i++)
2987 for (j = 0; j < n_cols; j++)
2989 const int deltaj = j == cm_index;
2990 accum += (mat[j + i * n_cols]
2991 * pow2 ((j == fim_index[i])
2996 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2999 /* ASE0 for Y given X. */
3003 for (accum = 0., i = 0; i < n_rows; i++)
3004 if (cm_index != fim_index[i])
3005 accum += (mat[i * n_cols + fim_index[i]]
3006 + mat[i * n_cols + cm_index]);
3007 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
3010 /* ASE1 for X given Y. */
3014 for (accum = 0., i = 0; i < n_rows; i++)
3015 for (j = 0; j < n_cols; j++)
3017 const int deltaj = i == rm_index;
3018 accum += (mat[j + i * n_cols]
3019 * pow2 ((i == fmj_index[j])
3024 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3027 /* ASE0 for X given Y. */
3031 for (accum = 0., j = 0; j < n_cols; j++)
3032 if (rm_index != fmj_index[j])
3033 accum += (mat[j + n_cols * fmj_index[j]]
3034 + mat[j + n_cols * rm_index]);
3035 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3038 /* Symmetric ASE0 and ASE1. */
3043 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3044 for (j = 0; j < n_cols; j++)
3046 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3047 int temp1 = (i == rm_index) + (j == cm_index);
3048 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3049 accum1 += (mat[j + i * n_cols]
3050 * pow2 (temp0 + (v[0] - 1.) * temp1));
3052 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3053 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3054 / (2. * W - rm - cm));
3063 double sum_fij2_ri, sum_fij2_ci;
3064 double sum_ri2, sum_cj2;
3066 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3067 for (j = 0; j < n_cols; j++)
3069 double temp = pow2 (mat[j + i * n_cols]);
3070 sum_fij2_ri += temp / row_tot[i];
3071 sum_fij2_ci += temp / col_tot[j];
3074 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3075 sum_ri2 += pow2 (row_tot[i]);
3077 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3078 sum_cj2 += pow2 (col_tot[j]);
3080 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3081 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3085 if (cmd.a_statistics[CRS_ST_UC])
3087 double UX, UY, UXY, P;
3088 double ase1_yx, ase1_xy, ase1_sym;
3091 for (UX = 0., i = 0; i < n_rows; i++)
3092 if (row_tot[i] > 0.)
3093 UX -= row_tot[i] / W * log (row_tot[i] / W);
3095 for (UY = 0., j = 0; j < n_cols; j++)
3096 if (col_tot[j] > 0.)
3097 UY -= col_tot[j] / W * log (col_tot[j] / W);
3099 for (UXY = P = 0., i = 0; i < n_rows; i++)
3100 for (j = 0; j < n_cols; j++)
3102 double entry = mat[j + i * n_cols];
3107 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3108 UXY -= entry / W * log (entry / W);
3111 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3112 for (j = 0; j < n_cols; j++)
3114 double entry = mat[j + i * n_cols];
3119 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3120 + (UX - UXY) * log (col_tot[j] / W));
3121 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3122 + (UY - UXY) * log (row_tot[i] / W));
3123 ase1_sym += entry * pow2 ((UXY
3124 * log (row_tot[i] * col_tot[j] / (W * W)))
3125 - (UX + UY) * log (entry / W));
3128 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3129 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3130 t[5] = v[5] / ((2. / (W * (UX + UY)))
3131 * sqrt (P - pow2 (UX + UY - UXY) / W));
3133 v[6] = (UX + UY - UXY) / UX;
3134 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3135 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3137 v[7] = (UX + UY - UXY) / UY;
3138 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3139 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3143 if (cmd.a_statistics[CRS_ST_D])
3148 calc_symmetric (NULL, NULL, NULL);
3149 for (i = 0; i < 3; i++)
3151 v[8 + i] = somers_d_v[i];
3152 ase[8 + i] = somers_d_ase[i];
3153 t[8 + i] = somers_d_t[i];
3158 if (cmd.a_statistics[CRS_ST_ETA])
3161 double sum_Xr, sum_X2r;
3165 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3167 sum_Xr += rows[i].f * row_tot[i];
3168 sum_X2r += pow2 (rows[i].f) * row_tot[i];
3170 SX = sum_X2r - pow2 (sum_Xr) / W;
3172 for (SXW = 0., j = 0; j < n_cols; j++)
3176 for (cum = 0., i = 0; i < n_rows; i++)
3178 SXW += pow2 (rows[i].f) * mat[j + i * n_cols];
3179 cum += rows[i].f * mat[j + i * n_cols];
3182 SXW -= cum * cum / col_tot[j];
3184 v[11] = sqrt (1. - SXW / SX);
3188 double sum_Yc, sum_Y2c;
3192 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3194 sum_Yc += cols[i].f * col_tot[i];
3195 sum_Y2c += pow2 (cols[i].f) * col_tot[i];
3197 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3199 for (SYW = 0., i = 0; i < n_rows; i++)
3203 for (cum = 0., j = 0; j < n_cols; j++)
3205 SYW += pow2 (cols[j].f) * mat[j + i * n_cols];
3206 cum += cols[j].f * mat[j + i * n_cols];
3209 SYW -= cum * cum / row_tot[i];
3211 v[12] = sqrt (1. - SYW / SY);
3218 /* A wrapper around data_out() that limits string output to short
3219 string width and null terminates the result. */
3221 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3223 struct fmt_spec fmt_subst;
3225 /* Limit to short string width. */
3226 if (fmt_is_string (fp->type))
3230 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3231 if (fmt_subst.type == FMT_A)
3232 fmt_subst.w = MIN (8, fmt_subst.w);
3234 fmt_subst.w = MIN (16, fmt_subst.w);
3240 data_out (v, fp, s);
3242 /* Null terminate. */