1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 - Pearson's R (but not Spearman!) is off a little.
20 - T values for Spearman's R and Pearson's R are wrong.
21 - How to calculate significance of symmetric and directional measures?
22 - Asymmetric ASEs and T values for lambda are wrong.
23 - ASE of Goodman and Kruskal's tau is not calculated.
24 - ASE of symmetric somers' d is wrong.
25 - Approx. T of uncertainty coefficient is wrong.
32 #include <gsl/gsl_cdf.h>
36 #include <data/case.h>
37 #include <data/casegrouper.h>
38 #include <data/casereader.h>
39 #include <data/data-out.h>
40 #include <data/dictionary.h>
41 #include <data/format.h>
42 #include <data/procedure.h>
43 #include <data/value-labels.h>
44 #include <data/variable.h>
45 #include <language/command.h>
46 #include <language/dictionary/split-file.h>
47 #include <language/lexer/lexer.h>
48 #include <language/lexer/variable-parser.h>
49 #include <libpspp/array.h>
50 #include <libpspp/assertion.h>
51 #include <libpspp/compiler.h>
52 #include <libpspp/hash.h>
53 #include <libpspp/message.h>
54 #include <libpspp/misc.h>
55 #include <libpspp/pool.h>
56 #include <libpspp/str.h>
57 #include <output/output.h>
58 #include <output/table.h>
65 #define _(msgid) gettext (msgid)
66 #define N_(msgid) msgid
74 missing=miss:!table/include/report;
75 +write[wr_]=none,cells,all;
76 +format=fmt:!labels/nolabels/novallabs,
79 tabl:!tables/notables,
82 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
84 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
90 /* Number of chi-square statistics. */
93 /* Number of symmetric statistics. */
96 /* Number of directional statistics. */
97 #define N_DIRECTIONAL 13
99 /* A single table entry for general mode. */
102 int table; /* Flattened table number. */
105 double freq; /* Frequency count. */
106 double *data; /* Crosstabulation table for integer mode. */
109 union value values[1]; /* Values. */
112 /* A crosstabulation. */
115 int nvar; /* Number of variables. */
116 double missing; /* Missing cases count. */
117 int ofs; /* Integer mode: Offset into sorted_tab[]. */
118 const struct variable *vars[2]; /* At least two variables; sorted by
119 larger indices first. */
122 /* Integer mode variable info. */
125 int min; /* Minimum value. */
126 int max; /* Maximum value + 1. */
127 int count; /* max - min. */
130 static inline struct var_range *
131 get_var_range (const struct variable *v)
133 return var_get_aux (v);
136 /* Indexes into crosstab.v. */
143 /* General mode crosstabulation table. */
144 static struct hsh_table *gen_tab; /* Hash table. */
145 static int n_sorted_tab; /* Number of entries in sorted_tab. */
146 static struct table_entry **sorted_tab; /* Sorted table. */
148 /* Variables specifies on VARIABLES. */
149 static const struct variable **variables;
150 static size_t variables_cnt;
153 static struct crosstab **xtab;
156 /* Integer or general mode? */
165 static int num_cells; /* Number of cells requested. */
166 static int cells[8]; /* Cells requested. */
169 static int write_style; /* One of WR_* that specifies the WRITE style. */
171 /* Command parsing info. */
172 static struct cmd_crosstabs cmd;
175 static struct pool *pl_tc; /* For table cells. */
176 static struct pool *pl_col; /* For column data. */
178 static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
179 static void precalc (struct casereader *, const struct dataset *);
180 static void calc_general (struct ccase *, const struct dataset *);
181 static void calc_integer (struct ccase *, const struct dataset *);
182 static void postcalc (void);
183 static void submit (struct tab_table *);
185 static void format_short (char *s, const struct fmt_spec *fp,
186 const union value *v);
188 /* Parse and execute CROSSTABS, then clean up. */
190 cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
192 int result = internal_cmd_crosstabs (lexer, ds);
196 pool_destroy (pl_tc);
197 pool_destroy (pl_col);
199 for (i = 0; i < nxtab; i++)
206 /* Parses and executes the CROSSTABS procedure. */
208 internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
210 struct casegrouper *grouper;
211 struct casereader *input, *group;
219 pl_tc = pool_create ();
220 pl_col = pool_create ();
222 if (!parse_crosstabs (lexer, ds, &cmd, NULL))
225 mode = variables ? INTEGER : GENERAL;
230 cmd.a_cells[CRS_CL_COUNT] = 1;
236 for (i = 0; i < CRS_CL_count; i++)
241 cmd.a_cells[CRS_CL_COUNT] = 1;
242 cmd.a_cells[CRS_CL_ROW] = 1;
243 cmd.a_cells[CRS_CL_COLUMN] = 1;
244 cmd.a_cells[CRS_CL_TOTAL] = 1;
246 if (cmd.a_cells[CRS_CL_ALL])
248 for (i = 0; i < CRS_CL_count; i++)
250 cmd.a_cells[CRS_CL_ALL] = 0;
252 cmd.a_cells[CRS_CL_NONE] = 0;
254 for (num_cells = i = 0; i < CRS_CL_count; i++)
256 cells[num_cells++] = i;
259 if (cmd.sbc_statistics)
264 for (i = 0; i < CRS_ST_count; i++)
265 if (cmd.a_statistics[i])
268 cmd.a_statistics[CRS_ST_CHISQ] = 1;
269 if (cmd.a_statistics[CRS_ST_ALL])
270 for (i = 0; i < CRS_ST_count; i++)
271 cmd.a_statistics[i] = 1;
275 if (cmd.miss == CRS_REPORT && mode == GENERAL)
277 msg (SE, _("Missing mode REPORT not allowed in general mode. "
278 "Assuming MISSING=TABLE."));
279 cmd.miss = CRS_TABLE;
283 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
284 cmd.a_write[CRS_WR_ALL] = 0;
285 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
287 msg (SE, _("Write mode ALL not allowed in general mode. "
288 "Assuming WRITE=CELLS."));
289 cmd.a_write[CRS_WR_CELLS] = 1;
292 && (cmd.a_write[CRS_WR_NONE]
293 + cmd.a_write[CRS_WR_ALL]
294 + cmd.a_write[CRS_WR_CELLS] == 0))
295 cmd.a_write[CRS_WR_CELLS] = 1;
296 if (cmd.a_write[CRS_WR_CELLS])
297 write_style = CRS_WR_CELLS;
298 else if (cmd.a_write[CRS_WR_ALL])
299 write_style = CRS_WR_ALL;
301 write_style = CRS_WR_NONE;
303 input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
305 grouper = casegrouper_create_splits (input, dataset_dict (ds));
306 while (casegrouper_get_next_group (grouper, &group))
312 for (; casereader_read (group, &c); case_destroy (&c))
315 calc_general (&c, ds);
317 calc_integer (&c, ds);
319 casereader_destroy (group);
323 ok = casegrouper_destroy (grouper);
324 ok = proc_commit (ds) && ok;
326 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
329 /* Parses the TABLES subcommand. */
331 crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
333 struct const_var_set *var_set;
335 const struct variable ***by = NULL;
336 size_t *by_nvar = NULL;
340 /* Ensure that this is a TABLES subcommand. */
341 if (!lex_match_id (lexer, "TABLES")
342 && (lex_token (lexer) != T_ID ||
343 dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
344 && lex_token (lexer) != T_ALL)
346 lex_match (lexer, '=');
348 if (variables != NULL)
349 var_set = const_var_set_create_from_array (variables, variables_cnt);
351 var_set = const_var_set_create_from_dict (dataset_dict (ds));
352 assert (var_set != NULL);
356 by = xnrealloc (by, n_by + 1, sizeof *by);
357 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
358 if (!parse_const_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
359 PV_NO_DUPLICATE | PV_NO_SCRATCH))
361 if (xalloc_oversized (nx, by_nvar[n_by]))
363 msg (SE, _("Too many cross-tabulation variables or dimensions."));
369 if (!lex_match (lexer, T_BY))
373 lex_error (lexer, _("expecting BY"));
382 int *by_iter = xcalloc (n_by, sizeof *by_iter);
385 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
386 for (i = 0; i < nx; i++)
390 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
397 for (i = 0; i < n_by; i++)
398 x->vars[i] = by[i][by_iter[i]];
404 for (i = n_by - 1; i >= 0; i--)
406 if (++by_iter[i] < by_nvar[i])
419 /* All return paths lead here. */
423 for (i = 0; i < n_by; i++)
429 const_var_set_destroy (var_set);
434 /* Parses the VARIABLES subcommand. */
436 crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
440 msg (SE, _("VARIABLES must be specified before TABLES."));
444 lex_match (lexer, '=');
448 size_t orig_nv = variables_cnt;
453 if (!parse_variables_const (lexer, dataset_dict (ds),
454 &variables, &variables_cnt,
455 (PV_APPEND | PV_NUMERIC
456 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
459 if (lex_token (lexer) != '(')
461 lex_error (lexer, "expecting `('");
466 if (!lex_force_int (lexer))
468 min = lex_integer (lexer);
471 lex_match (lexer, ',');
473 if (!lex_force_int (lexer))
475 max = lex_integer (lexer);
478 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
484 if (lex_token (lexer) != ')')
486 lex_error (lexer, "expecting `)'");
491 for (i = orig_nv; i < variables_cnt; i++)
493 struct var_range *vr = xmalloc (sizeof *vr);
496 vr->count = max - min + 1;
497 var_attach_aux (variables[i], vr, var_dtor_free);
500 if (lex_token (lexer) == '/')
512 /* Data file processing. */
514 static int compare_table_entry (const void *, const void *, const void *);
515 static unsigned hash_table_entry (const void *, const void *);
517 /* Set up the crosstabulation tables for processing. */
519 precalc (struct casereader *input, const struct dataset *ds)
523 if (casereader_peek (input, 0, &c))
525 output_split_file_values (ds, &c);
531 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
541 for (i = 0; i < nxtab; i++)
543 struct crosstab *x = xtab[i];
548 x->ofs = n_sorted_tab;
550 for (j = 2; j < x->nvar; j++)
551 count *= get_var_range (x->vars[j - 2])->count;
553 sorted_tab = xnrealloc (sorted_tab,
554 n_sorted_tab + count, sizeof *sorted_tab);
555 v = xmalloca (sizeof *v * x->nvar);
556 for (j = 2; j < x->nvar; j++)
557 v[j] = get_var_range (x->vars[j])->min;
558 for (j = 0; j < count; j++)
560 struct table_entry *te;
563 te = sorted_tab[n_sorted_tab++]
564 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
568 int row_cnt = get_var_range (x->vars[0])->count;
569 int col_cnt = get_var_range (x->vars[1])->count;
570 const int mat_size = row_cnt * col_cnt;
573 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
574 for (m = 0; m < mat_size; m++)
578 for (k = 2; k < x->nvar; k++)
579 te->values[k].f = v[k];
580 for (k = 2; k < x->nvar; k++)
582 struct var_range *vr = get_var_range (x->vars[k]);
583 if (++v[k] >= vr->max)
592 sorted_tab = xnrealloc (sorted_tab,
593 n_sorted_tab + 1, sizeof *sorted_tab);
594 sorted_tab[n_sorted_tab] = NULL;
599 /* Form crosstabulations for general mode. */
601 calc_general (struct ccase *c, const struct dataset *ds)
603 /* Missing values to exclude. */
604 enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY
605 : cmd.miss == CRS_INCLUDE ? MV_SYSTEM
609 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
611 /* Flattened current table index. */
614 for (t = 0; t < nxtab; t++)
616 struct crosstab *x = xtab[t];
617 const size_t entry_size = (sizeof (struct table_entry)
618 + sizeof (union value) * (x->nvar - 1));
619 struct table_entry *te = xmalloca (entry_size);
621 /* Construct table entry for the current record and table. */
627 for (j = 0; j < x->nvar; j++)
629 const union value *v = case_data (c, x->vars[j]);
630 if (var_is_value_missing (x->vars[j], v, exclude))
632 x->missing += weight;
636 if (var_is_numeric (x->vars[j]))
637 te->values[j].f = case_num (c, x->vars[j]);
640 size_t n = var_get_width (x->vars[j]);
641 if (n > MAX_SHORT_STRING)
642 n = MAX_SHORT_STRING;
643 memcpy (te->values[j].s, case_str (c, x->vars[j]), n);
645 /* Necessary in order to simplify comparisons. */
646 memset (&te->values[j].s[var_get_width (x->vars[j])], 0,
647 sizeof (union value) - n);
652 /* Add record to hash table. */
654 struct table_entry **tepp
655 = (struct table_entry **) hsh_probe (gen_tab, te);
658 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
661 memcpy (tep, te, entry_size);
666 (*tepp)->u.freq += weight;
675 calc_integer (struct ccase *c, const struct dataset *ds)
677 bool bad_warn = true;
680 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
682 /* Flattened current table index. */
685 for (t = 0; t < nxtab; t++)
687 struct crosstab *x = xtab[t];
692 for (i = 0; i < x->nvar; i++)
694 const struct variable *const v = x->vars[i];
695 struct var_range *vr = get_var_range (v);
696 double value = case_num (c, v);
698 /* Note that the first test also rules out SYSMIS. */
699 if ((value < vr->min || value >= vr->max)
700 || (cmd.miss == CRS_TABLE
701 && var_is_num_missing (v, value, MV_USER)))
703 x->missing += weight;
709 ofs += fact * ((int) value - vr->min);
715 const struct variable *row_var = x->vars[ROW_VAR];
716 const int row = case_num (c, row_var) - get_var_range (row_var)->min;
718 const struct variable *col_var = x->vars[COL_VAR];
719 const int col = case_num (c, col_var) - get_var_range (col_var)->min;
721 const int col_dim = get_var_range (col_var)->count;
723 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
730 /* Compare the table_entry's at A and B and return a strcmp()-type
733 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
735 const struct table_entry *a = a_;
736 const struct table_entry *b = b_;
738 if (a->table > b->table)
740 else if (a->table < b->table)
744 const struct crosstab *x = xtab[a->table];
747 for (i = x->nvar - 1; i >= 0; i--)
748 if (var_is_numeric (x->vars[i]))
750 const double diffnum = a->values[i].f - b->values[i].f;
753 else if (diffnum > 0)
758 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
759 var_get_width (x->vars[i]));
768 /* Calculate a hash value from table_entry A. */
770 hash_table_entry (const void *a_, const void *aux UNUSED)
772 const struct table_entry *a = a_;
777 for (i = 0; i < xtab[a->table]->nvar; i++)
778 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
783 /* Post-data reading calculations. */
785 static struct table_entry **find_pivot_extent (struct table_entry **,
786 int *cnt, int pivot);
787 static void enum_var_values (struct table_entry **entries, int entry_cnt,
789 union value **values, int *value_cnt);
790 static void output_pivot_table (struct table_entry **, struct table_entry **,
791 double **, double **, double **,
792 int *, int *, int *);
793 static void make_summary_table (void);
800 n_sorted_tab = hsh_count (gen_tab);
801 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
804 make_summary_table ();
806 /* Identify all the individual crosstabulation tables, and deal with
809 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
810 int pc = n_sorted_tab; /* Pivot count. */
812 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
813 int maxrows = 0, maxcols = 0, maxcells = 0;
817 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
821 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
822 &maxrows, &maxcols, &maxcells);
831 hsh_destroy (gen_tab);
835 for (i = 0; i < n_sorted_tab; i++)
837 free (sorted_tab[i]->u.data);
838 free (sorted_tab[i]);
844 static void insert_summary (struct tab_table *, int tab_index, double valid);
846 /* Output a table summarizing the cases processed. */
848 make_summary_table (void)
850 struct tab_table *summary;
852 struct table_entry **pb = sorted_tab, **pe;
853 int pc = n_sorted_tab;
856 summary = tab_create (7, 3 + nxtab, 1);
857 tab_title (summary, _("Summary."));
858 tab_headers (summary, 1, 0, 3, 0);
859 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
860 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
861 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
862 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
863 tab_hline (summary, TAL_1, 1, 6, 1);
864 tab_hline (summary, TAL_1, 1, 6, 2);
865 tab_vline (summary, TAL_1, 3, 1, 1);
866 tab_vline (summary, TAL_1, 5, 1, 1);
870 for (i = 0; i < 3; i++)
872 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
873 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
876 tab_offset (summary, 0, 3);
882 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
886 while (cur_tab < (*pb)->table)
887 insert_summary (summary, cur_tab++, 0.);
890 for (valid = 0.; pb < pe; pb++)
891 valid += (*pb)->u.freq;
894 const struct crosstab *const x = xtab[(*pb)->table];
895 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
896 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
897 const int count = n_cols * n_rows;
899 for (valid = 0.; pb < pe; pb++)
901 const double *data = (*pb)->u.data;
904 for (i = 0; i < count; i++)
908 insert_summary (summary, cur_tab++, valid);
913 while (cur_tab < nxtab)
914 insert_summary (summary, cur_tab++, 0.);
919 /* Inserts a line into T describing the crosstabulation at index
920 TAB_INDEX, which has VALID valid observations. */
922 insert_summary (struct tab_table *t, int tab_index, double valid)
924 struct crosstab *x = xtab[tab_index];
926 tab_hline (t, TAL_1, 0, 6, 0);
928 /* Crosstabulation name. */
930 char *buf = xmalloca (128 * x->nvar);
934 for (i = 0; i < x->nvar; i++)
937 cp = stpcpy (cp, " * ");
939 cp = stpcpy (cp, var_to_string (x->vars[i]));
941 tab_text (t, 0, 0, TAB_LEFT, buf);
946 /* Counts and percentages. */
956 for (i = 0; i < 3; i++)
958 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
959 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
970 static struct tab_table *table; /* Crosstabulation table. */
971 static struct tab_table *chisq; /* Chi-square table. */
972 static struct tab_table *sym; /* Symmetric measures table. */
973 static struct tab_table *risk; /* Risk estimate table. */
974 static struct tab_table *direct; /* Directional measures table. */
977 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
979 /* Column values, number of columns. */
980 static union value *cols;
983 /* Row values, number of rows. */
984 static union value *rows;
987 /* Number of statistically interesting columns/rows (columns/rows with
989 static int ns_cols, ns_rows;
991 /* Crosstabulation. */
992 static const struct crosstab *x;
994 /* Number of variables from the crosstabulation to consider. This is
995 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
998 /* Matrix contents. */
999 static double *mat; /* Matrix proper. */
1000 static double *row_tot; /* Row totals. */
1001 static double *col_tot; /* Column totals. */
1002 static double W; /* Grand total. */
1004 static void display_dimensions (struct tab_table *, int first_difference,
1005 struct table_entry *);
1006 static void display_crosstabulation (void);
1007 static void display_chisq (void);
1008 static void display_symmetric (void);
1009 static void display_risk (void);
1010 static void display_directional (void);
1011 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1012 static void table_value_missing (struct tab_table *table, int c, int r,
1013 unsigned char opt, const union value *v,
1014 const struct variable *var);
1015 static void delete_missing (void);
1017 /* Output pivot table beginning at PB and continuing until PE,
1018 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1019 hold *MAXROWS entries. */
1021 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1022 double **matp, double **row_totp, double **col_totp,
1023 int *maxrows, int *maxcols, int *maxcells)
1026 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1027 int tc = pe - pb; /* Table count. */
1029 /* Table entry for header comparison. */
1030 struct table_entry *cmp = NULL;
1032 x = xtab[(*pb)->table];
1033 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1035 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1037 /* Crosstabulation table initialization. */
1040 table = tab_create (nvar + n_cols,
1041 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1042 tab_headers (table, nvar - 1, 0, 2, 0);
1044 /* First header line. */
1045 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1046 TAB_CENTER | TAT_TITLE, var_get_name (x->vars[COL_VAR]));
1048 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1050 /* Second header line. */
1054 for (i = 2; i < nvar; i++)
1055 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1056 TAB_RIGHT | TAT_TITLE, var_to_string (x->vars[i]));
1057 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1058 var_get_name (x->vars[ROW_VAR]));
1059 for (i = 0; i < n_cols; i++)
1060 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1062 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1065 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1066 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1070 char *title = xmalloca (x->nvar * 64 + 128);
1074 if (cmd.pivot == CRS_PIVOT)
1075 for (i = 0; i < nvar; i++)
1078 cp = stpcpy (cp, " by ");
1079 cp = stpcpy (cp, var_get_name (x->vars[i]));
1083 cp = spprintf (cp, "%s by %s for",
1084 var_get_name (x->vars[0]),
1085 var_get_name (x->vars[1]));
1086 for (i = 2; i < nvar; i++)
1088 char buf[64], *bufp;
1093 cp = stpcpy (cp, var_get_name (x->vars[i]));
1095 format_short (buf, var_get_print_format (x->vars[i]),
1097 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1099 cp = stpcpy (cp, bufp);
1103 cp = stpcpy (cp, " [");
1104 for (i = 0; i < num_cells; i++)
1112 static const struct tuple cell_names[] =
1114 {CRS_CL_COUNT, N_("count")},
1115 {CRS_CL_ROW, N_("row %")},
1116 {CRS_CL_COLUMN, N_("column %")},
1117 {CRS_CL_TOTAL, N_("total %")},
1118 {CRS_CL_EXPECTED, N_("expected")},
1119 {CRS_CL_RESIDUAL, N_("residual")},
1120 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1121 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1125 const struct tuple *t;
1127 for (t = cell_names; t->value != cells[i]; t++)
1128 assert (t->value != -1);
1130 cp = stpcpy (cp, ", ");
1131 cp = stpcpy (cp, gettext (t->name));
1135 tab_title (table, "%s", title);
1139 tab_offset (table, 0, 2);
1144 /* Chi-square table initialization. */
1145 if (cmd.a_statistics[CRS_ST_CHISQ])
1147 chisq = tab_create (6 + (nvar - 2),
1148 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1149 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1151 tab_title (chisq, _("Chi-square tests."));
1153 tab_offset (chisq, nvar - 2, 0);
1154 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1155 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1156 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1157 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1158 _("Asymp. Sig. (2-sided)"));
1159 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1160 _("Exact. Sig. (2-sided)"));
1161 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1162 _("Exact. Sig. (1-sided)"));
1164 tab_offset (chisq, 0, 1);
1169 /* Symmetric measures. */
1170 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1171 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1172 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1173 || cmd.a_statistics[CRS_ST_KAPPA])
1175 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1176 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1177 tab_title (sym, _("Symmetric measures."));
1179 tab_offset (sym, nvar - 2, 0);
1180 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1181 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1182 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1183 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1184 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1185 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1186 tab_offset (sym, 0, 1);
1191 /* Risk estimate. */
1192 if (cmd.a_statistics[CRS_ST_RISK])
1194 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1195 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1196 tab_title (risk, _("Risk estimate."));
1198 tab_offset (risk, nvar - 2, 0);
1199 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1200 _("95%% Confidence Interval"));
1201 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1202 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1203 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1204 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1205 tab_hline (risk, TAL_1, 2, 3, 1);
1206 tab_vline (risk, TAL_1, 2, 0, 1);
1207 tab_offset (risk, 0, 2);
1212 /* Directional measures. */
1213 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1214 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1216 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1217 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1218 tab_title (direct, _("Directional measures."));
1220 tab_offset (direct, nvar - 2, 0);
1221 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1222 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1223 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1224 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1225 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1226 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1227 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1228 tab_offset (direct, 0, 1);
1235 /* Find pivot subtable if applicable. */
1236 te = find_pivot_extent (tb, &tc, 0);
1240 /* Find all the row variable values. */
1241 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1243 /* Allocate memory space for the column and row totals. */
1244 if (n_rows > *maxrows)
1246 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1247 row_tot = *row_totp;
1250 if (n_cols > *maxcols)
1252 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1253 col_tot = *col_totp;
1257 /* Allocate table space for the matrix. */
1258 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1259 tab_realloc (table, -1,
1260 MAX (tab_nr (table) + (n_rows + 1) * num_cells,
1261 tab_nr (table) * (pe - pb) / (te - tb)));
1263 if (mode == GENERAL)
1265 /* Allocate memory space for the matrix. */
1266 if (n_cols * n_rows > *maxcells)
1268 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1269 *maxcells = n_cols * n_rows;
1274 /* Build the matrix and calculate column totals. */
1276 union value *cur_col = cols;
1277 union value *cur_row = rows;
1279 double *cp = col_tot;
1280 struct table_entry **p;
1283 for (p = &tb[0]; p < te; p++)
1285 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1289 for (; cur_row < &rows[n_rows]; cur_row++)
1295 mp = &mat[cur_col - cols];
1298 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1305 *cp += *mp = (*p)->u.freq;
1310 /* Zero out the rest of the matrix. */
1311 for (; cur_row < &rows[n_rows]; cur_row++)
1317 if (cur_col < &cols[n_cols])
1319 const int rem_cols = n_cols - (cur_col - cols);
1322 for (c = 0; c < rem_cols; c++)
1324 mp = &mat[cur_col - cols];
1325 for (r = 0; r < n_rows; r++)
1327 for (c = 0; c < rem_cols; c++)
1329 mp += n_cols - rem_cols;
1337 double *tp = col_tot;
1339 assert (mode == INTEGER);
1340 mat = (*tb)->u.data;
1343 /* Calculate column totals. */
1344 for (c = 0; c < n_cols; c++)
1347 double *cp = &mat[c];
1349 for (r = 0; r < n_rows; r++)
1350 cum += cp[r * n_cols];
1358 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1359 ns_cols += *cp != 0.;
1362 /* Calculate row totals. */
1365 double *rp = row_tot;
1368 for (ns_rows = 0, r = n_rows; r--; )
1371 for (c = n_cols; c--; )
1379 /* Calculate grand total. */
1385 if (n_rows < n_cols)
1386 tp = row_tot, n = n_rows;
1388 tp = col_tot, n = n_cols;
1394 /* Find the first variable that differs from the last subtable,
1395 then display the values of the dimensioning variables for
1396 each table that needs it. */
1398 int first_difference = nvar - 1;
1401 for (; ; first_difference--)
1403 assert (first_difference >= 2);
1404 if (memcmp (&cmp->values[first_difference],
1405 &(*tb)->values[first_difference],
1406 sizeof *cmp->values))
1412 display_dimensions (table, first_difference, *tb);
1414 display_dimensions (chisq, first_difference, *tb);
1416 display_dimensions (sym, first_difference, *tb);
1418 display_dimensions (risk, first_difference, *tb);
1420 display_dimensions (direct, first_difference, *tb);
1424 display_crosstabulation ();
1425 if (cmd.miss == CRS_REPORT)
1430 display_symmetric ();
1434 display_directional ();
1445 tab_resize (chisq, 4 + (nvar - 2), -1);
1456 /* Delete missing rows and columns for statistical analysis when
1459 delete_missing (void)
1464 for (r = 0; r < n_rows; r++)
1465 if (var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1469 for (c = 0; c < n_cols; c++)
1470 mat[c + r * n_cols] = 0.;
1478 for (c = 0; c < n_cols; c++)
1479 if (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1483 for (r = 0; r < n_rows; r++)
1484 mat[c + r * n_cols] = 0.;
1490 /* Prepare table T for submission, and submit it. */
1492 submit (struct tab_table *t)
1499 tab_resize (t, -1, 0);
1500 if (tab_nr (t) == tab_t (t))
1505 tab_offset (t, 0, 0);
1507 for (i = 2; i < nvar; i++)
1508 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1509 var_to_string (x->vars[i]));
1510 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1511 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1513 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1515 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1516 tab_dim (t, crosstabs_dim);
1520 /* Sets the widths of all the columns and heights of all the rows in
1521 table T for driver D. */
1523 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1527 /* Width of a numerical column. */
1528 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1529 if (cmd.miss == CRS_REPORT)
1530 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1532 /* Set width for header columns. */
1538 w = d->width - c * (t->nc - t->l);
1539 for (i = 0; i <= t->nc; i++)
1543 if (w < d->prop_em_width * 8)
1544 w = d->prop_em_width * 8;
1546 if (w > d->prop_em_width * 15)
1547 w = d->prop_em_width * 15;
1549 for (i = 0; i < t->l; i++)
1553 for (i = t->l; i < t->nc; i++)
1556 for (i = 0; i < t->nr; i++)
1557 t->h[i] = tab_natural_height (t, d, i);
1560 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1561 int *cnt, int pivot);
1562 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1563 int *cnt, int pivot);
1565 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1567 static struct table_entry **
1568 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1570 return (mode == GENERAL
1571 ? find_pivot_extent_general (tp, cnt, pivot)
1572 : find_pivot_extent_integer (tp, cnt, pivot));
1575 /* Find the extent of a region in TP that contains one table. If
1576 PIVOT != 0 that means a set of table entries with identical table
1577 number; otherwise they also have to have the same values for every
1578 dimension after the row and column dimensions. The table that is
1579 searched starts at TP and has length CNT. Returns the first entry
1580 after the last one in the table; sets *CNT to the number of
1581 remaining values. If there are no entries in TP at all, returns
1582 NULL. A yucky interface, admittedly, but it works. */
1583 static struct table_entry **
1584 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1586 struct table_entry *fp = *tp;
1591 x = xtab[(*tp)->table];
1599 if ((*tp)->table != fp->table)
1604 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1611 /* Integer mode correspondent to find_pivot_extent_general(). This
1612 could be optimized somewhat, but I just don't give a crap about
1613 CROSSTABS performance in integer mode, which is just a
1614 CROSSTABS wart as far as I'm concerned.
1616 That said, feel free to send optimization patches to me. */
1617 static struct table_entry **
1618 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1620 struct table_entry *fp = *tp;
1625 x = xtab[(*tp)->table];
1633 if ((*tp)->table != fp->table)
1638 if (memcmp (&(*tp)->values[2], &fp->values[2],
1639 sizeof (union value) * (x->nvar - 2)))
1646 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1647 result. WIDTH_ points to an int which is either 0 for a
1648 numeric value or a string width for a string value. */
1650 compare_value (const void *a_, const void *b_, const void *width_)
1652 const union value *a = a_;
1653 const union value *b = b_;
1654 const int *pwidth = width_;
1655 const int width = *pwidth;
1658 return (a->f < b->f) ? -1 : (a->f > b->f);
1660 return strncmp (a->s, b->s, width);
1663 /* Given an array of ENTRY_CNT table_entry structures starting at
1664 ENTRIES, creates a sorted list of the values that the variable
1665 with index VAR_IDX takes on. The values are returned as a
1666 malloc()'darray stored in *VALUES, with the number of values
1667 stored in *VALUE_CNT.
1670 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1671 union value **values, int *value_cnt)
1673 const struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1675 if (mode == GENERAL)
1677 int width = var_get_width (v);
1680 *values = xnmalloc (entry_cnt, sizeof **values);
1681 for (i = 0; i < entry_cnt; i++)
1682 (*values)[i] = entries[i]->values[var_idx];
1683 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1684 compare_value, &width);
1688 struct var_range *vr = get_var_range (v);
1691 assert (mode == INTEGER);
1692 *values = xnmalloc (vr->count, sizeof **values);
1693 for (i = 0; i < vr->count; i++)
1694 (*values)[i].f = i + vr->min;
1695 *value_cnt = vr->count;
1699 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1700 from V, displayed with print format spec from variable VAR. When
1701 in REPORT missing-value mode, missing values have an M appended. */
1703 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1704 const union value *v, const struct variable *var)
1707 const struct fmt_spec *print = var_get_print_format (var);
1709 const char *label = var_lookup_value_label (var, v);
1712 tab_text (table, c, r, TAB_LEFT, label);
1716 s.string = tab_alloc (table, print->w);
1717 format_short (s.string, print, v);
1718 s.length = strlen (s.string);
1719 if (cmd.miss == CRS_REPORT && var_is_num_missing (var, v->f, MV_USER))
1720 s.string[s.length++] = 'M';
1721 while (s.length && *s.string == ' ')
1726 tab_raw (table, c, r, opt, &s);
1729 /* Draws a line across TABLE at the current row to indicate the most
1730 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1731 that changed, and puts the values that changed into the table. TB
1732 and X must be the corresponding table_entry and crosstab,
1735 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1737 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1739 for (; first_difference >= 2; first_difference--)
1740 table_value_missing (table, nvar - first_difference - 1, 0,
1741 TAB_RIGHT, &tb->values[first_difference],
1742 x->vars[first_difference]);
1745 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1746 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1747 additionally suffixed with a letter `M'. */
1749 format_cell_entry (struct tab_table *table, int c, int r, double value,
1750 char suffix, bool mark_missing)
1752 const struct fmt_spec f = {FMT_F, 10, 1};
1757 s.string = tab_alloc (table, 16);
1759 data_out (&v, &f, s.string);
1760 while (*s.string == ' ')
1766 s.string[s.length++] = suffix;
1768 s.string[s.length++] = 'M';
1770 tab_raw (table, c, r, TAB_RIGHT, &s);
1773 /* Displays the crosstabulation table. */
1775 display_crosstabulation (void)
1780 for (r = 0; r < n_rows; r++)
1781 table_value_missing (table, nvar - 2, r * num_cells,
1782 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1784 tab_text (table, nvar - 2, n_rows * num_cells,
1785 TAB_LEFT, _("Total"));
1787 /* Put in the actual cells. */
1792 tab_offset (table, nvar - 1, -1);
1793 for (r = 0; r < n_rows; r++)
1796 tab_hline (table, TAL_1, -1, n_cols, 0);
1797 for (c = 0; c < n_cols; c++)
1799 bool mark_missing = false;
1800 double expected_value = row_tot[r] * col_tot[c] / W;
1801 if (cmd.miss == CRS_REPORT
1802 && (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER)
1803 || var_is_num_missing (x->vars[ROW_VAR], rows[r].f,
1805 mark_missing = true;
1806 for (i = 0; i < num_cells; i++)
1817 v = *mp / row_tot[r] * 100.;
1821 v = *mp / col_tot[c] * 100.;
1828 case CRS_CL_EXPECTED:
1831 case CRS_CL_RESIDUAL:
1832 v = *mp - expected_value;
1834 case CRS_CL_SRESIDUAL:
1835 v = (*mp - expected_value) / sqrt (expected_value);
1837 case CRS_CL_ASRESIDUAL:
1838 v = ((*mp - expected_value)
1839 / sqrt (expected_value
1840 * (1. - row_tot[r] / W)
1841 * (1. - col_tot[c] / W)));
1847 format_cell_entry (table, c, i, v, suffix, mark_missing);
1853 tab_offset (table, -1, tab_row (table) + num_cells);
1861 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1862 for (r = 0; r < n_rows; r++)
1865 bool mark_missing = false;
1867 if (cmd.miss == CRS_REPORT
1868 && var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1869 mark_missing = true;
1871 for (i = 0; i < num_cells; i++)
1885 v = row_tot[r] / W * 100.;
1889 v = row_tot[r] / W * 100.;
1892 case CRS_CL_EXPECTED:
1893 case CRS_CL_RESIDUAL:
1894 case CRS_CL_SRESIDUAL:
1895 case CRS_CL_ASRESIDUAL:
1902 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1903 tab_next_row (table);
1908 /* Column totals, grand total. */
1914 tab_hline (table, TAL_1, -1, n_cols, 0);
1915 for (c = 0; c <= n_cols; c++)
1917 double ct = c < n_cols ? col_tot[c] : W;
1918 bool mark_missing = false;
1922 if (cmd.miss == CRS_REPORT && c < n_cols
1923 && var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1924 mark_missing = true;
1926 for (i = 0; i < num_cells; i++)
1948 case CRS_CL_EXPECTED:
1949 case CRS_CL_RESIDUAL:
1950 case CRS_CL_SRESIDUAL:
1951 case CRS_CL_ASRESIDUAL:
1957 format_cell_entry (table, c, i, v, suffix, mark_missing);
1962 tab_offset (table, -1, tab_row (table) + last_row);
1965 tab_offset (table, 0, -1);
1968 static void calc_r (double *X, double *Y, double *, double *, double *);
1969 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1971 /* Display chi-square statistics. */
1973 display_chisq (void)
1975 static const char *chisq_stats[N_CHISQ] =
1977 N_("Pearson Chi-Square"),
1978 N_("Likelihood Ratio"),
1979 N_("Fisher's Exact Test"),
1980 N_("Continuity Correction"),
1981 N_("Linear-by-Linear Association"),
1983 double chisq_v[N_CHISQ];
1984 double fisher1, fisher2;
1990 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1992 tab_offset (chisq, nvar - 2, -1);
1994 for (i = 0; i < N_CHISQ; i++)
1996 if ((i != 2 && chisq_v[i] == SYSMIS)
1997 || (i == 2 && fisher1 == SYSMIS))
2001 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2004 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
2005 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
2006 tab_float (chisq, 3, 0, TAB_RIGHT,
2007 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
2012 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
2013 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
2015 tab_next_row (chisq);
2018 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2019 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2020 tab_next_row (chisq);
2022 tab_offset (chisq, 0, -1);
2025 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2026 double[N_SYMMETRIC]);
2028 /* Display symmetric measures. */
2030 display_symmetric (void)
2032 static const char *categories[] =
2034 N_("Nominal by Nominal"),
2035 N_("Ordinal by Ordinal"),
2036 N_("Interval by Interval"),
2037 N_("Measure of Agreement"),
2040 static const char *stats[N_SYMMETRIC] =
2044 N_("Contingency Coefficient"),
2045 N_("Kendall's tau-b"),
2046 N_("Kendall's tau-c"),
2048 N_("Spearman Correlation"),
2053 static const int stats_categories[N_SYMMETRIC] =
2055 0, 0, 0, 1, 1, 1, 1, 2, 3,
2059 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2062 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2065 tab_offset (sym, nvar - 2, -1);
2067 for (i = 0; i < N_SYMMETRIC; i++)
2069 if (sym_v[i] == SYSMIS)
2072 if (stats_categories[i] != last_cat)
2074 last_cat = stats_categories[i];
2075 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2078 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2079 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2080 if (sym_ase[i] != SYSMIS)
2081 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2082 if (sym_t[i] != SYSMIS)
2083 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2084 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2088 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2089 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2092 tab_offset (sym, 0, -1);
2095 static int calc_risk (double[], double[], double[], union value *);
2097 /* Display risk estimate. */
2102 double risk_v[3], lower[3], upper[3];
2106 if (!calc_risk (risk_v, upper, lower, c))
2109 tab_offset (risk, nvar - 2, -1);
2111 for (i = 0; i < 3; i++)
2113 if (risk_v[i] == SYSMIS)
2119 if (var_is_numeric (x->vars[COL_VAR]))
2120 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2121 var_get_name (x->vars[COL_VAR]), c[0].f, c[1].f);
2123 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2124 var_get_name (x->vars[COL_VAR]),
2125 var_get_width (x->vars[COL_VAR]), c[0].s,
2126 var_get_width (x->vars[COL_VAR]), c[1].s);
2130 if (var_is_numeric (x->vars[ROW_VAR]))
2131 sprintf (buf, _("For cohort %s = %g"),
2132 var_get_name (x->vars[ROW_VAR]), rows[i - 1].f);
2134 sprintf (buf, _("For cohort %s = %.*s"),
2135 var_get_name (x->vars[ROW_VAR]),
2136 var_get_width (x->vars[ROW_VAR]), rows[i - 1].s);
2140 tab_text (risk, 0, 0, TAB_LEFT, buf);
2141 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2142 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2143 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2144 tab_next_row (risk);
2147 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2148 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2149 tab_next_row (risk);
2151 tab_offset (risk, 0, -1);
2154 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2155 double[N_DIRECTIONAL]);
2157 /* Display directional measures. */
2159 display_directional (void)
2161 static const char *categories[] =
2163 N_("Nominal by Nominal"),
2164 N_("Ordinal by Ordinal"),
2165 N_("Nominal by Interval"),
2168 static const char *stats[] =
2171 N_("Goodman and Kruskal tau"),
2172 N_("Uncertainty Coefficient"),
2177 static const char *types[] =
2184 static const int stats_categories[N_DIRECTIONAL] =
2186 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2189 static const int stats_stats[N_DIRECTIONAL] =
2191 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2194 static const int stats_types[N_DIRECTIONAL] =
2196 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2199 static const int *stats_lookup[] =
2206 static const char **stats_names[] =
2218 double direct_v[N_DIRECTIONAL];
2219 double direct_ase[N_DIRECTIONAL];
2220 double direct_t[N_DIRECTIONAL];
2224 if (!calc_directional (direct_v, direct_ase, direct_t))
2227 tab_offset (direct, nvar - 2, -1);
2229 for (i = 0; i < N_DIRECTIONAL; i++)
2231 if (direct_v[i] == SYSMIS)
2237 for (j = 0; j < 3; j++)
2238 if (last[j] != stats_lookup[j][i])
2241 tab_hline (direct, TAL_1, j, 6, 0);
2246 int k = last[j] = stats_lookup[j][i];
2251 string = var_get_name (x->vars[0]);
2253 string = var_get_name (x->vars[1]);
2255 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2256 gettext (stats_names[j][k]), string);
2261 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2262 if (direct_ase[i] != SYSMIS)
2263 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2264 if (direct_t[i] != SYSMIS)
2265 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2266 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2267 tab_next_row (direct);
2270 tab_offset (direct, 0, -1);
2273 /* Statistical calculations. */
2275 /* Returns the value of the gamma (factorial) function for an integer
2278 gamma_int (double x)
2283 for (i = 2; i < x; i++)
2288 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2290 static inline double
2291 Pr (int a, int b, int c, int d)
2293 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2294 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2295 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2296 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2297 / gamma_int (a + b + c + d + 1.));
2300 /* Swap the contents of A and B. */
2302 swap (int *a, int *b)
2309 /* Calculate significance for Fisher's exact test as specified in
2310 _SPSS Statistical Algorithms_, Appendix 5. */
2312 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2316 if (MIN (c, d) < MIN (a, b))
2317 swap (&a, &c), swap (&b, &d);
2318 if (MIN (b, d) < MIN (a, c))
2319 swap (&a, &b), swap (&c, &d);
2323 swap (&a, &b), swap (&c, &d);
2325 swap (&a, &c), swap (&b, &d);
2329 for (x = 0; x <= a; x++)
2330 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2332 *fisher2 = *fisher1;
2333 for (x = 1; x <= b; x++)
2334 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2337 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2338 columns with values COLS and N_ROWS rows with values ROWS. Values
2339 in the matrix sum to W. */
2341 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2342 double *fisher1, double *fisher2)
2346 chisq[0] = chisq[1] = 0.;
2347 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2348 *fisher1 = *fisher2 = SYSMIS;
2350 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2352 if (ns_rows <= 1 || ns_cols <= 1)
2354 chisq[0] = chisq[1] = SYSMIS;
2358 for (r = 0; r < n_rows; r++)
2359 for (c = 0; c < n_cols; c++)
2361 const double expected = row_tot[r] * col_tot[c] / W;
2362 const double freq = mat[n_cols * r + c];
2363 const double residual = freq - expected;
2365 chisq[0] += residual * residual / expected;
2367 chisq[1] += freq * log (expected / freq);
2378 /* Calculate Yates and Fisher exact test. */
2379 if (ns_cols == 2 && ns_rows == 2)
2381 double f11, f12, f21, f22;
2387 for (i = j = 0; i < n_cols; i++)
2388 if (col_tot[i] != 0.)
2397 f11 = mat[nz_cols[0]];
2398 f12 = mat[nz_cols[1]];
2399 f21 = mat[nz_cols[0] + n_cols];
2400 f22 = mat[nz_cols[1] + n_cols];
2405 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2408 chisq[3] = (W * x * x
2409 / (f11 + f12) / (f21 + f22)
2410 / (f11 + f21) / (f12 + f22));
2418 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2419 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2422 /* Calculate Mantel-Haenszel. */
2423 if (var_is_numeric (x->vars[ROW_VAR]) && var_is_numeric (x->vars[COL_VAR]))
2425 double r, ase_0, ase_1;
2426 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2428 chisq[4] = (W - 1.) * r * r;
2433 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2434 ASE_1, and ase_0 into ASE_0. The row and column values must be
2435 passed in X and Y. */
2437 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2439 double SX, SY, S, T;
2441 double sum_XYf, sum_X2Y2f;
2442 double sum_Xr, sum_X2r;
2443 double sum_Yc, sum_Y2c;
2446 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2447 for (j = 0; j < n_cols; j++)
2449 double fij = mat[j + i * n_cols];
2450 double product = X[i] * Y[j];
2451 double temp = fij * product;
2453 sum_X2Y2f += temp * product;
2456 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2458 sum_Xr += X[i] * row_tot[i];
2459 sum_X2r += pow2 (X[i]) * row_tot[i];
2463 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2465 sum_Yc += Y[i] * col_tot[i];
2466 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2470 S = sum_XYf - sum_Xr * sum_Yc / W;
2471 SX = sum_X2r - pow2 (sum_Xr) / W;
2472 SY = sum_Y2c - pow2 (sum_Yc) / W;
2475 *ase_0 = sqrt ((sum_X2Y2f - pow2 (sum_XYf) / W) / (sum_X2r * sum_Y2c));
2480 for (s = c = 0., i = 0; i < n_rows; i++)
2481 for (j = 0; j < n_cols; j++)
2483 double Xresid, Yresid;
2486 Xresid = X[i] - Xbar;
2487 Yresid = Y[j] - Ybar;
2488 temp = (T * Xresid * Yresid
2490 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2491 y = mat[j + i * n_cols] * temp * temp - c;
2496 *ase_1 = sqrt (s) / (T * T);
2500 static double somers_d_v[3];
2501 static double somers_d_ase[3];
2502 static double somers_d_t[3];
2504 /* Calculate symmetric statistics and their asymptotic standard
2505 errors. Returns 0 if none could be calculated. */
2507 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2508 double t[N_SYMMETRIC])
2510 int q = MIN (ns_rows, ns_cols);
2519 for (i = 0; i < N_SYMMETRIC; i++)
2520 v[i] = ase[i] = t[i] = SYSMIS;
2523 /* Phi, Cramer's V, contingency coefficient. */
2524 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2526 double Xp = 0.; /* Pearson chi-square. */
2531 for (r = 0; r < n_rows; r++)
2532 for (c = 0; c < n_cols; c++)
2534 const double expected = row_tot[r] * col_tot[c] / W;
2535 const double freq = mat[n_cols * r + c];
2536 const double residual = freq - expected;
2538 Xp += residual * residual / expected;
2542 if (cmd.a_statistics[CRS_ST_PHI])
2544 v[0] = sqrt (Xp / W);
2545 v[1] = sqrt (Xp / (W * (q - 1)));
2547 if (cmd.a_statistics[CRS_ST_CC])
2548 v[2] = sqrt (Xp / (Xp + W));
2551 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2552 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2557 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2564 for (r = 0; r < n_rows; r++)
2565 Dr -= pow2 (row_tot[r]);
2566 for (c = 0; c < n_cols; c++)
2567 Dc -= pow2 (col_tot[c]);
2573 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2574 for (c = 0; c < n_cols; c++)
2578 for (r = 0; r < n_rows; r++)
2579 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2589 for (i = 0; i < n_rows; i++)
2593 for (j = 1; j < n_cols; j++)
2594 Cij += col_tot[j] - cum[j + i * n_cols];
2597 for (j = 1; j < n_cols; j++)
2598 Dij += cum[j + (i - 1) * n_cols];
2602 double fij = mat[j + i * n_cols];
2608 assert (j < n_cols);
2610 Cij -= col_tot[j] - cum[j + i * n_cols];
2611 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2615 Cij += cum[j - 1 + (i - 1) * n_cols];
2616 Dij -= cum[j + (i - 1) * n_cols];
2622 if (cmd.a_statistics[CRS_ST_BTAU])
2623 v[3] = (P - Q) / sqrt (Dr * Dc);
2624 if (cmd.a_statistics[CRS_ST_CTAU])
2625 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2626 if (cmd.a_statistics[CRS_ST_GAMMA])
2627 v[5] = (P - Q) / (P + Q);
2629 /* ASE for tau-b, tau-c, gamma. Calculations could be
2630 eliminated here, at expense of memory. */
2635 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2636 for (i = 0; i < n_rows; i++)
2640 for (j = 1; j < n_cols; j++)
2641 Cij += col_tot[j] - cum[j + i * n_cols];
2644 for (j = 1; j < n_cols; j++)
2645 Dij += cum[j + (i - 1) * n_cols];
2649 double fij = mat[j + i * n_cols];
2651 if (cmd.a_statistics[CRS_ST_BTAU])
2653 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2654 + v[3] * (row_tot[i] * Dc
2655 + col_tot[j] * Dr));
2656 btau_cum += fij * temp * temp;
2660 const double temp = Cij - Dij;
2661 ctau_cum += fij * temp * temp;
2664 if (cmd.a_statistics[CRS_ST_GAMMA])
2666 const double temp = Q * Cij - P * Dij;
2667 gamma_cum += fij * temp * temp;
2670 if (cmd.a_statistics[CRS_ST_D])
2672 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2673 - (P - Q) * (W - row_tot[i]));
2674 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2675 - (Q - P) * (W - col_tot[j]));
2680 assert (j < n_cols);
2682 Cij -= col_tot[j] - cum[j + i * n_cols];
2683 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2687 Cij += cum[j - 1 + (i - 1) * n_cols];
2688 Dij -= cum[j + (i - 1) * n_cols];
2694 btau_var = ((btau_cum
2695 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2697 if (cmd.a_statistics[CRS_ST_BTAU])
2699 ase[3] = sqrt (btau_var);
2700 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2703 if (cmd.a_statistics[CRS_ST_CTAU])
2705 ase[4] = ((2 * q / ((q - 1) * W * W))
2706 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2707 t[4] = v[4] / ase[4];
2709 if (cmd.a_statistics[CRS_ST_GAMMA])
2711 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2712 t[5] = v[5] / (2. / (P + Q)
2713 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2715 if (cmd.a_statistics[CRS_ST_D])
2717 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2718 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2719 somers_d_t[0] = (somers_d_v[0]
2721 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2722 somers_d_v[1] = (P - Q) / Dc;
2723 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2724 somers_d_t[1] = (somers_d_v[1]
2726 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2727 somers_d_v[2] = (P - Q) / Dr;
2728 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2729 somers_d_t[2] = (somers_d_v[2]
2731 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2737 /* Spearman correlation, Pearson's r. */
2738 if (cmd.a_statistics[CRS_ST_CORR])
2740 double *R = xmalloca (sizeof *R * n_rows);
2741 double *C = xmalloca (sizeof *C * n_cols);
2744 double y, t, c = 0., s = 0.;
2749 R[i] = s + (row_tot[i] + 1.) / 2.;
2756 assert (i < n_rows);
2761 double y, t, c = 0., s = 0.;
2766 C[j] = s + (col_tot[j] + 1.) / 2;
2773 assert (j < n_cols);
2777 calc_r (R, C, &v[6], &t[6], &ase[6]);
2783 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2787 /* Cohen's kappa. */
2788 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2790 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2793 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2794 i < ns_rows; i++, j++)
2798 while (col_tot[j] == 0.)
2801 prod = row_tot[i] * col_tot[j];
2802 sum = row_tot[i] + col_tot[j];
2804 sum_fii += mat[j + i * n_cols];
2806 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2807 sum_riciri_ci += prod * sum;
2809 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2810 for (j = 0; j < ns_cols; j++)
2812 double sum = row_tot[i] + col_tot[j];
2813 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2816 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2818 ase[8] = sqrt ((W * W * sum_rici
2819 + sum_rici * sum_rici
2820 - W * sum_riciri_ci)
2821 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2823 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2824 / pow2 (W * W - sum_rici))
2825 + ((2. * (W - sum_fii)
2826 * (2. * sum_fii * sum_rici
2827 - W * sum_fiiri_ci))
2828 / cube (W * W - sum_rici))
2829 + (pow2 (W - sum_fii)
2830 * (W * sum_fijri_ci2 - 4.
2831 * sum_rici * sum_rici)
2832 / pow4 (W * W - sum_rici))));
2834 t[8] = v[8] / ase[8];
2841 /* Calculate risk estimate. */
2843 calc_risk (double *value, double *upper, double *lower, union value *c)
2845 double f11, f12, f21, f22;
2851 for (i = 0; i < 3; i++)
2852 value[i] = upper[i] = lower[i] = SYSMIS;
2855 if (ns_rows != 2 || ns_cols != 2)
2862 for (i = j = 0; i < n_cols; i++)
2863 if (col_tot[i] != 0.)
2872 f11 = mat[nz_cols[0]];
2873 f12 = mat[nz_cols[1]];
2874 f21 = mat[nz_cols[0] + n_cols];
2875 f22 = mat[nz_cols[1] + n_cols];
2877 c[0] = cols[nz_cols[0]];
2878 c[1] = cols[nz_cols[1]];
2881 value[0] = (f11 * f22) / (f12 * f21);
2882 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2883 lower[0] = value[0] * exp (-1.960 * v);
2884 upper[0] = value[0] * exp (1.960 * v);
2886 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2887 v = sqrt ((f12 / (f11 * (f11 + f12)))
2888 + (f22 / (f21 * (f21 + f22))));
2889 lower[1] = value[1] * exp (-1.960 * v);
2890 upper[1] = value[1] * exp (1.960 * v);
2892 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2893 v = sqrt ((f11 / (f12 * (f11 + f12)))
2894 + (f21 / (f22 * (f21 + f22))));
2895 lower[2] = value[2] * exp (-1.960 * v);
2896 upper[2] = value[2] * exp (1.960 * v);
2901 /* Calculate directional measures. */
2903 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2904 double t[N_DIRECTIONAL])
2909 for (i = 0; i < N_DIRECTIONAL; i++)
2910 v[i] = ase[i] = t[i] = SYSMIS;
2914 if (cmd.a_statistics[CRS_ST_LAMBDA])
2916 double *fim = xnmalloc (n_rows, sizeof *fim);
2917 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2918 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2919 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2920 double sum_fim, sum_fmj;
2922 int rm_index, cm_index;
2925 /* Find maximum for each row and their sum. */
2926 for (sum_fim = 0., i = 0; i < n_rows; i++)
2928 double max = mat[i * n_cols];
2931 for (j = 1; j < n_cols; j++)
2932 if (mat[j + i * n_cols] > max)
2934 max = mat[j + i * n_cols];
2938 sum_fim += fim[i] = max;
2939 fim_index[i] = index;
2942 /* Find maximum for each column. */
2943 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2945 double max = mat[j];
2948 for (i = 1; i < n_rows; i++)
2949 if (mat[j + i * n_cols] > max)
2951 max = mat[j + i * n_cols];
2955 sum_fmj += fmj[j] = max;
2956 fmj_index[j] = index;
2959 /* Find maximum row total. */
2962 for (i = 1; i < n_rows; i++)
2963 if (row_tot[i] > rm)
2969 /* Find maximum column total. */
2972 for (j = 1; j < n_cols; j++)
2973 if (col_tot[j] > cm)
2979 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2980 v[1] = (sum_fmj - rm) / (W - rm);
2981 v[2] = (sum_fim - cm) / (W - cm);
2983 /* ASE1 for Y given X. */
2987 for (accum = 0., i = 0; i < n_rows; i++)
2988 for (j = 0; j < n_cols; j++)
2990 const int deltaj = j == cm_index;
2991 accum += (mat[j + i * n_cols]
2992 * pow2 ((j == fim_index[i])
2997 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
3000 /* ASE0 for Y given X. */
3004 for (accum = 0., i = 0; i < n_rows; i++)
3005 if (cm_index != fim_index[i])
3006 accum += (mat[i * n_cols + fim_index[i]]
3007 + mat[i * n_cols + cm_index]);
3008 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
3011 /* ASE1 for X given Y. */
3015 for (accum = 0., i = 0; i < n_rows; i++)
3016 for (j = 0; j < n_cols; j++)
3018 const int deltaj = i == rm_index;
3019 accum += (mat[j + i * n_cols]
3020 * pow2 ((i == fmj_index[j])
3025 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3028 /* ASE0 for X given Y. */
3032 for (accum = 0., j = 0; j < n_cols; j++)
3033 if (rm_index != fmj_index[j])
3034 accum += (mat[j + n_cols * fmj_index[j]]
3035 + mat[j + n_cols * rm_index]);
3036 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3039 /* Symmetric ASE0 and ASE1. */
3044 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3045 for (j = 0; j < n_cols; j++)
3047 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3048 int temp1 = (i == rm_index) + (j == cm_index);
3049 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3050 accum1 += (mat[j + i * n_cols]
3051 * pow2 (temp0 + (v[0] - 1.) * temp1));
3053 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3054 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3055 / (2. * W - rm - cm));
3064 double sum_fij2_ri, sum_fij2_ci;
3065 double sum_ri2, sum_cj2;
3067 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3068 for (j = 0; j < n_cols; j++)
3070 double temp = pow2 (mat[j + i * n_cols]);
3071 sum_fij2_ri += temp / row_tot[i];
3072 sum_fij2_ci += temp / col_tot[j];
3075 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3076 sum_ri2 += pow2 (row_tot[i]);
3078 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3079 sum_cj2 += pow2 (col_tot[j]);
3081 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3082 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3086 if (cmd.a_statistics[CRS_ST_UC])
3088 double UX, UY, UXY, P;
3089 double ase1_yx, ase1_xy, ase1_sym;
3092 for (UX = 0., i = 0; i < n_rows; i++)
3093 if (row_tot[i] > 0.)
3094 UX -= row_tot[i] / W * log (row_tot[i] / W);
3096 for (UY = 0., j = 0; j < n_cols; j++)
3097 if (col_tot[j] > 0.)
3098 UY -= col_tot[j] / W * log (col_tot[j] / W);
3100 for (UXY = P = 0., i = 0; i < n_rows; i++)
3101 for (j = 0; j < n_cols; j++)
3103 double entry = mat[j + i * n_cols];
3108 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3109 UXY -= entry / W * log (entry / W);
3112 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3113 for (j = 0; j < n_cols; j++)
3115 double entry = mat[j + i * n_cols];
3120 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3121 + (UX - UXY) * log (col_tot[j] / W));
3122 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3123 + (UY - UXY) * log (row_tot[i] / W));
3124 ase1_sym += entry * pow2 ((UXY
3125 * log (row_tot[i] * col_tot[j] / (W * W)))
3126 - (UX + UY) * log (entry / W));
3129 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3130 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3131 t[5] = v[5] / ((2. / (W * (UX + UY)))
3132 * sqrt (P - pow2 (UX + UY - UXY) / W));
3134 v[6] = (UX + UY - UXY) / UX;
3135 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3136 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3138 v[7] = (UX + UY - UXY) / UY;
3139 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3140 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3144 if (cmd.a_statistics[CRS_ST_D])
3149 calc_symmetric (NULL, NULL, NULL);
3150 for (i = 0; i < 3; i++)
3152 v[8 + i] = somers_d_v[i];
3153 ase[8 + i] = somers_d_ase[i];
3154 t[8 + i] = somers_d_t[i];
3159 if (cmd.a_statistics[CRS_ST_ETA])
3162 double sum_Xr, sum_X2r;
3166 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3168 sum_Xr += rows[i].f * row_tot[i];
3169 sum_X2r += pow2 (rows[i].f) * row_tot[i];
3171 SX = sum_X2r - pow2 (sum_Xr) / W;
3173 for (SXW = 0., j = 0; j < n_cols; j++)
3177 for (cum = 0., i = 0; i < n_rows; i++)
3179 SXW += pow2 (rows[i].f) * mat[j + i * n_cols];
3180 cum += rows[i].f * mat[j + i * n_cols];
3183 SXW -= cum * cum / col_tot[j];
3185 v[11] = sqrt (1. - SXW / SX);
3189 double sum_Yc, sum_Y2c;
3193 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3195 sum_Yc += cols[i].f * col_tot[i];
3196 sum_Y2c += pow2 (cols[i].f) * col_tot[i];
3198 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3200 for (SYW = 0., i = 0; i < n_rows; i++)
3204 for (cum = 0., j = 0; j < n_cols; j++)
3206 SYW += pow2 (cols[j].f) * mat[j + i * n_cols];
3207 cum += cols[j].f * mat[j + i * n_cols];
3210 SYW -= cum * cum / row_tot[i];
3212 v[12] = sqrt (1. - SYW / SY);
3219 /* A wrapper around data_out() that limits string output to short
3220 string width and null terminates the result. */
3222 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3224 struct fmt_spec fmt_subst;
3226 /* Limit to short string width. */
3227 if (fmt_is_string (fp->type))
3231 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3232 if (fmt_subst.type == FMT_A)
3233 fmt_subst.w = MIN (8, fmt_subst.w);
3235 fmt_subst.w = MIN (16, fmt_subst.w);
3241 data_out (v, fp, s);
3243 /* Null terminate. */