1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 - Pearson's R (but not Spearman!) is off a little.
20 - T values for Spearman's R and Pearson's R are wrong.
21 - How to calculate significance of symmetric and directional measures?
22 - Asymmetric ASEs and T values for lambda are wrong.
23 - ASE of Goodman and Kruskal's tau is not calculated.
24 - ASE of symmetric somers' d is wrong.
25 - Approx. T of uncertainty coefficient is wrong.
32 #include <gsl/gsl_cdf.h>
36 #include <data/case.h>
37 #include <data/casegrouper.h>
38 #include <data/casereader.h>
39 #include <data/data-out.h>
40 #include <data/dictionary.h>
41 #include <data/format.h>
42 #include <data/procedure.h>
43 #include <data/value-labels.h>
44 #include <data/variable.h>
45 #include <language/command.h>
46 #include <language/dictionary/split-file.h>
47 #include <language/lexer/lexer.h>
48 #include <language/lexer/variable-parser.h>
49 #include <libpspp/array.h>
50 #include <libpspp/assertion.h>
51 #include <libpspp/compiler.h>
52 #include <libpspp/hash.h>
53 #include <libpspp/message.h>
54 #include <libpspp/misc.h>
55 #include <libpspp/pool.h>
56 #include <libpspp/str.h>
57 #include <output/output.h>
58 #include <output/table.h>
65 #define _(msgid) gettext (msgid)
66 #define N_(msgid) msgid
74 missing=miss:!table/include/report;
75 +write[wr_]=none,cells,all;
76 +format=fmt:!labels/nolabels/novallabs,
79 tabl:!tables/notables,
82 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
84 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
90 /* Number of chi-square statistics. */
93 /* Number of symmetric statistics. */
96 /* Number of directional statistics. */
97 #define N_DIRECTIONAL 13
99 /* A single table entry for general mode. */
102 int table; /* Flattened table number. */
105 double freq; /* Frequency count. */
106 double *data; /* Crosstabulation table for integer mode. */
109 union value values[1]; /* Values. */
112 /* A crosstabulation. */
115 int nvar; /* Number of variables. */
116 double missing; /* Missing cases count. */
117 int ofs; /* Integer mode: Offset into sorted_tab[]. */
118 const struct variable *vars[2]; /* At least two variables; sorted by
119 larger indices first. */
122 /* Integer mode variable info. */
125 int min; /* Minimum value. */
126 int max; /* Maximum value + 1. */
127 int count; /* max - min. */
130 static inline struct var_range *
131 get_var_range (const struct variable *v)
133 return var_get_aux (v);
136 /* Indexes into crosstab.v. */
143 /* General mode crosstabulation table. */
144 static struct hsh_table *gen_tab; /* Hash table. */
145 static int n_sorted_tab; /* Number of entries in sorted_tab. */
146 static struct table_entry **sorted_tab; /* Sorted table. */
148 /* Variables specifies on VARIABLES. */
149 static const struct variable **variables;
150 static size_t variables_cnt;
153 static struct crosstab **xtab;
156 /* Integer or general mode? */
165 static int num_cells; /* Number of cells requested. */
166 static int cells[8]; /* Cells requested. */
169 static int write_style; /* One of WR_* that specifies the WRITE style. */
171 /* Command parsing info. */
172 static struct cmd_crosstabs cmd;
175 static struct pool *pl_tc; /* For table cells. */
176 static struct pool *pl_col; /* For column data. */
178 static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
179 static void precalc (struct casereader *, const struct dataset *);
180 static void calc_general (struct ccase *, const struct dataset *);
181 static void calc_integer (struct ccase *, const struct dataset *);
182 static void postcalc (void);
183 static void submit (struct tab_table *);
185 static void format_short (char *s, const struct fmt_spec *fp,
186 const union value *v);
188 /* Parse and execute CROSSTABS, then clean up. */
190 cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
192 int result = internal_cmd_crosstabs (lexer, ds);
196 pool_destroy (pl_tc);
197 pool_destroy (pl_col);
199 for (i = 0; i < nxtab; i++)
206 /* Parses and executes the CROSSTABS procedure. */
208 internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
210 struct casegrouper *grouper;
211 struct casereader *input, *group;
219 pl_tc = pool_create ();
220 pl_col = pool_create ();
222 if (!parse_crosstabs (lexer, ds, &cmd, NULL))
225 mode = variables ? INTEGER : GENERAL;
230 cmd.a_cells[CRS_CL_COUNT] = 1;
236 for (i = 0; i < CRS_CL_count; i++)
241 cmd.a_cells[CRS_CL_COUNT] = 1;
242 cmd.a_cells[CRS_CL_ROW] = 1;
243 cmd.a_cells[CRS_CL_COLUMN] = 1;
244 cmd.a_cells[CRS_CL_TOTAL] = 1;
246 if (cmd.a_cells[CRS_CL_ALL])
248 for (i = 0; i < CRS_CL_count; i++)
250 cmd.a_cells[CRS_CL_ALL] = 0;
252 cmd.a_cells[CRS_CL_NONE] = 0;
254 for (num_cells = i = 0; i < CRS_CL_count; i++)
256 cells[num_cells++] = i;
259 if (cmd.sbc_statistics)
264 for (i = 0; i < CRS_ST_count; i++)
265 if (cmd.a_statistics[i])
268 cmd.a_statistics[CRS_ST_CHISQ] = 1;
269 if (cmd.a_statistics[CRS_ST_ALL])
270 for (i = 0; i < CRS_ST_count; i++)
271 cmd.a_statistics[i] = 1;
275 if (cmd.miss == CRS_REPORT && mode == GENERAL)
277 msg (SE, _("Missing mode REPORT not allowed in general mode. "
278 "Assuming MISSING=TABLE."));
279 cmd.miss = CRS_TABLE;
283 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
284 cmd.a_write[CRS_WR_ALL] = 0;
285 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
287 msg (SE, _("Write mode ALL not allowed in general mode. "
288 "Assuming WRITE=CELLS."));
289 cmd.a_write[CRS_WR_CELLS] = 1;
292 && (cmd.a_write[CRS_WR_NONE]
293 + cmd.a_write[CRS_WR_ALL]
294 + cmd.a_write[CRS_WR_CELLS] == 0))
295 cmd.a_write[CRS_WR_CELLS] = 1;
296 if (cmd.a_write[CRS_WR_CELLS])
297 write_style = CRS_WR_CELLS;
298 else if (cmd.a_write[CRS_WR_ALL])
299 write_style = CRS_WR_ALL;
301 write_style = CRS_WR_NONE;
303 input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
305 grouper = casegrouper_create_splits (input, dataset_dict (ds));
306 while (casegrouper_get_next_group (grouper, &group))
312 for (; casereader_read (group, &c); case_destroy (&c))
315 calc_general (&c, ds);
317 calc_integer (&c, ds);
319 casereader_destroy (group);
323 ok = casegrouper_destroy (grouper);
324 ok = proc_commit (ds) && ok;
326 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
329 /* Parses the TABLES subcommand. */
331 crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
333 struct const_var_set *var_set;
335 const struct variable ***by = NULL;
336 size_t *by_nvar = NULL;
340 /* Ensure that this is a TABLES subcommand. */
341 if (!lex_match_id (lexer, "TABLES")
342 && (lex_token (lexer) != T_ID ||
343 dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
344 && lex_token (lexer) != T_ALL)
346 lex_match (lexer, '=');
348 if (variables != NULL)
349 var_set = const_var_set_create_from_array (variables, variables_cnt);
351 var_set = const_var_set_create_from_dict (dataset_dict (ds));
352 assert (var_set != NULL);
356 by = xnrealloc (by, n_by + 1, sizeof *by);
357 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
358 if (!parse_const_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
359 PV_NO_DUPLICATE | PV_NO_SCRATCH))
361 if (xalloc_oversized (nx, by_nvar[n_by]))
363 msg (SE, _("Too many cross-tabulation variables or dimensions."));
369 if (!lex_match (lexer, T_BY))
373 lex_error (lexer, _("expecting BY"));
382 int *by_iter = xcalloc (n_by, sizeof *by_iter);
385 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
386 for (i = 0; i < nx; i++)
390 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
397 for (i = 0; i < n_by; i++)
398 x->vars[i] = by[i][by_iter[i]];
404 for (i = n_by - 1; i >= 0; i--)
406 if (++by_iter[i] < by_nvar[i])
419 /* All return paths lead here. */
423 for (i = 0; i < n_by; i++)
429 const_var_set_destroy (var_set);
434 /* Parses the VARIABLES subcommand. */
436 crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
440 msg (SE, _("VARIABLES must be specified before TABLES."));
444 lex_match (lexer, '=');
448 size_t orig_nv = variables_cnt;
453 if (!parse_variables_const (lexer, dataset_dict (ds),
454 &variables, &variables_cnt,
455 (PV_APPEND | PV_NUMERIC
456 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
459 if (lex_token (lexer) != '(')
461 lex_error (lexer, "expecting `('");
466 if (!lex_force_int (lexer))
468 min = lex_integer (lexer);
471 lex_match (lexer, ',');
473 if (!lex_force_int (lexer))
475 max = lex_integer (lexer);
478 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
484 if (lex_token (lexer) != ')')
486 lex_error (lexer, "expecting `)'");
491 for (i = orig_nv; i < variables_cnt; i++)
493 struct var_range *vr = xmalloc (sizeof *vr);
496 vr->count = max - min + 1;
497 var_attach_aux (variables[i], vr, var_dtor_free);
500 if (lex_token (lexer) == '/')
512 /* Data file processing. */
514 static int compare_table_entry (const void *, const void *, const void *);
515 static unsigned hash_table_entry (const void *, const void *);
517 /* Set up the crosstabulation tables for processing. */
519 precalc (struct casereader *input, const struct dataset *ds)
523 if (!casereader_peek (input, 0, &c))
525 output_split_file_values (ds, &c);
530 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
540 for (i = 0; i < nxtab; i++)
542 struct crosstab *x = xtab[i];
547 x->ofs = n_sorted_tab;
549 for (j = 2; j < x->nvar; j++)
550 count *= get_var_range (x->vars[j - 2])->count;
552 sorted_tab = xnrealloc (sorted_tab,
553 n_sorted_tab + count, sizeof *sorted_tab);
554 v = xmalloca (sizeof *v * x->nvar);
555 for (j = 2; j < x->nvar; j++)
556 v[j] = get_var_range (x->vars[j])->min;
557 for (j = 0; j < count; j++)
559 struct table_entry *te;
562 te = sorted_tab[n_sorted_tab++]
563 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
567 int row_cnt = get_var_range (x->vars[0])->count;
568 int col_cnt = get_var_range (x->vars[1])->count;
569 const int mat_size = row_cnt * col_cnt;
572 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
573 for (m = 0; m < mat_size; m++)
577 for (k = 2; k < x->nvar; k++)
578 te->values[k].f = v[k];
579 for (k = 2; k < x->nvar; k++)
581 struct var_range *vr = get_var_range (x->vars[k]);
582 if (++v[k] >= vr->max)
591 sorted_tab = xnrealloc (sorted_tab,
592 n_sorted_tab + 1, sizeof *sorted_tab);
593 sorted_tab[n_sorted_tab] = NULL;
598 /* Form crosstabulations for general mode. */
600 calc_general (struct ccase *c, const struct dataset *ds)
602 /* Missing values to exclude. */
603 enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY
604 : cmd.miss == CRS_INCLUDE ? MV_SYSTEM
608 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
610 /* Flattened current table index. */
613 for (t = 0; t < nxtab; t++)
615 struct crosstab *x = xtab[t];
616 const size_t entry_size = (sizeof (struct table_entry)
617 + sizeof (union value) * (x->nvar - 1));
618 struct table_entry *te = xmalloca (entry_size);
620 /* Construct table entry for the current record and table. */
626 for (j = 0; j < x->nvar; j++)
628 const union value *v = case_data (c, x->vars[j]);
629 if (var_is_value_missing (x->vars[j], v, exclude))
631 x->missing += weight;
635 if (var_is_numeric (x->vars[j]))
636 te->values[j].f = case_num (c, x->vars[j]);
639 memcpy (te->values[j].s, case_str (c, x->vars[j]),
640 var_get_width (x->vars[j]));
642 /* Necessary in order to simplify comparisons. */
643 memset (&te->values[j].s[var_get_width (x->vars[j])], 0,
644 sizeof (union value) - var_get_width (x->vars[j]));
649 /* Add record to hash table. */
651 struct table_entry **tepp
652 = (struct table_entry **) hsh_probe (gen_tab, te);
655 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
658 memcpy (tep, te, entry_size);
663 (*tepp)->u.freq += weight;
672 calc_integer (struct ccase *c, const struct dataset *ds)
674 bool bad_warn = true;
677 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
679 /* Flattened current table index. */
682 for (t = 0; t < nxtab; t++)
684 struct crosstab *x = xtab[t];
689 for (i = 0; i < x->nvar; i++)
691 const struct variable *const v = x->vars[i];
692 struct var_range *vr = get_var_range (v);
693 double value = case_num (c, v);
695 /* Note that the first test also rules out SYSMIS. */
696 if ((value < vr->min || value >= vr->max)
697 || (cmd.miss == CRS_TABLE
698 && var_is_num_missing (v, value, MV_USER)))
700 x->missing += weight;
706 ofs += fact * ((int) value - vr->min);
712 const struct variable *row_var = x->vars[ROW_VAR];
713 const int row = case_num (c, row_var) - get_var_range (row_var)->min;
715 const struct variable *col_var = x->vars[COL_VAR];
716 const int col = case_num (c, col_var) - get_var_range (col_var)->min;
718 const int col_dim = get_var_range (col_var)->count;
720 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
727 /* Compare the table_entry's at A and B and return a strcmp()-type
730 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
732 const struct table_entry *a = a_;
733 const struct table_entry *b = b_;
735 if (a->table > b->table)
737 else if (a->table < b->table)
741 const struct crosstab *x = xtab[a->table];
744 for (i = x->nvar - 1; i >= 0; i--)
745 if (var_is_numeric (x->vars[i]))
747 const double diffnum = a->values[i].f - b->values[i].f;
750 else if (diffnum > 0)
755 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
756 var_get_width (x->vars[i]));
765 /* Calculate a hash value from table_entry A. */
767 hash_table_entry (const void *a_, const void *aux UNUSED)
769 const struct table_entry *a = a_;
774 for (i = 0; i < xtab[a->table]->nvar; i++)
775 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
780 /* Post-data reading calculations. */
782 static struct table_entry **find_pivot_extent (struct table_entry **,
783 int *cnt, int pivot);
784 static void enum_var_values (struct table_entry **entries, int entry_cnt,
786 union value **values, int *value_cnt);
787 static void output_pivot_table (struct table_entry **, struct table_entry **,
788 double **, double **, double **,
789 int *, int *, int *);
790 static void make_summary_table (void);
797 n_sorted_tab = hsh_count (gen_tab);
798 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
801 make_summary_table ();
803 /* Identify all the individual crosstabulation tables, and deal with
806 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
807 int pc = n_sorted_tab; /* Pivot count. */
809 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
810 int maxrows = 0, maxcols = 0, maxcells = 0;
814 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
818 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
819 &maxrows, &maxcols, &maxcells);
828 hsh_destroy (gen_tab);
832 for (i = 0; i < n_sorted_tab; i++)
834 free (sorted_tab[i]->u.data);
835 free (sorted_tab[i]);
841 static void insert_summary (struct tab_table *, int tab_index, double valid);
843 /* Output a table summarizing the cases processed. */
845 make_summary_table (void)
847 struct tab_table *summary;
849 struct table_entry **pb = sorted_tab, **pe;
850 int pc = n_sorted_tab;
853 summary = tab_create (7, 3 + nxtab, 1);
854 tab_title (summary, _("Summary."));
855 tab_headers (summary, 1, 0, 3, 0);
856 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
857 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
858 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
859 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
860 tab_hline (summary, TAL_1, 1, 6, 1);
861 tab_hline (summary, TAL_1, 1, 6, 2);
862 tab_vline (summary, TAL_1, 3, 1, 1);
863 tab_vline (summary, TAL_1, 5, 1, 1);
867 for (i = 0; i < 3; i++)
869 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
870 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
873 tab_offset (summary, 0, 3);
879 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
883 while (cur_tab < (*pb)->table)
884 insert_summary (summary, cur_tab++, 0.);
887 for (valid = 0.; pb < pe; pb++)
888 valid += (*pb)->u.freq;
891 const struct crosstab *const x = xtab[(*pb)->table];
892 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
893 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
894 const int count = n_cols * n_rows;
896 for (valid = 0.; pb < pe; pb++)
898 const double *data = (*pb)->u.data;
901 for (i = 0; i < count; i++)
905 insert_summary (summary, cur_tab++, valid);
910 while (cur_tab < nxtab)
911 insert_summary (summary, cur_tab++, 0.);
916 /* Inserts a line into T describing the crosstabulation at index
917 TAB_INDEX, which has VALID valid observations. */
919 insert_summary (struct tab_table *t, int tab_index, double valid)
921 struct crosstab *x = xtab[tab_index];
923 tab_hline (t, TAL_1, 0, 6, 0);
925 /* Crosstabulation name. */
927 char *buf = xmalloca (128 * x->nvar);
931 for (i = 0; i < x->nvar; i++)
934 cp = stpcpy (cp, " * ");
936 cp = stpcpy (cp, var_to_string (x->vars[i]));
938 tab_text (t, 0, 0, TAB_LEFT, buf);
943 /* Counts and percentages. */
953 for (i = 0; i < 3; i++)
955 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
956 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
967 static struct tab_table *table; /* Crosstabulation table. */
968 static struct tab_table *chisq; /* Chi-square table. */
969 static struct tab_table *sym; /* Symmetric measures table. */
970 static struct tab_table *risk; /* Risk estimate table. */
971 static struct tab_table *direct; /* Directional measures table. */
974 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
976 /* Column values, number of columns. */
977 static union value *cols;
980 /* Row values, number of rows. */
981 static union value *rows;
984 /* Number of statistically interesting columns/rows (columns/rows with
986 static int ns_cols, ns_rows;
988 /* Crosstabulation. */
989 static const struct crosstab *x;
991 /* Number of variables from the crosstabulation to consider. This is
992 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
995 /* Matrix contents. */
996 static double *mat; /* Matrix proper. */
997 static double *row_tot; /* Row totals. */
998 static double *col_tot; /* Column totals. */
999 static double W; /* Grand total. */
1001 static void display_dimensions (struct tab_table *, int first_difference,
1002 struct table_entry *);
1003 static void display_crosstabulation (void);
1004 static void display_chisq (void);
1005 static void display_symmetric (void);
1006 static void display_risk (void);
1007 static void display_directional (void);
1008 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
1009 static void table_value_missing (struct tab_table *table, int c, int r,
1010 unsigned char opt, const union value *v,
1011 const struct variable *var);
1012 static void delete_missing (void);
1014 /* Output pivot table beginning at PB and continuing until PE,
1015 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1016 hold *MAXROWS entries. */
1018 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1019 double **matp, double **row_totp, double **col_totp,
1020 int *maxrows, int *maxcols, int *maxcells)
1023 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1024 int tc = pe - pb; /* Table count. */
1026 /* Table entry for header comparison. */
1027 struct table_entry *cmp = NULL;
1029 x = xtab[(*pb)->table];
1030 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1032 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1034 /* Crosstabulation table initialization. */
1037 table = tab_create (nvar + n_cols,
1038 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1039 tab_headers (table, nvar - 1, 0, 2, 0);
1041 /* First header line. */
1042 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1043 TAB_CENTER | TAT_TITLE, var_get_name (x->vars[COL_VAR]));
1045 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1047 /* Second header line. */
1051 for (i = 2; i < nvar; i++)
1052 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1053 TAB_RIGHT | TAT_TITLE, var_to_string (x->vars[i]));
1054 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1055 var_get_name (x->vars[ROW_VAR]));
1056 for (i = 0; i < n_cols; i++)
1057 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1059 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1062 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1063 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1067 char *title = xmalloca (x->nvar * 64 + 128);
1071 if (cmd.pivot == CRS_PIVOT)
1072 for (i = 0; i < nvar; i++)
1075 cp = stpcpy (cp, " by ");
1076 cp = stpcpy (cp, var_get_name (x->vars[i]));
1080 cp = spprintf (cp, "%s by %s for",
1081 var_get_name (x->vars[0]),
1082 var_get_name (x->vars[1]));
1083 for (i = 2; i < nvar; i++)
1085 char buf[64], *bufp;
1090 cp = stpcpy (cp, var_get_name (x->vars[i]));
1092 format_short (buf, var_get_print_format (x->vars[i]),
1094 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1096 cp = stpcpy (cp, bufp);
1100 cp = stpcpy (cp, " [");
1101 for (i = 0; i < num_cells; i++)
1109 static const struct tuple cell_names[] =
1111 {CRS_CL_COUNT, N_("count")},
1112 {CRS_CL_ROW, N_("row %")},
1113 {CRS_CL_COLUMN, N_("column %")},
1114 {CRS_CL_TOTAL, N_("total %")},
1115 {CRS_CL_EXPECTED, N_("expected")},
1116 {CRS_CL_RESIDUAL, N_("residual")},
1117 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1118 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1122 const struct tuple *t;
1124 for (t = cell_names; t->value != cells[i]; t++)
1125 assert (t->value != -1);
1127 cp = stpcpy (cp, ", ");
1128 cp = stpcpy (cp, gettext (t->name));
1132 tab_title (table, "%s", title);
1136 tab_offset (table, 0, 2);
1141 /* Chi-square table initialization. */
1142 if (cmd.a_statistics[CRS_ST_CHISQ])
1144 chisq = tab_create (6 + (nvar - 2),
1145 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1146 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1148 tab_title (chisq, _("Chi-square tests."));
1150 tab_offset (chisq, nvar - 2, 0);
1151 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1152 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1153 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1154 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1155 _("Asymp. Sig. (2-sided)"));
1156 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1157 _("Exact. Sig. (2-sided)"));
1158 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1159 _("Exact. Sig. (1-sided)"));
1161 tab_offset (chisq, 0, 1);
1166 /* Symmetric measures. */
1167 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1168 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1169 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1170 || cmd.a_statistics[CRS_ST_KAPPA])
1172 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1173 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1174 tab_title (sym, _("Symmetric measures."));
1176 tab_offset (sym, nvar - 2, 0);
1177 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1178 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1179 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1180 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1181 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1182 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1183 tab_offset (sym, 0, 1);
1188 /* Risk estimate. */
1189 if (cmd.a_statistics[CRS_ST_RISK])
1191 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1192 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1193 tab_title (risk, _("Risk estimate."));
1195 tab_offset (risk, nvar - 2, 0);
1196 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1197 _("95%% Confidence Interval"));
1198 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1199 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1200 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1201 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1202 tab_hline (risk, TAL_1, 2, 3, 1);
1203 tab_vline (risk, TAL_1, 2, 0, 1);
1204 tab_offset (risk, 0, 2);
1209 /* Directional measures. */
1210 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1211 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1213 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1214 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1215 tab_title (direct, _("Directional measures."));
1217 tab_offset (direct, nvar - 2, 0);
1218 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1219 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1220 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1221 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1222 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1223 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1224 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1225 tab_offset (direct, 0, 1);
1232 /* Find pivot subtable if applicable. */
1233 te = find_pivot_extent (tb, &tc, 0);
1237 /* Find all the row variable values. */
1238 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1240 /* Allocate memory space for the column and row totals. */
1241 if (n_rows > *maxrows)
1243 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1244 row_tot = *row_totp;
1247 if (n_cols > *maxcols)
1249 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1250 col_tot = *col_totp;
1254 /* Allocate table space for the matrix. */
1255 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1256 tab_realloc (table, -1,
1257 MAX (tab_nr (table) + (n_rows + 1) * num_cells,
1258 tab_nr (table) * (pe - pb) / (te - tb)));
1260 if (mode == GENERAL)
1262 /* Allocate memory space for the matrix. */
1263 if (n_cols * n_rows > *maxcells)
1265 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1266 *maxcells = n_cols * n_rows;
1271 /* Build the matrix and calculate column totals. */
1273 union value *cur_col = cols;
1274 union value *cur_row = rows;
1276 double *cp = col_tot;
1277 struct table_entry **p;
1280 for (p = &tb[0]; p < te; p++)
1282 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1286 for (; cur_row < &rows[n_rows]; cur_row++)
1292 mp = &mat[cur_col - cols];
1295 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1302 *cp += *mp = (*p)->u.freq;
1307 /* Zero out the rest of the matrix. */
1308 for (; cur_row < &rows[n_rows]; cur_row++)
1314 if (cur_col < &cols[n_cols])
1316 const int rem_cols = n_cols - (cur_col - cols);
1319 for (c = 0; c < rem_cols; c++)
1321 mp = &mat[cur_col - cols];
1322 for (r = 0; r < n_rows; r++)
1324 for (c = 0; c < rem_cols; c++)
1326 mp += n_cols - rem_cols;
1334 double *tp = col_tot;
1336 assert (mode == INTEGER);
1337 mat = (*tb)->u.data;
1340 /* Calculate column totals. */
1341 for (c = 0; c < n_cols; c++)
1344 double *cp = &mat[c];
1346 for (r = 0; r < n_rows; r++)
1347 cum += cp[r * n_cols];
1355 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1356 ns_cols += *cp != 0.;
1359 /* Calculate row totals. */
1362 double *rp = row_tot;
1365 for (ns_rows = 0, r = n_rows; r--; )
1368 for (c = n_cols; c--; )
1376 /* Calculate grand total. */
1382 if (n_rows < n_cols)
1383 tp = row_tot, n = n_rows;
1385 tp = col_tot, n = n_cols;
1391 /* Find the first variable that differs from the last subtable,
1392 then display the values of the dimensioning variables for
1393 each table that needs it. */
1395 int first_difference = nvar - 1;
1398 for (; ; first_difference--)
1400 assert (first_difference >= 2);
1401 if (memcmp (&cmp->values[first_difference],
1402 &(*tb)->values[first_difference],
1403 sizeof *cmp->values))
1409 display_dimensions (table, first_difference, *tb);
1411 display_dimensions (chisq, first_difference, *tb);
1413 display_dimensions (sym, first_difference, *tb);
1415 display_dimensions (risk, first_difference, *tb);
1417 display_dimensions (direct, first_difference, *tb);
1421 display_crosstabulation ();
1422 if (cmd.miss == CRS_REPORT)
1427 display_symmetric ();
1431 display_directional ();
1442 tab_resize (chisq, 4 + (nvar - 2), -1);
1453 /* Delete missing rows and columns for statistical analysis when
1456 delete_missing (void)
1461 for (r = 0; r < n_rows; r++)
1462 if (var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1466 for (c = 0; c < n_cols; c++)
1467 mat[c + r * n_cols] = 0.;
1475 for (c = 0; c < n_cols; c++)
1476 if (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1480 for (r = 0; r < n_rows; r++)
1481 mat[c + r * n_cols] = 0.;
1487 /* Prepare table T for submission, and submit it. */
1489 submit (struct tab_table *t)
1496 tab_resize (t, -1, 0);
1497 if (tab_nr (t) == tab_t (t))
1502 tab_offset (t, 0, 0);
1504 for (i = 2; i < nvar; i++)
1505 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1506 var_to_string (x->vars[i]));
1507 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1508 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1510 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1512 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1513 tab_dim (t, crosstabs_dim);
1517 /* Sets the widths of all the columns and heights of all the rows in
1518 table T for driver D. */
1520 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1524 /* Width of a numerical column. */
1525 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1526 if (cmd.miss == CRS_REPORT)
1527 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1529 /* Set width for header columns. */
1535 w = d->width - c * (t->nc - t->l);
1536 for (i = 0; i <= t->nc; i++)
1540 if (w < d->prop_em_width * 8)
1541 w = d->prop_em_width * 8;
1543 if (w > d->prop_em_width * 15)
1544 w = d->prop_em_width * 15;
1546 for (i = 0; i < t->l; i++)
1550 for (i = t->l; i < t->nc; i++)
1553 for (i = 0; i < t->nr; i++)
1554 t->h[i] = tab_natural_height (t, d, i);
1557 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1558 int *cnt, int pivot);
1559 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1560 int *cnt, int pivot);
1562 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1564 static struct table_entry **
1565 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1567 return (mode == GENERAL
1568 ? find_pivot_extent_general (tp, cnt, pivot)
1569 : find_pivot_extent_integer (tp, cnt, pivot));
1572 /* Find the extent of a region in TP that contains one table. If
1573 PIVOT != 0 that means a set of table entries with identical table
1574 number; otherwise they also have to have the same values for every
1575 dimension after the row and column dimensions. The table that is
1576 searched starts at TP and has length CNT. Returns the first entry
1577 after the last one in the table; sets *CNT to the number of
1578 remaining values. If there are no entries in TP at all, returns
1579 NULL. A yucky interface, admittedly, but it works. */
1580 static struct table_entry **
1581 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1583 struct table_entry *fp = *tp;
1588 x = xtab[(*tp)->table];
1596 if ((*tp)->table != fp->table)
1601 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1608 /* Integer mode correspondent to find_pivot_extent_general(). This
1609 could be optimized somewhat, but I just don't give a crap about
1610 CROSSTABS performance in integer mode, which is just a
1611 CROSSTABS wart as far as I'm concerned.
1613 That said, feel free to send optimization patches to me. */
1614 static struct table_entry **
1615 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1617 struct table_entry *fp = *tp;
1622 x = xtab[(*tp)->table];
1630 if ((*tp)->table != fp->table)
1635 if (memcmp (&(*tp)->values[2], &fp->values[2],
1636 sizeof (union value) * (x->nvar - 2)))
1643 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1644 result. WIDTH_ points to an int which is either 0 for a
1645 numeric value or a string width for a string value. */
1647 compare_value (const void *a_, const void *b_, const void *width_)
1649 const union value *a = a_;
1650 const union value *b = b_;
1651 const int *pwidth = width_;
1652 const int width = *pwidth;
1655 return (a->f < b->f) ? -1 : (a->f > b->f);
1657 return strncmp (a->s, b->s, width);
1660 /* Given an array of ENTRY_CNT table_entry structures starting at
1661 ENTRIES, creates a sorted list of the values that the variable
1662 with index VAR_IDX takes on. The values are returned as a
1663 malloc()'darray stored in *VALUES, with the number of values
1664 stored in *VALUE_CNT.
1667 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1668 union value **values, int *value_cnt)
1670 const struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1672 if (mode == GENERAL)
1674 int width = var_get_width (v);
1677 *values = xnmalloc (entry_cnt, sizeof **values);
1678 for (i = 0; i < entry_cnt; i++)
1679 (*values)[i] = entries[i]->values[var_idx];
1680 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1681 compare_value, &width);
1685 struct var_range *vr = get_var_range (v);
1688 assert (mode == INTEGER);
1689 *values = xnmalloc (vr->count, sizeof **values);
1690 for (i = 0; i < vr->count; i++)
1691 (*values)[i].f = i + vr->min;
1692 *value_cnt = vr->count;
1696 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1697 from V, displayed with print format spec from variable VAR. When
1698 in REPORT missing-value mode, missing values have an M appended. */
1700 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1701 const union value *v, const struct variable *var)
1704 const struct fmt_spec *print = var_get_print_format (var);
1706 const char *label = var_lookup_value_label (var, v);
1709 tab_text (table, c, r, TAB_LEFT, label);
1713 s.string = tab_alloc (table, print->w);
1714 format_short (s.string, print, v);
1715 s.length = strlen (s.string);
1716 if (cmd.miss == CRS_REPORT && var_is_num_missing (var, v->f, MV_USER))
1717 s.string[s.length++] = 'M';
1718 while (s.length && *s.string == ' ')
1723 tab_raw (table, c, r, opt, &s);
1726 /* Draws a line across TABLE at the current row to indicate the most
1727 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1728 that changed, and puts the values that changed into the table. TB
1729 and X must be the corresponding table_entry and crosstab,
1732 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1734 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1736 for (; first_difference >= 2; first_difference--)
1737 table_value_missing (table, nvar - first_difference - 1, 0,
1738 TAB_RIGHT, &tb->values[first_difference],
1739 x->vars[first_difference]);
1742 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1743 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1744 additionally suffixed with a letter `M'. */
1746 format_cell_entry (struct tab_table *table, int c, int r, double value,
1747 char suffix, bool mark_missing)
1749 const struct fmt_spec f = {FMT_F, 10, 1};
1754 s.string = tab_alloc (table, 16);
1756 data_out (&v, &f, s.string);
1757 while (*s.string == ' ')
1763 s.string[s.length++] = suffix;
1765 s.string[s.length++] = 'M';
1767 tab_raw (table, c, r, TAB_RIGHT, &s);
1770 /* Displays the crosstabulation table. */
1772 display_crosstabulation (void)
1777 for (r = 0; r < n_rows; r++)
1778 table_value_missing (table, nvar - 2, r * num_cells,
1779 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1781 tab_text (table, nvar - 2, n_rows * num_cells,
1782 TAB_LEFT, _("Total"));
1784 /* Put in the actual cells. */
1789 tab_offset (table, nvar - 1, -1);
1790 for (r = 0; r < n_rows; r++)
1793 tab_hline (table, TAL_1, -1, n_cols, 0);
1794 for (c = 0; c < n_cols; c++)
1796 bool mark_missing = false;
1797 double expected_value = row_tot[r] * col_tot[c] / W;
1798 if (cmd.miss == CRS_REPORT
1799 && (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER)
1800 || var_is_num_missing (x->vars[ROW_VAR], rows[r].f,
1802 mark_missing = true;
1803 for (i = 0; i < num_cells; i++)
1814 v = *mp / row_tot[r] * 100.;
1818 v = *mp / col_tot[c] * 100.;
1825 case CRS_CL_EXPECTED:
1828 case CRS_CL_RESIDUAL:
1829 v = *mp - expected_value;
1831 case CRS_CL_SRESIDUAL:
1832 v = (*mp - expected_value) / sqrt (expected_value);
1834 case CRS_CL_ASRESIDUAL:
1835 v = ((*mp - expected_value)
1836 / sqrt (expected_value
1837 * (1. - row_tot[r] / W)
1838 * (1. - col_tot[c] / W)));
1844 format_cell_entry (table, c, i, v, suffix, mark_missing);
1850 tab_offset (table, -1, tab_row (table) + num_cells);
1858 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1859 for (r = 0; r < n_rows; r++)
1862 bool mark_missing = false;
1864 if (cmd.miss == CRS_REPORT
1865 && var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1866 mark_missing = true;
1868 for (i = 0; i < num_cells; i++)
1882 v = row_tot[r] / W * 100.;
1886 v = row_tot[r] / W * 100.;
1889 case CRS_CL_EXPECTED:
1890 case CRS_CL_RESIDUAL:
1891 case CRS_CL_SRESIDUAL:
1892 case CRS_CL_ASRESIDUAL:
1899 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1900 tab_next_row (table);
1905 /* Column totals, grand total. */
1911 tab_hline (table, TAL_1, -1, n_cols, 0);
1912 for (c = 0; c <= n_cols; c++)
1914 double ct = c < n_cols ? col_tot[c] : W;
1915 bool mark_missing = false;
1919 if (cmd.miss == CRS_REPORT && c < n_cols
1920 && var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1921 mark_missing = true;
1923 for (i = 0; i < num_cells; i++)
1945 case CRS_CL_EXPECTED:
1946 case CRS_CL_RESIDUAL:
1947 case CRS_CL_SRESIDUAL:
1948 case CRS_CL_ASRESIDUAL:
1954 format_cell_entry (table, c, i, v, suffix, mark_missing);
1959 tab_offset (table, -1, tab_row (table) + last_row);
1962 tab_offset (table, 0, -1);
1965 static void calc_r (double *X, double *Y, double *, double *, double *);
1966 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1968 /* Display chi-square statistics. */
1970 display_chisq (void)
1972 static const char *chisq_stats[N_CHISQ] =
1974 N_("Pearson Chi-Square"),
1975 N_("Likelihood Ratio"),
1976 N_("Fisher's Exact Test"),
1977 N_("Continuity Correction"),
1978 N_("Linear-by-Linear Association"),
1980 double chisq_v[N_CHISQ];
1981 double fisher1, fisher2;
1987 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1989 tab_offset (chisq, nvar - 2, -1);
1991 for (i = 0; i < N_CHISQ; i++)
1993 if ((i != 2 && chisq_v[i] == SYSMIS)
1994 || (i == 2 && fisher1 == SYSMIS))
1998 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
2001 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
2002 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
2003 tab_float (chisq, 3, 0, TAB_RIGHT,
2004 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
2009 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
2010 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
2012 tab_next_row (chisq);
2015 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2016 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2017 tab_next_row (chisq);
2019 tab_offset (chisq, 0, -1);
2022 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2023 double[N_SYMMETRIC]);
2025 /* Display symmetric measures. */
2027 display_symmetric (void)
2029 static const char *categories[] =
2031 N_("Nominal by Nominal"),
2032 N_("Ordinal by Ordinal"),
2033 N_("Interval by Interval"),
2034 N_("Measure of Agreement"),
2037 static const char *stats[N_SYMMETRIC] =
2041 N_("Contingency Coefficient"),
2042 N_("Kendall's tau-b"),
2043 N_("Kendall's tau-c"),
2045 N_("Spearman Correlation"),
2050 static const int stats_categories[N_SYMMETRIC] =
2052 0, 0, 0, 1, 1, 1, 1, 2, 3,
2056 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2059 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2062 tab_offset (sym, nvar - 2, -1);
2064 for (i = 0; i < N_SYMMETRIC; i++)
2066 if (sym_v[i] == SYSMIS)
2069 if (stats_categories[i] != last_cat)
2071 last_cat = stats_categories[i];
2072 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2075 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2076 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2077 if (sym_ase[i] != SYSMIS)
2078 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2079 if (sym_t[i] != SYSMIS)
2080 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2081 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2085 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2086 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2089 tab_offset (sym, 0, -1);
2092 static int calc_risk (double[], double[], double[], union value *);
2094 /* Display risk estimate. */
2099 double risk_v[3], lower[3], upper[3];
2103 if (!calc_risk (risk_v, upper, lower, c))
2106 tab_offset (risk, nvar - 2, -1);
2108 for (i = 0; i < 3; i++)
2110 if (risk_v[i] == SYSMIS)
2116 if (var_is_numeric (x->vars[COL_VAR]))
2117 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2118 var_get_name (x->vars[COL_VAR]), c[0].f, c[1].f);
2120 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2121 var_get_name (x->vars[COL_VAR]),
2122 var_get_width (x->vars[COL_VAR]), c[0].s,
2123 var_get_width (x->vars[COL_VAR]), c[1].s);
2127 if (var_is_numeric (x->vars[ROW_VAR]))
2128 sprintf (buf, _("For cohort %s = %g"),
2129 var_get_name (x->vars[ROW_VAR]), rows[i - 1].f);
2131 sprintf (buf, _("For cohort %s = %.*s"),
2132 var_get_name (x->vars[ROW_VAR]),
2133 var_get_width (x->vars[ROW_VAR]), rows[i - 1].s);
2137 tab_text (risk, 0, 0, TAB_LEFT, buf);
2138 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2139 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2140 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2141 tab_next_row (risk);
2144 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2145 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2146 tab_next_row (risk);
2148 tab_offset (risk, 0, -1);
2151 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2152 double[N_DIRECTIONAL]);
2154 /* Display directional measures. */
2156 display_directional (void)
2158 static const char *categories[] =
2160 N_("Nominal by Nominal"),
2161 N_("Ordinal by Ordinal"),
2162 N_("Nominal by Interval"),
2165 static const char *stats[] =
2168 N_("Goodman and Kruskal tau"),
2169 N_("Uncertainty Coefficient"),
2174 static const char *types[] =
2181 static const int stats_categories[N_DIRECTIONAL] =
2183 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2186 static const int stats_stats[N_DIRECTIONAL] =
2188 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2191 static const int stats_types[N_DIRECTIONAL] =
2193 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2196 static const int *stats_lookup[] =
2203 static const char **stats_names[] =
2215 double direct_v[N_DIRECTIONAL];
2216 double direct_ase[N_DIRECTIONAL];
2217 double direct_t[N_DIRECTIONAL];
2221 if (!calc_directional (direct_v, direct_ase, direct_t))
2224 tab_offset (direct, nvar - 2, -1);
2226 for (i = 0; i < N_DIRECTIONAL; i++)
2228 if (direct_v[i] == SYSMIS)
2234 for (j = 0; j < 3; j++)
2235 if (last[j] != stats_lookup[j][i])
2238 tab_hline (direct, TAL_1, j, 6, 0);
2243 int k = last[j] = stats_lookup[j][i];
2248 string = var_get_name (x->vars[0]);
2250 string = var_get_name (x->vars[1]);
2252 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2253 gettext (stats_names[j][k]), string);
2258 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2259 if (direct_ase[i] != SYSMIS)
2260 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2261 if (direct_t[i] != SYSMIS)
2262 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2263 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2264 tab_next_row (direct);
2267 tab_offset (direct, 0, -1);
2270 /* Statistical calculations. */
2272 /* Returns the value of the gamma (factorial) function for an integer
2275 gamma_int (double x)
2280 for (i = 2; i < x; i++)
2285 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2287 static inline double
2288 Pr (int a, int b, int c, int d)
2290 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2291 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2292 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2293 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2294 / gamma_int (a + b + c + d + 1.));
2297 /* Swap the contents of A and B. */
2299 swap (int *a, int *b)
2306 /* Calculate significance for Fisher's exact test as specified in
2307 _SPSS Statistical Algorithms_, Appendix 5. */
2309 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2313 if (MIN (c, d) < MIN (a, b))
2314 swap (&a, &c), swap (&b, &d);
2315 if (MIN (b, d) < MIN (a, c))
2316 swap (&a, &b), swap (&c, &d);
2320 swap (&a, &b), swap (&c, &d);
2322 swap (&a, &c), swap (&b, &d);
2326 for (x = 0; x <= a; x++)
2327 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2329 *fisher2 = *fisher1;
2330 for (x = 1; x <= b; x++)
2331 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2334 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2335 columns with values COLS and N_ROWS rows with values ROWS. Values
2336 in the matrix sum to W. */
2338 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2339 double *fisher1, double *fisher2)
2343 chisq[0] = chisq[1] = 0.;
2344 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2345 *fisher1 = *fisher2 = SYSMIS;
2347 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2349 if (ns_rows <= 1 || ns_cols <= 1)
2351 chisq[0] = chisq[1] = SYSMIS;
2355 for (r = 0; r < n_rows; r++)
2356 for (c = 0; c < n_cols; c++)
2358 const double expected = row_tot[r] * col_tot[c] / W;
2359 const double freq = mat[n_cols * r + c];
2360 const double residual = freq - expected;
2362 chisq[0] += residual * residual / expected;
2364 chisq[1] += freq * log (expected / freq);
2375 /* Calculate Yates and Fisher exact test. */
2376 if (ns_cols == 2 && ns_rows == 2)
2378 double f11, f12, f21, f22;
2384 for (i = j = 0; i < n_cols; i++)
2385 if (col_tot[i] != 0.)
2394 f11 = mat[nz_cols[0]];
2395 f12 = mat[nz_cols[1]];
2396 f21 = mat[nz_cols[0] + n_cols];
2397 f22 = mat[nz_cols[1] + n_cols];
2402 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2405 chisq[3] = (W * x * x
2406 / (f11 + f12) / (f21 + f22)
2407 / (f11 + f21) / (f12 + f22));
2415 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2416 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2419 /* Calculate Mantel-Haenszel. */
2420 if (var_is_numeric (x->vars[ROW_VAR]) && var_is_numeric (x->vars[COL_VAR]))
2422 double r, ase_0, ase_1;
2423 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2425 chisq[4] = (W - 1.) * r * r;
2430 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2431 ASE_1, and ase_0 into ASE_0. The row and column values must be
2432 passed in X and Y. */
2434 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2436 double SX, SY, S, T;
2438 double sum_XYf, sum_X2Y2f;
2439 double sum_Xr, sum_X2r;
2440 double sum_Yc, sum_Y2c;
2443 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2444 for (j = 0; j < n_cols; j++)
2446 double fij = mat[j + i * n_cols];
2447 double product = X[i] * Y[j];
2448 double temp = fij * product;
2450 sum_X2Y2f += temp * product;
2453 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2455 sum_Xr += X[i] * row_tot[i];
2456 sum_X2r += X[i] * X[i] * row_tot[i];
2460 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2462 sum_Yc += Y[i] * col_tot[i];
2463 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2467 S = sum_XYf - sum_Xr * sum_Yc / W;
2468 SX = sum_X2r - sum_Xr * sum_Xr / W;
2469 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2472 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2477 for (s = c = 0., i = 0; i < n_rows; i++)
2478 for (j = 0; j < n_cols; j++)
2480 double Xresid, Yresid;
2483 Xresid = X[i] - Xbar;
2484 Yresid = Y[j] - Ybar;
2485 temp = (T * Xresid * Yresid
2487 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2488 y = mat[j + i * n_cols] * temp * temp - c;
2493 *ase_1 = sqrt (s) / (T * T);
2497 static double somers_d_v[3];
2498 static double somers_d_ase[3];
2499 static double somers_d_t[3];
2501 /* Calculate symmetric statistics and their asymptotic standard
2502 errors. Returns 0 if none could be calculated. */
2504 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2505 double t[N_SYMMETRIC])
2507 int q = MIN (ns_rows, ns_cols);
2516 for (i = 0; i < N_SYMMETRIC; i++)
2517 v[i] = ase[i] = t[i] = SYSMIS;
2520 /* Phi, Cramer's V, contingency coefficient. */
2521 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2523 double Xp = 0.; /* Pearson chi-square. */
2528 for (r = 0; r < n_rows; r++)
2529 for (c = 0; c < n_cols; c++)
2531 const double expected = row_tot[r] * col_tot[c] / W;
2532 const double freq = mat[n_cols * r + c];
2533 const double residual = freq - expected;
2535 Xp += residual * residual / expected;
2539 if (cmd.a_statistics[CRS_ST_PHI])
2541 v[0] = sqrt (Xp / W);
2542 v[1] = sqrt (Xp / (W * (q - 1)));
2544 if (cmd.a_statistics[CRS_ST_CC])
2545 v[2] = sqrt (Xp / (Xp + W));
2548 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2549 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2554 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2561 for (r = 0; r < n_rows; r++)
2562 Dr -= row_tot[r] * row_tot[r];
2563 for (c = 0; c < n_cols; c++)
2564 Dc -= col_tot[c] * col_tot[c];
2570 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2571 for (c = 0; c < n_cols; c++)
2575 for (r = 0; r < n_rows; r++)
2576 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2586 for (i = 0; i < n_rows; i++)
2590 for (j = 1; j < n_cols; j++)
2591 Cij += col_tot[j] - cum[j + i * n_cols];
2594 for (j = 1; j < n_cols; j++)
2595 Dij += cum[j + (i - 1) * n_cols];
2599 double fij = mat[j + i * n_cols];
2605 assert (j < n_cols);
2607 Cij -= col_tot[j] - cum[j + i * n_cols];
2608 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2612 Cij += cum[j - 1 + (i - 1) * n_cols];
2613 Dij -= cum[j + (i - 1) * n_cols];
2619 if (cmd.a_statistics[CRS_ST_BTAU])
2620 v[3] = (P - Q) / sqrt (Dr * Dc);
2621 if (cmd.a_statistics[CRS_ST_CTAU])
2622 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2623 if (cmd.a_statistics[CRS_ST_GAMMA])
2624 v[5] = (P - Q) / (P + Q);
2626 /* ASE for tau-b, tau-c, gamma. Calculations could be
2627 eliminated here, at expense of memory. */
2632 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2633 for (i = 0; i < n_rows; i++)
2637 for (j = 1; j < n_cols; j++)
2638 Cij += col_tot[j] - cum[j + i * n_cols];
2641 for (j = 1; j < n_cols; j++)
2642 Dij += cum[j + (i - 1) * n_cols];
2646 double fij = mat[j + i * n_cols];
2648 if (cmd.a_statistics[CRS_ST_BTAU])
2650 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2651 + v[3] * (row_tot[i] * Dc
2652 + col_tot[j] * Dr));
2653 btau_cum += fij * temp * temp;
2657 const double temp = Cij - Dij;
2658 ctau_cum += fij * temp * temp;
2661 if (cmd.a_statistics[CRS_ST_GAMMA])
2663 const double temp = Q * Cij - P * Dij;
2664 gamma_cum += fij * temp * temp;
2667 if (cmd.a_statistics[CRS_ST_D])
2669 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2670 - (P - Q) * (W - row_tot[i]));
2671 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2672 - (Q - P) * (W - col_tot[j]));
2677 assert (j < n_cols);
2679 Cij -= col_tot[j] - cum[j + i * n_cols];
2680 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2684 Cij += cum[j - 1 + (i - 1) * n_cols];
2685 Dij -= cum[j + (i - 1) * n_cols];
2691 btau_var = ((btau_cum
2692 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2694 if (cmd.a_statistics[CRS_ST_BTAU])
2696 ase[3] = sqrt (btau_var);
2697 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2700 if (cmd.a_statistics[CRS_ST_CTAU])
2702 ase[4] = ((2 * q / ((q - 1) * W * W))
2703 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2704 t[4] = v[4] / ase[4];
2706 if (cmd.a_statistics[CRS_ST_GAMMA])
2708 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2709 t[5] = v[5] / (2. / (P + Q)
2710 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2712 if (cmd.a_statistics[CRS_ST_D])
2714 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2715 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2716 somers_d_t[0] = (somers_d_v[0]
2718 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2719 somers_d_v[1] = (P - Q) / Dc;
2720 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2721 somers_d_t[1] = (somers_d_v[1]
2723 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2724 somers_d_v[2] = (P - Q) / Dr;
2725 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2726 somers_d_t[2] = (somers_d_v[2]
2728 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2734 /* Spearman correlation, Pearson's r. */
2735 if (cmd.a_statistics[CRS_ST_CORR])
2737 double *R = xmalloca (sizeof *R * n_rows);
2738 double *C = xmalloca (sizeof *C * n_cols);
2741 double y, t, c = 0., s = 0.;
2746 R[i] = s + (row_tot[i] + 1.) / 2.;
2753 assert (i < n_rows);
2758 double y, t, c = 0., s = 0.;
2763 C[j] = s + (col_tot[j] + 1.) / 2;
2770 assert (j < n_cols);
2774 calc_r (R, C, &v[6], &t[6], &ase[6]);
2780 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2784 /* Cohen's kappa. */
2785 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2787 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2790 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2791 i < ns_rows; i++, j++)
2795 while (col_tot[j] == 0.)
2798 prod = row_tot[i] * col_tot[j];
2799 sum = row_tot[i] + col_tot[j];
2801 sum_fii += mat[j + i * n_cols];
2803 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2804 sum_riciri_ci += prod * sum;
2806 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2807 for (j = 0; j < ns_cols; j++)
2809 double sum = row_tot[i] + col_tot[j];
2810 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2813 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2815 ase[8] = sqrt ((W * W * sum_rici
2816 + sum_rici * sum_rici
2817 - W * sum_riciri_ci)
2818 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2820 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2821 / pow2 (W * W - sum_rici))
2822 + ((2. * (W - sum_fii)
2823 * (2. * sum_fii * sum_rici
2824 - W * sum_fiiri_ci))
2825 / cube (W * W - sum_rici))
2826 + (pow2 (W - sum_fii)
2827 * (W * sum_fijri_ci2 - 4.
2828 * sum_rici * sum_rici)
2829 / pow4 (W * W - sum_rici))));
2831 t[8] = v[8] / ase[8];
2838 /* Calculate risk estimate. */
2840 calc_risk (double *value, double *upper, double *lower, union value *c)
2842 double f11, f12, f21, f22;
2848 for (i = 0; i < 3; i++)
2849 value[i] = upper[i] = lower[i] = SYSMIS;
2852 if (ns_rows != 2 || ns_cols != 2)
2859 for (i = j = 0; i < n_cols; i++)
2860 if (col_tot[i] != 0.)
2869 f11 = mat[nz_cols[0]];
2870 f12 = mat[nz_cols[1]];
2871 f21 = mat[nz_cols[0] + n_cols];
2872 f22 = mat[nz_cols[1] + n_cols];
2874 c[0] = cols[nz_cols[0]];
2875 c[1] = cols[nz_cols[1]];
2878 value[0] = (f11 * f22) / (f12 * f21);
2879 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2880 lower[0] = value[0] * exp (-1.960 * v);
2881 upper[0] = value[0] * exp (1.960 * v);
2883 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2884 v = sqrt ((f12 / (f11 * (f11 + f12)))
2885 + (f22 / (f21 * (f21 + f22))));
2886 lower[1] = value[1] * exp (-1.960 * v);
2887 upper[1] = value[1] * exp (1.960 * v);
2889 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2890 v = sqrt ((f11 / (f12 * (f11 + f12)))
2891 + (f21 / (f22 * (f21 + f22))));
2892 lower[2] = value[2] * exp (-1.960 * v);
2893 upper[2] = value[2] * exp (1.960 * v);
2898 /* Calculate directional measures. */
2900 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2901 double t[N_DIRECTIONAL])
2906 for (i = 0; i < N_DIRECTIONAL; i++)
2907 v[i] = ase[i] = t[i] = SYSMIS;
2911 if (cmd.a_statistics[CRS_ST_LAMBDA])
2913 double *fim = xnmalloc (n_rows, sizeof *fim);
2914 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2915 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2916 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2917 double sum_fim, sum_fmj;
2919 int rm_index, cm_index;
2922 /* Find maximum for each row and their sum. */
2923 for (sum_fim = 0., i = 0; i < n_rows; i++)
2925 double max = mat[i * n_cols];
2928 for (j = 1; j < n_cols; j++)
2929 if (mat[j + i * n_cols] > max)
2931 max = mat[j + i * n_cols];
2935 sum_fim += fim[i] = max;
2936 fim_index[i] = index;
2939 /* Find maximum for each column. */
2940 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2942 double max = mat[j];
2945 for (i = 1; i < n_rows; i++)
2946 if (mat[j + i * n_cols] > max)
2948 max = mat[j + i * n_cols];
2952 sum_fmj += fmj[j] = max;
2953 fmj_index[j] = index;
2956 /* Find maximum row total. */
2959 for (i = 1; i < n_rows; i++)
2960 if (row_tot[i] > rm)
2966 /* Find maximum column total. */
2969 for (j = 1; j < n_cols; j++)
2970 if (col_tot[j] > cm)
2976 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2977 v[1] = (sum_fmj - rm) / (W - rm);
2978 v[2] = (sum_fim - cm) / (W - cm);
2980 /* ASE1 for Y given X. */
2984 for (accum = 0., i = 0; i < n_rows; i++)
2985 for (j = 0; j < n_cols; j++)
2987 const int deltaj = j == cm_index;
2988 accum += (mat[j + i * n_cols]
2989 * pow2 ((j == fim_index[i])
2994 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2997 /* ASE0 for Y given X. */
3001 for (accum = 0., i = 0; i < n_rows; i++)
3002 if (cm_index != fim_index[i])
3003 accum += (mat[i * n_cols + fim_index[i]]
3004 + mat[i * n_cols + cm_index]);
3005 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
3008 /* ASE1 for X given Y. */
3012 for (accum = 0., i = 0; i < n_rows; i++)
3013 for (j = 0; j < n_cols; j++)
3015 const int deltaj = i == rm_index;
3016 accum += (mat[j + i * n_cols]
3017 * pow2 ((i == fmj_index[j])
3022 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3025 /* ASE0 for X given Y. */
3029 for (accum = 0., j = 0; j < n_cols; j++)
3030 if (rm_index != fmj_index[j])
3031 accum += (mat[j + n_cols * fmj_index[j]]
3032 + mat[j + n_cols * rm_index]);
3033 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3036 /* Symmetric ASE0 and ASE1. */
3041 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3042 for (j = 0; j < n_cols; j++)
3044 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3045 int temp1 = (i == rm_index) + (j == cm_index);
3046 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3047 accum1 += (mat[j + i * n_cols]
3048 * pow2 (temp0 + (v[0] - 1.) * temp1));
3050 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3051 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3052 / (2. * W - rm - cm));
3061 double sum_fij2_ri, sum_fij2_ci;
3062 double sum_ri2, sum_cj2;
3064 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3065 for (j = 0; j < n_cols; j++)
3067 double temp = pow2 (mat[j + i * n_cols]);
3068 sum_fij2_ri += temp / row_tot[i];
3069 sum_fij2_ci += temp / col_tot[j];
3072 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3073 sum_ri2 += row_tot[i] * row_tot[i];
3075 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3076 sum_cj2 += col_tot[j] * col_tot[j];
3078 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3079 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3083 if (cmd.a_statistics[CRS_ST_UC])
3085 double UX, UY, UXY, P;
3086 double ase1_yx, ase1_xy, ase1_sym;
3089 for (UX = 0., i = 0; i < n_rows; i++)
3090 if (row_tot[i] > 0.)
3091 UX -= row_tot[i] / W * log (row_tot[i] / W);
3093 for (UY = 0., j = 0; j < n_cols; j++)
3094 if (col_tot[j] > 0.)
3095 UY -= col_tot[j] / W * log (col_tot[j] / W);
3097 for (UXY = P = 0., i = 0; i < n_rows; i++)
3098 for (j = 0; j < n_cols; j++)
3100 double entry = mat[j + i * n_cols];
3105 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3106 UXY -= entry / W * log (entry / W);
3109 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3110 for (j = 0; j < n_cols; j++)
3112 double entry = mat[j + i * n_cols];
3117 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3118 + (UX - UXY) * log (col_tot[j] / W));
3119 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3120 + (UY - UXY) * log (row_tot[i] / W));
3121 ase1_sym += entry * pow2 ((UXY
3122 * log (row_tot[i] * col_tot[j] / (W * W)))
3123 - (UX + UY) * log (entry / W));
3126 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3127 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3128 t[5] = v[5] / ((2. / (W * (UX + UY)))
3129 * sqrt (P - pow2 (UX + UY - UXY) / W));
3131 v[6] = (UX + UY - UXY) / UX;
3132 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3133 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3135 v[7] = (UX + UY - UXY) / UY;
3136 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3137 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3141 if (cmd.a_statistics[CRS_ST_D])
3146 calc_symmetric (NULL, NULL, NULL);
3147 for (i = 0; i < 3; i++)
3149 v[8 + i] = somers_d_v[i];
3150 ase[8 + i] = somers_d_ase[i];
3151 t[8 + i] = somers_d_t[i];
3156 if (cmd.a_statistics[CRS_ST_ETA])
3159 double sum_Xr, sum_X2r;
3163 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3165 sum_Xr += rows[i].f * row_tot[i];
3166 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3168 SX = sum_X2r - sum_Xr * sum_Xr / W;
3170 for (SXW = 0., j = 0; j < n_cols; j++)
3174 for (cum = 0., i = 0; i < n_rows; i++)
3176 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3177 cum += rows[i].f * mat[j + i * n_cols];
3180 SXW -= cum * cum / col_tot[j];
3182 v[11] = sqrt (1. - SXW / SX);
3186 double sum_Yc, sum_Y2c;
3190 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3192 sum_Yc += cols[i].f * col_tot[i];
3193 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3195 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3197 for (SYW = 0., i = 0; i < n_rows; i++)
3201 for (cum = 0., j = 0; j < n_cols; j++)
3203 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3204 cum += cols[j].f * mat[j + i * n_cols];
3207 SYW -= cum * cum / row_tot[i];
3209 v[12] = sqrt (1. - SYW / SY);
3216 /* A wrapper around data_out() that limits string output to short
3217 string width and null terminates the result. */
3219 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3221 struct fmt_spec fmt_subst;
3223 /* Limit to short string width. */
3224 if (fmt_is_string (fp->type))
3228 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3229 if (fmt_subst.type == FMT_A)
3230 fmt_subst.w = MIN (8, fmt_subst.w);
3232 fmt_subst.w = MIN (16, fmt_subst.w);
3238 data_out (v, fp, s);
3240 /* Null terminate. */