1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 - Pearson's R (but not Spearman!) is off a little.
22 - T values for Spearman's R and Pearson's R are wrong.
23 - How to calculate significance of symmetric and directional measures?
24 - Asymmetric ASEs and T values for lambda are wrong.
25 - ASE of Goodman and Kruskal's tau is not calculated.
26 - ASE of symmetric somers' d is wrong.
27 - Approx. T of uncertainty coefficient is wrong.
34 #include <gsl/gsl_cdf.h>
38 #include <data/case.h>
39 #include <data/casegrouper.h>
40 #include <data/casereader.h>
41 #include <data/data-out.h>
42 #include <data/dictionary.h>
43 #include <data/format.h>
44 #include <data/procedure.h>
45 #include <data/value-labels.h>
46 #include <data/variable.h>
47 #include <language/command.h>
48 #include <language/dictionary/split-file.h>
49 #include <language/lexer/lexer.h>
50 #include <language/lexer/variable-parser.h>
51 #include <libpspp/alloc.h>
52 #include <libpspp/array.h>
53 #include <libpspp/assertion.h>
54 #include <libpspp/compiler.h>
55 #include <libpspp/hash.h>
56 #include <libpspp/magic.h>
57 #include <libpspp/message.h>
58 #include <libpspp/message.h>
59 #include <libpspp/misc.h>
60 #include <libpspp/pool.h>
61 #include <libpspp/str.h>
62 #include <output/output.h>
63 #include <output/table.h>
68 #define _(msgid) gettext (msgid)
69 #define N_(msgid) msgid
77 missing=miss:!table/include/report;
78 +write[wr_]=none,cells,all;
79 +format=fmt:!labels/nolabels/novallabs,
82 tabl:!tables/notables,
85 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
87 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
93 /* Number of chi-square statistics. */
96 /* Number of symmetric statistics. */
99 /* Number of directional statistics. */
100 #define N_DIRECTIONAL 13
102 /* A single table entry for general mode. */
105 int table; /* Flattened table number. */
108 double freq; /* Frequency count. */
109 double *data; /* Crosstabulation table for integer mode. */
112 union value values[1]; /* Values. */
115 /* A crosstabulation. */
118 int nvar; /* Number of variables. */
119 double missing; /* Missing cases count. */
120 int ofs; /* Integer mode: Offset into sorted_tab[]. */
121 const struct variable *vars[2]; /* At least two variables; sorted by
122 larger indices first. */
125 /* Integer mode variable info. */
128 int min; /* Minimum value. */
129 int max; /* Maximum value + 1. */
130 int count; /* max - min. */
133 static inline struct var_range *
134 get_var_range (const struct variable *v)
136 return var_get_aux (v);
139 /* Indexes into crosstab.v. */
146 /* General mode crosstabulation table. */
147 static struct hsh_table *gen_tab; /* Hash table. */
148 static int n_sorted_tab; /* Number of entries in sorted_tab. */
149 static struct table_entry **sorted_tab; /* Sorted table. */
151 /* Variables specifies on VARIABLES. */
152 static const struct variable **variables;
153 static size_t variables_cnt;
156 static struct crosstab **xtab;
159 /* Integer or general mode? */
168 static int num_cells; /* Number of cells requested. */
169 static int cells[8]; /* Cells requested. */
172 static int write_style; /* One of WR_* that specifies the WRITE style. */
174 /* Command parsing info. */
175 static struct cmd_crosstabs cmd;
178 static struct pool *pl_tc; /* For table cells. */
179 static struct pool *pl_col; /* For column data. */
181 static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
182 static void precalc (struct casereader *, const struct dataset *);
183 static void calc_general (struct ccase *, const struct dataset *);
184 static void calc_integer (struct ccase *, const struct dataset *);
185 static void postcalc (void);
186 static void submit (struct tab_table *);
188 static void format_short (char *s, const struct fmt_spec *fp,
189 const union value *v);
191 /* Parse and execute CROSSTABS, then clean up. */
193 cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
195 int result = internal_cmd_crosstabs (lexer, ds);
198 pool_destroy (pl_tc);
199 pool_destroy (pl_col);
204 /* Parses and executes the CROSSTABS procedure. */
206 internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
208 struct casegrouper *grouper;
209 struct casereader *input, *group;
217 pl_tc = pool_create ();
218 pl_col = pool_create ();
220 if (!parse_crosstabs (lexer, ds, &cmd, NULL))
223 mode = variables ? INTEGER : GENERAL;
228 cmd.a_cells[CRS_CL_COUNT] = 1;
234 for (i = 0; i < CRS_CL_count; i++)
239 cmd.a_cells[CRS_CL_COUNT] = 1;
240 cmd.a_cells[CRS_CL_ROW] = 1;
241 cmd.a_cells[CRS_CL_COLUMN] = 1;
242 cmd.a_cells[CRS_CL_TOTAL] = 1;
244 if (cmd.a_cells[CRS_CL_ALL])
246 for (i = 0; i < CRS_CL_count; i++)
248 cmd.a_cells[CRS_CL_ALL] = 0;
250 cmd.a_cells[CRS_CL_NONE] = 0;
252 for (num_cells = i = 0; i < CRS_CL_count; i++)
254 cells[num_cells++] = i;
257 if (cmd.sbc_statistics)
262 for (i = 0; i < CRS_ST_count; i++)
263 if (cmd.a_statistics[i])
266 cmd.a_statistics[CRS_ST_CHISQ] = 1;
267 if (cmd.a_statistics[CRS_ST_ALL])
268 for (i = 0; i < CRS_ST_count; i++)
269 cmd.a_statistics[i] = 1;
273 if (cmd.miss == CRS_REPORT && mode == GENERAL)
275 msg (SE, _("Missing mode REPORT not allowed in general mode. "
276 "Assuming MISSING=TABLE."));
277 cmd.miss = CRS_TABLE;
281 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
282 cmd.a_write[CRS_WR_ALL] = 0;
283 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
285 msg (SE, _("Write mode ALL not allowed in general mode. "
286 "Assuming WRITE=CELLS."));
287 cmd.a_write[CRS_WR_CELLS] = 1;
290 && (cmd.a_write[CRS_WR_NONE]
291 + cmd.a_write[CRS_WR_ALL]
292 + cmd.a_write[CRS_WR_CELLS] == 0))
293 cmd.a_write[CRS_WR_CELLS] = 1;
294 if (cmd.a_write[CRS_WR_CELLS])
295 write_style = CRS_WR_CELLS;
296 else if (cmd.a_write[CRS_WR_ALL])
297 write_style = CRS_WR_ALL;
299 write_style = CRS_WR_NONE;
301 input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
303 grouper = casegrouper_create_splits (input, dataset_dict (ds));
304 while (casegrouper_get_next_group (grouper, &group))
310 for (; casereader_read (group, &c); case_destroy (&c))
313 calc_general (&c, ds);
315 calc_integer (&c, ds);
317 casereader_destroy (group);
321 ok = casegrouper_destroy (grouper);
322 ok = proc_commit (ds) && ok;
324 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
327 /* Parses the TABLES subcommand. */
329 crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
331 struct const_var_set *var_set;
333 const struct variable ***by = NULL;
334 size_t *by_nvar = NULL;
338 /* Ensure that this is a TABLES subcommand. */
339 if (!lex_match_id (lexer, "TABLES")
340 && (lex_token (lexer) != T_ID ||
341 dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
342 && lex_token (lexer) != T_ALL)
344 lex_match (lexer, '=');
346 if (variables != NULL)
347 var_set = const_var_set_create_from_array (variables, variables_cnt);
349 var_set = const_var_set_create_from_dict (dataset_dict (ds));
350 assert (var_set != NULL);
354 by = xnrealloc (by, n_by + 1, sizeof *by);
355 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
356 if (!parse_const_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
357 PV_NO_DUPLICATE | PV_NO_SCRATCH))
359 if (xalloc_oversized (nx, by_nvar[n_by]))
361 msg (SE, _("Too many crosstabulation variables or dimensions."));
367 if (!lex_match (lexer, T_BY))
371 lex_error (lexer, _("expecting BY"));
380 int *by_iter = xcalloc (n_by, sizeof *by_iter);
383 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
384 for (i = 0; i < nx; i++)
388 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
395 for (i = 0; i < n_by; i++)
396 x->vars[i] = by[i][by_iter[i]];
402 for (i = n_by - 1; i >= 0; i--)
404 if (++by_iter[i] < by_nvar[i])
417 /* All return paths lead here. */
421 for (i = 0; i < n_by; i++)
427 const_var_set_destroy (var_set);
432 /* Parses the VARIABLES subcommand. */
434 crs_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
438 msg (SE, _("VARIABLES must be specified before TABLES."));
442 lex_match (lexer, '=');
446 size_t orig_nv = variables_cnt;
451 if (!parse_variables_const (lexer, dataset_dict (ds),
452 &variables, &variables_cnt,
453 (PV_APPEND | PV_NUMERIC
454 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
457 if (lex_token (lexer) != '(')
459 lex_error (lexer, "expecting `('");
464 if (!lex_force_int (lexer))
466 min = lex_integer (lexer);
469 lex_match (lexer, ',');
471 if (!lex_force_int (lexer))
473 max = lex_integer (lexer);
476 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
482 if (lex_token (lexer) != ')')
484 lex_error (lexer, "expecting `)'");
489 for (i = orig_nv; i < variables_cnt; i++)
491 struct var_range *vr = xmalloc (sizeof *vr);
494 vr->count = max - min + 1;
495 var_attach_aux (variables[i], vr, var_dtor_free);
498 if (lex_token (lexer) == '/')
510 /* Data file processing. */
512 static int compare_table_entry (const void *, const void *, const void *);
513 static unsigned hash_table_entry (const void *, const void *);
515 /* Set up the crosstabulation tables for processing. */
517 precalc (struct casereader *input, const struct dataset *ds)
521 if (!casereader_peek (input, 0, &c))
523 output_split_file_values (ds, &c);
528 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
538 for (i = 0; i < nxtab; i++)
540 struct crosstab *x = xtab[i];
545 x->ofs = n_sorted_tab;
547 for (j = 2; j < x->nvar; j++)
548 count *= get_var_range (x->vars[j - 2])->count;
550 sorted_tab = xnrealloc (sorted_tab,
551 n_sorted_tab + count, sizeof *sorted_tab);
552 v = local_alloc (sizeof *v * x->nvar);
553 for (j = 2; j < x->nvar; j++)
554 v[j] = get_var_range (x->vars[j])->min;
555 for (j = 0; j < count; j++)
557 struct table_entry *te;
560 te = sorted_tab[n_sorted_tab++]
561 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
565 int row_cnt = get_var_range (x->vars[0])->count;
566 int col_cnt = get_var_range (x->vars[1])->count;
567 const int mat_size = row_cnt * col_cnt;
570 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
571 for (m = 0; m < mat_size; m++)
575 for (k = 2; k < x->nvar; k++)
576 te->values[k].f = v[k];
577 for (k = 2; k < x->nvar; k++)
579 struct var_range *vr = get_var_range (x->vars[k]);
580 if (++v[k] >= vr->max)
589 sorted_tab = xnrealloc (sorted_tab,
590 n_sorted_tab + 1, sizeof *sorted_tab);
591 sorted_tab[n_sorted_tab] = NULL;
596 /* Form crosstabulations for general mode. */
598 calc_general (struct ccase *c, const struct dataset *ds)
600 /* Missing values to exclude. */
601 enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY
602 : cmd.miss == CRS_INCLUDE ? MV_SYSTEM
606 double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
608 /* Flattened current table index. */
611 for (t = 0; t < nxtab; t++)
613 struct crosstab *x = xtab[t];
614 const size_t entry_size = (sizeof (struct table_entry)
615 + sizeof (union value) * (x->nvar - 1));
616 struct table_entry *te = local_alloc (entry_size);
618 /* Construct table entry for the current record and table. */
624 for (j = 0; j < x->nvar; j++)
626 const union value *v = case_data (c, x->vars[j]);
627 if (var_is_value_missing (x->vars[j], v, exclude))
629 x->missing += weight;
633 if (var_is_numeric (x->vars[j]))
634 te->values[j].f = case_num (c, x->vars[j]);
637 memcpy (te->values[j].s, case_str (c, x->vars[j]),
638 var_get_width (x->vars[j]));
640 /* Necessary in order to simplify comparisons. */
641 memset (&te->values[j].s[var_get_width (x->vars[j])], 0,
642 sizeof (union value) - var_get_width (x->vars[j]));
647 /* Add record to hash table. */
649 struct table_entry **tepp
650 = (struct table_entry **) hsh_probe (gen_tab, te);
653 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
656 memcpy (tep, te, entry_size);
661 (*tepp)->u.freq += weight;
670 calc_integer (struct ccase *c, const struct dataset *ds)
672 bool bad_warn = true;
675 double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
677 /* Flattened current table index. */
680 for (t = 0; t < nxtab; t++)
682 struct crosstab *x = xtab[t];
687 for (i = 0; i < x->nvar; i++)
689 const struct variable *const v = x->vars[i];
690 struct var_range *vr = get_var_range (v);
691 double value = case_num (c, v);
693 /* Note that the first test also rules out SYSMIS. */
694 if ((value < vr->min || value >= vr->max)
695 || (cmd.miss == CRS_TABLE
696 && var_is_num_missing (v, value, MV_USER)))
698 x->missing += weight;
704 ofs += fact * ((int) value - vr->min);
710 const struct variable *row_var = x->vars[ROW_VAR];
711 const int row = case_num (c, row_var) - get_var_range (row_var)->min;
713 const struct variable *col_var = x->vars[COL_VAR];
714 const int col = case_num (c, col_var) - get_var_range (col_var)->min;
716 const int col_dim = get_var_range (col_var)->count;
718 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
725 /* Compare the table_entry's at A and B and return a strcmp()-type
728 compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
730 const struct table_entry *a = a_;
731 const struct table_entry *b = b_;
733 if (a->table > b->table)
735 else if (a->table < b->table)
739 const struct crosstab *x = xtab[a->table];
742 for (i = x->nvar - 1; i >= 0; i--)
743 if (var_is_numeric (x->vars[i]))
745 const double diffnum = a->values[i].f - b->values[i].f;
748 else if (diffnum > 0)
753 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
754 var_get_width (x->vars[i]));
763 /* Calculate a hash value from table_entry A. */
765 hash_table_entry (const void *a_, const void *aux UNUSED)
767 const struct table_entry *a = a_;
772 for (i = 0; i < xtab[a->table]->nvar; i++)
773 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
778 /* Post-data reading calculations. */
780 static struct table_entry **find_pivot_extent (struct table_entry **,
781 int *cnt, int pivot);
782 static void enum_var_values (struct table_entry **entries, int entry_cnt,
784 union value **values, int *value_cnt);
785 static void output_pivot_table (struct table_entry **, struct table_entry **,
786 double **, double **, double **,
787 int *, int *, int *);
788 static void make_summary_table (void);
795 n_sorted_tab = hsh_count (gen_tab);
796 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
799 make_summary_table ();
801 /* Identify all the individual crosstabulation tables, and deal with
804 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
805 int pc = n_sorted_tab; /* Pivot count. */
807 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
808 int maxrows = 0, maxcols = 0, maxcells = 0;
812 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
816 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
817 &maxrows, &maxcols, &maxcells);
826 hsh_destroy (gen_tab);
829 static void insert_summary (struct tab_table *, int tab_index, double valid);
831 /* Output a table summarizing the cases processed. */
833 make_summary_table (void)
835 struct tab_table *summary;
837 struct table_entry **pb = sorted_tab, **pe;
838 int pc = n_sorted_tab;
841 summary = tab_create (7, 3 + nxtab, 1);
842 tab_title (summary, _("Summary."));
843 tab_headers (summary, 1, 0, 3, 0);
844 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
845 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
846 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
847 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
848 tab_hline (summary, TAL_1, 1, 6, 1);
849 tab_hline (summary, TAL_1, 1, 6, 2);
850 tab_vline (summary, TAL_1, 3, 1, 1);
851 tab_vline (summary, TAL_1, 5, 1, 1);
855 for (i = 0; i < 3; i++)
857 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
858 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
861 tab_offset (summary, 0, 3);
867 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
871 while (cur_tab < (*pb)->table)
872 insert_summary (summary, cur_tab++, 0.);
875 for (valid = 0.; pb < pe; pb++)
876 valid += (*pb)->u.freq;
879 const struct crosstab *const x = xtab[(*pb)->table];
880 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
881 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
882 const int count = n_cols * n_rows;
884 for (valid = 0.; pb < pe; pb++)
886 const double *data = (*pb)->u.data;
889 for (i = 0; i < count; i++)
893 insert_summary (summary, cur_tab++, valid);
898 while (cur_tab < nxtab)
899 insert_summary (summary, cur_tab++, 0.);
904 /* Inserts a line into T describing the crosstabulation at index
905 TAB_INDEX, which has VALID valid observations. */
907 insert_summary (struct tab_table *t, int tab_index, double valid)
909 struct crosstab *x = xtab[tab_index];
911 tab_hline (t, TAL_1, 0, 6, 0);
913 /* Crosstabulation name. */
915 char *buf = local_alloc (128 * x->nvar);
919 for (i = 0; i < x->nvar; i++)
922 cp = stpcpy (cp, " * ");
924 cp = stpcpy (cp, var_to_string (x->vars[i]));
926 tab_text (t, 0, 0, TAB_LEFT, buf);
931 /* Counts and percentages. */
941 for (i = 0; i < 3; i++)
943 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
944 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
955 static struct tab_table *table; /* Crosstabulation table. */
956 static struct tab_table *chisq; /* Chi-square table. */
957 static struct tab_table *sym; /* Symmetric measures table. */
958 static struct tab_table *risk; /* Risk estimate table. */
959 static struct tab_table *direct; /* Directional measures table. */
962 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
964 /* Column values, number of columns. */
965 static union value *cols;
968 /* Row values, number of rows. */
969 static union value *rows;
972 /* Number of statistically interesting columns/rows (columns/rows with
974 static int ns_cols, ns_rows;
976 /* Crosstabulation. */
977 static const struct crosstab *x;
979 /* Number of variables from the crosstabulation to consider. This is
980 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
983 /* Matrix contents. */
984 static double *mat; /* Matrix proper. */
985 static double *row_tot; /* Row totals. */
986 static double *col_tot; /* Column totals. */
987 static double W; /* Grand total. */
989 static void display_dimensions (struct tab_table *, int first_difference,
990 struct table_entry *);
991 static void display_crosstabulation (void);
992 static void display_chisq (void);
993 static void display_symmetric (void);
994 static void display_risk (void);
995 static void display_directional (void);
996 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
997 static void table_value_missing (struct tab_table *table, int c, int r,
998 unsigned char opt, const union value *v,
999 const struct variable *var);
1000 static void delete_missing (void);
1002 /* Output pivot table beginning at PB and continuing until PE,
1003 exclusive. For efficiency, *MATP is a pointer to a matrix that can
1004 hold *MAXROWS entries. */
1006 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
1007 double **matp, double **row_totp, double **col_totp,
1008 int *maxrows, int *maxcols, int *maxcells)
1011 struct table_entry **tb = pb, **te; /* Table begin, table end. */
1012 int tc = pe - pb; /* Table count. */
1014 /* Table entry for header comparison. */
1015 struct table_entry *cmp = NULL;
1017 x = xtab[(*pb)->table];
1018 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
1020 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
1022 /* Crosstabulation table initialization. */
1025 table = tab_create (nvar + n_cols,
1026 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1027 tab_headers (table, nvar - 1, 0, 2, 0);
1029 /* First header line. */
1030 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1031 TAB_CENTER | TAT_TITLE, var_get_name (x->vars[COL_VAR]));
1033 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1035 /* Second header line. */
1039 for (i = 2; i < nvar; i++)
1040 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1041 TAB_RIGHT | TAT_TITLE, var_to_string (x->vars[i]));
1042 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1043 var_get_name (x->vars[ROW_VAR]));
1044 for (i = 0; i < n_cols; i++)
1045 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1047 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1050 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1051 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1055 char *title = local_alloc (x->nvar * 64 + 128);
1059 if (cmd.pivot == CRS_PIVOT)
1060 for (i = 0; i < nvar; i++)
1063 cp = stpcpy (cp, " by ");
1064 cp = stpcpy (cp, var_get_name (x->vars[i]));
1068 cp = spprintf (cp, "%s by %s for",
1069 var_get_name (x->vars[0]),
1070 var_get_name (x->vars[1]));
1071 for (i = 2; i < nvar; i++)
1073 char buf[64], *bufp;
1078 cp = stpcpy (cp, var_get_name (x->vars[i]));
1080 format_short (buf, var_get_print_format (x->vars[i]),
1082 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1084 cp = stpcpy (cp, bufp);
1088 cp = stpcpy (cp, " [");
1089 for (i = 0; i < num_cells; i++)
1097 static const struct tuple cell_names[] =
1099 {CRS_CL_COUNT, N_("count")},
1100 {CRS_CL_ROW, N_("row %")},
1101 {CRS_CL_COLUMN, N_("column %")},
1102 {CRS_CL_TOTAL, N_("total %")},
1103 {CRS_CL_EXPECTED, N_("expected")},
1104 {CRS_CL_RESIDUAL, N_("residual")},
1105 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1106 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1110 const struct tuple *t;
1112 for (t = cell_names; t->value != cells[i]; t++)
1113 assert (t->value != -1);
1115 cp = stpcpy (cp, ", ");
1116 cp = stpcpy (cp, gettext (t->name));
1120 tab_title (table, "%s", title);
1124 tab_offset (table, 0, 2);
1129 /* Chi-square table initialization. */
1130 if (cmd.a_statistics[CRS_ST_CHISQ])
1132 chisq = tab_create (6 + (nvar - 2),
1133 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1134 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1136 tab_title (chisq, _("Chi-square tests."));
1138 tab_offset (chisq, nvar - 2, 0);
1139 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1140 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1141 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1142 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1143 _("Asymp. Sig. (2-sided)"));
1144 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1145 _("Exact. Sig. (2-sided)"));
1146 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1147 _("Exact. Sig. (1-sided)"));
1149 tab_offset (chisq, 0, 1);
1154 /* Symmetric measures. */
1155 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1156 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1157 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1158 || cmd.a_statistics[CRS_ST_KAPPA])
1160 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1161 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1162 tab_title (sym, _("Symmetric measures."));
1164 tab_offset (sym, nvar - 2, 0);
1165 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1166 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1167 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1168 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1169 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1170 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1171 tab_offset (sym, 0, 1);
1176 /* Risk estimate. */
1177 if (cmd.a_statistics[CRS_ST_RISK])
1179 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1180 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1181 tab_title (risk, _("Risk estimate."));
1183 tab_offset (risk, nvar - 2, 0);
1184 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1185 _("95%% Confidence Interval"));
1186 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1187 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1188 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1189 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1190 tab_hline (risk, TAL_1, 2, 3, 1);
1191 tab_vline (risk, TAL_1, 2, 0, 1);
1192 tab_offset (risk, 0, 2);
1197 /* Directional measures. */
1198 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1199 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1201 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1202 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1203 tab_title (direct, _("Directional measures."));
1205 tab_offset (direct, nvar - 2, 0);
1206 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1207 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1208 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1209 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1210 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1211 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1212 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1213 tab_offset (direct, 0, 1);
1220 /* Find pivot subtable if applicable. */
1221 te = find_pivot_extent (tb, &tc, 0);
1225 /* Find all the row variable values. */
1226 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1228 /* Allocate memory space for the column and row totals. */
1229 if (n_rows > *maxrows)
1231 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1232 row_tot = *row_totp;
1235 if (n_cols > *maxcols)
1237 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1238 col_tot = *col_totp;
1242 /* Allocate table space for the matrix. */
1243 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1244 tab_realloc (table, -1,
1245 MAX (tab_nr (table) + (n_rows + 1) * num_cells,
1246 tab_nr (table) * (pe - pb) / (te - tb)));
1248 if (mode == GENERAL)
1250 /* Allocate memory space for the matrix. */
1251 if (n_cols * n_rows > *maxcells)
1253 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1254 *maxcells = n_cols * n_rows;
1259 /* Build the matrix and calculate column totals. */
1261 union value *cur_col = cols;
1262 union value *cur_row = rows;
1264 double *cp = col_tot;
1265 struct table_entry **p;
1268 for (p = &tb[0]; p < te; p++)
1270 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1274 for (; cur_row < &rows[n_rows]; cur_row++)
1280 mp = &mat[cur_col - cols];
1283 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1290 *cp += *mp = (*p)->u.freq;
1295 /* Zero out the rest of the matrix. */
1296 for (; cur_row < &rows[n_rows]; cur_row++)
1302 if (cur_col < &cols[n_cols])
1304 const int rem_cols = n_cols - (cur_col - cols);
1307 for (c = 0; c < rem_cols; c++)
1309 mp = &mat[cur_col - cols];
1310 for (r = 0; r < n_rows; r++)
1312 for (c = 0; c < rem_cols; c++)
1314 mp += n_cols - rem_cols;
1322 double *tp = col_tot;
1324 assert (mode == INTEGER);
1325 mat = (*tb)->u.data;
1328 /* Calculate column totals. */
1329 for (c = 0; c < n_cols; c++)
1332 double *cp = &mat[c];
1334 for (r = 0; r < n_rows; r++)
1335 cum += cp[r * n_cols];
1343 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1344 ns_cols += *cp != 0.;
1347 /* Calculate row totals. */
1350 double *rp = row_tot;
1353 for (ns_rows = 0, r = n_rows; r--; )
1356 for (c = n_cols; c--; )
1364 /* Calculate grand total. */
1370 if (n_rows < n_cols)
1371 tp = row_tot, n = n_rows;
1373 tp = col_tot, n = n_cols;
1379 /* Find the first variable that differs from the last subtable,
1380 then display the values of the dimensioning variables for
1381 each table that needs it. */
1383 int first_difference = nvar - 1;
1386 for (; ; first_difference--)
1388 assert (first_difference >= 2);
1389 if (memcmp (&cmp->values[first_difference],
1390 &(*tb)->values[first_difference],
1391 sizeof *cmp->values))
1397 display_dimensions (table, first_difference, *tb);
1399 display_dimensions (chisq, first_difference, *tb);
1401 display_dimensions (sym, first_difference, *tb);
1403 display_dimensions (risk, first_difference, *tb);
1405 display_dimensions (direct, first_difference, *tb);
1409 display_crosstabulation ();
1410 if (cmd.miss == CRS_REPORT)
1415 display_symmetric ();
1419 display_directional ();
1430 tab_resize (chisq, 4 + (nvar - 2), -1);
1441 /* Delete missing rows and columns for statistical analysis when
1444 delete_missing (void)
1449 for (r = 0; r < n_rows; r++)
1450 if (var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1454 for (c = 0; c < n_cols; c++)
1455 mat[c + r * n_cols] = 0.;
1463 for (c = 0; c < n_cols; c++)
1464 if (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1468 for (r = 0; r < n_rows; r++)
1469 mat[c + r * n_cols] = 0.;
1475 /* Prepare table T for submission, and submit it. */
1477 submit (struct tab_table *t)
1484 tab_resize (t, -1, 0);
1485 if (tab_nr (t) == tab_t (t))
1490 tab_offset (t, 0, 0);
1492 for (i = 2; i < nvar; i++)
1493 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1494 var_to_string (x->vars[i]));
1495 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1496 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1498 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1500 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1501 tab_dim (t, crosstabs_dim);
1505 /* Sets the widths of all the columns and heights of all the rows in
1506 table T for driver D. */
1508 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1512 /* Width of a numerical column. */
1513 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1514 if (cmd.miss == CRS_REPORT)
1515 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1517 /* Set width for header columns. */
1523 w = d->width - c * (t->nc - t->l);
1524 for (i = 0; i <= t->nc; i++)
1528 if (w < d->prop_em_width * 8)
1529 w = d->prop_em_width * 8;
1531 if (w > d->prop_em_width * 15)
1532 w = d->prop_em_width * 15;
1534 for (i = 0; i < t->l; i++)
1538 for (i = t->l; i < t->nc; i++)
1541 for (i = 0; i < t->nr; i++)
1542 t->h[i] = tab_natural_height (t, d, i);
1545 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1546 int *cnt, int pivot);
1547 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1548 int *cnt, int pivot);
1550 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1552 static struct table_entry **
1553 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1555 return (mode == GENERAL
1556 ? find_pivot_extent_general (tp, cnt, pivot)
1557 : find_pivot_extent_integer (tp, cnt, pivot));
1560 /* Find the extent of a region in TP that contains one table. If
1561 PIVOT != 0 that means a set of table entries with identical table
1562 number; otherwise they also have to have the same values for every
1563 dimension after the row and column dimensions. The table that is
1564 searched starts at TP and has length CNT. Returns the first entry
1565 after the last one in the table; sets *CNT to the number of
1566 remaining values. If there are no entries in TP at all, returns
1567 NULL. A yucky interface, admittedly, but it works. */
1568 static struct table_entry **
1569 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1571 struct table_entry *fp = *tp;
1576 x = xtab[(*tp)->table];
1584 if ((*tp)->table != fp->table)
1589 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1596 /* Integer mode correspondent to find_pivot_extent_general(). This
1597 could be optimized somewhat, but I just don't give a crap about
1598 CROSSTABS performance in integer mode, which is just a
1599 CROSSTABS wart as far as I'm concerned.
1601 That said, feel free to send optimization patches to me. */
1602 static struct table_entry **
1603 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1605 struct table_entry *fp = *tp;
1610 x = xtab[(*tp)->table];
1618 if ((*tp)->table != fp->table)
1623 if (memcmp (&(*tp)->values[2], &fp->values[2],
1624 sizeof (union value) * (x->nvar - 2)))
1631 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1632 result. WIDTH_ points to an int which is either 0 for a
1633 numeric value or a string width for a string value. */
1635 compare_value (const void *a_, const void *b_, const void *width_)
1637 const union value *a = a_;
1638 const union value *b = b_;
1639 const int *pwidth = width_;
1640 const int width = *pwidth;
1643 return (a->f < b->f) ? -1 : (a->f > b->f);
1645 return strncmp (a->s, b->s, width);
1648 /* Given an array of ENTRY_CNT table_entry structures starting at
1649 ENTRIES, creates a sorted list of the values that the variable
1650 with index VAR_IDX takes on. The values are returned as a
1651 malloc()'darray stored in *VALUES, with the number of values
1652 stored in *VALUE_CNT.
1655 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1656 union value **values, int *value_cnt)
1658 const struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1660 if (mode == GENERAL)
1662 int width = var_get_width (v);
1665 *values = xnmalloc (entry_cnt, sizeof **values);
1666 for (i = 0; i < entry_cnt; i++)
1667 (*values)[i] = entries[i]->values[var_idx];
1668 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1669 compare_value, &width);
1673 struct var_range *vr = get_var_range (v);
1676 assert (mode == INTEGER);
1677 *values = xnmalloc (vr->count, sizeof **values);
1678 for (i = 0; i < vr->count; i++)
1679 (*values)[i].f = i + vr->min;
1680 *value_cnt = vr->count;
1684 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1685 from V, displayed with print format spec from variable VAR. When
1686 in REPORT missing-value mode, missing values have an M appended. */
1688 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1689 const union value *v, const struct variable *var)
1692 const struct fmt_spec *print = var_get_print_format (var);
1694 const char *label = var_lookup_value_label (var, v);
1697 tab_text (table, c, r, TAB_LEFT, label);
1701 s.string = tab_alloc (table, print->w);
1702 format_short (s.string, print, v);
1703 s.length = strlen (s.string);
1704 if (cmd.miss == CRS_REPORT && var_is_num_missing (var, v->f, MV_USER))
1705 s.string[s.length++] = 'M';
1706 while (s.length && *s.string == ' ')
1711 tab_raw (table, c, r, opt, &s);
1714 /* Draws a line across TABLE at the current row to indicate the most
1715 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1716 that changed, and puts the values that changed into the table. TB
1717 and X must be the corresponding table_entry and crosstab,
1720 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1722 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1724 for (; first_difference >= 2; first_difference--)
1725 table_value_missing (table, nvar - first_difference - 1, 0,
1726 TAB_RIGHT, &tb->values[first_difference],
1727 x->vars[first_difference]);
1730 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1731 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1732 additionally suffixed with a letter `M'. */
1734 format_cell_entry (struct tab_table *table, int c, int r, double value,
1735 char suffix, bool mark_missing)
1737 const struct fmt_spec f = {FMT_F, 10, 1};
1742 s.string = tab_alloc (table, 16);
1744 data_out (&v, &f, s.string);
1745 while (*s.string == ' ')
1751 s.string[s.length++] = suffix;
1753 s.string[s.length++] = 'M';
1755 tab_raw (table, c, r, TAB_RIGHT, &s);
1758 /* Displays the crosstabulation table. */
1760 display_crosstabulation (void)
1765 for (r = 0; r < n_rows; r++)
1766 table_value_missing (table, nvar - 2, r * num_cells,
1767 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1769 tab_text (table, nvar - 2, n_rows * num_cells,
1770 TAB_LEFT, _("Total"));
1772 /* Put in the actual cells. */
1777 tab_offset (table, nvar - 1, -1);
1778 for (r = 0; r < n_rows; r++)
1781 tab_hline (table, TAL_1, -1, n_cols, 0);
1782 for (c = 0; c < n_cols; c++)
1784 bool mark_missing = false;
1785 double expected_value = row_tot[r] * col_tot[c] / W;
1786 if (cmd.miss == CRS_REPORT
1787 && (var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER)
1788 || var_is_num_missing (x->vars[ROW_VAR], rows[r].f,
1790 mark_missing = true;
1791 for (i = 0; i < num_cells; i++)
1802 v = *mp / row_tot[r] * 100.;
1806 v = *mp / col_tot[c] * 100.;
1813 case CRS_CL_EXPECTED:
1816 case CRS_CL_RESIDUAL:
1817 v = *mp - expected_value;
1819 case CRS_CL_SRESIDUAL:
1820 v = (*mp - expected_value) / sqrt (expected_value);
1822 case CRS_CL_ASRESIDUAL:
1823 v = ((*mp - expected_value)
1824 / sqrt (expected_value
1825 * (1. - row_tot[r] / W)
1826 * (1. - col_tot[c] / W)));
1832 format_cell_entry (table, c, i, v, suffix, mark_missing);
1838 tab_offset (table, -1, tab_row (table) + num_cells);
1846 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1847 for (r = 0; r < n_rows; r++)
1850 bool mark_missing = false;
1852 if (cmd.miss == CRS_REPORT
1853 && var_is_num_missing (x->vars[ROW_VAR], rows[r].f, MV_USER))
1854 mark_missing = true;
1856 for (i = 0; i < num_cells; i++)
1870 v = row_tot[r] / W * 100.;
1874 v = row_tot[r] / W * 100.;
1877 case CRS_CL_EXPECTED:
1878 case CRS_CL_RESIDUAL:
1879 case CRS_CL_SRESIDUAL:
1880 case CRS_CL_ASRESIDUAL:
1887 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1888 tab_next_row (table);
1893 /* Column totals, grand total. */
1899 tab_hline (table, TAL_1, -1, n_cols, 0);
1900 for (c = 0; c <= n_cols; c++)
1902 double ct = c < n_cols ? col_tot[c] : W;
1903 bool mark_missing = false;
1907 if (cmd.miss == CRS_REPORT && c < n_cols
1908 && var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
1909 mark_missing = true;
1911 for (i = 0; i < num_cells; i++)
1933 case CRS_CL_EXPECTED:
1934 case CRS_CL_RESIDUAL:
1935 case CRS_CL_SRESIDUAL:
1936 case CRS_CL_ASRESIDUAL:
1942 format_cell_entry (table, c, i, v, suffix, mark_missing);
1947 tab_offset (table, -1, tab_row (table) + last_row);
1950 tab_offset (table, 0, -1);
1953 static void calc_r (double *X, double *Y, double *, double *, double *);
1954 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1956 /* Display chi-square statistics. */
1958 display_chisq (void)
1960 static const char *chisq_stats[N_CHISQ] =
1962 N_("Pearson Chi-Square"),
1963 N_("Likelihood Ratio"),
1964 N_("Fisher's Exact Test"),
1965 N_("Continuity Correction"),
1966 N_("Linear-by-Linear Association"),
1968 double chisq_v[N_CHISQ];
1969 double fisher1, fisher2;
1975 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1977 tab_offset (chisq, nvar - 2, -1);
1979 for (i = 0; i < N_CHISQ; i++)
1981 if ((i != 2 && chisq_v[i] == SYSMIS)
1982 || (i == 2 && fisher1 == SYSMIS))
1986 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1989 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1990 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1991 tab_float (chisq, 3, 0, TAB_RIGHT,
1992 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1997 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1998 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
2000 tab_next_row (chisq);
2003 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2004 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
2005 tab_next_row (chisq);
2007 tab_offset (chisq, 0, -1);
2010 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
2011 double[N_SYMMETRIC]);
2013 /* Display symmetric measures. */
2015 display_symmetric (void)
2017 static const char *categories[] =
2019 N_("Nominal by Nominal"),
2020 N_("Ordinal by Ordinal"),
2021 N_("Interval by Interval"),
2022 N_("Measure of Agreement"),
2025 static const char *stats[N_SYMMETRIC] =
2029 N_("Contingency Coefficient"),
2030 N_("Kendall's tau-b"),
2031 N_("Kendall's tau-c"),
2033 N_("Spearman Correlation"),
2038 static const int stats_categories[N_SYMMETRIC] =
2040 0, 0, 0, 1, 1, 1, 1, 2, 3,
2044 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2047 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2050 tab_offset (sym, nvar - 2, -1);
2052 for (i = 0; i < N_SYMMETRIC; i++)
2054 if (sym_v[i] == SYSMIS)
2057 if (stats_categories[i] != last_cat)
2059 last_cat = stats_categories[i];
2060 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2063 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2064 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2065 if (sym_ase[i] != SYSMIS)
2066 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2067 if (sym_t[i] != SYSMIS)
2068 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2069 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2073 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2074 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2077 tab_offset (sym, 0, -1);
2080 static int calc_risk (double[], double[], double[], union value *);
2082 /* Display risk estimate. */
2087 double risk_v[3], lower[3], upper[3];
2091 if (!calc_risk (risk_v, upper, lower, c))
2094 tab_offset (risk, nvar - 2, -1);
2096 for (i = 0; i < 3; i++)
2098 if (risk_v[i] == SYSMIS)
2104 if (var_is_numeric (x->vars[COL_VAR]))
2105 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2106 var_get_name (x->vars[COL_VAR]), c[0].f, c[1].f);
2108 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2109 var_get_name (x->vars[COL_VAR]),
2110 var_get_width (x->vars[COL_VAR]), c[0].s,
2111 var_get_width (x->vars[COL_VAR]), c[1].s);
2115 if (var_is_numeric (x->vars[ROW_VAR]))
2116 sprintf (buf, _("For cohort %s = %g"),
2117 var_get_name (x->vars[ROW_VAR]), rows[i - 1].f);
2119 sprintf (buf, _("For cohort %s = %.*s"),
2120 var_get_name (x->vars[ROW_VAR]),
2121 var_get_width (x->vars[ROW_VAR]), rows[i - 1].s);
2125 tab_text (risk, 0, 0, TAB_LEFT, buf);
2126 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2127 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2128 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2129 tab_next_row (risk);
2132 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2133 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2134 tab_next_row (risk);
2136 tab_offset (risk, 0, -1);
2139 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2140 double[N_DIRECTIONAL]);
2142 /* Display directional measures. */
2144 display_directional (void)
2146 static const char *categories[] =
2148 N_("Nominal by Nominal"),
2149 N_("Ordinal by Ordinal"),
2150 N_("Nominal by Interval"),
2153 static const char *stats[] =
2156 N_("Goodman and Kruskal tau"),
2157 N_("Uncertainty Coefficient"),
2162 static const char *types[] =
2169 static const int stats_categories[N_DIRECTIONAL] =
2171 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2174 static const int stats_stats[N_DIRECTIONAL] =
2176 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2179 static const int stats_types[N_DIRECTIONAL] =
2181 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2184 static const int *stats_lookup[] =
2191 static const char **stats_names[] =
2203 double direct_v[N_DIRECTIONAL];
2204 double direct_ase[N_DIRECTIONAL];
2205 double direct_t[N_DIRECTIONAL];
2209 if (!calc_directional (direct_v, direct_ase, direct_t))
2212 tab_offset (direct, nvar - 2, -1);
2214 for (i = 0; i < N_DIRECTIONAL; i++)
2216 if (direct_v[i] == SYSMIS)
2222 for (j = 0; j < 3; j++)
2223 if (last[j] != stats_lookup[j][i])
2226 tab_hline (direct, TAL_1, j, 6, 0);
2231 int k = last[j] = stats_lookup[j][i];
2236 string = var_get_name (x->vars[0]);
2238 string = var_get_name (x->vars[1]);
2240 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2241 gettext (stats_names[j][k]), string);
2246 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2247 if (direct_ase[i] != SYSMIS)
2248 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2249 if (direct_t[i] != SYSMIS)
2250 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2251 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2252 tab_next_row (direct);
2255 tab_offset (direct, 0, -1);
2258 /* Statistical calculations. */
2260 /* Returns the value of the gamma (factorial) function for an integer
2263 gamma_int (double x)
2268 for (i = 2; i < x; i++)
2273 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2275 static inline double
2276 Pr (int a, int b, int c, int d)
2278 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2279 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2280 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2281 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2282 / gamma_int (a + b + c + d + 1.));
2285 /* Swap the contents of A and B. */
2287 swap (int *a, int *b)
2294 /* Calculate significance for Fisher's exact test as specified in
2295 _SPSS Statistical Algorithms_, Appendix 5. */
2297 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2301 if (MIN (c, d) < MIN (a, b))
2302 swap (&a, &c), swap (&b, &d);
2303 if (MIN (b, d) < MIN (a, c))
2304 swap (&a, &b), swap (&c, &d);
2308 swap (&a, &b), swap (&c, &d);
2310 swap (&a, &c), swap (&b, &d);
2314 for (x = 0; x <= a; x++)
2315 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2317 *fisher2 = *fisher1;
2318 for (x = 1; x <= b; x++)
2319 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2322 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2323 columns with values COLS and N_ROWS rows with values ROWS. Values
2324 in the matrix sum to W. */
2326 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2327 double *fisher1, double *fisher2)
2331 chisq[0] = chisq[1] = 0.;
2332 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2333 *fisher1 = *fisher2 = SYSMIS;
2335 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2337 if (ns_rows <= 1 || ns_cols <= 1)
2339 chisq[0] = chisq[1] = SYSMIS;
2343 for (r = 0; r < n_rows; r++)
2344 for (c = 0; c < n_cols; c++)
2346 const double expected = row_tot[r] * col_tot[c] / W;
2347 const double freq = mat[n_cols * r + c];
2348 const double residual = freq - expected;
2350 chisq[0] += residual * residual / expected;
2352 chisq[1] += freq * log (expected / freq);
2363 /* Calculate Yates and Fisher exact test. */
2364 if (ns_cols == 2 && ns_rows == 2)
2366 double f11, f12, f21, f22;
2372 for (i = j = 0; i < n_cols; i++)
2373 if (col_tot[i] != 0.)
2382 f11 = mat[nz_cols[0]];
2383 f12 = mat[nz_cols[1]];
2384 f21 = mat[nz_cols[0] + n_cols];
2385 f22 = mat[nz_cols[1] + n_cols];
2390 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2393 chisq[3] = (W * x * x
2394 / (f11 + f12) / (f21 + f22)
2395 / (f11 + f21) / (f12 + f22));
2403 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2404 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2407 /* Calculate Mantel-Haenszel. */
2408 if (var_is_numeric (x->vars[ROW_VAR]) && var_is_numeric (x->vars[COL_VAR]))
2410 double r, ase_0, ase_1;
2411 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2413 chisq[4] = (W - 1.) * r * r;
2418 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2419 ASE_1, and ase_0 into ASE_0. The row and column values must be
2420 passed in X and Y. */
2422 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2424 double SX, SY, S, T;
2426 double sum_XYf, sum_X2Y2f;
2427 double sum_Xr, sum_X2r;
2428 double sum_Yc, sum_Y2c;
2431 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2432 for (j = 0; j < n_cols; j++)
2434 double fij = mat[j + i * n_cols];
2435 double product = X[i] * Y[j];
2436 double temp = fij * product;
2438 sum_X2Y2f += temp * product;
2441 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2443 sum_Xr += X[i] * row_tot[i];
2444 sum_X2r += X[i] * X[i] * row_tot[i];
2448 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2450 sum_Yc += Y[i] * col_tot[i];
2451 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2455 S = sum_XYf - sum_Xr * sum_Yc / W;
2456 SX = sum_X2r - sum_Xr * sum_Xr / W;
2457 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2460 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2465 for (s = c = 0., i = 0; i < n_rows; i++)
2466 for (j = 0; j < n_cols; j++)
2468 double Xresid, Yresid;
2471 Xresid = X[i] - Xbar;
2472 Yresid = Y[j] - Ybar;
2473 temp = (T * Xresid * Yresid
2475 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2476 y = mat[j + i * n_cols] * temp * temp - c;
2481 *ase_1 = sqrt (s) / (T * T);
2485 static double somers_d_v[3];
2486 static double somers_d_ase[3];
2487 static double somers_d_t[3];
2489 /* Calculate symmetric statistics and their asymptotic standard
2490 errors. Returns 0 if none could be calculated. */
2492 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2493 double t[N_SYMMETRIC])
2495 int q = MIN (ns_rows, ns_cols);
2504 for (i = 0; i < N_SYMMETRIC; i++)
2505 v[i] = ase[i] = t[i] = SYSMIS;
2508 /* Phi, Cramer's V, contingency coefficient. */
2509 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2511 double Xp = 0.; /* Pearson chi-square. */
2516 for (r = 0; r < n_rows; r++)
2517 for (c = 0; c < n_cols; c++)
2519 const double expected = row_tot[r] * col_tot[c] / W;
2520 const double freq = mat[n_cols * r + c];
2521 const double residual = freq - expected;
2523 Xp += residual * residual / expected;
2527 if (cmd.a_statistics[CRS_ST_PHI])
2529 v[0] = sqrt (Xp / W);
2530 v[1] = sqrt (Xp / (W * (q - 1)));
2532 if (cmd.a_statistics[CRS_ST_CC])
2533 v[2] = sqrt (Xp / (Xp + W));
2536 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2537 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2542 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2549 for (r = 0; r < n_rows; r++)
2550 Dr -= row_tot[r] * row_tot[r];
2551 for (c = 0; c < n_cols; c++)
2552 Dc -= col_tot[c] * col_tot[c];
2558 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2559 for (c = 0; c < n_cols; c++)
2563 for (r = 0; r < n_rows; r++)
2564 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2574 for (i = 0; i < n_rows; i++)
2578 for (j = 1; j < n_cols; j++)
2579 Cij += col_tot[j] - cum[j + i * n_cols];
2582 for (j = 1; j < n_cols; j++)
2583 Dij += cum[j + (i - 1) * n_cols];
2587 double fij = mat[j + i * n_cols];
2593 assert (j < n_cols);
2595 Cij -= col_tot[j] - cum[j + i * n_cols];
2596 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2600 Cij += cum[j - 1 + (i - 1) * n_cols];
2601 Dij -= cum[j + (i - 1) * n_cols];
2607 if (cmd.a_statistics[CRS_ST_BTAU])
2608 v[3] = (P - Q) / sqrt (Dr * Dc);
2609 if (cmd.a_statistics[CRS_ST_CTAU])
2610 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2611 if (cmd.a_statistics[CRS_ST_GAMMA])
2612 v[5] = (P - Q) / (P + Q);
2614 /* ASE for tau-b, tau-c, gamma. Calculations could be
2615 eliminated here, at expense of memory. */
2620 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2621 for (i = 0; i < n_rows; i++)
2625 for (j = 1; j < n_cols; j++)
2626 Cij += col_tot[j] - cum[j + i * n_cols];
2629 for (j = 1; j < n_cols; j++)
2630 Dij += cum[j + (i - 1) * n_cols];
2634 double fij = mat[j + i * n_cols];
2636 if (cmd.a_statistics[CRS_ST_BTAU])
2638 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2639 + v[3] * (row_tot[i] * Dc
2640 + col_tot[j] * Dr));
2641 btau_cum += fij * temp * temp;
2645 const double temp = Cij - Dij;
2646 ctau_cum += fij * temp * temp;
2649 if (cmd.a_statistics[CRS_ST_GAMMA])
2651 const double temp = Q * Cij - P * Dij;
2652 gamma_cum += fij * temp * temp;
2655 if (cmd.a_statistics[CRS_ST_D])
2657 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2658 - (P - Q) * (W - row_tot[i]));
2659 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2660 - (Q - P) * (W - col_tot[j]));
2665 assert (j < n_cols);
2667 Cij -= col_tot[j] - cum[j + i * n_cols];
2668 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2672 Cij += cum[j - 1 + (i - 1) * n_cols];
2673 Dij -= cum[j + (i - 1) * n_cols];
2679 btau_var = ((btau_cum
2680 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2682 if (cmd.a_statistics[CRS_ST_BTAU])
2684 ase[3] = sqrt (btau_var);
2685 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2688 if (cmd.a_statistics[CRS_ST_CTAU])
2690 ase[4] = ((2 * q / ((q - 1) * W * W))
2691 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2692 t[4] = v[4] / ase[4];
2694 if (cmd.a_statistics[CRS_ST_GAMMA])
2696 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2697 t[5] = v[5] / (2. / (P + Q)
2698 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2700 if (cmd.a_statistics[CRS_ST_D])
2702 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2703 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2704 somers_d_t[0] = (somers_d_v[0]
2706 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2707 somers_d_v[1] = (P - Q) / Dc;
2708 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2709 somers_d_t[1] = (somers_d_v[1]
2711 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2712 somers_d_v[2] = (P - Q) / Dr;
2713 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2714 somers_d_t[2] = (somers_d_v[2]
2716 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2722 /* Spearman correlation, Pearson's r. */
2723 if (cmd.a_statistics[CRS_ST_CORR])
2725 double *R = local_alloc (sizeof *R * n_rows);
2726 double *C = local_alloc (sizeof *C * n_cols);
2729 double y, t, c = 0., s = 0.;
2734 R[i] = s + (row_tot[i] + 1.) / 2.;
2741 assert (i < n_rows);
2746 double y, t, c = 0., s = 0.;
2751 C[j] = s + (col_tot[j] + 1.) / 2;
2758 assert (j < n_cols);
2762 calc_r (R, C, &v[6], &t[6], &ase[6]);
2768 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2772 /* Cohen's kappa. */
2773 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2775 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2778 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2779 i < ns_rows; i++, j++)
2783 while (col_tot[j] == 0.)
2786 prod = row_tot[i] * col_tot[j];
2787 sum = row_tot[i] + col_tot[j];
2789 sum_fii += mat[j + i * n_cols];
2791 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2792 sum_riciri_ci += prod * sum;
2794 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2795 for (j = 0; j < ns_cols; j++)
2797 double sum = row_tot[i] + col_tot[j];
2798 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2801 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2803 ase[8] = sqrt ((W * W * sum_rici
2804 + sum_rici * sum_rici
2805 - W * sum_riciri_ci)
2806 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2808 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2809 / pow2 (W * W - sum_rici))
2810 + ((2. * (W - sum_fii)
2811 * (2. * sum_fii * sum_rici
2812 - W * sum_fiiri_ci))
2813 / cube (W * W - sum_rici))
2814 + (pow2 (W - sum_fii)
2815 * (W * sum_fijri_ci2 - 4.
2816 * sum_rici * sum_rici)
2817 / pow4 (W * W - sum_rici))));
2819 t[8] = v[8] / ase[8];
2826 /* Calculate risk estimate. */
2828 calc_risk (double *value, double *upper, double *lower, union value *c)
2830 double f11, f12, f21, f22;
2836 for (i = 0; i < 3; i++)
2837 value[i] = upper[i] = lower[i] = SYSMIS;
2840 if (ns_rows != 2 || ns_cols != 2)
2847 for (i = j = 0; i < n_cols; i++)
2848 if (col_tot[i] != 0.)
2857 f11 = mat[nz_cols[0]];
2858 f12 = mat[nz_cols[1]];
2859 f21 = mat[nz_cols[0] + n_cols];
2860 f22 = mat[nz_cols[1] + n_cols];
2862 c[0] = cols[nz_cols[0]];
2863 c[1] = cols[nz_cols[1]];
2866 value[0] = (f11 * f22) / (f12 * f21);
2867 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2868 lower[0] = value[0] * exp (-1.960 * v);
2869 upper[0] = value[0] * exp (1.960 * v);
2871 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2872 v = sqrt ((f12 / (f11 * (f11 + f12)))
2873 + (f22 / (f21 * (f21 + f22))));
2874 lower[1] = value[1] * exp (-1.960 * v);
2875 upper[1] = value[1] * exp (1.960 * v);
2877 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2878 v = sqrt ((f11 / (f12 * (f11 + f12)))
2879 + (f21 / (f22 * (f21 + f22))));
2880 lower[2] = value[2] * exp (-1.960 * v);
2881 upper[2] = value[2] * exp (1.960 * v);
2886 /* Calculate directional measures. */
2888 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2889 double t[N_DIRECTIONAL])
2894 for (i = 0; i < N_DIRECTIONAL; i++)
2895 v[i] = ase[i] = t[i] = SYSMIS;
2899 if (cmd.a_statistics[CRS_ST_LAMBDA])
2901 double *fim = xnmalloc (n_rows, sizeof *fim);
2902 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2903 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2904 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2905 double sum_fim, sum_fmj;
2907 int rm_index, cm_index;
2910 /* Find maximum for each row and their sum. */
2911 for (sum_fim = 0., i = 0; i < n_rows; i++)
2913 double max = mat[i * n_cols];
2916 for (j = 1; j < n_cols; j++)
2917 if (mat[j + i * n_cols] > max)
2919 max = mat[j + i * n_cols];
2923 sum_fim += fim[i] = max;
2924 fim_index[i] = index;
2927 /* Find maximum for each column. */
2928 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2930 double max = mat[j];
2933 for (i = 1; i < n_rows; i++)
2934 if (mat[j + i * n_cols] > max)
2936 max = mat[j + i * n_cols];
2940 sum_fmj += fmj[j] = max;
2941 fmj_index[j] = index;
2944 /* Find maximum row total. */
2947 for (i = 1; i < n_rows; i++)
2948 if (row_tot[i] > rm)
2954 /* Find maximum column total. */
2957 for (j = 1; j < n_cols; j++)
2958 if (col_tot[j] > cm)
2964 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2965 v[1] = (sum_fmj - rm) / (W - rm);
2966 v[2] = (sum_fim - cm) / (W - cm);
2968 /* ASE1 for Y given X. */
2972 for (accum = 0., i = 0; i < n_rows; i++)
2973 for (j = 0; j < n_cols; j++)
2975 const int deltaj = j == cm_index;
2976 accum += (mat[j + i * n_cols]
2977 * pow2 ((j == fim_index[i])
2982 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2985 /* ASE0 for Y given X. */
2989 for (accum = 0., i = 0; i < n_rows; i++)
2990 if (cm_index != fim_index[i])
2991 accum += (mat[i * n_cols + fim_index[i]]
2992 + mat[i * n_cols + cm_index]);
2993 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2996 /* ASE1 for X given Y. */
3000 for (accum = 0., i = 0; i < n_rows; i++)
3001 for (j = 0; j < n_cols; j++)
3003 const int deltaj = i == rm_index;
3004 accum += (mat[j + i * n_cols]
3005 * pow2 ((i == fmj_index[j])
3010 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
3013 /* ASE0 for X given Y. */
3017 for (accum = 0., j = 0; j < n_cols; j++)
3018 if (rm_index != fmj_index[j])
3019 accum += (mat[j + n_cols * fmj_index[j]]
3020 + mat[j + n_cols * rm_index]);
3021 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3024 /* Symmetric ASE0 and ASE1. */
3029 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3030 for (j = 0; j < n_cols; j++)
3032 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3033 int temp1 = (i == rm_index) + (j == cm_index);
3034 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3035 accum1 += (mat[j + i * n_cols]
3036 * pow2 (temp0 + (v[0] - 1.) * temp1));
3038 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3039 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3040 / (2. * W - rm - cm));
3049 double sum_fij2_ri, sum_fij2_ci;
3050 double sum_ri2, sum_cj2;
3052 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3053 for (j = 0; j < n_cols; j++)
3055 double temp = pow2 (mat[j + i * n_cols]);
3056 sum_fij2_ri += temp / row_tot[i];
3057 sum_fij2_ci += temp / col_tot[j];
3060 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3061 sum_ri2 += row_tot[i] * row_tot[i];
3063 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3064 sum_cj2 += col_tot[j] * col_tot[j];
3066 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3067 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3071 if (cmd.a_statistics[CRS_ST_UC])
3073 double UX, UY, UXY, P;
3074 double ase1_yx, ase1_xy, ase1_sym;
3077 for (UX = 0., i = 0; i < n_rows; i++)
3078 if (row_tot[i] > 0.)
3079 UX -= row_tot[i] / W * log (row_tot[i] / W);
3081 for (UY = 0., j = 0; j < n_cols; j++)
3082 if (col_tot[j] > 0.)
3083 UY -= col_tot[j] / W * log (col_tot[j] / W);
3085 for (UXY = P = 0., i = 0; i < n_rows; i++)
3086 for (j = 0; j < n_cols; j++)
3088 double entry = mat[j + i * n_cols];
3093 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3094 UXY -= entry / W * log (entry / W);
3097 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3098 for (j = 0; j < n_cols; j++)
3100 double entry = mat[j + i * n_cols];
3105 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3106 + (UX - UXY) * log (col_tot[j] / W));
3107 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3108 + (UY - UXY) * log (row_tot[i] / W));
3109 ase1_sym += entry * pow2 ((UXY
3110 * log (row_tot[i] * col_tot[j] / (W * W)))
3111 - (UX + UY) * log (entry / W));
3114 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3115 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3116 t[5] = v[5] / ((2. / (W * (UX + UY)))
3117 * sqrt (P - pow2 (UX + UY - UXY) / W));
3119 v[6] = (UX + UY - UXY) / UX;
3120 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3121 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3123 v[7] = (UX + UY - UXY) / UY;
3124 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3125 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3129 if (cmd.a_statistics[CRS_ST_D])
3134 calc_symmetric (NULL, NULL, NULL);
3135 for (i = 0; i < 3; i++)
3137 v[8 + i] = somers_d_v[i];
3138 ase[8 + i] = somers_d_ase[i];
3139 t[8 + i] = somers_d_t[i];
3144 if (cmd.a_statistics[CRS_ST_ETA])
3147 double sum_Xr, sum_X2r;
3151 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3153 sum_Xr += rows[i].f * row_tot[i];
3154 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3156 SX = sum_X2r - sum_Xr * sum_Xr / W;
3158 for (SXW = 0., j = 0; j < n_cols; j++)
3162 for (cum = 0., i = 0; i < n_rows; i++)
3164 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3165 cum += rows[i].f * mat[j + i * n_cols];
3168 SXW -= cum * cum / col_tot[j];
3170 v[11] = sqrt (1. - SXW / SX);
3174 double sum_Yc, sum_Y2c;
3178 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3180 sum_Yc += cols[i].f * col_tot[i];
3181 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3183 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3185 for (SYW = 0., i = 0; i < n_rows; i++)
3189 for (cum = 0., j = 0; j < n_cols; j++)
3191 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3192 cum += cols[j].f * mat[j + i * n_cols];
3195 SYW -= cum * cum / row_tot[i];
3197 v[12] = sqrt (1. - SYW / SY);
3204 /* A wrapper around data_out() that limits string output to short
3205 string width and null terminates the result. */
3207 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3209 struct fmt_spec fmt_subst;
3211 /* Limit to short string width. */
3212 if (fmt_is_string (fp->type))
3216 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3217 if (fmt_subst.type == FMT_A)
3218 fmt_subst.w = MIN (8, fmt_subst.w);
3220 fmt_subst.w = MIN (16, fmt_subst.w);
3226 data_out (v, fp, s);
3228 /* Null terminate. */