1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 - Pearson's R (but not Spearman!) is off a little.
23 - T values for Spearman's R and Pearson's R are wrong.
24 - How to calculate significance of symmetric and directional measures?
25 - Asymmetric ASEs and T values for lambda are wrong.
26 - ASE of Goodman and Kruskal's tau is not calculated.
27 - ASE of symmetric somers' d is wrong.
28 - Approx. T of uncertainty coefficient is wrong.
35 #include <gsl/gsl_cdf.h>
39 #include <data/case.h>
40 #include <data/dictionary.h>
41 #include <data/procedure.h>
42 #include <data/value-labels.h>
43 #include <data/variable.h>
44 #include <language/command.h>
45 #include <language/dictionary/split-file.h>
46 #include <language/lexer/lexer.h>
47 #include <language/lexer/variable-parser.h>
48 #include <libpspp/alloc.h>
49 #include <libpspp/array.h>
50 #include <libpspp/compiler.h>
51 #include <libpspp/hash.h>
52 #include <libpspp/magic.h>
53 #include <libpspp/message.h>
54 #include <libpspp/message.h>
55 #include <libpspp/misc.h>
56 #include <libpspp/pool.h>
57 #include <libpspp/str.h>
58 #include <output/output.h>
59 #include <output/table.h>
62 #define _(msgid) gettext (msgid)
63 #define N_(msgid) msgid
71 missing=miss:!table/include/report;
72 +write[wr_]=none,cells,all;
73 +format=fmt:!labels/nolabels/novallabs,
76 tabl:!tables/notables,
79 +cells[cl_]=count,none,expected,row,column,total,residual,sresidual,
81 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
87 /* Number of chi-square statistics. */
90 /* Number of symmetric statistics. */
93 /* Number of directional statistics. */
94 #define N_DIRECTIONAL 13
96 /* A single table entry for general mode. */
99 int table; /* Flattened table number. */
102 double freq; /* Frequency count. */
103 double *data; /* Crosstabulation table for integer mode. */
106 union value values[1]; /* Values. */
109 /* A crosstabulation. */
112 int nvar; /* Number of variables. */
113 double missing; /* Missing cases count. */
114 int ofs; /* Integer mode: Offset into sorted_tab[]. */
115 struct variable *vars[2]; /* At least two variables; sorted by
116 larger indices first. */
119 /* Integer mode variable info. */
122 int min; /* Minimum value. */
123 int max; /* Maximum value + 1. */
124 int count; /* max - min. */
127 static inline struct var_range *
128 get_var_range (struct variable *v)
131 assert (v->aux != NULL);
135 /* Indexes into crosstab.v. */
142 /* General mode crosstabulation table. */
143 static struct hsh_table *gen_tab; /* Hash table. */
144 static int n_sorted_tab; /* Number of entries in sorted_tab. */
145 static struct table_entry **sorted_tab; /* Sorted table. */
147 /* Variables specifies on VARIABLES. */
148 static struct variable **variables;
149 static size_t variables_cnt;
152 static struct crosstab **xtab;
155 /* Integer or general mode? */
164 static int num_cells; /* Number of cells requested. */
165 static int cells[8]; /* Cells requested. */
168 static int write; /* One of WR_* that specifies the WRITE style. */
170 /* Command parsing info. */
171 static struct cmd_crosstabs cmd;
174 static struct pool *pl_tc; /* For table cells. */
175 static struct pool *pl_col; /* For column data. */
177 static int internal_cmd_crosstabs (void);
178 static void precalc (const struct ccase *, void *);
179 static bool calc_general (const struct ccase *, void *);
180 static bool calc_integer (const struct ccase *, void *);
181 static void postcalc (void *);
182 static void submit (struct tab_table *);
184 static void format_short (char *s, const struct fmt_spec *fp,
185 const union value *v);
187 /* Parse and execute CROSSTABS, then clean up. */
191 int result = internal_cmd_crosstabs ();
194 pool_destroy (pl_tc);
195 pool_destroy (pl_col);
200 /* Parses and executes the CROSSTABS procedure. */
202 internal_cmd_crosstabs (void)
211 pl_tc = pool_create ();
212 pl_col = pool_create ();
214 if (!parse_crosstabs (&cmd, NULL))
217 mode = variables ? INTEGER : GENERAL;
222 cmd.a_cells[CRS_CL_COUNT] = 1;
228 for (i = 0; i < CRS_CL_count; i++)
233 cmd.a_cells[CRS_CL_COUNT] = 1;
234 cmd.a_cells[CRS_CL_ROW] = 1;
235 cmd.a_cells[CRS_CL_COLUMN] = 1;
236 cmd.a_cells[CRS_CL_TOTAL] = 1;
238 if (cmd.a_cells[CRS_CL_ALL])
240 for (i = 0; i < CRS_CL_count; i++)
242 cmd.a_cells[CRS_CL_ALL] = 0;
244 cmd.a_cells[CRS_CL_NONE] = 0;
246 for (num_cells = i = 0; i < CRS_CL_count; i++)
248 cells[num_cells++] = i;
251 if (cmd.sbc_statistics)
256 for (i = 0; i < CRS_ST_count; i++)
257 if (cmd.a_statistics[i])
260 cmd.a_statistics[CRS_ST_CHISQ] = 1;
261 if (cmd.a_statistics[CRS_ST_ALL])
262 for (i = 0; i < CRS_ST_count; i++)
263 cmd.a_statistics[i] = 1;
267 if (cmd.miss == CRS_REPORT && mode == GENERAL)
269 msg (SE, _("Missing mode REPORT not allowed in general mode. "
270 "Assuming MISSING=TABLE."));
271 cmd.miss = CRS_TABLE;
275 if (cmd.a_write[CRS_WR_ALL] && cmd.a_write[CRS_WR_CELLS])
276 cmd.a_write[CRS_WR_ALL] = 0;
277 if (cmd.a_write[CRS_WR_ALL] && mode == GENERAL)
279 msg (SE, _("Write mode ALL not allowed in general mode. "
280 "Assuming WRITE=CELLS."));
281 cmd.a_write[CRS_WR_CELLS] = 1;
284 && (cmd.a_write[CRS_WR_NONE]
285 + cmd.a_write[CRS_WR_ALL]
286 + cmd.a_write[CRS_WR_CELLS] == 0))
287 cmd.a_write[CRS_WR_CELLS] = 1;
288 if (cmd.a_write[CRS_WR_CELLS])
289 write = CRS_WR_CELLS;
290 else if (cmd.a_write[CRS_WR_ALL])
295 ok = procedure_with_splits (precalc,
296 mode == GENERAL ? calc_general : calc_integer,
299 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
302 /* Parses the TABLES subcommand. */
304 crs_custom_tables (struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
306 struct var_set *var_set;
308 struct variable ***by = NULL;
309 size_t *by_nvar = NULL;
313 /* Ensure that this is a TABLES subcommand. */
314 if (!lex_match_id ("TABLES")
315 && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
320 if (variables != NULL)
321 var_set = var_set_create_from_array (variables, variables_cnt);
323 var_set = var_set_create_from_dict (default_dict);
324 assert (var_set != NULL);
328 by = xnrealloc (by, n_by + 1, sizeof *by);
329 by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
330 if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by],
331 PV_NO_DUPLICATE | PV_NO_SCRATCH))
333 if (xalloc_oversized (nx, by_nvar[n_by]))
335 msg (SE, _("Too many crosstabulation variables or dimensions."));
341 if (!lex_match (T_BY))
345 lex_error (_("expecting BY"));
354 int *by_iter = xcalloc (n_by, sizeof *by_iter);
357 xtab = xnrealloc (xtab, nxtab + nx, sizeof *xtab);
358 for (i = 0; i < nx; i++)
362 x = xmalloc (sizeof *x + sizeof (struct variable *) * (n_by - 2));
369 for (i = 0; i < n_by; i++)
370 x->vars[i] = by[i][by_iter[i]];
376 for (i = n_by - 1; i >= 0; i--)
378 if (++by_iter[i] < by_nvar[i])
391 /* All return paths lead here. */
395 for (i = 0; i < n_by; i++)
401 var_set_destroy (var_set);
406 /* Parses the VARIABLES subcommand. */
408 crs_custom_variables (struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
412 msg (SE, _("VARIABLES must be specified before TABLES."));
420 size_t orig_nv = variables_cnt;
425 if (!parse_variables (default_dict, &variables, &variables_cnt,
426 (PV_APPEND | PV_NUMERIC
427 | PV_NO_DUPLICATE | PV_NO_SCRATCH)))
432 lex_error ("expecting `('");
437 if (!lex_force_int ())
439 min = lex_integer ();
444 if (!lex_force_int ())
446 max = lex_integer ();
449 msg (SE, _("Maximum value (%ld) less than minimum value (%ld)."),
457 lex_error ("expecting `)'");
462 for (i = orig_nv; i < variables_cnt; i++)
464 struct var_range *vr = xmalloc (sizeof *vr);
467 vr->count = max - min + 1;
468 var_attach_aux (variables[i], vr, var_dtor_free);
483 /* Data file processing. */
485 static int compare_table_entry (const void *, const void *, void *);
486 static unsigned hash_table_entry (const void *, void *);
488 /* Set up the crosstabulation tables for processing. */
490 precalc (const struct ccase *first, void *aux UNUSED)
492 output_split_file_values (first);
495 gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
505 for (i = 0; i < nxtab; i++)
507 struct crosstab *x = xtab[i];
512 x->ofs = n_sorted_tab;
514 for (j = 2; j < x->nvar; j++)
515 count *= get_var_range (x->vars[j - 2])->count;
517 sorted_tab = xnrealloc (sorted_tab,
518 n_sorted_tab + count, sizeof *sorted_tab);
519 v = local_alloc (sizeof *v * x->nvar);
520 for (j = 2; j < x->nvar; j++)
521 v[j] = get_var_range (x->vars[j])->min;
522 for (j = 0; j < count; j++)
524 struct table_entry *te;
527 te = sorted_tab[n_sorted_tab++]
528 = xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
532 int row_cnt = get_var_range (x->vars[0])->count;
533 int col_cnt = get_var_range (x->vars[1])->count;
534 const int mat_size = row_cnt * col_cnt;
537 te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
538 for (m = 0; m < mat_size; m++)
542 for (k = 2; k < x->nvar; k++)
543 te->values[k].f = v[k];
544 for (k = 2; k < x->nvar; k++)
546 struct var_range *vr = get_var_range (x->vars[k]);
547 if (++v[k] >= vr->max)
556 sorted_tab = xnrealloc (sorted_tab,
557 n_sorted_tab + 1, sizeof *sorted_tab);
558 sorted_tab[n_sorted_tab] = NULL;
562 /* Form crosstabulations for general mode. */
564 calc_general (const struct ccase *c, void *aux UNUSED)
569 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
571 /* Flattened current table index. */
574 for (t = 0; t < nxtab; t++)
576 struct crosstab *x = xtab[t];
577 const size_t entry_size = (sizeof (struct table_entry)
578 + sizeof (union value) * (x->nvar - 1));
579 struct table_entry *te = local_alloc (entry_size);
581 /* Construct table entry for the current record and table. */
587 for (j = 0; j < x->nvar; j++)
589 const union value *v = case_data (c, x->vars[j]->fv);
590 const struct missing_values *mv = &x->vars[j]->miss;
591 if ((cmd.miss == CRS_TABLE && mv_is_value_missing (mv, v))
592 || (cmd.miss == CRS_INCLUDE
593 && mv_is_value_system_missing (mv, v)))
595 x->missing += weight;
599 if (x->vars[j]->type == NUMERIC)
600 te->values[j].f = case_num (c, x->vars[j]->fv);
603 memcpy (te->values[j].s, case_str (c, x->vars[j]->fv),
606 /* Necessary in order to simplify comparisons. */
607 memset (&te->values[j].s[x->vars[j]->width], 0,
608 sizeof (union value) - x->vars[j]->width);
613 /* Add record to hash table. */
615 struct table_entry **tepp
616 = (struct table_entry **) hsh_probe (gen_tab, te);
619 struct table_entry *tep = pool_alloc (pl_tc, entry_size);
622 memcpy (tep, te, entry_size);
627 (*tepp)->u.freq += weight;
638 calc_integer (const struct ccase *c, void *aux UNUSED)
643 double weight = dict_get_case_weight (default_dict, c, &bad_warn);
645 /* Flattened current table index. */
648 for (t = 0; t < nxtab; t++)
650 struct crosstab *x = xtab[t];
655 for (i = 0; i < x->nvar; i++)
657 struct variable *const v = x->vars[i];
658 struct var_range *vr = get_var_range (v);
659 double value = case_num (c, v->fv);
661 /* Note that the first test also rules out SYSMIS. */
662 if ((value < vr->min || value >= vr->max)
663 || (cmd.miss == CRS_TABLE
664 && mv_is_num_user_missing (&v->miss, value)))
666 x->missing += weight;
672 ofs += fact * ((int) value - vr->min);
678 struct variable *row_var = x->vars[ROW_VAR];
679 const int row = case_num (c, row_var->fv) - get_var_range (row_var)->min;
681 struct variable *col_var = x->vars[COL_VAR];
682 const int col = case_num (c, col_var->fv) - get_var_range (col_var)->min;
684 const int col_dim = get_var_range (col_var)->count;
686 sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
695 /* Compare the table_entry's at A and B and return a strcmp()-type
698 compare_table_entry (const void *a_, const void *b_, void *foo UNUSED)
700 const struct table_entry *a = a_;
701 const struct table_entry *b = b_;
703 if (a->table > b->table)
705 else if (a->table < b->table)
709 const struct crosstab *x = xtab[a->table];
712 for (i = x->nvar - 1; i >= 0; i--)
713 if (x->vars[i]->type == NUMERIC)
715 const double diffnum = a->values[i].f - b->values[i].f;
718 else if (diffnum > 0)
723 assert (x->vars[i]->type == ALPHA);
725 const int diffstr = strncmp (a->values[i].s, b->values[i].s,
736 /* Calculate a hash value from table_entry A. */
738 hash_table_entry (const void *a_, void *foo UNUSED)
740 const struct table_entry *a = a_;
745 for (i = 0; i < xtab[a->table]->nvar; i++)
746 hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
751 /* Post-data reading calculations. */
753 static struct table_entry **find_pivot_extent (struct table_entry **,
754 int *cnt, int pivot);
755 static void enum_var_values (struct table_entry **entries, int entry_cnt,
757 union value **values, int *value_cnt);
758 static void output_pivot_table (struct table_entry **, struct table_entry **,
759 double **, double **, double **,
760 int *, int *, int *);
761 static void make_summary_table (void);
764 postcalc (void *aux UNUSED)
768 n_sorted_tab = hsh_count (gen_tab);
769 sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
772 make_summary_table ();
774 /* Identify all the individual crosstabulation tables, and deal with
777 struct table_entry **pb = sorted_tab, **pe; /* Pivot begin, pivot end. */
778 int pc = n_sorted_tab; /* Pivot count. */
780 double *mat = NULL, *row_tot = NULL, *col_tot = NULL;
781 int maxrows = 0, maxcols = 0, maxcells = 0;
785 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
789 output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
790 &maxrows, &maxcols, &maxcells);
799 hsh_destroy (gen_tab);
802 static void insert_summary (struct tab_table *, int tab_index, double valid);
804 /* Output a table summarizing the cases processed. */
806 make_summary_table (void)
808 struct tab_table *summary;
810 struct table_entry **pb = sorted_tab, **pe;
811 int pc = n_sorted_tab;
814 summary = tab_create (7, 3 + nxtab, 1);
815 tab_title (summary, _("Summary."));
816 tab_headers (summary, 1, 0, 3, 0);
817 tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
818 tab_joint_text (summary, 1, 1, 2, 1, TAB_CENTER, _("Valid"));
819 tab_joint_text (summary, 3, 1, 4, 1, TAB_CENTER, _("Missing"));
820 tab_joint_text (summary, 5, 1, 6, 1, TAB_CENTER, _("Total"));
821 tab_hline (summary, TAL_1, 1, 6, 1);
822 tab_hline (summary, TAL_1, 1, 6, 2);
823 tab_vline (summary, TAL_1, 3, 1, 1);
824 tab_vline (summary, TAL_1, 5, 1, 1);
828 for (i = 0; i < 3; i++)
830 tab_text (summary, 1 + i * 2, 2, TAB_RIGHT, _("N"));
831 tab_text (summary, 2 + i * 2, 2, TAB_RIGHT, _("Percent"));
834 tab_offset (summary, 0, 3);
840 pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
844 while (cur_tab < (*pb)->table)
845 insert_summary (summary, cur_tab++, 0.);
848 for (valid = 0.; pb < pe; pb++)
849 valid += (*pb)->u.freq;
852 const struct crosstab *const x = xtab[(*pb)->table];
853 const int n_cols = get_var_range (x->vars[COL_VAR])->count;
854 const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
855 const int count = n_cols * n_rows;
857 for (valid = 0.; pb < pe; pb++)
859 const double *data = (*pb)->u.data;
862 for (i = 0; i < count; i++)
866 insert_summary (summary, cur_tab++, valid);
871 while (cur_tab < nxtab)
872 insert_summary (summary, cur_tab++, 0.);
877 /* Inserts a line into T describing the crosstabulation at index
878 TAB_INDEX, which has VALID valid observations. */
880 insert_summary (struct tab_table *t, int tab_index, double valid)
882 struct crosstab *x = xtab[tab_index];
884 tab_hline (t, TAL_1, 0, 6, 0);
886 /* Crosstabulation name. */
888 char *buf = local_alloc (128 * x->nvar);
892 for (i = 0; i < x->nvar; i++)
895 cp = stpcpy (cp, " * ");
898 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
900 tab_text (t, 0, 0, TAB_LEFT, buf);
905 /* Counts and percentages. */
915 for (i = 0; i < 3; i++)
917 tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0);
918 tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%",
929 static struct tab_table *table; /* Crosstabulation table. */
930 static struct tab_table *chisq; /* Chi-square table. */
931 static struct tab_table *sym; /* Symmetric measures table. */
932 static struct tab_table *risk; /* Risk estimate table. */
933 static struct tab_table *direct; /* Directional measures table. */
936 static int chisq_fisher; /* Did any rows include Fisher's exact test? */
938 /* Column values, number of columns. */
939 static union value *cols;
942 /* Row values, number of rows. */
943 static union value *rows;
946 /* Number of statistically interesting columns/rows (columns/rows with
948 static int ns_cols, ns_rows;
950 /* Crosstabulation. */
951 static struct crosstab *x;
953 /* Number of variables from the crosstabulation to consider. This is
954 either x->nvar, if pivoting is on, or 2, if pivoting is off. */
957 /* Matrix contents. */
958 static double *mat; /* Matrix proper. */
959 static double *row_tot; /* Row totals. */
960 static double *col_tot; /* Column totals. */
961 static double W; /* Grand total. */
963 static void display_dimensions (struct tab_table *, int first_difference,
964 struct table_entry *);
965 static void display_crosstabulation (void);
966 static void display_chisq (void);
967 static void display_symmetric (void);
968 static void display_risk (void);
969 static void display_directional (void);
970 static void crosstabs_dim (struct tab_table *, struct outp_driver *);
971 static void table_value_missing (struct tab_table *table, int c, int r,
972 unsigned char opt, const union value *v,
973 const struct variable *var);
974 static void delete_missing (void);
976 /* Output pivot table beginning at PB and continuing until PE,
977 exclusive. For efficiency, *MATP is a pointer to a matrix that can
978 hold *MAXROWS entries. */
980 output_pivot_table (struct table_entry **pb, struct table_entry **pe,
981 double **matp, double **row_totp, double **col_totp,
982 int *maxrows, int *maxcols, int *maxcells)
985 struct table_entry **tb = pb, **te; /* Table begin, table end. */
986 int tc = pe - pb; /* Table count. */
988 /* Table entry for header comparison. */
989 struct table_entry *cmp = NULL;
991 x = xtab[(*pb)->table];
992 enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
994 nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
996 /* Crosstabulation table initialization. */
999 table = tab_create (nvar + n_cols,
1000 (pe - pb) / n_cols * 3 / 2 * num_cells + 10, 1);
1001 tab_headers (table, nvar - 1, 0, 2, 0);
1003 /* First header line. */
1004 tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
1005 TAB_CENTER | TAT_TITLE, x->vars[COL_VAR]->name);
1007 tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
1009 /* Second header line. */
1013 for (i = 2; i < nvar; i++)
1014 tab_joint_text (table, nvar - i - 1, 0, nvar - i - 1, 1,
1015 TAB_RIGHT | TAT_TITLE,
1017 ? x->vars[i]->label : x->vars[i]->name));
1018 tab_text (table, nvar - 2, 1, TAB_RIGHT | TAT_TITLE,
1019 x->vars[ROW_VAR]->name);
1020 for (i = 0; i < n_cols; i++)
1021 table_value_missing (table, nvar + i - 1, 1, TAB_RIGHT, &cols[i],
1023 tab_text (table, nvar + n_cols - 1, 1, TAB_CENTER, _("Total"));
1026 tab_hline (table, TAL_1, 0, nvar + n_cols - 1, 2);
1027 tab_vline (table, TAL_1, nvar + n_cols - 1, 0, 1);
1031 char *title = local_alloc (x->nvar * 64 + 128);
1035 if (cmd.pivot == CRS_PIVOT)
1036 for (i = 0; i < nvar; i++)
1039 cp = stpcpy (cp, " by ");
1040 cp = stpcpy (cp, x->vars[i]->name);
1044 cp = spprintf (cp, "%s by %s for",
1045 x->vars[0]->name, x->vars[1]->name);
1046 for (i = 2; i < nvar; i++)
1048 char buf[64], *bufp;
1053 cp = stpcpy (cp, x->vars[i]->name);
1055 format_short (buf, &x->vars[i]->print, &(*pb)->values[i]);
1056 for (bufp = buf; isspace ((unsigned char) *bufp); bufp++)
1058 cp = stpcpy (cp, bufp);
1062 cp = stpcpy (cp, " [");
1063 for (i = 0; i < num_cells; i++)
1071 static const struct tuple cell_names[] =
1073 {CRS_CL_COUNT, N_("count")},
1074 {CRS_CL_ROW, N_("row %")},
1075 {CRS_CL_COLUMN, N_("column %")},
1076 {CRS_CL_TOTAL, N_("total %")},
1077 {CRS_CL_EXPECTED, N_("expected")},
1078 {CRS_CL_RESIDUAL, N_("residual")},
1079 {CRS_CL_SRESIDUAL, N_("std. resid.")},
1080 {CRS_CL_ASRESIDUAL, N_("adj. resid.")},
1084 const struct tuple *t;
1086 for (t = cell_names; t->value != cells[i]; t++)
1087 assert (t->value != -1);
1089 cp = stpcpy (cp, ", ");
1090 cp = stpcpy (cp, gettext (t->name));
1094 tab_title (table, "%s", title);
1098 tab_offset (table, 0, 2);
1103 /* Chi-square table initialization. */
1104 if (cmd.a_statistics[CRS_ST_CHISQ])
1106 chisq = tab_create (6 + (nvar - 2),
1107 (pe - pb) / n_cols * 3 / 2 * N_CHISQ + 10, 1);
1108 tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
1110 tab_title (chisq, _("Chi-square tests."));
1112 tab_offset (chisq, nvar - 2, 0);
1113 tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1114 tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1115 tab_text (chisq, 2, 0, TAB_RIGHT | TAT_TITLE, _("df"));
1116 tab_text (chisq, 3, 0, TAB_RIGHT | TAT_TITLE,
1117 _("Asymp. Sig. (2-sided)"));
1118 tab_text (chisq, 4, 0, TAB_RIGHT | TAT_TITLE,
1119 _("Exact. Sig. (2-sided)"));
1120 tab_text (chisq, 5, 0, TAB_RIGHT | TAT_TITLE,
1121 _("Exact. Sig. (1-sided)"));
1123 tab_offset (chisq, 0, 1);
1128 /* Symmetric measures. */
1129 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
1130 || cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
1131 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_CORR]
1132 || cmd.a_statistics[CRS_ST_KAPPA])
1134 sym = tab_create (6 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1135 tab_headers (sym, 2 + (nvar - 2), 0, 1, 0);
1136 tab_title (sym, _("Symmetric measures."));
1138 tab_offset (sym, nvar - 2, 0);
1139 tab_text (sym, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1140 tab_text (sym, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1141 tab_text (sym, 2, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1142 tab_text (sym, 3, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1143 tab_text (sym, 4, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1144 tab_text (sym, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1145 tab_offset (sym, 0, 1);
1150 /* Risk estimate. */
1151 if (cmd.a_statistics[CRS_ST_RISK])
1153 risk = tab_create (4 + (nvar - 2), (pe - pb) / n_cols * 4 + 10, 1);
1154 tab_headers (risk, 1 + nvar - 2, 0, 2, 0);
1155 tab_title (risk, _("Risk estimate."));
1157 tab_offset (risk, nvar - 2, 0);
1158 tab_joint_text (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
1159 _("95%% Confidence Interval"));
1160 tab_text (risk, 0, 1, TAB_LEFT | TAT_TITLE, _("Statistic"));
1161 tab_text (risk, 1, 1, TAB_RIGHT | TAT_TITLE, _("Value"));
1162 tab_text (risk, 2, 1, TAB_RIGHT | TAT_TITLE, _("Lower"));
1163 tab_text (risk, 3, 1, TAB_RIGHT | TAT_TITLE, _("Upper"));
1164 tab_hline (risk, TAL_1, 2, 3, 1);
1165 tab_vline (risk, TAL_1, 2, 0, 1);
1166 tab_offset (risk, 0, 2);
1171 /* Directional measures. */
1172 if (cmd.a_statistics[CRS_ST_LAMBDA] || cmd.a_statistics[CRS_ST_UC]
1173 || cmd.a_statistics[CRS_ST_D] || cmd.a_statistics[CRS_ST_ETA])
1175 direct = tab_create (7 + (nvar - 2), (pe - pb) / n_cols * 7 + 10, 1);
1176 tab_headers (direct, 3 + (nvar - 2), 0, 1, 0);
1177 tab_title (direct, _("Directional measures."));
1179 tab_offset (direct, nvar - 2, 0);
1180 tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
1181 tab_text (direct, 1, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
1182 tab_text (direct, 2, 0, TAB_LEFT | TAT_TITLE, _("Type"));
1183 tab_text (direct, 3, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
1184 tab_text (direct, 4, 0, TAB_RIGHT | TAT_TITLE, _("Asymp. Std. Error"));
1185 tab_text (direct, 5, 0, TAB_RIGHT | TAT_TITLE, _("Approx. T"));
1186 tab_text (direct, 6, 0, TAB_RIGHT | TAT_TITLE, _("Approx. Sig."));
1187 tab_offset (direct, 0, 1);
1194 /* Find pivot subtable if applicable. */
1195 te = find_pivot_extent (tb, &tc, 0);
1199 /* Find all the row variable values. */
1200 enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
1202 /* Allocate memory space for the column and row totals. */
1203 if (n_rows > *maxrows)
1205 *row_totp = xnrealloc (*row_totp, n_rows, sizeof **row_totp);
1206 row_tot = *row_totp;
1209 if (n_cols > *maxcols)
1211 *col_totp = xnrealloc (*col_totp, n_cols, sizeof **col_totp);
1212 col_tot = *col_totp;
1216 /* Allocate table space for the matrix. */
1217 if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
1218 tab_realloc (table, -1,
1219 max (tab_nr (table) + (n_rows + 1) * num_cells,
1220 tab_nr (table) * (pe - pb) / (te - tb)));
1222 if (mode == GENERAL)
1224 /* Allocate memory space for the matrix. */
1225 if (n_cols * n_rows > *maxcells)
1227 *matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
1228 *maxcells = n_cols * n_rows;
1233 /* Build the matrix and calculate column totals. */
1235 union value *cur_col = cols;
1236 union value *cur_row = rows;
1238 double *cp = col_tot;
1239 struct table_entry **p;
1242 for (p = &tb[0]; p < te; p++)
1244 for (; memcmp (cur_col, &(*p)->values[COL_VAR], sizeof *cur_col);
1248 for (; cur_row < &rows[n_rows]; cur_row++)
1254 mp = &mat[cur_col - cols];
1257 for (; memcmp (cur_row, &(*p)->values[ROW_VAR], sizeof *cur_row);
1264 *cp += *mp = (*p)->u.freq;
1269 /* Zero out the rest of the matrix. */
1270 for (; cur_row < &rows[n_rows]; cur_row++)
1276 if (cur_col < &cols[n_cols])
1278 const int rem_cols = n_cols - (cur_col - cols);
1281 for (c = 0; c < rem_cols; c++)
1283 mp = &mat[cur_col - cols];
1284 for (r = 0; r < n_rows; r++)
1286 for (c = 0; c < rem_cols; c++)
1288 mp += n_cols - rem_cols;
1296 double *tp = col_tot;
1298 assert (mode == INTEGER);
1299 mat = (*tb)->u.data;
1302 /* Calculate column totals. */
1303 for (c = 0; c < n_cols; c++)
1306 double *cp = &mat[c];
1308 for (r = 0; r < n_rows; r++)
1309 cum += cp[r * n_cols];
1317 for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
1318 ns_cols += *cp != 0.;
1321 /* Calculate row totals. */
1324 double *rp = row_tot;
1327 for (ns_rows = 0, r = n_rows; r--; )
1330 for (c = n_cols; c--; )
1338 /* Calculate grand total. */
1344 if (n_rows < n_cols)
1345 tp = row_tot, n = n_rows;
1347 tp = col_tot, n = n_cols;
1353 /* Find the first variable that differs from the last subtable,
1354 then display the values of the dimensioning variables for
1355 each table that needs it. */
1357 int first_difference = nvar - 1;
1360 for (; ; first_difference--)
1362 assert (first_difference >= 2);
1363 if (memcmp (&cmp->values[first_difference],
1364 &(*tb)->values[first_difference],
1365 sizeof *cmp->values))
1371 display_dimensions (table, first_difference, *tb);
1373 display_dimensions (chisq, first_difference, *tb);
1375 display_dimensions (sym, first_difference, *tb);
1377 display_dimensions (risk, first_difference, *tb);
1379 display_dimensions (direct, first_difference, *tb);
1383 display_crosstabulation ();
1384 if (cmd.miss == CRS_REPORT)
1389 display_symmetric ();
1393 display_directional ();
1404 tab_resize (chisq, 4 + (nvar - 2), -1);
1415 /* Delete missing rows and columns for statistical analysis when
1418 delete_missing (void)
1423 for (r = 0; r < n_rows; r++)
1424 if (mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1428 for (c = 0; c < n_cols; c++)
1429 mat[c + r * n_cols] = 0.;
1437 for (c = 0; c < n_cols; c++)
1438 if (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1442 for (r = 0; r < n_rows; r++)
1443 mat[c + r * n_cols] = 0.;
1449 /* Prepare table T for submission, and submit it. */
1451 submit (struct tab_table *t)
1458 tab_resize (t, -1, 0);
1459 if (tab_nr (t) == tab_t (t))
1464 tab_offset (t, 0, 0);
1466 for (i = 2; i < nvar; i++)
1467 tab_text (t, nvar - i - 1, 0, TAB_RIGHT | TAT_TITLE,
1468 x->vars[i]->label ? x->vars[i]->label : x->vars[i]->name);
1469 tab_box (t, TAL_2, TAL_2, -1, -1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1);
1470 tab_box (t, -1, -1, -1, TAL_1, tab_l (t), tab_t (t) - 1, tab_nc (t) - 1,
1472 tab_box (t, -1, -1, -1, TAL_GAP, 0, tab_t (t), tab_l (t) - 1,
1474 tab_vline (t, TAL_2, tab_l (t), 0, tab_nr (t) - 1);
1475 tab_dim (t, crosstabs_dim);
1479 /* Sets the widths of all the columns and heights of all the rows in
1480 table T for driver D. */
1482 crosstabs_dim (struct tab_table *t, struct outp_driver *d)
1486 /* Width of a numerical column. */
1487 int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
1488 if (cmd.miss == CRS_REPORT)
1489 c += outp_string_width (d, "M", OUTP_PROPORTIONAL);
1491 /* Set width for header columns. */
1497 w = d->width - c * (t->nc - t->l);
1498 for (i = 0; i <= t->nc; i++)
1502 if (w < d->prop_em_width * 8)
1503 w = d->prop_em_width * 8;
1505 if (w > d->prop_em_width * 15)
1506 w = d->prop_em_width * 15;
1508 for (i = 0; i < t->l; i++)
1512 for (i = t->l; i < t->nc; i++)
1515 for (i = 0; i < t->nr; i++)
1516 t->h[i] = tab_natural_height (t, d, i);
1519 static struct table_entry **find_pivot_extent_general (struct table_entry **tp,
1520 int *cnt, int pivot);
1521 static struct table_entry **find_pivot_extent_integer (struct table_entry **tp,
1522 int *cnt, int pivot);
1524 /* Calls find_pivot_extent_general or find_pivot_extent_integer, as
1526 static struct table_entry **
1527 find_pivot_extent (struct table_entry **tp, int *cnt, int pivot)
1529 return (mode == GENERAL
1530 ? find_pivot_extent_general (tp, cnt, pivot)
1531 : find_pivot_extent_integer (tp, cnt, pivot));
1534 /* Find the extent of a region in TP that contains one table. If
1535 PIVOT != 0 that means a set of table entries with identical table
1536 number; otherwise they also have to have the same values for every
1537 dimension after the row and column dimensions. The table that is
1538 searched starts at TP and has length CNT. Returns the first entry
1539 after the last one in the table; sets *CNT to the number of
1540 remaining values. If there are no entries in TP at all, returns
1541 NULL. A yucky interface, admittedly, but it works. */
1542 static struct table_entry **
1543 find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot)
1545 struct table_entry *fp = *tp;
1550 x = xtab[(*tp)->table];
1558 if ((*tp)->table != fp->table)
1563 if (memcmp (&(*tp)->values[2], &fp->values[2], sizeof (union value) * (x->nvar - 2)))
1570 /* Integer mode correspondent to find_pivot_extent_general(). This
1571 could be optimized somewhat, but I just don't give a crap about
1572 CROSSTABS performance in integer mode, which is just a
1573 CROSSTABS wart as far as I'm concerned.
1575 That said, feel free to send optimization patches to me. */
1576 static struct table_entry **
1577 find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot)
1579 struct table_entry *fp = *tp;
1584 x = xtab[(*tp)->table];
1592 if ((*tp)->table != fp->table)
1597 if (memcmp (&(*tp)->values[2], &fp->values[2],
1598 sizeof (union value) * (x->nvar - 2)))
1605 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1606 result. WIDTH_ points to an int which is either 0 for a
1607 numeric value or a string width for a string value. */
1609 compare_value (const void *a_, const void *b_, void *width_)
1611 const union value *a = a_;
1612 const union value *b = b_;
1613 const int *pwidth = width_;
1614 const int width = *pwidth;
1617 return (a->f < b->f) ? -1 : (a->f > b->f);
1619 return strncmp (a->s, b->s, width);
1622 /* Given an array of ENTRY_CNT table_entry structures starting at
1623 ENTRIES, creates a sorted list of the values that the variable
1624 with index VAR_IDX takes on. The values are returned as a
1625 malloc()'darray stored in *VALUES, with the number of values
1626 stored in *VALUE_CNT.
1629 enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
1630 union value **values, int *value_cnt)
1632 struct variable *v = xtab[(*entries)->table]->vars[var_idx];
1634 if (mode == GENERAL)
1636 int width = v->width;
1639 *values = xnmalloc (entry_cnt, sizeof **values);
1640 for (i = 0; i < entry_cnt; i++)
1641 (*values)[i] = entries[i]->values[var_idx];
1642 *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
1643 compare_value, &width);
1647 struct var_range *vr = get_var_range (v);
1650 assert (mode == INTEGER);
1651 *values = xnmalloc (vr->count, sizeof **values);
1652 for (i = 0; i < vr->count; i++)
1653 (*values)[i].f = i + vr->min;
1654 *value_cnt = vr->count;
1658 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1659 from V, displayed with print format spec from variable VAR. When
1660 in REPORT missing-value mode, missing values have an M appended. */
1662 table_value_missing (struct tab_table *table, int c, int r, unsigned char opt,
1663 const union value *v, const struct variable *var)
1667 const char *label = val_labs_find (var->val_labs, *v);
1670 tab_text (table, c, r, TAB_LEFT, label);
1674 s.string = tab_alloc (table, var->print.w);
1675 format_short (s.string, &var->print, v);
1676 s.length = strlen (s.string);
1677 if (cmd.miss == CRS_REPORT && mv_is_num_user_missing (&var->miss, v->f))
1678 s.string[s.length++] = 'M';
1679 while (s.length && *s.string == ' ')
1684 tab_raw (table, c, r, opt, &s);
1687 /* Draws a line across TABLE at the current row to indicate the most
1688 major dimension variable with index FIRST_DIFFERENCE out of NVAR
1689 that changed, and puts the values that changed into the table. TB
1690 and X must be the corresponding table_entry and crosstab,
1693 display_dimensions (struct tab_table *table, int first_difference, struct table_entry *tb)
1695 tab_hline (table, TAL_1, nvar - first_difference - 1, tab_nc (table) - 1, 0);
1697 for (; first_difference >= 2; first_difference--)
1698 table_value_missing (table, nvar - first_difference - 1, 0,
1699 TAB_RIGHT, &tb->values[first_difference],
1700 x->vars[first_difference]);
1703 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1704 SUFFIX if nonzero. If MARK_MISSING is nonzero the entry is
1705 additionally suffixed with a letter `M'. */
1707 format_cell_entry (struct tab_table *table, int c, int r, double value,
1708 char suffix, int mark_missing)
1710 const struct fmt_spec f = {FMT_F, 10, 1};
1715 s.string = tab_alloc (table, 16);
1717 data_out (s.string, &f, &v);
1718 while (*s.string == ' ')
1724 s.string[s.length++] = suffix;
1726 s.string[s.length++] = 'M';
1728 tab_raw (table, c, r, TAB_RIGHT, &s);
1731 /* Displays the crosstabulation table. */
1733 display_crosstabulation (void)
1738 for (r = 0; r < n_rows; r++)
1739 table_value_missing (table, nvar - 2, r * num_cells,
1740 TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
1742 tab_text (table, nvar - 2, n_rows * num_cells,
1743 TAB_LEFT, _("Total"));
1745 /* Put in the actual cells. */
1750 tab_offset (table, nvar - 1, -1);
1751 for (r = 0; r < n_rows; r++)
1754 tab_hline (table, TAL_1, -1, n_cols, 0);
1755 for (c = 0; c < n_cols; c++)
1757 int mark_missing = 0;
1758 double expected_value = row_tot[r] * col_tot[c] / W;
1759 if (cmd.miss == CRS_REPORT
1760 && (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)
1761 || mv_is_num_user_missing (&x->vars[ROW_VAR]->miss,
1764 for (i = 0; i < num_cells; i++)
1775 v = *mp / row_tot[r] * 100.;
1779 v = *mp / col_tot[c] * 100.;
1786 case CRS_CL_EXPECTED:
1789 case CRS_CL_RESIDUAL:
1790 v = *mp - expected_value;
1792 case CRS_CL_SRESIDUAL:
1793 v = (*mp - expected_value) / sqrt (expected_value);
1795 case CRS_CL_ASRESIDUAL:
1796 v = ((*mp - expected_value)
1797 / sqrt (expected_value
1798 * (1. - row_tot[r] / W)
1799 * (1. - col_tot[c] / W)));
1806 format_cell_entry (table, c, i, v, suffix, mark_missing);
1812 tab_offset (table, -1, tab_row (table) + num_cells);
1820 tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
1821 for (r = 0; r < n_rows; r++)
1824 int mark_missing = 0;
1826 if (cmd.miss == CRS_REPORT
1827 && mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
1830 for (i = 0; i < num_cells; i++)
1844 v = row_tot[r] / W * 100.;
1848 v = row_tot[r] / W * 100.;
1851 case CRS_CL_EXPECTED:
1852 case CRS_CL_RESIDUAL:
1853 case CRS_CL_SRESIDUAL:
1854 case CRS_CL_ASRESIDUAL:
1862 format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
1863 tab_next_row (table);
1868 /* Column totals, grand total. */
1874 tab_hline (table, TAL_1, -1, n_cols, 0);
1875 for (c = 0; c <= n_cols; c++)
1877 double ct = c < n_cols ? col_tot[c] : W;
1878 int mark_missing = 0;
1882 if (cmd.miss == CRS_REPORT && c < n_cols
1883 && mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
1886 for (i = 0; i < num_cells; i++)
1908 case CRS_CL_EXPECTED:
1909 case CRS_CL_RESIDUAL:
1910 case CRS_CL_SRESIDUAL:
1911 case CRS_CL_ASRESIDUAL:
1918 format_cell_entry (table, c, i, v, suffix, mark_missing);
1923 tab_offset (table, -1, tab_row (table) + last_row);
1926 tab_offset (table, 0, -1);
1929 static void calc_r (double *X, double *Y, double *, double *, double *);
1930 static void calc_chisq (double[N_CHISQ], int[N_CHISQ], double *, double *);
1932 /* Display chi-square statistics. */
1934 display_chisq (void)
1936 static const char *chisq_stats[N_CHISQ] =
1938 N_("Pearson Chi-Square"),
1939 N_("Likelihood Ratio"),
1940 N_("Fisher's Exact Test"),
1941 N_("Continuity Correction"),
1942 N_("Linear-by-Linear Association"),
1944 double chisq_v[N_CHISQ];
1945 double fisher1, fisher2;
1951 calc_chisq (chisq_v, df, &fisher1, &fisher2);
1953 tab_offset (chisq, nvar - 2, -1);
1955 for (i = 0; i < N_CHISQ; i++)
1957 if ((i != 2 && chisq_v[i] == SYSMIS)
1958 || (i == 2 && fisher1 == SYSMIS))
1962 tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
1965 tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
1966 tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
1967 tab_float (chisq, 3, 0, TAB_RIGHT,
1968 gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
1973 tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3);
1974 tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3);
1976 tab_next_row (chisq);
1979 tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
1980 tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
1981 tab_next_row (chisq);
1983 tab_offset (chisq, 0, -1);
1986 static int calc_symmetric (double[N_SYMMETRIC], double[N_SYMMETRIC],
1987 double[N_SYMMETRIC]);
1989 /* Display symmetric measures. */
1991 display_symmetric (void)
1993 static const char *categories[] =
1995 N_("Nominal by Nominal"),
1996 N_("Ordinal by Ordinal"),
1997 N_("Interval by Interval"),
1998 N_("Measure of Agreement"),
2001 static const char *stats[N_SYMMETRIC] =
2005 N_("Contingency Coefficient"),
2006 N_("Kendall's tau-b"),
2007 N_("Kendall's tau-c"),
2009 N_("Spearman Correlation"),
2014 static const int stats_categories[N_SYMMETRIC] =
2016 0, 0, 0, 1, 1, 1, 1, 2, 3,
2020 double sym_v[N_SYMMETRIC], sym_ase[N_SYMMETRIC], sym_t[N_SYMMETRIC];
2023 if (!calc_symmetric (sym_v, sym_ase, sym_t))
2026 tab_offset (sym, nvar - 2, -1);
2028 for (i = 0; i < N_SYMMETRIC; i++)
2030 if (sym_v[i] == SYSMIS)
2033 if (stats_categories[i] != last_cat)
2035 last_cat = stats_categories[i];
2036 tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
2039 tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
2040 tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
2041 if (sym_ase[i] != SYSMIS)
2042 tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3);
2043 if (sym_t[i] != SYSMIS)
2044 tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3);
2045 /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/
2049 tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2050 tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
2053 tab_offset (sym, 0, -1);
2056 static int calc_risk (double[], double[], double[], union value *);
2058 /* Display risk estimate. */
2063 double risk_v[3], lower[3], upper[3];
2067 if (!calc_risk (risk_v, upper, lower, c))
2070 tab_offset (risk, nvar - 2, -1);
2072 for (i = 0; i < 3; i++)
2074 if (risk_v[i] == SYSMIS)
2080 if (x->vars[COL_VAR]->type == NUMERIC)
2081 sprintf (buf, _("Odds Ratio for %s (%g / %g)"),
2082 x->vars[COL_VAR]->name, c[0].f, c[1].f);
2084 sprintf (buf, _("Odds Ratio for %s (%.*s / %.*s)"),
2085 x->vars[COL_VAR]->name,
2086 x->vars[COL_VAR]->width, c[0].s,
2087 x->vars[COL_VAR]->width, c[1].s);
2091 if (x->vars[ROW_VAR]->type == NUMERIC)
2092 sprintf (buf, _("For cohort %s = %g"),
2093 x->vars[ROW_VAR]->name, rows[i - 1].f);
2095 sprintf (buf, _("For cohort %s = %.*s"),
2096 x->vars[ROW_VAR]->name,
2097 x->vars[ROW_VAR]->width, rows[i - 1].s);
2101 tab_text (risk, 0, 0, TAB_LEFT, buf);
2102 tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
2103 tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
2104 tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3);
2105 tab_next_row (risk);
2108 tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
2109 tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
2110 tab_next_row (risk);
2112 tab_offset (risk, 0, -1);
2115 static int calc_directional (double[N_DIRECTIONAL], double[N_DIRECTIONAL],
2116 double[N_DIRECTIONAL]);
2118 /* Display directional measures. */
2120 display_directional (void)
2122 static const char *categories[] =
2124 N_("Nominal by Nominal"),
2125 N_("Ordinal by Ordinal"),
2126 N_("Nominal by Interval"),
2129 static const char *stats[] =
2132 N_("Goodman and Kruskal tau"),
2133 N_("Uncertainty Coefficient"),
2138 static const char *types[] =
2145 static const int stats_categories[N_DIRECTIONAL] =
2147 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2150 static const int stats_stats[N_DIRECTIONAL] =
2152 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2155 static const int stats_types[N_DIRECTIONAL] =
2157 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2160 static const int *stats_lookup[] =
2167 static const char **stats_names[] =
2179 double direct_v[N_DIRECTIONAL];
2180 double direct_ase[N_DIRECTIONAL];
2181 double direct_t[N_DIRECTIONAL];
2185 if (!calc_directional (direct_v, direct_ase, direct_t))
2188 tab_offset (direct, nvar - 2, -1);
2190 for (i = 0; i < N_DIRECTIONAL; i++)
2192 if (direct_v[i] == SYSMIS)
2198 for (j = 0; j < 3; j++)
2199 if (last[j] != stats_lookup[j][i])
2202 tab_hline (direct, TAL_1, j, 6, 0);
2207 int k = last[j] = stats_lookup[j][i];
2212 string = x->vars[0]->name;
2214 string = x->vars[1]->name;
2216 tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
2217 gettext (stats_names[j][k]), string);
2222 tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
2223 if (direct_ase[i] != SYSMIS)
2224 tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
2225 if (direct_t[i] != SYSMIS)
2226 tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3);
2227 /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/
2228 tab_next_row (direct);
2231 tab_offset (direct, 0, -1);
2234 /* Statistical calculations. */
2236 /* Returns the value of the gamma (factorial) function for an integer
2239 gamma_int (double x)
2244 for (i = 2; i < x; i++)
2249 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2251 static inline double
2252 Pr (int a, int b, int c, int d)
2254 return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
2255 * gamma_int (c + d + 1.) / gamma_int (b + 1.)
2256 * gamma_int (a + c + 1.) / gamma_int (c + 1.)
2257 * gamma_int (b + d + 1.) / gamma_int (d + 1.)
2258 / gamma_int (a + b + c + d + 1.));
2261 /* Swap the contents of A and B. */
2263 swap (int *a, int *b)
2270 /* Calculate significance for Fisher's exact test as specified in
2271 _SPSS Statistical Algorithms_, Appendix 5. */
2273 calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
2277 if (min (c, d) < min (a, b))
2278 swap (&a, &c), swap (&b, &d);
2279 if (min (b, d) < min (a, c))
2280 swap (&a, &b), swap (&c, &d);
2284 swap (&a, &b), swap (&c, &d);
2286 swap (&a, &c), swap (&b, &d);
2290 for (x = 0; x <= a; x++)
2291 *fisher1 += Pr (a - x, b + x, c + x, d - x);
2293 *fisher2 = *fisher1;
2294 for (x = 1; x <= b; x++)
2295 *fisher2 += Pr (a + x, b - x, c - x, d + x);
2298 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2299 columns with values COLS and N_ROWS rows with values ROWS. Values
2300 in the matrix sum to W. */
2302 calc_chisq (double chisq[N_CHISQ], int df[N_CHISQ],
2303 double *fisher1, double *fisher2)
2307 chisq[0] = chisq[1] = 0.;
2308 chisq[2] = chisq[3] = chisq[4] = SYSMIS;
2309 *fisher1 = *fisher2 = SYSMIS;
2311 df[0] = df[1] = (ns_cols - 1) * (ns_rows - 1);
2313 if (ns_rows <= 1 || ns_cols <= 1)
2315 chisq[0] = chisq[1] = SYSMIS;
2319 for (r = 0; r < n_rows; r++)
2320 for (c = 0; c < n_cols; c++)
2322 const double expected = row_tot[r] * col_tot[c] / W;
2323 const double freq = mat[n_cols * r + c];
2324 const double residual = freq - expected;
2326 chisq[0] += residual * residual / expected;
2328 chisq[1] += freq * log (expected / freq);
2339 /* Calculate Yates and Fisher exact test. */
2340 if (ns_cols == 2 && ns_rows == 2)
2342 double f11, f12, f21, f22;
2348 for (i = j = 0; i < n_cols; i++)
2349 if (col_tot[i] != 0.)
2358 f11 = mat[nz_cols[0]];
2359 f12 = mat[nz_cols[1]];
2360 f21 = mat[nz_cols[0] + n_cols];
2361 f22 = mat[nz_cols[1] + n_cols];
2366 const double x = fabs (f11 * f22 - f12 * f21) - 0.5 * W;
2369 chisq[3] = (W * x * x
2370 / (f11 + f12) / (f21 + f22)
2371 / (f11 + f21) / (f12 + f22));
2379 if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
2380 calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
2383 /* Calculate Mantel-Haenszel. */
2384 if (x->vars[ROW_VAR]->type == NUMERIC && x->vars[COL_VAR]->type == NUMERIC)
2386 double r, ase_0, ase_1;
2387 calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
2389 chisq[4] = (W - 1.) * r * r;
2394 /* Calculate the value of Pearson's r. r is stored into R, ase_1 into
2395 ASE_1, and ase_0 into ASE_0. The row and column values must be
2396 passed in X and Y. */
2398 calc_r (double *X, double *Y, double *r, double *ase_0, double *ase_1)
2400 double SX, SY, S, T;
2402 double sum_XYf, sum_X2Y2f;
2403 double sum_Xr, sum_X2r;
2404 double sum_Yc, sum_Y2c;
2407 for (sum_X2Y2f = sum_XYf = 0., i = 0; i < n_rows; i++)
2408 for (j = 0; j < n_cols; j++)
2410 double fij = mat[j + i * n_cols];
2411 double product = X[i] * Y[j];
2412 double temp = fij * product;
2414 sum_X2Y2f += temp * product;
2417 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
2419 sum_Xr += X[i] * row_tot[i];
2420 sum_X2r += X[i] * X[i] * row_tot[i];
2424 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
2426 sum_Yc += Y[i] * col_tot[i];
2427 sum_Y2c += Y[i] * Y[i] * col_tot[i];
2431 S = sum_XYf - sum_Xr * sum_Yc / W;
2432 SX = sum_X2r - sum_Xr * sum_Xr / W;
2433 SY = sum_Y2c - sum_Yc * sum_Yc / W;
2436 *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
2441 for (s = c = 0., i = 0; i < n_rows; i++)
2442 for (j = 0; j < n_cols; j++)
2444 double Xresid, Yresid;
2447 Xresid = X[i] - Xbar;
2448 Yresid = Y[j] - Ybar;
2449 temp = (T * Xresid * Yresid
2451 * (Xresid * Xresid * SY + Yresid * Yresid * SX)));
2452 y = mat[j + i * n_cols] * temp * temp - c;
2457 *ase_1 = sqrt (s) / (T * T);
2461 static double somers_d_v[3];
2462 static double somers_d_ase[3];
2463 static double somers_d_t[3];
2465 /* Calculate symmetric statistics and their asymptotic standard
2466 errors. Returns 0 if none could be calculated. */
2468 calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC],
2469 double t[N_SYMMETRIC])
2471 int q = min (ns_rows, ns_cols);
2480 for (i = 0; i < N_SYMMETRIC; i++)
2481 v[i] = ase[i] = t[i] = SYSMIS;
2484 /* Phi, Cramer's V, contingency coefficient. */
2485 if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC])
2487 double Xp = 0.; /* Pearson chi-square. */
2492 for (r = 0; r < n_rows; r++)
2493 for (c = 0; c < n_cols; c++)
2495 const double expected = row_tot[r] * col_tot[c] / W;
2496 const double freq = mat[n_cols * r + c];
2497 const double residual = freq - expected;
2499 Xp += residual * residual / expected;
2503 if (cmd.a_statistics[CRS_ST_PHI])
2505 v[0] = sqrt (Xp / W);
2506 v[1] = sqrt (Xp / (W * (q - 1)));
2508 if (cmd.a_statistics[CRS_ST_CC])
2509 v[2] = sqrt (Xp / (Xp + W));
2512 if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
2513 || cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
2518 double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
2525 for (r = 0; r < n_rows; r++)
2526 Dr -= row_tot[r] * row_tot[r];
2527 for (c = 0; c < n_cols; c++)
2528 Dc -= col_tot[c] * col_tot[c];
2534 cum = xnmalloc (n_cols * n_rows, sizeof *cum);
2535 for (c = 0; c < n_cols; c++)
2539 for (r = 0; r < n_rows; r++)
2540 cum[c + r * n_cols] = ct += mat[c + r * n_cols];
2550 for (i = 0; i < n_rows; i++)
2554 for (j = 1; j < n_cols; j++)
2555 Cij += col_tot[j] - cum[j + i * n_cols];
2558 for (j = 1; j < n_cols; j++)
2559 Dij += cum[j + (i - 1) * n_cols];
2563 double fij = mat[j + i * n_cols];
2569 assert (j < n_cols);
2571 Cij -= col_tot[j] - cum[j + i * n_cols];
2572 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2576 Cij += cum[j - 1 + (i - 1) * n_cols];
2577 Dij -= cum[j + (i - 1) * n_cols];
2583 if (cmd.a_statistics[CRS_ST_BTAU])
2584 v[3] = (P - Q) / sqrt (Dr * Dc);
2585 if (cmd.a_statistics[CRS_ST_CTAU])
2586 v[4] = (q * (P - Q)) / ((W * W) * (q - 1));
2587 if (cmd.a_statistics[CRS_ST_GAMMA])
2588 v[5] = (P - Q) / (P + Q);
2590 /* ASE for tau-b, tau-c, gamma. Calculations could be
2591 eliminated here, at expense of memory. */
2596 btau_cum = ctau_cum = gamma_cum = d_yx_cum = d_xy_cum = 0.;
2597 for (i = 0; i < n_rows; i++)
2601 for (j = 1; j < n_cols; j++)
2602 Cij += col_tot[j] - cum[j + i * n_cols];
2605 for (j = 1; j < n_cols; j++)
2606 Dij += cum[j + (i - 1) * n_cols];
2610 double fij = mat[j + i * n_cols];
2612 if (cmd.a_statistics[CRS_ST_BTAU])
2614 const double temp = (2. * sqrt (Dr * Dc) * (Cij - Dij)
2615 + v[3] * (row_tot[i] * Dc
2616 + col_tot[j] * Dr));
2617 btau_cum += fij * temp * temp;
2621 const double temp = Cij - Dij;
2622 ctau_cum += fij * temp * temp;
2625 if (cmd.a_statistics[CRS_ST_GAMMA])
2627 const double temp = Q * Cij - P * Dij;
2628 gamma_cum += fij * temp * temp;
2631 if (cmd.a_statistics[CRS_ST_D])
2633 d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
2634 - (P - Q) * (W - row_tot[i]));
2635 d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
2636 - (Q - P) * (W - col_tot[j]));
2641 assert (j < n_cols);
2643 Cij -= col_tot[j] - cum[j + i * n_cols];
2644 Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
2648 Cij += cum[j - 1 + (i - 1) * n_cols];
2649 Dij -= cum[j + (i - 1) * n_cols];
2655 btau_var = ((btau_cum
2656 - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
2658 if (cmd.a_statistics[CRS_ST_BTAU])
2660 ase[3] = sqrt (btau_var);
2661 t[3] = v[3] / (2 * sqrt ((ctau_cum - (P - Q) * (P - Q) / W)
2664 if (cmd.a_statistics[CRS_ST_CTAU])
2666 ase[4] = ((2 * q / ((q - 1) * W * W))
2667 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2668 t[4] = v[4] / ase[4];
2670 if (cmd.a_statistics[CRS_ST_GAMMA])
2672 ase[5] = ((4. / ((P + Q) * (P + Q))) * sqrt (gamma_cum));
2673 t[5] = v[5] / (2. / (P + Q)
2674 * sqrt (ctau_cum - (P - Q) * (P - Q) / W));
2676 if (cmd.a_statistics[CRS_ST_D])
2678 somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
2679 somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
2680 somers_d_t[0] = (somers_d_v[0]
2682 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2683 somers_d_v[1] = (P - Q) / Dc;
2684 somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
2685 somers_d_t[1] = (somers_d_v[1]
2687 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2688 somers_d_v[2] = (P - Q) / Dr;
2689 somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
2690 somers_d_t[2] = (somers_d_v[2]
2692 * sqrt (ctau_cum - pow2 (P - Q) / W)));
2698 /* Spearman correlation, Pearson's r. */
2699 if (cmd.a_statistics[CRS_ST_CORR])
2701 double *R = local_alloc (sizeof *R * n_rows);
2702 double *C = local_alloc (sizeof *C * n_cols);
2705 double y, t, c = 0., s = 0.;
2710 R[i] = s + (row_tot[i] + 1.) / 2.;
2717 assert (i < n_rows);
2722 double y, t, c = 0., s = 0.;
2727 C[j] = s + (col_tot[j] + 1.) / 2;
2734 assert (j < n_cols);
2738 calc_r (R, C, &v[6], &t[6], &ase[6]);
2744 calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
2748 /* Cohen's kappa. */
2749 if (cmd.a_statistics[CRS_ST_KAPPA] && ns_rows == ns_cols)
2751 double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
2754 for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
2755 i < ns_rows; i++, j++)
2759 while (col_tot[j] == 0.)
2762 prod = row_tot[i] * col_tot[j];
2763 sum = row_tot[i] + col_tot[j];
2765 sum_fii += mat[j + i * n_cols];
2767 sum_fiiri_ci += mat[j + i * n_cols] * sum;
2768 sum_riciri_ci += prod * sum;
2770 for (sum_fijri_ci2 = 0., i = 0; i < ns_rows; i++)
2771 for (j = 0; j < ns_cols; j++)
2773 double sum = row_tot[i] + col_tot[j];
2774 sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
2777 v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
2779 ase[8] = sqrt ((W * W * sum_rici
2780 + sum_rici * sum_rici
2781 - W * sum_riciri_ci)
2782 / (W * (W * W - sum_rici) * (W * W - sum_rici)));
2784 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
2785 / pow2 (W * W - sum_rici))
2786 + ((2. * (W - sum_fii)
2787 * (2. * sum_fii * sum_rici
2788 - W * sum_fiiri_ci))
2789 / cube (W * W - sum_rici))
2790 + (pow2 (W - sum_fii)
2791 * (W * sum_fijri_ci2 - 4.
2792 * sum_rici * sum_rici)
2793 / pow4 (W * W - sum_rici))));
2795 t[8] = v[8] / ase[8];
2802 /* Calculate risk estimate. */
2804 calc_risk (double *value, double *upper, double *lower, union value *c)
2806 double f11, f12, f21, f22;
2812 for (i = 0; i < 3; i++)
2813 value[i] = upper[i] = lower[i] = SYSMIS;
2816 if (ns_rows != 2 || ns_cols != 2)
2823 for (i = j = 0; i < n_cols; i++)
2824 if (col_tot[i] != 0.)
2833 f11 = mat[nz_cols[0]];
2834 f12 = mat[nz_cols[1]];
2835 f21 = mat[nz_cols[0] + n_cols];
2836 f22 = mat[nz_cols[1] + n_cols];
2838 c[0] = cols[nz_cols[0]];
2839 c[1] = cols[nz_cols[1]];
2842 value[0] = (f11 * f22) / (f12 * f21);
2843 v = sqrt (1. / f11 + 1. / f12 + 1. / f21 + 1. / f22);
2844 lower[0] = value[0] * exp (-1.960 * v);
2845 upper[0] = value[0] * exp (1.960 * v);
2847 value[1] = (f11 * (f21 + f22)) / (f21 * (f11 + f12));
2848 v = sqrt ((f12 / (f11 * (f11 + f12)))
2849 + (f22 / (f21 * (f21 + f22))));
2850 lower[1] = value[1] * exp (-1.960 * v);
2851 upper[1] = value[1] * exp (1.960 * v);
2853 value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
2854 v = sqrt ((f11 / (f12 * (f11 + f12)))
2855 + (f21 / (f22 * (f21 + f22))));
2856 lower[2] = value[2] * exp (-1.960 * v);
2857 upper[2] = value[2] * exp (1.960 * v);
2862 /* Calculate directional measures. */
2864 calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
2865 double t[N_DIRECTIONAL])
2870 for (i = 0; i < N_DIRECTIONAL; i++)
2871 v[i] = ase[i] = t[i] = SYSMIS;
2875 if (cmd.a_statistics[CRS_ST_LAMBDA])
2877 double *fim = xnmalloc (n_rows, sizeof *fim);
2878 int *fim_index = xnmalloc (n_rows, sizeof *fim_index);
2879 double *fmj = xnmalloc (n_cols, sizeof *fmj);
2880 int *fmj_index = xnmalloc (n_cols, sizeof *fmj_index);
2881 double sum_fim, sum_fmj;
2883 int rm_index, cm_index;
2886 /* Find maximum for each row and their sum. */
2887 for (sum_fim = 0., i = 0; i < n_rows; i++)
2889 double max = mat[i * n_cols];
2892 for (j = 1; j < n_cols; j++)
2893 if (mat[j + i * n_cols] > max)
2895 max = mat[j + i * n_cols];
2899 sum_fim += fim[i] = max;
2900 fim_index[i] = index;
2903 /* Find maximum for each column. */
2904 for (sum_fmj = 0., j = 0; j < n_cols; j++)
2906 double max = mat[j];
2909 for (i = 1; i < n_rows; i++)
2910 if (mat[j + i * n_cols] > max)
2912 max = mat[j + i * n_cols];
2916 sum_fmj += fmj[j] = max;
2917 fmj_index[j] = index;
2920 /* Find maximum row total. */
2923 for (i = 1; i < n_rows; i++)
2924 if (row_tot[i] > rm)
2930 /* Find maximum column total. */
2933 for (j = 1; j < n_cols; j++)
2934 if (col_tot[j] > cm)
2940 v[0] = (sum_fim + sum_fmj - cm - rm) / (2. * W - rm - cm);
2941 v[1] = (sum_fmj - rm) / (W - rm);
2942 v[2] = (sum_fim - cm) / (W - cm);
2944 /* ASE1 for Y given X. */
2948 for (accum = 0., i = 0; i < n_rows; i++)
2949 for (j = 0; j < n_cols; j++)
2951 const int deltaj = j == cm_index;
2952 accum += (mat[j + i * n_cols]
2953 * pow2 ((j == fim_index[i])
2958 ase[2] = sqrt (accum - W * v[0]) / (W - cm);
2961 /* ASE0 for Y given X. */
2965 for (accum = 0., i = 0; i < n_rows; i++)
2966 if (cm_index != fim_index[i])
2967 accum += (mat[i * n_cols + fim_index[i]]
2968 + mat[i * n_cols + cm_index]);
2969 t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
2972 /* ASE1 for X given Y. */
2976 for (accum = 0., i = 0; i < n_rows; i++)
2977 for (j = 0; j < n_cols; j++)
2979 const int deltaj = i == rm_index;
2980 accum += (mat[j + i * n_cols]
2981 * pow2 ((i == fmj_index[j])
2986 ase[1] = sqrt (accum - W * v[0]) / (W - rm);
2989 /* ASE0 for X given Y. */
2993 for (accum = 0., j = 0; j < n_cols; j++)
2994 if (rm_index != fmj_index[j])
2995 accum += (mat[j + n_cols * fmj_index[j]]
2996 + mat[j + n_cols * rm_index]);
2997 t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
3000 /* Symmetric ASE0 and ASE1. */
3005 for (accum0 = accum1 = 0., i = 0; i < n_rows; i++)
3006 for (j = 0; j < n_cols; j++)
3008 int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
3009 int temp1 = (i == rm_index) + (j == cm_index);
3010 accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
3011 accum1 += (mat[j + i * n_cols]
3012 * pow2 (temp0 + (v[0] - 1.) * temp1));
3014 ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
3015 t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
3016 / (2. * W - rm - cm));
3025 double sum_fij2_ri, sum_fij2_ci;
3026 double sum_ri2, sum_cj2;
3028 for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
3029 for (j = 0; j < n_cols; j++)
3031 double temp = pow2 (mat[j + i * n_cols]);
3032 sum_fij2_ri += temp / row_tot[i];
3033 sum_fij2_ci += temp / col_tot[j];
3036 for (sum_ri2 = 0., i = 0; i < n_rows; i++)
3037 sum_ri2 += row_tot[i] * row_tot[i];
3039 for (sum_cj2 = 0., j = 0; j < n_cols; j++)
3040 sum_cj2 += col_tot[j] * col_tot[j];
3042 v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2);
3043 v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2);
3047 if (cmd.a_statistics[CRS_ST_UC])
3049 double UX, UY, UXY, P;
3050 double ase1_yx, ase1_xy, ase1_sym;
3053 for (UX = 0., i = 0; i < n_rows; i++)
3054 if (row_tot[i] > 0.)
3055 UX -= row_tot[i] / W * log (row_tot[i] / W);
3057 for (UY = 0., j = 0; j < n_cols; j++)
3058 if (col_tot[j] > 0.)
3059 UY -= col_tot[j] / W * log (col_tot[j] / W);
3061 for (UXY = P = 0., i = 0; i < n_rows; i++)
3062 for (j = 0; j < n_cols; j++)
3064 double entry = mat[j + i * n_cols];
3069 P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
3070 UXY -= entry / W * log (entry / W);
3073 for (ase1_yx = ase1_xy = ase1_sym = 0., i = 0; i < n_rows; i++)
3074 for (j = 0; j < n_cols; j++)
3076 double entry = mat[j + i * n_cols];
3081 ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
3082 + (UX - UXY) * log (col_tot[j] / W));
3083 ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
3084 + (UY - UXY) * log (row_tot[i] / W));
3085 ase1_sym += entry * pow2 ((UXY
3086 * log (row_tot[i] * col_tot[j] / (W * W)))
3087 - (UX + UY) * log (entry / W));
3090 v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
3091 ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
3092 t[5] = v[5] / ((2. / (W * (UX + UY)))
3093 * sqrt (P - pow2 (UX + UY - UXY) / W));
3095 v[6] = (UX + UY - UXY) / UX;
3096 ase[6] = sqrt (ase1_xy) / (W * UX * UX);
3097 t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
3099 v[7] = (UX + UY - UXY) / UY;
3100 ase[7] = sqrt (ase1_yx) / (W * UY * UY);
3101 t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
3105 if (cmd.a_statistics[CRS_ST_D])
3110 calc_symmetric (NULL, NULL, NULL);
3111 for (i = 0; i < 3; i++)
3113 v[8 + i] = somers_d_v[i];
3114 ase[8 + i] = somers_d_ase[i];
3115 t[8 + i] = somers_d_t[i];
3120 if (cmd.a_statistics[CRS_ST_ETA])
3123 double sum_Xr, sum_X2r;
3127 for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
3129 sum_Xr += rows[i].f * row_tot[i];
3130 sum_X2r += rows[i].f * rows[i].f * row_tot[i];
3132 SX = sum_X2r - sum_Xr * sum_Xr / W;
3134 for (SXW = 0., j = 0; j < n_cols; j++)
3138 for (cum = 0., i = 0; i < n_rows; i++)
3140 SXW += rows[i].f * rows[i].f * mat[j + i * n_cols];
3141 cum += rows[i].f * mat[j + i * n_cols];
3144 SXW -= cum * cum / col_tot[j];
3146 v[11] = sqrt (1. - SXW / SX);
3150 double sum_Yc, sum_Y2c;
3154 for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++)
3156 sum_Yc += cols[i].f * col_tot[i];
3157 sum_Y2c += cols[i].f * cols[i].f * col_tot[i];
3159 SY = sum_Y2c - sum_Yc * sum_Yc / W;
3161 for (SYW = 0., i = 0; i < n_rows; i++)
3165 for (cum = 0., j = 0; j < n_cols; j++)
3167 SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
3168 cum += cols[j].f * mat[j + i * n_cols];
3171 SYW -= cum * cum / row_tot[i];
3173 v[12] = sqrt (1. - SYW / SY);
3180 /* A wrapper around data_out() that limits string output to short
3181 string width and null terminates the result. */
3183 format_short (char *s, const struct fmt_spec *fp, const union value *v)
3185 struct fmt_spec fmt_subst;
3187 /* Limit to short string width. */
3188 if (formats[fp->type].cat & FCAT_STRING)
3192 assert (fmt_subst.type == FMT_A || fmt_subst.type == FMT_AHEX);
3193 if (fmt_subst.type == FMT_A)
3194 fmt_subst.w = min (8, fmt_subst.w);
3196 fmt_subst.w = min (16, fmt_subst.w);
3202 data_out (s, fp, v);
3204 /* Null terminate. */