/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
/* FIXME:
- - Pearson's R (but not Spearman!) is off a little.
- - T values for Spearman's R and Pearson's R are wrong.
- - How to calculate significance of symmetric and directional measures?
- - Asymmetric ASEs and T values for lambda are wrong.
- - ASE of Goodman and Kruskal's tau is not calculated.
- - ASE of symmetric somers' d is wrong.
- - Approx. T of uncertainty coefficient is wrong.
+ - How to calculate significance of some symmetric and directional measures?
+ - How to calculate ASE for symmetric Somers ' d?
+ - How to calculate ASE for Goodman and Kruskal's tau?
+ - How to calculate approx. T of symmetric uncertainty coefficient?
*/
#include <config.h>
#include <ctype.h>
+#include <float.h>
#include <gsl/gsl_cdf.h>
#include <stdlib.h>
#include <stdio.h>
: MV_NEVER);
if (proc.mode == GENERAL && proc.exclude == MV_NEVER)
{
- msg (SE, _("Missing mode REPORT not allowed in general mode. "
- "Assuming MISSING=TABLE."));
+ msg (SE, _("Missing mode %s not allowed in general mode. "
+ "Assuming %s."), "REPORT", "MISSING=TABLE");
proc.exclude = MV_ANY;
}
struct crosstabs_proc *proc = proc_;
if (proc->n_pivots)
{
- msg (SE, _("VARIABLES must be specified before TABLES."));
+ msg (SE, _("%s must be specified before %s."), "VARIABLES", "TABLES");
return 0;
}
int i;
summary = tab_create (7, 3 + proc->n_pivots);
+ tab_set_format (summary, RC_WEIGHT, &proc->weight_format);
tab_title (summary, _("Summary."));
tab_headers (summary, 1, 0, 3, 0);
tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
n[2] = n[0] + n[1];
for (i = 0; i < 3; i++)
{
- tab_double (summary, i * 2 + 1, 0, TAB_RIGHT, n[i],
- &proc->weight_format);
+ tab_double (summary, i * 2 + 1, 0, TAB_RIGHT, n[i], NULL, RC_WEIGHT);
tab_text_format (summary, i * 2 + 2, 0, TAB_RIGHT, "%.1f%%",
n[i] / n[2] * 100.);
}
static struct tab_table *create_crosstab_table (struct crosstabs_proc *,
struct pivot_table *);
-static struct tab_table *create_chisq_table (struct pivot_table *);
-static struct tab_table *create_sym_table (struct pivot_table *);
-static struct tab_table *create_risk_table (struct pivot_table *);
-static struct tab_table *create_direct_table (struct pivot_table *);
+static struct tab_table *create_chisq_table (struct crosstabs_proc *proc, struct pivot_table *);
+static struct tab_table *create_sym_table (struct crosstabs_proc *proc, struct pivot_table *);
+static struct tab_table *create_risk_table (struct crosstabs_proc *proc, struct pivot_table *);
+static struct tab_table *create_direct_table (struct crosstabs_proc *proc, struct pivot_table *);
static void display_dimensions (struct crosstabs_proc *, struct pivot_table *,
struct tab_table *, int first_difference);
static void display_crosstabulation (struct crosstabs_proc *,
if (proc->cells)
table = create_crosstab_table (proc, pt);
if (proc->statistics & (1u << CRS_ST_CHISQ))
- chisq = create_chisq_table (pt);
+ chisq = create_chisq_table (proc, pt);
if (proc->statistics & ((1u << CRS_ST_PHI) | (1u << CRS_ST_CC)
| (1u << CRS_ST_BTAU) | (1u << CRS_ST_CTAU)
| (1u << CRS_ST_GAMMA) | (1u << CRS_ST_CORR)
| (1u << CRS_ST_KAPPA)))
- sym = create_sym_table (pt);
+ sym = create_sym_table (proc, pt);
if (proc->statistics & (1u << CRS_ST_RISK))
- risk = create_risk_table (pt);
+ risk = create_risk_table (proc, pt);
if (proc->statistics & ((1u << CRS_ST_LAMBDA) | (1u << CRS_ST_UC)
| (1u << CRS_ST_D) | (1u << CRS_ST_ETA)))
- direct = create_direct_table (pt);
+ direct = create_direct_table (proc, pt);
row0 = row1 = 0;
while (find_crosstab (pt, &row0, &row1))
table = tab_create (x.n_consts + 1 + x.n_cols + 1,
(x.n_entries / x.n_cols) * 3 / 2 * proc->n_cells + 10);
tab_headers (table, x.n_consts + 1, 0, 2, 0);
+ tab_set_format (table, RC_WEIGHT, &proc->weight_format);
/* First header line. */
tab_joint_text (table, x.n_consts + 1, 0,
}
static struct tab_table *
-create_chisq_table (struct pivot_table *pt)
+create_chisq_table (struct crosstabs_proc *proc, struct pivot_table *pt)
{
struct tab_table *chisq;
chisq = tab_create (6 + (pt->n_vars - 2),
pt->n_entries / pt->n_cols * 3 / 2 * N_CHISQ + 10);
tab_headers (chisq, 1 + (pt->n_vars - 2), 0, 1, 0);
+ tab_set_format (chisq, RC_WEIGHT, &proc->weight_format);
tab_title (chisq, _("Chi-square tests."));
/* Symmetric measures. */
static struct tab_table *
-create_sym_table (struct pivot_table *pt)
+create_sym_table (struct crosstabs_proc *proc, struct pivot_table *pt)
{
struct tab_table *sym;
sym = tab_create (6 + (pt->n_vars - 2),
pt->n_entries / pt->n_cols * 7 + 10);
+
+ tab_set_format (sym, RC_WEIGHT, &proc->weight_format);
+
tab_headers (sym, 2 + (pt->n_vars - 2), 0, 1, 0);
tab_title (sym, _("Symmetric measures."));
/* Risk estimate. */
static struct tab_table *
-create_risk_table (struct pivot_table *pt)
+create_risk_table (struct crosstabs_proc *proc, struct pivot_table *pt)
{
struct tab_table *risk;
risk = tab_create (4 + (pt->n_vars - 2), pt->n_entries / pt->n_cols * 4 + 10);
tab_headers (risk, 1 + pt->n_vars - 2, 0, 2, 0);
tab_title (risk, _("Risk estimate."));
+ tab_set_format (risk, RC_WEIGHT, &proc->weight_format);
tab_offset (risk, pt->n_vars - 2, 0);
tab_joint_text_format (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE,
/* Directional measures. */
static struct tab_table *
-create_direct_table (struct pivot_table *pt)
+create_direct_table (struct crosstabs_proc *proc, struct pivot_table *pt)
{
struct tab_table *direct;
pt->n_entries / pt->n_cols * 7 + 10);
tab_headers (direct, 3 + (pt->n_vars - 2), 0, 1, 0);
tab_title (direct, _("Directional measures."));
+ tab_set_format (direct, RC_WEIGHT, &proc->weight_format);
tab_offset (direct, pt->n_vars - 2, 0);
tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
if (i != 2)
{
- tab_double (chisq, 1, 0, TAB_RIGHT, chisq_v[i], NULL);
- tab_double (chisq, 2, 0, TAB_RIGHT, df[i], &pt->weight_format);
+ tab_double (chisq, 1, 0, TAB_RIGHT, chisq_v[i], NULL, RC_OTHER);
+ tab_double (chisq, 2, 0, TAB_RIGHT, df[i], NULL, RC_WEIGHT);
tab_double (chisq, 3, 0, TAB_RIGHT,
- gsl_cdf_chisq_Q (chisq_v[i], df[i]), NULL);
+ gsl_cdf_chisq_Q (chisq_v[i], df[i]), NULL, RC_PVALUE);
}
else
{
*showed_fisher = true;
- tab_double (chisq, 4, 0, TAB_RIGHT, fisher2, NULL);
- tab_double (chisq, 5, 0, TAB_RIGHT, fisher1, NULL);
+ tab_double (chisq, 4, 0, TAB_RIGHT, fisher2, NULL, RC_PVALUE);
+ tab_double (chisq, 5, 0, TAB_RIGHT, fisher1, NULL, RC_PVALUE);
}
tab_next_row (chisq);
}
tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
- tab_double (chisq, 1, 0, TAB_RIGHT, pt->total, &pt->weight_format);
+ tab_double (chisq, 1, 0, TAB_RIGHT, pt->total, NULL, RC_WEIGHT);
tab_next_row (chisq);
tab_offset (chisq, 0, -1);
}
tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
- tab_double (sym, 2, 0, TAB_RIGHT, sym_v[i], NULL);
+ tab_double (sym, 2, 0, TAB_RIGHT, sym_v[i], NULL, RC_OTHER);
if (sym_ase[i] != SYSMIS)
- tab_double (sym, 3, 0, TAB_RIGHT, sym_ase[i], NULL);
+ tab_double (sym, 3, 0, TAB_RIGHT, sym_ase[i], NULL, RC_OTHER);
if (sym_t[i] != SYSMIS)
- tab_double (sym, 4, 0, TAB_RIGHT, sym_t[i], NULL);
- /*tab_double (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), NULL);*/
+ tab_double (sym, 4, 0, TAB_RIGHT, sym_t[i], NULL, RC_OTHER);
+ /*tab_double (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), NULL, RC_PVALUE);*/
tab_next_row (sym);
}
tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
- tab_double (sym, 2, 0, TAB_RIGHT, pt->total, &pt->weight_format);
+ tab_double (sym, 2, 0, TAB_RIGHT, pt->total, NULL, RC_WEIGHT);
tab_next_row (sym);
tab_offset (sym, 0, -1);
case 1:
case 2:
if (var_is_numeric (rv))
- sprintf (buf, _("For cohort %s = %g"),
- var_to_string (rv), pt->rows[i - 1].f);
+ sprintf (buf, _("For cohort %s = %.*g"),
+ var_to_string (rv), DBL_DIG + 1, pt->rows[i - 1].f);
else
sprintf (buf, _("For cohort %s = %.*s"),
var_to_string (rv),
}
tab_text (risk, 0, 0, TAB_LEFT, buf);
- tab_double (risk, 1, 0, TAB_RIGHT, risk_v[i], NULL);
- tab_double (risk, 2, 0, TAB_RIGHT, lower[i], NULL);
- tab_double (risk, 3, 0, TAB_RIGHT, upper[i], NULL);
+ tab_double (risk, 1, 0, TAB_RIGHT, risk_v[i], NULL, RC_OTHER);
+ tab_double (risk, 2, 0, TAB_RIGHT, lower[i], NULL, RC_OTHER);
+ tab_double (risk, 3, 0, TAB_RIGHT, upper[i], NULL, RC_OTHER);
tab_next_row (risk);
}
tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
- tab_double (risk, 1, 0, TAB_RIGHT, pt->total, &pt->weight_format);
+ tab_double (risk, 1, 0, TAB_RIGHT, pt->total, NULL, RC_WEIGHT);
tab_next_row (risk);
tab_offset (risk, 0, -1);
static int calc_directional (struct crosstabs_proc *, struct pivot_table *,
double[N_DIRECTIONAL], double[N_DIRECTIONAL],
- double[N_DIRECTIONAL]);
+ double[N_DIRECTIONAL], double[N_DIRECTIONAL]);
/* Display directional measures. */
static void
double direct_v[N_DIRECTIONAL];
double direct_ase[N_DIRECTIONAL];
double direct_t[N_DIRECTIONAL];
+ double sig[N_DIRECTIONAL];
int i;
- if (!calc_directional (proc, pt, direct_v, direct_ase, direct_t))
+ if (!calc_directional (proc, pt, direct_v, direct_ase, direct_t, sig))
return;
tab_offset (direct, pt->n_consts + pt->n_vars - 2, -1);
}
}
- tab_double (direct, 3, 0, TAB_RIGHT, direct_v[i], NULL);
+ tab_double (direct, 3, 0, TAB_RIGHT, direct_v[i], NULL, RC_OTHER);
if (direct_ase[i] != SYSMIS)
- tab_double (direct, 4, 0, TAB_RIGHT, direct_ase[i], NULL);
+ tab_double (direct, 4, 0, TAB_RIGHT, direct_ase[i], NULL, RC_OTHER);
if (direct_t[i] != SYSMIS)
- tab_double (direct, 5, 0, TAB_RIGHT, direct_t[i], NULL);
- /*tab_double (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), NULL);*/
+ tab_double (direct, 5, 0, TAB_RIGHT, direct_t[i], NULL, RC_OTHER);
+ tab_double (direct, 6, 0, TAB_RIGHT, sig[i], NULL, RC_PVALUE);
tab_next_row (direct);
}
}
/* Fisher. */
- if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
- calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
+ calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
}
/* Calculate Mantel-Haenszel. */
}
}
-/* Calculate the value of Pearson's r. r is stored into R, ase_1 into
- ASE_1, and ase_0 into ASE_0. The row and column values must be
+/* Calculate the value of Pearson's r. r is stored into R, its T value into
+ T, and standard error into ERROR. The row and column values must be
passed in PT and Y. */
static void
calc_r (struct pivot_table *pt,
- double *PT, double *Y, double *r, double *ase_0, double *ase_1)
+ double *PT, double *Y, double *r, double *t, double *error)
{
double SX, SY, S, T;
double Xbar, Ybar;
SY = sum_Y2c - pow2 (sum_Yc) / pt->total;
T = sqrt (SX * SY);
*r = S / T;
- *ase_0 = sqrt ((sum_X2Y2f - pow2 (sum_XYf) / pt->total) / (sum_X2r * sum_Y2c));
+ *t = *r / sqrt (1 - pow2 (*r)) * sqrt (pt->total - 2);
{
double s, c, y, t;
c = (t - s) - y;
s = t;
}
- *ase_1 = sqrt (s) / (T * T);
+ *error = sqrt (s) / (T * T);
}
}
if (proc->statistics & (1u << CRS_ST_D))
{
somers_d_v[0] = (P - Q) / (.5 * (Dc + Dr));
- somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
+ somers_d_ase[0] = SYSMIS;
somers_d_t[0] = (somers_d_v[0]
/ (4 / (Dc + Dr)
* sqrt (ctau_cum - pow2 (P - Q) / pt->total)));
}
calc_r (pt, R, C, &v[6], &t[6], &ase[6]);
- t[6] = v[6] / t[6];
free (R);
free (C);
calc_r (pt, (double *) pt->rows, (double *) pt->cols, &v[7], &t[7], &ase[7]);
- t[7] = v[7] / t[7];
}
/* Cohen's kappa. */
static int
calc_directional (struct crosstabs_proc *proc, struct pivot_table *pt,
double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL],
- double t[N_DIRECTIONAL])
+ double t[N_DIRECTIONAL], double sig[N_DIRECTIONAL])
{
{
int i;
for (i = 0; i < N_DIRECTIONAL; i++)
- v[i] = ase[i] = t[i] = SYSMIS;
+ v[i] = ase[i] = t[i] = sig[i] = SYSMIS;
}
/* Lambda. */
/* ASE1 for Y given PT. */
{
- double accum;
+ double accum;
- for (accum = 0., i = 0; i < pt->n_rows; i++)
- for (j = 0; j < pt->n_cols; j++)
- {
- const int deltaj = j == cm_index;
- accum += (pt->mat[j + i * pt->n_cols]
- * pow2 ((j == fim_index[i])
- - deltaj
- + v[0] * deltaj));
- }
-
- ase[2] = sqrt (accum - pt->total * v[0]) / (pt->total - cm);
+ accum = 0.;
+ for (i = 0; i < pt->n_rows; i++)
+ if (cm_index == fim_index[i])
+ accum += fim[i];
+ ase[2] = sqrt ((pt->total - sum_fim) * (sum_fim + cm - 2. * accum)
+ / pow3 (pt->total - cm));
}
/* ASE0 for Y given PT. */
/* ASE1 for PT given Y. */
{
- double accum;
+ double accum;
- for (accum = 0., i = 0; i < pt->n_rows; i++)
- for (j = 0; j < pt->n_cols; j++)
- {
- const int deltaj = i == rm_index;
- accum += (pt->mat[j + i * pt->n_cols]
- * pow2 ((i == fmj_index[j])
- - deltaj
- + v[0] * deltaj));
- }
-
- ase[1] = sqrt (accum - pt->total * v[0]) / (pt->total - rm);
+ accum = 0.;
+ for (j = 0; j < pt->n_cols; j++)
+ if (rm_index == fmj_index[j])
+ accum += fmj[j];
+ ase[1] = sqrt ((pt->total - sum_fmj) * (sum_fmj + rm - 2. * accum)
+ / pow3 (pt->total - rm));
}
/* ASE0 for PT given Y. */
* pow2 (temp0 + (v[0] - 1.) * temp1));
}
ase[0] = sqrt (accum1 - 4. * pt->total * v[0] * v[0]) / (2. * pt->total - rm - cm);
- t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / pt->total))
+ t[0] = v[0] / (sqrt (accum0 - pow2 (sum_fim + sum_fmj - cm - rm) / pt->total)
/ (2. * pt->total - rm - cm));
}
+ for (i = 0; i < 3; i++)
+ sig[i] = 2 * gsl_cdf_ugaussian_Q (t[i]);
+
free (fim);
free (fim_index);
free (fmj);
free (fmj_index);
+ /* Tau. */
{
double sum_fij2_ri, sum_fij2_ci;
double sum_ri2, sum_cj2;
v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
ase[5] = (2. / (pt->total * pow2 (UX + UY))) * sqrt (ase1_sym);
- t[5] = v[5] / ((2. / (pt->total * (UX + UY)))
- * sqrt (P - pow2 (UX + UY - UXY) / pt->total));
+ t[5] = SYSMIS;
v[6] = (UX + UY - UXY) / UX;
ase[6] = sqrt (ase1_xy) / (pt->total * UX * UX);
v[8 + i] = somers_d_v[i];
ase[8 + i] = somers_d_ase[i];
t[8 + i] = somers_d_t[i];
+ sig[8 + i] = 2 * gsl_cdf_ugaussian_Q (fabs (somers_d_t[i]));
}
}
}