/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
/* FIXME:
- - Pearson's R (but not Spearman!) is off a little.
- - T values for Spearman's R and Pearson's R are wrong.
- How to calculate significance of symmetric and directional measures?
- Asymmetric ASEs and T values for lambda are wrong.
- ASE of Goodman and Kruskal's tau is not calculated.
#include <config.h>
#include <ctype.h>
+#include <float.h>
#include <gsl/gsl_cdf.h>
#include <stdlib.h>
#include <stdio.h>
: MV_NEVER);
if (proc.mode == GENERAL && proc.exclude == MV_NEVER)
{
- msg (SE, _("Missing mode REPORT not allowed in general mode. "
- "Assuming MISSING=TABLE."));
+ msg (SE, _("Missing mode %s not allowed in general mode. "
+ "Assuming %s."), "REPORT", "MISSING=TABLE");
proc.exclude = MV_ANY;
}
struct crosstabs_proc *proc = proc_;
if (proc->n_pivots)
{
- msg (SE, _("VARIABLES must be specified before TABLES."));
+ msg (SE, _("%s must be specified before %s."), "VARIABLES", "TABLES");
return 0;
}
pt->missing = 0.0;
- /* Free only the members that were allocated in this
- function. The other pointer members are either both
- allocated and destroyed at a lower level (in
- output_pivot_table), or both allocated and destroyed at
- a higher level (in crs_custom_tables and free_proc,
+ /* Free the members that were allocated in this function(and the values
+ owned by the entries.
+
+ The other pointer members are either both allocated and destroyed at a
+ lower level (in output_pivot_table), or both allocated and destroyed
+ at a higher level (in crs_custom_tables and free_proc,
respectively). */
+ for (i = 0; i < pt->n_vars; i++)
+ {
+ int width = var_get_width (pt->vars[i]);
+ if (value_needs_init (width))
+ {
+ size_t j;
+
+ for (j = 0; j < pt->n_entries; j++)
+ value_destroy (&pt->entries[j]->values[i], width);
+ }
+ }
+
for (i = 0; i < pt->n_entries; i++)
free (pt->entries[i]);
free (pt->entries);
int i;
summary = tab_create (7, 3 + proc->n_pivots);
+ tab_set_format (summary, RC_WEIGHT, &proc->weight_format);
tab_title (summary, _("Summary."));
tab_headers (summary, 1, 0, 3, 0);
tab_joint_text (summary, 1, 0, 6, 0, TAB_CENTER, _("Cases"));
n[2] = n[0] + n[1];
for (i = 0; i < 3; i++)
{
- tab_double (summary, i * 2 + 1, 0, TAB_RIGHT, n[i],
- &proc->weight_format);
+ tab_double (summary, i * 2 + 1, 0, TAB_RIGHT, n[i], NULL, RC_WEIGHT);
tab_text_format (summary, i * 2 + 2, 0, TAB_RIGHT, "%.1f%%",
n[i] / n[2] * 100.);
}
static struct tab_table *create_crosstab_table (struct crosstabs_proc *,
struct pivot_table *);
-static struct tab_table *create_chisq_table (struct pivot_table *);
-static struct tab_table *create_sym_table (struct pivot_table *);
-static struct tab_table *create_risk_table (struct pivot_table *);
-static struct tab_table *create_direct_table (struct pivot_table *);
+static struct tab_table *create_chisq_table (struct crosstabs_proc *proc, struct pivot_table *);
+static struct tab_table *create_sym_table (struct crosstabs_proc *proc, struct pivot_table *);
+static struct tab_table *create_risk_table (struct crosstabs_proc *proc, struct pivot_table *);
+static struct tab_table *create_direct_table (struct crosstabs_proc *proc, struct pivot_table *);
static void display_dimensions (struct crosstabs_proc *, struct pivot_table *,
struct tab_table *, int first_difference);
static void display_crosstabulation (struct crosstabs_proc *,
ds_cstr (&vars));
ds_destroy (&vars);
+ free (pt->cols);
return;
}
if (proc->cells)
table = create_crosstab_table (proc, pt);
if (proc->statistics & (1u << CRS_ST_CHISQ))
- chisq = create_chisq_table (pt);
+ chisq = create_chisq_table (proc, pt);
if (proc->statistics & ((1u << CRS_ST_PHI) | (1u << CRS_ST_CC)
| (1u << CRS_ST_BTAU) | (1u << CRS_ST_CTAU)
| (1u << CRS_ST_GAMMA) | (1u << CRS_ST_CORR)
| (1u << CRS_ST_KAPPA)))
- sym = create_sym_table (pt);
+ sym = create_sym_table (proc, pt);
if (proc->statistics & (1u << CRS_ST_RISK))
- risk = create_risk_table (pt);
+ risk = create_risk_table (proc, pt);
if (proc->statistics & ((1u << CRS_ST_LAMBDA) | (1u << CRS_ST_UC)
| (1u << CRS_ST_D) | (1u << CRS_ST_ETA)))
- direct = create_direct_table (pt);
+ direct = create_direct_table (proc, pt);
row0 = row1 = 0;
while (find_crosstab (pt, &row0, &row1))
table = tab_create (x.n_consts + 1 + x.n_cols + 1,
(x.n_entries / x.n_cols) * 3 / 2 * proc->n_cells + 10);
tab_headers (table, x.n_consts + 1, 0, 2, 0);
+ tab_set_format (table, RC_WEIGHT, &proc->weight_format);
/* First header line. */
tab_joint_text (table, x.n_consts + 1, 0,
}
static struct tab_table *
-create_chisq_table (struct pivot_table *pt)
+create_chisq_table (struct crosstabs_proc *proc, struct pivot_table *pt)
{
struct tab_table *chisq;
chisq = tab_create (6 + (pt->n_vars - 2),
pt->n_entries / pt->n_cols * 3 / 2 * N_CHISQ + 10);
tab_headers (chisq, 1 + (pt->n_vars - 2), 0, 1, 0);
+ tab_set_format (chisq, RC_WEIGHT, &proc->weight_format);
tab_title (chisq, _("Chi-square tests."));
/* Symmetric measures. */
static struct tab_table *
-create_sym_table (struct pivot_table *pt)
+create_sym_table (struct crosstabs_proc *proc, struct pivot_table *pt)
{
struct tab_table *sym;
sym = tab_create (6 + (pt->n_vars - 2),
pt->n_entries / pt->n_cols * 7 + 10);
+
+ tab_set_format (sym, RC_WEIGHT, &proc->weight_format);
+
tab_headers (sym, 2 + (pt->n_vars - 2), 0, 1, 0);
tab_title (sym, _("Symmetric measures."));
/* Risk estimate. */
static struct tab_table *
-create_risk_table (struct pivot_table *pt)
+create_risk_table (struct crosstabs_proc *proc, struct pivot_table *pt)
{
struct tab_table *risk;
risk = tab_create (4 + (pt->n_vars - 2), pt->n_entries / pt->n_cols * 4 + 10);
tab_headers (risk, 1 + pt->n_vars - 2, 0, 2, 0);
tab_title (risk, _("Risk estimate."));
+ tab_set_format (risk, RC_WEIGHT, &proc->weight_format);
tab_offset (risk, pt->n_vars - 2, 0);
tab_joint_text_format (risk, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE,
/* Directional measures. */
static struct tab_table *
-create_direct_table (struct pivot_table *pt)
+create_direct_table (struct crosstabs_proc *proc, struct pivot_table *pt)
{
struct tab_table *direct;
pt->n_entries / pt->n_cols * 7 + 10);
tab_headers (direct, 3 + (pt->n_vars - 2), 0, 1, 0);
tab_title (direct, _("Directional measures."));
+ tab_set_format (direct, RC_WEIGHT, &proc->weight_format);
tab_offset (direct, pt->n_vars - 2, 0);
tab_text (direct, 0, 0, TAB_LEFT | TAT_TITLE, _("Category"));
with index VAR_IDX takes on. The values are returned as a
malloc()'d array stored in *VALUES, with the number of values
stored in *VALUE_CNT.
- */
+
+ The caller must eventually free *VALUES, but each pointer in *VALUES points
+ to existing data not owned by *VALUES itself. */
static void
enum_var_values (const struct pivot_table *pt, int var_idx,
union value **valuesp, int *n_values, bool descending)
tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
if (i != 2)
{
- tab_double (chisq, 1, 0, TAB_RIGHT, chisq_v[i], NULL);
- tab_double (chisq, 2, 0, TAB_RIGHT, df[i], &pt->weight_format);
+ tab_double (chisq, 1, 0, TAB_RIGHT, chisq_v[i], NULL, RC_OTHER);
+ tab_double (chisq, 2, 0, TAB_RIGHT, df[i], NULL, RC_WEIGHT);
tab_double (chisq, 3, 0, TAB_RIGHT,
- gsl_cdf_chisq_Q (chisq_v[i], df[i]), NULL);
+ gsl_cdf_chisq_Q (chisq_v[i], df[i]), NULL, RC_PVALUE);
}
else
{
*showed_fisher = true;
- tab_double (chisq, 4, 0, TAB_RIGHT, fisher2, NULL);
- tab_double (chisq, 5, 0, TAB_RIGHT, fisher1, NULL);
+ tab_double (chisq, 4, 0, TAB_RIGHT, fisher2, NULL, RC_PVALUE);
+ tab_double (chisq, 5, 0, TAB_RIGHT, fisher1, NULL, RC_PVALUE);
}
tab_next_row (chisq);
}
tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
- tab_double (chisq, 1, 0, TAB_RIGHT, pt->total, &pt->weight_format);
+ tab_double (chisq, 1, 0, TAB_RIGHT, pt->total, NULL, RC_WEIGHT);
tab_next_row (chisq);
tab_offset (chisq, 0, -1);
}
tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
- tab_double (sym, 2, 0, TAB_RIGHT, sym_v[i], NULL);
+ tab_double (sym, 2, 0, TAB_RIGHT, sym_v[i], NULL, RC_OTHER);
if (sym_ase[i] != SYSMIS)
- tab_double (sym, 3, 0, TAB_RIGHT, sym_ase[i], NULL);
+ tab_double (sym, 3, 0, TAB_RIGHT, sym_ase[i], NULL, RC_OTHER);
if (sym_t[i] != SYSMIS)
- tab_double (sym, 4, 0, TAB_RIGHT, sym_t[i], NULL);
- /*tab_double (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), NULL);*/
+ tab_double (sym, 4, 0, TAB_RIGHT, sym_t[i], NULL, RC_OTHER);
+ /*tab_double (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), NULL, RC_PVALUE);*/
tab_next_row (sym);
}
tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
- tab_double (sym, 2, 0, TAB_RIGHT, pt->total, &pt->weight_format);
+ tab_double (sym, 2, 0, TAB_RIGHT, pt->total, NULL, RC_WEIGHT);
tab_next_row (sym);
tab_offset (sym, 0, -1);
case 1:
case 2:
if (var_is_numeric (rv))
- sprintf (buf, _("For cohort %s = %g"),
- var_to_string (rv), pt->rows[i - 1].f);
+ sprintf (buf, _("For cohort %s = %.*g"),
+ var_to_string (rv), DBL_DIG + 1, pt->rows[i - 1].f);
else
sprintf (buf, _("For cohort %s = %.*s"),
var_to_string (rv),
}
tab_text (risk, 0, 0, TAB_LEFT, buf);
- tab_double (risk, 1, 0, TAB_RIGHT, risk_v[i], NULL);
- tab_double (risk, 2, 0, TAB_RIGHT, lower[i], NULL);
- tab_double (risk, 3, 0, TAB_RIGHT, upper[i], NULL);
+ tab_double (risk, 1, 0, TAB_RIGHT, risk_v[i], NULL, RC_OTHER);
+ tab_double (risk, 2, 0, TAB_RIGHT, lower[i], NULL, RC_OTHER);
+ tab_double (risk, 3, 0, TAB_RIGHT, upper[i], NULL, RC_OTHER);
tab_next_row (risk);
}
tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
- tab_double (risk, 1, 0, TAB_RIGHT, pt->total, &pt->weight_format);
+ tab_double (risk, 1, 0, TAB_RIGHT, pt->total, NULL, RC_WEIGHT);
tab_next_row (risk);
tab_offset (risk, 0, -1);
}
}
- tab_double (direct, 3, 0, TAB_RIGHT, direct_v[i], NULL);
+ tab_double (direct, 3, 0, TAB_RIGHT, direct_v[i], NULL, RC_OTHER);
if (direct_ase[i] != SYSMIS)
- tab_double (direct, 4, 0, TAB_RIGHT, direct_ase[i], NULL);
+ tab_double (direct, 4, 0, TAB_RIGHT, direct_ase[i], NULL, RC_OTHER);
if (direct_t[i] != SYSMIS)
- tab_double (direct, 5, 0, TAB_RIGHT, direct_t[i], NULL);
- /*tab_double (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), NULL);*/
+ tab_double (direct, 5, 0, TAB_RIGHT, direct_t[i], NULL, RC_OTHER);
+ /*tab_double (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), NULL, RC_PVALUE);*/
tab_next_row (direct);
}
\f
/* Statistical calculations. */
-/* Returns the value of the gamma (factorial) function for an integer
+/* Returns the value of the logarithm of gamma (factorial) function for an integer
argument PT. */
static double
-gamma_int (double pt)
+log_gamma_int (double pt)
{
- double r = 1;
+ double r = 0;
int i;
for (i = 2; i < pt; i++)
- r *= i;
+ r += log(i);
+
return r;
}
static inline double
Pr (int a, int b, int c, int d)
{
- return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
- * gamma_int (c + d + 1.) / gamma_int (b + 1.)
- * gamma_int (a + c + 1.) / gamma_int (c + 1.)
- * gamma_int (b + d + 1.) / gamma_int (d + 1.)
- / gamma_int (a + b + c + d + 1.));
+ return exp (log_gamma_int (a + b + 1.) - log_gamma_int (a + 1.)
+ + log_gamma_int (c + d + 1.) - log_gamma_int (b + 1.)
+ + log_gamma_int (a + c + 1.) - log_gamma_int (c + 1.)
+ + log_gamma_int (b + d + 1.) - log_gamma_int (d + 1.)
+ - log_gamma_int (a + b + c + d + 1.));
}
/* Swap the contents of A and B. */
calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
{
int pt;
+ double pn1;
if (MIN (c, d) < MIN (a, b))
swap (&a, &c), swap (&b, &d);
swap (&a, &c), swap (&b, &d);
}
- *fisher1 = 0.;
- for (pt = 0; pt <= a; pt++)
- *fisher1 += Pr (a - pt, b + pt, c + pt, d - pt);
+ pn1 = Pr (a, b, c, d);
+ *fisher1 = pn1;
+ for (pt = 1; pt <= a; pt++)
+ {
+ *fisher1 += Pr (a - pt, b + pt, c + pt, d - pt);
+ }
*fisher2 = *fisher1;
+
for (pt = 1; pt <= b; pt++)
- *fisher2 += Pr (a + pt, b - pt, c - pt, d + pt);
+ {
+ double p = Pr (a + pt, b - pt, c - pt, d + pt);
+ if (p < pn1)
+ *fisher2 += p;
+ }
}
/* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
}
/* Fisher. */
- if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
- calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
+ calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
}
/* Calculate Mantel-Haenszel. */
}
}
-/* Calculate the value of Pearson's r. r is stored into R, ase_1 into
- ASE_1, and ase_0 into ASE_0. The row and column values must be
+/* Calculate the value of Pearson's r. r is stored into R, its T value into
+ T, and standard error into ERROR. The row and column values must be
passed in PT and Y. */
static void
calc_r (struct pivot_table *pt,
- double *PT, double *Y, double *r, double *ase_0, double *ase_1)
+ double *PT, double *Y, double *r, double *t, double *error)
{
double SX, SY, S, T;
double Xbar, Ybar;
SY = sum_Y2c - pow2 (sum_Yc) / pt->total;
T = sqrt (SX * SY);
*r = S / T;
- *ase_0 = sqrt ((sum_X2Y2f - pow2 (sum_XYf) / pt->total) / (sum_X2r * sum_Y2c));
+ *t = *r / sqrt (1 - pow2 (*r)) * sqrt (pt->total - 2);
{
double s, c, y, t;
c = (t - s) - y;
s = t;
}
- *ase_1 = sqrt (s) / (T * T);
+ *error = sqrt (s) / (T * T);
}
}
}
calc_r (pt, R, C, &v[6], &t[6], &ase[6]);
- t[6] = v[6] / t[6];
free (R);
free (C);
calc_r (pt, (double *) pt->rows, (double *) pt->cols, &v[7], &t[7], &ase[7]);
- t[7] = v[7] / t[7];
}
/* Cohen's kappa. */