-/* PSPP - computes sample statistics.
+/* PSPP - a program for statistical analysis.
Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* FIXME:
#include <stdio.h>
#include <data/case.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
#include <data/data-out.h>
#include <data/dictionary.h>
#include <data/format.h>
#include <language/dictionary/split-file.h>
#include <language/lexer/lexer.h>
#include <language/lexer/variable-parser.h>
-#include <libpspp/alloc.h>
#include <libpspp/array.h>
#include <libpspp/assertion.h>
#include <libpspp/compiler.h>
#include <libpspp/hash.h>
-#include <libpspp/magic.h>
-#include <libpspp/message.h>
#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <libpspp/pool.h>
#include <output/table.h>
#include "minmax.h"
+#include "xalloc.h"
+#include "xmalloca.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
int nvar; /* Number of variables. */
double missing; /* Missing cases count. */
int ofs; /* Integer mode: Offset into sorted_tab[]. */
- struct variable *vars[2]; /* At least two variables; sorted by
+ const struct variable *vars[2]; /* At least two variables; sorted by
larger indices first. */
};
};
/* Returns the auxiliary data attached to V, as obtained from
   var_get_aux(), treated as a struct var_range.  (Elsewhere in this
   file a struct var_range is attached to variables with
   var_attach_aux().) */
static inline struct var_range *
-get_var_range (struct variable *v)
+get_var_range (const struct variable *v)
{
return var_get_aux (v);
}
static struct table_entry **sorted_tab; /* Sorted table. */
/* Variables specified on VARIABLES. */
-static struct variable **variables;
+static const struct variable **variables;
static size_t variables_cnt;
/* TABLES. */
static struct pool *pl_col; /* For column data. */
static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
-static void precalc (const struct ccase *, void *, const struct dataset *);
-static bool calc_general (const struct ccase *, void *, const struct dataset *);
-static bool calc_integer (const struct ccase *, void *, const struct dataset *);
-static bool postcalc (void *, const struct dataset *);
+static void precalc (struct casereader *, const struct dataset *);
+static void calc_general (struct ccase *, const struct dataset *);
+static void calc_integer (struct ccase *, const struct dataset *);
+static void postcalc (void);
static void submit (struct tab_table *);
static void format_short (char *s, const struct fmt_spec *fp,
cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
{
int result = internal_cmd_crosstabs (lexer, ds);
+ int i;
free (variables);
pool_destroy (pl_tc);
pool_destroy (pl_col);
-
+
+ for (i = 0; i < nxtab; i++)
+ free (xtab[i]);
+ free (xtab);
+
return result;
}
static int
internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
{
- int i;
+ struct casegrouper *grouper;
+ struct casereader *input, *group;
bool ok;
+ int i;
variables = NULL;
variables_cnt = 0;
{
cmd.a_cells[CRS_CL_COUNT] = 1;
}
- else
+ else
{
int count = 0;
for (i = 0; i < CRS_ST_count; i++)
cmd.a_statistics[i] = 1;
}
-
+
/* MISSING. */
if (cmd.miss == CRS_REPORT && mode == GENERAL)
{
else
write_style = CRS_WR_NONE;
- ok = procedure_with_splits (ds, precalc,
- mode == GENERAL ? calc_general : calc_integer,
- postcalc, NULL);
+ input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
+ NULL, NULL);
+ grouper = casegrouper_create_splits (input, dataset_dict (ds));
+ while (casegrouper_get_next_group (grouper, &group))
+ {
+ struct ccase c;
+
+ precalc (group, ds);
+
+ for (; casereader_read (group, &c); case_destroy (&c))
+ {
+ if (mode == GENERAL)
+ calc_general (&c, ds);
+ else
+ calc_integer (&c, ds);
+ }
+ casereader_destroy (group);
+
+ postcalc ();
+ }
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
}
static int
crs_custom_tables (struct lexer *lexer, struct dataset *ds, struct cmd_crosstabs *cmd UNUSED, void *aux UNUSED)
{
- struct var_set *var_set;
+ struct const_var_set *var_set;
int n_by;
- struct variable ***by = NULL;
+ const struct variable ***by = NULL;
size_t *by_nvar = NULL;
size_t nx = 1;
int success = 0;
/* Ensure that this is a TABLES subcommand. */
if (!lex_match_id (lexer, "TABLES")
- && (lex_token (lexer) != T_ID || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
+ && (lex_token (lexer) != T_ID ||
+ dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
&& lex_token (lexer) != T_ALL)
return 2;
lex_match (lexer, '=');
if (variables != NULL)
- var_set = var_set_create_from_array (variables, variables_cnt);
+ var_set = const_var_set_create_from_array (variables, variables_cnt);
else
- var_set = var_set_create_from_dict (dataset_dict (ds));
+ var_set = const_var_set_create_from_dict (dataset_dict (ds));
assert (var_set != NULL);
-
+
for (n_by = 0; ;)
{
by = xnrealloc (by, n_by + 1, sizeof *by);
by_nvar = xnrealloc (by_nvar, n_by + 1, sizeof *by_nvar);
- if (!parse_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
+ if (!parse_const_var_set_vars (lexer, var_set, &by[n_by], &by_nvar[n_by],
PV_NO_DUPLICATE | PV_NO_SCRATCH))
goto done;
- if (xalloc_oversized (nx, by_nvar[n_by]))
+ if (xalloc_oversized (nx, by_nvar[n_by]))
{
- msg (SE, _("Too many crosstabulation variables or dimensions."));
+ msg (SE, _("Too many cross-tabulation variables or dimensions."));
goto done;
}
nx *= by_nvar[n_by];
lex_error (lexer, _("expecting BY"));
goto done;
}
- else
+ else
break;
}
}
-
+
{
int *by_iter = xcalloc (n_by, sizeof *by_iter);
int i;
for (i = 0; i < n_by; i++)
x->vars[i] = by[i][by_iter[i]];
}
-
+
{
int i;
free (by_nvar);
}
- var_set_destroy (var_set);
+ const_var_set_destroy (var_set);
return success;
}
}
lex_match (lexer, '=');
-
+
for (;;)
{
size_t orig_nv = variables_cnt;
size_t i;
long min, max;
-
- if (!parse_variables (lexer, dataset_dict (ds),
+
+ if (!parse_variables_const (lexer, dataset_dict (ds),
&variables, &variables_cnt,
(PV_APPEND | PV_NUMERIC
| PV_NO_DUPLICATE | PV_NO_SCRATCH)))
goto lossage;
}
lex_get (lexer);
-
- for (i = orig_nv; i < variables_cnt; i++)
+
+ for (i = orig_nv; i < variables_cnt; i++)
{
struct var_range *vr = xmalloc (sizeof *vr);
vr->min = min;
vr->count = max - min + 1;
var_attach_aux (variables[i], vr, var_dtor_free);
}
-
+
if (lex_token (lexer) == '/')
break;
}
-
+
return 1;
lossage:
static unsigned hash_table_entry (const void *, const void *);
/* Set up the crosstabulation tables for processing. */
-static void
-precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds)
+static void
+precalc (struct casereader *input, const struct dataset *ds)
{
- output_split_file_values (ds, first);
+ struct ccase c;
+
+ if (casereader_peek (input, 0, &c))
+ {
+ output_split_file_values (ds, &c);
+ case_destroy (&c);
+ }
+
if (mode == GENERAL)
{
gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
NULL, NULL);
}
- else
+ else
{
int i;
x->ofs = n_sorted_tab;
- for (j = 2; j < x->nvar; j++)
+ for (j = 2; j < x->nvar; j++)
count *= get_var_range (x->vars[j - 2])->count;
-
+
sorted_tab = xnrealloc (sorted_tab,
n_sorted_tab + count, sizeof *sorted_tab);
- v = local_alloc (sizeof *v * x->nvar);
- for (j = 2; j < x->nvar; j++)
- v[j] = get_var_range (x->vars[j])->min;
+ v = xmalloca (sizeof *v * x->nvar);
+ for (j = 2; j < x->nvar; j++)
+ v[j] = get_var_range (x->vars[j])->min;
for (j = 0; j < count; j++)
{
struct table_entry *te;
te = sorted_tab[n_sorted_tab++]
= xmalloc (sizeof *te + sizeof (union value) * (x->nvar - 1));
te->table = i;
-
+
{
int row_cnt = get_var_range (x->vars[0])->count;
int col_cnt = get_var_range (x->vars[1])->count;
const int mat_size = row_cnt * col_cnt;
int m;
-
+
te->u.data = xnmalloc (mat_size, sizeof *te->u.data);
for (m = 0; m < mat_size; m++)
te->u.data[m] = 0.;
}
-
+
for (k = 2; k < x->nvar; k++)
te->values[k].f = v[k];
- for (k = 2; k < x->nvar; k++)
+ for (k = 2; k < x->nvar; k++)
{
struct var_range *vr = get_var_range (x->vars[k]);
if (++v[k] >= vr->max)
v[k] = vr->min;
else
- break;
+ break;
}
}
- local_free (v);
+ freea (v);
}
sorted_tab = xnrealloc (sorted_tab,
}
/* Form crosstabulations for general mode. */
-static bool
-calc_general (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
+static void
+calc_general (struct ccase *c, const struct dataset *ds)
{
- bool bad_warn = true;
-
/* Missing values to exclude. */
enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY
: cmd.miss == CRS_INCLUDE ? MV_SYSTEM
: MV_NEVER);
/* Case weight. */
- double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
+ double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
/* Flattened current table index. */
int t;
struct crosstab *x = xtab[t];
const size_t entry_size = (sizeof (struct table_entry)
+ sizeof (union value) * (x->nvar - 1));
- struct table_entry *te = local_alloc (entry_size);
+ struct table_entry *te = xmalloca (entry_size);
/* Construct table entry for the current record and table. */
te->table = t;
x->missing += weight;
goto next_crosstab;
}
-
+
if (var_is_numeric (x->vars[j]))
te->values[j].f = case_num (c, x->vars[j]);
else
{
- memcpy (te->values[j].s, case_str (c, x->vars[j]),
- var_get_width (x->vars[j]));
-
+ /* Copy at most MAX_SHORT_STRING bytes of the string value. */
+ size_t n = var_get_width (x->vars[j]);
+ if (n > MAX_SHORT_STRING)
+ n = MAX_SHORT_STRING;
+ memcpy (te->values[j].s, case_str (c, x->vars[j]), n);
+
/* Necessary in order to simplify comparisons. */
- memset (&te->values[j].s[var_get_width (x->vars[j])], 0,
- sizeof (union value) - var_get_width (x->vars[j]));
+ /* Zero everything after the N bytes just copied.  The offset
+ must be the clamped N, not the variable's full width:
+ N plus the fill length is exactly sizeof (union value),
+ whereas starting at an unclamped width would write past
+ the end of the value for wide strings. */
+ memset (&te->values[j].s[n], 0,
+ sizeof (union value) - n);
}
}
}
if (*tepp == NULL)
{
struct table_entry *tep = pool_alloc (pl_tc, entry_size);
-
+
te->u.freq = weight;
memcpy (tep, te, entry_size);
-
+
*tepp = tep;
}
else
}
next_crosstab:
- local_free (te);
+ freea (te);
}
-
- return true;
}
-static bool
-calc_integer (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
+static void
+calc_integer (struct ccase *c, const struct dataset *ds)
{
bool bad_warn = true;
/* Case weight. */
double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
-
+
/* Flattened current table index. */
int t;
-
+
for (t = 0; t < nxtab; t++)
{
struct crosstab *x = xtab[t];
int i, fact, ofs;
-
+
fact = i = 1;
ofs = x->ofs;
for (i = 0; i < x->nvar; i++)
{
- struct variable *const v = x->vars[i];
+ const struct variable *const v = x->vars[i];
struct var_range *vr = get_var_range (v);
double value = case_num (c, v);
-
+
/* Note that the first test also rules out SYSMIS. */
if ((value < vr->min || value >= vr->max)
|| (cmd.miss == CRS_TABLE
x->missing += weight;
goto next_crosstab;
}
-
+
if (i > 1)
{
ofs += fact * ((int) value - vr->min);
fact *= vr->count;
}
}
-
+
{
- struct variable *row_var = x->vars[ROW_VAR];
+ const struct variable *row_var = x->vars[ROW_VAR];
const int row = case_num (c, row_var) - get_var_range (row_var)->min;
- struct variable *col_var = x->vars[COL_VAR];
+ const struct variable *col_var = x->vars[COL_VAR];
const int col = case_num (c, col_var) - get_var_range (col_var)->min;
const int col_dim = get_var_range (col_var)->count;
sorted_tab[ofs]->u.data[col + row * col_dim] += weight;
}
-
+
next_crosstab: ;
}
-
- return true;
}
/* Compare the table_entry's at A and B and return a strcmp()-type
result. */
-static int
+static int
compare_table_entry (const void *a_, const void *b_, const void *aux UNUSED)
{
const struct table_entry *a = a_;
return 1;
else if (a->table < b->table)
return -1;
-
+
{
const struct crosstab *x = xtab[a->table];
int i;
else if (diffnum > 0)
return 1;
}
- else
+ else
{
const int diffstr = strncmp (a->values[i].s, b->values[i].s,
var_get_width (x->vars[i]));
return diffstr;
}
}
-
+
return 0;
}
hash = a->table;
for (i = 0; i < xtab[a->table]->nvar; i++)
hash ^= hsh_hash_bytes (&a->values[i], sizeof a->values[i]);
-
+
return hash;
}
\f
int *, int *, int *);
static void make_summary_table (void);
-static bool
-postcalc (void *aux UNUSED, const struct dataset *ds UNUSED)
+static void
+postcalc (void)
{
if (mode == GENERAL)
{
n_sorted_tab = hsh_count (gen_tab);
sorted_tab = (struct table_entry **) hsh_sort (gen_tab);
}
-
+
make_summary_table ();
-
+
/* Identify all the individual crosstabulation tables, and deal with
them. */
{
pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
if (pe == NULL)
break;
-
+
output_pivot_table (pb, pe, &mat, &row_tot, &col_tot,
&maxrows, &maxcols, &maxcells);
-
+
pb = pe;
}
free (mat);
free (row_tot);
free (col_tot);
}
-
- hsh_destroy (gen_tab);
- return true;
+ hsh_destroy (gen_tab);
+ if (mode == INTEGER)
+ {
+ int i;
+ for (i = 0; i < n_sorted_tab; i++)
+ {
+ free (sorted_tab[i]->u.data);
+ free (sorted_tab[i]);
+ }
+ free (sorted_tab);
+ }
}
static void insert_summary (struct tab_table *, int tab_index, double valid);
make_summary_table (void)
{
struct tab_table *summary;
-
+
struct table_entry **pb = sorted_tab, **pe;
int pc = n_sorted_tab;
int cur_tab = 0;
}
}
tab_offset (summary, 0, 3);
-
+
for (;;)
{
double valid;
-
+
pe = find_pivot_extent (pb, &pc, cmd.pivot == CRS_PIVOT);
if (pe == NULL)
break;
const int n_cols = get_var_range (x->vars[COL_VAR])->count;
const int n_rows = get_var_range (x->vars[ROW_VAR])->count;
const int count = n_cols * n_rows;
-
+
for (valid = 0.; pb < pe; pb++)
{
const double *data = (*pb)->u.data;
int i;
-
+
for (i = 0; i < count; i++)
valid += *data++;
}
pb = pe;
}
-
+
while (cur_tab < nxtab)
insert_summary (summary, cur_tab++, 0.);
struct crosstab *x = xtab[tab_index];
tab_hline (t, TAL_1, 0, 6, 0);
-
+
/* Crosstabulation name. */
{
- char *buf = local_alloc (128 * x->nvar);
+ char *buf = xmalloca (128 * x->nvar);
char *cp = buf;
int i;
}
tab_text (t, 0, 0, TAB_LEFT, buf);
- local_free (buf);
+ freea (buf);
}
-
+
/* Counts and percentages. */
{
double n[3];
n[i] / n[2] * 100.);
}
}
-
+
tab_next_row (t);
}
\f
/* Row values, number of rows. */
static union value *rows;
static int n_rows;
-
+
/* Number of statistically interesting columns/rows (columns/rows with
data in them). */
static int ns_cols, ns_rows;
/* First header line. */
tab_joint_text (table, nvar - 1, 0, (nvar - 1) + (n_cols - 1), 0,
TAB_CENTER | TAT_TITLE, var_get_name (x->vars[COL_VAR]));
-
+
tab_hline (table, TAL_1, nvar - 1, nvar + n_cols - 2, 1);
-
+
/* Second header line. */
{
int i;
/* Title. */
{
- char *title = local_alloc (x->nvar * 64 + 128);
+ char *title = xmalloca (x->nvar * 64 + 128);
char *cp = title;
int i;
-
+
if (cmd.pivot == CRS_PIVOT)
for (i = 0; i < nvar; i++)
{
int value;
const char *name;
};
-
- static const struct tuple cell_names[] =
+
+ static const struct tuple cell_names[] =
{
{CRS_CL_COUNT, N_("count")},
{CRS_CL_ROW, N_("row %")},
strcpy (cp, "].");
tab_title (table, "%s", title);
- local_free (title);
+ freea (title);
}
-
+
tab_offset (table, 0, 2);
}
else
table = NULL;
-
+
/* Chi-square table initialization. */
if (cmd.a_statistics[CRS_ST_CHISQ])
{
tab_headers (chisq, 1 + (nvar - 2), 0, 1, 0);
tab_title (chisq, _("Chi-square tests."));
-
+
tab_offset (chisq, nvar - 2, 0);
tab_text (chisq, 0, 0, TAB_LEFT | TAT_TITLE, _("Statistic"));
tab_text (chisq, 1, 0, TAB_RIGHT | TAT_TITLE, _("Value"));
}
else
chisq = NULL;
-
+
/* Symmetric measures. */
if (cmd.a_statistics[CRS_ST_PHI] || cmd.a_statistics[CRS_ST_CC]
|| cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
col_tot = *col_totp;
*maxcols = n_cols;
}
-
+
/* Allocate table space for the matrix. */
if (table && tab_row (table) + (n_rows + 1) * num_cells > tab_nr (table))
tab_realloc (table, -1,
*matp = xnrealloc (*matp, n_cols * n_rows, sizeof **matp);
*maxcells = n_cols * n_rows;
}
-
+
mat = *matp;
/* Build the matrix and calculate column totals. */
{
int r, c;
double *tp = col_tot;
-
+
assert (mode == INTEGER);
mat = (*tb)->u.data;
ns_cols = n_cols;
{
double cum = 0.;
double *cp = &mat[c];
-
+
for (r = 0; r < n_rows; r++)
cum += cp[r * n_cols];
*tp++ = cum;
}
}
-
+
{
double *cp;
-
+
for (ns_cols = 0, cp = col_tot; cp < &col_tot[n_cols]; cp++)
ns_cols += *cp != 0.;
}
double *mp = mat;
double *rp = row_tot;
int r, c;
-
+
for (ns_rows = 0, r = n_rows; r--; )
{
double cum = 0.;
cum += *tp++;
W = cum;
}
-
+
/* Find the first variable that differs from the last subtable,
then display the values of the dimensioning variables for
each table that needs it. */
{
int first_difference = nvar - 1;
-
+
if (tb != pb)
for (; ; first_difference--)
{
break;
}
cmp = *tb;
-
+
if (table)
display_dimensions (table, first_difference, *tb);
if (chisq)
display_risk ();
if (direct)
display_directional ();
-
+
tb = te;
free (rows);
}
submit (table);
-
+
if (chisq)
{
if (!chisq_fisher)
ns_rows--;
}
}
-
+
{
int c;
submit (struct tab_table *t)
{
int i;
-
+
if (t == NULL)
return;
-
+
tab_resize (t, -1, 0);
if (tab_nr (t) == tab_t (t))
{
crosstabs_dim (struct tab_table *t, struct outp_driver *d)
{
int i;
-
+
/* Width of a numerical column. */
int c = outp_string_width (d, "0.000000", OUTP_PROPORTIONAL);
if (cmd.miss == CRS_REPORT)
for (i = 0; i <= t->nc; i++)
w -= t->wrv[i];
w /= t->l;
-
+
if (w < d->prop_em_width * 8)
w = d->prop_em_width * 8;
break;
if (pivot)
continue;
-
+
if (memcmp (&(*tp)->values[2], &fp->values[2],
sizeof (union value) * (x->nvar - 2)))
break;
malloc()'d array stored in *VALUES, with the number of values
stored in *VALUE_CNT.
*/
-static void
+static void
enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
union value **values, int *value_cnt)
{
- struct variable *v = xtab[(*entries)->table]->vars[var_idx];
+ const struct variable *v = xtab[(*entries)->table]->vars[var_idx];
if (mode == GENERAL)
{
- int width = var_get_width (v);
+ int width = MIN (var_get_width (v), MAX_SHORT_STRING);
int i;
*values = xnmalloc (entry_cnt, sizeof **values);
{
struct var_range *vr = get_var_range (v);
int i;
-
+
assert (mode == INTEGER);
*values = xnmalloc (vr->count, sizeof **values);
for (i = 0; i < vr->count; i++)
const struct fmt_spec *print = var_get_print_format (var);
const char *label = var_lookup_value_label (var, v);
- if (label)
+ if (label)
{
tab_text (table, c, r, TAB_LEFT, label);
return;
const struct fmt_spec f = {FMT_F, 10, 1};
union value v;
struct substring s;
-
+
s.length = 10;
s.string = tab_alloc (table, 16);
v.f = value;
{
{
int r;
-
+
for (r = 0; r < n_rows; r++)
table_value_missing (table, nvar - 2, r * num_cells,
TAB_RIGHT, &rows[r], x->vars[ROW_VAR]);
}
tab_text (table, nvar - 2, n_rows * num_cells,
TAB_LEFT, _("Total"));
-
+
/* Put in the actual cells. */
{
double *mp = mat;
int r, i;
tab_offset (table, -1, tab_row (table) - num_cells * n_rows);
- for (r = 0; r < n_rows; r++)
+ for (r = 0; r < n_rows; r++)
{
- char suffix = 0;
bool mark_missing = false;
if (cmd.miss == CRS_REPORT
for (i = 0; i < num_cells; i++)
{
+ char suffix = 0;
double v;
switch (cells[i])
v = row_tot[r];
break;
case CRS_CL_ROW:
- v = 100.;
+ v = 100.0;
suffix = '%';
break;
case CRS_CL_COLUMN:
format_cell_entry (table, n_cols, 0, v, suffix, mark_missing);
tab_next_row (table);
- }
+ }
}
}
{
double ct = c < n_cols ? col_tot[c] : W;
bool mark_missing = false;
- char suffix = 0;
int i;
-
- if (cmd.miss == CRS_REPORT && c < n_cols
+
+ if (cmd.miss == CRS_REPORT && c < n_cols
&& var_is_num_missing (x->vars[COL_VAR], cols[c].f, MV_USER))
mark_missing = true;
for (i = 0; i < num_cells; i++)
{
+ char suffix = 0;
double v;
switch (cells[i])
{
case CRS_CL_COUNT:
v = ct;
- suffix = '%';
break;
case CRS_CL_ROW:
v = ct / W * 100.;
NOT_REACHED ();
}
- format_cell_entry (table, c, i, v, suffix, mark_missing);
+ format_cell_entry (table, c, i, v, suffix, mark_missing);
}
last_row = i;
}
tab_offset (table, -1, tab_row (table) + last_row);
}
-
+
tab_offset (table, 0, -1);
}
static void
display_chisq (void)
{
- static const char *chisq_stats[N_CHISQ] =
+ static const char *chisq_stats[N_CHISQ] =
{
N_("Pearson Chi-Square"),
N_("Likelihood Ratio"),
int s = 0;
int i;
-
+
calc_chisq (chisq_v, df, &fisher1, &fisher2);
tab_offset (chisq, nvar - 2, -1);
-
+
for (i = 0; i < N_CHISQ; i++)
{
if ((i != 2 && chisq_v[i] == SYSMIS)
|| (i == 2 && fisher1 == SYSMIS))
continue;
s = 1;
-
+
tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i]));
if (i != 2)
{
tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases"));
tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0);
tab_next_row (chisq);
-
+
tab_offset (chisq, 0, -1);
}
static void
display_symmetric (void)
{
- static const char *categories[] =
+ static const char *categories[] =
{
N_("Nominal by Nominal"),
N_("Ordinal by Ordinal"),
return;
tab_offset (sym, nvar - 2, -1);
-
+
for (i = 0; i < N_SYMMETRIC; i++)
{
if (sym_v[i] == SYSMIS)
last_cat = stats_categories[i];
tab_text (sym, 0, 0, TAB_LEFT, gettext (categories[last_cat]));
}
-
+
tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i]));
tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3);
if (sym_ase[i] != SYSMIS)
tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases"));
tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0);
tab_next_row (sym);
-
+
tab_offset (sym, 0, -1);
}
double risk_v[3], lower[3], upper[3];
union value c[2];
int i;
-
+
if (!calc_risk (risk_v, upper, lower, c))
return;
-
+
tab_offset (risk, nvar - 2, -1);
-
+
for (i = 0; i < 3; i++)
{
if (risk_v[i] == SYSMIS)
var_get_width (x->vars[ROW_VAR]), rows[i - 1].s);
break;
}
-
+
tab_text (risk, 0, 0, TAB_LEFT, buf);
tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3);
tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3);
tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases"));
tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0);
tab_next_row (risk);
-
+
tab_offset (risk, 0, -1);
}
static void
display_directional (void)
{
- static const char *categories[] =
+ static const char *categories[] =
{
N_("Nominal by Nominal"),
N_("Ordinal by Ordinal"),
N_("Eta"),
};
- static const char *types[] =
+ static const char *types[] =
{
N_("Symmetric"),
N_("%s Dependent"),
{
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
};
-
- static const int stats_stats[N_DIRECTIONAL] =
+
+ static const int stats_stats[N_DIRECTIONAL] =
{
0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
};
- static const int stats_types[N_DIRECTIONAL] =
+ static const int stats_types[N_DIRECTIONAL] =
{
0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
};
- static const int *stats_lookup[] =
+ static const int *stats_lookup[] =
{
stats_categories,
stats_stats,
{
-1, -1, -1,
};
-
+
double direct_v[N_DIRECTIONAL];
double direct_ase[N_DIRECTIONAL];
double direct_t[N_DIRECTIONAL];
-
+
int i;
if (!calc_directional (direct_v, direct_ase, direct_t))
return;
tab_offset (direct, nvar - 2, -1);
-
+
for (i = 0; i < N_DIRECTIONAL; i++)
{
if (direct_v[i] == SYSMIS)
continue;
-
+
{
int j;
{
if (j < 2)
tab_hline (direct, TAL_1, j, 6, 0);
-
+
for (; j < 3; j++)
{
const char *string;
string = var_get_name (x->vars[0]);
else
string = var_get_name (x->vars[1]);
-
+
tab_text (direct, j, 0, TAB_LEFT | TAT_PRINTF,
gettext (stats_names[j][k]), string);
}
}
}
-
+
tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3);
if (direct_ase[i] != SYSMIS)
tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3);
{
double r = 1;
int i;
-
+
for (i = 2; i < x; i++)
r *= i;
return r;
calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
{
int x;
-
+
if (MIN (c, d) < MIN (a, b))
swap (&a, &c), swap (&b, &d);
if (MIN (b, d) < MIN (a, c))
const double expected = row_tot[r] * col_tot[c] / W;
const double freq = mat[n_cols * r + c];
const double residual = freq - expected;
-
+
chisq[0] += residual * residual / expected;
if (freq)
chisq[1] += freq * log (expected / freq);
if (ns_cols == 2 && ns_rows == 2)
{
double f11, f12, f21, f22;
-
+
{
int nz_cols[2];
int i, j;
{
double r, ase_0, ase_1;
calc_r ((double *) rows, (double *) cols, &r, &ase_0, &ase_1);
-
+
chisq[4] = (W - 1.) * r * r;
df[4] = 1;
}
T = sqrt (SX * SY);
*r = S / T;
*ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c));
-
+
{
double s, c, y, t;
-
+
for (s = c = 0., i = 0; i < n_rows; i++)
for (j = 0; j < n_cols; j++)
{
double t[N_SYMMETRIC])
{
int q = MIN (ns_rows, ns_cols);
-
+
if (q <= 1)
return 0;
-
+
{
int i;
- if (v)
+ if (v)
for (i = 0; i < N_SYMMETRIC; i++)
v[i] = ase[i] = t[i] = SYSMIS;
}
{
int r, c;
-
+
for (r = 0; r < n_rows; r++)
for (c = 0; c < n_cols; c++)
{
const double expected = row_tot[r] * col_tot[c] / W;
const double freq = mat[n_cols * r + c];
const double residual = freq - expected;
-
+
Xp += residual * residual / expected;
}
}
if (cmd.a_statistics[CRS_ST_CC])
v[2] = sqrt (Xp / (Xp + W));
}
-
+
if (cmd.a_statistics[CRS_ST_BTAU] || cmd.a_statistics[CRS_ST_CTAU]
|| cmd.a_statistics[CRS_ST_GAMMA] || cmd.a_statistics[CRS_ST_D])
{
double P, Q;
double btau_cum, ctau_cum, gamma_cum, d_yx_cum, d_xy_cum;
double btau_var;
-
+
{
int r, c;
-
+
Dr = Dc = W * W;
for (r = 0; r < n_rows; r++)
Dr -= row_tot[r] * row_tot[r];
for (c = 0; c < n_cols; c++)
Dc -= col_tot[c] * col_tot[c];
}
-
+
{
int r, c;
for (c = 0; c < n_cols; c++)
{
double ct = 0.;
-
+
for (r = 0; r < n_rows; r++)
cum[c + r * n_cols] = ct += mat[c + r * n_cols];
}
}
-
+
/* P and Q. */
{
int i, j;
double fij = mat[j + i * n_cols];
P += fij * Cij;
Q += fij * Dij;
-
+
if (++j == n_cols)
break;
assert (j < n_cols);
Cij -= col_tot[j] - cum[j + i * n_cols];
Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
-
+
if (i > 0)
{
Cij += cum[j - 1 + (i - 1) * n_cols];
+ col_tot[j] * Dr));
btau_cum += fij * temp * temp;
}
-
+
{
const double temp = Cij - Dij;
ctau_cum += fij * temp * temp;
d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
- (Q - P) * (W - col_tot[j]));
}
-
+
if (++j == n_cols)
break;
assert (j < n_cols);
Cij -= col_tot[j] - cum[j + i * n_cols];
Dij += col_tot[j - 1] - cum[j - 1 + i * n_cols];
-
+
if (i > 0)
{
Cij += cum[j - 1 + (i - 1) * n_cols];
/* Spearman correlation, Pearson's r. */
if (cmd.a_statistics[CRS_ST_CORR])
{
- double *R = local_alloc (sizeof *R * n_rows);
- double *C = local_alloc (sizeof *C * n_cols);
-
+ double *R = xmalloca (sizeof *R * n_rows);
+ double *C = xmalloca (sizeof *C * n_cols);
+
{
double y, t, c = 0., s = 0.;
int i = 0;
-
+
for (;;)
{
R[i] = s + (row_tot[i] + 1.) / 2.;
assert (i < n_rows);
}
}
-
+
{
double y, t, c = 0., s = 0.;
int j = 0;
-
+
for (;;)
{
C[j] = s + (col_tot[j] + 1.) / 2;
assert (j < n_cols);
}
}
-
+
calc_r (R, C, &v[6], &t[6], &ase[6]);
t[6] = v[6] / t[6];
- local_free (R);
- local_free (C);
+ freea (R);
+ freea (C);
calc_r ((double *) rows, (double *) cols, &v[7], &t[7], &ase[7]);
t[7] = v[7] / t[7];
{
double sum_fii, sum_rici, sum_fiiri_ci, sum_fijri_ci2, sum_riciri_ci;
int i, j;
-
+
for (sum_fii = sum_rici = sum_fiiri_ci = sum_riciri_ci = 0., i = j = 0;
i < ns_rows; i++, j++)
{
double prod, sum;
-
+
while (col_tot[j] == 0.)
j++;
-
+
prod = row_tot[i] * col_tot[j];
sum = row_tot[i] + col_tot[j];
-
+
sum_fii += mat[j + i * n_cols];
sum_rici += prod;
sum_fiiri_ci += mat[j + i * n_cols] * sum;
double sum = row_tot[i] + col_tot[j];
sum_fijri_ci2 += mat[j + i * n_cols] * sum * sum;
}
-
+
v[8] = (W * sum_fii - sum_rici) / (W * W - sum_rici);
ase[8] = sqrt ((W * W * sum_rici
{
int i;
-
+
for (i = 0; i < 3; i++)
value[i] = upper[i] = lower[i] = SYSMIS;
}
-
+
if (ns_rows != 2 || ns_cols != 2)
return 0;
-
+
{
int nz_cols[2];
int i, j;
+ (f22 / (f21 * (f21 + f22))));
lower[1] = value[1] * exp (-1.960 * v);
upper[1] = value[1] * exp (1.960 * v);
-
+
value[2] = (f12 * (f21 + f22)) / (f22 * (f11 + f12));
v = sqrt ((f11 / (f12 * (f11 + f12)))
+ (f21 / (f22 * (f21 + f22))));
max = mat[j + i * n_cols];
index = j;
}
-
+
sum_fim += fim[i] = max;
fim_index[i] = index;
}
max = mat[j + i * n_cols];
index = i;
}
-
+
sum_fmj += fmj[j] = max;
fmj_index[j] = index;
}
- deltaj
+ v[0] * deltaj));
}
-
+
ase[2] = sqrt (accum - W * v[0]) / (W - cm);
}
/* ASE0 for Y given X. */
{
double accum;
-
+
for (accum = 0., i = 0; i < n_rows; i++)
if (cm_index != fim_index[i])
accum += (mat[i * n_cols + fim_index[i]]
- deltaj
+ v[0] * deltaj));
}
-
+
ase[1] = sqrt (accum - W * v[0]) / (W - rm);
}
/* ASE0 for X given Y. */
{
double accum;
-
+
for (accum = 0., j = 0; j < n_cols; j++)
if (rm_index != fmj_index[j])
accum += (mat[j + n_cols * fmj_index[j]]
free (fim_index);
free (fmj);
free (fmj_index);
-
+
{
double sum_fij2_ri, sum_fij2_ci;
double sum_ri2, sum_cj2;
for (UX = 0., i = 0; i < n_rows; i++)
if (row_tot[i] > 0.)
UX -= row_tot[i] / W * log (row_tot[i] / W);
-
+
for (UY = 0., j = 0; j < n_cols; j++)
if (col_tot[j] > 0.)
UY -= col_tot[j] / W * log (col_tot[j] / W);
if (entry <= 0.)
continue;
-
+
P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
UXY -= entry / W * log (entry / W);
}
if (entry <= 0.)
continue;
-
+
ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
+ (UX - UXY) * log (col_tot[j] / W));
ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
* log (row_tot[i] * col_tot[j] / (W * W)))
- (UX + UY) * log (entry / W));
}
-
+
v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
t[5] = v[5] / ((2. / (W * (UX + UY)))
* sqrt (P - pow2 (UX + UY - UXY) / W));
-
+
v[6] = (UX + UY - UXY) / UX;
ase[6] = sqrt (ase1_xy) / (W * UX * UX);
t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
-
+
v[7] = (UX + UY - UXY) / UY;
ase[7] = sqrt (ase1_yx) / (W * UY * UY);
t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
if (cmd.a_statistics[CRS_ST_D])
{
int i;
-
+
if (!sym)
calc_symmetric (NULL, NULL, NULL);
for (i = 0; i < 3; i++)
double sum_Xr, sum_X2r;
double SX, SXW;
int i, j;
-
+
for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++)
{
sum_Xr += rows[i].f * row_tot[i];
sum_X2r += rows[i].f * rows[i].f * row_tot[i];
}
SX = sum_X2r - sum_Xr * sum_Xr / W;
-
+
for (SXW = 0., j = 0; j < n_cols; j++)
{
double cum;
SYW += cols[j].f * cols[j].f * mat[j + i * n_cols];
cum += cols[j].f * mat[j + i * n_cols];
}
-
+
SYW -= cum * cum / row_tot[i];
}
v[12] = sqrt (1. - SYW / SY);
struct fmt_spec fmt_subst;
/* Limit to short string width. */
- if (fmt_is_string (fp->type))
+ if (fmt_is_string (fp->type))
{
fmt_subst = *fp;
/* Format. */
data_out (v, fp, s);
-
+
/* Null terminate. */
s[fp->w] = '\0';
}
-/*
+/*
Local Variables:
mode: c
End: