/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
/* A crosstabulation of 2 or more variables. */
struct pivot_table
{
+ struct crosstabs_proc *proc;
struct fmt_spec weight_format; /* Format for weight variable. */
double missing; /* Weight of missing cases. */
/* Integer mode variable info. */
struct var_range
{
+ struct hmap_node hmap_node; /* Node in struct crosstabs_proc's var_ranges map. */
+ const struct variable *var; /* Variable this range describes; the lookup key. */
int min; /* Minimum value. */
int max; /* Maximum value + 1. */
int count; /* max - min. */
};
-static inline struct var_range *
-get_var_range (const struct variable *v)
-{
- return var_get_aux (v);
-}
-
struct crosstabs_proc
{
const struct dictionary *dict;
/* Variables specifies on VARIABLES. */
const struct variable **variables;
size_t n_variables;
+ struct hmap var_ranges;
/* TABLES. */
struct pivot_table *pivots;
bool descending; /* True if descending sort order is requested. */
};
+/* Returns the range recorded for a variable in a procedure's var_ranges map,
+   or a null pointer if none was recorded.  Given internal linkage, like the
+   other helper functions declared below. */
+static const struct var_range *get_var_range (const struct crosstabs_proc *,
+ const struct variable *);
+
static bool should_tabulate_case (const struct pivot_table *,
const struct ccase *, enum mv_class exclude);
static void tabulate_general_case (struct pivot_table *, const struct ccase *,
cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
{
const struct variable *wv = dict_get_weight (dataset_dict (ds));
+ struct var_range *range, *next_range;
struct crosstabs_proc proc;
struct casegrouper *grouper;
struct casereader *input, *group;
proc.bad_warn = true;
proc.variables = NULL;
proc.n_variables = 0;
+ hmap_init (&proc.var_ranges);
proc.pivots = NULL;
proc.n_pivots = 0;
proc.descending = false;
exit:
free (proc.variables);
+ HMAP_FOR_EACH_SAFE (range, next_range, struct var_range, hmap_node,
+ &proc.var_ranges)
+ {
+ hmap_delete (&proc.var_ranges, &range->hmap_node);
+ free (range);
+ }
for (pt = &proc.pivots[0]; pt < &proc.pivots[proc.n_pivots]; pt++)
{
free (pt->vars);
struct pivot_table *pt = &proc->pivots[proc->n_pivots++];
int j;
+ pt->proc = proc;
pt->weight_format = proc->weight_format;
pt->missing = 0.;
pt->n_vars = n_by;
for (i = orig_nv; i < proc->n_variables; i++)
{
+ const struct variable *var = proc->variables[i];
struct var_range *vr = xmalloc (sizeof *vr);
+
+ vr->var = var;
vr->min = min;
vr->max = max + 1.;
vr->count = max - min + 1;
- var_attach_aux (proc->variables[i], vr, var_dtor_free);
+ hmap_insert (&proc->var_ranges, &vr->hmap_node,
+ hash_pointer (var, 0));
}
if (lex_token (lexer) == T_SLASH)
\f
/* Data file processing. */
+/* Returns the var_range that PROC has recorded for VAR, or a null pointer if
+   PROC's var_ranges map holds no entry for VAR.
+
+   Entries are hashed on the variable's address, so the bucket walk compares
+   pointers to skip entries for other variables that share the bucket. */
+const struct var_range *
+get_var_range (const struct crosstabs_proc *proc, const struct variable *var)
+{
+ /* Skip the hash computation and bucket walk when no ranges were recorded. */
+ if (!hmap_is_empty (&proc->var_ranges))
+ {
+ const struct var_range *range;
+
+ HMAP_FOR_EACH_IN_BUCKET (range, struct var_range, hmap_node,
+ hash_pointer (var, 0), &proc->var_ranges)
+ if (range->var == var)
+ return range;
+ }
+
+ return NULL;
+}
+
static bool
should_tabulate_case (const struct pivot_table *pt, const struct ccase *c,
enum mv_class exclude)
for (j = 0; j < pt->n_vars; j++)
{
const struct variable *var = pt->vars[j];
- struct var_range *range = get_var_range (var);
+ const struct var_range *range = get_var_range (pt->proc, var);
if (var_is_value_missing (var, case_data (c, var), exclude))
return false;
pt->missing = 0.0;
- /* Free only the members that were allocated in this
- function. The other pointer members are either both
- allocated and destroyed at a lower level (in
- output_pivot_table), or both allocated and destroyed at
- a higher level (in crs_custom_tables and free_proc,
+ /* Free the members that were allocated in this function (and the values
+ owned by the entries).
+
+ The other pointer members are either both allocated and destroyed at a
+ lower level (in output_pivot_table), or both allocated and destroyed
+ at a higher level (in crs_custom_tables and free_proc,
respectively). */
+ for (i = 0; i < pt->n_vars; i++)
+ {
+ int width = var_get_width (pt->vars[i]);
+ if (value_needs_init (width))
+ {
+ size_t j;
+
+ for (j = 0; j < pt->n_entries; j++)
+ value_destroy (&pt->entries[j]->values[i], width);
+ }
+ }
+
for (i = 0; i < pt->n_entries; i++)
free (pt->entries[i]);
free (pt->entries);
ds_cstr (&vars));
ds_destroy (&vars);
+ free (pt->cols);
return;
}
with index VAR_IDX takes on. The values are returned as a
malloc()'d array stored in *VALUES, with the number of values
stored in *VALUE_CNT.
- */
+
+ The caller must eventually free *VALUES, but each pointer in *VALUES points
+ to existing data not owned by *VALUES itself. */
static void
enum_var_values (const struct pivot_table *pt, int var_idx,
union value **valuesp, int *n_values, bool descending)
{
const struct variable *var = pt->vars[var_idx];
- struct var_range *range = get_var_range (var);
+ const struct var_range *range = get_var_range (pt->proc, var);
union value *values;
size_t i;
format_cell_entry (struct tab_table *table, int c, int r, double value,
char suffix, bool mark_missing, const struct dictionary *dict)
{
- const struct fmt_spec f = {FMT_F, 10, 1};
union value v;
char suffixes[3];
int suffix_len;
char *s;
v.f = value;
- s = data_out (&v, dict_get_encoding (dict), &f);
+ s = data_out (&v, dict_get_encoding (dict), settings_get_format ());
suffix_len = 0;
if (suffix != 0)
\f
/* Statistical calculations. */
-/* Returns the value of the gamma (factorial) function for an integer
+/* Returns the logarithm of the gamma (factorial) function for an integer
argument PT. */
static double
-gamma_int (double pt)
+log_gamma_int (double pt)
{
- double r = 1;
+ double r = 0;
int i;
+ /* Sum log (i) over the integers 2 <= i < PT.  For an integer PT this is
+ the logarithm of (PT - 1)!, i.e. of the product 2 * 3 * ... * (PT - 1). */
for (i = 2; i < pt; i++)
- r *= i;
+ r += log(i);
+
return r;
}
static inline double
Pr (int a, int b, int c, int d)
{
- return (gamma_int (a + b + 1.) / gamma_int (a + 1.)
- * gamma_int (c + d + 1.) / gamma_int (b + 1.)
- * gamma_int (a + c + 1.) / gamma_int (c + 1.)
- * gamma_int (b + d + 1.) / gamma_int (d + 1.)
- / gamma_int (a + b + c + d + 1.));
+ /* Each log_gamma_int (x + 1.) term is log (x!), so the result is
+ (a+b)! (c+d)! (a+c)! (b+d)! / (a! b! c! d! (a+b+c+d)!), formed as a sum
+ and difference of logarithms and exponentiated only at the end. */
+ return exp (log_gamma_int (a + b + 1.) - log_gamma_int (a + 1.)
+ + log_gamma_int (c + d + 1.) - log_gamma_int (b + 1.)
+ + log_gamma_int (a + c + 1.) - log_gamma_int (c + 1.)
+ + log_gamma_int (b + d + 1.) - log_gamma_int (d + 1.)
+ - log_gamma_int (a + b + c + d + 1.));
}
/* Swap the contents of A and B. */
calc_fisher (int a, int b, int c, int d, double *fisher1, double *fisher2)
{
int pt;
+ double pn1;
if (MIN (c, d) < MIN (a, b))
swap (&a, &c), swap (&b, &d);
swap (&a, &c), swap (&b, &d);
}
- *fisher1 = 0.;
- for (pt = 0; pt <= a; pt++)
- *fisher1 += Pr (a - pt, b + pt, c + pt, d - pt);
+ pn1 = Pr (a, b, c, d);
+ *fisher1 = pn1;
+ for (pt = 1; pt <= a; pt++)
+ {
+ *fisher1 += Pr (a - pt, b + pt, c + pt, d - pt);
+ }
*fisher2 = *fisher1;
+
for (pt = 1; pt <= b; pt++)
- *fisher2 += Pr (a + pt, b - pt, c - pt, d + pt);
+ {
+ double p = Pr (a + pt, b - pt, c - pt, d + pt);
+ if (p < pn1)
+ *fisher2 += p;
+ }
}
/* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
}
/* Fisher. */
- if (f11 < 5. || f12 < 5. || f21 < 5. || f22 < 5.)
- calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
+ calc_fisher (f11 + .5, f12 + .5, f21 + .5, f22 + .5, fisher1, fisher2);
}
/* Calculate Mantel-Haenszel. */