X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fchisquare.c;h=4593df4116eda81be5230a64ee2eee6c5f9fa15f;hb=b5c82cc9aabe7e641011130240ae1b2e84348e23;hp=e9ab64c29ddd4879aebe06487834ed9399e14258;hpb=f5c108becd49d78f4898cab11352291f5689d24e;p=pspp-builds.git
diff --git a/src/language/stats/chisquare.c b/src/language/stats/chisquare.c
index e9ab64c2..4593df41 100644
--- a/src/language/stats/chisquare.c
+++ b/src/language/stats/chisquare.c
@@ -1,20 +1,18 @@
-/* PSPP - computes sample statistics.
- Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include
@@ -23,6 +21,7 @@
#include
#include
+#include
#include
#include
#include
@@ -31,7 +30,6 @@
#include
#include
#include
-#include
#include
#include
#include
@@ -41,6 +39,8 @@
#include
+#include "xalloc.h"
+
#include "gettext.h"
#define _(msgid) gettext (msgid)
@@ -58,7 +58,7 @@ create_freq_hash_with_range (const struct dictionary *dict,
{
bool warn = true;
float i_d;
- struct ccase c;
+ struct ccase *c;
struct hsh_table *freq_hash =
hsh_create (4, compare_freq, hash_freq,
@@ -68,47 +68,22 @@ create_freq_hash_with_range (const struct dictionary *dict,
/* Populate the hash with zero entries */
for (i_d = trunc (lo); i_d <= trunc (hi); i_d += 1.0 )
{
- union value the_value;
struct freq_mutable *fr = xmalloc (sizeof (*fr));
-
- the_value.f = i_d;
-
- fr->value = value_dup (&the_value, 0);
+ value_init (&fr->value, 0);
+ fr->value.f = i_d;
fr->count = 0;
-
hsh_insert (freq_hash, fr);
}
- while (casereader_read (input, &c))
+ for (; (c = casereader_read (input)) != NULL; case_unref (c))
{
- union value obs_value;
- struct freq **existing_fr;
- struct freq *fr = xmalloc(sizeof (*fr));
- fr->value = case_data (&c, var);
-
- fr->count = dict_get_case_weight (dict, &c, &warn);
-
- obs_value.f = trunc (fr->value->f);
-
- if ( obs_value.f < lo || obs_value.f > hi)
- {
- free (fr);
- case_destroy (&c);
- continue;
- }
-
- fr->value = &obs_value;
-
- existing_fr = (struct freq **) hsh_probe (freq_hash, fr);
-
- /* This must exist in the hash, because we previously populated it
- with zero counts */
- assert (*existing_fr);
-
- (*existing_fr)->count += fr->count;
- free (fr);
-
- case_destroy (&c);
+ struct freq_mutable fr;
+ fr.value.f = trunc (case_num (c, var));
+ if (fr.value.f >= lo && fr.value.f <= hi)
+ {
+ struct freq_mutable *existing_fr = hsh_force_find (freq_hash, &fr);
+ existing_fr->count += dict_get_case_weight (dict, c, &warn);
+ }
}
if (casereader_destroy (input))
return freq_hash;
@@ -130,33 +105,36 @@ create_freq_hash (const struct dictionary *dict,
struct casereader *input,
const struct variable *var)
{
+ int width = var_get_width (var);
bool warn = true;
- struct ccase c;
+ struct ccase *c;
struct hsh_table *freq_hash =
hsh_create (4, compare_freq, hash_freq,
free_freq_mutable_hash,
(void *) var);
- for (; casereader_read (input, &c); case_destroy (&c))
+ for (; (c = casereader_read (input)) != NULL; case_unref (c))
{
- struct freq **existing_fr;
- struct freq *fr = xmalloc(sizeof (*fr));
- fr->value = case_data (&c, var);
+ struct freq_mutable fr;
+ void **p;
- fr->count = dict_get_case_weight (dict, &c, &warn);
+ fr.value = *case_data (c, var);
+ fr.count = dict_get_case_weight (dict, c, &warn);
- existing_fr = (struct freq **) hsh_probe (freq_hash, fr);
- if ( *existing_fr)
- {
- (*existing_fr)->count += fr->count;
- free (fr);
- }
+ p = hsh_probe (freq_hash, &fr);
+ if (*p == NULL)
+ {
+ struct freq_mutable *new_fr = *p = xmalloc (sizeof *new_fr);
+ value_init (&new_fr->value, width);
+ value_copy (&new_fr->value, &fr.value, width);
+ new_fr->count = fr.count;
+ }
else
- {
- *existing_fr = fr;
- fr->value = value_dup (fr->value, var_get_width (var));
- }
+ {
+ struct freq *existing_fr = *p;
+ existing_fr->count += fr.count;
+ }
}
if (casereader_destroy (input))
return freq_hash;
@@ -196,11 +174,13 @@ create_variable_frequency_table (const struct dictionary *dict,
test->n_expected, n_cells,
var_get_name (var)
);
+ hsh_destroy (*freq_hash);
+ *freq_hash = NULL;
return NULL;
}
table = tab_create(4, n_cells + 2, 0);
- tab_dim (table, tab_natural_dimensions);
+ tab_dim (table, tab_natural_dimensions, NULL);
tab_title (table, var_to_string(var));
tab_text (table, 1, 0, TAB_LEFT, _("Observed N"));
@@ -236,7 +216,7 @@ create_combo_frequency_table (const struct chisquare_test *test)
int n_cells = test->hi - test->lo + 1;
table = tab_create(1 + ost->n_vars * 4, n_cells + 3, 0);
- tab_dim (table, tab_natural_dimensions);
+ tab_dim (table, tab_natural_dimensions, NULL);
tab_title (table, _("Frequencies"));
for ( i = 0 ; i < ost->n_vars ; ++i )
@@ -268,8 +248,8 @@ create_combo_frequency_table (const struct chisquare_test *test)
}
for ( i = test->lo ; i <= test->hi ; ++i )
- tab_float (table, 0, 2 + i - test->lo,
- TAB_LEFT, 1 + i - test->lo, 8, 0);
+ tab_fixed (table, 0, 2 + i - test->lo,
+ TAB_LEFT, 1 + i - test->lo, 8, 0);
tab_headers (table, 1, 0, 2, 0);
@@ -292,7 +272,7 @@ create_stats_table (const struct chisquare_test *test)
struct tab_table *table;
table = tab_create (1 + ost->n_vars, 4, 0);
- tab_dim (table, tab_natural_dimensions);
+ tab_dim (table, tab_natural_dimensions, NULL);
tab_title (table, _("Test Statistics"));
tab_headers (table, 1, 0, 1, 0);
@@ -319,7 +299,9 @@ void
chisquare_execute (const struct dataset *ds,
struct casereader *input,
enum mv_class exclude,
- const struct npar_test *test)
+ const struct npar_test *test,
+ bool exact UNUSED,
+ double timer UNUSED)
{
const struct dictionary *dict = dataset_dict (ds);
int v, i;
@@ -327,6 +309,9 @@ chisquare_execute (const struct dataset *ds,
struct chisquare_test *cst = (struct chisquare_test *) test;
int n_cells = 0;
double total_expected = 0.0;
+ const struct variable *wvar = dict_get_weight (dict);
+ const struct fmt_spec *wfmt = wvar ?
+ var_get_print_format (wvar) : & F_8_0;
double *df = xzalloc (sizeof (*df) * ost->n_vars);
double *xsq = xzalloc (sizeof (*df) * ost->n_vars);
@@ -343,7 +328,8 @@ chisquare_execute (const struct dataset *ds,
struct hsh_table *freq_hash = NULL;
struct casereader *reader =
casereader_create_filter_missing (casereader_clone (input),
- &ost->vars[v], 1, exclude, NULL);
+ &ost->vars[v], 1, exclude,
+ NULL, NULL);
struct tab_table *freq_table =
create_variable_frequency_table(dict, reader, cst, v, &freq_hash);
@@ -361,36 +347,41 @@ chisquare_execute (const struct dataset *ds,
xsq[v] = 0.0;
for ( i = 0 ; i < n_cells ; ++i )
{
+ struct string str;
double exp;
- const union value *observed_value = ff[i]->value;
+ const union value *observed_value = &ff[i]->value;
+
+ ds_init_empty (&str);
+ var_append_value_name (ost->vars[v], observed_value, &str);
/* The key */
- tab_text (freq_table, 0, i + 1, TAB_LEFT,
- var_get_value_name (ost->vars[v], observed_value));
+ tab_text (freq_table, 0, i + 1, TAB_LEFT, ds_cstr (&str));
+ ds_destroy (&str);
+
/* The observed N */
- tab_float (freq_table, 1, i + 1, TAB_NONE,
- ff[i]->count, 8, 0);
+ tab_double (freq_table, 1, i + 1, TAB_NONE,
+ ff[i]->count, wfmt);
if ( cst->n_expected > 0 )
exp = cst->expected[i] * total_obs / total_expected ;
else
exp = total_obs / (double) n_cells;
- tab_float (freq_table, 2, i + 1, TAB_NONE,
- exp, 8, 2);
+ tab_double (freq_table, 2, i + 1, TAB_NONE,
+ exp, NULL);
/* The residual */
- tab_float (freq_table, 3, i + 1, TAB_NONE,
- ff[i]->count - exp, 8, 2);
+ tab_double (freq_table, 3, i + 1, TAB_NONE,
+ ff[i]->count - exp, NULL);
xsq[v] += (ff[i]->count - exp) * (ff[i]->count - exp) / exp;
}
df[v] = n_cells - 1.0;
- tab_float (freq_table, 1, i + 1, TAB_NONE,
- total_obs, 8, 0);
+ tab_double (freq_table, 1, i + 1, TAB_NONE,
+ total_obs, wfmt);
tab_submit (freq_table);
@@ -408,7 +399,8 @@ chisquare_execute (const struct dataset *ds,
double total_obs = 0.0;
struct casereader *reader =
casereader_create_filter_missing (casereader_clone (input),
- &ost->vars[v], 1, exclude, NULL);
+ &ost->vars[v], 1, exclude,
+ NULL, NULL);
struct hsh_table *freq_hash =
create_freq_hash_with_range (dict, reader,
ost->vars[v], cst->lo, cst->hi);
@@ -427,17 +419,21 @@ chisquare_execute (const struct dataset *ds,
xsq[v] = 0.0;
for ( i = 0 ; i < hsh_count (freq_hash) ; ++i )
{
+ struct string str;
double exp;
- const union value *observed_value = ff[i]->value;
+ const union value *observed_value = &ff[i]->value;
+ ds_init_empty (&str);
+ var_append_value_name (ost->vars[v], observed_value, &str);
/* The key */
tab_text (freq_table, v * 4 + 1, i + 2 , TAB_LEFT,
- var_get_value_name (ost->vars[v], observed_value));
+ ds_cstr (&str));
+ ds_destroy (&str);
/* The observed N */
- tab_float (freq_table, v * 4 + 2, i + 2 , TAB_NONE,
- ff[i]->count, 8, 0);
+ tab_double (freq_table, v * 4 + 2, i + 2 , TAB_NONE,
+ ff[i]->count, wfmt);
if ( cst->n_expected > 0 )
exp = cst->expected[i] * total_obs / total_expected ;
@@ -445,19 +441,19 @@ chisquare_execute (const struct dataset *ds,
exp = total_obs / (double) hsh_count (freq_hash);
/* The expected N */
- tab_float (freq_table, v * 4 + 3, i + 2 , TAB_NONE,
- exp, 8, 2);
+ tab_double (freq_table, v * 4 + 3, i + 2 , TAB_NONE,
+ exp, NULL);
/* The residual */
- tab_float (freq_table, v * 4 + 4, i + 2 , TAB_NONE,
- ff[i]->count - exp, 8, 2);
+ tab_double (freq_table, v * 4 + 4, i + 2 , TAB_NONE,
+ ff[i]->count - exp, NULL);
xsq[v] += (ff[i]->count - exp) * (ff[i]->count - exp) / exp;
}
- tab_float (freq_table, v * 4 + 2, tab_nr (freq_table) - 1, TAB_NONE,
- total_obs, 8, 0);
+ tab_double (freq_table, v * 4 + 2, tab_nr (freq_table) - 1, TAB_NONE,
+ total_obs, wfmt);
df[v] = n_cells - 1.0;
@@ -480,11 +476,11 @@ chisquare_execute (const struct dataset *ds,
tab_text (stats_table, 1 + v, 0, TAB_CENTER, var_get_name (var));
- tab_float (stats_table, 1 + v, 1, TAB_NONE, xsq[v], 8,3);
- tab_float (stats_table, 1 + v, 2, TAB_NONE, df[v], 8,0);
+ tab_double (stats_table, 1 + v, 1, TAB_NONE, xsq[v], NULL);
+ tab_fixed (stats_table, 1 + v, 2, TAB_NONE, df[v], 8, 0);
- tab_float (stats_table, 1 + v, 3, TAB_NONE,
- gsl_cdf_chisq_Q (xsq[v], df[v]), 8,3);
+ tab_double (stats_table, 1 + v, 3, TAB_NONE,
+ gsl_cdf_chisq_Q (xsq[v], df[v]), NULL);
}
tab_submit (stats_table);
}