X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Foneway.q;h=0f6b20a1db04cdf9503e4a21cd67cb2f22ecf6bf;hb=2cf38ce51a9f34961d68a75e0b312a591b5c9abf;hp=b2e77f45d06a29ba3f4574406232b9f606ea28fb;hpb=2165f59ab9eee5272b4037e45477811627cae078;p=pspp-builds.git diff --git a/src/language/stats/oneway.q b/src/language/stats/oneway.q index b2e77f45..0f6b20a1 100644 --- a/src/language/stats/oneway.q +++ b/src/language/stats/oneway.q @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2007 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2007, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -43,6 +43,7 @@ #include #include #include "sort-criteria.h" +#include #include "xalloc.h" @@ -88,9 +89,9 @@ static void run_oneway (struct cmd_oneway *, struct casereader *, /* Routines to show the output tables */ -static void show_anova_table (void); -static void show_descriptives (void); -static void show_homogeneity (void); +static void show_anova_table(void); +static void show_descriptives (const struct dictionary *dict); +static void show_homogeneity(void); static void show_contrast_coeffs (short *); static void show_contrast_tests (short *); @@ -100,7 +101,7 @@ enum stat_table_t {STAT_DESC = 1, STAT_HOMO = 2}; static enum stat_table_t stat_tables; -void output_oneway (void); +static void output_oneway (const struct dictionary *dict); int @@ -148,8 +149,8 @@ cmd_oneway (struct lexer *lexer, struct dataset *ds) } -void -output_oneway (void) +static void +output_oneway (const struct dictionary *dict) { size_t i; short *bad_contrast; @@ -180,7 +181,7 @@ output_oneway (void) } if ( stat_tables & STAT_DESC ) - show_descriptives (); + show_descriptives (dict); if ( stat_tables & STAT_HOMO ) show_homogeneity (); @@ -260,7 +261,7 @@ show_anova_table (void) t = tab_create (n_cols, n_rows, 0); tab_headers (t, 2, 0, 1, 0); - tab_dim (t, tab_natural_dimensions); + tab_dim (t, tab_natural_dimensions, NULL); tab_box (t, @@ -317,28 +318,28 @@ show_anova_table (void) /* Sums of Squares */ - tab_float (t, 2, i * 3 + 1, 0, ssa, 10, 2); - tab_float (t, 2, i * 3 + 3, 0, sst, 10, 2); - tab_float (t, 2, i * 3 + 2, 0, sst - ssa, 10, 2); + tab_double (t, 2, i * 3 + 1, 0, ssa, NULL); + tab_double (t, 2, i * 3 + 3, 0, sst, NULL); + tab_double (t, 2, i * 3 + 2, 0, sst - ssa, NULL); /* Degrees of freedom */ - tab_float (t, 3, i * 3 + 1, 0, df1, 4, 0); - tab_float (t, 3, i * 3 + 2, 0, df2, 4, 0); - tab_float (t, 3, i * 3 + 3, 0, totals->n - 1, 4, 0); + tab_fixed (t, 3, i * 3 + 1, 0, df1, 4, 0); + tab_fixed (t, 3, i * 3 + 2, 0, df2, 4, 0); + tab_fixed (t, 3, i * 3 + 3, 0, totals->n - 1, 4, 0); /* Mean Squares */ - tab_float (t, 4, i * 3 + 1, TAB_RIGHT, msa, 8, 3); - tab_float (t, 4, i * 3 + 2, TAB_RIGHT, gp->mse, 8, 3); + tab_double (t, 4, i * 3 + 1, TAB_RIGHT, msa, NULL); + tab_double (t, 4, i * 3 + 2, TAB_RIGHT, gp->mse, NULL); { - const double F = msa/gp->mse; + const double F = msa / gp->mse ; /* The F value */ - tab_float (t, 5, i * 3 + 1, 0, F, 8, 3); + tab_double (t, 5, i * 3 + 1, 0, F, NULL); /* The significance */ - tab_float (t, 6, i * 3 + 1, 0, gsl_cdf_fdist_Q (F, df1, df2), 8, 3); + tab_double (t, 6, i * 3 + 1, 0, gsl_cdf_fdist_Q (F, df1, df2), NULL); } } } @@ -351,16 +352,18 @@ show_anova_table (void) /* Show the descriptives table */ static void -show_descriptives (void) +show_descriptives (const struct dictionary *dict) { size_t v; - int n_cols =10; + int n_cols = 10; struct tab_table *t; int row; const double confidence = 0.95; const double q = (1.0 - confidence) / 2.0; + const struct variable *wv = dict_get_weight (dict); + const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0; int n_rows = 2; @@ -369,7 +372,7 @@ show_descriptives (void) t = tab_create (n_cols, n_rows, 0); tab_headers (t, 2, 0, 2, 0); - tab_dim (t, tab_natural_dimensions); + tab_dim (t, tab_natural_dimensions, NULL); /* Put a frame around the entire box, and vertical lines inside */ @@ -391,8 +394,9 @@ show_descriptives (void) tab_vline (t, TAL_0, 7, 0, 0); tab_hline (t, TAL_1, 6, 7, 1); - tab_joint_text (t, 6, 0, 7, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF, - _("%g%% Confidence Interval for Mean"), confidence*100.0); + tab_joint_text_format (t, 6, 0, 7, 0, TAB_CENTER | TAT_TITLE, + _("%g%% Confidence Interval for Mean"), + confidence*100.0); tab_text (t, 6, 1, TAB_CENTER | TAT_TITLE, _("Lower Bound")); tab_text (t, 7, 1, TAB_CENTER | TAT_TITLE, _("Upper Bound")); @@ -416,6 +420,7 @@ show_descriptives (void) struct group_statistics *totals = &gp->ugs; const char *s = var_to_string (vars[v]); + const struct fmt_spec *fmt = var_get_print_format (vars[v]); struct group_statistics *const *gs_array = (struct group_statistics *const *) hsh_sort (gp->group_hash); @@ -441,57 +446,59 @@ show_descriptives (void) /* Now fill in the numbers ... */ - tab_float (t, 2, row + count, 0, gs->n, 8, 0); + tab_fixed (t, 2, row + count, 0, gs->n, 8, 0); - tab_float (t, 3, row + count, 0, gs->mean, 8, 2); + tab_double (t, 3, row + count, 0, gs->mean, NULL); - tab_float (t, 4, row + count, 0, gs->std_dev, 8, 2); + tab_double (t, 4, row + count, 0, gs->std_dev, NULL); - std_error = gs->std_dev/sqrt (gs->n); - tab_float (t, 5, row + count, 0, - std_error, 8, 2); + std_error = gs->std_dev / sqrt (gs->n) ; + tab_double (t, 5, row + count, 0, + std_error, NULL); /* Now the confidence interval */ T = gsl_cdf_tdist_Qinv (q, gs->n - 1); - tab_float (t, 6, row + count, 0, - gs->mean - T * std_error, 8, 2); + tab_double (t, 6, row + count, 0, + gs->mean - T * std_error, NULL); - tab_float (t, 7, row + count, 0, - gs->mean + T * std_error, 8, 2); + tab_double (t, 7, row + count, 0, + gs->mean + T * std_error, NULL); /* Min and Max */ - tab_float (t, 8, row + count, 0, gs->minimum, 8, 2); - tab_float (t, 9, row + count, 0, gs->maximum, 8, 2); + + tab_double (t, 8, row + count, 0, gs->minimum, fmt); + tab_double (t, 9, row + count, 0, gs->maximum, fmt); } tab_text (t, 1, row + count, TAB_LEFT | TAT_TITLE, _("Total")); - tab_float (t, 2, row + count, 0, totals->n, 8, 0); + tab_double (t, 2, row + count, 0, totals->n, wfmt); - tab_float (t, 3, row + count, 0, totals->mean, 8, 2); + tab_double (t, 3, row + count, 0, totals->mean, NULL); - tab_float (t, 4, row + count, 0, totals->std_dev, 8, 2); + tab_double (t, 4, row + count, 0, totals->std_dev, NULL); - std_error = totals->std_dev/sqrt (totals->n); + std_error = totals->std_dev / sqrt (totals->n) ; - tab_float (t, 5, row + count, 0, std_error, 8, 2); + tab_double (t, 5, row + count, 0, std_error, NULL); /* Now the confidence interval */ T = gsl_cdf_tdist_Qinv (q, totals->n - 1); - tab_float (t, 6, row + count, 0, - totals->mean - T * std_error, 8, 2); + tab_double (t, 6, row + count, 0, + totals->mean - T * std_error, NULL); - tab_float (t, 7, row + count, 0, - totals->mean + T * std_error, 8, 2); + tab_double (t, 7, row + count, 0, + totals->mean + T * std_error, NULL); /* Min and Max */ - tab_float (t, 8, row + count, 0, totals->minimum, 8, 2); - tab_float (t, 9, row + count, 0, totals->maximum, 8, 2); + + tab_double (t, 8, row + count, 0, totals->minimum, fmt); + tab_double (t, 9, row + count, 0, totals->maximum, fmt); row += gp->n_groups + 1; } @@ -512,7 +519,7 @@ show_homogeneity (void) t = tab_create (n_cols, n_rows, 0); tab_headers (t, 1, 0, 1, 0); - tab_dim (t, tab_natural_dimensions); + tab_dim (t, tab_natural_dimensions, NULL); /* Put a frame around the entire box, and vertical lines inside */ tab_box (t, @@ -547,12 +554,12 @@ show_homogeneity (void) tab_text (t, 0, v + 1, TAB_LEFT | TAT_TITLE, s); F = gp->levene; - tab_float (t, 1, v + 1, TAB_RIGHT, F, 8, 3); - tab_float (t, 2, v + 1, TAB_RIGHT, df1, 8, 0); - tab_float (t, 3, v + 1, TAB_RIGHT, df2, 8, 0); + tab_double (t, 1, v + 1, TAB_RIGHT, F, NULL); + tab_fixed (t, 2, v + 1, TAB_RIGHT, df1, 8, 0); + tab_fixed (t, 3, v + 1, TAB_RIGHT, df2, 8, 0); /* Now the significance */ - tab_float (t, 4, v + 1, TAB_RIGHT, gsl_cdf_fdist_Q (F, df1, df2), 8, 3); + tab_double (t, 4, v + 1, TAB_RIGHT,gsl_cdf_fdist_Q (F, df1, df2), NULL); } tab_submit (t); @@ -565,7 +572,6 @@ show_contrast_coeffs (short *bad_contrast) { int n_cols = 2 + ostensible_number_of_groups; int n_rows = 2 + cmd.sbc_contrast; - union value *group_value; int count = 0; void *const *group_values; @@ -573,7 +579,7 @@ show_contrast_coeffs (short *bad_contrast) t = tab_create (n_cols, n_rows, 0); tab_headers (t, 2, 0, 2, 0); - tab_dim (t, tab_natural_dimensions); + tab_dim (t, tab_natural_dimensions, NULL); /* Put a frame around the entire box, and vertical lines inside */ tab_box (t, @@ -612,13 +618,16 @@ show_contrast_coeffs (short *bad_contrast) count < hsh_count (global_group_hash); ++count) { + double *group_value_p; + union value group_value; int i; struct string vstr; - group_value = group_values[count]; ds_init_empty (&vstr); - var_append_value_name (indep_var, group_value, &vstr); + group_value_p = group_values[count]; + group_value.f = *group_value_p; + var_append_value_name (indep_var, &group_value, &vstr); tab_text (t, count + 2, 1, TAB_CENTER | TAT_TITLE, ds_cstr (&vstr)); @@ -628,14 +637,13 @@ show_contrast_coeffs (short *bad_contrast) for (i = 0; i < cmd.sbc_contrast; ++i ) { - tab_text (t, 1, i + 2, TAB_CENTER | TAT_PRINTF, "%d", i + 1); + tab_text_format (t, 1, i + 2, TAB_CENTER, "%d", i + 1); if ( bad_contrast[i] ) tab_text (t, count + 2, i + 2, TAB_RIGHT, "?" ); else - tab_text (t, count + 2, i + 2, TAB_RIGHT | TAT_PRINTF, "%g", - subc_list_double_at (&cmd.dl_contrast[i], count) - ); + tab_text_format (t, count + 2, i + 2, TAB_RIGHT, "%g", + subc_list_double_at (&cmd.dl_contrast[i], count)); } } @@ -655,7 +663,7 @@ show_contrast_tests (short *bad_contrast) t = tab_create (n_cols, n_rows, 0); tab_headers (t, 3, 0, 1, 0); - tab_dim (t, tab_natural_dimensions); + tab_dim (t, tab_natural_dimensions, NULL); /* Put a frame around the entire box, and vertical lines inside */ tab_box (t, @@ -732,12 +740,13 @@ show_contrast_tests (short *bad_contrast) _("Does not assume equal")); } - tab_text (t, 2, (v * lines_per_variable) + i + 1, - TAB_CENTER | TAT_TITLE | TAT_PRINTF, "%d", i + 1); + tab_text_format (t, 2, (v * lines_per_variable) + i + 1, + TAB_CENTER | TAT_TITLE, "%d", i + 1); - tab_text (t, 2, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, - TAB_CENTER | TAT_TITLE | TAT_PRINTF, "%d", i + 1); + tab_text_format (t, 2, + (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, + TAB_CENTER | TAT_TITLE, "%d", i + 1); if ( bad_contrast[i]) @@ -763,74 +772,69 @@ show_contrast_tests (short *bad_contrast) } sec_vneq = sqrt (sec_vneq); - df_numerator = pow2(df_numerator); + df_numerator = pow2 (df_numerator); - tab_float (t, 3, (v * lines_per_variable) + i + 1, - TAB_RIGHT, contrast_value, 8, 2); + tab_double (t, 3, (v * lines_per_variable) + i + 1, + TAB_RIGHT, contrast_value, NULL); - tab_float (t, 3, (v * lines_per_variable) + i + 1 + + tab_double (t, 3, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, - TAB_RIGHT, contrast_value, 8, 2); + TAB_RIGHT, contrast_value, NULL); std_error_contrast = sqrt (grp_data->mse * coef_msq); /* Std. Error */ - tab_float (t, 4, (v * lines_per_variable) + i + 1, + tab_double (t, 4, (v * lines_per_variable) + i + 1, TAB_RIGHT, std_error_contrast, - 8, 3); + NULL); T = fabs (contrast_value / std_error_contrast); /* T Statistic */ - tab_float (t, 5, (v * lines_per_variable) + i + 1, + tab_double (t, 5, (v * lines_per_variable) + i + 1, TAB_RIGHT, T, - 8, 3); + NULL); df = grp_data->ugs.n - grp_data->n_groups; /* Degrees of Freedom */ - tab_float (t, 6, (v * lines_per_variable) + i + 1, + tab_fixed (t, 6, (v * lines_per_variable) + i + 1, TAB_RIGHT, df, 8, 0); /* Significance TWO TAILED !!*/ - tab_float (t, 7, (v * lines_per_variable) + i + 1, + tab_double (t, 7, (v * lines_per_variable) + i + 1, TAB_RIGHT, 2 * gsl_cdf_tdist_Q (T, df), - 8, 3); - + NULL); /* Now for the Variances NOT Equal case */ /* Std. Error */ - tab_float (t, 4, + tab_double (t, 4, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, TAB_RIGHT, sec_vneq, - 8, 3); - + NULL); T = contrast_value / sec_vneq; - tab_float (t, 5, + tab_double (t, 5, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, TAB_RIGHT, T, - 8, 3); - + NULL); df = df_numerator / df_denominator; - tab_float (t, 6, + tab_double (t, 6, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, TAB_RIGHT, df, - 8, 3); + NULL); /* The Significance */ - tab_float (t, 7, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, - TAB_RIGHT, 2 * gsl_cdf_tdist_Q (T, df), - 8, 3); - - + tab_double (t, 7, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, + TAB_RIGHT, 2 * gsl_cdf_tdist_Q (T,df), + NULL); } if ( v > 0 ) @@ -838,7 +842,6 @@ show_contrast_tests (short *bad_contrast) } tab_submit (t); - } @@ -878,10 +881,25 @@ precalc (struct cmd_oneway *cmd UNUSED) } } +static int +compare_double_3way (const void *a_, const void *b_, const void *aux UNUSED) +{ + const double *a = a_; + const double *b = b_; + return *a < *b ? -1 : *a > *b; +} + +static unsigned +do_hash_double (const void *value_, const void *aux UNUSED) +{ + const double *value = value_; + return hash_double (*value, 0); +} + static void -free_value (void *value_, const void *aux UNUSED) +free_double (void *value_, const void *aux UNUSED) { - union value *value = value_; + double *value = value_; free (value); } @@ -894,22 +912,23 @@ run_oneway (struct cmd_oneway *cmd, struct dictionary *dict = dataset_dict (ds); enum mv_class exclude; struct casereader *reader; - struct ccase c; + struct ccase *c; - if (!casereader_peek (input, 0, &c)) + c = casereader_peek (input, 0); + if (c == NULL) { casereader_destroy (input); return; } - output_split_file_values (ds, &c); - case_destroy (&c); + output_split_file_values (ds, c); + case_unref (c); taint = taint_clone (casereader_get_taint (input)); global_group_hash = hsh_create (4, - compare_values_short, - hash_value_short, - free_value, + compare_double_3way, + do_hash_double, + free_double, indep_var); precalc (cmd); @@ -923,22 +942,25 @@ run_oneway (struct cmd_oneway *cmd, input = casereader_create_filter_weight (input, dict, NULL, NULL); reader = casereader_clone (input); - for (; casereader_read (reader, &c); case_destroy (&c)) + for (; (c = casereader_read (reader)) != NULL; case_unref (c)) { size_t i; - const double weight = dict_get_case_weight (dict, &c, NULL); + const double weight = dict_get_case_weight (dict, c, NULL); - const union value *indep_val = case_data (&c, indep_var); - void **p = hsh_probe (global_group_hash, indep_val); + const union value *indep_val = case_data (c, indep_var); + void **p = hsh_probe (global_group_hash, &indep_val->f); if (*p == NULL) - *p = value_dup (indep_val, var_get_width (indep_var)); + { + double *value = *p = xmalloc (sizeof *value); + *value = indep_val->f; + } for (i = 0; i < n_vars; ++i) { const struct variable *v = vars[i]; - const union value *val = case_data (&c, v); + const union value *val = case_data (c, v); struct group_proc *gp = group_proc_get (vars[i]); struct hsh_table *group_hash = gp->group_hash; @@ -1003,7 +1025,8 @@ run_oneway (struct cmd_oneway *cmd, ostensible_number_of_groups = hsh_count (global_group_hash); if (!taint_has_tainted_successor (taint)) - output_oneway (); + output_oneway (dict); + taint_destroy (taint); }