X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Ft-test-paired.c;h=921233b68e52b8364c3c9e14e3496c8046cd88fd;hb=5cab4cf3322f29c0ed7134d23740e07382914f20;hp=4935e9a8cb866bd112d5d6118050af97f016ce46;hpb=37f0de7f0ee1870cf5fd4b802be092be844a5b95;p=pspp diff --git a/src/language/stats/t-test-paired.c b/src/language/stats/t-test-paired.c index 4935e9a8cb..921233b68e 100644 --- a/src/language/stats/t-test-paired.c +++ b/src/language/stats/t-test-paired.c @@ -27,18 +27,16 @@ #include "data/dictionary.h" #include "data/format.h" #include "data/variable.h" -#include "libpspp/hmapx.h" -#include "libpspp/hash-functions.h" -#include "output/tab.h" +#include "output/pivot-table.h" #include "gettext.h" +#define N_(msgid) msgid #define _(msgid) gettext (msgid) struct pair_stats { - int posn; double sum_of_prod; struct moments *mom0; const struct variable *var0; @@ -51,7 +49,8 @@ struct pair_stats struct paired_samp { - struct hmapx hmap; + struct pair_stats *ps; + size_t n_ps; }; static void paired_summary (const struct tt *tt, struct paired_samp *os); @@ -61,31 +60,21 @@ static void paired_test (const struct tt *tt, const struct paired_samp *os); void paired_run (const struct tt *tt, size_t n_pairs, vp *pairs, struct casereader *reader) { - int i; struct ccase *c; struct paired_samp ps; struct casereader *r; - struct hmapx_node *node; - struct pair_stats *pp = NULL; - hmapx_init (&ps.hmap); - - for (i = 0; i < n_pairs; ++i) + ps.ps = xcalloc (n_pairs, sizeof *ps.ps); + ps.n_ps = n_pairs; + for (size_t i = 0; i < n_pairs; ++i) { vp *pair = &pairs[i]; - unsigned int hash; - struct pair_stats *pp = xzalloc (sizeof *pp); - pp->posn = i; + struct pair_stats *pp = &ps.ps[i]; pp->var0 = (*pair)[0]; pp->var1 = (*pair)[1]; pp->mom0 = moments_create (MOMENT_VARIANCE); pp->mom1 = moments_create (MOMENT_VARIANCE); pp->mom_diff = moments_create (MOMENT_VARIANCE); - - hash = hash_pointer ((*pair)[0], 0); - hash = hash_pointer ((*pair)[1], hash); - - hmapx_insert (&ps.hmap, pp, hash); } r = casereader_clone (reader); @@ -93,10 +82,9 @@ paired_run (const struct tt *tt, size_t n_pairs, vp *pairs, struct casereader *r { double w = dict_get_case_weight (tt->dict, c, NULL); - struct hmapx_node *node; - struct pair_stats *pp = NULL; - HMAPX_FOR_EACH (pp, node, &ps.hmap) + for (int i = 0; i < ps.n_ps; i++) { + struct pair_stats *pp = &ps.ps[i]; const union value *val0 = case_data (c, pp->var0); const union value *val1 = case_data (c, pp->var1); if (var_is_value_missing (pp->var0, val0, tt->exclude)) @@ -117,10 +105,9 @@ paired_run (const struct tt *tt, size_t n_pairs, vp *pairs, struct casereader *r { double w = dict_get_case_weight (tt->dict, c, NULL); - struct hmapx_node *node; - struct pair_stats *pp = NULL; - HMAPX_FOR_EACH (pp, node, &ps.hmap) + for (int i = 0; i < ps.n_ps; i++) { + struct pair_stats *pp = &ps.ps[i]; const union value *val0 = case_data (c, pp->var0); const union value *val1 = case_data (c, pp->var1); if (var_is_value_missing (pp->var0, val0, tt->exclude)) @@ -142,214 +129,172 @@ paired_run (const struct tt *tt, size_t n_pairs, vp *pairs, struct casereader *r paired_test (tt, &ps); /* Clean up */ - HMAPX_FOR_EACH (pp, node, &ps.hmap) + + for (int i = 0; i < ps.n_ps; i++) { + struct pair_stats *pp = &ps.ps[i]; moments_destroy (pp->mom0); moments_destroy (pp->mom1); moments_destroy (pp->mom_diff); - free (pp); } - - hmapx_destroy (&ps.hmap); + free (ps.ps); } static void paired_summary (const struct tt *tt, struct paired_samp *os) { - size_t n_pairs = hmapx_count (&os->hmap); - struct hmapx_node *node; - struct pair_stats *pp = NULL; - - const int heading_rows = 1; - const int heading_cols = 2; - - const int cols = 4 + heading_cols; - const int rows = n_pairs * 2 + heading_rows; - struct tab_table *t = tab_create (cols, rows); - const struct fmt_spec *wfmt = tt->wv ? var_get_print_format (tt->wv) : & F_8_0; - tab_set_format (t, RC_WEIGHT, wfmt); - tab_headers (t, 0, 0, heading_rows, 0); - tab_box (t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols - 1, rows - 1); - tab_box (t, -1, -1, TAL_0, TAL_1, heading_cols, 0, cols - 1, rows - 1); - - tab_hline (t, TAL_2, 0, cols - 1, 1); - - tab_title (t, _("Paired Sample Statistics")); - tab_vline (t, TAL_2, heading_cols, 0, rows - 1); - tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("N")); - tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean")); - tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); - tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("S.E. Mean")); - - HMAPX_FOR_EACH (pp, node, &os->hmap) - { - int v = pp->posn; - double cc, mean, sigma; + struct pivot_table *table = pivot_table_create ( + N_("Paired Sample Statistics")); + pivot_table_set_weight_var (table, tt->wv); - tab_text_format (t, 0, v * 2 + heading_rows, TAB_LEFT, _("Pair %d"), pp->posn + 1); - - /* first var */ - moments_calculate (pp->mom0, &cc, &mean, &sigma, NULL, NULL); - tab_text (t, 1, v * 2 + heading_rows, TAB_LEFT, var_to_string (pp->var0)); - tab_double (t, 3, v * 2 + heading_rows, TAB_RIGHT, cc, NULL, RC_WEIGHT); - tab_double (t, 2, v * 2 + heading_rows, TAB_RIGHT, mean, NULL, RC_OTHER); - tab_double (t, 4, v * 2 + heading_rows, TAB_RIGHT, sqrt (sigma), NULL, RC_OTHER); - tab_double (t, 5, v * 2 + heading_rows, TAB_RIGHT, sqrt (sigma / cc), NULL, RC_OTHER); - - /* second var */ - moments_calculate (pp->mom1, &cc, &mean, &sigma, NULL, NULL); - tab_text (t, 1, v * 2 + 1 + heading_rows, TAB_LEFT, var_to_string (pp->var1)); - tab_double (t, 3, v * 2 + 1 + heading_rows, TAB_RIGHT, cc, NULL, RC_WEIGHT); - tab_double (t, 2, v * 2 + 1 + heading_rows, TAB_RIGHT, mean, NULL, RC_OTHER); - tab_double (t, 4, v * 2 + 1 + heading_rows, TAB_RIGHT, sqrt (sigma), NULL, RC_OTHER); - tab_double (t, 5, v * 2 + 1 + heading_rows, TAB_RIGHT, sqrt (sigma / cc), NULL, RC_OTHER); + pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), + N_("N"), PIVOT_RC_COUNT, + N_("Mean"), PIVOT_RC_OTHER, + N_("Std. Deviation"), PIVOT_RC_OTHER, + N_("S.E. Mean"), PIVOT_RC_OTHER); + + struct pivot_dimension *variables = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Variables")); + + for (size_t i = 0; i < os->n_ps; i++) + { + struct pair_stats *pp = &os->ps[i]; + struct pivot_category *pair = pivot_category_create_group__ ( + variables->root, pivot_value_new_text_format (N_("Pair %d"), i + 1)); + + for (int j = 0; j < 2; j++) + { + const struct variable *var = j ? pp->var1 : pp->var0; + const struct moments *mom = j ? pp->mom1 : pp->mom0; + double cc, mean, sigma; + moments_calculate (mom, &cc, &mean, &sigma, NULL, NULL); + + int var_idx = pivot_category_create_leaf ( + pair, pivot_value_new_variable (var)); + + double entries[] = { cc, mean, sqrt (sigma), sqrt (sigma / cc) }; + for (size_t j = 0; j < sizeof entries / sizeof *entries; j++) + pivot_table_put2 (table, j, var_idx, + pivot_value_new_number (entries[j])); + } } - tab_submit (t); + pivot_table_submit (table); } static void paired_correlations (const struct tt *tt, struct paired_samp *os) { - size_t n_pairs = hmapx_count (&os->hmap); - struct hmapx_node *node; - struct pair_stats *pp = NULL; - const int heading_rows = 1; - const int heading_cols = 2; - - const int cols = 5; - const int rows = n_pairs + heading_rows; - struct tab_table *t = tab_create (cols, rows); - const struct fmt_spec *wfmt = tt->wv ? var_get_print_format (tt->wv) : & F_8_0; - tab_set_format (t, RC_WEIGHT, wfmt); - tab_headers (t, 0, 0, heading_rows, 0); - tab_box (t, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1); - - tab_hline (t, TAL_2, 0, cols - 1, 1); - - tab_title (t, _("Paired Samples Correlations")); - tab_vline (t, TAL_2, heading_cols, 0, rows - 1); - tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("N")); - tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Correlation")); - tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Sig.")); - - HMAPX_FOR_EACH (pp, node, &os->hmap) - { - double corr; - double cc0, mean0, sigma0; - double cc1, mean1, sigma1; - int v = pp->posn; + struct pivot_table *table = pivot_table_create ( + N_("Paired Samples Correlations")); + pivot_table_set_weight_var (table, tt->wv); + + pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), + N_("N"), PIVOT_RC_COUNT, + N_("Correlation"), PIVOT_RC_CORRELATION, + N_("Sig."), PIVOT_RC_SIGNIFICANCE); - tab_text_format (t, 0, v + heading_rows, TAB_LEFT, _("Pair %d"), pp->posn + 1); + struct pivot_dimension *pairs = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Pairs")); - tab_text_format (t, 1, v + heading_rows, TAB_LEFT, _("%s & %s"), - var_to_string (pp->var0), - var_to_string (pp->var1)); + for (size_t i = 0; i < os->n_ps; i++) + { + struct pair_stats *pp = &os->ps[i]; + struct pivot_category *group = pivot_category_create_group__ ( + pairs->root, pivot_value_new_text_format (N_("Pair %d"), i + 1)); + + int row = pivot_category_create_leaf ( + group, pivot_value_new_text_format (N_("%s & %s"), + var_to_string (pp->var0), + var_to_string (pp->var1))); + double cc0, mean0, sigma0; + double cc1, mean1, sigma1; moments_calculate (pp->mom0, &cc0, &mean0, &sigma0, NULL, NULL); moments_calculate (pp->mom1, &cc1, &mean1, &sigma1, NULL, NULL); - /* If this fails, then we're not dealing with missing values properly */ assert (cc0 == cc1); - tab_double (t, 2, v + heading_rows, TAB_RIGHT, cc0, NULL, RC_WEIGHT); - - corr = pp->sum_of_prod / cc0 - (mean0 * mean1); - corr /= sqrt (sigma0 * sigma1); - corr *= cc0 / (cc0 - 1); - - tab_double (t, 3, v + heading_rows, TAB_RIGHT, corr, NULL, RC_OTHER); - tab_double (t, 4, v + heading_rows, TAB_RIGHT, - 2.0 * significance_of_correlation (corr, cc0), NULL, RC_PVALUE); + double corr = ((pp->sum_of_prod / cc0 - mean0 * mean1) + / sqrt (sigma0 * sigma1) * cc0 / (cc0 - 1)); + double sig = 2.0 * significance_of_correlation (corr, cc0); + double entries[] = { cc0, corr, sig }; + for (size_t i = 0; i < sizeof entries / sizeof *entries; i++) + pivot_table_put2 (table, i, row, pivot_value_new_number (entries[i])); } - tab_submit (t); + pivot_table_submit (table); } static void paired_test (const struct tt *tt, const struct paired_samp *os) { - size_t n_pairs = hmapx_count (&os->hmap); - struct hmapx_node *node; - struct pair_stats *pp = NULL; - - const int heading_rows = 3; - const int heading_cols = 2; - const size_t rows = heading_rows + n_pairs; - const size_t cols = 10; - const struct fmt_spec *wfmt = tt->wv ? var_get_print_format (tt->wv) : & F_8_0; - - struct tab_table *t = tab_create (cols, rows); - tab_set_format (t, RC_WEIGHT, wfmt); - tab_headers (t, 0, 0, heading_rows, 0); - tab_box (t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols - 1, rows - 1); - tab_hline (t, TAL_2, 0, cols - 1, 3); - - tab_title (t, _("Paired Samples Test")); - tab_hline (t, TAL_1, heading_cols, 6, 1); - tab_vline (t, TAL_2, heading_cols, 0, rows - 1); - - tab_box (t, -1, -1, -1, TAL_1, heading_cols, 0, cols - 1, rows - 1); - - tab_joint_text (t, 2, 0, 6, 0, TAB_CENTER, - _("Paired Differences")); - - tab_joint_text_format (t, 5, 1, 6, 1, TAB_CENTER, - _("%g%% Confidence Interval of the Difference"), - tt->confidence * 100.0); - - tab_hline (t, TAL_1, 5, 6, 2); - tab_text (t, 7, 2, TAB_CENTER | TAT_TITLE, _("t")); - tab_text (t, 8, 2, TAB_CENTER | TAT_TITLE, _("df")); - tab_text (t, 9, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)")); - tab_text (t, 4, 2, TAB_CENTER | TAT_TITLE, _("Std. Error Mean")); - tab_text (t, 3, 2, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); - tab_text (t, 2, 2, TAB_CENTER | TAT_TITLE, _("Mean")); - - tab_text (t, 5, 2, TAB_CENTER | TAT_TITLE, _("Lower")); - tab_text (t, 6, 2, TAB_CENTER | TAT_TITLE, _("Upper")); - - HMAPX_FOR_EACH (pp, node, &os->hmap) + struct pivot_table *table = pivot_table_create (N_("Paired Samples Test")); + pivot_table_set_weight_var (table, tt->wv); + + struct pivot_dimension *statistics = pivot_dimension_create ( + table, PIVOT_AXIS_COLUMN, N_("Statistics")); + struct pivot_category *group = pivot_category_create_group ( + statistics->root, N_("Paired Differences"), + N_("Mean"), PIVOT_RC_OTHER, + N_("Std. Deviation"), PIVOT_RC_OTHER, + N_("S.E. Mean"), PIVOT_RC_OTHER); + struct pivot_category *interval = pivot_category_create_group__ ( + group, pivot_value_new_text_format ( + N_("%g%% Confidence Interval of the Difference"), + tt->confidence * 100.0)); + pivot_category_create_leaves (interval, + N_("Lower"), PIVOT_RC_OTHER, + N_("Upper"), PIVOT_RC_OTHER); + pivot_category_create_leaves (statistics->root, + N_("t"), PIVOT_RC_OTHER, + N_("df"), PIVOT_RC_COUNT, + N_("Sig. (2-tailed)"), PIVOT_RC_SIGNIFICANCE); + + struct pivot_dimension *pairs = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Pairs")); + + for (size_t i = 0; i < os->n_ps; i++) { - int v = pp->posn; - double cc, mean, sigma; - double df ; - double tval; - double p, q; - double se_mean; - - moments_calculate (pp->mom_diff, &cc, &mean, &sigma, NULL, NULL); - - df = cc - 1.0; - tab_text_format (t, 0, v + heading_rows, TAB_LEFT, _("Pair %d"), v + 1); + struct pair_stats *pp = &os->ps[i]; + struct pivot_category *group = pivot_category_create_group__ ( + pairs->root, pivot_value_new_text_format (N_("Pair %d"), i + 1)); - tab_text_format (t, 1, v + heading_rows, TAB_LEFT, _("%s - %s"), - var_to_string (pp->var0), - var_to_string (pp->var1)); + int row = pivot_category_create_leaf ( + group, pivot_value_new_text_format (N_("%s - %s"), + var_to_string (pp->var0), + var_to_string (pp->var1))); - tval = mean * sqrt (cc / sigma); - se_mean = sqrt (sigma / cc); - - tab_double (t, 2, v + heading_rows, TAB_RIGHT, mean, NULL, RC_OTHER); - tab_double (t, 3, v + heading_rows, TAB_RIGHT, sqrt (sigma), NULL, RC_OTHER); - tab_double (t, 4, v + heading_rows, TAB_RIGHT, se_mean, NULL, RC_OTHER); + double cc, mean, sigma; + moments_calculate (pp->mom_diff, &cc, &mean, &sigma, NULL, NULL); - tab_double (t, 7, v + heading_rows, TAB_RIGHT, tval, NULL, RC_OTHER); - tab_double (t, 8, v + heading_rows, TAB_RIGHT, df, NULL, RC_WEIGHT); + double df = cc - 1.0; + double t = mean * sqrt (cc / sigma); + double se_mean = sqrt (sigma / cc); - p = gsl_cdf_tdist_P (tval, df); - q = gsl_cdf_tdist_Q (tval, df); + double p = gsl_cdf_tdist_P (t, df); + double q = gsl_cdf_tdist_Q (t, df); + double sig = 2.0 * (t > 0 ? q : p); - tab_double (t, 9, v + heading_rows, TAB_RIGHT, 2.0 * (tval > 0 ? q : p), NULL, RC_PVALUE); + double t_qinv = gsl_cdf_tdist_Qinv ((1.0 - tt->confidence) / 2.0, df); - tval = gsl_cdf_tdist_Qinv ( (1.0 - tt->confidence) / 2.0, df); + double entries[] = { + mean, + sqrt (sigma), + se_mean, + mean - t_qinv * se_mean, + mean + t_qinv * se_mean, + t, + df, + sig, + }; + for (size_t i = 0; i < sizeof entries / sizeof *entries; i++) + pivot_table_put2 (table, i, row, pivot_value_new_number (entries[i])); - tab_double (t, 5, v + heading_rows, TAB_RIGHT, mean - tval * se_mean, NULL, RC_OTHER); - tab_double (t, 6, v + heading_rows, TAB_RIGHT, mean + tval * se_mean, NULL, RC_OTHER); } - tab_submit (t); + pivot_table_submit (table); }