X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Foneway.c;h=f9476d5c951bc4b732c09f10613af0de3a96838c;hb=4443108e98b43b196f3f07217c6ec8bd96581367;hp=78bd215088d289b81e63a71a5ba5d511df4c61a9;hpb=178a771cd0c7cf2f4d6d08ac4b8431656aa29274;p=pspp diff --git a/src/language/stats/oneway.c b/src/language/stats/oneway.c index 78bd215088..f9476d5c95 100644 --- a/src/language/stats/oneway.c +++ b/src/language/stats/oneway.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2007, 2009, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2007, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -39,6 +39,7 @@ #include "linreg/sweep.h" #include "tukey/tukey.h" #include "math/categoricals.h" +#include "math/interaction.h" #include "math/covariance.h" #include "math/levene.h" #include "math/moments.h" @@ -51,6 +52,7 @@ /* Workspace variable for each dependent variable */ struct per_var_ws { + struct interaction *iact; struct categoricals *cat; struct covariance *cov; struct levene *nl; @@ -163,6 +165,9 @@ df_individual (const struct per_var_ws *pvw UNUSED, const struct moments1 *mom_i moments1_calculate (mom_i, &n_i, NULL, &var_i, 0, 0); moments1_calculate (mom_j, &n_j, NULL, &var_j, 0, 0); + + if ( n_i <= 1.0 || n_j <= 1.0) + return SYSMIS; nom = pow2 (var_i/n_i + var_j/n_j); denom = pow2 (var_i/n_i) / (n_i - 1) + pow2 (var_j/n_j) / (n_j - 1); @@ -190,6 +195,9 @@ static double sidak_pinv (double std_err, double alpha, double df, int k, const static double tukey_pinv (double std_err, double alpha, double df, int k, const struct moments1 *mom_i UNUSED, const struct moments1 *mom_j UNUSED) { + if ( k < 2 || df < 2) + return SYSMIS; + return std_err / sqrt (2.0) * qtukey (1 - alpha, 1.0, k, df, 1, 0); } @@ -210,6 +218,9 @@ static double gh_pinv (double std_err UNUSED, double alpha, double df, int k, co m = sqrt ((var_i/n_i + var_j/n_j) / 2.0); + if ( k < 2 || df < 2) + return SYSMIS; + return m * qtukey (1 - alpha, 1.0, k, df, 1, 0); } @@ -223,6 +234,8 @@ multiple_comparison_sig (double std_err, int k = pvw->n_groups; double df = ph->dff (pvw, dd_i->mom, dd_j->mom); double ts = ph->tsf (k, dd_i->mom, dd_j->mom, std_err); + if ( df == SYSMIS) + return SYSMIS; return ph->p1f (ts, k - 1, df); } @@ -231,13 +244,20 @@ mc_half_range (const struct oneway_spec *cmd, const struct per_var_ws *pvw, doub { int k = pvw->n_groups; double df = ph->dff (pvw, dd_i->mom, dd_j->mom); + if ( df == SYSMIS) + return SYSMIS; return ph->pinv (std_err, cmd->alpha, df, k, dd_i->mom, dd_j->mom); } static double tukey_1tailsig (double ts, double df1, double df2) { - double twotailedsig = 1.0 - ptukey (ts, 1.0, df1 + 1, df2, 1, 0); + double twotailedsig; + + if (df2 < 2 || df1 < 1) + return SYSMIS; + + twotailedsig = 1.0 - ptukey (ts, 1.0, df1 + 1, df2, 1, 0); return twotailedsig / 2.0; } @@ -365,6 +385,37 @@ static void show_homogeneity (const struct oneway_spec *, const struct oneway_wo static void output_oneway (const struct oneway_spec *, struct oneway_workspace *ws); static void run_oneway (const struct oneway_spec *cmd, struct casereader *input, const struct dataset *ds); + +static void +destroy_coeff_list (struct contrasts_node *coeff_list) +{ + struct coeff_node *cn = NULL; + struct coeff_node *cnx = NULL; + struct ll_list *cl = &coeff_list->coefficient_list; + + ll_for_each_safe (cn, cnx, struct coeff_node, ll, cl) + { + free (cn); + } + + free (coeff_list); +} + +static void +oneway_cleanup (struct oneway_spec *cmd) +{ + struct contrasts_node *coeff_list = NULL; + struct contrasts_node *coeff_next = NULL; + ll_for_each_safe (coeff_list, coeff_next, struct contrasts_node, ll, &cmd->contrast_list) + { + destroy_coeff_list (coeff_list); + } + + free (cmd->posthoc); +} + + + int cmd_oneway (struct lexer *lexer, struct dataset *ds) { @@ -486,6 +537,7 @@ cmd_oneway (struct lexer *lexer, struct dataset *ds) } else { + destroy_coeff_list (cl); lex_error (lexer, NULL); goto error; } @@ -541,10 +593,12 @@ cmd_oneway (struct lexer *lexer, struct dataset *ds) ok = proc_commit (ds) && ok; } + oneway_cleanup (&oneway); free (oneway.vars); return CMD_SUCCESS; error: + oneway_cleanup (&oneway); free (oneway.vars); return CMD_FAILURE; } @@ -574,7 +628,7 @@ dd_destroy (struct descriptive_data *dd) } static void * -makeit (void *aux1, void *aux2 UNUSED) +makeit (const void *aux1, void *aux2 UNUSED) { const struct variable *var = aux1; @@ -584,27 +638,25 @@ makeit (void *aux1, void *aux2 UNUSED) } static void -updateit (void *user_data, - enum mv_class exclude, - const struct variable *wv, - const struct variable *catvar UNUSED, - const struct ccase *c, - void *aux1, void *aux2) +killit (const void *aux1 UNUSED, void *aux2 UNUSED, void *user_data) { struct descriptive_data *dd = user_data; - const struct variable *varp = aux1; + dd_destroy (dd); +} - const union value *valx = case_data (c, varp); - struct descriptive_data *dd_total = aux2; +static void +updateit (const void *aux1, void *aux2, void *user_data, + const struct ccase *c, double weight) +{ + struct descriptive_data *dd = user_data; - double weight; + const struct variable *varp = aux1; - if ( var_is_value_missing (varp, valx, exclude)) - return; + const union value *valx = case_data (c, varp); - weight = wv != NULL ? case_data (c, wv)->f : 1.0; + struct descriptive_data *dd_total = aux2; moments1_add (dd->mom, valx->f, weight); if (valx->f < dd->minimum) @@ -651,11 +703,20 @@ run_oneway (const struct oneway_spec *cmd, for (v = 0; v < cmd->n_vars; ++v) { - ws.vws[v].cat = categoricals_create (&cmd->indep_var, 1, cmd->wv, - cmd->exclude, makeit, updateit, - CONST_CAST (struct variable *, - cmd->vars[v]), - ws.dd_total[v]); + struct payload payload; + payload.create = makeit; + payload.update = updateit; + payload.calculate = NULL; + payload.destroy = killit; + + ws.vws[v].iact = interaction_create (cmd->indep_var); + ws.vws[v].cat = categoricals_create (&ws.vws[v].iact, 1, cmd->wv, + cmd->exclude, cmd->exclude); + + categoricals_set_payload (ws.vws[v].cat, &payload, + CONST_CAST (struct variable *, cmd->vars[v]), + ws.dd_total[v]); + ws.vws[v].cov = covariance_2pass_create (1, &cmd->vars[v], ws.vws[v].cat, @@ -754,9 +815,24 @@ run_oneway (const struct oneway_spec *cmd, for (v = 0; v < cmd->n_vars; ++v) { + const gsl_matrix *ucm; + gsl_matrix *cm; struct per_var_ws *pvw = &ws.vws[v]; - gsl_matrix *cm = covariance_calculate_unnormalized (pvw->cov); const struct categoricals *cats = covariance_get_categoricals (pvw->cov); + const bool ok = categoricals_sane (cats); + + if ( ! ok) + { + msg (MW, + _("Dependent variable %s has no non-missing values. No analysis for this variable will be done."), + var_get_name (cmd->vars[v])); + continue; + } + + ucm = covariance_calculate_unnormalized (pvw->cov); + + cm = gsl_matrix_alloc (ucm->size1, ucm->size2); + gsl_matrix_memcpy (cm, ucm); moments1_calculate (ws.dd_total[v]->mom, &pvw->n, NULL, NULL, NULL, NULL); @@ -768,21 +844,22 @@ run_oneway (const struct oneway_spec *cmd, pvw->ssa = pvw->sst - pvw->sse; - pvw->n_groups = categoricals_total (cats); + pvw->n_groups = categoricals_n_total (cats); pvw->mse = (pvw->sst - pvw->ssa) / (pvw->n - pvw->n_groups); - - gsl_matrix_free (cm); } for (v = 0; v < cmd->n_vars; ++v) { const struct categoricals *cats = covariance_get_categoricals (ws.vws[v].cov); - categoricals_done (cats); - - if (categoricals_total (cats) > ws.actual_number_of_groups) - ws.actual_number_of_groups = categoricals_total (cats); + if ( ! categoricals_is_complete (cats)) + { + continue; + } + + if (categoricals_n_total (cats) > ws.actual_number_of_groups) + ws.actual_number_of_groups = categoricals_n_total (cats); } casereader_destroy (input); @@ -793,12 +870,15 @@ run_oneway (const struct oneway_spec *cmd, taint_destroy (taint); finish: + for (v = 0; v < cmd->n_vars; ++v) { covariance_destroy (ws.vws[v].cov); levene_destroy (ws.vws[v].nl); dd_destroy (ws.dd_total[v]); + interaction_destroy (ws.vws[v].iact); } + free (ws.vws); free (ws.dd_total); } @@ -825,10 +905,11 @@ output_oneway (const struct oneway_spec *cmd, struct oneway_workspace *ws) if (ll_count (cl) != ws->actual_number_of_groups) { msg (SW, - _("In contrast list %zu, the number of coefficients (%d) does not equal the number of groups (%d). This contrast list will be ignored."), + _("In contrast list %zu, the number of coefficients (%zu) does not equal the number of groups (%d). This contrast list will be ignored."), i, ll_count (cl), ws->actual_number_of_groups); ll_remove (&coeff_list->ll); + destroy_coeff_list (coeff_list); continue; } @@ -857,7 +938,12 @@ output_oneway (const struct oneway_spec *cmd, struct oneway_workspace *ws) { int v; for (v = 0 ; v < cmd->n_vars; ++v) - show_comparisons (cmd, ws, v); + { + const struct categoricals *cats = covariance_get_categoricals (ws->vws[v].cov); + + if ( categoricals_is_complete (cats)) + show_comparisons (cmd, ws, v); + } } } @@ -1013,7 +1099,7 @@ show_descriptives (const struct oneway_spec *cmd, const struct oneway_workspace if ( v > 0) tab_hline (t, TAL_1, 0, n_cols - 1, row); - for (count = 0; count < categoricals_total (cats); ++count) + for (count = 0; count < categoricals_n_total (cats); ++count) { double T; double n, mean, variance; @@ -1021,7 +1107,7 @@ show_descriptives (const struct oneway_spec *cmd, const struct oneway_workspace struct string vstr; - const union value *gval = categoricals_get_value_by_category (cats, count); + const struct ccase *gcc = categoricals_get_case_by_category (cats, count); const struct descriptive_data *dd = categoricals_get_user_data_by_category (cats, count); moments1_calculate (dd->mom, &n, &mean, &variance, NULL, NULL); @@ -1031,7 +1117,7 @@ show_descriptives (const struct oneway_spec *cmd, const struct oneway_workspace ds_init_empty (&vstr); - var_append_value_name (cmd->indep_var, gval, &vstr); + var_append_value_name (cmd->indep_var, case_data (gcc, cmd->indep_var), &vstr); tab_text (t, 1, row + count, TAB_LEFT | TAT_TITLE, @@ -1066,6 +1152,7 @@ show_descriptives (const struct oneway_spec *cmd, const struct oneway_workspace tab_double (t, 9, row + count, 0, dd->maximum, fmt); } + if (categoricals_is_complete (cats)) { double T; double n, mean, variance; @@ -1097,12 +1184,13 @@ show_descriptives (const struct oneway_spec *cmd, const struct oneway_workspace tab_double (t, 7, row + count, 0, mean + T * std_error, NULL); + /* Min and Max */ tab_double (t, 8, row + count, 0, ws->dd_total[v]->minimum, fmt); tab_double (t, 9, row + count, 0, ws->dd_total[v]->maximum, fmt); } - row += categoricals_total (cats) + 1; + row += categoricals_n_total (cats) + 1; } tab_submit (t); @@ -1231,13 +1319,13 @@ show_contrast_coeffs (const struct oneway_spec *cmd, const struct oneway_workspa ++count, coeffi = ll_next (coeffi)) { const struct categoricals *cats = covariance_get_categoricals (cov); - const union value *val = categoricals_get_value_by_category (cats, count); + const struct ccase *gcc = categoricals_get_case_by_category (cats, count); struct coeff_node *coeffn = ll_data (coeffi, struct coeff_node, ll); struct string vstr; ds_init_empty (&vstr); - var_append_value_name (cmd->indep_var, val, &vstr); + var_append_value_name (cmd->indep_var, case_data (gcc, cmd->indep_var), &vstr); tab_text (t, count + 2, 1, TAB_CENTER | TAT_TITLE, ds_cstr (&vstr)); @@ -1485,7 +1573,7 @@ show_comparisons (const struct oneway_spec *cmd, const struct oneway_workspace * tab_vline (t, TAL_2, heading_cols, 0, n_rows - 1); - tab_title (t, _("Multiple Comparisons")); + tab_title (t, _("Multiple Comparisons (%s)"), var_to_string (cmd->vars[v])); tab_text_format (t, 1, 1, TAB_LEFT | TAT_TITLE, _("(I) %s"), var_to_string (cmd->indep_var)); tab_text_format (t, 2, 1, TAB_LEFT | TAT_TITLE, _("(J) %s"), var_to_string (cmd->indep_var)); @@ -1518,10 +1606,11 @@ show_comparisons (const struct oneway_spec *cmd, const struct oneway_workspace * struct string vstr; int j; struct descriptive_data *dd_i = categoricals_get_user_data_by_category (cat, i); - const union value *gval = categoricals_get_value_by_category (cat, i); + const struct ccase *gcc = categoricals_get_case_by_category (cat, i); + ds_init_empty (&vstr); - var_append_value_name (cmd->indep_var, gval, &vstr); + var_append_value_name (cmd->indep_var, case_data (gcc, cmd->indep_var), &vstr); if ( i != 0) tab_hline (t, TAL_1, 1, n_cols - 1, r); @@ -1534,13 +1623,14 @@ show_comparisons (const struct oneway_spec *cmd, const struct oneway_workspace * double std_err; double weight_j, mean_j, var_j; double half_range; + const struct ccase *cc; struct descriptive_data *dd_j = categoricals_get_user_data_by_category (cat, j); if (j == i) continue; ds_clear (&vstr); - gval = categoricals_get_value_by_category (cat, j); - var_append_value_name (cmd->indep_var, gval, &vstr); + cc = categoricals_get_case_by_category (cat, j); + var_append_value_name (cmd->indep_var, case_data (cc, cmd->indep_var), &vstr); tab_text (t, 2, r + rx, TAB_LEFT | TAT_TITLE, ds_cstr (&vstr)); moments1_calculate (dd_j->mom, &weight_j, &mean_j, &var_j, 0, 0);