X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fglm.c;h=d0dc4bc83ca9d2a926593c233cabdba58755070f;hb=cd221d80fafb54e550398c6de105d4c1b7f02ba0;hp=a5e047e86d1dcb118a5785b126a30411e52ea695;hpb=933f16e8847bacf67eec53e7ba77b6c069125700;p=pspp diff --git a/src/language/stats/glm.c b/src/language/stats/glm.c index a5e047e86d..d0dc4bc83c 100644 --- a/src/language/stats/glm.c +++ b/src/language/stats/glm.c @@ -43,9 +43,10 @@ #include "math/covariance.h" #include "math/interaction.h" #include "math/moments.h" -#include "output/tab.h" +#include "output/pivot-table.h" #include "gettext.h" +#define N_(msgid) msgid #define _(msgid) gettext (msgid) struct glm_spec @@ -81,7 +82,7 @@ struct glm_workspace struct categoricals *cats; - /* + /* Sums of squares due to different variables. Element 0 is the SSE for the entire model. For i > 0, element i is the SS due to variable i. @@ -155,7 +156,8 @@ cmd_glm (struct lexer *lexer, struct dataset *ds) PV_NO_DUPLICATE | PV_NUMERIC)) goto error; - lex_force_match (lexer, T_BY); + if (! lex_force_match (lexer, T_BY)) + goto error; if (!parse_variables_const (lexer, glm.dict, &glm.factor_vars, &glm.n_factor_vars, @@ -229,10 +231,10 @@ cmd_glm (struct lexer *lexer, struct dataset *ds) lex_error (lexer, NULL); goto error; } - + glm.alpha = lex_number (lexer); lex_get (lexer); - if ( ! lex_force_match (lexer, T_RPAREN)) + if (! lex_force_match (lexer, T_RPAREN)) { lex_error (lexer, NULL); goto error; @@ -248,34 +250,28 @@ cmd_glm (struct lexer *lexer, struct dataset *ds) else if (lex_match_id (lexer, "METHOD")) { lex_match (lexer, T_EQUALS); - if ( !lex_force_match_id (lexer, "SSTYPE")) + if (!lex_force_match_id (lexer, "SSTYPE")) { lex_error (lexer, NULL); goto error; } - if ( ! lex_force_match (lexer, T_LPAREN)) + if (! lex_force_match (lexer, T_LPAREN)) { lex_error (lexer, NULL); goto error; } - if ( ! lex_force_int (lexer)) + if (!lex_force_int_range (lexer, "SSTYPE", 1, 3)) { lex_error (lexer, NULL); goto error; } glm.ss_type = lex_integer (lexer); - if (1 > glm.ss_type || 3 < glm.ss_type ) - { - msg (ME, _("Only types 1, 2 & 3 sums of squares are currently implemented")); - goto error; - } - lex_get (lexer); - if ( ! lex_force_match (lexer, T_RPAREN)) + if (! lex_force_match (lexer, T_RPAREN)) { lex_error (lexer, NULL); goto error; @@ -305,7 +301,7 @@ cmd_glm (struct lexer *lexer, struct dataset *ds) } } - if ( ! design ) + if (! design) { design_full (&glm); } @@ -359,11 +355,11 @@ fill_submatrix (const gsl_matrix * cov, gsl_matrix * submatrix, bool *dropped_f) size_t j; size_t n = 0; size_t m = 0; - + for (i = 0; i < cov->size1; i++) { if (not_dropped (i, dropped_f)) - { + { m = 0; for (j = 0; j < cov->size2; j++) { @@ -372,7 +368,7 @@ fill_submatrix (const gsl_matrix * cov, gsl_matrix * submatrix, bool *dropped_f) gsl_matrix_set (submatrix, n, m, gsl_matrix_get (cov, i, j)); m++; - } + } } n++; } @@ -380,7 +376,7 @@ fill_submatrix (const gsl_matrix * cov, gsl_matrix * submatrix, bool *dropped_f) } -/* +/* Type 1 sums of squares. Populate SSQ with the Type 1 sums of squares according to COV */ @@ -390,8 +386,8 @@ ssq_type1 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) const gsl_matrix *cm = covariance_calculate_unnormalized (cov); size_t i; size_t k; - bool *model_dropped = xcalloc (covariance_dim (cov), sizeof (*model_dropped)); - bool *submodel_dropped = xcalloc (covariance_dim (cov), sizeof (*submodel_dropped)); + bool *model_dropped = XCALLOC (covariance_dim (cov), bool); + bool *submodel_dropped = XCALLOC (covariance_dim (cov), bool); const struct categoricals *cats = covariance_get_categoricals (cov); size_t n_dropped_model = 0; @@ -409,7 +405,7 @@ ssq_type1 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) { gsl_matrix *model_cov = NULL; gsl_matrix *submodel_cov = NULL; - + n_dropped_submodel = n_dropped_model; for (i = cmd->n_dep_vars; i < covariance_dim (cov); i++) { @@ -418,10 +414,10 @@ ssq_type1 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) for (i = cmd->n_dep_vars; i < covariance_dim (cov); i++) { - const struct interaction * x = + const struct interaction * x = categoricals_get_interaction_by_subscript (cats, i - cmd->n_dep_vars); - if ( x == cmd->interactions [k]) + if (x == cmd->interactions [k]) { model_dropped[i] = false; n_dropped_model--; @@ -439,7 +435,7 @@ ssq_type1 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) gsl_vector_set (ssq, k + 1, gsl_matrix_get (submodel_cov, 0, 0) - gsl_matrix_get (model_cov, 0, 0) - ); + ); gsl_matrix_free (model_cov); gsl_matrix_free (submodel_cov); @@ -449,7 +445,7 @@ ssq_type1 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) free (submodel_dropped); } -/* +/* Type 2 sums of squares. Populate SSQ with the Type 2 sums of squares according to COV */ @@ -459,8 +455,8 @@ ssq_type2 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) const gsl_matrix *cm = covariance_calculate_unnormalized (cov); size_t i; size_t k; - bool *model_dropped = xcalloc (covariance_dim (cov), sizeof (*model_dropped)); - bool *submodel_dropped = xcalloc (covariance_dim (cov), sizeof (*submodel_dropped)); + bool *model_dropped = XCALLOC (covariance_dim (cov), bool); + bool *submodel_dropped = XCALLOC (covariance_dim (cov), bool); const struct categoricals *cats = covariance_get_categoricals (cov); for (k = 0; k < cmd->n_interactions; k++) @@ -471,7 +467,7 @@ ssq_type2 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) size_t n_dropped_submodel = 0; for (i = cmd->n_dep_vars; i < covariance_dim (cov); i++) { - const struct interaction * x = + const struct interaction * x = categoricals_get_interaction_by_subscript (cats, i - cmd->n_dep_vars); model_dropped[i] = false; @@ -482,7 +478,7 @@ ssq_type2 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) n_dropped_submodel++; submodel_dropped[i] = true; - if ( cmd->interactions [k]->n_vars < x->n_vars) + if (cmd->interactions [k]->n_vars < x->n_vars) { assert (n_dropped_model < covariance_dim (cov)); n_dropped_model++; @@ -502,7 +498,7 @@ ssq_type2 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) gsl_vector_set (ssq, k + 1, gsl_matrix_get (submodel_cov, 0, 0) - gsl_matrix_get (model_cov, 0, 0) - ); + ); gsl_matrix_free (model_cov); gsl_matrix_free (submodel_cov); @@ -512,7 +508,7 @@ ssq_type2 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) free (submodel_dropped); } -/* +/* Type 3 sums of squares. Populate SSQ with the Type 2 sums of squares according to COV */ @@ -522,8 +518,8 @@ ssq_type3 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) const gsl_matrix *cm = covariance_calculate_unnormalized (cov); size_t i; size_t k; - bool *model_dropped = xcalloc (covariance_dim (cov), sizeof (*model_dropped)); - bool *submodel_dropped = xcalloc (covariance_dim (cov), sizeof (*submodel_dropped)); + bool *model_dropped = XCALLOC (covariance_dim (cov), bool); + bool *submodel_dropped = XCALLOC (covariance_dim (cov), bool); const struct categoricals *cats = covariance_get_categoricals (cov); double ss0; @@ -541,12 +537,12 @@ ssq_type3 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) for (i = cmd->n_dep_vars; i < covariance_dim (cov); i++) { - const struct interaction * x = + const struct interaction * x = categoricals_get_interaction_by_subscript (cats, i - cmd->n_dep_vars); model_dropped[i] = false; - if ( cmd->interactions [k] == x) + if (cmd->interactions [k] == x) { assert (n_dropped_model < covariance_dim (cov)); n_dropped_model++; @@ -586,11 +582,21 @@ run_glm (struct glm_spec *cmd, struct casereader *input, struct glm_workspace ws; struct covariance *cov; + input = casereader_create_filter_missing (input, + cmd->dep_vars, cmd->n_dep_vars, + cmd->exclude, + NULL, NULL); + + input = casereader_create_filter_missing (input, + cmd->factor_vars, cmd->n_factor_vars, + cmd->exclude, + NULL, NULL); + ws.cats = categoricals_create (cmd->interactions, cmd->n_interactions, - cmd->wv, cmd->exclude, MV_ANY); + cmd->wv, MV_ANY); cov = covariance_2pass_create (cmd->n_dep_vars, cmd->dep_vars, - ws.cats, cmd->wv, cmd->exclude); + ws.cats, cmd->wv, cmd->exclude, true); c = casereader_peek (input, 0); @@ -640,16 +646,14 @@ run_glm (struct glm_spec *cmd, struct casereader *input, if (cmd->dump_coding) { - struct tab_table *t = - covariance_dump_enc_header (cov, - 1 + casereader_count_cases (input)); + struct pivot_table *t = covariance_dump_enc_header (cov); for (reader = input; (c = casereader_read (reader)) != NULL; case_unref (c)) { covariance_dump_enc (cov, c, t); } - casereader_destroy (reader); - tab_submit (t); + + pivot_table_submit (t); } { @@ -698,175 +702,120 @@ run_glm (struct glm_spec *cmd, struct casereader *input, taint_destroy (taint); } -static const char *roman[] = - { - "", /* The Romans had no concept of zero */ - "I", - "II", - "III", - "IV" - }; +static void +put_glm_row (struct pivot_table *table, int row, + double a, double b, double c, double d, double e) +{ + double entries[] = { a, b, c, d, e }; + + for (size_t col = 0; col < sizeof entries / sizeof *entries; col++) + if (entries[col] != SYSMIS) + pivot_table_put2 (table, col, row, + pivot_value_new_number (entries[col])); +} static void output_glm (const struct glm_spec *cmd, const struct glm_workspace *ws) { - const struct fmt_spec *wfmt = - cmd->wv ? var_get_print_format (cmd->wv) : &F_8_0; + struct pivot_table *table = pivot_table_create ( + N_("Tests of Between-Subjects Effects")); + + pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), + (cmd->ss_type == 1 ? N_("Type I Sum Of Squares") + : cmd->ss_type == 2 ? N_("Type II Sum Of Squares") + : N_("Type III Sum Of Squares")), PIVOT_RC_OTHER, + N_("df"), PIVOT_RC_COUNT, + N_("Mean Square"), PIVOT_RC_OTHER, + N_("F"), PIVOT_RC_OTHER, + N_("Sig."), PIVOT_RC_SIGNIFICANCE); + + struct pivot_dimension *source = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Source"), + cmd->intercept ? N_("Corrected Model") : N_("Model")); - double intercept_ssq; - double ssq_effects; double n_total, mean; - double df_corr = 1.0; - double mse = 0; - - int f; - int r; - const int heading_columns = 1; - const int heading_rows = 1; - struct tab_table *t; - - const int nc = 6; - int nr = heading_rows + 3 + cmd->n_interactions; - if (cmd->intercept) - nr += 2; - - msg (MW, "GLM is experimental. Do not rely on these results."); - t = tab_create (nc, nr); - tab_set_format (t, RC_WEIGHT, wfmt); - tab_title (t, _("Tests of Between-Subjects Effects")); - - tab_headers (t, heading_columns, 0, heading_rows, 0); - - tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1); - - tab_hline (t, TAL_2, 0, nc - 1, heading_rows); - tab_vline (t, TAL_2, heading_columns, 0, nr - 1); - - tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source")); - - /* TRANSLATORS: The parameter is a roman numeral */ - tab_text_format (t, 1, 0, TAB_CENTER | TAT_TITLE, - _("Type %s Sum of Squares"), - roman[cmd->ss_type]); - tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("df")); - tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean Square")); - tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("F")); - tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("Sig.")); - moments_calculate (ws->totals, &n_total, &mean, NULL, NULL, NULL); - df_corr += categoricals_df_total (ws->cats); - - r = heading_rows; - if (cmd->intercept) - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Corrected Model")); - else - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Model")); - - r++; + double df_corr = 1.0 + categoricals_df_total (ws->cats); - mse = gsl_vector_get (ws->ssq, 0) / (n_total - df_corr); - - intercept_ssq = pow2 (mean * n_total) / n_total; - - ssq_effects = 0.0; + double mse = gsl_vector_get (ws->ssq, 0) / (n_total - df_corr); + double intercept_ssq = pow2 (mean * n_total) / n_total; if (cmd->intercept) { - const double df = 1.0; - const double F = intercept_ssq / df / mse; - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Intercept")); + int row = pivot_category_create_leaf ( + source->root, pivot_value_new_text (N_("Intercept"))); + /* The intercept for unbalanced models is of limited use and nobody knows how to calculate it properly */ if (categoricals_isbalanced (ws->cats)) - { - tab_double (t, 1, r, 0, intercept_ssq, NULL, RC_OTHER); - tab_double (t, 2, r, 0, 1.00, NULL, RC_WEIGHT); - tab_double (t, 3, r, 0, intercept_ssq / df, NULL, RC_OTHER); - tab_double (t, 4, r, 0, F, NULL, RC_OTHER); - tab_double (t, 5, r, 0, gsl_cdf_fdist_Q (F, df, n_total - df_corr), - NULL, RC_PVALUE); - } - r++; + { + const double df = 1.0; + const double F = intercept_ssq / df / mse; + put_glm_row (table, row, intercept_ssq, 1.0, intercept_ssq / df, + F, gsl_cdf_fdist_Q (F, df, n_total - df_corr)); + } } - for (f = 0; f < cmd->n_interactions; ++f) + double ssq_effects = 0.0; + for (int f = 0; f < cmd->n_interactions; ++f) { - struct string str = DS_EMPTY_INITIALIZER; double df = categoricals_df (ws->cats, f); - double ssq = gsl_vector_get (ws->ssq, f + 1); - double F; - ssq_effects += ssq; - - if (! cmd->intercept) + if (!cmd->intercept) { df++; ssq += intercept_ssq; } + double F = ssq / df / mse; - F = ssq / df / mse; + struct string str = DS_EMPTY_INITIALIZER; interaction_to_string (cmd->interactions[f], &str); - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, ds_cstr (&str)); - ds_destroy (&str); - - tab_double (t, 1, r, 0, ssq, NULL, RC_OTHER); - tab_double (t, 2, r, 0, df, NULL, RC_WEIGHT); - tab_double (t, 3, r, 0, ssq / df, NULL, RC_OTHER); - tab_double (t, 4, r, 0, F, NULL, RC_OTHER); + int row = pivot_category_create_leaf ( + source->root, pivot_value_new_user_text_nocopy (ds_steal_cstr (&str))); - tab_double (t, 5, r, 0, gsl_cdf_fdist_Q (F, df, n_total - df_corr), - NULL, RC_PVALUE); - r++; + put_glm_row (table, row, ssq, df, ssq / df, F, + gsl_cdf_fdist_Q (F, df, n_total - df_corr)); } { /* Model / Corrected Model */ double df = df_corr; double ssq = ws->total_ssq - gsl_vector_get (ws->ssq, 0); - double F; - - if ( cmd->intercept ) - df --; + if (cmd->intercept) + df--; else ssq += intercept_ssq; - - F = ssq / df / mse; - tab_double (t, 1, heading_rows, 0, ssq, NULL, RC_OTHER); - tab_double (t, 2, heading_rows, 0, df, NULL, RC_WEIGHT); - tab_double (t, 3, heading_rows, 0, ssq / df, NULL, RC_OTHER); - tab_double (t, 4, heading_rows, 0, F, NULL, RC_OTHER); - - tab_double (t, 5, heading_rows, 0, - gsl_cdf_fdist_Q (F, df, n_total - df_corr), NULL, RC_PVALUE); + double F = ssq / df / mse; + put_glm_row (table, 0, ssq, df, ssq / df, F, + gsl_cdf_fdist_Q (F, df, n_total - df_corr)); } { + int row = pivot_category_create_leaf (source->root, + pivot_value_new_text (N_("Error"))); const double df = n_total - df_corr; const double ssq = gsl_vector_get (ws->ssq, 0); const double mse = ssq / df; - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Error")); - tab_double (t, 1, r, 0, ssq, NULL, RC_OTHER); - tab_double (t, 2, r, 0, df, NULL, RC_WEIGHT); - tab_double (t, 3, r++, 0, mse, NULL, RC_OTHER); + put_glm_row (table, row, ssq, df, mse, SYSMIS, SYSMIS); } { - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Total")); - tab_double (t, 1, r, 0, ws->total_ssq + intercept_ssq, NULL, RC_OTHER); - tab_double (t, 2, r, 0, n_total, NULL, RC_WEIGHT); - - r++; + int row = pivot_category_create_leaf (source->root, + pivot_value_new_text (N_("Total"))); + put_glm_row (table, row, ws->total_ssq + intercept_ssq, n_total, + SYSMIS, SYSMIS, SYSMIS); } if (cmd->intercept) { - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Corrected Total")); - tab_double (t, 1, r, 0, ws->total_ssq, NULL, RC_OTHER); - tab_double (t, 2, r, 0, n_total - 1.0, NULL, RC_WEIGHT); + int row = pivot_category_create_leaf ( + source->root, pivot_value_new_text (N_("Corrected Total"))); + put_glm_row (table, row, ws->total_ssq, n_total - 1.0, SYSMIS, + SYSMIS, SYSMIS); } - tab_submit (t); + pivot_table_submit (table); } #if 0 @@ -893,19 +842,19 @@ static bool parse_nested_variable (struct lexer *lexer, struct glm_spec *glm) { const struct variable *v = NULL; - if ( ! lex_match_variable (lexer, glm->dict, &v)) + if (! lex_match_variable (lexer, glm->dict, &v)) return false; if (lex_match (lexer, T_LPAREN)) { - if ( ! parse_nested_variable (lexer, glm)) + if (! parse_nested_variable (lexer, glm)) return false; - if ( ! lex_force_match (lexer, T_RPAREN)) + if (! lex_force_match (lexer, T_RPAREN)) return false; } - lex_error (lexer, "Nested variables are not yet implemented"); return false; + lex_error (lexer, "Nested variables are not yet implemented"); return false; return true; } @@ -922,7 +871,7 @@ parse_design_term (struct lexer *lexer, struct glm_spec *glm) return true; } - if ( parse_nested_variable (lexer, glm)) + if (parse_nested_variable (lexer, glm)) return true; return false; @@ -940,7 +889,7 @@ parse_design_spec (struct lexer *lexer, struct glm_spec *glm) if (lex_token (lexer) == T_ENDCMD || lex_token (lexer) == T_SLASH) return true; - if ( ! parse_design_term (lexer, glm)) + if (! parse_design_term (lexer, glm)) return false; lex_match (lexer, T_COMMA);