X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fglm.c;h=a6898502708506e7ebc5be5ca76254a4424d8a73;hb=ca8ad314c70b94315b8e0d553843d4561b58c70c;hp=74e918b886b21f26d05b1fadf2d245faa60096bb;hpb=edd5c738dfef01c90d02e06a33b93fc9d38320b8;p=pspp diff --git a/src/language/stats/glm.c b/src/language/stats/glm.c index 74e918b886..a689850270 100644 --- a/src/language/stats/glm.c +++ b/src/language/stats/glm.c @@ -43,9 +43,10 @@ #include "math/covariance.h" #include "math/interaction.h" #include "math/moments.h" -#include "output/tab.h" +#include "output/pivot-table.h" #include "gettext.h" +#define N_(msgid) msgid #define _(msgid) gettext (msgid) struct glm_spec @@ -233,7 +234,7 @@ cmd_glm (struct lexer *lexer, struct dataset *ds) glm.alpha = lex_number (lexer); lex_get (lexer); - if ( ! lex_force_match (lexer, T_RPAREN)) + if (! lex_force_match (lexer, T_RPAREN)) { lex_error (lexer, NULL); goto error; @@ -249,34 +250,28 @@ cmd_glm (struct lexer *lexer, struct dataset *ds) else if (lex_match_id (lexer, "METHOD")) { lex_match (lexer, T_EQUALS); - if ( !lex_force_match_id (lexer, "SSTYPE")) + if (!lex_force_match_id (lexer, "SSTYPE")) { lex_error (lexer, NULL); goto error; } - if ( ! lex_force_match (lexer, T_LPAREN)) + if (! lex_force_match (lexer, T_LPAREN)) { lex_error (lexer, NULL); goto error; } - if ( ! lex_force_int (lexer)) + if (!lex_force_int_range (lexer, "SSTYPE", 1, 3)) { lex_error (lexer, NULL); goto error; } glm.ss_type = lex_integer (lexer); - if (1 > glm.ss_type || 3 < glm.ss_type ) - { - msg (ME, _("Only types 1, 2 & 3 sums of squares are currently implemented")); - goto error; - } - lex_get (lexer); - if ( ! lex_force_match (lexer, T_RPAREN)) + if (! lex_force_match (lexer, T_RPAREN)) { lex_error (lexer, NULL); goto error; @@ -306,7 +301,7 @@ cmd_glm (struct lexer *lexer, struct dataset *ds) } } - if ( ! design ) + if (! design) { design_full (&glm); } @@ -391,8 +386,8 @@ ssq_type1 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) const gsl_matrix *cm = covariance_calculate_unnormalized (cov); size_t i; size_t k; - bool *model_dropped = xcalloc (covariance_dim (cov), sizeof (*model_dropped)); - bool *submodel_dropped = xcalloc (covariance_dim (cov), sizeof (*submodel_dropped)); + bool *model_dropped = XCALLOC (covariance_dim (cov), bool); + bool *submodel_dropped = XCALLOC (covariance_dim (cov), bool); const struct categoricals *cats = covariance_get_categoricals (cov); size_t n_dropped_model = 0; @@ -422,7 +417,7 @@ ssq_type1 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) const struct interaction * x = categoricals_get_interaction_by_subscript (cats, i - cmd->n_dep_vars); - if ( x == cmd->interactions [k]) + if (x == cmd->interactions [k]) { model_dropped[i] = false; n_dropped_model--; @@ -440,7 +435,7 @@ ssq_type1 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) gsl_vector_set (ssq, k + 1, gsl_matrix_get (submodel_cov, 0, 0) - gsl_matrix_get (model_cov, 0, 0) - ); + ); gsl_matrix_free (model_cov); gsl_matrix_free (submodel_cov); @@ -460,8 +455,8 @@ ssq_type2 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) const gsl_matrix *cm = covariance_calculate_unnormalized (cov); size_t i; size_t k; - bool *model_dropped = xcalloc (covariance_dim (cov), sizeof (*model_dropped)); - bool *submodel_dropped = xcalloc (covariance_dim (cov), sizeof (*submodel_dropped)); + bool *model_dropped = XCALLOC (covariance_dim (cov), bool); + bool *submodel_dropped = XCALLOC (covariance_dim (cov), bool); const struct categoricals *cats = covariance_get_categoricals (cov); for (k = 0; k < cmd->n_interactions; k++) @@ -483,7 +478,7 @@ ssq_type2 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) n_dropped_submodel++; submodel_dropped[i] = true; - if ( cmd->interactions [k]->n_vars < x->n_vars) + if (cmd->interactions [k]->n_vars < x->n_vars) { assert (n_dropped_model < covariance_dim (cov)); n_dropped_model++; @@ -503,7 +498,7 @@ ssq_type2 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) gsl_vector_set (ssq, k + 1, gsl_matrix_get (submodel_cov, 0, 0) - gsl_matrix_get (model_cov, 0, 0) - ); + ); gsl_matrix_free (model_cov); gsl_matrix_free (submodel_cov); @@ -523,8 +518,8 @@ ssq_type3 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) const gsl_matrix *cm = covariance_calculate_unnormalized (cov); size_t i; size_t k; - bool *model_dropped = xcalloc (covariance_dim (cov), sizeof (*model_dropped)); - bool *submodel_dropped = xcalloc (covariance_dim (cov), sizeof (*submodel_dropped)); + bool *model_dropped = XCALLOC (covariance_dim (cov), bool); + bool *submodel_dropped = XCALLOC (covariance_dim (cov), bool); const struct categoricals *cats = covariance_get_categoricals (cov); double ss0; @@ -547,7 +542,7 @@ ssq_type3 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) model_dropped[i] = false; - if ( cmd->interactions [k] == x) + if (cmd->interactions [k] == x) { assert (n_dropped_model < covariance_dim (cov)); n_dropped_model++; @@ -598,7 +593,7 @@ run_glm (struct glm_spec *cmd, struct casereader *input, NULL, NULL); ws.cats = categoricals_create (cmd->interactions, cmd->n_interactions, - cmd->wv, cmd->exclude, MV_ANY); + cmd->wv, MV_ANY); cov = covariance_2pass_create (cmd->n_dep_vars, cmd->dep_vars, ws.cats, cmd->wv, cmd->exclude, true); @@ -651,16 +646,14 @@ run_glm (struct glm_spec *cmd, struct casereader *input, if (cmd->dump_coding) { - struct tab_table *t = - covariance_dump_enc_header (cov, - 1 + casereader_count_cases (input)); + struct pivot_table *t = covariance_dump_enc_header (cov); for (reader = input; (c = casereader_read (reader)) != NULL; case_unref (c)) { covariance_dump_enc (cov, c, t); } - casereader_destroy (reader); - tab_submit (t); + + pivot_table_submit (t); } { @@ -709,174 +702,120 @@ run_glm (struct glm_spec *cmd, struct casereader *input, taint_destroy (taint); } -static const char *roman[] = - { - "", /* The Romans had no concept of zero */ - "I", - "II", - "III", - "IV" - }; +static void +put_glm_row (struct pivot_table *table, int row, + double a, double b, double c, double d, double e) +{ + double entries[] = { a, b, c, d, e }; + + for (size_t col = 0; col < sizeof entries / sizeof *entries; col++) + if (entries[col] != SYSMIS) + pivot_table_put2 (table, col, row, + pivot_value_new_number (entries[col])); +} static void output_glm (const struct glm_spec *cmd, const struct glm_workspace *ws) { - const struct fmt_spec *wfmt = - cmd->wv ? var_get_print_format (cmd->wv) : &F_8_0; + struct pivot_table *table = pivot_table_create ( + N_("Tests of Between-Subjects Effects")); + + pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), + (cmd->ss_type == 1 ? N_("Type I Sum Of Squares") + : cmd->ss_type == 2 ? N_("Type II Sum Of Squares") + : N_("Type III Sum Of Squares")), PIVOT_RC_OTHER, + N_("df"), PIVOT_RC_COUNT, + N_("Mean Square"), PIVOT_RC_OTHER, + N_("F"), PIVOT_RC_OTHER, + N_("Sig."), PIVOT_RC_SIGNIFICANCE); + + struct pivot_dimension *source = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Source"), + cmd->intercept ? N_("Corrected Model") : N_("Model")); - double intercept_ssq; - double ssq_effects; double n_total, mean; - double df_corr = 1.0; - double mse = 0; - - int f; - int r; - const int heading_columns = 1; - const int heading_rows = 1; - struct tab_table *t; - - const int nc = 6; - int nr = heading_rows + 3 + cmd->n_interactions; - if (cmd->intercept) - nr += 2; - - t = tab_create (nc, nr); - tab_set_format (t, RC_WEIGHT, wfmt); - tab_title (t, _("Tests of Between-Subjects Effects")); - - tab_headers (t, heading_columns, 0, heading_rows, 0); - - tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1); - - tab_hline (t, TAL_2, 0, nc - 1, heading_rows); - tab_vline (t, TAL_2, heading_columns, 0, nr - 1); - - tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source")); - - /* TRANSLATORS: The parameter is a roman numeral */ - tab_text_format (t, 1, 0, TAB_CENTER | TAT_TITLE, - _("Type %s Sum of Squares"), - roman[cmd->ss_type]); - tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("df")); - tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean Square")); - tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("F")); - tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("Sig.")); - moments_calculate (ws->totals, &n_total, &mean, NULL, NULL, NULL); - df_corr += categoricals_df_total (ws->cats); + double df_corr = 1.0 + categoricals_df_total (ws->cats); - r = heading_rows; - if (cmd->intercept) - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Corrected Model")); - else - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Model")); - - r++; - - mse = gsl_vector_get (ws->ssq, 0) / (n_total - df_corr); - - intercept_ssq = pow2 (mean * n_total) / n_total; - - ssq_effects = 0.0; + double mse = gsl_vector_get (ws->ssq, 0) / (n_total - df_corr); + double intercept_ssq = pow2 (mean * n_total) / n_total; if (cmd->intercept) { - const double df = 1.0; - const double F = intercept_ssq / df / mse; - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Intercept")); + int row = pivot_category_create_leaf ( + source->root, pivot_value_new_text (N_("Intercept"))); + /* The intercept for unbalanced models is of limited use and nobody knows how to calculate it properly */ if (categoricals_isbalanced (ws->cats)) - { - tab_double (t, 1, r, 0, intercept_ssq, NULL, RC_OTHER); - tab_double (t, 2, r, 0, 1.00, NULL, RC_WEIGHT); - tab_double (t, 3, r, 0, intercept_ssq / df, NULL, RC_OTHER); - tab_double (t, 4, r, 0, F, NULL, RC_OTHER); - tab_double (t, 5, r, 0, gsl_cdf_fdist_Q (F, df, n_total - df_corr), - NULL, RC_PVALUE); - } - r++; + { + const double df = 1.0; + const double F = intercept_ssq / df / mse; + put_glm_row (table, row, intercept_ssq, 1.0, intercept_ssq / df, + F, gsl_cdf_fdist_Q (F, df, n_total - df_corr)); + } } - for (f = 0; f < cmd->n_interactions; ++f) + double ssq_effects = 0.0; + for (int f = 0; f < cmd->n_interactions; ++f) { - struct string str = DS_EMPTY_INITIALIZER; double df = categoricals_df (ws->cats, f); - double ssq = gsl_vector_get (ws->ssq, f + 1); - double F; - ssq_effects += ssq; - - if (! cmd->intercept) + if (!cmd->intercept) { df++; ssq += intercept_ssq; } + double F = ssq / df / mse; - F = ssq / df / mse; + struct string str = DS_EMPTY_INITIALIZER; interaction_to_string (cmd->interactions[f], &str); - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, ds_cstr (&str)); - ds_destroy (&str); + int row = pivot_category_create_leaf ( + source->root, pivot_value_new_user_text_nocopy (ds_steal_cstr (&str))); - tab_double (t, 1, r, 0, ssq, NULL, RC_OTHER); - tab_double (t, 2, r, 0, df, NULL, RC_WEIGHT); - tab_double (t, 3, r, 0, ssq / df, NULL, RC_OTHER); - tab_double (t, 4, r, 0, F, NULL, RC_OTHER); - - tab_double (t, 5, r, 0, gsl_cdf_fdist_Q (F, df, n_total - df_corr), - NULL, RC_PVALUE); - r++; + put_glm_row (table, row, ssq, df, ssq / df, F, + gsl_cdf_fdist_Q (F, df, n_total - df_corr)); } { /* Model / Corrected Model */ double df = df_corr; double ssq = ws->total_ssq - gsl_vector_get (ws->ssq, 0); - double F; - - if ( cmd->intercept ) - df --; + if (cmd->intercept) + df--; else ssq += intercept_ssq; - - F = ssq / df / mse; - tab_double (t, 1, heading_rows, 0, ssq, NULL, RC_OTHER); - tab_double (t, 2, heading_rows, 0, df, NULL, RC_WEIGHT); - tab_double (t, 3, heading_rows, 0, ssq / df, NULL, RC_OTHER); - tab_double (t, 4, heading_rows, 0, F, NULL, RC_OTHER); - - tab_double (t, 5, heading_rows, 0, - gsl_cdf_fdist_Q (F, df, n_total - df_corr), NULL, RC_PVALUE); + double F = ssq / df / mse; + put_glm_row (table, 0, ssq, df, ssq / df, F, + gsl_cdf_fdist_Q (F, df, n_total - df_corr)); } { + int row = pivot_category_create_leaf (source->root, + pivot_value_new_text (N_("Error"))); const double df = n_total - df_corr; const double ssq = gsl_vector_get (ws->ssq, 0); const double mse = ssq / df; - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Error")); - tab_double (t, 1, r, 0, ssq, NULL, RC_OTHER); - tab_double (t, 2, r, 0, df, NULL, RC_WEIGHT); - tab_double (t, 3, r++, 0, mse, NULL, RC_OTHER); + put_glm_row (table, row, ssq, df, mse, SYSMIS, SYSMIS); } { - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Total")); - tab_double (t, 1, r, 0, ws->total_ssq + intercept_ssq, NULL, RC_OTHER); - tab_double (t, 2, r, 0, n_total, NULL, RC_WEIGHT); - - r++; + int row = pivot_category_create_leaf (source->root, + pivot_value_new_text (N_("Total"))); + put_glm_row (table, row, ws->total_ssq + intercept_ssq, n_total, + SYSMIS, SYSMIS, SYSMIS); } if (cmd->intercept) { - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Corrected Total")); - tab_double (t, 1, r, 0, ws->total_ssq, NULL, RC_OTHER); - tab_double (t, 2, r, 0, n_total - 1.0, NULL, RC_WEIGHT); + int row = pivot_category_create_leaf ( + source->root, pivot_value_new_text (N_("Corrected Total"))); + put_glm_row (table, row, ws->total_ssq, n_total - 1.0, SYSMIS, + SYSMIS, SYSMIS); } - tab_submit (t); + pivot_table_submit (table); } #if 0 @@ -903,20 +842,20 @@ static bool parse_nested_variable (struct lexer *lexer, struct glm_spec *glm) { const struct variable *v = NULL; - if ( ! lex_match_variable (lexer, glm->dict, &v)) + if (! lex_match_variable (lexer, glm->dict, &v)) return false; if (lex_match (lexer, T_LPAREN)) { - if ( ! parse_nested_variable (lexer, glm)) + if (! parse_nested_variable (lexer, glm)) return false; - if ( ! lex_force_match (lexer, T_RPAREN)) + if (! lex_force_match (lexer, T_RPAREN)) return false; } - lex_error (lexer, "Nested variables are not yet implemented"); return false; - return true; + lex_error (lexer, "Nested variables are not yet implemented"); + return false; } /* A design term is an interaction OR a nested variable */ @@ -927,12 +866,12 @@ parse_design_term (struct lexer *lexer, struct glm_spec *glm) if (parse_design_interaction (lexer, glm->dict, &iact)) { /* Interaction parsing successful. Add to list of interactions */ - glm->interactions = xrealloc (glm->interactions, sizeof *glm->interactions * ++glm->n_interactions); + glm->interactions = xrealloc (glm->interactions, sizeof (*glm->interactions) * ++glm->n_interactions); glm->interactions[glm->n_interactions - 1] = iact; return true; } - if ( parse_nested_variable (lexer, glm)) + if (parse_nested_variable (lexer, glm)) return true; return false; @@ -950,11 +889,10 @@ parse_design_spec (struct lexer *lexer, struct glm_spec *glm) if (lex_token (lexer) == T_ENDCMD || lex_token (lexer) == T_SLASH) return true; - if ( ! parse_design_term (lexer, glm)) + if (! parse_design_term (lexer, glm)) return false; lex_match (lexer, T_COMMA); return parse_design_spec (lexer, glm); } -