X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fglm.c;h=4e5732f53ba1cec9778e2afb45012dbca3b423cb;hb=f982df09b0eb59a54faaef4e54b864ad8c9e8d70;hp=f8a7f133cc254d19493c8856f16a656bee4ad935;hpb=111a0feadc4db60a4ed413724706a954cbd9e105;p=pspp diff --git a/src/language/stats/glm.c b/src/language/stats/glm.c index f8a7f133cc..4e5732f53b 100644 --- a/src/language/stats/glm.c +++ b/src/language/stats/glm.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 2010, 2011, 2012 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -33,6 +33,7 @@ #include "language/lexer/lexer.h" #include "language/lexer/value-parser.h" #include "language/lexer/variable-parser.h" +#include "libpspp/assertion.h" #include "libpspp/ll.h" #include "libpspp/message.h" #include "libpspp/misc.h" @@ -42,78 +43,78 @@ #include "math/covariance.h" #include "math/interaction.h" #include "math/moments.h" -#include "output/tab.h" +#include "output/pivot-table.h" #include "gettext.h" +#define N_(msgid) msgid #define _(msgid) gettext (msgid) struct glm_spec -{ - size_t n_dep_vars; - const struct variable **dep_vars; + { + const struct variable **dep_vars; + size_t n_dep_vars; - size_t n_factor_vars; - const struct variable **factor_vars; + const struct variable **factor_vars; + size_t n_factor_vars; - size_t n_interactions; - struct interaction **interactions; + struct interaction **interactions; + size_t n_interactions; - enum mv_class exclude; + enum mv_class exclude; - /* The weight variable */ - const struct variable *wv; + const struct variable *wv; /* The weight variable */ - const struct dictionary *dict; + const struct dictionary *dict; - bool intercept; + int ss_type; + bool intercept; - double alpha; -}; + double alpha; -struct glm_workspace -{ - double total_ssq; - struct moments *totals; + bool dump_coding; + }; - struct categoricals *cats; +struct glm_workspace + { + double total_ssq; + struct moments *totals; - /* - Sums of squares due to different variables. Element 0 is the SSE - for the entire model. For i > 0, element i is the SS due to - variable i. - */ - gsl_vector *ssq; -}; + struct categoricals *cats; + /* + Sums of squares due to different variables. Element 0 is the SSE + for the entire model. For i > 0, element i is the SS due to + variable i. + */ + gsl_vector *ssq; + }; /* Default design: all possible interactions */ static void design_full (struct glm_spec *glm) { - int sz; - int i = 0; - glm->n_interactions = (1 << glm->n_factor_vars) - 1; - - glm->interactions = xcalloc (glm->n_interactions, sizeof *glm->interactions); + size_t n = (1 << glm->n_factor_vars) - 1; + glm->interactions = xnmalloc (n, sizeof *glm->interactions); /* All subsets, with exception of the empty set, of [0, glm->n_factor_vars) */ - for (sz = 1; sz <= glm->n_factor_vars; ++sz) + for (size_t sz = 1; sz <= glm->n_factor_vars; ++sz) { gsl_combination *c = gsl_combination_calloc (glm->n_factor_vars, sz); do { struct interaction *iact = interaction_create (NULL); - int e; - for (e = 0 ; e < gsl_combination_k (c); ++e) - interaction_add_variable (iact, glm->factor_vars [gsl_combination_get (c, e)]); + for (int e = 0; e < gsl_combination_k (c); ++e) + interaction_add_variable ( + iact, glm->factor_vars [gsl_combination_get (c, e)]); - glm->interactions[i++] = iact; + glm->interactions[glm->n_interactions++] = iact; } while (gsl_combination_next (c) == GSL_SUCCESS); gsl_combination_free (c); } + assert (glm->n_interactions == n); } static void output_glm (const struct glm_spec *, @@ -121,37 +122,33 @@ static void output_glm (const struct glm_spec *, static void run_glm (struct glm_spec *cmd, struct casereader *input, const struct dataset *ds); - -static bool parse_design_spec (struct lexer *lexer, struct glm_spec *glm); - -/* Define to 1 if the /DESIGN subcommand should not be optional */ -#define DESIGN_MANDATORY 1 +static struct interaction *parse_design_term (struct lexer *, + const struct dictionary *); int cmd_glm (struct lexer *lexer, struct dataset *ds) { - int i; struct const_var_set *factors = NULL; - struct glm_spec glm; bool design = false; - glm.dict = dataset_dict (ds); - glm.n_dep_vars = 0; - glm.n_factor_vars = 0; - glm.n_interactions = 0; - glm.interactions = NULL; - glm.dep_vars = NULL; - glm.factor_vars = NULL; - glm.exclude = MV_ANY; - glm.intercept = true; - glm.wv = dict_get_weight (glm.dict); - glm.alpha = 0.05; - + struct dictionary *dict = dataset_dict (ds); + struct glm_spec glm = { + .dict = dict, + .exclude = MV_ANY, + .intercept = true, + .wv = dict_get_weight (dict), + .alpha = 0.05, + .ss_type = 3, + }; + + int dep_vars_start = lex_ofs (lexer); if (!parse_variables_const (lexer, glm.dict, &glm.dep_vars, &glm.n_dep_vars, PV_NO_DUPLICATE | PV_NUMERIC)) goto error; + int dep_vars_end = lex_ofs (lexer) - 1; - lex_force_match (lexer, T_BY); + if (!lex_force_match (lexer, T_BY)) + goto error; if (!parse_variables_const (lexer, glm.dict, &glm.factor_vars, &glm.n_factor_vars, @@ -160,13 +157,14 @@ cmd_glm (struct lexer *lexer, struct dataset *ds) if (glm.n_dep_vars > 1) { - msg (ME, _("Multivariate analysis is not yet implemented")); - return CMD_FAILURE; + lex_ofs_error (lexer, dep_vars_start, dep_vars_end, + _("Multivariate analysis is not yet implemented.")); + goto error; } - factors = - const_var_set_create_from_array (glm.factor_vars, glm.n_factor_vars); + factors = const_var_set_create_from_array (glm.factor_vars, glm.n_factor_vars); + size_t allocated_interactions = 0; while (lex_token (lexer) != T_ENDCMD) { lex_match (lexer, T_SLASH); @@ -178,16 +176,12 @@ cmd_glm (struct lexer *lexer, struct dataset *ds) && lex_token (lexer) != T_SLASH) { if (lex_match_id (lexer, "INCLUDE")) - { - glm.exclude = MV_SYSTEM; - } + glm.exclude = MV_SYSTEM; else if (lex_match_id (lexer, "EXCLUDE")) - { - glm.exclude = MV_ANY; - } + glm.exclude = MV_ANY; else { - lex_error (lexer, NULL); + lex_error_expecting (lexer, "INCLUDE", "EXCLUDE"); goto error; } } @@ -199,16 +193,12 @@ cmd_glm (struct lexer *lexer, struct dataset *ds) && lex_token (lexer) != T_SLASH) { if (lex_match_id (lexer, "INCLUDE")) - { - glm.intercept = true; - } + glm.intercept = true; else if (lex_match_id (lexer, "EXCLUDE")) - { - glm.intercept = false; - } + glm.intercept = false; else { - lex_error (lexer, NULL); + lex_error_expecting (lexer, "INCLUDE", "EXCLUDE"); goto error; } } @@ -216,130 +206,87 @@ cmd_glm (struct lexer *lexer, struct dataset *ds) else if (lex_match_id (lexer, "CRITERIA")) { lex_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "ALPHA")) - { - if (lex_force_match (lexer, T_LPAREN)) - { - if (! lex_force_num (lexer)) - { - lex_error (lexer, NULL); - goto error; - } - - glm.alpha = lex_number (lexer); - lex_get (lexer); - if ( ! lex_force_match (lexer, T_RPAREN)) - { - lex_error (lexer, NULL); - goto error; - } - } - } - else - { - lex_error (lexer, NULL); - goto error; - } + if (!lex_force_match_phrase (lexer, "ALPHA(") + || !lex_force_num (lexer)) + goto error; + glm.alpha = lex_number (lexer); + lex_get (lexer); + if (!lex_force_match (lexer, T_RPAREN)) + goto error; } else if (lex_match_id (lexer, "METHOD")) { lex_match (lexer, T_EQUALS); - if ( !lex_force_match_id (lexer, "SSTYPE")) - { - lex_error (lexer, NULL); - goto error; - } - - if ( ! lex_force_match (lexer, T_LPAREN)) - { - lex_error (lexer, NULL); - goto error; - } - - if ( ! lex_force_int (lexer)) - { - lex_error (lexer, NULL); - goto error; - } - - if (3 != lex_integer (lexer)) - { - msg (ME, _("Only type 3 sum of squares are currently implemented")); - goto error; - } + if (!lex_force_match_phrase (lexer, "SSTYPE(") + || !lex_force_int_range (lexer, "SSTYPE", 1, 3)) + goto error; + glm.ss_type = lex_integer (lexer); lex_get (lexer); - if ( ! lex_force_match (lexer, T_RPAREN)) - { - lex_error (lexer, NULL); - goto error; - } + if (!lex_force_match (lexer, T_RPAREN)) + goto error; } else if (lex_match_id (lexer, "DESIGN")) { lex_match (lexer, T_EQUALS); - if (! parse_design_spec (lexer, &glm)) - goto error; + do + { + struct interaction *iact = parse_design_term (lexer, glm.dict); + if (!iact) + goto error; + + if (glm.n_interactions >= allocated_interactions) + glm.interactions = x2nrealloc (glm.interactions, + &allocated_interactions, + sizeof *glm.interactions); + glm.interactions[glm.n_interactions++] = iact; + + lex_match (lexer, T_COMMA); + } + while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH); -#if DESIGN_MANDATORY - if ( glm.n_interactions == 0) - { - msg (ME, _("One or more design variables must be given")); - goto error; - } - - design = true; -#else if (glm.n_interactions > 0) design = true; -#endif + } + else if (lex_match_id (lexer, "SHOWCODES")) + { + /* Undocumented debug option */ + glm.dump_coding = true; } else { - lex_error (lexer, NULL); + lex_error_expecting (lexer, "MISSING", "INTERCEPT", "CRITERIA", + "METHOD", "DESIGN"); goto error; } } - if ( ! design ) - { -#if DESIGN_MANDATORY - lex_error (lexer, _("/DESIGN is mandatory in GLM")); - goto error; -#endif + if (!design) + design_full (&glm); - design_full (&glm); - } - - { - struct casegrouper *grouper; - struct casereader *group; - bool ok; - - grouper = casegrouper_create_splits (proc_open (ds), glm.dict); - while (casegrouper_get_next_group (grouper, &group)) - run_glm (&glm, group, ds); - ok = casegrouper_destroy (grouper); - ok = proc_commit (ds) && ok; - } + struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), glm.dict); + struct casereader *group; + while (casegrouper_get_next_group (grouper, &group)) + run_glm (&glm, group, ds); + bool ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; const_var_set_destroy (factors); free (glm.factor_vars); - for (i = 0 ; i < glm.n_interactions; ++i) + for (size_t i = 0; i < glm.n_interactions; ++i) interaction_destroy (glm.interactions[i]); + free (glm.interactions); free (glm.dep_vars); - return CMD_SUCCESS; error: - const_var_set_destroy (factors); free (glm.factor_vars); - for (i = 0 ; i < glm.n_interactions; ++i) + for (size_t i = 0; i < glm.n_interactions; ++i) interaction_destroy (glm.interactions[i]); free (glm.interactions); @@ -348,143 +295,297 @@ error: return CMD_FAILURE; } -static void get_ssq (struct covariance *, gsl_vector *, - const struct glm_spec *); +static inline bool +not_dropped (size_t j, const bool *ff) +{ + return !ff[j]; +} -static bool -not_dropped (size_t j, const size_t *dropped, size_t n_dropped) +static void +fill_submatrix (const gsl_matrix * cov, gsl_matrix * submatrix, bool *dropped_f) { size_t i; + size_t j; + size_t n = 0; + size_t m = 0; - for (i = 0; i < n_dropped; i++) + for (i = 0; i < cov->size1; i++) { - if (j == dropped[i]) - return false; + if (not_dropped (i, dropped_f)) + { + m = 0; + for (j = 0; j < cov->size2; j++) + { + if (not_dropped (j, dropped_f)) + { + gsl_matrix_set (submatrix, n, m, + gsl_matrix_get (cov, i, j)); + m++; + } + } + n++; + } } - return true; } + +/* + Type 1 sums of squares. + Populate SSQ with the Type 1 sums of squares according to COV + */ static void -get_ssq (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) +ssq_type1 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) { - gsl_matrix *cm = covariance_calculate_unnormalized (cov); + const gsl_matrix *cm = covariance_calculate_unnormalized (cov); size_t i; - size_t j; size_t k; - size_t *dropped = xcalloc (covariance_dim (cov), sizeof (*dropped)); + bool *model_dropped = XCALLOC (covariance_dim (cov), bool); + bool *submodel_dropped = XCALLOC (covariance_dim (cov), bool); const struct categoricals *cats = covariance_get_categoricals (cov); + size_t n_dropped_model = 0; + size_t n_dropped_submodel = 0; + + for (i = cmd->n_dep_vars; i < covariance_dim (cov); i++) + { + n_dropped_model++; + n_dropped_submodel++; + model_dropped[i] = true; + submodel_dropped[i] = true; + } + for (k = 0; k < cmd->n_interactions; k++) { - size_t n = 0; - size_t m = 0; - gsl_matrix *small_cov = NULL; - size_t n_dropped = 0; + gsl_matrix *model_cov = NULL; + gsl_matrix *submodel_cov = NULL; + + n_dropped_submodel = n_dropped_model; + for (i = cmd->n_dep_vars; i < covariance_dim (cov); i++) + submodel_dropped[i] = model_dropped[i]; + for (i = cmd->n_dep_vars; i < covariance_dim (cov); i++) { - if (categoricals_get_interaction_by_subscript (cats, i - cmd->n_dep_vars) - == cmd->interactions[k]) + const struct interaction * x = + categoricals_get_interaction_by_subscript (cats, i - cmd->n_dep_vars); + + if (x == cmd->interactions [k]) { - assert (n_dropped < covariance_dim (cov)); - dropped[n_dropped++] = i; + model_dropped[i] = false; + n_dropped_model--; } } - small_cov = - gsl_matrix_alloc (cm->size1 - n_dropped, cm->size2 - n_dropped); - gsl_matrix_set (small_cov, 0, 0, gsl_matrix_get (cm, 0, 0)); - for (i = 0; i < cm->size1; i++) + + model_cov = gsl_matrix_alloc (cm->size1 - n_dropped_model, cm->size2 - n_dropped_model); + submodel_cov = gsl_matrix_alloc (cm->size1 - n_dropped_submodel, cm->size2 - n_dropped_submodel); + + fill_submatrix (cm, model_cov, model_dropped); + fill_submatrix (cm, submodel_cov, submodel_dropped); + + reg_sweep (model_cov, 0); + reg_sweep (submodel_cov, 0); + + gsl_vector_set (ssq, k + 1, + gsl_matrix_get (submodel_cov, 0, 0) - gsl_matrix_get (model_cov, 0, 0) + ); + + gsl_matrix_free (model_cov); + gsl_matrix_free (submodel_cov); + } + + free (model_dropped); + free (submodel_dropped); +} + +/* + Type 2 sums of squares. + Populate SSQ with the Type 2 sums of squares according to COV + */ +static void +ssq_type2 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) +{ + const gsl_matrix *cm = covariance_calculate_unnormalized (cov); + bool *model_dropped = XCALLOC (covariance_dim (cov), bool); + bool *submodel_dropped = XCALLOC (covariance_dim (cov), bool); + const struct categoricals *cats = covariance_get_categoricals (cov); + + for (size_t k = 0; k < cmd->n_interactions; k++) + { + gsl_matrix *model_cov = NULL; + gsl_matrix *submodel_cov = NULL; + size_t n_dropped_model = 0; + size_t n_dropped_submodel = 0; + for (size_t i = cmd->n_dep_vars; i < covariance_dim (cov); i++) { - if (not_dropped (i, dropped, n_dropped)) + const struct interaction * x = + categoricals_get_interaction_by_subscript (cats, i - cmd->n_dep_vars); + + model_dropped[i] = false; + submodel_dropped[i] = false; + if (interaction_is_subset (cmd->interactions [k], x)) { - m = 0; - for (j = 0; j < cm->size2; j++) + assert (n_dropped_submodel < covariance_dim (cov)); + n_dropped_submodel++; + submodel_dropped[i] = true; + + if (cmd->interactions [k]->n_vars < x->n_vars) { - if (not_dropped (j, dropped, n_dropped)) - { - gsl_matrix_set (small_cov, n, m, - gsl_matrix_get (cm, i, j)); - m++; - } + assert (n_dropped_model < covariance_dim (cov)); + n_dropped_model++; + model_dropped[i] = true; } - n++; } } - reg_sweep (small_cov, 0); + + model_cov = gsl_matrix_alloc (cm->size1 - n_dropped_model, cm->size2 - n_dropped_model); + submodel_cov = gsl_matrix_alloc (cm->size1 - n_dropped_submodel, cm->size2 - n_dropped_submodel); + + fill_submatrix (cm, model_cov, model_dropped); + fill_submatrix (cm, submodel_cov, submodel_dropped); + + reg_sweep (model_cov, 0); + reg_sweep (submodel_cov, 0); + gsl_vector_set (ssq, k + 1, - gsl_matrix_get (small_cov, 0, 0) - - gsl_vector_get (ssq, 0)); - gsl_matrix_free (small_cov); + gsl_matrix_get (submodel_cov, 0, 0) - gsl_matrix_get (model_cov, 0, 0) + ); + + gsl_matrix_free (model_cov); + gsl_matrix_free (submodel_cov); } - free (dropped); - gsl_matrix_free (cm); + free (model_dropped); + free (submodel_dropped); } -//static void dump_matrix (const gsl_matrix *m); +/* + Type 3 sums of squares. + Populate SSQ with the Type 2 sums of squares according to COV + */ +static void +ssq_type3 (struct covariance *cov, gsl_vector *ssq, const struct glm_spec *cmd) +{ + const gsl_matrix *cm = covariance_calculate_unnormalized (cov); + bool *model_dropped = XCALLOC (covariance_dim (cov), bool); + bool *submodel_dropped = XCALLOC (covariance_dim (cov), bool); + const struct categoricals *cats = covariance_get_categoricals (cov); + + gsl_matrix *submodel_cov = gsl_matrix_alloc (cm->size1, cm->size2); + fill_submatrix (cm, submodel_cov, submodel_dropped); + reg_sweep (submodel_cov, 0); + double ss0 = gsl_matrix_get (submodel_cov, 0, 0); + gsl_matrix_free (submodel_cov); + free (submodel_dropped); + + for (size_t k = 0; k < cmd->n_interactions; k++) + { + size_t n_dropped_model = 0; + for (size_t i = cmd->n_dep_vars; i < covariance_dim (cov); i++) + { + const struct interaction * x = + categoricals_get_interaction_by_subscript (cats, i - cmd->n_dep_vars); + + model_dropped[i] = false; + + if (cmd->interactions [k] == x) + { + assert (n_dropped_model < covariance_dim (cov)); + n_dropped_model++; + model_dropped[i] = true; + } + } + + gsl_matrix *model_cov = gsl_matrix_alloc (cm->size1 - n_dropped_model, + cm->size2 - n_dropped_model); + + fill_submatrix (cm, model_cov, model_dropped); + + reg_sweep (model_cov, 0); + + gsl_vector_set (ssq, k + 1, gsl_matrix_get (model_cov, 0, 0) - ss0); + + gsl_matrix_free (model_cov); + } + free (model_dropped); +} static void run_glm (struct glm_spec *cmd, struct casereader *input, const struct dataset *ds) { bool warn_bad_weight = true; - int v; - struct taint *taint; struct dictionary *dict = dataset_dict (ds); - struct casereader *reader; - struct ccase *c; - struct glm_workspace ws; - struct covariance *cov; - ws.cats = categoricals_create (cmd->interactions, cmd->n_interactions, - cmd->wv, cmd->exclude, - NULL, NULL, NULL, NULL); + input = casereader_create_filter_missing (input, + cmd->dep_vars, cmd->n_dep_vars, + cmd->exclude, + NULL, NULL); - cov = covariance_2pass_create (cmd->n_dep_vars, cmd->dep_vars, - ws.cats, cmd->wv, cmd->exclude); + input = casereader_create_filter_missing (input, + cmd->factor_vars, cmd->n_factor_vars, + cmd->exclude, + NULL, NULL); + struct glm_workspace ws = { + .cats = categoricals_create (cmd->interactions, cmd->n_interactions, + cmd->wv, MV_ANY) + }; - c = casereader_peek (input, 0); - if (c == NULL) - { - casereader_destroy (input); - return; - } - output_split_file_values (ds, c); - case_unref (c); + struct covariance *cov = covariance_2pass_create ( + cmd->n_dep_vars, cmd->dep_vars, ws.cats, cmd->wv, cmd->exclude, true); - taint = taint_clone (casereader_get_taint (input)); + output_split_file_values_peek (ds, input); + + struct taint *taint = taint_clone (casereader_get_taint (input)); ws.totals = moments_create (MOMENT_VARIANCE); - for (reader = casereader_clone (input); - (c = casereader_read (reader)) != NULL; case_unref (c)) + struct casereader *reader = casereader_clone (input); + struct ccase *c; + for (; (c = casereader_read (reader)) != NULL; case_unref (c)) { double weight = dict_get_case_weight (dict, c, &warn_bad_weight); - for (v = 0; v < cmd->n_dep_vars; ++v) - moments_pass_one (ws.totals, case_data (c, cmd->dep_vars[v])->f, - weight); + for (int v = 0; v < cmd->n_dep_vars; ++v) + moments_pass_one (ws.totals, case_num (c, cmd->dep_vars[v]), weight); covariance_accumulate_pass1 (cov, c); } casereader_destroy (reader); - for (reader = input; - (c = casereader_read (reader)) != NULL; case_unref (c)) + if (cmd->dump_coding) + reader = casereader_clone (input); + else + reader = input; + + for (; (c = casereader_read (reader)) != NULL; case_unref (c)) { double weight = dict_get_case_weight (dict, c, &warn_bad_weight); - for (v = 0; v < cmd->n_dep_vars; ++v) - moments_pass_two (ws.totals, case_data (c, cmd->dep_vars[v])->f, - weight); + for (size_t v = 0; v < cmd->n_dep_vars; ++v) + moments_pass_two (ws.totals, case_num (c, cmd->dep_vars[v]), weight); covariance_accumulate_pass2 (cov, c); } casereader_destroy (reader); + + if (cmd->dump_coding) + { + struct pivot_table *t = covariance_dump_enc_header (cov); + for (reader = input; + (c = casereader_read (reader)) != NULL; case_unref (c)) + { + covariance_dump_enc (cov, c, t); + } + + pivot_table_submit (t); + } + { - gsl_matrix *cm = covariance_calculate_unnormalized (cov); + const gsl_matrix *ucm = covariance_calculate_unnormalized (cov); + gsl_matrix *cm = gsl_matrix_alloc (ucm->size1, ucm->size2); + gsl_matrix_memcpy (cm, ucm); // dump_matrix (cm); @@ -497,9 +598,22 @@ run_glm (struct glm_spec *cmd, struct casereader *input, */ ws.ssq = gsl_vector_alloc (cm->size1); gsl_vector_set (ws.ssq, 0, gsl_matrix_get (cm, 0, 0)); - get_ssq (cov, ws.ssq, cmd); + switch (cmd->ss_type) + { + case 1: + ssq_type1 (cov, ws.ssq, cmd); + break; + case 2: + ssq_type2 (cov, ws.ssq, cmd); + break; + case 3: + ssq_type3 (cov, ws.ssq, cmd); + break; + default: + NOT_REACHED (); + break; + } // dump_matrix (cm); - gsl_matrix_free (cm); } @@ -515,138 +629,119 @@ run_glm (struct glm_spec *cmd, struct casereader *input, } static void -output_glm (const struct glm_spec *cmd, const struct glm_workspace *ws) +put_glm_row (struct pivot_table *table, int row, + double a, double b, double c, double d, double e) { - const struct fmt_spec *wfmt = - cmd->wv ? var_get_print_format (cmd->wv) : &F_8_0; - - double n_total, mean; - double df_corr = 0.0; - double mse = 0; - - int f; - int r; - const int heading_columns = 1; - const int heading_rows = 1; - struct tab_table *t; - - const int nc = 6; - int nr = heading_rows + 4 + cmd->n_interactions; - if (cmd->intercept) - nr++; - - t = tab_create (nc, nr); - tab_title (t, _("Tests of Between-Subjects Effects")); + double entries[] = { a, b, c, d, e }; - tab_headers (t, heading_columns, 0, heading_rows, 0); - - tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1); - - tab_hline (t, TAL_2, 0, nc - 1, heading_rows); - tab_vline (t, TAL_2, heading_columns, 0, nr - 1); - - tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source")); + for (size_t col = 0; col < sizeof entries / sizeof *entries; col++) + if (entries[col] != SYSMIS) + pivot_table_put2 (table, col, row, + pivot_value_new_number (entries[col])); +} - /* TRANSLATORS: The parameter is a roman numeral */ - tab_text_format (t, 1, 0, TAB_CENTER | TAT_TITLE, - _("Type %s Sum of Squares"), "III"); - tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("df")); - tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean Square")); - tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("F")); - tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("Sig.")); +static void +output_glm (const struct glm_spec *cmd, const struct glm_workspace *ws) +{ + struct pivot_table *table = pivot_table_create ( + N_("Tests of Between-Subjects Effects")); + + pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), + (cmd->ss_type == 1 ? N_("Type I Sum Of Squares") + : cmd->ss_type == 2 ? N_("Type II Sum Of Squares") + : N_("Type III Sum Of Squares")), PIVOT_RC_OTHER, + N_("df"), PIVOT_RC_COUNT, + N_("Mean Square"), PIVOT_RC_OTHER, + N_("F"), PIVOT_RC_OTHER, + N_("Sig."), PIVOT_RC_SIGNIFICANCE); + + struct pivot_dimension *source = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Source"), + cmd->intercept ? N_("Corrected Model") : N_("Model")); + double n_total, mean; moments_calculate (ws->totals, &n_total, &mean, NULL, NULL, NULL); - if (cmd->intercept) - df_corr += 1.0; - - df_corr += categoricals_df_total (ws->cats); - - mse = gsl_vector_get (ws->ssq, 0) / (n_total - df_corr); - - r = heading_rows; - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Corrected Model")); - - r++; + double df_corr = 1.0 + categoricals_df_total (ws->cats); + double mse = gsl_vector_get (ws->ssq, 0) / (n_total - df_corr); + double intercept_ssq = pow2 (mean * n_total) / n_total; if (cmd->intercept) { - const double intercept = pow2 (mean * n_total) / n_total; - const double df = 1.0; - const double F = intercept / df / mse; - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Intercept")); - tab_double (t, 1, r, 0, intercept, NULL); - tab_double (t, 2, r, 0, 1.00, wfmt); - tab_double (t, 3, r, 0, intercept / df, NULL); - tab_double (t, 4, r, 0, F, NULL); - tab_double (t, 5, r, 0, gsl_cdf_fdist_Q (F, df, n_total - df_corr), - NULL); - r++; + int row = pivot_category_create_leaf ( + source->root, pivot_value_new_text (N_("Intercept"))); + + /* The intercept for unbalanced models is of limited use and + nobody knows how to calculate it properly */ + if (categoricals_isbalanced (ws->cats)) + { + const double df = 1.0; + const double F = intercept_ssq / df / mse; + put_glm_row (table, row, intercept_ssq, 1.0, intercept_ssq / df, + F, gsl_cdf_fdist_Q (F, df, n_total - df_corr)); + } } - for (f = 0; f < cmd->n_interactions; ++f) + double ssq_effects = 0.0; + for (int f = 0; f < cmd->n_interactions; ++f) { + double df = categoricals_df (ws->cats, f); + double ssq = gsl_vector_get (ws->ssq, f + 1); + ssq_effects += ssq; + if (!cmd->intercept) + { + df++; + ssq += intercept_ssq; + } + double F = ssq / df / mse; + struct string str = DS_EMPTY_INITIALIZER; - const double df = categoricals_df (ws->cats, f); - const double ssq = gsl_vector_get (ws->ssq, f + 1); - const double F = ssq / df / mse; interaction_to_string (cmd->interactions[f], &str); - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, ds_cstr (&str)); - ds_destroy (&str); - - tab_double (t, 1, r, 0, ssq, NULL); - tab_double (t, 2, r, 0, df, wfmt); - tab_double (t, 3, r, 0, ssq / df, NULL); - tab_double (t, 4, r, 0, F, NULL); + int row = pivot_category_create_leaf ( + source->root, pivot_value_new_user_text_nocopy (ds_steal_cstr (&str))); - tab_double (t, 5, r, 0, gsl_cdf_fdist_Q (F, df, n_total - df_corr), - NULL); - r++; + put_glm_row (table, row, ssq, df, ssq / df, F, + gsl_cdf_fdist_Q (F, df, n_total - df_corr)); } { - /* Corrected Model */ - const double df = df_corr - 1.0; - const double ssq = ws->total_ssq - gsl_vector_get (ws->ssq, 0); - const double F = ssq / df / mse; - tab_double (t, 1, heading_rows, 0, ssq, NULL); - tab_double (t, 2, heading_rows, 0, df, wfmt); - tab_double (t, 3, heading_rows, 0, ssq / df, NULL); - tab_double (t, 4, heading_rows, 0, F, NULL); - - tab_double (t, 5, heading_rows, 0, - gsl_cdf_fdist_Q (F, df, n_total - df_corr), NULL); + /* Model / Corrected Model */ + double df = df_corr; + double ssq = ws->total_ssq - gsl_vector_get (ws->ssq, 0); + if (cmd->intercept) + df--; + else + ssq += intercept_ssq; + double F = ssq / df / mse; + put_glm_row (table, 0, ssq, df, ssq / df, F, + gsl_cdf_fdist_Q (F, df, n_total - df_corr)); } { + int row = pivot_category_create_leaf (source->root, + pivot_value_new_text (N_("Error"))); const double df = n_total - df_corr; const double ssq = gsl_vector_get (ws->ssq, 0); const double mse = ssq / df; - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Error")); - tab_double (t, 1, r, 0, ssq, NULL); - tab_double (t, 2, r, 0, df, wfmt); - tab_double (t, 3, r++, 0, mse, NULL); + put_glm_row (table, row, ssq, df, mse, SYSMIS, SYSMIS); + } + + { + int row = pivot_category_create_leaf (source->root, + pivot_value_new_text (N_("Total"))); + put_glm_row (table, row, ws->total_ssq + intercept_ssq, n_total, + SYSMIS, SYSMIS, SYSMIS); } if (cmd->intercept) { - const double intercept = pow2 (mean * n_total) / n_total; - const double ssq = intercept + ws->total_ssq; - - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Total")); - tab_double (t, 1, r, 0, ssq, NULL); - tab_double (t, 2, r, 0, n_total, wfmt); - - r++; + int row = pivot_category_create_leaf ( + source->root, pivot_value_new_text (N_("Corrected Total"))); + put_glm_row (table, row, ws->total_ssq, n_total - 1.0, SYSMIS, + SYSMIS, SYSMIS); } - tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, _("Corrected Total")); - - - tab_double (t, 1, r, 0, ws->total_ssq, NULL); - tab_double (t, 2, r, 0, n_total - 1.0, wfmt); - - tab_submit (t); + pivot_table_submit (table); } #if 0 @@ -669,123 +764,29 @@ dump_matrix (const gsl_matrix * m) - -/* Match a variable. - If the match succeeds, the variable will be placed in VAR. - Returns true if successful */ -static bool -lex_match_variable (struct lexer *lexer, const struct glm_spec *glm, const struct variable **var) +static struct interaction * +parse_design_term (struct lexer *lexer, const struct dictionary *dict) { - if (lex_token (lexer) != T_ID) - return false; - - *var = parse_variable_const (lexer, glm->dict); - - if ( *var == NULL) - return false; - return true; -} - -/* An interaction is a variable followed by {*, BY} followed by an interaction */ -static bool -parse_design_interaction (struct lexer *lexer, struct glm_spec *glm, struct interaction **iact) -{ - const struct variable *v = NULL; - assert (iact); - - switch (lex_next_token (lexer, 1)) + struct interaction *iact = interaction_create (NULL); + do { - case T_ENDCMD: - case T_SLASH: - case T_COMMA: - case T_ID: - case T_BY: - case T_ASTERISK: - break; - default: - return false; - break; + struct variable *var = parse_variable (lexer, dict); + if (!var) + goto error; + interaction_add_variable (iact, var); + + if (lex_match (lexer, T_LPAREN) || lex_match_id (lexer, "WITHIN")) + { + lex_next_error (lexer, -1, -1, + "Nested variables are not yet implemented."); + goto error; + } } + while (lex_match (lexer, T_ASTERISK)); - if (! lex_match_variable (lexer, glm, &v)) - { - interaction_destroy (*iact); - *iact = NULL; - return false; - } - - assert (v); - - if ( *iact == NULL) - *iact = interaction_create (v); - else - interaction_add_variable (*iact, v); - - if ( lex_match (lexer, T_ASTERISK) || lex_match (lexer, T_BY)) - { - // lex_error (lexer, "Interactions are not yet implemented"); return false; - return parse_design_interaction (lexer, glm, iact); - } + return iact; - return true; -} - -static bool -parse_nested_variable (struct lexer *lexer, struct glm_spec *glm) -{ - const struct variable *v = NULL; - if ( ! lex_match_variable (lexer, glm, &v)) - return false; - - if (lex_match (lexer, T_LPAREN)) - { - if ( ! parse_nested_variable (lexer, glm)) - return false; - - if ( ! lex_force_match (lexer, T_RPAREN)) - return false; - } - - lex_error (lexer, "Nested variables are not yet implemented"); return false; - return true; -} - -/* A design term is an interaction OR a nested variable */ -static bool -parse_design_term (struct lexer *lexer, struct glm_spec *glm) -{ - struct interaction *iact = NULL; - if (parse_design_interaction (lexer, glm, &iact)) - { - /* Interaction parsing successful. Add to list of interactions */ - glm->interactions = xrealloc (glm->interactions, sizeof *glm->interactions * ++glm->n_interactions); - glm->interactions[glm->n_interactions - 1] = iact; - return true; - } - - if ( parse_nested_variable (lexer, glm)) - return true; - - return false; -} - - - -/* Parse a complete DESIGN specification. - A design spec is a design term, optionally followed by a comma, - and another design spec. -*/ -static bool -parse_design_spec (struct lexer *lexer, struct glm_spec *glm) -{ - if (lex_token (lexer) == T_ENDCMD || lex_token (lexer) == T_SLASH) - return true; - - if ( ! parse_design_term (lexer, glm)) - return false; - - lex_match (lexer, T_COMMA); - - return parse_design_spec (lexer, glm); +error: + interaction_destroy (iact); + return NULL; } -