1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <gsl/gsl_cdf.h>
20 #include <gsl/gsl_matrix.h>
21 #include <gsl/gsl_vector.h>
25 #include <data/case.h>
26 #include <data/category.h>
27 #include <data/casegrouper.h>
28 #include <data/casereader.h>
29 #include <data/dictionary.h>
30 #include <data/missing-values.h>
31 #include <data/procedure.h>
32 #include <data/transformations.h>
33 #include <data/value-labels.h>
34 #include <data/variable.h>
35 #include <language/command.h>
36 #include <language/dictionary/split-file.h>
37 #include <language/data-io/file-handle.h>
38 #include <language/lexer/lexer.h>
39 #include <libpspp/compiler.h>
40 #include <libpspp/hash.h>
41 #include <libpspp/message.h>
42 #include <math/covariance-matrix.h>
43 #include <math/coefficient.h>
44 #include <math/linreg.h>
45 #include <math/moments.h>
46 #include <output/table.h>
61 static struct cmd_glm cmd;
64 Moments for each of the variables used.
72 const struct variable *v;
77 Dependent variable used.
79 static const struct variable **v_dependent;
82 Number of dependent variables.
84 static size_t n_dependent;
88 Return value for the procedure.
90 static int pspp_glm_rc = CMD_SUCCESS;
92 int cmd_glm (struct lexer *lexer, struct dataset *ds);
95 static bool run_glm (struct casereader *,
97 const struct dataset *);
100 cmd_glm (struct lexer *lexer, struct dataset *ds)
102 struct casegrouper *grouper;
103 struct casereader *group;
107 if (!parse_glm (lexer, ds, &cmd, NULL))
111 grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
112 while (casegrouper_get_next_group (grouper, &group))
114 run_glm (group, &cmd, ds);
116 ok = casegrouper_destroy (grouper);
117 ok = proc_commit (ds) && ok;
120 return ok ? CMD_SUCCESS : CMD_FAILURE;
123 /* Parser for the dependent sub command */
125 glm_custom_dependent (struct lexer *lexer, struct dataset *ds,
126 struct cmd_glm *cmd UNUSED, void *aux UNUSED)
128 const struct dictionary *dict = dataset_dict (ds);
130 if ((lex_token (lexer) != T_ID
131 || dict_lookup_var (dict, lex_tokid (lexer)) == NULL)
132 && lex_token (lexer) != T_ALL)
135 if (!parse_variables_const
136 (lexer, dict, &v_dependent, &n_dependent, PV_NONE))
141 assert (n_dependent);
143 msg (SE, _("Multivariate GLM not yet supported"));
144 n_dependent = 1; /* Drop this line after adding support for multivariate GLM. */
150 COV is the covariance matrix for variables included in the
151 model. That means the dependent variable is in there, too.
154 coeff_init (pspp_linreg_cache * c, const struct design_matrix *cov)
156 c->coeff = xnmalloc (cov->m->size2, sizeof (*c->coeff));
157 c->n_coeffs = cov->m->size2 - 1;
158 pspp_coeff_init (c->coeff, cov);
162 static pspp_linreg_cache *
163 fit_model (const struct covariance_matrix *cov,
164 const struct variable *dep_var,
165 const struct variable ** indep_vars,
166 size_t n_data, size_t n_indep)
168 pspp_linreg_cache *result = NULL;
169 result = pspp_linreg_cache_alloc (dep_var, indep_vars, n_data, n_indep);
170 coeff_init (result, covariance_to_design (cov));
171 pspp_linreg_with_cov (cov, result);
177 run_glm (struct casereader *input,
179 const struct dataset *ds)
182 const struct variable **indep_vars;
183 const struct variable **all_vars;
185 pspp_linreg_cache *model = NULL;
186 pspp_linreg_opts lopts;
190 size_t n_data; /* Number of valid cases. */
191 struct casereader *reader;
192 struct covariance_matrix *cov;
194 if (!casereader_peek (input, 0, &c))
196 casereader_destroy (input);
199 output_split_file_values (ds, &c);
204 dict_get_vars (dataset_dict (ds), &v_dependent, &n_dependent,
208 lopts.get_depvar_mean_std = 1;
210 lopts.get_indep_mean_std = xnmalloc (n_dependent, sizeof (int));
211 indep_vars = xnmalloc (cmd->n_by, sizeof *indep_vars);
212 n_all_vars = cmd->n_by + n_dependent;
213 all_vars = xnmalloc (n_all_vars, sizeof *all_vars);
215 for (i = 0; i < n_dependent; i++)
217 all_vars[i] = v_dependent[i];
219 for (i = 0; i < cmd->n_by; i++)
221 indep_vars[i] = cmd->v_by[i];
222 all_vars[i + n_dependent] = cmd->v_by[i];
226 reader = casereader_clone (input);
227 reader = casereader_create_filter_missing (reader, indep_vars, n_indep,
229 reader = casereader_create_filter_missing (reader, v_dependent, 1,
234 for (i = 0; i < n_all_vars; i++)
235 if (var_is_alpha (all_vars[i]))
236 cat_stored_values_create (all_vars[i]);
238 cov = covariance_matrix_init (n_all_vars, all_vars, ONE_PASS, PAIRWISE, MV_ANY);
239 reader = casereader_create_counter (reader, &row, -1);
240 for (; casereader_read (reader, &c); case_destroy (&c))
243 Accumulate the covariance matrix.
245 covariance_matrix_accumulate (cov, &c);
248 covariance_matrix_compute (cov);
250 for (i = 0; i < n_dependent; i++)
252 model = fit_model (cov, v_dependent[i], indep_vars, n_data, n_indep);
253 pspp_linreg_cache_free (model);
256 casereader_destroy (reader);
257 covariance_matrix_destroy (cov);
261 msg (SE, gettext ("No valid data found. This command was skipped."));
264 free (lopts.get_indep_mean_std);
265 casereader_destroy (input);