1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2007, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <gsl/gsl_cdf.h>
20 #include <gsl/gsl_matrix.h>
21 #include <gsl/gsl_vector.h>
25 #include <data/case.h>
26 #include <data/category.h>
27 #include <data/casegrouper.h>
28 #include <data/casereader.h>
29 #include <data/dictionary.h>
30 #include <data/missing-values.h>
31 #include <data/procedure.h>
32 #include <data/transformations.h>
33 #include <data/value-labels.h>
34 #include <data/variable.h>
35 #include <language/command.h>
36 #include <language/dictionary/split-file.h>
37 #include <language/data-io/file-handle.h>
38 #include <language/lexer/lexer.h>
39 #include <libpspp/compiler.h>
40 #include <libpspp/hash.h>
41 #include <libpspp/message.h>
42 #include <math/covariance.h>
43 #include <math/coefficient.h>
44 #include <math/linreg.h>
45 #include <math/moments.h>
46 #include <output/table.h>
62 static struct cmd_glm cmd;
66 Moments for each of the variables used.
74 const struct variable *v;
79 Dependent variable used.
81 static const struct variable **v_dependent;
84 Number of dependent variables.
86 static size_t n_dependent;
88 size_t n_inter; /* Number of interactions. */
89 size_t n_members; /* Number of memebr variables in an interaction. */
91 struct interaction_variable **interactions;
93 int cmd_glm (struct lexer *lexer, struct dataset *ds);
95 static bool run_glm (struct casereader *,
97 const struct dataset *);
99 If the DESIGN subcommand was not specified, specify all possible
100 two-way interactions.
103 check_interactions (struct dataset *ds, struct cmd_glm *cmd)
108 struct variable **interaction_vars;
111 User did not specify the design matrix, so we
114 n_inter = (cmd->n_by + cmd->n_with) * (cmd->n_by + cmd->n_with - 1) / 2;
115 interactions = xnmalloc (n_inter, sizeof (*interactions));
116 interaction_vars = xnmalloc (2, sizeof (*interaction_vars));
117 for (i = 0; i < cmd->n_by; i++)
119 for (j = i + 1; j < cmd->n_by; j++)
121 interaction_vars[0] = cmd->v_by[i];
122 interaction_vars[1] = cmd->v_by[j];
123 interactions[k] = interaction_variable_create (interaction_vars, 2);
126 for (j = 0; j < cmd->n_with; j++)
128 interaction_vars[0] = cmd->v_by[i];
129 interaction_vars[1] = cmd->v_with[j];
130 interactions[k] = interaction_variable_create (interaction_vars, 2);
134 for (i = 0; i < cmd->n_with; i++)
136 for (j = i + 1; j < cmd->n_with; j++)
138 interaction_vars[0] = cmd->v_with[i];
139 interaction_vars[1] = cmd->v_with[j];
140 interactions[k] = interaction_variable_create (interaction_vars, 2);
146 cmd_glm (struct lexer *lexer, struct dataset *ds)
148 struct casegrouper *grouper;
149 struct casereader *group;
153 if (!parse_glm (lexer, ds, &cmd, NULL))
156 if (!lex_match_id (lexer, "DESIGN"))
158 check_interactions (ds, &cmd);
161 grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
162 while (casegrouper_get_next_group (grouper, &group))
164 run_glm (group, &cmd, ds);
166 ok = casegrouper_destroy (grouper);
167 ok = proc_commit (ds) && ok;
170 return ok ? CMD_SUCCESS : CMD_FAILURE;
174 parse_interactions (struct lexer *lexer, const struct variable **interaction_vars, int n_members,
175 int max_members, struct dataset *ds)
177 if (lex_match (lexer, '*'))
179 if (n_members > max_members)
182 xnrealloc (interaction_vars, max_members, sizeof (*interaction_vars));
184 interaction_vars[n_members] = parse_variable (lexer, dataset_dict (ds));
185 parse_interactions (lexer, interaction_vars, n_members++, max_members, ds);
189 /* Parser for the design subcommand. */
191 glm_custom_design (struct lexer *lexer, struct dataset *ds,
192 struct cmd_glm *cmd UNUSED, void *aux UNUSED)
194 size_t n_allocated = 2;
196 struct variable **interaction_vars;
197 struct variable *this_var;
199 interactions = xnmalloc (n_allocated, sizeof (*interactions));
201 while (lex_token (lexer) != T_STOP && lex_token (lexer) != '.')
203 this_var = parse_variable (lexer, dataset_dict (ds));
204 if (lex_match (lexer, '('))
206 lex_force_match (lexer, ')');
208 else if (lex_match (lexer, '*'))
210 interaction_vars = xnmalloc (2 * n_inter, sizeof (*interaction_vars));
211 n_members = parse_interactions (lexer, interaction_vars, 1, 2 * n_inter, ds);
212 if (n_allocated < n_inter)
215 xnrealloc (interactions, n_allocated, sizeof (*interactions));
217 interactions [n_inter - 1] =
218 interaction_variable_create (interaction_vars, n_members);
220 free (interaction_vars);
225 /* Parser for the dependent sub command */
227 glm_custom_dependent (struct lexer *lexer, struct dataset *ds,
228 struct cmd_glm *cmd UNUSED, void *aux UNUSED)
230 const struct dictionary *dict = dataset_dict (ds);
233 if ((lex_token (lexer) != T_ID
234 || dict_lookup_var (dict, lex_tokid (lexer)) == NULL)
235 && lex_token (lexer) != T_ALL)
238 if (!parse_variables_const (lexer, dict, &v_dependent, &n_dependent, PV_NONE))
243 for (i = 0; i < n_dependent; i++)
245 assert (var_is_numeric (v_dependent[i]));
247 assert (n_dependent);
249 msg (SE, _("Multivariate GLM not yet supported"));
250 n_dependent = 1; /* Drop this line after adding support for multivariate GLM. */
256 COV is the covariance matrix for variables included in the
257 model. That means the dependent variable is in there, too.
260 coeff_init (pspp_linreg_cache * c, const struct design_matrix *cov)
262 c->coeff = xnmalloc (cov->m->size2, sizeof (*c->coeff));
263 c->n_coeffs = cov->m->size2 - 1;
264 pspp_coeff_init (c->coeff, cov);
268 static pspp_linreg_cache *
269 fit_model (const struct covariance *cov,
270 const struct variable *dep_var,
271 const struct variable ** indep_vars,
272 size_t n_data, size_t n_indep)
274 pspp_linreg_cache *result = NULL;
280 run_glm (struct casereader *input,
282 const struct dataset *ds)
285 const struct variable **numerics = NULL;
286 const struct variable **categoricals = NULL;
288 pspp_linreg_cache *model = NULL;
289 pspp_linreg_opts lopts;
292 size_t n_data; /* Number of valid cases. */
293 size_t n_categoricals = 0;
295 struct casereader *reader;
296 struct covariance *cov;
298 c = casereader_peek (input, 0);
301 casereader_destroy (input);
304 output_split_file_values (ds, c);
309 dict_get_vars (dataset_dict (ds), &v_dependent, &n_dependent,
313 lopts.get_depvar_mean_std = 1;
315 lopts.get_indep_mean_std = xnmalloc (n_dependent, sizeof (int));
318 n_numerics = n_dependent;
319 for (i = 0; i < cmd->n_with; i++)
321 if (var_is_alpha (cmd->v_with[i]))
330 for (i = 0; i < cmd->n_by; i++)
332 if (var_is_alpha (cmd->v_by[i]))
341 numerics = xnmalloc (n_numerics, sizeof *numerics);
342 categoricals = xnmalloc (n_categoricals, sizeof (*categoricals));
345 for (i = 0; i < cmd->n_by; i++)
347 if (var_is_alpha (cmd->v_by[i]))
349 categoricals[j] = cmd->v_by[i];
354 numerics[k] = cmd->v_by[i];
358 for (i = 0; i < cmd->n_with; i++)
360 if (var_is_alpha (cmd->v_with[i]))
362 categoricals[j] = cmd->v_with[i];
367 numerics[k] = cmd->v_with[i];
371 for (i = 0; i < n_dependent; i++)
373 numerics[k] = v_dependent[i];
377 cov = covariance_2pass_create (n_numerics, numerics, n_categoricals, categoricals, NULL, MV_NEVER);
379 reader = casereader_clone (input);
380 reader = casereader_create_filter_missing (reader, numerics, n_numerics,
382 reader = casereader_create_filter_missing (reader, categoricals, n_categoricals,
384 struct casereader *r = casereader_clone (reader);
386 reader = casereader_create_counter (reader, &row, -1);
388 for (; (c = casereader_read (reader)) != NULL; case_unref (c))
390 covariance_accumulate_pass1 (cov, c);
392 for (; (c = casereader_read (r)) != NULL; case_unref (c))
394 covariance_accumulate_pass1 (cov, c);
396 covariance_destroy (cov);
397 casereader_destroy (reader);
398 casereader_destroy (r);
402 free (lopts.get_indep_mean_std);
403 casereader_destroy (input);