1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2005, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <gsl/gsl_cdf.h>
20 #include <gsl/gsl_matrix.h>
21 #include <gsl/gsl_vector.h>
24 #include <data/case.h>
25 #include <data/casegrouper.h>
26 #include <data/casereader.h>
27 #include <data/dictionary.h>
28 #include <data/missing-values.h>
29 #include <data/procedure.h>
30 #include <data/transformations.h>
31 #include <data/value-labels.h>
32 #include <data/variable.h>
33 #include <language/command.h>
34 #include <language/dictionary/split-file.h>
35 #include <language/data-io/file-handle.h>
36 #include <language/lexer/lexer.h>
37 #include <libpspp/compiler.h>
38 #include <libpspp/message.h>
39 #include <libpspp/taint.h>
40 #include <math/covariance.h>
41 #include <math/linreg.h>
42 #include <math/moments.h>
43 #include <output/tab.h>
48 #define _(msgid) gettext (msgid)
50 #define REG_LARGE_DATA 1000
55 "REGRESSION" (regression_):
75 +save[sv_]=resid,pred;
80 static struct cmd_regression cmd;
83 Moments for each of the variables used.
88 const struct variable *v;
92 Transformations for saving predicted values
97 int n_trns; /* Number of transformations. */
98 int trns_id; /* Which trns is this one? */
99 linreg *c; /* Linear model for this trns. */
102 Variables used (both explanatory and response).
104 static const struct variable **v_variables;
109 static size_t n_variables;
111 static bool run_regression (struct casereader *, struct cmd_regression *,
112 struct dataset *, linreg **);
115 STATISTICS subcommand output functions.
117 static void reg_stats_r (linreg *, void *);
118 static void reg_stats_coeff (linreg *, void *);
119 static void reg_stats_anova (linreg *, void *);
120 static void reg_stats_outs (linreg *, void *);
121 static void reg_stats_zpp (linreg *, void *);
122 static void reg_stats_label (linreg *, void *);
123 static void reg_stats_sha (linreg *, void *);
124 static void reg_stats_ci (linreg *, void *);
125 static void reg_stats_f (linreg *, void *);
126 static void reg_stats_bcov (linreg *, void *);
127 static void reg_stats_ses (linreg *, void *);
128 static void reg_stats_xtx (linreg *, void *);
129 static void reg_stats_collin (linreg *, void *);
130 static void reg_stats_tol (linreg *, void *);
131 static void reg_stats_selection (linreg *, void *);
132 static void statistics_keyword_output (void (*)(linreg *, void *),
133 int, linreg *, void *);
136 reg_stats_r (linreg *c, void *aux UNUSED)
146 rsq = linreg_ssreg (c) / linreg_sst (c);
147 adjrsq = 1.0 - (1.0 - rsq) * (linreg_n_obs (c) - 1.0) / (linreg_n_obs (c) - linreg_n_coeffs (c));
148 std_error = sqrt (linreg_mse (c));
149 t = tab_create (n_cols, n_rows);
150 tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1);
151 tab_hline (t, TAL_2, 0, n_cols - 1, 1);
152 tab_vline (t, TAL_2, 2, 0, n_rows - 1);
153 tab_vline (t, TAL_0, 1, 0, 0);
155 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("R"));
156 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("R Square"));
157 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Adjusted R Square"));
158 tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Error of the Estimate"));
159 tab_double (t, 1, 1, TAB_RIGHT, sqrt (rsq), NULL);
160 tab_double (t, 2, 1, TAB_RIGHT, rsq, NULL);
161 tab_double (t, 3, 1, TAB_RIGHT, adjrsq, NULL);
162 tab_double (t, 4, 1, TAB_RIGHT, std_error, NULL);
163 tab_title (t, _("Model Summary"));
168 Table showing estimated regression coefficients.
171 reg_stats_coeff (linreg * c, void *aux_)
183 const struct variable *v;
185 gsl_matrix *cov = aux_;
188 n_rows = linreg_n_coeffs (c) + 3;
190 t = tab_create (n_cols, n_rows);
191 tab_headers (t, 2, 0, 1, 0);
192 tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1);
193 tab_hline (t, TAL_2, 0, n_cols - 1, 1);
194 tab_vline (t, TAL_2, 2, 0, n_rows - 1);
195 tab_vline (t, TAL_0, 1, 0, 0);
197 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("B"));
198 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Std. Error"));
199 tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Beta"));
200 tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("t"));
201 tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("Significance"));
202 tab_text (t, 1, 1, TAB_LEFT | TAT_TITLE, _("(Constant)"));
203 tab_double (t, 2, 1, 0, linreg_intercept (c), NULL);
204 std_err = sqrt (gsl_matrix_get (linreg_cov (c), 0, 0));
205 tab_double (t, 3, 1, 0, std_err, NULL);
206 tab_double (t, 4, 1, 0, 0.0, NULL);
207 t_stat = linreg_intercept (c) / std_err;
208 tab_double (t, 5, 1, 0, t_stat, NULL);
209 pval = 2 * gsl_cdf_tdist_Q (fabs (t_stat), 1.0);
210 tab_double (t, 6, 1, 0, pval, NULL);
211 for (j = 0; j < linreg_n_coeffs (c); j++)
214 ds_init_empty (&tstr);
217 v = linreg_indep_var (c, j);
218 label = var_to_string (v);
219 /* Do not overwrite the variable's name. */
220 ds_put_cstr (&tstr, label);
221 tab_text (t, 1, this_row, TAB_CENTER, ds_cstr (&tstr));
223 Regression coefficients.
225 tab_double (t, 2, this_row, 0, linreg_coeff (c, j), NULL);
227 Standard error of the coefficients.
229 std_err = sqrt (gsl_matrix_get (linreg_cov (c), j + 1, j + 1));
230 tab_double (t, 3, this_row, 0, std_err, NULL);
232 Standardized coefficient, i.e., regression coefficient
233 if all variables had unit variance.
235 beta = sqrt (gsl_matrix_get (cov, j, j));
236 beta *= linreg_coeff (c, j) /
237 sqrt (gsl_matrix_get (cov, cov->size1 - 1, cov->size2 - 1));
238 tab_double (t, 4, this_row, 0, beta, NULL);
241 Test statistic for H0: coefficient is 0.
243 t_stat = linreg_coeff (c, j) / std_err;
244 tab_double (t, 5, this_row, 0, t_stat, NULL);
246 P values for the test statistic above.
249 2 * gsl_cdf_tdist_Q (fabs (t_stat),
250 (double) (linreg_n_obs (c) - linreg_n_coeffs (c)));
251 tab_double (t, 6, this_row, 0, pval, NULL);
254 tab_title (t, _("Coefficients"));
259 Display the ANOVA table.
262 reg_stats_anova (linreg * c, void *aux UNUSED)
266 const double msm = linreg_ssreg (c) / linreg_dfmodel (c);
267 const double mse = linreg_mse (c);
268 const double F = msm / mse;
269 const double pval = gsl_cdf_fdist_Q (F, c->dfm, c->dfe);
274 t = tab_create (n_cols, n_rows);
275 tab_headers (t, 2, 0, 1, 0);
277 tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1);
279 tab_hline (t, TAL_2, 0, n_cols - 1, 1);
280 tab_vline (t, TAL_2, 2, 0, n_rows - 1);
281 tab_vline (t, TAL_0, 1, 0, 0);
283 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Sum of Squares"));
284 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("df"));
285 tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Mean Square"));
286 tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("F"));
287 tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("Significance"));
289 tab_text (t, 1, 1, TAB_LEFT | TAT_TITLE, _("Regression"));
290 tab_text (t, 1, 2, TAB_LEFT | TAT_TITLE, _("Residual"));
291 tab_text (t, 1, 3, TAB_LEFT | TAT_TITLE, _("Total"));
293 /* Sums of Squares */
294 tab_double (t, 2, 1, 0, linreg_ssreg (c), NULL);
295 tab_double (t, 2, 3, 0, linreg_sst (c), NULL);
296 tab_double (t, 2, 2, 0, linreg_sse (c), NULL);
299 /* Degrees of freedom */
300 tab_text_format (t, 3, 1, TAB_RIGHT, "%g", c->dfm);
301 tab_text_format (t, 3, 2, TAB_RIGHT, "%g", c->dfe);
302 tab_text_format (t, 3, 3, TAB_RIGHT, "%g", c->dft);
305 tab_double (t, 4, 1, TAB_RIGHT, msm, NULL);
306 tab_double (t, 4, 2, TAB_RIGHT, mse, NULL);
308 tab_double (t, 5, 1, 0, F, NULL);
310 tab_double (t, 6, 1, 0, pval, NULL);
312 tab_title (t, _("ANOVA"));
317 reg_stats_outs (linreg * c, void *aux UNUSED)
323 reg_stats_zpp (linreg * c, void *aux UNUSED)
329 reg_stats_label (linreg * c, void *aux UNUSED)
335 reg_stats_sha (linreg * c, void *aux UNUSED)
340 reg_stats_ci (linreg * c, void *aux UNUSED)
345 reg_stats_f (linreg * c, void *aux UNUSED)
350 reg_stats_bcov (linreg * c, void *aux UNUSED)
362 n_cols = c->n_indeps + 1 + 2;
363 n_rows = 2 * (c->n_indeps + 1);
364 t = tab_create (n_cols, n_rows);
365 tab_headers (t, 2, 0, 1, 0);
366 tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1);
367 tab_hline (t, TAL_2, 0, n_cols - 1, 1);
368 tab_vline (t, TAL_2, 2, 0, n_rows - 1);
369 tab_vline (t, TAL_0, 1, 0, 0);
370 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Model"));
371 tab_text (t, 1, 1, TAB_CENTER | TAT_TITLE, _("Covariances"));
372 for (i = 0; i < linreg_n_coeffs (c); i++)
374 const struct variable *v = linreg_indep_var (c, i);
375 label = var_to_string (v);
376 tab_text (t, 2, i, TAB_CENTER, label);
377 tab_text (t, i + 2, 0, TAB_CENTER, label);
378 for (k = 1; k < linreg_n_coeffs (c); k++)
380 col = (i <= k) ? k : i;
381 row = (i <= k) ? i : k;
382 tab_double (t, k + 2, i, TAB_CENTER,
383 gsl_matrix_get (c->cov, row, col), NULL);
386 tab_title (t, _("Coefficient Correlations"));
390 reg_stats_ses (linreg * c, void *aux UNUSED)
395 reg_stats_xtx (linreg * c, void *aux UNUSED)
400 reg_stats_collin (linreg * c, void *aux UNUSED)
405 reg_stats_tol (linreg * c, void *aux UNUSED)
410 reg_stats_selection (linreg * c, void *aux UNUSED)
416 statistics_keyword_output (void (*function) (linreg *, void *),
417 int keyword, linreg * c, void *aux)
421 (*function) (c, aux);
426 subcommand_statistics (int *keywords, linreg * c, void *aux)
429 The order here must match the order in which the STATISTICS
430 keywords appear in the specification section above.
457 Set everything but F.
459 for (i = 0; i < f; i++)
466 for (i = 0; i < all; i++)
474 Default output: ANOVA table, parameter estimates,
475 and statistics for variables not entered into model,
478 if (keywords[defaults] | d)
486 statistics_keyword_output (reg_stats_r, keywords[r], c, aux);
487 statistics_keyword_output (reg_stats_anova, keywords[anova], c, aux);
488 statistics_keyword_output (reg_stats_coeff, keywords[coeff], c, aux);
489 statistics_keyword_output (reg_stats_outs, keywords[outs], c, aux);
490 statistics_keyword_output (reg_stats_zpp, keywords[zpp], c, aux);
491 statistics_keyword_output (reg_stats_label, keywords[label], c, aux);
492 statistics_keyword_output (reg_stats_sha, keywords[sha], c, aux);
493 statistics_keyword_output (reg_stats_ci, keywords[ci], c, aux);
494 statistics_keyword_output (reg_stats_f, keywords[f], c, aux);
495 statistics_keyword_output (reg_stats_bcov, keywords[bcov], c, aux);
496 statistics_keyword_output (reg_stats_ses, keywords[ses], c, aux);
497 statistics_keyword_output (reg_stats_xtx, keywords[xtx], c, aux);
498 statistics_keyword_output (reg_stats_collin, keywords[collin], c, aux);
499 statistics_keyword_output (reg_stats_tol, keywords[tol], c, aux);
500 statistics_keyword_output (reg_stats_selection, keywords[selection], c, aux);
504 Free the transformation. Free its linear model if this
505 transformation is the last one.
508 regression_trns_free (void *t_)
511 struct reg_trns *t = t_;
513 if (t->trns_id == t->n_trns)
515 result = linreg_free (t->c);
523 Gets the predicted values.
526 regression_trns_pred_proc (void *t_, struct ccase **c,
527 casenumber case_idx UNUSED)
531 struct reg_trns *trns = t_;
533 union value *output = NULL;
534 const union value *tmp;
536 const struct variable **vars = NULL;
538 assert (trns != NULL);
540 assert (model != NULL);
541 assert (model->depvar != NULL);
542 assert (model->pred != NULL);
544 vars = linreg_get_vars (model);
545 n_vals = linreg_n_coeffs (model);
546 vals = xnmalloc (n_vals, sizeof (*vals));
547 *c = case_unshare (*c);
549 output = case_data_rw (*c, model->pred);
551 for (i = 0; i < n_vals; i++)
553 tmp = case_data (*c, vars[i]);
556 output->f = linreg_predict (model, vals, n_vals);
558 return TRNS_CONTINUE;
565 regression_trns_resid_proc (void *t_, struct ccase **c,
566 casenumber case_idx UNUSED)
570 struct reg_trns *trns = t_;
572 union value *output = NULL;
573 const union value *tmp;
576 const struct variable **vars = NULL;
578 assert (trns != NULL);
580 assert (model != NULL);
581 assert (model->depvar != NULL);
582 assert (model->resid != NULL);
584 vars = linreg_get_vars (model);
585 n_vals = linreg_n_coeffs (model);
587 vals = xnmalloc (n_vals, sizeof (*vals));
588 *c = case_unshare (*c);
589 output = case_data_rw (*c, model->resid);
590 assert (output != NULL);
592 for (i = 0; i < n_vals; i++)
594 tmp = case_data (*c, vars[i]);
597 tmp = case_data (*c, model->depvar);
599 output->f = linreg_residual (model, obs, vals, n_vals);
602 return TRNS_CONTINUE;
606 Returns false if NAME is a duplicate of any existing variable name.
609 try_name (const struct dictionary *dict, const char *name)
611 if (dict_lookup_var (dict, name) != NULL)
618 reg_get_name (const struct dictionary *dict, char name[VAR_NAME_LEN],
619 const char prefix[VAR_NAME_LEN])
623 snprintf (name, VAR_NAME_LEN, "%s%d", prefix, i);
624 while (!try_name (dict, name))
627 snprintf (name, VAR_NAME_LEN, "%s%d", prefix, i);
632 reg_save_var (struct dataset *ds, const char *prefix, trns_proc_func * f,
633 linreg * c, struct variable **v, int n_trns)
635 struct dictionary *dict = dataset_dict (ds);
636 static int trns_index = 1;
637 char name[VAR_NAME_LEN];
638 struct variable *new_var;
639 struct reg_trns *t = NULL;
641 t = xmalloc (sizeof (*t));
642 t->trns_id = trns_index;
645 reg_get_name (dict, name, prefix);
646 new_var = dict_create_var (dict, name, 0);
647 assert (new_var != NULL);
649 add_transformation (ds, f, regression_trns_free, t);
653 subcommand_save (struct dataset *ds, int save, linreg ** models)
661 /* Count the number of transformations we will need. */
662 for (i = 0; i < REGRESSION_SV_count; i++)
669 n_trns *= cmd.n_dependent;
671 for (lc = models; lc < models + cmd.n_dependent; lc++)
675 if ((*lc)->depvar != NULL)
677 if (cmd.a_save[REGRESSION_SV_RESID])
679 reg_save_var (ds, "RES", regression_trns_resid_proc, *lc,
680 &(*lc)->resid, n_trns);
682 if (cmd.a_save[REGRESSION_SV_PRED])
684 reg_save_var (ds, "PRED", regression_trns_pred_proc, *lc,
685 &(*lc)->pred, n_trns);
693 for (lc = models; lc < models + cmd.n_dependent; lc++)
704 cmd_regression (struct lexer *lexer, struct dataset *ds)
706 struct casegrouper *grouper;
707 struct casereader *group;
712 if (!parse_regression (lexer, ds, &cmd, NULL))
717 models = xnmalloc (cmd.n_dependent, sizeof *models);
718 for (i = 0; i < cmd.n_dependent; i++)
724 grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
725 while (casegrouper_get_next_group (grouper, &group))
726 run_regression (group, &cmd, ds, models);
727 ok = casegrouper_destroy (grouper);
728 ok = proc_commit (ds) && ok;
730 subcommand_save (ds, cmd.sbc_save, models);
733 free_regression (&cmd);
735 return ok ? CMD_SUCCESS : CMD_FAILURE;
739 Is variable k the dependent variable?
742 is_depvar (size_t k, const struct variable *v)
744 return v == v_variables[k];
747 /* Parser for the variables sub command */
749 regression_custom_variables (struct lexer *lexer, struct dataset *ds,
750 struct cmd_regression *cmd UNUSED,
753 const struct dictionary *dict = dataset_dict (ds);
755 lex_match (lexer, '=');
757 if ((lex_token (lexer) != T_ID
758 || dict_lookup_var (dict, lex_tokid (lexer)) == NULL)
759 && lex_token (lexer) != T_ALL)
763 if (!parse_variables_const
764 (lexer, dict, &v_variables, &n_variables, PV_NONE))
769 assert (n_variables);
774 /* Identify the explanatory variables in v_variables. Returns
775 the number of independent variables. */
777 identify_indep_vars (const struct variable **indep_vars,
778 const struct variable *depvar)
780 int n_indep_vars = 0;
783 for (i = 0; i < n_variables; i++)
784 if (!is_depvar (i, depvar))
785 indep_vars[n_indep_vars++] = v_variables[i];
786 if ((n_indep_vars < 1) && is_depvar (0, depvar))
789 There is only one independent variable, and it is the same
790 as the dependent variable. Print a warning and continue.
793 gettext ("The dependent variable is equal to the independent variable."
794 "The least squares line is therefore Y=X."
795 "Standard errors and related statistics may be meaningless."));
797 indep_vars[0] = v_variables[0];
802 fill_covariance (gsl_matrix *cov, struct covariance *all_cov,
803 const struct variable **vars,
804 size_t n_vars, const struct variable *dep_var,
805 const struct variable **all_vars, size_t n_all_vars,
810 size_t dep_subscript;
812 const gsl_matrix *ssizes;
813 const gsl_matrix *cm;
814 const gsl_matrix *mean_matrix;
815 const gsl_matrix *ssize_matrix;
818 cm = covariance_calculate_unnormalized (all_cov);
819 rows = xnmalloc (cov->size1 - 1, sizeof (*rows));
821 for (i = 0; i < n_all_vars; i++)
823 for (j = 0; j < n_vars; j++)
825 if (vars[j] == all_vars[i])
830 if (all_vars[i] == dep_var)
835 mean_matrix = covariance_moments (all_cov, MOMENT_MEAN);
836 ssize_matrix = covariance_moments (all_cov, MOMENT_NONE);
837 for (i = 0; i < cov->size1 - 1; i++)
839 means[i] = gsl_matrix_get (mean_matrix, rows[i], 0)
840 / gsl_matrix_get (ssize_matrix, rows[i], 0);
841 for (j = 0; j < cov->size2 - 1; j++)
843 gsl_matrix_set (cov, i, j, gsl_matrix_get (cm, rows[i], rows[j]));
844 gsl_matrix_set (cov, j, i, gsl_matrix_get (cm, rows[j], rows[i]));
847 means[cov->size1 - 1] = gsl_matrix_get (mean_matrix, dep_subscript, 0)
848 / gsl_matrix_get (ssize_matrix, dep_subscript, 0);
849 ssizes = covariance_moments (all_cov, MOMENT_NONE);
850 result = gsl_matrix_get (ssizes, dep_subscript, rows[0]);
851 for (i = 0; i < cov->size1 - 1; i++)
853 gsl_matrix_set (cov, i, cov->size1 - 1,
854 gsl_matrix_get (cm, rows[i], dep_subscript));
855 gsl_matrix_set (cov, cov->size1 - 1, i,
856 gsl_matrix_get (cm, rows[i], dep_subscript));
857 if (result > gsl_matrix_get (ssizes, rows[i], dep_subscript))
859 result = gsl_matrix_get (ssizes, rows[i], dep_subscript);
862 gsl_matrix_set (cov, cov->size1 - 1, cov->size1 - 1,
863 gsl_matrix_get (cm, dep_subscript, dep_subscript));
865 gsl_matrix_free (cm);
869 get_n_all_vars (struct cmd_regression *cmd)
871 size_t result = n_variables;
875 result += cmd->n_dependent;
876 for (i = 0; i < cmd->n_dependent; i++)
878 for (j = 0; j < n_variables; j++)
880 if (v_variables[j] == cmd->v_dependent[i])
889 fill_all_vars (const struct variable **vars, struct cmd_regression *cmd)
895 for (i = 0; i < n_variables; i++)
897 vars[i] = v_variables[i];
899 for (i = 0; i < cmd->n_dependent; i++)
902 for (j = 0; j < n_variables; j++)
904 if (cmd->v_dependent[i] == v_variables[j])
912 vars[i + n_variables] = cmd->v_dependent[i];
917 run_regression (struct casereader *input, struct cmd_regression *cmd,
918 struct dataset *ds, linreg **models)
926 struct covariance *cov;
927 const struct variable **vars;
928 const struct variable **all_vars;
929 const struct variable *dep_var;
930 struct casereader *reader;
931 const struct dictionary *dict;
934 assert (models != NULL);
936 for (i = 0; i < n_variables; i++)
938 if (!var_is_numeric (v_variables[i]))
940 msg (SE, _("REGRESSION requires numeric variables."));
945 c = casereader_peek (input, 0);
948 casereader_destroy (input);
951 output_split_file_values (ds, c);
954 dict = dataset_dict (ds);
957 dict_get_vars (dict, &v_variables, &n_variables, 0);
959 n_all_vars = get_n_all_vars (cmd);
960 all_vars = xnmalloc (n_all_vars, sizeof (*all_vars));
961 fill_all_vars (all_vars, cmd);
962 vars = xnmalloc (n_variables, sizeof (*vars));
963 means = xnmalloc (n_all_vars, sizeof (*means));
964 cov = covariance_1pass_create (n_all_vars, all_vars,
965 dict_get_weight (dict), MV_ANY);
967 reader = casereader_clone (input);
968 reader = casereader_create_filter_missing (reader, v_variables, n_variables,
970 for (; (c = casereader_read (reader)) != NULL; case_unref (c))
972 covariance_accumulate (cov, c);
975 for (k = 0; k < cmd->n_dependent; k++)
978 dep_var = cmd->v_dependent[k];
979 n_indep = identify_indep_vars (vars, dep_var);
981 this_cm = gsl_matrix_alloc (n_indep + 1, n_indep + 1);
982 n_data = fill_covariance (this_cm, cov, vars, n_indep,
983 dep_var, all_vars, n_all_vars, means);
984 models[k] = linreg_alloc (dep_var, (const struct variable **) vars,
986 models[k]->depvar = dep_var;
987 for (i = 0; i < n_indep; i++)
989 linreg_set_indep_variable_mean (models[k], i, means[i]);
991 linreg_set_depvar_mean (models[k], means[i]);
993 For large data sets, use QR decomposition.
995 if (n_data > sqrt (n_indep) && n_data > REG_LARGE_DATA)
997 models[k]->method = LINREG_QR;
1003 Find the least-squares estimates and other statistics.
1005 linreg_fit (this_cm, models[k]);
1007 if (!taint_has_tainted_successor (casereader_get_taint (input)))
1009 subcommand_statistics (cmd->a_statistics, models[k], this_cm);
1015 gettext ("No valid data found. This command was skipped."));
1016 linreg_free (models[k]);
1019 gsl_matrix_free (this_cm);
1022 casereader_destroy (reader);
1026 casereader_destroy (input);
1027 covariance_destroy (cov);