1 /* PSPP - linear regression.
2 Copyright (C) 2005 Free Software Foundation, Inc.
3 Written by Jason H Stover <jason@sakla.net>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 #include <gsl/gsl_cdf.h>
23 #include <gsl/gsl_vector.h>
24 #include <gsl/gsl_matrix.h>
29 #include "cat-routines.h"
31 #include "design-matrix.h"
32 #include "dictionary.h"
34 #include "file-handle.h"
37 #include <linreg/pspp_linreg.h>
38 #include "missing-values.h"
39 #include "regression_export.h"
41 #include "value-labels.h"
45 #define REG_LARGE_DATA 1000
50 "REGRESSION" (regression_):
75 static struct cmd_regression cmd;
78 Array holding the subscripts of the independent variables.
83 File where the model will be saved if the EXPORT subcommand
86 struct file_handle *model_file;
89 Return value for the procedure.
91 int pspp_reg_rc = CMD_SUCCESS;
93 static void run_regression (const struct casefile *, void *);
96 STATISTICS subcommand output functions.
98 static void reg_stats_r (pspp_linreg_cache *);
99 static void reg_stats_coeff (pspp_linreg_cache *);
100 static void reg_stats_anova (pspp_linreg_cache *);
101 static void reg_stats_outs (pspp_linreg_cache *);
102 static void reg_stats_zpp (pspp_linreg_cache *);
103 static void reg_stats_label (pspp_linreg_cache *);
104 static void reg_stats_sha (pspp_linreg_cache *);
105 static void reg_stats_ci (pspp_linreg_cache *);
106 static void reg_stats_f (pspp_linreg_cache *);
107 static void reg_stats_bcov (pspp_linreg_cache *);
108 static void reg_stats_ses (pspp_linreg_cache *);
109 static void reg_stats_xtx (pspp_linreg_cache *);
110 static void reg_stats_collin (pspp_linreg_cache *);
111 static void reg_stats_tol (pspp_linreg_cache *);
112 static void reg_stats_selection (pspp_linreg_cache *);
113 static void statistics_keyword_output (void (*)(pspp_linreg_cache *),
114 int, pspp_linreg_cache *);
117 reg_stats_r (pspp_linreg_cache * c)
127 rsq = c->ssm / c->sst;
128 adjrsq = 1.0 - (1.0 - rsq) * (c->n_obs - 1.0) / (c->n_obs - c->n_indeps);
129 std_error = sqrt ((c->n_indeps - 1.0) / (c->n_obs - 1.0));
130 t = tab_create (n_cols, n_rows, 0);
131 tab_dim (t, tab_natural_dimensions);
132 tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1);
133 tab_hline (t, TAL_2, 0, n_cols - 1, 1);
134 tab_vline (t, TAL_2, 2, 0, n_rows - 1);
135 tab_vline (t, TAL_0, 1, 0, 0);
137 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("R"));
138 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("R Square"));
139 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Adjusted R Square"));
140 tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Error of the Estimate"));
141 tab_float (t, 1, 1, TAB_RIGHT, sqrt (rsq), 10, 2);
142 tab_float (t, 2, 1, TAB_RIGHT, rsq, 10, 2);
143 tab_float (t, 3, 1, TAB_RIGHT, adjrsq, 10, 2);
144 tab_float (t, 4, 1, TAB_RIGHT, std_error, 10, 2);
145 tab_title (t, 0, _("Model Summary"));
150 Table showing estimated regression coefficients.
153 reg_stats_coeff (pspp_linreg_cache * c)
166 const struct variable *v;
167 const union value *val;
172 tmp = xnmalloc (MAX_STRING, sizeof (*tmp));
173 n_rows = c->n_coeffs + 2;
175 t = tab_create (n_cols, n_rows, 0);
176 tab_headers (t, 2, 0, 1, 0);
177 tab_dim (t, tab_natural_dimensions);
178 tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1);
179 tab_hline (t, TAL_2, 0, n_cols - 1, 1);
180 tab_vline (t, TAL_2, 2, 0, n_rows - 1);
181 tab_vline (t, TAL_0, 1, 0, 0);
183 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("B"));
184 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Std. Error"));
185 tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Beta"));
186 tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("t"));
187 tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("Significance"));
188 tab_text (t, 1, 1, TAB_LEFT | TAT_TITLE, _("(Constant)"));
189 coeff = c->coeff[0].estimate;
190 tab_float (t, 2, 1, 0, coeff, 10, 2);
191 std_err = sqrt (gsl_matrix_get (c->cov, 0, 0));
192 tab_float (t, 3, 1, 0, std_err, 10, 2);
193 beta = coeff / c->depvar_std;
194 tab_float (t, 4, 1, 0, beta, 10, 2);
195 t_stat = coeff / std_err;
196 tab_float (t, 5, 1, 0, t_stat, 10, 2);
197 pval = 2 * gsl_cdf_tdist_Q (fabs (t_stat), 1.0);
198 tab_float (t, 6, 1, 0, pval, 10, 2);
199 for (j = 1; j <= c->n_indeps; j++)
202 v = pspp_linreg_coeff_get_var (c->coeff + j, 0);
203 label = var_to_string (v);
204 /* Do not overwrite the variable's name. */
205 strncpy (tmp, label, MAX_STRING);
206 if (v->type == ALPHA)
209 Append the value associated with this coefficient.
210 This makes sense only if we us the usual binary encoding
214 val = pspp_linreg_coeff_get_value (c->coeff + j, v);
215 val_s = value_to_string (val, v);
216 strncat (tmp, val_s, MAX_STRING);
219 tab_text (t, 1, j + 1, TAB_CENTER, tmp);
221 Regression coefficients.
223 coeff = c->coeff[j].estimate;
224 tab_float (t, 2, j + 1, 0, coeff, 10, 2);
226 Standard error of the coefficients.
228 std_err = sqrt (gsl_matrix_get (c->cov, j, j));
229 tab_float (t, 3, j + 1, 0, std_err, 10, 2);
231 'Standardized' coefficient, i.e., regression coefficient
232 if all variables had unit variance.
234 beta = gsl_vector_get (c->indep_std, j);
235 beta *= coeff / c->depvar_std;
236 tab_float (t, 4, j + 1, 0, beta, 10, 2);
239 Test statistic for H0: coefficient is 0.
241 t_stat = coeff / std_err;
242 tab_float (t, 5, j + 1, 0, t_stat, 10, 2);
244 P values for the test statistic above.
246 pval = 2 * gsl_cdf_tdist_Q (fabs (t_stat), 1.0);
247 tab_float (t, 6, j + 1, 0, pval, 10, 2);
249 tab_title (t, 0, _("Coefficients"));
255 Display the ANOVA table.
258 reg_stats_anova (pspp_linreg_cache * c)
262 const double msm = c->ssm / c->dfm;
263 const double mse = c->sse / c->dfe;
264 const double F = msm / mse;
265 const double pval = gsl_cdf_fdist_Q (F, c->dfm, c->dfe);
270 t = tab_create (n_cols, n_rows, 0);
271 tab_headers (t, 2, 0, 1, 0);
272 tab_dim (t, tab_natural_dimensions);
274 tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1);
276 tab_hline (t, TAL_2, 0, n_cols - 1, 1);
277 tab_vline (t, TAL_2, 2, 0, n_rows - 1);
278 tab_vline (t, TAL_0, 1, 0, 0);
280 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Sum of Squares"));
281 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("df"));
282 tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Mean Square"));
283 tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("F"));
284 tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("Significance"));
286 tab_text (t, 1, 1, TAB_LEFT | TAT_TITLE, _("Regression"));
287 tab_text (t, 1, 2, TAB_LEFT | TAT_TITLE, _("Residual"));
288 tab_text (t, 1, 3, TAB_LEFT | TAT_TITLE, _("Total"));
290 /* Sums of Squares */
291 tab_float (t, 2, 1, 0, c->ssm, 10, 2);
292 tab_float (t, 2, 3, 0, c->sst, 10, 2);
293 tab_float (t, 2, 2, 0, c->sse, 10, 2);
296 /* Degrees of freedom */
297 tab_float (t, 3, 1, 0, c->dfm, 4, 0);
298 tab_float (t, 3, 2, 0, c->dfe, 4, 0);
299 tab_float (t, 3, 3, 0, c->dft, 4, 0);
303 tab_float (t, 4, 1, TAB_RIGHT, msm, 8, 3);
304 tab_float (t, 4, 2, TAB_RIGHT, mse, 8, 3);
306 tab_float (t, 5, 1, 0, F, 8, 3);
308 tab_float (t, 6, 1, 0, pval, 8, 3);
310 tab_title (t, 0, _("ANOVA"));
314 reg_stats_outs (pspp_linreg_cache * c)
319 reg_stats_zpp (pspp_linreg_cache * c)
324 reg_stats_label (pspp_linreg_cache * c)
329 reg_stats_sha (pspp_linreg_cache * c)
334 reg_stats_ci (pspp_linreg_cache * c)
339 reg_stats_f (pspp_linreg_cache * c)
344 reg_stats_bcov (pspp_linreg_cache * c)
357 n_cols = c->n_indeps + 1 + 2;
358 n_rows = 2 * (c->n_indeps + 1);
359 t = tab_create (n_cols, n_rows, 0);
360 tab_headers (t, 2, 0, 1, 0);
361 tab_dim (t, tab_natural_dimensions);
362 tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, n_cols - 1, n_rows - 1);
363 tab_hline (t, TAL_2, 0, n_cols - 1, 1);
364 tab_vline (t, TAL_2, 2, 0, n_rows - 1);
365 tab_vline (t, TAL_0, 1, 0, 0);
366 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Model"));
367 tab_text (t, 1, 1, TAB_CENTER | TAT_TITLE, _("Covariances"));
368 for (i = 1; i < c->n_indeps + 1; i++)
370 j = indep_vars[(i - 1)];
371 struct variable *v = cmd.v_variables[j];
372 label = var_to_string (v);
373 tab_text (t, 2, i, TAB_CENTER, label);
374 tab_text (t, i + 2, 0, TAB_CENTER, label);
375 for (k = 1; k < c->n_indeps + 1; k++)
377 col = (i <= k) ? k : i;
378 row = (i <= k) ? i : k;
379 tab_float (t, k + 2, i, TAB_CENTER,
380 gsl_matrix_get (c->cov, row, col), 8, 3);
383 tab_title (t, 0, _("Coefficient Correlations"));
387 reg_stats_ses (pspp_linreg_cache * c)
392 reg_stats_xtx (pspp_linreg_cache * c)
397 reg_stats_collin (pspp_linreg_cache * c)
402 reg_stats_tol (pspp_linreg_cache * c)
407 reg_stats_selection (pspp_linreg_cache * c)
413 statistics_keyword_output (void (*function) (pspp_linreg_cache *),
414 int keyword, pspp_linreg_cache * c)
423 subcommand_statistics (int *keywords, pspp_linreg_cache * c)
426 The order here must match the order in which the STATISTICS
427 keywords appear in the specification section above.
454 Set everything but F.
456 for (i = 0; i < f; i++)
463 for (i = 0; i < all; i++)
471 Default output: ANOVA table, parameter estimates,
472 and statistics for variables not entered into model,
475 if (keywords[defaults] | d)
483 statistics_keyword_output (reg_stats_r, keywords[r], c);
484 statistics_keyword_output (reg_stats_anova, keywords[anova], c);
485 statistics_keyword_output (reg_stats_coeff, keywords[coeff], c);
486 statistics_keyword_output (reg_stats_outs, keywords[outs], c);
487 statistics_keyword_output (reg_stats_zpp, keywords[zpp], c);
488 statistics_keyword_output (reg_stats_label, keywords[label], c);
489 statistics_keyword_output (reg_stats_sha, keywords[sha], c);
490 statistics_keyword_output (reg_stats_ci, keywords[ci], c);
491 statistics_keyword_output (reg_stats_f, keywords[f], c);
492 statistics_keyword_output (reg_stats_bcov, keywords[bcov], c);
493 statistics_keyword_output (reg_stats_ses, keywords[ses], c);
494 statistics_keyword_output (reg_stats_xtx, keywords[xtx], c);
495 statistics_keyword_output (reg_stats_collin, keywords[collin], c);
496 statistics_keyword_output (reg_stats_tol, keywords[tol], c);
497 statistics_keyword_output (reg_stats_selection, keywords[selection], c);
500 reg_inserted (const struct variable *v, struct variable **varlist, int n_vars)
504 for (i = 0; i < n_vars; i++)
506 if (v->index == varlist[i]->index)
514 reg_print_categorical_encoding (FILE * fp, pspp_linreg_cache * c)
519 struct variable **varlist;
520 struct pspp_linreg_coeff *coeff;
521 const struct variable *v;
524 fprintf (fp, "%s", reg_export_categorical_encode_1);
526 varlist = xnmalloc (c->n_indeps, sizeof (*varlist));
527 for (i = 1; i < c->n_indeps; i++) /* c->coeff[0] is the intercept. */
529 coeff = c->coeff + i;
530 v = pspp_linreg_coeff_get_var (coeff, 0);
531 if (v->type == ALPHA)
533 if (!reg_inserted (v, varlist, n_vars))
535 fprintf (fp, "struct pspp_reg_categorical_variable %s;\n\t",
537 varlist[n_vars] = (struct variable *) v;
542 fprintf (fp, "int n_vars = %d;\n\t", n_vars);
543 fprintf (fp, "struct pspp_reg_categorical_variable *varlist[%d] = {",
545 for (i = 0; i < n_vars - 1; i++)
547 fprintf (fp, "&%s,\n\t\t", varlist[i]->name);
549 fprintf (fp, "&%s};\n\t", varlist[i]->name);
551 for (i = 0; i < n_vars; i++)
553 coeff = c->coeff + i;
554 fprintf (fp, "%s.name = \"%s\";\n\t", varlist[i]->name,
556 fprintf (fp, "%s.n_vals = %d;\n\t", varlist[i]->name,
557 varlist[i]->obs_vals->n_categories);
559 for (j = 0; j < varlist[i]->obs_vals->n_categories; j++)
561 val = cat_subscript_to_value ((const size_t) j, varlist[i]);
562 fprintf (fp, "%s.values[%d] = \"%s\";\n\t", varlist[i]->name, j,
563 value_to_string (val, varlist[i]));
566 fprintf (fp, "%s", reg_export_categorical_encode_2);
570 reg_print_depvars (FILE * fp, pspp_linreg_cache * c)
573 struct pspp_linreg_coeff *coeff;
574 const struct variable *v;
576 fprintf (fp, "char *model_depvars[%d] = {", c->n_indeps);
577 for (i = 1; i < c->n_indeps; i++)
579 coeff = c->coeff + i;
580 v = pspp_linreg_coeff_get_var (coeff, 0);
581 fprintf (fp, "\"%s\",\n\t\t", v->name);
583 coeff = c->coeff + i;
584 v = pspp_linreg_coeff_get_var (coeff, 0);
585 fprintf (fp, "\"%s\"};\n\t", v->name);
588 reg_print_getvar (FILE * fp, pspp_linreg_cache * c)
590 fprintf (fp, "static int\npspp_reg_getvar (char *v_name)\n{\n\t");
591 fprintf (fp, "int i;\n\tint n_vars = %d;\n\t", c->n_indeps);
592 reg_print_depvars (fp, c);
593 fprintf (fp, "for (i = 0; i < n_vars; i++)\n\t{\n\t\t");
595 "if (strncmp (v_name, model_depvars[i], PSPP_REG_MAXLEN) == 0)\n\t\t{\n\t\t\t");
596 fprintf (fp, "return i;\n\t\t}\n\t}\n}\n");
599 subcommand_export (int export, pspp_linreg_cache * c)
603 int n_quantiles = 100;
606 struct pspp_linreg_coeff coeff;
612 assert (model_file != NULL);
614 fp = fopen (fh_get_filename (model_file), "w");
615 fprintf (fp, "%s", reg_preamble);
616 reg_print_getvar (fp, c);
617 reg_print_categorical_encoding (fp, c);
618 fprintf (fp, "%s", reg_export_t_quantiles_1);
619 increment = 0.5 / (double) increment;
620 for (i = 0; i < n_quantiles - 1; i++)
622 tmp = 0.5 + 0.005 * (double) i;
623 fprintf (fp, "%.15e,\n\t\t",
624 gsl_cdf_tdist_Pinv (tmp, c->n_obs - c->n_indeps));
626 fprintf (fp, "%.15e};\n\t",
627 gsl_cdf_tdist_Pinv (.9995, c->n_obs - c->n_indeps));
628 fprintf (fp, "%s", reg_export_t_quantiles_2);
629 fprintf (fp, "%s", reg_mean_cmt);
630 fprintf (fp, "double\npspp_reg_estimate (const double *var_vals,");
631 fprintf (fp, "const char *var_names[])\n{\n\t");
632 fprintf (fp, "double model_coeffs[%d] = {", c->n_indeps);
633 for (i = 1; i < c->n_indeps; i++)
636 fprintf (fp, "%.15e,\n\t\t", coeff.estimate);
639 fprintf (fp, "%.15e};\n\t", coeff.estimate);
641 fprintf (fp, "double estimate = %.15e;\n\t", coeff.estimate);
642 fprintf (fp, "int i;\n\tint j;\n\n\t");
643 fprintf (fp, "for (i = 0; i < %d; i++)\n\t", c->n_indeps);
644 fprintf (fp, "%s", reg_getvar);
645 fprintf (fp, "const double cov[%d][%d] = {\n\t", c->n_coeffs,
647 for (i = 0; i < c->cov->size1 - 1; i++)
650 for (j = 0; j < c->cov->size2 - 1; j++)
652 fprintf (fp, "%.15e, ", gsl_matrix_get (c->cov, i, j));
654 fprintf (fp, "%.15e},\n\t", gsl_matrix_get (c->cov, i, j));
657 for (j = 0; j < c->cov->size2 - 1; j++)
659 fprintf (fp, "%.15e, ",
660 gsl_matrix_get (c->cov, c->cov->size1 - 1, j));
662 fprintf (fp, "%.15e}\n\t",
663 gsl_matrix_get (c->cov, c->cov->size1 - 1, c->cov->size2 - 1));
664 fprintf (fp, "};\n\tint n_vars = %d;\n\tint i;\n\tint j;\n\t",
666 fprintf (fp, "double unshuffled_vals[%d];\n\t", c->n_indeps);
667 fprintf (fp, "%s", reg_variance);
668 fprintf (fp, "%s", reg_export_confidence_interval);
669 tmp = c->mse * c->mse;
670 fprintf (fp, "%s %.15e", reg_export_prediction_interval_1, tmp);
671 fprintf (fp, "%s %.15e", reg_export_prediction_interval_2, tmp);
672 fprintf (fp, "%s", reg_export_prediction_interval_3);
674 fp = fopen ("pspp_model_reg.h", "w");
675 fprintf (fp, "%s", reg_header);
680 regression_custom_export (struct cmd_regression *cmd)
682 /* 0 on failure, 1 on success, 2 on failure that should result in syntax error */
683 if (!lex_force_match ('('))
690 model_file = fh_parse ();
691 if (model_file == NULL)
695 if (!lex_force_match (')'))
702 cmd_regression (void)
704 if (!parse_regression (&cmd))
708 multipass_procedure_with_splits (run_regression, &cmd);
714 Is variable k one of the dependent variables?
720 for (j = 0; j < cmd.n_dependent; j++)
723 compare_var_names returns 0 if the variable
726 if (!compare_var_names (cmd.v_dependent[j], cmd.v_variables[k], NULL))
733 Mark missing cases. Return the number of non-missing cases.
736 mark_missing_cases (const struct casefile *cf, struct variable *v,
737 int *is_missing_case, double n_data)
739 struct casereader *r;
742 const union value *val;
744 for (r = casefile_get_reader (cf);
745 casereader_read (r, &c); case_destroy (&c))
747 row = casereader_cnum (r) - 1;
749 val = case_data (&c, v->fv);
750 cat_value_update (v, val);
751 if (mv_is_value_missing (&v->miss, val))
753 if (!is_missing_case[row])
755 /* Now it is missing. */
757 is_missing_case[row] = 1;
761 casereader_destroy (r);
767 run_regression (const struct casefile *cf, void *cmd_ UNUSED)
777 Keep track of the missing cases.
779 int *is_missing_case;
780 const union value *val;
781 struct casereader *r;
784 struct variable *depvar;
785 struct variable **indep_vars;
786 struct design_matrix *X;
788 pspp_linreg_cache *lcache;
789 pspp_linreg_opts lopts;
791 n_data = casefile_get_case_cnt (cf);
793 for (i = 0; i < cmd.n_dependent; i++)
795 if (cmd.v_dependent[i]->type != NUMERIC)
797 msg (SE, gettext ("Dependent variable must be numeric."));
798 pspp_reg_rc = CMD_FAILURE;
803 is_missing_case = xnmalloc (n_data, sizeof (*is_missing_case));
804 for (i = 0; i < n_data; i++)
805 is_missing_case[i] = 0;
807 n_indep = cmd.n_variables - cmd.n_dependent;
808 indep_vars = xnmalloc (n_indep, sizeof *indep_vars);
810 lopts.get_depvar_mean_std = 1;
811 lopts.get_indep_mean_std = xnmalloc (n_indep, sizeof (int));
814 Read from the active file. The first pass encodes categorical
815 variables and drops cases with missing values.
818 for (i = 0; i < cmd.n_variables; i++)
822 v = cmd.v_variables[i];
825 if (v->type == ALPHA)
827 /* Make a place to hold the binary vectors
828 corresponding to this variable's values. */
829 cat_stored_values_create (v);
831 n_data = mark_missing_cases (cf, v, is_missing_case, n_data);
836 Drop cases with missing values for any dependent variable.
839 for (i = 0; i < cmd.n_dependent; i++)
841 v = cmd.v_dependent[i];
843 n_data = mark_missing_cases (cf, v, is_missing_case, n_data);
846 for (k = 0; k < cmd.n_dependent; k++)
848 depvar = cmd.v_dependent[k];
849 Y = gsl_vector_alloc (n_data);
852 design_matrix_create (n_indep, (const struct variable **) indep_vars,
854 for (i = 0; i < X->m->size2; i++)
856 lopts.get_indep_mean_std[i] = 1;
858 lcache = pspp_linreg_cache_alloc (X->m->size1, X->m->size2);
859 lcache->indep_means = gsl_vector_alloc (X->m->size2);
860 lcache->indep_std = gsl_vector_alloc (X->m->size2);
861 lcache->depvar = (const struct variable *) depvar;
863 For large data sets, use QR decomposition.
865 if (n_data > sqrt (n_indep) && n_data > REG_LARGE_DATA)
867 lcache->method = PSPP_LINREG_SVD;
871 The second pass creates the design matrix.
874 for (r = casefile_get_reader (cf); casereader_read (r, &c);
876 /* Iterate over the cases. */
878 case_num = casereader_cnum (r) - 1;
879 if (!is_missing_case[case_num])
881 for (i = 0; i < cmd.n_variables; ++i) /* Iterate over the variables
882 for the current case.
885 v = cmd.v_variables[i];
886 val = case_data (&c, v->fv);
888 Independent/dependent variable separation. The
889 'variables' subcommand specifies a varlist which contains
890 both dependent and independent variables. The dependent
891 variables are specified with the 'dependent'
892 subcommand, and maybe also in the 'variables' subcommand.
893 We need to separate the two.
897 if (v->type == ALPHA)
899 design_matrix_set_categorical (X, row, v, val);
901 else if (v->type == NUMERIC)
903 design_matrix_set_numeric (X, row, v, val);
907 val = case_data (&c, depvar->fv);
908 gsl_vector_set (Y, row, val->f);
913 Now that we know the number of coefficients, allocate space
914 and store pointers to the variables that correspond to the
917 pspp_linreg_coeff_init (lcache, X);
920 Find the least-squares estimates and other statistics.
922 pspp_linreg ((const gsl_vector *) Y, X->m, &lopts, lcache);
923 subcommand_statistics (cmd.a_statistics, lcache);
924 subcommand_export (cmd.sbc_export, lcache);
926 design_matrix_destroy (X);
927 pspp_linreg_cache_free (lcache);
928 free (lopts.get_indep_mean_std);
929 casereader_destroy (r);
932 free (is_missing_case);