X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fregression.q;h=d5008b5681178b5890584ca08b20ee04169a6e31;hb=26791c51431aa1b848b6e3997d2402680513c448;hp=1f2ac2a6bac15051def19c85dc6b7e1af180dcde;hpb=dc78471910e82d59232ce9b137b7c4fc4992d174;p=pspp-builds.git diff --git a/src/language/stats/regression.q b/src/language/stats/regression.q index 1f2ac2a6..d5008b56 100644 --- a/src/language/stats/regression.q +++ b/src/language/stats/regression.q @@ -1,20 +1,18 @@ -/* PSPP - linear regression. +/* PSPP - a program for statistical analysis. Copyright (C) 2005 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ #include @@ -119,14 +117,14 @@ static size_t n_variables; /* File where the model will be saved if the EXPORT subcommand - is given. + is given. */ static struct file_handle *model_file; static bool run_regression (struct casereader *, struct cmd_regression *, - struct dataset *); + struct dataset *); -/* +/* STATISTICS subcommand output functions. */ static void reg_stats_r (pspp_linreg_cache *); @@ -454,8 +452,8 @@ statistics_keyword_output (void (*function) (pspp_linreg_cache *), static void subcommand_statistics (int *keywords, pspp_linreg_cache * c) { - /* - The order here must match the order in which the STATISTICS + /* + The order here must match the order in which the STATISTICS keywords appear in the specification section above. */ enum @@ -628,7 +626,7 @@ regression_trns_resid_proc (void *t_, struct ccase *c, return TRNS_CONTINUE; } -/* +/* Returns false if NAME is a duplicate of any existing variable name. */ static bool @@ -1014,7 +1012,8 @@ regression_custom_variables (struct lexer *lexer, struct dataset *ds, /* Identify the explanatory variables in v_variables. Returns the number of independent variables. */ static int -identify_indep_vars (const struct variable **indep_vars, const struct variable *depvar) +identify_indep_vars (const struct variable **indep_vars, + const struct variable *depvar) { int n_indep_vars = 0; int i; @@ -1030,8 +1029,8 @@ identify_indep_vars (const struct variable **indep_vars, const struct variable * Returns number of valid cases. */ static int prepare_categories (struct casereader *input, - const struct variable **vars, size_t n_vars, - struct moments_var *mom) + const struct variable **vars, size_t n_vars, + struct moments_var *mom) { int n_data; struct ccase c; @@ -1042,23 +1041,23 @@ prepare_categories (struct casereader *input, cat_stored_values_create (vars[i]); n_data = 0; - for (; casereader_read (input, &c); case_destroy (&c)) + for (; casereader_read (input, &c); case_destroy (&c)) { /* - The second condition ensures the program will run even if - there is only one variable to act as both explanatory and - response. + The second condition ensures the program will run even if + there is only one variable to act as both explanatory and + response. */ for (i = 0; i < n_vars; i++) - { - const union value *val = case_data (&c, vars[i]); - if (var_is_alpha (vars[i])) - cat_value_update (vars[i], val); - else - moments1_add (mom[i].m, val->f, 1.0); - } - n_data++; - } + { + const union value *val = case_data (&c, vars[i]); + if (var_is_alpha (vars[i])) + cat_value_update (vars[i], val); + else + moments1_add (mom[i].m, val->f, 1.0); + } + n_data++; + } casereader_destroy (input); return n_data; @@ -1108,7 +1107,7 @@ compute_moments (pspp_linreg_cache * c, struct moments_var *mom, static bool run_regression (struct casereader *input, struct cmd_regression *cmd, - struct dataset *ds) + struct dataset *ds) { size_t i; int n_indep = 0; @@ -1161,17 +1160,17 @@ run_regression (struct casereader *input, struct cmd_regression *cmd, casenumber row; struct ccase c; size_t n_data; /* Number of valid cases. */ - + dep_var = cmd->v_dependent[k]; n_indep = identify_indep_vars (indep_vars, dep_var); reader = casereader_clone (input); reader = casereader_create_filter_missing (reader, indep_vars, n_indep, - MV_ANY, NULL); + MV_ANY, NULL); reader = casereader_create_filter_missing (reader, &dep_var, 1, - MV_ANY, NULL); - n_data = prepare_categories (casereader_clone (reader), - indep_vars, n_indep, mom); + MV_ANY, NULL); + n_data = prepare_categories (casereader_clone (reader), + indep_vars, n_indep, mom); if ((n_data > 0) && (n_indep > 0)) { @@ -1187,33 +1186,32 @@ run_regression (struct casereader *input, struct cmd_regression *cmd, models[k] = pspp_linreg_cache_alloc (X->m->size1, X->m->size2); models[k]->indep_means = gsl_vector_alloc (X->m->size2); models[k]->indep_std = gsl_vector_alloc (X->m->size2); - models[k]->depvar = dep_var; - /* + models[k]->depvar = dep_var; + /* For large data sets, use QR decomposition. */ if (n_data > sqrt (n_indep) && n_data > REG_LARGE_DATA) { - models[k]->method = PSPP_LINREG_SVD; + models[k]->method = PSPP_LINREG_QR; } /* - The second pass fills the design matrix. - */ - reader = casereader_create_counter (reader, &row, -1); - for (; casereader_read (reader, &c); case_destroy (&c)) - { - for (i = 0; i < n_indep; ++i) - { - const struct variable *v = indep_vars[i]; - const union value *val = case_data (&c, v); - if (var_is_alpha (v)) - design_matrix_set_categorical (X, row, v, val); - else - design_matrix_set_numeric (X, row, v, val); - } - gsl_vector_set (Y, row, case_num (&c, dep_var)); - } - casereader_destroy (reader); + The second pass fills the design matrix. + */ + reader = casereader_create_counter (reader, &row, -1); + for (; casereader_read (reader, &c); case_destroy (&c)) + { + for (i = 0; i < n_indep; ++i) + { + const struct variable *v = indep_vars[i]; + const union value *val = case_data (&c, v); + if (var_is_alpha (v)) + design_matrix_set_categorical (X, row, v, val); + else + design_matrix_set_numeric (X, row, v, val); + } + gsl_vector_set (Y, row, case_num (&c, dep_var)); + } /* Now that we know the number of coefficients, allocate space and store pointers to the variables that correspond to the @@ -1221,25 +1219,27 @@ run_regression (struct casereader *input, struct cmd_regression *cmd, */ coeff_init (models[k], X); - /* + /* Find the least-squares estimates and other statistics. */ pspp_linreg ((const gsl_vector *) Y, X->m, &lopts, models[k]); compute_moments (models[k], mom, X, n_variables); - if (!taint_has_tainted_successor (casereader_get_taint (input))) - { - subcommand_statistics (cmd->a_statistics, models[k]); - subcommand_export (cmd->sbc_export, models[k]); - } + if (!taint_has_tainted_successor (casereader_get_taint (input))) + { + subcommand_statistics (cmd->a_statistics, models[k]); + subcommand_export (cmd->sbc_export, models[k]); + } gsl_vector_free (Y); design_matrix_destroy (X); } else { - msg (SE, gettext ("No valid data found. This command was skipped.")); + msg (SE, + gettext ("No valid data found. This command was skipped.")); } + casereader_destroy (reader); } free (indep_vars); free (lopts.get_indep_mean_std); @@ -1249,7 +1249,7 @@ run_regression (struct casereader *input, struct cmd_regression *cmd, } /* - Local Variables: + Local Variables: mode: c End: */