X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fregression.q;h=4ef12c98479ec2aac3b0711aaedef9a7410899c5;hb=refs%2Fbuilds%2F20111003030502%2Fpspp;hp=6dc10821e5788a14753e6c6ad7060b2c2b03f6f1;hpb=8f69116a2c31b8aed0527e225c368b3466da6c8d;p=pspp diff --git a/src/language/stats/regression.q b/src/language/stats/regression.q index 6dc10821e5..4ef12c9847 100644 --- a/src/language/stats/regression.q +++ b/src/language/stats/regression.q @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2005, 2009 Free Software Foundation, Inc. + Copyright (C) 2005, 2009, 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,28 +21,30 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xalloc.h" + +#include "data/case.h" +#include "data/casegrouper.h" +#include "data/casereader.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/missing-values.h" +#include "data/transformations.h" +#include "data/value-labels.h" +#include "data/variable.h" +#include "language/command.h" +#include "language/data-io/file-handle.h" +#include "language/dictionary/split-file.h" +#include "language/lexer/lexer.h" +#include "libpspp/compiler.h" +#include "libpspp/message.h" +#include "libpspp/taint.h" +#include "math/covariance.h" +#include "math/linreg.h" +#include "math/moments.h" +#include "output/tab.h" + +#include "gl/intprops.h" +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -206,7 +208,7 @@ reg_stats_coeff (linreg * c, void *aux_) tab_double (t, 4, 1, 0, 0.0, NULL); t_stat = linreg_intercept (c) / std_err; tab_double (t, 5, 1, 0, t_stat, NULL); - pval = 2 * gsl_cdf_tdist_Q (fabs (t_stat), 1.0); + pval = 2 * gsl_cdf_tdist_Q (fabs (t_stat), (double) (linreg_n_obs (c) - linreg_n_coeffs (c))); tab_double (t, 6, 1, 0, pval, NULL); for (j = 0; j < linreg_n_coeffs (c); j++) { @@ -602,29 +604,19 @@ regression_trns_resid_proc (void *t_, struct ccase **c, return TRNS_CONTINUE; } -/* - Returns false if NAME is a duplicate of any existing variable name. -*/ -static bool -try_name (const struct dictionary *dict, const char *name) -{ - if (dict_lookup_var (dict, name) != NULL) - return false; - - return true; -} - -static void -reg_get_name (const struct dictionary *dict, char name[VAR_NAME_LEN], - const char prefix[VAR_NAME_LEN]) +static char * +reg_get_name (const struct dictionary *dict, const char *prefix) { - int i = 1; + char *name; + int i; - snprintf (name, VAR_NAME_LEN, "%s%d", prefix, i); - while (!try_name (dict, name)) + /* XXX handle too-long prefixes */ + name = xmalloc (strlen (prefix) + INT_BUFSIZE_BOUND (i) + 1); + for (i = 1; ; i++) { - i++; - snprintf (name, VAR_NAME_LEN, "%s%d", prefix, i); + sprintf (name, "%s%d", prefix, i); + if (dict_lookup_var (dict, name) == NULL) + return name; } } @@ -634,7 +626,7 @@ reg_save_var (struct dataset *ds, const char *prefix, trns_proc_func * f, { struct dictionary *dict = dataset_dict (ds); static int trns_index = 1; - char name[VAR_NAME_LEN]; + char *name; struct variable *new_var; struct reg_trns *t = NULL; @@ -642,9 +634,11 @@ reg_save_var (struct dataset *ds, const char *prefix, trns_proc_func * f, t->trns_id = trns_index; t->n_trns = n_trns; t->c = c; - reg_get_name (dict, name, prefix); - new_var = dict_create_var (dict, name, 0); - assert (new_var != NULL); + + name = reg_get_name (dict, prefix); + new_var = dict_create_var_assert (dict, name, 0); + free (name); + *v = new_var; add_transformation (ds, f, regression_trns_free, t); trns_index++; @@ -752,10 +746,10 @@ regression_custom_variables (struct lexer *lexer, struct dataset *ds, { const struct dictionary *dict = dataset_dict (ds); - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); if ((lex_token (lexer) != T_ID - || dict_lookup_var (dict, lex_tokid (lexer)) == NULL) + || dict_lookup_var (dict, lex_tokcstr (lexer)) == NULL) && lex_token (lexer) != T_ALL) return 2; @@ -798,6 +792,7 @@ identify_indep_vars (const struct variable **indep_vars, } return n_indep_vars; } + static double fill_covariance (gsl_matrix *cov, struct covariance *all_cov, const struct variable **vars, @@ -810,12 +805,15 @@ fill_covariance (gsl_matrix *cov, struct covariance *all_cov, size_t dep_subscript; size_t *rows; const gsl_matrix *ssizes; - const gsl_matrix *cm; const gsl_matrix *mean_matrix; const gsl_matrix *ssize_matrix; double result = 0.0; - cm = covariance_calculate_unnormalized (all_cov); + gsl_matrix *cm = covariance_calculate_unnormalized (all_cov); + + if ( cm == NULL) + return 0; + rows = xnmalloc (cov->size1 - 1, sizeof (*rows)); for (i = 0; i < n_all_vars; i++) @@ -862,6 +860,7 @@ fill_covariance (gsl_matrix *cov, struct covariance *all_cov, gsl_matrix_set (cov, cov->size1 - 1, cov->size1 - 1, gsl_matrix_get (cm, dep_subscript, dep_subscript)); free (rows); + gsl_matrix_free (cm); return result; } static size_t @@ -928,7 +927,7 @@ run_regression (struct casereader *input, struct cmd_regression *cmd, const struct variable *dep_var; struct casereader *reader; const struct dictionary *dict; - gsl_matrix *this_cm; + size_t n_all_vars; assert (models != NULL); @@ -955,7 +954,7 @@ run_regression (struct casereader *input, struct cmd_regression *cmd, { dict_get_vars (dict, &v_variables, &n_variables, 0); } - size_t n_all_vars = get_n_all_vars (cmd); + n_all_vars = get_n_all_vars (cmd); all_vars = xnmalloc (n_all_vars, sizeof (*all_vars)); fill_all_vars (all_vars, cmd); vars = xnmalloc (n_variables, sizeof (*vars)); @@ -973,6 +972,7 @@ run_regression (struct casereader *input, struct cmd_regression *cmd, for (k = 0; k < cmd->n_dependent; k++) { + gsl_matrix *this_cm; dep_var = cmd->v_dependent[k]; n_indep = identify_indep_vars (vars, dep_var); @@ -1014,6 +1014,7 @@ run_regression (struct casereader *input, struct cmd_regression *cmd, linreg_free (models[k]); models[k] = NULL; } + gsl_matrix_free (this_cm); } casereader_destroy (reader);