From d594340cb1ae007a92d094ae67116f9f622f2b5d Mon Sep 17 00:00:00 2001 From: Jason Stover Date: Mon, 26 Dec 2005 07:14:15 +0000 Subject: [PATCH 1/1] Added confidence and prediction intervals to model export --- src/reg_export_comments.h | 69 +++++++++++++++++++++++++++++++++++++++ src/regression.q | 40 ++++++++++++++++++++--- 2 files changed, 104 insertions(+), 5 deletions(-) diff --git a/src/reg_export_comments.h b/src/reg_export_comments.h index 212de4d1..396f3905 100644 --- a/src/reg_export_comments.h +++ b/src/reg_export_comments.h @@ -17,11 +17,17 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +/* + Exported C code for a regression model. The EXPORT subcommand causes PSPP + to save a model as a small C program. This file contains some of the code + of that saved program. + */ #ifndef REG_EXPORT_COMMENTS_H #define REG_EXPORT_COMMENTS_H const char reg_preamble[] = "/*\n This program contains functions which return estimates\n" " and confidence intervals for a linear model. The EXPORT subcommand\n" " of the REGRESSION procedure of GNU PSPP generated this program.\n*/\n\n"; + const char reg_mean_cmt[] = "/*\n Estimate the mean of Y, the dependent variable for\n" " the linear model of the form \n\n" " Y = b0 + b1 * X1 + b2 * X2 + ... + bk * Xk + error\n\n" @@ -30,4 +36,67 @@ const char reg_mean_cmt[] = "/*\n Estimate the mean of Y, the dependent varia " as known by PSPP, are stored in var_names. The estimated \n" " regression coefficients (i.e., the estimates of b0,...,bk) \n" " are stored in model_coeffs.\n*/\n"; + +const char reg_getvar[] = "{\n\t\tj = pspp_reg_getvar (var_names[i]);\n" +"\t\testimate += var_vals[j] * model_coeffs[j];\n" +"\t}\n\t\n\treturn estimate;\n}\n\n" +"/*\n Variance of an estimated mean of this form:\n\t" +"Y = b0 + b1 * X1 + ... + bk * Xk\n where X1,...Xk are the dependent variables," +" stored in\n var_vals and b0,...,bk are the estimated regression coefficients.\n*/\n" +"double\npspp_reg_variance (const double *var_vals, " +"const char *var_names[])\n{\n\t"; + +const char reg_export_t_quantiles_1[] = "/*\n Quantiles for the T distribution.\n*/\n" +"static int\npspp_reg_t_quantile " +"(double prob)\n{\n\n\tint i;\n\tdouble quantiles[] = {\n\t\t"; + +const char reg_export_t_quantiles_2[] = "i = (int) 100.0 * prob;\n\treturn quantiles[i];\n}\n"; + +const char reg_variance[] = "double result = 0.0;\n\n\tfor(i = 0; i < n_vars; i++)\n\t" +"{\n\t\tj = pspp_reg_getvar (var_names[i]);\n\t\t" +"unshuffled_vals[j] = var_vals[i];\n\t}\n\t" +"for (i = 0; i < n_vars; i++)\n\t" +"{\n\t\tresult += cov[i][i] * unshuffled_vals[i] * unshuffled_vals[i];\n\t\t" +"for (j = i + 1; j < n_vars; j++)\n\t\t{\n\t\t\t" +"result += 2.0 * cov[i][j] * unshuffled_vals[i] * unshuffled_vals[j];" +"\n\t\t}\n\t}\n\treturn result;\n}\n"; + +const char reg_export_confidence_interval[] = "/*\n Upper confidence limit for an " +"estimated mean b0 + b1 * X1 + ... + bk * Xk.\n The confidence interval is a " +"100 * p percent confidence interval.\n*/\n" +"double pspp_reg_confidence_interval_U " +"(const double *var_vals, const char *var_names[], double p)\n{\n\t" +"double result;\n\t" +"result = sqrt (pspp_reg_variance (var_vals, var_names);\n\treturn result;\n\t" +"result *= pspp_reg_t_quantile ((1.0 + p) / 2.0);\n\t" +"result += pspp_reg_estimate (var_vals, var_names);\n}\n" +"/*\n Lower confidence limit for an " +"estimated mean b0 + b1 * X1 + ... + bk * Xk.\n The confidence interval is a " +"100 * p percent confidence interval.\n*/\n" +"double pspp_reg_confidence_interval_L " +"(const double *var_vals, const char *var_names[], double p)\n{\n\t" +"double result;\n\t" +"result = -sqrt (pspp_reg_variance (var_vals, var_names);\n\treturn result;\n\t" +"result *= pspp_reg_t_quantile ((1.0 + p) / 2.0);\n\t" +"result += pspp_reg_estimate (var_vals, var_names);\n}\n"; + +const char reg_export_prediction_interval[] = "/*\n Upper prediction limit for a " +"predicted value b0 + b1 * X1 + ... + bk * Xk.\n The prediction interval is a " +"100 * p percent prediction interval.\n*/\n" +"double pspp_reg_prediction_interval_U " +"(const double *var_vals, const char *var_names[], double p)\n{\n\t" +"double result;\n\t" +"result = 1 + sqrt (pspp_reg_variance (var_vals, var_names);\n\treturn result;\n\t" +"result *= pspp_reg_t_quantile ((1.0 + p) / 2.0);\n\t" +"result += pspp_reg_estimate (var_vals, var_names);\n}\n" +"/*\n Lower prediction limit for a " +"predicted value b0 + b1 * X1 + ... + bk * Xk.\n The prediction interval is a " +"100 * p percent prediction interval.\n*/\n" +"double pspp_reg_prediction_interval_L " +"(const double *var_vals, const char *var_names[], double p)\n{\n\t" +"double result;\n\t" +"result = -1.0 - sqrt (pspp_reg_variance (var_vals, var_names);\n\treturn result;\n\t" +"result *= pspp_reg_t_quantile ((1.0 + p) / 2.0);\n\t" +"result += pspp_reg_estimate (var_vals, var_names);\n}\n"; + #endif diff --git a/src/regression.q b/src/regression.q index bf99a5fb..90ce53c1 100644 --- a/src/regression.q +++ b/src/regression.q @@ -502,6 +502,10 @@ subcommand_export (int export, pspp_linreg_cache *c) { FILE *fp; size_t i; + size_t j; + int n_quantiles = 100; + double increment; + double tmp; struct pspp_linreg_coeff coeff; if (export) @@ -513,6 +517,15 @@ subcommand_export (int export, pspp_linreg_cache *c) fprintf (fp, "%s", reg_preamble); fprintf (fp, "#include \n\n"); reg_print_getvar (fp, c); + fprintf (fp, "%s", reg_export_t_quantiles_1); + increment = 0.5 / (double) increment; + for (i = 0; i < n_quantiles - 1; i++) + { + tmp = 0.5 + 0.005 * (double) i; + fprintf (fp, "%.15e,\n\t\t", gsl_cdf_tdist_Pinv (tmp, c->n_obs - c->n_indeps)); + } + fprintf (fp, "%.15e};\n\t", gsl_cdf_tdist_Pinv (.9995, c->n_obs - c->n_indeps)); + fprintf (fp, "%s", reg_export_t_quantiles_2); fprintf (fp, "%s", reg_mean_cmt); fprintf (fp, "double\npspp_reg_estimate (const double *var_vals,"); fprintf (fp, "const char *var_names[])\n{\n\t"); @@ -528,11 +541,28 @@ subcommand_export (int export, pspp_linreg_cache *c) fprintf (fp, "double estimate = %.15e;\n\t", coeff.estimate); fprintf (fp, "int i;\n\tint j;\n\n\t"); fprintf (fp, "for (i = 0; i < %d; i++)\n\t", c->n_indeps); - fprintf (fp, "{\n\t\tj = pspp_reg_getvar (var_names[i]);\n"); - fprintf (fp, "\t\testimate += var_vals[j] * model_coeffs[j];\n"); - fprintf (fp, "\t}\n\t\n\treturn estimate;\n}\n\n"); - fprintf (fp, "double\npspp_reg_standard_error (const double *var_vals,"); - fprintf (fp, "const char *var_names[])\n{\n\t"); + fprintf (fp, "%s", reg_getvar); + fprintf (fp, "const double cov[%d][%d] = {\n\t",c->n_indeps, c->n_indeps); + for (i = 0; i < c->cov->size1 - 1; i++) + { + fprintf (fp, "{"); + for (j = 0; j < c->cov->size2 - 1; j++) + { + fprintf (fp, "%.15e, ", gsl_matrix_get (c->cov, i, j)); + } + fprintf (fp, "%.15e},\n\t", gsl_matrix_get (c->cov, i, j)); + } + fprintf (fp, "{"); + for (j = 0; j < c->cov->size2 - 1; j++) + { + fprintf (fp, "%.15e, ", gsl_matrix_get (c->cov, c->cov->size1 - 1, j)); + } + fprintf (fp, "%.15e}\n\t", gsl_matrix_get (c->cov, c->cov->size1 - 1, c->cov->size2 - 1)); + fprintf (fp, "};\n\tint n_vars = %d;\n\tint i;\n\tint j;\n\t", c->n_indeps); + fprintf (fp, "double unshuffled_vals[%d];\n\t",c->n_indeps); + fprintf (fp, "%s", reg_variance); + fprintf (fp, "%s", reg_export_confidence_interval); + fprintf (fp, "%s", reg_export_prediction_interval); fclose (fp); } } -- 2.30.2