Changes from 0.10.2 to 0.10.4:
+ * The REGRESSION command now has a /ORIGIN subcommand to perform
+ regression through the origin.
+
* The FACTOR command can now analyse matrix files prepared with MATRIX DATA.
* The MATRIX DATA command has been added.
/VARIABLES=@var{var_list}
/DEPENDENT=@var{var_list}
/STATISTICS=@{ALL, DEFAULTS, R, COEFF, ANOVA, BCOV, CI[@var{conf}]@}
+ @{ /ORIGIN | /NOORIGIN @}
/SAVE=@{PRED, RESID@}
@end display
or if it is specified without any parameters.
@end table
+The @subcmd{ORIGIN} and @subcmd{NOORIGIN} subcommands are mutually
+exclusive. @subcmd{ORIGIN} indicates that the regression should be
+performed through the origin. You should use this option if, and
+only if you have reason to believe that the regression does indeed
+pass through the origin --- that is to say, the value @math{b_0} above,
+is zero. The default is @subcmd{NOORIGIN}.
+
The @subcmd{SAVE} subcommand causes @pspp{} to save the residuals or predicted
values from the fitted
model to the active dataset. @pspp{} will store the residuals in a variable
gsl_matrix *corr_matrix = NULL;
struct covariance *cov = covariance_2pass_create (corr->n_vars_total, corr->vars,
NULL,
- opts->wv, opts->exclude);
+ opts->wv, opts->exclude,
+ true);
struct casereader *rc = casereader_clone (r);
for ( ; (c = casereader_read (r) ); case_unref (c))
struct idata *idata = idata_alloc (factor->n_vars);
idata->cvm = covariance_1pass_create (factor->n_vars, factor->vars,
- factor->wv, factor->exclude);
+ factor->wv, factor->exclude, true);
for ( ; (c = casereader_read (r) ); case_unref (c))
{
cmd->wv, cmd->exclude, MV_ANY);
cov = covariance_2pass_create (cmd->n_dep_vars, cmd->dep_vars,
- ws.cats, cmd->wv, cmd->exclude);
+ ws.cats, cmd->wv, cmd->exclude, true);
c = casereader_peek (input, 0);
ws.vws[v].cov = covariance_2pass_create (1, &cmd->vars[v],
ws.vws[v].cat,
- cmd->wv, cmd->exclude);
+ cmd->wv, cmd->exclude, true);
ws.vws[v].nl = levene_create (var_get_width (cmd->indep_var), NULL);
}
/* PSPP - a program for statistical analysis.
- Copyright (C) 2005, 2009, 2010, 2011, 2012, 2013, 2014, 2016 Free Software Foundation, Inc.
+ Copyright (C) 2005, 2009, 2010, 2011, 2012, 2013, 2014,
+ 2016, 2017 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
bool resid;
bool pred;
+
+ bool origin;
};
struct regression_workspace
regression.resid = false;
regression.ds = ds;
+ regression.origin = false;
bool variables_seen = false;
bool method_seen = false;
PV_NO_DUPLICATE | PV_NUMERIC))
goto error;
}
+ else if (lex_match_id (lexer, "ORIGIN"))
+ {
+ regression.origin = true;
+ }
+ else if (lex_match_id (lexer, "NOORIGIN"))
+ {
+ regression.origin = false;
+ }
else if (lex_match_id (lexer, "METHOD"))
{
method_seen = true;
fill_all_vars (all_vars, cmd);
cov = covariance_1pass_create (n_all_vars, all_vars,
dict_get_weight (dataset_dict (cmd->ds)),
- MV_ANY);
+ MV_ANY, cmd->origin == false);
reader = casereader_clone (input);
reader = casereader_create_filter_missing (reader, all_vars, n_all_vars,
gsl_matrix *this_cm = gsl_matrix_alloc (n_indep + 1, n_indep + 1);
double n_data = fill_covariance (this_cm, cov, vars, n_indep,
dep_var, all_vars, n_all_vars, means);
- models[k] = linreg_alloc (dep_var, vars, n_data, n_indep);
+ models[k] = linreg_alloc (dep_var, vars, n_data, n_indep, cmd->origin);
models[k]->depvar = dep_var;
for (i = 0; i < n_indep; i++)
{
int n_cols = 7;
const int heading_rows = 2;
int n_rows;
- int this_row;
- double t_stat;
+ int this_row = heading_rows;
double pval;
double std_err;
double beta;
tab_text (t, 4, 1, TAB_CENTER | TAT_TITLE, _("Beta"));
tab_text (t, 5, 1, TAB_CENTER | TAT_TITLE, _("t"));
tab_text (t, 6, 1, TAB_CENTER | TAT_TITLE, _("Sig."));
- tab_text (t, 1, heading_rows, TAB_LEFT | TAT_TITLE, _("(Constant)"));
- tab_double (t, 2, heading_rows, 0, linreg_intercept (c), NULL, RC_OTHER);
+
std_err = sqrt (gsl_matrix_get (linreg_cov (c), 0, 0));
if (cmd->stats & STATS_CI)
tab_text (t, 7, 1, TAB_CENTER | TAT_TITLE, _("Lower Bound"));
tab_text (t, 8, 1, TAB_CENTER | TAT_TITLE, _("Upper Bound"));
}
- tab_double (t, 3, heading_rows, 0, std_err, NULL, RC_OTHER);
- tab_double (t, 4, heading_rows, 0, 0.0, NULL, RC_OTHER);
- t_stat = linreg_intercept (c) / std_err;
- tab_double (t, 5, heading_rows, 0, t_stat, NULL, RC_OTHER);
- pval =
- 2 * gsl_cdf_tdist_Q (fabs (t_stat),
- (double) (linreg_n_obs (c) - linreg_n_coeffs (c)));
- tab_double (t, 6, heading_rows, 0, pval, NULL, RC_PVALUE);
-
- for (j = 0; j < linreg_n_coeffs (c); j++)
+
+ if (!cmd->origin)
+ {
+ tab_text (t, 1, this_row, TAB_LEFT | TAT_TITLE, _("(Constant)"));
+ tab_double (t, 2, this_row, 0, linreg_intercept (c), NULL, RC_OTHER);
+ tab_double (t, 3, this_row, 0, std_err, NULL, RC_OTHER);
+ tab_double (t, 4, this_row, 0, 0.0, NULL, RC_OTHER);
+ double t_stat = linreg_intercept (c) / std_err;
+ tab_double (t, 5, this_row, 0, t_stat, NULL, RC_OTHER);
+
+ double pval =
+ 2 * gsl_cdf_tdist_Q (fabs (t_stat),
+ (double) (linreg_n_obs (c) - linreg_n_coeffs (c)));
+ tab_double (t, 6, this_row, 0, pval, NULL, RC_PVALUE);
+ this_row++;
+ }
+
+ for (j = 0; j < linreg_n_coeffs (c); j++, this_row++)
{
struct string tstr;
ds_init_empty (&tstr);
- this_row = j + heading_rows + 1;
v = linreg_indep_var (c, j);
label = var_to_string (v);
/*
Test statistic for H0: coefficient is 0.
*/
- t_stat = linreg_coeff (c, j) / std_err;
+ double t_stat = linreg_coeff (c, j) / std_err;
tab_double (t, 5, this_row, 0, t_stat, NULL, RC_OTHER);
/*
P values for the test statistic above.
struct covariance
{
+ /* True if the covariances are centerered. (ie Real covariances) */
+ bool centered;
+
/* The variables for which the covariance matrix is to be calculated. */
size_t n_vars;
const struct variable *const *vars;
*/
struct covariance *
covariance_1pass_create (size_t n_vars, const struct variable *const *vars,
- const struct variable *weight, enum mv_class exclude)
+ const struct variable *weight, enum mv_class exclude,
+ bool centered)
{
size_t i;
struct covariance *cov = xzalloc (sizeof *cov);
+ cov->centered = centered;
cov->passes = 1;
cov->state = 0;
cov->pass_one_first_case_seen = cov->pass_two_first_case_seen = false;
struct covariance *
covariance_2pass_create (size_t n_vars, const struct variable *const *vars,
struct categoricals *cats,
- const struct variable *wv, enum mv_class exclude)
+ const struct variable *wv, enum mv_class exclude,
+ bool centered)
{
size_t i;
struct covariance *cov = xmalloc (sizeof *cov);
+ cov->centered = centered;
cov->passes = 2;
cov->state = 0;
cov->pass_one_first_case_seen = cov->pass_two_first_case_seen = false;
}
}
- /* Centre the moments */
- for ( j = 0 ; j < cov->dim - 1; ++j)
+ if (cov->centered)
{
- for (i = j + 1 ; i < cov->dim; ++i)
+ /* Centre the moments */
+ for ( j = 0 ; j < cov->dim - 1; ++j)
{
- double *x = &cov->cm [cm_idx (cov, i, j)];
+ for (i = j + 1 ; i < cov->dim; ++i)
+ {
+ double *x = &cov->cm [cm_idx (cov, i, j)];
- *x /= gsl_matrix_get (cov->moments[0], i, j);
+ *x /= gsl_matrix_get (cov->moments[0], i, j);
- *x -=
- gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j)
- *
- gsl_matrix_get (cov->moments[MOMENT_MEAN], j, i);
+ *x -=
+ gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j)
+ *
+ gsl_matrix_get (cov->moments[MOMENT_MEAN], j, i);
+ }
}
}
{
size_t i, j;
- for (i = 0 ; i < cov->dim; ++i)
+ if (cov->centered)
{
- for (j = 0 ; j < cov->dim; ++j)
+ for (i = 0 ; i < cov->dim; ++i)
{
- double *x = gsl_matrix_ptr (cov->moments[MOMENT_VARIANCE], i, j);
- *x -= pow2 (gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j))
- / gsl_matrix_get (cov->moments[MOMENT_NONE], i, j);
+ for (j = 0 ; j < cov->dim; ++j)
+ {
+ double *x = gsl_matrix_ptr (cov->moments[MOMENT_VARIANCE], i, j);
+ *x -= pow2 (gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j))
+ / gsl_matrix_get (cov->moments[MOMENT_NONE], i, j);
+ }
}
- }
- for ( j = 0 ; j < cov->dim - 1; ++j)
- {
- for (i = j + 1 ; i < cov->dim; ++i)
+
+ for ( j = 0 ; j < cov->dim - 1; ++j)
{
- double *x = &cov->cm [cm_idx (cov, i, j)];
+ for (i = j + 1 ; i < cov->dim; ++i)
+ {
+ double *x = &cov->cm [cm_idx (cov, i, j)];
- *x -=
- gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j)
- *
- gsl_matrix_get (cov->moments[MOMENT_MEAN], j, i)
- / gsl_matrix_get (cov->moments[MOMENT_NONE], i, j);
+ *x -=
+ gsl_matrix_get (cov->moments[MOMENT_MEAN], i, j)
+ *
+ gsl_matrix_get (cov->moments[MOMENT_MEAN], j, i)
+ / gsl_matrix_get (cov->moments[MOMENT_NONE], i, j);
+ }
}
}
-
+
return cm_to_gsl (cov);
}
struct categoricals;
struct covariance * covariance_1pass_create (size_t n_vars, const struct variable *const *vars,
- const struct variable *wv, enum mv_class excl);
+ const struct variable *wv, enum mv_class excl, bool centered);
struct covariance *
covariance_2pass_create (size_t n_vars, const struct variable *const *vars,
struct categoricals *cats,
- const struct variable *wv, enum mv_class excl);
+ const struct variable *wv, enum mv_class excl, bool centered);
void covariance_accumulate (struct covariance *, const struct ccase *);
void covariance_accumulate_pass1 (struct covariance *, const struct ccase *);
/* PSPP - a program for statistical analysis.
- Copyright (C) 2005, 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 2005, 2010, 2011, 2017 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
*/
linreg *
linreg_alloc (const struct variable *depvar, const struct variable **indep_vars,
- double n, size_t p)
+ double n, size_t p, bool origin)
{
linreg *c;
size_t i;
c->n_coeffs = p;
c->coeff = xnmalloc (p, sizeof (*c->coeff));
c->cov = gsl_matrix_calloc (c->n_coeffs + 1, c->n_coeffs + 1);
- c->dft = n - 1;
+ c->dft = n;
+ if (!origin)
+ c->dft--;
+
c->dfm = p;
c->dfe = c->dft - c->dfm;
c->intercept = 0.0;
c->refcnt = 1;
+ c->origin = origin;
+
return c;
}
static void
post_sweep_computations (linreg *l, gsl_matrix *sw)
{
- gsl_matrix *xm;
- gsl_matrix_view xtx;
- gsl_matrix_view xmxtx;
double m;
size_t i;
size_t j;
double tmp = -1.0 * l->mse * gsl_matrix_get (sw, i, j);
gsl_matrix_set (l->cov, i + 1, j + 1, tmp);
}
- /*
- Get the covariances related to the intercept.
- */
- xtx = gsl_matrix_submatrix (sw, 0, 0, l->n_indeps, l->n_indeps);
- xmxtx = gsl_matrix_submatrix (l->cov, 0, 1, 1, l->n_indeps);
- xm = gsl_matrix_calloc (1, l->n_indeps);
- for (i = 0; i < xm->size2; i++)
- {
- gsl_matrix_set (xm, 0, i,
- linreg_get_indep_variable_mean (l, i));
- }
- rc = gsl_blas_dsymm (CblasRight, CblasUpper, l->mse,
- &xtx.matrix, xm, 0.0, &xmxtx.matrix);
- gsl_matrix_free (xm);
- if (rc == GSL_SUCCESS)
+
+ if (! l->origin)
{
- double tmp = l->mse / l->n_obs;
- for (i = 1; i < 1 + l->n_indeps; i++)
+ gsl_matrix *xm;
+ gsl_matrix_view xtx;
+ gsl_matrix_view xmxtx;
+ /*
+ Get the covariances related to the intercept.
+ */
+ xtx = gsl_matrix_submatrix (sw, 0, 0, l->n_indeps, l->n_indeps);
+ xmxtx = gsl_matrix_submatrix (l->cov, 0, 1, 1, l->n_indeps);
+ xm = gsl_matrix_calloc (1, l->n_indeps);
+ for (i = 0; i < xm->size2; i++)
{
- tmp -= gsl_matrix_get (l->cov, 0, i)
- * linreg_get_indep_variable_mean (l, i - 1);
+ gsl_matrix_set (xm, 0, i,
+ linreg_get_indep_variable_mean (l, i));
+ }
+ rc = gsl_blas_dsymm (CblasRight, CblasUpper, l->mse,
+ &xtx.matrix, xm, 0.0, &xmxtx.matrix);
+ gsl_matrix_free (xm);
+ if (rc == GSL_SUCCESS)
+ {
+ double tmp = l->mse / l->n_obs;
+ for (i = 1; i < 1 + l->n_indeps; i++)
+ {
+ tmp -= gsl_matrix_get (l->cov, 0, i)
+ * linreg_get_indep_variable_mean (l, i - 1);
+ }
+ gsl_matrix_set (l->cov, 0, 0, tmp);
+
+ l->intercept = m;
+ }
+ else
+ {
+ fprintf (stderr, "%s:%d:gsl_blas_dsymm: %s\n",
+ __FILE__, __LINE__, gsl_strerror (rc));
+ exit (rc);
}
- gsl_matrix_set (l->cov, 0, 0, tmp);
-
- l->intercept = m;
- }
- else
- {
- fprintf (stderr, "%s:%d:gsl_blas_dsymm: %s\n",
- __FILE__, __LINE__, gsl_strerror (rc));
- exit (rc);
}
}
int dependent_column; /* Column containing the dependent variable. Defaults to last column. */
int refcnt;
+
+ bool origin;
};
typedef struct linreg_struct linreg;
linreg *linreg_alloc (const struct variable *, const struct variable **,
- double, size_t);
+ double, size_t, bool);
void linreg_unref (linreg *);
void linreg_ref (linreg *);
])
AT_CLEANUP
+
+
+
+AT_SETUP([LINEAR REGRESSION /ORIGIN])
+AT_DATA([regression-origin.sps], [dnl
+SET FORMAT=F10.3.
+
+DATA LIST notable LIST /number * value *.
+BEGIN DATA
+ 16 7.25
+ 0 .00
+ 1 .10
+ 9 27.9
+ 0 .00
+ 7 3.65
+ 14 16.8
+ 24 9.15
+ 0 .00
+ 24 19.0
+ 7 4.05
+ 12 7.90
+ 6 .75
+ 11 1.40
+ 0 .00
+ 3 2.30
+ 12 7.60
+ 11 6.80
+ 16 8.65
+END DATA.
+
+REGRESSION
+ /STATISTICS COEFF R ANOVA
+ /DEPENDENT value
+ /ORIGIN
+ /METHOD=ENTER number.
+])
+
+
+AT_CHECK([pspp -O format=csv regression-origin.sps], [0], [dnl
+Table: Model Summary (value)
+,R,R Square,Adjusted R Square,Std. Error of the Estimate
+,.802,.643,.622,6.032
+
+Table: ANOVA (value)
+,,Sum of Squares,df,Mean Square,F,Sig.
+,Regression,1181.726,1,1181.726,32.475,.000
+,Residual,654.989,18,36.388,,
+,Total,1836.715,19,,,
+
+Table: Coefficients (value)
+,,Unstandardized Coefficients,,Standardized Coefficients,,
+,,B,Std. Error,Beta,t,Sig.
+,number,.672,.118,.802,5.699,.000
+,,,,,,
+])
+
+AT_CLEANUP