From 3d9f94e464bd3b760898914304d16cc9c3990f11 Mon Sep 17 00:00:00 2001
From: John Darrington
Date: Tue, 16 Oct 2012 20:41:44 +0200
Subject: [PATCH] First attempt at a LOGISTIC REGRESSION command

---
 NEWS                             |    1 +
 doc/statistics.texi              |   77 ++-
 src/language/command.def         |    2 +-
 src/language/stats/automake.mk   |    1 +
 src/language/stats/logistic.c    | 1029 ++++++++++++++++++++++++++++++
 tests/automake.mk                |    1 +
 tests/language/stats/logistic.at |  291 +++++++++
 7 files changed, 1400 insertions(+), 2 deletions(-)
 create mode 100644 src/language/stats/logistic.c
 create mode 100644 tests/language/stats/logistic.at

diff --git a/NEWS b/NEWS
index 71ba057c00..5302e26542 100644
--- a/NEWS
+++ b/NEWS
@@ -17,6 +17,7 @@ Changes from 0.6.2 to 0.7.9:
    - DATASET DECLARE
    - DATASET DISPLAY
    - DATASET NAME
+   - LOGISTIC REGRESSION
    - MATCH FILES
    - MEANS
    - MRSETS
diff --git a/doc/statistics.texi b/doc/statistics.texi
index 35c47eea2c..6e8b5c67a4 100644
--- a/doc/statistics.texi
+++ b/doc/statistics.texi
@@ -10,7 +10,8 @@ far.
 * EXAMINE::             Testing data for normality.
 * CORRELATIONS::        Correlation tables.
 * CROSSTABS::           Crosstabulation tables.
-* FACTOR::              Factor analysis and Principal Components analysis
+* FACTOR::              Factor analysis and Principal Components analysis.
+* LOGISTIC REGRESSION:: Bivariate Logistic Regression.
 * MEANS::               Average values and other statistics.
 * NPAR TESTS::          Nonparametric tests.
 * T-TEST::              Test hypotheses about means.
@@ -723,6 +724,80 @@
 If @subcmd{PAIRWISE} is set, then a case is considered missing only if
 either of the values for the particular coefficient is missing.
 The default is @subcmd{LISTWISE}.
+
+@node LOGISTIC REGRESSION
+@section LOGISTIC REGRESSION
+
+@vindex LOGISTIC REGRESSION
+@cindex logistic regression
+@cindex bivariate logistic regression
+
+@display
+LOGISTIC REGRESSION [VARIABLES =] @var{dependent_var} WITH @var{var_list}
+
+     [@{/NOCONST | /ORIGIN | /NOORIGIN @}]
+
+     [/PRINT = [SUMMARY] [DEFAULT] [CI(@var{confidence})] [ALL]]
+
+     [/CRITERIA = [BCON(@var{min_delta})] [ITERATE(@var{max_iterations})]
+                  [LCON(@var{min_likelihood_delta})] [EPS(@var{min_epsilon})]]
+
+     [/MISSING = @{INCLUDE|EXCLUDE@}]
+@end display
+
+Bivariate Logistic Regression is used when you want to explain a
+dichotomous dependent variable in terms of one or more predictor
+variables.
+
+The minimum command is
+@example
+LOGISTIC REGRESSION @var{y} WITH @var{x1} @var{x2} @dots{} @var{xn}.
+@end example
+Here, @var{y} is the dependent variable, which must be dichotomous, and
+@var{x1} @dots{} @var{xn} are the predictor variables whose coefficients
+the procedure estimates.
+
+By default, a constant term is included in the model.
+Hence, the full model is
+@math{
+{\bf y}
+= b_0 + b_1 {\bf x_1}
++ b_2 {\bf x_2}
++ \dots
++ b_n {\bf x_n}
+}
+If you want a model without the constant term @math{b_0}, use the
+keyword @subcmd{/ORIGIN}.  @subcmd{/NOCONST} is a synonym for
+@subcmd{/ORIGIN}.
+
+An iterative Newton-Raphson procedure is used to fit the model.
+The @subcmd{/CRITERIA} subcommand is used to specify the stopping
+criteria of the procedure.  During iterations, if any one of the
+stopping criteria is satisfied, the procedure is considered complete.
+The criteria are:
+@itemize
+@item The number of iterations exceeds @var{max_iterations}.
+The default value of @var{max_iterations} is 20.
+@item The change in all coefficient estimates is less than
+@var{min_delta}.  The default value of @var{min_delta} is 0.001.
+@item The magnitude of change in the likelihood estimate is less than
+@var{min_likelihood_delta}.  The default value of
+@var{min_likelihood_delta} is zero.  This means that this criterion is
+disabled.
+@item The differential of the estimated probability for all cases is
+less than @var{min_epsilon}.  In other words, the probabilities are
+close to zero or one.  The default value of @var{min_epsilon} is
+0.00000001.
+@end itemize
+
+The @subcmd{PRINT} subcommand controls the display of optional
+statistics.  Currently there is one such option, @subcmd{CI}, which
+indicates that the confidence interval of the odds ratio should be
+displayed as well as its value.  @subcmd{CI} should be followed by an
+integer in parentheses, to indicate the confidence level of the
+desired confidence interval.
+
+The @subcmd{MISSING} subcommand determines the handling of missing
+values.  If @subcmd{INCLUDE} is set, then user-missing values are
+included in the calculations, but system-missing values are not.
+If @subcmd{EXCLUDE} is set, which is the default, user-missing values
+are excluded as well as system-missing values.
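+
+For example, the following command (using hypothetical variable names)
+fits a model of @var{y} on two predictors, displays a 95% confidence
+interval for each odds ratio, and tightens the convergence criterion
+on the coefficients while allowing up to 100 iterations:
+
+@example
+LOGISTIC REGRESSION @var{y} WITH @var{x1} @var{x2}
+        /PRINT = CI(95)
+        /CRITERIA = BCON(0.0001) ITERATE(100).
+@end example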
+
 @node MEANS
 @section MEANS
diff --git a/src/language/command.def b/src/language/command.def
index 05b18d6c61..90ee34888f 100644
--- a/src/language/command.def
+++ b/src/language/command.def
@@ -121,6 +121,7 @@ DEF_CMD (S_DATA, 0, "FLIP", cmd_flip)
 DEF_CMD (S_DATA, 0, "FREQUENCIES", cmd_frequencies)
 DEF_CMD (S_DATA, 0, "GLM", cmd_glm)
 DEF_CMD (S_DATA, 0, "LIST", cmd_list)
+DEF_CMD (S_DATA, 0, "LOGISTIC REGRESSION", cmd_logistic)
 DEF_CMD (S_DATA, 0, "MEANS", cmd_means)
 DEF_CMD (S_DATA, 0, "MODIFY VARS", cmd_modify_vars)
 DEF_CMD (S_DATA, 0, "NPAR TESTS", cmd_npar_tests)
@@ -198,7 +199,6 @@ UNIMPL_CMD ("IGRAPH", "Interactive graphs")
 UNIMPL_CMD ("INFO", "Local Documentation")
 UNIMPL_CMD ("KEYED DATA LIST", "Read nonsequential data")
 UNIMPL_CMD ("KM", "Kaplan-Meier")
-UNIMPL_CMD ("LOGISTIC REGRESSION", "Regression Analysis")
 UNIMPL_CMD ("LOGLINEAR", "General model fitting")
 UNIMPL_CMD ("MANOVA", "Multivariate analysis of variance")
 UNIMPL_CMD ("MAPS", "Geographical display")
diff --git a/src/language/stats/automake.mk b/src/language/stats/automake.mk
index 8e89d014b9..7bcb4d911a 100644
--- a/src/language/stats/automake.mk
+++ b/src/language/stats/automake.mk
@@ -30,6 +30,7 @@ language_stats_sources = \
 	src/language/stats/kruskal-wallis.h \
 	src/language/stats/ks-one-sample.c \
 	src/language/stats/ks-one-sample.h \
+	src/language/stats/logistic.c \
 	src/language/stats/jonckheere-terpstra.c \
 	src/language/stats/jonckheere-terpstra.h \
 	src/language/stats/mann-whitney.c \
diff --git a/src/language/stats/logistic.c b/src/language/stats/logistic.c
new file mode 100644
index 0000000000..1a9e313477
--- /dev/null
+++ b/src/language/stats/logistic.c
+/* pspp - a program for statistical analysis.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+
+/*
+   References:
+   1. "Coding Logistic Regression with Newton-Raphson", James McCaffrey
+      http://msdn.microsoft.com/en-us/magazine/jj618304.aspx
+
+   2. "SPSS Statistical Algorithms", Chapter "LOGISTIC REGRESSION Algorithms"
+
+
+   The Newton-Raphson method finds successive approximations to $\bf b$ where
+   approximation ${\bf b}_t$ is (hopefully) better than the previous ${\bf b}_{t-1}$.
+
+   $ {\bf b}_t = {\bf b}_{t-1} + ({\bf X}^T{\bf W}_{t-1}{\bf X})^{-1}{\bf X}^T({\bf y} - {\bf \pi}_{t-1})$
+   where:
+
+   $\bf X$ is the $n \times p$ design matrix, $n$ being the number of cases,
+   $p$ the number of parameters, \par
+   $\bf W$ is the diagonal matrix whose diagonal elements are
+   $\hat{\pi}_0(1 - \hat{\pi}_0), \, \hat{\pi}_1(1 - \hat{\pi}_1) \dots \hat{\pi}_{n-1}(1 - \hat{\pi}_{n-1})$
+*/
+
+#include <config.h>
+
+#include <gsl/gsl_blas.h>
+
+#include <gsl/gsl_cdf.h>
+#include <gsl/gsl_linalg.h>
+#include <gsl/gsl_matrix.h>
+#include <gsl/gsl_vector.h>
+#include <float.h>
+#include <math.h>
+
+#include "data/case.h"
+#include "data/casegrouper.h"
+#include "data/casereader.h"
+#include "data/dataset.h"
+#include "data/dictionary.h"
+#include "data/format.h"
+#include "data/value.h"
+#include "language/command.h"
+#include "language/dictionary/split-file.h"
+#include "language/lexer/lexer.h"
+#include "language/lexer/value-parser.h"
+#include "language/lexer/variable-parser.h"
+#include "libpspp/assertion.h"
+#include "libpspp/ll.h"
+#include "libpspp/message.h"
+#include "libpspp/misc.h"
+#include "math/categoricals.h"
+#include "output/tab.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+
+#define PRINT_EACH_STEP 0x01
+#define PRINT_SUMMARY   0x02
+#define PRINT_CORR      0x04
+#define PRINT_ITER      0x08
+#define PRINT_GOODFIT   0x10
+#define PRINT_CI        0x20
+
+
+#define PRINT_DEFAULT (PRINT_SUMMARY | PRINT_EACH_STEP)
+
+/*
+  The constant parameters of the procedure.
+  That is, those which are set by the user.
+*/
+struct lr_spec
+{
+  /* The dependent variable */
+  const struct variable *dep_var;
+
+  size_t n_predictor_vars;
+  const struct variable **predictor_vars;
+
+  /* Which classes of missing values are to be excluded */
+  enum mv_class exclude;
+
+  /* The weight variable */
+  const struct variable *wv;
+
+  const struct dictionary *dict;
+
+  /* True iff the constant (intercept) is to be included in the model */
+  bool constant;
+
+  /* The maximum number of iterations */
+  int max_iter;
+
+  /* Other iteration limiting conditions */
+  double bcon;
+  double min_epsilon;
+  double lcon;
+
+  /* The confidence interval (in percent) */
+  int confidence;
+
+  /* What results should be presented */
+  unsigned int print;
+
+  double cut_point;
+};
+
+/* The results and intermediate results of the procedure.
+   These are mutated as the procedure runs.  Used for
+   temporary variables etc.
+*/
+struct lr_result
+{
+  bool warn_bad_weight;
+
+  /* The two values of the dependent variable. */
+  union value y0;
+  union value y1;
+
+  /* The sum of caseweights */
+  double cc;
+
+  casenumber n_missing;
+  casenumber n_nonmissing;
+};
+
+
+/*
+  Convert INPUT into a dichotomous scalar.  For simple cases, this is
+  a 1:1 mapping.  The return value is always either 0 or 1.
+*/
+static double
+map_dependent_var (const struct lr_spec *cmd, const struct lr_result *res, const union value *input)
+{
+  int width = var_get_width (cmd->dep_var);
+  if (value_equal (input, &res->y0, width))
+    return 0;
+
+  if (value_equal (input, &res->y1, width))
+    return 1;
+
+  NOT_REACHED ();
+
+  return SYSMIS;
+}
+
+
+static void output_depvarmap (const struct lr_spec *cmd, const struct lr_result *);
+
+static void output_variables (const struct lr_spec *cmd,
+                              const gsl_vector *,
+                              const gsl_vector *);
+
+static void output_model_summary (const struct lr_result *,
+                                  double initial_likelihood, double likelihood);
+
+static void case_processing_summary (const struct lr_result *);
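+
+/*
+   A note on the layout used throughout: the coefficient vector holds
+   the predictor coefficients first, with the constant term (when the
+   model has one) in its final element:
+
+     beta_hat = ( b_1, b_2, ... b_n [, b_0] )
+
+   Correspondingly, column j of the implicit design matrix is the
+   value of predictor j when j < n_x, and a column of ones otherwise.
+*/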
+
+/*
+  Return the probability estimate for case C -- that is, the inverse
+  logit of the linear predictor -- corresponding to the coefficient
+  estimate BETA_HAT.
+*/
+static double
+pi_hat (const struct lr_spec *cmd,
+        const gsl_vector *beta_hat,
+        const struct variable **x, size_t n_x,
+        const struct ccase *c)
+{
+  int v0;
+  double pi = 0;
+  for (v0 = 0; v0 < n_x; ++v0)
+    {
+      pi += gsl_vector_get (beta_hat, v0) *
+        case_data (c, x[v0])->f;
+    }
+
+  if (cmd->constant)
+    pi += gsl_vector_get (beta_hat, beta_hat->size - 1);
+
+  pi = 1.0 / (1.0 + exp (-pi));
+
+  return pi;
+}
+
+
+/*
+  Calculates the Hessian matrix X' V X,
+  where: X is the n by N_X matrix comprising the n cases in INPUT, and
+  V is the diagonal matrix { (pi_hat_0)(1 - pi_hat_0), (pi_hat_1)(1 - pi_hat_1), ... (pi_hat_{N-1})(1 - pi_hat_{N-1}) }
+  of derivatives of the estimated probabilities.
+
+  If the derivatives for ALL cases are close to zero -- that is, if
+  every estimated probability is close to zero or one -- then
+  CONVERGED will be set to true.
+*/
+static gsl_matrix *
+hessian (const struct lr_spec *cmd,
+         struct lr_result *res,
+         struct casereader *input,
+         const struct variable **x, size_t n_x,
+         const gsl_vector *beta_hat,
+         bool *converged)
+{
+  struct casereader *reader;
+  struct ccase *c;
+  gsl_matrix *output = gsl_matrix_calloc (beta_hat->size, beta_hat->size);
+
+  double max_w = -DBL_MAX;
+
+  for (reader = casereader_clone (input);
+       (c = casereader_read (reader)) != NULL; case_unref (c))
+    {
+      int v0, v1;
+      double pi = pi_hat (cmd, beta_hat, x, n_x, c);
+
+      double weight = dict_get_case_weight (cmd->dict, c, &res->warn_bad_weight);
+      double w = pi * (1 - pi);
+      if (w > max_w)
+        max_w = w;
+      w *= weight;
+
+      for (v0 = 0; v0 < beta_hat->size; ++v0)
+        {
+          double in0 = v0 < n_x ? case_data (c, x[v0])->f : 1.0;
+          for (v1 = 0; v1 < beta_hat->size; ++v1)
+            {
+              double in1 = v1 < n_x ? case_data (c, x[v1])->f : 1.0;
+              double *o = gsl_matrix_ptr (output, v0, v1);
+              *o += in0 * w * in1;
+            }
+        }
+    }
+  casereader_destroy (reader);
+
+  if (max_w < cmd->min_epsilon)
+    {
+      *converged = true;
+      msg (MN, _("All predicted values are either 1 or 0"));
+    }
+
+  return output;
+}
+
+
+/* Calculates the value X' (y - pi),
+   where X is the design matrix,
+   y is the vector of observed values of the dependent variable, and
+   pi is the vector of estimates for y.
+
+   As a side effect, the likelihood
+   L = prod_i pi_i^(w_i y_i) (1 - pi_i)^(w_i (1 - y_i))
+   is stored in LIKELIHOOD. */
+static gsl_vector *
+xt_times_y_pi (const struct lr_spec *cmd,
+               struct lr_result *res,
+               struct casereader *input,
+               const struct variable **x, size_t n_x,
+               const struct variable *y_var,
+               const gsl_vector *beta_hat,
+               double *likelihood)
+{
+  struct casereader *reader;
+  struct ccase *c;
+  gsl_vector *output = gsl_vector_calloc (beta_hat->size);
+
+  *likelihood = 1.0;
+  for (reader = casereader_clone (input);
+       (c = casereader_read (reader)) != NULL; case_unref (c))
+    {
+      int v0;
+      double pi = pi_hat (cmd, beta_hat, x, n_x, c);
+      double weight = dict_get_case_weight (cmd->dict, c, &res->warn_bad_weight);
+
+      double y = map_dependent_var (cmd, res, case_data (c, y_var));
+
+      *likelihood *= pow (pi, weight * y) * pow (1 - pi, weight * (1 - y));
+
+      for (v0 = 0; v0 < beta_hat->size; ++v0)
+        {
+          double in0 = v0 < n_x ? case_data (c, x[v0])->f : 1.0;
+          double *o = gsl_vector_ptr (output, v0);
+          *o += in0 * (y - pi) * weight;
+        }
+    }
+
+  casereader_destroy (reader);
+
+  return output;
+}
+
+
+/*
+  Makes an initial pass through the data, checks that the dependent
+  variable is dichotomous, and calculates necessary initial values.
+
+  Returns an initial value for \hat\beta, the vector of estimators of
+  \beta.
+*/
+static gsl_vector *
+beta_hat_initial (const struct lr_spec *cmd, struct lr_result *res, struct casereader *input)
+{
+  const int width = var_get_width (cmd->dep_var);
+
+  struct ccase *c;
+  struct casereader *reader;
+  gsl_vector *b0;
+  double sum;
+  double sumA = 0.0;
+  double sumB = 0.0;
+
+  bool v0set = false;
+  bool v1set = false;
+
+  size_t n_coefficients = cmd->n_predictor_vars;
+  if (cmd->constant)
+    n_coefficients++;
+
+  b0 = gsl_vector_calloc (n_coefficients);
+
+  res->cc = 0;
+  for (reader = casereader_clone (input);
+       (c = casereader_read (reader)) != NULL; case_unref (c))
+    {
+      int v;
+      bool missing = false;
+      double weight = dict_get_case_weight (cmd->dict, c, &res->warn_bad_weight);
+      const union value *depval = case_data (c, cmd->dep_var);
+
+      for (v = 0; v < cmd->n_predictor_vars; ++v)
+        {
+          const union value *val = case_data (c, cmd->predictor_vars[v]);
+          if (var_is_value_missing (cmd->predictor_vars[v], val, cmd->exclude))
+            {
+              missing = true;
+              break;
+            }
+        }
+
+      if (missing)
+        {
+          res->n_missing++;
+          continue;
+        }
+
+      res->n_nonmissing++;
+
+      if (!v0set)
+        {
+          value_clone (&res->y0, depval, width);
+          v0set = true;
+        }
+      else if (!v1set)
+        {
+          if (!value_equal (&res->y0, depval, width))
+            {
+              value_clone (&res->y1, depval, width);
+              v1set = true;
+            }
+        }
+      else
+        {
+          if (!value_equal (&res->y0, depval, width)
+              && !value_equal (&res->y1, depval, width))
+            {
+              msg (ME, _("Dependent variable's values are not dichotomous."));
+              goto error;
+            }
+        }
+
+      if (value_equal (&res->y0, depval, width))
+        sumA += weight;
+
+      if (value_equal (&res->y1, depval, width))
+        sumB += weight;
+
+      res->cc += weight;
+    }
+  casereader_destroy (reader);
+
+  sum = sumB;
+
+  /* Ensure that Y0 is less than Y1.
Otherwise the mapping gets + inverted, which is confusing to users */ + if (var_is_numeric (cmd->dep_var) && value_compare_3way (&res->y0, &res->y1, width) > 0) + { + union value tmp; + value_clone (&tmp, &res->y0, width); + value_copy (&res->y0, &res->y1, width); + value_copy (&res->y1, &tmp, width); + value_destroy (&tmp, width); + sum = sumA; + } + + if ( cmd->constant) + { + double mean = sum / res->cc; + gsl_vector_set (b0, b0->size - 1, log (mean / (1 - mean))); + } + + return b0; + + error: + casereader_destroy (reader); + return NULL; +} + + + +static bool +run_lr (const struct lr_spec *cmd, struct casereader *input, + const struct dataset *ds UNUSED) +{ + int i,j; + + gsl_vector *beta_hat; + gsl_vector *se ; + + bool converged = false; + double likelihood; + double prev_likelihood = -1; + double initial_likelihood ; + + struct lr_result work; + work.n_missing = 0; + work.n_nonmissing = 0; + work.warn_bad_weight = true; + + + /* Get the initial estimates of \beta and their standard errors */ + beta_hat = beta_hat_initial (cmd, &work, input); + if (NULL == beta_hat) + return false; + + output_depvarmap (cmd, &work); + + se = gsl_vector_alloc (beta_hat->size); + + case_processing_summary (&work); + + + input = casereader_create_filter_missing (input, + cmd->predictor_vars, + cmd->n_predictor_vars, + cmd->exclude, + NULL, + NULL); + + + /* Start the Newton Raphson iteration process... */ + for( i = 0 ; i < cmd->max_iter ; ++i) + { + double min, max; + gsl_matrix *m ; + gsl_vector *v ; + + m = hessian (cmd, &work, input, + cmd->predictor_vars, cmd->n_predictor_vars, + beta_hat, + &converged); + + gsl_linalg_cholesky_decomp (m); + gsl_linalg_cholesky_invert (m); + + v = xt_times_y_pi (cmd, &work, input, + cmd->predictor_vars, cmd->n_predictor_vars, + cmd->dep_var, + beta_hat, + &likelihood); + + { + /* delta = M.v */ + gsl_vector *delta = gsl_vector_alloc (v->size); + gsl_blas_dgemv (CblasNoTrans, 1.0, m, v, 0, delta); + gsl_vector_free (v); + + for (j = 0; j < se->size; ++j) + { + double *ptr = gsl_vector_ptr (se, j); + *ptr = gsl_matrix_get (m, j, j); + } + + gsl_matrix_free (m); + + gsl_vector_add (beta_hat, delta); + + gsl_vector_minmax (delta, &min, &max); + + if ( fabs (min) < cmd->bcon && fabs (max) < cmd->bcon) + { + msg (MN, _("Estimation terminated at iteration number %d because parameter estimates changed by less than %g"), + i + 1, cmd->bcon); + converged = true; + } + + gsl_vector_free (delta); + } + + if ( prev_likelihood >= 0) + { + if (-log (likelihood) > -(1.0 - cmd->lcon) * log (prev_likelihood)) + { + msg (MN, _("Estimation terminated at iteration number %d because Log Likelihood decreased by less than %g%%"), i + 1, 100 * cmd->lcon); + converged = true; + } + } + if (i == 0) + initial_likelihood = likelihood; + prev_likelihood = likelihood; + + if (converged) + break; + } + casereader_destroy (input); + + for (i = 0; i < se->size; ++i) + { + double *ptr = gsl_vector_ptr (se, i); + *ptr = sqrt (*ptr); + } + + output_model_summary (&work, initial_likelihood, likelihood); + output_variables (cmd, beta_hat, se); + + gsl_vector_free (beta_hat); + gsl_vector_free (se); + + return true; +} + +/* Parse the LOGISTIC REGRESSION command syntax */ +int +cmd_logistic (struct lexer *lexer, struct dataset *ds) +{ + struct lr_spec lr; + lr.dict = dataset_dict (ds); + lr.n_predictor_vars = 0; + lr.predictor_vars = NULL; + lr.exclude = MV_ANY; + lr.wv = dict_get_weight (lr.dict); + lr.max_iter = 20; + lr.lcon = 0.0000; + lr.bcon = 0.001; + lr.min_epsilon = 0.00000001; + lr.cut_point = 
0.5; + lr.constant = true; + lr.confidence = 95; + lr.print = PRINT_DEFAULT; + + + if (lex_match_id (lexer, "VARIABLES")) + lex_match (lexer, T_EQUALS); + + if (! (lr.dep_var = parse_variable_const (lexer, lr.dict))) + goto error; + + lex_force_match (lexer, T_WITH); + + if (!parse_variables_const (lexer, lr.dict, + &lr.predictor_vars, &lr.n_predictor_vars, + PV_NO_DUPLICATE | PV_NUMERIC)) + goto error; + + + while (lex_token (lexer) != T_ENDCMD) + { + lex_match (lexer, T_SLASH); + + if (lex_match_id (lexer, "MISSING")) + { + lex_match (lexer, T_EQUALS); + while (lex_token (lexer) != T_ENDCMD + && lex_token (lexer) != T_SLASH) + { + if (lex_match_id (lexer, "INCLUDE")) + { + lr.exclude = MV_SYSTEM; + } + else if (lex_match_id (lexer, "EXCLUDE")) + { + lr.exclude = MV_ANY; + } + else + { + lex_error (lexer, NULL); + goto error; + } + } + } + else if (lex_match_id (lexer, "ORIGIN")) + { + lr.constant = false; + } + else if (lex_match_id (lexer, "NOORIGIN")) + { + lr.constant = true; + } + else if (lex_match_id (lexer, "NOCONST")) + { + lr.constant = false; + } + else if (lex_match_id (lexer, "EXTERNAL")) + { + /* This is for compatibility. It does nothing */ + } + else if (lex_match_id (lexer, "PRINT")) + { + lex_match (lexer, T_EQUALS); + while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) + { + if (lex_match_id (lexer, "DEFAULT")) + { + lr.print |= PRINT_DEFAULT; + } + else if (lex_match_id (lexer, "SUMMARY")) + { + lr.print |= PRINT_SUMMARY; + } +#if 0 + else if (lex_match_id (lexer, "CORR")) + { + lr.print |= PRINT_CORR; + } + else if (lex_match_id (lexer, "ITER")) + { + lr.print |= PRINT_ITER; + } + else if (lex_match_id (lexer, "GOODFIT")) + { + lr.print |= PRINT_GOODFIT; + } +#endif + else if (lex_match_id (lexer, "CI")) + { + lr.print |= PRINT_CI; + if (lex_force_match (lexer, T_LPAREN)) + { + if (! lex_force_int (lexer)) + { + lex_error (lexer, NULL); + goto error; + } + lr.confidence = lex_integer (lexer); + lex_get (lexer); + if ( ! lex_force_match (lexer, T_RPAREN)) + { + lex_error (lexer, NULL); + goto error; + } + } + } + else if (lex_match_id (lexer, "ALL")) + { + lr.print = ~0x0000; + } + else + { + lex_error (lexer, NULL); + goto error; + } + } + } + else if (lex_match_id (lexer, "CRITERIA")) + { + lex_match (lexer, T_EQUALS); + while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) + { + if (lex_match_id (lexer, "BCON")) + { + if (lex_force_match (lexer, T_LPAREN)) + { + if (! lex_force_num (lexer)) + { + lex_error (lexer, NULL); + goto error; + } + lr.bcon = lex_number (lexer); + lex_get (lexer); + if ( ! lex_force_match (lexer, T_RPAREN)) + { + lex_error (lexer, NULL); + goto error; + } + } + } + else if (lex_match_id (lexer, "ITERATE")) + { + if (lex_force_match (lexer, T_LPAREN)) + { + if (! lex_force_int (lexer)) + { + lex_error (lexer, NULL); + goto error; + } + lr.max_iter = lex_integer (lexer); + lex_get (lexer); + if ( ! lex_force_match (lexer, T_RPAREN)) + { + lex_error (lexer, NULL); + goto error; + } + } + } + else if (lex_match_id (lexer, "LCON")) + { + if (lex_force_match (lexer, T_LPAREN)) + { + if (! lex_force_num (lexer)) + { + lex_error (lexer, NULL); + goto error; + } + lr.lcon = lex_number (lexer); + lex_get (lexer); + if ( ! lex_force_match (lexer, T_RPAREN)) + { + lex_error (lexer, NULL); + goto error; + } + } + } + else if (lex_match_id (lexer, "EPS")) + { + if (lex_force_match (lexer, T_LPAREN)) + { + if (! 
lex_force_num (lexer)) + { + lex_error (lexer, NULL); + goto error; + } + lr.min_epsilon = lex_number (lexer); + lex_get (lexer); + if ( ! lex_force_match (lexer, T_RPAREN)) + { + lex_error (lexer, NULL); + goto error; + } + } + } + else + { + lex_error (lexer, NULL); + goto error; + } + } + } + else + { + lex_error (lexer, NULL); + goto error; + } + } + + + + { + struct casegrouper *grouper; + struct casereader *group; + bool ok; + + grouper = casegrouper_create_splits (proc_open (ds), lr.dict); + while (casegrouper_get_next_group (grouper, &group)) + ok = run_lr (&lr, group, ds); + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; + } + + free (lr.predictor_vars); + return CMD_SUCCESS; + + error: + + free (lr.predictor_vars); + return CMD_FAILURE; +} + + + + +/* Show the Dependent Variable Encoding box. + This indicates how the dependent variable + is mapped to the internal zero/one values. +*/ +static void +output_depvarmap (const struct lr_spec *cmd, const struct lr_result *res) +{ + const int heading_columns = 0; + const int heading_rows = 1; + struct tab_table *t; + struct string str; + + const int nc = 2; + int nr = heading_rows + 2; + + t = tab_create (nc, nr); + tab_title (t, _("Dependent Variable Encoding")); + + tab_headers (t, heading_columns, 0, heading_rows, 0); + + tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1); + + tab_hline (t, TAL_2, 0, nc - 1, heading_rows); + tab_vline (t, TAL_2, heading_columns, 0, nr - 1); + + tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Original Value")); + tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Internal Value")); + + + + ds_init_empty (&str); + var_append_value_name (cmd->dep_var, &res->y0, &str); + tab_text (t, 0, 0 + heading_rows, 0, ds_cstr (&str)); + + ds_clear (&str); + var_append_value_name (cmd->dep_var, &res->y1, &str); + tab_text (t, 0, 1 + heading_rows, 0, ds_cstr (&str)); + + + tab_double (t, 1, 0 + heading_rows, 0, map_dependent_var (cmd, res, &res->y0), &F_8_0); + tab_double (t, 1, 1 + heading_rows, 0, map_dependent_var (cmd, res, &res->y1), &F_8_0); + ds_destroy (&str); + + tab_submit (t); +} + + +/* Show the Variables in the Equation box */ +static void +output_variables (const struct lr_spec *cmd, + const gsl_vector *beta, + const gsl_vector *se) +{ + int row = 0; + const int heading_columns = 1; + int heading_rows = 1; + struct tab_table *t; + + int idx; + int n_rows = cmd->n_predictor_vars; + + int nc = 8; + int nr ; + if (cmd->print & PRINT_CI) + { + nc += 2; + heading_rows += 1; + row++; + } + nr = heading_rows + cmd->n_predictor_vars; + if (cmd->constant) + nr++; + + t = tab_create (nc, nr); + tab_title (t, _("Variables in the Equation")); + + tab_headers (t, heading_columns, 0, heading_rows, 0); + + tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1); + + tab_hline (t, TAL_2, 0, nc - 1, heading_rows); + tab_vline (t, TAL_2, heading_columns, 0, nr - 1); + + tab_text (t, 0, row + 1, TAB_CENTER | TAT_TITLE, _("Step 1")); + + tab_text (t, 2, row, TAB_CENTER | TAT_TITLE, _("B")); + tab_text (t, 3, row, TAB_CENTER | TAT_TITLE, _("S.E.")); + tab_text (t, 4, row, TAB_CENTER | TAT_TITLE, _("Wald")); + tab_text (t, 5, row, TAB_CENTER | TAT_TITLE, _("df")); + tab_text (t, 6, row, TAB_CENTER | TAT_TITLE, _("Sig.")); + tab_text (t, 7, row, TAB_CENTER | TAT_TITLE, _("Exp(B)")); + + if (cmd->print & PRINT_CI) + { + tab_joint_text_format (t, 8, 0, 9, 0, + TAB_CENTER | TAT_TITLE, _("%d%% CI for Exp(B)"), cmd->confidence); + + tab_text (t, 8, row, TAB_CENTER | TAT_TITLE, _("Lower")); + tab_text (t, 9, 
row, TAB_CENTER | TAT_TITLE, _("Upper")); + } + + if (cmd->constant) + n_rows++; + + for (idx = 0 ; idx < n_rows; ++idx) + { + const int r = idx + heading_rows; + + const double b = gsl_vector_get (beta, idx); + const double sigma = gsl_vector_get (se, idx); + const double wald = pow2 (b / sigma); + const double df = 1; + + if (idx < cmd->n_predictor_vars) + tab_text (t, 1, r, TAB_LEFT | TAT_TITLE, + var_to_string (cmd->predictor_vars[idx])); + + tab_double (t, 2, r, 0, b, 0); + tab_double (t, 3, r, 0, sigma, 0); + tab_double (t, 4, r, 0, wald, 0); + tab_double (t, 5, r, 0, df, &F_8_0); + tab_double (t, 6, r, 0, gsl_cdf_chisq_Q (wald, df), 0); + tab_double (t, 7, r, 0, exp (b), 0); + + if (cmd->print & PRINT_CI) + { + double wc = gsl_cdf_ugaussian_Pinv (0.5 + cmd->confidence / 200.0); + wc *= sigma; + + if (idx < cmd->n_predictor_vars) + { + tab_double (t, 8, r, 0, exp (b - wc), 0); + tab_double (t, 9, r, 0, exp (b + wc), 0); + } + } + } + + if ( cmd->constant) + tab_text (t, 1, nr - 1, TAB_LEFT | TAT_TITLE, _("Constant")); + + tab_submit (t); +} + + +/* Show the model summary box */ +static void +output_model_summary (const struct lr_result *res, + double initial_likelihood, double likelihood) +{ + const int heading_columns = 0; + const int heading_rows = 1; + struct tab_table *t; + + const int nc = 4; + int nr = heading_rows + 1; + double cox; + + t = tab_create (nc, nr); + tab_title (t, _("Model Summary")); + + tab_headers (t, heading_columns, 0, heading_rows, 0); + + tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1); + + tab_hline (t, TAL_2, 0, nc - 1, heading_rows); + tab_vline (t, TAL_2, heading_columns, 0, nr - 1); + + tab_text (t, 0, 0, TAB_LEFT | TAT_TITLE, _("Step 1")); + tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("-2 Log likelihood")); + tab_double (t, 1, 1, 0, -2 * log (likelihood), 0); + + + tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Cox & Snell R Square")); + cox = 1.0 - pow (initial_likelihood /likelihood, 2 / res->cc); + tab_double (t, 2, 1, 0, cox, 0); + + tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Nagelkerke R Square")); + tab_double (t, 3, 1, 0, cox / ( 1.0 - pow (initial_likelihood, 2 / res->cc)), 0); + + + tab_submit (t); +} + +/* Show the case processing summary box */ +static void +case_processing_summary (const struct lr_result *res) +{ + const int heading_columns = 1; + const int heading_rows = 1; + struct tab_table *t; + + const int nc = 3; + const int nr = heading_rows + 3; + casenumber total; + + t = tab_create (nc, nr); + tab_title (t, _("Case Processing Summary")); + + tab_headers (t, heading_columns, 0, heading_rows, 0); + + tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1); + + tab_hline (t, TAL_2, 0, nc - 1, heading_rows); + tab_vline (t, TAL_2, heading_columns, 0, nr - 1); + + tab_text (t, 0, 0, TAB_LEFT | TAT_TITLE, _("Unweighted Cases")); + tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("N")); + tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Percent")); + + + tab_text (t, 0, 1, TAB_LEFT | TAT_TITLE, _("Included in Analysis")); + tab_text (t, 0, 2, TAB_LEFT | TAT_TITLE, _("Missing Cases")); + tab_text (t, 0, 3, TAB_LEFT | TAT_TITLE, _("Total")); + + tab_double (t, 1, 1, 0, res->n_nonmissing, &F_8_0); + tab_double (t, 1, 2, 0, res->n_missing, &F_8_0); + + total = res->n_nonmissing + res->n_missing; + tab_double (t, 1, 3, 0, total , &F_8_0); + + tab_double (t, 2, 1, 0, 100 * res->n_nonmissing / (double) total, 0); + tab_double (t, 2, 2, 0, 100 * res->n_missing / (double) total, 0); + tab_double (t, 2, 3, 0, 100 * total / (double) 
total, 0); + + tab_submit (t); +} + diff --git a/tests/automake.mk b/tests/automake.mk index b8e4c2dd19..6c4cef20aa 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -293,6 +293,7 @@ TESTSUITE_AT = \ tests/language/stats/flip.at \ tests/language/stats/frequencies.at \ tests/language/stats/glm.at \ + tests/language/stats/logistic.at \ tests/language/stats/means.at \ tests/language/stats/npar.at \ tests/language/stats/oneway.at \ diff --git a/tests/language/stats/logistic.at b/tests/language/stats/logistic.at new file mode 100644 index 0000000000..8903c2069d --- /dev/null +++ b/tests/language/stats/logistic.at @@ -0,0 +1,291 @@ +AT_BANNER([LOGISTIC REGRESSION]) + +dnl These examples are adapted from +dnl http://www.uvm.edu/~dhowell/gradstat/psych341/lectures/Logistic%20Regression/LogisticReg1.html + + + +m4_define([LOGIT_TEST_DATA], + [AT_DATA([lr-data.txt], dnl + 105.00 1.00 33.00 3.00 2.00 .35 17.00 20.00 .50110 -2.00440 1 + 106.00 1.00 50.00 2.00 3.00 .38 7.00 15.00 .20168 -1.25264 1 + 107.00 1.00 91.00 3.00 2.00 .28 15.00 7.00 .00897 -1.00905 1 + 108.00 1.00 90.00 3.00 2.00 .20 2.00 2.00 .00972 -1.00982 1 + 109.00 1.00 70.00 3.00 3.00 .38 23.00 27.00 .04745 -1.04981 1 + 111.00 2.00 31.00 2.00 2.00 .00 19.00 10.00 .54159 1.84640 1 + 112.00 1.00 91.00 2.00 3.00 .18 6.00 16.00 .00897 -1.00905 1 + 113.00 1.00 81.00 3.00 2.00 .00 3.00 9.00 .01998 -1.02039 1 + 114.00 2.00 15.00 1.00 2.00 .13 19.00 13.00 .81241 1.23090 1 + 116.00 2.00 1.00 1.00 2.00 .88 15.00 7.00 .93102 1.07410 1 + 117.00 1.00 93.00 3.00 2.00 .18 9.00 15.00 .00764 -1.00770 1 + 118.00 2.00 14.00 1.00 3.00 .15 23.00 18.00 .82447 1.21289 1 + 120.00 1.00 91.00 2.00 2.00 .43 17.00 14.00 .00897 -1.00905 1 + 121.00 1.00 55.00 3.00 2.00 .69 20.00 14.00 .14409 -1.16834 1 + 122.00 1.00 70.00 2.00 3.00 .03 .00 6.00 .04745 -1.04981 1 + 123.00 1.00 25.00 2.00 2.00 .45 4.00 10.00 .65789 -2.92301 1 + 125.00 1.00 91.00 2.00 2.00 .13 .00 3.00 .00897 -1.00905 1 + 126.00 1.00 91.00 3.00 3.00 .23 4.00 6.00 .00897 -1.00905 1 + 127.00 1.00 91.00 3.00 2.00 .00 8.00 8.00 .00897 -1.00905 1 + 128.00 2.00 13.00 2.00 2.00 .65 16.00 14.00 .83592 1.19629 1 + 129.00 1.00 50.00 2.00 2.00 .25 20.00 23.00 .20168 -1.25264 1 + 135.00 1.00 90.00 3.00 3.00 .03 5.00 12.00 .00972 -1.00982 1 + 138.00 1.00 70.00 3.00 3.00 .10 1.00 6.00 .04745 -1.04981 1 + 139.00 2.00 19.00 3.00 3.00 .10 11.00 12.00 .75787 1.31949 1 + 149.00 2.00 50.00 3.00 2.00 .03 .00 .00 .20168 4.95826 1 + 204.00 1.00 50.00 3.00 1.00 .13 .00 1.00 .20168 -1.25264 1 + 205.00 1.00 91.00 3.00 3.00 .72 16.00 18.00 .00897 -1.00905 1 + 206.00 2.00 24.00 1.00 1.00 .10 5.00 21.00 .67592 1.47947 1 + 207.00 1.00 80.00 3.00 3.00 .13 6.00 7.00 .02164 -1.02212 1 + 208.00 1.00 87.00 2.00 2.00 .18 9.00 20.00 .01237 -1.01253 1 + 209.00 1.00 70.00 2.00 2.00 .53 15.00 12.00 .04745 -1.04981 1 + 211.00 1.00 55.00 2.00 1.00 .33 8.00 5.00 .14409 -1.16834 1 + 212.00 1.00 56.00 3.00 1.00 .30 6.00 20.00 .13436 -1.15522 1 + 214.00 1.00 54.00 2.00 2.00 .15 .00 16.00 .15439 -1.18258 1 + 215.00 1.00 71.00 3.00 3.00 .35 12.00 12.00 .04391 -1.04592 1 + 217.00 2.00 36.00 1.00 1.00 .10 12.00 8.00 .44049 2.27020 1 + 218.00 1.00 91.00 2.00 2.00 .05 11.00 25.00 .00897 -1.00905 1 + 219.00 1.00 91.00 2.00 2.00 1.23 11.00 24.00 .00897 -1.00905 1 + 220.00 1.00 91.00 2.00 3.00 .08 8.00 11.00 .00897 -1.00905 1 + 221.00 1.00 91.00 2.00 2.00 .33 5.00 11.00 .00897 -1.00905 1 + 222.00 2.00 36.00 2.00 1.00 .18 5.00 3.00 .44049 2.27020 1 + 223.00 1.00 70.00 2.00 3.00 .18 14.00 3.00 .04745 -1.04981 1 + 224.00 1.00 91.00 2.00 2.00 .43 2.00 
10.00 .00897 -1.00905 1
+ 225.00 1.00 55.00 2.00 1.00 .18 6.00 11.00 .14409 -1.16834 1
+ 229.00 2.00 75.00 2.00 2.00 .40 30.00 25.00 .03212 31.12941 1
+ 232.00 1.00 91.00 3.00 2.00 .15 6.00 3.00 .00897 -1.00905 1
+ 233.00 1.00 70.00 2.00 1.00 .00 11.00 8.00 .04745 -1.04981 1
+ 234.00 1.00 54.00 3.00 2.00 .10 .00 .00 .15439 -1.18258 1
+ 237.00 1.00 70.00 3.00 2.00 .18 5.00 25.00 .04745 -1.04981 1
+ 241.00 1.00 19.00 2.00 3.00 .33 13.00 9.00 .75787 -4.12995 1
+ 304.00 2.00 18.00 2.00 2.00 .26 25.00 6.00 .77245 1.29458 1
+ 305.00 1.00 88.00 3.00 2.00 1.35 17.00 29.00 .01142 -1.01155 1
+ 306.00 1.00 70.00 2.00 3.00 .63 14.00 33.00 .04745 -1.04981 1
+ 307.00 1.00 85.00 2.00 2.00 2.65 18.00 14.00 .01452 -1.01474 1
+ 308.00 1.00 13.00 2.00 2.00 .23 5.00 5.00 .83592 -6.09442 1
+ 309.00 2.00 13.00 2.00 2.00 .23 7.00 17.00 .83592 1.19629 1
+ 311.00 2.00 1.00 2.00 2.00 .50 20.00 14.00 .93102 1.07410 1
+ 315.00 1.00 19.00 2.00 3.00 .18 1.00 11.00 .75787 -4.12995 1
+ 316.00 1.00 88.00 2.00 2.00 .38 12.00 11.00 .01142 -1.01155 2
+ 318.00 1.00 88.00 3.00 2.00 .03 5.00 5.00 .01142 -1.01155 3
+ 319.00 2.00 18.00 2.00 3.00 .30 15.00 16.00 .77245 1.29458 1
+ 321.00 2.00 15.00 2.00 2.00 .63 15.00 18.00 .81241 1.23090 1
+ 322.00 1.00 88.00 3.00 2.00 .40 18.00 15.00 .01142 -1.01155 1
+ 325.00 2.00 18.00 2.00 3.00 1.00 28.00 18.00 .77245 1.29458 1
+ 329.00 1.00 88.00 3.00 2.00 .03 7.00 11.00 .01142 -1.01155 4
+ 332.00 2.00 2.00 2.00 2.00 .05 8.00 9.00 .92562 1.08036 1
+)])
+
+dnl Note: In the above data, cases 305, 316, 318 and 329 have identical values
+dnl of the 2nd and 3rd variables.  We use this for weight testing.
+
+AT_SETUP([LOGISTIC REGRESSION basic test])
+
+LOGIT_TEST_DATA
+
+AT_DATA([lr-data.sps], [dnl
+set format = F12.3.
+set decimal dot.
+data list notable file='lr-data.txt'
+ list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
+
+logistic regression
+    variables = outcome with survrate
+    .
+])
+
+AT_CHECK([pspp -O format=csv lr-data.sps], [0],
+  [dnl
+Table: Dependent Variable Encoding
+Original Value,Internal Value
+1.000,0
+2.000,1
+
+Table: Case Processing Summary
+Unweighted Cases,N,Percent
+Included in Analysis,66,100.000
+Missing Cases,0,.000
+Total,66,100.000
+
+note: Estimation terminated at iteration number 6 because parameter estimates changed by less than 0.001
+
+Table: Model Summary
+Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
+,37.323,.455,.659
+
+Table: Variables in the Equation
+,,B,S.E.,Wald,df,Sig.,Exp(B)
+Step 1,survrate,-.081,.019,17.756,1,.000,.922
+,Constant,2.684,.811,10.941,1,.001,14.639
+])
+
+
+AT_CLEANUP
+
+AT_SETUP([LOGISTIC REGRESSION missing values])
+
+LOGIT_TEST_DATA
+
+AT_DATA([lr-data.sps], [dnl
+set format = F12.3.
+set decimal dot.
+data list notable file='lr-data.txt'
+ list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
+
+missing values survrate (999) avoid (44444).
+
+logistic regression
+    variables = outcome with survrate avoid
+    .
+])
+
+AT_CHECK([pspp -O format=csv lr-data.sps > run0], [0], [ignore])
+
+cat >> lr-data.txt << HERE
+ 105.00 1.00 999.00 3.00 2.00 .35 17.00 20.00 .50110 -2.00440 1
+ 106.00 1.00 999.00 2.00 3.00 .38 7.00 15.00 .20168 -1.25264 1
+ 107.00 1.00 5.00 3.00 2.00 .28 44444 34 .00897 -1.00905 1
+HERE
+
+AT_CHECK([pspp -O format=csv lr-data.sps > run1], [0], [ignore])
+
+dnl Only the summary information should be different
+AT_CHECK([diff run0 run1], [1], [dnl
+8,10c8,10
+< Included in Analysis,66,100.000
+< Missing Cases,0,.000
+< Total,66,100.000
+---
+> Included in Analysis,66,95.652
+> Missing Cases,3,4.348
+> Total,69,100.000
+])
+
+AT_CLEANUP
+
+
+
+dnl Check that a weighted dataset is interpreted correctly.
+dnl To do this, the same data set is used twice, once weighted and once not.
+dnl The weighted dataset omits certain cases which are identical.
+AT_SETUP([LOGISTIC REGRESSION weights])
+
+LOGIT_TEST_DATA
+
+AT_DATA([lr-data-unweighted.sps], [dnl
+set format = F12.3.
+set decimal dot.
+data list notable file='lr-data.txt'
+ list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
+
+logistic regression
+    variables = outcome with survrate
+    .
+])
+
+AT_DATA([lr-data-weighted.sps], [dnl
+set format = F12.3.
+set decimal dot.
+data list notable file='lr-data.txt'
+ list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
+
+weight by w.
+
+* Omit duplicate cases.
+select if id <> 305 and id <> 316 and id <> 318.
+
+logistic regression
+    variables = outcome with survrate
+    .
+])
+
+
+AT_CHECK([pspp -O format=csv lr-data-unweighted.sps > unweighted-result], [0], [ignore])
+AT_CHECK([pspp -O format=csv lr-data-weighted.sps > weighted-result], [0], [ignore])
+
+dnl The only difference should be the summary information, since
+dnl this displays the unweighted totals.
+AT_CHECK([diff unweighted-result weighted-result], [1], [dnl
+8c8
+< Included in Analysis,66,100.000
+---
+> Included in Analysis,63,100.000
+10c10
+< Total,66,100.000
+---
+> Total,63,100.000
+])
+
+
+AT_CLEANUP
+
+
+dnl Check that the /NOCONST option works as intended.
+dnl The results this produces are very similar to those in the example
+dnl at http://www.ats.ucla.edu/stat/SPSS/faq/logregconst.htm
+AT_SETUP([LOGISTIC REGRESSION without constant])
+
+AT_DATA([non-const.sps], [dnl
+set format=F20.3.
+
+input program.
+    loop #i = 1 to 200.
+        compute female = (#i > 91).
+        end case.
+    end loop.
+end file.
+end input program.
+
+compute constant = 1.
+
+logistic regression female with constant /noconst.
+])
+
+AT_CHECK([pspp -O format=csv non-const.sps], [0],
+ [dnl
+Table: Dependent Variable Encoding
+Original Value,Internal Value
+.00,0
+1.00,1
+
+Table: Case Processing Summary
+Unweighted Cases,N,Percent
+Included in Analysis,200,100.000
+Missing Cases,0,.000
+Total,200,100.000
+
+note: Estimation terminated at iteration number 2 because parameter estimates changed by less than 0.001
+
+Table: Model Summary
+Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
+,275.637,.008,.011
+
+Table: Variables in the Equation
+,,B,S.E.,Wald,df,Sig.,Exp(B)
+Step 1,constant,.180,.142,1.616,1,.204,1.198
+])
+
+AT_CLEANUP
+
+
+
+dnl Check that if somebody passes a dependent variable which is not dichotomous,
+dnl then an error is raised.
+AT_SETUP([LOGISTIC REGRESSION non-dichotomous dep var])
+
+AT_DATA([non-dich.sps], [dnl
+data list notable list /y x1 x2 x3 x4.
+begin data.
+1 2 3 4 5
+0 2 3 4 8
+2 3 4 5 6
+end data.
+
+logistic regression y with x1 x2 x3 x4.
+]) + +AT_CHECK([pspp -O format=csv non-dich.sps], [1], + [dnl +error: Dependent variable's values are not dichotomous. +]) + +AT_CLEANUP \ No newline at end of file -- 2.30.2