Update all #include directives to the currently preferred style.

[pspp-builds.git] / src / language / stats / regression.q
diff --git a/src/language/stats/regression.q b/src/language/stats/regression.q

index c7e8179a579086620064f53e0bd6bb5eace213ce..5ca581d8e14c7301ed4ff79b5df5c15bf3105ab5 100644 (file)
--- a/src/language/stats/regression.q
+++ b/src/language/stats/regression.q
@@ -1,5 +1,5 @@
  /* PSPP - a program for statistical analysis.
-   Copyright (C) 2005, 2009 Free Software Foundation, Inc.
+   Copyright (C) 2005, 2009, 2010, 2011 Free Software Foundation, Inc.
  
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -21,28 +21,30 @@
  #include <gsl/gsl_vector.h>
  #include <math.h>
  #include <stdlib.h>
-#include <data/case.h>
-#include <data/casegrouper.h>
-#include <data/casereader.h>
-#include <data/dictionary.h>
-#include <data/missing-values.h>
-#include <data/procedure.h>
-#include <data/transformations.h>
-#include <data/value-labels.h>
-#include <data/variable.h>
-#include <language/command.h>
-#include <language/dictionary/split-file.h>
-#include <language/data-io/file-handle.h>
-#include <language/lexer/lexer.h>
-#include <libpspp/compiler.h>
-#include <libpspp/message.h>
-#include <libpspp/taint.h>
-#include <math/covariance.h>
-#include <math/linreg.h>
-#include <math/moments.h>
-#include <output/tab.h>
-
-#include "xalloc.h"
+
+#include "data/case.h"
+#include "data/casegrouper.h"
+#include "data/casereader.h"
+#include "data/dictionary.h"
+#include "data/missing-values.h"
+#include "data/procedure.h"
+#include "data/transformations.h"
+#include "data/value-labels.h"
+#include "data/variable.h"
+#include "language/command.h"
+#include "language/data-io/file-handle.h"
+#include "language/dictionary/split-file.h"
+#include "language/lexer/lexer.h"
+#include "libpspp/compiler.h"
+#include "libpspp/message.h"
+#include "libpspp/taint.h"
+#include "math/covariance.h"
+#include "math/linreg.h"
+#include "math/moments.h"
+#include "output/tab.h"
+
+#include "gl/intprops.h"
+#include "gl/xalloc.h"
  
  #include "gettext.h"
  #define _(msgid) gettext (msgid)
@@ -206,7 +208,7 @@ reg_stats_coeff (linreg * c, void *aux_)
    tab_double (t, 4, 1, 0, 0.0, NULL);
    t_stat = linreg_intercept (c) / std_err;
    tab_double (t, 5, 1, 0, t_stat, NULL);
-  pval = 2 * gsl_cdf_tdist_Q (fabs (t_stat), 1.0);
+  pval = 2 * gsl_cdf_tdist_Q (fabs (t_stat), (double) (linreg_n_obs (c) - linreg_n_coeffs (c)));
    tab_double (t, 6, 1, 0, pval, NULL);
    for (j = 0; j < linreg_n_coeffs (c); j++)
      {
@@ -602,29 +604,19 @@ regression_trns_resid_proc (void *t_, struct ccase **c,
    return TRNS_CONTINUE;
  }
  
-/*
-   Returns false if NAME is a duplicate of any existing variable name.
-*/
-static bool
-try_name (const struct dictionary *dict, const char *name)
+static char *
+reg_get_name (const struct dictionary *dict, const char *prefix)
  {
-  if (dict_lookup_var (dict, name) != NULL)
-    return false;
-
-  return true;
-}
-
-static void
-reg_get_name (const struct dictionary *dict, char name[VAR_NAME_LEN],
-             const char prefix[VAR_NAME_LEN])
-{
-  int i = 1;
+  char *name;
+  int i;
  
-  snprintf (name, VAR_NAME_LEN, "%s%d", prefix, i);
-  while (!try_name (dict, name))
+  /* XXX handle too-long prefixes */
+  name = xmalloc (strlen (prefix) + INT_BUFSIZE_BOUND (i) + 1);
+  for (i = 1; ; i++)
      {
-      i++;
-      snprintf (name, VAR_NAME_LEN, "%s%d", prefix, i);
+      sprintf (name, "%s%d", prefix, i);
+      if (dict_lookup_var (dict, name) == NULL)
+        return name;
      }
  }
  
@@ -634,7 +626,7 @@ reg_save_var (struct dataset *ds, const char *prefix, trns_proc_func * f,
  {
    struct dictionary *dict = dataset_dict (ds);
    static int trns_index = 1;
-  char name[VAR_NAME_LEN];
+  char *name;
    struct variable *new_var;
    struct reg_trns *t = NULL;
  
@@ -642,9 +634,11 @@ reg_save_var (struct dataset *ds, const char *prefix, trns_proc_func * f,
    t->trns_id = trns_index;
    t->n_trns = n_trns;
    t->c = c;
-  reg_get_name (dict, name, prefix);
-  new_var = dict_create_var (dict, name, 0);
-  assert (new_var != NULL);
+
+  name = reg_get_name (dict, prefix);
+  new_var = dict_create_var_assert (dict, name, 0);
+  free (name);
+
    *v = new_var;
    add_transformation (ds, f, regression_trns_free, t);
    trns_index++;
@@ -752,10 +746,10 @@ regression_custom_variables (struct lexer *lexer, struct dataset *ds,
  {
    const struct dictionary *dict = dataset_dict (ds);
  
-  lex_match (lexer, '=');
+  lex_match (lexer, T_EQUALS);
  
    if ((lex_token (lexer) != T_ID
-       || dict_lookup_var (dict, lex_tokid (lexer)) == NULL)
+       || dict_lookup_var (dict, lex_tokcstr (lexer)) == NULL)
        && lex_token (lexer) != T_ALL)
      return 2;
  
@@ -810,7 +804,7 @@ fill_covariance (gsl_matrix *cov, struct covariance *all_cov,
    size_t dep_subscript;
    size_t *rows;
    const gsl_matrix *ssizes;
-  const gsl_matrix *cm;
+  gsl_matrix *cm;
    const gsl_matrix *mean_matrix;
    const gsl_matrix *ssize_matrix;
    double result = 0.0;
@@ -862,9 +856,57 @@ fill_covariance (gsl_matrix *cov, struct covariance *all_cov,
    gsl_matrix_set (cov, cov->size1 - 1, cov->size1 - 1, 
                   gsl_matrix_get (cm, dep_subscript, dep_subscript));
    free (rows);
+  gsl_matrix_free (cm);
    return result;
  }
+/* Returns the number of variables needed to cover both the file-level
+   v_variables list and the dependent variables of CMD: all n_variables
+   entries of v_variables, plus one for each dependent variable that
+   does not appear in v_variables.  The caller sizes the array passed
+   to fill_all_vars() with this value. */
+static size_t
+get_n_all_vars (struct cmd_regression *cmd)
+{
+  size_t result = n_variables;
+  size_t i;
+  size_t j;
  
+  for (i = 0; i < cmd->n_dependent; i++)
+    {
+      /* Stop at the first match so that a variable listed more than
+         once in v_variables cannot be discounted more than once. */
+      for (j = 0; j < n_variables; j++)
+       {
+         if (v_variables[j] == cmd->v_dependent[i])
+           {
+             break;
+           }
+       }
+      /* J ran off the end only if the dependent variable is absent
+         from v_variables, in which case it needs its own slot. */
+      if (j == n_variables)
+       {
+         result++;
+       }
+    }
+  return result;
+}
+/* Stores in VARS every variable in the file-level v_variables list,
+   followed by each dependent variable of CMD that is not already in
+   that list.  VARS must have room for get_n_all_vars (CMD) elements.
+
+   Dependent variables that have to be appended are packed contiguously
+   after the first n_variables entries.  Indexing them by their position
+   in CMD->v_dependent instead would leave holes, and write past the end
+   of VARS, whenever an earlier dependent variable is already present in
+   v_variables. */
+static void
+fill_all_vars (const struct variable **vars, struct cmd_regression *cmd)
+{
+  size_t n_filled;
+  size_t i;
+  size_t j;
+  bool absent;
+  
+  for (i = 0; i < n_variables; i++)
+    {
+      vars[i] = v_variables[i];
+    }
+  /* Next free slot in VARS. */
+  n_filled = n_variables;
+  for (i = 0; i < cmd->n_dependent; i++)
+    {
+      absent = true;
+      for (j = 0; j < n_variables; j++)
+       {
+         if (cmd->v_dependent[i] == v_variables[j])
+           {
+             absent = false;
+             break;
+           }
+       }
+      if (absent)
+       {
+         vars[n_filled++] = cmd->v_dependent[i];
+       }
+    }
+}
  static bool
  run_regression (struct casereader *input, struct cmd_regression *cmd,
                 struct dataset *ds, linreg **models)
@@ -877,10 +919,11 @@ run_regression (struct casereader *input, struct cmd_regression *cmd,
    struct ccase *c;
    struct covariance *cov;
    const struct variable **vars;
+  const struct variable **all_vars;
    const struct variable *dep_var;
    struct casereader *reader;
    const struct dictionary *dict;
-  gsl_matrix *this_cm;
+  size_t n_all_vars;
  
    assert (models != NULL);
  
@@ -907,9 +950,12 @@ run_regression (struct casereader *input, struct cmd_regression *cmd,
      {
        dict_get_vars (dict, &v_variables, &n_variables, 0);
      }
+  n_all_vars = get_n_all_vars (cmd);
+  all_vars = xnmalloc (n_all_vars, sizeof (*all_vars));
+  fill_all_vars (all_vars, cmd);
    vars = xnmalloc (n_variables, sizeof (*vars));
-  means  = xnmalloc (n_variables, sizeof (*means));
-  cov = covariance_1pass_create (n_variables, v_variables,
+  means  = xnmalloc (n_all_vars, sizeof (*means));
+  cov = covariance_1pass_create (n_all_vars, all_vars,
                                  dict_get_weight (dict), MV_ANY);
  
    reader = casereader_clone (input);
@@ -922,12 +968,13 @@ run_regression (struct casereader *input, struct cmd_regression *cmd,
    
    for (k = 0; k < cmd->n_dependent; k++)
      {
+      gsl_matrix *this_cm;
        dep_var = cmd->v_dependent[k];
        n_indep = identify_indep_vars (vars, dep_var);
        
        this_cm = gsl_matrix_alloc (n_indep + 1, n_indep + 1);
        n_data = fill_covariance (this_cm, cov, vars, n_indep, 
-                               dep_var, v_variables, n_variables, means);
+                               dep_var, all_vars, n_all_vars, means);
        models[k] = linreg_alloc (dep_var, (const struct variable **) vars,
                                 n_data, n_indep);
        models[k]->depvar = dep_var;
@@ -963,10 +1010,12 @@ run_regression (struct casereader *input, struct cmd_regression *cmd,
           linreg_free (models[k]);
           models[k] = NULL;
         }
+      gsl_matrix_free (this_cm);
      }
    
    casereader_destroy (reader);
    free (vars);
+  free (all_vars);
    free (means);
    casereader_destroy (input);
    covariance_destroy (cov);