Added parser for the ROC command.
authorJohn Darrington <john@darrington.wattle.id.au>
Tue, 9 Jun 2009 11:15:08 +0000 (19:15 +0800)
committerJohn Darrington <john@darrington.wattle.id.au>
Tue, 9 Jun 2009 11:15:08 +0000 (19:15 +0800)
src/language/command.def
src/language/stats/automake.mk
src/language/stats/roc.c [new file with mode: 0644]
src/language/stats/roc.h [new file with mode: 0644]

index 4c8df335722aa192eb2e38297c6449f5679b19f4..fa1bb1e88c6b2da3f683cbc49cb15449a9d24d07 100644 (file)
@@ -117,6 +117,7 @@ DEF_CMD (S_DATA, 0, "RANK", cmd_rank)
 DEF_CMD (S_DATA, 0, "REGRESSION", cmd_regression)
 DEF_CMD (S_DATA, 0, "RELIABILITY", cmd_reliability)
 DEF_CMD (S_DATA, 0, "RENAME VARIABLES", cmd_rename_variables)
+DEF_CMD (S_DATA, 0, "ROC", cmd_roc)
 DEF_CMD (S_DATA, 0, "SAMPLE", cmd_sample)
 DEF_CMD (S_DATA, 0, "SAVE", cmd_save)
 DEF_CMD (S_DATA, 0, "SORT CASES", cmd_sort_cases)
@@ -237,7 +238,6 @@ UNIMPL_CMD ("REPEATING DATA", "Specify multiple cases per input record")
 UNIMPL_CMD ("REPORT", "Pretty print working file")
 UNIMPL_CMD ("RESTORE", "Restore settings")
 UNIMPL_CMD ("RMV", "Replace missing values")
-UNIMPL_CMD ("ROC", "Receiver operating characteristic")
 UNIMPL_CMD ("SAVE TRANSLATE", "Save to foriegn format")
 UNIMPL_CMD ("SCRIPT", "Run script file")
 UNIMPL_CMD ("SEASON", "Estimate seasonal factors")
index 5aee445c4b29818f0995adb991f30ad3e9ea70fc..1a68b906bfe313181353dc472bb48e3f202e6c72 100644 (file)
@@ -32,10 +32,12 @@ language_stats_sources = \
        src/language/stats/freq.h \
        src/language/stats/npar-summary.c \
        src/language/stats/npar-summary.h \
-       src/language/stats/wilcoxon.c \
-       src/language/stats/wilcoxon.h \
+       src/language/stats/roc.c \
+       src/language/stats/roc.h \
        src/language/stats/sign.c \
-       src/language/stats/sign.h
+       src/language/stats/sign.h \
+       src/language/stats/wilcoxon.c \
+       src/language/stats/wilcoxon.h
 
 all_q_sources += $(src_language_stats_built_sources:.c=.q)
 EXTRA_DIST += $(src_language_stats_built_sources:.c=.q)
diff --git a/src/language/stats/roc.c b/src/language/stats/roc.c
new file mode 100644 (file)
index 0000000..ba7ad3c
--- /dev/null
@@ -0,0 +1,248 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "roc.h"
+
+#include <stdlib.h>
+
+#include <data/procedure.h>
+#include <language/lexer/variable-parser.h>
+#include <language/lexer/value-parser.h>
+#include <language/lexer/lexer.h>
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+#define N_(msgid) msgid
+
+/* Options collected by cmd_roc() while parsing one ROC command. */
+struct cmd_roc
+{
+  size_t n_vars;                /* Number of elements in vars. */
+  const struct variable **vars; /* Numeric variables listed before BY. */
+
+  struct variable *state_var ;  /* Variable named after BY. */
+  union value state_value;      /* Value given in parentheses after the
+                                   state variable, parsed using that
+                                   variable's width. */
+
+  /* Plot the roc curve */
+  bool curve;
+  /* Plot the reference line */
+  bool reference;
+
+  double ci;                    /* Number given to CRITERIA=CI(...);
+                                   cmd_roc() starts it at 95. */
+
+  bool print_coords;            /* Set by PRINT=COORDINATES. */
+  bool print_se;                /* Set by PRINT=SE. */
+  bool bi_neg_exp; /* True iff the bi-negative exponential criteria
+                     should be used */
+  enum mv_class exclude;        /* Missing-value class chosen by the
+                                   MISSING subcommand or by
+                                   CRITERIA=CUTOFF(...). */
+
+  bool invert ; /* True iff a smaller test result variable indicates
+                  a positive result */
+
+};
+
+
+/* Parses "(FIRST)" or "(SECOND)", where FIRST and SECOND are keywords.
+   On success stores false in *IS_SECOND if FIRST was given, true if
+   SECOND was given, and returns true.  On a syntax error returns false
+   and leaves *IS_SECOND untouched; an error is reported here when
+   neither keyword matches, and the lex_force_* functions are relied on
+   to report a missing parenthesis themselves. */
+static bool
+parse_roc_choice (struct lexer *lexer, const char *first,
+                  const char *second, bool *is_second)
+{
+  bool second_given;
+
+  if (!lex_force_match (lexer, '('))
+    return false;
+
+  if (lex_match_id (lexer, first))
+    second_given = false;
+  else if (lex_match_id (lexer, second))
+    second_given = true;
+  else
+    {
+      lex_error (lexer, NULL);
+      return false;
+    }
+
+  if (!lex_force_match (lexer, ')'))
+    return false;
+
+  *is_second = second_given;
+  return true;
+}
+
+/* Parses the body of the MISSING subcommand (the subcommand name has
+   already been consumed) into ROC->exclude.  Returns true on success,
+   false after reporting a syntax error. */
+static bool
+parse_roc_missing (struct lexer *lexer, struct cmd_roc *roc)
+{
+  lex_match (lexer, '=');
+  while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
+    {
+      if (lex_match_id (lexer, "INCLUDE"))
+        roc->exclude = MV_SYSTEM;
+      else if (lex_match_id (lexer, "EXCLUDE"))
+        roc->exclude = MV_ANY;
+      else
+        {
+          lex_error (lexer, NULL);
+          return false;
+        }
+    }
+  return true;
+}
+
+/* Parses the body of the PLOT subcommand into ROC->curve and
+   ROC->reference.  Accepts CURVE, CURVE(REFERENCE) or NONE.  Returns
+   true on success, false on a syntax error. */
+static bool
+parse_roc_plot (struct lexer *lexer, struct cmd_roc *roc)
+{
+  lex_match (lexer, '=');
+  if (lex_match_id (lexer, "CURVE"))
+    {
+      roc->curve = true;
+      if (lex_match (lexer, '('))
+        {
+          roc->reference = true;
+          if (!lex_force_match_id (lexer, "REFERENCE")
+              || !lex_force_match (lexer, ')'))
+            return false;
+        }
+    }
+  else if (lex_match_id (lexer, "NONE"))
+    roc->curve = false;
+  else
+    {
+      lex_error (lexer, NULL);
+      return false;
+    }
+  return true;
+}
+
+/* Parses the body of the PRINT subcommand into ROC->print_se and
+   ROC->print_coords.  Returns true on success, false after reporting
+   a syntax error. */
+static bool
+parse_roc_print (struct lexer *lexer, struct cmd_roc *roc)
+{
+  lex_match (lexer, '=');
+  while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
+    {
+      if (lex_match_id (lexer, "SE"))
+        roc->print_se = true;
+      else if (lex_match_id (lexer, "COORDINATES"))
+        roc->print_coords = true;
+      else
+        {
+          lex_error (lexer, NULL);
+          return false;
+        }
+    }
+  return true;
+}
+
+/* Parses the body of the CRITERIA subcommand: any sequence of
+   CUTOFF(INCLUDE|EXCLUDE), TESTPOS(LARGE|SMALL), CI(number) and
+   DISTRIBUTION(FREE|NEGEXPO).  Returns true on success, false on a
+   syntax error. */
+static bool
+parse_roc_criteria (struct lexer *lexer, struct cmd_roc *roc)
+{
+  lex_match (lexer, '=');
+  while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
+    {
+      if (lex_match_id (lexer, "CUTOFF"))
+        {
+          bool exclude_user;
+          if (!parse_roc_choice (lexer, "INCLUDE", "EXCLUDE",
+                                 &exclude_user))
+            return false;
+          roc->exclude = exclude_user ? (MV_USER | MV_SYSTEM) : MV_SYSTEM;
+        }
+      else if (lex_match_id (lexer, "TESTPOS"))
+        {
+          if (!parse_roc_choice (lexer, "LARGE", "SMALL", &roc->invert))
+            return false;
+        }
+      else if (lex_match_id (lexer, "CI"))
+        {
+          /* Only read the token's numeric value once we know the
+             current token really is a number. */
+          if (!lex_force_match (lexer, '(') || !lex_force_num (lexer))
+            return false;
+          roc->ci = lex_number (lexer);
+          lex_get (lexer);
+          if (!lex_force_match (lexer, ')'))
+            return false;
+        }
+      else if (lex_match_id (lexer, "DISTRIBUTION"))
+        {
+          if (!parse_roc_choice (lexer, "FREE", "NEGEXPO",
+                                 &roc->bi_neg_exp))
+            return false;
+        }
+      else
+        {
+          lex_error (lexer, NULL);
+          return false;
+        }
+    }
+  return true;
+}
+
+/* Parses the ROC command:
+
+     ROC var_list BY state_var (state_value)
+         [/MISSING=...] [/PLOT=...] [/PRINT=...] [/CRITERIA=...].
+
+   LEXER supplies the tokens and DS the dictionary in which variable
+   names are looked up.  The parsed settings are collected in a local
+   struct cmd_roc; nothing is computed from them yet.
+
+   Returns 1 if the whole command parsed, 2 on any syntax error.
+   NOTE(review): these literals are kept from the original code;
+   presumably they should be the enum cmd_result constants from
+   <language/command.h> -- confirm that 2 is really the failure code. */
+int
+cmd_roc (struct lexer *lexer, struct dataset *ds)
+{
+  struct cmd_roc roc ;
+  const struct dictionary *dict = dataset_dict (ds);
+
+  roc.vars = NULL;
+  roc.n_vars = 0;
+  roc.state_var = NULL;
+  roc.print_se = false;
+  roc.print_coords = false;
+  roc.exclude = MV_ANY;
+  roc.curve = true;
+  roc.reference = false;
+  roc.ci = 95;
+  roc.bi_neg_exp = false;
+  roc.invert = false;
+
+  /* On failure the variable parser is left to clean up whatever it
+     allocated, exactly as before, so roc.vars is not freed here. */
+  if (!parse_variables_const (lexer, dict, &roc.vars, &roc.n_vars,
+                              PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
+    return 2;
+
+  if (!lex_force_match (lexer, T_BY))
+    goto error;
+
+  /* An unknown variable name yields NULL, which must not reach
+     var_get_width() below. */
+  roc.state_var = parse_variable (lexer, dict);
+  if (roc.state_var == NULL)
+    goto error;
+
+  if (!lex_force_match (lexer, '('))
+    goto error;
+
+  /* NOTE(review): if union value needs explicit initialization for
+     wide string variables, that should happen before this call --
+     confirm against the value API. */
+  if (!parse_value (lexer, &roc.state_value,
+                    var_get_width (roc.state_var)))
+    goto error;
+
+  if (!lex_force_match (lexer, ')'))
+    goto error;
+
+  while (lex_token (lexer) != '.')
+    {
+      bool ok;
+
+      lex_match (lexer, '/');
+      if (lex_match_id (lexer, "MISSING"))
+        ok = parse_roc_missing (lexer, &roc);
+      else if (lex_match_id (lexer, "PLOT"))
+        ok = parse_roc_plot (lexer, &roc);
+      else if (lex_match_id (lexer, "PRINT"))
+        ok = parse_roc_print (lexer, &roc);
+      else if (lex_match_id (lexer, "CRITERIA"))
+        ok = parse_roc_criteria (lexer, &roc);
+      else
+        {
+          /* An unrecognized subcommand is a failure, not a success. */
+          lex_error (lexer, NULL);
+          ok = false;
+        }
+
+      if (!ok)
+        goto error;
+    }
+
+  /* The settings are not used beyond this point yet, so release the
+     variable list rather than leaking it. */
+  free (roc.vars);
+  return 1;
+
+ error:
+  free (roc.vars);
+  return 2;
+}
+
diff --git a/src/language/stats/roc.h b/src/language/stats/roc.h
new file mode 100644 (file)
index 0000000..54028d5
--- /dev/null
@@ -0,0 +1,24 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+*/
+#ifndef ROC_H
+#define ROC_H
+
+struct dataset;
+struct lexer;
+
+/* Parser for the ROC command.  Reads the command's syntax from LEXER
+   and looks up variable names in the dictionary of DS.  The int result
+   is a status code; see roc.c for the values it returns. */
+int cmd_roc (struct lexer *lexer, struct dataset *ds);
+
+#endif /* ROC_H */