From 7e0f8af8463a3a9df128b220d3b3e37267d8cc5f Mon Sep 17 00:00:00 2001
From: John Darrington
Date: Tue, 9 Jun 2009 19:15:08 +0800
Subject: [PATCH] Added parser for the ROC command.

---
Review notes (placed after the "---" marker, so they are not part of the
commit message):

 * cmd_roc now stops when parse_variable fails instead of handing a null
   state variable to var_get_width.
 * The results of parse_value, lex_force_match, lex_force_match_id and
   lex_force_num are checked.  In particular a malformed CRITERIA=CI no
   longer reads and consumes a token that is not a number.
 * PLOT=CURVE(...) requests the reference line only after REFERENCE and
   the closing parenthesis have been matched.
 * The variable list is freed on every exit path.
 * An unknown subcommand now fails the command, like every other syntax
   error in this parser, instead of breaking out and returning success.
 * The subcommand bodies were moved into static helpers.
 * The copy of this patch that was reviewed had lost the text inside angle
   brackets.  The URL in the license notices was restored from the
   standard GPLv3 notice, and the header names in roc.c were restored from
   the functions the file calls; please confirm both against the tree.
   The blob id on the roc.c "index" line predates these changes.

 src/language/command.def       |   2 +-
 src/language/stats/automake.mk |   8 +-
 src/language/stats/roc.c       | 298 +++++++++++++++++++++++++++++++++
 src/language/stats/roc.h       |  24 +++
 4 files changed, 328 insertions(+), 4 deletions(-)
 create mode 100644 src/language/stats/roc.c
 create mode 100644 src/language/stats/roc.h

diff --git a/src/language/command.def b/src/language/command.def
index 4c8df335..fa1bb1e8 100644
--- a/src/language/command.def
+++ b/src/language/command.def
@@ -117,6 +117,7 @@ DEF_CMD (S_DATA, 0, "RANK", cmd_rank)
 DEF_CMD (S_DATA, 0, "REGRESSION", cmd_regression)
 DEF_CMD (S_DATA, 0, "RELIABILITY", cmd_reliability)
 DEF_CMD (S_DATA, 0, "RENAME VARIABLES", cmd_rename_variables)
+DEF_CMD (S_DATA, 0, "ROC", cmd_roc)
 DEF_CMD (S_DATA, 0, "SAMPLE", cmd_sample)
 DEF_CMD (S_DATA, 0, "SAVE", cmd_save)
 DEF_CMD (S_DATA, 0, "SORT CASES", cmd_sort_cases)
@@ -237,7 +238,6 @@ UNIMPL_CMD ("REPEATING DATA", "Specify multiple cases per input record")
 UNIMPL_CMD ("REPORT", "Pretty print working file")
 UNIMPL_CMD ("RESTORE", "Restore settings")
 UNIMPL_CMD ("RMV", "Replace missing values")
-UNIMPL_CMD ("ROC", "Receiver operating characteristic")
 UNIMPL_CMD ("SAVE TRANSLATE", "Save to foriegn format")
 UNIMPL_CMD ("SCRIPT", "Run script file")
 UNIMPL_CMD ("SEASON", "Estimate seasonal factors")
diff --git a/src/language/stats/automake.mk b/src/language/stats/automake.mk
index 5aee445c..1a68b906 100644
--- a/src/language/stats/automake.mk
+++ b/src/language/stats/automake.mk
@@ -32,10 +32,12 @@ language_stats_sources = \
 	src/language/stats/freq.h \
 	src/language/stats/npar-summary.c \
 	src/language/stats/npar-summary.h \
-	src/language/stats/wilcoxon.c \
-	src/language/stats/wilcoxon.h \
+	src/language/stats/roc.c \
+	src/language/stats/roc.h \
 	src/language/stats/sign.c \
-	src/language/stats/sign.h
+	src/language/stats/sign.h \
+	src/language/stats/wilcoxon.c \
+	src/language/stats/wilcoxon.h
 
 all_q_sources += $(src_language_stats_built_sources:.c=.q)
 EXTRA_DIST += $(src_language_stats_built_sources:.c=.q)
diff --git a/src/language/stats/roc.c b/src/language/stats/roc.c
new file mode 100644
index 00000000..ba7ad3ce
--- /dev/null
+++ b/src/language/stats/roc.c
@@ -0,0 +1,298 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "roc.h"
+
+#include <stdlib.h>
+
+#include <data/procedure.h>
+#include <language/lexer/variable-parser.h>
+#include <language/lexer/value-parser.h>
+#include <language/lexer/lexer.h>
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+#define N_(msgid) msgid
+
+/* Settings collected from the syntax of a single ROC command.  */
+struct cmd_roc
+{
+  size_t n_vars;                /* Number of elements in VARS.  */
+  const struct variable **vars; /* Variables listed before BY.  */
+
+  struct variable *state_var;   /* Variable named after BY.  */
+  union value state_value;      /* Value given in parentheses after it.  */
+
+  bool curve;                   /* Plot the roc curve.  */
+  bool reference;               /* Plot the reference line.  */
+
+  double ci;                    /* Argument of CRITERIA=CI; 95 by default.  */
+
+  bool print_coords;            /* Set by PRINT=COORDINATES.  */
+  bool print_se;                /* Set by PRINT=SE.  */
+  bool bi_neg_exp;              /* True iff the bi-negative exponential
+                                   criteria should be used.  */
+  enum mv_class exclude;        /* Set by MISSING and by CRITERIA=CUTOFF.  */
+
+  bool invert;                  /* True iff a smaller test result variable
+                                   indicates a positive result.  */
+};
+
+/* Returns true if the current token ends a subcommand: either the command
+   terminator or the '/' that introduces the next subcommand.  */
+static bool
+at_subcommand_end (struct lexer *lexer)
+{
+  int token = lex_token (lexer);
+
+  return token == '.' || token == '/';
+}
+
+/* Parses "(FIRST)" or "(SECOND)".
+   On success, stores in *CHOSE_SECOND whether SECOND was the keyword given
+   and returns true.  On a syntax error, an error is reported, *CHOSE_SECOND
+   is left alone and false is returned.  */
+static bool
+parse_paren_choice (struct lexer *lexer, const char *first,
+                    const char *second, bool *chose_second)
+{
+  bool is_second;
+
+  if (!lex_force_match (lexer, '('))
+    return false;
+
+  if (lex_match_id (lexer, first))
+    is_second = false;
+  else if (lex_match_id (lexer, second))
+    is_second = true;
+  else
+    {
+      lex_error (lexer, NULL);
+      return false;
+    }
+
+  if (!lex_force_match (lexer, ')'))
+    return false;
+
+  *chose_second = is_second;
+  return true;
+}
+
+/* Parses the body of the MISSING subcommand into ROC.
+   Returns false, after reporting an error, on an unrecognized keyword.  */
+static bool
+parse_missing (struct lexer *lexer, struct cmd_roc *roc)
+{
+  lex_match (lexer, '=');
+  while (!at_subcommand_end (lexer))
+    {
+      if (lex_match_id (lexer, "INCLUDE"))
+        roc->exclude = MV_SYSTEM;
+      else if (lex_match_id (lexer, "EXCLUDE"))
+        roc->exclude = MV_ANY;
+      else
+        {
+          lex_error (lexer, NULL);
+          return false;
+        }
+    }
+  return true;
+}
+
+/* Parses the body of the PLOT subcommand into ROC: either NONE, or CURVE
+   optionally followed by "(REFERENCE)".
+   Returns false, after an error has been reported, on a syntax error.  */
+static bool
+parse_plot (struct lexer *lexer, struct cmd_roc *roc)
+{
+  lex_match (lexer, '=');
+  if (lex_match_id (lexer, "CURVE"))
+    {
+      roc->curve = true;
+      if (lex_match (lexer, '('))
+        {
+          /* Request the reference line only once the whole parenthesized
+             keyword has been accepted.  */
+          if (!lex_force_match_id (lexer, "REFERENCE")
+              || !lex_force_match (lexer, ')'))
+            return false;
+          roc->reference = true;
+        }
+    }
+  else if (lex_match_id (lexer, "NONE"))
+    roc->curve = false;
+  else
+    {
+      lex_error (lexer, NULL);
+      return false;
+    }
+  return true;
+}
+
+/* Parses the body of the PRINT subcommand into ROC.
+   Returns false, after reporting an error, on an unrecognized keyword.  */
+static bool
+parse_print (struct lexer *lexer, struct cmd_roc *roc)
+{
+  lex_match (lexer, '=');
+  while (!at_subcommand_end (lexer))
+    {
+      if (lex_match_id (lexer, "SE"))
+        roc->print_se = true;
+      else if (lex_match_id (lexer, "COORDINATES"))
+        roc->print_coords = true;
+      else
+        {
+          lex_error (lexer, NULL);
+          return false;
+        }
+    }
+  return true;
+}
+
+/* Parses the body of the CRITERIA subcommand into ROC.
+   Returns false, after an error has been reported, on a syntax error.  */
+static bool
+parse_criteria (struct lexer *lexer, struct cmd_roc *roc)
+{
+  lex_match (lexer, '=');
+  while (!at_subcommand_end (lexer))
+    {
+      if (lex_match_id (lexer, "CUTOFF"))
+        {
+          bool exclude;
+
+          if (!parse_paren_choice (lexer, "INCLUDE", "EXCLUDE", &exclude))
+            return false;
+
+          /* NOTE(review): this writes the same field as the MISSING
+             subcommand; confirm that sharing it is intended.  */
+          roc->exclude = exclude ? (MV_USER | MV_SYSTEM) : MV_SYSTEM;
+        }
+      else if (lex_match_id (lexer, "TESTPOS"))
+        {
+          if (!parse_paren_choice (lexer, "LARGE", "SMALL", &roc->invert))
+            return false;
+        }
+      else if (lex_match_id (lexer, "CI"))
+        {
+          /* Read and consume the token only once it is known to be a
+             number, so that a malformed CI cannot swallow what follows.  */
+          if (!lex_force_match (lexer, '(') || !lex_force_num (lexer))
+            return false;
+          roc->ci = lex_number (lexer);
+          lex_get (lexer);
+          if (!lex_force_match (lexer, ')'))
+            return false;
+        }
+      else if (lex_match_id (lexer, "DISTRIBUTION"))
+        {
+          if (!parse_paren_choice (lexer, "FREE", "NEGEXPO",
+                                   &roc->bi_neg_exp))
+            return false;
+        }
+      else
+        {
+          lex_error (lexer, NULL);
+          return false;
+        }
+    }
+  return true;
+}
+
+/* Parses the ROC command:
+
+     ROC var_list BY state_var (state_value)
+         [/MISSING={INCLUDE,EXCLUDE}]
+         [/PLOT={CURVE[(REFERENCE)],NONE}]
+         [/PRINT=[SE] [COORDINATES]]
+         [/CRITERIA=[CUTOFF({INCLUDE,EXCLUDE})] [TESTPOS({LARGE,SMALL})]
+                    [CI(number)] [DISTRIBUTION({FREE,NEGEXPO})]].
+
+   The settings are only parsed for now; nothing is computed from them.
+   Returns 1 if the syntax was accepted, or 2 after an error has been
+   reported.  */
+int
+cmd_roc (struct lexer *lexer, struct dataset *ds)
+{
+  struct cmd_roc roc;
+  const struct dictionary *dict = dataset_dict (ds);
+
+  roc.vars = NULL;
+  roc.n_vars = 0;
+  roc.state_var = NULL;
+  roc.print_se = false;
+  roc.print_coords = false;
+  roc.exclude = MV_ANY;
+  roc.curve = true;
+  roc.reference = false;
+  roc.ci = 95;
+  roc.bi_neg_exp = false;
+  roc.invert = false;
+
+  if (!parse_variables_const (lexer, dict, &roc.vars, &roc.n_vars,
+                              PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
+    goto error;
+
+  if (!lex_force_match (lexer, T_BY))
+    goto error;
+
+  /* A failed parse_variable must not reach var_get_width below.  */
+  roc.state_var = parse_variable (lexer, dict);
+  if (roc.state_var == NULL)
+    goto error;
+
+  if (!lex_force_match (lexer, '(')
+      || !parse_value (lexer, &roc.state_value,
+                       var_get_width (roc.state_var))
+      || !lex_force_match (lexer, ')'))
+    goto error;
+
+  while (lex_token (lexer) != '.')
+    {
+      bool ok;
+
+      lex_match (lexer, '/');
+      if (lex_match_id (lexer, "MISSING"))
+        ok = parse_missing (lexer, &roc);
+      else if (lex_match_id (lexer, "PLOT"))
+        ok = parse_plot (lexer, &roc);
+      else if (lex_match_id (lexer, "PRINT"))
+        ok = parse_print (lexer, &roc);
+      else if (lex_match_id (lexer, "CRITERIA"))
+        ok = parse_criteria (lexer, &roc);
+      else
+        {
+          lex_error (lexer, NULL);
+          ok = false;
+        }
+
+      if (!ok)
+        goto error;
+    }
+
+  free (roc.vars);
+  return 1;
+
+ error:
+  /* NOTE(review): 1 and 2 are the literal values this parser has always
+     returned; confirm that 2 is the failure code the caller expects.  */
+  free (roc.vars);
+  return 2;
+}
diff --git a/src/language/stats/roc.h b/src/language/stats/roc.h
new file mode 100644
index 00000000..54028d58
--- /dev/null
+++ b/src/language/stats/roc.h
@@ -0,0 +1,24 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef ROC_H
+#define ROC_H
+
+struct dataset;
+struct lexer;
+int cmd_roc (struct lexer *lexer, struct dataset *ds);
+
+#endif
-- 
2.30.2