X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Froc.c;h=d011bf547e7a20fd4ab4acf9699d6e0db941ab99;hb=refs%2Fheads%2Fcenter-titles;hp=724bc523a3beef42df39ca5b801da5918f88d398;hpb=f550aee00a62fe1d8baf62d83cd7efef6cc2ee92;p=pspp diff --git a/src/language/stats/roc.c b/src/language/stats/roc.c index 724bc523a3..d011bf547e 100644 --- a/src/language/stats/roc.c +++ b/src/language/stats/roc.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2009 Free Software Foundation, Inc. + Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,27 +16,27 @@ #include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "language/stats/roc.h" #include +#include "data/casegrouper.h" +#include "data/casereader.h" +#include "data/casewriter.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/subcase.h" +#include "language/command.h" +#include "language/lexer/lexer.h" +#include "language/lexer/value-parser.h" +#include "language/lexer/variable-parser.h" +#include "libpspp/misc.h" +#include "math/sort.h" +#include "output/chart-item.h" +#include "output/charts/roc-chart.h" +#include "output/tab.h" + #include "gettext.h" #define _(msgid) gettext (msgid) #define N_(msgid) msgid @@ -47,8 +47,9 @@ struct cmd_roc const struct variable **vars; const struct dictionary *dict; - const struct variable *state_var ; + const struct variable *state_var; union value state_value; + size_t state_var_width; /* Plot the roc curve */ bool curve; @@ -93,7 +94,10 @@ cmd_roc (struct lexer *lexer, struct dataset *ds) roc.pos = roc.pos_weighted = 0; roc.neg = roc.neg_weighted = 0; roc.dict = dataset_dict (ds); + roc.state_var = NULL; + roc.state_var_width = -1; + lex_match (lexer, T_SLASH); if (!parse_variables_const (lexer, dict, &roc.vars, &roc.n_vars, PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC)) goto error; @@ -105,28 +109,28 @@ cmd_roc (struct lexer *lexer, struct dataset *ds) roc.state_var = parse_variable (lexer, dict); - if ( !lex_force_match (lexer, '(')) + if ( !lex_force_match (lexer, T_LPAREN)) { goto error; } - value_init (&roc.state_value, var_get_width (roc.state_var)); - parse_value (lexer, &roc.state_value, var_get_width (roc.state_var)); + roc.state_var_width = var_get_width (roc.state_var); + value_init (&roc.state_value, roc.state_var_width); + parse_value (lexer, &roc.state_value, roc.state_var); - if ( !lex_force_match (lexer, ')')) + if ( !lex_force_match (lexer, T_RPAREN)) { goto error; } - - while (lex_token (lexer) != '.') + while (lex_token (lexer) != T_ENDCMD) { - lex_match (lexer, '/'); + lex_match (lexer, T_SLASH); if (lex_match_id (lexer, "MISSING")) { - lex_match (lexer, '='); - while (lex_token (lexer) != '.' && lex_token (lexer) != '/') + lex_match (lexer, T_EQUALS); + while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) { if (lex_match_id (lexer, "INCLUDE")) { @@ -145,15 +149,15 @@ cmd_roc (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "PLOT")) { - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "CURVE")) { roc.curve = true; - if (lex_match (lexer, '(')) + if (lex_match (lexer, T_LPAREN)) { roc.reference = true; lex_force_match_id (lexer, "REFERENCE"); - lex_force_match (lexer, ')'); + lex_force_match (lexer, T_RPAREN); } } else if (lex_match_id (lexer, "NONE")) @@ -168,8 +172,8 @@ cmd_roc (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "PRINT")) { - lex_match (lexer, '='); - while (lex_token (lexer) != '.' && lex_token (lexer) != '/') + lex_match (lexer, T_EQUALS); + while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) { if (lex_match_id (lexer, "SE")) { @@ -188,12 +192,12 @@ cmd_roc (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "CRITERIA")) { - lex_match (lexer, '='); - while (lex_token (lexer) != '.' && lex_token (lexer) != '/') + lex_match (lexer, T_EQUALS); + while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) { if (lex_match_id (lexer, "CUTOFF")) { - lex_force_match (lexer, '('); + lex_force_match (lexer, T_LPAREN); if (lex_match_id (lexer, "INCLUDE")) { roc.exclude = MV_SYSTEM; @@ -207,11 +211,11 @@ cmd_roc (struct lexer *lexer, struct dataset *ds) lex_error (lexer, NULL); goto error; } - lex_force_match (lexer, ')'); + lex_force_match (lexer, T_RPAREN); } else if (lex_match_id (lexer, "TESTPOS")) { - lex_force_match (lexer, '('); + lex_force_match (lexer, T_LPAREN); if (lex_match_id (lexer, "LARGE")) { roc.invert = false; @@ -225,19 +229,19 @@ cmd_roc (struct lexer *lexer, struct dataset *ds) lex_error (lexer, NULL); goto error; } - lex_force_match (lexer, ')'); + lex_force_match (lexer, T_RPAREN); } else if (lex_match_id (lexer, "CI")) { - lex_force_match (lexer, '('); + lex_force_match (lexer, T_LPAREN); lex_force_num (lexer); roc.ci = lex_number (lexer); lex_get (lexer); - lex_force_match (lexer, ')'); + lex_force_match (lexer, T_RPAREN); } else if (lex_match_id (lexer, "DISTRIBUTION")) { - lex_force_match (lexer, '('); + lex_force_match (lexer, T_LPAREN); if (lex_match_id (lexer, "FREE")) { roc.bi_neg_exp = false; @@ -251,7 +255,7 @@ cmd_roc (struct lexer *lexer, struct dataset *ds) lex_error (lexer, NULL); goto error; } - lex_force_match (lexer, ')'); + lex_force_match (lexer, T_RPAREN); } else { @@ -270,12 +274,14 @@ cmd_roc (struct lexer *lexer, struct dataset *ds) if ( ! run_roc (ds, &roc)) goto error; - value_destroy (&roc.state_value, var_get_width (roc.state_var)); + if ( roc.state_var) + value_destroy (&roc.state_value, roc.state_var_width); free (roc.vars); return CMD_SUCCESS; error: - value_destroy (&roc.state_value, var_get_width (roc.state_var)); + if ( roc.state_var) + value_destroy (&roc.state_value, roc.state_var_width); free (roc.vars); return CMD_FAILURE; } @@ -394,19 +400,18 @@ struct roc_state WEIGHT is the value of a single count. */ static struct casereader * -accumulate_counts (struct casereader *cutpoint_rdr, +accumulate_counts (struct casereader *input, double result, double weight, bool (*pos_cond) (double, double), int true_index, int false_index) { - const struct caseproto *proto = casereader_get_proto (cutpoint_rdr); + const struct caseproto *proto = casereader_get_proto (input); struct casewriter *w = autopaging_writer_create (proto); - struct casereader *r = casereader_clone (cutpoint_rdr); struct ccase *cpc; double prev_cp = SYSMIS; - for ( ; (cpc = casereader_read (r) ); case_unref (cpc)) + for ( ; (cpc = casereader_read (input) ); case_unref (cpc)) { struct ccase *new_case; const double cp = case_data_idx (cpc, ROC_CUTPOINT)->f; @@ -428,7 +433,7 @@ accumulate_counts (struct casereader *cutpoint_rdr, casewriter_write (w, new_case); } - casereader_destroy (r); + casereader_destroy (input); return casewriter_make_reader (w); } @@ -526,9 +531,12 @@ process_group (const struct variable *var, struct casereader *reader, casereader_destroy (r2); } + casereader_destroy (r1); casereader_destroy (rclone); + caseproto_unref (proto); + return casewriter_make_reader (wtr); } @@ -625,24 +633,29 @@ prepare_cutpoints (struct cmd_roc *roc, struct roc_state *rs, struct casereader int i; struct casereader *r = casereader_clone (input); struct ccase *c; - struct caseproto *proto = caseproto_create (); - struct subcase ordering; - subcase_init (&ordering, ROC_CUTPOINT, 0, SC_ASCEND); + { + struct caseproto *proto = caseproto_create (); + struct subcase ordering; + subcase_init (&ordering, ROC_CUTPOINT, 0, SC_ASCEND); - proto = caseproto_add_width (proto, 0); /* cutpoint */ - proto = caseproto_add_width (proto, 0); /* ROC_TP */ - proto = caseproto_add_width (proto, 0); /* ROC_FN */ - proto = caseproto_add_width (proto, 0); /* ROC_TN */ - proto = caseproto_add_width (proto, 0); /* ROC_FP */ + proto = caseproto_add_width (proto, 0); /* cutpoint */ + proto = caseproto_add_width (proto, 0); /* ROC_TP */ + proto = caseproto_add_width (proto, 0); /* ROC_FN */ + proto = caseproto_add_width (proto, 0); /* ROC_TN */ + proto = caseproto_add_width (proto, 0); /* ROC_FP */ - for (i = 0 ; i < roc->n_vars; ++i) - { - rs[i].cutpoint_wtr = sort_create_writer (&ordering, proto); - rs[i].prev_result = SYSMIS; - rs[i].max = -DBL_MAX; - rs[i].min = DBL_MAX; - } + for (i = 0 ; i < roc->n_vars; ++i) + { + rs[i].cutpoint_wtr = sort_create_writer (&ordering, proto); + rs[i].prev_result = SYSMIS; + rs[i].max = -DBL_MAX; + rs[i].min = DBL_MAX; + } + + caseproto_unref (proto); + subcase_destroy (&ordering); + } for (; (c = casereader_read (r)) != NULL; case_unref (c)) { @@ -689,7 +702,7 @@ do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict) struct casereader *negatives = NULL; struct casereader *positives = NULL; - struct caseproto *n_proto = caseproto_create (); + struct caseproto *n_proto = NULL; struct subcase up_ordering; struct subcase down_ordering; @@ -740,14 +753,13 @@ do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict) struct ccase *c; struct ccase *cpos; - struct casereader *n_neg ; + struct casereader *n_neg_reader ; const struct variable *var = roc->vars[i]; struct casereader *neg ; struct casereader *pos = casereader_clone (positives); - - struct casereader *n_pos = + struct casereader *n_pos_reader = process_positive_group (var, pos, dict, &rs[i]); if ( negatives == NULL) @@ -757,18 +769,17 @@ do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict) neg = casereader_clone (negatives); - n_neg = process_negative_group (var, neg, dict, &rs[i]); - + n_neg_reader = process_negative_group (var, neg, dict, &rs[i]); /* Merge the n_pos and n_neg casereaders */ w = sort_create_writer (&up_ordering, n_proto); - for ( ; (cpos = casereader_read (n_pos) ); case_unref (cpos)) + for ( ; (cpos = casereader_read (n_pos_reader) ); case_unref (cpos)) { struct ccase *pos_case = case_create (n_proto); struct ccase *cneg; const double jpos = case_data_idx (cpos, VALUE)->f; - while ((cneg = casereader_read (n_neg))) + while ((cneg = casereader_read (n_neg_reader))) { struct ccase *nc = case_create (n_proto); @@ -798,6 +809,9 @@ do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict) casewriter_write (w, pos_case); } + casereader_destroy (n_pos_reader); + casereader_destroy (n_neg_reader); + /* These aren't used anymore */ #undef N_EQ #undef N_PRED @@ -825,6 +839,7 @@ do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict) prev_pos_gt = n_pos_gt; } + casereader_destroy (r); r = casewriter_make_reader (w); } @@ -849,6 +864,7 @@ do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict) prev_neg_lt = n_neg_lt; } + casereader_destroy (r); r = casewriter_make_reader (w); } @@ -856,7 +872,7 @@ do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict) struct ccase *prev_case = NULL; for ( ; (c = casereader_read (r) ); case_unref (c)) { - const struct ccase *next_case = casereader_peek (r, 0); + struct ccase *next_case = casereader_peek (r, 0); const double j = case_data_idx (c, VALUE)->f; double n_pos_eq = case_data_idx (c, N_POS_EQ)->f; @@ -890,9 +906,12 @@ do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict) } + case_unref (next_case); case_unref (prev_case); prev_case = case_clone (c); } + casereader_destroy (r); + case_unref (prev_case); rs[i].auc /= rs[i].n1 * rs[i].n2; if ( roc->invert ) @@ -914,7 +933,14 @@ do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict) casereader_destroy (positives); casereader_destroy (negatives); + caseproto_unref (n_proto); + subcase_destroy (&up_ordering); + subcase_destroy (&down_ordering); + output_roc (rs, roc); + + for (i = 0 ; i < roc->n_vars; ++i) + casereader_destroy (rs[i].cutpoint_rdr); free (rs); } @@ -973,7 +999,7 @@ show_auc (struct roc_state *rs, const struct cmd_roc *roc) { tab_text (tbl, 0, 2 + i, TAT_TITLE, var_to_string (roc->vars[i])); - tab_double (tbl, n_cols - n_fields, 2 + i, 0, rs[i].auc, NULL); + tab_double (tbl, n_cols - n_fields, 2 + i, 0, rs[i].auc, NULL, RC_OTHER); if ( roc->print_se ) { @@ -992,22 +1018,22 @@ show_auc (struct roc_state *rs, const struct cmd_roc *roc) tab_double (tbl, n_cols - 4, 2 + i, 0, se, - NULL); + NULL, RC_OTHER); ci = 1 - roc->ci / 100.0; yy = gsl_cdf_gaussian_Qinv (ci, se) ; tab_double (tbl, n_cols - 2, 2 + i, 0, rs[i].auc - yy, - NULL); + NULL, RC_OTHER); tab_double (tbl, n_cols - 1, 2 + i, 0, rs[i].auc + yy, - NULL); + NULL, RC_OTHER); tab_double (tbl, n_cols - 3, 2 + i, 0, 2.0 * gsl_cdf_ugaussian_Q (fabs ((rs[i].auc - 0.5 ) / sd_0_5)), - NULL); + NULL, RC_PVALUE); } } @@ -1054,11 +1080,11 @@ show_summary (const struct cmd_roc *roc) tab_text (tbl, 0, 3, TAB_LEFT, _("Negative")); - tab_double (tbl, 1, 2, 0, roc->pos, &F_8_0); - tab_double (tbl, 1, 3, 0, roc->neg, &F_8_0); + tab_double (tbl, 1, 2, 0, roc->pos, NULL, RC_INTEGER); + tab_double (tbl, 1, 3, 0, roc->neg, NULL, RC_INTEGER); - tab_double (tbl, 2, 2, 0, roc->pos_weighted, 0); - tab_double (tbl, 2, 3, 0, roc->neg_weighted, 0); + tab_double (tbl, 2, 2, 0, roc->pos_weighted, NULL, RC_OTHER); + tab_double (tbl, 2, 3, 0, roc->neg_weighted, NULL, RC_OTHER); tab_submit (tbl); } @@ -1135,10 +1161,10 @@ show_coords (struct roc_state *rs, const struct cmd_roc *roc) ); tab_double (tbl, n_cols - 3, x, 0, case_data_idx (cc, ROC_CUTPOINT)->f, - var_get_print_format (roc->vars[i])); + var_get_print_format (roc->vars[i]), RC_OTHER); - tab_double (tbl, n_cols - 2, x, 0, se, NULL); - tab_double (tbl, n_cols - 1, x, 0, 1 - sp, NULL); + tab_double (tbl, n_cols - 2, x, 0, se, NULL, RC_OTHER); + tab_double (tbl, n_cols - 1, x, 0, 1 - sp, NULL, RC_OTHER); } casereader_destroy (r);