1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <language/stats/roc.h>
21 #include <data/casegrouper.h>
22 #include <data/casereader.h>
23 #include <data/casewriter.h>
24 #include <data/dictionary.h>
25 #include <data/format.h>
26 #include <data/procedure.h>
27 #include <data/subcase.h>
28 #include <language/command.h>
29 #include <language/lexer/lexer.h>
30 #include <language/lexer/value-parser.h>
31 #include <language/lexer/variable-parser.h>
32 #include <libpspp/misc.h>
33 #include <math/sort.h>
34 #include <output/chart-item.h>
35 #include <output/charts/roc-chart.h>
36 #include <output/tab.h>
38 #include <gsl/gsl_cdf.h>
41 #define _(msgid) gettext (msgid)
42 #define N_(msgid) msgid
47 const struct variable **vars;
48 const struct dictionary *dict;
50 const struct variable *state_var;
51 union value state_value;
53 /* Plot the roc curve */
55 /* Plot the reference line */
62 bool bi_neg_exp; /* True iff the bi-negative exponential critieria
64 enum mv_class exclude;
66 bool invert ; /* True iff a smaller test result variable indicates
75 static int run_roc (struct dataset *ds, struct cmd_roc *roc);
78 cmd_roc (struct lexer *lexer, struct dataset *ds)
81 const struct dictionary *dict = dataset_dict (ds);
86 roc.print_coords = false;
89 roc.reference = false;
91 roc.bi_neg_exp = false;
93 roc.pos = roc.pos_weighted = 0;
94 roc.neg = roc.neg_weighted = 0;
95 roc.dict = dataset_dict (ds);
98 if (!parse_variables_const (lexer, dict, &roc.vars, &roc.n_vars,
99 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
102 if ( ! lex_force_match (lexer, T_BY))
107 roc.state_var = parse_variable (lexer, dict);
109 if ( !lex_force_match (lexer, '('))
114 value_init (&roc.state_value, var_get_width (roc.state_var));
115 parse_value (lexer, &roc.state_value, var_get_width (roc.state_var));
118 if ( !lex_force_match (lexer, ')'))
124 while (lex_token (lexer) != '.')
126 lex_match (lexer, '/');
127 if (lex_match_id (lexer, "MISSING"))
129 lex_match (lexer, '=');
130 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
132 if (lex_match_id (lexer, "INCLUDE"))
134 roc.exclude = MV_SYSTEM;
136 else if (lex_match_id (lexer, "EXCLUDE"))
138 roc.exclude = MV_ANY;
142 lex_error (lexer, NULL);
147 else if (lex_match_id (lexer, "PLOT"))
149 lex_match (lexer, '=');
150 if (lex_match_id (lexer, "CURVE"))
153 if (lex_match (lexer, '('))
155 roc.reference = true;
156 lex_force_match_id (lexer, "REFERENCE");
157 lex_force_match (lexer, ')');
160 else if (lex_match_id (lexer, "NONE"))
166 lex_error (lexer, NULL);
170 else if (lex_match_id (lexer, "PRINT"))
172 lex_match (lexer, '=');
173 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
175 if (lex_match_id (lexer, "SE"))
179 else if (lex_match_id (lexer, "COORDINATES"))
181 roc.print_coords = true;
185 lex_error (lexer, NULL);
190 else if (lex_match_id (lexer, "CRITERIA"))
192 lex_match (lexer, '=');
193 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
195 if (lex_match_id (lexer, "CUTOFF"))
197 lex_force_match (lexer, '(');
198 if (lex_match_id (lexer, "INCLUDE"))
200 roc.exclude = MV_SYSTEM;
202 else if (lex_match_id (lexer, "EXCLUDE"))
204 roc.exclude = MV_USER | MV_SYSTEM;
208 lex_error (lexer, NULL);
211 lex_force_match (lexer, ')');
213 else if (lex_match_id (lexer, "TESTPOS"))
215 lex_force_match (lexer, '(');
216 if (lex_match_id (lexer, "LARGE"))
220 else if (lex_match_id (lexer, "SMALL"))
226 lex_error (lexer, NULL);
229 lex_force_match (lexer, ')');
231 else if (lex_match_id (lexer, "CI"))
233 lex_force_match (lexer, '(');
234 lex_force_num (lexer);
235 roc.ci = lex_number (lexer);
237 lex_force_match (lexer, ')');
239 else if (lex_match_id (lexer, "DISTRIBUTION"))
241 lex_force_match (lexer, '(');
242 if (lex_match_id (lexer, "FREE"))
244 roc.bi_neg_exp = false;
246 else if (lex_match_id (lexer, "NEGEXPO"))
248 roc.bi_neg_exp = true;
252 lex_error (lexer, NULL);
255 lex_force_match (lexer, ')');
259 lex_error (lexer, NULL);
266 lex_error (lexer, NULL);
271 if ( ! run_roc (ds, &roc))
274 value_destroy (&roc.state_value, var_get_width (roc.state_var));
280 value_destroy (&roc.state_value, var_get_width (roc.state_var));
289 do_roc (struct cmd_roc *roc, struct casereader *group, struct dictionary *dict);
293 run_roc (struct dataset *ds, struct cmd_roc *roc)
295 struct dictionary *dict = dataset_dict (ds);
297 struct casereader *group;
299 struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict);
300 while (casegrouper_get_next_group (grouper, &group))
302 do_roc (roc, group, dataset_dict (ds));
304 ok = casegrouper_destroy (grouper);
305 ok = proc_commit (ds) && ok;
312 dump_casereader (struct casereader *reader)
315 struct casereader *r = casereader_clone (reader);
317 for ( ; (c = casereader_read (r) ); case_unref (c))
320 for (i = 0 ; i < case_get_value_cnt (c); ++i)
322 printf ("%g ", case_data_idx (c, i)->f);
327 casereader_destroy (r);
333 Return true iff the state variable indicates that C has positive actual state.
335 As a side effect, this function also accumulates the roc->{pos,neg} and
336 roc->{pos,neg}_weighted counts.
339 match_positives (const struct ccase *c, void *aux)
341 struct cmd_roc *roc = aux;
342 const struct variable *wv = dict_get_weight (roc->dict);
343 const double weight = wv ? case_data (c, wv)->f : 1.0;
345 const bool positive =
346 ( 0 == value_compare_3way (case_data (c, roc->state_var), &roc->state_value,
347 var_get_width (roc->state_var)));
352 roc->pos_weighted += weight;
357 roc->neg_weighted += weight;
368 /* Some intermediate state for calculating the cutpoints and the
369 standard error values */
372 double auc; /* Area under the curve */
374 double n1; /* total weight of positives */
375 double n2; /* total weight of negatives */
377 /* intermediates for standard error */
381 /* intermediates for cutpoints */
382 struct casewriter *cutpoint_wtr;
383 struct casereader *cutpoint_rdr;
390 Return a new casereader based upon CUTPOINT_RDR.
391 The number of "positive" cases are placed into
392 the position TRUE_INDEX, and the number of "negative" cases
394 POS_COND and RESULT determine the semantics of what is
396 WEIGHT is the value of a single count.
398 static struct casereader *
399 accumulate_counts (struct casereader *cutpoint_rdr,
400 double result, double weight,
401 bool (*pos_cond) (double, double),
402 int true_index, int false_index)
404 const struct caseproto *proto = casereader_get_proto (cutpoint_rdr);
405 struct casewriter *w =
406 autopaging_writer_create (proto);
407 struct casereader *r = casereader_clone (cutpoint_rdr);
409 double prev_cp = SYSMIS;
411 for ( ; (cpc = casereader_read (r) ); case_unref (cpc))
413 struct ccase *new_case;
414 const double cp = case_data_idx (cpc, ROC_CUTPOINT)->f;
416 assert (cp != SYSMIS);
418 /* We don't want duplicates here */
422 new_case = case_clone (cpc);
424 if ( pos_cond (result, cp))
425 case_data_rw_idx (new_case, true_index)->f += weight;
427 case_data_rw_idx (new_case, false_index)->f += weight;
431 casewriter_write (w, new_case);
433 casereader_destroy (r);
435 return casewriter_make_reader (w);
440 static void output_roc (struct roc_state *rs, const struct cmd_roc *roc);
443 This function does 3 things:
445 1. Counts the number of cases which are equal to every other case in READER,
446 and those cases for which the relationship between it and every other case
447 satifies PRED (normally either > or <). VAR is variable defining a case's value
450 2. Counts the number of true and false cases in reader, and populates
451 CUTPOINT_RDR accordingly. TRUE_INDEX and FALSE_INDEX are the indices
452 which receive these values. POS_COND is the condition defining true
455 3. CC is filled with the cumulative weight of all cases of READER.
457 static struct casereader *
458 process_group (const struct variable *var, struct casereader *reader,
459 bool (*pred) (double, double),
460 const struct dictionary *dict,
462 struct casereader **cutpoint_rdr,
463 bool (*pos_cond) (double, double),
467 const struct variable *w = dict_get_weight (dict);
469 struct casereader *r1 =
470 casereader_create_distinct (sort_execute_1var (reader, var), var, w);
472 const int weight_idx = w ? var_get_case_index (w) :
473 caseproto_get_n_widths (casereader_get_proto (r1)) - 1;
477 struct casereader *rclone = casereader_clone (r1);
478 struct casewriter *wtr;
479 struct caseproto *proto = caseproto_create ();
481 proto = caseproto_add_width (proto, 0);
482 proto = caseproto_add_width (proto, 0);
483 proto = caseproto_add_width (proto, 0);
485 wtr = autopaging_writer_create (proto);
489 for ( ; (c1 = casereader_read (r1) ); case_unref (c1))
491 struct ccase *new_case = case_create (proto);
493 struct casereader *r2 = casereader_clone (rclone);
495 const double weight1 = case_data_idx (c1, weight_idx)->f;
496 const double d1 = case_data (c1, var)->f;
500 *cutpoint_rdr = accumulate_counts (*cutpoint_rdr, d1, weight1,
502 true_index, false_index);
506 for ( ; (c2 = casereader_read (r2) ); case_unref (c2))
508 const double d2 = case_data (c2, var)->f;
509 const double weight2 = case_data_idx (c2, weight_idx)->f;
516 else if ( pred (d2, d1))
522 case_data_rw_idx (new_case, VALUE)->f = d1;
523 case_data_rw_idx (new_case, N_EQ)->f = n_eq;
524 case_data_rw_idx (new_case, N_PRED)->f = n_pred;
526 casewriter_write (wtr, new_case);
528 casereader_destroy (r2);
531 casereader_destroy (r1);
532 casereader_destroy (rclone);
534 return casewriter_make_reader (wtr);
537 /* Some more indeces into case data */
538 #define N_POS_EQ 1 /* number of positive cases with values equal to n */
539 #define N_POS_GT 2 /* number of postive cases with values greater than n */
540 #define N_NEG_EQ 3 /* number of negative cases with values equal to n */
541 #define N_NEG_LT 4 /* number of negative cases with values less than n */
544 gt (double d1, double d2)
551 ge (double d1, double d2)
557 lt (double d1, double d2)
564 Return a casereader with width 3,
565 populated with cases based upon READER.
566 The cases will have the values:
567 (N, number of cases equal to N, number of cases greater than N)
568 As a side effect, update RS->n1 with the number of positive cases.
570 static struct casereader *
571 process_positive_group (const struct variable *var, struct casereader *reader,
572 const struct dictionary *dict,
573 struct roc_state *rs)
575 return process_group (var, reader, gt, dict, &rs->n1,
582 Return a casereader with width 3,
583 populated with cases based upon READER.
584 The cases will have the values:
585 (N, number of cases equal to N, number of cases less than N)
586 As a side effect, update RS->n2 with the number of negative cases.
588 static struct casereader *
589 process_negative_group (const struct variable *var, struct casereader *reader,
590 const struct dictionary *dict,
591 struct roc_state *rs)
593 return process_group (var, reader, lt, dict, &rs->n2,
603 append_cutpoint (struct casewriter *writer, double cutpoint)
605 struct ccase *cc = case_create (casewriter_get_proto (writer));
607 case_data_rw_idx (cc, ROC_CUTPOINT)->f = cutpoint;
608 case_data_rw_idx (cc, ROC_TP)->f = 0;
609 case_data_rw_idx (cc, ROC_FN)->f = 0;
610 case_data_rw_idx (cc, ROC_TN)->f = 0;
611 case_data_rw_idx (cc, ROC_FP)->f = 0;
613 casewriter_write (writer, cc);
618 Create and initialise the rs[x].cutpoint_rdr casereaders. That is, the readers will
619 be created with width 5, ready to take the values (cutpoint, ROC_TP, ROC_FN, ROC_TN, ROC_FP), and the
620 reader will be populated with its final number of cases.
621 However on exit from this function, only ROC_CUTPOINT entries will be set to their final
622 value. The other entries will be initialised to zero.
625 prepare_cutpoints (struct cmd_roc *roc, struct roc_state *rs, struct casereader *input)
628 struct casereader *r = casereader_clone (input);
630 struct caseproto *proto = caseproto_create ();
632 struct subcase ordering;
633 subcase_init (&ordering, ROC_CUTPOINT, 0, SC_ASCEND);
635 proto = caseproto_add_width (proto, 0); /* cutpoint */
636 proto = caseproto_add_width (proto, 0); /* ROC_TP */
637 proto = caseproto_add_width (proto, 0); /* ROC_FN */
638 proto = caseproto_add_width (proto, 0); /* ROC_TN */
639 proto = caseproto_add_width (proto, 0); /* ROC_FP */
641 for (i = 0 ; i < roc->n_vars; ++i)
643 rs[i].cutpoint_wtr = sort_create_writer (&ordering, proto);
644 rs[i].prev_result = SYSMIS;
645 rs[i].max = -DBL_MAX;
649 for (; (c = casereader_read (r)) != NULL; case_unref (c))
651 for (i = 0 ; i < roc->n_vars; ++i)
653 const union value *v = case_data (c, roc->vars[i]);
654 const double result = v->f;
656 if ( mv_is_value_missing (var_get_missing_values (roc->vars[i]), v, roc->exclude))
659 minimize (&rs[i].min, result);
660 maximize (&rs[i].max, result);
662 if ( rs[i].prev_result != SYSMIS && rs[i].prev_result != result )
664 const double mean = (result + rs[i].prev_result ) / 2.0;
665 append_cutpoint (rs[i].cutpoint_wtr, mean);
668 rs[i].prev_result = result;
671 casereader_destroy (r);
674 /* Append the min and max cutpoints */
675 for (i = 0 ; i < roc->n_vars; ++i)
677 append_cutpoint (rs[i].cutpoint_wtr, rs[i].min - 1);
678 append_cutpoint (rs[i].cutpoint_wtr, rs[i].max + 1);
680 rs[i].cutpoint_rdr = casewriter_make_reader (rs[i].cutpoint_wtr);
685 do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict)
689 struct roc_state *rs = xcalloc (roc->n_vars, sizeof *rs);
691 struct casereader *negatives = NULL;
692 struct casereader *positives = NULL;
694 struct caseproto *n_proto = caseproto_create ();
696 struct subcase up_ordering;
697 struct subcase down_ordering;
699 struct casewriter *neg_wtr = NULL;
701 struct casereader *input = casereader_create_filter_missing (reader,
702 roc->vars, roc->n_vars,
707 input = casereader_create_filter_missing (input,
713 neg_wtr = autopaging_writer_create (casereader_get_proto (input));
715 prepare_cutpoints (roc, rs, input);
718 /* Separate the positive actual state cases from the negative ones */
720 casereader_create_filter_func (input,
726 n_proto = caseproto_create ();
728 n_proto = caseproto_add_width (n_proto, 0);
729 n_proto = caseproto_add_width (n_proto, 0);
730 n_proto = caseproto_add_width (n_proto, 0);
731 n_proto = caseproto_add_width (n_proto, 0);
732 n_proto = caseproto_add_width (n_proto, 0);
734 subcase_init (&up_ordering, VALUE, 0, SC_ASCEND);
735 subcase_init (&down_ordering, VALUE, 0, SC_DESCEND);
737 for (i = 0 ; i < roc->n_vars; ++i)
739 struct casewriter *w = NULL;
740 struct casereader *r = NULL;
745 struct casereader *n_neg ;
746 const struct variable *var = roc->vars[i];
748 struct casereader *neg ;
749 struct casereader *pos = casereader_clone (positives);
752 struct casereader *n_pos =
753 process_positive_group (var, pos, dict, &rs[i]);
755 if ( negatives == NULL)
757 negatives = casewriter_make_reader (neg_wtr);
760 neg = casereader_clone (negatives);
762 n_neg = process_negative_group (var, neg, dict, &rs[i]);
765 /* Merge the n_pos and n_neg casereaders */
766 w = sort_create_writer (&up_ordering, n_proto);
767 for ( ; (cpos = casereader_read (n_pos) ); case_unref (cpos))
769 struct ccase *pos_case = case_create (n_proto);
771 const double jpos = case_data_idx (cpos, VALUE)->f;
773 while ((cneg = casereader_read (n_neg)))
775 struct ccase *nc = case_create (n_proto);
777 const double jneg = case_data_idx (cneg, VALUE)->f;
779 case_data_rw_idx (nc, VALUE)->f = jneg;
780 case_data_rw_idx (nc, N_POS_EQ)->f = 0;
782 case_data_rw_idx (nc, N_POS_GT)->f = SYSMIS;
784 *case_data_rw_idx (nc, N_NEG_EQ) = *case_data_idx (cneg, N_EQ);
785 *case_data_rw_idx (nc, N_NEG_LT) = *case_data_idx (cneg, N_PRED);
787 casewriter_write (w, nc);
794 case_data_rw_idx (pos_case, VALUE)->f = jpos;
795 *case_data_rw_idx (pos_case, N_POS_EQ) = *case_data_idx (cpos, N_EQ);
796 *case_data_rw_idx (pos_case, N_POS_GT) = *case_data_idx (cpos, N_PRED);
797 case_data_rw_idx (pos_case, N_NEG_EQ)->f = 0;
798 case_data_rw_idx (pos_case, N_NEG_LT)->f = SYSMIS;
800 casewriter_write (w, pos_case);
803 /* These aren't used anymore */
807 r = casewriter_make_reader (w);
809 /* Propagate the N_POS_GT values from the positive cases
810 to the negative ones */
812 double prev_pos_gt = rs[i].n1;
813 w = sort_create_writer (&down_ordering, n_proto);
815 for ( ; (c = casereader_read (r) ); case_unref (c))
817 double n_pos_gt = case_data_idx (c, N_POS_GT)->f;
818 struct ccase *nc = case_clone (c);
820 if ( n_pos_gt == SYSMIS)
822 n_pos_gt = prev_pos_gt;
823 case_data_rw_idx (nc, N_POS_GT)->f = n_pos_gt;
826 casewriter_write (w, nc);
827 prev_pos_gt = n_pos_gt;
830 r = casewriter_make_reader (w);
833 /* Propagate the N_NEG_LT values from the negative cases
834 to the positive ones */
836 double prev_neg_lt = rs[i].n2;
837 w = sort_create_writer (&up_ordering, n_proto);
839 for ( ; (c = casereader_read (r) ); case_unref (c))
841 double n_neg_lt = case_data_idx (c, N_NEG_LT)->f;
842 struct ccase *nc = case_clone (c);
844 if ( n_neg_lt == SYSMIS)
846 n_neg_lt = prev_neg_lt;
847 case_data_rw_idx (nc, N_NEG_LT)->f = n_neg_lt;
850 casewriter_write (w, nc);
851 prev_neg_lt = n_neg_lt;
854 r = casewriter_make_reader (w);
858 struct ccase *prev_case = NULL;
859 for ( ; (c = casereader_read (r) ); case_unref (c))
861 const struct ccase *next_case = casereader_peek (r, 0);
863 const double j = case_data_idx (c, VALUE)->f;
864 double n_pos_eq = case_data_idx (c, N_POS_EQ)->f;
865 double n_pos_gt = case_data_idx (c, N_POS_GT)->f;
866 double n_neg_eq = case_data_idx (c, N_NEG_EQ)->f;
867 double n_neg_lt = case_data_idx (c, N_NEG_LT)->f;
869 if ( prev_case && j == case_data_idx (prev_case, VALUE)->f)
871 if ( 0 == case_data_idx (c, N_POS_EQ)->f)
873 n_pos_eq = case_data_idx (prev_case, N_POS_EQ)->f;
874 n_pos_gt = case_data_idx (prev_case, N_POS_GT)->f;
877 if ( 0 == case_data_idx (c, N_NEG_EQ)->f)
879 n_neg_eq = case_data_idx (prev_case, N_NEG_EQ)->f;
880 n_neg_lt = case_data_idx (prev_case, N_NEG_LT)->f;
884 if ( NULL == next_case || j != case_data_idx (next_case, VALUE)->f)
886 rs[i].auc += n_pos_gt * n_neg_eq + (n_pos_eq * n_neg_eq) / 2.0;
889 n_neg_eq * ( pow2 (n_pos_gt) + n_pos_gt * n_pos_eq + pow2 (n_pos_eq) / 3.0);
891 n_pos_eq * ( pow2 (n_neg_lt) + n_neg_lt * n_neg_eq + pow2 (n_neg_eq) / 3.0);
895 case_unref (prev_case);
896 prev_case = case_clone (c);
899 rs[i].auc /= rs[i].n1 * rs[i].n2;
901 rs[i].auc = 1 - rs[i].auc;
903 if ( roc->bi_neg_exp )
905 rs[i].q1hat = rs[i].auc / ( 2 - rs[i].auc);
906 rs[i].q2hat = 2 * pow2 (rs[i].auc) / ( 1 + rs[i].auc);
910 rs[i].q1hat /= rs[i].n2 * pow2 (rs[i].n1);
911 rs[i].q2hat /= rs[i].n1 * pow2 (rs[i].n2);
916 casereader_destroy (positives);
917 casereader_destroy (negatives);
919 output_roc (rs, roc);
925 show_auc (struct roc_state *rs, const struct cmd_roc *roc)
928 const int n_fields = roc->print_se ? 5 : 1;
929 const int n_cols = roc->n_vars > 1 ? n_fields + 1: n_fields;
930 const int n_rows = 2 + roc->n_vars;
931 struct tab_table *tbl = tab_create (n_cols, n_rows);
933 if ( roc->n_vars > 1)
934 tab_title (tbl, _("Area Under the Curve"));
936 tab_title (tbl, _("Area Under the Curve (%s)"), var_to_string (roc->vars[0]));
938 tab_headers (tbl, n_cols - n_fields, 0, 1, 0);
941 tab_text (tbl, n_cols - n_fields, 1, TAT_TITLE, _("Area"));
943 tab_hline (tbl, TAL_2, 0, n_cols - 1, 2);
954 tab_text (tbl, n_cols - 4, 1, TAT_TITLE, _("Std. Error"));
955 tab_text (tbl, n_cols - 3, 1, TAT_TITLE, _("Asymptotic Sig."));
957 tab_text (tbl, n_cols - 2, 1, TAT_TITLE, _("Lower Bound"));
958 tab_text (tbl, n_cols - 1, 1, TAT_TITLE, _("Upper Bound"));
960 tab_joint_text_format (tbl, n_cols - 2, 0, 4, 0,
961 TAT_TITLE | TAB_CENTER,
962 _("Asymp. %g%% Confidence Interval"), roc->ci);
963 tab_vline (tbl, 0, n_cols - 1, 0, 0);
964 tab_hline (tbl, TAL_1, n_cols - 2, n_cols - 1, 1);
967 if ( roc->n_vars > 1)
968 tab_text (tbl, 0, 1, TAT_TITLE, _("Variable under test"));
970 if ( roc->n_vars > 1)
971 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
974 for ( i = 0 ; i < roc->n_vars ; ++i )
976 tab_text (tbl, 0, 2 + i, TAT_TITLE, var_to_string (roc->vars[i]));
978 tab_double (tbl, n_cols - n_fields, 2 + i, 0, rs[i].auc, NULL);
983 const double sd_0_5 = sqrt ((rs[i].n1 + rs[i].n2 + 1) /
984 (12 * rs[i].n1 * rs[i].n2));
988 se = rs[i].auc * (1 - rs[i].auc) + (rs[i].n1 - 1) * (rs[i].q1hat - pow2 (rs[i].auc)) +
989 (rs[i].n2 - 1) * (rs[i].q2hat - pow2 (rs[i].auc));
991 se /= rs[i].n1 * rs[i].n2;
995 tab_double (tbl, n_cols - 4, 2 + i, 0,
999 ci = 1 - roc->ci / 100.0;
1000 yy = gsl_cdf_gaussian_Qinv (ci, se) ;
1002 tab_double (tbl, n_cols - 2, 2 + i, 0,
1006 tab_double (tbl, n_cols - 1, 2 + i, 0,
1010 tab_double (tbl, n_cols - 3, 2 + i, 0,
1011 2.0 * gsl_cdf_ugaussian_Q (fabs ((rs[i].auc - 0.5 ) / sd_0_5)),
1021 show_summary (const struct cmd_roc *roc)
1023 const int n_cols = 3;
1024 const int n_rows = 4;
1025 struct tab_table *tbl = tab_create (n_cols, n_rows);
1027 tab_title (tbl, _("Case Summary"));
1029 tab_headers (tbl, 1, 0, 2, 0);
1038 tab_hline (tbl, TAL_2, 0, n_cols - 1, 2);
1039 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
1042 tab_hline (tbl, TAL_2, 1, n_cols - 1, 1);
1043 tab_vline (tbl, TAL_1, 2, 1, n_rows - 1);
1046 tab_text (tbl, 0, 1, TAT_TITLE | TAB_LEFT, var_to_string (roc->state_var));
1047 tab_text (tbl, 1, 1, TAT_TITLE, _("Unweighted"));
1048 tab_text (tbl, 2, 1, TAT_TITLE, _("Weighted"));
1050 tab_joint_text (tbl, 1, 0, 2, 0,
1051 TAT_TITLE | TAB_CENTER,
1052 _("Valid N (listwise)"));
1055 tab_text (tbl, 0, 2, TAB_LEFT, _("Positive"));
1056 tab_text (tbl, 0, 3, TAB_LEFT, _("Negative"));
1059 tab_double (tbl, 1, 2, 0, roc->pos, &F_8_0);
1060 tab_double (tbl, 1, 3, 0, roc->neg, &F_8_0);
1062 tab_double (tbl, 2, 2, 0, roc->pos_weighted, 0);
1063 tab_double (tbl, 2, 3, 0, roc->neg_weighted, 0);
1070 show_coords (struct roc_state *rs, const struct cmd_roc *roc)
1074 const int n_cols = roc->n_vars > 1 ? 4 : 3;
1076 struct tab_table *tbl ;
1078 for (i = 0; i < roc->n_vars; ++i)
1079 n_rows += casereader_count_cases (rs[i].cutpoint_rdr);
1081 tbl = tab_create (n_cols, n_rows);
1083 if ( roc->n_vars > 1)
1084 tab_title (tbl, _("Coordinates of the Curve"));
1086 tab_title (tbl, _("Coordinates of the Curve (%s)"), var_to_string (roc->vars[0]));
1089 tab_headers (tbl, 1, 0, 1, 0);
1091 tab_hline (tbl, TAL_2, 0, n_cols - 1, 1);
1093 if ( roc->n_vars > 1)
1094 tab_text (tbl, 0, 0, TAT_TITLE, _("Test variable"));
1096 tab_text (tbl, n_cols - 3, 0, TAT_TITLE, _("Positive if greater than or equal to"));
1097 tab_text (tbl, n_cols - 2, 0, TAT_TITLE, _("Sensitivity"));
1098 tab_text (tbl, n_cols - 1, 0, TAT_TITLE, _("1 - Specificity"));
1107 if ( roc->n_vars > 1)
1108 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
1110 for (i = 0; i < roc->n_vars; ++i)
1113 struct casereader *r = casereader_clone (rs[i].cutpoint_rdr);
1115 if ( roc->n_vars > 1)
1116 tab_text (tbl, 0, x, TAT_TITLE, var_to_string (roc->vars[i]));
1119 tab_hline (tbl, TAL_1, 0, n_cols - 1, x);
1122 for (; (cc = casereader_read (r)) != NULL;
1123 case_unref (cc), x++)
1125 const double se = case_data_idx (cc, ROC_TP)->f /
1127 case_data_idx (cc, ROC_TP)->f
1129 case_data_idx (cc, ROC_FN)->f
1132 const double sp = case_data_idx (cc, ROC_TN)->f /
1134 case_data_idx (cc, ROC_TN)->f
1136 case_data_idx (cc, ROC_FP)->f
1139 tab_double (tbl, n_cols - 3, x, 0, case_data_idx (cc, ROC_CUTPOINT)->f,
1140 var_get_print_format (roc->vars[i]));
1142 tab_double (tbl, n_cols - 2, x, 0, se, NULL);
1143 tab_double (tbl, n_cols - 1, x, 0, 1 - sp, NULL);
1146 casereader_destroy (r);
1154 output_roc (struct roc_state *rs, const struct cmd_roc *roc)
1160 struct roc_chart *rc;
1163 rc = roc_chart_create (roc->reference);
1164 for (i = 0; i < roc->n_vars; i++)
1165 roc_chart_add_var (rc, var_get_name (roc->vars[i]),
1166 rs[i].cutpoint_rdr);
1167 roc_chart_submit (rc);
1172 if ( roc->print_coords )
1173 show_coords (rs, roc);