1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <data/procedure.h>
20 #include <language/lexer/variable-parser.h>
21 #include <language/lexer/value-parser.h>
22 #include <language/command.h>
23 #include <language/lexer/lexer.h>
25 #include <data/casegrouper.h>
26 #include <data/casereader.h>
27 #include <data/casewriter.h>
28 #include <data/dictionary.h>
29 #include <data/format.h>
30 #include <math/sort.h>
31 #include <data/subcase.h>
34 #include <libpspp/misc.h>
36 #include <gsl/gsl_cdf.h>
37 #include <output/table.h>
39 #include <output/charts/plot-chart.h>
40 #include <output/charts/cartesian.h>
43 #define _(msgid) gettext (msgid)
44 #define N_(msgid) msgid
49 const struct variable **vars;
50 const struct dictionary *dict;
52 const struct variable *state_var ;
53 union value state_value;
55 /* Plot the roc curve */
57 /* Plot the reference line */
64 bool bi_neg_exp; /* True iff the bi-negative exponential critieria
66 enum mv_class exclude;
68 bool invert ; /* True iff a smaller test result variable indicates
77 static int run_roc (struct dataset *ds, struct cmd_roc *roc);
80 cmd_roc (struct lexer *lexer, struct dataset *ds)
83 const struct dictionary *dict = dataset_dict (ds);
88 roc.print_coords = false;
91 roc.reference = false;
93 roc.bi_neg_exp = false;
95 roc.pos = roc.pos_weighted = 0;
96 roc.neg = roc.neg_weighted = 0;
97 roc.dict = dataset_dict (ds);
99 if (!parse_variables_const (lexer, dict, &roc.vars, &roc.n_vars,
100 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
103 if ( ! lex_force_match (lexer, T_BY))
108 roc.state_var = parse_variable (lexer, dict);
110 if ( !lex_force_match (lexer, '('))
115 parse_value (lexer, &roc.state_value, var_get_width (roc.state_var));
118 if ( !lex_force_match (lexer, ')'))
124 while (lex_token (lexer) != '.')
126 lex_match (lexer, '/');
127 if (lex_match_id (lexer, "MISSING"))
129 lex_match (lexer, '=');
130 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
132 if (lex_match_id (lexer, "INCLUDE"))
134 roc.exclude = MV_SYSTEM;
136 else if (lex_match_id (lexer, "EXCLUDE"))
138 roc.exclude = MV_ANY;
142 lex_error (lexer, NULL);
147 else if (lex_match_id (lexer, "PLOT"))
149 lex_match (lexer, '=');
150 if (lex_match_id (lexer, "CURVE"))
153 if (lex_match (lexer, '('))
155 roc.reference = true;
156 lex_force_match_id (lexer, "REFERENCE");
157 lex_force_match (lexer, ')');
160 else if (lex_match_id (lexer, "NONE"))
166 lex_error (lexer, NULL);
170 else if (lex_match_id (lexer, "PRINT"))
172 lex_match (lexer, '=');
173 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
175 if (lex_match_id (lexer, "SE"))
179 else if (lex_match_id (lexer, "COORDINATES"))
181 roc.print_coords = true;
185 lex_error (lexer, NULL);
190 else if (lex_match_id (lexer, "CRITERIA"))
192 lex_match (lexer, '=');
193 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
195 if (lex_match_id (lexer, "CUTOFF"))
197 lex_force_match (lexer, '(');
198 if (lex_match_id (lexer, "INCLUDE"))
200 roc.exclude = MV_SYSTEM;
202 else if (lex_match_id (lexer, "EXCLUDE"))
204 roc.exclude = MV_USER | MV_SYSTEM;
208 lex_error (lexer, NULL);
211 lex_force_match (lexer, ')');
213 else if (lex_match_id (lexer, "TESTPOS"))
215 lex_force_match (lexer, '(');
216 if (lex_match_id (lexer, "LARGE"))
220 else if (lex_match_id (lexer, "SMALL"))
226 lex_error (lexer, NULL);
229 lex_force_match (lexer, ')');
231 else if (lex_match_id (lexer, "CI"))
233 lex_force_match (lexer, '(');
234 lex_force_num (lexer);
235 roc.ci = lex_number (lexer);
237 lex_force_match (lexer, ')');
239 else if (lex_match_id (lexer, "DISTRIBUTION"))
241 lex_force_match (lexer, '(');
242 if (lex_match_id (lexer, "FREE"))
244 roc.bi_neg_exp = false;
246 else if (lex_match_id (lexer, "NEGEXPO"))
248 roc.bi_neg_exp = true;
252 lex_error (lexer, NULL);
255 lex_force_match (lexer, ')');
259 lex_error (lexer, NULL);
266 lex_error (lexer, NULL);
271 if ( ! run_roc (ds, &roc))
286 do_roc (struct cmd_roc *roc, struct casereader *group, struct dictionary *dict);
290 run_roc (struct dataset *ds, struct cmd_roc *roc)
292 struct dictionary *dict = dataset_dict (ds);
294 struct casereader *group;
296 struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict);
297 while (casegrouper_get_next_group (grouper, &group))
299 do_roc (roc, group, dataset_dict (ds));
301 ok = casegrouper_destroy (grouper);
302 ok = proc_commit (ds) && ok;
309 dump_casereader (struct casereader *reader)
312 struct casereader *r = casereader_clone (reader);
314 for ( ; (c = casereader_read (r) ); case_unref (c))
317 for (i = 0 ; i < case_get_value_cnt (c); ++i)
319 printf ("%g ", case_data_idx (c, i)->f);
324 casereader_destroy (r);
330 Return true iff the state variable indicates that C has positive actual state.
332 As a side effect, this function also accumulates the roc->{pos,neg} and
333 roc->{pos,neg}_weighted counts.
336 match_positives (const struct ccase *c, void *aux)
338 struct cmd_roc *roc = aux;
339 const struct variable *wv = dict_get_weight (roc->dict);
340 const double weight = wv ? case_data (c, wv)->f : 1.0;
342 const bool positive =
343 ( 0 == value_compare_3way (case_data (c, roc->state_var), &roc->state_value,
344 var_get_width (roc->state_var)));
349 roc->pos_weighted += weight;
354 roc->neg_weighted += weight;
365 /* Some intermediate state for calculating the cutpoints and the
366 standard error values */
369 double auc; /* Area under the curve */
371 double n1; /* total weight of positives */
372 double n2; /* total weight of negatives */
374 /* intermediates for standard error */
378 /* intermediates for cutpoints */
379 struct casewriter *cutpoint_wtr;
380 struct casereader *cutpoint_rdr;
394 Return a new casereader based upon CUTPOINT_RDR.
395 The number of "positive" cases are placed into
396 the position TRUE_INDEX, and the number of "negative" cases
398 POS_COND and RESULT determine the semantics of what is
400 WEIGHT is the value of a single count.
402 static struct casereader *
403 accumulate_counts (struct casereader *cutpoint_rdr,
404 double result, double weight,
405 bool (*pos_cond) (double, double),
406 int true_index, int false_index)
408 const struct caseproto *proto = casereader_get_proto (cutpoint_rdr);
409 struct casewriter *w =
410 autopaging_writer_create (proto);
411 struct casereader *r = casereader_clone (cutpoint_rdr);
413 double prev_cp = SYSMIS;
415 for ( ; (cpc = casereader_read (r) ); case_unref (cpc))
417 struct ccase *new_case;
418 const double cp = case_data_idx (cpc, CUTPOINT)->f;
420 assert (cp != SYSMIS);
422 /* We don't want duplicates here */
426 new_case = case_clone (cpc);
428 if ( pos_cond (result, cp))
429 case_data_rw_idx (new_case, true_index)->f += weight;
431 case_data_rw_idx (new_case, false_index)->f += weight;
435 casewriter_write (w, new_case);
437 casereader_destroy (r);
439 return casewriter_make_reader (w);
444 static void output_roc (struct roc_state *rs, const struct cmd_roc *roc);
447 This function does 3 things:
449 1. Counts the number of cases which are equal to every other case in READER,
450 and those cases for which the relationship between it and every other case
451 satifies PRED (normally either > or <). VAR is variable defining a case's value
454 2. Counts the number of true and false cases in reader, and populates
455 CUTPOINT_RDR accordingly. TRUE_INDEX and FALSE_INDEX are the indices
456 which receive these values. POS_COND is the condition defining true
459 3. CC is filled with the cumulative weight of all cases of READER.
461 static struct casereader *
462 process_group (const struct variable *var, struct casereader *reader,
463 bool (*pred) (double, double),
464 const struct dictionary *dict,
466 struct casereader **cutpoint_rdr,
467 bool (*pos_cond) (double, double),
471 const struct variable *w = dict_get_weight (dict);
473 struct casereader *r1 =
474 casereader_create_distinct (sort_execute_1var (reader, var), var, w);
476 const int weight_idx = w ? var_get_case_index (w) :
477 caseproto_get_n_widths (casereader_get_proto (r1)) - 1;
481 struct casereader *rclone = casereader_clone (r1);
482 struct casewriter *wtr;
483 struct caseproto *proto = caseproto_create ();
485 proto = caseproto_add_width (proto, 0);
486 proto = caseproto_add_width (proto, 0);
487 proto = caseproto_add_width (proto, 0);
489 wtr = autopaging_writer_create (proto);
493 for ( ; (c1 = casereader_read (r1) ); case_unref (c1))
495 struct ccase *new_case = case_create (proto);
497 struct casereader *r2 = casereader_clone (rclone);
499 const double weight1 = case_data_idx (c1, weight_idx)->f;
500 const double d1 = case_data (c1, var)->f;
504 *cutpoint_rdr = accumulate_counts (*cutpoint_rdr, d1, weight1,
506 true_index, false_index);
510 for ( ; (c2 = casereader_read (r2) ); case_unref (c2))
512 const double d2 = case_data (c2, var)->f;
513 const double weight2 = case_data_idx (c2, weight_idx)->f;
520 else if ( pred (d2, d1))
526 case_data_rw_idx (new_case, VALUE)->f = d1;
527 case_data_rw_idx (new_case, N_EQ)->f = n_eq;
528 case_data_rw_idx (new_case, N_PRED)->f = n_pred;
530 casewriter_write (wtr, new_case);
532 casereader_destroy (r2);
535 casereader_destroy (r1);
536 casereader_destroy (rclone);
538 return casewriter_make_reader (wtr);
541 /* Some more indeces into case data */
542 #define N_POS_EQ 1 /* number of positive cases with values equal to n */
543 #define N_POS_GT 2 /* number of postive cases with values greater than n */
544 #define N_NEG_EQ 3 /* number of negative cases with values equal to n */
545 #define N_NEG_LT 4 /* number of negative cases with values less than n */
548 gt (double d1, double d2)
555 ge (double d1, double d2)
561 lt (double d1, double d2)
568 Return a casereader with width 3,
569 populated with cases based upon READER.
570 The cases will have the values:
571 (N, number of cases equal to N, number of cases greater than N)
572 As a side effect, update RS->n1 with the number of positive cases.
574 static struct casereader *
575 process_positive_group (const struct variable *var, struct casereader *reader,
576 const struct dictionary *dict,
577 struct roc_state *rs)
579 return process_group (var, reader, gt, dict, &rs->n1,
586 Return a casereader with width 3,
587 populated with cases based upon READER.
588 The cases will have the values:
589 (N, number of cases equal to N, number of cases less than N)
590 As a side effect, update RS->n2 with the number of negative cases.
592 static struct casereader *
593 process_negative_group (const struct variable *var, struct casereader *reader,
594 const struct dictionary *dict,
595 struct roc_state *rs)
597 return process_group (var, reader, lt, dict, &rs->n2,
607 append_cutpoint (struct casewriter *writer, double cutpoint)
609 struct ccase *cc = case_create (casewriter_get_proto (writer));
611 case_data_rw_idx (cc, CUTPOINT)->f = cutpoint;
612 case_data_rw_idx (cc, TP)->f = 0;
613 case_data_rw_idx (cc, FN)->f = 0;
614 case_data_rw_idx (cc, TN)->f = 0;
615 case_data_rw_idx (cc, FP)->f = 0;
617 casewriter_write (writer, cc);
622 Create and initialise the rs[x].cutpoint_rdr casereaders. That is, the readers will
623 be created with width 5, ready to take the values (cutpoint, TP, FN, TN, FP), and the
624 reader will be populated with its final number of cases.
625 However on exit from this function, only CUTPOINT entries will be set to their final
626 value. The other entries will be initialised to zero.
629 prepare_cutpoints (struct cmd_roc *roc, struct roc_state *rs, struct casereader *input)
632 struct casereader *r = casereader_clone (input);
634 struct caseproto *proto = caseproto_create ();
636 struct subcase ordering;
637 subcase_init (&ordering, CUTPOINT, 0, SC_ASCEND);
639 proto = caseproto_add_width (proto, 0); /* cutpoint */
640 proto = caseproto_add_width (proto, 0); /* TP */
641 proto = caseproto_add_width (proto, 0); /* FN */
642 proto = caseproto_add_width (proto, 0); /* TN */
643 proto = caseproto_add_width (proto, 0); /* FP */
645 for (i = 0 ; i < roc->n_vars; ++i)
647 rs[i].cutpoint_wtr = sort_create_writer (&ordering, proto);
648 rs[i].prev_result = SYSMIS;
649 rs[i].max = -DBL_MAX;
653 for (; (c = casereader_read (r)) != NULL; case_unref (c))
655 for (i = 0 ; i < roc->n_vars; ++i)
657 const union value *v = case_data (c, roc->vars[i]);
658 const double result = v->f;
660 if ( mv_is_value_missing (var_get_missing_values (roc->vars[i]), v, roc->exclude))
663 minimize (&rs[i].min, result);
664 maximize (&rs[i].max, result);
666 if ( rs[i].prev_result != SYSMIS && rs[i].prev_result != result )
668 const double mean = (result + rs[i].prev_result ) / 2.0;
669 append_cutpoint (rs[i].cutpoint_wtr, mean);
672 rs[i].prev_result = result;
675 casereader_destroy (r);
678 /* Append the min and max cutpoints */
679 for (i = 0 ; i < roc->n_vars; ++i)
681 append_cutpoint (rs[i].cutpoint_wtr, rs[i].min - 1);
682 append_cutpoint (rs[i].cutpoint_wtr, rs[i].max + 1);
684 rs[i].cutpoint_rdr = casewriter_make_reader (rs[i].cutpoint_wtr);
689 do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict)
693 struct roc_state *rs = xcalloc (roc->n_vars, sizeof *rs);
695 struct casereader *negatives = NULL;
696 struct casereader *positives = NULL;
698 struct caseproto *n_proto = caseproto_create ();
700 struct subcase up_ordering;
701 struct subcase down_ordering;
703 struct casewriter *neg_wtr = NULL;
705 struct casereader *input = casereader_create_filter_missing (reader,
706 roc->vars, roc->n_vars,
711 input = casereader_create_filter_missing (input,
717 neg_wtr = autopaging_writer_create (casereader_get_proto (input));
719 prepare_cutpoints (roc, rs, input);
722 /* Separate the positive actual state cases from the negative ones */
724 casereader_create_filter_func (input,
730 n_proto = caseproto_create ();
732 n_proto = caseproto_add_width (n_proto, 0);
733 n_proto = caseproto_add_width (n_proto, 0);
734 n_proto = caseproto_add_width (n_proto, 0);
735 n_proto = caseproto_add_width (n_proto, 0);
736 n_proto = caseproto_add_width (n_proto, 0);
738 subcase_init (&up_ordering, VALUE, 0, SC_ASCEND);
739 subcase_init (&down_ordering, VALUE, 0, SC_DESCEND);
741 for (i = 0 ; i < roc->n_vars; ++i)
743 struct casewriter *w = NULL;
744 struct casereader *r = NULL;
749 struct casereader *n_neg ;
750 const struct variable *var = roc->vars[i];
752 struct casereader *neg ;
753 struct casereader *pos = casereader_clone (positives);
756 struct casereader *n_pos =
757 process_positive_group (var, pos, dict, &rs[i]);
759 if ( negatives == NULL)
761 negatives = casewriter_make_reader (neg_wtr);
764 neg = casereader_clone (negatives);
766 n_neg = process_negative_group (var, neg, dict, &rs[i]);
769 /* Merge the n_pos and n_neg casereaders */
770 w = sort_create_writer (&up_ordering, n_proto);
771 for ( ; (cpos = casereader_read (n_pos) ); case_unref (cpos))
773 struct ccase *pos_case = case_create (n_proto);
775 const double jpos = case_data_idx (cpos, VALUE)->f;
777 while ((cneg = casereader_read (n_neg)))
779 struct ccase *nc = case_create (n_proto);
781 const double jneg = case_data_idx (cneg, VALUE)->f;
783 case_data_rw_idx (nc, VALUE)->f = jneg;
784 case_data_rw_idx (nc, N_POS_EQ)->f = 0;
786 case_data_rw_idx (nc, N_POS_GT)->f = SYSMIS;
788 *case_data_rw_idx (nc, N_NEG_EQ) = *case_data_idx (cneg, N_EQ);
789 *case_data_rw_idx (nc, N_NEG_LT) = *case_data_idx (cneg, N_PRED);
791 casewriter_write (w, nc);
798 case_data_rw_idx (pos_case, VALUE)->f = jpos;
799 *case_data_rw_idx (pos_case, N_POS_EQ) = *case_data_idx (cpos, N_EQ);
800 *case_data_rw_idx (pos_case, N_POS_GT) = *case_data_idx (cpos, N_PRED);
801 case_data_rw_idx (pos_case, N_NEG_EQ)->f = 0;
802 case_data_rw_idx (pos_case, N_NEG_LT)->f = SYSMIS;
804 casewriter_write (w, pos_case);
807 /* These aren't used anymore */
811 r = casewriter_make_reader (w);
813 /* Propagate the N_POS_GT values from the positive cases
814 to the negative ones */
816 double prev_pos_gt = rs[i].n1;
817 w = sort_create_writer (&down_ordering, n_proto);
819 for ( ; (c = casereader_read (r) ); case_unref (c))
821 double n_pos_gt = case_data_idx (c, N_POS_GT)->f;
822 struct ccase *nc = case_clone (c);
824 if ( n_pos_gt == SYSMIS)
826 n_pos_gt = prev_pos_gt;
827 case_data_rw_idx (nc, N_POS_GT)->f = n_pos_gt;
830 casewriter_write (w, nc);
831 prev_pos_gt = n_pos_gt;
834 r = casewriter_make_reader (w);
837 /* Propagate the N_NEG_LT values from the negative cases
838 to the positive ones */
840 double prev_neg_lt = rs[i].n2;
841 w = sort_create_writer (&up_ordering, n_proto);
843 for ( ; (c = casereader_read (r) ); case_unref (c))
845 double n_neg_lt = case_data_idx (c, N_NEG_LT)->f;
846 struct ccase *nc = case_clone (c);
848 if ( n_neg_lt == SYSMIS)
850 n_neg_lt = prev_neg_lt;
851 case_data_rw_idx (nc, N_NEG_LT)->f = n_neg_lt;
854 casewriter_write (w, nc);
855 prev_neg_lt = n_neg_lt;
858 r = casewriter_make_reader (w);
862 struct ccase *prev_case = NULL;
863 for ( ; (c = casereader_read (r) ); case_unref (c))
865 const struct ccase *next_case = casereader_peek (r, 0);
867 const double j = case_data_idx (c, VALUE)->f;
868 double n_pos_eq = case_data_idx (c, N_POS_EQ)->f;
869 double n_pos_gt = case_data_idx (c, N_POS_GT)->f;
870 double n_neg_eq = case_data_idx (c, N_NEG_EQ)->f;
871 double n_neg_lt = case_data_idx (c, N_NEG_LT)->f;
873 if ( prev_case && j == case_data_idx (prev_case, VALUE)->f)
875 if ( 0 == case_data_idx (c, N_POS_EQ)->f)
877 n_pos_eq = case_data_idx (prev_case, N_POS_EQ)->f;
878 n_pos_gt = case_data_idx (prev_case, N_POS_GT)->f;
881 if ( 0 == case_data_idx (c, N_NEG_EQ)->f)
883 n_neg_eq = case_data_idx (prev_case, N_NEG_EQ)->f;
884 n_neg_lt = case_data_idx (prev_case, N_NEG_LT)->f;
888 if ( NULL == next_case || j != case_data_idx (next_case, VALUE)->f)
890 rs[i].auc += n_pos_gt * n_neg_eq + (n_pos_eq * n_neg_eq) / 2.0;
893 n_neg_eq * ( pow2 (n_pos_gt) + n_pos_gt * n_pos_eq + pow2 (n_pos_eq) / 3.0);
895 n_pos_eq * ( pow2 (n_neg_lt) + n_neg_lt * n_neg_eq + pow2 (n_neg_eq) / 3.0);
899 case_unref (prev_case);
900 prev_case = case_clone (c);
903 rs[i].auc /= rs[i].n1 * rs[i].n2;
905 rs[i].auc = 1 - rs[i].auc;
907 if ( roc->bi_neg_exp )
909 rs[i].q1hat = rs[i].auc / ( 2 - rs[i].auc);
910 rs[i].q2hat = 2 * pow2 (rs[i].auc) / ( 1 + rs[i].auc);
914 rs[i].q1hat /= rs[i].n2 * pow2 (rs[i].n1);
915 rs[i].q2hat /= rs[i].n1 * pow2 (rs[i].n2);
920 casereader_destroy (positives);
921 casereader_destroy (negatives);
923 output_roc (rs, roc);
929 show_auc (struct roc_state *rs, const struct cmd_roc *roc)
932 const int n_fields = roc->print_se ? 5 : 1;
933 const int n_cols = roc->n_vars > 1 ? n_fields + 1: n_fields;
934 const int n_rows = 2 + roc->n_vars;
935 struct tab_table *tbl = tab_create (n_cols, n_rows, 0);
937 if ( roc->n_vars > 1)
938 tab_title (tbl, _("Area Under the Curve"));
940 tab_title (tbl, _("Area Under the Curve (%s)"), var_to_string (roc->vars[0]));
942 tab_headers (tbl, n_cols - n_fields, 0, 1, 0);
944 tab_dim (tbl, tab_natural_dimensions, NULL);
946 tab_text (tbl, n_cols - n_fields, 1, TAT_TITLE, _("Area"));
948 tab_hline (tbl, TAL_2, 0, n_cols - 1, 2);
959 tab_text (tbl, n_cols - 4, 1, TAT_TITLE, _("Std. Error"));
960 tab_text (tbl, n_cols - 3, 1, TAT_TITLE, _("Asymptotic Sig."));
962 tab_text (tbl, n_cols - 2, 1, TAT_TITLE, _("Lower Bound"));
963 tab_text (tbl, n_cols - 1, 1, TAT_TITLE, _("Upper Bound"));
965 tab_joint_text_format (tbl, n_cols - 2, 0, 4, 0,
966 TAT_TITLE | TAB_CENTER,
967 _("Asymp. %g%% Confidence Interval"), roc->ci);
968 tab_vline (tbl, 0, n_cols - 1, 0, 0);
969 tab_hline (tbl, TAL_1, n_cols - 2, n_cols - 1, 1);
972 if ( roc->n_vars > 1)
973 tab_text (tbl, 0, 1, TAT_TITLE, _("Variable under test"));
975 if ( roc->n_vars > 1)
976 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
979 for ( i = 0 ; i < roc->n_vars ; ++i )
981 tab_text (tbl, 0, 2 + i, TAT_TITLE, var_to_string (roc->vars[i]));
983 tab_double (tbl, n_cols - n_fields, 2 + i, 0, rs[i].auc, NULL);
988 const double sd_0_5 = sqrt ((rs[i].n1 + rs[i].n2 + 1) /
989 (12 * rs[i].n1 * rs[i].n2));
993 se = rs[i].auc * (1 - rs[i].auc) + (rs[i].n1 - 1) * (rs[i].q1hat - pow2 (rs[i].auc)) +
994 (rs[i].n2 - 1) * (rs[i].q2hat - pow2 (rs[i].auc));
996 se /= rs[i].n1 * rs[i].n2;
1000 tab_double (tbl, n_cols - 4, 2 + i, 0,
1004 ci = 1 - roc->ci / 100.0;
1005 yy = gsl_cdf_gaussian_Qinv (ci, se) ;
1007 tab_double (tbl, n_cols - 2, 2 + i, 0,
1011 tab_double (tbl, n_cols - 1, 2 + i, 0,
1015 tab_double (tbl, n_cols - 3, 2 + i, 0,
1016 2.0 * gsl_cdf_ugaussian_Q (fabs ((rs[i].auc - 0.5 ) / sd_0_5)),
1026 show_summary (const struct cmd_roc *roc)
1028 const int n_cols = 3;
1029 const int n_rows = 4;
1030 struct tab_table *tbl = tab_create (n_cols, n_rows, 0);
1032 tab_title (tbl, _("Case Summary"));
1034 tab_headers (tbl, 1, 0, 2, 0);
1036 tab_dim (tbl, tab_natural_dimensions, NULL);
1045 tab_hline (tbl, TAL_2, 0, n_cols - 1, 2);
1046 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
1049 tab_hline (tbl, TAL_2, 1, n_cols - 1, 1);
1050 tab_vline (tbl, TAL_1, 2, 1, n_rows - 1);
1053 tab_text (tbl, 0, 1, TAT_TITLE | TAB_LEFT, var_to_string (roc->state_var));
1054 tab_text (tbl, 1, 1, TAT_TITLE, _("Unweighted"));
1055 tab_text (tbl, 2, 1, TAT_TITLE, _("Weighted"));
1057 tab_joint_text (tbl, 1, 0, 2, 0,
1058 TAT_TITLE | TAB_CENTER,
1059 _("Valid N (listwise)"));
1062 tab_text (tbl, 0, 2, TAB_LEFT, _("Positive"));
1063 tab_text (tbl, 0, 3, TAB_LEFT, _("Negative"));
1066 tab_double (tbl, 1, 2, 0, roc->pos, &F_8_0);
1067 tab_double (tbl, 1, 3, 0, roc->neg, &F_8_0);
1069 tab_double (tbl, 2, 2, 0, roc->pos_weighted, 0);
1070 tab_double (tbl, 2, 3, 0, roc->neg_weighted, 0);
1077 show_coords (struct roc_state *rs, const struct cmd_roc *roc)
1081 const int n_cols = roc->n_vars > 1 ? 4 : 3;
1083 struct tab_table *tbl ;
1085 for (i = 0; i < roc->n_vars; ++i)
1086 n_rows += casereader_count_cases (rs[i].cutpoint_rdr);
1088 tbl = tab_create (n_cols, n_rows, 0);
1090 if ( roc->n_vars > 1)
1091 tab_title (tbl, _("Coordinates of the Curve"));
1093 tab_title (tbl, _("Coordinates of the Curve (%s)"), var_to_string (roc->vars[0]));
1096 tab_headers (tbl, 1, 0, 1, 0);
1098 tab_dim (tbl, tab_natural_dimensions, NULL);
1100 tab_hline (tbl, TAL_2, 0, n_cols - 1, 1);
1102 if ( roc->n_vars > 1)
1103 tab_text (tbl, 0, 0, TAT_TITLE, _("Test variable"));
1105 tab_text (tbl, n_cols - 3, 0, TAT_TITLE, _("Positive if greater than or equal to"));
1106 tab_text (tbl, n_cols - 2, 0, TAT_TITLE, _("Sensitivity"));
1107 tab_text (tbl, n_cols - 1, 0, TAT_TITLE, _("1 - Specificity"));
1116 if ( roc->n_vars > 1)
1117 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
1119 for (i = 0; i < roc->n_vars; ++i)
1122 struct casereader *r = casereader_clone (rs[i].cutpoint_rdr);
1124 if ( roc->n_vars > 1)
1125 tab_text (tbl, 0, x, TAT_TITLE, var_to_string (roc->vars[i]));
1128 tab_hline (tbl, TAL_1, 0, n_cols - 1, x);
1131 for (; (cc = casereader_read (r)) != NULL;
1132 case_unref (cc), x++)
1134 const double se = case_data_idx (cc, TP)->f /
1136 case_data_idx (cc, TP)->f
1138 case_data_idx (cc, FN)->f
1141 const double sp = case_data_idx (cc, TN)->f /
1143 case_data_idx (cc, TN)->f
1145 case_data_idx (cc, FP)->f
1148 tab_double (tbl, n_cols - 3, x, 0, case_data_idx (cc, CUTPOINT)->f,
1149 var_get_print_format (roc->vars[i]));
1151 tab_double (tbl, n_cols - 2, x, 0, se, NULL);
1152 tab_double (tbl, n_cols - 1, x, 0, 1 - sp, NULL);
1155 casereader_destroy (r);
1163 draw_roc (struct roc_state *rs, const struct cmd_roc *roc)
1167 struct chart *roc_chart = chart_create ();
1169 chart_write_title (roc_chart, _("ROC Curve"));
1170 chart_write_xlabel (roc_chart, _("1 - Specificity"));
1171 chart_write_ylabel (roc_chart, _("Sensitivity"));
1173 chart_write_xscale (roc_chart, 0, 1, 5);
1174 chart_write_yscale (roc_chart, 0, 1, 5);
1176 if ( roc->reference )
1178 chart_line (roc_chart, 1.0, 0,
1183 for (i = 0; i < roc->n_vars; ++i)
1186 struct casereader *r = casereader_clone (rs[i].cutpoint_rdr);
1188 chart_vector_start (roc_chart, var_get_name (roc->vars[i]));
1189 for (; (cc = casereader_read (r)) != NULL;
1192 double se = case_data_idx (cc, TP)->f;
1193 double sp = case_data_idx (cc, TN)->f;
1195 se /= case_data_idx (cc, FN)->f +
1196 case_data_idx (cc, TP)->f ;
1198 sp /= case_data_idx (cc, TN)->f +
1199 case_data_idx (cc, FP)->f ;
1201 chart_vector (roc_chart, 1 - sp, se);
1203 chart_vector_end (roc_chart);
1204 casereader_destroy (r);
1207 chart_write_legend (roc_chart);
1209 chart_submit (roc_chart);
1214 output_roc (struct roc_state *rs, const struct cmd_roc *roc)
1224 if ( roc->print_coords )
1225 show_coords (rs, roc);