1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/stats/roc.h"
21 #include <gsl/gsl_cdf.h>
23 #include "data/casegrouper.h"
24 #include "data/casereader.h"
25 #include "data/casewriter.h"
26 #include "data/dataset.h"
27 #include "data/dictionary.h"
28 #include "data/format.h"
29 #include "data/subcase.h"
30 #include "language/command.h"
31 #include "language/lexer/lexer.h"
32 #include "language/lexer/value-parser.h"
33 #include "language/lexer/variable-parser.h"
34 #include "libpspp/misc.h"
35 #include "math/sort.h"
36 #include "output/chart-item.h"
37 #include "output/charts/roc-chart.h"
38 #include "output/tab.h"
41 #define _(msgid) gettext (msgid)
42 #define N_(msgid) msgid
47 const struct variable **vars;
48 const struct dictionary *dict;
50 const struct variable *state_var;
51 union value state_value;
53 /* Plot the roc curve */
55 /* Plot the reference line */
62 bool bi_neg_exp; /* True iff the bi-negative exponential critieria
64 enum mv_class exclude;
66 bool invert ; /* True iff a smaller test result variable indicates
75 static int run_roc (struct dataset *ds, struct cmd_roc *roc);
78 cmd_roc (struct lexer *lexer, struct dataset *ds)
81 const struct dictionary *dict = dataset_dict (ds);
86 roc.print_coords = false;
89 roc.reference = false;
91 roc.bi_neg_exp = false;
93 roc.pos = roc.pos_weighted = 0;
94 roc.neg = roc.neg_weighted = 0;
95 roc.dict = dataset_dict (ds);
98 lex_match (lexer, T_SLASH);
99 if (!parse_variables_const (lexer, dict, &roc.vars, &roc.n_vars,
100 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
103 if ( ! lex_force_match (lexer, T_BY))
108 roc.state_var = parse_variable (lexer, dict);
110 if ( !lex_force_match (lexer, T_LPAREN))
115 value_init (&roc.state_value, var_get_width (roc.state_var));
116 parse_value (lexer, &roc.state_value, var_get_width (roc.state_var));
119 if ( !lex_force_match (lexer, T_RPAREN))
125 while (lex_token (lexer) != T_ENDCMD)
127 lex_match (lexer, T_SLASH);
128 if (lex_match_id (lexer, "MISSING"))
130 lex_match (lexer, T_EQUALS);
131 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
133 if (lex_match_id (lexer, "INCLUDE"))
135 roc.exclude = MV_SYSTEM;
137 else if (lex_match_id (lexer, "EXCLUDE"))
139 roc.exclude = MV_ANY;
143 lex_error (lexer, NULL);
148 else if (lex_match_id (lexer, "PLOT"))
150 lex_match (lexer, T_EQUALS);
151 if (lex_match_id (lexer, "CURVE"))
154 if (lex_match (lexer, T_LPAREN))
156 roc.reference = true;
157 lex_force_match_id (lexer, "REFERENCE");
158 lex_force_match (lexer, T_RPAREN);
161 else if (lex_match_id (lexer, "NONE"))
167 lex_error (lexer, NULL);
171 else if (lex_match_id (lexer, "PRINT"))
173 lex_match (lexer, T_EQUALS);
174 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
176 if (lex_match_id (lexer, "SE"))
180 else if (lex_match_id (lexer, "COORDINATES"))
182 roc.print_coords = true;
186 lex_error (lexer, NULL);
191 else if (lex_match_id (lexer, "CRITERIA"))
193 lex_match (lexer, T_EQUALS);
194 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
196 if (lex_match_id (lexer, "CUTOFF"))
198 lex_force_match (lexer, T_LPAREN);
199 if (lex_match_id (lexer, "INCLUDE"))
201 roc.exclude = MV_SYSTEM;
203 else if (lex_match_id (lexer, "EXCLUDE"))
205 roc.exclude = MV_USER | MV_SYSTEM;
209 lex_error (lexer, NULL);
212 lex_force_match (lexer, T_RPAREN);
214 else if (lex_match_id (lexer, "TESTPOS"))
216 lex_force_match (lexer, T_LPAREN);
217 if (lex_match_id (lexer, "LARGE"))
221 else if (lex_match_id (lexer, "SMALL"))
227 lex_error (lexer, NULL);
230 lex_force_match (lexer, T_RPAREN);
232 else if (lex_match_id (lexer, "CI"))
234 lex_force_match (lexer, T_LPAREN);
235 lex_force_num (lexer);
236 roc.ci = lex_number (lexer);
238 lex_force_match (lexer, T_RPAREN);
240 else if (lex_match_id (lexer, "DISTRIBUTION"))
242 lex_force_match (lexer, T_LPAREN);
243 if (lex_match_id (lexer, "FREE"))
245 roc.bi_neg_exp = false;
247 else if (lex_match_id (lexer, "NEGEXPO"))
249 roc.bi_neg_exp = true;
253 lex_error (lexer, NULL);
256 lex_force_match (lexer, T_RPAREN);
260 lex_error (lexer, NULL);
267 lex_error (lexer, NULL);
272 if ( ! run_roc (ds, &roc))
275 value_destroy (&roc.state_value, var_get_width (roc.state_var));
281 value_destroy (&roc.state_value, var_get_width (roc.state_var));
290 do_roc (struct cmd_roc *roc, struct casereader *group, struct dictionary *dict);
294 run_roc (struct dataset *ds, struct cmd_roc *roc)
296 struct dictionary *dict = dataset_dict (ds);
298 struct casereader *group;
300 struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict);
301 while (casegrouper_get_next_group (grouper, &group))
303 do_roc (roc, group, dataset_dict (ds));
305 ok = casegrouper_destroy (grouper);
306 ok = proc_commit (ds) && ok;
313 dump_casereader (struct casereader *reader)
316 struct casereader *r = casereader_clone (reader);
318 for ( ; (c = casereader_read (r) ); case_unref (c))
321 for (i = 0 ; i < case_get_value_cnt (c); ++i)
323 printf ("%g ", case_data_idx (c, i)->f);
328 casereader_destroy (r);
334 Return true iff the state variable indicates that C has positive actual state.
336 As a side effect, this function also accumulates the roc->{pos,neg} and
337 roc->{pos,neg}_weighted counts.
340 match_positives (const struct ccase *c, void *aux)
342 struct cmd_roc *roc = aux;
343 const struct variable *wv = dict_get_weight (roc->dict);
344 const double weight = wv ? case_data (c, wv)->f : 1.0;
346 const bool positive =
347 ( 0 == value_compare_3way (case_data (c, roc->state_var), &roc->state_value,
348 var_get_width (roc->state_var)));
353 roc->pos_weighted += weight;
358 roc->neg_weighted += weight;
369 /* Some intermediate state for calculating the cutpoints and the
370 standard error values */
373 double auc; /* Area under the curve */
375 double n1; /* total weight of positives */
376 double n2; /* total weight of negatives */
378 /* intermediates for standard error */
382 /* intermediates for cutpoints */
383 struct casewriter *cutpoint_wtr;
384 struct casereader *cutpoint_rdr;
391 Return a new casereader based upon CUTPOINT_RDR.
392 The number of "positive" cases are placed into
393 the position TRUE_INDEX, and the number of "negative" cases
395 POS_COND and RESULT determine the semantics of what is
397 WEIGHT is the value of a single count.
399 static struct casereader *
400 accumulate_counts (struct casereader *input,
401 double result, double weight,
402 bool (*pos_cond) (double, double),
403 int true_index, int false_index)
405 const struct caseproto *proto = casereader_get_proto (input);
406 struct casewriter *w =
407 autopaging_writer_create (proto);
409 double prev_cp = SYSMIS;
411 for ( ; (cpc = casereader_read (input) ); case_unref (cpc))
413 struct ccase *new_case;
414 const double cp = case_data_idx (cpc, ROC_CUTPOINT)->f;
416 assert (cp != SYSMIS);
418 /* We don't want duplicates here */
422 new_case = case_clone (cpc);
424 if ( pos_cond (result, cp))
425 case_data_rw_idx (new_case, true_index)->f += weight;
427 case_data_rw_idx (new_case, false_index)->f += weight;
431 casewriter_write (w, new_case);
433 casereader_destroy (input);
435 return casewriter_make_reader (w);
440 static void output_roc (struct roc_state *rs, const struct cmd_roc *roc);
443 This function does 3 things:
445 1. Counts the number of cases which are equal to every other case in READER,
446 and those cases for which the relationship between it and every other case
447 satifies PRED (normally either > or <). VAR is variable defining a case's value
450 2. Counts the number of true and false cases in reader, and populates
451 CUTPOINT_RDR accordingly. TRUE_INDEX and FALSE_INDEX are the indices
452 which receive these values. POS_COND is the condition defining true
455 3. CC is filled with the cumulative weight of all cases of READER.
457 static struct casereader *
458 process_group (const struct variable *var, struct casereader *reader,
459 bool (*pred) (double, double),
460 const struct dictionary *dict,
462 struct casereader **cutpoint_rdr,
463 bool (*pos_cond) (double, double),
467 const struct variable *w = dict_get_weight (dict);
469 struct casereader *r1 =
470 casereader_create_distinct (sort_execute_1var (reader, var), var, w);
472 const int weight_idx = w ? var_get_case_index (w) :
473 caseproto_get_n_widths (casereader_get_proto (r1)) - 1;
477 struct casereader *rclone = casereader_clone (r1);
478 struct casewriter *wtr;
479 struct caseproto *proto = caseproto_create ();
481 proto = caseproto_add_width (proto, 0);
482 proto = caseproto_add_width (proto, 0);
483 proto = caseproto_add_width (proto, 0);
485 wtr = autopaging_writer_create (proto);
489 for ( ; (c1 = casereader_read (r1) ); case_unref (c1))
491 struct ccase *new_case = case_create (proto);
493 struct casereader *r2 = casereader_clone (rclone);
495 const double weight1 = case_data_idx (c1, weight_idx)->f;
496 const double d1 = case_data (c1, var)->f;
500 *cutpoint_rdr = accumulate_counts (*cutpoint_rdr, d1, weight1,
502 true_index, false_index);
506 for ( ; (c2 = casereader_read (r2) ); case_unref (c2))
508 const double d2 = case_data (c2, var)->f;
509 const double weight2 = case_data_idx (c2, weight_idx)->f;
516 else if ( pred (d2, d1))
522 case_data_rw_idx (new_case, VALUE)->f = d1;
523 case_data_rw_idx (new_case, N_EQ)->f = n_eq;
524 case_data_rw_idx (new_case, N_PRED)->f = n_pred;
526 casewriter_write (wtr, new_case);
528 casereader_destroy (r2);
532 casereader_destroy (r1);
533 casereader_destroy (rclone);
535 caseproto_unref (proto);
537 return casewriter_make_reader (wtr);
540 /* Some more indeces into case data */
541 #define N_POS_EQ 1 /* number of positive cases with values equal to n */
542 #define N_POS_GT 2 /* number of postive cases with values greater than n */
543 #define N_NEG_EQ 3 /* number of negative cases with values equal to n */
544 #define N_NEG_LT 4 /* number of negative cases with values less than n */
547 gt (double d1, double d2)
554 ge (double d1, double d2)
560 lt (double d1, double d2)
567 Return a casereader with width 3,
568 populated with cases based upon READER.
569 The cases will have the values:
570 (N, number of cases equal to N, number of cases greater than N)
571 As a side effect, update RS->n1 with the number of positive cases.
573 static struct casereader *
574 process_positive_group (const struct variable *var, struct casereader *reader,
575 const struct dictionary *dict,
576 struct roc_state *rs)
578 return process_group (var, reader, gt, dict, &rs->n1,
585 Return a casereader with width 3,
586 populated with cases based upon READER.
587 The cases will have the values:
588 (N, number of cases equal to N, number of cases less than N)
589 As a side effect, update RS->n2 with the number of negative cases.
591 static struct casereader *
592 process_negative_group (const struct variable *var, struct casereader *reader,
593 const struct dictionary *dict,
594 struct roc_state *rs)
596 return process_group (var, reader, lt, dict, &rs->n2,
606 append_cutpoint (struct casewriter *writer, double cutpoint)
608 struct ccase *cc = case_create (casewriter_get_proto (writer));
610 case_data_rw_idx (cc, ROC_CUTPOINT)->f = cutpoint;
611 case_data_rw_idx (cc, ROC_TP)->f = 0;
612 case_data_rw_idx (cc, ROC_FN)->f = 0;
613 case_data_rw_idx (cc, ROC_TN)->f = 0;
614 case_data_rw_idx (cc, ROC_FP)->f = 0;
616 casewriter_write (writer, cc);
621 Create and initialise the rs[x].cutpoint_rdr casereaders. That is, the readers will
622 be created with width 5, ready to take the values (cutpoint, ROC_TP, ROC_FN, ROC_TN, ROC_FP), and the
623 reader will be populated with its final number of cases.
624 However on exit from this function, only ROC_CUTPOINT entries will be set to their final
625 value. The other entries will be initialised to zero.
628 prepare_cutpoints (struct cmd_roc *roc, struct roc_state *rs, struct casereader *input)
631 struct casereader *r = casereader_clone (input);
635 struct caseproto *proto = caseproto_create ();
636 struct subcase ordering;
637 subcase_init (&ordering, ROC_CUTPOINT, 0, SC_ASCEND);
639 proto = caseproto_add_width (proto, 0); /* cutpoint */
640 proto = caseproto_add_width (proto, 0); /* ROC_TP */
641 proto = caseproto_add_width (proto, 0); /* ROC_FN */
642 proto = caseproto_add_width (proto, 0); /* ROC_TN */
643 proto = caseproto_add_width (proto, 0); /* ROC_FP */
645 for (i = 0 ; i < roc->n_vars; ++i)
647 rs[i].cutpoint_wtr = sort_create_writer (&ordering, proto);
648 rs[i].prev_result = SYSMIS;
649 rs[i].max = -DBL_MAX;
653 caseproto_unref (proto);
654 subcase_destroy (&ordering);
657 for (; (c = casereader_read (r)) != NULL; case_unref (c))
659 for (i = 0 ; i < roc->n_vars; ++i)
661 const union value *v = case_data (c, roc->vars[i]);
662 const double result = v->f;
664 if ( mv_is_value_missing (var_get_missing_values (roc->vars[i]), v, roc->exclude))
667 minimize (&rs[i].min, result);
668 maximize (&rs[i].max, result);
670 if ( rs[i].prev_result != SYSMIS && rs[i].prev_result != result )
672 const double mean = (result + rs[i].prev_result ) / 2.0;
673 append_cutpoint (rs[i].cutpoint_wtr, mean);
676 rs[i].prev_result = result;
679 casereader_destroy (r);
682 /* Append the min and max cutpoints */
683 for (i = 0 ; i < roc->n_vars; ++i)
685 append_cutpoint (rs[i].cutpoint_wtr, rs[i].min - 1);
686 append_cutpoint (rs[i].cutpoint_wtr, rs[i].max + 1);
688 rs[i].cutpoint_rdr = casewriter_make_reader (rs[i].cutpoint_wtr);
693 do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict)
697 struct roc_state *rs = xcalloc (roc->n_vars, sizeof *rs);
699 struct casereader *negatives = NULL;
700 struct casereader *positives = NULL;
702 struct caseproto *n_proto = NULL;
704 struct subcase up_ordering;
705 struct subcase down_ordering;
707 struct casewriter *neg_wtr = NULL;
709 struct casereader *input = casereader_create_filter_missing (reader,
710 roc->vars, roc->n_vars,
715 input = casereader_create_filter_missing (input,
721 neg_wtr = autopaging_writer_create (casereader_get_proto (input));
723 prepare_cutpoints (roc, rs, input);
726 /* Separate the positive actual state cases from the negative ones */
728 casereader_create_filter_func (input,
734 n_proto = caseproto_create ();
736 n_proto = caseproto_add_width (n_proto, 0);
737 n_proto = caseproto_add_width (n_proto, 0);
738 n_proto = caseproto_add_width (n_proto, 0);
739 n_proto = caseproto_add_width (n_proto, 0);
740 n_proto = caseproto_add_width (n_proto, 0);
742 subcase_init (&up_ordering, VALUE, 0, SC_ASCEND);
743 subcase_init (&down_ordering, VALUE, 0, SC_DESCEND);
745 for (i = 0 ; i < roc->n_vars; ++i)
747 struct casewriter *w = NULL;
748 struct casereader *r = NULL;
753 struct casereader *n_neg_reader ;
754 const struct variable *var = roc->vars[i];
756 struct casereader *neg ;
757 struct casereader *pos = casereader_clone (positives);
759 struct casereader *n_pos_reader =
760 process_positive_group (var, pos, dict, &rs[i]);
762 if ( negatives == NULL)
764 negatives = casewriter_make_reader (neg_wtr);
767 neg = casereader_clone (negatives);
769 n_neg_reader = process_negative_group (var, neg, dict, &rs[i]);
771 /* Merge the n_pos and n_neg casereaders */
772 w = sort_create_writer (&up_ordering, n_proto);
773 for ( ; (cpos = casereader_read (n_pos_reader) ); case_unref (cpos))
775 struct ccase *pos_case = case_create (n_proto);
777 const double jpos = case_data_idx (cpos, VALUE)->f;
779 while ((cneg = casereader_read (n_neg_reader)))
781 struct ccase *nc = case_create (n_proto);
783 const double jneg = case_data_idx (cneg, VALUE)->f;
785 case_data_rw_idx (nc, VALUE)->f = jneg;
786 case_data_rw_idx (nc, N_POS_EQ)->f = 0;
788 case_data_rw_idx (nc, N_POS_GT)->f = SYSMIS;
790 *case_data_rw_idx (nc, N_NEG_EQ) = *case_data_idx (cneg, N_EQ);
791 *case_data_rw_idx (nc, N_NEG_LT) = *case_data_idx (cneg, N_PRED);
793 casewriter_write (w, nc);
800 case_data_rw_idx (pos_case, VALUE)->f = jpos;
801 *case_data_rw_idx (pos_case, N_POS_EQ) = *case_data_idx (cpos, N_EQ);
802 *case_data_rw_idx (pos_case, N_POS_GT) = *case_data_idx (cpos, N_PRED);
803 case_data_rw_idx (pos_case, N_NEG_EQ)->f = 0;
804 case_data_rw_idx (pos_case, N_NEG_LT)->f = SYSMIS;
806 casewriter_write (w, pos_case);
809 casereader_destroy (n_pos_reader);
810 casereader_destroy (n_neg_reader);
812 /* These aren't used anymore */
816 r = casewriter_make_reader (w);
818 /* Propagate the N_POS_GT values from the positive cases
819 to the negative ones */
821 double prev_pos_gt = rs[i].n1;
822 w = sort_create_writer (&down_ordering, n_proto);
824 for ( ; (c = casereader_read (r) ); case_unref (c))
826 double n_pos_gt = case_data_idx (c, N_POS_GT)->f;
827 struct ccase *nc = case_clone (c);
829 if ( n_pos_gt == SYSMIS)
831 n_pos_gt = prev_pos_gt;
832 case_data_rw_idx (nc, N_POS_GT)->f = n_pos_gt;
835 casewriter_write (w, nc);
836 prev_pos_gt = n_pos_gt;
839 casereader_destroy (r);
840 r = casewriter_make_reader (w);
843 /* Propagate the N_NEG_LT values from the negative cases
844 to the positive ones */
846 double prev_neg_lt = rs[i].n2;
847 w = sort_create_writer (&up_ordering, n_proto);
849 for ( ; (c = casereader_read (r) ); case_unref (c))
851 double n_neg_lt = case_data_idx (c, N_NEG_LT)->f;
852 struct ccase *nc = case_clone (c);
854 if ( n_neg_lt == SYSMIS)
856 n_neg_lt = prev_neg_lt;
857 case_data_rw_idx (nc, N_NEG_LT)->f = n_neg_lt;
860 casewriter_write (w, nc);
861 prev_neg_lt = n_neg_lt;
864 casereader_destroy (r);
865 r = casewriter_make_reader (w);
869 struct ccase *prev_case = NULL;
870 for ( ; (c = casereader_read (r) ); case_unref (c))
872 struct ccase *next_case = casereader_peek (r, 0);
874 const double j = case_data_idx (c, VALUE)->f;
875 double n_pos_eq = case_data_idx (c, N_POS_EQ)->f;
876 double n_pos_gt = case_data_idx (c, N_POS_GT)->f;
877 double n_neg_eq = case_data_idx (c, N_NEG_EQ)->f;
878 double n_neg_lt = case_data_idx (c, N_NEG_LT)->f;
880 if ( prev_case && j == case_data_idx (prev_case, VALUE)->f)
882 if ( 0 == case_data_idx (c, N_POS_EQ)->f)
884 n_pos_eq = case_data_idx (prev_case, N_POS_EQ)->f;
885 n_pos_gt = case_data_idx (prev_case, N_POS_GT)->f;
888 if ( 0 == case_data_idx (c, N_NEG_EQ)->f)
890 n_neg_eq = case_data_idx (prev_case, N_NEG_EQ)->f;
891 n_neg_lt = case_data_idx (prev_case, N_NEG_LT)->f;
895 if ( NULL == next_case || j != case_data_idx (next_case, VALUE)->f)
897 rs[i].auc += n_pos_gt * n_neg_eq + (n_pos_eq * n_neg_eq) / 2.0;
900 n_neg_eq * ( pow2 (n_pos_gt) + n_pos_gt * n_pos_eq + pow2 (n_pos_eq) / 3.0);
902 n_pos_eq * ( pow2 (n_neg_lt) + n_neg_lt * n_neg_eq + pow2 (n_neg_eq) / 3.0);
906 case_unref (next_case);
907 case_unref (prev_case);
908 prev_case = case_clone (c);
910 casereader_destroy (r);
911 case_unref (prev_case);
913 rs[i].auc /= rs[i].n1 * rs[i].n2;
915 rs[i].auc = 1 - rs[i].auc;
917 if ( roc->bi_neg_exp )
919 rs[i].q1hat = rs[i].auc / ( 2 - rs[i].auc);
920 rs[i].q2hat = 2 * pow2 (rs[i].auc) / ( 1 + rs[i].auc);
924 rs[i].q1hat /= rs[i].n2 * pow2 (rs[i].n1);
925 rs[i].q2hat /= rs[i].n1 * pow2 (rs[i].n2);
930 casereader_destroy (positives);
931 casereader_destroy (negatives);
933 caseproto_unref (n_proto);
934 subcase_destroy (&up_ordering);
935 subcase_destroy (&down_ordering);
937 output_roc (rs, roc);
939 for (i = 0 ; i < roc->n_vars; ++i)
940 casereader_destroy (rs[i].cutpoint_rdr);
946 show_auc (struct roc_state *rs, const struct cmd_roc *roc)
949 const int n_fields = roc->print_se ? 5 : 1;
950 const int n_cols = roc->n_vars > 1 ? n_fields + 1: n_fields;
951 const int n_rows = 2 + roc->n_vars;
952 struct tab_table *tbl = tab_create (n_cols, n_rows);
954 if ( roc->n_vars > 1)
955 tab_title (tbl, _("Area Under the Curve"));
957 tab_title (tbl, _("Area Under the Curve (%s)"), var_to_string (roc->vars[0]));
959 tab_headers (tbl, n_cols - n_fields, 0, 1, 0);
962 tab_text (tbl, n_cols - n_fields, 1, TAT_TITLE, _("Area"));
964 tab_hline (tbl, TAL_2, 0, n_cols - 1, 2);
975 tab_text (tbl, n_cols - 4, 1, TAT_TITLE, _("Std. Error"));
976 tab_text (tbl, n_cols - 3, 1, TAT_TITLE, _("Asymptotic Sig."));
978 tab_text (tbl, n_cols - 2, 1, TAT_TITLE, _("Lower Bound"));
979 tab_text (tbl, n_cols - 1, 1, TAT_TITLE, _("Upper Bound"));
981 tab_joint_text_format (tbl, n_cols - 2, 0, 4, 0,
982 TAT_TITLE | TAB_CENTER,
983 _("Asymp. %g%% Confidence Interval"), roc->ci);
984 tab_vline (tbl, 0, n_cols - 1, 0, 0);
985 tab_hline (tbl, TAL_1, n_cols - 2, n_cols - 1, 1);
988 if ( roc->n_vars > 1)
989 tab_text (tbl, 0, 1, TAT_TITLE, _("Variable under test"));
991 if ( roc->n_vars > 1)
992 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
995 for ( i = 0 ; i < roc->n_vars ; ++i )
997 tab_text (tbl, 0, 2 + i, TAT_TITLE, var_to_string (roc->vars[i]));
999 tab_double (tbl, n_cols - n_fields, 2 + i, 0, rs[i].auc, NULL);
1001 if ( roc->print_se )
1004 const double sd_0_5 = sqrt ((rs[i].n1 + rs[i].n2 + 1) /
1005 (12 * rs[i].n1 * rs[i].n2));
1009 se = rs[i].auc * (1 - rs[i].auc) + (rs[i].n1 - 1) * (rs[i].q1hat - pow2 (rs[i].auc)) +
1010 (rs[i].n2 - 1) * (rs[i].q2hat - pow2 (rs[i].auc));
1012 se /= rs[i].n1 * rs[i].n2;
1016 tab_double (tbl, n_cols - 4, 2 + i, 0,
1020 ci = 1 - roc->ci / 100.0;
1021 yy = gsl_cdf_gaussian_Qinv (ci, se) ;
1023 tab_double (tbl, n_cols - 2, 2 + i, 0,
1027 tab_double (tbl, n_cols - 1, 2 + i, 0,
1031 tab_double (tbl, n_cols - 3, 2 + i, 0,
1032 2.0 * gsl_cdf_ugaussian_Q (fabs ((rs[i].auc - 0.5 ) / sd_0_5)),
1042 show_summary (const struct cmd_roc *roc)
1044 const int n_cols = 3;
1045 const int n_rows = 4;
1046 struct tab_table *tbl = tab_create (n_cols, n_rows);
1048 tab_title (tbl, _("Case Summary"));
1050 tab_headers (tbl, 1, 0, 2, 0);
1059 tab_hline (tbl, TAL_2, 0, n_cols - 1, 2);
1060 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
1063 tab_hline (tbl, TAL_2, 1, n_cols - 1, 1);
1064 tab_vline (tbl, TAL_1, 2, 1, n_rows - 1);
1067 tab_text (tbl, 0, 1, TAT_TITLE | TAB_LEFT, var_to_string (roc->state_var));
1068 tab_text (tbl, 1, 1, TAT_TITLE, _("Unweighted"));
1069 tab_text (tbl, 2, 1, TAT_TITLE, _("Weighted"));
1071 tab_joint_text (tbl, 1, 0, 2, 0,
1072 TAT_TITLE | TAB_CENTER,
1073 _("Valid N (listwise)"));
1076 tab_text (tbl, 0, 2, TAB_LEFT, _("Positive"));
1077 tab_text (tbl, 0, 3, TAB_LEFT, _("Negative"));
1080 tab_double (tbl, 1, 2, 0, roc->pos, &F_8_0);
1081 tab_double (tbl, 1, 3, 0, roc->neg, &F_8_0);
1083 tab_double (tbl, 2, 2, 0, roc->pos_weighted, 0);
1084 tab_double (tbl, 2, 3, 0, roc->neg_weighted, 0);
1091 show_coords (struct roc_state *rs, const struct cmd_roc *roc)
1095 const int n_cols = roc->n_vars > 1 ? 4 : 3;
1097 struct tab_table *tbl ;
1099 for (i = 0; i < roc->n_vars; ++i)
1100 n_rows += casereader_count_cases (rs[i].cutpoint_rdr);
1102 tbl = tab_create (n_cols, n_rows);
1104 if ( roc->n_vars > 1)
1105 tab_title (tbl, _("Coordinates of the Curve"));
1107 tab_title (tbl, _("Coordinates of the Curve (%s)"), var_to_string (roc->vars[0]));
1110 tab_headers (tbl, 1, 0, 1, 0);
1112 tab_hline (tbl, TAL_2, 0, n_cols - 1, 1);
1114 if ( roc->n_vars > 1)
1115 tab_text (tbl, 0, 0, TAT_TITLE, _("Test variable"));
1117 tab_text (tbl, n_cols - 3, 0, TAT_TITLE, _("Positive if greater than or equal to"));
1118 tab_text (tbl, n_cols - 2, 0, TAT_TITLE, _("Sensitivity"));
1119 tab_text (tbl, n_cols - 1, 0, TAT_TITLE, _("1 - Specificity"));
1128 if ( roc->n_vars > 1)
1129 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
1131 for (i = 0; i < roc->n_vars; ++i)
1134 struct casereader *r = casereader_clone (rs[i].cutpoint_rdr);
1136 if ( roc->n_vars > 1)
1137 tab_text (tbl, 0, x, TAT_TITLE, var_to_string (roc->vars[i]));
1140 tab_hline (tbl, TAL_1, 0, n_cols - 1, x);
1143 for (; (cc = casereader_read (r)) != NULL;
1144 case_unref (cc), x++)
1146 const double se = case_data_idx (cc, ROC_TP)->f /
1148 case_data_idx (cc, ROC_TP)->f
1150 case_data_idx (cc, ROC_FN)->f
1153 const double sp = case_data_idx (cc, ROC_TN)->f /
1155 case_data_idx (cc, ROC_TN)->f
1157 case_data_idx (cc, ROC_FP)->f
1160 tab_double (tbl, n_cols - 3, x, 0, case_data_idx (cc, ROC_CUTPOINT)->f,
1161 var_get_print_format (roc->vars[i]));
1163 tab_double (tbl, n_cols - 2, x, 0, se, NULL);
1164 tab_double (tbl, n_cols - 1, x, 0, 1 - sp, NULL);
1167 casereader_destroy (r);
1175 output_roc (struct roc_state *rs, const struct cmd_roc *roc)
1181 struct roc_chart *rc;
1184 rc = roc_chart_create (roc->reference);
1185 for (i = 0; i < roc->n_vars; i++)
1186 roc_chart_add_var (rc, var_get_name (roc->vars[i]),
1187 rs[i].cutpoint_rdr);
1188 roc_chart_submit (rc);
1193 if ( roc->print_coords )
1194 show_coords (rs, roc);