1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/stats/roc.h"
21 #include <gsl/gsl_cdf.h>
23 #include "data/casegrouper.h"
24 #include "data/casereader.h"
25 #include "data/casewriter.h"
26 #include "data/dataset.h"
27 #include "data/dictionary.h"
28 #include "data/format.h"
29 #include "data/subcase.h"
30 #include "language/command.h"
31 #include "language/lexer/lexer.h"
32 #include "language/lexer/value-parser.h"
33 #include "language/lexer/variable-parser.h"
34 #include "libpspp/misc.h"
35 #include "math/sort.h"
36 #include "output/chart-item.h"
37 #include "output/charts/roc-chart.h"
38 #include "output/tab.h"
41 #define _(msgid) gettext (msgid)
42 #define N_(msgid) msgid
47 const struct variable **vars;
48 const struct dictionary *dict;
50 const struct variable *state_var;
51 union value state_value;
52 size_t state_var_width;
54 /* Plot the roc curve */
56 /* Plot the reference line */
63 bool bi_neg_exp; /* True iff the bi-negative exponential critieria
65 enum mv_class exclude;
67 bool invert ; /* True iff a smaller test result variable indicates
76 static int run_roc (struct dataset *ds, struct cmd_roc *roc);
79 cmd_roc (struct lexer *lexer, struct dataset *ds)
82 const struct dictionary *dict = dataset_dict (ds);
87 roc.print_coords = false;
90 roc.reference = false;
92 roc.bi_neg_exp = false;
94 roc.pos = roc.pos_weighted = 0;
95 roc.neg = roc.neg_weighted = 0;
96 roc.dict = dataset_dict (ds);
98 roc.state_var_width = -1;
100 lex_match (lexer, T_SLASH);
101 if (!parse_variables_const (lexer, dict, &roc.vars, &roc.n_vars,
102 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
105 if ( ! lex_force_match (lexer, T_BY))
110 roc.state_var = parse_variable (lexer, dict);
116 if ( !lex_force_match (lexer, T_LPAREN))
121 roc.state_var_width = var_get_width (roc.state_var);
122 value_init (&roc.state_value, roc.state_var_width);
123 parse_value (lexer, &roc.state_value, roc.state_var);
126 if ( !lex_force_match (lexer, T_RPAREN))
131 while (lex_token (lexer) != T_ENDCMD)
133 lex_match (lexer, T_SLASH);
134 if (lex_match_id (lexer, "MISSING"))
136 lex_match (lexer, T_EQUALS);
137 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
139 if (lex_match_id (lexer, "INCLUDE"))
141 roc.exclude = MV_SYSTEM;
143 else if (lex_match_id (lexer, "EXCLUDE"))
145 roc.exclude = MV_ANY;
149 lex_error (lexer, NULL);
154 else if (lex_match_id (lexer, "PLOT"))
156 lex_match (lexer, T_EQUALS);
157 if (lex_match_id (lexer, "CURVE"))
160 if (lex_match (lexer, T_LPAREN))
162 roc.reference = true;
163 if (! lex_force_match_id (lexer, "REFERENCE"))
165 if (! lex_force_match (lexer, T_RPAREN))
169 else if (lex_match_id (lexer, "NONE"))
175 lex_error (lexer, NULL);
179 else if (lex_match_id (lexer, "PRINT"))
181 lex_match (lexer, T_EQUALS);
182 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
184 if (lex_match_id (lexer, "SE"))
188 else if (lex_match_id (lexer, "COORDINATES"))
190 roc.print_coords = true;
194 lex_error (lexer, NULL);
199 else if (lex_match_id (lexer, "CRITERIA"))
201 lex_match (lexer, T_EQUALS);
202 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
204 if (lex_match_id (lexer, "CUTOFF"))
206 if (! lex_force_match (lexer, T_LPAREN))
208 if (lex_match_id (lexer, "INCLUDE"))
210 roc.exclude = MV_SYSTEM;
212 else if (lex_match_id (lexer, "EXCLUDE"))
214 roc.exclude = MV_USER | MV_SYSTEM;
218 lex_error (lexer, NULL);
221 if (! lex_force_match (lexer, T_RPAREN))
224 else if (lex_match_id (lexer, "TESTPOS"))
226 if (! lex_force_match (lexer, T_LPAREN))
228 if (lex_match_id (lexer, "LARGE"))
232 else if (lex_match_id (lexer, "SMALL"))
238 lex_error (lexer, NULL);
241 if (! lex_force_match (lexer, T_RPAREN))
244 else if (lex_match_id (lexer, "CI"))
246 if (!lex_force_match (lexer, T_LPAREN))
248 if (! lex_force_num (lexer))
250 roc.ci = lex_number (lexer);
252 if (!lex_force_match (lexer, T_RPAREN))
255 else if (lex_match_id (lexer, "DISTRIBUTION"))
257 if (!lex_force_match (lexer, T_LPAREN))
259 if (lex_match_id (lexer, "FREE"))
261 roc.bi_neg_exp = false;
263 else if (lex_match_id (lexer, "NEGEXPO"))
265 roc.bi_neg_exp = true;
269 lex_error (lexer, NULL);
272 if (!lex_force_match (lexer, T_RPAREN))
277 lex_error (lexer, NULL);
284 lex_error (lexer, NULL);
289 if ( ! run_roc (ds, &roc))
293 value_destroy (&roc.state_value, roc.state_var_width);
299 value_destroy (&roc.state_value, roc.state_var_width);
308 do_roc (struct cmd_roc *roc, struct casereader *group, struct dictionary *dict);
312 run_roc (struct dataset *ds, struct cmd_roc *roc)
314 struct dictionary *dict = dataset_dict (ds);
316 struct casereader *group;
318 struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict);
319 while (casegrouper_get_next_group (grouper, &group))
321 do_roc (roc, group, dataset_dict (ds));
323 ok = casegrouper_destroy (grouper);
324 ok = proc_commit (ds) && ok;
331 dump_casereader (struct casereader *reader)
334 struct casereader *r = casereader_clone (reader);
336 for ( ; (c = casereader_read (r) ); case_unref (c))
339 for (i = 0 ; i < case_get_value_cnt (c); ++i)
341 printf ("%g ", case_data_idx (c, i)->f);
346 casereader_destroy (r);
352 Return true iff the state variable indicates that C has positive actual state.
354 As a side effect, this function also accumulates the roc->{pos,neg} and
355 roc->{pos,neg}_weighted counts.
358 match_positives (const struct ccase *c, void *aux)
360 struct cmd_roc *roc = aux;
361 const struct variable *wv = dict_get_weight (roc->dict);
362 const double weight = wv ? case_data (c, wv)->f : 1.0;
364 const bool positive =
365 ( 0 == value_compare_3way (case_data (c, roc->state_var), &roc->state_value,
366 var_get_width (roc->state_var)));
371 roc->pos_weighted += weight;
376 roc->neg_weighted += weight;
387 /* Some intermediate state for calculating the cutpoints and the
388 standard error values */
391 double auc; /* Area under the curve */
393 double n1; /* total weight of positives */
394 double n2; /* total weight of negatives */
396 /* intermediates for standard error */
400 /* intermediates for cutpoints */
401 struct casewriter *cutpoint_wtr;
402 struct casereader *cutpoint_rdr;
409 Return a new casereader based upon CUTPOINT_RDR.
410 The number of "positive" cases are placed into
411 the position TRUE_INDEX, and the number of "negative" cases
413 POS_COND and RESULT determine the semantics of what is
415 WEIGHT is the value of a single count.
417 static struct casereader *
418 accumulate_counts (struct casereader *input,
419 double result, double weight,
420 bool (*pos_cond) (double, double),
421 int true_index, int false_index)
423 const struct caseproto *proto = casereader_get_proto (input);
424 struct casewriter *w =
425 autopaging_writer_create (proto);
427 double prev_cp = SYSMIS;
429 for ( ; (cpc = casereader_read (input) ); case_unref (cpc))
431 struct ccase *new_case;
432 const double cp = case_data_idx (cpc, ROC_CUTPOINT)->f;
434 assert (cp != SYSMIS);
436 /* We don't want duplicates here */
440 new_case = case_clone (cpc);
442 if ( pos_cond (result, cp))
443 case_data_rw_idx (new_case, true_index)->f += weight;
445 case_data_rw_idx (new_case, false_index)->f += weight;
449 casewriter_write (w, new_case);
451 casereader_destroy (input);
453 return casewriter_make_reader (w);
458 static void output_roc (struct roc_state *rs, const struct cmd_roc *roc);
461 This function does 3 things:
463 1. Counts the number of cases which are equal to every other case in READER,
464 and those cases for which the relationship between it and every other case
465 satifies PRED (normally either > or <). VAR is variable defining a case's value
468 2. Counts the number of true and false cases in reader, and populates
469 CUTPOINT_RDR accordingly. TRUE_INDEX and FALSE_INDEX are the indices
470 which receive these values. POS_COND is the condition defining true
473 3. CC is filled with the cumulative weight of all cases of READER.
475 static struct casereader *
476 process_group (const struct variable *var, struct casereader *reader,
477 bool (*pred) (double, double),
478 const struct dictionary *dict,
480 struct casereader **cutpoint_rdr,
481 bool (*pos_cond) (double, double),
485 const struct variable *w = dict_get_weight (dict);
487 struct casereader *r1 =
488 casereader_create_distinct (sort_execute_1var (reader, var), var, w);
490 const int weight_idx = w ? var_get_case_index (w) :
491 caseproto_get_n_widths (casereader_get_proto (r1)) - 1;
495 struct casereader *rclone = casereader_clone (r1);
496 struct casewriter *wtr;
497 struct caseproto *proto = caseproto_create ();
499 proto = caseproto_add_width (proto, 0);
500 proto = caseproto_add_width (proto, 0);
501 proto = caseproto_add_width (proto, 0);
503 wtr = autopaging_writer_create (proto);
507 for ( ; (c1 = casereader_read (r1) ); case_unref (c1))
509 struct ccase *new_case = case_create (proto);
511 struct casereader *r2 = casereader_clone (rclone);
513 const double weight1 = case_data_idx (c1, weight_idx)->f;
514 const double d1 = case_data (c1, var)->f;
518 *cutpoint_rdr = accumulate_counts (*cutpoint_rdr, d1, weight1,
520 true_index, false_index);
524 for ( ; (c2 = casereader_read (r2) ); case_unref (c2))
526 const double d2 = case_data (c2, var)->f;
527 const double weight2 = case_data_idx (c2, weight_idx)->f;
534 else if ( pred (d2, d1))
540 case_data_rw_idx (new_case, VALUE)->f = d1;
541 case_data_rw_idx (new_case, N_EQ)->f = n_eq;
542 case_data_rw_idx (new_case, N_PRED)->f = n_pred;
544 casewriter_write (wtr, new_case);
546 casereader_destroy (r2);
550 casereader_destroy (r1);
551 casereader_destroy (rclone);
553 caseproto_unref (proto);
555 return casewriter_make_reader (wtr);
558 /* Some more indeces into case data */
559 #define N_POS_EQ 1 /* number of positive cases with values equal to n */
560 #define N_POS_GT 2 /* number of postive cases with values greater than n */
561 #define N_NEG_EQ 3 /* number of negative cases with values equal to n */
562 #define N_NEG_LT 4 /* number of negative cases with values less than n */
565 gt (double d1, double d2)
572 ge (double d1, double d2)
578 lt (double d1, double d2)
585 Return a casereader with width 3,
586 populated with cases based upon READER.
587 The cases will have the values:
588 (N, number of cases equal to N, number of cases greater than N)
589 As a side effect, update RS->n1 with the number of positive cases.
591 static struct casereader *
592 process_positive_group (const struct variable *var, struct casereader *reader,
593 const struct dictionary *dict,
594 struct roc_state *rs)
596 return process_group (var, reader, gt, dict, &rs->n1,
603 Return a casereader with width 3,
604 populated with cases based upon READER.
605 The cases will have the values:
606 (N, number of cases equal to N, number of cases less than N)
607 As a side effect, update RS->n2 with the number of negative cases.
609 static struct casereader *
610 process_negative_group (const struct variable *var, struct casereader *reader,
611 const struct dictionary *dict,
612 struct roc_state *rs)
614 return process_group (var, reader, lt, dict, &rs->n2,
624 append_cutpoint (struct casewriter *writer, double cutpoint)
626 struct ccase *cc = case_create (casewriter_get_proto (writer));
628 case_data_rw_idx (cc, ROC_CUTPOINT)->f = cutpoint;
629 case_data_rw_idx (cc, ROC_TP)->f = 0;
630 case_data_rw_idx (cc, ROC_FN)->f = 0;
631 case_data_rw_idx (cc, ROC_TN)->f = 0;
632 case_data_rw_idx (cc, ROC_FP)->f = 0;
634 casewriter_write (writer, cc);
639 Create and initialise the rs[x].cutpoint_rdr casereaders. That is, the readers will
640 be created with width 5, ready to take the values (cutpoint, ROC_TP, ROC_FN, ROC_TN, ROC_FP), and the
641 reader will be populated with its final number of cases.
642 However on exit from this function, only ROC_CUTPOINT entries will be set to their final
643 value. The other entries will be initialised to zero.
646 prepare_cutpoints (struct cmd_roc *roc, struct roc_state *rs, struct casereader *input)
649 struct casereader *r = casereader_clone (input);
653 struct caseproto *proto = caseproto_create ();
654 struct subcase ordering;
655 subcase_init (&ordering, ROC_CUTPOINT, 0, SC_ASCEND);
657 proto = caseproto_add_width (proto, 0); /* cutpoint */
658 proto = caseproto_add_width (proto, 0); /* ROC_TP */
659 proto = caseproto_add_width (proto, 0); /* ROC_FN */
660 proto = caseproto_add_width (proto, 0); /* ROC_TN */
661 proto = caseproto_add_width (proto, 0); /* ROC_FP */
663 for (i = 0 ; i < roc->n_vars; ++i)
665 rs[i].cutpoint_wtr = sort_create_writer (&ordering, proto);
666 rs[i].prev_result = SYSMIS;
667 rs[i].max = -DBL_MAX;
671 caseproto_unref (proto);
672 subcase_destroy (&ordering);
675 for (; (c = casereader_read (r)) != NULL; case_unref (c))
677 for (i = 0 ; i < roc->n_vars; ++i)
679 const union value *v = case_data (c, roc->vars[i]);
680 const double result = v->f;
682 if ( mv_is_value_missing (var_get_missing_values (roc->vars[i]), v, roc->exclude))
685 minimize (&rs[i].min, result);
686 maximize (&rs[i].max, result);
688 if ( rs[i].prev_result != SYSMIS && rs[i].prev_result != result )
690 const double mean = (result + rs[i].prev_result ) / 2.0;
691 append_cutpoint (rs[i].cutpoint_wtr, mean);
694 rs[i].prev_result = result;
697 casereader_destroy (r);
700 /* Append the min and max cutpoints */
701 for (i = 0 ; i < roc->n_vars; ++i)
703 append_cutpoint (rs[i].cutpoint_wtr, rs[i].min - 1);
704 append_cutpoint (rs[i].cutpoint_wtr, rs[i].max + 1);
706 rs[i].cutpoint_rdr = casewriter_make_reader (rs[i].cutpoint_wtr);
711 do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict)
715 struct roc_state *rs = xcalloc (roc->n_vars, sizeof *rs);
717 struct casereader *negatives = NULL;
718 struct casereader *positives = NULL;
720 struct caseproto *n_proto = NULL;
722 struct subcase up_ordering;
723 struct subcase down_ordering;
725 struct casewriter *neg_wtr = NULL;
727 struct casereader *input = casereader_create_filter_missing (reader,
728 roc->vars, roc->n_vars,
733 input = casereader_create_filter_missing (input,
739 neg_wtr = autopaging_writer_create (casereader_get_proto (input));
741 prepare_cutpoints (roc, rs, input);
744 /* Separate the positive actual state cases from the negative ones */
746 casereader_create_filter_func (input,
752 n_proto = caseproto_create ();
754 n_proto = caseproto_add_width (n_proto, 0);
755 n_proto = caseproto_add_width (n_proto, 0);
756 n_proto = caseproto_add_width (n_proto, 0);
757 n_proto = caseproto_add_width (n_proto, 0);
758 n_proto = caseproto_add_width (n_proto, 0);
760 subcase_init (&up_ordering, VALUE, 0, SC_ASCEND);
761 subcase_init (&down_ordering, VALUE, 0, SC_DESCEND);
763 for (i = 0 ; i < roc->n_vars; ++i)
765 struct casewriter *w = NULL;
766 struct casereader *r = NULL;
771 struct casereader *n_neg_reader ;
772 const struct variable *var = roc->vars[i];
774 struct casereader *neg ;
775 struct casereader *pos = casereader_clone (positives);
777 struct casereader *n_pos_reader =
778 process_positive_group (var, pos, dict, &rs[i]);
780 if ( negatives == NULL)
782 negatives = casewriter_make_reader (neg_wtr);
785 neg = casereader_clone (negatives);
787 n_neg_reader = process_negative_group (var, neg, dict, &rs[i]);
789 /* Merge the n_pos and n_neg casereaders */
790 w = sort_create_writer (&up_ordering, n_proto);
791 for ( ; (cpos = casereader_read (n_pos_reader) ); case_unref (cpos))
793 struct ccase *pos_case = case_create (n_proto);
795 const double jpos = case_data_idx (cpos, VALUE)->f;
797 while ((cneg = casereader_read (n_neg_reader)))
799 struct ccase *nc = case_create (n_proto);
801 const double jneg = case_data_idx (cneg, VALUE)->f;
803 case_data_rw_idx (nc, VALUE)->f = jneg;
804 case_data_rw_idx (nc, N_POS_EQ)->f = 0;
806 case_data_rw_idx (nc, N_POS_GT)->f = SYSMIS;
808 *case_data_rw_idx (nc, N_NEG_EQ) = *case_data_idx (cneg, N_EQ);
809 *case_data_rw_idx (nc, N_NEG_LT) = *case_data_idx (cneg, N_PRED);
811 casewriter_write (w, nc);
818 case_data_rw_idx (pos_case, VALUE)->f = jpos;
819 *case_data_rw_idx (pos_case, N_POS_EQ) = *case_data_idx (cpos, N_EQ);
820 *case_data_rw_idx (pos_case, N_POS_GT) = *case_data_idx (cpos, N_PRED);
821 case_data_rw_idx (pos_case, N_NEG_EQ)->f = 0;
822 case_data_rw_idx (pos_case, N_NEG_LT)->f = SYSMIS;
824 casewriter_write (w, pos_case);
827 casereader_destroy (n_pos_reader);
828 casereader_destroy (n_neg_reader);
830 /* These aren't used anymore */
834 r = casewriter_make_reader (w);
836 /* Propagate the N_POS_GT values from the positive cases
837 to the negative ones */
839 double prev_pos_gt = rs[i].n1;
840 w = sort_create_writer (&down_ordering, n_proto);
842 for ( ; (c = casereader_read (r) ); case_unref (c))
844 double n_pos_gt = case_data_idx (c, N_POS_GT)->f;
845 struct ccase *nc = case_clone (c);
847 if ( n_pos_gt == SYSMIS)
849 n_pos_gt = prev_pos_gt;
850 case_data_rw_idx (nc, N_POS_GT)->f = n_pos_gt;
853 casewriter_write (w, nc);
854 prev_pos_gt = n_pos_gt;
857 casereader_destroy (r);
858 r = casewriter_make_reader (w);
861 /* Propagate the N_NEG_LT values from the negative cases
862 to the positive ones */
864 double prev_neg_lt = rs[i].n2;
865 w = sort_create_writer (&up_ordering, n_proto);
867 for ( ; (c = casereader_read (r) ); case_unref (c))
869 double n_neg_lt = case_data_idx (c, N_NEG_LT)->f;
870 struct ccase *nc = case_clone (c);
872 if ( n_neg_lt == SYSMIS)
874 n_neg_lt = prev_neg_lt;
875 case_data_rw_idx (nc, N_NEG_LT)->f = n_neg_lt;
878 casewriter_write (w, nc);
879 prev_neg_lt = n_neg_lt;
882 casereader_destroy (r);
883 r = casewriter_make_reader (w);
887 struct ccase *prev_case = NULL;
888 for ( ; (c = casereader_read (r) ); case_unref (c))
890 struct ccase *next_case = casereader_peek (r, 0);
892 const double j = case_data_idx (c, VALUE)->f;
893 double n_pos_eq = case_data_idx (c, N_POS_EQ)->f;
894 double n_pos_gt = case_data_idx (c, N_POS_GT)->f;
895 double n_neg_eq = case_data_idx (c, N_NEG_EQ)->f;
896 double n_neg_lt = case_data_idx (c, N_NEG_LT)->f;
898 if ( prev_case && j == case_data_idx (prev_case, VALUE)->f)
900 if ( 0 == case_data_idx (c, N_POS_EQ)->f)
902 n_pos_eq = case_data_idx (prev_case, N_POS_EQ)->f;
903 n_pos_gt = case_data_idx (prev_case, N_POS_GT)->f;
906 if ( 0 == case_data_idx (c, N_NEG_EQ)->f)
908 n_neg_eq = case_data_idx (prev_case, N_NEG_EQ)->f;
909 n_neg_lt = case_data_idx (prev_case, N_NEG_LT)->f;
913 if ( NULL == next_case || j != case_data_idx (next_case, VALUE)->f)
915 rs[i].auc += n_pos_gt * n_neg_eq + (n_pos_eq * n_neg_eq) / 2.0;
918 n_neg_eq * ( pow2 (n_pos_gt) + n_pos_gt * n_pos_eq + pow2 (n_pos_eq) / 3.0);
920 n_pos_eq * ( pow2 (n_neg_lt) + n_neg_lt * n_neg_eq + pow2 (n_neg_eq) / 3.0);
924 case_unref (next_case);
925 case_unref (prev_case);
926 prev_case = case_clone (c);
928 casereader_destroy (r);
929 case_unref (prev_case);
931 rs[i].auc /= rs[i].n1 * rs[i].n2;
933 rs[i].auc = 1 - rs[i].auc;
935 if ( roc->bi_neg_exp )
937 rs[i].q1hat = rs[i].auc / ( 2 - rs[i].auc);
938 rs[i].q2hat = 2 * pow2 (rs[i].auc) / ( 1 + rs[i].auc);
942 rs[i].q1hat /= rs[i].n2 * pow2 (rs[i].n1);
943 rs[i].q2hat /= rs[i].n1 * pow2 (rs[i].n2);
948 casereader_destroy (positives);
949 casereader_destroy (negatives);
951 caseproto_unref (n_proto);
952 subcase_destroy (&up_ordering);
953 subcase_destroy (&down_ordering);
955 output_roc (rs, roc);
957 for (i = 0 ; i < roc->n_vars; ++i)
958 casereader_destroy (rs[i].cutpoint_rdr);
964 show_auc (struct roc_state *rs, const struct cmd_roc *roc)
967 const int n_fields = roc->print_se ? 5 : 1;
968 const int n_cols = roc->n_vars > 1 ? n_fields + 1: n_fields;
969 const int n_rows = 2 + roc->n_vars;
970 struct tab_table *tbl = tab_create (n_cols, n_rows);
972 if ( roc->n_vars > 1)
973 tab_title (tbl, _("Area Under the Curve"));
975 tab_title (tbl, _("Area Under the Curve (%s)"), var_to_string (roc->vars[0]));
977 tab_headers (tbl, n_cols - n_fields, 0, 1, 0);
980 tab_text (tbl, n_cols - n_fields, 1, TAT_TITLE, _("Area"));
982 tab_hline (tbl, TAL_2, 0, n_cols - 1, 2);
993 tab_text (tbl, n_cols - 4, 1, TAT_TITLE, _("Std. Error"));
994 tab_text (tbl, n_cols - 3, 1, TAT_TITLE, _("Asymptotic Sig."));
996 tab_text (tbl, n_cols - 2, 1, TAT_TITLE, _("Lower Bound"));
997 tab_text (tbl, n_cols - 1, 1, TAT_TITLE, _("Upper Bound"));
999 tab_joint_text_format (tbl, n_cols - 2, 0, 4, 0,
1000 TAT_TITLE | TAB_CENTER,
1001 _("Asymp. %g%% Confidence Interval"), roc->ci);
1002 tab_vline (tbl, 0, n_cols - 1, 0, 0);
1003 tab_hline (tbl, TAL_1, n_cols - 2, n_cols - 1, 1);
1006 if ( roc->n_vars > 1)
1007 tab_text (tbl, 0, 1, TAT_TITLE, _("Variable under test"));
1009 if ( roc->n_vars > 1)
1010 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
1013 for ( i = 0 ; i < roc->n_vars ; ++i )
1015 tab_text (tbl, 0, 2 + i, TAT_TITLE, var_to_string (roc->vars[i]));
1017 tab_double (tbl, n_cols - n_fields, 2 + i, 0, rs[i].auc, NULL, RC_OTHER);
1019 if ( roc->print_se )
1022 const double sd_0_5 = sqrt ((rs[i].n1 + rs[i].n2 + 1) /
1023 (12 * rs[i].n1 * rs[i].n2));
1027 se = rs[i].auc * (1 - rs[i].auc) + (rs[i].n1 - 1) * (rs[i].q1hat - pow2 (rs[i].auc)) +
1028 (rs[i].n2 - 1) * (rs[i].q2hat - pow2 (rs[i].auc));
1030 se /= rs[i].n1 * rs[i].n2;
1034 tab_double (tbl, n_cols - 4, 2 + i, 0,
1038 ci = 1 - roc->ci / 100.0;
1039 yy = gsl_cdf_gaussian_Qinv (ci, se) ;
1041 tab_double (tbl, n_cols - 2, 2 + i, 0,
1045 tab_double (tbl, n_cols - 1, 2 + i, 0,
1049 tab_double (tbl, n_cols - 3, 2 + i, 0,
1050 2.0 * gsl_cdf_ugaussian_Q (fabs ((rs[i].auc - 0.5 ) / sd_0_5)),
1060 show_summary (const struct cmd_roc *roc)
1062 const int n_cols = 3;
1063 const int n_rows = 4;
1064 struct tab_table *tbl = tab_create (n_cols, n_rows);
1066 tab_title (tbl, _("Case Summary"));
1068 tab_headers (tbl, 1, 0, 2, 0);
1077 tab_hline (tbl, TAL_2, 0, n_cols - 1, 2);
1078 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
1081 tab_hline (tbl, TAL_2, 1, n_cols - 1, 1);
1082 tab_vline (tbl, TAL_1, 2, 1, n_rows - 1);
1085 tab_text (tbl, 0, 1, TAT_TITLE | TAB_LEFT, var_to_string (roc->state_var));
1086 tab_text (tbl, 1, 1, TAT_TITLE, _("Unweighted"));
1087 tab_text (tbl, 2, 1, TAT_TITLE, _("Weighted"));
1089 tab_joint_text (tbl, 1, 0, 2, 0,
1090 TAT_TITLE | TAB_CENTER,
1091 _("Valid N (listwise)"));
1094 tab_text (tbl, 0, 2, TAB_LEFT, _("Positive"));
1095 tab_text (tbl, 0, 3, TAB_LEFT, _("Negative"));
1098 tab_double (tbl, 1, 2, 0, roc->pos, NULL, RC_INTEGER);
1099 tab_double (tbl, 1, 3, 0, roc->neg, NULL, RC_INTEGER);
1101 tab_double (tbl, 2, 2, 0, roc->pos_weighted, NULL, RC_OTHER);
1102 tab_double (tbl, 2, 3, 0, roc->neg_weighted, NULL, RC_OTHER);
1109 show_coords (struct roc_state *rs, const struct cmd_roc *roc)
1113 const int n_cols = roc->n_vars > 1 ? 4 : 3;
1115 struct tab_table *tbl ;
1117 for (i = 0; i < roc->n_vars; ++i)
1118 n_rows += casereader_count_cases (rs[i].cutpoint_rdr);
1120 tbl = tab_create (n_cols, n_rows);
1122 if ( roc->n_vars > 1)
1123 tab_title (tbl, _("Coordinates of the Curve"));
1125 tab_title (tbl, _("Coordinates of the Curve (%s)"), var_to_string (roc->vars[0]));
1128 tab_headers (tbl, 1, 0, 1, 0);
1130 tab_hline (tbl, TAL_2, 0, n_cols - 1, 1);
1132 if ( roc->n_vars > 1)
1133 tab_text (tbl, 0, 0, TAT_TITLE, _("Test variable"));
1135 tab_text (tbl, n_cols - 3, 0, TAT_TITLE, _("Positive if greater than or equal to"));
1136 tab_text (tbl, n_cols - 2, 0, TAT_TITLE, _("Sensitivity"));
1137 tab_text (tbl, n_cols - 1, 0, TAT_TITLE, _("1 - Specificity"));
1146 if ( roc->n_vars > 1)
1147 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
1149 for (i = 0; i < roc->n_vars; ++i)
1152 struct casereader *r = casereader_clone (rs[i].cutpoint_rdr);
1154 if ( roc->n_vars > 1)
1155 tab_text (tbl, 0, x, TAT_TITLE, var_to_string (roc->vars[i]));
1158 tab_hline (tbl, TAL_1, 0, n_cols - 1, x);
1161 for (; (cc = casereader_read (r)) != NULL;
1162 case_unref (cc), x++)
1164 const double se = case_data_idx (cc, ROC_TP)->f /
1166 case_data_idx (cc, ROC_TP)->f
1168 case_data_idx (cc, ROC_FN)->f
1171 const double sp = case_data_idx (cc, ROC_TN)->f /
1173 case_data_idx (cc, ROC_TN)->f
1175 case_data_idx (cc, ROC_FP)->f
1178 tab_double (tbl, n_cols - 3, x, 0, case_data_idx (cc, ROC_CUTPOINT)->f,
1179 var_get_print_format (roc->vars[i]), RC_OTHER);
1181 tab_double (tbl, n_cols - 2, x, 0, se, NULL, RC_OTHER);
1182 tab_double (tbl, n_cols - 1, x, 0, 1 - sp, NULL, RC_OTHER);
1185 casereader_destroy (r);
1193 output_roc (struct roc_state *rs, const struct cmd_roc *roc)
1199 struct roc_chart *rc;
1202 rc = roc_chart_create (roc->reference);
1203 for (i = 0; i < roc->n_vars; i++)
1204 roc_chart_add_var (rc, var_get_name (roc->vars[i]),
1205 rs[i].cutpoint_rdr);
1206 roc_chart_submit (rc);
1211 if ( roc->print_coords )
1212 show_coords (rs, roc);