1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/stats/roc.h"
21 #include <gsl/gsl_cdf.h>
23 #include "data/casegrouper.h"
24 #include "data/casereader.h"
25 #include "data/casewriter.h"
26 #include "data/dataset.h"
27 #include "data/dictionary.h"
28 #include "data/format.h"
29 #include "data/subcase.h"
30 #include "language/command.h"
31 #include "language/lexer/lexer.h"
32 #include "language/lexer/value-parser.h"
33 #include "language/lexer/variable-parser.h"
34 #include "libpspp/misc.h"
35 #include "math/sort.h"
36 #include "output/chart-item.h"
37 #include "output/charts/roc-chart.h"
38 #include "output/tab.h"
41 #define _(msgid) gettext (msgid)
42 #define N_(msgid) msgid
47 const struct variable **vars;
48 const struct dictionary *dict;
50 const struct variable *state_var;
51 union value state_value;
52 size_t state_var_width;
54 /* Plot the roc curve */
56 /* Plot the reference line */
63 bool bi_neg_exp; /* True iff the bi-negative exponential critieria
65 enum mv_class exclude;
67 bool invert ; /* True iff a smaller test result variable indicates
76 static int run_roc (struct dataset *ds, struct cmd_roc *roc);
79 cmd_roc (struct lexer *lexer, struct dataset *ds)
82 const struct dictionary *dict = dataset_dict (ds);
87 roc.print_coords = false;
90 roc.reference = false;
92 roc.bi_neg_exp = false;
94 roc.pos = roc.pos_weighted = 0;
95 roc.neg = roc.neg_weighted = 0;
96 roc.dict = dataset_dict (ds);
98 roc.state_var_width = -1;
100 lex_match (lexer, T_SLASH);
101 if (!parse_variables_const (lexer, dict, &roc.vars, &roc.n_vars,
102 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
105 if ( ! lex_force_match (lexer, T_BY))
110 roc.state_var = parse_variable (lexer, dict);
116 if ( !lex_force_match (lexer, T_LPAREN))
121 roc.state_var_width = var_get_width (roc.state_var);
122 value_init (&roc.state_value, roc.state_var_width);
123 parse_value (lexer, &roc.state_value, roc.state_var);
126 if ( !lex_force_match (lexer, T_RPAREN))
131 while (lex_token (lexer) != T_ENDCMD)
133 lex_match (lexer, T_SLASH);
134 if (lex_match_id (lexer, "MISSING"))
136 lex_match (lexer, T_EQUALS);
137 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
139 if (lex_match_id (lexer, "INCLUDE"))
141 roc.exclude = MV_SYSTEM;
143 else if (lex_match_id (lexer, "EXCLUDE"))
145 roc.exclude = MV_ANY;
149 lex_error (lexer, NULL);
154 else if (lex_match_id (lexer, "PLOT"))
156 lex_match (lexer, T_EQUALS);
157 if (lex_match_id (lexer, "CURVE"))
160 if (lex_match (lexer, T_LPAREN))
162 roc.reference = true;
163 lex_force_match_id (lexer, "REFERENCE");
164 lex_force_match (lexer, T_RPAREN);
167 else if (lex_match_id (lexer, "NONE"))
173 lex_error (lexer, NULL);
177 else if (lex_match_id (lexer, "PRINT"))
179 lex_match (lexer, T_EQUALS);
180 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
182 if (lex_match_id (lexer, "SE"))
186 else if (lex_match_id (lexer, "COORDINATES"))
188 roc.print_coords = true;
192 lex_error (lexer, NULL);
197 else if (lex_match_id (lexer, "CRITERIA"))
199 lex_match (lexer, T_EQUALS);
200 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
202 if (lex_match_id (lexer, "CUTOFF"))
204 lex_force_match (lexer, T_LPAREN);
205 if (lex_match_id (lexer, "INCLUDE"))
207 roc.exclude = MV_SYSTEM;
209 else if (lex_match_id (lexer, "EXCLUDE"))
211 roc.exclude = MV_USER | MV_SYSTEM;
215 lex_error (lexer, NULL);
218 lex_force_match (lexer, T_RPAREN);
220 else if (lex_match_id (lexer, "TESTPOS"))
222 lex_force_match (lexer, T_LPAREN);
223 if (lex_match_id (lexer, "LARGE"))
227 else if (lex_match_id (lexer, "SMALL"))
233 lex_error (lexer, NULL);
236 lex_force_match (lexer, T_RPAREN);
238 else if (lex_match_id (lexer, "CI"))
240 lex_force_match (lexer, T_LPAREN);
241 lex_force_num (lexer);
242 roc.ci = lex_number (lexer);
244 lex_force_match (lexer, T_RPAREN);
246 else if (lex_match_id (lexer, "DISTRIBUTION"))
248 lex_force_match (lexer, T_LPAREN);
249 if (lex_match_id (lexer, "FREE"))
251 roc.bi_neg_exp = false;
253 else if (lex_match_id (lexer, "NEGEXPO"))
255 roc.bi_neg_exp = true;
259 lex_error (lexer, NULL);
262 lex_force_match (lexer, T_RPAREN);
266 lex_error (lexer, NULL);
273 lex_error (lexer, NULL);
278 if ( ! run_roc (ds, &roc))
282 value_destroy (&roc.state_value, roc.state_var_width);
288 value_destroy (&roc.state_value, roc.state_var_width);
297 do_roc (struct cmd_roc *roc, struct casereader *group, struct dictionary *dict);
301 run_roc (struct dataset *ds, struct cmd_roc *roc)
303 struct dictionary *dict = dataset_dict (ds);
305 struct casereader *group;
307 struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict);
308 while (casegrouper_get_next_group (grouper, &group))
310 do_roc (roc, group, dataset_dict (ds));
312 ok = casegrouper_destroy (grouper);
313 ok = proc_commit (ds) && ok;
320 dump_casereader (struct casereader *reader)
323 struct casereader *r = casereader_clone (reader);
325 for ( ; (c = casereader_read (r) ); case_unref (c))
328 for (i = 0 ; i < case_get_value_cnt (c); ++i)
330 printf ("%g ", case_data_idx (c, i)->f);
335 casereader_destroy (r);
341 Return true iff the state variable indicates that C has positive actual state.
343 As a side effect, this function also accumulates the roc->{pos,neg} and
344 roc->{pos,neg}_weighted counts.
347 match_positives (const struct ccase *c, void *aux)
349 struct cmd_roc *roc = aux;
350 const struct variable *wv = dict_get_weight (roc->dict);
351 const double weight = wv ? case_data (c, wv)->f : 1.0;
353 const bool positive =
354 ( 0 == value_compare_3way (case_data (c, roc->state_var), &roc->state_value,
355 var_get_width (roc->state_var)));
360 roc->pos_weighted += weight;
365 roc->neg_weighted += weight;
376 /* Some intermediate state for calculating the cutpoints and the
377 standard error values */
380 double auc; /* Area under the curve */
382 double n1; /* total weight of positives */
383 double n2; /* total weight of negatives */
385 /* intermediates for standard error */
389 /* intermediates for cutpoints */
390 struct casewriter *cutpoint_wtr;
391 struct casereader *cutpoint_rdr;
398 Return a new casereader based upon CUTPOINT_RDR.
399 The number of "positive" cases are placed into
400 the position TRUE_INDEX, and the number of "negative" cases
402 POS_COND and RESULT determine the semantics of what is
404 WEIGHT is the value of a single count.
406 static struct casereader *
407 accumulate_counts (struct casereader *input,
408 double result, double weight,
409 bool (*pos_cond) (double, double),
410 int true_index, int false_index)
412 const struct caseproto *proto = casereader_get_proto (input);
413 struct casewriter *w =
414 autopaging_writer_create (proto);
416 double prev_cp = SYSMIS;
418 for ( ; (cpc = casereader_read (input) ); case_unref (cpc))
420 struct ccase *new_case;
421 const double cp = case_data_idx (cpc, ROC_CUTPOINT)->f;
423 assert (cp != SYSMIS);
425 /* We don't want duplicates here */
429 new_case = case_clone (cpc);
431 if ( pos_cond (result, cp))
432 case_data_rw_idx (new_case, true_index)->f += weight;
434 case_data_rw_idx (new_case, false_index)->f += weight;
438 casewriter_write (w, new_case);
440 casereader_destroy (input);
442 return casewriter_make_reader (w);
447 static void output_roc (struct roc_state *rs, const struct cmd_roc *roc);
450 This function does 3 things:
452 1. Counts the number of cases which are equal to every other case in READER,
453 and those cases for which the relationship between it and every other case
454 satifies PRED (normally either > or <). VAR is variable defining a case's value
457 2. Counts the number of true and false cases in reader, and populates
458 CUTPOINT_RDR accordingly. TRUE_INDEX and FALSE_INDEX are the indices
459 which receive these values. POS_COND is the condition defining true
462 3. CC is filled with the cumulative weight of all cases of READER.
464 static struct casereader *
465 process_group (const struct variable *var, struct casereader *reader,
466 bool (*pred) (double, double),
467 const struct dictionary *dict,
469 struct casereader **cutpoint_rdr,
470 bool (*pos_cond) (double, double),
474 const struct variable *w = dict_get_weight (dict);
476 struct casereader *r1 =
477 casereader_create_distinct (sort_execute_1var (reader, var), var, w);
479 const int weight_idx = w ? var_get_case_index (w) :
480 caseproto_get_n_widths (casereader_get_proto (r1)) - 1;
484 struct casereader *rclone = casereader_clone (r1);
485 struct casewriter *wtr;
486 struct caseproto *proto = caseproto_create ();
488 proto = caseproto_add_width (proto, 0);
489 proto = caseproto_add_width (proto, 0);
490 proto = caseproto_add_width (proto, 0);
492 wtr = autopaging_writer_create (proto);
496 for ( ; (c1 = casereader_read (r1) ); case_unref (c1))
498 struct ccase *new_case = case_create (proto);
500 struct casereader *r2 = casereader_clone (rclone);
502 const double weight1 = case_data_idx (c1, weight_idx)->f;
503 const double d1 = case_data (c1, var)->f;
507 *cutpoint_rdr = accumulate_counts (*cutpoint_rdr, d1, weight1,
509 true_index, false_index);
513 for ( ; (c2 = casereader_read (r2) ); case_unref (c2))
515 const double d2 = case_data (c2, var)->f;
516 const double weight2 = case_data_idx (c2, weight_idx)->f;
523 else if ( pred (d2, d1))
529 case_data_rw_idx (new_case, VALUE)->f = d1;
530 case_data_rw_idx (new_case, N_EQ)->f = n_eq;
531 case_data_rw_idx (new_case, N_PRED)->f = n_pred;
533 casewriter_write (wtr, new_case);
535 casereader_destroy (r2);
539 casereader_destroy (r1);
540 casereader_destroy (rclone);
542 caseproto_unref (proto);
544 return casewriter_make_reader (wtr);
547 /* Some more indeces into case data */
548 #define N_POS_EQ 1 /* number of positive cases with values equal to n */
549 #define N_POS_GT 2 /* number of postive cases with values greater than n */
550 #define N_NEG_EQ 3 /* number of negative cases with values equal to n */
551 #define N_NEG_LT 4 /* number of negative cases with values less than n */
554 gt (double d1, double d2)
561 ge (double d1, double d2)
567 lt (double d1, double d2)
574 Return a casereader with width 3,
575 populated with cases based upon READER.
576 The cases will have the values:
577 (N, number of cases equal to N, number of cases greater than N)
578 As a side effect, update RS->n1 with the number of positive cases.
580 static struct casereader *
581 process_positive_group (const struct variable *var, struct casereader *reader,
582 const struct dictionary *dict,
583 struct roc_state *rs)
585 return process_group (var, reader, gt, dict, &rs->n1,
592 Return a casereader with width 3,
593 populated with cases based upon READER.
594 The cases will have the values:
595 (N, number of cases equal to N, number of cases less than N)
596 As a side effect, update RS->n2 with the number of negative cases.
598 static struct casereader *
599 process_negative_group (const struct variable *var, struct casereader *reader,
600 const struct dictionary *dict,
601 struct roc_state *rs)
603 return process_group (var, reader, lt, dict, &rs->n2,
613 append_cutpoint (struct casewriter *writer, double cutpoint)
615 struct ccase *cc = case_create (casewriter_get_proto (writer));
617 case_data_rw_idx (cc, ROC_CUTPOINT)->f = cutpoint;
618 case_data_rw_idx (cc, ROC_TP)->f = 0;
619 case_data_rw_idx (cc, ROC_FN)->f = 0;
620 case_data_rw_idx (cc, ROC_TN)->f = 0;
621 case_data_rw_idx (cc, ROC_FP)->f = 0;
623 casewriter_write (writer, cc);
628 Create and initialise the rs[x].cutpoint_rdr casereaders. That is, the readers will
629 be created with width 5, ready to take the values (cutpoint, ROC_TP, ROC_FN, ROC_TN, ROC_FP), and the
630 reader will be populated with its final number of cases.
631 However on exit from this function, only ROC_CUTPOINT entries will be set to their final
632 value. The other entries will be initialised to zero.
635 prepare_cutpoints (struct cmd_roc *roc, struct roc_state *rs, struct casereader *input)
638 struct casereader *r = casereader_clone (input);
642 struct caseproto *proto = caseproto_create ();
643 struct subcase ordering;
644 subcase_init (&ordering, ROC_CUTPOINT, 0, SC_ASCEND);
646 proto = caseproto_add_width (proto, 0); /* cutpoint */
647 proto = caseproto_add_width (proto, 0); /* ROC_TP */
648 proto = caseproto_add_width (proto, 0); /* ROC_FN */
649 proto = caseproto_add_width (proto, 0); /* ROC_TN */
650 proto = caseproto_add_width (proto, 0); /* ROC_FP */
652 for (i = 0 ; i < roc->n_vars; ++i)
654 rs[i].cutpoint_wtr = sort_create_writer (&ordering, proto);
655 rs[i].prev_result = SYSMIS;
656 rs[i].max = -DBL_MAX;
660 caseproto_unref (proto);
661 subcase_destroy (&ordering);
664 for (; (c = casereader_read (r)) != NULL; case_unref (c))
666 for (i = 0 ; i < roc->n_vars; ++i)
668 const union value *v = case_data (c, roc->vars[i]);
669 const double result = v->f;
671 if ( mv_is_value_missing (var_get_missing_values (roc->vars[i]), v, roc->exclude))
674 minimize (&rs[i].min, result);
675 maximize (&rs[i].max, result);
677 if ( rs[i].prev_result != SYSMIS && rs[i].prev_result != result )
679 const double mean = (result + rs[i].prev_result ) / 2.0;
680 append_cutpoint (rs[i].cutpoint_wtr, mean);
683 rs[i].prev_result = result;
686 casereader_destroy (r);
689 /* Append the min and max cutpoints */
690 for (i = 0 ; i < roc->n_vars; ++i)
692 append_cutpoint (rs[i].cutpoint_wtr, rs[i].min - 1);
693 append_cutpoint (rs[i].cutpoint_wtr, rs[i].max + 1);
695 rs[i].cutpoint_rdr = casewriter_make_reader (rs[i].cutpoint_wtr);
700 do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict)
704 struct roc_state *rs = xcalloc (roc->n_vars, sizeof *rs);
706 struct casereader *negatives = NULL;
707 struct casereader *positives = NULL;
709 struct caseproto *n_proto = NULL;
711 struct subcase up_ordering;
712 struct subcase down_ordering;
714 struct casewriter *neg_wtr = NULL;
716 struct casereader *input = casereader_create_filter_missing (reader,
717 roc->vars, roc->n_vars,
722 input = casereader_create_filter_missing (input,
728 neg_wtr = autopaging_writer_create (casereader_get_proto (input));
730 prepare_cutpoints (roc, rs, input);
733 /* Separate the positive actual state cases from the negative ones */
735 casereader_create_filter_func (input,
741 n_proto = caseproto_create ();
743 n_proto = caseproto_add_width (n_proto, 0);
744 n_proto = caseproto_add_width (n_proto, 0);
745 n_proto = caseproto_add_width (n_proto, 0);
746 n_proto = caseproto_add_width (n_proto, 0);
747 n_proto = caseproto_add_width (n_proto, 0);
749 subcase_init (&up_ordering, VALUE, 0, SC_ASCEND);
750 subcase_init (&down_ordering, VALUE, 0, SC_DESCEND);
752 for (i = 0 ; i < roc->n_vars; ++i)
754 struct casewriter *w = NULL;
755 struct casereader *r = NULL;
760 struct casereader *n_neg_reader ;
761 const struct variable *var = roc->vars[i];
763 struct casereader *neg ;
764 struct casereader *pos = casereader_clone (positives);
766 struct casereader *n_pos_reader =
767 process_positive_group (var, pos, dict, &rs[i]);
769 if ( negatives == NULL)
771 negatives = casewriter_make_reader (neg_wtr);
774 neg = casereader_clone (negatives);
776 n_neg_reader = process_negative_group (var, neg, dict, &rs[i]);
778 /* Merge the n_pos and n_neg casereaders */
779 w = sort_create_writer (&up_ordering, n_proto);
780 for ( ; (cpos = casereader_read (n_pos_reader) ); case_unref (cpos))
782 struct ccase *pos_case = case_create (n_proto);
784 const double jpos = case_data_idx (cpos, VALUE)->f;
786 while ((cneg = casereader_read (n_neg_reader)))
788 struct ccase *nc = case_create (n_proto);
790 const double jneg = case_data_idx (cneg, VALUE)->f;
792 case_data_rw_idx (nc, VALUE)->f = jneg;
793 case_data_rw_idx (nc, N_POS_EQ)->f = 0;
795 case_data_rw_idx (nc, N_POS_GT)->f = SYSMIS;
797 *case_data_rw_idx (nc, N_NEG_EQ) = *case_data_idx (cneg, N_EQ);
798 *case_data_rw_idx (nc, N_NEG_LT) = *case_data_idx (cneg, N_PRED);
800 casewriter_write (w, nc);
807 case_data_rw_idx (pos_case, VALUE)->f = jpos;
808 *case_data_rw_idx (pos_case, N_POS_EQ) = *case_data_idx (cpos, N_EQ);
809 *case_data_rw_idx (pos_case, N_POS_GT) = *case_data_idx (cpos, N_PRED);
810 case_data_rw_idx (pos_case, N_NEG_EQ)->f = 0;
811 case_data_rw_idx (pos_case, N_NEG_LT)->f = SYSMIS;
813 casewriter_write (w, pos_case);
816 casereader_destroy (n_pos_reader);
817 casereader_destroy (n_neg_reader);
819 /* These aren't used anymore */
823 r = casewriter_make_reader (w);
825 /* Propagate the N_POS_GT values from the positive cases
826 to the negative ones */
828 double prev_pos_gt = rs[i].n1;
829 w = sort_create_writer (&down_ordering, n_proto);
831 for ( ; (c = casereader_read (r) ); case_unref (c))
833 double n_pos_gt = case_data_idx (c, N_POS_GT)->f;
834 struct ccase *nc = case_clone (c);
836 if ( n_pos_gt == SYSMIS)
838 n_pos_gt = prev_pos_gt;
839 case_data_rw_idx (nc, N_POS_GT)->f = n_pos_gt;
842 casewriter_write (w, nc);
843 prev_pos_gt = n_pos_gt;
846 casereader_destroy (r);
847 r = casewriter_make_reader (w);
850 /* Propagate the N_NEG_LT values from the negative cases
851 to the positive ones */
853 double prev_neg_lt = rs[i].n2;
854 w = sort_create_writer (&up_ordering, n_proto);
856 for ( ; (c = casereader_read (r) ); case_unref (c))
858 double n_neg_lt = case_data_idx (c, N_NEG_LT)->f;
859 struct ccase *nc = case_clone (c);
861 if ( n_neg_lt == SYSMIS)
863 n_neg_lt = prev_neg_lt;
864 case_data_rw_idx (nc, N_NEG_LT)->f = n_neg_lt;
867 casewriter_write (w, nc);
868 prev_neg_lt = n_neg_lt;
871 casereader_destroy (r);
872 r = casewriter_make_reader (w);
876 struct ccase *prev_case = NULL;
877 for ( ; (c = casereader_read (r) ); case_unref (c))
879 struct ccase *next_case = casereader_peek (r, 0);
881 const double j = case_data_idx (c, VALUE)->f;
882 double n_pos_eq = case_data_idx (c, N_POS_EQ)->f;
883 double n_pos_gt = case_data_idx (c, N_POS_GT)->f;
884 double n_neg_eq = case_data_idx (c, N_NEG_EQ)->f;
885 double n_neg_lt = case_data_idx (c, N_NEG_LT)->f;
887 if ( prev_case && j == case_data_idx (prev_case, VALUE)->f)
889 if ( 0 == case_data_idx (c, N_POS_EQ)->f)
891 n_pos_eq = case_data_idx (prev_case, N_POS_EQ)->f;
892 n_pos_gt = case_data_idx (prev_case, N_POS_GT)->f;
895 if ( 0 == case_data_idx (c, N_NEG_EQ)->f)
897 n_neg_eq = case_data_idx (prev_case, N_NEG_EQ)->f;
898 n_neg_lt = case_data_idx (prev_case, N_NEG_LT)->f;
902 if ( NULL == next_case || j != case_data_idx (next_case, VALUE)->f)
904 rs[i].auc += n_pos_gt * n_neg_eq + (n_pos_eq * n_neg_eq) / 2.0;
907 n_neg_eq * ( pow2 (n_pos_gt) + n_pos_gt * n_pos_eq + pow2 (n_pos_eq) / 3.0);
909 n_pos_eq * ( pow2 (n_neg_lt) + n_neg_lt * n_neg_eq + pow2 (n_neg_eq) / 3.0);
913 case_unref (next_case);
914 case_unref (prev_case);
915 prev_case = case_clone (c);
917 casereader_destroy (r);
918 case_unref (prev_case);
920 rs[i].auc /= rs[i].n1 * rs[i].n2;
922 rs[i].auc = 1 - rs[i].auc;
924 if ( roc->bi_neg_exp )
926 rs[i].q1hat = rs[i].auc / ( 2 - rs[i].auc);
927 rs[i].q2hat = 2 * pow2 (rs[i].auc) / ( 1 + rs[i].auc);
931 rs[i].q1hat /= rs[i].n2 * pow2 (rs[i].n1);
932 rs[i].q2hat /= rs[i].n1 * pow2 (rs[i].n2);
937 casereader_destroy (positives);
938 casereader_destroy (negatives);
940 caseproto_unref (n_proto);
941 subcase_destroy (&up_ordering);
942 subcase_destroy (&down_ordering);
944 output_roc (rs, roc);
946 for (i = 0 ; i < roc->n_vars; ++i)
947 casereader_destroy (rs[i].cutpoint_rdr);
953 show_auc (struct roc_state *rs, const struct cmd_roc *roc)
956 const int n_fields = roc->print_se ? 5 : 1;
957 const int n_cols = roc->n_vars > 1 ? n_fields + 1: n_fields;
958 const int n_rows = 2 + roc->n_vars;
959 struct tab_table *tbl = tab_create (n_cols, n_rows);
961 if ( roc->n_vars > 1)
962 tab_title (tbl, _("Area Under the Curve"));
964 tab_title (tbl, _("Area Under the Curve (%s)"), var_to_string (roc->vars[0]));
966 tab_headers (tbl, n_cols - n_fields, 0, 1, 0);
969 tab_text (tbl, n_cols - n_fields, 1, TAT_TITLE, _("Area"));
971 tab_hline (tbl, TAL_2, 0, n_cols - 1, 2);
982 tab_text (tbl, n_cols - 4, 1, TAT_TITLE, _("Std. Error"));
983 tab_text (tbl, n_cols - 3, 1, TAT_TITLE, _("Asymptotic Sig."));
985 tab_text (tbl, n_cols - 2, 1, TAT_TITLE, _("Lower Bound"));
986 tab_text (tbl, n_cols - 1, 1, TAT_TITLE, _("Upper Bound"));
988 tab_joint_text_format (tbl, n_cols - 2, 0, 4, 0,
989 TAT_TITLE | TAB_CENTER,
990 _("Asymp. %g%% Confidence Interval"), roc->ci);
991 tab_vline (tbl, 0, n_cols - 1, 0, 0);
992 tab_hline (tbl, TAL_1, n_cols - 2, n_cols - 1, 1);
995 if ( roc->n_vars > 1)
996 tab_text (tbl, 0, 1, TAT_TITLE, _("Variable under test"));
998 if ( roc->n_vars > 1)
999 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
1002 for ( i = 0 ; i < roc->n_vars ; ++i )
1004 tab_text (tbl, 0, 2 + i, TAT_TITLE, var_to_string (roc->vars[i]));
1006 tab_double (tbl, n_cols - n_fields, 2 + i, 0, rs[i].auc, NULL, RC_OTHER);
1008 if ( roc->print_se )
1011 const double sd_0_5 = sqrt ((rs[i].n1 + rs[i].n2 + 1) /
1012 (12 * rs[i].n1 * rs[i].n2));
1016 se = rs[i].auc * (1 - rs[i].auc) + (rs[i].n1 - 1) * (rs[i].q1hat - pow2 (rs[i].auc)) +
1017 (rs[i].n2 - 1) * (rs[i].q2hat - pow2 (rs[i].auc));
1019 se /= rs[i].n1 * rs[i].n2;
1023 tab_double (tbl, n_cols - 4, 2 + i, 0,
1027 ci = 1 - roc->ci / 100.0;
1028 yy = gsl_cdf_gaussian_Qinv (ci, se) ;
1030 tab_double (tbl, n_cols - 2, 2 + i, 0,
1034 tab_double (tbl, n_cols - 1, 2 + i, 0,
1038 tab_double (tbl, n_cols - 3, 2 + i, 0,
1039 2.0 * gsl_cdf_ugaussian_Q (fabs ((rs[i].auc - 0.5 ) / sd_0_5)),
1049 show_summary (const struct cmd_roc *roc)
1051 const int n_cols = 3;
1052 const int n_rows = 4;
1053 struct tab_table *tbl = tab_create (n_cols, n_rows);
1055 tab_title (tbl, _("Case Summary"));
1057 tab_headers (tbl, 1, 0, 2, 0);
1066 tab_hline (tbl, TAL_2, 0, n_cols - 1, 2);
1067 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
1070 tab_hline (tbl, TAL_2, 1, n_cols - 1, 1);
1071 tab_vline (tbl, TAL_1, 2, 1, n_rows - 1);
1074 tab_text (tbl, 0, 1, TAT_TITLE | TAB_LEFT, var_to_string (roc->state_var));
1075 tab_text (tbl, 1, 1, TAT_TITLE, _("Unweighted"));
1076 tab_text (tbl, 2, 1, TAT_TITLE, _("Weighted"));
1078 tab_joint_text (tbl, 1, 0, 2, 0,
1079 TAT_TITLE | TAB_CENTER,
1080 _("Valid N (listwise)"));
1083 tab_text (tbl, 0, 2, TAB_LEFT, _("Positive"));
1084 tab_text (tbl, 0, 3, TAB_LEFT, _("Negative"));
1087 tab_double (tbl, 1, 2, 0, roc->pos, NULL, RC_INTEGER);
1088 tab_double (tbl, 1, 3, 0, roc->neg, NULL, RC_INTEGER);
1090 tab_double (tbl, 2, 2, 0, roc->pos_weighted, NULL, RC_OTHER);
1091 tab_double (tbl, 2, 3, 0, roc->neg_weighted, NULL, RC_OTHER);
1098 show_coords (struct roc_state *rs, const struct cmd_roc *roc)
1102 const int n_cols = roc->n_vars > 1 ? 4 : 3;
1104 struct tab_table *tbl ;
1106 for (i = 0; i < roc->n_vars; ++i)
1107 n_rows += casereader_count_cases (rs[i].cutpoint_rdr);
1109 tbl = tab_create (n_cols, n_rows);
1111 if ( roc->n_vars > 1)
1112 tab_title (tbl, _("Coordinates of the Curve"));
1114 tab_title (tbl, _("Coordinates of the Curve (%s)"), var_to_string (roc->vars[0]));
1117 tab_headers (tbl, 1, 0, 1, 0);
1119 tab_hline (tbl, TAL_2, 0, n_cols - 1, 1);
1121 if ( roc->n_vars > 1)
1122 tab_text (tbl, 0, 0, TAT_TITLE, _("Test variable"));
1124 tab_text (tbl, n_cols - 3, 0, TAT_TITLE, _("Positive if greater than or equal to"));
1125 tab_text (tbl, n_cols - 2, 0, TAT_TITLE, _("Sensitivity"));
1126 tab_text (tbl, n_cols - 1, 0, TAT_TITLE, _("1 - Specificity"));
1135 if ( roc->n_vars > 1)
1136 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
1138 for (i = 0; i < roc->n_vars; ++i)
1141 struct casereader *r = casereader_clone (rs[i].cutpoint_rdr);
1143 if ( roc->n_vars > 1)
1144 tab_text (tbl, 0, x, TAT_TITLE, var_to_string (roc->vars[i]));
1147 tab_hline (tbl, TAL_1, 0, n_cols - 1, x);
1150 for (; (cc = casereader_read (r)) != NULL;
1151 case_unref (cc), x++)
1153 const double se = case_data_idx (cc, ROC_TP)->f /
1155 case_data_idx (cc, ROC_TP)->f
1157 case_data_idx (cc, ROC_FN)->f
1160 const double sp = case_data_idx (cc, ROC_TN)->f /
1162 case_data_idx (cc, ROC_TN)->f
1164 case_data_idx (cc, ROC_FP)->f
1167 tab_double (tbl, n_cols - 3, x, 0, case_data_idx (cc, ROC_CUTPOINT)->f,
1168 var_get_print_format (roc->vars[i]), RC_OTHER);
1170 tab_double (tbl, n_cols - 2, x, 0, se, NULL, RC_OTHER);
1171 tab_double (tbl, n_cols - 1, x, 0, 1 - sp, NULL, RC_OTHER);
1174 casereader_destroy (r);
1182 output_roc (struct roc_state *rs, const struct cmd_roc *roc)
1188 struct roc_chart *rc;
1191 rc = roc_chart_create (roc->reference);
1192 for (i = 0; i < roc->n_vars; i++)
1193 roc_chart_add_var (rc, var_get_name (roc->vars[i]),
1194 rs[i].cutpoint_rdr);
1195 roc_chart_submit (rc);
1200 if ( roc->print_coords )
1201 show_coords (rs, roc);