1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/stats/roc.h"
21 #include <gsl/gsl_cdf.h>
23 #include "data/casegrouper.h"
24 #include "data/casereader.h"
25 #include "data/casewriter.h"
26 #include "data/dataset.h"
27 #include "data/dictionary.h"
28 #include "data/format.h"
29 #include "data/subcase.h"
30 #include "language/command.h"
31 #include "language/lexer/lexer.h"
32 #include "language/lexer/value-parser.h"
33 #include "language/lexer/variable-parser.h"
34 #include "libpspp/misc.h"
35 #include "math/sort.h"
36 #include "output/chart-item.h"
37 #include "output/charts/roc-chart.h"
38 #include "output/tab.h"
41 #define _(msgid) gettext (msgid)
42 #define N_(msgid) msgid
47 const struct variable **vars;
48 const struct dictionary *dict;
50 const struct variable *state_var;
51 union value state_value;
52 size_t state_var_width;
54 /* Plot the roc curve */
56 /* Plot the reference line */
63 bool bi_neg_exp; /* True iff the bi-negative exponential critieria
65 enum mv_class exclude;
67 bool invert ; /* True iff a smaller test result variable indicates
76 static int run_roc (struct dataset *ds, struct cmd_roc *roc);
79 cmd_roc (struct lexer *lexer, struct dataset *ds)
82 const struct dictionary *dict = dataset_dict (ds);
87 roc.print_coords = false;
90 roc.reference = false;
92 roc.bi_neg_exp = false;
94 roc.pos = roc.pos_weighted = 0;
95 roc.neg = roc.neg_weighted = 0;
96 roc.dict = dataset_dict (ds);
98 roc.state_var_width = -1;
100 lex_match (lexer, T_SLASH);
101 if (!parse_variables_const (lexer, dict, &roc.vars, &roc.n_vars,
102 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
105 if ( ! lex_force_match (lexer, T_BY))
110 roc.state_var = parse_variable (lexer, dict);
116 if ( !lex_force_match (lexer, T_LPAREN))
121 roc.state_var_width = var_get_width (roc.state_var);
122 value_init (&roc.state_value, roc.state_var_width);
123 parse_value (lexer, &roc.state_value, roc.state_var);
126 if ( !lex_force_match (lexer, T_RPAREN))
131 while (lex_token (lexer) != T_ENDCMD)
133 lex_match (lexer, T_SLASH);
134 if (lex_match_id (lexer, "MISSING"))
136 lex_match (lexer, T_EQUALS);
137 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
139 if (lex_match_id (lexer, "INCLUDE"))
141 roc.exclude = MV_SYSTEM;
143 else if (lex_match_id (lexer, "EXCLUDE"))
145 roc.exclude = MV_ANY;
149 lex_error (lexer, NULL);
154 else if (lex_match_id (lexer, "PLOT"))
156 lex_match (lexer, T_EQUALS);
157 if (lex_match_id (lexer, "CURVE"))
160 if (lex_match (lexer, T_LPAREN))
162 roc.reference = true;
163 lex_force_match_id (lexer, "REFERENCE");
164 lex_force_match (lexer, T_RPAREN);
167 else if (lex_match_id (lexer, "NONE"))
173 lex_error (lexer, NULL);
177 else if (lex_match_id (lexer, "PRINT"))
179 lex_match (lexer, T_EQUALS);
180 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
182 if (lex_match_id (lexer, "SE"))
186 else if (lex_match_id (lexer, "COORDINATES"))
188 roc.print_coords = true;
192 lex_error (lexer, NULL);
197 else if (lex_match_id (lexer, "CRITERIA"))
199 lex_match (lexer, T_EQUALS);
200 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
202 if (lex_match_id (lexer, "CUTOFF"))
204 lex_force_match (lexer, T_LPAREN);
205 if (lex_match_id (lexer, "INCLUDE"))
207 roc.exclude = MV_SYSTEM;
209 else if (lex_match_id (lexer, "EXCLUDE"))
211 roc.exclude = MV_USER | MV_SYSTEM;
215 lex_error (lexer, NULL);
218 lex_force_match (lexer, T_RPAREN);
220 else if (lex_match_id (lexer, "TESTPOS"))
222 lex_force_match (lexer, T_LPAREN);
223 if (lex_match_id (lexer, "LARGE"))
227 else if (lex_match_id (lexer, "SMALL"))
233 lex_error (lexer, NULL);
236 lex_force_match (lexer, T_RPAREN);
238 else if (lex_match_id (lexer, "CI"))
240 if (!lex_force_match (lexer, T_LPAREN))
242 if (! lex_force_num (lexer))
244 roc.ci = lex_number (lexer);
246 if (!lex_force_match (lexer, T_RPAREN))
249 else if (lex_match_id (lexer, "DISTRIBUTION"))
251 if (!lex_force_match (lexer, T_LPAREN))
253 if (lex_match_id (lexer, "FREE"))
255 roc.bi_neg_exp = false;
257 else if (lex_match_id (lexer, "NEGEXPO"))
259 roc.bi_neg_exp = true;
263 lex_error (lexer, NULL);
266 if (!lex_force_match (lexer, T_RPAREN))
271 lex_error (lexer, NULL);
278 lex_error (lexer, NULL);
283 if ( ! run_roc (ds, &roc))
287 value_destroy (&roc.state_value, roc.state_var_width);
293 value_destroy (&roc.state_value, roc.state_var_width);
302 do_roc (struct cmd_roc *roc, struct casereader *group, struct dictionary *dict);
306 run_roc (struct dataset *ds, struct cmd_roc *roc)
308 struct dictionary *dict = dataset_dict (ds);
310 struct casereader *group;
312 struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict);
313 while (casegrouper_get_next_group (grouper, &group))
315 do_roc (roc, group, dataset_dict (ds));
317 ok = casegrouper_destroy (grouper);
318 ok = proc_commit (ds) && ok;
325 dump_casereader (struct casereader *reader)
328 struct casereader *r = casereader_clone (reader);
330 for ( ; (c = casereader_read (r) ); case_unref (c))
333 for (i = 0 ; i < case_get_value_cnt (c); ++i)
335 printf ("%g ", case_data_idx (c, i)->f);
340 casereader_destroy (r);
346 Return true iff the state variable indicates that C has positive actual state.
348 As a side effect, this function also accumulates the roc->{pos,neg} and
349 roc->{pos,neg}_weighted counts.
352 match_positives (const struct ccase *c, void *aux)
354 struct cmd_roc *roc = aux;
355 const struct variable *wv = dict_get_weight (roc->dict);
356 const double weight = wv ? case_data (c, wv)->f : 1.0;
358 const bool positive =
359 ( 0 == value_compare_3way (case_data (c, roc->state_var), &roc->state_value,
360 var_get_width (roc->state_var)));
365 roc->pos_weighted += weight;
370 roc->neg_weighted += weight;
381 /* Some intermediate state for calculating the cutpoints and the
382 standard error values */
385 double auc; /* Area under the curve */
387 double n1; /* total weight of positives */
388 double n2; /* total weight of negatives */
390 /* intermediates for standard error */
394 /* intermediates for cutpoints */
395 struct casewriter *cutpoint_wtr;
396 struct casereader *cutpoint_rdr;
403 Return a new casereader based upon CUTPOINT_RDR.
404 The number of "positive" cases are placed into
405 the position TRUE_INDEX, and the number of "negative" cases
407 POS_COND and RESULT determine the semantics of what is
409 WEIGHT is the value of a single count.
411 static struct casereader *
412 accumulate_counts (struct casereader *input,
413 double result, double weight,
414 bool (*pos_cond) (double, double),
415 int true_index, int false_index)
417 const struct caseproto *proto = casereader_get_proto (input);
418 struct casewriter *w =
419 autopaging_writer_create (proto);
421 double prev_cp = SYSMIS;
423 for ( ; (cpc = casereader_read (input) ); case_unref (cpc))
425 struct ccase *new_case;
426 const double cp = case_data_idx (cpc, ROC_CUTPOINT)->f;
428 assert (cp != SYSMIS);
430 /* We don't want duplicates here */
434 new_case = case_clone (cpc);
436 if ( pos_cond (result, cp))
437 case_data_rw_idx (new_case, true_index)->f += weight;
439 case_data_rw_idx (new_case, false_index)->f += weight;
443 casewriter_write (w, new_case);
445 casereader_destroy (input);
447 return casewriter_make_reader (w);
452 static void output_roc (struct roc_state *rs, const struct cmd_roc *roc);
455 This function does 3 things:
457 1. Counts the number of cases which are equal to every other case in READER,
458 and those cases for which the relationship between it and every other case
459 satifies PRED (normally either > or <). VAR is variable defining a case's value
462 2. Counts the number of true and false cases in reader, and populates
463 CUTPOINT_RDR accordingly. TRUE_INDEX and FALSE_INDEX are the indices
464 which receive these values. POS_COND is the condition defining true
467 3. CC is filled with the cumulative weight of all cases of READER.
469 static struct casereader *
470 process_group (const struct variable *var, struct casereader *reader,
471 bool (*pred) (double, double),
472 const struct dictionary *dict,
474 struct casereader **cutpoint_rdr,
475 bool (*pos_cond) (double, double),
479 const struct variable *w = dict_get_weight (dict);
481 struct casereader *r1 =
482 casereader_create_distinct (sort_execute_1var (reader, var), var, w);
484 const int weight_idx = w ? var_get_case_index (w) :
485 caseproto_get_n_widths (casereader_get_proto (r1)) - 1;
489 struct casereader *rclone = casereader_clone (r1);
490 struct casewriter *wtr;
491 struct caseproto *proto = caseproto_create ();
493 proto = caseproto_add_width (proto, 0);
494 proto = caseproto_add_width (proto, 0);
495 proto = caseproto_add_width (proto, 0);
497 wtr = autopaging_writer_create (proto);
501 for ( ; (c1 = casereader_read (r1) ); case_unref (c1))
503 struct ccase *new_case = case_create (proto);
505 struct casereader *r2 = casereader_clone (rclone);
507 const double weight1 = case_data_idx (c1, weight_idx)->f;
508 const double d1 = case_data (c1, var)->f;
512 *cutpoint_rdr = accumulate_counts (*cutpoint_rdr, d1, weight1,
514 true_index, false_index);
518 for ( ; (c2 = casereader_read (r2) ); case_unref (c2))
520 const double d2 = case_data (c2, var)->f;
521 const double weight2 = case_data_idx (c2, weight_idx)->f;
528 else if ( pred (d2, d1))
534 case_data_rw_idx (new_case, VALUE)->f = d1;
535 case_data_rw_idx (new_case, N_EQ)->f = n_eq;
536 case_data_rw_idx (new_case, N_PRED)->f = n_pred;
538 casewriter_write (wtr, new_case);
540 casereader_destroy (r2);
544 casereader_destroy (r1);
545 casereader_destroy (rclone);
547 caseproto_unref (proto);
549 return casewriter_make_reader (wtr);
552 /* Some more indeces into case data */
553 #define N_POS_EQ 1 /* number of positive cases with values equal to n */
554 #define N_POS_GT 2 /* number of postive cases with values greater than n */
555 #define N_NEG_EQ 3 /* number of negative cases with values equal to n */
556 #define N_NEG_LT 4 /* number of negative cases with values less than n */
559 gt (double d1, double d2)
566 ge (double d1, double d2)
572 lt (double d1, double d2)
579 Return a casereader with width 3,
580 populated with cases based upon READER.
581 The cases will have the values:
582 (N, number of cases equal to N, number of cases greater than N)
583 As a side effect, update RS->n1 with the number of positive cases.
585 static struct casereader *
586 process_positive_group (const struct variable *var, struct casereader *reader,
587 const struct dictionary *dict,
588 struct roc_state *rs)
590 return process_group (var, reader, gt, dict, &rs->n1,
597 Return a casereader with width 3,
598 populated with cases based upon READER.
599 The cases will have the values:
600 (N, number of cases equal to N, number of cases less than N)
601 As a side effect, update RS->n2 with the number of negative cases.
603 static struct casereader *
604 process_negative_group (const struct variable *var, struct casereader *reader,
605 const struct dictionary *dict,
606 struct roc_state *rs)
608 return process_group (var, reader, lt, dict, &rs->n2,
618 append_cutpoint (struct casewriter *writer, double cutpoint)
620 struct ccase *cc = case_create (casewriter_get_proto (writer));
622 case_data_rw_idx (cc, ROC_CUTPOINT)->f = cutpoint;
623 case_data_rw_idx (cc, ROC_TP)->f = 0;
624 case_data_rw_idx (cc, ROC_FN)->f = 0;
625 case_data_rw_idx (cc, ROC_TN)->f = 0;
626 case_data_rw_idx (cc, ROC_FP)->f = 0;
628 casewriter_write (writer, cc);
633 Create and initialise the rs[x].cutpoint_rdr casereaders. That is, the readers will
634 be created with width 5, ready to take the values (cutpoint, ROC_TP, ROC_FN, ROC_TN, ROC_FP), and the
635 reader will be populated with its final number of cases.
636 However on exit from this function, only ROC_CUTPOINT entries will be set to their final
637 value. The other entries will be initialised to zero.
640 prepare_cutpoints (struct cmd_roc *roc, struct roc_state *rs, struct casereader *input)
643 struct casereader *r = casereader_clone (input);
647 struct caseproto *proto = caseproto_create ();
648 struct subcase ordering;
649 subcase_init (&ordering, ROC_CUTPOINT, 0, SC_ASCEND);
651 proto = caseproto_add_width (proto, 0); /* cutpoint */
652 proto = caseproto_add_width (proto, 0); /* ROC_TP */
653 proto = caseproto_add_width (proto, 0); /* ROC_FN */
654 proto = caseproto_add_width (proto, 0); /* ROC_TN */
655 proto = caseproto_add_width (proto, 0); /* ROC_FP */
657 for (i = 0 ; i < roc->n_vars; ++i)
659 rs[i].cutpoint_wtr = sort_create_writer (&ordering, proto);
660 rs[i].prev_result = SYSMIS;
661 rs[i].max = -DBL_MAX;
665 caseproto_unref (proto);
666 subcase_destroy (&ordering);
669 for (; (c = casereader_read (r)) != NULL; case_unref (c))
671 for (i = 0 ; i < roc->n_vars; ++i)
673 const union value *v = case_data (c, roc->vars[i]);
674 const double result = v->f;
676 if ( mv_is_value_missing (var_get_missing_values (roc->vars[i]), v, roc->exclude))
679 minimize (&rs[i].min, result);
680 maximize (&rs[i].max, result);
682 if ( rs[i].prev_result != SYSMIS && rs[i].prev_result != result )
684 const double mean = (result + rs[i].prev_result ) / 2.0;
685 append_cutpoint (rs[i].cutpoint_wtr, mean);
688 rs[i].prev_result = result;
691 casereader_destroy (r);
694 /* Append the min and max cutpoints */
695 for (i = 0 ; i < roc->n_vars; ++i)
697 append_cutpoint (rs[i].cutpoint_wtr, rs[i].min - 1);
698 append_cutpoint (rs[i].cutpoint_wtr, rs[i].max + 1);
700 rs[i].cutpoint_rdr = casewriter_make_reader (rs[i].cutpoint_wtr);
705 do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict)
709 struct roc_state *rs = xcalloc (roc->n_vars, sizeof *rs);
711 struct casereader *negatives = NULL;
712 struct casereader *positives = NULL;
714 struct caseproto *n_proto = NULL;
716 struct subcase up_ordering;
717 struct subcase down_ordering;
719 struct casewriter *neg_wtr = NULL;
721 struct casereader *input = casereader_create_filter_missing (reader,
722 roc->vars, roc->n_vars,
727 input = casereader_create_filter_missing (input,
733 neg_wtr = autopaging_writer_create (casereader_get_proto (input));
735 prepare_cutpoints (roc, rs, input);
738 /* Separate the positive actual state cases from the negative ones */
740 casereader_create_filter_func (input,
746 n_proto = caseproto_create ();
748 n_proto = caseproto_add_width (n_proto, 0);
749 n_proto = caseproto_add_width (n_proto, 0);
750 n_proto = caseproto_add_width (n_proto, 0);
751 n_proto = caseproto_add_width (n_proto, 0);
752 n_proto = caseproto_add_width (n_proto, 0);
754 subcase_init (&up_ordering, VALUE, 0, SC_ASCEND);
755 subcase_init (&down_ordering, VALUE, 0, SC_DESCEND);
757 for (i = 0 ; i < roc->n_vars; ++i)
759 struct casewriter *w = NULL;
760 struct casereader *r = NULL;
765 struct casereader *n_neg_reader ;
766 const struct variable *var = roc->vars[i];
768 struct casereader *neg ;
769 struct casereader *pos = casereader_clone (positives);
771 struct casereader *n_pos_reader =
772 process_positive_group (var, pos, dict, &rs[i]);
774 if ( negatives == NULL)
776 negatives = casewriter_make_reader (neg_wtr);
779 neg = casereader_clone (negatives);
781 n_neg_reader = process_negative_group (var, neg, dict, &rs[i]);
783 /* Merge the n_pos and n_neg casereaders */
784 w = sort_create_writer (&up_ordering, n_proto);
785 for ( ; (cpos = casereader_read (n_pos_reader) ); case_unref (cpos))
787 struct ccase *pos_case = case_create (n_proto);
789 const double jpos = case_data_idx (cpos, VALUE)->f;
791 while ((cneg = casereader_read (n_neg_reader)))
793 struct ccase *nc = case_create (n_proto);
795 const double jneg = case_data_idx (cneg, VALUE)->f;
797 case_data_rw_idx (nc, VALUE)->f = jneg;
798 case_data_rw_idx (nc, N_POS_EQ)->f = 0;
800 case_data_rw_idx (nc, N_POS_GT)->f = SYSMIS;
802 *case_data_rw_idx (nc, N_NEG_EQ) = *case_data_idx (cneg, N_EQ);
803 *case_data_rw_idx (nc, N_NEG_LT) = *case_data_idx (cneg, N_PRED);
805 casewriter_write (w, nc);
812 case_data_rw_idx (pos_case, VALUE)->f = jpos;
813 *case_data_rw_idx (pos_case, N_POS_EQ) = *case_data_idx (cpos, N_EQ);
814 *case_data_rw_idx (pos_case, N_POS_GT) = *case_data_idx (cpos, N_PRED);
815 case_data_rw_idx (pos_case, N_NEG_EQ)->f = 0;
816 case_data_rw_idx (pos_case, N_NEG_LT)->f = SYSMIS;
818 casewriter_write (w, pos_case);
821 casereader_destroy (n_pos_reader);
822 casereader_destroy (n_neg_reader);
824 /* These aren't used anymore */
828 r = casewriter_make_reader (w);
830 /* Propagate the N_POS_GT values from the positive cases
831 to the negative ones */
833 double prev_pos_gt = rs[i].n1;
834 w = sort_create_writer (&down_ordering, n_proto);
836 for ( ; (c = casereader_read (r) ); case_unref (c))
838 double n_pos_gt = case_data_idx (c, N_POS_GT)->f;
839 struct ccase *nc = case_clone (c);
841 if ( n_pos_gt == SYSMIS)
843 n_pos_gt = prev_pos_gt;
844 case_data_rw_idx (nc, N_POS_GT)->f = n_pos_gt;
847 casewriter_write (w, nc);
848 prev_pos_gt = n_pos_gt;
851 casereader_destroy (r);
852 r = casewriter_make_reader (w);
855 /* Propagate the N_NEG_LT values from the negative cases
856 to the positive ones */
858 double prev_neg_lt = rs[i].n2;
859 w = sort_create_writer (&up_ordering, n_proto);
861 for ( ; (c = casereader_read (r) ); case_unref (c))
863 double n_neg_lt = case_data_idx (c, N_NEG_LT)->f;
864 struct ccase *nc = case_clone (c);
866 if ( n_neg_lt == SYSMIS)
868 n_neg_lt = prev_neg_lt;
869 case_data_rw_idx (nc, N_NEG_LT)->f = n_neg_lt;
872 casewriter_write (w, nc);
873 prev_neg_lt = n_neg_lt;
876 casereader_destroy (r);
877 r = casewriter_make_reader (w);
881 struct ccase *prev_case = NULL;
882 for ( ; (c = casereader_read (r) ); case_unref (c))
884 struct ccase *next_case = casereader_peek (r, 0);
886 const double j = case_data_idx (c, VALUE)->f;
887 double n_pos_eq = case_data_idx (c, N_POS_EQ)->f;
888 double n_pos_gt = case_data_idx (c, N_POS_GT)->f;
889 double n_neg_eq = case_data_idx (c, N_NEG_EQ)->f;
890 double n_neg_lt = case_data_idx (c, N_NEG_LT)->f;
892 if ( prev_case && j == case_data_idx (prev_case, VALUE)->f)
894 if ( 0 == case_data_idx (c, N_POS_EQ)->f)
896 n_pos_eq = case_data_idx (prev_case, N_POS_EQ)->f;
897 n_pos_gt = case_data_idx (prev_case, N_POS_GT)->f;
900 if ( 0 == case_data_idx (c, N_NEG_EQ)->f)
902 n_neg_eq = case_data_idx (prev_case, N_NEG_EQ)->f;
903 n_neg_lt = case_data_idx (prev_case, N_NEG_LT)->f;
907 if ( NULL == next_case || j != case_data_idx (next_case, VALUE)->f)
909 rs[i].auc += n_pos_gt * n_neg_eq + (n_pos_eq * n_neg_eq) / 2.0;
912 n_neg_eq * ( pow2 (n_pos_gt) + n_pos_gt * n_pos_eq + pow2 (n_pos_eq) / 3.0);
914 n_pos_eq * ( pow2 (n_neg_lt) + n_neg_lt * n_neg_eq + pow2 (n_neg_eq) / 3.0);
918 case_unref (next_case);
919 case_unref (prev_case);
920 prev_case = case_clone (c);
922 casereader_destroy (r);
923 case_unref (prev_case);
925 rs[i].auc /= rs[i].n1 * rs[i].n2;
927 rs[i].auc = 1 - rs[i].auc;
929 if ( roc->bi_neg_exp )
931 rs[i].q1hat = rs[i].auc / ( 2 - rs[i].auc);
932 rs[i].q2hat = 2 * pow2 (rs[i].auc) / ( 1 + rs[i].auc);
936 rs[i].q1hat /= rs[i].n2 * pow2 (rs[i].n1);
937 rs[i].q2hat /= rs[i].n1 * pow2 (rs[i].n2);
942 casereader_destroy (positives);
943 casereader_destroy (negatives);
945 caseproto_unref (n_proto);
946 subcase_destroy (&up_ordering);
947 subcase_destroy (&down_ordering);
949 output_roc (rs, roc);
951 for (i = 0 ; i < roc->n_vars; ++i)
952 casereader_destroy (rs[i].cutpoint_rdr);
958 show_auc (struct roc_state *rs, const struct cmd_roc *roc)
961 const int n_fields = roc->print_se ? 5 : 1;
962 const int n_cols = roc->n_vars > 1 ? n_fields + 1: n_fields;
963 const int n_rows = 2 + roc->n_vars;
964 struct tab_table *tbl = tab_create (n_cols, n_rows);
966 if ( roc->n_vars > 1)
967 tab_title (tbl, _("Area Under the Curve"));
969 tab_title (tbl, _("Area Under the Curve (%s)"), var_to_string (roc->vars[0]));
971 tab_headers (tbl, n_cols - n_fields, 0, 1, 0);
974 tab_text (tbl, n_cols - n_fields, 1, TAT_TITLE, _("Area"));
976 tab_hline (tbl, TAL_2, 0, n_cols - 1, 2);
987 tab_text (tbl, n_cols - 4, 1, TAT_TITLE, _("Std. Error"));
988 tab_text (tbl, n_cols - 3, 1, TAT_TITLE, _("Asymptotic Sig."));
990 tab_text (tbl, n_cols - 2, 1, TAT_TITLE, _("Lower Bound"));
991 tab_text (tbl, n_cols - 1, 1, TAT_TITLE, _("Upper Bound"));
993 tab_joint_text_format (tbl, n_cols - 2, 0, 4, 0,
994 TAT_TITLE | TAB_CENTER,
995 _("Asymp. %g%% Confidence Interval"), roc->ci);
996 tab_vline (tbl, 0, n_cols - 1, 0, 0);
997 tab_hline (tbl, TAL_1, n_cols - 2, n_cols - 1, 1);
1000 if ( roc->n_vars > 1)
1001 tab_text (tbl, 0, 1, TAT_TITLE, _("Variable under test"));
1003 if ( roc->n_vars > 1)
1004 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
1007 for ( i = 0 ; i < roc->n_vars ; ++i )
1009 tab_text (tbl, 0, 2 + i, TAT_TITLE, var_to_string (roc->vars[i]));
1011 tab_double (tbl, n_cols - n_fields, 2 + i, 0, rs[i].auc, NULL, RC_OTHER);
1013 if ( roc->print_se )
1016 const double sd_0_5 = sqrt ((rs[i].n1 + rs[i].n2 + 1) /
1017 (12 * rs[i].n1 * rs[i].n2));
1021 se = rs[i].auc * (1 - rs[i].auc) + (rs[i].n1 - 1) * (rs[i].q1hat - pow2 (rs[i].auc)) +
1022 (rs[i].n2 - 1) * (rs[i].q2hat - pow2 (rs[i].auc));
1024 se /= rs[i].n1 * rs[i].n2;
1028 tab_double (tbl, n_cols - 4, 2 + i, 0,
1032 ci = 1 - roc->ci / 100.0;
1033 yy = gsl_cdf_gaussian_Qinv (ci, se) ;
1035 tab_double (tbl, n_cols - 2, 2 + i, 0,
1039 tab_double (tbl, n_cols - 1, 2 + i, 0,
1043 tab_double (tbl, n_cols - 3, 2 + i, 0,
1044 2.0 * gsl_cdf_ugaussian_Q (fabs ((rs[i].auc - 0.5 ) / sd_0_5)),
1054 show_summary (const struct cmd_roc *roc)
1056 const int n_cols = 3;
1057 const int n_rows = 4;
1058 struct tab_table *tbl = tab_create (n_cols, n_rows);
1060 tab_title (tbl, _("Case Summary"));
1062 tab_headers (tbl, 1, 0, 2, 0);
1071 tab_hline (tbl, TAL_2, 0, n_cols - 1, 2);
1072 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
1075 tab_hline (tbl, TAL_2, 1, n_cols - 1, 1);
1076 tab_vline (tbl, TAL_1, 2, 1, n_rows - 1);
1079 tab_text (tbl, 0, 1, TAT_TITLE | TAB_LEFT, var_to_string (roc->state_var));
1080 tab_text (tbl, 1, 1, TAT_TITLE, _("Unweighted"));
1081 tab_text (tbl, 2, 1, TAT_TITLE, _("Weighted"));
1083 tab_joint_text (tbl, 1, 0, 2, 0,
1084 TAT_TITLE | TAB_CENTER,
1085 _("Valid N (listwise)"));
1088 tab_text (tbl, 0, 2, TAB_LEFT, _("Positive"));
1089 tab_text (tbl, 0, 3, TAB_LEFT, _("Negative"));
1092 tab_double (tbl, 1, 2, 0, roc->pos, NULL, RC_INTEGER);
1093 tab_double (tbl, 1, 3, 0, roc->neg, NULL, RC_INTEGER);
1095 tab_double (tbl, 2, 2, 0, roc->pos_weighted, NULL, RC_OTHER);
1096 tab_double (tbl, 2, 3, 0, roc->neg_weighted, NULL, RC_OTHER);
1103 show_coords (struct roc_state *rs, const struct cmd_roc *roc)
1107 const int n_cols = roc->n_vars > 1 ? 4 : 3;
1109 struct tab_table *tbl ;
1111 for (i = 0; i < roc->n_vars; ++i)
1112 n_rows += casereader_count_cases (rs[i].cutpoint_rdr);
1114 tbl = tab_create (n_cols, n_rows);
1116 if ( roc->n_vars > 1)
1117 tab_title (tbl, _("Coordinates of the Curve"));
1119 tab_title (tbl, _("Coordinates of the Curve (%s)"), var_to_string (roc->vars[0]));
1122 tab_headers (tbl, 1, 0, 1, 0);
1124 tab_hline (tbl, TAL_2, 0, n_cols - 1, 1);
1126 if ( roc->n_vars > 1)
1127 tab_text (tbl, 0, 0, TAT_TITLE, _("Test variable"));
1129 tab_text (tbl, n_cols - 3, 0, TAT_TITLE, _("Positive if greater than or equal to"));
1130 tab_text (tbl, n_cols - 2, 0, TAT_TITLE, _("Sensitivity"));
1131 tab_text (tbl, n_cols - 1, 0, TAT_TITLE, _("1 - Specificity"));
1140 if ( roc->n_vars > 1)
1141 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
1143 for (i = 0; i < roc->n_vars; ++i)
1146 struct casereader *r = casereader_clone (rs[i].cutpoint_rdr);
1148 if ( roc->n_vars > 1)
1149 tab_text (tbl, 0, x, TAT_TITLE, var_to_string (roc->vars[i]));
1152 tab_hline (tbl, TAL_1, 0, n_cols - 1, x);
1155 for (; (cc = casereader_read (r)) != NULL;
1156 case_unref (cc), x++)
1158 const double se = case_data_idx (cc, ROC_TP)->f /
1160 case_data_idx (cc, ROC_TP)->f
1162 case_data_idx (cc, ROC_FN)->f
1165 const double sp = case_data_idx (cc, ROC_TN)->f /
1167 case_data_idx (cc, ROC_TN)->f
1169 case_data_idx (cc, ROC_FP)->f
1172 tab_double (tbl, n_cols - 3, x, 0, case_data_idx (cc, ROC_CUTPOINT)->f,
1173 var_get_print_format (roc->vars[i]), RC_OTHER);
1175 tab_double (tbl, n_cols - 2, x, 0, se, NULL, RC_OTHER);
1176 tab_double (tbl, n_cols - 1, x, 0, 1 - sp, NULL, RC_OTHER);
1179 casereader_destroy (r);
1187 output_roc (struct roc_state *rs, const struct cmd_roc *roc)
1193 struct roc_chart *rc;
1196 rc = roc_chart_create (roc->reference);
1197 for (i = 0; i < roc->n_vars; i++)
1198 roc_chart_add_var (rc, var_get_name (roc->vars[i]),
1199 rs[i].cutpoint_rdr);
1200 roc_chart_submit (rc);
1205 if ( roc->print_coords )
1206 show_coords (rs, roc);