1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <language/stats/roc.h>
21 #include <data/casegrouper.h>
22 #include <data/casereader.h>
23 #include <data/casewriter.h>
24 #include <data/dictionary.h>
25 #include <data/format.h>
26 #include <data/procedure.h>
27 #include <data/subcase.h>
28 #include <language/command.h>
29 #include <language/lexer/lexer.h>
30 #include <language/lexer/value-parser.h>
31 #include <language/lexer/variable-parser.h>
32 #include <libpspp/misc.h>
33 #include <math/sort.h>
34 #include <output/chart-item.h>
35 #include <output/charts/roc-chart.h>
36 #include <output/tab.h>
38 #include <gsl/gsl_cdf.h>
41 #define _(msgid) gettext (msgid)
42 #define N_(msgid) msgid
47 const struct variable **vars;
48 const struct dictionary *dict;
50 const struct variable *state_var ;
51 union value state_value;
53 /* Plot the roc curve */
55 /* Plot the reference line */
62 bool bi_neg_exp; /* True iff the bi-negative exponential critieria
64 enum mv_class exclude;
66 bool invert ; /* True iff a smaller test result variable indicates
75 static int run_roc (struct dataset *ds, struct cmd_roc *roc);
78 cmd_roc (struct lexer *lexer, struct dataset *ds)
81 const struct dictionary *dict = dataset_dict (ds);
86 roc.print_coords = false;
89 roc.reference = false;
91 roc.bi_neg_exp = false;
93 roc.pos = roc.pos_weighted = 0;
94 roc.neg = roc.neg_weighted = 0;
95 roc.dict = dataset_dict (ds);
97 if (!parse_variables_const (lexer, dict, &roc.vars, &roc.n_vars,
98 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
101 if ( ! lex_force_match (lexer, T_BY))
106 roc.state_var = parse_variable (lexer, dict);
108 if ( !lex_force_match (lexer, '('))
113 value_init (&roc.state_value, var_get_width (roc.state_var));
114 parse_value (lexer, &roc.state_value, var_get_width (roc.state_var));
117 if ( !lex_force_match (lexer, ')'))
123 while (lex_token (lexer) != '.')
125 lex_match (lexer, '/');
126 if (lex_match_id (lexer, "MISSING"))
128 lex_match (lexer, '=');
129 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
131 if (lex_match_id (lexer, "INCLUDE"))
133 roc.exclude = MV_SYSTEM;
135 else if (lex_match_id (lexer, "EXCLUDE"))
137 roc.exclude = MV_ANY;
141 lex_error (lexer, NULL);
146 else if (lex_match_id (lexer, "PLOT"))
148 lex_match (lexer, '=');
149 if (lex_match_id (lexer, "CURVE"))
152 if (lex_match (lexer, '('))
154 roc.reference = true;
155 lex_force_match_id (lexer, "REFERENCE");
156 lex_force_match (lexer, ')');
159 else if (lex_match_id (lexer, "NONE"))
165 lex_error (lexer, NULL);
169 else if (lex_match_id (lexer, "PRINT"))
171 lex_match (lexer, '=');
172 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
174 if (lex_match_id (lexer, "SE"))
178 else if (lex_match_id (lexer, "COORDINATES"))
180 roc.print_coords = true;
184 lex_error (lexer, NULL);
189 else if (lex_match_id (lexer, "CRITERIA"))
191 lex_match (lexer, '=');
192 while (lex_token (lexer) != '.' && lex_token (lexer) != '/')
194 if (lex_match_id (lexer, "CUTOFF"))
196 lex_force_match (lexer, '(');
197 if (lex_match_id (lexer, "INCLUDE"))
199 roc.exclude = MV_SYSTEM;
201 else if (lex_match_id (lexer, "EXCLUDE"))
203 roc.exclude = MV_USER | MV_SYSTEM;
207 lex_error (lexer, NULL);
210 lex_force_match (lexer, ')');
212 else if (lex_match_id (lexer, "TESTPOS"))
214 lex_force_match (lexer, '(');
215 if (lex_match_id (lexer, "LARGE"))
219 else if (lex_match_id (lexer, "SMALL"))
225 lex_error (lexer, NULL);
228 lex_force_match (lexer, ')');
230 else if (lex_match_id (lexer, "CI"))
232 lex_force_match (lexer, '(');
233 lex_force_num (lexer);
234 roc.ci = lex_number (lexer);
236 lex_force_match (lexer, ')');
238 else if (lex_match_id (lexer, "DISTRIBUTION"))
240 lex_force_match (lexer, '(');
241 if (lex_match_id (lexer, "FREE"))
243 roc.bi_neg_exp = false;
245 else if (lex_match_id (lexer, "NEGEXPO"))
247 roc.bi_neg_exp = true;
251 lex_error (lexer, NULL);
254 lex_force_match (lexer, ')');
258 lex_error (lexer, NULL);
265 lex_error (lexer, NULL);
270 if ( ! run_roc (ds, &roc))
273 value_destroy (&roc.state_value, var_get_width (roc.state_var));
278 value_destroy (&roc.state_value, var_get_width (roc.state_var));
287 do_roc (struct cmd_roc *roc, struct casereader *group, struct dictionary *dict);
291 run_roc (struct dataset *ds, struct cmd_roc *roc)
293 struct dictionary *dict = dataset_dict (ds);
295 struct casereader *group;
297 struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict);
298 while (casegrouper_get_next_group (grouper, &group))
300 do_roc (roc, group, dataset_dict (ds));
302 ok = casegrouper_destroy (grouper);
303 ok = proc_commit (ds) && ok;
310 dump_casereader (struct casereader *reader)
313 struct casereader *r = casereader_clone (reader);
315 for ( ; (c = casereader_read (r) ); case_unref (c))
318 for (i = 0 ; i < case_get_value_cnt (c); ++i)
320 printf ("%g ", case_data_idx (c, i)->f);
325 casereader_destroy (r);
331 Return true iff the state variable indicates that C has positive actual state.
333 As a side effect, this function also accumulates the roc->{pos,neg} and
334 roc->{pos,neg}_weighted counts.
337 match_positives (const struct ccase *c, void *aux)
339 struct cmd_roc *roc = aux;
340 const struct variable *wv = dict_get_weight (roc->dict);
341 const double weight = wv ? case_data (c, wv)->f : 1.0;
343 const bool positive =
344 ( 0 == value_compare_3way (case_data (c, roc->state_var), &roc->state_value,
345 var_get_width (roc->state_var)));
350 roc->pos_weighted += weight;
355 roc->neg_weighted += weight;
366 /* Some intermediate state for calculating the cutpoints and the
367 standard error values */
370 double auc; /* Area under the curve */
372 double n1; /* total weight of positives */
373 double n2; /* total weight of negatives */
375 /* intermediates for standard error */
379 /* intermediates for cutpoints */
380 struct casewriter *cutpoint_wtr;
381 struct casereader *cutpoint_rdr;
388 Return a new casereader based upon CUTPOINT_RDR.
389 The number of "positive" cases are placed into
390 the position TRUE_INDEX, and the number of "negative" cases
392 POS_COND and RESULT determine the semantics of what is
394 WEIGHT is the value of a single count.
396 static struct casereader *
397 accumulate_counts (struct casereader *cutpoint_rdr,
398 double result, double weight,
399 bool (*pos_cond) (double, double),
400 int true_index, int false_index)
402 const struct caseproto *proto = casereader_get_proto (cutpoint_rdr);
403 struct casewriter *w =
404 autopaging_writer_create (proto);
405 struct casereader *r = casereader_clone (cutpoint_rdr);
407 double prev_cp = SYSMIS;
409 for ( ; (cpc = casereader_read (r) ); case_unref (cpc))
411 struct ccase *new_case;
412 const double cp = case_data_idx (cpc, ROC_CUTPOINT)->f;
414 assert (cp != SYSMIS);
416 /* We don't want duplicates here */
420 new_case = case_clone (cpc);
422 if ( pos_cond (result, cp))
423 case_data_rw_idx (new_case, true_index)->f += weight;
425 case_data_rw_idx (new_case, false_index)->f += weight;
429 casewriter_write (w, new_case);
431 casereader_destroy (r);
433 return casewriter_make_reader (w);
438 static void output_roc (struct roc_state *rs, const struct cmd_roc *roc);
441 This function does 3 things:
443 1. Counts the number of cases which are equal to every other case in READER,
444 and those cases for which the relationship between it and every other case
445 satifies PRED (normally either > or <). VAR is variable defining a case's value
448 2. Counts the number of true and false cases in reader, and populates
449 CUTPOINT_RDR accordingly. TRUE_INDEX and FALSE_INDEX are the indices
450 which receive these values. POS_COND is the condition defining true
453 3. CC is filled with the cumulative weight of all cases of READER.
455 static struct casereader *
456 process_group (const struct variable *var, struct casereader *reader,
457 bool (*pred) (double, double),
458 const struct dictionary *dict,
460 struct casereader **cutpoint_rdr,
461 bool (*pos_cond) (double, double),
465 const struct variable *w = dict_get_weight (dict);
467 struct casereader *r1 =
468 casereader_create_distinct (sort_execute_1var (reader, var), var, w);
470 const int weight_idx = w ? var_get_case_index (w) :
471 caseproto_get_n_widths (casereader_get_proto (r1)) - 1;
475 struct casereader *rclone = casereader_clone (r1);
476 struct casewriter *wtr;
477 struct caseproto *proto = caseproto_create ();
479 proto = caseproto_add_width (proto, 0);
480 proto = caseproto_add_width (proto, 0);
481 proto = caseproto_add_width (proto, 0);
483 wtr = autopaging_writer_create (proto);
487 for ( ; (c1 = casereader_read (r1) ); case_unref (c1))
489 struct ccase *new_case = case_create (proto);
491 struct casereader *r2 = casereader_clone (rclone);
493 const double weight1 = case_data_idx (c1, weight_idx)->f;
494 const double d1 = case_data (c1, var)->f;
498 *cutpoint_rdr = accumulate_counts (*cutpoint_rdr, d1, weight1,
500 true_index, false_index);
504 for ( ; (c2 = casereader_read (r2) ); case_unref (c2))
506 const double d2 = case_data (c2, var)->f;
507 const double weight2 = case_data_idx (c2, weight_idx)->f;
514 else if ( pred (d2, d1))
520 case_data_rw_idx (new_case, VALUE)->f = d1;
521 case_data_rw_idx (new_case, N_EQ)->f = n_eq;
522 case_data_rw_idx (new_case, N_PRED)->f = n_pred;
524 casewriter_write (wtr, new_case);
526 casereader_destroy (r2);
529 casereader_destroy (r1);
530 casereader_destroy (rclone);
532 return casewriter_make_reader (wtr);
535 /* Some more indeces into case data */
536 #define N_POS_EQ 1 /* number of positive cases with values equal to n */
537 #define N_POS_GT 2 /* number of postive cases with values greater than n */
538 #define N_NEG_EQ 3 /* number of negative cases with values equal to n */
539 #define N_NEG_LT 4 /* number of negative cases with values less than n */
542 gt (double d1, double d2)
549 ge (double d1, double d2)
555 lt (double d1, double d2)
562 Return a casereader with width 3,
563 populated with cases based upon READER.
564 The cases will have the values:
565 (N, number of cases equal to N, number of cases greater than N)
566 As a side effect, update RS->n1 with the number of positive cases.
568 static struct casereader *
569 process_positive_group (const struct variable *var, struct casereader *reader,
570 const struct dictionary *dict,
571 struct roc_state *rs)
573 return process_group (var, reader, gt, dict, &rs->n1,
580 Return a casereader with width 3,
581 populated with cases based upon READER.
582 The cases will have the values:
583 (N, number of cases equal to N, number of cases less than N)
584 As a side effect, update RS->n2 with the number of negative cases.
586 static struct casereader *
587 process_negative_group (const struct variable *var, struct casereader *reader,
588 const struct dictionary *dict,
589 struct roc_state *rs)
591 return process_group (var, reader, lt, dict, &rs->n2,
601 append_cutpoint (struct casewriter *writer, double cutpoint)
603 struct ccase *cc = case_create (casewriter_get_proto (writer));
605 case_data_rw_idx (cc, ROC_CUTPOINT)->f = cutpoint;
606 case_data_rw_idx (cc, ROC_TP)->f = 0;
607 case_data_rw_idx (cc, ROC_FN)->f = 0;
608 case_data_rw_idx (cc, ROC_TN)->f = 0;
609 case_data_rw_idx (cc, ROC_FP)->f = 0;
611 casewriter_write (writer, cc);
616 Create and initialise the rs[x].cutpoint_rdr casereaders. That is, the readers will
617 be created with width 5, ready to take the values (cutpoint, ROC_TP, ROC_FN, ROC_TN, ROC_FP), and the
618 reader will be populated with its final number of cases.
619 However on exit from this function, only ROC_CUTPOINT entries will be set to their final
620 value. The other entries will be initialised to zero.
623 prepare_cutpoints (struct cmd_roc *roc, struct roc_state *rs, struct casereader *input)
626 struct casereader *r = casereader_clone (input);
628 struct caseproto *proto = caseproto_create ();
630 struct subcase ordering;
631 subcase_init (&ordering, ROC_CUTPOINT, 0, SC_ASCEND);
633 proto = caseproto_add_width (proto, 0); /* cutpoint */
634 proto = caseproto_add_width (proto, 0); /* ROC_TP */
635 proto = caseproto_add_width (proto, 0); /* ROC_FN */
636 proto = caseproto_add_width (proto, 0); /* ROC_TN */
637 proto = caseproto_add_width (proto, 0); /* ROC_FP */
639 for (i = 0 ; i < roc->n_vars; ++i)
641 rs[i].cutpoint_wtr = sort_create_writer (&ordering, proto);
642 rs[i].prev_result = SYSMIS;
643 rs[i].max = -DBL_MAX;
647 for (; (c = casereader_read (r)) != NULL; case_unref (c))
649 for (i = 0 ; i < roc->n_vars; ++i)
651 const union value *v = case_data (c, roc->vars[i]);
652 const double result = v->f;
654 if ( mv_is_value_missing (var_get_missing_values (roc->vars[i]), v, roc->exclude))
657 minimize (&rs[i].min, result);
658 maximize (&rs[i].max, result);
660 if ( rs[i].prev_result != SYSMIS && rs[i].prev_result != result )
662 const double mean = (result + rs[i].prev_result ) / 2.0;
663 append_cutpoint (rs[i].cutpoint_wtr, mean);
666 rs[i].prev_result = result;
669 casereader_destroy (r);
672 /* Append the min and max cutpoints */
673 for (i = 0 ; i < roc->n_vars; ++i)
675 append_cutpoint (rs[i].cutpoint_wtr, rs[i].min - 1);
676 append_cutpoint (rs[i].cutpoint_wtr, rs[i].max + 1);
678 rs[i].cutpoint_rdr = casewriter_make_reader (rs[i].cutpoint_wtr);
683 do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict)
687 struct roc_state *rs = xcalloc (roc->n_vars, sizeof *rs);
689 struct casereader *negatives = NULL;
690 struct casereader *positives = NULL;
692 struct caseproto *n_proto = caseproto_create ();
694 struct subcase up_ordering;
695 struct subcase down_ordering;
697 struct casewriter *neg_wtr = NULL;
699 struct casereader *input = casereader_create_filter_missing (reader,
700 roc->vars, roc->n_vars,
705 input = casereader_create_filter_missing (input,
711 neg_wtr = autopaging_writer_create (casereader_get_proto (input));
713 prepare_cutpoints (roc, rs, input);
716 /* Separate the positive actual state cases from the negative ones */
718 casereader_create_filter_func (input,
724 n_proto = caseproto_create ();
726 n_proto = caseproto_add_width (n_proto, 0);
727 n_proto = caseproto_add_width (n_proto, 0);
728 n_proto = caseproto_add_width (n_proto, 0);
729 n_proto = caseproto_add_width (n_proto, 0);
730 n_proto = caseproto_add_width (n_proto, 0);
732 subcase_init (&up_ordering, VALUE, 0, SC_ASCEND);
733 subcase_init (&down_ordering, VALUE, 0, SC_DESCEND);
735 for (i = 0 ; i < roc->n_vars; ++i)
737 struct casewriter *w = NULL;
738 struct casereader *r = NULL;
743 struct casereader *n_neg ;
744 const struct variable *var = roc->vars[i];
746 struct casereader *neg ;
747 struct casereader *pos = casereader_clone (positives);
750 struct casereader *n_pos =
751 process_positive_group (var, pos, dict, &rs[i]);
753 if ( negatives == NULL)
755 negatives = casewriter_make_reader (neg_wtr);
758 neg = casereader_clone (negatives);
760 n_neg = process_negative_group (var, neg, dict, &rs[i]);
763 /* Merge the n_pos and n_neg casereaders */
764 w = sort_create_writer (&up_ordering, n_proto);
765 for ( ; (cpos = casereader_read (n_pos) ); case_unref (cpos))
767 struct ccase *pos_case = case_create (n_proto);
769 const double jpos = case_data_idx (cpos, VALUE)->f;
771 while ((cneg = casereader_read (n_neg)))
773 struct ccase *nc = case_create (n_proto);
775 const double jneg = case_data_idx (cneg, VALUE)->f;
777 case_data_rw_idx (nc, VALUE)->f = jneg;
778 case_data_rw_idx (nc, N_POS_EQ)->f = 0;
780 case_data_rw_idx (nc, N_POS_GT)->f = SYSMIS;
782 *case_data_rw_idx (nc, N_NEG_EQ) = *case_data_idx (cneg, N_EQ);
783 *case_data_rw_idx (nc, N_NEG_LT) = *case_data_idx (cneg, N_PRED);
785 casewriter_write (w, nc);
792 case_data_rw_idx (pos_case, VALUE)->f = jpos;
793 *case_data_rw_idx (pos_case, N_POS_EQ) = *case_data_idx (cpos, N_EQ);
794 *case_data_rw_idx (pos_case, N_POS_GT) = *case_data_idx (cpos, N_PRED);
795 case_data_rw_idx (pos_case, N_NEG_EQ)->f = 0;
796 case_data_rw_idx (pos_case, N_NEG_LT)->f = SYSMIS;
798 casewriter_write (w, pos_case);
801 /* These aren't used anymore */
805 r = casewriter_make_reader (w);
807 /* Propagate the N_POS_GT values from the positive cases
808 to the negative ones */
810 double prev_pos_gt = rs[i].n1;
811 w = sort_create_writer (&down_ordering, n_proto);
813 for ( ; (c = casereader_read (r) ); case_unref (c))
815 double n_pos_gt = case_data_idx (c, N_POS_GT)->f;
816 struct ccase *nc = case_clone (c);
818 if ( n_pos_gt == SYSMIS)
820 n_pos_gt = prev_pos_gt;
821 case_data_rw_idx (nc, N_POS_GT)->f = n_pos_gt;
824 casewriter_write (w, nc);
825 prev_pos_gt = n_pos_gt;
828 r = casewriter_make_reader (w);
831 /* Propagate the N_NEG_LT values from the negative cases
832 to the positive ones */
834 double prev_neg_lt = rs[i].n2;
835 w = sort_create_writer (&up_ordering, n_proto);
837 for ( ; (c = casereader_read (r) ); case_unref (c))
839 double n_neg_lt = case_data_idx (c, N_NEG_LT)->f;
840 struct ccase *nc = case_clone (c);
842 if ( n_neg_lt == SYSMIS)
844 n_neg_lt = prev_neg_lt;
845 case_data_rw_idx (nc, N_NEG_LT)->f = n_neg_lt;
848 casewriter_write (w, nc);
849 prev_neg_lt = n_neg_lt;
852 r = casewriter_make_reader (w);
856 struct ccase *prev_case = NULL;
857 for ( ; (c = casereader_read (r) ); case_unref (c))
859 const struct ccase *next_case = casereader_peek (r, 0);
861 const double j = case_data_idx (c, VALUE)->f;
862 double n_pos_eq = case_data_idx (c, N_POS_EQ)->f;
863 double n_pos_gt = case_data_idx (c, N_POS_GT)->f;
864 double n_neg_eq = case_data_idx (c, N_NEG_EQ)->f;
865 double n_neg_lt = case_data_idx (c, N_NEG_LT)->f;
867 if ( prev_case && j == case_data_idx (prev_case, VALUE)->f)
869 if ( 0 == case_data_idx (c, N_POS_EQ)->f)
871 n_pos_eq = case_data_idx (prev_case, N_POS_EQ)->f;
872 n_pos_gt = case_data_idx (prev_case, N_POS_GT)->f;
875 if ( 0 == case_data_idx (c, N_NEG_EQ)->f)
877 n_neg_eq = case_data_idx (prev_case, N_NEG_EQ)->f;
878 n_neg_lt = case_data_idx (prev_case, N_NEG_LT)->f;
882 if ( NULL == next_case || j != case_data_idx (next_case, VALUE)->f)
884 rs[i].auc += n_pos_gt * n_neg_eq + (n_pos_eq * n_neg_eq) / 2.0;
887 n_neg_eq * ( pow2 (n_pos_gt) + n_pos_gt * n_pos_eq + pow2 (n_pos_eq) / 3.0);
889 n_pos_eq * ( pow2 (n_neg_lt) + n_neg_lt * n_neg_eq + pow2 (n_neg_eq) / 3.0);
893 case_unref (prev_case);
894 prev_case = case_clone (c);
897 rs[i].auc /= rs[i].n1 * rs[i].n2;
899 rs[i].auc = 1 - rs[i].auc;
901 if ( roc->bi_neg_exp )
903 rs[i].q1hat = rs[i].auc / ( 2 - rs[i].auc);
904 rs[i].q2hat = 2 * pow2 (rs[i].auc) / ( 1 + rs[i].auc);
908 rs[i].q1hat /= rs[i].n2 * pow2 (rs[i].n1);
909 rs[i].q2hat /= rs[i].n1 * pow2 (rs[i].n2);
914 casereader_destroy (positives);
915 casereader_destroy (negatives);
917 output_roc (rs, roc);
923 show_auc (struct roc_state *rs, const struct cmd_roc *roc)
926 const int n_fields = roc->print_se ? 5 : 1;
927 const int n_cols = roc->n_vars > 1 ? n_fields + 1: n_fields;
928 const int n_rows = 2 + roc->n_vars;
929 struct tab_table *tbl = tab_create (n_cols, n_rows);
931 if ( roc->n_vars > 1)
932 tab_title (tbl, _("Area Under the Curve"));
934 tab_title (tbl, _("Area Under the Curve (%s)"), var_to_string (roc->vars[0]));
936 tab_headers (tbl, n_cols - n_fields, 0, 1, 0);
939 tab_text (tbl, n_cols - n_fields, 1, TAT_TITLE, _("Area"));
941 tab_hline (tbl, TAL_2, 0, n_cols - 1, 2);
952 tab_text (tbl, n_cols - 4, 1, TAT_TITLE, _("Std. Error"));
953 tab_text (tbl, n_cols - 3, 1, TAT_TITLE, _("Asymptotic Sig."));
955 tab_text (tbl, n_cols - 2, 1, TAT_TITLE, _("Lower Bound"));
956 tab_text (tbl, n_cols - 1, 1, TAT_TITLE, _("Upper Bound"));
958 tab_joint_text_format (tbl, n_cols - 2, 0, 4, 0,
959 TAT_TITLE | TAB_CENTER,
960 _("Asymp. %g%% Confidence Interval"), roc->ci);
961 tab_vline (tbl, 0, n_cols - 1, 0, 0);
962 tab_hline (tbl, TAL_1, n_cols - 2, n_cols - 1, 1);
965 if ( roc->n_vars > 1)
966 tab_text (tbl, 0, 1, TAT_TITLE, _("Variable under test"));
968 if ( roc->n_vars > 1)
969 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
972 for ( i = 0 ; i < roc->n_vars ; ++i )
974 tab_text (tbl, 0, 2 + i, TAT_TITLE, var_to_string (roc->vars[i]));
976 tab_double (tbl, n_cols - n_fields, 2 + i, 0, rs[i].auc, NULL);
981 const double sd_0_5 = sqrt ((rs[i].n1 + rs[i].n2 + 1) /
982 (12 * rs[i].n1 * rs[i].n2));
986 se = rs[i].auc * (1 - rs[i].auc) + (rs[i].n1 - 1) * (rs[i].q1hat - pow2 (rs[i].auc)) +
987 (rs[i].n2 - 1) * (rs[i].q2hat - pow2 (rs[i].auc));
989 se /= rs[i].n1 * rs[i].n2;
993 tab_double (tbl, n_cols - 4, 2 + i, 0,
997 ci = 1 - roc->ci / 100.0;
998 yy = gsl_cdf_gaussian_Qinv (ci, se) ;
1000 tab_double (tbl, n_cols - 2, 2 + i, 0,
1004 tab_double (tbl, n_cols - 1, 2 + i, 0,
1008 tab_double (tbl, n_cols - 3, 2 + i, 0,
1009 2.0 * gsl_cdf_ugaussian_Q (fabs ((rs[i].auc - 0.5 ) / sd_0_5)),
1019 show_summary (const struct cmd_roc *roc)
1021 const int n_cols = 3;
1022 const int n_rows = 4;
1023 struct tab_table *tbl = tab_create (n_cols, n_rows);
1025 tab_title (tbl, _("Case Summary"));
1027 tab_headers (tbl, 1, 0, 2, 0);
1036 tab_hline (tbl, TAL_2, 0, n_cols - 1, 2);
1037 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
1040 tab_hline (tbl, TAL_2, 1, n_cols - 1, 1);
1041 tab_vline (tbl, TAL_1, 2, 1, n_rows - 1);
1044 tab_text (tbl, 0, 1, TAT_TITLE | TAB_LEFT, var_to_string (roc->state_var));
1045 tab_text (tbl, 1, 1, TAT_TITLE, _("Unweighted"));
1046 tab_text (tbl, 2, 1, TAT_TITLE, _("Weighted"));
1048 tab_joint_text (tbl, 1, 0, 2, 0,
1049 TAT_TITLE | TAB_CENTER,
1050 _("Valid N (listwise)"));
1053 tab_text (tbl, 0, 2, TAB_LEFT, _("Positive"));
1054 tab_text (tbl, 0, 3, TAB_LEFT, _("Negative"));
1057 tab_double (tbl, 1, 2, 0, roc->pos, &F_8_0);
1058 tab_double (tbl, 1, 3, 0, roc->neg, &F_8_0);
1060 tab_double (tbl, 2, 2, 0, roc->pos_weighted, 0);
1061 tab_double (tbl, 2, 3, 0, roc->neg_weighted, 0);
1068 show_coords (struct roc_state *rs, const struct cmd_roc *roc)
1072 const int n_cols = roc->n_vars > 1 ? 4 : 3;
1074 struct tab_table *tbl ;
1076 for (i = 0; i < roc->n_vars; ++i)
1077 n_rows += casereader_count_cases (rs[i].cutpoint_rdr);
1079 tbl = tab_create (n_cols, n_rows);
1081 if ( roc->n_vars > 1)
1082 tab_title (tbl, _("Coordinates of the Curve"));
1084 tab_title (tbl, _("Coordinates of the Curve (%s)"), var_to_string (roc->vars[0]));
1087 tab_headers (tbl, 1, 0, 1, 0);
1089 tab_hline (tbl, TAL_2, 0, n_cols - 1, 1);
1091 if ( roc->n_vars > 1)
1092 tab_text (tbl, 0, 0, TAT_TITLE, _("Test variable"));
1094 tab_text (tbl, n_cols - 3, 0, TAT_TITLE, _("Positive if greater than or equal to"));
1095 tab_text (tbl, n_cols - 2, 0, TAT_TITLE, _("Sensitivity"));
1096 tab_text (tbl, n_cols - 1, 0, TAT_TITLE, _("1 - Specificity"));
1105 if ( roc->n_vars > 1)
1106 tab_vline (tbl, TAL_2, 1, 0, n_rows - 1);
1108 for (i = 0; i < roc->n_vars; ++i)
1111 struct casereader *r = casereader_clone (rs[i].cutpoint_rdr);
1113 if ( roc->n_vars > 1)
1114 tab_text (tbl, 0, x, TAT_TITLE, var_to_string (roc->vars[i]));
1117 tab_hline (tbl, TAL_1, 0, n_cols - 1, x);
1120 for (; (cc = casereader_read (r)) != NULL;
1121 case_unref (cc), x++)
1123 const double se = case_data_idx (cc, ROC_TP)->f /
1125 case_data_idx (cc, ROC_TP)->f
1127 case_data_idx (cc, ROC_FN)->f
1130 const double sp = case_data_idx (cc, ROC_TN)->f /
1132 case_data_idx (cc, ROC_TN)->f
1134 case_data_idx (cc, ROC_FP)->f
1137 tab_double (tbl, n_cols - 3, x, 0, case_data_idx (cc, ROC_CUTPOINT)->f,
1138 var_get_print_format (roc->vars[i]));
1140 tab_double (tbl, n_cols - 2, x, 0, se, NULL);
1141 tab_double (tbl, n_cols - 1, x, 0, 1 - sp, NULL);
1144 casereader_destroy (r);
1152 output_roc (struct roc_state *rs, const struct cmd_roc *roc)
1158 struct roc_chart *rc;
1161 rc = roc_chart_create (roc->reference);
1162 for (i = 0; i < roc->n_vars; i++)
1163 roc_chart_add_var (rc, var_get_name (roc->vars[i]),
1164 rs[i].cutpoint_rdr);
1165 roc_chart_submit (rc);
1170 if ( roc->print_coords )
1171 show_coords (rs, roc);