1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/stats/roc.h"
21 #include <gsl/gsl_cdf.h>
23 #include "data/casegrouper.h"
24 #include "data/casereader.h"
25 #include "data/casewriter.h"
26 #include "data/dataset.h"
27 #include "data/dictionary.h"
28 #include "data/format.h"
29 #include "data/subcase.h"
30 #include "language/command.h"
31 #include "language/lexer/lexer.h"
32 #include "language/lexer/value-parser.h"
33 #include "language/lexer/variable-parser.h"
34 #include "libpspp/misc.h"
35 #include "math/sort.h"
36 #include "output/chart-item.h"
37 #include "output/charts/roc-chart.h"
38 #include "output/pivot-table.h"
41 #define _(msgid) gettext (msgid)
42 #define N_(msgid) msgid
47 const struct variable **vars;
48 const struct dictionary *dict;
50 const struct variable *state_var;
51 union value state_value;
52 size_t state_var_width;
54 /* Plot the roc curve */
56 /* Plot the reference line */
63 bool bi_neg_exp; /* True iff the bi-negative exponential critieria
65 enum mv_class exclude;
67 bool invert ; /* True iff a smaller test result variable indicates
76 static int run_roc (struct dataset *ds, struct cmd_roc *roc);
79 cmd_roc (struct lexer *lexer, struct dataset *ds)
82 const struct dictionary *dict = dataset_dict (ds);
87 roc.print_coords = false;
90 roc.reference = false;
92 roc.bi_neg_exp = false;
94 roc.pos = roc.pos_weighted = 0;
95 roc.neg = roc.neg_weighted = 0;
96 roc.dict = dataset_dict (ds);
98 roc.state_var_width = -1;
100 lex_match (lexer, T_SLASH);
101 if (!parse_variables_const (lexer, dict, &roc.vars, &roc.n_vars,
102 PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
105 if (! lex_force_match (lexer, T_BY))
110 roc.state_var = parse_variable (lexer, dict);
116 if (!lex_force_match (lexer, T_LPAREN))
121 roc.state_var_width = var_get_width (roc.state_var);
122 value_init (&roc.state_value, roc.state_var_width);
123 parse_value (lexer, &roc.state_value, roc.state_var);
126 if (!lex_force_match (lexer, T_RPAREN))
131 while (lex_token (lexer) != T_ENDCMD)
133 lex_match (lexer, T_SLASH);
134 if (lex_match_id (lexer, "MISSING"))
136 lex_match (lexer, T_EQUALS);
137 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
139 if (lex_match_id (lexer, "INCLUDE"))
141 roc.exclude = MV_SYSTEM;
143 else if (lex_match_id (lexer, "EXCLUDE"))
145 roc.exclude = MV_ANY;
149 lex_error (lexer, NULL);
154 else if (lex_match_id (lexer, "PLOT"))
156 lex_match (lexer, T_EQUALS);
157 if (lex_match_id (lexer, "CURVE"))
160 if (lex_match (lexer, T_LPAREN))
162 roc.reference = true;
163 if (! lex_force_match_id (lexer, "REFERENCE"))
165 if (! lex_force_match (lexer, T_RPAREN))
169 else if (lex_match_id (lexer, "NONE"))
175 lex_error (lexer, NULL);
179 else if (lex_match_id (lexer, "PRINT"))
181 lex_match (lexer, T_EQUALS);
182 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
184 if (lex_match_id (lexer, "SE"))
188 else if (lex_match_id (lexer, "COORDINATES"))
190 roc.print_coords = true;
194 lex_error (lexer, NULL);
199 else if (lex_match_id (lexer, "CRITERIA"))
201 lex_match (lexer, T_EQUALS);
202 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
204 if (lex_match_id (lexer, "CUTOFF"))
206 if (! lex_force_match (lexer, T_LPAREN))
208 if (lex_match_id (lexer, "INCLUDE"))
210 roc.exclude = MV_SYSTEM;
212 else if (lex_match_id (lexer, "EXCLUDE"))
214 roc.exclude = MV_USER | MV_SYSTEM;
218 lex_error (lexer, NULL);
221 if (! lex_force_match (lexer, T_RPAREN))
224 else if (lex_match_id (lexer, "TESTPOS"))
226 if (! lex_force_match (lexer, T_LPAREN))
228 if (lex_match_id (lexer, "LARGE"))
232 else if (lex_match_id (lexer, "SMALL"))
238 lex_error (lexer, NULL);
241 if (! lex_force_match (lexer, T_RPAREN))
244 else if (lex_match_id (lexer, "CI"))
246 if (!lex_force_match (lexer, T_LPAREN))
248 if (! lex_force_num (lexer))
250 roc.ci = lex_number (lexer);
252 if (!lex_force_match (lexer, T_RPAREN))
255 else if (lex_match_id (lexer, "DISTRIBUTION"))
257 if (!lex_force_match (lexer, T_LPAREN))
259 if (lex_match_id (lexer, "FREE"))
261 roc.bi_neg_exp = false;
263 else if (lex_match_id (lexer, "NEGEXPO"))
265 roc.bi_neg_exp = true;
269 lex_error (lexer, NULL);
272 if (!lex_force_match (lexer, T_RPAREN))
277 lex_error (lexer, NULL);
284 lex_error (lexer, NULL);
289 if (! run_roc (ds, &roc))
293 value_destroy (&roc.state_value, roc.state_var_width);
299 value_destroy (&roc.state_value, roc.state_var_width);
308 do_roc (struct cmd_roc *roc, struct casereader *group, struct dictionary *dict);
312 run_roc (struct dataset *ds, struct cmd_roc *roc)
314 struct dictionary *dict = dataset_dict (ds);
316 struct casereader *group;
318 struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict);
319 while (casegrouper_get_next_group (grouper, &group))
321 do_roc (roc, group, dataset_dict (ds));
323 ok = casegrouper_destroy (grouper);
324 ok = proc_commit (ds) && ok;
331 dump_casereader (struct casereader *reader)
334 struct casereader *r = casereader_clone (reader);
336 for (; (c = casereader_read (r)); case_unref (c))
339 for (i = 0 ; i < case_get_value_cnt (c); ++i)
341 printf ("%g ", case_data_idx (c, i)->f);
346 casereader_destroy (r);
352 Return true iff the state variable indicates that C has positive actual state.
354 As a side effect, this function also accumulates the roc->{pos,neg} and
355 roc->{pos,neg}_weighted counts.
358 match_positives (const struct ccase *c, void *aux)
360 struct cmd_roc *roc = aux;
361 const struct variable *wv = dict_get_weight (roc->dict);
362 const double weight = wv ? case_data (c, wv)->f : 1.0;
364 const bool positive =
365 (0 == value_compare_3way (case_data (c, roc->state_var), &roc->state_value,
366 var_get_width (roc->state_var)));
371 roc->pos_weighted += weight;
376 roc->neg_weighted += weight;
387 /* Some intermediate state for calculating the cutpoints and the
388 standard error values */
391 double auc; /* Area under the curve */
393 double n1; /* total weight of positives */
394 double n2; /* total weight of negatives */
396 /* intermediates for standard error */
400 /* intermediates for cutpoints */
401 struct casewriter *cutpoint_wtr;
402 struct casereader *cutpoint_rdr;
409 Return a new casereader based upon CUTPOINT_RDR.
410 The number of "positive" cases are placed into
411 the position TRUE_INDEX, and the number of "negative" cases
413 POS_COND and RESULT determine the semantics of what is
415 WEIGHT is the value of a single count.
417 static struct casereader *
418 accumulate_counts (struct casereader *input,
419 double result, double weight,
420 bool (*pos_cond) (double, double),
421 int true_index, int false_index)
423 const struct caseproto *proto = casereader_get_proto (input);
424 struct casewriter *w =
425 autopaging_writer_create (proto);
427 double prev_cp = SYSMIS;
429 for (; (cpc = casereader_read (input)); case_unref (cpc))
431 struct ccase *new_case;
432 const double cp = case_data_idx (cpc, ROC_CUTPOINT)->f;
434 assert (cp != SYSMIS);
436 /* We don't want duplicates here */
440 new_case = case_clone (cpc);
442 if (pos_cond (result, cp))
443 case_data_rw_idx (new_case, true_index)->f += weight;
445 case_data_rw_idx (new_case, false_index)->f += weight;
449 casewriter_write (w, new_case);
451 casereader_destroy (input);
453 return casewriter_make_reader (w);
458 static void output_roc (struct roc_state *rs, const struct cmd_roc *roc);
461 This function does 3 things:
463 1. Counts the number of cases which are equal to every other case in READER,
464 and those cases for which the relationship between it and every other case
465 satifies PRED (normally either > or <). VAR is variable defining a case's value
468 2. Counts the number of true and false cases in reader, and populates
469 CUTPOINT_RDR accordingly. TRUE_INDEX and FALSE_INDEX are the indices
470 which receive these values. POS_COND is the condition defining true
473 3. CC is filled with the cumulative weight of all cases of READER.
475 static struct casereader *
476 process_group (const struct variable *var, struct casereader *reader,
477 bool (*pred) (double, double),
478 const struct dictionary *dict,
480 struct casereader **cutpoint_rdr,
481 bool (*pos_cond) (double, double),
485 const struct variable *w = dict_get_weight (dict);
487 struct casereader *r1 =
488 casereader_create_distinct (sort_execute_1var (reader, var), var, w);
490 const int weight_idx = w ? var_get_case_index (w) :
491 caseproto_get_n_widths (casereader_get_proto (r1)) - 1;
495 struct casereader *rclone = casereader_clone (r1);
496 struct casewriter *wtr;
497 struct caseproto *proto = caseproto_create ();
499 proto = caseproto_add_width (proto, 0);
500 proto = caseproto_add_width (proto, 0);
501 proto = caseproto_add_width (proto, 0);
503 wtr = autopaging_writer_create (proto);
507 for (; (c1 = casereader_read (r1)); case_unref (c1))
509 struct ccase *new_case = case_create (proto);
511 struct casereader *r2 = casereader_clone (rclone);
513 const double weight1 = case_data_idx (c1, weight_idx)->f;
514 const double d1 = case_data (c1, var)->f;
518 *cutpoint_rdr = accumulate_counts (*cutpoint_rdr, d1, weight1,
520 true_index, false_index);
524 for (; (c2 = casereader_read (r2)); case_unref (c2))
526 const double d2 = case_data (c2, var)->f;
527 const double weight2 = case_data_idx (c2, weight_idx)->f;
534 else if (pred (d2, d1))
540 case_data_rw_idx (new_case, VALUE)->f = d1;
541 case_data_rw_idx (new_case, N_EQ)->f = n_eq;
542 case_data_rw_idx (new_case, N_PRED)->f = n_pred;
544 casewriter_write (wtr, new_case);
546 casereader_destroy (r2);
550 casereader_destroy (r1);
551 casereader_destroy (rclone);
553 caseproto_unref (proto);
555 return casewriter_make_reader (wtr);
558 /* Some more indeces into case data */
559 #define N_POS_EQ 1 /* number of positive cases with values equal to n */
560 #define N_POS_GT 2 /* number of positive cases with values greater than n */
561 #define N_NEG_EQ 3 /* number of negative cases with values equal to n */
562 #define N_NEG_LT 4 /* number of negative cases with values less than n */
565 gt (double d1, double d2)
572 ge (double d1, double d2)
578 lt (double d1, double d2)
585 Return a casereader with width 3,
586 populated with cases based upon READER.
587 The cases will have the values:
588 (N, number of cases equal to N, number of cases greater than N)
589 As a side effect, update RS->n1 with the number of positive cases.
591 static struct casereader *
592 process_positive_group (const struct variable *var, struct casereader *reader,
593 const struct dictionary *dict,
594 struct roc_state *rs)
596 return process_group (var, reader, gt, dict, &rs->n1,
603 Return a casereader with width 3,
604 populated with cases based upon READER.
605 The cases will have the values:
606 (N, number of cases equal to N, number of cases less than N)
607 As a side effect, update RS->n2 with the number of negative cases.
609 static struct casereader *
610 process_negative_group (const struct variable *var, struct casereader *reader,
611 const struct dictionary *dict,
612 struct roc_state *rs)
614 return process_group (var, reader, lt, dict, &rs->n2,
624 append_cutpoint (struct casewriter *writer, double cutpoint)
626 struct ccase *cc = case_create (casewriter_get_proto (writer));
628 case_data_rw_idx (cc, ROC_CUTPOINT)->f = cutpoint;
629 case_data_rw_idx (cc, ROC_TP)->f = 0;
630 case_data_rw_idx (cc, ROC_FN)->f = 0;
631 case_data_rw_idx (cc, ROC_TN)->f = 0;
632 case_data_rw_idx (cc, ROC_FP)->f = 0;
634 casewriter_write (writer, cc);
639 Create and initialise the rs[x].cutpoint_rdr casereaders. That is, the readers will
640 be created with width 5, ready to take the values (cutpoint, ROC_TP, ROC_FN, ROC_TN, ROC_FP), and the
641 reader will be populated with its final number of cases.
642 However on exit from this function, only ROC_CUTPOINT entries will be set to their final
643 value. The other entries will be initialised to zero.
646 prepare_cutpoints (struct cmd_roc *roc, struct roc_state *rs, struct casereader *input)
649 struct casereader *r = casereader_clone (input);
653 struct caseproto *proto = caseproto_create ();
654 struct subcase ordering;
655 subcase_init (&ordering, ROC_CUTPOINT, 0, SC_ASCEND);
657 proto = caseproto_add_width (proto, 0); /* cutpoint */
658 proto = caseproto_add_width (proto, 0); /* ROC_TP */
659 proto = caseproto_add_width (proto, 0); /* ROC_FN */
660 proto = caseproto_add_width (proto, 0); /* ROC_TN */
661 proto = caseproto_add_width (proto, 0); /* ROC_FP */
663 for (i = 0 ; i < roc->n_vars; ++i)
665 rs[i].cutpoint_wtr = sort_create_writer (&ordering, proto);
666 rs[i].prev_result = SYSMIS;
667 rs[i].max = -DBL_MAX;
671 caseproto_unref (proto);
672 subcase_destroy (&ordering);
675 for (; (c = casereader_read (r)) != NULL; case_unref (c))
677 for (i = 0 ; i < roc->n_vars; ++i)
679 const union value *v = case_data (c, roc->vars[i]);
680 const double result = v->f;
682 if (mv_is_value_missing (var_get_missing_values (roc->vars[i]), v, roc->exclude))
685 minimize (&rs[i].min, result);
686 maximize (&rs[i].max, result);
688 if (rs[i].prev_result != SYSMIS && rs[i].prev_result != result)
690 const double mean = (result + rs[i].prev_result) / 2.0;
691 append_cutpoint (rs[i].cutpoint_wtr, mean);
694 rs[i].prev_result = result;
697 casereader_destroy (r);
700 /* Append the min and max cutpoints */
701 for (i = 0 ; i < roc->n_vars; ++i)
703 append_cutpoint (rs[i].cutpoint_wtr, rs[i].min - 1);
704 append_cutpoint (rs[i].cutpoint_wtr, rs[i].max + 1);
706 rs[i].cutpoint_rdr = casewriter_make_reader (rs[i].cutpoint_wtr);
711 do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict)
715 struct roc_state *rs = xcalloc (roc->n_vars, sizeof *rs);
717 struct casereader *negatives = NULL;
718 struct casereader *positives = NULL;
720 struct caseproto *n_proto = NULL;
722 struct subcase up_ordering;
723 struct subcase down_ordering;
725 struct casewriter *neg_wtr = NULL;
727 struct casereader *input = casereader_create_filter_missing (reader,
728 roc->vars, roc->n_vars,
733 input = casereader_create_filter_missing (input,
739 neg_wtr = autopaging_writer_create (casereader_get_proto (input));
741 prepare_cutpoints (roc, rs, input);
744 /* Separate the positive actual state cases from the negative ones */
746 casereader_create_filter_func (input,
752 n_proto = caseproto_create ();
754 n_proto = caseproto_add_width (n_proto, 0);
755 n_proto = caseproto_add_width (n_proto, 0);
756 n_proto = caseproto_add_width (n_proto, 0);
757 n_proto = caseproto_add_width (n_proto, 0);
758 n_proto = caseproto_add_width (n_proto, 0);
760 subcase_init (&up_ordering, VALUE, 0, SC_ASCEND);
761 subcase_init (&down_ordering, VALUE, 0, SC_DESCEND);
763 for (i = 0 ; i < roc->n_vars; ++i)
765 struct casewriter *w = NULL;
766 struct casereader *r = NULL;
771 struct casereader *n_neg_reader ;
772 const struct variable *var = roc->vars[i];
774 struct casereader *neg ;
775 struct casereader *pos = casereader_clone (positives);
777 struct casereader *n_pos_reader =
778 process_positive_group (var, pos, dict, &rs[i]);
780 if (negatives == NULL)
782 negatives = casewriter_make_reader (neg_wtr);
785 neg = casereader_clone (negatives);
787 n_neg_reader = process_negative_group (var, neg, dict, &rs[i]);
789 /* Merge the n_pos and n_neg casereaders */
790 w = sort_create_writer (&up_ordering, n_proto);
791 for (; (cpos = casereader_read (n_pos_reader)); case_unref (cpos))
793 struct ccase *pos_case = case_create (n_proto);
795 const double jpos = case_data_idx (cpos, VALUE)->f;
797 while ((cneg = casereader_read (n_neg_reader)))
799 struct ccase *nc = case_create (n_proto);
801 const double jneg = case_data_idx (cneg, VALUE)->f;
803 case_data_rw_idx (nc, VALUE)->f = jneg;
804 case_data_rw_idx (nc, N_POS_EQ)->f = 0;
806 case_data_rw_idx (nc, N_POS_GT)->f = SYSMIS;
808 *case_data_rw_idx (nc, N_NEG_EQ) = *case_data_idx (cneg, N_EQ);
809 *case_data_rw_idx (nc, N_NEG_LT) = *case_data_idx (cneg, N_PRED);
811 casewriter_write (w, nc);
818 case_data_rw_idx (pos_case, VALUE)->f = jpos;
819 *case_data_rw_idx (pos_case, N_POS_EQ) = *case_data_idx (cpos, N_EQ);
820 *case_data_rw_idx (pos_case, N_POS_GT) = *case_data_idx (cpos, N_PRED);
821 case_data_rw_idx (pos_case, N_NEG_EQ)->f = 0;
822 case_data_rw_idx (pos_case, N_NEG_LT)->f = SYSMIS;
824 casewriter_write (w, pos_case);
827 casereader_destroy (n_pos_reader);
828 casereader_destroy (n_neg_reader);
830 /* These aren't used anymore */
834 r = casewriter_make_reader (w);
836 /* Propagate the N_POS_GT values from the positive cases
837 to the negative ones */
839 double prev_pos_gt = rs[i].n1;
840 w = sort_create_writer (&down_ordering, n_proto);
842 for (; (c = casereader_read (r)); case_unref (c))
844 double n_pos_gt = case_data_idx (c, N_POS_GT)->f;
845 struct ccase *nc = case_clone (c);
847 if (n_pos_gt == SYSMIS)
849 n_pos_gt = prev_pos_gt;
850 case_data_rw_idx (nc, N_POS_GT)->f = n_pos_gt;
853 casewriter_write (w, nc);
854 prev_pos_gt = n_pos_gt;
857 casereader_destroy (r);
858 r = casewriter_make_reader (w);
861 /* Propagate the N_NEG_LT values from the negative cases
862 to the positive ones */
864 double prev_neg_lt = rs[i].n2;
865 w = sort_create_writer (&up_ordering, n_proto);
867 for (; (c = casereader_read (r)); case_unref (c))
869 double n_neg_lt = case_data_idx (c, N_NEG_LT)->f;
870 struct ccase *nc = case_clone (c);
872 if (n_neg_lt == SYSMIS)
874 n_neg_lt = prev_neg_lt;
875 case_data_rw_idx (nc, N_NEG_LT)->f = n_neg_lt;
878 casewriter_write (w, nc);
879 prev_neg_lt = n_neg_lt;
882 casereader_destroy (r);
883 r = casewriter_make_reader (w);
887 struct ccase *prev_case = NULL;
888 for (; (c = casereader_read (r)); case_unref (c))
890 struct ccase *next_case = casereader_peek (r, 0);
892 const double j = case_data_idx (c, VALUE)->f;
893 double n_pos_eq = case_data_idx (c, N_POS_EQ)->f;
894 double n_pos_gt = case_data_idx (c, N_POS_GT)->f;
895 double n_neg_eq = case_data_idx (c, N_NEG_EQ)->f;
896 double n_neg_lt = case_data_idx (c, N_NEG_LT)->f;
898 if (prev_case && j == case_data_idx (prev_case, VALUE)->f)
900 if (0 == case_data_idx (c, N_POS_EQ)->f)
902 n_pos_eq = case_data_idx (prev_case, N_POS_EQ)->f;
903 n_pos_gt = case_data_idx (prev_case, N_POS_GT)->f;
906 if (0 == case_data_idx (c, N_NEG_EQ)->f)
908 n_neg_eq = case_data_idx (prev_case, N_NEG_EQ)->f;
909 n_neg_lt = case_data_idx (prev_case, N_NEG_LT)->f;
913 if (NULL == next_case || j != case_data_idx (next_case, VALUE)->f)
915 rs[i].auc += n_pos_gt * n_neg_eq + (n_pos_eq * n_neg_eq) / 2.0;
918 n_neg_eq * (pow2 (n_pos_gt) + n_pos_gt * n_pos_eq + pow2 (n_pos_eq) / 3.0);
920 n_pos_eq * (pow2 (n_neg_lt) + n_neg_lt * n_neg_eq + pow2 (n_neg_eq) / 3.0);
924 case_unref (next_case);
925 case_unref (prev_case);
926 prev_case = case_clone (c);
928 casereader_destroy (r);
929 case_unref (prev_case);
931 rs[i].auc /= rs[i].n1 * rs[i].n2;
933 rs[i].auc = 1 - rs[i].auc;
937 rs[i].q1hat = rs[i].auc / (2 - rs[i].auc);
938 rs[i].q2hat = 2 * pow2 (rs[i].auc) / (1 + rs[i].auc);
942 rs[i].q1hat /= rs[i].n2 * pow2 (rs[i].n1);
943 rs[i].q2hat /= rs[i].n1 * pow2 (rs[i].n2);
948 casereader_destroy (positives);
949 casereader_destroy (negatives);
951 caseproto_unref (n_proto);
952 subcase_destroy (&up_ordering);
953 subcase_destroy (&down_ordering);
955 output_roc (rs, roc);
957 for (i = 0 ; i < roc->n_vars; ++i)
958 casereader_destroy (rs[i].cutpoint_rdr);
964 show_auc (struct roc_state *rs, const struct cmd_roc *roc)
966 struct pivot_table *table = pivot_table_create (N_("Area Under the Curve"));
968 struct pivot_dimension *statistics = pivot_dimension_create (
969 table, PIVOT_AXIS_COLUMN, N_("Statistics"),
970 N_("Area"), PIVOT_RC_OTHER);
973 pivot_category_create_leaves (
975 N_("Std. Error"), PIVOT_RC_OTHER,
976 N_("Asymptotic Sig."), PIVOT_RC_SIGNIFICANCE);
977 struct pivot_category *interval = pivot_category_create_group__ (
979 pivot_value_new_text_format (N_("Asymp. %g%% Confidence Interval"),
981 pivot_category_create_leaves (interval,
982 N_("Lower Bound"), PIVOT_RC_OTHER,
983 N_("Upper Bound"), PIVOT_RC_OTHER);
986 struct pivot_dimension *variables = pivot_dimension_create (
987 table, PIVOT_AXIS_ROW, N_("Variable under test"));
988 variables->root->show_label = true;
990 for (size_t i = 0 ; i < roc->n_vars ; ++i)
992 int var_idx = pivot_category_create_leaf (
993 variables->root, pivot_value_new_variable (roc->vars[i]));
995 pivot_table_put2 (table, 0, var_idx, pivot_value_new_number (rs[i].auc));
999 double se = (rs[i].auc * (1 - rs[i].auc)
1000 + (rs[i].n1 - 1) * (rs[i].q1hat - pow2 (rs[i].auc))
1001 + (rs[i].n2 - 1) * (rs[i].q2hat - pow2 (rs[i].auc)));
1002 se /= rs[i].n1 * rs[i].n2;
1005 double ci = 1 - roc->ci / 100.0;
1006 double yy = gsl_cdf_gaussian_Qinv (ci, se);
1008 double sd_0_5 = sqrt ((rs[i].n1 + rs[i].n2 + 1) /
1009 (12 * rs[i].n1 * rs[i].n2));
1010 double sig = 2.0 * gsl_cdf_ugaussian_Q (fabs ((rs[i].auc - 0.5)
1012 double entries[] = { se, sig, rs[i].auc - yy, rs[i].auc + yy };
1013 for (size_t i = 0; i < sizeof entries / sizeof *entries; i++)
1014 pivot_table_put2 (table, i + 1, var_idx,
1015 pivot_value_new_number (entries[i]));
1019 pivot_table_submit (table);
1024 show_summary (const struct cmd_roc *roc)
1026 struct pivot_table *table = pivot_table_create (N_("Case Summary"));
1028 struct pivot_dimension *statistics = pivot_dimension_create (
1029 table, PIVOT_AXIS_COLUMN, N_("Valid N (listwise)"),
1030 N_("Unweighted"), PIVOT_RC_INTEGER,
1031 N_("Weighted"), PIVOT_RC_OTHER);
1032 statistics->root->show_label = true;
1034 struct pivot_dimension *cases = pivot_dimension_create__ (
1035 table, PIVOT_AXIS_ROW, pivot_value_new_variable (roc->state_var));
1036 cases->root->show_label = true;
1037 pivot_category_create_leaves (cases->root, N_("Positive"), N_("Negative"));
1048 { 1, 0, roc->pos_weighted },
1049 { 1, 1, roc->neg_weighted },
1051 for (size_t i = 0; i < sizeof entries / sizeof *entries; i++)
1053 const struct entry *e = &entries[i];
1054 pivot_table_put2 (table, e->stat_idx, e->case_idx,
1055 pivot_value_new_number (e->x));
1057 pivot_table_submit (table);
1061 show_coords (struct roc_state *rs, const struct cmd_roc *roc)
1063 struct pivot_table *table = pivot_table_create (
1064 N_("Coordinates of the Curve"));
1065 table->omit_empty = true;
1067 pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"),
1068 N_("Positive if greater than or equal to"),
1069 N_("Sensitivity"), N_("1 - Specificity"));
1071 struct pivot_dimension *coordinates = pivot_dimension_create (
1072 table, PIVOT_AXIS_ROW, N_("Coordinates"));
1073 coordinates->hide_all_labels = true;
1075 struct pivot_dimension *variables = pivot_dimension_create (
1076 table, PIVOT_AXIS_ROW, N_("Test variable"));
1077 variables->root->show_label = true;
1081 for (size_t i = 0; i < roc->n_vars; ++i)
1083 struct casereader *r = casereader_clone (rs[i].cutpoint_rdr);
1085 int var_idx = pivot_category_create_leaf (
1086 variables->root, pivot_value_new_variable (roc->vars[i]));
1090 for (; (cc = casereader_read (r)) != NULL; case_unref (cc))
1092 const double se = case_data_idx (cc, ROC_TP)->f /
1093 (case_data_idx (cc, ROC_TP)->f + case_data_idx (cc, ROC_FN)->f);
1095 const double sp = case_data_idx (cc, ROC_TN)->f /
1096 (case_data_idx (cc, ROC_TN)->f + case_data_idx (cc, ROC_FP)->f);
1099 table, 0, coord_idx, var_idx,
1100 pivot_value_new_var_value (roc->vars[i],
1101 case_data_idx (cc, ROC_CUTPOINT)));
1103 pivot_table_put3 (table, 1, coord_idx, var_idx,
1104 pivot_value_new_number (se));
1105 pivot_table_put3 (table, 2, coord_idx, var_idx,
1106 pivot_value_new_number (1 - sp));
1110 if (coord_idx > n_coords)
1111 n_coords = coord_idx;
1113 casereader_destroy (r);
1116 for (size_t i = 0; i < n_coords; i++)
1117 pivot_category_create_leaf (coordinates->root,
1118 pivot_value_new_integer (i + 1));
1120 pivot_table_submit (table);
1125 output_roc (struct roc_state *rs, const struct cmd_roc *roc)
1131 struct roc_chart *rc;
1134 rc = roc_chart_create (roc->reference);
1135 for (i = 0; i < roc->n_vars; i++)
1136 roc_chart_add_var (rc, var_get_name (roc->vars[i]),
1137 rs[i].cutpoint_rdr);
1138 roc_chart_submit (rc);
1143 if (roc->print_coords)
1144 show_coords (rs, roc);