{
size_t n_vars;
const struct variable **vars;
+ const struct dictionary *dict;
- struct variable *state_var ;
+ const struct variable *state_var ;
union value state_value;
/* Plot the roc curve */
bool invert ; /* True iff a smaller test result variable indicates
a positive result */
+
+ double pos;
+ double neg;
+ double pos_weighted;
+ double neg_weighted;
};
static int run_roc (struct dataset *ds, struct cmd_roc *roc);
roc.ci = 95;
roc.bi_neg_exp = false;
roc.invert = false;
+ roc.pos = roc.pos_weighted = 0;
+ roc.neg = roc.neg_weighted = 0;
+ roc.dict = dataset_dict (ds);
if (!parse_variables_const (lexer, dict, &roc.vars, &roc.n_vars,
PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
return ok;
}
-
+#if 0 /* Debugging helper, disabled: the printf/dump_casereader calls in do_roc are being removed. */
static void
dump_casereader (struct casereader *reader)
{
casereader_destroy (r);
}
+#endif /* 0 */
+/* Filter predicate for casereader_create_filter_func.
+
+   Returns true iff the state variable of case C compares equal to the
+   positive state value stored in AUX (a struct cmd_roc *).
+
+   Side effect: every call tallies C into AUX's counters -- pos/neg count
+   cases, pos_weighted/neg_weighted sum their weights -- so the totals are
+   only right if each case is passed through this predicate once. */
static bool
match_positives (const struct ccase *c, void *aux)
{
struct cmd_roc *roc = aux;
+ /* Ask the dictionary for the case weight, as the cutpoint code did,
+    instead of reading the weight variable's raw value: a raw
+    system-missing or negative weight would otherwise be added straight
+    into the weighted totals.  The accessor also covers the
+    unweighted-dictionary case, so no separate test is needed. */
+ const double weight = dict_get_case_weight (roc->dict, c, NULL);
+
+ const bool positive = ( 0 == value_compare_3way (case_data (c, roc->state_var),
+ &roc->state_value,
+ var_get_width (roc->state_var)));
+
+ if ( positive )
+ {
+ roc->pos++;
+ roc->pos_weighted += weight;
+ }
+ else
+ {
+ roc->neg++;
+ roc->neg_weighted += weight;
+ }
- return 0 == value_compare_3way (case_data (c, roc->state_var),
- &roc->state_value,
- var_get_width (roc->state_var));
+ return positive;
}
struct casereader **cutpoint_rdr,
bool (*pos_cond) (double, double),
int true_index,
- int false_index
- )
+ int false_index)
{
const struct variable *w = dict_get_weight (dict);
+
struct casereader *r1 =
casereader_create_distinct (sort_execute_1var (reader, var), var, w);
for ( ; (c1 = casereader_read (r1) ); case_unref (c1))
{
+ struct ccase *new_case = case_create (proto);
struct ccase *c2;
struct casereader *r2 = casereader_clone (rclone);
double n_pred = 0.0;
*cutpoint_rdr = accumulate_counts (*cutpoint_rdr, d1, weight1,
- pos_cond,
- true_index, false_index);
-
- struct ccase *new_case = case_create (proto);
+ pos_cond,
+ true_index, false_index);
*cc += weight1;
}
+/* Prepare the cutpoints.
+
+   For each of ROC's test variables, creates a sorted cutpoint writer in
+   the matching element of RS, feeds it the midpoint between every pair
+   of consecutive, differing, non-missing results read from a clone of
+   INPUT, then adds one cutpoint below the minimum and one above the
+   maximum and turns the writer into rs[i].cutpoint_rdr.
+
+   INPUT itself is not consumed: only the clone is read and destroyed.
+   RS must have at least roc->n_vars elements. */
static void
-do_roc (struct cmd_roc *roc, struct casereader *input, struct dictionary *dict)
+prepare_cutpoints (struct cmd_roc *roc, struct roc_state *rs, struct casereader *input)
{
int i;
+ struct casereader *r = casereader_clone (input);
+ struct ccase *c;
+ struct caseproto *proto = caseproto_create ();
- struct roc_state *rs = xcalloc (roc->n_vars, sizeof *rs);
+ struct subcase ordering;
+ subcase_init (&ordering, CUTPOINT, 0, SC_ASCEND);
- struct casewriter *neg_wtr = autopaging_writer_create (casereader_get_proto (input));
+ proto = caseproto_add_width (proto, 0); /* cutpoint */
+ proto = caseproto_add_width (proto, 0); /* TP */
+ proto = caseproto_add_width (proto, 0); /* FN */
+ proto = caseproto_add_width (proto, 0); /* TN */
+ proto = caseproto_add_width (proto, 0); /* FP */
- struct casereader *negatives = NULL;
- struct casereader *positives = NULL;
+ for (i = 0 ; i < roc->n_vars; ++i)
+ {
+ rs[i].cutpoint_wtr = sort_create_writer (&ordering, proto);
+ rs[i].prev_result = SYSMIS;
+ rs[i].max = -DBL_MAX;
+ rs[i].min = DBL_MAX;
+ }
+ for (; (c = casereader_read (r)) != NULL; case_unref (c))
+ {
+ for (i = 0 ; i < roc->n_vars; ++i)
+ {
+ const union value *v = case_data (c, roc->vars[i]);
+ const double result = v->f;
- /* Prepare the cutpoints */
- {
- struct casereader *r = casereader_clone (input);
- struct ccase *c;
- struct caseproto *proto = caseproto_create ();
+ if ( mv_is_value_missing (var_get_missing_values (roc->vars[i]), v, roc->exclude))
+ continue;
- struct subcase ordering;
- subcase_init (&ordering, CUTPOINT, 0, SC_ASCEND);
+ minimize (&rs[i].min, result);
+ maximize (&rs[i].max, result);
+ if ( rs[i].prev_result != SYSMIS && rs[i].prev_result != result )
+ {
+ const double mean = (result + rs[i].prev_result ) / 2.0;
+ append_cutpoint (rs[i].cutpoint_wtr, mean);
+ }
- proto = caseproto_add_width (proto, 0); /* cutpoint */
- proto = caseproto_add_width (proto, 0); /* TP */
- proto = caseproto_add_width (proto, 0); /* FN */
- proto = caseproto_add_width (proto, 0); /* TN */
- proto = caseproto_add_width (proto, 0); /* FP */
+ rs[i].prev_result = result;
+ }
+ }
+ casereader_destroy (r);
- for (i = 0 ; i < roc->n_vars; ++i)
- {
- rs[i].cutpoint_wtr = sort_create_writer (&ordering, proto);
- rs[i].prev_result = SYSMIS;
- rs[i].max = -DBL_MAX;
- rs[i].min = DBL_MAX;
- }
+ /* Append the min and max cutpoints */
+ for (i = 0 ; i < roc->n_vars; ++i)
+ {
+ append_cutpoint (rs[i].cutpoint_wtr, rs[i].min - 1);
+ append_cutpoint (rs[i].cutpoint_wtr, rs[i].max + 1);
- for (; (c = casereader_read (r)) != NULL; case_unref (c))
- {
- const double weight = dict_get_case_weight (dict, c, NULL);
- for (i = 0 ; i < roc->n_vars; ++i)
- {
- const double result = case_data (c, roc->vars[i])->f;
+ rs[i].cutpoint_rdr = casewriter_make_reader (rs[i].cutpoint_wtr);
+ }
+
+ /* Drop this function's own hold on PROTO and ORDERING; without this
+    both are leaked on every call.  This relies on sort_create_writer
+    keeping its own reference to the prototype and its own copy of the
+    ordering -- confirm against sort.c. */
+ caseproto_unref (proto);
+ subcase_destroy (&ordering);
+}
- minimize (&rs[i].min, result);
- maximize (&rs[i].max, result);
+/* Runs the ROC computation for ROC over the cases in READER.
+
+   Cases with missing test or state values (per roc->exclude) are
+   filtered out, the cutpoints are prepared, and the remaining cases are
+   split by match_positives into a positive reader and a negative
+   writer.  For each test variable the per-value summaries of the two
+   groups are then merged and used to accumulate rs[i].auc, rs[i].q1hat
+   and rs[i].q2hat.
+
+   NOTE(review): nothing visible in this function releases n_proto,
+   up_ordering or down_ordering -- confirm whether that happens in code
+   outside this view. */
+static void
+do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict)
+{
+ int i;
- if ( rs[i].prev_result != SYSMIS && rs[i].prev_result != result )
- {
- const double mean = (result + rs[i].prev_result ) / 2.0;
- append_cutpoint (rs[i].cutpoint_wtr, mean);
- }
+ struct roc_state *rs = xcalloc (roc->n_vars, sizeof *rs);
- rs[i].prev_result = result;
- }
- }
- casereader_destroy (r);
+ struct casereader *negatives = NULL;
+ struct casereader *positives = NULL;
+ struct caseproto *n_proto = caseproto_create ();
- /* Append the min and max cutpoints */
- for (i = 0 ; i < roc->n_vars; ++i)
- {
- append_cutpoint (rs[i].cutpoint_wtr, rs[i].min - 1);
- append_cutpoint (rs[i].cutpoint_wtr, rs[i].max + 1);
+ struct subcase up_ordering;
+ struct subcase down_ordering;
- rs[i].cutpoint_rdr = casewriter_make_reader (rs[i].cutpoint_wtr);
- }
- }
+ struct casewriter *neg_wtr = NULL;
+
+ struct casereader *input = casereader_create_filter_missing (reader,
+ roc->vars, roc->n_vars,
+ roc->exclude,
+ NULL,
+ NULL);
+
+ input = casereader_create_filter_missing (input,
+ &roc->state_var, 1,
+ roc->exclude,
+ NULL,
+ NULL);
+
+ neg_wtr = autopaging_writer_create (casereader_get_proto (input));
+
+ prepare_cutpoints (roc, rs, input);
- positives =
+ positives =
casereader_create_filter_func (input,
match_positives,
NULL,
roc,
neg_wtr);
+ /* n_proto was already created at its declaration; creating it a second
+    time here would leak the first prototype.  It gets five numeric
+    columns: VALUE, N_EQ and N_PRED for the positive group, then columns
+    3 and 4 for the negative group's N_EQ and N_PRED. */
+
+ n_proto = caseproto_add_width (n_proto, 0);
+ n_proto = caseproto_add_width (n_proto, 0);
+ n_proto = caseproto_add_width (n_proto, 0);
+ n_proto = caseproto_add_width (n_proto, 0);
+ n_proto = caseproto_add_width (n_proto, 0);
+
+ subcase_init (&up_ordering, VALUE, 0, SC_ASCEND);
+ subcase_init (&down_ordering, VALUE, 0, SC_DESCEND);
for (i = 0 ; i < roc->n_vars; ++i)
{
+ struct casewriter *w = NULL;
+ struct casereader *r = NULL;
+
+ struct ccase *c;
+
struct ccase *cpos;
struct casereader *n_neg ;
const struct variable *var = roc->vars[i];
n_neg = process_negative_group (var, neg, dict, &rs[i]);
-
- printf ("Positives:\n");
- dump_casereader (n_pos);
-
- printf ("Negatives:\n");
- dump_casereader (n_neg);
-
-#if 0
- /* Simple join on VALUE */
+ /* Merge both groups into one writer sorted ascending on VALUE.  Rows
+    taken from the negative group get N_EQ = 0 and N_PRED = SYSMIS in
+    the positive columns; rows taken from the positive group get 0 and
+    SYSMIS in columns 3 and 4. */
+ w = sort_create_writer (&up_ordering, n_proto);
for ( ; (cpos = casereader_read (n_pos) ); case_unref (cpos))
{
- struct ccase *cneg = NULL;
- double dneg = -DBL_MAX;
- const double dpos = case_data_idx (cpos, VALUE)->f;
- while (dneg < dpos)
+ struct ccase *pos_case = case_create (n_proto);
+ struct ccase *cneg;
+ const double jpos = case_data_idx (cpos, VALUE)->f;
+
+ while ((cneg = casereader_read (n_neg)))
{
- if ( cneg )
- case_unref (cneg);
+ struct ccase *nc = case_create (n_proto);
+
+ const double jneg = case_data_idx (cneg, VALUE)->f;
+
+ case_data_rw_idx (nc, VALUE)->f = jneg;
+ case_data_rw_idx (nc, N_EQ)->f = 0;
+
+ case_data_rw_idx (nc, N_PRED)->f = SYSMIS;
- cneg = casereader_read (n_neg);
- if ( ! cneg )
+ *case_data_rw_idx (nc, 3) = *case_data_idx (cneg, N_EQ);
+ *case_data_rw_idx (nc, 4) = *case_data_idx (cneg, N_PRED);
+
+ casewriter_write (w, nc);
+
+ case_unref (cneg);
+ if ( jneg > jpos)
break;
- dneg = case_data_idx (cneg, VALUE)->f;
- }
-
- if ( dpos == dneg )
- {
- double n_pos_eq = case_data_idx (cpos, N_EQ)->f;
- double n_neg_eq = case_data_idx (cneg, N_EQ)->f;
- double n_pos_gt = case_data_idx (cpos, N_PRED)->f;
- double n_neg_lt = case_data_idx (cneg, N_PRED)->f;
-
- rs[i].auc += n_pos_gt * n_neg_eq + (n_pos_eq * n_neg_eq) / 2.0;
- rs[i].q1hat +=
- n_neg_eq * ( pow2 (n_pos_gt) + n_pos_gt * n_pos_eq + pow2 (n_pos_eq) / 3.0);
- rs[i].q2hat +=
- n_pos_eq * ( pow2 (n_neg_lt) + n_neg_lt * n_neg_eq + pow2 (n_neg_eq) / 3.0);
}
- if ( cneg )
- case_unref (cneg);
+ case_data_rw_idx (pos_case, VALUE)->f = jpos;
+ *case_data_rw_idx (pos_case, N_EQ) = *case_data_idx (cpos, N_EQ);
+ *case_data_rw_idx (pos_case, N_PRED) = *case_data_idx (cpos, N_PRED);
+ case_data_rw_idx (pos_case, 3)->f = 0;
+ case_data_rw_idx (pos_case, 4)->f = SYSMIS;
+
+ casewriter_write (w, pos_case);
}
- rs[i].auc /= rs[i].n1 * rs[i].n2;
- if ( roc->invert )
- rs[i].auc = 1 - rs[i].auc;
+ r = casewriter_make_reader (w);
- if ( roc->bi_neg_exp )
- {
- rs[i].q1hat = rs[i].auc / ( 2 - rs[i].auc);
- rs[i].q2hat = 2 * pow2 (rs[i].auc) / ( 1 + rs[i].auc);
- }
- else
- {
- rs[i].q1hat /= rs[i].n2 * pow2 (rs[i].n1);
- rs[i].q2hat /= rs[i].n1 * pow2 (rs[i].n2);
- }
+ /* Rewrite in descending VALUE order, replacing each SYSMIS in N_PRED
+    with the value carried over from the previous row (rs[i].n1 to
+    start with). */
+ {
+ double prev_pos_gt = rs[i].n1;
+ w = sort_create_writer (&down_ordering, n_proto);
+
+ for ( ; (c = casereader_read (r) ); case_unref (c))
+ {
+ double n_pos_gt = case_data_idx (c, N_PRED)->f;
+ struct ccase *nc = case_clone (c);
+
+ if ( n_pos_gt == SYSMIS)
+ {
+ n_pos_gt = prev_pos_gt;
+ case_data_rw_idx (nc, N_PRED)->f = n_pos_gt;
+ }
+
+ casewriter_write (w, nc);
+ prev_pos_gt = n_pos_gt;
+ }
+
+ /* The old reader is exhausted; destroy it before R is reused. */
+ casereader_destroy (r);
+ r = casewriter_make_reader (w);
+ }
+
+ /* Rewrite in ascending VALUE order, replacing each SYSMIS in column 4
+    with the value carried over from the previous row (rs[i].n2 to
+    start with). */
+ {
+ double prev_neg_lt = rs[i].n2;
+ w = sort_create_writer (&up_ordering, n_proto);
+
+ for ( ; (c = casereader_read (r) ); case_unref (c))
+ {
+ double n_neg_lt = case_data_idx (c, 4)->f;
+ struct ccase *nc = case_clone (c);
+
+ if ( n_neg_lt == SYSMIS)
+ {
+ n_neg_lt = prev_neg_lt;
+ case_data_rw_idx (nc, 4)->f = n_neg_lt;
+ }
+
+ casewriter_write (w, nc);
+ prev_neg_lt = n_neg_lt;
+ }
+
+ /* The old reader is exhausted; destroy it before R is reused. */
+ casereader_destroy (r);
+ r = casewriter_make_reader (w);
+ }
+
+ /* Accumulate auc, q1hat and q2hat once per run of equal VALUEs, on the
+    last row of the run; a row whose own count is 0 borrows the counts
+    of the previous row when that row has the same VALUE. */
+ {
+ struct ccase *prev_case = NULL;
+ for ( ; (c = casereader_read (r) ); case_unref (c))
+ {
+ /* Not const: the peeked case has to be handed to case_unref below. */
+ struct ccase *next_case = casereader_peek (r, 0);
+
+ const double j = case_data_idx (c, VALUE)->f;
+ double n_pos_eq = case_data_idx (c, N_EQ)->f;
+ double n_pos_gt = case_data_idx (c, N_PRED)->f;
+ double n_neg_eq = case_data_idx (c, 3)->f;
+ double n_neg_lt = case_data_idx (c, 4)->f;
+
+ if ( prev_case && j == case_data_idx (prev_case, VALUE)->f)
+ {
+ if ( 0 == case_data_idx (c, N_EQ)->f)
+ {
+ n_pos_eq = case_data_idx (prev_case, N_EQ)->f;
+ n_pos_gt = case_data_idx (prev_case, N_PRED)->f;
+ }
+
+ if ( 0 == case_data_idx (c, 3)->f)
+ {
+ n_neg_eq = case_data_idx (prev_case, 3)->f;
+ n_neg_lt = case_data_idx (prev_case, 4)->f;
+ }
+ }
+
+ if ( NULL == next_case || j != case_data_idx (next_case, VALUE)->f)
+ {
+ rs[i].auc += n_pos_gt * n_neg_eq + (n_pos_eq * n_neg_eq) / 2.0;
+
+ rs[i].q1hat +=
+ n_neg_eq * ( pow2 (n_pos_gt) + n_pos_gt * n_pos_eq + pow2 (n_pos_eq) / 3.0);
+ rs[i].q2hat +=
+ n_pos_eq * ( pow2 (n_neg_lt) + n_neg_lt * n_neg_eq + pow2 (n_neg_eq) / 3.0);
+
+ }
+
+ /* Release the peeked case; otherwise one case leaks per row. */
+ case_unref (next_case);
+ case_unref (prev_case);
+ prev_case = case_clone (c);
+ }
+
+ /* Release the last reader and the final clone kept in prev_case. */
+ casereader_destroy (r);
+ case_unref (prev_case);
+
+ rs[i].auc /= rs[i].n1 * rs[i].n2;
+ if ( roc->invert )
+ rs[i].auc = 1 - rs[i].auc;
+
+ if ( roc->bi_neg_exp )
+ {
+ rs[i].q1hat = rs[i].auc / ( 2 - rs[i].auc);
+ rs[i].q2hat = 2 * pow2 (rs[i].auc) / ( 1 + rs[i].auc);
+ }
+ else
+ {
+ rs[i].q1hat /= rs[i].n2 * pow2 (rs[i].n1);
+ rs[i].q2hat /= rs[i].n1 * pow2 (rs[i].n2);
+ }
+ }
}
-#endif
casereader_destroy (positives);
casereader_destroy (negatives);
free (rs);
}
-
-
-
static void
show_auc (struct roc_state *rs, const struct cmd_roc *roc)
{
tab_text (tbl, 0, 3, TAB_LEFT, _("Negative"));
-#if 0
tab_double (tbl, 1, 2, 0, roc->pos, &F_8_0);
tab_double (tbl, 1, 3, 0, roc->neg, &F_8_0);
tab_double (tbl, 2, 2, 0, roc->pos_weighted, 0);
tab_double (tbl, 2, 3, 0, roc->neg_weighted, 0);
-#endif
tab_submit (tbl);
}