X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Froc.c;h=031110a046a235f5ab0e72df4837f83a2a3cf636;hb=df8b9e60f1533e84bf32d3391d3c82ff50ab7004;hp=eb2430a2541efc29e52e1101b0e7ff1e631d1ee1;hpb=5cab4cf3322f29c0ed7134d23740e07382914f20;p=pspp diff --git a/src/language/stats/roc.c b/src/language/stats/roc.c index eb2430a254..031110a046 100644 --- a/src/language/stats/roc.c +++ b/src/language/stats/roc.c @@ -33,7 +33,6 @@ #include "language/lexer/variable-parser.h" #include "libpspp/misc.h" #include "math/sort.h" -#include "output/chart-item.h" #include "output/charts/roc-chart.h" #include "output/pivot-table.h" @@ -102,7 +101,7 @@ cmd_roc (struct lexer *lexer, struct dataset *ds) PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC)) goto error; - if ( ! lex_force_match (lexer, T_BY)) + if (! lex_force_match (lexer, T_BY)) { goto error; } @@ -113,7 +112,7 @@ cmd_roc (struct lexer *lexer, struct dataset *ds) goto error; } - if ( !lex_force_match (lexer, T_LPAREN)) + if (!lex_force_match (lexer, T_LPAREN)) { goto error; } @@ -123,7 +122,7 @@ cmd_roc (struct lexer *lexer, struct dataset *ds) parse_value (lexer, &roc.state_value, roc.state_var); - if ( !lex_force_match (lexer, T_RPAREN)) + if (!lex_force_match (lexer, T_RPAREN)) { goto error; } @@ -286,16 +285,16 @@ cmd_roc (struct lexer *lexer, struct dataset *ds) } } - if ( ! run_roc (ds, &roc)) + if (! run_roc (ds, &roc)) goto error; - if ( roc.state_var) + if (roc.state_var) value_destroy (&roc.state_value, roc.state_var_width); free (roc.vars); return CMD_SUCCESS; error: - if ( roc.state_var) + if (roc.state_var) value_destroy (&roc.state_value, roc.state_var_width); free (roc.vars); return CMD_FAILURE; @@ -333,13 +332,11 @@ dump_casereader (struct casereader *reader) struct ccase *c; struct casereader *r = casereader_clone (reader); - for ( ; (c = casereader_read (r) ); case_unref (c)) + for (; (c = casereader_read (r)); case_unref (c)) { int i; - for (i = 0 ; i < case_get_value_cnt (c); ++i) - { - printf ("%g ", case_data_idx (c, i)->f); - } + for (i = 0 ; i < case_get_n_values (c); ++i) + printf ("%g ", case_num_idx (c, i)); printf ("\n"); } @@ -359,13 +356,13 @@ match_positives (const struct ccase *c, void *aux) { struct cmd_roc *roc = aux; const struct variable *wv = dict_get_weight (roc->dict); - const double weight = wv ? case_data (c, wv)->f : 1.0; + const double weight = wv ? case_num (c, wv) : 1.0; const bool positive = - ( 0 == value_compare_3way (case_data (c, roc->state_var), &roc->state_value, + (0 == value_compare_3way (case_data (c, roc->state_var), &roc->state_value, var_get_width (roc->state_var))); - if ( positive ) + if (positive) { roc->pos++; roc->pos_weighted += weight; @@ -426,23 +423,21 @@ accumulate_counts (struct casereader *input, struct ccase *cpc; double prev_cp = SYSMIS; - for ( ; (cpc = casereader_read (input) ); case_unref (cpc)) + for (; (cpc = casereader_read (input)); case_unref (cpc)) { struct ccase *new_case; - const double cp = case_data_idx (cpc, ROC_CUTPOINT)->f; + const double cp = case_num_idx (cpc, ROC_CUTPOINT); assert (cp != SYSMIS); /* We don't want duplicates here */ - if ( cp == prev_cp ) + if (cp == prev_cp) continue; new_case = case_clone (cpc); - if ( pos_cond (result, cp)) - case_data_rw_idx (new_case, true_index)->f += weight; - else - case_data_rw_idx (new_case, false_index)->f += weight; + int index = pos_cond (result, cp) ? true_index : false_index; + *case_num_rw_idx (new_case, index) += weight; prev_cp = cp; @@ -504,14 +499,14 @@ process_group (const struct variable *var, struct casereader *reader, *cc = 0; - for ( ; (c1 = casereader_read (r1) ); case_unref (c1)) + for (; (c1 = casereader_read (r1)); case_unref (c1)) { struct ccase *new_case = case_create (proto); struct ccase *c2; struct casereader *r2 = casereader_clone (rclone); - const double weight1 = case_data_idx (c1, weight_idx)->f; - const double d1 = case_data (c1, var)->f; + const double weight1 = case_num_idx (c1, weight_idx); + const double d1 = case_num (c1, var); double n_eq = 0.0; double n_pred = 0.0; @@ -521,25 +516,25 @@ process_group (const struct variable *var, struct casereader *reader, *cc += weight1; - for ( ; (c2 = casereader_read (r2) ); case_unref (c2)) + for (; (c2 = casereader_read (r2)); case_unref (c2)) { - const double d2 = case_data (c2, var)->f; - const double weight2 = case_data_idx (c2, weight_idx)->f; + const double d2 = case_num (c2, var); + const double weight2 = case_num_idx (c2, weight_idx); - if ( d1 == d2 ) + if (d1 == d2) { n_eq += weight2; continue; } - else if ( pred (d2, d1)) + else if (pred (d2, d1)) { n_pred += weight2; } } - case_data_rw_idx (new_case, VALUE)->f = d1; - case_data_rw_idx (new_case, N_EQ)->f = n_eq; - case_data_rw_idx (new_case, N_PRED)->f = n_pred; + *case_num_rw_idx (new_case, VALUE) = d1; + *case_num_rw_idx (new_case, N_EQ) = n_eq; + *case_num_rw_idx (new_case, N_PRED) = n_pred; casewriter_write (wtr, new_case); @@ -625,11 +620,11 @@ append_cutpoint (struct casewriter *writer, double cutpoint) { struct ccase *cc = case_create (casewriter_get_proto (writer)); - case_data_rw_idx (cc, ROC_CUTPOINT)->f = cutpoint; - case_data_rw_idx (cc, ROC_TP)->f = 0; - case_data_rw_idx (cc, ROC_FN)->f = 0; - case_data_rw_idx (cc, ROC_TN)->f = 0; - case_data_rw_idx (cc, ROC_FP)->f = 0; + *case_num_rw_idx (cc, ROC_CUTPOINT) = cutpoint; + *case_num_rw_idx (cc, ROC_TP) = 0; + *case_num_rw_idx (cc, ROC_FN) = 0; + *case_num_rw_idx (cc, ROC_TN) = 0; + *case_num_rw_idx (cc, ROC_FP) = 0; casewriter_write (writer, cc); } @@ -679,15 +674,16 @@ prepare_cutpoints (struct cmd_roc *roc, struct roc_state *rs, struct casereader const union value *v = case_data (c, roc->vars[i]); const double result = v->f; - if ( mv_is_value_missing (var_get_missing_values (roc->vars[i]), v, roc->exclude)) + if (mv_is_value_missing (var_get_missing_values (roc->vars[i]), v) + & roc->exclude) continue; minimize (&rs[i].min, result); maximize (&rs[i].max, result); - if ( rs[i].prev_result != SYSMIS && rs[i].prev_result != result ) + if (rs[i].prev_result != SYSMIS && rs[i].prev_result != result) { - const double mean = (result + rs[i].prev_result ) / 2.0; + const double mean = (result + rs[i].prev_result) / 2.0; append_cutpoint (rs[i].cutpoint_wtr, mean); } @@ -712,7 +708,7 @@ do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict) { int i; - struct roc_state *rs = xcalloc (roc->n_vars, sizeof *rs); + struct roc_state *rs = XCALLOC (roc->n_vars, struct roc_state); struct casereader *negatives = NULL; struct casereader *positives = NULL; @@ -777,7 +773,7 @@ do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict) struct casereader *n_pos_reader = process_positive_group (var, pos, dict, &rs[i]); - if ( negatives == NULL) + if (negatives == NULL) { negatives = casewriter_make_reader (neg_wtr); } @@ -788,22 +784,22 @@ do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict) /* Merge the n_pos and n_neg casereaders */ w = sort_create_writer (&up_ordering, n_proto); - for ( ; (cpos = casereader_read (n_pos_reader) ); case_unref (cpos)) + for (; (cpos = casereader_read (n_pos_reader)); case_unref (cpos)) { struct ccase *pos_case = case_create (n_proto); struct ccase *cneg; - const double jpos = case_data_idx (cpos, VALUE)->f; + const double jpos = case_num_idx (cpos, VALUE); while ((cneg = casereader_read (n_neg_reader))) { struct ccase *nc = case_create (n_proto); - const double jneg = case_data_idx (cneg, VALUE)->f; + const double jneg = case_num_idx (cneg, VALUE); - case_data_rw_idx (nc, VALUE)->f = jneg; - case_data_rw_idx (nc, N_POS_EQ)->f = 0; + *case_num_rw_idx (nc, VALUE) = jneg; + *case_num_rw_idx (nc, N_POS_EQ) = 0; - case_data_rw_idx (nc, N_POS_GT)->f = SYSMIS; + *case_num_rw_idx (nc, N_POS_GT) = SYSMIS; *case_data_rw_idx (nc, N_NEG_EQ) = *case_data_idx (cneg, N_EQ); *case_data_rw_idx (nc, N_NEG_LT) = *case_data_idx (cneg, N_PRED); @@ -811,15 +807,15 @@ do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict) casewriter_write (w, nc); case_unref (cneg); - if ( jneg > jpos) + if (jneg > jpos) break; } - case_data_rw_idx (pos_case, VALUE)->f = jpos; + *case_num_rw_idx (pos_case, VALUE) = jpos; *case_data_rw_idx (pos_case, N_POS_EQ) = *case_data_idx (cpos, N_EQ); *case_data_rw_idx (pos_case, N_POS_GT) = *case_data_idx (cpos, N_PRED); - case_data_rw_idx (pos_case, N_NEG_EQ)->f = 0; - case_data_rw_idx (pos_case, N_NEG_LT)->f = SYSMIS; + *case_num_rw_idx (pos_case, N_NEG_EQ) = 0; + *case_num_rw_idx (pos_case, N_NEG_LT) = SYSMIS; casewriter_write (w, pos_case); } @@ -839,15 +835,15 @@ do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict) double prev_pos_gt = rs[i].n1; w = sort_create_writer (&down_ordering, n_proto); - for ( ; (c = casereader_read (r) ); case_unref (c)) + for (; (c = casereader_read (r)); case_unref (c)) { - double n_pos_gt = case_data_idx (c, N_POS_GT)->f; + double n_pos_gt = case_num_idx (c, N_POS_GT); struct ccase *nc = case_clone (c); - if ( n_pos_gt == SYSMIS) + if (n_pos_gt == SYSMIS) { n_pos_gt = prev_pos_gt; - case_data_rw_idx (nc, N_POS_GT)->f = n_pos_gt; + *case_num_rw_idx (nc, N_POS_GT) = n_pos_gt; } casewriter_write (w, nc); @@ -864,15 +860,15 @@ do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict) double prev_neg_lt = rs[i].n2; w = sort_create_writer (&up_ordering, n_proto); - for ( ; (c = casereader_read (r) ); case_unref (c)) + for (; (c = casereader_read (r)); case_unref (c)) { - double n_neg_lt = case_data_idx (c, N_NEG_LT)->f; + double n_neg_lt = case_num_idx (c, N_NEG_LT); struct ccase *nc = case_clone (c); - if ( n_neg_lt == SYSMIS) + if (n_neg_lt == SYSMIS) { n_neg_lt = prev_neg_lt; - case_data_rw_idx (nc, N_NEG_LT)->f = n_neg_lt; + *case_num_rw_idx (nc, N_NEG_LT) = n_neg_lt; } casewriter_write (w, nc); @@ -885,39 +881,39 @@ do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict) { struct ccase *prev_case = NULL; - for ( ; (c = casereader_read (r) ); case_unref (c)) + for (; (c = casereader_read (r)); case_unref (c)) { struct ccase *next_case = casereader_peek (r, 0); - const double j = case_data_idx (c, VALUE)->f; - double n_pos_eq = case_data_idx (c, N_POS_EQ)->f; - double n_pos_gt = case_data_idx (c, N_POS_GT)->f; - double n_neg_eq = case_data_idx (c, N_NEG_EQ)->f; - double n_neg_lt = case_data_idx (c, N_NEG_LT)->f; + const double j = case_num_idx (c, VALUE); + double n_pos_eq = case_num_idx (c, N_POS_EQ); + double n_pos_gt = case_num_idx (c, N_POS_GT); + double n_neg_eq = case_num_idx (c, N_NEG_EQ); + double n_neg_lt = case_num_idx (c, N_NEG_LT); - if ( prev_case && j == case_data_idx (prev_case, VALUE)->f) + if (prev_case && j == case_num_idx (prev_case, VALUE)) { - if ( 0 == case_data_idx (c, N_POS_EQ)->f) + if (0 == case_num_idx (c, N_POS_EQ)) { - n_pos_eq = case_data_idx (prev_case, N_POS_EQ)->f; - n_pos_gt = case_data_idx (prev_case, N_POS_GT)->f; + n_pos_eq = case_num_idx (prev_case, N_POS_EQ); + n_pos_gt = case_num_idx (prev_case, N_POS_GT); } - if ( 0 == case_data_idx (c, N_NEG_EQ)->f) + if (0 == case_num_idx (c, N_NEG_EQ)) { - n_neg_eq = case_data_idx (prev_case, N_NEG_EQ)->f; - n_neg_lt = case_data_idx (prev_case, N_NEG_LT)->f; + n_neg_eq = case_num_idx (prev_case, N_NEG_EQ); + n_neg_lt = case_num_idx (prev_case, N_NEG_LT); } } - if ( NULL == next_case || j != case_data_idx (next_case, VALUE)->f) + if (NULL == next_case || j != case_num_idx (next_case, VALUE)) { rs[i].auc += n_pos_gt * n_neg_eq + (n_pos_eq * n_neg_eq) / 2.0; rs[i].q1hat += - n_neg_eq * ( pow2 (n_pos_gt) + n_pos_gt * n_pos_eq + pow2 (n_pos_eq) / 3.0); + n_neg_eq * (pow2 (n_pos_gt) + n_pos_gt * n_pos_eq + pow2 (n_pos_eq) / 3.0); rs[i].q2hat += - n_pos_eq * ( pow2 (n_neg_lt) + n_neg_lt * n_neg_eq + pow2 (n_neg_eq) / 3.0); + n_pos_eq * (pow2 (n_neg_lt) + n_neg_lt * n_neg_eq + pow2 (n_neg_eq) / 3.0); } @@ -929,13 +925,13 @@ do_roc (struct cmd_roc *roc, struct casereader *reader, struct dictionary *dict) case_unref (prev_case); rs[i].auc /= rs[i].n1 * rs[i].n2; - if ( roc->invert ) + if (roc->invert) rs[i].auc = 1 - rs[i].auc; - if ( roc->bi_neg_exp ) + if (roc->bi_neg_exp) { - rs[i].q1hat = rs[i].auc / ( 2 - rs[i].auc); - rs[i].q2hat = 2 * pow2 (rs[i].auc) / ( 1 + rs[i].auc); + rs[i].q1hat = rs[i].auc / (2 - rs[i].auc); + rs[i].q2hat = 2 * pow2 (rs[i].auc) / (1 + rs[i].auc); } else { @@ -987,14 +983,14 @@ show_auc (struct roc_state *rs, const struct cmd_roc *roc) table, PIVOT_AXIS_ROW, N_("Variable under test")); variables->root->show_label = true; - for (size_t i = 0 ; i < roc->n_vars ; ++i ) + for (size_t i = 0 ; i < roc->n_vars ; ++i) { int var_idx = pivot_category_create_leaf ( variables->root, pivot_value_new_variable (roc->vars[i])); pivot_table_put2 (table, 0, var_idx, pivot_value_new_number (rs[i].auc)); - if ( roc->print_se ) + if (roc->print_se) { double se = (rs[i].auc * (1 - rs[i].auc) + (rs[i].n1 - 1) * (rs[i].q1hat - pow2 (rs[i].auc)) @@ -1062,7 +1058,6 @@ show_coords (struct roc_state *rs, const struct cmd_roc *roc) { struct pivot_table *table = pivot_table_create ( N_("Coordinates of the Curve")); - table->omit_empty = true; pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), N_("Positive if greater than or equal to"), @@ -1089,11 +1084,18 @@ show_coords (struct roc_state *rs, const struct cmd_roc *roc) int coord_idx = 0; for (; (cc = casereader_read (r)) != NULL; case_unref (cc)) { - const double se = case_data_idx (cc, ROC_TP)->f / - (case_data_idx (cc, ROC_TP)->f + case_data_idx (cc, ROC_FN)->f); + const double se = case_num_idx (cc, ROC_TP) / + (case_num_idx (cc, ROC_TP) + case_num_idx (cc, ROC_FN)); - const double sp = case_data_idx (cc, ROC_TN)->f / - (case_data_idx (cc, ROC_TN)->f + case_data_idx (cc, ROC_FP)->f); + const double sp = case_num_idx (cc, ROC_TN) / + (case_num_idx (cc, ROC_TN) + case_num_idx (cc, ROC_FP)); + + if (coord_idx >= n_coords) + { + assert (coord_idx == n_coords); + pivot_category_create_leaf ( + coordinates->root, pivot_value_new_integer (++n_coords)); + } pivot_table_put3 ( table, 0, coord_idx, var_idx, @@ -1107,16 +1109,9 @@ show_coords (struct roc_state *rs, const struct cmd_roc *roc) coord_idx++; } - if (coord_idx > n_coords) - n_coords = coord_idx; - casereader_destroy (r); } - for (size_t i = 0; i < n_coords; i++) - pivot_category_create_leaf (coordinates->root, - pivot_value_new_integer (i + 1)); - pivot_table_submit (table); } @@ -1126,7 +1121,7 @@ output_roc (struct roc_state *rs, const struct cmd_roc *roc) { show_summary (roc); - if ( roc->curve ) + if (roc->curve) { struct roc_chart *rc; size_t i; @@ -1140,7 +1135,7 @@ output_roc (struct roc_state *rs, const struct cmd_roc *roc) show_auc (rs, roc); - if ( roc->print_coords ) + if (roc->print_coords) show_coords (rs, roc); }