From: John Darrington Date: Tue, 6 Nov 2012 19:50:33 +0000 (+0100) Subject: Categoricals: Provide separate functions for effects and dummy coding. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=12140b59f70771295af36404296219cba9f3f6bc;hp=372c46a3648de939ec3224cc94ad944aa39623ae;p=pspp Categoricals: Provide separate functions for effects and dummy coding. Until now, the function categoricals_get_code_for_case returned the "effects" coding (i.e., a bipolar code). Some uses require "dummy" coding (a binary coding). This change provides separate functions to return whatever is desired. Conflicts: src/language/stats/logistic.c --- diff --git a/src/math/categoricals.c b/src/math/categoricals.c index 239a3a833c..c088ac0073 100644 --- a/src/math/categoricals.c +++ b/src/math/categoricals.c @@ -34,8 +34,6 @@ #define CATEGORICALS_DEBUG 0 -#define EFFECTS_CODING 1 - struct value_node { struct hmap_node node; /* Node in hash map. */ @@ -597,7 +595,7 @@ categoricals_done (const struct categoricals *cat_) struct interaction_value *iv = iap->reverse_interaction_value_map[y]; for (x = iap->base_subscript_short; x < iap->base_subscript_short + df ;++x) { - const double bin = categoricals_get_code_for_case (cat, x, iv->ccase); \ + const double bin = categoricals_get_effects_code_for_case (cat, x, iv->ccase); iap->enc_sum [x - iap->base_subscript_short] += bin * iv->cc; } if (cat->payload && cat->payload->calculate) @@ -657,11 +655,13 @@ categoricals_get_sum_by_subscript (const struct categoricals *cat, int subscript return vp->enc_sum[subscript - vp->base_subscript_short]; } + /* Returns unity if the value in case C at SUBSCRIPT is equal to the category for that subscript */ -double +static double categoricals_get_code_for_case (const struct categoricals *cat, int subscript, - const struct ccase *c) + const struct ccase *c, + bool effects_coding) { const struct interaction *iact = categoricals_get_interaction_by_subscript (cat, subscript); @@ -694,13 +694,10 
@@ categoricals_get_code_for_case (const struct categoricals *cat, int subscript, const int index = ((subscript - base_index) % iap->df_prod[v] ) / dfp; dfp = iap->df_prod [v]; -#if EFFECTS_CODING - if ( valn->index == df ) + if (effects_coding && valn->index == df ) bin = -1.0; - else -#endif - if ( valn->index != index ) - bin = 0; + else if ( valn->index != index ) + bin = 0; result *= bin; } @@ -709,6 +706,28 @@ categoricals_get_code_for_case (const struct categoricals *cat, int subscript, } +/* Returns unity if the value in case C at SUBSCRIPT is equal to the category + for that subscript */ +double +categoricals_get_dummy_code_for_case (const struct categoricals *cat, int subscript, + const struct ccase *c) +{ + return categoricals_get_code_for_case (cat, subscript, c, false); +} + +/* Returns unity if the value in case C at SUBSCRIPT is equal to the category + for that subscript. + Else if it is the last category, return -1. + Otherwise return 0. + */ +double +categoricals_get_effects_code_for_case (const struct categoricals *cat, int subscript, + const struct ccase *c) +{ + return categoricals_get_code_for_case (cat, subscript, c, true); +} + + size_t categoricals_get_n_variables (const struct categoricals *cat) { diff --git a/src/math/categoricals.h b/src/math/categoricals.h index 62ff10c100..450a202487 100644 --- a/src/math/categoricals.h +++ b/src/math/categoricals.h @@ -82,7 +82,20 @@ const struct interaction *categoricals_get_interaction_by_subscript (const struc double categoricals_get_sum_by_subscript (const struct categoricals *cat, int subscript); -double categoricals_get_code_for_case (const struct categoricals *cat, int subscript, const struct ccase *c); +/* Returns unity if the value in case C at SUBSCRIPT is equal to the category + for that subscript */ +double +categoricals_get_dummy_code_for_case (const struct categoricals *cat, int subscript, + const struct ccase *c); + +/* Returns unity if the value in case C at SUBSCRIPT is equal to the 
category + for that subscript. + Else if it is the last category, return -1. + Otherwise return 0. + */ +double +categoricals_get_effects_code_for_case (const struct categoricals *cat, int subscript, + const struct ccase *c); /* These use the long map. Useful for descriptive statistics. */ diff --git a/src/math/covariance.c b/src/math/covariance.c index bc5382c0c1..2500f90423 100644 --- a/src/math/covariance.c +++ b/src/math/covariance.c @@ -268,7 +268,7 @@ get_val (const struct covariance *cov, int i, const struct ccase *c) return val->f; } - return categoricals_get_code_for_case (cov->categoricals, i - cov->n_vars, c); + return categoricals_get_effects_code_for_case (cov->categoricals, i - cov->n_vars, c); } #if 0