X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fcasereader-translator.c;h=a2963980253790dd5f398cf20fd44420350c26d3;hb=ed7039fafdba4e2f72ad6e4192197fc938c6d2b0;hp=b857b5b880e2d4b9cd71c1a5c49e008cf93a5e9f;hpb=cc6a060446e71cace2d828a864c85702e04aba7c;p=pspp-builds.git diff --git a/src/data/casereader-translator.c b/src/data/casereader-translator.c index b857b5b8..a2963980 100644 --- a/src/data/casereader-translator.c +++ b/src/data/casereader-translator.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007 Free Software Foundation, Inc. + Copyright (C) 2007, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,11 +15,11 @@ along with this program. If not, see . */ #include - +#include #include - #include +#include #include #include @@ -33,7 +33,7 @@ struct casereader_translator { struct casereader *subreader; /* Source of input cases. */ - void (*translate) (struct ccase *input, struct ccase *output, void *aux); + struct ccase *(*translate) (struct ccase *input, void *aux); bool (*destroy) (void *aux); void *aux; }; @@ -42,9 +42,11 @@ static const struct casereader_class casereader_translator_class; /* Creates and returns a new casereader whose cases are produced by reading from SUBREADER and passing through TRANSLATE, which - must create case OUTPUT, with OUTPUT_VALUE_CNT values, and - populate it based on INPUT and auxiliary data AUX. TRANSLATE - must also destroy INPUT. + must return the translated case, and populate it based on + INPUT and auxiliary data AUX. TRANSLATE must destroy its + input case. + + The cases returned by TRANSLATE must match OUTPUT_PROTO. When the translating casereader is destroyed, DESTROY will be called to allow any state maintained by TRANSLATE to be freed. 
@@ -54,10 +56,9 @@ static const struct casereader_class casereader_translator_class; when the translating casereader is destroyed. */ struct casereader * casereader_create_translator (struct casereader *subreader, - size_t output_value_cnt, - void (*translate) (struct ccase *input, - struct ccase *output, - void *aux), + const struct caseproto *output_proto, + struct ccase *(*translate) (struct ccase *input, + void *aux), bool (*destroy) (void *aux), void *aux) { @@ -68,7 +69,7 @@ casereader_create_translator (struct casereader *subreader, ct->destroy = destroy; ct->aux = aux; reader = casereader_create_sequential ( - NULL, output_value_cnt, casereader_get_case_cnt (ct->subreader), + NULL, output_proto, casereader_get_case_cnt (ct->subreader), &casereader_translator_class, ct); taint_propagate (casereader_get_taint (ct->subreader), casereader_get_taint (reader)); @@ -76,20 +77,15 @@ casereader_create_translator (struct casereader *subreader, } /* Internal read function for translating casereader. */ -static bool +static struct ccase * casereader_translator_read (struct casereader *reader UNUSED, - void *ct_, struct ccase *c) + void *ct_) { struct casereader_translator *ct = ct_; - struct ccase tmp; - - if (casereader_read (ct->subreader, &tmp)) - { - ct->translate (&tmp, c, ct->aux); - return true; - } - else - return false; + struct ccase *tmp = casereader_read (ct->subreader); + if (tmp) + tmp = ct->translate (tmp, ct->aux); + return tmp; } /* Internal destroy function for translating casereader. 
*/ @@ -110,3 +106,379 @@ static const struct casereader_class casereader_translator_class = NULL, NULL, }; + + + +struct casereader_append_numeric +{ + struct caseproto *proto; + casenumber n; + new_value_func *func; + void *aux; + void (*destroy) (void *aux); +}; + +static bool can_destroy (void *can_); + +static struct ccase *can_translate (struct ccase *, void *can_); + +/* Creates and returns a new casereader whose cases are produced + by reading from SUBREADER and appending an additional value, + generated by FUNC. AUX is an optional parameter which + gets passed to FUNC. FUNC will also receive N as it, which is + the ordinal number of the case in the reader. DESTROY is an + optional parameter used to destroy AUX. + + After this function is called, SUBREADER must not ever again + be referenced directly. It will be destroyed automatically + when the translating casereader is destroyed. */ +struct casereader * +casereader_create_append_numeric (struct casereader *subreader, + new_value_func func, void *aux, + void (*destroy) (void *aux)) +{ + struct casereader_append_numeric *can = xmalloc (sizeof *can); + can->proto = caseproto_ref (casereader_get_proto (subreader)); + can->proto = caseproto_add_width (can->proto, 0); + can->n = 0; + can->aux = aux; + can->func = func; + can->destroy = destroy; + return casereader_create_translator (subreader, can->proto, + can_translate, can_destroy, can); +} + + +static struct ccase * +can_translate (struct ccase *c, void *can_) +{ + struct casereader_append_numeric *can = can_; + double new_value = can->func (c, can->n++, can->aux); + c = case_unshare_and_resize (c, can->proto); + case_data_rw_idx (c, caseproto_get_n_widths (can->proto) - 1)->f = new_value; + return c; +} + +static bool +can_destroy (void *can_) +{ + struct casereader_append_numeric *can = can_; + if (can->destroy) + can->destroy (can->aux); + caseproto_unref (can->proto); + free (can); + return true; +} + + + +struct arithmetic_sequence +{ + double first; + 
double increment; +}; + +static double +next_arithmetic (const struct ccase *c UNUSED, + casenumber n, + void *aux) +{ + struct arithmetic_sequence *as = aux; + return n * as->increment + as->first; +} + +/* Creates and returns a new casereader whose cases are produced + by reading from SUBREADER and appending an additional value, + which takes the value FIRST in the first case, FIRST + + INCREMENT in the second case, FIRST + INCREMENT * 2 in the + third case, and so on. + + After this function is called, SUBREADER must not ever again + be referenced directly. It will be destroyed automatically + when the translating casereader is destroyed. */ +struct casereader * +casereader_create_arithmetic_sequence (struct casereader *subreader, + double first, double increment) +{ + struct arithmetic_sequence *as = xzalloc (sizeof *as); + as->first = first; + as->increment = increment; + return casereader_create_append_numeric (subreader, next_arithmetic, + as, free); +} + + + + +struct casereader_append_rank +{ + struct casereader *clone; + casenumber n; + const struct variable *var; + const struct variable *weight; + struct caseproto *proto; + casenumber n_common; + double mean_rank; + double cc; + distinct_func *distinct; + void *aux; + enum rank_error *err; + double prev_value; +}; + +static bool car_destroy (void *car_); + +static struct ccase *car_translate (struct ccase *input, void *car_); + +/* Creates and returns a new casereader whose cases are produced + by reading from SUBREADER and appending an additional value, + which is the rank of the observation. W is the weight variable + of the dictionary containing V, or NULL if there is no weight + variable. + + The following preconditions must be met: + + 1. SUBREADER must be sorted on V. + + 2. The weight variables, must be non-negative. + + If either of these preconditions are not satisfied, then the rank + variables may not be correct. 
In this case, if ERR is non-null, + it will be set according to the erroneous conditions encountered. + + If DISTINCT_CALLBACK is non-null, then it will be called exactly + once for every case containing a distinct value of V. AUX is + an auxilliary pointer passed to DISTINCT_CALLBACK. + + After this function is called, SUBREADER must not ever again + be referenced directly. It will be destroyed automatically + when the translating casereader is destroyed. */ +struct casereader * +casereader_create_append_rank (struct casereader *subreader, + const struct variable *v, + const struct variable *w, + enum rank_error *err, + distinct_func *distinct_callback, + void *aux + ) +{ + struct casereader_append_rank *car = xmalloc (sizeof *car); + car->proto = caseproto_ref (casereader_get_proto (subreader)); + car->proto = caseproto_add_width (car->proto, 0); + car->weight = w; + car->var = v; + car->n = 0; + car->n_common = 1; + car->cc = 0.0; + car->clone = casereader_clone (subreader); + car->distinct = distinct_callback; + car->aux = aux; + car->err = err; + car->prev_value = SYSMIS; + + return casereader_create_translator (subreader, car->proto, + car_translate, car_destroy, car); +} + + +static bool +car_destroy (void *car_) +{ + struct casereader_append_rank *car = car_; + casereader_destroy (car->clone); + caseproto_unref (car->proto); + free (car); + return true; +} + +static struct ccase * +car_translate (struct ccase *input, void *car_) +{ + struct casereader_append_rank *car = car_; + + const double value = case_data (input, car->var)->f; + + if ( car->prev_value != SYSMIS) + { + if (car->err && value < car->prev_value) + *car->err |= RANK_ERR_UNSORTED; + } + + if ( car->n_common == 1) + { + double vxx = SYSMIS; + casenumber k = 0; + double weight = 1.0; + if (car->weight) + { + weight = case_data (input, car->weight)->f; + if ( car->err && weight < 0 ) + *car->err |= RANK_ERR_NEGATIVE_WEIGHT; + } + + do + { + struct ccase *c = casereader_peek (car->clone, car->n 
+ ++k); + if (c == NULL) + break; + vxx = case_data (c, car->var)->f; + + if ( vxx == value) + { + if (car->weight) + { + double w = case_data (c, car->weight)->f; + + if ( car->err && w < 0 ) + *car->err |= RANK_ERR_NEGATIVE_WEIGHT; + + weight += w; + } + else + weight += 1.0; + car->n_common++; + } + case_unref (c); + } + while (vxx == value); + car->mean_rank = car->cc + (weight + 1) / 2.0; + car->cc += weight; + + if (car->distinct) + car->distinct (value, car->n_common, weight, car->aux); + } + else + car->n_common--; + + car->n++; + + input = case_unshare_and_resize (input, car->proto); + case_data_rw_idx (input, caseproto_get_n_widths (car->proto) - 1)->f + = car->mean_rank; + car->prev_value = value; + return input; +} + + + + +struct consolidator +{ + const struct variable *key; + const struct variable *weight; + double cc; + double prev_cc; + + casenumber n; + struct casereader *clone; + struct caseproto *proto; + int direction; +}; + +static bool +uniquify (const struct ccase *c, void *aux) +{ + struct consolidator *cdr = aux; + const union value *current_value = case_data (c, cdr->key); + const int key_width = var_get_width (cdr->key); + const double weight = cdr->weight ? 
case_data (c, cdr->weight)->f : 1.0; + const struct ccase *next_case = casereader_peek (cdr->clone, cdr->n + 1); + int dir = 0; + + cdr->n ++; + cdr->cc += weight; + + if ( NULL == next_case) + goto end; + + dir = value_compare_3way (case_data (next_case, cdr->key), + current_value, key_width); + if ( dir != 0 ) + { + /* Insist that the data are sorted */ + assert (cdr->direction == 0 || dir == cdr->direction); + cdr->direction = dir; + goto end; + } + + return false; + + end: + cdr->prev_cc = cdr->cc; + cdr->cc = 0; + return true; +} + + + +static struct ccase * +consolodate_weight (struct ccase *input, void *aux) +{ + struct consolidator *cdr = aux; + struct ccase *c; + + c = case_unshare_and_resize (input, cdr->proto); + + if (cdr->weight) + case_data_rw (c, cdr->weight)->f = cdr->prev_cc; + else + case_data_rw_idx (c, caseproto_get_n_widths (cdr->proto) - 1)->f = cdr->prev_cc; + + return c; +} + + +static bool +uniquify_destroy (void *aux) +{ + struct consolidator *cdr = aux; + + casereader_destroy (cdr->clone); + free (cdr); + + return true; +} + + + +/* Returns a new casereader which is based upon INPUT, but which contains a maximum + of one case for each distinct value of KEY. + If WEIGHT is non-null, then the new casereader's values for this variable + will be the sum of all values matching KEY. + IF WEIGHT is null, then the new casereader will have an additional numeric + value appended, which will contain the total number of cases containing + KEY. 
+   INPUT must be sorted on KEY.
+
+   The result is a translating casereader layered on top of a
+   filtering casereader that wraps INPUT.
+*/
+struct casereader *
+casereader_create_distinct (struct casereader *input,
+                            const struct variable *key,
+                            const struct variable *weight)
+{
+  struct casereader *filtered;
+  struct casereader *distinct;
+
+  /* NOTE(review): the prototype is borrowed from INPUT without
+     caseproto_ref(), is held through a const pointer, and is then
+     handed to caseproto_add_width() and stored in CDR->proto, which
+     uniquify_destroy() never unrefs.  The other constructors in this
+     file take a reference first and release it in their destroy
+     callback -- confirm the intended ownership here. */
+  const struct caseproto *output_proto = casereader_get_proto (input);
+
+  /* State shared by the filter (uniquify) and the translator
+     (consolodate_weight); it is released by uniquify_destroy. */
+  struct consolidator *cdr = xmalloc (sizeof *cdr);
+  cdr->n = 0;
+  cdr->key = key;
+  cdr->weight = weight;
+  cdr->cc = 0;
+  /* uniquify() assigns prev_cc before consolodate_weight() reads it;
+     start from a defined value anyway rather than leaving the
+     xmalloc'd field indeterminate. */
+  cdr->prev_cc = 0;
+  /* The clone lets uniquify() peek at the case following the current
+     one. */
+  cdr->clone = casereader_clone (input);
+  cdr->direction = 0;
+
+  /* Without a weight variable, the per-key total is written to an
+     extra numeric value appended to each case. */
+  if (cdr->weight == NULL)
+    output_proto = caseproto_add_width (output_proto, 0);
+
+  cdr->proto = output_proto;
+
+  /* Stage 1: pass through only those cases for which uniquify()
+     returns true, i.e. the last case of INPUT or a case whose
+     successor has a different KEY. */
+  filtered = casereader_create_filter_func (input, uniquify,
+                                            NULL, cdr, NULL);
+
+  /* Stage 2: store the total accumulated for that run of cases into
+     the surviving case. */
+  distinct = casereader_create_translator (filtered,
+                                           output_proto,
+                                           consolodate_weight,
+                                           uniquify_destroy,
+                                           cdr);
+
+  /* The original fell off the end of this non-void function, leaving
+     the caller with an indeterminate pointer. */
+  return distinct;
+}
+