X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fcasereader-translator.c;h=5c8b9f4209929f335c702f47e6f96312d8f1fabe;hb=refs%2Fheads%2Fdelete-variables;hp=feffa15c4510ba97e57cd95705cd9afd290c1e04;hpb=b40baf410822471fbdeeec553693619d60d7c7b6;p=pspp diff --git a/src/data/casereader-translator.c b/src/data/casereader-translator.c index feffa15c45..5c8b9f4209 100644 --- a/src/data/casereader-translator.c +++ b/src/data/casereader-translator.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007, 2009 Free Software Foundation, Inc. + Copyright (C) 2007, 2009, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,15 +15,16 @@ along with this program. If not, see . */ #include -#include -#include + #include -#include -#include -#include +#include "data/casereader-provider.h" +#include "data/casereader.h" +#include "data/val-type.h" +#include "data/variable.h" +#include "libpspp/taint.h" -#include "xalloc.h" +#include "gl/xalloc.h" /* Casereader that applies a user-supplied function to translate each case into another in an arbitrary fashion. */ @@ -32,9 +33,7 @@ struct casereader_translator { struct casereader *subreader; /* Source of input cases. */ - - struct ccase *(*translate) (struct ccase *input, void *aux); - bool (*destroy) (void *aux); + const struct casereader_translator_class *class; void *aux; }; @@ -46,6 +45,11 @@ static const struct casereader_class casereader_translator_class; INPUT and auxiliary data AUX. TRANSLATE must destroy its input case. + TRANSLATE may be stateful, that is, the output for a given + case may depend on previous cases. If TRANSLATE is stateless, + then you may want to use casereader_translate_stateless + instead, since it sometimes performs better. + The cases returned by TRANSLATE must match OUTPUT_PROTO. 
When the translating casereader is destroyed, DESTROY will be @@ -57,19 +61,18 @@ static const struct casereader_class casereader_translator_class; struct casereader * casereader_create_translator (struct casereader *subreader, const struct caseproto *output_proto, - struct ccase *(*translate) (struct ccase *input, - void *aux), - bool (*destroy) (void *aux), + const struct casereader_translator_class *class, void *aux) { struct casereader_translator *ct = xmalloc (sizeof *ct); - struct casereader *reader; - ct->subreader = casereader_rename (subreader); - ct->translate = translate; - ct->destroy = destroy; - ct->aux = aux; - reader = casereader_create_sequential ( - NULL, output_proto, casereader_get_case_cnt (ct->subreader), + *ct = (struct casereader_translator) { + .subreader = casereader_rename (subreader), + .class = class, + .aux = aux, + }; + + struct casereader *reader = casereader_create_sequential ( + NULL, output_proto, casereader_get_n_cases (ct->subreader), &casereader_translator_class, ct); taint_propagate (casereader_get_taint (ct->subreader), casereader_get_taint (reader)); @@ -84,7 +87,7 @@ casereader_translator_read (struct casereader *reader UNUSED, struct casereader_translator *ct = ct_; struct ccase *tmp = casereader_read (ct->subreader); if (tmp) - tmp = ct->translate (tmp, ct->aux); + tmp = ct->class->translate (tmp, ct->aux); return tmp; } @@ -94,7 +97,7 @@ casereader_translator_destroy (struct casereader *reader UNUSED, void *ct_) { struct casereader_translator *ct = ct_; casereader_destroy (ct->subreader); - ct->destroy (ct->aux); + ct->class->destroy (ct->aux); free (ct); } @@ -106,7 +109,102 @@ static const struct casereader_class casereader_translator_class = NULL, NULL, }; + +/* Casereader that applies a user-supplied function to translate + each case into another in a stateless fashion. */ + +/* A statelessly translating casereader. 
*/ +struct casereader_stateless_translator + { + struct casereader *subreader; /* Source of input cases. */ + + casenumber case_offset; + const struct casereader_translator_class *class; + void *aux; + }; +static const struct casereader_random_class +casereader_stateless_translator_class; + +/* Creates and returns a new casereader whose cases are produced by reading + from SUBREADER and passing through the TRANSLATE function. TRANSLATE must + take ownership of its input case and return a translated case, populating + the translated case based on INPUT and auxiliary data AUX. + + TRANSLATE must be stateless, that is, the output for a given case must not + depend on previous cases. This is because cases may be retrieved in + arbitrary order, and some cases may be retrieved multiple times, and some + cases may be skipped and never retrieved at all. If TRANSLATE is stateful, + use casereader_create_translator instead. + + The cases returned by TRANSLATE must match OUTPUT_PROTO. + + When the stateless translating casereader is destroyed, DESTROY will be + called to allow any auxiliary data maintained by TRANSLATE to be freed. + + After this function is called, SUBREADER must not ever again be referenced + directly. It will be destroyed automatically when the translating + casereader is destroyed.
*/ +struct casereader * +casereader_translate_stateless ( + struct casereader *subreader, + const struct caseproto *output_proto, + const struct casereader_translator_class *class, + void *aux) +{ + struct casereader_stateless_translator *cst = xmalloc (sizeof *cst); + *cst = (struct casereader_stateless_translator) { + .subreader = casereader_rename (subreader), + .class = class, + .aux = aux, + }; + struct casereader *reader = casereader_create_random ( + output_proto, casereader_get_n_cases (cst->subreader), + &casereader_stateless_translator_class, cst); + taint_propagate (casereader_get_taint (cst->subreader), + casereader_get_taint (reader)); + return reader; +} + +/* Internal read function for stateless translating casereader. */ +static struct ccase * +casereader_stateless_translator_read (struct casereader *reader UNUSED, + void *cst_, casenumber idx) +{ + struct casereader_stateless_translator *cst = cst_; + struct ccase *tmp = casereader_peek (cst->subreader, idx); + if (tmp != NULL) + tmp = cst->class->translate (tmp, cst->aux); + return tmp; +} + +/* Internal destroy function for translating casereader. */ +static void +casereader_stateless_translator_destroy (struct casereader *reader UNUSED, + void *cst_) +{ + struct casereader_stateless_translator *cst = cst_; + casereader_destroy (cst->subreader); + cst->class->destroy (cst->aux); + free (cst); +} + +static void +casereader_stateless_translator_advance (struct casereader *reader UNUSED, + void *cst_, casenumber n) +{ + struct casereader_stateless_translator *cst = cst_; + cst->case_offset += casereader_advance (cst->subreader, n); +} + +/* Casereader class for stateless translating casereader. 
*/ +static const struct casereader_random_class +casereader_stateless_translator_class = + { + casereader_stateless_translator_read, + casereader_stateless_translator_destroy, + casereader_stateless_translator_advance, + }; struct casereader_append_numeric @@ -144,8 +242,11 @@ casereader_create_append_numeric (struct casereader *subreader, can->aux = aux; can->func = func; can->destroy = destroy; - return casereader_create_translator (subreader, can->proto, - can_translate, can_destroy, can); + + static const struct casereader_translator_class class = { + can_translate, can_destroy, + }; + return casereader_create_translator (subreader, can->proto, &class, can); } @@ -155,7 +256,7 @@ can_translate (struct ccase *c, void *can_) struct casereader_append_numeric *can = can_; double new_value = can->func (c, can->n++, can->aux); c = case_unshare_and_resize (c, can->proto); - case_data_rw_idx (c, caseproto_get_n_widths (can->proto) - 1)->f = new_value; + *case_num_rw_idx (c, caseproto_get_n_widths (can->proto) - 1) = new_value; return c; } @@ -200,7 +301,7 @@ struct casereader * casereader_create_arithmetic_sequence (struct casereader *subreader, double first, double increment) { - struct arithmetic_sequence *as = xzalloc (sizeof *as); + struct arithmetic_sequence *as = XZALLOC (struct arithmetic_sequence); as->first = first; as->increment = increment; return casereader_create_append_numeric (subreader, next_arithmetic, @@ -248,7 +349,7 @@ static struct ccase *car_translate (struct ccase *input, void *car_); If DISTINCT_CALLBACK is non-null, then it will be called exactly once for every case containing a distinct value of V. AUX is - an auxilliary pointer passed to DISTINCT_CALLBACK. + an auxiliary pointer passed to DISTINCT_CALLBACK. After this function is called, SUBREADER must not ever again be referenced directly. 
It will be destroyed automatically @@ -260,7 +361,7 @@ casereader_create_append_rank (struct casereader *subreader, enum rank_error *err, distinct_func *distinct_callback, void *aux - ) + ) { struct casereader_append_rank *car = xmalloc (sizeof *car); car->proto = caseproto_ref (casereader_get_proto (subreader)); @@ -276,8 +377,10 @@ casereader_create_append_rank (struct casereader *subreader, car->err = err; car->prev_value = SYSMIS; - return casereader_create_translator (subreader, car->proto, - car_translate, car_destroy, car); + static const struct casereader_translator_class class = { + car_translate, car_destroy + }; + return casereader_create_translator (subreader, car->proto, &class, car); } @@ -296,23 +399,23 @@ car_translate (struct ccase *input, void *car_) { struct casereader_append_rank *car = car_; - const double value = case_data (input, car->var)->f; + const double value = case_num (input, car->var); - if ( car->prev_value != SYSMIS) + if (car->prev_value != SYSMIS) { if (car->err && value < car->prev_value) *car->err |= RANK_ERR_UNSORTED; } - if ( car->n_common == 1) + if (car->n_common == 1) { double vxx = SYSMIS; casenumber k = 0; double weight = 1.0; if (car->weight) { - weight = case_data (input, car->weight)->f; - if ( car->err && weight < 0 ) + weight = case_num (input, car->weight); + if (car->err && weight < 0) *car->err |= RANK_ERR_NEGATIVE_WEIGHT; } @@ -321,15 +424,15 @@ car_translate (struct ccase *input, void *car_) struct ccase *c = casereader_peek (car->clone, car->n + ++k); if (c == NULL) break; - vxx = case_data (c, car->var)->f; + vxx = case_num (c, car->var); - if ( vxx == value) + if (vxx == value) { if (car->weight) { - double w = case_data (c, car->weight)->f; + double w = case_num (c, car->weight); - if ( car->err && w < 0 ) + if (car->err && w < 0) *car->err |= RANK_ERR_NEGATIVE_WEIGHT; weight += w; @@ -353,7 +456,7 @@ car_translate (struct ccase *input, void *car_) car->n++; input = case_unshare_and_resize (input, 
car->proto); - case_data_rw_idx (input, caseproto_get_n_widths (car->proto) - 1)->f + *case_num_rw_idx (input, caseproto_get_n_widths (car->proto) - 1) = car->mean_rank; car->prev_value = value; return input; @@ -381,26 +484,32 @@ uniquify (const struct ccase *c, void *aux) struct consolidator *cdr = aux; const union value *current_value = case_data (c, cdr->key); const int key_width = var_get_width (cdr->key); - const double weight = cdr->weight ? case_data (c, cdr->weight)->f : 1.0; - const struct ccase *next_case = casereader_peek (cdr->clone, cdr->n + 1); + const double weight = cdr->weight ? case_num (c, cdr->weight) : 1.0; + struct ccase *next_case = casereader_peek (cdr->clone, cdr->n + 1); int dir = 0; cdr->n ++; cdr->cc += weight; - if ( NULL == next_case) + if (NULL == next_case) goto end; - + dir = value_compare_3way (case_data (next_case, cdr->key), current_value, key_width); - if ( dir != 0 ) + if (dir > 0) + dir = 1; + if (dir < 0) + dir = -1; + + case_unref (next_case); + if (dir != 0) { /* Insist that the data are sorted */ assert (cdr->direction == 0 || dir == cdr->direction); cdr->direction = dir; goto end; } - + return false; end: @@ -420,12 +529,12 @@ consolodate_weight (struct ccase *input, void *aux) if (cdr->weight) { c = case_unshare (input); - case_data_rw (c, cdr->weight)->f = cdr->prev_cc; + *case_num_rw (c, cdr->weight) = cdr->prev_cc; } else { c = case_unshare_and_resize (input, cdr->proto); - case_data_rw_idx (c, caseproto_get_n_widths (cdr->proto) - 1)->f = cdr->prev_cc; + *case_num_rw_idx (c, caseproto_get_n_widths (cdr->proto) - 1) = cdr->prev_cc; } return c; @@ -446,7 +555,7 @@ uniquify_destroy (void *aux) -/* Returns a new casereader which is based upon INPUT, but which contains a maximum +/* Returns a new casereader which is based upon INPUT, but which contains a maximum of one case for each distinct value of KEY. If WEIGHT is non-null, then the new casereader's values for this variable will be the sum of all values matching KEY. 
@@ -461,7 +570,6 @@ casereader_create_distinct (struct casereader *input, const struct variable *weight) { struct casereader *u ; - struct casereader *ud ; struct caseproto *output_proto = caseproto_ref (casereader_get_proto (input)); struct consolidator *cdr = xmalloc (sizeof (*cdr)); @@ -472,7 +580,7 @@ casereader_create_distinct (struct casereader *input, cdr->clone = casereader_clone (input); cdr->direction = 0; - if ( NULL == cdr->weight ) + if (NULL == cdr->weight) output_proto = caseproto_add_width (output_proto, 0); cdr->proto = output_proto; @@ -480,12 +588,9 @@ casereader_create_distinct (struct casereader *input, u = casereader_create_filter_func (input, uniquify, NULL, cdr, NULL); - ud = casereader_create_translator (u, - output_proto, - consolodate_weight, - uniquify_destroy, - cdr); - - return ud; + static const struct casereader_translator_class class = { + consolodate_weight, uniquify_destroy, + }; + return casereader_create_translator (u, output_proto, &class, cdr); }