X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fautorecode.c;h=887b12230d48222f805a97c37de6298aa15f7971;hb=b5c82cc9aabe7e641011130240ae1b2e84348e23;hp=89f546f2da8cf81e27fcb2f13a14cdc470834e81;hpb=505d1c592469ea99da7723c2770f13f5dc965046;p=pspp-builds.git diff --git a/src/language/stats/autorecode.c b/src/language/stats/autorecode.c index 89f546f2..887b1223 100644 --- a/src/language/stats/autorecode.c +++ b/src/language/stats/autorecode.c @@ -1,56 +1,62 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. - Written by Ben Pfaff . +/* PSPP - a program for statistical analysis. + Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ #include #include #include +#include #include #include #include #include #include #include -#include +#include #include #include #include -#include #include #include +#include "xalloc.h" + #include "gettext.h" #define _(msgid) gettext (msgid) /* FIXME: Implement PRINT subcommand. */ +/* An AUTORECODE variable's original value. */ +union arc_value + { + double f; /* Numeric. */ + char *c; /* Short or long string. */ + }; + /* Explains how to recode one value. `from' must be first element. */ struct arc_item { - union value from; /* Original value. */ + union arc_value from; /* Original value. */ double to; /* Recoded value. */ }; /* Explains how to recode an AUTORECODE variable. */ struct arc_spec { - struct variable *src; /* Source variable. */ + const struct variable *src; /* Source variable. */ struct variable *dest; /* Target variable. */ struct hsh_table *items; /* Hash table of `freq's. */ }; @@ -64,19 +70,19 @@ struct autorecode_trns }; /* Descending or ascending sort order. */ -enum direction +enum direction { ASCENDING, DESCENDING }; /* AUTORECODE data. */ -struct autorecode_pgm +struct autorecode_pgm { - struct variable **src_vars; /* Source variables. */ + const struct variable **src_vars; /* Source variables. */ char **dst_names; /* Target variable names. */ struct variable **dst_vars; /* Target variables. */ - struct hsh_table **src_values; /* `union value's of source vars. */ + struct hsh_table **src_values; /* `union arc_value's of source vars. */ size_t var_cnt; /* Number of variables. */ struct pool *src_values_pool; /* Pool used by src_values. */ enum direction direction; /* Sort order. */ @@ -85,18 +91,19 @@ struct autorecode_pgm static trns_proc_func autorecode_trns_proc; static trns_free_func autorecode_trns_free; -static bool autorecode_proc_func (const struct ccase *, void *); static hsh_compare_func compare_alpha_value, compare_numeric_value; static hsh_hash_func hash_alpha_value, hash_numeric_value; -static void recode (const struct autorecode_pgm *); +static void recode (struct dataset *, const struct autorecode_pgm *); static void arc_free (struct autorecode_pgm *); /* Performs the AUTORECODE procedure. */ int -cmd_autorecode (void) +cmd_autorecode (struct lexer *lexer, struct dataset *ds) { struct autorecode_pgm arc; + struct casereader *input; + struct ccase *c; size_t dst_cnt; size_t i; bool ok; @@ -111,23 +118,24 @@ cmd_autorecode (void) arc.print = 0; dst_cnt = 0; - lex_match_id ("VARIABLES"); - lex_match ('='); - if (!parse_variables (default_dict, &arc.src_vars, &arc.var_cnt, + lex_match_id (lexer, "VARIABLES"); + lex_match (lexer, '='); + if (!parse_variables_const (lexer, dataset_dict (ds), &arc.src_vars, + &arc.var_cnt, PV_NO_DUPLICATE)) goto lossage; - if (!lex_force_match_id ("INTO")) + if (!lex_force_match_id (lexer, "INTO")) goto lossage; - lex_match ('='); - if (!parse_DATA_LIST_vars (&arc.dst_names, &dst_cnt, PV_NONE)) + lex_match (lexer, '='); + if (!parse_DATA_LIST_vars (lexer, &arc.dst_names, &dst_cnt, PV_NONE)) goto lossage; if (dst_cnt != arc.var_cnt) { size_t i; - msg (SE, _("Source variable count (%u) does not match " - "target variable count (%u)."), - (unsigned) arc.var_cnt, (unsigned) dst_cnt); + msg (SE, _("Source variable count (%zu) does not match " + "target variable count (%zu)."), + arc.var_cnt, dst_cnt); for (i = 0; i < dst_cnt; i++) free (arc.dst_names[i]); @@ -136,14 +144,14 @@ cmd_autorecode (void) goto lossage; } - while (lex_match ('/')) - if (lex_match_id ("DESCENDING")) + while (lex_match (lexer, '/')) + if (lex_match_id (lexer, "DESCENDING")) arc.direction = DESCENDING; - else if (lex_match_id ("PRINT")) + else if (lex_match_id (lexer, "PRINT")) arc.print = 1; - if (token != '.') + if (lex_token (lexer) != '.') { - lex_error (_("expecting end of command")); + lex_error (lexer, _("expecting end of command")); goto lossage; } @@ -151,7 +159,7 @@ cmd_autorecode (void) { int j; - if (dict_lookup_var (default_dict, arc.dst_names[i]) != NULL) + if (dict_lookup_var (dataset_dict (ds), arc.dst_names[i]) != NULL) { msg (SE, _("Target variable %s duplicates existing variable %s."), arc.dst_names[i], arc.dst_names[i]); @@ -170,20 +178,47 @@ cmd_autorecode (void) arc.dst_vars = xnmalloc (arc.var_cnt, sizeof *arc.dst_vars); arc.src_values = xnmalloc (arc.var_cnt, sizeof *arc.src_values); for (i = 0; i < dst_cnt; i++) - if (arc.src_vars[i]->type == ALPHA) + { + /* FIXME: consolodate this hsh_create */ + if (var_is_alpha (arc.src_vars[i])) arc.src_values[i] = hsh_create (10, compare_alpha_value, hash_alpha_value, NULL, arc.src_vars[i]); else arc.src_values[i] = hsh_create (10, compare_numeric_value, hash_numeric_value, NULL, NULL); - - ok = procedure (autorecode_proc_func, &arc); + } + + input = proc_open (ds); + for (; (c = casereader_read (input)) != NULL; case_unref (c)) + for (i = 0; i < arc.var_cnt; i++) + { + union arc_value v, *vp, **vpp; + + if (var_is_numeric (arc.src_vars[i])) + v.f = case_num (c, arc.src_vars[i]); + else + v.c = (char *) case_str (c, arc.src_vars[i]); + + vpp = (union arc_value **) hsh_probe (arc.src_values[i], &v); + if (*vpp == NULL) + { + vp = pool_alloc (arc.src_values_pool, sizeof *vp); + if (var_is_numeric (arc.src_vars[i])) + vp->f = v.f; + else + vp->c = pool_clone (arc.src_values_pool, + v.c, var_get_width (arc.src_vars[i])); + *vpp = vp; + } + } + ok = casereader_destroy (input); + ok = proc_commit (ds) && ok; for (i = 0; i < arc.var_cnt; i++) - arc.dst_vars[i] = dict_create_var_assert (default_dict, + arc.dst_vars[i] = dict_create_var_assert (dataset_dict (ds), arc.dst_names[i], 0); - recode (&arc); + recode (ds, &arc); arc_free (&arc); return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; @@ -193,19 +228,19 @@ lossage: } static void -arc_free (struct autorecode_pgm *arc) +arc_free (struct autorecode_pgm *arc) { free (arc->src_vars); - if (arc->dst_names != NULL) + if (arc->dst_names != NULL) { size_t i; - + for (i = 0; i < arc->var_cnt; i++) free (arc->dst_names[i]); free (arc->dst_names); } free (arc->dst_vars); - if (arc->src_values != NULL) + if (arc->src_values != NULL) { size_t i; @@ -220,7 +255,7 @@ arc_free (struct autorecode_pgm *arc) /* AUTORECODE transformation. */ static void -recode (const struct autorecode_pgm *arc) +recode (struct dataset *ds, const struct autorecode_pgm *arc) { struct autorecode_trns *trns; size_t i; @@ -238,7 +273,7 @@ recode (const struct autorecode_pgm *arc) spec->src = arc->src_vars[i]; spec->dest = arc->dst_vars[i]; - if (arc->src_vars[i]->type == ALPHA) + if (var_is_alpha (arc->src_vars[i])) spec->items = hsh_create (2 * count, compare_alpha_value, hash_alpha_value, NULL, arc->src_vars[i]); else @@ -248,40 +283,43 @@ recode (const struct autorecode_pgm *arc) for (j = 0; *p; p++, j++) { struct arc_item *item = pool_alloc (trns->pool, sizeof *item); - union value *vp = *p; - - if (arc->src_vars[i]->type == NUMERIC) + union arc_value *vp = *p; + + if (var_is_numeric (arc->src_vars[i])) item->from.f = vp->f; else item->from.c = pool_clone (trns->pool, vp->c, - arc->src_vars[i]->width); + var_get_width (arc->src_vars[i])); item->to = arc->direction == ASCENDING ? j + 1 : count - j; hsh_force_insert (spec->items, item); } } - add_transformation (autorecode_trns_proc, autorecode_trns_free, trns); + add_transformation (ds, + autorecode_trns_proc, autorecode_trns_free, trns); } /* Executes an AUTORECODE transformation. */ static int -autorecode_trns_proc (void *trns_, struct ccase *c, int case_idx UNUSED) +autorecode_trns_proc (void *trns_, struct ccase **c, + casenumber case_idx UNUSED) { struct autorecode_trns *trns = trns_; size_t i; + *c = case_unshare (*c); for (i = 0; i < trns->spec_cnt; i++) { struct arc_spec *spec = &trns->specs[i]; struct arc_item *item; - union value v; + union arc_value v; - if (spec->src->type == NUMERIC) - v.f = case_num (c, spec->src->fv); + if (var_is_numeric (spec->src)) + v.f = case_num (*c, spec->src); else - v.c = (char *) case_str (c, spec->src->fv); + v.c = (char *) case_str (*c, spec->src); item = hsh_force_find (spec->items, &v); - case_data_rw (c, spec->dest->fv)->f = item->to; + case_data_rw (*c, spec->dest)->f = item->to; } return TRNS_CONTINUE; } @@ -302,67 +340,37 @@ autorecode_trns_free (void *trns_) /* AUTORECODE procedure. */ static int -compare_alpha_value (const void *a_, const void *b_, void *v_) +compare_alpha_value (const void *a_, const void *b_, const void *v_) { - const union value *a = a_; - const union value *b = b_; + const union arc_value *a = a_; + const union arc_value *b = b_; const struct variable *v = v_; - return memcmp (a->c, b->c, v->width); + return memcmp (a->c, b->c, var_get_width (v)); } static unsigned -hash_alpha_value (const void *a_, void *v_) +hash_alpha_value (const void *a_, const void *v_) { - const union value *a = a_; + const union arc_value *a = a_; const struct variable *v = v_; - - return hsh_hash_bytes (a->c, v->width); + + return hash_bytes (a->c, var_get_width (v), 0); } static int -compare_numeric_value (const void *a_, const void *b_, void *foo UNUSED) +compare_numeric_value (const void *a_, const void *b_, const void *aux UNUSED) { - const union value *a = a_; - const union value *b = b_; + const union arc_value *a = a_; + const union arc_value *b = b_; return a->f < b->f ? -1 : a->f > b->f; } static unsigned -hash_numeric_value (const void *a_, void *foo UNUSED) +hash_numeric_value (const void *a_, const void *aux UNUSED) { - const union value *a = a_; - - return hsh_hash_double (a->f); -} + const union arc_value *a = a_; -static bool -autorecode_proc_func (const struct ccase *c, void *arc_) -{ - struct autorecode_pgm *arc = arc_; - size_t i; - - for (i = 0; i < arc->var_cnt; i++) - { - union value v, *vp, **vpp; - - if (arc->src_vars[i]->type == NUMERIC) - v.f = case_num (c, arc->src_vars[i]->fv); - else - v.c = (char *) case_str (c, arc->src_vars[i]->fv); - - vpp = (union value **) hsh_probe (arc->src_values[i], &v); - if (*vpp == NULL) - { - vp = pool_alloc (arc->src_values_pool, sizeof *vp); - if (arc->src_vars[i]->type == NUMERIC) - vp->f = v.f; - else - vp->c = pool_clone (arc->src_values_pool, - v.c, arc->src_vars[i]->width); - *vpp = vp; - } - } - return true; + return hash_double (a->f, 0); }