X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fautorecode.c;h=de9416821c15bf5fa8c37d5e9a241d7a647fd804;hb=cc57a28ef6796ae9a64ef80d453f72126956d49d;hp=f967dd3aca217d29a7594d861707bb6b82dd47f0;hpb=a37754c18dee3d5941e5fe041b2c1fa1a3370157;p=pspp-builds.git diff --git a/src/language/stats/autorecode.c b/src/language/stats/autorecode.c index f967dd3a..de941682 100644 --- a/src/language/stats/autorecode.c +++ b/src/language/stats/autorecode.c @@ -1,26 +1,24 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. - Written by Ben Pfaff . +/* PSPP - a program for statistical analysis. + Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ #include #include #include +#include #include #include #include @@ -28,21 +26,21 @@ #include #include #include -#include #include #include #include -#include #include #include +#include "xalloc.h" + #include "gettext.h" #define _(msgid) gettext (msgid) /* FIXME: Implement PRINT subcommand. */ /* An AUTORECODE variable's original value. */ -union arc_value +union arc_value { double f; /* Numeric. */ char *c; /* Short or long string. */ @@ -58,7 +56,7 @@ struct arc_item /* Explains how to recode an AUTORECODE variable. */ struct arc_spec { - struct variable *src; /* Source variable. */ + const struct variable *src; /* Source variable. */ struct variable *dest; /* Target variable. */ struct hsh_table *items; /* Hash table of `freq's. */ }; @@ -72,16 +70,16 @@ struct autorecode_trns }; /* Descending or ascending sort order. */ -enum direction +enum direction { ASCENDING, DESCENDING }; /* AUTORECODE data. */ -struct autorecode_pgm +struct autorecode_pgm { - struct variable **src_vars; /* Source variables. */ + const struct variable **src_vars; /* Source variables. */ char **dst_names; /* Target variable names. */ struct variable **dst_vars; /* Target variables. */ struct hsh_table **src_values; /* `union arc_value's of source vars. */ @@ -93,18 +91,19 @@ struct autorecode_pgm static trns_proc_func autorecode_trns_proc; static trns_free_func autorecode_trns_free; -static bool autorecode_proc_func (const struct ccase *, void *); static hsh_compare_func compare_alpha_value, compare_numeric_value; static hsh_hash_func hash_alpha_value, hash_numeric_value; -static void recode (const struct autorecode_pgm *); +static void recode (struct dataset *, const struct autorecode_pgm *); static void arc_free (struct autorecode_pgm *); /* Performs the AUTORECODE procedure. 
*/ int -cmd_autorecode (void) +cmd_autorecode (struct lexer *lexer, struct dataset *ds) { struct autorecode_pgm arc; + struct casereader *input; + struct ccase *c; size_t dst_cnt; size_t i; bool ok; @@ -119,23 +118,24 @@ cmd_autorecode (void) arc.print = 0; dst_cnt = 0; - lex_match_id ("VARIABLES"); - lex_match ('='); - if (!parse_variables (default_dict, &arc.src_vars, &arc.var_cnt, + lex_match_id (lexer, "VARIABLES"); + lex_match (lexer, '='); + if (!parse_variables_const (lexer, dataset_dict (ds), &arc.src_vars, + &arc.var_cnt, PV_NO_DUPLICATE)) goto lossage; - if (!lex_force_match_id ("INTO")) + if (!lex_force_match_id (lexer, "INTO")) goto lossage; - lex_match ('='); - if (!parse_DATA_LIST_vars (&arc.dst_names, &dst_cnt, PV_NONE)) + lex_match (lexer, '='); + if (!parse_DATA_LIST_vars (lexer, &arc.dst_names, &dst_cnt, PV_NONE)) goto lossage; if (dst_cnt != arc.var_cnt) { size_t i; - msg (SE, _("Source variable count (%u) does not match " - "target variable count (%u)."), - (unsigned) arc.var_cnt, (unsigned) dst_cnt); + msg (SE, _("Source variable count (%zu) does not match " + "target variable count (%zu)."), + arc.var_cnt, dst_cnt); for (i = 0; i < dst_cnt; i++) free (arc.dst_names[i]); @@ -144,14 +144,14 @@ cmd_autorecode (void) goto lossage; } - while (lex_match ('/')) - if (lex_match_id ("DESCENDING")) + while (lex_match (lexer, '/')) + if (lex_match_id (lexer, "DESCENDING")) arc.direction = DESCENDING; - else if (lex_match_id ("PRINT")) + else if (lex_match_id (lexer, "PRINT")) arc.print = 1; - if (token != '.') + if (lex_token (lexer) != '.') { - lex_error (_("expecting end of command")); + lex_error (lexer, _("expecting end of command")); goto lossage; } @@ -159,7 +159,7 @@ cmd_autorecode (void) { int j; - if (dict_lookup_var (default_dict, arc.dst_names[i]) != NULL) + if (dict_lookup_var (dataset_dict (ds), arc.dst_names[i]) != NULL) { msg (SE, _("Target variable %s duplicates existing variable %s."), arc.dst_names[i], arc.dst_names[i]); @@ -178,20 
+178,47 @@ cmd_autorecode (void) arc.dst_vars = xnmalloc (arc.var_cnt, sizeof *arc.dst_vars); arc.src_values = xnmalloc (arc.var_cnt, sizeof *arc.src_values); for (i = 0; i < dst_cnt; i++) - if (arc.src_vars[i]->type == ALPHA) + { + /* FIXME: consolodate this hsh_create */ + if (var_is_alpha (arc.src_vars[i])) arc.src_values[i] = hsh_create (10, compare_alpha_value, hash_alpha_value, NULL, arc.src_vars[i]); else arc.src_values[i] = hsh_create (10, compare_numeric_value, hash_numeric_value, NULL, NULL); - - ok = procedure (autorecode_proc_func, &arc); + } + + input = proc_open (ds); + for (; (c = casereader_read (input)) != NULL; case_unref (c)) + for (i = 0; i < arc.var_cnt; i++) + { + union arc_value v, *vp, **vpp; + + if (var_is_numeric (arc.src_vars[i])) + v.f = case_num (c, arc.src_vars[i]); + else + v.c = (char *) case_str (c, arc.src_vars[i]); + + vpp = (union arc_value **) hsh_probe (arc.src_values[i], &v); + if (*vpp == NULL) + { + vp = pool_alloc (arc.src_values_pool, sizeof *vp); + if (var_is_numeric (arc.src_vars[i])) + vp->f = v.f; + else + vp->c = pool_clone (arc.src_values_pool, + v.c, var_get_width (arc.src_vars[i])); + *vpp = vp; + } + } + ok = casereader_destroy (input); + ok = proc_commit (ds) && ok; for (i = 0; i < arc.var_cnt; i++) - arc.dst_vars[i] = dict_create_var_assert (default_dict, + arc.dst_vars[i] = dict_create_var_assert (dataset_dict (ds), arc.dst_names[i], 0); - recode (&arc); + recode (ds, &arc); arc_free (&arc); return ok ? 
CMD_SUCCESS : CMD_CASCADING_FAILURE; @@ -201,19 +228,19 @@ lossage: } static void -arc_free (struct autorecode_pgm *arc) +arc_free (struct autorecode_pgm *arc) { free (arc->src_vars); - if (arc->dst_names != NULL) + if (arc->dst_names != NULL) { size_t i; - + for (i = 0; i < arc->var_cnt; i++) free (arc->dst_names[i]); free (arc->dst_names); } free (arc->dst_vars); - if (arc->src_values != NULL) + if (arc->src_values != NULL) { size_t i; @@ -228,7 +255,7 @@ arc_free (struct autorecode_pgm *arc) /* AUTORECODE transformation. */ static void -recode (const struct autorecode_pgm *arc) +recode (struct dataset *ds, const struct autorecode_pgm *arc) { struct autorecode_trns *trns; size_t i; @@ -246,7 +273,7 @@ recode (const struct autorecode_pgm *arc) spec->src = arc->src_vars[i]; spec->dest = arc->dst_vars[i]; - if (arc->src_vars[i]->type == ALPHA) + if (var_is_alpha (arc->src_vars[i])) spec->items = hsh_create (2 * count, compare_alpha_value, hash_alpha_value, NULL, arc->src_vars[i]); else @@ -257,39 +284,42 @@ recode (const struct autorecode_pgm *arc) { struct arc_item *item = pool_alloc (trns->pool, sizeof *item); union arc_value *vp = *p; - - if (arc->src_vars[i]->type == NUMERIC) + + if (var_is_numeric (arc->src_vars[i])) item->from.f = vp->f; else item->from.c = pool_clone (trns->pool, vp->c, - arc->src_vars[i]->width); + var_get_width (arc->src_vars[i])); item->to = arc->direction == ASCENDING ? j + 1 : count - j; hsh_force_insert (spec->items, item); } } - add_transformation (autorecode_trns_proc, autorecode_trns_free, trns); + add_transformation (ds, + autorecode_trns_proc, autorecode_trns_free, trns); } /* Executes an AUTORECODE transformation. 
*/ static int -autorecode_trns_proc (void *trns_, struct ccase *c, casenum_t case_idx UNUSED) +autorecode_trns_proc (void *trns_, struct ccase **c, + casenumber case_idx UNUSED) { struct autorecode_trns *trns = trns_; size_t i; + *c = case_unshare (*c); for (i = 0; i < trns->spec_cnt; i++) { struct arc_spec *spec = &trns->specs[i]; struct arc_item *item; union arc_value v; - if (spec->src->type == NUMERIC) - v.f = case_num (c, spec->src->fv); + if (var_is_numeric (spec->src)) + v.f = case_num (*c, spec->src); else - v.c = (char *) case_str (c, spec->src->fv); + v.c = (char *) case_str (*c, spec->src); item = hsh_force_find (spec->items, &v); - case_data_rw (c, spec->dest->fv)->f = item->to; + case_data_rw (*c, spec->dest)->f = item->to; } return TRNS_CONTINUE; } @@ -310,26 +340,26 @@ autorecode_trns_free (void *trns_) /* AUTORECODE procedure. */ static int -compare_alpha_value (const void *a_, const void *b_, void *v_) +compare_alpha_value (const void *a_, const void *b_, const void *v_) { const union arc_value *a = a_; const union arc_value *b = b_; const struct variable *v = v_; - return memcmp (a->c, b->c, v->width); + return memcmp (a->c, b->c, var_get_width (v)); } static unsigned -hash_alpha_value (const void *a_, void *v_) +hash_alpha_value (const void *a_, const void *v_) { const union arc_value *a = a_; const struct variable *v = v_; - - return hsh_hash_bytes (a->c, v->width); + + return hsh_hash_bytes (a->c, var_get_width (v)); } static int -compare_numeric_value (const void *a_, const void *b_, void *foo UNUSED) +compare_numeric_value (const void *a_, const void *b_, const void *aux UNUSED) { const union arc_value *a = a_; const union arc_value *b = b_; @@ -338,39 +368,9 @@ compare_numeric_value (const void *a_, const void *b_, void *foo UNUSED) } static unsigned -hash_numeric_value (const void *a_, void *foo UNUSED) +hash_numeric_value (const void *a_, const void *aux UNUSED) { const union arc_value *a = a_; return hsh_hash_double (a->f); } - -static 
bool -autorecode_proc_func (const struct ccase *c, void *arc_) -{ - struct autorecode_pgm *arc = arc_; - size_t i; - - for (i = 0; i < arc->var_cnt; i++) - { - union arc_value v, *vp, **vpp; - - if (arc->src_vars[i]->type == NUMERIC) - v.f = case_num (c, arc->src_vars[i]->fv); - else - v.c = (char *) case_str (c, arc->src_vars[i]->fv); - - vpp = (union arc_value **) hsh_probe (arc->src_values[i], &v); - if (*vpp == NULL) - { - vp = pool_alloc (arc->src_values_pool, sizeof *vp); - if (arc->src_vars[i]->type == NUMERIC) - vp->f = v.f; - else - vp->c = pool_clone (arc->src_values_pool, - v.c, arc->src_vars[i]->width); - *vpp = vp; - } - } - return true; -}