-/* PSPP - computes sample statistics.
- Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 1997-9, 2000, 2009, 2010 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
+
#include <stdlib.h>
-#include <data/case.h>
-#include <data/dictionary.h>
-#include <data/procedure.h>
-#include <data/transformations.h>
-#include <data/variable.h>
-#include <language/command.h>
-#include <language/lexer/lexer.h>
-#include <language/lexer/variable-parser.h>
-#include <libpspp/alloc.h>
-#include <libpspp/compiler.h>
-#include <libpspp/hash.h>
-#include <libpspp/message.h>
-#include <libpspp/message.h>
-#include <libpspp/pool.h>
-#include <libpspp/str.h>
+#include "data/case.h"
+#include "data/casereader.h"
+#include "data/dictionary.h"
+#include "data/procedure.h"
+#include "data/transformations.h"
+#include "data/variable.h"
+#include "language/command.h"
+#include "language/lexer/lexer.h"
+#include "language/lexer/variable-parser.h"
+#include "libpspp/compiler.h"
+#include "libpspp/hash.h"
+#include "libpspp/message.h"
+#include "libpspp/pool.h"
+#include "libpspp/str.h"
+
+#include "gl/xalloc.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
/* FIXME: Implement PRINT subcommand. */
-/* An AUTORECODE variable's original value. */
-union arc_value
- {
- double f; /* Numeric. */
- char *c; /* Short or long string. */
- };
-
/* Explains how to recode one value.

   `from' must be the first member: the hash tables hold pointers to
   `struct arc_item' (see the hsh_force_insert call in this file), while
   the compare/hash callbacks and the lookups treat each entry as a plain
   value, so a pointer to an item must also be usable as a pointer to its
   `from' member. */
struct arc_item
{
- union arc_value from; /* Original value. */
+ union value from; /* Original value. */
double to; /* Recoded value. */
};
/* Explains how to recode an AUTORECODE variable. */
struct arc_spec
{
- struct variable *src; /* Source variable. */
+ const struct variable *src; /* Source variable. */
struct variable *dest; /* Target variable. */
struct hsh_table *items; /* Hash table of `struct arc_item's, looked up by original value. */
};
};
/* Descending or ascending sort order.

   Selects how recoded numbers are handed out while the distinct source
   values are visited (J is the zero-based position of a value). */
-enum direction
+enum direction
{
ASCENDING, /* Value J is recoded to J + 1. */
DESCENDING /* Value J is recoded to COUNT - J. */
};
/* AUTORECODE data. */
-struct autorecode_pgm
+struct autorecode_pgm
{
- struct variable **src_vars; /* Source variables. */
+ const struct variable **src_vars; /* Source variables. */
char **dst_names; /* Target variable names. */
struct variable **dst_vars; /* Target variables. */
- struct hsh_table **src_values; /* `union arc_value's of source vars. */
+ struct hsh_table **src_values; /* `union value's of source vars. */
size_t var_cnt; /* Number of variables. */
struct pool *src_values_pool; /* Pool used by src_values. */
enum direction direction; /* Sort order. */
static trns_proc_func autorecode_trns_proc;
static trns_free_func autorecode_trns_free;
-static bool autorecode_proc_func (const struct ccase *, void *, const struct dataset *);
-static hsh_compare_func compare_alpha_value, compare_numeric_value;
-static hsh_hash_func hash_alpha_value, hash_numeric_value;
+static hsh_compare_func compare_value;
+static hsh_hash_func hash_value;
static void recode (struct dataset *, const struct autorecode_pgm *);
static void arc_free (struct autorecode_pgm *);
cmd_autorecode (struct lexer *lexer, struct dataset *ds)
{
struct autorecode_pgm arc;
+ struct casereader *input;
+ struct ccase *c;
size_t dst_cnt;
size_t i;
bool ok;
lex_match_id (lexer, "VARIABLES");
lex_match (lexer, '=');
- if (!parse_variables (lexer, dataset_dict (ds), &arc.src_vars, &arc.var_cnt,
- PV_NO_DUPLICATE))
+ if (!parse_variables_const (lexer, dataset_dict (ds), &arc.src_vars,
+ &arc.var_cnt, PV_NO_DUPLICATE))
goto lossage;
if (!lex_force_match_id (lexer, "INTO"))
goto lossage;
{
size_t i;
- msg (SE, _("Source variable count (%u) does not match "
- "target variable count (%u)."),
- (unsigned) arc.var_cnt, (unsigned) dst_cnt);
+ msg (SE, _("Source variable count (%zu) does not match "
+ "target variable count (%zu)."),
+ arc.var_cnt, dst_cnt);
for (i = 0; i < dst_cnt; i++)
free (arc.dst_names[i]);
arc.dst_vars = xnmalloc (arc.var_cnt, sizeof *arc.dst_vars);
arc.src_values = xnmalloc (arc.var_cnt, sizeof *arc.src_values);
for (i = 0; i < dst_cnt; i++)
- if (var_is_alpha (arc.src_vars[i]))
- arc.src_values[i] = hsh_create (10, compare_alpha_value,
- hash_alpha_value, NULL, arc.src_vars[i]);
- else
- arc.src_values[i] = hsh_create (10, compare_numeric_value,
- hash_numeric_value, NULL, NULL);
-
- ok = procedure (ds, autorecode_proc_func, &arc);
+ arc.src_values[i] = hsh_create (10, compare_value, hash_value, NULL,
+ arc.src_vars[i]);
+
+
+ input = proc_open (ds);
+ for (; (c = casereader_read (input)) != NULL; case_unref (c))
+ for (i = 0; i < arc.var_cnt; i++)
+ {
+ const union value *vp;
+ union value **vpp;
+
+ vp = case_data (c, arc.src_vars[i]);
+ vpp = (union value **) hsh_probe (arc.src_values[i], vp);
+ if (*vpp == NULL)
+ {
+ *vpp = pool_alloc (arc.src_values_pool, sizeof **vpp);
+ value_clone_pool (arc.src_values_pool, *vpp, vp,
+ var_get_width (arc.src_vars[i]));
+ }
+ }
+ ok = casereader_destroy (input);
+ ok = proc_commit (ds) && ok;
for (i = 0; i < arc.var_cnt; i++)
arc.dst_vars[i] = dict_create_var_assert (dataset_dict (ds),
}
static void
-arc_free (struct autorecode_pgm *arc)
+arc_free (struct autorecode_pgm *arc)
{
free (arc->src_vars);
- if (arc->dst_names != NULL)
+ if (arc->dst_names != NULL)
{
size_t i;
-
+
for (i = 0; i < arc->var_cnt; i++)
free (arc->dst_names[i]);
free (arc->dst_names);
}
free (arc->dst_vars);
- if (arc->src_values != NULL)
+ if (arc->src_values != NULL)
{
size_t i;
spec->src = arc->src_vars[i];
spec->dest = arc->dst_vars[i];
-
- if (var_is_alpha (arc->src_vars[i]))
- spec->items = hsh_create (2 * count, compare_alpha_value,
- hash_alpha_value, NULL, arc->src_vars[i]);
- else
- spec->items = hsh_create (2 * count, compare_numeric_value,
- hash_numeric_value, NULL, NULL);
+ spec->items = hsh_create (2 * count, compare_value, hash_value,
+ NULL, arc->src_vars[i]);
for (j = 0; *p; p++, j++)
{
struct arc_item *item = pool_alloc (trns->pool, sizeof *item);
- union arc_value *vp = *p;
-
- if (var_is_numeric (arc->src_vars[i]))
- item->from.f = vp->f;
- else
- item->from.c = pool_clone (trns->pool, vp->c,
- var_get_width (arc->src_vars[i]));
+ union value *vp = *p;
+
+ value_clone_pool (trns->pool, &item->from, vp,
+ var_get_width (arc->src_vars[i]));
item->to = arc->direction == ASCENDING ? j + 1 : count - j;
hsh_force_insert (spec->items, item);
}
}
- add_transformation (ds,
+ add_transformation (ds,
autorecode_trns_proc, autorecode_trns_free, trns);
}
/* Executes an AUTORECODE transformation on one case.

   TRNS_ is the `struct autorecode_trns' registered with the
   transformation.  For each of its recoding specs, the case's value of
   the source variable is looked up in the spec's hash table and the
   matching item's recoded number is stored into the target variable.
   Always returns TRNS_CONTINUE. */
static int
-autorecode_trns_proc (void *trns_, struct ccase *c, casenumber case_idx UNUSED)
+autorecode_trns_proc (void *trns_, struct ccase **c,
+ casenumber case_idx UNUSED)
{
struct autorecode_trns *trns = trns_;
size_t i;
/* NOTE(review): presumably yields a case that is safe to modify before
   the writes below -- confirm against case_unshare() in data/case.h. */
+ *c = case_unshare (*c);
for (i = 0; i < trns->spec_cnt; i++)
{
struct arc_spec *spec = &trns->specs[i];
struct arc_item *item;
- union arc_value v;
- if (var_is_numeric (spec->src))
- v.f = case_num (c, spec->src);
- else
- v.c = (char *) case_str (c, spec->src);
- item = hsh_force_find (spec->items, &v);
/* The result is dereferenced below without a NULL check, so the lookup
   is relied on to find an item. */
+ item = hsh_force_find (spec->items, case_data (*c, spec->src));
- case_data_rw (c, spec->dest)->f = item->to;
+ case_data_rw (*c, spec->dest)->f = item->to;
}
return TRNS_CONTINUE;
}
/* AUTORECODE procedure. */
/* Hash table comparison callback.

   A_ and B_ point to the two values to compare; VAR_ is the source
   variable given as the last argument to hsh_create(), whose width says
   how much of each value takes part in the comparison.  Returns the
   result of the three-way comparison. */
static int
-compare_alpha_value (const void *a_, const void *b_, const void *v_)
+compare_value (const void *a_, const void *b_, const void *var_)
{
- const union arc_value *a = a_;
- const union arc_value *b = b_;
- const struct variable *v = v_;
+ const union value *a = a_;
+ const union value *b = b_;
+ const struct variable *var = var_;
- return memcmp (a->c, b->c, var_get_width (v));
+ return value_compare_3way (a, b, var_get_width (var));
}
/* Hash table hashing callback.

   VALUE_ points to the value to hash; VAR_ is the source variable given
   as the last argument to hsh_create(), whose width is handed to
   value_hash() together with 0 as its third argument. */
static unsigned
-hash_alpha_value (const void *a_, const void *v_)
+hash_value (const void *value_, const void *var_)
{
- const union arc_value *a = a_;
- const struct variable *v = v_;
-
- return hsh_hash_bytes (a->c, var_get_width (v));
-}
-
-static int
-compare_numeric_value (const void *a_, const void *b_, const void *aux UNUSED)
-{
- const union arc_value *a = a_;
- const union arc_value *b = b_;
-
- return a->f < b->f ? -1 : a->f > b->f;
-}
+ const union value *value = value_;
+ const struct variable *var = var_;
-static unsigned
-hash_numeric_value (const void *a_, const void *aux UNUSED)
-{
- const union arc_value *a = a_;
-
- return hsh_hash_double (a->f);
-}
-
-static bool
-autorecode_proc_func (const struct ccase *c, void *arc_, const struct dataset *ds UNUSED)
-{
- struct autorecode_pgm *arc = arc_;
- size_t i;
-
- for (i = 0; i < arc->var_cnt; i++)
- {
- union arc_value v, *vp, **vpp;
-
- if (var_is_numeric (arc->src_vars[i]))
- v.f = case_num (c, arc->src_vars[i]);
- else
- v.c = (char *) case_str (c, arc->src_vars[i]);
-
- vpp = (union arc_value **) hsh_probe (arc->src_values[i], &v);
- if (*vpp == NULL)
- {
- vp = pool_alloc (arc->src_values_pool, sizeof *vp);
- if (var_is_numeric (arc->src_vars[i]))
- vp->f = v.f;
- else
- vp->c = pool_clone (arc->src_values_pool,
- v.c, var_get_width (arc->src_vars[i]));
- *vpp = vp;
- }
- }
- return true;
+ return value_hash (value, var_get_width (var), 0);
}