/* PSPP - computes sample statistics.
- Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
Written by Ben Pfaff <blp@gnu.org>.
This program is free software; you can redistribute it and/or
#include <language/command.h>
#include <language/data-io/file-handle.h>
#include <language/lexer/lexer.h>
+#include <language/lexer/variable-parser.h>
#include <language/stats/sort-criteria.h>
#include <libpspp/alloc.h>
-#include <libpspp/message.h>
+#include <libpspp/assertion.h>
#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <libpspp/pool.h>
#include <math/moments.h>
#include <math/sort.h>
+#include "minmax.h"
+
#include "gettext.h"
#define _(msgid) gettext (msgid)
enum missing_treatment missing; /* How to treat missing values. */
struct agr_var *agr_vars; /* First aggregate variable. */
struct dictionary *dict; /* Aggregate dictionary. */
+ const struct dictionary *src_dict; /* Dict of the source */
int case_cnt; /* Counts aggregated cases. */
struct ccase agr_case; /* Aggregate case for output. */
};
const struct ccase *);
/* Prototypes. */
-static int parse_aggregate_functions (struct agr_proc *);
+static bool parse_aggregate_functions (struct lexer *, const struct dictionary *,
+ struct agr_proc *);
static void agr_destroy (struct agr_proc *);
-static int aggregate_single_case (struct agr_proc *agr,
- const struct ccase *input,
- struct ccase *output);
+static bool aggregate_single_case (struct agr_proc *agr,
+ const struct ccase *input,
+ struct ccase *output);
static void dump_aggregate_info (struct agr_proc *agr, struct ccase *output);
/* Aggregating to the active file. */
-static bool agr_to_active_file (const struct ccase *, void *aux);
+static bool agr_to_active_file (const struct ccase *, void *aux, const struct dataset *);
/* Aggregating to a system file. */
-static bool presorted_agr_to_sysfile (const struct ccase *, void *aux);
+static bool presorted_agr_to_sysfile (const struct ccase *, void *aux, const struct dataset *);
\f
/* Parsing. */
/* Parses and executes the AGGREGATE procedure. */
int
-cmd_aggregate (void)
+cmd_aggregate (struct lexer *lexer, struct dataset *ds)
{
+ struct dictionary *dict = dataset_dict (ds);
struct agr_proc agr;
struct file_handle *out_file = NULL;
case_nullify (&agr.break_case);
agr.dict = dict_create ();
- dict_set_label (agr.dict, dict_get_label (default_dict));
- dict_set_documents (agr.dict, dict_get_documents (default_dict));
+ agr.src_dict = dict;
+ dict_set_label (agr.dict, dict_get_label (dict));
+ dict_set_documents (agr.dict, dict_get_documents (dict));
/* OUTFILE subcommand must be first. */
- if (!lex_force_match_id ("OUTFILE"))
+ if (!lex_force_match_id (lexer, "OUTFILE"))
goto error;
- lex_match ('=');
- if (!lex_match ('*'))
+ lex_match (lexer, '=');
+ if (!lex_match (lexer, '*'))
{
- out_file = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
+ out_file = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
if (out_file == NULL)
goto error;
}
/* Read most of the subcommands. */
for (;;)
{
- lex_match ('/');
+ lex_match (lexer, '/');
- if (lex_match_id ("MISSING"))
+ if (lex_match_id (lexer, "MISSING"))
{
- lex_match ('=');
- if (!lex_match_id ("COLUMNWISE"))
+ lex_match (lexer, '=');
+ if (!lex_match_id (lexer, "COLUMNWISE"))
{
- lex_error (_("while expecting COLUMNWISE"));
+ lex_error (lexer, _("while expecting COLUMNWISE"));
goto error;
}
agr.missing = COLUMNWISE;
}
- else if (lex_match_id ("DOCUMENT"))
+ else if (lex_match_id (lexer, "DOCUMENT"))
copy_documents = true;
- else if (lex_match_id ("PRESORTED"))
+ else if (lex_match_id (lexer, "PRESORTED"))
presorted = true;
- else if (lex_match_id ("BREAK"))
+ else if (lex_match_id (lexer, "BREAK"))
{
int i;
- lex_match ('=');
- agr.sort = sort_parse_criteria (default_dict,
+ lex_match (lexer, '=');
+ agr.sort = sort_parse_criteria (lexer, dict,
&agr.break_vars, &agr.break_var_cnt,
&saw_direction, NULL);
if (agr.sort == NULL)
for (i = 0; i < agr.break_var_cnt; i++)
dict_clone_var_assert (agr.dict, agr.break_vars[i],
- agr.break_vars[i]->name);
+ var_get_name (agr.break_vars[i]));
/* BREAK must follow the options. */
break;
}
else
{
- lex_error (_("expecting BREAK"));
+ lex_error (lexer, _("expecting BREAK"));
goto error;
}
}
"the same way as the input data."));
/* Read in the aggregate functions. */
- lex_match ('/');
- if (!parse_aggregate_functions (&agr))
+ lex_match (lexer, '/');
+ if (!parse_aggregate_functions (lexer, dict, &agr))
goto error;
/* Delete documents. */
{
/* The active file will be replaced by the aggregated data,
so TEMPORARY is moot. */
- proc_cancel_temporary_transformations ();
+ proc_cancel_temporary_transformations (ds);
if (agr.sort != NULL && !presorted)
{
- if (!sort_active_file_in_place (agr.sort))
+ if (!sort_active_file_in_place (ds, agr.sort))
goto error;
}
agr.sink = create_case_sink (&storage_sink_class, agr.dict, NULL);
if (agr.sink->class->open != NULL)
agr.sink->class->open (agr.sink);
- proc_set_sink (create_case_sink (&null_sink_class, default_dict, NULL));
- if (!procedure (agr_to_active_file, &agr))
+ proc_set_sink (ds,
+ create_case_sink (&null_sink_class,
+ dict, NULL));
+ if (!procedure (ds, agr_to_active_file, &agr))
goto error;
if (agr.case_cnt > 0)
{
if (!agr.sink->class->write (agr.sink, &agr.agr_case))
goto error;
}
- discard_variables ();
- default_dict = agr.dict;
+ discard_variables (ds);
+ dict_destroy (dict);
+ dataset_set_dict (ds, agr.dict);
agr.dict = NULL;
- proc_set_source (agr.sink->class->make_source (agr.sink));
+ proc_set_source (ds,
+ agr.sink->class->make_source (agr.sink));
free_case_sink (agr.sink);
}
else
struct ccase c;
bool ok = true;
- dst = sort_active_file_to_casefile (agr.sort);
+ dst = sort_active_file_to_casefile (ds, agr.sort);
if (dst == NULL)
goto error;
reader = casefile_get_destructive_reader (dst);
else
{
/* Active file is already sorted. */
- if (!procedure (presorted_agr_to_sysfile, &agr))
+ if (!procedure (ds, presorted_agr_to_sysfile, &agr))
goto error;
}
}
/* Parse all the aggregate functions. */
-static int
-parse_aggregate_functions (struct agr_proc *agr)
+static bool
+parse_aggregate_functions (struct lexer *lexer, const struct dictionary *dict, struct agr_proc *agr)
{
struct agr_var *tail; /* Tail of linked list starting at agr->agr_vars. */
char **dest;
char **dest_label;
size_t n_dest;
+ struct string function_name;
int include_missing;
const struct agr_func *function;
arg[1].c = NULL;
/* Parse the list of target variables. */
- while (!lex_match ('='))
+ while (!lex_match (lexer, '='))
{
size_t n_dest_prev = n_dest;
- if (!parse_DATA_LIST_vars (&dest, &n_dest,
+ if (!parse_DATA_LIST_vars (lexer, &dest, &n_dest,
PV_APPEND | PV_SINGLE | PV_NO_SCRATCH))
goto error;
for (j = n_dest_prev; j < n_dest; j++)
dest_label[j] = NULL;
}
+
+
- if (token == T_STRING)
+ if (lex_token (lexer) == T_STRING)
{
- ds_truncate (&tokstr, 255);
- dest_label[n_dest - 1] = xstrdup (ds_c_str (&tokstr));
- lex_get ();
+ struct string label;
+ ds_init_string (&label, lex_tokstr (lexer));
+
+ ds_truncate (&label, 255);
+ dest_label[n_dest - 1] = ds_xstrdup (&label);
+ lex_get (lexer);
+ ds_destroy (&label);
}
}
/* Get the name of the aggregation function. */
- if (token != T_ID)
+ if (lex_token (lexer) != T_ID)
{
- lex_error (_("expecting aggregation function"));
+ lex_error (lexer, _("expecting aggregation function"));
goto error;
}
include_missing = 0;
- if (tokid[strlen (tokid) - 1] == '.')
- {
+
+ ds_init_string (&function_name, lex_tokstr (lexer));
+
+ ds_chomp (&function_name, '.');
+
+ if (lex_tokid(lexer)[strlen (lex_tokid (lexer)) - 1] == '.')
include_missing = 1;
- tokid[strlen (tokid) - 1] = 0;
- }
-
+
for (function = agr_func_tab; function->name; function++)
- if (!strcasecmp (function->name, tokid))
+ if (!strcasecmp (function->name, ds_cstr (&function_name)))
break;
if (NULL == function->name)
{
- msg (SE, _("Unknown aggregation function %s."), tokid);
+ msg (SE, _("Unknown aggregation function %s."),
+ ds_cstr (&function_name));
goto error;
}
+ ds_destroy (&function_name);
func_index = function - agr_func_tab;
- lex_get ();
+ lex_get (lexer);
/* Check for leading lparen. */
- if (!lex_match ('('))
+ if (!lex_match (lexer, '('))
{
if (func_index == N)
func_index = N_NO_VARS;
func_index = NU_NO_VARS;
else
{
- lex_error (_("expecting `('"));
+ lex_error (lexer, _("expecting `('"));
goto error;
}
}
else if (function->n_args)
pv_opts |= PV_SAME_TYPE;
- if (!parse_variables (default_dict, &src, &n_src, pv_opts))
+ if (!parse_variables (lexer, dict, &src, &n_src, pv_opts))
goto error;
}
{
int type;
- lex_match (',');
- if (token == T_STRING)
+ lex_match (lexer, ',');
+ if (lex_token (lexer) == T_STRING)
{
- arg[i].c = xstrdup (ds_c_str (&tokstr));
+ arg[i].c = ds_xstrdup (lex_tokstr (lexer));
type = ALPHA;
}
- else if (lex_is_number ())
+ else if (lex_is_number (lexer))
{
- arg[i].f = tokval;
+ arg[i].f = lex_tokval (lexer);
type = NUMERIC;
} else {
msg (SE, _("Missing argument %d to %s."), i + 1,
goto error;
}
- lex_get ();
+ lex_get (lexer);
- if (type != src[0]->type)
+ if (type != var_get_type (src[0]))
{
msg (SE, _("Arguments to %s must be of same type as "
"source variables."),
}
/* Trailing rparen. */
- if (!lex_match(')'))
+ if (!lex_match (lexer, ')'))
{
- lex_error (_("expecting `)'"));
+ lex_error (lexer, _("expecting `)'"));
goto error;
}
if ((func_index == PIN || func_index == POUT
|| func_index == FIN || func_index == FOUT)
- && ((src[0]->type == NUMERIC && arg[0].f > arg[1].f)
- || (src[0]->type == ALPHA
- && str_compare_rpad (arg[0].c, arg[1].c) > 0)))
+ && (var_is_numeric (src[0])
+ ? arg[0].f > arg[1].f
+ : str_compare_rpad (arg[0].c, arg[1].c) > 0))
{
union agr_argument t = arg[0];
arg[0] = arg[1];
{
v->src = src[i];
- if (src[i]->type == ALPHA)
+ if (var_is_alpha (src[i]))
{
v->function |= FSTRING;
- v->string = xmalloc (src[i]->width);
+ v->string = xmalloc (var_get_width (src[i]));
}
if (function->alpha_type == ALPHA)
destvar = dict_clone_var (agr->dict, v->src, dest[i]);
else
{
- assert (v->src->type == NUMERIC
+ assert (var_is_numeric (v->src)
|| function->alpha_type == NUMERIC);
destvar = dict_create_var (agr->dict, dest[i], 0);
if (destvar != NULL)
{
+ struct fmt_spec f;
if ((func_index == N || func_index == NMISS)
- && dict_get_weight (default_dict) != NULL)
- destvar->print = destvar->write = f8_2;
+ && dict_get_weight (dict) != NULL)
+ f = fmt_for_output (FMT_F, 8, 2);
else
- destvar->print = destvar->write = function->format;
+ f = function->format;
+ var_set_both_formats (destvar, &f);
}
}
} else {
+ struct fmt_spec f;
v->src = NULL;
destvar = dict_create_var (agr->dict, dest[i], 0);
- if (func_index == N_NO_VARS
- && dict_get_weight (default_dict) != NULL)
- destvar->print = destvar->write = f8_2;
+ if (func_index == N_NO_VARS && dict_get_weight (dict) != NULL)
+ f = fmt_for_output (FMT_F, 8, 2);
else
- destvar->print = destvar->write = function->format;
+ f = function->format;
+ var_set_both_formats (destvar, &f);
}
if (!destvar)
free (dest[i]);
if (dest_label[i])
- {
- destvar->label = dest_label[i];
- dest_label[i] = NULL;
- }
+ var_set_label (destvar, dest_label[i]);
v->dest = destvar;
}
{
int j;
- if (v->src->type == NUMERIC)
+ if (var_is_numeric (v->src))
for (j = 0; j < function->n_args; j++)
v->arg[j].f = arg[j].f;
else
}
}
- if (src != NULL && src[0]->type == ALPHA)
+ if (src != NULL && var_is_alpha (src[0]))
for (i = 0; i < function->n_args; i++)
{
free (arg[i].c);
free (dest);
free (dest_label);
- if (!lex_match ('/'))
+ if (!lex_match (lexer, '/'))
{
- if (token == '.')
- return 1;
+ if (lex_token (lexer) == '.')
+ return true;
- lex_error ("expecting end of command");
- return 0;
+ lex_error (lexer, "expecting end of command");
+ return false;
}
continue;
error:
+ ds_destroy (&function_name);
for (i = 0; i < n_dest; i++)
{
free (dest[i]);
free (dest_label);
free (arg[0].c);
free (arg[1].c);
- if (src && n_src && src[0]->type == ALPHA)
+ if (src && n_src && var_is_alpha (src[0]))
for (i = 0; i < function->n_args; i++)
{
free (arg[i].c);
}
free (src);
- return 0;
+ return false;
}
}
static void dump_aggregate_info (struct agr_proc *, struct ccase *);
/* Processes a single case INPUT for aggregation. If output is
- warranted, writes it to OUTPUT and returns nonzero.
- Otherwise, returns zero and OUTPUT is unmodified. */
-static int
+ warranted, writes it to OUTPUT and returns true.
+ Otherwise, returns false and OUTPUT is unmodified. */
+static bool
aggregate_single_case (struct agr_proc *agr,
const struct ccase *input, struct ccase *output)
{
{
struct agr_var *iter;
double weight;
- int bad_warn = 1;
+ bool bad_warn = true;
- weight = dict_get_case_weight (default_dict, input, &bad_warn);
+ weight = dict_get_case_weight (agr->src_dict, input, &bad_warn);
for (iter = agr->agr_vars; iter; iter = iter->next)
if (iter->src)
{
const union value *v = case_data (input, iter->src->fv);
+ int src_width = var_get_width (iter->src);
- if ((!iter->include_missing
- && mv_is_value_missing (&iter->src->miss, v))
- || (iter->include_missing && iter->src->type == NUMERIC
- && v->f == SYSMIS))
+ if (iter->include_missing
+ ? var_is_numeric (iter->src) && v->f == SYSMIS
+ : var_is_value_missing (iter->src, v))
{
switch (iter->function)
{
moments1_add (iter->moments, v->f, weight);
break;
case MAX:
- iter->dbl[0] = max (iter->dbl[0], v->f);
+ iter->dbl[0] = MAX (iter->dbl[0], v->f);
iter->int1 = 1;
break;
case MAX | FSTRING:
- if (memcmp (iter->string, v->s, iter->src->width) < 0)
- memcpy (iter->string, v->s, iter->src->width);
+ if (memcmp (iter->string, v->s, src_width) < 0)
+ memcpy (iter->string, v->s, src_width);
iter->int1 = 1;
break;
case MIN:
- iter->dbl[0] = min (iter->dbl[0], v->f);
+ iter->dbl[0] = MIN (iter->dbl[0], v->f);
iter->int1 = 1;
break;
case MIN | FSTRING:
- if (memcmp (iter->string, v->s, iter->src->width) > 0)
- memcpy (iter->string, v->s, iter->src->width);
+ if (memcmp (iter->string, v->s, src_width) > 0)
+ memcpy (iter->string, v->s, src_width);
iter->int1 = 1;
break;
case FGT:
break;
case FGT | FSTRING:
case PGT | FSTRING:
- if (memcmp (iter->arg[0].c, v->s, iter->src->width) < 0)
+ if (memcmp (iter->arg[0].c, v->s, src_width) < 0)
iter->dbl[0] += weight;
iter->dbl[1] += weight;
break;
break;
case FLT | FSTRING:
case PLT | FSTRING:
- if (memcmp (iter->arg[0].c, v->s, iter->src->width) > 0)
+ if (memcmp (iter->arg[0].c, v->s, src_width) > 0)
iter->dbl[0] += weight;
iter->dbl[1] += weight;
break;
break;
case FIN | FSTRING:
case PIN | FSTRING:
- if (memcmp (iter->arg[0].c, v->s, iter->src->width) <= 0
- && memcmp (iter->arg[1].c, v->s, iter->src->width) >= 0)
+ if (memcmp (iter->arg[0].c, v->s, src_width) <= 0
+ && memcmp (iter->arg[1].c, v->s, src_width) >= 0)
iter->dbl[0] += weight;
iter->dbl[1] += weight;
break;
break;
case FOUT | FSTRING:
case POUT | FSTRING:
- if (memcmp (iter->arg[0].c, v->s, iter->src->width) > 0
- || memcmp (iter->arg[1].c, v->s, iter->src->width) < 0)
+ if (memcmp (iter->arg[0].c, v->s, src_width) > 0
+ || memcmp (iter->arg[1].c, v->s, src_width) < 0)
iter->dbl[0] += weight;
iter->dbl[1] += weight;
break;
case FIRST | FSTRING:
if (iter->int1 == 0)
{
- memcpy (iter->string, v->s, iter->src->width);
+ memcpy (iter->string, v->s, src_width);
iter->int1 = 1;
}
break;
iter->int1 = 1;
break;
case LAST | FSTRING:
- memcpy (iter->string, v->s, iter->src->width);
+ memcpy (iter->string, v->s, src_width);
iter->int1 = 1;
break;
case NMISS:
caught earlier. Nothing to do. */
break;
default:
- assert (0);
+ NOT_REACHED ();
}
} else {
switch (iter->function)
iter->int1++;
break;
default:
- assert (0);
+ NOT_REACHED ();
}
}
}
for (i = 0; i < agr->break_var_cnt; i++)
{
struct variable *v = agr->break_vars[i];
+ size_t value_cnt = var_get_value_cnt (v);
memcpy (case_data_rw (output, value_idx),
case_data (&agr->break_case, v->fv),
- sizeof (union value) * v->nv);
- value_idx += v->nv;
+ sizeof (union value) * value_cnt);
+ value_idx += value_cnt;
}
}
&& (i->function & FUNC) != N && (i->function & FUNC) != NU
&& (i->function & FUNC) != NMISS && (i->function & FUNC) != NUMISS)
{
- if (i->dest->type == ALPHA)
- memset (v->s, ' ', i->dest->width);
+ if (var_is_alpha (i->dest))
+ memset (v->s, ' ', var_get_width (i->dest));
else
v->f = SYSMIS;
continue;
case MAX | FSTRING:
case MIN | FSTRING:
if (i->int1)
- memcpy (v->s, i->string, i->dest->width);
+ memcpy (v->s, i->string, var_get_width (i->dest));
else
- memset (v->s, ' ', i->dest->width);
+ memset (v->s, ' ', var_get_width (i->dest));
break;
case FGT:
case FGT | FSTRING:
case FIRST | FSTRING:
case LAST | FSTRING:
if (i->int1)
- memcpy (v->s, i->string, i->dest->width);
+ memcpy (v->s, i->string, var_get_width (i->dest));
else
- memset (v->s, ' ', i->dest->width);
+ memset (v->s, ' ', var_get_width (i->dest));
break;
case N_NO_VARS:
v->f = i->dbl[0];
v->f = i->int1;
break;
default:
- assert (0);
+ NOT_REACHED ();
}
}
}
iter->dbl[0] = DBL_MAX;
break;
case MIN | FSTRING:
- memset (iter->string, 255, iter->src->width);
+ memset (iter->string, 255, var_get_width (iter->src));
break;
case MAX:
iter->dbl[0] = -DBL_MAX;
break;
case MAX | FSTRING:
- memset (iter->string, 0, iter->src->width);
+ memset (iter->string, 0, var_get_width (iter->src));
break;
case SD:
if (iter->moments == NULL)
are dropped.
Returns true if successful, false if an I/O error occurred. */
static bool
-agr_to_active_file (const struct ccase *c, void *agr_)
+agr_to_active_file (const struct ccase *c, void *agr_, const struct dataset *ds UNUSED)
{
struct agr_proc *agr = agr_;
/* Aggregate the current case and output it if we passed a
breakpoint. */
static bool
-presorted_agr_to_sysfile (const struct ccase *c, void *agr_)
+presorted_agr_to_sysfile (const struct ccase *c, void *agr_,
+ const struct dataset *ds UNUSED)
{
struct agr_proc *agr = agr_;