You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA. */
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
#include <config.h>
#include "error.h"
struct sort_criteria *sort; /* Sort criteria. */
struct variable **break_vars; /* Break variables. */
size_t break_var_cnt; /* Number of break variables. */
- union value *prev_break; /* Last values of break variables. */
+ struct ccase break_case; /* Last values of break variables. */
enum missing_treatment missing; /* How to treat missing values. */
struct agr_var *agr_vars; /* First aggregate variable. */
struct ccase agr_case; /* Aggregate case for output. */
};
-static void initialize_aggregate_info (struct agr_proc *);
+static void initialize_aggregate_info (struct agr_proc *,
+ const struct ccase *);
/* Prototypes. */
static int parse_aggregate_functions (struct agr_proc *);
struct agr_proc agr;
struct file_handle *out_file = NULL;
- /* Have we seen these subcommands? */
- unsigned seen = 0;
+ bool copy_documents = false;
+ bool presorted = false;
+ bool saw_direction;
- agr.writer = NULL;
- agr.sink = NULL;
+ memset(&agr, 0 , sizeof (agr));
agr.missing = ITEMWISE;
- agr.sort = NULL;
- agr.break_vars = NULL;
- agr.agr_vars = NULL;
- agr.dict = NULL;
- agr.case_cnt = 0;
- agr.prev_break = NULL;
+ case_nullify (&agr.break_case);
agr.dict = dict_create ();
dict_set_label (agr.dict, dict_get_label (default_dict));
dict_set_documents (agr.dict, dict_get_documents (default_dict));
+
+ /* OUTFILE subcommand must be first. */
+ if (!lex_force_match_id ("OUTFILE"))
+ goto error;
+ lex_match ('=');
+ if (!lex_match ('*'))
+ {
+ out_file = fh_parse ();
+ if (out_file == NULL)
+ goto error;
+ }
/* Read most of the subcommands. */
for (;;)
{
lex_match ('/');
- if (lex_match_id ("OUTFILE"))
- {
- if (seen & 1)
- {
- msg (SE, _("%s subcommand given multiple times."),"OUTFILE");
- goto error;
- }
- seen |= 1;
-
- lex_match ('=');
- if (!lex_match ('*'))
- {
- out_file = fh_parse ();
- if (out_file == NULL)
- goto error;
- }
- }
- else if (lex_match_id ("MISSING"))
+ if (lex_match_id ("MISSING"))
{
lex_match ('=');
if (!lex_match_id ("COLUMNWISE"))
agr.missing = COLUMNWISE;
}
else if (lex_match_id ("DOCUMENT"))
- seen |= 2;
+ copy_documents = true;
else if (lex_match_id ("PRESORTED"))
- seen |= 4;
+ presorted = true;
else if (lex_match_id ("BREAK"))
{
int i;
- if (seen & 8)
- {
- msg (SE, _("%s subcommand given multiple times."),"BREAK");
- goto error;
- }
- seen |= 8;
-
lex_match ('=');
agr.sort = sort_parse_criteria (default_dict,
- &agr.break_vars, &agr.break_var_cnt);
+ &agr.break_vars, &agr.break_var_cnt,
+ &saw_direction);
if (agr.sort == NULL)
goto error;
for (i = 0; i < agr.break_var_cnt; i++)
{
struct variable *v = dict_clone_var (agr.dict, agr.break_vars[i],
- agr.break_vars[i]->name);
+ agr.break_vars[i]->name,
+ agr.break_vars[i]->longname
+ );
assert (v != NULL);
}
+
+ /* BREAK must follow the options. */
+ break;
}
- else break;
+ else
+ {
+ lex_error (_("expecting BREAK"));
+ goto error;
+ }
}
-
- /* Check for proper syntax. */
- if (!(seen & 8))
- msg (SW, _("BREAK subcommand not specified."));
+ if (presorted && saw_direction)
+ msg (SW, _("When PRESORTED is specified, specifying sorting directions "
+ "with (A) or (D) has no effect. Output data will be sorted "
+ "the same way as the input data."));
/* Read in the aggregate functions. */
+ lex_match ('/');
if (!parse_aggregate_functions (&agr))
goto error;
/* Delete documents. */
- if (!(seen & 2))
+ if (!copy_documents)
dict_set_documents (agr.dict, NULL);
/* Cancel SPLIT FILE. */
/* Initialize. */
agr.case_cnt = 0;
case_create (&agr.agr_case, dict_get_next_value_idx (agr.dict));
- initialize_aggregate_info (&agr);
/* Output to active file or external file? */
if (out_file == NULL)
so TEMPORARY is moot. */
cancel_temporary ();
- if (agr.sort != NULL && (seen & 4) == 0)
+ if (agr.sort != NULL && !presorted)
sort_active_file_in_place (agr.sort);
agr.sink = create_case_sink (&storage_sink_class, agr.dict, NULL);
}
else
{
- agr.writer = sfm_open_writer (out_file, agr.dict, get_scompression ());
+ agr.writer = sfm_open_writer (out_file, agr.dict, get_scompression (), 0);
if (agr.writer == NULL)
goto error;
- if (agr.sort != NULL && (seen & 4) == 0)
+ if (agr.sort != NULL && !presorted)
{
/* Sorting is needed. */
struct casefile *dst;
}
for (function = agr_func_tab; function->name; function++)
- if (!strcmp (function->name, tokid))
+ if (!strcasecmp (function->name, tokid))
break;
if (NULL == function->name)
{
lex_error (_("expecting `('"));
goto error;
}
- } else {
+ }
+ else
+ {
/* Parse list of source variables. */
{
int pv_opts = PV_NO_SCRATCH;
arg[i].c = xstrdup (ds_c_str (&tokstr));
type = ALPHA;
}
- else if (token == T_NUM)
+ else if (lex_is_number ())
{
arg[i].f = tokval;
type = NUMERIC;
goto error;
}
- /* Now check that the number of source variables match the
- number of target variables. Do this here because if we
- do it earlier then the user can get very misleading error
- messages; i.e., `AGGREGATE x=SUM(y t).' will get this
- error message when a proper message would be more like
- `unknown variable t'. */
+ /* Now check that the number of source variables matches
+ the number of target variables. If we check earlier
+ than this, the user can get a very misleading error
+ message, e.g. `AGGREGATE x=SUM(y t).' will get this
+ error message when a proper message would be more
+ like `unknown variable t'. */
if (n_src != n_dest)
{
msg (SE, _("Number of source variables (%d) does not match "
n_src, n_dest);
goto error;
}
+
+ if ((func_index == PIN || func_index == POUT
+ || func_index == FIN || func_index == FOUT)
+ && ((src[0]->type == NUMERIC && arg[0].f > arg[1].f)
+ || (src[0]->type == ALPHA
+ && st_compare_pad (arg[0].c, strlen (arg[0].c),
+ arg[1].c, strlen (arg[1].c)) > 0)))
+ {
+ union value t = arg[0];
+ arg[0] = arg[1];
+ arg[1] = t;
+
+ msg (SW, _("The value arguments passed to the %s function "
+ "are out-of-order. They will be treated as if "
+ "they had been specified in the correct order."),
+ function->name);
+ }
}
/* Finally add these to the linked list of aggregation
/* Create the target variable in the aggregate
dictionary. */
{
+ static const struct fmt_spec f8_2 = {FMT_F, 8, 2};
struct variable *destvar;
v->function = func_index;
if (src)
{
- int output_width;
-
v->src = src[i];
if (src[i]->type == ALPHA)
v->function |= FSTRING;
v->string = xmalloc (src[i]->width);
}
-
- if (v->src->type == NUMERIC || function->alpha_type == NUMERIC)
- output_width = 0;
- else
- output_width = v->src->width;
if (function->alpha_type == ALPHA)
- destvar = dict_clone_var (agr->dict, v->src, dest[i]);
- else
- {
- destvar = dict_create_var (agr->dict, dest[i], output_width);
- if (output_width == 0)
- destvar->print = destvar->write = function->format;
- if (output_width == 0 && dict_get_weight (default_dict) != NULL
- && (func_index == N || func_index == N_NO_VARS
- || func_index == NU || func_index == NU_NO_VARS))
- {
- struct fmt_spec f = {FMT_F, 8, 2};
-
- destvar->print = destvar->write = f;
- }
- }
+ destvar = dict_clone_var (agr->dict, v->src, 0, dest[i] );
+ else if (v->src->type == NUMERIC
+ || function->alpha_type == NUMERIC)
+ {
+ destvar = dict_create_var (agr->dict, dest[i], 0);
+ if (destvar != NULL)
+ {
+ if ((func_index == N || func_index == NMISS)
+ && dict_get_weight (default_dict) != NULL)
+ destvar->print = destvar->write = f8_2;
+ else
+ destvar->print = destvar->write = function->format;
+ }
+ }
} else {
v->src = NULL;
destvar = dict_create_var (agr->dict, dest[i], 0);
+ if (func_index == N_NO_VARS
+ && dict_get_weight (default_dict) != NULL)
+ destvar->print = destvar->write = f8_2;
+ else
+ destvar->print = destvar->write = function->format;
}
if (!destvar)
"the aggregate variables and the break "
"variables."),
dest[i]);
- free (dest[i]);
goto error;
}
destvar->label = dest_label[i];
dest_label[i] = NULL;
}
- else if (function->alpha_type == ALPHA)
- destvar->print = destvar->write = function->format;
v->dest = destvar;
}
if (agr->sort != NULL)
sort_destroy_criteria (agr->sort);
free (agr->break_vars);
- free (agr->prev_break);
+ case_destroy (&agr->break_case);
for (iter = agr->agr_vars; iter; iter = next)
{
next = iter->next;
}
if (agr->dict != NULL)
dict_destroy (agr->dict);
+
case_destroy (&agr->agr_case);
}
\f
aggregate_single_case (struct agr_proc *agr,
const struct ccase *input, struct ccase *output)
{
- /* The first case always begins a new break group. We also need to
- preserve the values of the case for later comparison. */
+ bool finished_group = false; /* Becomes true when INPUT starts a new break group and the previous group is dumped to OUTPUT. */
+
if (agr->case_cnt++ == 0)
+ initialize_aggregate_info (agr, input); /* Very first case: start the first break group from INPUT. */
+ else if (case_compare (&agr->break_case, input,
+ agr->break_vars, agr->break_var_cnt)) /* NOTE(review): presumably nonzero when INPUT's break values differ from the saved break case -- confirm against case_compare. */
{
- int n_elem = 0;
-
- {
- int i;
-
- for (i = 0; i < agr->break_var_cnt; i++)
- n_elem += agr->break_vars[i]->nv;
- }
-
- agr->prev_break = xmalloc (sizeof *agr->prev_break * n_elem);
-
- /* Copy INPUT into prev_break. */
- {
- union value *iter = agr->prev_break;
- int i;
+ dump_aggregate_info (agr, output); /* Write the finished group's break values and aggregates into OUTPUT. */
+ finished_group = true;
- for (i = 0; i < agr->break_var_cnt; i++)
- {
- struct variable *v = agr->break_vars[i];
-
- if (v->type == NUMERIC)
- (iter++)->f = case_num (input, v->fv);
- else
- {
- memcpy (iter->s, case_str (input, v->fv), v->width);
- iter += v->nv;
- }
- }
- }
-
- accumulate_aggregate_info (agr, input);
-
- return 0;
+ initialize_aggregate_info (agr, input); /* Reset the accumulators and save INPUT as the break case of the new group. */
}
-
- /* Compare the value of each break variable to the values on the
- previous case. */
- {
- union value *iter = agr->prev_break;
- int i;
-
- for (i = 0; i < agr->break_var_cnt; i++)
- {
- struct variable *v = agr->break_vars[i];
-
- switch (v->type)
- {
- case NUMERIC:
- if (case_num (input, v->fv) != iter->f)
- goto not_equal;
- iter++;
- break;
- case ALPHA:
- if (memcmp (case_str (input, v->fv), iter->s, v->width))
- goto not_equal;
- iter += v->nv;
- break;
- default:
- assert (0);
- }
- }
- }
-
- accumulate_aggregate_info (agr, input);
- return 0;
-
-not_equal:
- /* The values of the break variable are different from the values on
- the previous case. That means that it's time to dump aggregate
- info. */
- dump_aggregate_info (agr, output);
- initialize_aggregate_info (agr);
accumulate_aggregate_info (agr, input);
-
- /* Copy INPUT into prev_break. */
- {
- union value *iter = agr->prev_break;
- int i;
-
- for (i = 0; i < agr->break_var_cnt; i++)
- {
- struct variable *v = agr->break_vars[i];
-
- if (v->type == NUMERIC)
- (iter++)->f = case_num (input, v->fv);
- else
- {
- memcpy (iter->s, case_str (input, v->fv), v->width);
- iter += v->nv;
- }
- }
- }
-
- return 1;
+ return finished_group; /* True only when a completed group was dumped into OUTPUT during this call. */
}
/* Accumulates aggregation data from the case INPUT. */
switch (iter->function)
{
case NMISS:
+ case NMISS | FSTRING:
iter->dbl[0] += weight;
break;
case NUMISS:
+ case NUMISS | FSTRING:
iter->int1++;
break;
}
switch (iter->function)
{
case SUM:
- iter->dbl[0] += v->f;
+ iter->dbl[0] += v->f * weight;
+ iter->int1 = 1;
break;
case MEAN:
iter->dbl[0] += v->f * weight;
case FOUT | FSTRING:
case POUT | FSTRING:
if (memcmp (iter->arg[0].c, v->s, iter->src->width) > 0
- && memcmp (iter->arg[1].c, v->s, iter->src->width) < 0)
+ || memcmp (iter->arg[1].c, v->s, iter->src->width) < 0)
iter->dbl[0] += weight;
iter->dbl[1] += weight;
break;
case N:
+ case N | FSTRING:
iter->dbl[0] += weight;
break;
case NU:
+ case NU | FSTRING:
iter->int1++;
break;
case FIRST:
memcpy (iter->string, v->s, iter->src->width);
iter->int1 = 1;
break;
+ case NMISS:
+ case NMISS | FSTRING:
+ case NUMISS:
+ case NUMISS | FSTRING:
+ /* Our value is not missing or it would have been
+ caught earlier. Nothing to do. */
+ break;
default:
assert (0);
}
for (i = 0; i < agr->break_var_cnt; i++)
{
- int nv = agr->break_vars[i]->nv;
+ struct variable *v = agr->break_vars[i];
memcpy (case_data_rw (output, value_idx),
- &agr->prev_break[value_idx],
- sizeof (union value) * nv);
- value_idx += nv;
+ case_data (&agr->break_case, v->fv),
+ sizeof (union value) * v->nv);
+ value_idx += v->nv;
}
}
&& (i->function & FUNC) != N && (i->function & FUNC) != NU
&& (i->function & FUNC) != NMISS && (i->function & FUNC) != NUMISS)
{
- if (i->function & FSTRING)
+ if (i->dest->type == ALPHA)
memset (v->s, ' ', i->dest->width);
else
v->f = SYSMIS;
switch (i->function)
{
case SUM:
- v->f = i->dbl[0];
+ v->f = i->int1 ? i->dbl[0] : SYSMIS;
break;
case MEAN:
v->f = i->dbl[1] != 0.0 ? i->dbl[0] / i->dbl[1] : SYSMIS;
else
memset (v->s, ' ', i->dest->width);
break;
- case FGT | FSTRING:
- case FLT | FSTRING:
- case FIN | FSTRING:
- case FOUT | FSTRING:
- v->f = i->int2 ? (double) i->int1 / (double) i->int2 : SYSMIS;
- break;
case FGT:
+ case FGT | FSTRING:
case FLT:
+ case FLT | FSTRING:
case FIN:
+ case FIN | FSTRING:
case FOUT:
+ case FOUT | FSTRING:
v->f = i->dbl[1] ? i->dbl[0] / i->dbl[1] : SYSMIS;
break;
case PGT:
v->f = i->dbl[1] ? i->dbl[0] / i->dbl[1] * 100.0 : SYSMIS;
break;
case N:
+ case N | FSTRING:
v->f = i->dbl[0];
break;
case NU:
+ case NU | FSTRING:
v->f = i->int1;
break;
case FIRST:
v->f = i->int1;
break;
case NMISS:
+ case NMISS | FSTRING:
v->f = i->dbl[0];
break;
case NUMISS:
+ case NUMISS | FSTRING:
v->f = i->int1;
break;
default:
/* Resets the state for all the aggregate functions. */
static void
-initialize_aggregate_info (struct agr_proc *agr)
+initialize_aggregate_info (struct agr_proc *agr, const struct ccase *input)
{
struct agr_var *iter;
+ case_destroy (&agr->break_case);
+ case_clone (&agr->break_case, input);
+
for (iter = agr->agr_vars; iter; iter = iter->next)
{
iter->missing = 0;
+ iter->dbl[0] = iter->dbl[1] = iter->dbl[2] = 0.0;
+ iter->int1 = iter->int2 = 0;
switch (iter->function)
{
case MIN:
else
moments1_clear (iter->moments);
break;
- default:
- iter->dbl[0] = iter->dbl[1] = iter->dbl[2] = 0.0;
- iter->int1 = iter->int2 = 0;
- break;
+ default:
+ break;
}
}
}