+Sat Aug 6 21:29:15 2005 Ben Pfaff <blp@gnu.org>
+
+ * factor_stats.c: Needed <config.h> included earlier.
+
+ * percentiles.c: Needed to include <config.h>.
+
+ * val.h: Don't include "config.h".
+
+Sat Aug 6 21:26:27 2005 Ben Pfaff <blp@gnu.org>
+
+ Clean up treatment of missing values by moving all the code into
+ one place. All references to the missing value function were
+ updated, but only major changes are detailed below.
+
+ * Makefile.am: Add missing-values.c, missing-values.h to sources.
+
+ * apply-dict.c: (cmd_apply_dictionary) Use mv_resize().
+
+ * dictionary.c: (dict_create_var) Initialize `miss' member with
+ mv_init().
+ (dict_clone_var) Copy `miss' member with mv_copy().
+
+ * get.c: (mtf_merge_dictionary) Use mv_copy().
+
+ * missing-values.c: New file.
+
+ * missing-values.h: New file.
+
+ * mis-val.c: Rewrite. New version implements updated semantics.
+
+ * pfm-read.c: (read_variables) Rewrite missing value handling.
+
+ * pfm-write.c: (write_variables) Rewrite missing value handling.
+
+ * sfm-read.c: (read_variables) Rewrite missing value handling.
+
+ * sfm-write.c: (write_variable) Rewrite missing value handling.
+
+ * sfmP.h: Include "magic.h" to get definition of
+ second_lowest_value.
+
+ * sysfile-info.c: (describe_variable) Rewrite missing value
+ handling.
+
+ * val.h: Include "magic.h" to get definition of
+ second_lowest_value.
+
+ * var.h: Include "missing-values.h". Drop MISSING_* enums.
+ (struct variable) Remove `miss_type', `missing'. Add `miss'.
+
+ * vars-atr.c: (is_num_user_missing) Removed--use
+ mv_is_num_user_missing().
+ (is_str_user_missing) Removed--use mv_is_str_user_missing().
+ (is_system_missing) Removed--use mv_is_value_system_missing().
+ (is_missing) Removed--use mv_is_value_missing().
+ (is_user_missing) Removed--use mv_is_value_user_missing().
+
Sun Jul 31 14:09:57 2005 Ben Pfaff <blp@gnu.org>
Adopt use of gnulib for portability.
chart_sources = dummy-chart.c
endif
-pspp_SOURCES = $(q_sources_c) $(chart_sources) \
-aggregate.c algorithm.c algorithm.h \
-alloc.c alloc.h apply-dict.c ascii.c autorecode.c bitvector.h \
-calendar.c calendar.h case.c case.h casefile.c casefile.h chart.c \
-chart.h cmdline.c cmdline.h command.c command.def \
-command.h compute.c copyleft.c copyleft.h count.c data-in.c data-in.h \
+pspp_SOURCES = $(q_sources_c) $(chart_sources) aggregate.c algorithm.c \
+algorithm.h alloc.c alloc.h apply-dict.c ascii.c autorecode.c \
+bitvector.h calendar.c calendar.h case.c case.h casefile.c casefile.h \
+chart.c chart.h cmdline.c cmdline.h command.c command.def command.h \
+compute.c copyleft.c copyleft.h count.c data-in.c data-in.h \
data-list.c data-list.h data-out.c date.c debug-print.h descript.c \
-devind.c devind.h dfm-read.c dfm-read.h dfm-write.c dfm-write.h \
-dictionary.c dictionary.h do-if.c do-ifP.h echo.c error.c \
-error.h factor_stats.c factor_stats.h file-handle.h \
-file-type.c filename.c filename.h flip.c font.h format.c format.def \
-format.h formats.c get.c getl.c getl.h glob.c glob.h \
-groff-font.c group.c group.h group_proc.h \
-hash.c hash.h histogram.c histogram.h \
-html.c htmlP.h include.c inpt-pgm.c lexer.c lexer.h levene.c levene.h \
-linked-list.c linked-list.h log.h loop.c magic.c magic.h main.c main.h \
-matrix-data.c mis-val.c misc.c misc.h modify-vars.c \
-moments.c moments.h numeric.c output.c output.h \
-percentiles.c percentiles.h permissions.c \
-pfm-read.c pfm-read.h \
-pfm-write.c pfm-write.h \
-pool.c pool.h postscript.c print.c recode.c \
+devind.c devind.h dfm-read.c dfm-read.h dfm-write.c dfm-write.h \
+dictionary.c dictionary.h do-if.c do-ifP.h echo.c error.c error.h \
+factor_stats.c factor_stats.h file-handle.h file-type.c filename.c \
+filename.h flip.c font.h format.c format.def format.h formats.c get.c \
+getl.c getl.h glob.c glob.h groff-font.c group.c group.h group_proc.h \
+hash.c hash.h histogram.c histogram.h html.c htmlP.h include.c \
+inpt-pgm.c lexer.c lexer.h levene.c levene.h linked-list.c \
+linked-list.h log.h loop.c magic.c magic.h main.c main.h matrix-data.c \
+mis-val.c misc.c misc.h missing-values.c missing-values.h \
+modify-vars.c moments.c moments.h numeric.c output.c output.h \
+percentiles.c percentiles.h permissions.c pfm-read.c pfm-read.h \
+pfm-write.c pfm-write.h pool.c pool.h postscript.c print.c recode.c \
rename-vars.c repeat.c repeat.h sample.c sel-if.c settings.h \
sfm-read.c sfm-read.h sfm-write.c sfm-write.h sfmP.h som.c som.h \
-sort.c sort.h sort-prs.c sort-prs.h \
-split-file.c str.c str.h subclist.c subclist.h \
-sysfile-info.c tab.c tab.h temporary.c mkfile.c mkfile.h \
-title.c val.h val-labs.c value-labels.c value-labels.h \
-var-display.c \
-var-labs.c var.h vars-atr.c vars-prs.c vector.c version.h \
-vfm.c vfm.h vfmP.h weight.c
+sort.c sort.h sort-prs.c sort-prs.h split-file.c str.c str.h \
+subclist.c subclist.h sysfile-info.c tab.c tab.h temporary.c mkfile.c \
+mkfile.h title.c val.h val-labs.c value-labels.c value-labels.h \
+var-display.c var-labs.c var.h vars-atr.c vars-prs.c vector.c \
+version.h vfm.c vfm.h vfmP.h weight.c
pspp_LDADD = \
{
const union value *v = case_data (input, iter->src->fv);
- if ((!iter->include_missing && is_missing (v, iter->src))
+ if ((!iter->include_missing
+ && mv_is_value_missing (&iter->src->miss, v))
|| (iter->include_missing && iter->src->type == NUMERIC
&& v->f == SYSMIS))
{
}
}
- if (s->miss_type != MISSING_NONE && t->width > MAX_SHORT_STRING)
+ if (!mv_is_empty (&s->miss) && t->width > MAX_SHORT_STRING)
msg (SW, _("Cannot apply missing values from source file to "
"long string variable %s."),
s->name);
- else if (s->miss_type != MISSING_NONE)
+ else if (!mv_is_empty (&s->miss))
{
- if (t->width < s->width)
- {
- static const int miss_count[MISSING_COUNT] =
- {
- 0, 1, 2, 3, 2, 1, 1, 3, 2, 2,
- };
-
- int j, k;
-
- for (j = 0; j < miss_count[s->miss_type]; j++)
- for (k = t->width; k < s->width; k++)
- if (s->missing[j].s[k] != ' ')
- goto skip_missing_values;
- }
-
- t->miss_type = s->miss_type;
- memcpy (t->missing, s->missing, sizeof s->missing);
+ if (mv_is_resizable (&s->miss, t->width))
+ {
+ mv_copy (&t->miss, &s->miss);
+ mv_resize (&t->miss, t->width);
+ }
}
- skip_missing_values: ;
if (s->type == NUMERIC)
{
counter++;
continue;
}
- if (cnt->missing >= 2 && is_num_user_missing (cmp, cnt->v[i]))
+ if (cnt->missing >= 2 && mv_is_num_user_missing (&cnt->v[i]->miss, cmp))
{
counter++;
continue;
assert (x != NULL);
for (j = 0; j < x->nvar; j++)
{
- if ((cmd.miss == CRS_TABLE
- && is_missing (case_data (c, x->vars[j]->fv), x->vars[j]))
+ const union value *v = case_data (c, x->vars[j]->fv);
+ const struct missing_values *mv = &x->vars[j]->miss;
+ if ((cmd.miss == CRS_TABLE && mv_is_value_missing (mv, v))
|| (cmd.miss == CRS_INCLUDE
- && is_system_missing (case_data (c, x->vars[j]->fv),
- x->vars[j])))
+ && mv_is_value_system_missing (mv, v)))
{
x->missing += weight;
goto next_crosstab;
/* Note that the first test also rules out SYSMIS. */
if ((value < vr->min || value >= vr->max)
- || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v)))
+ || (cmd.miss == CRS_TABLE
+ && mv_is_num_user_missing (&v->miss, value)))
{
x->missing += weight;
goto next_crosstab;
int r;
for (r = 0; r < n_rows; r++)
- if (is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))
+ if (mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
{
int c;
int c;
for (c = 0; c < n_cols; c++)
- if (is_num_user_missing (cols[c].f, x->vars[COL_VAR]))
+ if (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
{
int r;
s.string = tab_alloc (table, var->print.w);
format_short (s.string, &var->print, v);
s.length = strlen (s.string);
- if (cmd.miss == CRS_REPORT && is_num_user_missing (v->f, var))
+ if (cmd.miss == CRS_REPORT && mv_is_num_user_missing (&var->miss, v->f))
s.string[s.length++] = 'M';
while (s.length && *s.string == ' ')
{
int mark_missing = 0;
double expected_value = row_tot[r] * col_tot[c] / W;
if (cmd.miss == CRS_REPORT
- && (is_num_user_missing (cols[c].f, x->vars[COL_VAR])
- || is_num_user_missing (rows[r].f, x->vars[ROW_VAR])))
+ && (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)
+ || mv_is_num_user_missing (&x->vars[ROW_VAR]->miss,
+ rows[r].f)))
mark_missing = 1;
for (i = 0; i < num_cells; i++)
{
int mark_missing = 0;
if (cmd.miss == CRS_REPORT
- && is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))
+ && mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f))
mark_missing = 1;
for (i = 0; i < num_cells; i++)
int i;
if (cmd.miss == CRS_REPORT && c < n_cols
- && is_num_user_missing (cols[c].f, x->vars[COL_VAR]))
+ && mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f))
mark_missing = 1;
for (i = 0; i < num_cells; i++)
for (vars = t->vars; vars < t->vars + t->var_cnt; vars++)
{
double score = case_num (c, (*vars)->fv);
- if ( score == SYSMIS || (!t->include_user_missing
- && is_num_user_missing(score, *vars)) )
+ if ( score == SYSMIS
+ || (!t->include_user_missing
+ && mv_is_num_user_missing (&(*vars)->miss, score)))
{
all_sysmis = 1;
break;
if (z->mean == SYSMIS || z->std_dev == SYSMIS
|| all_sysmis || input == SYSMIS
- || (!t->include_user_missing && is_num_user_missing(input, z->v)))
+ || (!t->include_user_missing
+ && mv_is_num_user_missing (&z->v->miss, input)))
*output = SYSMIS;
else
*output = (input - z->mean) / z->std_dev;
if (dsc->missing_type != DSC_LISTWISE
&& (x == SYSMIS
|| (!dsc->include_user_missing
- && is_num_user_missing (x, dv->v))))
+ && mv_is_num_user_missing (&dv->v->miss, x))))
{
dv->missing += weight;
continue;
if (dsc->missing_type != DSC_LISTWISE
&& (x == SYSMIS
|| (!dsc->include_user_missing
- && is_num_user_missing (x, dv->v))))
+ && mv_is_num_user_missing (&dv->v->miss, x))))
continue;
if (dv->moments != NULL)
double x = case_num (c, dv->v->fv);
if (x == SYSMIS
- || (!dsc->include_user_missing && is_num_user_missing (x, dv->v)))
+ || (!dsc->include_user_missing
+ && mv_is_num_user_missing (&dv->v->miss, x)))
return 1;
}
return 0;
v->init = 1;
v->reinit = dict_class_from_id (v->name) != DC_SCRATCH;
v->index = d->var_cnt;
- v->miss_type = MISSING_NONE;
+ mv_init (&v->miss, width);
if (v->type == NUMERIC)
{
v->print = f8_2;
the same short name. */
nv->init = 1;
nv->reinit = ov->reinit;
- nv->miss_type = ov->miss_type;
- memcpy (nv->missing, ov->missing, sizeof nv->missing);
+ mv_copy (&nv->miss, &ov->miss);
nv->print = ov->print;
nv->write = ov->write;
val_labs_destroy (nv->val_labs);
else
{
double w = case_num (c, d->weight->fv);
- if ( w < 0.0 || w == SYSMIS || is_num_user_missing(w, d->weight) )
+ if (w < 0.0 || mv_is_num_missing (&d->weight->miss, w))
w = 0.0;
if ( w == 0.0 && *warn_on_invalid ) {
*warn_on_invalid = 0;
/* Function to use for testing for missing values */
-static is_missing_func value_is_missing;
+static is_missing_func *value_is_missing;
/* PERCENTILES */
/* If /MISSING=INCLUDE is set, then user missing values are ignored */
if (cmd.incl == XMN_INCLUDE )
- value_is_missing = is_system_missing;
+ value_is_missing = mv_is_value_system_missing;
else
- value_is_missing = is_missing;
+ value_is_missing = mv_is_value_missing;
if ( cmd.st_n == SYSMIS )
cmd.st_n = 5;
const struct variable *var = dependent_vars[v];
const union value *val = case_data (c, var->fv);
- if ( value_is_missing(val,var) || case_missing )
+ if ( value_is_missing (&var->miss, val) || case_missing )
val = 0;
metrics_calc( &(*foo)->m[v], val, weight, case_no);
const struct variable *var = dependent_vars[v];
const union value *val = case_data (&c, var->fv);
- if ( value_is_missing(val,var))
+ if ( value_is_missing(&var->miss, val))
case_missing = 1;
}
const struct variable *var = dependent_vars[v];
const union value *val = case_data (&c, var->fv);
- if ( value_is_missing(val,var) || case_missing )
+ if ( value_is_missing(&var->miss, val) || case_missing )
val = 0;
metrics_calc(&totals[v], val, weight, case_no);
{
const struct variable *var = v->var[(int) idx - 1];
double value = case_num (c, var->fv);
- return !is_num_user_missing (value, var) ? value : SYSMIS;
+ return !mv_is_num_user_missing (&var->miss, value) ? value : SYSMIS;
}
else
{
num_var v;
{
double d = case_num (c, v->fv);
- return !is_num_user_missing (d, v) ? d : SYSMIS;
+ return !mv_is_num_user_missing (&v->miss, d) ? d : SYSMIS;
}
no_opt string operator STR_VAR ()
if (c != NULL)
{
double x = case_num (c, v->fv);
- return !is_num_user_missing (x, v) ? x : SYSMIS;
+ return !mv_is_num_user_missing (&v->miss, x) ? x : SYSMIS;
}
else
return SYSMIS;
if (c != NULL)
{
double x = case_num (c, v->fv);
- return !is_num_user_missing (x, v) ? x : SYSMIS;
+ return !mv_is_num_user_missing (&v->miss, x) ? x : SYSMIS;
}
else
return SYSMIS;
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA. */
+#include <config.h>
#include "factor_stats.h"
-#include "config.h"
#include "val.h"
#include "hash.h"
#include "algorithm.h"
const struct freq *f = f_;
struct variable *v = v_;
- return !is_missing (&f->v, v);
+ return !mv_is_value_missing (&v->miss, &f->v);
}
/* Summarizes the frequency table data for variable V. */
/* Find out the extremes of the x value */
for ( frq = hsh_first(fh, &hi); frq != 0; frq = hsh_next(fh, &hi) )
{
- if ( is_missing(&frq->v, var))
+ if ( mv_is_value_missing(&var->miss, &frq->v))
continue;
if ( frq->v.f < x_min ) x_min = frq->v.f ;
if (val_labs_count (dv->val_labs)
&& !val_labs_count (mv->val_labs))
mv->val_labs = val_labs_copy (dv->val_labs);
- if (dv->miss_type != MISSING_NONE
- && mv->miss_type == MISSING_NONE)
- copy_missing_values (mv, dv);
+ if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
+ mv_copy (&mv->miss, &dv->miss);
}
if (dv->label && !mv->label)
enum lev_missing missing;
/* Function to test for missing values */
- is_missing_func is_missing;
-
+ is_missing_func *is_missing;
};
/* First pass */
struct variable *v = l->v_dep[i];
const union value *val = case_data (c, v->fv);
- if (l->is_missing(val,v) )
+ if (l->is_missing (&v->miss, val) )
{
return 0;
}
if ( 0 == gs )
continue ;
- if ( ! l->is_missing(v,var))
+ if ( ! l->is_missing(&var->miss, v))
{
levene_z= fabs(v->f - gs->mean);
lz[i].grand_total += levene_z * weight;
struct variable *v = l->v_dep[i];
const union value *val = case_data (c, v->fv);
- if (l->is_missing(val,v) )
+ if (l->is_missing(&v->miss, val) )
{
return 0;
}
if ( 0 == gs )
continue;
- if ( ! l->is_missing(v,var) )
+ if ( ! l->is_missing (&var->miss, v) )
{
levene_z = fabs(v->f - gs->mean);
lz_denominator[i] += weight * pow2(levene_z - gs->lz_mean);
#include "error.h"
#include <stdlib.h>
#include "command.h"
+#include "data-in.h"
#include "error.h"
#include "lexer.h"
#include "magic.h"
#include "debug-print.h"
-/* Variables on MIS VAL. */
-static struct variable **v;
-static int nv;
-
-/* Type of the variables on MIS VAL. */
-static int type;
-
-/* Width of string variables on MIS VAL. */
-static size_t width;
-
-/* Items to fill-in var structs with. */
-static int miss_type;
-static union value missing[3];
-
-static int parse_varnames (void);
-static int parse_numeric (void);
-static int parse_alpha (void);
+static bool parse_number (double *, const struct fmt_spec *);
+/* Parses and executes the MISSING VALUES command. Each
+ slash-separated clause is a variable list followed by a
+ parenthesized list of missing values; an empty list `()' clears
+ the variables' missing values. Numeric lists accept individual
+ numbers and ranges (LO/LOWEST THRU n, n THRU HI/HIGHEST, n THRU
+ m); string lists accept short strings. Returns the result of
+ lex_end_of_command() on success, CMD_PART_SUCCESS_MAYBE if any
+ error was reported along the way. */
int
cmd_missing_values (void)
{
- int i;
+ /* V is NULL whenever no variable list is owned, so that the
+ cleanup at `done' can free it unconditionally, even when the
+ loop body never runs or parse_variables() fails. */
+ struct variable **v = NULL;
+ int nv;
+
+ int retval = CMD_PART_SUCCESS_MAYBE;
+ bool deferred_errors = false;
while (token != '.')
{
- if (!parse_varnames ())
- goto fail;
+ int i;
+
- if (token != ')')
- {
- if ((type == NUMERIC && !parse_numeric ())
- || (type == ALPHA && !parse_alpha ()))
- goto fail;
- }
- else
- miss_type = MISSING_NONE;
+ if (!parse_variables (default_dict, &v, &nv, PV_NONE))
+ goto done;
- if (!lex_match (')'))
- {
- msg (SE, _("`)' expected after value specification."));
- goto fail;
- }
+ if (!lex_match ('('))
+ {
+ lex_error (_("expecting `('"));
+ goto done;
+ }
for (i = 0; i < nv; i++)
- {
- v[i]->miss_type = miss_type;
- memcpy (v[i]->missing, missing, sizeof v[i]->missing);
- }
+ mv_init (&v[i]->miss, v[i]->width);
+
+ if (!lex_match (')'))
+ {
+ struct missing_values mv;
+
+ for (i = 0; i < nv; i++)
+ if (v[i]->type != v[0]->type)
+ {
+ const struct variable *n = v[0]->type == NUMERIC ? v[0] : v[i];
+ const struct variable *s = v[0]->type == NUMERIC ? v[i] : v[0];
+ msg (SE, _("Cannot mix numeric variables (e.g. %s) and "
+ "string variables (e.g. %s) within a single list."),
+ n->name, s->name);
+ goto done;
+ }
+
+ if (v[0]->type == NUMERIC)
+ {
+ mv_init (&mv, 0);
+ while (!lex_match (')'))
+ {
+ double x;
+
+ if (lex_match_id ("LO") || lex_match_id ("LOWEST"))
+ x = LOWEST;
+ else if (!parse_number (&x, &v[0]->print))
+ goto done;
+
+ if (lex_match_id ("THRU"))
+ {
+ double y;
+
+ if (lex_match_id ("HI") || lex_match_id ("HIGHEST"))
+ y = HIGHEST;
+ else if (!parse_number (&y, &v[0]->print))
+ goto done;
+
+ if (x == LOWEST && y == HIGHEST)
+ {
+ msg (SE, _("LO THRU HI is an invalid range."));
+ deferred_errors = true;
+ }
+ else if (x > y)
+ {
+ /* A reversed range could never match any value,
+ so reject it instead of storing it silently. */
+ msg (SE, _("Range %g THRU %g is not valid because %g is "
+ "greater than %g."),
+ x, y, x, y);
+ deferred_errors = true;
+ }
+ else if (!mv_add_num_range (&mv, x, y))
+ deferred_errors = true;
+ }
+ else
+ {
+ if (x == LOWEST)
+ {
+ msg (SE, _("LO or LOWEST must be part of a range."));
+ deferred_errors = true;
+ }
+ else if (!mv_add_num (&mv, x))
+ deferred_errors = true;
+ }
+
+ lex_match (',');
+ }
+ }
+ else
+ {
+ mv_init (&mv, MAX_SHORT_STRING);
+ while (!lex_match (')'))
+ {
+ if (!lex_force_string ())
+ {
+ deferred_errors = true;
+ break;
+ }
+
+ if (ds_length (&tokstr) > MAX_SHORT_STRING)
+ {
+ ds_truncate (&tokstr, MAX_SHORT_STRING);
+ msg (SE, _("Truncating missing value to short string "
+ "length (%d characters)."),
+ MAX_SHORT_STRING);
+ }
+ else
+ ds_rpad (&tokstr, MAX_SHORT_STRING, ' ');
+
+ if (!mv_add_str (&mv, ds_data (&tokstr)))
+ deferred_errors = true;
+
+ lex_get ();
+ lex_match (',');
+ }
+ }
+
+ for (i = 0; i < nv; i++)
+ {
+ if (!mv_is_resizable (&mv, v[i]->width))
+ {
+ msg (SE, _("Missing values provided are too long to assign "
+ "to variable of width %d."),
+ v[i]->width);
+ deferred_errors = true;
+ }
+ else
+ {
+ mv_copy (&v[i]->miss, &mv);
+ mv_resize (&v[i]->miss, v[i]->width);
+ }
+ }
+ }
lex_match ('/');
free (v);
+ v = NULL;
}
-
- return lex_end_of_command ();
-
-fail:
+ retval = lex_end_of_command ();
+
+ done:
free (v);
- return CMD_PART_SUCCESS_MAYBE;
+ if (deferred_errors)
+ retval = CMD_PART_SUCCESS_MAYBE;
+ return retval;
}
-static int
-parse_varnames (void)
+static bool
+parse_number (double *x, const struct fmt_spec *f)
{
- int i;
-
- if (!parse_variables (default_dict, &v, &nv, PV_SAME_TYPE))
- return 0;
- if (!lex_match ('('))
+ if (lex_is_number ())
{
- msg (SE, _("`(' expected after variable name%s."), nv > 1 ? "s" : "");
- return 0;
- }
-
- type = v[0]->type;
- if (type == NUMERIC)
- return 1;
-
- width = v[0]->width;
- for (i = 1; i < nv; i++)
- if (v[i]->type == ALPHA && v[i]->nv != 1)
- {
- msg (SE, _("Long string value specified."));
- return 0;
- }
- else if (v[i]->type == ALPHA && (int) width != v[i]->width)
- {
- msg (SE, _("Short strings must be of equal width."));
- return 0;
- }
-
- return 1;
-}
-
-/* Number or range? */
-enum
- {
- MV_NOR_NOTHING, /* Empty. */
- MV_NOR_NUMBER, /* Single number. */
- MV_NOR_RANGE /* Range. */
- };
-
-/* A single value or a range. */
-struct num_or_range
- {
- int type; /* One of NOR_*. */
- double d[2]; /* d[0]=lower bound or value, d[1]=upper bound. */
- };
-
-/* Parses something of the form <num>, or LO[WEST] THRU <num>, or
- <num> THRU HI[GHEST], or <num> THRU <num>, and sets the appropriate
- members of NOR. Returns success. */
-static int
-parse_num_or_range (struct num_or_range * nor)
-{
- if (lex_match_id ("LO") || lex_match_id ("LOWEST"))
- {
- nor->type = MV_NOR_RANGE;
- if (!lex_force_match_id ("THRU"))
- return 0;
- if (!lex_force_num ())
- return 0;
- nor->d[0] = LOWEST;
- nor->d[1] = tokval;
- }
- else if (lex_is_number ())
- {
- nor->d[0] = tokval;
+ *x = lex_number ();
lex_get ();
-
- if (lex_match_id ("THRU"))
- {
- nor->type = MV_NOR_RANGE;
- if (lex_match_id ("HI") || lex_match_id ("HIGHEST"))
- nor->d[1] = HIGHEST;
- else
- {
- if (!lex_force_num ())
- return 0;
- nor->d[1] = tokval;
- lex_get ();
-
- if (nor->d[0] > nor->d[1])
- {
- msg (SE, _("Range %g THRU %g is not valid because %g is "
- "greater than %g."),
- nor->d[0], nor->d[1], nor->d[0], nor->d[1]);
- return 0;
- }
- }
- }
- else
- nor->type = MV_NOR_NUMBER;
+ return true;
}
- else
- return -1;
-
- return 1;
-}
-
-/* Parses a set of numeric missing values and stores them into
- `missing[]' and `miss_type' global variables. */
-static int
-parse_numeric (void)
-{
- struct num_or_range set[3];
- int r;
-
- set[1].type = set[2].type = MV_NOR_NOTHING;
-
- /* Get first number or range. */
- r = parse_num_or_range (&set[0]);
- if (r < 1)
+ else if (token == T_STRING)
{
- if (r == -1)
- msg (SE, _("Number or range expected."));
- return 0;
- }
-
- /* Get second and third optional number or range. */
- lex_match (',');
- r = parse_num_or_range (&set[1]);
- if (r == 1)
- {
- lex_match (',');
- r = parse_num_or_range (&set[2]);
- }
- if (r == 0)
- return 0;
-
- /* Force range, if present, into set[0]. */
- if (set[1].type == MV_NOR_RANGE)
- {
- struct num_or_range t = set[1];
- set[1] = set[0];
- set[0] = t;
- }
- if (set[2].type == MV_NOR_RANGE)
- {
- struct num_or_range t = set[2];
- set[2] = set[0];
- set[0] = t;
- }
-
- /* Ensure there's not more than one range, or one range
- plus one value. */
- if (set[1].type == MV_NOR_RANGE || set[2].type == MV_NOR_RANGE)
- {
- msg (SE, _("At most one range can exist in the missing values "
- "for any one variable."));
- return 0;
- }
- if (set[0].type == MV_NOR_RANGE && set[2].type != MV_NOR_NOTHING)
- {
- msg (SE, _("At most one individual value can be missing along "
- "with one range."));
- return 0;
- }
-
- /* Set missing[] from set[]. */
- if (set[0].type == MV_NOR_RANGE)
- {
- int x = 0;
-
- if (set[0].d[0] == LOWEST)
- {
- miss_type = MISSING_LOW;
- missing[x++].f = set[0].d[1];
- }
- else if (set[0].d[1] == HIGHEST)
- {
- miss_type = MISSING_HIGH;
- missing[x++].f = set[0].d[0];
- }
- else
- {
- miss_type = MISSING_RANGE;
- missing[x++].f = set[0].d[0];
- missing[x++].f = set[0].d[1];
- }
-
- if (set[1].type == MV_NOR_NUMBER)
- {
- miss_type += 3;
- missing[x].f = set[1].d[0];
- }
- }
- else
- {
- if (set[0].type == MV_NOR_NUMBER)
- {
- miss_type = MISSING_1;
- missing[0].f = set[0].d[0];
- }
- if (set[1].type == MV_NOR_NUMBER)
- {
- miss_type = MISSING_2;
- missing[1].f = set[1].d[0];
- }
- if (set[2].type == MV_NOR_NUMBER)
- {
- miss_type = MISSING_3;
- missing[2].f = set[2].d[0];
- }
- }
-
- return 1;
-}
-
-static int
-parse_alpha (void)
-{
- for (miss_type = 0; token == T_STRING && miss_type < 3; miss_type++)
- {
- if (ds_length (&tokstr) != width)
- {
- msg (SE, _("String is not of proper length."));
- return 0;
- }
- strncpy (missing[miss_type].s, ds_c_str (&tokstr), MAX_SHORT_STRING);
+ struct data_in di;
+ union value v;
+ di.s = ds_data (&tokstr);
+ di.e = ds_end (&tokstr);
+ di.v = &v;
+ di.flags = 0;
+ di.f1 = 1;
+ di.f2 = ds_length (&tokstr);
+ di.format = *f;
+ data_in (&di);
lex_get ();
- lex_match (',');
+ *x = v.f;
+ return true;
}
- if (miss_type < 1)
+ else
{
- msg (SE, _("String expected."));
- return 0;
+ lex_error (_("expecting number or data string"));
+ return false;
}
-
- return 1;
}
-/* Copy the missing values from variable SRC to variable DEST. */
-void
-copy_missing_values (struct variable *dest, const struct variable *src)
-{
- static const int n_values[MISSING_COUNT] =
- {
- 0, 1, 2, 3, 2, 1, 1, 3, 2, 2,
- };
-
- assert (dest->width == src->width);
- assert (src->miss_type >= 0 && src->miss_type < MISSING_COUNT);
-
- {
- int i;
-
- dest->miss_type = src->miss_type;
- for (i = 0; i < n_values[src->miss_type]; i++)
- if (src->type == NUMERIC)
- dest->missing[i].f = src->missing[i].f;
- else
- memcpy (dest->missing[i].s, src->missing[i].s, src->width);
- }
-}
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 2005 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@gnu.org>.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+#include <config.h>
+#include "missing-values.h"
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include "str.h"
+
+/* Sets up MV as an empty set of missing values for a variable
+   WIDTH characters wide (0 for a numeric variable).  Any valid
+   variable width from 0 through MAX_STRING is accepted, even
+   though only numeric and short string variables can actually
+   be given missing values later. */
+void
+mv_init (struct missing_values *mv, int width)
+{
+  assert (0 <= width && width <= MAX_STRING);
+  mv->width = width;
+  mv->type = MV_NONE;
+}
+
+/* Makes MV an exact duplicate of SRC (type, width, and all
+   stored values) by plain structure assignment. */
+void
+mv_copy (struct missing_values *mv, const struct missing_values *src)
+{
+  const struct missing_values snapshot = *src;
+
+  *mv = snapshot;
+}
+
+/* Reports whether MV holds no missing values at all, that is,
+   whether its type is MV_NONE. */
+bool
+mv_is_empty (const struct missing_values *mv)
+{
+  if (mv->type != MV_NONE)
+    return false;
+  return true;
+}
+
+/* Returns the width recorded in MV, i.e. the width of the
+   missing values that MV may contain. */
+int
+mv_get_width (const struct missing_values *mv)
+{
+  const int width = mv->width;
+
+  return width;
+}
+
+/* Tries to add the individual value V to the set MV.
+   Returns true on success.  Returns false, leaving MV
+   untouched, if MV's width is greater than MAX_SHORT_STRING
+   (long string variables never accept missing values) or if
+   MV's type is MV_3 or MV_RANGE_1, which leave no room for
+   another value.  Aborts on an unrecognized type. */
+bool
+mv_add_value (struct missing_values *mv, const union value *v)
+{
+  unsigned slot;
+
+  if (mv->width > MAX_SHORT_STRING)
+    return false;
+
+  switch (mv->type)
+    {
+    case MV_3:
+    case MV_RANGE_1:
+      /* No room left. */
+      return false;
+
+    case MV_NONE:
+    case MV_1:
+    case MV_2:
+    case MV_RANGE:
+      break;
+
+    default:
+      abort ();
+    }
+
+  /* The low two bits of the type index the slot to fill;
+     incrementing the type then records the added value. */
+  slot = mv->type & 3;
+  mv->values[slot] = *v;
+  mv->type++;
+  return true;
+}
+
+/* Attempts to add S to the set of string missing values MV.  S
+   must contain exactly as many characters as MV's width.
+   Returns true if successful, false if MV has no more room for
+   missing values.  (Long string variables never accept missing
+   values.)
+
+   S is copied into a local, properly aligned union value
+   instead of being reinterpreted in place: S need not be
+   aligned for a union value, and it may hold fewer bytes than a
+   union value when MV's width is short.  Bytes beyond MV's
+   width are filled with spaces. */
+bool
+mv_add_str (struct missing_values *mv, const unsigned char s[])
+{
+  union value v;
+  int len;
+
+  assert (mv->width > 0);
+
+  /* Never copy more than the union's string member can hold;
+     over-wide sets are rejected by mv_add_value() anyway. */
+  len = mv->width < (int) sizeof v.s ? mv->width : (int) sizeof v.s;
+  memset (v.s, ' ', sizeof v.s);
+  memcpy (v.s, s, len);
+  return mv_add_value (mv, &v);
+}
+
+/* Attempts to add D to the set of numeric missing values MV.
+   Returns true if successful, false if MV has no more room for
+   missing values.
+
+   D is stored into a local union value rather than having its
+   address cast to `union value *': mv_add_value() copies an
+   entire union, which must not be read out of a bare double. */
+bool
+mv_add_num (struct missing_values *mv, double d)
+{
+  union value v;
+
+  assert (mv->width == 0);
+  v.f = d;
+  return mv_add_value (mv, &v);
+}
+
+/* Tries to add the range [LOW, HIGH] to the numeric set MV.
+   Succeeds, returning true, only when MV's type is MV_NONE or
+   MV_1; for every other valid type there is no room for a range
+   and false is returned with MV unchanged.  Aborts on an
+   unrecognized type.  MV must be numeric (width 0).
+   The relative order of LOW and HIGH is not checked here. */
+bool
+mv_add_num_range (struct missing_values *mv, double low, double high)
+{
+  assert (mv->width == 0);
+
+  switch (mv->type)
+    {
+    case MV_2:
+    case MV_3:
+    case MV_RANGE:
+    case MV_RANGE_1:
+      return false;
+
+    case MV_NONE:
+    case MV_1:
+      break;
+
+    default:
+      abort ();
+    }
+
+  /* The range lives in slots 1 and 2; setting bit 2 of the
+     type marks it as present. */
+  mv->values[2].f = high;
+  mv->values[1].f = low;
+  mv->type |= 4;
+  return true;
+}
+
+/* Reports whether MV holds at least one individual value:
+   true for types MV_1, MV_2, MV_3 and MV_RANGE_1, false for
+   MV_NONE and MV_RANGE.  Aborts on an unrecognized type. */
+bool
+mv_has_value (struct missing_values *mv)
+{
+  bool has_value;
+
+  switch (mv->type)
+    {
+    case MV_NONE:
+    case MV_RANGE:
+      has_value = false;
+      break;
+
+    case MV_1:
+    case MV_2:
+    case MV_3:
+    case MV_RANGE_1:
+      has_value = true;
+      break;
+
+    default:
+      abort ();
+    }
+  return has_value;
+}
+
+/* Takes one individual value out of MV, storing it in *V.
+   The caller must ensure beforehand that mv_has_value()
+   returns true for MV. */
+void
+mv_pop_value (struct missing_values *mv, union value *v)
+{
+  unsigned slot;
+
+  assert (mv_has_value (mv));
+
+  /* The value being removed sits in the slot selected by the
+     low two bits of the decremented type. */
+  slot = (mv->type - 1) & 3;
+  mv->type--;
+  *v = mv->values[slot];
+}
+
+/* Reports whether MV holds a numeric range: true for types
+   MV_RANGE and MV_RANGE_1, false for MV_NONE, MV_1, MV_2 and
+   MV_3.  Aborts on an unrecognized type. */
+bool
+mv_has_range (struct missing_values *mv)
+{
+  bool has_range;
+
+  switch (mv->type)
+    {
+    case MV_NONE:
+    case MV_1:
+    case MV_2:
+    case MV_3:
+      has_range = false;
+      break;
+
+    case MV_RANGE:
+    case MV_RANGE_1:
+      has_range = true;
+      break;
+
+    default:
+      abort ();
+    }
+  return has_range;
+}
+
+/* Takes the numeric range out of MV, storing its bounds in
+   *LOW and *HIGH.  The caller must ensure beforehand that
+   mv_has_range() returns true for MV. */
+void
+mv_pop_range (struct missing_values *mv, double *low, double *high)
+{
+  const union value *bounds;
+
+  assert (mv_has_range (mv));
+
+  /* Slots 1 and 2 hold the lower and upper bound. */
+  bounds = &mv->values[1];
+  *low = bounds[0].f;
+  *high = bounds[1].f;
+
+  /* Keep only the low two bits of the type, dropping the
+     range bit. */
+  mv->type = mv->type & 3;
+}
+
+/* Reports whether slot IDX (0, 1 or 2) of the `values' array in
+   struct missing_values is occupied when the `type' member
+   equals TYPE.  Aborts on an unrecognized TYPE. */
+static bool
+using_element (unsigned type, int idx)
+{
+  bool in_use;
+
+  assert (0 <= idx && idx < 3);
+
+  switch (type)
+    {
+    case MV_NONE:
+      in_use = false;
+      break;
+
+    case MV_1:
+      in_use = idx <= 0;
+      break;
+
+    case MV_2:
+      in_use = idx <= 1;
+      break;
+
+    case MV_RANGE:
+      in_use = idx >= 1;
+      break;
+
+    case MV_3:
+    case MV_RANGE_1:
+      in_use = true;
+      break;
+
+    default:
+      abort ();
+    }
+  return in_use;
+}
+
+/* Tests whether string S can be narrowed from OLD_WIDTH to
+   NEW_WIDTH characters while dropping nothing but padding:
+   returns true only if every character at index NEW_WIDTH
+   through OLD_WIDTH - 1 is a space.  NEW_WIDTH must be less
+   than OLD_WIDTH. */
+static bool
+can_resize_string (const unsigned char *s, int old_width, int new_width)
+{
+  int pos = new_width;
+
+  assert (new_width < old_width);
+  while (pos < old_width)
+    {
+      if (s[pos] != ' ')
+        return false;
+      pos++;
+    }
+  return true;
+}
+
+/* Reports whether mv_resize() may change MV to WIDTH.
+   - WIDTH and MV's width must both be zero or both be nonzero.
+   - A WIDTH above MAX_SHORT_STRING is acceptable only when MV
+     is empty.
+   - Otherwise keeping the width or widening is always fine.
+   - Narrowing is fine only if, in every value slot in use, the
+     characters that would be cut off are all spaces. */
+bool
+mv_is_resizable (struct missing_values *mv, int width)
+{
+  int idx;
+
+  assert ((mv->width == 0) == (width == 0));
+
+  if (mv->type != MV_NONE && width > MAX_SHORT_STRING)
+    return false;
+  if (width >= mv->width)
+    return true;
+
+  /* Narrowing: check each occupied slot in turn. */
+  for (idx = 0; idx < 3; idx++)
+    {
+      if (!using_element (mv->type, idx))
+        continue;
+      if (!can_resize_string (mv->values[idx].s, mv->width, width))
+        return false;
+    }
+  return true;
+}
+
+/* Resizes MV to the given WIDTH.  WIDTH must fit the constraints
+   explained for mv_is_resizable().
+
+   When a non-empty set is widened, the new trailing characters
+   of each value slot are filled with spaces.  An empty set is
+   not padded: it may legitimately be resized to a long string
+   width, and padding up to such a width would write past the
+   end of the value slots.  A non-empty set can only reach
+   widths of at most MAX_SHORT_STRING, so its padding stays in
+   bounds. */
+void
+mv_resize (struct missing_values *mv, int width)
+{
+  assert (mv_is_resizable (mv, width));
+  if (width > mv->width && mv->type != MV_NONE)
+    {
+      int i;
+
+      for (i = 0; i < 3; i++)
+        memset (mv->values[i].s + mv->width, ' ', width - mv->width);
+    }
+  mv->width = width;
+}
+
+/* Reports whether V counts as missing under MV.  A numeric set
+   (width 0) is checked with mv_is_num_missing(), which also
+   treats the system-missing value as missing; a string set is
+   checked with mv_is_str_missing(). */
+bool
+mv_is_value_missing (const struct missing_values *mv, const union value *v)
+{
+  if (mv->width == 0)
+    return mv_is_num_missing (mv, v->f);
+  else
+    return mv_is_str_missing (mv, v->s);
+}
+
+/* Reports whether number D is missing under MV: either D is
+   the system-missing value SYSMIS or D is one of MV's user
+   missing values.  MV must be numeric (width 0). */
+bool
+mv_is_num_missing (const struct missing_values *mv, double d)
+{
+  assert (mv->width == 0);
+  if (d == SYSMIS)
+    return true;
+  return mv_is_num_user_missing (mv, d);
+}
+
+/* Reports whether string S[] is missing under MV.  This simply
+   forwards to mv_is_str_user_missing(), so MV must be a string
+   set and S[] must hold exactly as many characters as MV's
+   width. */
+bool
+mv_is_str_missing (const struct missing_values *mv,
+                   const unsigned char s[])
+{
+  const bool missing = mv_is_str_user_missing (mv, s);
+
+  return missing;
+}
+
+/* Reports whether V is one of the user missing values in MV,
+   dispatching on MV's width: mv_is_num_user_missing() for a
+   numeric set (width 0), mv_is_str_user_missing() otherwise. */
+bool
+mv_is_value_user_missing (const struct missing_values *mv,
+                          const union value *v)
+{
+  if (mv->width == 0)
+    return mv_is_num_user_missing (mv, v->f);
+  else
+    return mv_is_str_user_missing (mv, v->s);
+}
+
+/* Reports whether number D is a user missing value in MV: D
+   equals one of the individual values stored for MV's type, or
+   lies within the closed range held in slots 1 and 2 when the
+   type includes a range.  MV must be numeric (width 0).
+   Aborts on an unrecognized type. */
+bool
+mv_is_num_user_missing (const struct missing_values *mv, double d)
+{
+  const union value *slot = mv->values;
+
+  assert (mv->width == 0);
+  switch (mv->type)
+    {
+    case MV_NONE:
+      return false;
+
+    case MV_3:
+      if (slot[2].f == d)
+        return true;
+      /* Fall through. */
+    case MV_2:
+      if (slot[1].f == d)
+        return true;
+      /* Fall through. */
+    case MV_1:
+      return slot[0].f == d;
+
+    case MV_RANGE_1:
+      if (slot[0].f == d)
+        return true;
+      /* Fall through. */
+    case MV_RANGE:
+      return slot[1].f <= d && d <= slot[2].f;
+    }
+  abort ();
+}
+
+/* Tests whether the string S[] is one of the user-missing values
+   in MV.  MV must hold string missing values (nonzero width),
+   and S[] must be exactly as wide as MV; the comparison covers
+   exactly that many bytes.  Aborts if MV holds a range or an
+   unrecognized type. */
+bool
+mv_is_str_user_missing (const struct missing_values *mv,
+                        const unsigned char s[])
+{
+  const union value *slot = mv->values;
+
+  assert (mv->width > 0);
+  switch (mv->type)
+    {
+    case MV_NONE:
+      return false;
+
+    case MV_3:
+      if (memcmp (slot[2].s, s, mv->width) == 0)
+        return true;
+      /* Fall through. */
+    case MV_2:
+      if (memcmp (slot[1].s, s, mv->width) == 0)
+        return true;
+      /* Fall through. */
+    case MV_1:
+      return memcmp (slot[0].s, s, mv->width) == 0;
+
+    case MV_RANGE:
+    case MV_RANGE_1:
+      /* Range types are not handled for strings. */
+      abort ();
+    }
+  abort ();
+}
+
+/* Tests whether V is the system-missing value.  Only numeric
+   values can be system-missing, so the result is always false
+   when MV holds string missing values (nonzero width). */
+bool
+mv_is_value_system_missing (const struct missing_values *mv,
+                            const union value *v)
+{
+  return mv->width == 0 && v->f == SYSMIS;
+}
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 2005 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@gnu.org>.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+#if !missing_values_h
+#define missing_values_h 1
+
+#include <stdbool.h>
+#include "val.h"
+
+/* Types of user-missing values.
+ Invisible--use access functions defined below instead. */
+enum mv_type
+ {
+ MV_NONE = 0, /* No user-missing values. */
+ MV_1 = 1, /* One user-missing value. */
+ MV_2 = 2, /* Two user-missing values. */
+ MV_3 = 3, /* Three user-missing values. */
+ MV_RANGE = 4, /* A range of user-missing values. */
+ MV_RANGE_1 = 5 /* A range plus an individual value. */
+ };
+
+/* Missing values.
+ Opaque--use access functions defined below.
+ For the range types, values[1] and values[2] are the inclusive
+ low and high bounds, and for MV_RANGE_1 values[0] is the
+ additional individual value. */
+struct missing_values
+ {
+ unsigned type; /* One of enum mv_type: number and kind of values. */
+ int width; /* 0=numeric, otherwise string width. */
+ union value values[3]; /* Missing values. [1],[2] are the range. */
+ };
+
+void mv_init (struct missing_values *, int width);
+void mv_copy (struct missing_values *, const struct missing_values *);
+bool mv_is_empty (const struct missing_values *);
+int mv_get_width (const struct missing_values *);
+
+bool mv_add_value (struct missing_values *, const union value *);
+bool mv_add_str (struct missing_values *, const unsigned char[]);
+bool mv_add_num (struct missing_values *, double);
+bool mv_add_num_range (struct missing_values *, double low, double high);
+
+bool mv_has_value (struct missing_values *);
+void mv_pop_value (struct missing_values *, union value *);
+bool mv_has_range (struct missing_values *);
+void mv_pop_range (struct missing_values *, double *low, double *high);
+
+bool mv_is_resizable (struct missing_values *, int width);
+void mv_resize (struct missing_values *, int width);
+
+typedef bool is_missing_func (const struct missing_values *,
+ const union value *);
+
+/* Is a value system or user missing? */
+bool mv_is_value_missing (const struct missing_values *, const union value *);
+bool mv_is_num_missing (const struct missing_values *, double);
+bool mv_is_str_missing (const struct missing_values *, const unsigned char[]);
+
+/* Is a value user missing? */
+bool mv_is_value_user_missing (const struct missing_values *,
+ const union value *);
+bool mv_is_num_user_missing (const struct missing_values *, double);
+bool mv_is_str_user_missing (const struct missing_values *,
+ const unsigned char[]);
+
+/* Is a value system missing? */
+bool mv_is_value_system_missing (const struct missing_values *,
+ const union value *);
+
+#endif /* missing-values.h */
/* Function to use for testing for missing values */
-static is_missing_func value_is_missing;
+static is_missing_func *value_is_missing;
static void run_oneway(const struct casefile *cf, void *_mode);
/* If /MISSING=INCLUDE is set, then user missing values are ignored */
if (cmd.incl == ONEWAY_INCLUDE )
- value_is_missing = is_system_missing;
+ value_is_missing = mv_is_value_system_missing;
else
- value_is_missing = is_missing;
+ value_is_missing = mv_is_value_missing;
/* What statistics were requested */
if ( cmd.sbc_statistics )
const union value *indep_val = case_data (&c, indep_var->fv);
/* Deal with missing values */
- if ( value_is_missing(indep_val,indep_var) )
+ if ( value_is_missing(&indep_var->miss, indep_val) )
continue;
/* Skip the entire case if /MISSING=LISTWISE is set */
const struct variable *v = vars[i];
const union value *val = case_data (&c, v->fv);
- if (value_is_missing(val,v) )
+ if (value_is_missing(&v->miss, val) )
break;
}
if ( i != n_vars )
hsh_insert ( group_hash, (void *) gs );
}
- if (! value_is_missing(val,v) )
+ if (! value_is_missing(&v->miss, val) )
{
struct group_statistics *totals = &gp->ugs;
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA. */
+#include <config.h>
#include "factor_stats.h"
#include "percentiles.h"
#include "misc.h"
convert_format (r, &fmt[3], &v->write, v);
/* Range missing values. */
- if (match (r, 'B'))
- {
- v->miss_type = MISSING_RANGE;
- v->missing[0] = parse_value (r, v);
- v->missing[1] = parse_value (r, v);
- }
+ if (match (r, 'B'))
+ {
+ double x = read_float (r);
+ double y = read_float (r);
+ mv_add_num_range (&v->miss, x, y);
+ }
else if (match (r, 'A'))
- {
- v->miss_type = MISSING_HIGH;
- v->missing[0] = parse_value (r, v);
- }
+ mv_add_num_range (&v->miss, read_float (r), HIGHEST);
else if (match (r, '9'))
- {
- v->miss_type = MISSING_LOW;
- v->missing[0] = parse_value (r, v);
- }
+ mv_add_num_range (&v->miss, LOWEST, read_float (r));
/* Single missing values. */
- while (match (r, '8'))
- {
- static const int map_next[MISSING_COUNT] =
- {
- MISSING_1, MISSING_2, MISSING_3, -1,
- MISSING_RANGE_1, MISSING_LOW_1, MISSING_HIGH_1,
- -1, -1, -1,
- };
-
- static const int map_ofs[MISSING_COUNT] =
- {
- -1, 0, 1, 2, -1, -1, -1, 2, 1, 1,
- };
-
- v->miss_type = map_next[v->miss_type];
- if (v->miss_type == -1)
- error (r, _("Bad missing values for %s."), v->name);
-
- assert (map_ofs[v->miss_type] != -1);
- v->missing[map_ofs[v->miss_type]] = parse_value (r, v);
- }
+ while (match (r, '8'))
+ {
+ union value value = parse_value (r, v);
+ mv_add_value (&v->miss, &value);
+ }
if (match (r, 'C'))
{
for (i = 0; i < dict_get_var_cnt (dict); i++)
{
- static const char *miss_types[MISSING_COUNT] =
- {
- "", "8", "88", "888", "B ", "9", "A", "B 8", "98", "A8",
- };
-
- const char *m;
- int j;
-
struct variable *v = dict_get_var (dict, i);
+ struct missing_values mv;
if (!buf_write (w, "7", 1) || !write_int (w, v->width)
|| !write_string (w, v->short_name)
|| !write_format (w, &v->print) || !write_format (w, &v->write))
return 0;
- for (m = miss_types[v->miss_type], j = 0; j < (int) strlen (m); j++)
- if ((m[j] != ' ' && !buf_write (w, &m[j], 1))
- || !write_value (w, &v->missing[j], v))
- return 0;
+ /* Write missing values. */
+ mv_copy (&mv, &v->miss);
+ /* Emit each range as a tagged record:
+      "9" <high>        for LOWEST THRU high,
+      "A" <low>         for low THRU HIGHEST,
+      "B" <low> <high>  for a bounded range.
+    The "A" record must carry the lower bound X; the reader
+    supplies HIGHEST itself, so writing Y there would store the
+    HIGHEST sentinel in place of the real bound. */
+ while (mv_has_range (&mv))
+   {
+     double x, y;
+     mv_pop_range (&mv, &x, &y);
+     if (x == LOWEST)
+       {
+         if (!buf_write (w, "9", 1) || !write_float (w, y))
+           return 0;
+       }
+     else if (y == HIGHEST)
+       {
+         if (!buf_write (w, "A", 1) || !write_float (w, x))
+           return 0;
+       }
+     else
+       {
+         if (!buf_write (w, "B", 1) || !write_float (w, x)
+             || !write_float (w, y))
+           return 0;
+       }
+   }
+ while (mv_has_value (&mv))
+ {
+ union value value;
+ mv_pop_value (&mv, &value);
+ if (!buf_write (w, "8", 1) || !write_value (w, &value, v))
+ return 0;
+ }
if (v->label && (!buf_write (w, "C", 1) || !write_string (w, v->label)))
return 0;
case RCD_END:
return NULL;
case RCD_USER:
- if (is_num_user_missing (cmp, v->src))
+ if (mv_is_num_user_missing (&v->src->miss, cmp))
return cp;
break;
case RCD_SINGLE:
if (sv.n_missing_values != 0)
{
flt64 mv[3];
+ int mv_cnt = abs (sv.n_missing_values);
if (vv->width > MAX_SHORT_STRING)
lose ((ME, _("%s: Long string variable %s may not have missing "
"values."),
handle_get_filename (r->fh), vv->name));
- assertive_buf_read (r, mv, sizeof *mv * abs (sv.n_missing_values), 0);
+ assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0);
if (r->reverse_endian && vv->type == NUMERIC)
- for (j = 0; j < abs (sv.n_missing_values); j++)
+ for (j = 0; j < mv_cnt; j++)
bswap_flt64 (&mv[j]);
if (sv.n_missing_values > 0)
{
- vv->miss_type = sv.n_missing_values;
- if (vv->type == NUMERIC)
- for (j = 0; j < sv.n_missing_values; j++)
- vv->missing[j].f = mv[j];
- else
- for (j = 0; j < sv.n_missing_values; j++)
- memcpy (vv->missing[j].s, &mv[j], vv->width);
+ for (j = 0; j < sv.n_missing_values; j++)
+ if (vv->type == NUMERIC)
+ mv_add_num (&vv->miss, mv[j]);
+ else
+ mv_add_str (&vv->miss, (unsigned char *) &mv[j]);
}
else
{
- int x = 0;
-
if (vv->type == ALPHA)
lose ((ME, _("%s: String variable %s may not have missing "
"values specified as a range."),
handle_get_filename (r->fh), vv->name));
if (mv[0] == r->lowest)
- {
- vv->miss_type = MISSING_LOW;
- vv->missing[x++].f = mv[1];
- }
+ mv_add_num_range (&vv->miss, LOWEST, mv[1]);
else if (mv[1] == r->highest)
- {
- vv->miss_type = MISSING_HIGH;
- vv->missing[x++].f = mv[0];
- }
+ mv_add_num_range (&vv->miss, mv[0], HIGHEST);
else
- {
- vv->miss_type = MISSING_RANGE;
- vv->missing[x++].f = mv[0];
- vv->missing[x++].f = mv[1];
- }
+ mv_add_num_range (&vv->miss, mv[0], mv[1]);
if (sv.n_missing_values == -3)
- {
- vv->miss_type += 3;
- vv->missing[x++].f = mv[2];
- }
+ mv_add_num (&vv->miss, mv[2]);
}
}
- else
- vv->miss_type = MISSING_NONE;
if (!parse_format_spec (r, sv.print, &vv->print, vv)
|| !parse_format_spec (r, sv.write, &vv->write, vv))
struct sysfile_variable sv;
/* Missing values. */
+ struct missing_values mv;
flt64 m[3]; /* Missing value values. */
int nm; /* Number of missing values, possibly negative. */
sv.type = v->width;
sv.has_var_label = (v->label != NULL);
- switch (v->miss_type)
+ mv_copy (&mv, &v->miss);
+ nm = 0;
+ if (mv_has_range (&mv))
{
- case MISSING_NONE:
- nm = 0;
- break;
- case MISSING_1:
- case MISSING_2:
- case MISSING_3:
- for (nm = 0; nm < v->miss_type; nm++)
- m[nm] = v->missing[nm].f;
- break;
- case MISSING_RANGE:
- m[0] = v->missing[0].f;
- m[1] = v->missing[1].f;
- nm = -2;
- break;
- case MISSING_LOW:
- m[0] = second_lowest_flt64;
- m[1] = v->missing[0].f;
- nm = -2;
- break;
- case MISSING_HIGH:
- m[0] = v->missing[0].f;
- m[1] = FLT64_MAX;
- nm = -2;
- break;
- case MISSING_RANGE_1:
- m[0] = v->missing[0].f;
- m[1] = v->missing[1].f;
- m[2] = v->missing[2].f;
- nm = -3;
- break;
- case MISSING_LOW_1:
- m[0] = second_lowest_flt64;
- m[1] = v->missing[0].f;
- m[2] = v->missing[1].f;
- nm = -3;
- break;
- case MISSING_HIGH_1:
- m[0] = v->missing[0].f;
- m[1] = second_lowest_flt64;
- m[2] = v->missing[1].f;
- nm = -3;
- break;
- default:
- assert (0);
- abort ();
+ double x, y;
+ mv_pop_range (&mv, &x, &y);
+ m[nm++] = x == LOWEST ? second_lowest_flt64 : x;
+ m[nm++] = y == HIGHEST ? FLT64_MAX : y;
}
+ while (mv_has_value (&mv))
+ {
+ union value value;
+ mv_pop_value (&mv, &value);
+ if (v->type == NUMERIC)
+ m[nm] = value.f;
+ else
+ buf_copy_rpad ((char *) &m[nm], sizeof m[nm], value.s, v->width);
+ nm++;
+ }
+ if (mv_has_range (&v->miss))
+ nm = -nm;
sv.n_missing_values = nm;
write_format_spec (&v->print, &sv.print);
return 0;
}
- if (nm && !buf_write (w, m, sizeof *m * nm))
+ if (nm && !buf_write (w, m, sizeof *m * abs (nm)))
return 0;
if (v->type == ALPHA && v->width > (int) sizeof (flt64))
#endif
/* Figure out SYSMIS value for flt64. */
+#include "magic.h"
#if SIZEOF_DOUBLE == 8
#define second_lowest_flt64 second_lowest_value
#else
}
/* Missing values if any. */
- if (v->miss_type != MISSING_NONE)
+ if (!mv_is_empty (&v->miss))
{
- char buf[80];
- char *cp = stpcpy (buf, _("Missing Values: "));
-
- if (v->type == NUMERIC)
- switch (v->miss_type)
- {
- case MISSING_1:
- sprintf (cp, "%g", v->missing[0].f);
- break;
- case MISSING_2:
- sprintf (cp, "%g; %g", v->missing[0].f, v->missing[1].f);
- break;
- case MISSING_3:
- sprintf (cp, "%g; %g; %g", v->missing[0].f,
- v->missing[1].f, v->missing[2].f);
- break;
- case MISSING_RANGE:
- sprintf (cp, "%g THRU %g", v->missing[0].f, v->missing[1].f);
- break;
- case MISSING_LOW:
- sprintf (cp, "LOWEST THRU %g", v->missing[0].f);
- break;
- case MISSING_HIGH:
- sprintf (cp, "%g THRU HIGHEST", v->missing[0].f);
- break;
- case MISSING_RANGE_1:
- sprintf (cp, "%g THRU %g; %g",
- v->missing[0].f, v->missing[1].f, v->missing[2].f);
- break;
- case MISSING_LOW_1:
- sprintf (cp, "LOWEST THRU %g; %g",
- v->missing[0].f, v->missing[1].f);
- break;
- case MISSING_HIGH_1:
- sprintf (cp, "%g THRU HIGHEST; %g",
- v->missing[0].f, v->missing[1].f);
- break;
- default:
- assert (0);
- }
- else
- {
- int i;
-
- for (i = 0; i < v->miss_type; i++)
- {
- if (i != 0)
- cp = stpcpy (cp, "; ");
- *cp++ = '"';
- memcpy (cp, v->missing[i].s, v->width);
+ char buf[128];
+ char *cp;
+ struct missing_values mv;
+ int cnt = 0;
+
+ cp = stpcpy (buf, _("Missing Values: "));
+ mv_copy (&mv, &v->miss);
+ if (mv_has_range (&mv))
+ {
+ double x, y;
+ mv_pop_range (&mv, &x, &y);
+ if (x == LOWEST)
+ cp += nsprintf (cp, "LOWEST THRU %g", y);
+ else if (y == HIGHEST)
+ cp += nsprintf (cp, "%g THRU HIGHEST", x);
+ else
+ cp += nsprintf (cp, "%g THRU %g", x, y);
+ cnt++;
+ }
+ while (mv_has_value (&mv))
+ {
+ union value value;
+ mv_pop_value (&mv, &value);
+ if (cnt++ > 0)
+ cp += nsprintf (cp, "; ");
+ if (v->type == NUMERIC)
+ cp += nsprintf (cp, "%g", value.f);
+ else
+ {
+ *cp++ = '"';
+ memcpy (cp, value.s, v->width);
cp += v->width;
*cp++ = '"';
- }
- *cp = 0;
- }
+ *cp = '\0';
+ }
+ }
tab_joint_text (t, 1, r, 2, r, TAB_LEFT, buf);
r++;
/* Function to use for testing for missing values */
-static is_missing_func value_is_missing;
+static is_missing_func *value_is_missing;
/* Variable for the GROUPS subcommand, if given. */
static struct variable *indep_var;
/* If /MISSING=INCLUDE is set, then user missing values are ignored */
if (cmd.incl == TTS_INCLUDE )
- value_is_missing = is_system_missing;
+ value_is_missing = mv_is_value_system_missing;
else
- value_is_missing = is_missing;
+ value_is_missing = mv_is_value_missing;
bad_weight_warn = 1;
struct variable *v = cmd->v_variables[i];
const union value *val = case_data (c, v->fv);
- if (value_is_missing(val,v) )
+ if (value_is_missing(&v->miss, val) )
{
return 0;
}
if ( cmd->sbc_groups )
{
const union value *gv = case_data (c, indep_var->fv);
- if ( value_is_missing(gv,indep_var) )
+ if ( value_is_missing(&indep_var->miss, gv) )
{
return 0;
}
gs= &group_proc_get (cmd->v_variables[i])->ugs;
- if (! value_is_missing(val,v) )
+ if (! value_is_missing(&v->miss, val) )
{
gs->n+=weight;
gs->sum+=weight * val->f;
struct variable *v = cmd->v_variables[i];
const union value *val = case_data (c, v->fv);
- if (value_is_missing(val,v) )
+ if (value_is_missing(&v->miss, val) )
{
return 0;
}
gs= &group_proc_get (cmd->v_variables[i])->ugs;
- if ( ! value_is_missing(val,v))
+ if ( ! value_is_missing(&v->miss, val))
gs->sum_diff += weight * (val->f - cmd->n_testval[0]);
}
const union value *val0 = case_data (c, v0->fv);
const union value *val1 = case_data (c, v1->fv);
- if ( value_is_missing(val0,v0) ||
- value_is_missing(val1,v1) )
+ if ( value_is_missing(&v0->miss, val0) ||
+ value_is_missing(&v1->miss, val1) )
{
return 0;
}
const union value *val0 = case_data (c, v0->fv);
const union value *val1 = case_data (c, v1->fv);
- if ( ( !value_is_missing(val0,v0) && !value_is_missing(val1,v1) ) )
+ if ( ( !value_is_missing(&v0->miss, val0)
+ && !value_is_missing(&v1->miss, val1) ) )
{
pairs[i].n += weight;
pairs[i].sum[0] += weight * val0->f;
const double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn);
- if ( value_is_missing(gv,indep_var) )
+ if ( value_is_missing(&indep_var->miss, gv) )
{
return 0;
}
struct variable *v = cmd->v_variables[i];
const union value *val = case_data (c, v->fv);
- if (value_is_missing(val,v) )
+ if (value_is_missing(&v->miss, val) )
{
return 0;
}
if ( ! gs )
return 0;
- if ( !value_is_missing(val,var) )
+ if ( !value_is_missing(&var->miss, val) )
{
gs->n+=weight;
gs->sum+=weight * val->f;
#define val_h 1
#include <float.h>
-#include "config.h"
+#include "magic.h"
/* Values. */
#include "config.h"
#include <stdbool.h>
#include "format.h"
+#include "missing-values.h"
#include "val.h"
-
-
/* Script variables. */
/* Variable type. */
(STRING is pre-empted by lexer.h.) */
};
-/* Types of missing values. Order is significant, see
- mis-val.c:parse_numeric(), sfm-read.c, sfm-write.c,
- sysfile-info.c:cmd_sysfile_info(), mis-val.c:copy_missing_values(),
- pfm-read.c:read_variables(), pfm-write.c:write_variables(),
- apply-dict.c:cmd_apply_dictionary(), and more (?). */
-enum
- {
- MISSING_NONE, /* No user-missing values. */
- MISSING_1, /* One user-missing value. */
- MISSING_2, /* Two user-missing values. */
- MISSING_3, /* Three user-missing values. */
- MISSING_RANGE, /* [a,b]. */
- MISSING_LOW, /* (-inf,a]. */
- MISSING_HIGH, /* (a,+inf]. */
- MISSING_RANGE_1, /* [a,b], c. */
- MISSING_LOW_1, /* (-inf,a], b. */
- MISSING_HIGH_1, /* (a,+inf), b. */
- MISSING_COUNT
- };
-
-
/* A variable's dictionary entry. */
struct variable
{
int index; /* Dictionary index. */
/* Missing values. */
- int miss_type; /* One of the MISSING_* constants. */
- union value missing[3]; /* User-missing value. */
+ struct missing_values miss; /* Missing values. */
/* Display formats. */
struct fmt_spec print; /* Default format for PRINT. */
void cancel_temporary (void);
\f
-/* Functions. */
-
struct ccase;
void dump_split_vars (const struct ccase *);
-typedef int (* is_missing_func )(const union value *, const struct variable *);
-
-int is_num_user_missing (double, const struct variable *);
-int is_str_user_missing (const unsigned char[], const struct variable *);
-int is_missing (const union value *, const struct variable *);
-int is_system_missing (const union value *, const struct variable *);
-int is_user_missing (const union value *, const struct variable *);
-void copy_missing_values (struct variable *dest, const struct variable *src);
\f
/* Transformations. */
pgm_state = STATE_INIT;
}
-
-/* Return nonzero only if X is a user-missing value for numeric
- variable V. */
-inline int
-is_num_user_missing (double x, const struct variable *v)
-{
- switch (v->miss_type)
- {
- case MISSING_NONE:
- return 0;
- case MISSING_1:
- return x == v->missing[0].f;
- case MISSING_2:
- return x == v->missing[0].f || x == v->missing[1].f;
- case MISSING_3:
- return (x == v->missing[0].f || x == v->missing[1].f
- || x == v->missing[2].f);
- case MISSING_RANGE:
- return x >= v->missing[0].f && x <= v->missing[1].f;
- case MISSING_LOW:
- return x <= v->missing[0].f;
- case MISSING_HIGH:
- return x >= v->missing[0].f;
- case MISSING_RANGE_1:
- return ((x >= v->missing[0].f && x <= v->missing[1].f)
- || x == v->missing[2].f);
- case MISSING_LOW_1:
- return x <= v->missing[0].f || x == v->missing[1].f;
- case MISSING_HIGH_1:
- return x >= v->missing[0].f || x == v->missing[1].f;
- default:
- assert (0);
- }
- abort ();
-}
-
-/* Return nonzero only if string S is a user-missing variable for
- string variable V. */
-inline int
-is_str_user_missing (const unsigned char s[], const struct variable *v)
-{
- /* FIXME: should these be memcmp()? */
- switch (v->miss_type)
- {
- case MISSING_NONE:
- return 0;
- case MISSING_1:
- return !strncmp (s, v->missing[0].s, v->width);
- case MISSING_2:
- return (!strncmp (s, v->missing[0].s, v->width)
- || !strncmp (s, v->missing[1].s, v->width));
- case MISSING_3:
- return (!strncmp (s, v->missing[0].s, v->width)
- || !strncmp (s, v->missing[1].s, v->width)
- || !strncmp (s, v->missing[2].s, v->width));
- default:
- assert (0);
- }
- abort ();
-}
-
-/* Return nonzero only if value VAL is system-missing for variable
- V. */
-int
-is_system_missing (const union value *val, const struct variable *v)
-{
- return v->type == NUMERIC && val->f == SYSMIS;
-}
-
-/* Return nonzero only if value VAL is system- or user-missing for
- variable V. */
-int
-is_missing (const union value *val, const struct variable *v)
-{
- switch (v->type)
- {
- case NUMERIC:
- if (val->f == SYSMIS)
- return 1;
- return is_num_user_missing (val->f, v);
- case ALPHA:
- return is_str_user_missing (val->s, v);
- default:
- assert (0);
- }
- abort ();
-}
-
-/* Return nonzero only if value VAL is user-missing for variable V. */
-int
-is_user_missing (const union value *val, const struct variable *v)
-{
- switch (v->type)
- {
- case NUMERIC:
- return is_num_user_missing (val->f, v);
- case ALPHA:
- return is_str_user_missing (val->s, v);
- default:
- assert (0);
- }
- abort ();
-}
\f
/* Returns true if NAME is an acceptable name for a variable,
false otherwise. If ISSUE_ERROR is true, issues an
if (filter_var != NULL)
{
double f = case_num (c, filter_var->fv);
- if (f == 0.0 || f == SYSMIS || is_num_user_missing (f, filter_var))
+ if (f == 0.0 || mv_is_num_missing (&filter_var->miss, f))
return 1;
}
+Sat Aug 6 17:32:39 2005 Ben Pfaff <blp@gnu.org>
+
+ * command/missing-values.sh: New test.
+
+ * Makefile.am: Add new test.
+
Mon Aug 1 21:51:46 2005 Ben Pfaff <blp@gnu.org>
* bugs/big-input-2.sh: Don't use 1...100000 (etc.) with Perl
command/loop.sh \
command/longvars.sh \
command/match-files.sh \
+ command/missing-values.sh \
command/no_case_size.sh \
command/oneway.sh \
command/oneway-missing.sh \
--- /dev/null
+#!/bin/sh
+
+# This program tests MISSING VALUES
+
+TEMPDIR=/tmp/pspp-tst-$$
+TESTFILE=$TEMPDIR/`basename $0`.sps
+
+here=`pwd`;
+
+# ensure that top_srcdir is absolute
+cd $top_srcdir; top_srcdir=`pwd`
+
+STAT_CONFIG_PATH=$top_srcdir/config
+export STAT_CONFIG_PATH
+
+
+# Remove the scratch directory.  Change to / first so the current
+# directory is not inside the tree being deleted.
+cleanup()
+{
+    cd /
+    rm -rf "$TEMPDIR"
+}
+
+
+# Report the current activity as FAILED, clean up, and exit with
+# status 1.
+fail()
+{
+    echo "$activity"
+    echo FAILED
+    cleanup
+    exit 1
+}
+
+
+no_result()
+{
+ echo $activity
+ echo NO RESULT;
+ cleanup;
+ exit 2;
+}
+
+# Success: remove the scratch directory and exit with status 0.
+pass()
+{
+    cleanup
+    exit 0
+}
+
+mkdir -p $TEMPDIR
+
+cd $TEMPDIR
+
+# Copy this file --- it's shared with another test
+activity="create data"
+cp $top_srcdir/tests/data-list.data $TEMPDIR
+if [ $? -ne 0 ] ; then no_result ; fi
+
+
+activity="create program"
+cat > $TEMPDIR/missing-values.stat << foobar
+DATA LIST NOTABLE/str1 1-5 (A) str2 6-8 (A) date1 9-19 (DATE) num1 20-25.
+
+/* Valid: numeric missing values.
+MISSING VALUES date1 num1 (1).
+MISSING VALUES date1 num1 (1, 2).
+MISSING VALUES date1 num1 (1, 2, 3).
+
+/* Valid: numeric missing values using the first variable's format.
+MISSING VALUES num1 date1 ('1').
+MISSING VALUES num1 date1 ('1', '2').
+MISSING VALUES num1 date1 ('1', '2', '3').
+MISSING VALUES date1 num1 ('06-AUG-05').
+MISSING VALUES date1 num1 ('06-AUG-05', '01-OCT-78').
+MISSING VALUES date1 num1 ('06-AUG-05', '01-OCT-78', '14-FEB-81').
+
+/* Valid: ranges of numeric missing values.
+MISSING VALUES num1 (1 THRU 2).
+MISSING VALUES num1 (LO THRU 2).
+MISSING VALUES num1 (LOWEST THRU 2).
+MISSING VALUES num1 (1 THRU HI).
+MISSING VALUES num1 (1 THRU HIGHEST).
+
+/* Valid: a range of numeric missing values, plus an individual value.
+MISSING VALUES num1 (1 THRU 2, 3).
+MISSING VALUES num1 (LO THRU 2, 3).
+MISSING VALUES num1 (LOWEST THRU 2, 3).
+MISSING VALUES num1 (1 THRU HI, -1).
+MISSING VALUES num1 (1 THRU HIGHEST, -1).
+
+/* Valid: string missing values.
+MISSING VALUES str1 str2 ('abc ','def').
+
+/* Invalid: too long for str2.
+MISSING VALUES str1 str2 ('abcde').
+
+/* Invalid: no string ranges.
+MISSING VALUES str1 ('a' THRU 'z').
+
+/* Invalid: mixing string and numeric variables.
+MISSING VALUES str1 num1 ('123').
+
+/* Valid: may mix variable types when clearing missing values.
+MISSING VALUES ALL ().
+
+foobar
+if [ $? -ne 0 ] ; then no_result ; fi
+
+
+activity="run program"
+$SUPERVISOR $here/../src/pspp --testing-mode -o raw-ascii --testing-mode $TEMPDIR/missing-values.stat > $TEMPDIR/errs
+# Note vv --- there are errors in input. Therefore, the command must FAIL
+if [ $? -eq 0 ] ; then fail ; fi
+
+activity="compare error messages"
+diff -w $TEMPDIR/errs - <<EOF
+$TEMPDIR/missing-values.stat:34: error: MISSING VALUES: Missing values provided are too long to assign to variable of width 3.
+$TEMPDIR/missing-values.stat:34: warning: Skipping the rest of this command. Part of this command may have been executed.
+$TEMPDIR/missing-values.stat:37: error: MISSING VALUES: Syntax error expecting string at \`THRU'.
+$TEMPDIR/missing-values.stat:37: error: MISSING VALUES: THRU is not a variable name.
+$TEMPDIR/missing-values.stat:37: warning: Skipping the rest of this command. Part of this command may have been executed.
+$TEMPDIR/missing-values.stat:40: error: MISSING VALUES: Cannot mix numeric variables (e.g. num1) and string variables (e.g. str1) within a single list.
+$TEMPDIR/missing-values.stat:40: warning: Skipping the rest of this command. Part of this command may have been executed.
+EOF
+if [ $? -ne 0 ] ; then fail ; fi
+
+
+pass;