From: Ben Pfaff Date: Sun, 7 Aug 2005 04:39:27 +0000 (+0000) Subject: Clean up treatment of missing values by moving all the code into X-Git-Tag: sav-api~2235 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=933b760efccdfa26f14254f1fae002ea3b0a1495;p=pspp Clean up treatment of missing values by moving all the code into one place. All references to the missing value function were updated, but only major changes are detailed below. --- diff --git a/src/ChangeLog b/src/ChangeLog index f034dd45fd..fb536fd127 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,60 @@ +Sat Aug 6 21:29:15 2005 Ben Pfaff + + * factor_stats.c: Needed <config.h> included earlier. + + * percentiles.c: Needed to include <config.h>. + + * val.h: Don't include "config.h". + +Sat Aug 6 21:26:27 2005 Ben Pfaff + + Clean up treatment of missing values by moving all the code into + one place. All references to the missing value function were + updated, but only major changes are detailed below. + + * Makefile.am: Add missing-values.c, missing-values.h to sources. + + * apply-dict.c: (cmd_apply_dictionary) Use mv_resize(). + + * dictionary.c: (dict_create_var) Initialize `miss' member with + mv_init(). + (dict_clone_var) Copy `miss' member with mv_copy(). + + * get.c: (mtf_merge_dictionary) Use mv_copy(). + + * missing-values.c: New file. + + * missing-values.h: New file. + + * mis-val.c: Rewrite. New version implements updated semantics. + + * pfm-read.c: (read_variables) Rewrite missing value handling. + + * pfm-write.c: (write_variables) Rewrite missing value handling. + + * sfm-read.c: (read_variables) Rewrite missing value handling. + + * sfm-write.c: (write_variable) Rewrite missing value handling. + + * sfmP.h: Include "magic.h" to get definition of + second_lowest_value. + + * sysfile-info.c: (describe_variable) Rewrite missing value + handling. + + * val.h: Include "magic.h" to get definition of + second_lowest_value. + + * var.h: Include "missing-values.h". Drop MISSING_* enums. 
+ (struct variable) Remove `miss_type', `missing'. Add `miss'. + + * vars-atr.c: (is_num_user_missing) Removed--use + mv_is_num_user_missing(). + (is_str_user_missing) Removed--use mv_is_str_user_missing(). + (is_system_missing) Removed--use mv_is_value_system_missing(). + (is_missing) Removed--use mv_is_value_missing(). + (is_user_missing) Removed--use mv_is_value_user_missing(). + Sun Jul 31 14:09:57 2005 Ben Pfaff Adopt use of gnulib for portability. diff --git a/src/Makefile.am b/src/Makefile.am index dcb6d078f5..b6562c1665 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -41,37 +41,31 @@ else chart_sources = dummy-chart.c endif -pspp_SOURCES = $(q_sources_c) $(chart_sources) \ -aggregate.c algorithm.c algorithm.h \ -alloc.c alloc.h apply-dict.c ascii.c autorecode.c bitvector.h \ -calendar.c calendar.h case.c case.h casefile.c casefile.h chart.c \ -chart.h cmdline.c cmdline.h command.c command.def \ -command.h compute.c copyleft.c copyleft.h count.c data-in.c data-in.h \ +pspp_SOURCES = $(q_sources_c) $(chart_sources) aggregate.c algorithm.c \ +algorithm.h alloc.c alloc.h apply-dict.c ascii.c autorecode.c \ +bitvector.h calendar.c calendar.h case.c case.h casefile.c casefile.h \ +chart.c chart.h cmdline.c cmdline.h command.c command.def command.h \ +compute.c copyleft.c copyleft.h count.c data-in.c data-in.h \ data-list.c data-list.h data-out.c date.c debug-print.h descript.c \ -devind.c devind.h dfm-read.c dfm-read.h dfm-write.c dfm-write.h \ -dictionary.c dictionary.h do-if.c do-ifP.h echo.c error.c \ -error.h factor_stats.c factor_stats.h file-handle.h \ -file-type.c filename.c filename.h flip.c font.h format.c format.def \ -format.h formats.c get.c getl.c getl.h glob.c glob.h \ -groff-font.c group.c group.h group_proc.h \ -hash.c hash.h histogram.c histogram.h \ -html.c htmlP.h include.c inpt-pgm.c lexer.c lexer.h levene.c levene.h \ -linked-list.c linked-list.h log.h loop.c magic.c magic.h main.c main.h \ -matrix-data.c mis-val.c misc.c misc.h 
modify-vars.c \ -moments.c moments.h numeric.c output.c output.h \ -percentiles.c percentiles.h permissions.c \ -pfm-read.c pfm-read.h \ -pfm-write.c pfm-write.h \ -pool.c pool.h postscript.c print.c recode.c \ +devind.c devind.h dfm-read.c dfm-read.h dfm-write.c dfm-write.h \ +dictionary.c dictionary.h do-if.c do-ifP.h echo.c error.c error.h \ +factor_stats.c factor_stats.h file-handle.h file-type.c filename.c \ +filename.h flip.c font.h format.c format.def format.h formats.c get.c \ +getl.c getl.h glob.c glob.h groff-font.c group.c group.h group_proc.h \ +hash.c hash.h histogram.c histogram.h html.c htmlP.h include.c \ +inpt-pgm.c lexer.c lexer.h levene.c levene.h linked-list.c \ +linked-list.h log.h loop.c magic.c magic.h main.c main.h matrix-data.c \ +mis-val.c misc.c misc.h missing-values.c missing-values.h \ +modify-vars.c moments.c moments.h numeric.c output.c output.h \ +percentiles.c percentiles.h permissions.c pfm-read.c pfm-read.h \ +pfm-write.c pfm-write.h pool.c pool.h postscript.c print.c recode.c \ rename-vars.c repeat.c repeat.h sample.c sel-if.c settings.h \ sfm-read.c sfm-read.h sfm-write.c sfm-write.h sfmP.h som.c som.h \ -sort.c sort.h sort-prs.c sort-prs.h \ -split-file.c str.c str.h subclist.c subclist.h \ -sysfile-info.c tab.c tab.h temporary.c mkfile.c mkfile.h \ -title.c val.h val-labs.c value-labels.c value-labels.h \ -var-display.c \ -var-labs.c var.h vars-atr.c vars-prs.c vector.c version.h \ -vfm.c vfm.h vfmP.h weight.c +sort.c sort.h sort-prs.c sort-prs.h split-file.c str.c str.h \ +subclist.c subclist.h sysfile-info.c tab.c tab.h temporary.c mkfile.c \ +mkfile.h title.c val.h val-labs.c value-labels.c value-labels.h \ +var-display.c var-labs.c var.h vars-atr.c vars-prs.c vector.c \ +version.h vfm.c vfm.h vfmP.h weight.c pspp_LDADD = \ diff --git a/src/aggregate.c b/src/aggregate.c index 2c1495f0e2..bc554b97bf 100644 --- a/src/aggregate.c +++ b/src/aggregate.c @@ -726,7 +726,8 @@ accumulate_aggregate_info (struct agr_proc *agr, { const 
union value *v = case_data (input, iter->src->fv); - if ((!iter->include_missing && is_missing (v, iter->src)) + if ((!iter->include_missing + && mv_is_value_missing (&iter->src->miss, v)) || (iter->include_missing && iter->src->type == NUMERIC && v->f == SYSMIS)) { diff --git a/src/apply-dict.c b/src/apply-dict.c index 74c1642b96..473daf0148 100644 --- a/src/apply-dict.c +++ b/src/apply-dict.c @@ -129,31 +129,18 @@ cmd_apply_dictionary (void) } } - if (s->miss_type != MISSING_NONE && t->width > MAX_SHORT_STRING) + if (!mv_is_empty (&s->miss) && t->width > MAX_SHORT_STRING) msg (SW, _("Cannot apply missing values from source file to " "long string variable %s."), s->name); - else if (s->miss_type != MISSING_NONE) + else if (!mv_is_empty (&s->miss)) { - if (t->width < s->width) - { - static const int miss_count[MISSING_COUNT] = - { - 0, 1, 2, 3, 2, 1, 1, 3, 2, 2, - }; - - int j, k; - - for (j = 0; j < miss_count[s->miss_type]; j++) - for (k = t->width; k < s->width; k++) - if (s->missing[j].s[k] != ' ') - goto skip_missing_values; - } - - t->miss_type = s->miss_type; - memcpy (t->missing, s->missing, sizeof s->missing); + if (mv_is_resizable (&s->miss, t->width)) + { + mv_copy (&t->miss, &s->miss); + mv_resize (&t->miss, t->width); + } } - skip_missing_values: ; if (s->type == NUMERIC) { diff --git a/src/count.c b/src/count.c index bab59f90c7..90275084a6 100644 --- a/src/count.c +++ b/src/count.c @@ -385,7 +385,7 @@ count_numeric (struct counting * cnt, struct ccase * c) counter++; continue; } - if (cnt->missing >= 2 && is_num_user_missing (cmp, cnt->v[i])) + if (cnt->missing >= 2 && mv_is_num_user_missing (&cnt->v[i]->miss, cmp)) { counter++; continue; diff --git a/src/crosstabs.q b/src/crosstabs.q index 09873e85e1..21d2d3df4c 100644 --- a/src/crosstabs.q +++ b/src/crosstabs.q @@ -576,11 +576,11 @@ calc_general (struct ccase *c, void *aux UNUSED) assert (x != NULL); for (j = 0; j < x->nvar; j++) { - if ((cmd.miss == CRS_TABLE - && is_missing (case_data (c, 
x->vars[j]->fv), x->vars[j])) + const union value *v = case_data (c, x->vars[j]->fv); + const struct missing_values *mv = &x->vars[j]->miss; + if ((cmd.miss == CRS_TABLE && mv_is_value_missing (mv, v)) || (cmd.miss == CRS_INCLUDE - && is_system_missing (case_data (c, x->vars[j]->fv), - x->vars[j]))) + && mv_is_value_system_missing (mv, v))) { x->missing += weight; goto next_crosstab; @@ -650,7 +650,8 @@ calc_integer (struct ccase *c, void *aux UNUSED) /* Note that the first test also rules out SYSMIS. */ if ((value < vr->min || value >= vr->max) - || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v))) + || (cmd.miss == CRS_TABLE + && mv_is_num_user_missing (&v->miss, value))) { x->missing += weight; goto next_crosstab; @@ -1410,7 +1411,7 @@ delete_missing (void) int r; for (r = 0; r < n_rows; r++) - if (is_num_user_missing (rows[r].f, x->vars[ROW_VAR])) + if (mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f)) { int c; @@ -1424,7 +1425,7 @@ delete_missing (void) int c; for (c = 0; c < n_cols; c++) - if (is_num_user_missing (cols[c].f, x->vars[COL_VAR])) + if (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)) { int r; @@ -1657,7 +1658,7 @@ table_value_missing (struct tab_table *table, int c, int r, unsigned char opt, s.string = tab_alloc (table, var->print.w); format_short (s.string, &var->print, v); s.length = strlen (s.string); - if (cmd.miss == CRS_REPORT && is_num_user_missing (v->f, var)) + if (cmd.miss == CRS_REPORT && mv_is_num_user_missing (&var->miss, v->f)) s.string[s.length++] = 'M'; while (s.length && *s.string == ' ') { @@ -1740,8 +1741,9 @@ display_crosstabulation (void) int mark_missing = 0; double expected_value = row_tot[r] * col_tot[c] / W; if (cmd.miss == CRS_REPORT - && (is_num_user_missing (cols[c].f, x->vars[COL_VAR]) - || is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))) + && (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f) + || mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, + rows[r].f))) 
mark_missing = 1; for (i = 0; i < num_cells; i++) { @@ -1806,7 +1808,7 @@ display_crosstabulation (void) int mark_missing = 0; if (cmd.miss == CRS_REPORT - && is_num_user_missing (rows[r].f, x->vars[ROW_VAR])) + && mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f)) mark_missing = 1; for (i = 0; i < num_cells; i++) @@ -1862,7 +1864,7 @@ display_crosstabulation (void) int i; if (cmd.miss == CRS_REPORT && c < n_cols - && is_num_user_missing (cols[c].f, x->vars[COL_VAR])) + && mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)) mark_missing = 1; for (i = 0; i < num_cells; i++) diff --git a/src/descript.c b/src/descript.c index fedba7f693..05e0767d40 100644 --- a/src/descript.c +++ b/src/descript.c @@ -583,8 +583,9 @@ descriptives_trns_proc (struct trns_header *trns, struct ccase * c, for (vars = t->vars; vars < t->vars + t->var_cnt; vars++) { double score = case_num (c, (*vars)->fv); - if ( score == SYSMIS || (!t->include_user_missing - && is_num_user_missing(score, *vars)) ) + if ( score == SYSMIS + || (!t->include_user_missing + && mv_is_num_user_missing (&(*vars)->miss, score))) { all_sysmis = 1; break; @@ -599,7 +600,8 @@ descriptives_trns_proc (struct trns_header *trns, struct ccase * c, if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis || input == SYSMIS - || (!t->include_user_missing && is_num_user_missing(input, z->v))) + || (!t->include_user_missing + && mv_is_num_user_missing (&z->v->miss, input))) *output = SYSMIS; else *output = (input - z->mean) / z->std_dev; @@ -739,7 +741,7 @@ calc_descriptives (const struct casefile *cf, void *dsc_) if (dsc->missing_type != DSC_LISTWISE && (x == SYSMIS || (!dsc->include_user_missing - && is_num_user_missing (x, dv->v)))) + && mv_is_num_user_missing (&dv->v->miss, x)))) { dv->missing += weight; continue; @@ -781,7 +783,7 @@ calc_descriptives (const struct casefile *cf, void *dsc_) if (dsc->missing_type != DSC_LISTWISE && (x == SYSMIS || (!dsc->include_user_missing - && is_num_user_missing (x, 
dv->v)))) + && mv_is_num_user_missing (&dv->v->miss, x)))) continue; if (dv->moments != NULL) @@ -844,7 +846,8 @@ listwise_missing (struct dsc_proc *dsc, const struct ccase *c) double x = case_num (c, dv->v->fv); if (x == SYSMIS - || (!dsc->include_user_missing && is_num_user_missing (x, dv->v))) + || (!dsc->include_user_missing + && mv_is_num_user_missing (&dv->v->miss, x))) return 1; } return 0; diff --git a/src/dictionary.c b/src/dictionary.c index 9ea9f31a2a..3f1177d35f 100644 --- a/src/dictionary.c +++ b/src/dictionary.c @@ -279,7 +279,7 @@ dict_create_var (struct dictionary *d, const char *name, int width) v->init = 1; v->reinit = dict_class_from_id (v->name) != DC_SCRATCH; v->index = d->var_cnt; - v->miss_type = MISSING_NONE; + mv_init (&v->miss, width); if (v->type == NUMERIC) { v->print = f8_2; @@ -354,8 +354,7 @@ dict_clone_var (struct dictionary *d, const struct variable *ov, the same short name. */ nv->init = 1; nv->reinit = ov->reinit; - nv->miss_type = ov->miss_type; - memcpy (nv->missing, ov->missing, sizeof nv->missing); + mv_copy (&nv->miss, &ov->miss); nv->print = ov->print; nv->write = ov->write; val_labs_destroy (nv->val_labs); @@ -678,7 +677,7 @@ dict_get_case_weight (const struct dictionary *d, const struct ccase *c, else { double w = case_num (c, d->weight->fv); - if ( w < 0.0 || w == SYSMIS || is_num_user_missing(w, d->weight) ) + if (w < 0.0 || mv_is_num_missing (&d->weight->miss, w)) w = 0.0; if ( w == 0.0 && *warn_on_invalid ) { *warn_on_invalid = 0; diff --git a/src/examine.q b/src/examine.q index b54f574b5c..33498d85e0 100644 --- a/src/examine.q +++ b/src/examine.q @@ -169,7 +169,7 @@ const char *factor_to_string_concise(const struct factor *fctr, /* Function to use for testing for missing values */ -static is_missing_func value_is_missing; +static is_missing_func *value_is_missing; /* PERCENTILES */ @@ -193,9 +193,9 @@ cmd_examine(void) /* If /MISSING=INCLUDE is set, then user missing values are ignored */ if (cmd.incl == XMN_INCLUDE ) 
- value_is_missing = is_system_missing; + value_is_missing = mv_is_value_system_missing; else - value_is_missing = is_missing; + value_is_missing = mv_is_value_missing; if ( cmd.st_n == SYSMIS ) cmd.st_n = 5; @@ -650,7 +650,7 @@ factor_calc(struct ccase *c, int case_no, double weight, int case_missing) const struct variable *var = dependent_vars[v]; const union value *val = case_data (c, var->fv); - if ( value_is_missing(val,var) || case_missing ) + if ( value_is_missing (&var->miss, val) || case_missing ) val = 0; metrics_calc( &(*foo)->m[v], val, weight, case_no); @@ -712,7 +712,7 @@ run_examine(const struct casefile *cf, void *cmd_ ) const struct variable *var = dependent_vars[v]; const union value *val = case_data (&c, var->fv); - if ( value_is_missing(val,var)) + if ( value_is_missing(&var->miss, val)) case_missing = 1; } @@ -723,7 +723,7 @@ run_examine(const struct casefile *cf, void *cmd_ ) const struct variable *var = dependent_vars[v]; const union value *val = case_data (&c, var->fv); - if ( value_is_missing(val,var) || case_missing ) + if ( value_is_missing(&var->miss, val) || case_missing ) val = 0; metrics_calc(&totals[v], val, weight, case_no); diff --git a/src/expressions/operations.def b/src/expressions/operations.def index 0d21c16a65..bd5af76c4e 100644 --- a/src/expressions/operations.def +++ b/src/expressions/operations.def @@ -896,7 +896,7 @@ no_opt operator VEC_ELEM_NUM (idx) { const struct variable *var = v->var[(int) idx - 1]; double value = case_num (c, var->fv); - return !is_num_user_missing (value, var) ? value : SYSMIS; + return !mv_is_num_user_missing (&var->miss, value) ? value : SYSMIS; } else { @@ -943,7 +943,7 @@ no_opt operator NUM_VAR () num_var v; { double d = case_num (c, v->fv); - return !is_num_user_missing (d, v) ? d : SYSMIS; + return !mv_is_num_user_missing (&v->miss, d) ? 
d : SYSMIS; } no_opt string operator STR_VAR () @@ -962,7 +962,7 @@ no_opt function LAG (num_var v, pos_int n_before) if (c != NULL) { double x = case_num (c, v->fv); - return !is_num_user_missing (x, v) ? x : SYSMIS; + return !mv_is_num_user_missing (&v->miss, x) ? x : SYSMIS; } else return SYSMIS; @@ -974,7 +974,7 @@ no_opt function LAG (num_var v) if (c != NULL) { double x = case_num (c, v->fv); - return !is_num_user_missing (x, v) ? x : SYSMIS; + return !mv_is_num_user_missing (&v->miss, x) ? x : SYSMIS; } else return SYSMIS; diff --git a/src/factor_stats.c b/src/factor_stats.c index 4508caf889..e090517bdd 100644 --- a/src/factor_stats.c +++ b/src/factor_stats.c @@ -18,8 +18,8 @@ along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include <config.h> #include "factor_stats.h" -#include "config.h" #include "val.h" #include "hash.h" #include "algorithm.h" diff --git a/src/frequencies.q b/src/frequencies.q index c165c21d69..0ca83a45b2 100644 --- a/src/frequencies.q +++ b/src/frequencies.q @@ -697,7 +697,7 @@ not_missing (const void *f_, void *v_) const struct freq *f = f_; struct variable *v = v_; - return !is_missing (&f->v, v); + return !mv_is_value_missing (&v->miss, &f->v); } /* Summarizes the frequency table data for variable V. 
*/ @@ -1564,7 +1564,7 @@ freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var) /* Find out the extremes of the x value */ for ( frq = hsh_first(fh, &hi); frq != 0; frq = hsh_next(fh, &hi) ) { - if ( is_missing(&frq->v, var)) + if ( mv_is_value_missing(&var->miss, &frq->v)) continue; if ( frq->v.f < x_min ) x_min = frq->v.f ; diff --git a/src/get.c b/src/get.c index a13277a6bf..f484d120fc 100644 --- a/src/get.c +++ b/src/get.c @@ -1426,9 +1426,8 @@ mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f) if (val_labs_count (dv->val_labs) && !val_labs_count (mv->val_labs)) mv->val_labs = val_labs_copy (dv->val_labs); - if (dv->miss_type != MISSING_NONE - && mv->miss_type == MISSING_NONE) - copy_missing_values (mv, dv); + if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss)) + mv_copy (&mv->miss, &dv->miss); } if (dv->label && !mv->label) diff --git a/src/levene.c b/src/levene.c index 833a65e08d..7877c7ec6a 100644 --- a/src/levene.c +++ b/src/levene.c @@ -77,8 +77,7 @@ struct levene_info enum lev_missing missing; /* Function to test for missing values */ - is_missing_func is_missing; - + is_missing_func *is_missing; }; /* First pass */ @@ -202,7 +201,7 @@ levene_calc (const struct ccase *c, void *_l) struct variable *v = l->v_dep[i]; const union value *val = case_data (c, v->fv); - if (l->is_missing(val,v) ) + if (l->is_missing (&v->miss, val) ) { return 0; } @@ -225,7 +224,7 @@ levene_calc (const struct ccase *c, void *_l) if ( 0 == gs ) continue ; - if ( ! l->is_missing(v,var)) + if ( ! l->is_missing(&var->miss, v)) { levene_z= fabs(v->f - gs->mean); lz[i].grand_total += levene_z * weight; @@ -309,7 +308,7 @@ levene2_calc (const struct ccase *c, void *_l) struct variable *v = l->v_dep[i]; const union value *val = case_data (c, v->fv); - if (l->is_missing(val,v) ) + if (l->is_missing(&v->miss, val) ) { return 0; } @@ -330,7 +329,7 @@ levene2_calc (const struct ccase *c, void *_l) if ( 0 == gs ) continue; - if ( ! 
l->is_missing(v,var) ) + if ( ! l->is_missing (&var->miss, v) ) { levene_z = fabs(v->f - gs->mean); lz_denominator[i] += weight * pow2(levene_z - gs->lz_mean); diff --git a/src/mis-val.c b/src/mis-val.c index 27a5134307..555dbf09be 100644 --- a/src/mis-val.c +++ b/src/mis-val.c @@ -21,6 +21,7 @@ #include "error.h" #include #include "command.h" +#include "data-in.h" #include "error.h" #include "lexer.h" #include "magic.h" @@ -32,317 +33,180 @@ #include "debug-print.h" -/* Variables on MIS VAL. */ -static struct variable **v; -static int nv; - -/* Type of the variables on MIS VAL. */ -static int type; - -/* Width of string variables on MIS VAL. */ -static size_t width; - -/* Items to fill-in var structs with. */ -static int miss_type; -static union value missing[3]; - -static int parse_varnames (void); -static int parse_numeric (void); -static int parse_alpha (void); +static bool parse_number (double *, const struct fmt_spec *); int cmd_missing_values (void) { - int i; + struct variable **v; + int nv; + + int retval = CMD_PART_SUCCESS_MAYBE; + bool deferred_errors = false; while (token != '.') { - if (!parse_varnames ()) - goto fail; + int i; + - if (token != ')') - { - if ((type == NUMERIC && !parse_numeric ()) - || (type == ALPHA && !parse_alpha ())) - goto fail; - } - else - miss_type = MISSING_NONE; + if (!parse_variables (default_dict, &v, &nv, PV_NONE)) + goto done; - if (!lex_match (')')) - { - msg (SE, _("`)' expected after value specification.")); - goto fail; - } + if (!lex_match ('(')) + { + lex_error (_("expecting `('")); + goto done; + } for (i = 0; i < nv; i++) - { - v[i]->miss_type = miss_type; - memcpy (v[i]->missing, missing, sizeof v[i]->missing); - } + mv_init (&v[i]->miss, v[i]->width); + + if (!lex_match (')')) + { + struct missing_values mv; + + for (i = 0; i < nv; i++) + if (v[i]->type != v[0]->type) + { + const struct variable *n = v[0]->type == NUMERIC ? v[0] : v[i]; + const struct variable *s = v[0]->type == NUMERIC ? 
v[i] : v[0]; + msg (SE, _("Cannot mix numeric variables (e.g. %s) and " + "string variables (e.g. %s) within a single list."), + n->name, s->name); + goto done; + } + + if (v[0]->type == NUMERIC) + { + mv_init (&mv, 0); + while (!lex_match (')')) + { + double x; + + if (lex_match_id ("LO") || lex_match_id ("LOWEST")) + x = LOWEST; + else if (!parse_number (&x, &v[0]->print)) + goto done; + + if (lex_match_id ("THRU")) + { + double y; + + if (lex_match_id ("HI") || lex_match_id ("HIGHEST")) + y = HIGHEST; + else if (!parse_number (&y, &v[0]->print)) + goto done; + + if (x == LOWEST && y == HIGHEST) + { + msg (SE, _("LO THRU HI is an invalid range.")); + deferred_errors = true; + } + else if (!mv_add_num_range (&mv, x, y)) + deferred_errors = true; + } + else + { + if (x == LOWEST) + { + msg (SE, _("LO or LOWEST must be part of a range.")); + deferred_errors = true; + } + else if (!mv_add_num (&mv, x)) + deferred_errors = true; + } + + lex_match (','); + } + } + else + { + mv_init (&mv, MAX_SHORT_STRING); + while (!lex_match (')')) + { + if (!lex_force_string ()) + { + deferred_errors = true; + break; + } + + if (ds_length (&tokstr) > MAX_SHORT_STRING) + { + ds_truncate (&tokstr, MAX_SHORT_STRING); + msg (SE, _("Truncating missing value to short string " + "length (%d characters)."), + MAX_SHORT_STRING); + } + else + ds_rpad (&tokstr, MAX_SHORT_STRING, ' '); + + if (!mv_add_str (&mv, ds_data (&tokstr))) + deferred_errors = true; + + lex_get (); + lex_match (','); + } + } + + for (i = 0; i < nv; i++) + { + if (!mv_is_resizable (&mv, v[i]->width)) + { + msg (SE, _("Missing values provided are too long to assign " + "to variable of width %d."), + v[i]->width); + deferred_errors = true; + } + else + { + mv_copy (&v[i]->miss, &mv); + mv_resize (&v[i]->miss, v[i]->width); + } + } + } lex_match ('/'); free (v); + v = NULL; } - - return lex_end_of_command (); - -fail: + retval = lex_end_of_command (); + + done: free (v); - return CMD_PART_SUCCESS_MAYBE; + if 
(deferred_errors) + retval = CMD_PART_SUCCESS_MAYBE; + return retval; } -static int -parse_varnames (void) +static bool +parse_number (double *x, const struct fmt_spec *f) { - int i; - - if (!parse_variables (default_dict, &v, &nv, PV_SAME_TYPE)) - return 0; - if (!lex_match ('(')) + if (lex_is_number ()) { - msg (SE, _("`(' expected after variable name%s."), nv > 1 ? "s" : ""); - return 0; - } - - type = v[0]->type; - if (type == NUMERIC) - return 1; - - width = v[0]->width; - for (i = 1; i < nv; i++) - if (v[i]->type == ALPHA && v[i]->nv != 1) - { - msg (SE, _("Long string value specified.")); - return 0; - } - else if (v[i]->type == ALPHA && (int) width != v[i]->width) - { - msg (SE, _("Short strings must be of equal width.")); - return 0; - } - - return 1; -} - -/* Number or range? */ -enum - { - MV_NOR_NOTHING, /* Empty. */ - MV_NOR_NUMBER, /* Single number. */ - MV_NOR_RANGE /* Range. */ - }; - -/* A single value or a range. */ -struct num_or_range - { - int type; /* One of NOR_*. */ - double d[2]; /* d[0]=lower bound or value, d[1]=upper bound. */ - }; - -/* Parses something of the form <num>, or LO[WEST] THRU <num>, or - <num> THRU HI[GHEST], or <num> THRU <num>, and sets the appropriate - members of NOR. Returns success. 
*/ -static int -parse_num_or_range (struct num_or_range * nor) -{ - if (lex_match_id ("LO") || lex_match_id ("LOWEST")) - { - nor->type = MV_NOR_RANGE; - if (!lex_force_match_id ("THRU")) - return 0; - if (!lex_force_num ()) - return 0; - nor->d[0] = LOWEST; - nor->d[1] = tokval; - } - else if (lex_is_number ()) - { - nor->d[0] = tokval; + *x = lex_number (); lex_get (); - - if (lex_match_id ("THRU")) - { - nor->type = MV_NOR_RANGE; - if (lex_match_id ("HI") || lex_match_id ("HIGHEST")) - nor->d[1] = HIGHEST; - else - { - if (!lex_force_num ()) - return 0; - nor->d[1] = tokval; - lex_get (); - - if (nor->d[0] > nor->d[1]) - { - msg (SE, _("Range %g THRU %g is not valid because %g is " - "greater than %g."), - nor->d[0], nor->d[1], nor->d[0], nor->d[1]); - return 0; - } - } - } - else - nor->type = MV_NOR_NUMBER; + return true; } - else - return -1; - - return 1; -} - -/* Parses a set of numeric missing values and stores them into - `missing[]' and `miss_type' global variables. */ -static int -parse_numeric (void) -{ - struct num_or_range set[3]; - int r; - - set[1].type = set[2].type = MV_NOR_NOTHING; - - /* Get first number or range. */ - r = parse_num_or_range (&set[0]); - if (r < 1) + else if (token == T_STRING) { - if (r == -1) - msg (SE, _("Number or range expected.")); - return 0; - } - - /* Get second and third optional number or range. */ - lex_match (','); - r = parse_num_or_range (&set[1]); - if (r == 1) - { - lex_match (','); - r = parse_num_or_range (&set[2]); - } - if (r == 0) - return 0; - - /* Force range, if present, into set[0]. */ - if (set[1].type == MV_NOR_RANGE) - { - struct num_or_range t = set[1]; - set[1] = set[0]; - set[0] = t; - } - if (set[2].type == MV_NOR_RANGE) - { - struct num_or_range t = set[2]; - set[2] = set[0]; - set[0] = t; - } - - /* Ensure there's not more than one range, or one range - plus one value. 
*/ - if (set[1].type == MV_NOR_RANGE || set[2].type == MV_NOR_RANGE) - { - msg (SE, _("At most one range can exist in the missing values " - "for any one variable.")); - return 0; - } - if (set[0].type == MV_NOR_RANGE && set[2].type != MV_NOR_NOTHING) - { - msg (SE, _("At most one individual value can be missing along " - "with one range.")); - return 0; - } - - /* Set missing[] from set[]. */ - if (set[0].type == MV_NOR_RANGE) - { - int x = 0; - - if (set[0].d[0] == LOWEST) - { - miss_type = MISSING_LOW; - missing[x++].f = set[0].d[1]; - } - else if (set[0].d[1] == HIGHEST) - { - miss_type = MISSING_HIGH; - missing[x++].f = set[0].d[0]; - } - else - { - miss_type = MISSING_RANGE; - missing[x++].f = set[0].d[0]; - missing[x++].f = set[0].d[1]; - } - - if (set[1].type == MV_NOR_NUMBER) - { - miss_type += 3; - missing[x].f = set[1].d[0]; - } - } - else - { - if (set[0].type == MV_NOR_NUMBER) - { - miss_type = MISSING_1; - missing[0].f = set[0].d[0]; - } - if (set[1].type == MV_NOR_NUMBER) - { - miss_type = MISSING_2; - missing[1].f = set[1].d[0]; - } - if (set[2].type == MV_NOR_NUMBER) - { - miss_type = MISSING_3; - missing[2].f = set[2].d[0]; - } - } - - return 1; -} - -static int -parse_alpha (void) -{ - for (miss_type = 0; token == T_STRING && miss_type < 3; miss_type++) - { - if (ds_length (&tokstr) != width) - { - msg (SE, _("String is not of proper length.")); - return 0; - } - strncpy (missing[miss_type].s, ds_c_str (&tokstr), MAX_SHORT_STRING); + struct data_in di; + union value v; + di.s = ds_data (&tokstr); + di.e = ds_end (&tokstr); + di.v = &v; + di.flags = 0; + di.f1 = 1; + di.f2 = ds_length (&tokstr); + di.format = *f; + data_in (&di); lex_get (); - lex_match (','); + *x = v.f; + return true; } - if (miss_type < 1) + else { - msg (SE, _("String expected.")); - return 0; + lex_error (_("expecting number or data string")); + return false; } - - return 1; } -/* Copy the missing values from variable SRC to variable DEST. 
*/ -void -copy_missing_values (struct variable *dest, const struct variable *src) -{ - static const int n_values[MISSING_COUNT] = - { - 0, 1, 2, 3, 2, 1, 1, 3, 2, 2, - }; - - assert (dest->width == src->width); - assert (src->miss_type >= 0 && src->miss_type < MISSING_COUNT); - - { - int i; - - dest->miss_type = src->miss_type; - for (i = 0; i < n_values[src->miss_type]; i++) - if (src->type == NUMERIC) - dest->missing[i].f = src->missing[i].f; - else - memcpy (dest->missing[i].s, src->missing[i].s, src->width); - } -} diff --git a/src/missing-values.c b/src/missing-values.c new file mode 100644 index 0000000000..09192179f1 --- /dev/null +++ b/src/missing-values.c @@ -0,0 +1,382 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2005 Free Software Foundation, Inc. + Written by Ben Pfaff. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +#include <config.h> +#include "missing-values.h" +#include <assert.h> +#include <stdlib.h> +#include "str.h" + +/* Initializes MV as a set of missing values for a variable of + the given WIDTH. Although only numeric variables and short + string variables may have missing values, WIDTH may be any + valid variable width. */ +void +mv_init (struct missing_values *mv, int width) +{ + assert (width >= 0 && width <= MAX_STRING); + mv->type = MV_NONE; + mv->width = width; +} + +/* Copies SRC to MV. 
*/ +void +mv_copy (struct missing_values *mv, const struct missing_values *src) +{ + *mv = *src; +} + +/* Returns true if MV is an empty set of missing values. */ +bool +mv_is_empty (const struct missing_values *mv) +{ + return mv->type == MV_NONE; +} + +/* Returns the width of the missing values that MV may + contain. */ +int +mv_get_width (const struct missing_values *mv) +{ + return mv->width; +} + +/* Attempts to add individual value V to the set of missing + values MV. Returns true if successful, false if MV has no + more room for missing values. (Long string variables never + accept missing values.) */ +bool +mv_add_value (struct missing_values *mv, const union value *v) +{ + if (mv->width > MAX_SHORT_STRING) + return false; + switch (mv->type) + { + case MV_NONE: + case MV_1: + case MV_2: + case MV_RANGE: + mv->values[mv->type & 3] = *v; + mv->type++; + return true; + + case MV_3: + case MV_RANGE_1: + return false; + } + abort (); +} + +/* Attempts to add S to the set of string missing values MV. S + must contain exactly as many characters as MV's width. + Returns true if successful, false if MV has no more room for + missing values. (Long string variables never accept missing + values.) */ +bool +mv_add_str (struct missing_values *mv, const unsigned char s[]) +{ + assert (mv->width > 0); + return mv_add_value (mv, (union value *) s); +} + +/* Attempts to add D to the set of numeric missing values MV. + Returns true if successful, false if MV has no more room for + missing values. */ +bool +mv_add_num (struct missing_values *mv, double d) +{ + assert (mv->width == 0); + return mv_add_value (mv, (union value *) &d); +} + +/* Attempts to add range [LOW, HIGH] to the set of numeric + missing values MV. Returns true if successful, false if MV + has no room for a range. 
*/ +bool +mv_add_num_range (struct missing_values *mv, double low, double high) +{ + assert (mv->width == 0); + switch (mv->type) + { + case MV_NONE: + case MV_1: + mv->values[1].f = low; + mv->values[2].f = high; + mv->type |= 4; + return true; + + case MV_2: + case MV_3: + case MV_RANGE: + case MV_RANGE_1: + return false; + } + abort (); +} + +/* Returns true if MV contains an individual value, + false if MV is empty (or contains only a range). */ +bool +mv_has_value (struct missing_values *mv) +{ + switch (mv->type) + { + case MV_1: + case MV_2: + case MV_3: + case MV_RANGE_1: + return true; + + case MV_NONE: + case MV_RANGE: + return false; + } + abort (); +} + +/* Removes one individual value from MV and stores it in *V. + MV must contain an individual value (as determined by + mv_has_value()). */ +void +mv_pop_value (struct missing_values *mv, union value *v) +{ + assert (mv_has_value (mv)); + mv->type--; + *v = mv->values[mv->type & 3]; +} + +/* Returns true if MV contains a numeric range, + false if MV is empty (or contains only individual values). */ +bool +mv_has_range (struct missing_values *mv) +{ + switch (mv->type) + { + case MV_RANGE: + case MV_RANGE_1: + return true; + + case MV_NONE: + case MV_1: + case MV_2: + case MV_3: + return false; + } + abort (); +} + +/* Removes the numeric range from MV and stores it in *LOW and + *HIGH. MV must contain a numeric range (as determined by + mv_has_range()). */ +void +mv_pop_range (struct missing_values *mv, double *low, double *high) +{ + assert (mv_has_range (mv)); + *low = mv->values[1].f; + *high = mv->values[2].f; + mv->type &= 3; +} + +/* Returns true if values[IDX] is in use when the `type' member + is set to TYPE (in struct missing_values), + false otherwise. 
*/ +static bool +using_element (unsigned type, int idx) +{ + assert (idx >= 0 && idx < 3); + + switch (type) + { + case MV_NONE: + return false; + case MV_1: + return idx < 1; + case MV_2: + return idx < 2; + case MV_3: + return true; + case MV_RANGE: + return idx > 0; + case MV_RANGE_1: + return true; + } + abort (); +} + +/* Returns true if S contains only spaces between indexes + NEW_WIDTH (inclusive) and OLD_WIDTH (exclusive), + false otherwise. */ +static bool +can_resize_string (const unsigned char *s, int old_width, int new_width) +{ + int i; + + assert (new_width < old_width); + for (i = new_width; i < old_width; i++) + if (s[i] != ' ') + return false; + return true; +} + +/* Returns true if MV can be resized to the given WIDTH with + mv_resize(), false otherwise. Resizing to the same width is + always possible. Resizing to a long string WIDTH is only + possible if MV is an empty set of missing values; otherwise, + resizing to a larger WIDTH is always possible. Resizing to a + shorter width is possible only when each missing value + contains only spaces in the characters that will be + trimmed. */ +bool +mv_is_resizable (struct missing_values *mv, int width) +{ + assert ((width == 0) == (mv->width == 0)); + if (width > MAX_SHORT_STRING && mv->type != MV_NONE) + return false; + else if (width >= mv->width) + return true; + else + { + int i; + + for (i = 0; i < 3; i++) + if (using_element (mv->type, i) + && !can_resize_string (mv->values[i].s, mv->width, width)) + return false; + return true; + } +} + +/* Resizes MV to the given WIDTH. WIDTH must fit the constraints + explained for mv_is_resizable(). */ +void +mv_resize (struct missing_values *mv, int width) +{ + assert (mv_is_resizable (mv, width)); + if (width > mv->width) + { + int i; + + for (i = 0; i < 3; i++) + memset (mv->values[i].s + mv->width, ' ', width - mv->width); + } + mv->width = width; +} + +/* Returns true if V is system missing or a missing value in MV, + false otherwise. 
*/
+bool
+mv_is_value_missing (const struct missing_values *mv, const union value *v)
+{
+  return (mv->width == 0   /* Width 0 marks a numeric set. */
+          ? mv_is_num_missing (mv, v->f)
+          : mv_is_str_missing (mv, v->s));
+}
+
+/* Returns true if D is system missing or a missing value in MV,
+   false otherwise.
+   MV must be a set of numeric missing values. */
+bool
+mv_is_num_missing (const struct missing_values *mv, double d)
+{
+  assert (mv->width == 0);
+  return d == SYSMIS || mv_is_num_user_missing (mv, d);
+}
+
+/* Returns true if S[] is a missing value in MV, false otherwise.
+   MV must be a set of string missing values.
+   S[] must contain exactly as many characters as MV's width.
+   This simply defers to mv_is_str_user_missing(); no separate
+   system-missing test is made for strings. */
+bool
+mv_is_str_missing (const struct missing_values *mv,
+                   const unsigned char s[])
+{
+  return mv_is_str_user_missing (mv, s);
+}
+
+/* Returns true if V is a user-missing value in MV, false
+   otherwise. Unlike mv_is_value_missing(), the system-missing
+   value is not tested for. */
+bool
+mv_is_value_user_missing (const struct missing_values *mv,
+                          const union value *v)
+{
+  return (mv->width == 0   /* Width 0 marks a numeric set. */
+          ? mv_is_num_user_missing (mv, v->f)
+          : mv_is_str_user_missing (mv, v->s));
+}
+
+/* Returns true if D is a missing value in MV, false otherwise.
+   MV must be a set of numeric missing values.
+   Individual values are compared for exact equality; a range
+   includes both of its endpoints. */
+bool
+mv_is_num_user_missing (const struct missing_values *mv, double d)
+{
+  const union value *v = mv->values;
+  assert (mv->width == 0);
+  switch (mv->type)
+    {
+    case MV_NONE:
+      return false;
+    case MV_1:
+      return v[0].f == d;
+    case MV_2:
+      return v[0].f == d || v[1].f == d;
+    case MV_3:
+      return v[0].f == d || v[1].f == d || v[2].f == d;
+    case MV_RANGE:
+      return v[1].f <= d && d <= v[2].f;   /* Range is in slots 1, 2. */
+    case MV_RANGE_1:
+      return v[0].f == d || (v[1].f <= d && d <= v[2].f);
+    }
+  abort ();
+}
+
+/* Returns true if S[] is a missing value in MV, false otherwise.
+   MV must be a set of string missing values.
+   S[] must contain exactly as many characters as MV's width.
*/ +bool +mv_is_str_user_missing (const struct missing_values *mv, + const unsigned char s[]) +{ + const union value *v = mv->values; + assert (mv->width > 0); + switch (mv->type) + { + case MV_NONE: + return false; + case MV_1: + return !memcmp (v[0].s, s, mv->width); + case MV_2: + return (!memcmp (v[0].s, s, mv->width) + || !memcmp (v[1].s, s, mv->width)); + case MV_3: + return (!memcmp (v[0].s, s, mv->width) + || !memcmp (v[1].s, s, mv->width) + || !memcmp (v[2].s, s, mv->width)); + case MV_RANGE: + case MV_RANGE_1: + abort (); + } + abort (); +} + +/* Returns true if MV is a set of numeric missing values and V is + the system missing value. */ +bool +mv_is_value_system_missing (const struct missing_values *mv, + const union value *v) +{ + return mv->width == 0 ? v->f == SYSMIS : false; +} diff --git a/src/missing-values.h b/src/missing-values.h new file mode 100644 index 0000000000..710fc05e1b --- /dev/null +++ b/src/missing-values.h @@ -0,0 +1,84 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2005 Free Software Foundation, Inc. + Written by Ben Pfaff . + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +#if !missing_values_h +#define missing_values_h 1 + +#include +#include "val.h" + +/* Types of user-missing values. + Invisible--use access functions defined below instead. 
*/
+enum mv_type
+  {
+    MV_NONE = 0,     /* No user-missing values. */
+    MV_1 = 1,        /* One user-missing value. */
+    MV_2 = 2,        /* Two user-missing values. */
+    MV_3 = 3,        /* Three user-missing values. */
+    MV_RANGE = 4,    /* A range of user-missing values. */
+    MV_RANGE_1 = 5   /* A range plus an individual value. */
+  };
+
+/* Missing values.
+   Opaque--use access functions defined below. */
+struct missing_values
+  {
+    unsigned type;          /* Number and type of missing values (enum mv_type). */
+    int width;              /* 0=numeric, otherwise string width. */
+    union value values[3];  /* Missing values. A range is kept as [1]=low, [2]=high. */
+  };
+
+void mv_init (struct missing_values *, int width);
+void mv_copy (struct missing_values *, const struct missing_values *);
+bool mv_is_empty (const struct missing_values *);
+int mv_get_width (const struct missing_values *);
+
+bool mv_add_value (struct missing_values *, const union value *);
+bool mv_add_str (struct missing_values *, const unsigned char[]);
+bool mv_add_num (struct missing_values *, double);
+bool mv_add_num_range (struct missing_values *, double low, double high);
+
+bool mv_has_value (struct missing_values *);
+void mv_pop_value (struct missing_values *, union value *);
+bool mv_has_range (struct missing_values *);
+void mv_pop_range (struct missing_values *, double *low, double *high);
+
+bool mv_is_resizable (struct missing_values *, int width);
+void mv_resize (struct missing_values *, int width);
+
+typedef bool is_missing_func (const struct missing_values *,
+                              const union value *);  /* Type of the mv_is_value_*() tests. */
+
+/* Is a value system or user missing? */
+bool mv_is_value_missing (const struct missing_values *, const union value *);
+bool mv_is_num_missing (const struct missing_values *, double);
+bool mv_is_str_missing (const struct missing_values *, const unsigned char[]);
+
+/* Is a value user missing?
*/ +bool mv_is_value_user_missing (const struct missing_values *, + const union value *); +bool mv_is_num_user_missing (const struct missing_values *, double); +bool mv_is_str_user_missing (const struct missing_values *, + const unsigned char[]); + +/* Is a value system missing? */ +bool mv_is_value_system_missing (const struct missing_values *, + const union value *); + +#endif /* missing-values.h */ diff --git a/src/oneway.q b/src/oneway.q index 9aebbf61b4..8c9c768a87 100644 --- a/src/oneway.q +++ b/src/oneway.q @@ -87,7 +87,7 @@ static int ostensible_number_of_groups=-1; /* Function to use for testing for missing values */ -static is_missing_func value_is_missing; +static is_missing_func *value_is_missing; static void run_oneway(const struct casefile *cf, void *_mode); @@ -119,9 +119,9 @@ cmd_oneway(void) /* If /MISSING=INCLUDE is set, then user missing values are ignored */ if (cmd.incl == ONEWAY_INCLUDE ) - value_is_missing = is_system_missing; + value_is_missing = mv_is_value_system_missing; else - value_is_missing = is_missing; + value_is_missing = mv_is_value_missing; /* What statistics were requested */ if ( cmd.sbc_statistics ) @@ -913,7 +913,7 @@ run_oneway(const struct casefile *cf, void *cmd_) const union value *indep_val = case_data (&c, indep_var->fv); /* Deal with missing values */ - if ( value_is_missing(indep_val,indep_var) ) + if ( value_is_missing(&indep_var->miss, indep_val) ) continue; /* Skip the entire case if /MISSING=LISTWISE is set */ @@ -924,7 +924,7 @@ run_oneway(const struct casefile *cf, void *cmd_) const struct variable *v = vars[i]; const union value *val = case_data (&c, v->fv); - if (value_is_missing(val,v) ) + if (value_is_missing(&v->miss, val) ) break; } if ( i != n_vars ) @@ -964,7 +964,7 @@ run_oneway(const struct casefile *cf, void *cmd_) hsh_insert ( group_hash, (void *) gs ); } - if (! value_is_missing(val,v) ) + if (! 
value_is_missing(&v->miss, val) ) { struct group_statistics *totals = &gp->ugs; diff --git a/src/percentiles.c b/src/percentiles.c index 4e618ad4a3..2381f771d6 100644 --- a/src/percentiles.c +++ b/src/percentiles.c @@ -18,6 +18,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include #include "factor_stats.h" #include "percentiles.h" #include "misc.h" diff --git a/src/pfm-read.c b/src/pfm-read.c index c7a604dbf0..1999628c46 100644 --- a/src/pfm-read.c +++ b/src/pfm-read.c @@ -540,45 +540,23 @@ read_variables (struct pfm_reader *r, struct dictionary *dict) convert_format (r, &fmt[3], &v->write, v); /* Range missing values. */ - if (match (r, 'B')) - { - v->miss_type = MISSING_RANGE; - v->missing[0] = parse_value (r, v); - v->missing[1] = parse_value (r, v); - } + if (match (r, 'B')) + { + double x = read_float (r); + double y = read_float (r); + mv_add_num_range (&v->miss, x, y); + } else if (match (r, 'A')) - { - v->miss_type = MISSING_HIGH; - v->missing[0] = parse_value (r, v); - } + mv_add_num_range (&v->miss, read_float (r), HIGHEST); else if (match (r, '9')) - { - v->miss_type = MISSING_LOW; - v->missing[0] = parse_value (r, v); - } + mv_add_num_range (&v->miss, LOWEST, read_float (r)); /* Single missing values. 
*/ - while (match (r, '8')) - { - static const int map_next[MISSING_COUNT] = - { - MISSING_1, MISSING_2, MISSING_3, -1, - MISSING_RANGE_1, MISSING_LOW_1, MISSING_HIGH_1, - -1, -1, -1, - }; - - static const int map_ofs[MISSING_COUNT] = - { - -1, 0, 1, 2, -1, -1, -1, 2, 1, 1, - }; - - v->miss_type = map_next[v->miss_type]; - if (v->miss_type == -1) - error (r, _("Bad missing values for %s."), v->name); - - assert (map_ofs[v->miss_type] != -1); - v->missing[map_ofs[v->miss_type]] = parse_value (r, v); - } + while (match (r, '8')) + { + union value value = parse_value (r, v); + mv_add_value (&v->miss, &value); + } if (match (r, 'C')) { diff --git a/src/pfm-write.c b/src/pfm-write.c index 30615418fd..fbe56f6a2e 100644 --- a/src/pfm-write.c +++ b/src/pfm-write.c @@ -298,25 +298,43 @@ write_variables (struct pfm_writer *w, struct dictionary *dict) for (i = 0; i < dict_get_var_cnt (dict); i++) { - static const char *miss_types[MISSING_COUNT] = - { - "", "8", "88", "888", "B ", "9", "A", "B 8", "98", "A8", - }; - - const char *m; - int j; - struct variable *v = dict_get_var (dict, i); + struct missing_values mv; if (!buf_write (w, "7", 1) || !write_int (w, v->width) || !write_string (w, v->short_name) || !write_format (w, &v->print) || !write_format (w, &v->write)) return 0; - for (m = miss_types[v->miss_type], j = 0; j < (int) strlen (m); j++) - if ((m[j] != ' ' && !buf_write (w, &m[j], 1)) - || !write_value (w, &v->missing[j], v)) - return 0; + /* Write missing values. 
*/ + mv_copy (&mv, &v->miss); + while (mv_has_range (&mv)) + { + double x, y; + mv_pop_range (&mv, &x, &y); + if (x == LOWEST) + { + if (!buf_write (w, "9", 1) || !write_float (w, y)) /* LOWEST THRU y: "9" carries the high bound. */ + return 0; + } + else if (y == HIGHEST) + { + if (!buf_write (w, "A", 1) || !write_float (w, x)) /* x THRU HIGHEST: "A" carries the low bound, which is what read_variables() in pfm-read.c reads back. */ + return 0; + } + else { + if (!buf_write (w, "B", 1) || !write_float (w, x) + || !write_float (w, y)) + return 0; + } + } + while (mv_has_value (&mv)) + { + union value value; + mv_pop_value (&mv, &value); + if (!buf_write (w, "8", 1) || !write_value (w, &value, v)) + return 0; + } if (v->label && (!buf_write (w, "C", 1) || !write_string (w, v->label))) return 0; diff --git a/src/recode.c b/src/recode.c index b25ac4567e..9f7214bdf1 100644 --- a/src/recode.c +++ b/src/recode.c @@ -714,7 +714,7 @@ find_src_numeric (struct rcd_var * v, struct ccase * c) case RCD_END: return NULL; case RCD_USER: - if (is_num_user_missing (cmp, v->src)) + if (mv_is_num_user_missing (&v->src->miss, cmp)) return cp; break; case RCD_SINGLE: diff --git a/src/sfm-read.c b/src/sfm-read.c index 0c07150eac..bd8800bb9f 100644 --- a/src/sfm-read.c +++ b/src/sfm-read.c @@ -926,63 +926,45 @@ read_variables (struct sfm_reader *r, if (sv.n_missing_values != 0) { flt64 mv[3]; + int mv_cnt = abs (sv.n_missing_values); if (vv->width > MAX_SHORT_STRING) lose ((ME, _("%s: Long string variable %s may not have missing " "values."), handle_get_filename (r->fh), vv->name)); - assertive_buf_read (r, mv, sizeof *mv * abs (sv.n_missing_values), 0); + assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0); if (r->reverse_endian && vv->type == NUMERIC) - for (j = 0; j < abs (sv.n_missing_values); j++) + for (j = 0; j < mv_cnt; j++) bswap_flt64 (&mv[j]); if (sv.n_missing_values > 0) { - vv->miss_type = sv.n_missing_values; - if (vv->type == NUMERIC) - for (j = 0; j < sv.n_missing_values; j++) - vv->missing[j].f = mv[j]; - else - for (j = 0; j < sv.n_missing_values; j++) - memcpy (vv->missing[j].s, &mv[j], vv->width); + for (j = 0; j <
sv.n_missing_values; j++) + if (vv->type == NUMERIC) + mv_add_num (&vv->miss, mv[j]); + else + mv_add_str (&vv->miss, (unsigned char *) &mv[j]); } else { - int x = 0; - if (vv->type == ALPHA) lose ((ME, _("%s: String variable %s may not have missing " "values specified as a range."), handle_get_filename (r->fh), vv->name)); if (mv[0] == r->lowest) - { - vv->miss_type = MISSING_LOW; - vv->missing[x++].f = mv[1]; - } + mv_add_num_range (&vv->miss, LOWEST, mv[1]); else if (mv[1] == r->highest) - { - vv->miss_type = MISSING_HIGH; - vv->missing[x++].f = mv[0]; - } + mv_add_num_range (&vv->miss, mv[0], HIGHEST); else - { - vv->miss_type = MISSING_RANGE; - vv->missing[x++].f = mv[0]; - vv->missing[x++].f = mv[1]; - } + mv_add_num_range (&vv->miss, mv[0], mv[1]); if (sv.n_missing_values == -3) - { - vv->miss_type += 3; - vv->missing[x++].f = mv[2]; - } + mv_add_num (&vv->miss, mv[2]); } } - else - vv->miss_type = MISSING_NONE; if (!parse_format_spec (r, sv.print, &vv->print, vv) || !parse_format_spec (r, sv.write, &vv->write, vv)) diff --git a/src/sfm-write.c b/src/sfm-write.c index e1e103e500..80b7840820 100644 --- a/src/sfm-write.c +++ b/src/sfm-write.c @@ -362,6 +362,7 @@ write_variable (struct sfm_writer *w, struct variable *v) struct sysfile_variable sv; /* Missing values. */ + struct missing_values mv; flt64 m[3]; /* Missing value values. */ int nm; /* Number of missing values, possibly negative. 
*/ @@ -369,54 +370,27 @@ write_variable (struct sfm_writer *w, struct variable *v) sv.type = v->width; sv.has_var_label = (v->label != NULL); - switch (v->miss_type) + mv_copy (&mv, &v->miss); + nm = 0; + if (mv_has_range (&mv)) { - case MISSING_NONE: - nm = 0; - break; - case MISSING_1: - case MISSING_2: - case MISSING_3: - for (nm = 0; nm < v->miss_type; nm++) - m[nm] = v->missing[nm].f; - break; - case MISSING_RANGE: - m[0] = v->missing[0].f; - m[1] = v->missing[1].f; - nm = -2; - break; - case MISSING_LOW: - m[0] = second_lowest_flt64; - m[1] = v->missing[0].f; - nm = -2; - break; - case MISSING_HIGH: - m[0] = v->missing[0].f; - m[1] = FLT64_MAX; - nm = -2; - break; - case MISSING_RANGE_1: - m[0] = v->missing[0].f; - m[1] = v->missing[1].f; - m[2] = v->missing[2].f; - nm = -3; - break; - case MISSING_LOW_1: - m[0] = second_lowest_flt64; - m[1] = v->missing[0].f; - m[2] = v->missing[1].f; - nm = -3; - break; - case MISSING_HIGH_1: - m[0] = v->missing[0].f; - m[1] = second_lowest_flt64; - m[2] = v->missing[1].f; - nm = -3; - break; - default: - assert (0); - abort (); + double x, y; + mv_pop_range (&mv, &x, &y); + m[nm++] = x == LOWEST ? second_lowest_flt64 : x; + m[nm++] = y == HIGHEST ? FLT64_MAX : y; } + while (mv_has_value (&mv)) + { + union value value; + mv_pop_value (&mv, &value); + if (v->type == NUMERIC) + m[nm] = value.f; + else + buf_copy_rpad ((char *) &m[nm], sizeof m[nm], value.s, v->width); + nm++; + } + if (mv_has_range (&v->miss)) + nm = -nm; sv.n_missing_values = nm; write_format_spec (&v->print, &sv.print); @@ -445,7 +419,7 @@ write_variable (struct sfm_writer *w, struct variable *v) return 0; } - if (nm && !buf_write (w, m, sizeof *m * nm)) + if (nm && !buf_write (w, m, sizeof *m * abs (nm))) return 0; if (v->type == ALPHA && v->width > (int) sizeof (flt64)) diff --git a/src/sfmP.h b/src/sfmP.h index 978a3e129d..c127b85e71 100644 --- a/src/sfmP.h +++ b/src/sfmP.h @@ -55,6 +55,7 @@ #endif /* Figure out SYSMIS value for flt64. 
*/ +#include "magic.h" #if SIZEOF_DOUBLE == 8 #define second_lowest_flt64 second_lowest_value #else diff --git a/src/sysfile-info.c b/src/sysfile-info.c index 08d5484f70..45bffad762 100644 --- a/src/sysfile-info.c +++ b/src/sysfile-info.c @@ -463,63 +463,44 @@ describe_variable (struct variable *v, struct tab_table *t, int r, int as) } /* Missing values if any. */ - if (v->miss_type != MISSING_NONE) + if (!mv_is_empty (&v->miss)) { - char buf[80]; - char *cp = stpcpy (buf, _("Missing Values: ")); - - if (v->type == NUMERIC) - switch (v->miss_type) - { - case MISSING_1: - sprintf (cp, "%g", v->missing[0].f); - break; - case MISSING_2: - sprintf (cp, "%g; %g", v->missing[0].f, v->missing[1].f); - break; - case MISSING_3: - sprintf (cp, "%g; %g; %g", v->missing[0].f, - v->missing[1].f, v->missing[2].f); - break; - case MISSING_RANGE: - sprintf (cp, "%g THRU %g", v->missing[0].f, v->missing[1].f); - break; - case MISSING_LOW: - sprintf (cp, "LOWEST THRU %g", v->missing[0].f); - break; - case MISSING_HIGH: - sprintf (cp, "%g THRU HIGHEST", v->missing[0].f); - break; - case MISSING_RANGE_1: - sprintf (cp, "%g THRU %g; %g", - v->missing[0].f, v->missing[1].f, v->missing[2].f); - break; - case MISSING_LOW_1: - sprintf (cp, "LOWEST THRU %g; %g", - v->missing[0].f, v->missing[1].f); - break; - case MISSING_HIGH_1: - sprintf (cp, "%g THRU HIGHEST; %g", - v->missing[0].f, v->missing[1].f); - break; - default: - assert (0); - } - else - { - int i; - - for (i = 0; i < v->miss_type; i++) - { - if (i != 0) - cp = stpcpy (cp, "; "); - *cp++ = '"'; - memcpy (cp, v->missing[i].s, v->width); + char buf[128]; + char *cp; + struct missing_values mv; + int cnt = 0; + + cp = stpcpy (buf, _("Missing Values: ")); + mv_copy (&mv, &v->miss); + if (mv_has_range (&mv)) + { + double x, y; + mv_pop_range (&mv, &x, &y); + if (x == LOWEST) + cp += nsprintf (cp, "LOWEST THRU %g", y); + else if (y == HIGHEST) + cp += nsprintf (cp, "%g THRU HIGHEST", x); + else + cp += nsprintf (cp, "%g THRU %g", x, 
y); + cnt++; + } + while (mv_has_value (&mv)) + { + union value value; + mv_pop_value (&mv, &value); + if (cnt++ > 0) + cp += nsprintf (cp, "; "); + if (v->type == NUMERIC) + cp += nsprintf (cp, "%g", value.f); + else + { + *cp++ = '"'; + memcpy (cp, value.s, v->width); cp += v->width; *cp++ = '"'; - } - *cp = 0; - } + *cp = '\0'; + } + } tab_joint_text (t, 1, r, 2, r, TAB_LEFT, buf); r++; diff --git a/src/t-test.q b/src/t-test.q index 07bcdd07f0..89f1741f33 100644 --- a/src/t-test.q +++ b/src/t-test.q @@ -67,7 +67,7 @@ /* Function to use for testing for missing values */ -static is_missing_func value_is_missing; +static is_missing_func *value_is_missing; /* Variable for the GROUPS subcommand, if given. */ static struct variable *indep_var; @@ -330,9 +330,9 @@ cmd_t_test(void) /* If /MISSING=INCLUDE is set, then user missing values are ignored */ if (cmd.incl == TTS_INCLUDE ) - value_is_missing = is_system_missing; + value_is_missing = mv_is_value_system_missing; else - value_is_missing = is_missing; + value_is_missing = mv_is_value_missing; bad_weight_warn = 1; @@ -1418,7 +1418,7 @@ common_calc (const struct ccase *c, void *_cmd) struct variable *v = cmd->v_variables[i]; const union value *val = case_data (c, v->fv); - if (value_is_missing(val,v) ) + if (value_is_missing(&v->miss, val) ) { return 0; } @@ -1429,7 +1429,7 @@ common_calc (const struct ccase *c, void *_cmd) if ( cmd->sbc_groups ) { const union value *gv = case_data (c, indep_var->fv); - if ( value_is_missing(gv,indep_var) ) + if ( value_is_missing(&indep_var->miss, gv) ) { return 0; } @@ -1444,7 +1444,7 @@ common_calc (const struct ccase *c, void *_cmd) gs= &group_proc_get (cmd->v_variables[i])->ugs; - if (! value_is_missing(val,v) ) + if (! 
value_is_missing(&v->miss, val) ) { gs->n+=weight; gs->sum+=weight * val->f; @@ -1517,7 +1517,7 @@ one_sample_calc (const struct ccase *c, void *cmd_) struct variable *v = cmd->v_variables[i]; const union value *val = case_data (c, v->fv); - if (value_is_missing(val,v) ) + if (value_is_missing(&v->miss, val) ) { return 0; } @@ -1532,7 +1532,7 @@ one_sample_calc (const struct ccase *c, void *cmd_) gs= &group_proc_get (cmd->v_variables[i])->ugs; - if ( ! value_is_missing(val,v)) + if ( ! value_is_missing(&v->miss, val)) gs->sum_diff += weight * (val->f - cmd->n_testval[0]); } @@ -1611,8 +1611,8 @@ paired_calc (const struct ccase *c, void *cmd_) const union value *val0 = case_data (c, v0->fv); const union value *val1 = case_data (c, v1->fv); - if ( value_is_missing(val0,v0) || - value_is_missing(val1,v1) ) + if ( value_is_missing(&v0->miss, val0) || + value_is_missing(&v1->miss, val1) ) { return 0; } @@ -1627,7 +1627,8 @@ paired_calc (const struct ccase *c, void *cmd_) const union value *val0 = case_data (c, v0->fv); const union value *val1 = case_data (c, v1->fv); - if ( ( !value_is_missing(val0,v0) && !value_is_missing(val1,v1) ) ) + if ( ( !value_is_missing(&v0->miss, val0) + && !value_is_missing(&v1->miss, val1) ) ) { pairs[i].n += weight; pairs[i].sum[0] += weight * val0->f; @@ -1744,7 +1745,7 @@ group_calc (const struct ccase *c, struct cmd_t_test *cmd) const double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); - if ( value_is_missing(gv,indep_var) ) + if ( value_is_missing(&indep_var->miss, gv) ) { return 0; } @@ -1756,7 +1757,7 @@ group_calc (const struct ccase *c, struct cmd_t_test *cmd) struct variable *v = cmd->v_variables[i]; const union value *val = case_data (c, v->fv); - if (value_is_missing(val,v) ) + if (value_is_missing(&v->miss, val) ) { return 0; } @@ -1779,7 +1780,7 @@ group_calc (const struct ccase *c, struct cmd_t_test *cmd) if ( ! 
gs ) return 0; - if ( !value_is_missing(val,var) ) + if ( !value_is_missing(&var->miss, val) ) { gs->n+=weight; gs->sum+=weight * val->f; diff --git a/src/val.h b/src/val.h index 96523de28b..57aaa2af74 100644 --- a/src/val.h +++ b/src/val.h @@ -21,7 +21,7 @@ #define val_h 1 #include -#include "config.h" +#include "magic.h" /* Values. */ diff --git a/src/var.h b/src/var.h index 3bfc43df7b..b4d5018208 100644 --- a/src/var.h +++ b/src/var.h @@ -25,10 +25,9 @@ #include "config.h" #include #include "format.h" +#include "missing-values.h" #include "val.h" - - /* Script variables. */ /* Variable type. */ @@ -39,27 +38,6 @@ enum (STRING is pre-empted by lexer.h.) */ }; -/* Types of missing values. Order is significant, see - mis-val.c:parse_numeric(), sfm-read.c, sfm-write.c, - sysfile-info.c:cmd_sysfile_info(), mis-val.c:copy_missing_values(), - pfm-read.c:read_variables(), pfm-write.c:write_variables(), - apply-dict.c:cmd_apply_dictionary(), and more (?). */ -enum - { - MISSING_NONE, /* No user-missing values. */ - MISSING_1, /* One user-missing value. */ - MISSING_2, /* Two user-missing values. */ - MISSING_3, /* Three user-missing values. */ - MISSING_RANGE, /* [a,b]. */ - MISSING_LOW, /* (-inf,a]. */ - MISSING_HIGH, /* (a,+inf]. */ - MISSING_RANGE_1, /* [a,b], c. */ - MISSING_LOW_1, /* (-inf,a], b. */ - MISSING_HIGH_1, /* (a,+inf), b. */ - MISSING_COUNT - }; - - /* A variable's dictionary entry. */ struct variable { @@ -75,8 +53,7 @@ struct variable int index; /* Dictionary index. */ /* Missing values. */ - int miss_type; /* One of the MISSING_* constants. */ - union value missing[3]; /* User-missing value. */ + struct missing_values miss; /* Missing values. */ /* Display formats. */ struct fmt_spec print; /* Default format for PRINT. */ @@ -178,18 +155,8 @@ extern int FILTER_before_TEMPORARY; void cancel_temporary (void); -/* Functions. 
*/ - struct ccase; void dump_split_vars (const struct ccase *); -typedef int (* is_missing_func )(const union value *, const struct variable *); - -int is_num_user_missing (double, const struct variable *); -int is_str_user_missing (const unsigned char[], const struct variable *); -int is_missing (const union value *, const struct variable *); -int is_system_missing (const union value *, const struct variable *); -int is_user_missing (const union value *, const struct variable *); -void copy_missing_values (struct variable *dest, const struct variable *src); /* Transformations. */ diff --git a/src/vars-atr.c b/src/vars-atr.c index a854033a08..5e34cb5946 100644 --- a/src/vars-atr.c +++ b/src/vars-atr.c @@ -141,109 +141,6 @@ discard_variables (void) pgm_state = STATE_INIT; } - -/* Return nonzero only if X is a user-missing value for numeric - variable V. */ -inline int -is_num_user_missing (double x, const struct variable *v) -{ - switch (v->miss_type) - { - case MISSING_NONE: - return 0; - case MISSING_1: - return x == v->missing[0].f; - case MISSING_2: - return x == v->missing[0].f || x == v->missing[1].f; - case MISSING_3: - return (x == v->missing[0].f || x == v->missing[1].f - || x == v->missing[2].f); - case MISSING_RANGE: - return x >= v->missing[0].f && x <= v->missing[1].f; - case MISSING_LOW: - return x <= v->missing[0].f; - case MISSING_HIGH: - return x >= v->missing[0].f; - case MISSING_RANGE_1: - return ((x >= v->missing[0].f && x <= v->missing[1].f) - || x == v->missing[2].f); - case MISSING_LOW_1: - return x <= v->missing[0].f || x == v->missing[1].f; - case MISSING_HIGH_1: - return x >= v->missing[0].f || x == v->missing[1].f; - default: - assert (0); - } - abort (); -} - -/* Return nonzero only if string S is a user-missing variable for - string variable V. */ -inline int -is_str_user_missing (const unsigned char s[], const struct variable *v) -{ - /* FIXME: should these be memcmp()? 
*/ - switch (v->miss_type) - { - case MISSING_NONE: - return 0; - case MISSING_1: - return !strncmp (s, v->missing[0].s, v->width); - case MISSING_2: - return (!strncmp (s, v->missing[0].s, v->width) - || !strncmp (s, v->missing[1].s, v->width)); - case MISSING_3: - return (!strncmp (s, v->missing[0].s, v->width) - || !strncmp (s, v->missing[1].s, v->width) - || !strncmp (s, v->missing[2].s, v->width)); - default: - assert (0); - } - abort (); -} - -/* Return nonzero only if value VAL is system-missing for variable - V. */ -int -is_system_missing (const union value *val, const struct variable *v) -{ - return v->type == NUMERIC && val->f == SYSMIS; -} - -/* Return nonzero only if value VAL is system- or user-missing for - variable V. */ -int -is_missing (const union value *val, const struct variable *v) -{ - switch (v->type) - { - case NUMERIC: - if (val->f == SYSMIS) - return 1; - return is_num_user_missing (val->f, v); - case ALPHA: - return is_str_user_missing (val->s, v); - default: - assert (0); - } - abort (); -} - -/* Return nonzero only if value VAL is user-missing for variable V. */ -int -is_user_missing (const union value *val, const struct variable *v) -{ - switch (v->type) - { - case NUMERIC: - return is_num_user_missing (val->f, v); - case ALPHA: - return is_str_user_missing (val->s, v); - default: - assert (0); - } - abort (); -} /* Returns true if NAME is an acceptable name for a variable, false otherwise. 
If ISSUE_ERROR is true, issues an diff --git a/src/vfm.c b/src/vfm.c index 0414234c01..6d7e526bf1 100644 --- a/src/vfm.c +++ b/src/vfm.c @@ -336,7 +336,7 @@ filter_case (const struct ccase *c, int case_idx) if (filter_var != NULL) { double f = case_num (c, filter_var->fv); - if (f == 0.0 || f == SYSMIS || is_num_user_missing (f, filter_var)) + if (f == 0.0 || mv_is_num_missing (&filter_var->miss, f)) return 1; } diff --git a/tests/ChangeLog b/tests/ChangeLog index 6dc05961b4..f515119302 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,9 @@ +Sat Aug 6 17:32:39 2005 Ben Pfaff + + * command/missing-values.sh: New test. + + * Makefile.am: Add new test. + Mon Aug 1 21:51:46 2005 Ben Pfaff * bugs/big-input-2.sh: Don't use 1...100000 (etc.) with Perl diff --git a/tests/Makefile.am b/tests/Makefile.am index 3a4172dd2e..21b70ddab6 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -22,6 +22,7 @@ TESTS = \ command/loop.sh \ command/longvars.sh \ command/match-files.sh \ + command/missing-values.sh \ command/no_case_size.sh \ command/oneway.sh \ command/oneway-missing.sh \ diff --git a/tests/command/missing-values.sh b/tests/command/missing-values.sh new file mode 100755 index 0000000000..4d530989c9 --- /dev/null +++ b/tests/command/missing-values.sh @@ -0,0 +1,125 @@ +#!/bin/sh + +# This program tests MISSING VALUES + +TEMPDIR=/tmp/pspp-tst-$$ +TESTFILE=$TEMPDIR/`basename $0`.sps + +here=`pwd`; + +# ensure that top_srcdir is absolute +cd $top_srcdir; top_srcdir=`pwd` + +STAT_CONFIG_PATH=$top_srcdir/config +export STAT_CONFIG_PATH + + +cleanup() +{ + cd / + rm -rf $TEMPDIR +} + + +fail() +{ + echo $activity + echo FAILED + cleanup; + exit 1; +} + + +no_result() +{ + echo $activity + echo NO RESULT; + cleanup; + exit 2; +} + +pass() +{ + cleanup; + exit 0; +} + +mkdir -p $TEMPDIR + +cd $TEMPDIR + +# Copy this file --- it's shared with another test +activity="create data" +cp $top_srcdir/tests/data-list.data $TEMPDIR +if [ $? 
-ne 0 ] ; then no_result ; fi + + +activity="create program" +cat > $TEMPDIR/missing-values.stat << foobar +DATA LIST NOTABLE/str1 1-5 (A) str2 6-8 (A) date1 9-19 (DATE) num1 20-25. + +/* Valid: numeric missing values. +MISSING VALUES date1 num1 (1). +MISSING VALUES date1 num1 (1, 2). +MISSING VALUES date1 num1 (1, 2, 3). + +/* Valid: numeric missing values using the first variable's format. +MISSING VALUES num1 date1 ('1'). +MISSING VALUES num1 date1 ('1', '2'). +MISSING VALUES num1 date1 ('1', '2', '3'). +MISSING VALUES date1 num1 ('06-AUG-05'). +MISSING VALUES date1 num1 ('06-AUG-05', '01-OCT-78'). +MISSING VALUES date1 num1 ('06-AUG-05', '01-OCT-78', '14-FEB-81'). + +/* Valid: ranges of numeric missing values. +MISSING VALUES num1 (1 THRU 2). +MISSING VALUES num1 (LO THRU 2). +MISSING VALUES num1 (LOWEST THRU 2). +MISSING VALUES num1 (1 THRU HI). +MISSING VALUES num1 (1 THRU HIGHEST). + +/* Valid: a range of numeric missing values, plus an individual value. +MISSING VALUES num1 (1 THRU 2, 3). +MISSING VALUES num1 (LO THRU 2, 3). +MISSING VALUES num1 (LOWEST THRU 2, 3). +MISSING VALUES num1 (1 THRU HI, -1). +MISSING VALUES num1 (1 THRU HIGHEST, -1). + +/* Valid: string missing values. +MISSING VALUES str1 str2 ('abc ','def'). + +/* Invalid: too long for str2. +MISSING VALUES str1 str2 ('abcde'). + +/* Invalid: no string ranges. +MISSING VALUES str1 ('a' THRU 'z'). + +/* Invalid: mixing string and numeric variables. +MISSING VALUES str1 num1 ('123'). + +/* Valid: may mix variable types when clearing missing values. +MISSING VALUES ALL (). + +foobar +if [ $? -ne 0 ] ; then no_result ; fi + + +activity="run program" +$SUPERVISOR $here/../src/pspp --testing-mode -o raw-ascii --testing-mode $TEMPDIR/missing-values.stat > $TEMPDIR/errs +# Note vv --- there are errors in input. Therefore, the command must FAIL +if [ $? -eq 0 ] ; then fail ; fi + +activity="compare error messages" +diff -w $TEMPDIR/errs - <