From: Ben Pfaff Date: Sun, 7 Aug 2005 04:39:27 +0000 (+0000) Subject: Clean up treatment of missing values by moving all the code into X-Git-Tag: v0.6.0~1227 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=933b760efccdfa26f14254f1fae002ea3b0a1495;p=pspp-builds.git Clean up treatment of missing values by moving all the code into one place. All references to the missing value function were updated, but only major changes are detailed below. --- diff --git a/src/ChangeLog b/src/ChangeLog index f034dd45..fb536fd1 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,60 @@ +Sat Aug 6 21:29:15 2005 Ben Pfaff + + * factor_stats.c: Needed included earlier. + + * percentiles.c: Needed to include . + + * val.h: Don't include "config.h". + +Sat Aug 6 21:26:27 2005 Ben Pfaff + + Clean up treatment of missing values by moving all the code into + one place. All references to the missing value function were + updated, but only major changes are detailed below. + + * Makefile.am: Add missing-values.c, missing-values.h to sources. + + * apply-dict.c: (cmd_apply_dictionary) Use mv_resize(). + + * dictionary.c: (dict_create_var) Initialize `miss' member with + mv_init(). + (dict_clone_var) Copy `miss' member with mv_copy(). + + * get.c: (mtf_merge_dictionary) Use mv_copy(). + + * missing-values.c: New file. + + * missing-values.h: New file. + + * mis-val.c: Rewrite. New version implements updated semantics. + + * pfm-read.c: (read_variables) Rewrite missing value handling. + + * pfm-write.c: (write_variables) Rewrite missing value handling. + + * sfm-read.c: (read_variables) Rewrite missing value handling. + + * sfm-write.c: (write_variable) Rewrite missing value handling. + + * sfmP.h: Include "magic.h" to get definition of + second_lowest_value. + + * sysfile-info.c: (describe_variable) Rewrite missing value + handling. + + * val.h: Include "magic.h" to get definition of + second_lowest_value. + + * var.h: Include "missing-values.h". 
Drop MISSING_* enums. + (struct variable) Remove `miss_type', `missing'. Add `miss'. + + * vars-atr.c: (is_num_user_missing) Removed--use + mv_is_num_user_missing(). + (is_str_user_missing) Removed--use mv_is_str_user_missing(). + (is_system_missing) Removed--use mv_is_value_system_missing(). + (is_missing) Removed--use mv_is_value_missing(). + (is_user_missing) Removed--use mv_is_value_user_missing(). + Sun Jul 31 14:09:57 2005 Ben Pfaff Adopt use of gnulib for portability. diff --git a/src/Makefile.am b/src/Makefile.am index dcb6d078..b6562c16 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -41,37 +41,31 @@ else chart_sources = dummy-chart.c endif -pspp_SOURCES = $(q_sources_c) $(chart_sources) \ -aggregate.c algorithm.c algorithm.h \ -alloc.c alloc.h apply-dict.c ascii.c autorecode.c bitvector.h \ -calendar.c calendar.h case.c case.h casefile.c casefile.h chart.c \ -chart.h cmdline.c cmdline.h command.c command.def \ -command.h compute.c copyleft.c copyleft.h count.c data-in.c data-in.h \ +pspp_SOURCES = $(q_sources_c) $(chart_sources) aggregate.c algorithm.c \ +algorithm.h alloc.c alloc.h apply-dict.c ascii.c autorecode.c \ +bitvector.h calendar.c calendar.h case.c case.h casefile.c casefile.h \ +chart.c chart.h cmdline.c cmdline.h command.c command.def command.h \ +compute.c copyleft.c copyleft.h count.c data-in.c data-in.h \ data-list.c data-list.h data-out.c date.c debug-print.h descript.c \ -devind.c devind.h dfm-read.c dfm-read.h dfm-write.c dfm-write.h \ -dictionary.c dictionary.h do-if.c do-ifP.h echo.c error.c \ -error.h factor_stats.c factor_stats.h file-handle.h \ -file-type.c filename.c filename.h flip.c font.h format.c format.def \ -format.h formats.c get.c getl.c getl.h glob.c glob.h \ -groff-font.c group.c group.h group_proc.h \ -hash.c hash.h histogram.c histogram.h \ -html.c htmlP.h include.c inpt-pgm.c lexer.c lexer.h levene.c levene.h \ -linked-list.c linked-list.h log.h loop.c magic.c magic.h main.c main.h \ -matrix-data.c mis-val.c 
misc.c misc.h modify-vars.c \ -moments.c moments.h numeric.c output.c output.h \ -percentiles.c percentiles.h permissions.c \ -pfm-read.c pfm-read.h \ -pfm-write.c pfm-write.h \ -pool.c pool.h postscript.c print.c recode.c \ +devind.c devind.h dfm-read.c dfm-read.h dfm-write.c dfm-write.h \ +dictionary.c dictionary.h do-if.c do-ifP.h echo.c error.c error.h \ +factor_stats.c factor_stats.h file-handle.h file-type.c filename.c \ +filename.h flip.c font.h format.c format.def format.h formats.c get.c \ +getl.c getl.h glob.c glob.h groff-font.c group.c group.h group_proc.h \ +hash.c hash.h histogram.c histogram.h html.c htmlP.h include.c \ +inpt-pgm.c lexer.c lexer.h levene.c levene.h linked-list.c \ +linked-list.h log.h loop.c magic.c magic.h main.c main.h matrix-data.c \ +mis-val.c misc.c misc.h missing-values.c missing-values.h \ +modify-vars.c moments.c moments.h numeric.c output.c output.h \ +percentiles.c percentiles.h permissions.c pfm-read.c pfm-read.h \ +pfm-write.c pfm-write.h pool.c pool.h postscript.c print.c recode.c \ rename-vars.c repeat.c repeat.h sample.c sel-if.c settings.h \ sfm-read.c sfm-read.h sfm-write.c sfm-write.h sfmP.h som.c som.h \ -sort.c sort.h sort-prs.c sort-prs.h \ -split-file.c str.c str.h subclist.c subclist.h \ -sysfile-info.c tab.c tab.h temporary.c mkfile.c mkfile.h \ -title.c val.h val-labs.c value-labels.c value-labels.h \ -var-display.c \ -var-labs.c var.h vars-atr.c vars-prs.c vector.c version.h \ -vfm.c vfm.h vfmP.h weight.c +sort.c sort.h sort-prs.c sort-prs.h split-file.c str.c str.h \ +subclist.c subclist.h sysfile-info.c tab.c tab.h temporary.c mkfile.c \ +mkfile.h title.c val.h val-labs.c value-labels.c value-labels.h \ +var-display.c var-labs.c var.h vars-atr.c vars-prs.c vector.c \ +version.h vfm.c vfm.h vfmP.h weight.c pspp_LDADD = \ diff --git a/src/aggregate.c b/src/aggregate.c index 2c1495f0..bc554b97 100644 --- a/src/aggregate.c +++ b/src/aggregate.c @@ -726,7 +726,8 @@ accumulate_aggregate_info (struct agr_proc 
*agr, { const union value *v = case_data (input, iter->src->fv); - if ((!iter->include_missing && is_missing (v, iter->src)) + if ((!iter->include_missing + && mv_is_value_missing (&iter->src->miss, v)) || (iter->include_missing && iter->src->type == NUMERIC && v->f == SYSMIS)) { diff --git a/src/apply-dict.c b/src/apply-dict.c index 74c1642b..473daf01 100644 --- a/src/apply-dict.c +++ b/src/apply-dict.c @@ -129,31 +129,18 @@ cmd_apply_dictionary (void) } } - if (s->miss_type != MISSING_NONE && t->width > MAX_SHORT_STRING) + if (!mv_is_empty (&s->miss) && t->width > MAX_SHORT_STRING) msg (SW, _("Cannot apply missing values from source file to " "long string variable %s."), s->name); - else if (s->miss_type != MISSING_NONE) + else if (!mv_is_empty (&s->miss)) { - if (t->width < s->width) - { - static const int miss_count[MISSING_COUNT] = - { - 0, 1, 2, 3, 2, 1, 1, 3, 2, 2, - }; - - int j, k; - - for (j = 0; j < miss_count[s->miss_type]; j++) - for (k = t->width; k < s->width; k++) - if (s->missing[j].s[k] != ' ') - goto skip_missing_values; - } - - t->miss_type = s->miss_type; - memcpy (t->missing, s->missing, sizeof s->missing); + if (mv_is_resizable (&s->miss, t->width)) + { + mv_copy (&t->miss, &s->miss); + mv_resize (&t->miss, t->width); + } } - skip_missing_values: ; if (s->type == NUMERIC) { diff --git a/src/count.c b/src/count.c index bab59f90..90275084 100644 --- a/src/count.c +++ b/src/count.c @@ -385,7 +385,7 @@ count_numeric (struct counting * cnt, struct ccase * c) counter++; continue; } - if (cnt->missing >= 2 && is_num_user_missing (cmp, cnt->v[i])) + if (cnt->missing >= 2 && mv_is_num_user_missing (&cnt->v[i]->miss, cmp)) { counter++; continue; diff --git a/src/crosstabs.q b/src/crosstabs.q index 09873e85..21d2d3df 100644 --- a/src/crosstabs.q +++ b/src/crosstabs.q @@ -576,11 +576,11 @@ calc_general (struct ccase *c, void *aux UNUSED) assert (x != NULL); for (j = 0; j < x->nvar; j++) { - if ((cmd.miss == CRS_TABLE - && is_missing (case_data (c, 
x->vars[j]->fv), x->vars[j])) + const union value *v = case_data (c, x->vars[j]->fv); + const struct missing_values *mv = &x->vars[j]->miss; + if ((cmd.miss == CRS_TABLE && mv_is_value_missing (mv, v)) || (cmd.miss == CRS_INCLUDE - && is_system_missing (case_data (c, x->vars[j]->fv), - x->vars[j]))) + && mv_is_value_system_missing (mv, v))) { x->missing += weight; goto next_crosstab; @@ -650,7 +650,8 @@ calc_integer (struct ccase *c, void *aux UNUSED) /* Note that the first test also rules out SYSMIS. */ if ((value < vr->min || value >= vr->max) - || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v))) + || (cmd.miss == CRS_TABLE + && mv_is_num_user_missing (&v->miss, value))) { x->missing += weight; goto next_crosstab; @@ -1410,7 +1411,7 @@ delete_missing (void) int r; for (r = 0; r < n_rows; r++) - if (is_num_user_missing (rows[r].f, x->vars[ROW_VAR])) + if (mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f)) { int c; @@ -1424,7 +1425,7 @@ delete_missing (void) int c; for (c = 0; c < n_cols; c++) - if (is_num_user_missing (cols[c].f, x->vars[COL_VAR])) + if (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)) { int r; @@ -1657,7 +1658,7 @@ table_value_missing (struct tab_table *table, int c, int r, unsigned char opt, s.string = tab_alloc (table, var->print.w); format_short (s.string, &var->print, v); s.length = strlen (s.string); - if (cmd.miss == CRS_REPORT && is_num_user_missing (v->f, var)) + if (cmd.miss == CRS_REPORT && mv_is_num_user_missing (&var->miss, v->f)) s.string[s.length++] = 'M'; while (s.length && *s.string == ' ') { @@ -1740,8 +1741,9 @@ display_crosstabulation (void) int mark_missing = 0; double expected_value = row_tot[r] * col_tot[c] / W; if (cmd.miss == CRS_REPORT - && (is_num_user_missing (cols[c].f, x->vars[COL_VAR]) - || is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))) + && (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f) + || mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, + rows[r].f))) 
mark_missing = 1; for (i = 0; i < num_cells; i++) { @@ -1806,7 +1808,7 @@ display_crosstabulation (void) int mark_missing = 0; if (cmd.miss == CRS_REPORT - && is_num_user_missing (rows[r].f, x->vars[ROW_VAR])) + && mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f)) mark_missing = 1; for (i = 0; i < num_cells; i++) @@ -1862,7 +1864,7 @@ display_crosstabulation (void) int i; if (cmd.miss == CRS_REPORT && c < n_cols - && is_num_user_missing (cols[c].f, x->vars[COL_VAR])) + && mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)) mark_missing = 1; for (i = 0; i < num_cells; i++) diff --git a/src/descript.c b/src/descript.c index fedba7f6..05e0767d 100644 --- a/src/descript.c +++ b/src/descript.c @@ -583,8 +583,9 @@ descriptives_trns_proc (struct trns_header *trns, struct ccase * c, for (vars = t->vars; vars < t->vars + t->var_cnt; vars++) { double score = case_num (c, (*vars)->fv); - if ( score == SYSMIS || (!t->include_user_missing - && is_num_user_missing(score, *vars)) ) + if ( score == SYSMIS + || (!t->include_user_missing + && mv_is_num_user_missing (&(*vars)->miss, score))) { all_sysmis = 1; break; @@ -599,7 +600,8 @@ descriptives_trns_proc (struct trns_header *trns, struct ccase * c, if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis || input == SYSMIS - || (!t->include_user_missing && is_num_user_missing(input, z->v))) + || (!t->include_user_missing + && mv_is_num_user_missing (&z->v->miss, input))) *output = SYSMIS; else *output = (input - z->mean) / z->std_dev; @@ -739,7 +741,7 @@ calc_descriptives (const struct casefile *cf, void *dsc_) if (dsc->missing_type != DSC_LISTWISE && (x == SYSMIS || (!dsc->include_user_missing - && is_num_user_missing (x, dv->v)))) + && mv_is_num_user_missing (&dv->v->miss, x)))) { dv->missing += weight; continue; @@ -781,7 +783,7 @@ calc_descriptives (const struct casefile *cf, void *dsc_) if (dsc->missing_type != DSC_LISTWISE && (x == SYSMIS || (!dsc->include_user_missing - && is_num_user_missing (x, 
dv->v)))) + && mv_is_num_user_missing (&dv->v->miss, x)))) continue; if (dv->moments != NULL) @@ -844,7 +846,8 @@ listwise_missing (struct dsc_proc *dsc, const struct ccase *c) double x = case_num (c, dv->v->fv); if (x == SYSMIS - || (!dsc->include_user_missing && is_num_user_missing (x, dv->v))) + || (!dsc->include_user_missing + && mv_is_num_user_missing (&dv->v->miss, x))) return 1; } return 0; diff --git a/src/dictionary.c b/src/dictionary.c index 9ea9f31a..3f1177d3 100644 --- a/src/dictionary.c +++ b/src/dictionary.c @@ -279,7 +279,7 @@ dict_create_var (struct dictionary *d, const char *name, int width) v->init = 1; v->reinit = dict_class_from_id (v->name) != DC_SCRATCH; v->index = d->var_cnt; - v->miss_type = MISSING_NONE; + mv_init (&v->miss, width); if (v->type == NUMERIC) { v->print = f8_2; @@ -354,8 +354,7 @@ dict_clone_var (struct dictionary *d, const struct variable *ov, the same short name. */ nv->init = 1; nv->reinit = ov->reinit; - nv->miss_type = ov->miss_type; - memcpy (nv->missing, ov->missing, sizeof nv->missing); + mv_copy (&nv->miss, &ov->miss); nv->print = ov->print; nv->write = ov->write; val_labs_destroy (nv->val_labs); @@ -678,7 +677,7 @@ dict_get_case_weight (const struct dictionary *d, const struct ccase *c, else { double w = case_num (c, d->weight->fv); - if ( w < 0.0 || w == SYSMIS || is_num_user_missing(w, d->weight) ) + if (w < 0.0 || mv_is_num_missing (&d->weight->miss, w)) w = 0.0; if ( w == 0.0 && *warn_on_invalid ) { *warn_on_invalid = 0; diff --git a/src/examine.q b/src/examine.q index b54f574b..33498d85 100644 --- a/src/examine.q +++ b/src/examine.q @@ -169,7 +169,7 @@ const char *factor_to_string_concise(const struct factor *fctr, /* Function to use for testing for missing values */ -static is_missing_func value_is_missing; +static is_missing_func *value_is_missing; /* PERCENTILES */ @@ -193,9 +193,9 @@ cmd_examine(void) /* If /MISSING=INCLUDE is set, then user missing values are ignored */ if (cmd.incl == XMN_INCLUDE ) - 
value_is_missing = is_system_missing; + value_is_missing = mv_is_value_system_missing; else - value_is_missing = is_missing; + value_is_missing = mv_is_value_missing; if ( cmd.st_n == SYSMIS ) cmd.st_n = 5; @@ -650,7 +650,7 @@ factor_calc(struct ccase *c, int case_no, double weight, int case_missing) const struct variable *var = dependent_vars[v]; const union value *val = case_data (c, var->fv); - if ( value_is_missing(val,var) || case_missing ) + if ( value_is_missing (&var->miss, val) || case_missing ) val = 0; metrics_calc( &(*foo)->m[v], val, weight, case_no); @@ -712,7 +712,7 @@ run_examine(const struct casefile *cf, void *cmd_ ) const struct variable *var = dependent_vars[v]; const union value *val = case_data (&c, var->fv); - if ( value_is_missing(val,var)) + if ( value_is_missing(&var->miss, val)) case_missing = 1; } @@ -723,7 +723,7 @@ run_examine(const struct casefile *cf, void *cmd_ ) const struct variable *var = dependent_vars[v]; const union value *val = case_data (&c, var->fv); - if ( value_is_missing(val,var) || case_missing ) + if ( value_is_missing(&var->miss, val) || case_missing ) val = 0; metrics_calc(&totals[v], val, weight, case_no); diff --git a/src/expressions/operations.def b/src/expressions/operations.def index 0d21c16a..bd5af76c 100644 --- a/src/expressions/operations.def +++ b/src/expressions/operations.def @@ -896,7 +896,7 @@ no_opt operator VEC_ELEM_NUM (idx) { const struct variable *var = v->var[(int) idx - 1]; double value = case_num (c, var->fv); - return !is_num_user_missing (value, var) ? value : SYSMIS; + return !mv_is_num_user_missing (&var->miss, value) ? value : SYSMIS; } else { @@ -943,7 +943,7 @@ no_opt operator NUM_VAR () num_var v; { double d = case_num (c, v->fv); - return !is_num_user_missing (d, v) ? d : SYSMIS; + return !mv_is_num_user_missing (&v->miss, d) ? 
d : SYSMIS; } no_opt string operator STR_VAR () @@ -962,7 +962,7 @@ no_opt function LAG (num_var v, pos_int n_before) if (c != NULL) { double x = case_num (c, v->fv); - return !is_num_user_missing (x, v) ? x : SYSMIS; + return !mv_is_num_user_missing (&v->miss, x) ? x : SYSMIS; } else return SYSMIS; @@ -974,7 +974,7 @@ no_opt function LAG (num_var v) if (c != NULL) { double x = case_num (c, v->fv); - return !is_num_user_missing (x, v) ? x : SYSMIS; + return !mv_is_num_user_missing (&v->miss, x) ? x : SYSMIS; } else return SYSMIS; diff --git a/src/factor_stats.c b/src/factor_stats.c index 4508caf8..e090517b 100644 --- a/src/factor_stats.c +++ b/src/factor_stats.c @@ -18,8 +18,8 @@ along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include <config.h> #include "factor_stats.h" -#include "config.h" #include "val.h" #include "hash.h" #include "algorithm.h" diff --git a/src/frequencies.q b/src/frequencies.q index c165c21d..0ca83a45 100644 --- a/src/frequencies.q +++ b/src/frequencies.q @@ -697,7 +697,7 @@ not_missing (const void *f_, void *v_) const struct freq *f = f_; struct variable *v = v_; - return !is_missing (&f->v, v); + return !mv_is_value_missing (&v->miss, &f->v); } /* Summarizes the frequency table data for variable V. 
*/ @@ -1564,7 +1564,7 @@ freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var) /* Find out the extremes of the x value */ for ( frq = hsh_first(fh, &hi); frq != 0; frq = hsh_next(fh, &hi) ) { - if ( is_missing(&frq->v, var)) + if ( mv_is_value_missing(&var->miss, &frq->v)) continue; if ( frq->v.f < x_min ) x_min = frq->v.f ; diff --git a/src/get.c b/src/get.c index a13277a6..f484d120 100644 --- a/src/get.c +++ b/src/get.c @@ -1426,9 +1426,8 @@ mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f) if (val_labs_count (dv->val_labs) && !val_labs_count (mv->val_labs)) mv->val_labs = val_labs_copy (dv->val_labs); - if (dv->miss_type != MISSING_NONE - && mv->miss_type == MISSING_NONE) - copy_missing_values (mv, dv); + if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss)) + mv_copy (&mv->miss, &dv->miss); } if (dv->label && !mv->label) diff --git a/src/levene.c b/src/levene.c index 833a65e0..7877c7ec 100644 --- a/src/levene.c +++ b/src/levene.c @@ -77,8 +77,7 @@ struct levene_info enum lev_missing missing; /* Function to test for missing values */ - is_missing_func is_missing; - + is_missing_func *is_missing; }; /* First pass */ @@ -202,7 +201,7 @@ levene_calc (const struct ccase *c, void *_l) struct variable *v = l->v_dep[i]; const union value *val = case_data (c, v->fv); - if (l->is_missing(val,v) ) + if (l->is_missing (&v->miss, val) ) { return 0; } @@ -225,7 +224,7 @@ levene_calc (const struct ccase *c, void *_l) if ( 0 == gs ) continue ; - if ( ! l->is_missing(v,var)) + if ( ! l->is_missing(&var->miss, v)) { levene_z= fabs(v->f - gs->mean); lz[i].grand_total += levene_z * weight; @@ -309,7 +308,7 @@ levene2_calc (const struct ccase *c, void *_l) struct variable *v = l->v_dep[i]; const union value *val = case_data (c, v->fv); - if (l->is_missing(val,v) ) + if (l->is_missing(&v->miss, val) ) { return 0; } @@ -330,7 +329,7 @@ levene2_calc (const struct ccase *c, void *_l) if ( 0 == gs ) continue; - if ( ! 
l->is_missing(v,var) ) + if ( ! l->is_missing (&var->miss, v) ) { levene_z = fabs(v->f - gs->mean); lz_denominator[i] += weight * pow2(levene_z - gs->lz_mean); diff --git a/src/mis-val.c b/src/mis-val.c index 27a51343..555dbf09 100644 --- a/src/mis-val.c +++ b/src/mis-val.c @@ -21,6 +21,7 @@ #include "error.h" #include #include "command.h" +#include "data-in.h" #include "error.h" #include "lexer.h" #include "magic.h" @@ -32,317 +33,180 @@ #include "debug-print.h" -/* Variables on MIS VAL. */ -static struct variable **v; -static int nv; - -/* Type of the variables on MIS VAL. */ -static int type; - -/* Width of string variables on MIS VAL. */ -static size_t width; - -/* Items to fill-in var structs with. */ -static int miss_type; -static union value missing[3]; - -static int parse_varnames (void); -static int parse_numeric (void); -static int parse_alpha (void); +static bool parse_number (double *, const struct fmt_spec *); int cmd_missing_values (void) { - int i; + struct variable **v; + int nv; + + int retval = CMD_PART_SUCCESS_MAYBE; + bool deferred_errors = false; while (token != '.') { - if (!parse_varnames ()) - goto fail; + int i; + - if (token != ')') - { - if ((type == NUMERIC && !parse_numeric ()) - || (type == ALPHA && !parse_alpha ())) - goto fail; - } - else - miss_type = MISSING_NONE; + if (!parse_variables (default_dict, &v, &nv, PV_NONE)) + goto done; - if (!lex_match (')')) - { - msg (SE, _("`)' expected after value specification.")); - goto fail; - } + if (!lex_match ('(')) + { + lex_error (_("expecting `('")); + goto done; + } for (i = 0; i < nv; i++) - { - v[i]->miss_type = miss_type; - memcpy (v[i]->missing, missing, sizeof v[i]->missing); - } + mv_init (&v[i]->miss, v[i]->width); + + if (!lex_match (')')) + { + struct missing_values mv; + + for (i = 0; i < nv; i++) + if (v[i]->type != v[0]->type) + { + const struct variable *n = v[0]->type == NUMERIC ? v[0] : v[i]; + const struct variable *s = v[0]->type == NUMERIC ? 
v[i] : v[0]; + msg (SE, _("Cannot mix numeric variables (e.g. %s) and " + "string variables (e.g. %s) within a single list."), + n->name, s->name); + goto done; + } + + if (v[0]->type == NUMERIC) + { + mv_init (&mv, 0); + while (!lex_match (')')) + { + double x; + + if (lex_match_id ("LO") || lex_match_id ("LOWEST")) + x = LOWEST; + else if (!parse_number (&x, &v[0]->print)) + goto done; + + if (lex_match_id ("THRU")) + { + double y; + + if (lex_match_id ("HI") || lex_match_id ("HIGHEST")) + y = HIGHEST; + else if (!parse_number (&y, &v[0]->print)) + goto done; + + if (x == LOWEST && y == HIGHEST) + { + msg (SE, _("LO THRU HI is an invalid range.")); + deferred_errors = true; + } + else if (!mv_add_num_range (&mv, x, y)) + deferred_errors = true; + } + else + { + if (x == LOWEST) + { + msg (SE, _("LO or LOWEST must be part of a range.")); + deferred_errors = true; + } + else if (!mv_add_num (&mv, x)) + deferred_errors = true; + } + + lex_match (','); + } + } + else + { + mv_init (&mv, MAX_SHORT_STRING); + while (!lex_match (')')) + { + if (!lex_force_string ()) + { + deferred_errors = true; + break; + } + + if (ds_length (&tokstr) > MAX_SHORT_STRING) + { + ds_truncate (&tokstr, MAX_SHORT_STRING); + msg (SE, _("Truncating missing value to short string " + "length (%d characters)."), + MAX_SHORT_STRING); + } + else + ds_rpad (&tokstr, MAX_SHORT_STRING, ' '); + + if (!mv_add_str (&mv, ds_data (&tokstr))) + deferred_errors = true; + + lex_get (); + lex_match (','); + } + } + + for (i = 0; i < nv; i++) + { + if (!mv_is_resizable (&mv, v[i]->width)) + { + msg (SE, _("Missing values provided are too long to assign " + "to variable of width %d."), + v[i]->width); + deferred_errors = true; + } + else + { + mv_copy (&v[i]->miss, &mv); + mv_resize (&v[i]->miss, v[i]->width); + } + } + } lex_match ('/'); free (v); + v = NULL; } - - return lex_end_of_command (); - -fail: + retval = lex_end_of_command (); + + done: free (v); - return CMD_PART_SUCCESS_MAYBE; + if 
(deferred_errors) + retval = CMD_PART_SUCCESS_MAYBE; + return retval; } -static int -parse_varnames (void) +static bool +parse_number (double *x, const struct fmt_spec *f) { - int i; - - if (!parse_variables (default_dict, &v, &nv, PV_SAME_TYPE)) - return 0; - if (!lex_match ('(')) + if (lex_is_number ()) { - msg (SE, _("`(' expected after variable name%s."), nv > 1 ? "s" : ""); - return 0; - } - - type = v[0]->type; - if (type == NUMERIC) - return 1; - - width = v[0]->width; - for (i = 1; i < nv; i++) - if (v[i]->type == ALPHA && v[i]->nv != 1) - { - msg (SE, _("Long string value specified.")); - return 0; - } - else if (v[i]->type == ALPHA && (int) width != v[i]->width) - { - msg (SE, _("Short strings must be of equal width.")); - return 0; - } - - return 1; -} - -/* Number or range? */ -enum - { - MV_NOR_NOTHING, /* Empty. */ - MV_NOR_NUMBER, /* Single number. */ - MV_NOR_RANGE /* Range. */ - }; - -/* A single value or a range. */ -struct num_or_range - { - int type; /* One of NOR_*. */ - double d[2]; /* d[0]=lower bound or value, d[1]=upper bound. */ - }; - -/* Parses something of the form <num>, or LO[WEST] THRU <num>, or - <num> THRU HI[GHEST], or <num> THRU <num>, and sets the appropriate - members of NOR. Returns success. 
*/ -static int -parse_num_or_range (struct num_or_range * nor) -{ - if (lex_match_id ("LO") || lex_match_id ("LOWEST")) - { - nor->type = MV_NOR_RANGE; - if (!lex_force_match_id ("THRU")) - return 0; - if (!lex_force_num ()) - return 0; - nor->d[0] = LOWEST; - nor->d[1] = tokval; - } - else if (lex_is_number ()) - { - nor->d[0] = tokval; + *x = lex_number (); lex_get (); - - if (lex_match_id ("THRU")) - { - nor->type = MV_NOR_RANGE; - if (lex_match_id ("HI") || lex_match_id ("HIGHEST")) - nor->d[1] = HIGHEST; - else - { - if (!lex_force_num ()) - return 0; - nor->d[1] = tokval; - lex_get (); - - if (nor->d[0] > nor->d[1]) - { - msg (SE, _("Range %g THRU %g is not valid because %g is " - "greater than %g."), - nor->d[0], nor->d[1], nor->d[0], nor->d[1]); - return 0; - } - } - } - else - nor->type = MV_NOR_NUMBER; + return true; } - else - return -1; - - return 1; -} - -/* Parses a set of numeric missing values and stores them into - `missing[]' and `miss_type' global variables. */ -static int -parse_numeric (void) -{ - struct num_or_range set[3]; - int r; - - set[1].type = set[2].type = MV_NOR_NOTHING; - - /* Get first number or range. */ - r = parse_num_or_range (&set[0]); - if (r < 1) + else if (token == T_STRING) { - if (r == -1) - msg (SE, _("Number or range expected.")); - return 0; - } - - /* Get second and third optional number or range. */ - lex_match (','); - r = parse_num_or_range (&set[1]); - if (r == 1) - { - lex_match (','); - r = parse_num_or_range (&set[2]); - } - if (r == 0) - return 0; - - /* Force range, if present, into set[0]. */ - if (set[1].type == MV_NOR_RANGE) - { - struct num_or_range t = set[1]; - set[1] = set[0]; - set[0] = t; - } - if (set[2].type == MV_NOR_RANGE) - { - struct num_or_range t = set[2]; - set[2] = set[0]; - set[0] = t; - } - - /* Ensure there's not more than one range, or one range - plus one value. 
*/ - if (set[1].type == MV_NOR_RANGE || set[2].type == MV_NOR_RANGE) - { - msg (SE, _("At most one range can exist in the missing values " - "for any one variable.")); - return 0; - } - if (set[0].type == MV_NOR_RANGE && set[2].type != MV_NOR_NOTHING) - { - msg (SE, _("At most one individual value can be missing along " - "with one range.")); - return 0; - } - - /* Set missing[] from set[]. */ - if (set[0].type == MV_NOR_RANGE) - { - int x = 0; - - if (set[0].d[0] == LOWEST) - { - miss_type = MISSING_LOW; - missing[x++].f = set[0].d[1]; - } - else if (set[0].d[1] == HIGHEST) - { - miss_type = MISSING_HIGH; - missing[x++].f = set[0].d[0]; - } - else - { - miss_type = MISSING_RANGE; - missing[x++].f = set[0].d[0]; - missing[x++].f = set[0].d[1]; - } - - if (set[1].type == MV_NOR_NUMBER) - { - miss_type += 3; - missing[x].f = set[1].d[0]; - } - } - else - { - if (set[0].type == MV_NOR_NUMBER) - { - miss_type = MISSING_1; - missing[0].f = set[0].d[0]; - } - if (set[1].type == MV_NOR_NUMBER) - { - miss_type = MISSING_2; - missing[1].f = set[1].d[0]; - } - if (set[2].type == MV_NOR_NUMBER) - { - miss_type = MISSING_3; - missing[2].f = set[2].d[0]; - } - } - - return 1; -} - -static int -parse_alpha (void) -{ - for (miss_type = 0; token == T_STRING && miss_type < 3; miss_type++) - { - if (ds_length (&tokstr) != width) - { - msg (SE, _("String is not of proper length.")); - return 0; - } - strncpy (missing[miss_type].s, ds_c_str (&tokstr), MAX_SHORT_STRING); + struct data_in di; + union value v; + di.s = ds_data (&tokstr); + di.e = ds_end (&tokstr); + di.v = &v; + di.flags = 0; + di.f1 = 1; + di.f2 = ds_length (&tokstr); + di.format = *f; + data_in (&di); lex_get (); - lex_match (','); + *x = v.f; + return true; } - if (miss_type < 1) + else { - msg (SE, _("String expected.")); - return 0; + lex_error (_("expecting number or data string")); + return false; } - - return 1; } -/* Copy the missing values from variable SRC to variable DEST. 
*/ -void -copy_missing_values (struct variable *dest, const struct variable *src) -{ - static const int n_values[MISSING_COUNT] = - { - 0, 1, 2, 3, 2, 1, 1, 3, 2, 2, - }; - - assert (dest->width == src->width); - assert (src->miss_type >= 0 && src->miss_type < MISSING_COUNT); - - { - int i; - - dest->miss_type = src->miss_type; - for (i = 0; i < n_values[src->miss_type]; i++) - if (src->type == NUMERIC) - dest->missing[i].f = src->missing[i].f; - else - memcpy (dest->missing[i].s, src->missing[i].s, src->width); - } -} diff --git a/src/missing-values.c b/src/missing-values.c new file mode 100644 index 00000000..09192179 --- /dev/null +++ b/src/missing-values.c @@ -0,0 +1,382 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2005 Free Software Foundation, Inc. + Written by Ben Pfaff <blp@gnu.org>. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +#include <config.h> +#include "missing-values.h" +#include <assert.h> +#include <stdlib.h> +#include "str.h" + +/* Initializes MV as a set of missing values for a variable of + the given WIDTH. Although only numeric variables and short + string variables may have missing values, WIDTH may be any + valid variable width. */ +void +mv_init (struct missing_values *mv, int width) +{ + assert (width >= 0 && width <= MAX_STRING); + mv->type = MV_NONE; + mv->width = width; +} + +/* Copies SRC to MV. 
*/ +void +mv_copy (struct missing_values *mv, const struct missing_values *src) +{ + *mv = *src; +} + +/* Returns true if MV is an empty set of missing values. */ +bool +mv_is_empty (const struct missing_values *mv) +{ + return mv->type == MV_NONE; +} + +/* Returns the width of the missing values that MV may + contain. */ +int +mv_get_width (const struct missing_values *mv) +{ + return mv->width; +} + +/* Attempts to add individual value V to the set of missing + values MV. Returns true if successful, false if MV has no + more room for missing values. (Long string variables never + accept missing values.) */ +bool +mv_add_value (struct missing_values *mv, const union value *v) +{ + if (mv->width > MAX_SHORT_STRING) + return false; + switch (mv->type) + { + case MV_NONE: + case MV_1: + case MV_2: + case MV_RANGE: + mv->values[mv->type & 3] = *v; + mv->type++; + return true; + + case MV_3: + case MV_RANGE_1: + return false; + } + abort (); +} + +/* Attempts to add S to the set of string missing values MV. S + must contain exactly as many characters as MV's width. + Returns true if successful, false if MV has no more room for + missing values. (Long string variables never accept missing + values.) */ +bool +mv_add_str (struct missing_values *mv, const unsigned char s[]) +{ + assert (mv->width > 0); + return mv_add_value (mv, (union value *) s); +} + +/* Attempts to add D to the set of numeric missing values MV. + Returns true if successful, false if MV has no more room for + missing values. */ +bool +mv_add_num (struct missing_values *mv, double d) +{ + assert (mv->width == 0); + return mv_add_value (mv, (union value *) &d); +} + +/* Attempts to add range [LOW, HIGH] to the set of numeric + missing values MV. Returns true if successful, false if MV + has no room for a range. 
*/ +bool +mv_add_num_range (struct missing_values *mv, double low, double high) +{ + assert (mv->width == 0); + switch (mv->type) + { + case MV_NONE: + case MV_1: + mv->values[1].f = low; + mv->values[2].f = high; + mv->type |= 4; + return true; + + case MV_2: + case MV_3: + case MV_RANGE: + case MV_RANGE_1: + return false; + } + abort (); +} + +/* Returns true if MV contains an individual value, + false if MV is empty (or contains only a range). */ +bool +mv_has_value (struct missing_values *mv) +{ + switch (mv->type) + { + case MV_1: + case MV_2: + case MV_3: + case MV_RANGE_1: + return true; + + case MV_NONE: + case MV_RANGE: + return false; + } + abort (); +} + +/* Removes one individual value from MV and stores it in *V. + MV must contain an individual value (as determined by + mv_has_value()). */ +void +mv_pop_value (struct missing_values *mv, union value *v) +{ + assert (mv_has_value (mv)); + mv->type--; + *v = mv->values[mv->type & 3]; +} + +/* Returns true if MV contains a numeric range, + false if MV is empty (or contains only individual values). */ +bool +mv_has_range (struct missing_values *mv) +{ + switch (mv->type) + { + case MV_RANGE: + case MV_RANGE_1: + return true; + + case MV_NONE: + case MV_1: + case MV_2: + case MV_3: + return false; + } + abort (); +} + +/* Removes the numeric range from MV and stores it in *LOW and + *HIGH. MV must contain a numeric range (as determined by + mv_has_range()). */ +void +mv_pop_range (struct missing_values *mv, double *low, double *high) +{ + assert (mv_has_range (mv)); + *low = mv->values[1].f; + *high = mv->values[2].f; + mv->type &= 3; +} + +/* Returns true if values[IDX] is in use when the `type' member + is set to TYPE (in struct missing_values), + false otherwise. 
*/ +static bool +using_element (unsigned type, int idx) +{ + assert (idx >= 0 && idx < 3); + + switch (type) + { + case MV_NONE: + return false; + case MV_1: + return idx < 1; + case MV_2: + return idx < 2; + case MV_3: + return true; + case MV_RANGE: + return idx > 0; + case MV_RANGE_1: + return true; + } + abort (); +} + +/* Returns true if S contains only spaces between indexes + NEW_WIDTH (inclusive) and OLD_WIDTH (exclusive), + false otherwise. */ +static bool +can_resize_string (const unsigned char *s, int old_width, int new_width) +{ + int i; + + assert (new_width < old_width); + for (i = new_width; i < old_width; i++) + if (s[i] != ' ') + return false; + return true; +} + +/* Returns true if MV can be resized to the given WIDTH with + mv_resize(), false otherwise. Resizing to the same width is + always possible. Resizing to a long string WIDTH is only + possible if MV is an empty set of missing values; otherwise, + resizing to a larger WIDTH is always possible. Resizing to a + shorter width is possible only when each missing value + contains only spaces in the characters that will be + trimmed. */ +bool +mv_is_resizable (struct missing_values *mv, int width) +{ + assert ((width == 0) == (mv->width == 0)); + if (width > MAX_SHORT_STRING && mv->type != MV_NONE) + return false; + else if (width >= mv->width) + return true; + else + { + int i; + + for (i = 0; i < 3; i++) + if (using_element (mv->type, i) + && !can_resize_string (mv->values[i].s, mv->width, width)) + return false; + return true; + } +} + +/* Resizes MV to the given WIDTH. WIDTH must fit the constraints + explained for mv_is_resizable(). + + When MV grows, the stored values are padded on the right with + spaces. An empty set is never padded: it has no values to + extend, and it is the only kind of set that may be resized + to a long string width, where padding up to WIDTH would + write past the end of the value slots. */ +void +mv_resize (struct missing_values *mv, int width) +{ + assert (mv_is_resizable (mv, width)); + if (width > mv->width && mv->type != MV_NONE) + { + int i; + + for (i = 0; i < 3; i++) + memset (mv->values[i].s + mv->width, ' ', width - mv->width); + } + mv->width = width; +} + +/* Returns true if V is system missing or a missing value in MV, + false otherwise. 
*/ +bool +mv_is_value_missing (const struct missing_values *mv, const union value *v) +{ + return (mv->width == 0 + ? mv_is_num_missing (mv, v->f) + : mv_is_str_missing (mv, v->s)); +} + +/* Returns true if D is system missing or a missing value in MV, + false otherwise. + MV must be a set of numeric missing values. */ +bool +mv_is_num_missing (const struct missing_values *mv, double d) +{ + assert (mv->width == 0); + return d == SYSMIS || mv_is_num_user_missing (mv, d); +} + +/* Returns true if S[] is a missing value in MV, false otherwise. + MV must be a set of string missing values. + S[] must contain exactly as many characters as MV's width. + This simply delegates to mv_is_str_user_missing(), so for + strings the "missing" and "user missing" tests are the + same. */ +bool +mv_is_str_missing (const struct missing_values *mv, + const unsigned char s[]) +{ + return mv_is_str_user_missing (mv, s); +} + +/* Returns true if V is a missing value in MV, false otherwise. + Dispatches on MV's width: 0 selects the numeric test on + V->f, any other width the string test on V->s. */ +bool +mv_is_value_user_missing (const struct missing_values *mv, + const union value *v) +{ + return (mv->width == 0 + ? mv_is_num_user_missing (mv, v->f) + : mv_is_str_user_missing (mv, v->s)); +} + +/* Returns true if D is a missing value in MV, false otherwise. + MV must be a set of numeric missing values. + Individual values are compared with ==. A range matches + when D lies between values[1] and values[2], both bounds + included; with a range present, the one individual value + (if any) is values[0]. */ +bool +mv_is_num_user_missing (const struct missing_values *mv, double d) +{ + const union value *v = mv->values; + assert (mv->width == 0); + switch (mv->type) + { + case MV_NONE: + return false; + case MV_1: + return v[0].f == d; + case MV_2: + return v[0].f == d || v[1].f == d; + case MV_3: + return v[0].f == d || v[1].f == d || v[2].f == d; + case MV_RANGE: + return v[1].f <= d && d <= v[2].f; + case MV_RANGE_1: + return v[0].f == d || (v[1].f <= d && d <= v[2].f); + } + abort (); +} + +/* Returns true if S[] is a missing value in MV, false otherwise. + MV must be a set of string missing values. + S[] must contain exactly as many characters as MV's width. 
*/ +bool +mv_is_str_user_missing (const struct missing_values *mv, + const unsigned char s[]) +{ + const union value *v = mv->values; + assert (mv->width > 0); + switch (mv->type) + { + case MV_NONE: + return false; + case MV_1: + return !memcmp (v[0].s, s, mv->width); + case MV_2: + return (!memcmp (v[0].s, s, mv->width) + || !memcmp (v[1].s, s, mv->width)); + case MV_3: + return (!memcmp (v[0].s, s, mv->width) + || !memcmp (v[1].s, s, mv->width) + || !memcmp (v[2].s, s, mv->width)); + case MV_RANGE: + case MV_RANGE_1: + abort (); /* A string set can never hold a range. */ + } + abort (); +} + +/* Returns true if MV is a set of numeric missing values and V is + the system missing value. String values are never system + missing. */ +bool +mv_is_value_system_missing (const struct missing_values *mv, + const union value *v) +{ + return mv->width == 0 ? v->f == SYSMIS : false; +} diff --git a/src/missing-values.h b/src/missing-values.h new file mode 100644 index 00000000..710fc05e --- /dev/null +++ b/src/missing-values.h @@ -0,0 +1,84 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2005 Free Software Foundation, Inc. + Written by Ben Pfaff . + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +#if !missing_values_h +#define missing_values_h 1 + +#include <stdbool.h> +#include "val.h" + +/* Types of user-missing values. + Invisible--use access functions defined below instead. 
*/ +enum mv_type + { + MV_NONE = 0, /* No user-missing values. */ + MV_1 = 1, /* One user-missing value. */ + MV_2 = 2, /* Two user-missing values. */ + MV_3 = 3, /* Three user-missing values. */ + MV_RANGE = 4, /* A range of user-missing values. */ + MV_RANGE_1 = 5 /* A range plus an individual value. The low two bits of each code count the individual values; the bit worth 4 marks a range. */ + }; + +/* Missing values. + Opaque--use access functions defined below. */ +struct missing_values + { + unsigned type; /* Number and type of missing values. */ + int width; /* 0=numeric, otherwise string width. */ + union value values[3]; /* Missing values. With a range, values[1] and values[2] are its low and high bounds and values[0] is the individual value, if any. */ + }; + +void mv_init (struct missing_values *, int width); +void mv_copy (struct missing_values *, const struct missing_values *); +bool mv_is_empty (const struct missing_values *); +int mv_get_width (const struct missing_values *); + +bool mv_add_value (struct missing_values *, const union value *); +bool mv_add_str (struct missing_values *, const unsigned char[]); +bool mv_add_num (struct missing_values *, double); +bool mv_add_num_range (struct missing_values *, double low, double high); + +bool mv_has_value (struct missing_values *); +void mv_pop_value (struct missing_values *, union value *); +bool mv_has_range (struct missing_values *); +void mv_pop_range (struct missing_values *, double *low, double *high); + +bool mv_is_resizable (struct missing_values *, int width); +void mv_resize (struct missing_values *, int width); + +typedef bool is_missing_func (const struct missing_values *, + const union value *); /* Signature shared by the mv_is_value_*() predicates. */ + +/* Is a value system or user missing? */ +bool mv_is_value_missing (const struct missing_values *, const union value *); +bool mv_is_num_missing (const struct missing_values *, double); +bool mv_is_str_missing (const struct missing_values *, const unsigned char[]); + +/* Is a value user missing? 
*/ +bool mv_is_value_user_missing (const struct missing_values *, + const union value *); +bool mv_is_num_user_missing (const struct missing_values *, double); +bool mv_is_str_user_missing (const struct missing_values *, + const unsigned char[]); + +/* Is a value system missing? */ +bool mv_is_value_system_missing (const struct missing_values *, + const union value *); + +#endif /* missing-values.h */ diff --git a/src/oneway.q b/src/oneway.q index 9aebbf61..8c9c768a 100644 --- a/src/oneway.q +++ b/src/oneway.q @@ -87,7 +87,7 @@ static int ostensible_number_of_groups=-1; /* Function to use for testing for missing values */ -static is_missing_func value_is_missing; +static is_missing_func *value_is_missing; static void run_oneway(const struct casefile *cf, void *_mode); @@ -119,9 +119,9 @@ cmd_oneway(void) /* If /MISSING=INCLUDE is set, then user missing values are ignored */ if (cmd.incl == ONEWAY_INCLUDE ) - value_is_missing = is_system_missing; + value_is_missing = mv_is_value_system_missing; else - value_is_missing = is_missing; + value_is_missing = mv_is_value_missing; /* What statistics were requested */ if ( cmd.sbc_statistics ) @@ -913,7 +913,7 @@ run_oneway(const struct casefile *cf, void *cmd_) const union value *indep_val = case_data (&c, indep_var->fv); /* Deal with missing values */ - if ( value_is_missing(indep_val,indep_var) ) + if ( value_is_missing(&indep_var->miss, indep_val) ) continue; /* Skip the entire case if /MISSING=LISTWISE is set */ @@ -924,7 +924,7 @@ run_oneway(const struct casefile *cf, void *cmd_) const struct variable *v = vars[i]; const union value *val = case_data (&c, v->fv); - if (value_is_missing(val,v) ) + if (value_is_missing(&v->miss, val) ) break; } if ( i != n_vars ) @@ -964,7 +964,7 @@ run_oneway(const struct casefile *cf, void *cmd_) hsh_insert ( group_hash, (void *) gs ); } - if (! value_is_missing(val,v) ) + if (! 
value_is_missing(&v->miss, val) ) { struct group_statistics *totals = &gp->ugs; diff --git a/src/percentiles.c b/src/percentiles.c index 4e618ad4..2381f771 100644 --- a/src/percentiles.c +++ b/src/percentiles.c @@ -18,6 +18,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include #include "factor_stats.h" #include "percentiles.h" #include "misc.h" diff --git a/src/pfm-read.c b/src/pfm-read.c index c7a604db..1999628c 100644 --- a/src/pfm-read.c +++ b/src/pfm-read.c @@ -540,45 +540,23 @@ read_variables (struct pfm_reader *r, struct dictionary *dict) convert_format (r, &fmt[3], &v->write, v); /* Range missing values. */ - if (match (r, 'B')) - { - v->miss_type = MISSING_RANGE; - v->missing[0] = parse_value (r, v); - v->missing[1] = parse_value (r, v); - } + if (match (r, 'B')) + { + double x = read_float (r); + double y = read_float (r); + mv_add_num_range (&v->miss, x, y); + } else if (match (r, 'A')) - { - v->miss_type = MISSING_HIGH; - v->missing[0] = parse_value (r, v); - } + mv_add_num_range (&v->miss, read_float (r), HIGHEST); else if (match (r, '9')) - { - v->miss_type = MISSING_LOW; - v->missing[0] = parse_value (r, v); - } + mv_add_num_range (&v->miss, LOWEST, read_float (r)); /* Single missing values. 
*/ - while (match (r, '8')) - { - static const int map_next[MISSING_COUNT] = - { - MISSING_1, MISSING_2, MISSING_3, -1, - MISSING_RANGE_1, MISSING_LOW_1, MISSING_HIGH_1, - -1, -1, -1, - }; - - static const int map_ofs[MISSING_COUNT] = - { - -1, 0, 1, 2, -1, -1, -1, 2, 1, 1, - }; - - v->miss_type = map_next[v->miss_type]; - if (v->miss_type == -1) - error (r, _("Bad missing values for %s."), v->name); - - assert (map_ofs[v->miss_type] != -1); - v->missing[map_ofs[v->miss_type]] = parse_value (r, v); - } + while (match (r, '8')) + { + union value value = parse_value (r, v); + mv_add_value (&v->miss, &value); + } if (match (r, 'C')) { diff --git a/src/pfm-write.c b/src/pfm-write.c index 30615418..fbe56f6a 100644 --- a/src/pfm-write.c +++ b/src/pfm-write.c @@ -298,25 +298,43 @@ write_variables (struct pfm_writer *w, struct dictionary *dict) for (i = 0; i < dict_get_var_cnt (dict); i++) { - static const char *miss_types[MISSING_COUNT] = - { - "", "8", "88", "888", "B ", "9", "A", "B 8", "98", "A8", - }; - - const char *m; - int j; - struct variable *v = dict_get_var (dict, i); + struct missing_values mv; if (!buf_write (w, "7", 1) || !write_int (w, v->width) || !write_string (w, v->short_name) || !write_format (w, &v->print) || !write_format (w, &v->write)) return 0; - for (m = miss_types[v->miss_type], j = 0; j < (int) strlen (m); j++) - if ((m[j] != ' ' && !buf_write (w, &m[j], 1)) - || !write_value (w, &v->missing[j], v)) - return 0; + /* Write missing values. 
*/ + mv_copy (&mv, &v->miss); + while (mv_has_range (&mv)) + { + double x, y; + mv_pop_range (&mv, &x, &y); + if (x == LOWEST) + { + if (!buf_write (w, "9", 1) || !write_float (w, y)) /* '9': LOWEST THRU y, only the high bound is stored. */ + return 0; + } + else if (y == HIGHEST) + { + if (!buf_write (w, "A", 1) || !write_float (w, x)) /* 'A': x THRU HIGHEST, only the low bound is stored; the reader pairs it with HIGHEST. */ + return 0; + } + else { + if (!buf_write (w, "B", 1) || !write_float (w, x) + || !write_float (w, y)) + return 0; + } + } + while (mv_has_value (&mv)) + { + union value value; + mv_pop_value (&mv, &value); + if (!buf_write (w, "8", 1) || !write_value (w, &value, v)) + return 0; + } if (v->label && (!buf_write (w, "C", 1) || !write_string (w, v->label))) return 0; diff --git a/src/recode.c b/src/recode.c index b25ac456..9f7214bd 100644 --- a/src/recode.c +++ b/src/recode.c @@ -714,7 +714,7 @@ find_src_numeric (struct rcd_var * v, struct ccase * c) case RCD_END: return NULL; case RCD_USER: - if (is_num_user_missing (cmp, v->src)) + if (mv_is_num_user_missing (&v->src->miss, cmp)) return cp; break; case RCD_SINGLE: diff --git a/src/sfm-read.c b/src/sfm-read.c index 0c07150e..bd8800bb 100644 --- a/src/sfm-read.c +++ b/src/sfm-read.c @@ -926,63 +926,45 @@ read_variables (struct sfm_reader *r, if (sv.n_missing_values != 0) { flt64 mv[3]; + int mv_cnt = abs (sv.n_missing_values); if (vv->width > MAX_SHORT_STRING) lose ((ME, _("%s: Long string variable %s may not have missing " "values."), handle_get_filename (r->fh), vv->name)); - assertive_buf_read (r, mv, sizeof *mv * abs (sv.n_missing_values), 0); + assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0); if (r->reverse_endian && vv->type == NUMERIC) - for (j = 0; j < abs (sv.n_missing_values); j++) + for (j = 0; j < mv_cnt; j++) bswap_flt64 (&mv[j]); if (sv.n_missing_values > 0) { - vv->miss_type = sv.n_missing_values; - if (vv->type == NUMERIC) - for (j = 0; j < sv.n_missing_values; j++) - vv->missing[j].f = mv[j]; - else - for (j = 0; j < sv.n_missing_values; j++) - memcpy (vv->missing[j].s, &mv[j], vv->width); + for (j = 0; j < 
sv.n_missing_values; j++) + if (vv->type == NUMERIC) + mv_add_num (&vv->miss, mv[j]); + else + mv_add_str (&vv->miss, (unsigned char *) &mv[j]); } else { - int x = 0; - if (vv->type == ALPHA) lose ((ME, _("%s: String variable %s may not have missing " "values specified as a range."), handle_get_filename (r->fh), vv->name)); if (mv[0] == r->lowest) - { - vv->miss_type = MISSING_LOW; - vv->missing[x++].f = mv[1]; - } + mv_add_num_range (&vv->miss, LOWEST, mv[1]); else if (mv[1] == r->highest) - { - vv->miss_type = MISSING_HIGH; - vv->missing[x++].f = mv[0]; - } + mv_add_num_range (&vv->miss, mv[0], HIGHEST); else - { - vv->miss_type = MISSING_RANGE; - vv->missing[x++].f = mv[0]; - vv->missing[x++].f = mv[1]; - } + mv_add_num_range (&vv->miss, mv[0], mv[1]); if (sv.n_missing_values == -3) - { - vv->miss_type += 3; - vv->missing[x++].f = mv[2]; - } + mv_add_num (&vv->miss, mv[2]); } } - else - vv->miss_type = MISSING_NONE; if (!parse_format_spec (r, sv.print, &vv->print, vv) || !parse_format_spec (r, sv.write, &vv->write, vv)) diff --git a/src/sfm-write.c b/src/sfm-write.c index e1e103e5..80b78408 100644 --- a/src/sfm-write.c +++ b/src/sfm-write.c @@ -362,6 +362,7 @@ write_variable (struct sfm_writer *w, struct variable *v) struct sysfile_variable sv; /* Missing values. */ + struct missing_values mv; flt64 m[3]; /* Missing value values. */ int nm; /* Number of missing values, possibly negative. 
*/ @@ -369,54 +370,27 @@ write_variable (struct sfm_writer *w, struct variable *v) sv.type = v->width; sv.has_var_label = (v->label != NULL); - switch (v->miss_type) + mv_copy (&mv, &v->miss); + nm = 0; + if (mv_has_range (&mv)) { - case MISSING_NONE: - nm = 0; - break; - case MISSING_1: - case MISSING_2: - case MISSING_3: - for (nm = 0; nm < v->miss_type; nm++) - m[nm] = v->missing[nm].f; - break; - case MISSING_RANGE: - m[0] = v->missing[0].f; - m[1] = v->missing[1].f; - nm = -2; - break; - case MISSING_LOW: - m[0] = second_lowest_flt64; - m[1] = v->missing[0].f; - nm = -2; - break; - case MISSING_HIGH: - m[0] = v->missing[0].f; - m[1] = FLT64_MAX; - nm = -2; - break; - case MISSING_RANGE_1: - m[0] = v->missing[0].f; - m[1] = v->missing[1].f; - m[2] = v->missing[2].f; - nm = -3; - break; - case MISSING_LOW_1: - m[0] = second_lowest_flt64; - m[1] = v->missing[0].f; - m[2] = v->missing[1].f; - nm = -3; - break; - case MISSING_HIGH_1: - m[0] = v->missing[0].f; - m[1] = second_lowest_flt64; - m[2] = v->missing[1].f; - nm = -3; - break; - default: - assert (0); - abort (); + double x, y; + mv_pop_range (&mv, &x, &y); + m[nm++] = x == LOWEST ? second_lowest_flt64 : x; + m[nm++] = y == HIGHEST ? FLT64_MAX : y; } + while (mv_has_value (&mv)) + { + union value value; + mv_pop_value (&mv, &value); + if (v->type == NUMERIC) + m[nm] = value.f; + else + buf_copy_rpad ((char *) &m[nm], sizeof m[nm], value.s, v->width); + nm++; + } + if (mv_has_range (&v->miss)) + nm = -nm; sv.n_missing_values = nm; write_format_spec (&v->print, &sv.print); @@ -445,7 +419,7 @@ write_variable (struct sfm_writer *w, struct variable *v) return 0; } - if (nm && !buf_write (w, m, sizeof *m * nm)) + if (nm && !buf_write (w, m, sizeof *m * abs (nm))) return 0; if (v->type == ALPHA && v->width > (int) sizeof (flt64)) diff --git a/src/sfmP.h b/src/sfmP.h index 978a3e12..c127b85e 100644 --- a/src/sfmP.h +++ b/src/sfmP.h @@ -55,6 +55,7 @@ #endif /* Figure out SYSMIS value for flt64. 
*/ +#include "magic.h" #if SIZEOF_DOUBLE == 8 #define second_lowest_flt64 second_lowest_value #else diff --git a/src/sysfile-info.c b/src/sysfile-info.c index 08d5484f..45bffad7 100644 --- a/src/sysfile-info.c +++ b/src/sysfile-info.c @@ -463,63 +463,44 @@ describe_variable (struct variable *v, struct tab_table *t, int r, int as) } /* Missing values if any. */ - if (v->miss_type != MISSING_NONE) + if (!mv_is_empty (&v->miss)) { - char buf[80]; - char *cp = stpcpy (buf, _("Missing Values: ")); - - if (v->type == NUMERIC) - switch (v->miss_type) - { - case MISSING_1: - sprintf (cp, "%g", v->missing[0].f); - break; - case MISSING_2: - sprintf (cp, "%g; %g", v->missing[0].f, v->missing[1].f); - break; - case MISSING_3: - sprintf (cp, "%g; %g; %g", v->missing[0].f, - v->missing[1].f, v->missing[2].f); - break; - case MISSING_RANGE: - sprintf (cp, "%g THRU %g", v->missing[0].f, v->missing[1].f); - break; - case MISSING_LOW: - sprintf (cp, "LOWEST THRU %g", v->missing[0].f); - break; - case MISSING_HIGH: - sprintf (cp, "%g THRU HIGHEST", v->missing[0].f); - break; - case MISSING_RANGE_1: - sprintf (cp, "%g THRU %g; %g", - v->missing[0].f, v->missing[1].f, v->missing[2].f); - break; - case MISSING_LOW_1: - sprintf (cp, "LOWEST THRU %g; %g", - v->missing[0].f, v->missing[1].f); - break; - case MISSING_HIGH_1: - sprintf (cp, "%g THRU HIGHEST; %g", - v->missing[0].f, v->missing[1].f); - break; - default: - assert (0); - } - else - { - int i; - - for (i = 0; i < v->miss_type; i++) - { - if (i != 0) - cp = stpcpy (cp, "; "); - *cp++ = '"'; - memcpy (cp, v->missing[i].s, v->width); + char buf[128]; + char *cp; + struct missing_values mv; + int cnt = 0; + + cp = stpcpy (buf, _("Missing Values: ")); + mv_copy (&mv, &v->miss); + if (mv_has_range (&mv)) + { + double x, y; + mv_pop_range (&mv, &x, &y); + if (x == LOWEST) + cp += nsprintf (cp, "LOWEST THRU %g", y); + else if (y == HIGHEST) + cp += nsprintf (cp, "%g THRU HIGHEST", x); + else + cp += nsprintf (cp, "%g THRU %g", x, y); + 
cnt++; + } + while (mv_has_value (&mv)) + { + union value value; + mv_pop_value (&mv, &value); + if (cnt++ > 0) + cp += nsprintf (cp, "; "); + if (v->type == NUMERIC) + cp += nsprintf (cp, "%g", value.f); + else + { + *cp++ = '"'; + memcpy (cp, value.s, v->width); cp += v->width; *cp++ = '"'; - } - *cp = 0; - } + *cp = '\0'; + } + } tab_joint_text (t, 1, r, 2, r, TAB_LEFT, buf); r++; diff --git a/src/t-test.q b/src/t-test.q index 07bcdd07..89f1741f 100644 --- a/src/t-test.q +++ b/src/t-test.q @@ -67,7 +67,7 @@ /* Function to use for testing for missing values */ -static is_missing_func value_is_missing; +static is_missing_func *value_is_missing; /* Variable for the GROUPS subcommand, if given. */ static struct variable *indep_var; @@ -330,9 +330,9 @@ cmd_t_test(void) /* If /MISSING=INCLUDE is set, then user missing values are ignored */ if (cmd.incl == TTS_INCLUDE ) - value_is_missing = is_system_missing; + value_is_missing = mv_is_value_system_missing; else - value_is_missing = is_missing; + value_is_missing = mv_is_value_missing; bad_weight_warn = 1; @@ -1418,7 +1418,7 @@ common_calc (const struct ccase *c, void *_cmd) struct variable *v = cmd->v_variables[i]; const union value *val = case_data (c, v->fv); - if (value_is_missing(val,v) ) + if (value_is_missing(&v->miss, val) ) { return 0; } @@ -1429,7 +1429,7 @@ common_calc (const struct ccase *c, void *_cmd) if ( cmd->sbc_groups ) { const union value *gv = case_data (c, indep_var->fv); - if ( value_is_missing(gv,indep_var) ) + if ( value_is_missing(&indep_var->miss, gv) ) { return 0; } @@ -1444,7 +1444,7 @@ common_calc (const struct ccase *c, void *_cmd) gs= &group_proc_get (cmd->v_variables[i])->ugs; - if (! value_is_missing(val,v) ) + if (! 
value_is_missing(&v->miss, val) ) { gs->n+=weight; gs->sum+=weight * val->f; @@ -1517,7 +1517,7 @@ one_sample_calc (const struct ccase *c, void *cmd_) struct variable *v = cmd->v_variables[i]; const union value *val = case_data (c, v->fv); - if (value_is_missing(val,v) ) + if (value_is_missing(&v->miss, val) ) { return 0; } @@ -1532,7 +1532,7 @@ one_sample_calc (const struct ccase *c, void *cmd_) gs= &group_proc_get (cmd->v_variables[i])->ugs; - if ( ! value_is_missing(val,v)) + if ( ! value_is_missing(&v->miss, val)) gs->sum_diff += weight * (val->f - cmd->n_testval[0]); } @@ -1611,8 +1611,8 @@ paired_calc (const struct ccase *c, void *cmd_) const union value *val0 = case_data (c, v0->fv); const union value *val1 = case_data (c, v1->fv); - if ( value_is_missing(val0,v0) || - value_is_missing(val1,v1) ) + if ( value_is_missing(&v0->miss, val0) || + value_is_missing(&v1->miss, val1) ) { return 0; } @@ -1627,7 +1627,8 @@ paired_calc (const struct ccase *c, void *cmd_) const union value *val0 = case_data (c, v0->fv); const union value *val1 = case_data (c, v1->fv); - if ( ( !value_is_missing(val0,v0) && !value_is_missing(val1,v1) ) ) + if ( ( !value_is_missing(&v0->miss, val0) + && !value_is_missing(&v1->miss, val1) ) ) { pairs[i].n += weight; pairs[i].sum[0] += weight * val0->f; @@ -1744,7 +1745,7 @@ group_calc (const struct ccase *c, struct cmd_t_test *cmd) const double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); - if ( value_is_missing(gv,indep_var) ) + if ( value_is_missing(&indep_var->miss, gv) ) { return 0; } @@ -1756,7 +1757,7 @@ group_calc (const struct ccase *c, struct cmd_t_test *cmd) struct variable *v = cmd->v_variables[i]; const union value *val = case_data (c, v->fv); - if (value_is_missing(val,v) ) + if (value_is_missing(&v->miss, val) ) { return 0; } @@ -1779,7 +1780,7 @@ group_calc (const struct ccase *c, struct cmd_t_test *cmd) if ( ! 
gs ) return 0; - if ( !value_is_missing(val,var) ) + if ( !value_is_missing(&var->miss, val) ) { gs->n+=weight; gs->sum+=weight * val->f; diff --git a/src/val.h b/src/val.h index 96523de2..57aaa2af 100644 --- a/src/val.h +++ b/src/val.h @@ -21,7 +21,7 @@ #define val_h 1 #include -#include "config.h" +#include "magic.h" /* Values. */ diff --git a/src/var.h b/src/var.h index 3bfc43df..b4d50182 100644 --- a/src/var.h +++ b/src/var.h @@ -25,10 +25,9 @@ #include "config.h" #include #include "format.h" +#include "missing-values.h" #include "val.h" - - /* Script variables. */ /* Variable type. */ @@ -39,27 +38,6 @@ enum (STRING is pre-empted by lexer.h.) */ }; -/* Types of missing values. Order is significant, see - mis-val.c:parse_numeric(), sfm-read.c, sfm-write.c, - sysfile-info.c:cmd_sysfile_info(), mis-val.c:copy_missing_values(), - pfm-read.c:read_variables(), pfm-write.c:write_variables(), - apply-dict.c:cmd_apply_dictionary(), and more (?). */ -enum - { - MISSING_NONE, /* No user-missing values. */ - MISSING_1, /* One user-missing value. */ - MISSING_2, /* Two user-missing values. */ - MISSING_3, /* Three user-missing values. */ - MISSING_RANGE, /* [a,b]. */ - MISSING_LOW, /* (-inf,a]. */ - MISSING_HIGH, /* (a,+inf]. */ - MISSING_RANGE_1, /* [a,b], c. */ - MISSING_LOW_1, /* (-inf,a], b. */ - MISSING_HIGH_1, /* (a,+inf), b. */ - MISSING_COUNT - }; - - /* A variable's dictionary entry. */ struct variable { @@ -75,8 +53,7 @@ struct variable int index; /* Dictionary index. */ /* Missing values. */ - int miss_type; /* One of the MISSING_* constants. */ - union value missing[3]; /* User-missing value. */ + struct missing_values miss; /* Missing values. */ /* Display formats. */ struct fmt_spec print; /* Default format for PRINT. */ @@ -178,18 +155,8 @@ extern int FILTER_before_TEMPORARY; void cancel_temporary (void); -/* Functions. 
*/ - struct ccase; void dump_split_vars (const struct ccase *); -typedef int (* is_missing_func )(const union value *, const struct variable *); - -int is_num_user_missing (double, const struct variable *); -int is_str_user_missing (const unsigned char[], const struct variable *); -int is_missing (const union value *, const struct variable *); -int is_system_missing (const union value *, const struct variable *); -int is_user_missing (const union value *, const struct variable *); -void copy_missing_values (struct variable *dest, const struct variable *src); /* Transformations. */ diff --git a/src/vars-atr.c b/src/vars-atr.c index a854033a..5e34cb59 100644 --- a/src/vars-atr.c +++ b/src/vars-atr.c @@ -141,109 +141,6 @@ discard_variables (void) pgm_state = STATE_INIT; } - -/* Return nonzero only if X is a user-missing value for numeric - variable V. */ -inline int -is_num_user_missing (double x, const struct variable *v) -{ - switch (v->miss_type) - { - case MISSING_NONE: - return 0; - case MISSING_1: - return x == v->missing[0].f; - case MISSING_2: - return x == v->missing[0].f || x == v->missing[1].f; - case MISSING_3: - return (x == v->missing[0].f || x == v->missing[1].f - || x == v->missing[2].f); - case MISSING_RANGE: - return x >= v->missing[0].f && x <= v->missing[1].f; - case MISSING_LOW: - return x <= v->missing[0].f; - case MISSING_HIGH: - return x >= v->missing[0].f; - case MISSING_RANGE_1: - return ((x >= v->missing[0].f && x <= v->missing[1].f) - || x == v->missing[2].f); - case MISSING_LOW_1: - return x <= v->missing[0].f || x == v->missing[1].f; - case MISSING_HIGH_1: - return x >= v->missing[0].f || x == v->missing[1].f; - default: - assert (0); - } - abort (); -} - -/* Return nonzero only if string S is a user-missing variable for - string variable V. */ -inline int -is_str_user_missing (const unsigned char s[], const struct variable *v) -{ - /* FIXME: should these be memcmp()? 
*/ - switch (v->miss_type) - { - case MISSING_NONE: - return 0; - case MISSING_1: - return !strncmp (s, v->missing[0].s, v->width); - case MISSING_2: - return (!strncmp (s, v->missing[0].s, v->width) - || !strncmp (s, v->missing[1].s, v->width)); - case MISSING_3: - return (!strncmp (s, v->missing[0].s, v->width) - || !strncmp (s, v->missing[1].s, v->width) - || !strncmp (s, v->missing[2].s, v->width)); - default: - assert (0); - } - abort (); -} - -/* Return nonzero only if value VAL is system-missing for variable - V. */ -int -is_system_missing (const union value *val, const struct variable *v) -{ - return v->type == NUMERIC && val->f == SYSMIS; -} - -/* Return nonzero only if value VAL is system- or user-missing for - variable V. */ -int -is_missing (const union value *val, const struct variable *v) -{ - switch (v->type) - { - case NUMERIC: - if (val->f == SYSMIS) - return 1; - return is_num_user_missing (val->f, v); - case ALPHA: - return is_str_user_missing (val->s, v); - default: - assert (0); - } - abort (); -} - -/* Return nonzero only if value VAL is user-missing for variable V. */ -int -is_user_missing (const union value *val, const struct variable *v) -{ - switch (v->type) - { - case NUMERIC: - return is_num_user_missing (val->f, v); - case ALPHA: - return is_str_user_missing (val->s, v); - default: - assert (0); - } - abort (); -} /* Returns true if NAME is an acceptable name for a variable, false otherwise. 
If ISSUE_ERROR is true, issues an diff --git a/src/vfm.c b/src/vfm.c index 0414234c..6d7e526b 100644 --- a/src/vfm.c +++ b/src/vfm.c @@ -336,7 +336,7 @@ filter_case (const struct ccase *c, int case_idx) if (filter_var != NULL) { double f = case_num (c, filter_var->fv); - if (f == 0.0 || f == SYSMIS || is_num_user_missing (f, filter_var)) + if (f == 0.0 || mv_is_num_missing (&filter_var->miss, f)) return 1; } diff --git a/tests/ChangeLog b/tests/ChangeLog index 6dc05961..f5151193 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,9 @@ +Sat Aug 6 17:32:39 2005 Ben Pfaff + + * command/missing-values.sh: New test. + + * Makefile.am: Add new test. + Mon Aug 1 21:51:46 2005 Ben Pfaff * bugs/big-input-2.sh: Don't use 1...100000 (etc.) with Perl diff --git a/tests/Makefile.am b/tests/Makefile.am index 3a4172dd..21b70dda 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -22,6 +22,7 @@ TESTS = \ command/loop.sh \ command/longvars.sh \ command/match-files.sh \ + command/missing-values.sh \ command/no_case_size.sh \ command/oneway.sh \ command/oneway-missing.sh \ diff --git a/tests/command/missing-values.sh b/tests/command/missing-values.sh new file mode 100755 index 00000000..4d530989 --- /dev/null +++ b/tests/command/missing-values.sh @@ -0,0 +1,125 @@ +#!/bin/sh + +# This program tests MISSING VALUES + +TEMPDIR=/tmp/pspp-tst-$$ +TESTFILE=$TEMPDIR/`basename $0`.sps + +here=`pwd`; + +# ensure that top_srcdir is absolute +cd $top_srcdir; top_srcdir=`pwd` + +STAT_CONFIG_PATH=$top_srcdir/config +export STAT_CONFIG_PATH + + +cleanup() +{ + cd / + rm -rf $TEMPDIR +} + + +fail() +{ + echo $activity + echo FAILED + cleanup; + exit 1; +} + + +no_result() +{ + echo $activity + echo NO RESULT; + cleanup; + exit 2; +} + +pass() +{ + cleanup; + exit 0; +} + +mkdir -p $TEMPDIR + +cd $TEMPDIR + +# Copy this file --- it's shared with another test +activity="create data" +cp $top_srcdir/tests/data-list.data $TEMPDIR +if [ $? 
-ne 0 ] ; then no_result ; fi + + +activity="create program" +cat > $TEMPDIR/missing-values.stat << foobar +DATA LIST NOTABLE/str1 1-5 (A) str2 6-8 (A) date1 9-19 (DATE) num1 20-25. + +/* Valid: numeric missing values. +MISSING VALUES date1 num1 (1). +MISSING VALUES date1 num1 (1, 2). +MISSING VALUES date1 num1 (1, 2, 3). + +/* Valid: numeric missing values using the first variable's format. +MISSING VALUES num1 date1 ('1'). +MISSING VALUES num1 date1 ('1', '2'). +MISSING VALUES num1 date1 ('1', '2', '3'). +MISSING VALUES date1 num1 ('06-AUG-05'). +MISSING VALUES date1 num1 ('06-AUG-05', '01-OCT-78'). +MISSING VALUES date1 num1 ('06-AUG-05', '01-OCT-78', '14-FEB-81'). + +/* Valid: ranges of numeric missing values. +MISSING VALUES num1 (1 THRU 2). +MISSING VALUES num1 (LO THRU 2). +MISSING VALUES num1 (LOWEST THRU 2). +MISSING VALUES num1 (1 THRU HI). +MISSING VALUES num1 (1 THRU HIGHEST). + +/* Valid: a range of numeric missing values, plus an individual value. +MISSING VALUES num1 (1 THRU 2, 3). +MISSING VALUES num1 (LO THRU 2, 3). +MISSING VALUES num1 (LOWEST THRU 2, 3). +MISSING VALUES num1 (1 THRU HI, -1). +MISSING VALUES num1 (1 THRU HIGHEST, -1). + +/* Valid: string missing values. +MISSING VALUES str1 str2 ('abc ','def'). + +/* Invalid: too long for str2. +MISSING VALUES str1 str2 ('abcde'). + +/* Invalid: no string ranges. +MISSING VALUES str1 ('a' THRU 'z'). + +/* Invalid: mixing string and numeric variables. +MISSING VALUES str1 num1 ('123'). + +/* Valid: may mix variable types when clearing missing values. +MISSING VALUES ALL (). + +foobar +if [ $? -ne 0 ] ; then no_result ; fi + + +activity="run program" +$SUPERVISOR $here/../src/pspp --testing-mode -o raw-ascii --testing-mode $TEMPDIR/missing-values.stat > $TEMPDIR/errs +# Note vv --- there are errors in input. Therefore, the command must FAIL +if [ $? -eq 0 ] ; then fail ; fi + +activity="compare error messages" +diff -w $TEMPDIR/errs - <