From 933b760efccdfa26f14254f1fae002ea3b0a1495 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 7 Aug 2005 04:39:27 +0000 Subject: [PATCH] Clean up treatment of missing values by moving all the code into one place. All references to the missing value function were updated, but only major changes are detailed below. --- src/ChangeLog | 57 +++++ src/Makefile.am | 50 ++-- src/aggregate.c | 3 +- src/apply-dict.c | 27 +- src/count.c | 2 +- src/crosstabs.q | 26 +- src/descript.c | 15 +- src/dictionary.c | 7 +- src/examine.q | 12 +- src/expressions/operations.def | 8 +- src/factor_stats.c | 2 +- src/frequencies.q | 4 +- src/get.c | 5 +- src/levene.c | 11 +- src/mis-val.c | 434 +++++++++++--------------------- src/missing-values.c | 382 ++++++++++++++++++++++++++++ src/missing-values.h | 84 +++++++ src/oneway.q | 12 +- src/percentiles.c | 1 + src/pfm-read.c | 48 +--- src/pfm-write.c | 42 +++- src/recode.c | 2 +- src/sfm-read.c | 42 +--- src/sfm-write.c | 68 ++--- src/sfmP.h | 1 + src/sysfile-info.c | 89 +++---- src/t-test.q | 29 +-- src/val.h | 2 +- src/var.h | 37 +-- src/vars-atr.c | 103 -------- src/vfm.c | 2 +- tests/ChangeLog | 6 + tests/Makefile.am | 1 + tests/command/missing-values.sh | 125 +++++++++ 34 files changed, 1021 insertions(+), 718 deletions(-) create mode 100644 src/missing-values.c create mode 100644 src/missing-values.h create mode 100755 tests/command/missing-values.sh diff --git a/src/ChangeLog b/src/ChangeLog index f034dd45..fb536fd1 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,60 @@ +Sat Aug 6 21:29:15 2005 Ben Pfaff + + * factor_stats.c: Needed <config.h> included earlier. + + * percentiles.c: Needed to include . + + * val.h: Don't include "config.h". + +Sat Aug 6 21:26:27 2005 Ben Pfaff + + Clean up treatment of missing values by moving all the code into + one place. All references to the missing value function were + updated, but only major changes are detailed below. + + * Makefile.am: Add missing-values.c, missing-values.h to sources. 
+ + * apply-dict.c: (cmd_apply_dictionary) Use mv_resize(). + + * dictionary.c: (dict_create_var) Initialize `miss' member with + mv_init(). + (dict_clone_var) Copy `miss' member with mv_copy(). + + * get.c: (mtf_merge_dictionary) Use mv_copy(). + + * missing-values.c: New file. + + * missing-values.h: New file. + + * mis-val.c: Rewrite. New version implements updated semantics. + + * pfm-read.c: (read_variables) Rewrite missing value handling. + + * pfm-write.c: (write_variables) Rewrite missing value handling. + + * sfm-read.c: (read_variables) Rewrite missing value handling. + + * sfm-write.c: (write_variable) Rewrite missing value handling. + + * sfmP.h: Include "magic.h" to get definition of + second_lowest_value. + + * sysfile-info.c: (describe_variable) Rewrite missing value + handling. + + * val.h: Include "magic.h" to get definition of + second_lowest_value. + + * var.h: Include "missing-values.h". Drop MISSING_* enums. + (struct variable) Remove `miss_type', `missing'. Add `miss'. + + * vars-atr.c: (is_num_user_missing) Removed--use + mv_is_num_user_missing(). + (is_str_user_missing) Removed--use mv_is_str_user_missing(). + (is_system_missing) Removed--use mv_is_value_system_missing(). + (is_missing) Removed--use mv_is_value_missing(). + (is_user_missing) Removed--use mv_is_value_user_missing(). + Sun Jul 31 14:09:57 2005 Ben Pfaff Adopt use of gnulib for portability. 
diff --git a/src/Makefile.am b/src/Makefile.am index dcb6d078..b6562c16 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -41,37 +41,31 @@ else chart_sources = dummy-chart.c endif -pspp_SOURCES = $(q_sources_c) $(chart_sources) \ -aggregate.c algorithm.c algorithm.h \ -alloc.c alloc.h apply-dict.c ascii.c autorecode.c bitvector.h \ -calendar.c calendar.h case.c case.h casefile.c casefile.h chart.c \ -chart.h cmdline.c cmdline.h command.c command.def \ -command.h compute.c copyleft.c copyleft.h count.c data-in.c data-in.h \ +pspp_SOURCES = $(q_sources_c) $(chart_sources) aggregate.c algorithm.c \ +algorithm.h alloc.c alloc.h apply-dict.c ascii.c autorecode.c \ +bitvector.h calendar.c calendar.h case.c case.h casefile.c casefile.h \ +chart.c chart.h cmdline.c cmdline.h command.c command.def command.h \ +compute.c copyleft.c copyleft.h count.c data-in.c data-in.h \ data-list.c data-list.h data-out.c date.c debug-print.h descript.c \ -devind.c devind.h dfm-read.c dfm-read.h dfm-write.c dfm-write.h \ -dictionary.c dictionary.h do-if.c do-ifP.h echo.c error.c \ -error.h factor_stats.c factor_stats.h file-handle.h \ -file-type.c filename.c filename.h flip.c font.h format.c format.def \ -format.h formats.c get.c getl.c getl.h glob.c glob.h \ -groff-font.c group.c group.h group_proc.h \ -hash.c hash.h histogram.c histogram.h \ -html.c htmlP.h include.c inpt-pgm.c lexer.c lexer.h levene.c levene.h \ -linked-list.c linked-list.h log.h loop.c magic.c magic.h main.c main.h \ -matrix-data.c mis-val.c misc.c misc.h modify-vars.c \ -moments.c moments.h numeric.c output.c output.h \ -percentiles.c percentiles.h permissions.c \ -pfm-read.c pfm-read.h \ -pfm-write.c pfm-write.h \ -pool.c pool.h postscript.c print.c recode.c \ +devind.c devind.h dfm-read.c dfm-read.h dfm-write.c dfm-write.h \ +dictionary.c dictionary.h do-if.c do-ifP.h echo.c error.c error.h \ +factor_stats.c factor_stats.h file-handle.h file-type.c filename.c \ +filename.h flip.c font.h format.c format.def 
format.h formats.c get.c \ +getl.c getl.h glob.c glob.h groff-font.c group.c group.h group_proc.h \ +hash.c hash.h histogram.c histogram.h html.c htmlP.h include.c \ +inpt-pgm.c lexer.c lexer.h levene.c levene.h linked-list.c \ +linked-list.h log.h loop.c magic.c magic.h main.c main.h matrix-data.c \ +mis-val.c misc.c misc.h missing-values.c missing-values.h \ +modify-vars.c moments.c moments.h numeric.c output.c output.h \ +percentiles.c percentiles.h permissions.c pfm-read.c pfm-read.h \ +pfm-write.c pfm-write.h pool.c pool.h postscript.c print.c recode.c \ rename-vars.c repeat.c repeat.h sample.c sel-if.c settings.h \ sfm-read.c sfm-read.h sfm-write.c sfm-write.h sfmP.h som.c som.h \ -sort.c sort.h sort-prs.c sort-prs.h \ -split-file.c str.c str.h subclist.c subclist.h \ -sysfile-info.c tab.c tab.h temporary.c mkfile.c mkfile.h \ -title.c val.h val-labs.c value-labels.c value-labels.h \ -var-display.c \ -var-labs.c var.h vars-atr.c vars-prs.c vector.c version.h \ -vfm.c vfm.h vfmP.h weight.c +sort.c sort.h sort-prs.c sort-prs.h split-file.c str.c str.h \ +subclist.c subclist.h sysfile-info.c tab.c tab.h temporary.c mkfile.c \ +mkfile.h title.c val.h val-labs.c value-labels.c value-labels.h \ +var-display.c var-labs.c var.h vars-atr.c vars-prs.c vector.c \ +version.h vfm.c vfm.h vfmP.h weight.c pspp_LDADD = \ diff --git a/src/aggregate.c b/src/aggregate.c index 2c1495f0..bc554b97 100644 --- a/src/aggregate.c +++ b/src/aggregate.c @@ -726,7 +726,8 @@ accumulate_aggregate_info (struct agr_proc *agr, { const union value *v = case_data (input, iter->src->fv); - if ((!iter->include_missing && is_missing (v, iter->src)) + if ((!iter->include_missing + && mv_is_value_missing (&iter->src->miss, v)) || (iter->include_missing && iter->src->type == NUMERIC && v->f == SYSMIS)) { diff --git a/src/apply-dict.c b/src/apply-dict.c index 74c1642b..473daf01 100644 --- a/src/apply-dict.c +++ b/src/apply-dict.c @@ -129,31 +129,18 @@ cmd_apply_dictionary (void) } } - if (s->miss_type 
!= MISSING_NONE && t->width > MAX_SHORT_STRING) + if (!mv_is_empty (&s->miss) && t->width > MAX_SHORT_STRING) msg (SW, _("Cannot apply missing values from source file to " "long string variable %s."), s->name); - else if (s->miss_type != MISSING_NONE) + else if (!mv_is_empty (&s->miss)) { - if (t->width < s->width) - { - static const int miss_count[MISSING_COUNT] = - { - 0, 1, 2, 3, 2, 1, 1, 3, 2, 2, - }; - - int j, k; - - for (j = 0; j < miss_count[s->miss_type]; j++) - for (k = t->width; k < s->width; k++) - if (s->missing[j].s[k] != ' ') - goto skip_missing_values; - } - - t->miss_type = s->miss_type; - memcpy (t->missing, s->missing, sizeof s->missing); + if (mv_is_resizable (&s->miss, t->width)) + { + mv_copy (&t->miss, &s->miss); + mv_resize (&t->miss, t->width); + } } - skip_missing_values: ; if (s->type == NUMERIC) { diff --git a/src/count.c b/src/count.c index bab59f90..90275084 100644 --- a/src/count.c +++ b/src/count.c @@ -385,7 +385,7 @@ count_numeric (struct counting * cnt, struct ccase * c) counter++; continue; } - if (cnt->missing >= 2 && is_num_user_missing (cmp, cnt->v[i])) + if (cnt->missing >= 2 && mv_is_num_user_missing (&cnt->v[i]->miss, cmp)) { counter++; continue; diff --git a/src/crosstabs.q b/src/crosstabs.q index 09873e85..21d2d3df 100644 --- a/src/crosstabs.q +++ b/src/crosstabs.q @@ -576,11 +576,11 @@ calc_general (struct ccase *c, void *aux UNUSED) assert (x != NULL); for (j = 0; j < x->nvar; j++) { - if ((cmd.miss == CRS_TABLE - && is_missing (case_data (c, x->vars[j]->fv), x->vars[j])) + const union value *v = case_data (c, x->vars[j]->fv); + const struct missing_values *mv = &x->vars[j]->miss; + if ((cmd.miss == CRS_TABLE && mv_is_value_missing (mv, v)) || (cmd.miss == CRS_INCLUDE - && is_system_missing (case_data (c, x->vars[j]->fv), - x->vars[j]))) + && mv_is_value_system_missing (mv, v))) { x->missing += weight; goto next_crosstab; @@ -650,7 +650,8 @@ calc_integer (struct ccase *c, void *aux UNUSED) /* Note that the first test 
also rules out SYSMIS. */ if ((value < vr->min || value >= vr->max) - || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v))) + || (cmd.miss == CRS_TABLE + && mv_is_num_user_missing (&v->miss, value))) { x->missing += weight; goto next_crosstab; @@ -1410,7 +1411,7 @@ delete_missing (void) int r; for (r = 0; r < n_rows; r++) - if (is_num_user_missing (rows[r].f, x->vars[ROW_VAR])) + if (mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f)) { int c; @@ -1424,7 +1425,7 @@ delete_missing (void) int c; for (c = 0; c < n_cols; c++) - if (is_num_user_missing (cols[c].f, x->vars[COL_VAR])) + if (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)) { int r; @@ -1657,7 +1658,7 @@ table_value_missing (struct tab_table *table, int c, int r, unsigned char opt, s.string = tab_alloc (table, var->print.w); format_short (s.string, &var->print, v); s.length = strlen (s.string); - if (cmd.miss == CRS_REPORT && is_num_user_missing (v->f, var)) + if (cmd.miss == CRS_REPORT && mv_is_num_user_missing (&var->miss, v->f)) s.string[s.length++] = 'M'; while (s.length && *s.string == ' ') { @@ -1740,8 +1741,9 @@ display_crosstabulation (void) int mark_missing = 0; double expected_value = row_tot[r] * col_tot[c] / W; if (cmd.miss == CRS_REPORT - && (is_num_user_missing (cols[c].f, x->vars[COL_VAR]) - || is_num_user_missing (rows[r].f, x->vars[ROW_VAR]))) + && (mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f) + || mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, + rows[r].f))) mark_missing = 1; for (i = 0; i < num_cells; i++) { @@ -1806,7 +1808,7 @@ display_crosstabulation (void) int mark_missing = 0; if (cmd.miss == CRS_REPORT - && is_num_user_missing (rows[r].f, x->vars[ROW_VAR])) + && mv_is_num_user_missing (&x->vars[ROW_VAR]->miss, rows[r].f)) mark_missing = 1; for (i = 0; i < num_cells; i++) @@ -1862,7 +1864,7 @@ display_crosstabulation (void) int i; if (cmd.miss == CRS_REPORT && c < n_cols - && is_num_user_missing (cols[c].f, x->vars[COL_VAR])) + && 
mv_is_num_user_missing (&x->vars[COL_VAR]->miss, cols[c].f)) mark_missing = 1; for (i = 0; i < num_cells; i++) diff --git a/src/descript.c b/src/descript.c index fedba7f6..05e0767d 100644 --- a/src/descript.c +++ b/src/descript.c @@ -583,8 +583,9 @@ descriptives_trns_proc (struct trns_header *trns, struct ccase * c, for (vars = t->vars; vars < t->vars + t->var_cnt; vars++) { double score = case_num (c, (*vars)->fv); - if ( score == SYSMIS || (!t->include_user_missing - && is_num_user_missing(score, *vars)) ) + if ( score == SYSMIS + || (!t->include_user_missing + && mv_is_num_user_missing (&(*vars)->miss, score))) { all_sysmis = 1; break; @@ -599,7 +600,8 @@ descriptives_trns_proc (struct trns_header *trns, struct ccase * c, if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis || input == SYSMIS - || (!t->include_user_missing && is_num_user_missing(input, z->v))) + || (!t->include_user_missing + && mv_is_num_user_missing (&z->v->miss, input))) *output = SYSMIS; else *output = (input - z->mean) / z->std_dev; @@ -739,7 +741,7 @@ calc_descriptives (const struct casefile *cf, void *dsc_) if (dsc->missing_type != DSC_LISTWISE && (x == SYSMIS || (!dsc->include_user_missing - && is_num_user_missing (x, dv->v)))) + && mv_is_num_user_missing (&dv->v->miss, x)))) { dv->missing += weight; continue; @@ -781,7 +783,7 @@ calc_descriptives (const struct casefile *cf, void *dsc_) if (dsc->missing_type != DSC_LISTWISE && (x == SYSMIS || (!dsc->include_user_missing - && is_num_user_missing (x, dv->v)))) + && mv_is_num_user_missing (&dv->v->miss, x)))) continue; if (dv->moments != NULL) @@ -844,7 +846,8 @@ listwise_missing (struct dsc_proc *dsc, const struct ccase *c) double x = case_num (c, dv->v->fv); if (x == SYSMIS - || (!dsc->include_user_missing && is_num_user_missing (x, dv->v))) + || (!dsc->include_user_missing + && mv_is_num_user_missing (&dv->v->miss, x))) return 1; } return 0; diff --git a/src/dictionary.c b/src/dictionary.c index 9ea9f31a..3f1177d3 100644 --- 
a/src/dictionary.c +++ b/src/dictionary.c @@ -279,7 +279,7 @@ dict_create_var (struct dictionary *d, const char *name, int width) v->init = 1; v->reinit = dict_class_from_id (v->name) != DC_SCRATCH; v->index = d->var_cnt; - v->miss_type = MISSING_NONE; + mv_init (&v->miss, width); if (v->type == NUMERIC) { v->print = f8_2; @@ -354,8 +354,7 @@ dict_clone_var (struct dictionary *d, const struct variable *ov, the same short name. */ nv->init = 1; nv->reinit = ov->reinit; - nv->miss_type = ov->miss_type; - memcpy (nv->missing, ov->missing, sizeof nv->missing); + mv_copy (&nv->miss, &ov->miss); nv->print = ov->print; nv->write = ov->write; val_labs_destroy (nv->val_labs); @@ -678,7 +677,7 @@ dict_get_case_weight (const struct dictionary *d, const struct ccase *c, else { double w = case_num (c, d->weight->fv); - if ( w < 0.0 || w == SYSMIS || is_num_user_missing(w, d->weight) ) + if (w < 0.0 || mv_is_num_missing (&d->weight->miss, w)) w = 0.0; if ( w == 0.0 && *warn_on_invalid ) { *warn_on_invalid = 0; diff --git a/src/examine.q b/src/examine.q index b54f574b..33498d85 100644 --- a/src/examine.q +++ b/src/examine.q @@ -169,7 +169,7 @@ const char *factor_to_string_concise(const struct factor *fctr, /* Function to use for testing for missing values */ -static is_missing_func value_is_missing; +static is_missing_func *value_is_missing; /* PERCENTILES */ @@ -193,9 +193,9 @@ cmd_examine(void) /* If /MISSING=INCLUDE is set, then user missing values are ignored */ if (cmd.incl == XMN_INCLUDE ) - value_is_missing = is_system_missing; + value_is_missing = mv_is_value_system_missing; else - value_is_missing = is_missing; + value_is_missing = mv_is_value_missing; if ( cmd.st_n == SYSMIS ) cmd.st_n = 5; @@ -650,7 +650,7 @@ factor_calc(struct ccase *c, int case_no, double weight, int case_missing) const struct variable *var = dependent_vars[v]; const union value *val = case_data (c, var->fv); - if ( value_is_missing(val,var) || case_missing ) + if ( value_is_missing (&var->miss, val) 
|| case_missing ) val = 0; metrics_calc( &(*foo)->m[v], val, weight, case_no); @@ -712,7 +712,7 @@ run_examine(const struct casefile *cf, void *cmd_ ) const struct variable *var = dependent_vars[v]; const union value *val = case_data (&c, var->fv); - if ( value_is_missing(val,var)) + if ( value_is_missing(&var->miss, val)) case_missing = 1; } @@ -723,7 +723,7 @@ run_examine(const struct casefile *cf, void *cmd_ ) const struct variable *var = dependent_vars[v]; const union value *val = case_data (&c, var->fv); - if ( value_is_missing(val,var) || case_missing ) + if ( value_is_missing(&var->miss, val) || case_missing ) val = 0; metrics_calc(&totals[v], val, weight, case_no); diff --git a/src/expressions/operations.def b/src/expressions/operations.def index 0d21c16a..bd5af76c 100644 --- a/src/expressions/operations.def +++ b/src/expressions/operations.def @@ -896,7 +896,7 @@ no_opt operator VEC_ELEM_NUM (idx) { const struct variable *var = v->var[(int) idx - 1]; double value = case_num (c, var->fv); - return !is_num_user_missing (value, var) ? value : SYSMIS; + return !mv_is_num_user_missing (&var->miss, value) ? value : SYSMIS; } else { @@ -943,7 +943,7 @@ no_opt operator NUM_VAR () num_var v; { double d = case_num (c, v->fv); - return !is_num_user_missing (d, v) ? d : SYSMIS; + return !mv_is_num_user_missing (&v->miss, d) ? d : SYSMIS; } no_opt string operator STR_VAR () @@ -962,7 +962,7 @@ no_opt function LAG (num_var v, pos_int n_before) if (c != NULL) { double x = case_num (c, v->fv); - return !is_num_user_missing (x, v) ? x : SYSMIS; + return !mv_is_num_user_missing (&v->miss, x) ? x : SYSMIS; } else return SYSMIS; @@ -974,7 +974,7 @@ no_opt function LAG (num_var v) if (c != NULL) { double x = case_num (c, v->fv); - return !is_num_user_missing (x, v) ? x : SYSMIS; + return !mv_is_num_user_missing (&v->miss, x) ? 
x : SYSMIS; } else return SYSMIS; diff --git a/src/factor_stats.c b/src/factor_stats.c index 4508caf8..e090517b 100644 --- a/src/factor_stats.c +++ b/src/factor_stats.c @@ -18,8 +18,8 @@ along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include #include "factor_stats.h" -#include "config.h" #include "val.h" #include "hash.h" #include "algorithm.h" diff --git a/src/frequencies.q b/src/frequencies.q index c165c21d..0ca83a45 100644 --- a/src/frequencies.q +++ b/src/frequencies.q @@ -697,7 +697,7 @@ not_missing (const void *f_, void *v_) const struct freq *f = f_; struct variable *v = v_; - return !is_missing (&f->v, v); + return !mv_is_value_missing (&v->miss, &f->v); } /* Summarizes the frequency table data for variable V. */ @@ -1564,7 +1564,7 @@ freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var) /* Find out the extremes of the x value */ for ( frq = hsh_first(fh, &hi); frq != 0; frq = hsh_next(fh, &hi) ) { - if ( is_missing(&frq->v, var)) + if ( mv_is_value_missing(&var->miss, &frq->v)) continue; if ( frq->v.f < x_min ) x_min = frq->v.f ; diff --git a/src/get.c b/src/get.c index a13277a6..f484d120 100644 --- a/src/get.c +++ b/src/get.c @@ -1426,9 +1426,8 @@ mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f) if (val_labs_count (dv->val_labs) && !val_labs_count (mv->val_labs)) mv->val_labs = val_labs_copy (dv->val_labs); - if (dv->miss_type != MISSING_NONE - && mv->miss_type == MISSING_NONE) - copy_missing_values (mv, dv); + if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss)) + mv_copy (&mv->miss, &dv->miss); } if (dv->label && !mv->label) diff --git a/src/levene.c b/src/levene.c index 833a65e0..7877c7ec 100644 --- a/src/levene.c +++ b/src/levene.c @@ -77,8 +77,7 @@ struct levene_info enum lev_missing missing; /* Function to test for missing values */ - is_missing_func is_missing; - + is_missing_func *is_missing; }; /* First pass 
*/ @@ -202,7 +201,7 @@ levene_calc (const struct ccase *c, void *_l) struct variable *v = l->v_dep[i]; const union value *val = case_data (c, v->fv); - if (l->is_missing(val,v) ) + if (l->is_missing (&v->miss, val) ) { return 0; } @@ -225,7 +224,7 @@ levene_calc (const struct ccase *c, void *_l) if ( 0 == gs ) continue ; - if ( ! l->is_missing(v,var)) + if ( ! l->is_missing(&var->miss, v)) { levene_z= fabs(v->f - gs->mean); lz[i].grand_total += levene_z * weight; @@ -309,7 +308,7 @@ levene2_calc (const struct ccase *c, void *_l) struct variable *v = l->v_dep[i]; const union value *val = case_data (c, v->fv); - if (l->is_missing(val,v) ) + if (l->is_missing(&v->miss, val) ) { return 0; } @@ -330,7 +329,7 @@ levene2_calc (const struct ccase *c, void *_l) if ( 0 == gs ) continue; - if ( ! l->is_missing(v,var) ) + if ( ! l->is_missing (&var->miss, v) ) { levene_z = fabs(v->f - gs->mean); lz_denominator[i] += weight * pow2(levene_z - gs->lz_mean); diff --git a/src/mis-val.c b/src/mis-val.c index 27a51343..555dbf09 100644 --- a/src/mis-val.c +++ b/src/mis-val.c @@ -21,6 +21,7 @@ #include "error.h" #include #include "command.h" +#include "data-in.h" #include "error.h" #include "lexer.h" #include "magic.h" @@ -32,317 +33,180 @@ #include "debug-print.h" -/* Variables on MIS VAL. */ -static struct variable **v; -static int nv; - -/* Type of the variables on MIS VAL. */ -static int type; - -/* Width of string variables on MIS VAL. */ -static size_t width; - -/* Items to fill-in var structs with. 
*/ -static int miss_type; -static union value missing[3]; - -static int parse_varnames (void); -static int parse_numeric (void); -static int parse_alpha (void); +static bool parse_number (double *, const struct fmt_spec *); int cmd_missing_values (void) { - int i; + struct variable **v = NULL; /* `done' frees V even if no list was parsed. */ + int nv; + + int retval = CMD_PART_SUCCESS_MAYBE; + bool deferred_errors = false; while (token != '.') { - if (!parse_varnames ()) - goto fail; + int i; + - if (token != ')') - { - if ((type == NUMERIC && !parse_numeric ()) - || (type == ALPHA && !parse_alpha ())) - goto fail; - } - else - miss_type = MISSING_NONE; + if (!parse_variables (default_dict, &v, &nv, PV_NONE)) + goto done; - if (!lex_match (')')) - { - msg (SE, _("`)' expected after value specification.")); - goto fail; - } + if (!lex_match ('(')) + { + lex_error (_("expecting `('")); + goto done; + } for (i = 0; i < nv; i++) - { - v[i]->miss_type = miss_type; - memcpy (v[i]->missing, missing, sizeof v[i]->missing); - } + mv_init (&v[i]->miss, v[i]->width); + + if (!lex_match (')')) + { + struct missing_values mv; + + for (i = 0; i < nv; i++) + if (v[i]->type != v[0]->type) + { + const struct variable *n = v[0]->type == NUMERIC ? v[0] : v[i]; + const struct variable *s = v[0]->type == NUMERIC ? v[i] : v[0]; + msg (SE, _("Cannot mix numeric variables (e.g. %s) and " + "string variables (e.g. 
%s) within a single list."), + n->name, s->name); + goto done; + } + + if (v[0]->type == NUMERIC) + { + mv_init (&mv, 0); + while (!lex_match (')')) + { + double x; + + if (lex_match_id ("LO") || lex_match_id ("LOWEST")) + x = LOWEST; + else if (!parse_number (&x, &v[0]->print)) + goto done; + + if (lex_match_id ("THRU")) + { + double y; + + if (lex_match_id ("HI") || lex_match_id ("HIGHEST")) + y = HIGHEST; + else if (!parse_number (&y, &v[0]->print)) + goto done; + + if (x == LOWEST && y == HIGHEST) + { + msg (SE, _("LO THRU HI is an invalid range.")); + deferred_errors = true; + } + else if (!mv_add_num_range (&mv, x, y)) + deferred_errors = true; + } + else + { + if (x == LOWEST) + { + msg (SE, _("LO or LOWEST must be part of a range.")); + deferred_errors = true; + } + else if (!mv_add_num (&mv, x)) + deferred_errors = true; + } + + lex_match (','); + } + } + else + { + mv_init (&mv, MAX_SHORT_STRING); + while (!lex_match (')')) + { + if (!lex_force_string ()) + { + deferred_errors = true; + break; + } + + if (ds_length (&tokstr) > MAX_SHORT_STRING) + { + ds_truncate (&tokstr, MAX_SHORT_STRING); + msg (SE, _("Truncating missing value to short string " + "length (%d characters)."), + MAX_SHORT_STRING); + } + else + ds_rpad (&tokstr, MAX_SHORT_STRING, ' '); + + if (!mv_add_str (&mv, ds_data (&tokstr))) + deferred_errors = true; + + lex_get (); + lex_match (','); + } + } + + for (i = 0; i < nv; i++) + { + if (!mv_is_resizable (&mv, v[i]->width)) + { + msg (SE, _("Missing values provided are too long to assign " + "to variable of width %d."), + v[i]->width); + deferred_errors = true; + } + else + { + mv_copy (&v[i]->miss, &mv); + mv_resize (&v[i]->miss, v[i]->width); + } + } + } lex_match ('/'); free (v); + v = NULL; } - - return lex_end_of_command (); - -fail: + retval = lex_end_of_command (); + + done: free (v); - return CMD_PART_SUCCESS_MAYBE; + if (deferred_errors) + retval = CMD_PART_SUCCESS_MAYBE; + return retval; } -static int -parse_varnames (void) +static 
bool +parse_number (double *x, const struct fmt_spec *f) { - int i; - - if (!parse_variables (default_dict, &v, &nv, PV_SAME_TYPE)) - return 0; - if (!lex_match ('(')) + if (lex_is_number ()) { - msg (SE, _("`(' expected after variable name%s."), nv > 1 ? "s" : ""); - return 0; - } - - type = v[0]->type; - if (type == NUMERIC) - return 1; - - width = v[0]->width; - for (i = 1; i < nv; i++) - if (v[i]->type == ALPHA && v[i]->nv != 1) - { - msg (SE, _("Long string value specified.")); - return 0; - } - else if (v[i]->type == ALPHA && (int) width != v[i]->width) - { - msg (SE, _("Short strings must be of equal width.")); - return 0; - } - - return 1; -} - -/* Number or range? */ -enum - { - MV_NOR_NOTHING, /* Empty. */ - MV_NOR_NUMBER, /* Single number. */ - MV_NOR_RANGE /* Range. */ - }; - -/* A single value or a range. */ -struct num_or_range - { - int type; /* One of NOR_*. */ - double d[2]; /* d[0]=lower bound or value, d[1]=upper bound. */ - }; - -/* Parses something of the form , or LO[WEST] THRU , or - THRU HI[GHEST], or THRU , and sets the appropriate - members of NOR. Returns success. 
*/ -static int -parse_num_or_range (struct num_or_range * nor) -{ - if (lex_match_id ("LO") || lex_match_id ("LOWEST")) - { - nor->type = MV_NOR_RANGE; - if (!lex_force_match_id ("THRU")) - return 0; - if (!lex_force_num ()) - return 0; - nor->d[0] = LOWEST; - nor->d[1] = tokval; - } - else if (lex_is_number ()) - { - nor->d[0] = tokval; + *x = lex_number (); lex_get (); - - if (lex_match_id ("THRU")) - { - nor->type = MV_NOR_RANGE; - if (lex_match_id ("HI") || lex_match_id ("HIGHEST")) - nor->d[1] = HIGHEST; - else - { - if (!lex_force_num ()) - return 0; - nor->d[1] = tokval; - lex_get (); - - if (nor->d[0] > nor->d[1]) - { - msg (SE, _("Range %g THRU %g is not valid because %g is " - "greater than %g."), - nor->d[0], nor->d[1], nor->d[0], nor->d[1]); - return 0; - } - } - } - else - nor->type = MV_NOR_NUMBER; + return true; } - else - return -1; - - return 1; -} - -/* Parses a set of numeric missing values and stores them into - `missing[]' and `miss_type' global variables. */ -static int -parse_numeric (void) -{ - struct num_or_range set[3]; - int r; - - set[1].type = set[2].type = MV_NOR_NOTHING; - - /* Get first number or range. */ - r = parse_num_or_range (&set[0]); - if (r < 1) + else if (token == T_STRING) { - if (r == -1) - msg (SE, _("Number or range expected.")); - return 0; - } - - /* Get second and third optional number or range. */ - lex_match (','); - r = parse_num_or_range (&set[1]); - if (r == 1) - { - lex_match (','); - r = parse_num_or_range (&set[2]); - } - if (r == 0) - return 0; - - /* Force range, if present, into set[0]. */ - if (set[1].type == MV_NOR_RANGE) - { - struct num_or_range t = set[1]; - set[1] = set[0]; - set[0] = t; - } - if (set[2].type == MV_NOR_RANGE) - { - struct num_or_range t = set[2]; - set[2] = set[0]; - set[0] = t; - } - - /* Ensure there's not more than one range, or one range - plus one value. 
*/ - if (set[1].type == MV_NOR_RANGE || set[2].type == MV_NOR_RANGE) - { - msg (SE, _("At most one range can exist in the missing values " - "for any one variable.")); - return 0; - } - if (set[0].type == MV_NOR_RANGE && set[2].type != MV_NOR_NOTHING) - { - msg (SE, _("At most one individual value can be missing along " - "with one range.")); - return 0; - } - - /* Set missing[] from set[]. */ - if (set[0].type == MV_NOR_RANGE) - { - int x = 0; - - if (set[0].d[0] == LOWEST) - { - miss_type = MISSING_LOW; - missing[x++].f = set[0].d[1]; - } - else if (set[0].d[1] == HIGHEST) - { - miss_type = MISSING_HIGH; - missing[x++].f = set[0].d[0]; - } - else - { - miss_type = MISSING_RANGE; - missing[x++].f = set[0].d[0]; - missing[x++].f = set[0].d[1]; - } - - if (set[1].type == MV_NOR_NUMBER) - { - miss_type += 3; - missing[x].f = set[1].d[0]; - } - } - else - { - if (set[0].type == MV_NOR_NUMBER) - { - miss_type = MISSING_1; - missing[0].f = set[0].d[0]; - } - if (set[1].type == MV_NOR_NUMBER) - { - miss_type = MISSING_2; - missing[1].f = set[1].d[0]; - } - if (set[2].type == MV_NOR_NUMBER) - { - miss_type = MISSING_3; - missing[2].f = set[2].d[0]; - } - } - - return 1; -} - -static int -parse_alpha (void) -{ - for (miss_type = 0; token == T_STRING && miss_type < 3; miss_type++) - { - if (ds_length (&tokstr) != width) - { - msg (SE, _("String is not of proper length.")); - return 0; - } - strncpy (missing[miss_type].s, ds_c_str (&tokstr), MAX_SHORT_STRING); + struct data_in di; + union value v; + di.s = ds_data (&tokstr); + di.e = ds_end (&tokstr); + di.v = &v; + di.flags = 0; + di.f1 = 1; + di.f2 = ds_length (&tokstr); + di.format = *f; + data_in (&di); lex_get (); - lex_match (','); + *x = v.f; + return true; } - if (miss_type < 1) + else { - msg (SE, _("String expected.")); - return 0; + lex_error (_("expecting number or data string")); + return false; } - - return 1; } -/* Copy the missing values from variable SRC to variable DEST. 
*/ -void -copy_missing_values (struct variable *dest, const struct variable *src) -{ - static const int n_values[MISSING_COUNT] = - { - 0, 1, 2, 3, 2, 1, 1, 3, 2, 2, - }; - - assert (dest->width == src->width); - assert (src->miss_type >= 0 && src->miss_type < MISSING_COUNT); - - { - int i; - - dest->miss_type = src->miss_type; - for (i = 0; i < n_values[src->miss_type]; i++) - if (src->type == NUMERIC) - dest->missing[i].f = src->missing[i].f; - else - memcpy (dest->missing[i].s, src->missing[i].s, src->width); - } -} diff --git a/src/missing-values.c b/src/missing-values.c new file mode 100644 index 00000000..09192179 --- /dev/null +++ b/src/missing-values.c @@ -0,0 +1,382 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2005 Free Software Foundation, Inc. + Written by Ben Pfaff . + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +#include +#include "missing-values.h" +#include +#include +#include "str.h" + +/* Initializes MV as a set of missing values for a variable of + the given WIDTH. Although only numeric variables and short + string variables may have missing values, WIDTH may be any + valid variable width. */ +void +mv_init (struct missing_values *mv, int width) +{ + assert (width >= 0 && width <= MAX_STRING); + mv->type = MV_NONE; + mv->width = width; +} + +/* Copies SRC to MV. 
*/ +void +mv_copy (struct missing_values *mv, const struct missing_values *src) +{ + *mv = *src; +} + +/* Returns true if MV is an empty set of missing values. */ +bool +mv_is_empty (const struct missing_values *mv) +{ + return mv->type == MV_NONE; +} + +/* Returns the width of the missing values that MV may + contain. */ +int +mv_get_width (const struct missing_values *mv) +{ + return mv->width; +} + +/* Attempts to add individual value V to the set of missing + values MV. Returns true if successful, false if MV has no + more room for missing values. (Long string variables never + accept missing values.) */ +bool +mv_add_value (struct missing_values *mv, const union value *v) +{ + if (mv->width > MAX_SHORT_STRING) + return false; + switch (mv->type) + { + case MV_NONE: + case MV_1: + case MV_2: + case MV_RANGE: + mv->values[mv->type & 3] = *v; + mv->type++; + return true; + + case MV_3: + case MV_RANGE_1: + return false; + } + abort (); +} + +/* Attempts to add S to the set of string missing values MV. S + must contain exactly as many characters as MV's width. + Returns true if successful, false if MV has no more room for + missing values. (Long string variables never accept missing + values.) */ +bool +mv_add_str (struct missing_values *mv, const unsigned char s[]) +{ + assert (mv->width > 0); + return mv_add_value (mv, (union value *) s); +} + +/* Attempts to add D to the set of numeric missing values MV. + Returns true if successful, false if MV has no more room for + missing values. */ +bool +mv_add_num (struct missing_values *mv, double d) +{ + assert (mv->width == 0); + return mv_add_value (mv, (union value *) &d); +} + +/* Attempts to add range [LOW, HIGH] to the set of numeric + missing values MV. Returns true if successful, false if MV + has no room for a range. 
*/ +bool +mv_add_num_range (struct missing_values *mv, double low, double high) +{ + assert (mv->width == 0); + switch (mv->type) + { + case MV_NONE: + case MV_1: + /* The range is stored in values[1] and values[2]. OR-ing + in 4 turns MV_NONE into MV_RANGE and MV_1 into + MV_RANGE_1. */ + mv->values[1].f = low; + mv->values[2].f = high; + mv->type |= 4; + return true; + + case MV_2: + case MV_3: + case MV_RANGE: + case MV_RANGE_1: + return false; + } + abort (); +} + +/* Returns true if MV contains an individual value, + false if MV is empty (or contains only a range). */ +bool +mv_has_value (struct missing_values *mv) +{ + switch (mv->type) + { + case MV_1: + case MV_2: + case MV_3: + case MV_RANGE_1: + return true; + + case MV_NONE: + case MV_RANGE: + return false; + } + abort (); +} + +/* Removes one individual value from MV and stores it in *V. + MV must contain an individual value (as determined by + mv_has_value()). */ +void +mv_pop_value (struct missing_values *mv, union value *v) +{ + assert (mv_has_value (mv)); + /* After the decrement, the low two bits of `type' index the + value being removed. */ + mv->type--; + *v = mv->values[mv->type & 3]; +} + +/* Returns true if MV contains a numeric range, + false if MV is empty (or contains only individual values). */ +bool +mv_has_range (struct missing_values *mv) +{ + switch (mv->type) + { + case MV_RANGE: + case MV_RANGE_1: + return true; + + case MV_NONE: + case MV_1: + case MV_2: + case MV_3: + return false; + } + abort (); +} + +/* Removes the numeric range from MV and stores it in *LOW and + *HIGH. MV must contain a range (as determined by + mv_has_range()). */ +void +mv_pop_range (struct missing_values *mv, double *low, double *high) +{ + assert (mv_has_range (mv)); + *low = mv->values[1].f; + *high = mv->values[2].f; + /* Clearing the 4 leaves MV_NONE or MV_1. */ + mv->type &= 3; +} + +/* Returns true if values[IDX] is in use when the `type' member + is set to TYPE (in struct missing_values), + false otherwise.
*/ +static bool +using_element (unsigned type, int idx) +{ + assert (idx >= 0 && idx < 3); + + switch (type) + { + case MV_NONE: + return false; + case MV_1: + return idx < 1; + case MV_2: + return idx < 2; + case MV_3: + return true; + case MV_RANGE: + return idx > 0; + case MV_RANGE_1: + return true; + } + abort (); +} + +/* Returns true if S contains only spaces between indexes + NEW_WIDTH (inclusive) and OLD_WIDTH (exclusive), + false otherwise. */ +static bool +can_resize_string (const unsigned char *s, int old_width, int new_width) +{ + int i; + + assert (new_width < old_width); + for (i = new_width; i < old_width; i++) + if (s[i] != ' ') + return false; + return true; +} + +/* Returns true if MV can be resized to the given WIDTH with + mv_resize(), false otherwise. Resizing to the same width is + always possible. Resizing to a long string WIDTH is only + possible if MV is an empty set of missing values; otherwise, + resizing to a larger WIDTH is always possible. Resizing to a + shorter width is possible only when each missing value + contains only spaces in the characters that will be + trimmed. */ +bool +mv_is_resizable (struct missing_values *mv, int width) +{ + assert ((width == 0) == (mv->width == 0)); + if (width > MAX_SHORT_STRING && mv->type != MV_NONE) + return false; + else if (width >= mv->width) + return true; + else + { + int i; + + for (i = 0; i < 3; i++) + if (using_element (mv->type, i) + && !can_resize_string (mv->values[i].s, mv->width, width)) + return false; + return true; + } +} + +/* Resizes MV to the given WIDTH. WIDTH must fit the constraints + explained for mv_is_resizable(). */ +void +mv_resize (struct missing_values *mv, int width) +{ + assert (mv_is_resizable (mv, width)); + + /* Pad only a nonempty set: an empty set may be given a long + string WIDTH, and padding to that width would write past + the end of the values. A nonempty set is never wider + than MAX_SHORT_STRING, so its padding always fits. */ + if (width > mv->width && mv->type != MV_NONE) + { + int i; + + for (i = 0; i < 3; i++) + memset (mv->values[i].s + mv->width, ' ', width - mv->width); + } + mv->width = width; +} + +/* Returns true if V is system missing or a missing value in MV, + false otherwise.
*/ +bool +mv_is_value_missing (const struct missing_values *mv, const union value *v) +{ + return (mv->width == 0 + ? mv_is_num_missing (mv, v->f) + : mv_is_str_missing (mv, v->s)); +} + +/* Returns true if D is system missing or a missing value in MV, + false otherwise. + MV must be a set of numeric missing values. */ +bool +mv_is_num_missing (const struct missing_values *mv, double d) +{ + assert (mv->width == 0); + return d == SYSMIS || mv_is_num_user_missing (mv, d); +} + +/* Returns true if S[] is a missing value in MV, false otherwise. + MV must be a set of string missing values. + S[] must contain exactly as many characters as MV's width. */ +bool +mv_is_str_missing (const struct missing_values *mv, + const unsigned char s[]) +{ + return mv_is_str_user_missing (mv, s); +} + +/* Returns true if V is a missing value in MV, false otherwise. */ +bool +mv_is_value_user_missing (const struct missing_values *mv, + const union value *v) +{ + return (mv->width == 0 + ? mv_is_num_user_missing (mv, v->f) + : mv_is_str_user_missing (mv, v->s)); +} + +/* Returns true if D is a missing value in MV, false otherwise. + MV must be a set of numeric missing values. */ +bool +mv_is_num_user_missing (const struct missing_values *mv, double d) +{ + const union value *v = mv->values; + assert (mv->width == 0); + switch (mv->type) + { + case MV_NONE: + return false; + case MV_1: + return v[0].f == d; + case MV_2: + return v[0].f == d || v[1].f == d; + case MV_3: + return v[0].f == d || v[1].f == d || v[2].f == d; + case MV_RANGE: + return v[1].f <= d && d <= v[2].f; + case MV_RANGE_1: + return v[0].f == d || (v[1].f <= d && d <= v[2].f); + } + abort (); +} + +/* Returns true if S[] is a missing value in MV, false otherwise. + MV must be a set of string missing values. + S[] must contain exactly as many characters as MV's width. 
*/ +bool +mv_is_str_user_missing (const struct missing_values *mv, + const unsigned char s[]) +{ + const union value *v = mv->values; + assert (mv->width > 0); + switch (mv->type) + { + case MV_NONE: + return false; + case MV_1: + return !memcmp (v[0].s, s, mv->width); + case MV_2: + return (!memcmp (v[0].s, s, mv->width) + || !memcmp (v[1].s, s, mv->width)); + case MV_3: + return (!memcmp (v[0].s, s, mv->width) + || !memcmp (v[1].s, s, mv->width) + || !memcmp (v[2].s, s, mv->width)); + case MV_RANGE: + case MV_RANGE_1: + abort (); + } + abort (); +} + +/* Returns true if MV is a set of numeric missing values and V is + the system missing value. */ +bool +mv_is_value_system_missing (const struct missing_values *mv, + const union value *v) +{ + return mv->width == 0 ? v->f == SYSMIS : false; +} diff --git a/src/missing-values.h b/src/missing-values.h new file mode 100644 index 00000000..710fc05e --- /dev/null +++ b/src/missing-values.h @@ -0,0 +1,84 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2005 Free Software Foundation, Inc. + Written by Ben Pfaff . + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +#if !missing_values_h +#define missing_values_h 1 + +#include +#include "val.h" + +/* Types of user-missing values. + Invisible--use access functions defined below instead. 
*/ +enum mv_type + { + MV_NONE = 0, /* No user-missing values. */ + MV_1 = 1, /* One user-missing value. */ + MV_2 = 2, /* Two user-missing values. */ + MV_3 = 3, /* Three user-missing values. */ + MV_RANGE = 4, /* A range of user-missing values. */ + MV_RANGE_1 = 5 /* A range plus an individual value. */ + }; + +/* Missing values. + Opaque--use access functions defined below. */ +struct missing_values + { + unsigned type; /* Number and type of missing values. */ + int width; /* 0=numeric, otherwise string width. */ + union value values[3]; /* Missing values. [y,z] are the range. */ + }; + +void mv_init (struct missing_values *, int width); +void mv_copy (struct missing_values *, const struct missing_values *); +bool mv_is_empty (const struct missing_values *); +int mv_get_width (const struct missing_values *); + +bool mv_add_value (struct missing_values *, const union value *); +bool mv_add_str (struct missing_values *, const unsigned char[]); +bool mv_add_num (struct missing_values *, double); +bool mv_add_num_range (struct missing_values *, double low, double high); + +bool mv_has_value (struct missing_values *); +void mv_pop_value (struct missing_values *, union value *); +bool mv_has_range (struct missing_values *); +void mv_pop_range (struct missing_values *, double *low, double *high); + +bool mv_is_resizable (struct missing_values *, int width); +void mv_resize (struct missing_values *, int width); + +typedef bool is_missing_func (const struct missing_values *, + const union value *); + +/* Is a value system or user missing? */ +bool mv_is_value_missing (const struct missing_values *, const union value *); +bool mv_is_num_missing (const struct missing_values *, double); +bool mv_is_str_missing (const struct missing_values *, const unsigned char[]); + +/* Is a value user missing? 
*/ +bool mv_is_value_user_missing (const struct missing_values *, + const union value *); +bool mv_is_num_user_missing (const struct missing_values *, double); +bool mv_is_str_user_missing (const struct missing_values *, + const unsigned char[]); + +/* Is a value system missing? */ +bool mv_is_value_system_missing (const struct missing_values *, + const union value *); + +#endif /* missing-values.h */ diff --git a/src/oneway.q b/src/oneway.q index 9aebbf61..8c9c768a 100644 --- a/src/oneway.q +++ b/src/oneway.q @@ -87,7 +87,7 @@ static int ostensible_number_of_groups=-1; /* Function to use for testing for missing values */ -static is_missing_func value_is_missing; +static is_missing_func *value_is_missing; static void run_oneway(const struct casefile *cf, void *_mode); @@ -119,9 +119,9 @@ cmd_oneway(void) /* If /MISSING=INCLUDE is set, then user missing values are ignored */ if (cmd.incl == ONEWAY_INCLUDE ) - value_is_missing = is_system_missing; + value_is_missing = mv_is_value_system_missing; else - value_is_missing = is_missing; + value_is_missing = mv_is_value_missing; /* What statistics were requested */ if ( cmd.sbc_statistics ) @@ -913,7 +913,7 @@ run_oneway(const struct casefile *cf, void *cmd_) const union value *indep_val = case_data (&c, indep_var->fv); /* Deal with missing values */ - if ( value_is_missing(indep_val,indep_var) ) + if ( value_is_missing(&indep_var->miss, indep_val) ) continue; /* Skip the entire case if /MISSING=LISTWISE is set */ @@ -924,7 +924,7 @@ run_oneway(const struct casefile *cf, void *cmd_) const struct variable *v = vars[i]; const union value *val = case_data (&c, v->fv); - if (value_is_missing(val,v) ) + if (value_is_missing(&v->miss, val) ) break; } if ( i != n_vars ) @@ -964,7 +964,7 @@ run_oneway(const struct casefile *cf, void *cmd_) hsh_insert ( group_hash, (void *) gs ); } - if (! value_is_missing(val,v) ) + if (! 
value_is_missing(&v->miss, val) ) { struct group_statistics *totals = &gp->ugs; diff --git a/src/percentiles.c b/src/percentiles.c index 4e618ad4..2381f771 100644 --- a/src/percentiles.c +++ b/src/percentiles.c @@ -18,6 +18,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include #include "factor_stats.h" #include "percentiles.h" #include "misc.h" diff --git a/src/pfm-read.c b/src/pfm-read.c index c7a604db..1999628c 100644 --- a/src/pfm-read.c +++ b/src/pfm-read.c @@ -540,45 +540,23 @@ read_variables (struct pfm_reader *r, struct dictionary *dict) convert_format (r, &fmt[3], &v->write, v); /* Range missing values. */ - if (match (r, 'B')) - { - v->miss_type = MISSING_RANGE; - v->missing[0] = parse_value (r, v); - v->missing[1] = parse_value (r, v); - } + if (match (r, 'B')) + { + double x = read_float (r); + double y = read_float (r); + mv_add_num_range (&v->miss, x, y); + } else if (match (r, 'A')) - { - v->miss_type = MISSING_HIGH; - v->missing[0] = parse_value (r, v); - } + mv_add_num_range (&v->miss, read_float (r), HIGHEST); else if (match (r, '9')) - { - v->miss_type = MISSING_LOW; - v->missing[0] = parse_value (r, v); - } + mv_add_num_range (&v->miss, LOWEST, read_float (r)); /* Single missing values. 
*/ - while (match (r, '8')) - { - static const int map_next[MISSING_COUNT] = - { - MISSING_1, MISSING_2, MISSING_3, -1, - MISSING_RANGE_1, MISSING_LOW_1, MISSING_HIGH_1, - -1, -1, -1, - }; - - static const int map_ofs[MISSING_COUNT] = - { - -1, 0, 1, 2, -1, -1, -1, 2, 1, 1, - }; - - v->miss_type = map_next[v->miss_type]; - if (v->miss_type == -1) - error (r, _("Bad missing values for %s."), v->name); - - assert (map_ofs[v->miss_type] != -1); - v->missing[map_ofs[v->miss_type]] = parse_value (r, v); - } + while (match (r, '8')) + { + union value value = parse_value (r, v); + mv_add_value (&v->miss, &value); + } if (match (r, 'C')) { diff --git a/src/pfm-write.c b/src/pfm-write.c index 30615418..fbe56f6a 100644 --- a/src/pfm-write.c +++ b/src/pfm-write.c @@ -298,25 +298,43 @@ write_variables (struct pfm_writer *w, struct dictionary *dict) for (i = 0; i < dict_get_var_cnt (dict); i++) { - static const char *miss_types[MISSING_COUNT] = - { - "", "8", "88", "888", "B ", "9", "A", "B 8", "98", "A8", - }; - - const char *m; - int j; - struct variable *v = dict_get_var (dict, i); + struct missing_values mv; if (!buf_write (w, "7", 1) || !write_int (w, v->width) || !write_string (w, v->short_name) || !write_format (w, &v->print) || !write_format (w, &v->write)) return 0; - for (m = miss_types[v->miss_type], j = 0; j < (int) strlen (m); j++) - if ((m[j] != ' ' && !buf_write (w, &m[j], 1)) - || !write_value (w, &v->missing[j], v)) - return 0; + /* Write missing values. 
*/ + mv_copy (&mv, &v->miss); + while (mv_has_range (&mv)) + { + double x, y; + mv_pop_range (&mv, &x, &y); + if (x == LOWEST) + { + /* LOWEST THRU y: only the upper bound is written. */ + if (!buf_write (w, "9", 1) || !write_float (w, y)) + return 0; + } + else if (y == HIGHEST) + { + /* x THRU HIGHEST: only the lower bound is written. */ + if (!buf_write (w, "A", 1) || !write_float (w, x)) + return 0; + } + else { + if (!buf_write (w, "B", 1) || !write_float (w, x) + || !write_float (w, y)) + return 0; + } + } + while (mv_has_value (&mv)) + { + union value value; + mv_pop_value (&mv, &value); + if (!buf_write (w, "8", 1) || !write_value (w, &value, v)) + return 0; + } if (v->label && (!buf_write (w, "C", 1) || !write_string (w, v->label))) return 0; diff --git a/src/recode.c b/src/recode.c index b25ac456..9f7214bd 100644 --- a/src/recode.c +++ b/src/recode.c @@ -714,7 +714,7 @@ find_src_numeric (struct rcd_var * v, struct ccase * c) case RCD_END: return NULL; case RCD_USER: - if (is_num_user_missing (cmp, v->src)) + if (mv_is_num_user_missing (&v->src->miss, cmp)) return cp; break; case RCD_SINGLE: diff --git a/src/sfm-read.c b/src/sfm-read.c index 0c07150e..bd8800bb 100644 --- a/src/sfm-read.c +++ b/src/sfm-read.c @@ -926,63 +926,45 @@ read_variables (struct sfm_reader *r, if (sv.n_missing_values != 0) { flt64 mv[3]; + int mv_cnt = abs (sv.n_missing_values); if (vv->width > MAX_SHORT_STRING) lose ((ME, _("%s: Long string variable %s may not have missing " "values."), handle_get_filename (r->fh), vv->name)); - assertive_buf_read (r, mv, sizeof *mv * abs (sv.n_missing_values), 0); + assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0); if (r->reverse_endian && vv->type == NUMERIC) - for (j = 0; j < abs (sv.n_missing_values); j++) + for (j = 0; j < mv_cnt; j++) bswap_flt64 (&mv[j]); if (sv.n_missing_values > 0) { - vv->miss_type = sv.n_missing_values; - if (vv->type == NUMERIC) - for (j = 0; j < sv.n_missing_values; j++) - vv->missing[j].f = mv[j]; - else - for (j = 0; j < sv.n_missing_values; j++) - memcpy (vv->missing[j].s, &mv[j], vv->width); + for (j = 0; j < 
sv.n_missing_values; j++) + if (vv->type == NUMERIC) + mv_add_num (&vv->miss, mv[j]); + else + mv_add_str (&vv->miss, (unsigned char *) &mv[j]); } else { - int x = 0; - if (vv->type == ALPHA) lose ((ME, _("%s: String variable %s may not have missing " "values specified as a range."), handle_get_filename (r->fh), vv->name)); if (mv[0] == r->lowest) - { - vv->miss_type = MISSING_LOW; - vv->missing[x++].f = mv[1]; - } + mv_add_num_range (&vv->miss, LOWEST, mv[1]); else if (mv[1] == r->highest) - { - vv->miss_type = MISSING_HIGH; - vv->missing[x++].f = mv[0]; - } + mv_add_num_range (&vv->miss, mv[0], HIGHEST); else - { - vv->miss_type = MISSING_RANGE; - vv->missing[x++].f = mv[0]; - vv->missing[x++].f = mv[1]; - } + mv_add_num_range (&vv->miss, mv[0], mv[1]); if (sv.n_missing_values == -3) - { - vv->miss_type += 3; - vv->missing[x++].f = mv[2]; - } + mv_add_num (&vv->miss, mv[2]); } } - else - vv->miss_type = MISSING_NONE; if (!parse_format_spec (r, sv.print, &vv->print, vv) || !parse_format_spec (r, sv.write, &vv->write, vv)) diff --git a/src/sfm-write.c b/src/sfm-write.c index e1e103e5..80b78408 100644 --- a/src/sfm-write.c +++ b/src/sfm-write.c @@ -362,6 +362,7 @@ write_variable (struct sfm_writer *w, struct variable *v) struct sysfile_variable sv; /* Missing values. */ + struct missing_values mv; flt64 m[3]; /* Missing value values. */ int nm; /* Number of missing values, possibly negative. 
*/ @@ -369,54 +370,27 @@ write_variable (struct sfm_writer *w, struct variable *v) sv.type = v->width; sv.has_var_label = (v->label != NULL); - switch (v->miss_type) + mv_copy (&mv, &v->miss); + nm = 0; + if (mv_has_range (&mv)) { - case MISSING_NONE: - nm = 0; - break; - case MISSING_1: - case MISSING_2: - case MISSING_3: - for (nm = 0; nm < v->miss_type; nm++) - m[nm] = v->missing[nm].f; - break; - case MISSING_RANGE: - m[0] = v->missing[0].f; - m[1] = v->missing[1].f; - nm = -2; - break; - case MISSING_LOW: - m[0] = second_lowest_flt64; - m[1] = v->missing[0].f; - nm = -2; - break; - case MISSING_HIGH: - m[0] = v->missing[0].f; - m[1] = FLT64_MAX; - nm = -2; - break; - case MISSING_RANGE_1: - m[0] = v->missing[0].f; - m[1] = v->missing[1].f; - m[2] = v->missing[2].f; - nm = -3; - break; - case MISSING_LOW_1: - m[0] = second_lowest_flt64; - m[1] = v->missing[0].f; - m[2] = v->missing[1].f; - nm = -3; - break; - case MISSING_HIGH_1: - m[0] = v->missing[0].f; - m[1] = second_lowest_flt64; - m[2] = v->missing[1].f; - nm = -3; - break; - default: - assert (0); - abort (); + double x, y; + mv_pop_range (&mv, &x, &y); + m[nm++] = x == LOWEST ? second_lowest_flt64 : x; + m[nm++] = y == HIGHEST ? FLT64_MAX : y; } + while (mv_has_value (&mv)) + { + union value value; + mv_pop_value (&mv, &value); + if (v->type == NUMERIC) + m[nm] = value.f; + else + buf_copy_rpad ((char *) &m[nm], sizeof m[nm], value.s, v->width); + nm++; + } + if (mv_has_range (&v->miss)) + nm = -nm; sv.n_missing_values = nm; write_format_spec (&v->print, &sv.print); @@ -445,7 +419,7 @@ write_variable (struct sfm_writer *w, struct variable *v) return 0; } - if (nm && !buf_write (w, m, sizeof *m * nm)) + if (nm && !buf_write (w, m, sizeof *m * abs (nm))) return 0; if (v->type == ALPHA && v->width > (int) sizeof (flt64)) diff --git a/src/sfmP.h b/src/sfmP.h index 978a3e12..c127b85e 100644 --- a/src/sfmP.h +++ b/src/sfmP.h @@ -55,6 +55,7 @@ #endif /* Figure out SYSMIS value for flt64. 
*/ +#include "magic.h" #if SIZEOF_DOUBLE == 8 #define second_lowest_flt64 second_lowest_value #else diff --git a/src/sysfile-info.c b/src/sysfile-info.c index 08d5484f..45bffad7 100644 --- a/src/sysfile-info.c +++ b/src/sysfile-info.c @@ -463,63 +463,44 @@ describe_variable (struct variable *v, struct tab_table *t, int r, int as) } /* Missing values if any. */ - if (v->miss_type != MISSING_NONE) + if (!mv_is_empty (&v->miss)) { - char buf[80]; - char *cp = stpcpy (buf, _("Missing Values: ")); - - if (v->type == NUMERIC) - switch (v->miss_type) - { - case MISSING_1: - sprintf (cp, "%g", v->missing[0].f); - break; - case MISSING_2: - sprintf (cp, "%g; %g", v->missing[0].f, v->missing[1].f); - break; - case MISSING_3: - sprintf (cp, "%g; %g; %g", v->missing[0].f, - v->missing[1].f, v->missing[2].f); - break; - case MISSING_RANGE: - sprintf (cp, "%g THRU %g", v->missing[0].f, v->missing[1].f); - break; - case MISSING_LOW: - sprintf (cp, "LOWEST THRU %g", v->missing[0].f); - break; - case MISSING_HIGH: - sprintf (cp, "%g THRU HIGHEST", v->missing[0].f); - break; - case MISSING_RANGE_1: - sprintf (cp, "%g THRU %g; %g", - v->missing[0].f, v->missing[1].f, v->missing[2].f); - break; - case MISSING_LOW_1: - sprintf (cp, "LOWEST THRU %g; %g", - v->missing[0].f, v->missing[1].f); - break; - case MISSING_HIGH_1: - sprintf (cp, "%g THRU HIGHEST; %g", - v->missing[0].f, v->missing[1].f); - break; - default: - assert (0); - } - else - { - int i; - - for (i = 0; i < v->miss_type; i++) - { - if (i != 0) - cp = stpcpy (cp, "; "); - *cp++ = '"'; - memcpy (cp, v->missing[i].s, v->width); + char buf[128]; + char *cp; + struct missing_values mv; + int cnt = 0; + + cp = stpcpy (buf, _("Missing Values: ")); + mv_copy (&mv, &v->miss); + if (mv_has_range (&mv)) + { + double x, y; + mv_pop_range (&mv, &x, &y); + if (x == LOWEST) + cp += nsprintf (cp, "LOWEST THRU %g", y); + else if (y == HIGHEST) + cp += nsprintf (cp, "%g THRU HIGHEST", x); + else + cp += nsprintf (cp, "%g THRU %g", x, y); + 
cnt++; + } + while (mv_has_value (&mv)) + { + union value value; + mv_pop_value (&mv, &value); + if (cnt++ > 0) + cp += nsprintf (cp, "; "); + if (v->type == NUMERIC) + cp += nsprintf (cp, "%g", value.f); + else + { + *cp++ = '"'; + memcpy (cp, value.s, v->width); cp += v->width; *cp++ = '"'; - } - *cp = 0; - } + *cp = '\0'; + } + } tab_joint_text (t, 1, r, 2, r, TAB_LEFT, buf); r++; diff --git a/src/t-test.q b/src/t-test.q index 07bcdd07..89f1741f 100644 --- a/src/t-test.q +++ b/src/t-test.q @@ -67,7 +67,7 @@ /* Function to use for testing for missing values */ -static is_missing_func value_is_missing; +static is_missing_func *value_is_missing; /* Variable for the GROUPS subcommand, if given. */ static struct variable *indep_var; @@ -330,9 +330,9 @@ cmd_t_test(void) /* If /MISSING=INCLUDE is set, then user missing values are ignored */ if (cmd.incl == TTS_INCLUDE ) - value_is_missing = is_system_missing; + value_is_missing = mv_is_value_system_missing; else - value_is_missing = is_missing; + value_is_missing = mv_is_value_missing; bad_weight_warn = 1; @@ -1418,7 +1418,7 @@ common_calc (const struct ccase *c, void *_cmd) struct variable *v = cmd->v_variables[i]; const union value *val = case_data (c, v->fv); - if (value_is_missing(val,v) ) + if (value_is_missing(&v->miss, val) ) { return 0; } @@ -1429,7 +1429,7 @@ common_calc (const struct ccase *c, void *_cmd) if ( cmd->sbc_groups ) { const union value *gv = case_data (c, indep_var->fv); - if ( value_is_missing(gv,indep_var) ) + if ( value_is_missing(&indep_var->miss, gv) ) { return 0; } @@ -1444,7 +1444,7 @@ common_calc (const struct ccase *c, void *_cmd) gs= &group_proc_get (cmd->v_variables[i])->ugs; - if (! value_is_missing(val,v) ) + if (! 
value_is_missing(&v->miss, val) ) { gs->n+=weight; gs->sum+=weight * val->f; @@ -1517,7 +1517,7 @@ one_sample_calc (const struct ccase *c, void *cmd_) struct variable *v = cmd->v_variables[i]; const union value *val = case_data (c, v->fv); - if (value_is_missing(val,v) ) + if (value_is_missing(&v->miss, val) ) { return 0; } @@ -1532,7 +1532,7 @@ one_sample_calc (const struct ccase *c, void *cmd_) gs= &group_proc_get (cmd->v_variables[i])->ugs; - if ( ! value_is_missing(val,v)) + if ( ! value_is_missing(&v->miss, val)) gs->sum_diff += weight * (val->f - cmd->n_testval[0]); } @@ -1611,8 +1611,8 @@ paired_calc (const struct ccase *c, void *cmd_) const union value *val0 = case_data (c, v0->fv); const union value *val1 = case_data (c, v1->fv); - if ( value_is_missing(val0,v0) || - value_is_missing(val1,v1) ) + if ( value_is_missing(&v0->miss, val0) || + value_is_missing(&v1->miss, val1) ) { return 0; } @@ -1627,7 +1627,8 @@ paired_calc (const struct ccase *c, void *cmd_) const union value *val0 = case_data (c, v0->fv); const union value *val1 = case_data (c, v1->fv); - if ( ( !value_is_missing(val0,v0) && !value_is_missing(val1,v1) ) ) + if ( ( !value_is_missing(&v0->miss, val0) + && !value_is_missing(&v1->miss, val1) ) ) { pairs[i].n += weight; pairs[i].sum[0] += weight * val0->f; @@ -1744,7 +1745,7 @@ group_calc (const struct ccase *c, struct cmd_t_test *cmd) const double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); - if ( value_is_missing(gv,indep_var) ) + if ( value_is_missing(&indep_var->miss, gv) ) { return 0; } @@ -1756,7 +1757,7 @@ group_calc (const struct ccase *c, struct cmd_t_test *cmd) struct variable *v = cmd->v_variables[i]; const union value *val = case_data (c, v->fv); - if (value_is_missing(val,v) ) + if (value_is_missing(&v->miss, val) ) { return 0; } @@ -1779,7 +1780,7 @@ group_calc (const struct ccase *c, struct cmd_t_test *cmd) if ( ! 
gs ) return 0; - if ( !value_is_missing(val,var) ) + if ( !value_is_missing(&var->miss, val) ) { gs->n+=weight; gs->sum+=weight * val->f; diff --git a/src/val.h b/src/val.h index 96523de2..57aaa2af 100644 --- a/src/val.h +++ b/src/val.h @@ -21,7 +21,7 @@ #define val_h 1 #include -#include "config.h" +#include "magic.h" /* Values. */ diff --git a/src/var.h b/src/var.h index 3bfc43df..b4d50182 100644 --- a/src/var.h +++ b/src/var.h @@ -25,10 +25,9 @@ #include "config.h" #include #include "format.h" +#include "missing-values.h" #include "val.h" - - /* Script variables. */ /* Variable type. */ @@ -39,27 +38,6 @@ enum (STRING is pre-empted by lexer.h.) */ }; -/* Types of missing values. Order is significant, see - mis-val.c:parse_numeric(), sfm-read.c, sfm-write.c, - sysfile-info.c:cmd_sysfile_info(), mis-val.c:copy_missing_values(), - pfm-read.c:read_variables(), pfm-write.c:write_variables(), - apply-dict.c:cmd_apply_dictionary(), and more (?). */ -enum - { - MISSING_NONE, /* No user-missing values. */ - MISSING_1, /* One user-missing value. */ - MISSING_2, /* Two user-missing values. */ - MISSING_3, /* Three user-missing values. */ - MISSING_RANGE, /* [a,b]. */ - MISSING_LOW, /* (-inf,a]. */ - MISSING_HIGH, /* (a,+inf]. */ - MISSING_RANGE_1, /* [a,b], c. */ - MISSING_LOW_1, /* (-inf,a], b. */ - MISSING_HIGH_1, /* (a,+inf), b. */ - MISSING_COUNT - }; - - /* A variable's dictionary entry. */ struct variable { @@ -75,8 +53,7 @@ struct variable int index; /* Dictionary index. */ /* Missing values. */ - int miss_type; /* One of the MISSING_* constants. */ - union value missing[3]; /* User-missing value. */ + struct missing_values miss; /* Missing values. */ /* Display formats. */ struct fmt_spec print; /* Default format for PRINT. */ @@ -178,18 +155,8 @@ extern int FILTER_before_TEMPORARY; void cancel_temporary (void); -/* Functions. 
*/ - struct ccase; void dump_split_vars (const struct ccase *); -typedef int (* is_missing_func )(const union value *, const struct variable *); - -int is_num_user_missing (double, const struct variable *); -int is_str_user_missing (const unsigned char[], const struct variable *); -int is_missing (const union value *, const struct variable *); -int is_system_missing (const union value *, const struct variable *); -int is_user_missing (const union value *, const struct variable *); -void copy_missing_values (struct variable *dest, const struct variable *src); /* Transformations. */ diff --git a/src/vars-atr.c b/src/vars-atr.c index a854033a..5e34cb59 100644 --- a/src/vars-atr.c +++ b/src/vars-atr.c @@ -141,109 +141,6 @@ discard_variables (void) pgm_state = STATE_INIT; } - -/* Return nonzero only if X is a user-missing value for numeric - variable V. */ -inline int -is_num_user_missing (double x, const struct variable *v) -{ - switch (v->miss_type) - { - case MISSING_NONE: - return 0; - case MISSING_1: - return x == v->missing[0].f; - case MISSING_2: - return x == v->missing[0].f || x == v->missing[1].f; - case MISSING_3: - return (x == v->missing[0].f || x == v->missing[1].f - || x == v->missing[2].f); - case MISSING_RANGE: - return x >= v->missing[0].f && x <= v->missing[1].f; - case MISSING_LOW: - return x <= v->missing[0].f; - case MISSING_HIGH: - return x >= v->missing[0].f; - case MISSING_RANGE_1: - return ((x >= v->missing[0].f && x <= v->missing[1].f) - || x == v->missing[2].f); - case MISSING_LOW_1: - return x <= v->missing[0].f || x == v->missing[1].f; - case MISSING_HIGH_1: - return x >= v->missing[0].f || x == v->missing[1].f; - default: - assert (0); - } - abort (); -} - -/* Return nonzero only if string S is a user-missing variable for - string variable V. */ -inline int -is_str_user_missing (const unsigned char s[], const struct variable *v) -{ - /* FIXME: should these be memcmp()? 
*/ - switch (v->miss_type) - { - case MISSING_NONE: - return 0; - case MISSING_1: - return !strncmp (s, v->missing[0].s, v->width); - case MISSING_2: - return (!strncmp (s, v->missing[0].s, v->width) - || !strncmp (s, v->missing[1].s, v->width)); - case MISSING_3: - return (!strncmp (s, v->missing[0].s, v->width) - || !strncmp (s, v->missing[1].s, v->width) - || !strncmp (s, v->missing[2].s, v->width)); - default: - assert (0); - } - abort (); -} - -/* Return nonzero only if value VAL is system-missing for variable - V. */ -int -is_system_missing (const union value *val, const struct variable *v) -{ - return v->type == NUMERIC && val->f == SYSMIS; -} - -/* Return nonzero only if value VAL is system- or user-missing for - variable V. */ -int -is_missing (const union value *val, const struct variable *v) -{ - switch (v->type) - { - case NUMERIC: - if (val->f == SYSMIS) - return 1; - return is_num_user_missing (val->f, v); - case ALPHA: - return is_str_user_missing (val->s, v); - default: - assert (0); - } - abort (); -} - -/* Return nonzero only if value VAL is user-missing for variable V. */ -int -is_user_missing (const union value *val, const struct variable *v) -{ - switch (v->type) - { - case NUMERIC: - return is_num_user_missing (val->f, v); - case ALPHA: - return is_str_user_missing (val->s, v); - default: - assert (0); - } - abort (); -} /* Returns true if NAME is an acceptable name for a variable, false otherwise. 
If ISSUE_ERROR is true, issues an diff --git a/src/vfm.c b/src/vfm.c index 0414234c..6d7e526b 100644 --- a/src/vfm.c +++ b/src/vfm.c @@ -336,7 +336,7 @@ filter_case (const struct ccase *c, int case_idx) if (filter_var != NULL) { double f = case_num (c, filter_var->fv); - if (f == 0.0 || f == SYSMIS || is_num_user_missing (f, filter_var)) + if (f == 0.0 || mv_is_num_missing (&filter_var->miss, f)) return 1; } diff --git a/tests/ChangeLog b/tests/ChangeLog index 6dc05961..f5151193 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,9 @@ +Sat Aug 6 17:32:39 2005 Ben Pfaff + + * command/missing-values.sh: New test. + + * Makefile.am: Add new test. + Mon Aug 1 21:51:46 2005 Ben Pfaff * bugs/big-input-2.sh: Don't use 1...100000 (etc.) with Perl diff --git a/tests/Makefile.am b/tests/Makefile.am index 3a4172dd..21b70dda 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -22,6 +22,7 @@ TESTS = \ command/loop.sh \ command/longvars.sh \ command/match-files.sh \ + command/missing-values.sh \ command/no_case_size.sh \ command/oneway.sh \ command/oneway-missing.sh \ diff --git a/tests/command/missing-values.sh b/tests/command/missing-values.sh new file mode 100755 index 00000000..4d530989 --- /dev/null +++ b/tests/command/missing-values.sh @@ -0,0 +1,125 @@ +#!/bin/sh + +# This program tests MISSING VALUES + +TEMPDIR=/tmp/pspp-tst-$$ +TESTFILE=$TEMPDIR/`basename $0`.sps + +here=`pwd`; + +# ensure that top_srcdir is absolute +cd $top_srcdir; top_srcdir=`pwd` + +STAT_CONFIG_PATH=$top_srcdir/config +export STAT_CONFIG_PATH + + +cleanup() +{ + cd / + rm -rf $TEMPDIR +} + + +fail() +{ + echo $activity + echo FAILED + cleanup; + exit 1; +} + + +no_result() +{ + echo $activity + echo NO RESULT; + cleanup; + exit 2; +} + +pass() +{ + cleanup; + exit 0; +} + +mkdir -p $TEMPDIR + +cd $TEMPDIR + +# Copy this file --- it's shared with another test +activity="create data" +cp $top_srcdir/tests/data-list.data $TEMPDIR +if [ $? 
-ne 0 ] ; then no_result ; fi + + +activity="create program" +cat > $TEMPDIR/missing-values.stat << foobar +DATA LIST NOTABLE/str1 1-5 (A) str2 6-8 (A) date1 9-19 (DATE) num1 20-25. + +/* Valid: numeric missing values. +MISSING VALUES date1 num1 (1). +MISSING VALUES date1 num1 (1, 2). +MISSING VALUES date1 num1 (1, 2, 3). + +/* Valid: numeric missing values using the first variable's format. +MISSING VALUES num1 date1 ('1'). +MISSING VALUES num1 date1 ('1', '2'). +MISSING VALUES num1 date1 ('1', '2', '3'). +MISSING VALUES date1 num1 ('06-AUG-05'). +MISSING VALUES date1 num1 ('06-AUG-05', '01-OCT-78'). +MISSING VALUES date1 num1 ('06-AUG-05', '01-OCT-78', '14-FEB-81'). + +/* Valid: ranges of numeric missing values. +MISSING VALUES num1 (1 THRU 2). +MISSING VALUES num1 (LO THRU 2). +MISSING VALUES num1 (LOWEST THRU 2). +MISSING VALUES num1 (1 THRU HI). +MISSING VALUES num1 (1 THRU HIGHEST). + +/* Valid: a range of numeric missing values, plus an individual value. +MISSING VALUES num1 (1 THRU 2, 3). +MISSING VALUES num1 (LO THRU 2, 3). +MISSING VALUES num1 (LOWEST THRU 2, 3). +MISSING VALUES num1 (1 THRU HI, -1). +MISSING VALUES num1 (1 THRU HIGHEST, -1). + +/* Valid: string missing values. +MISSING VALUES str1 str2 ('abc ','def'). + +/* Invalid: too long for str2. +MISSING VALUES str1 str2 ('abcde'). + +/* Invalid: no string ranges. +MISSING VALUES str1 ('a' THRU 'z'). + +/* Invalid: mixing string and numeric variables. +MISSING VALUES str1 num1 ('123'). + +/* Valid: may mix variable types when clearing missing values. +MISSING VALUES ALL (). + +foobar +if [ $? -ne 0 ] ; then no_result ; fi + + +activity="run program" +$SUPERVISOR $here/../src/pspp --testing-mode -o raw-ascii --testing-mode $TEMPDIR/missing-values.stat > $TEMPDIR/errs +# Note vv --- there are errors in input. Therefore, the command must FAIL +if [ $? -eq 0 ] ; then fail ; fi + +activity="compare error messages" +diff -w $TEMPDIR/errs - <