From: Ben Pfaff Date: Sat, 12 Mar 2005 01:08:33 +0000 (+0000) Subject: Rewrite portable file reader code and incidentally clean up code for X-Git-Tag: v0.4.0~144 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=25fae0555073f526e5d5825133a2f62454a7b4c6;p=pspp-builds.git Rewrite portable file reader code and incidentally clean up code for variable formats. --- diff --git a/src/ChangeLog b/src/ChangeLog index a0eb80bb..2ddb184f 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,41 @@ +Fri Mar 11 11:50:30 2005 Ben Pfaff + + * format.c: (check_common_specifier) New function for checks + common to check_input_specifier() and check_output_specifier(). + (check_input_specifier) Use check_common_specifier(). + (check_output_specifier) Use check_common_specifier(). + (check_string_specifier) Removed. + (check_specifier_type) New function. + (check_specifier_width) New function. + (get_format_var_width) Fix bug. + + * formats.c: (internal_cmd_formats) Only accept numeric variables. + + * lexer.c: (check_id) Rename lex_id_to_token(), make public, + update all references. Make case-insensitive. + + * pfm-read.c: Essentially rewrite the whole file. Now much + cleaner. + + * print.c: (check_string_width) New function. + (fixed_parse_compatible) Use check_string_width(), + check_specifier_type(). + (dump_fmt_list) Ditto. + + * str.c: (st_trim_copy) New function. + (st_uppercase) New function. + + * vars-atr.c: (var_is_valid_name) New function. + + * expressions/parse.c: (type_coercion_core) Use + check_specifier_type(). + +Fri Mar 11 11:31:24 2005 Ben Pfaff + + * expressions/evaluate.c: (cmd_debug_evaluate) Fix memory leaks. + + * expressions/parse.c: (no_match) Ditto. + Wed Mar 9 09:54:27 2005 Ben Pfaff * Makefile.am: (pspp_LDADD) Add libgsl-extras.a. diff --git a/src/expressions/evaluate.c b/src/expressions/evaluate.c index 7f066a50..5084ba6a 100644 --- a/src/expressions/evaluate.c +++ b/src/expressions/evaluate.c @@ -248,8 +248,12 @@ cmd_debug_evaluate (void) retval = CMD_SUCCESS; done: - if (c != NULL) - case_destroy (c); + if (c != NULL) + { + case_destroy (c); + free (c); + } + dict_destroy (d); return retval; } diff --git a/src/expressions/parse.c b/src/expressions/parse.c index be0b9d79..5277648c 100644 --- a/src/expressions/parse.c +++ b/src/expressions/parse.c @@ -336,7 +336,8 @@ type_coercion_core (struct expression *e, case OP_ni_format: if ((*node)->type == OP_format - && check_input_specifier (&(*node)->format.f, 0)) + && check_input_specifier (&(*node)->format.f, false) + && check_specifier_type (&(*node)->format.f, NUMERIC, false)) { if (do_coercion) (*node)->type = OP_ni_format; @@ -346,7 +347,8 @@ type_coercion_core (struct expression *e, case OP_no_format: if ((*node)->type == OP_format - && check_output_specifier (&(*node)->format.f, 0)) + && check_output_specifier (&(*node)->format.f, false) + && check_specifier_type (&(*node)->format.f, NUMERIC, false)) { if (do_coercion) (*node)->type = OP_no_format; @@ -1098,14 +1100,12 @@ no_match (const char *func_name, } else { - ds_create (&s, _("Function invocation ")); + ds_puts (&s, _("Function invocation ")); put_invocation (&s, func_name, args, arg_cnt); ds_puts (&s, _(" does not match any known function. Candidates are:")); for (f = first; f < last; f++) - { - ds_printf (&s, "\n%s", f->prototype); - } + ds_printf (&s, "\n%s", f->prototype); } ds_putc (&s, '.'); diff --git a/src/format.c b/src/format.c index d59ae6fe..2adee205 100644 --- a/src/format.c +++ b/src/format.c @@ -26,6 +26,7 @@ #include "lexer.h" #include "misc.h" #include "str.h" +#include "var.h" #define DEFFMT(LABEL, NAME, N_ARGS, IMIN_W, IMAX_W, OMIN_W, OMAX_W, CAT, \ OUTPUT, SPSS_FMT) \ @@ -111,17 +112,46 @@ fmt_to_string (const struct fmt_spec *f) return buf; } +/* Does checks in common betwen check_input_specifier() and + check_output_specifier() and returns true if so. Otherwise, + emits an error message (if EMIT_ERROR is nonzero) and returns + false. */ +static bool +check_common_specifier (const struct fmt_spec *spec, bool emit_error) +{ + struct fmt_desc *f = &formats[spec->type]; + char *str = fmt_to_string (spec); + + if ((f->cat & FCAT_EVEN_WIDTH) && spec->w % 2) + { + if (emit_error) + msg (SE, _("Format %s specifies an odd width %d, but " + "format %s requires an even width."), + str, spec->w, f->name, f->Imin_w, f->Imax_w); + return false; + } + if (f->n_args > 1 && (spec->d < 0 || spec->d > 16)) + { + if (emit_error) + msg (SE, _("Format %s specifies a bad number of " + "implied decimal places %d. Input format %s allows " + "up to 16 implied decimal places."), str, spec->d, f->name); + return false; + } + return true; +} + /* Checks whether SPEC is valid as an input format and returns nonzero if so. Otherwise, emits an error message (if EMIT_ERROR is nonzero) and returns zero. */ int check_input_specifier (const struct fmt_spec *spec, int emit_error) { - struct fmt_desc *f; - char *str; + struct fmt_desc *f = &formats[spec->type]; + char *str = fmt_to_string (spec); - f = &formats[spec->type]; - str = fmt_to_string (spec); + if (!check_common_specifier (spec, emit_error)) + return false; if (spec->type == FMT_X) return 1; if (f->cat & FCAT_OUTPUT_ONLY) @@ -138,22 +168,6 @@ check_input_specifier (const struct fmt_spec *spec, int emit_error) str, spec->w, f->name, f->Imin_w, f->Imax_w); return 0; } - if ((f->cat & FCAT_EVEN_WIDTH) && spec->w % 2) - { - if (emit_error) - msg (SE, _("Input format %s specifies an odd width %d, but " - "format %s requires an even width between %d and " - "%d."), str, spec->w, f->name, f->Imin_w, f->Imax_w); - return 0; - } - if (f->n_args > 1 && (spec->d < 0 || spec->d > 16)) - { - if (emit_error) - msg (SE, _("Input format %s specifies a bad number of " - "implied decimal places %d. Input format %s allows " - "up to 16 implied decimal places."), str, spec->d, f->name); - return 0; - } return 1; } @@ -163,11 +177,11 @@ check_input_specifier (const struct fmt_spec *spec, int emit_error) int check_output_specifier (const struct fmt_spec *spec, int emit_error) { - struct fmt_desc *f; - char *str; + struct fmt_desc *f = &formats[spec->type]; + char *str = fmt_to_string (spec); - f = &formats[spec->type]; - str = fmt_to_string (spec); + if (!check_common_specifier (spec, emit_error)) + return false; if (spec->type == FMT_X) return 1; if (spec->w < f->Omin_w || spec->w > f->Omax_w) @@ -190,39 +204,48 @@ check_output_specifier (const struct fmt_spec *spec, int emit_error) f->Omin_w + 1 + spec->d, spec->d, str); return 0; } - if ((f->cat & FCAT_EVEN_WIDTH) && spec->w % 2) - { - if (emit_error) - msg (SE, _("Output format %s specifies an odd width %d, but " - "output format %s requires an even width between %d and " - "%d."), str, spec->w, f->name, f->Omin_w, f->Omax_w); - return 0; - } - if (f->n_args > 1 && (spec->d < 0 || spec->d > 16)) + return 1; +} + +/* Checks that FORMAT is appropriate for a variable of the given + TYPE and returns true if so. Otherwise returns false and (if + EMIT_ERROR is true) emits an error message. */ +bool +check_specifier_type (const struct fmt_spec *format, + int type, bool emit_error) +{ + const struct fmt_desc *f = &formats[format->type]; + assert (type == NUMERIC || type == ALPHA); + if ((type == ALPHA) != ((f->cat & FCAT_STRING) != 0)) { if (emit_error) - msg (SE, _("Output format %s specifies a bad number of " - "implied decimal places %d. Output format %s allows " - "a number of implied decimal places between 1 " - "and 16."), str, spec->d, f->name); - return 0; + msg (SE, _("%s variables are not compatible with %s format %s."), + type == ALPHA ? _("String") : _("Numeric"), + type == ALPHA ? _("numeric") : _("string"), + fmt_to_string (format)); + return false; } - return 1; + return true; } - -/* If a string variable has width W, you can't display it with a - format specifier with a required width MIN_LEN>W. */ -int -check_string_specifier (const struct fmt_spec *f, int min_len) + +/* Checks that FORMAT is appropriate for a variable of the given + WIDTH and returns true if so. Otherwise returns false and (if + EMIT_ERROR is true) emits an error message. */ +bool +check_specifier_width (const struct fmt_spec *format, + int width, bool emit_error) { - if ((f->type == FMT_A && min_len > f->w) - || (f->type == FMT_AHEX && min_len * 2 > f->w)) + if (!check_specifier_type (format, width != 0 ? ALPHA : NUMERIC, emit_error)) + return false; + if (get_format_var_width (format) != width) { - msg (SE, _("Can't display a string variable of width %d with " - "format specifier %s."), min_len, fmt_to_string (f)); - return 0; + if (emit_error) + msg (SE, _("String variable with width %d not compatible with " + "format %s."), + width, fmt_to_string (format)); + return false; } - return 1; + return true; } /* Converts input format specifier INPUT into output format @@ -387,7 +410,7 @@ int get_format_var_width (const struct fmt_spec *spec) { if (spec->type == FMT_AHEX) - return spec->w * 2; + return spec->w / 2; else if (spec->type == FMT_A) return spec->w; else diff --git a/src/format.h b/src/format.h index c60e0456..470c45cb 100644 --- a/src/format.h +++ b/src/format.h @@ -21,6 +21,9 @@ #define format_h 1 /* Display format types. */ + +#include "bool.h" + /* See the definitions of these functions and variables when modifying this list: misc.c:convert_fmt_ItoO() @@ -87,7 +90,9 @@ int parse_format_specifier (struct fmt_spec *input, enum fmt_parse_flags); int parse_format_specifier_name (const char **cp, enum fmt_parse_flags); int check_input_specifier (const struct fmt_spec *spec, int emit_error); int check_output_specifier (const struct fmt_spec *spec, int emit_error); -int check_string_specifier (const struct fmt_spec *spec, int min_len); +bool check_specifier_type (const struct fmt_spec *, int type, bool emit_error); +bool check_specifier_width (const struct fmt_spec *, + int width, bool emit_error); void convert_fmt_ItoO (const struct fmt_spec *input, struct fmt_spec *output); int get_format_var_width (const struct fmt_spec *); int parse_string_as_format (const char *s, int len, const struct fmt_spec *fp, diff --git a/src/formats.c b/src/formats.c index 9ad6efc3..e6c85a24 100644 --- a/src/formats.c +++ b/src/formats.c @@ -77,7 +77,7 @@ internal_cmd_formats (int which) if (token == '.') break; - if (!parse_variables (default_dict, &v, &cv, PV_SAME_TYPE)) + if (!parse_variables (default_dict, &v, &cv, PV_NUMERIC)) return CMD_PART_SUCCESS_MAYBE; type = v[0]->type; @@ -86,31 +86,11 @@ internal_cmd_formats (int which) msg (SE, _("`(' expected after variable list")); goto fail; } - if (!parse_format_specifier (&f, 0) || !check_output_specifier (&f, 1)) + if (!parse_format_specifier (&f, 0) + || !check_output_specifier (&f, true) + || !check_specifier_type (&f, NUMERIC, true)) goto fail; - /* Catch type mismatch errors. */ - if ((type == ALPHA) ^ (0 != (formats[f.type].cat & FCAT_STRING))) - { - msg (SE, _("Format %s may not be assigned to a %s variable."), - fmt_to_string (&f), type == NUMERIC ? _("numeric") : _("string")); - goto fail; - } - - /* This is an additional check for string variables. We can't - let the user specify an A8 format for a string variable with - width 4. */ - if (type == ALPHA) - { - /* Shortest string so far. */ - int min_len = INT_MAX; - - for (i = 0; i < cv; i++) - min_len = min (min_len, v[i]->width); - if (!check_string_specifier (&f, min_len)) - goto fail; - } - if (!lex_match (')')) { msg (SE, _("`)' expected after output format.")); diff --git a/src/lexer.c b/src/lexer.c index f676375e..e4b6aaee 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -81,7 +81,6 @@ static struct string put_tokstr; static double put_tokval; static void unexpected_eof (void); -static inline int check_id (const char *id, size_t len); static void convert_numeric_string_to_char_string (int type); static int parse_string (int type); @@ -363,7 +362,7 @@ lex_get (void) strncpy (tokid, ds_c_str (&tokstr), 8); tokid[8] = 0; - token = check_id (ds_c_str (&tokstr), ds_length (&tokstr)); + token = lex_id_to_token (ds_c_str (&tokstr), ds_length (&tokstr)); break; default: @@ -636,6 +635,23 @@ lex_id_match (const char *kw, const char *tok) { return lex_id_match_len (kw, strlen (kw), tok, strlen (tok)); } + +/* Returns the proper token type, either T_ID or a reserved keyword + enum, for ID[], which must contain LEN characters. */ +int +lex_id_to_token (const char *id, size_t len) +{ + const char **kwp; + + if (len < 2 || len > 4) + return T_ID; + + for (kwp = keywords; *kwp; kwp++) + if (!strcasecmp (*kwp, id)) + return T_FIRST_KEYWORD + (kwp - keywords); + + return T_ID; +} /* Weird token functions. */ @@ -1002,23 +1018,6 @@ unexpected_eof (void) msg (FE, _("Unexpected end of file.")); } -/* Returns the proper token type, either T_ID or a reserved keyword - enum, for ID[], which must contain LEN characters. */ -static inline int -check_id (const char *id, size_t len) -{ - const char **kwp; - - if (len < 2 || len > 4) - return T_ID; - - for (kwp = keywords; *kwp; kwp++) - if (!strcmp (*kwp, id)) - return T_FIRST_KEYWORD + (kwp - keywords); - - return T_ID; -} - /* When invoked, tokstr contains a string of binary, octal, or hex digits, for values of TYPE of 0, 1, or 2, respectively. The string is converted to characters having the specified values. */ diff --git a/src/lexer.h b/src/lexer.h index 3250704c..95553f10 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -20,6 +20,7 @@ #if !lexer_h #define lexer_h 1 +#include #include "bool.h" /* Returns nonzero if character CH may be the first character in an @@ -111,6 +112,7 @@ int lex_force_string (void); int lex_id_match_len (const char *keyword_string, size_t keyword_len, const char *token_string, size_t token_len); int lex_id_match (const char *keyword_string, const char *token_string); +int lex_id_to_token (const char *id, size_t len); /* Weird token functions. */ int lex_look_ahead (void); diff --git a/src/pfm-read.c b/src/pfm-read.c index c3fbd90a..3e20f039 100644 --- a/src/pfm-read.c +++ b/src/pfm-read.c @@ -28,7 +28,9 @@ #include #include #include +#include #include "alloc.h" +#include "bool.h" #include "case.h" #include "dictionary.h" #include "file-handle.h" @@ -37,6 +39,7 @@ #include "hash.h" #include "magic.h" #include "misc.h" +#include "pool.h" #include "str.h" #include "value-labels.h" #include "var.h" @@ -46,156 +49,91 @@ /* Portable file reader. */ struct pfm_reader { - struct file_handle *fh; /* File handle. */ - FILE *file; /* File stream. */ + struct pool *pool; /* All the portable file state. */ - int weight_index; /* 0-based index of weight variable, or -1. */ + jmp_buf bail_out; /* longjmp() target for error handling. */ + struct file_handle *fh; /* File handle. */ + FILE *file; /* File stream. */ + char cc; /* Current character. */ unsigned char *trans; /* 256-byte character set translation table. */ int var_cnt; /* Number of variables. */ + int weight_index; /* 0-based index of weight variable, or -1. */ int *widths; /* Variable widths, 0 for numeric. */ int value_cnt; /* Number of `value's per case. */ - - unsigned char buf[83]; /* Input buffer. */ - unsigned char *bp; /* Buffer pointer. */ - int cc; /* Current character. */ }; -static int -corrupt_msg (struct pfm_reader *r, const char *format,...) +static void +error (struct pfm_reader *r, const char *msg,...) PRINTF_FORMAT (2, 3); -/* Displays a corruption error. */ -static int -corrupt_msg (struct pfm_reader *r, const char *format, ...) +/* Displays MSG as an error message and aborts reading the + portable file via longjmp(). */ +static void +error (struct pfm_reader *r, const char *msg, ...) { - char *title; struct error e; const char *filename; + char *title; va_list args; e.class = ME; getl_location (&e.where.filename, &e.where.line_number); filename = handle_get_filename (r->fh); - e.title = title = local_alloc (strlen (filename) + 80); - sprintf (title, _("portable file %s corrupt at offset %ld: "), - filename, ftell (r->file) - (82 - (long) (r->bp - r->buf))); + e.title = title = pool_alloc (r->pool, strlen (filename) + 80); + sprintf (e.title, _("portable file %s corrupt at offset %ld: "), + filename, ftell (r->file)); - va_start (args, format); - err_vmsg (&e, format, args); + va_start (args, msg); + err_vmsg (&e, msg, args); va_end (args); - local_free (title); - - return 0; + longjmp (r->bail_out, 1); } -static unsigned char * read_string (struct pfm_reader *r); - -/* Closes a portable file after we're done with it. */ +/* Closes portable file reader R, after we're done with it. */ void pfm_close_reader (struct pfm_reader *r) { - if (r == NULL) - return; - - read_string (NULL); - - if (r->fh != NULL) - fh_close (r->fh, "portable file", "rs"); - if (fclose (r->file) == EOF) - msg (ME, _("%s: Closing portable file: %s."), - handle_get_filename (r->fh), strerror (errno)); - free (r->trans); - free (r->widths); - free (r); + if (r != NULL) + pool_destroy (r->pool); } -/* Displays the message X with corrupt_msg, then jumps to the error - label. */ -#define lose(X) \ - do { \ - corrupt_msg X; \ - goto error; \ - } while (0) - -/* Read an 80-character line into handle H's buffer. Return - success. */ -static int -fill_buf (struct pfm_reader *r) +/* Read a single character into cur_char. */ +static void +advance (struct pfm_reader *r) { - if (80 != fread (r->buf, 1, 80, r->file)) - lose ((r, _("Unexpected end of file."))); + int c; - /* PORTME: line ends. */ - { - int c; - - c = getc (r->file); - if (c != '\n' && c != '\r') - lose ((r, _("Bad line end."))); - - c = getc (r->file); - if (c != '\n' && c != '\r') - ungetc (c, r->file); - } - - if (r->trans) - { - int i; - - for (i = 0; i < 80; i++) - r->buf[i] = r->trans[r->buf[i]]; - } - - r->bp = r->buf; - - return 1; - - error: - return 0; -} + while ((c = getc (r->file)) == '\r' || c == '\n') + continue; + if (c == EOF) + error (r, _("unexpected end of file")); -/* Read a single character into cur_char. Return success; */ -static int -read_char (struct pfm_reader *r) -{ - if (r->bp >= &r->buf[80] && !fill_buf (r)) - return 0; - r->cc = *r->bp++; - return 1; + if (r->trans != NULL) + c = r->trans[c]; + r->cc = c; } -/* Advance a single character. */ -#define advance() \ - do { \ - if (!read_char (r)) \ - goto error; \ - } while (0) - /* Skip a single character if present, and return whether it was skipped. */ -static inline int -skip_char (struct pfm_reader *r, int c) +static inline bool +match (struct pfm_reader *r, int c) { if (r->cc == c) { - advance (); - return 1; + advance (r); + return true; } - error: - return 0; + else + return false; } -/* Skip a single character if present, and return whether it was - skipped. */ -#define match(C) skip_char (r, C) - -static int read_header (struct pfm_reader *); -static int read_version_data (struct pfm_reader *, struct pfm_read_info *); -static int read_variables (struct pfm_reader *, struct dictionary *); -static int read_value_label (struct pfm_reader *, struct dictionary *); +static void read_header (struct pfm_reader *); +static void read_version_data (struct pfm_reader *, struct pfm_read_info *); +static void read_variables (struct pfm_reader *, struct dictionary *); +static void read_value_label (struct pfm_reader *, struct dictionary *); void dump_dictionary (struct dictionary *); /* Reads the dictionary from file with handle H, and returns it in a @@ -205,22 +143,26 @@ struct pfm_reader * pfm_open_reader (struct file_handle *fh, struct dictionary **dict, struct pfm_read_info *info) { - struct pfm_reader *r = NULL; + struct pool *volatile pool = NULL; + struct pfm_reader *volatile r = NULL; *dict = dict_create (); if (!fh_open (fh, "portable file", "rs")) goto error; /* Create and initialize reader. */ - r = xmalloc (sizeof *r); + pool = pool_create (); + r = pool_alloc (pool, sizeof *r); + r->pool = pool; + if (setjmp (r->bail_out)) + goto error; r->fh = fh; - r->file = fopen (handle_get_filename (r->fh), "rb"); + r->file = pool_fopen (r->pool, handle_get_filename (r->fh), "rb"); r->weight_index = -1; r->trans = NULL; r->var_cnt = 0; r->widths = NULL; r->value_cnt = 0; - r->bp = NULL; /* Check that file open succeeded, prime reading. */ if (r->file == NULL) @@ -231,24 +173,19 @@ pfm_open_reader (struct file_handle *fh, struct dictionary **dict, err_cond_fail (); goto error; } - if (!fill_buf (r)) - goto error; - advance (); - + /* Read header, version, date info, product id, variables. */ - if (!read_header (r) - || !read_version_data (r, info) - || !read_variables (r, *dict)) - goto error; + read_header (r); + read_version_data (r, info); + read_variables (r, *dict); /* Read value labels. */ - while (match (77 /* D */)) - if (!read_value_label (r, *dict)) - goto error; + while (match (r, 'D')) + read_value_label (r, *dict); /* Check that we've made it to the data. */ - if (!match (79 /* F */)) - lose ((r, _("Data record expected."))); + if (!match (r, 'F')) + error (r, _("Data record expected.")); return r; @@ -259,34 +196,44 @@ pfm_open_reader (struct file_handle *fh, struct dictionary **dict, return NULL; } -/* Read a floating point value and return its value, or - second_lowest_value on error. */ +/* Returns the value of base-30 digit C, + or -1 if C is not a base-30 digit. */ +static int +base_30_value (unsigned char c) +{ + static const char base_30_digits[] = "0123456789ABCDEFGHIJKLMNOPQRST"; + const char *p = strchr (base_30_digits, c); + return p != NULL ? p - base_30_digits : -1; +} + +/* Read a floating point value and return its value. */ static double read_float (struct pfm_reader *r) { double num = 0.; - int got_dot = 0; - int got_digit = 0; int exponent = 0; - int neg = 0; + bool got_dot = false; /* Seen a decimal point? */ + bool got_digit = false; /* Seen any digits? */ + bool negative = false; /* Number is negative? */ /* Skip leading spaces. */ - while (match (126 /* space */)) - ; + while (match (r, ' ')) + continue; - if (match (137 /* * */)) + /* `*' indicates system-missing. */ + if (match (r, '*')) { - advance (); /* Probably a dot (.) but doesn't appear to matter. */ + advance (r); /* Probably a dot (.) but doesn't appear to matter. */ return SYSMIS; } - else if (match (141 /* - */)) - neg = 1; + negative = match (r, '-'); for (;;) { - if (r->cc >= 64 /* 0 */ && r->cc <= 93 /* T */) + int digit = base_30_value (r->cc); + if (digit != -1) { - got_digit++; + got_digit = true; /* Make sure that multiplication by 30 will not overflow. */ if (num > DBL_MAX * (1. / 30.)) @@ -299,474 +246,312 @@ read_float (struct pfm_reader *r) digit so that we can multiply by 10 later. */ ++exponent; else - num = (num * 30.0) + (r->cc - 64); + num = (num * 30.0) + digit; /* Keep track of the number of digits after the decimal point. If we just divided by 30 here, we would lose precision. */ if (got_dot) --exponent; } - else if (!got_dot && r->cc == 127 /* . */) + else if (!got_dot && r->cc == '.') /* Record that we have found the decimal point. */ got_dot = 1; else /* Any other character terminates the number. */ break; - advance (); + advance (r); } + /* Check that we had some digits. */ if (!got_digit) - lose ((r, "Number expected.")); - - if (r->cc == 130 /* + */ || r->cc == 141 /* - */) + error (r, "Number expected."); + + /* Get exponent if any. */ + if (r->cc == '+' || r->cc == '-') { - /* Get the exponent. */ long int exp = 0; - int neg_exp = r->cc == 141 /* - */; + bool negative_exponent = r->cc == '-'; + int digit; - for (;;) + for (advance (r); (digit = base_30_value (r->cc)) != -1; advance (r)) { - advance (); - - if (r->cc < 64 /* 0 */ || r->cc > 93 /* T */) - break; - if (exp > LONG_MAX / 30) - goto overflow; - exp = exp * 30 + (r->cc - 64); + { + exp = LONG_MAX; + break; + } + exp = exp * 30 + digit; } /* We don't check whether there were actually any digits, but we probably should. */ - if (neg_exp) + if (negative_exponent) exp = -exp; exponent += exp; } - - if (!match (142 /* / */)) - lose ((r, _("Missing numeric terminator."))); - /* Multiply NUM by 30 to the EXPONENT power, checking for overflow. */ + /* Numbers must end with `/'. */ + if (!match (r, '/')) + error (r, _("Missing numeric terminator.")); + /* Multiply `num' by 30 to the `exponent' power, checking for + overflow. */ if (exponent < 0) num *= pow (30.0, (double) exponent); else if (exponent > 0) { if (num > DBL_MAX * pow (30.0, (double) -exponent)) - goto overflow; - num *= pow (30.0, (double) exponent); + num = DBL_MAX; + else + num *= pow (30.0, (double) exponent); } - if (neg) - return -num; - else - return num; - - overflow: - if (neg) - return -DBL_MAX / 10.; - else - return DBL_MAX / 10; - - error: - return second_lowest_value; + return negative ? -num : num; } -/* Read an integer and return its value, or NOT_INT on failure. */ +/* Read an integer and return its value. */ static int read_int (struct pfm_reader *r) { double f = read_float (r); - - if (f == second_lowest_value) - goto error; if (floor (f) != f || f >= INT_MAX || f <= INT_MIN) - lose ((r, _("Bad integer format."))); + error (r, _("Invalid integer.")); return f; - - error: - return NOT_INT; } -/* Reads a string and returns its value in a static buffer, or NULL on - failure. The buffer can be deallocated by calling with a NULL - argument. */ -static unsigned char * -read_string (struct pfm_reader *r) +/* Reads a string into BUF, which must have room for 256 + characters. */ +static void +read_string (struct pfm_reader *r, char *buf) { - static char *buf; - int n; + int n = read_int (r); + if (n < 0 || n > 255) + error (r, _("Bad string length %d."), n); - if (r == NULL) + while (n-- > 0) { - free (buf); - buf = NULL; - return NULL; + *buf++ = r->cc; + advance (r); } - else if (buf == NULL) - buf = xmalloc (256); - - n = read_int (r); - if (n == NOT_INT) - return NULL; - if (n < 0 || n > 255) - lose ((r, _("Bad string length %d."), n)); - - { - int i; - - for (i = 0; i < n; i++) - { - buf[i] = r->cc; - advance (); - } - } - - buf[n] = 0; - return buf; + *buf = '\0'; +} - error: - return NULL; +/* Reads a string and returns a copy of it allocated from R's + pool. */ +static unsigned char * +read_pool_string (struct pfm_reader *r) +{ + char string[256]; + read_string (r, string); + return pool_strdup (r->pool, string); } /* Reads the 464-byte file header. */ -int +static void read_header (struct pfm_reader *r) { - /* For now at least, just ignore the vanity splash strings. */ - { - int i; + /* portable_to_local[PORTABLE] translates the given portable + character into the local character set. */ + static const unsigned char portable_to_local[256] = + { + " " + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ." + "<(+|&[]!$*);^-/|,%_>?`:$@'=\" ~- 0123456789 -() {}\\ " + " " + }; - for (i = 0; i < 200; i++) - advance (); - } - - { - unsigned char src[256]; - int trans_temp[256]; - int i; - - for (i = 0; i < 256; i++) - { - src[i] = (unsigned char) r->cc; - advance (); - } - - for (i = 0; i < 256; i++) - trans_temp[i] = -1; - - /* 0 is used to mark untranslatable characters, so we have to mark - it specially. */ - trans_temp[src[64]] = 64; - for (i = 0; i < 256; i++) - if (trans_temp[src[i]] == -1) - trans_temp[src[i]] = i; - - r->trans = xmalloc (256); - for (i = 0; i < 256; i++) - r->trans[i] = trans_temp[i] == -1 ? 0 : trans_temp[i]; - - /* Translate the input buffer. */ - for (i = 0; i < 80; i++) - r->buf[i] = r->trans[r->buf[i]]; - r->cc = r->trans[r->cc]; - } + unsigned char *trans; + int i; + + /* Read and ignore vanity splash strings. */ + for (i = 0; i < 200; i++) + advance (r); - { - unsigned char sig[8] = {92, 89, 92, 92, 89, 88, 91, 93}; - int i; + /* Skip the first 64 characters of the translation table. + We don't care about these. They are probably all set to + '0', marking them as untranslatable, and that would screw + up our actual translation of the real '0'. */ + for (i = 0; i < 64; i++) + advance (r); + + /* Read the rest of the translation table. */ + trans = pool_malloc (r->pool, 256); + memset (trans, 0, 256); + for (; i < 256; i++) + { + unsigned char c; - for (i = 0; i < 8; i++) - if (!match (sig[i])) - lose ((r, "Missing SPSSPORT signature.")); - } + advance (r); - return 1; + c = r->cc; + if (trans[c] == 0) + trans[c] = portable_to_local[i]; + } - error: - return 0; + /* Set up the translation table, then read the first + translated character. */ + r->trans = trans; + advance (r); + + /* Skip and verify signature. */ + for (i = 0; i < 8; i++) + if (!match (r, "SPSSPORT"[i])) + error (r, _("Missing SPSSPORT signature.")); } /* Reads the version and date info record, as well as product and subproduct identification records if present. */ -int +static void read_version_data (struct pfm_reader *r, struct pfm_read_info *info) { - /* Version. */ - if (!match (74 /* A */)) - lose ((r, "Unrecognized version code %d.", r->cc)); + char *date, *time, *product, *subproduct; + int i; - /* Date. */ - { - static const int map[] = {6, 7, 8, 9, 3, 4, 0, 1}; - char *date = read_string (r); - int i; - - if (!date) - return 0; - if (strlen (date) != 8) - lose ((r, _("Bad date string length %d."), strlen (date))); - for (i = 0; i < 8; i++) - { - if (date[i] < 64 /* 0 */ || date[i] > 73 /* 9 */) - lose ((r, _("Bad character in date."))); - if (info) - info->creation_date[map[i]] = date[i] - 64 /* 0 */ + '0'; - } - if (info) - { - info->creation_date[2] = info->creation_date[5] = ' '; - info->creation_date[10] = 0; - } - } - - /* Time. */ - { - static const int map[] = {0, 1, 3, 4, 6, 7}; - char *time = read_string (r); - int i; - - if (!time) - return 0; - if (strlen (time) != 6) - lose ((r, _("Bad time string length %d."), strlen (time))); - for (i = 0; i < 6; i++) - { - if (time[i] < 64 /* 0 */ || time[i] > 73 /* 9 */) - lose ((r, _("Bad character in time."))); - if (info) - info->creation_time[map[i]] = time[i] - 64 /* 0 */ + '0'; - } - if (info) - { - info->creation_time[2] = info->creation_time[5] = ' '; - info->creation_time[8] = 0; - } - } - - /* Product. */ - if (match (65 /* 1 */)) + /* Read file. */ + if (!match (r, 'A')) + error (r, "Unrecognized version code `%c'.", r->cc); + date = read_pool_string (r); + time = read_pool_string (r); + product = match (r, '1') ? read_pool_string (r) : (unsigned char *) ""; + subproduct + = match (r, '3') ? read_pool_string (r) : (unsigned char *) ""; + + /* Validate file. */ + if (strlen (date) != 8) + error (r, _("Bad date string length %d."), strlen (date)); + if (strlen (time) != 6) + error (r, _("Bad time string length %d."), strlen (time)); + + /* Save file info. */ + if (info != NULL) { - char *product; - - product = read_string (r); - if (product == NULL) - return 0; - if (info) - strncpy (info->product, product, 61); - } - else if (info) - info->product[0] = 0; + /* Date. */ + for (i = 0; i < 8; i++) + { + static const int map[] = {6, 7, 8, 9, 3, 4, 0, 1}; + info->creation_date[map[i]] = date[i]; + } + info->creation_date[2] = info->creation_date[5] = ' '; + info->creation_date[10] = 0; - /* Subproduct. */ - if (match (67 /* 3 */)) - { - char *subproduct; + /* Time. */ + for (i = 0; i < 6; i++) + { + static const int map[] = {0, 1, 3, 4, 6, 7}; + info->creation_time[map[i]] = time[i]; + } + info->creation_time[2] = info->creation_time[5] = ' '; + info->creation_time[8] = 0; - subproduct = read_string (r); - if (subproduct == NULL) - return 0; - if (info) - strncpy (info->subproduct, subproduct, 61); + /* Product. */ + st_trim_copy (info->product, product, sizeof info->product); + st_trim_copy (info->subproduct, subproduct, sizeof info->subproduct); } - else if (info) - info->subproduct[0] = 0; - return 1; - - error: - return 0; } -static int -convert_format (struct pfm_reader *r, int fmt[3], struct fmt_spec *v, - struct variable *vv) -{ - v->type = translate_fmt (fmt[0]); - if (v->type == -1) - lose ((r, _("%s: Bad format specifier byte (%d)."), vv->name, fmt[0])); - v->w = fmt[1]; - v->d = fmt[2]; - - /* FIXME? Should verify the resulting specifier more thoroughly. */ - - if (v->type == -1) - lose ((r, _("%s: Bad format specifier byte (%d)."), vv->name, fmt[0])); - if ((vv->type == ALPHA) ^ ((formats[v->type].cat & FCAT_STRING) != 0)) - lose ((r, _("%s variable %s has %s format specifier %s."), - vv->type == ALPHA ? _("String") : _("Numeric"), - vv->name, - formats[v->type].cat & FCAT_STRING ? _("string") : _("numeric"), - formats[v->type].name)); - return 1; - - error: - return 0; -} - -/* Translation table from SPSS character code to this computer's - native character code (which is probably ASCII). */ -static const unsigned char spss2ascii[256] = - { - " " - "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ." - "<(+|&[]!$*);^-/|,%_>?`:$@'=\" ~- 0123456789 -() {}\\ " - " " - }; - -/* Translate string S into ASCII. */ +/* Translates a format specification read from portable file R as + the three integers INTS into a normal format specifier FORMAT, + checking that the format is appropriate for variable V. */ static void -asciify (char *s) +convert_format (struct pfm_reader *r, const int portable_format[3], + struct fmt_spec *format, struct variable *v) { - for (; *s; s++) - *s = spss2ascii[(unsigned char) *s]; + format->type = translate_fmt (portable_format[0]); + if (format->type == -1) + error (r, _("%s: Bad format specifier byte (%d)."), + v->name, portable_format[0]); + format->w = portable_format[1]; + format->d = portable_format[2]; + + if (!check_output_specifier (format, false) + || !check_specifier_width (format, v->width, false)) + error (r, _("%s variable %s has invalid format specifier %s."), + v->type == NUMERIC ? _("Numeric") : _("String"), + v->name, fmt_to_string (format)); } -static int parse_value (struct pfm_reader *, union value *, struct variable *); +static union value parse_value (struct pfm_reader *, struct variable *); /* Read information on all the variables. */ -static int +static void read_variables (struct pfm_reader *r, struct dictionary *dict) { char *weight_name = NULL; int i; - if (!match (68 /* 4 */)) - lose ((r, _("Expected variable count record."))); + if (!match (r, '4')) + error (r, _("Expected variable count record.")); r->var_cnt = read_int (r); if (r->var_cnt <= 0 || r->var_cnt == NOT_INT) - lose ((r, _("Invalid number of variables %d."), r->var_cnt)); - r->widths = xmalloc (sizeof *r->widths * r->var_cnt); + error (r, _("Invalid number of variables %d."), r->var_cnt); + r->widths = pool_alloc (r->pool, sizeof *r->widths * r->var_cnt); /* Purpose of this value is unknown. It is typically 161. */ - { - int x = read_int (r); - - if (x == NOT_INT) - goto error; - if (x != 161) - corrupt_msg (r, _("Unexpected flag value %d."), x); - } + read_int (r); - if (match (70 /* 6 */)) + if (match (r, '6')) { - weight_name = read_string (r); - if (!weight_name) - goto error; - - asciify (weight_name); + weight_name = read_pool_string (r); if (strlen (weight_name) > 8) - { - corrupt_msg (r, _("Weight variable name (%s) truncated."), - weight_name); - weight_name[8] = '\0'; - } + error (r, _("Weight variable name (%s) truncated."), weight_name); } for (i = 0; i < r->var_cnt; i++) { int width; - unsigned char *name; + char name[256]; int fmt[6]; struct variable *v; int j; - if (!match (71 /* 7 */)) - lose ((r, _("Expected variable record."))); + if (!match (r, '7')) + error (r, _("Expected variable record.")); width = read_int (r); - if (width == NOT_INT) - goto error; if (width < 0) - lose ((r, _("Invalid variable width %d."), width)); + error (r, _("Invalid variable width %d."), width); r->widths[i] = width; - - name = read_string (r); - if (name == NULL) - goto error; - for (j = 0; j < 6; j++) - { - fmt[j] = read_int (r); - if (fmt[j] == NOT_INT) - goto error; - } - - /* Verify first character of variable name. - - Weirdly enough, there is no # character in the SPSS portable - character set, so we can't check for it. */ - if (strlen (name) > 8) - lose ((r, _("position %d: Variable name has %u characters."), - i, strlen (name))); - if ((name[0] < 74 /* A */ || name[0] > 125 /* Z */) - && name[0] != 152 /* @ */) - lose ((r, _("position %d: Variable name begins with invalid " - "character."), i)); - if (name[0] >= 100 /* a */ && name[0] <= 125 /* z */) - { - corrupt_msg (r, _("position %d: Variable name begins with " - "lowercase letter %c."), - i, name[0] - 100 + 'a'); - name[0] -= 26 /* a - A */; - } - /* Verify remaining characters of variable name. */ - for (j = 1; j < (int) strlen (name); j++) - { - int c = name[j]; + read_string (r, name); + for (j = 0; j < 6; j++) + fmt[j] = read_int (r); - if (c >= 100 /* a */ && c <= 125 /* z */) - { - corrupt_msg (r, _("position %d: Variable name character %d " - "is lowercase letter %c."), - i, j + 1, c - 100 + 'a'); - name[j] -= 26 /* z - Z */; - } - else if ((c >= 64 /* 0 */ && c <= 99 /* Z */) - || c == 127 /* . */ || c == 152 /* @ */ - || c == 136 /* $ */ || c == 146 /* _ */) - name[j] = c; - else - lose ((r, _("position %d: character `\\%03o' is not " - "valid in a variable name."), i, c)); - } + if (!var_is_valid_name (name, false) || *name == '#') + error (r, _("position %d: Invalid variable name `%s'."), name); + st_uppercase (name); - asciify (name); if (width < 0 || width > 255) - lose ((r, "Bad width %d for variable %s.", width, name)); + error (r, "Bad width %d for variable %s.", width, name); v = dict_create_var (dict, name, width); if (v == NULL) - lose ((r, _("Duplicate variable name %s."), name)); - if (!convert_format (r, &fmt[0], &v->print, v)) - goto error; - if (!convert_format (r, &fmt[3], &v->write, v)) - goto error; + error (r, _("Duplicate variable name %s."), name); + + convert_format (r, &fmt[0], &v->print, v); + convert_format (r, &fmt[3], &v->write, v); /* Range missing values. */ - if (match (75 /* B */)) + if (match (r, 'B')) { v->miss_type = MISSING_RANGE; - if (!parse_value (r, &v->missing[0], v) - || !parse_value (r, &v->missing[1], v)) - goto error; + v->missing[0] = parse_value (r, v); + v->missing[1] = parse_value (r, v); } - else if (match (74 /* A */)) + else if (match (r, 'A')) { v->miss_type = MISSING_HIGH; - if (!parse_value (r, &v->missing[0], v)) - goto error; + v->missing[0] = parse_value (r, v); } - else if (match (73 /* 9 */)) + else if (match (r, '9')) { v->miss_type = MISSING_LOW; - if (!parse_value (r, &v->missing[0], v)) - goto error; + v->missing[0] = parse_value (r, v); } /* Single missing values. */ - while (match (72 /* 8 */)) + while (match (r, '8')) { static const int map_next[MISSING_COUNT] = { @@ -782,75 +567,51 @@ read_variables (struct pfm_reader *r, struct dictionary *dict) v->miss_type = map_next[v->miss_type]; if (v->miss_type == -1) - lose ((r, _("Bad missing values for %s."), v->name)); + error (r, _("Bad missing values for %s."), v->name); assert (map_ofs[v->miss_type] != -1); - if (!parse_value (r, &v->missing[map_ofs[v->miss_type]], v)) - goto error; + v->missing[map_ofs[v->miss_type]] = parse_value (r, v); } - if (match (76 /* C */)) - { - char *label = read_string (r); - - if (label == NULL) - goto error; - - v->label = xstrdup (label); - asciify (v->label); - } + if (match (r, 'C')) + { + char label[256]; + read_string (r, label); + v->label = xstrdup (label); + } } if (weight_name != NULL) { struct variable *weight_var = dict_lookup_var (dict, weight_name); if (weight_var == NULL) - lose ((r, _("Weighting variable %s not present in dictionary."), - weight_name)); - free (weight_name); + error (r, _("Weighting variable %s not present in dictionary."), + weight_name); dict_set_weight (dict, weight_var); } - - return 1; - - error: - free (weight_name); - return 0; } -/* Parse a value for variable VV into value V. Returns success. */ -static int -parse_value (struct pfm_reader *r, union value *v, struct variable *vv) +/* Parse a value for variable VV into value V. */ +static union value +parse_value (struct pfm_reader *r, struct variable *vv) { - if (vv->type == ALPHA) + union value v; + + if (vv->type == ALPHA) { - char *mv = read_string (r); - int j; - - if (mv == NULL) - return 0; - - strncpy (v->s, mv, 8); - for (j = 0; j < 8; j++) - if (v->s[j]) - v->s[j] = spss2ascii[v->s[j]]; - else - /* Value labels are always padded with spaces. */ - v->s[j] = ' '; + char string[256]; + read_string (r, string); + st_bare_pad_copy (v.s, string, 8); } else - { - v->f = read_float (r); - if (v->f == second_lowest_value) - return 0; - } + v.f = read_float (r); - return 1; + return v; } /* Parse a value label record and return success. */ -static int +static void read_value_label (struct pfm_reader *r, struct dictionary *dict) { /* Variables. */ @@ -863,45 +624,31 @@ read_value_label (struct pfm_reader *r, struct dictionary *dict) int i; nv = read_int (r); - if (nv == NOT_INT) - return 0; - - v = xmalloc (sizeof *v * nv); + v = pool_alloc (r->pool, sizeof *v * nv); for (i = 0; i < nv; i++) { - char *name = read_string (r); - if (name == NULL) - goto error; - asciify (name); + char name[256]; + read_string (r, name); v[i] = dict_lookup_var (dict, name); if (v[i] == NULL) - lose ((r, _("Unknown variable %s while parsing value labels."), name)); + error (r, _("Unknown variable %s while parsing value labels."), name); if (v[0]->width != v[i]->width) - lose ((r, _("Cannot assign value labels to %s and %s, which " + error (r, _("Cannot assign value labels to %s and %s, which " "have different variable types or widths."), - v[0]->name, v[i]->name)); + v[0]->name, v[i]->name); } n_labels = read_int (r); - if (n_labels == NOT_INT) - goto error; - for (i = 0; i < n_labels; i++) { union value val; - char *label; - + char label[256]; int j; - - if (!parse_value (r, &val, v[0])) - goto error; - - label = read_string (r); - if (label == NULL) - goto error; - asciify (label); + + val = parse_value (r, v[0]); + read_string (r, label); /* Assign the value_label's to each variable. */ for (j = 0; j < nv; j++) @@ -912,33 +659,29 @@ read_value_label (struct pfm_reader *r, struct dictionary *dict) continue; if (var->type == NUMERIC) - lose ((r, _("Duplicate label for value %g for variable %s."), - val.f, var->name)); + error (r, _("Duplicate label for value %g for variable %s."), + val.f, var->name); else - lose ((r, _("Duplicate label for value `%.*s' for variable %s."), - var->width, val.s, var->name)); + error (r, _("Duplicate label for value `%.*s' for variable %s."), + var->width, val.s, var->name); } } - free (v); - return 1; - - error: - free (v); - return 0; } -/* Reads one case from portable file R into C. Returns nonzero - only if successful. */ -int +/* Reads one case from portable file R into C. */ +bool pfm_read_case (struct pfm_reader *r, struct ccase *c) { size_t i; size_t idx; - /* Check for end of file. */ - if (r->cc == 99 /* Z */) - return 0; + if (setjmp (r->bail_out)) + return false; + /* Check for end of file. */ + if (r->cc == 'Z') + return false; + idx = 0; for (i = 0; i < r->var_cnt; i++) { @@ -946,30 +689,17 @@ pfm_read_case (struct pfm_reader *r, struct ccase *c) if (width == 0) { - double f = read_float (r); - if (f == second_lowest_value) - goto unexpected_eof; - - case_data_rw (c, idx)->f = f; + case_data_rw (c, idx)->f = read_float (r); idx++; } else { - char *s = read_string (r); - if (s == NULL) - goto unexpected_eof; - asciify (s); - - st_bare_pad_copy (case_data_rw (c, idx)->s, s, width); + char string[256]; + read_string (r, string); + st_bare_pad_copy (case_data_rw (c, idx)->s, string, width); idx += DIV_RND_UP (width, MAX_SHORT_STRING); } } - return 1; - - unexpected_eof: - lose ((r, _("End of file midway through case."))); - - error: - return 0; + return true; } diff --git a/src/pfm-read.h b/src/pfm-read.h index bb37f1db..1ffbf538 100644 --- a/src/pfm-read.h +++ b/src/pfm-read.h @@ -22,6 +22,8 @@ /* Portable file reading. */ +#include "bool.h" + /* Portable file types. */ enum pfm_type { @@ -45,7 +47,7 @@ struct ccase; struct pfm_reader *pfm_open_reader (struct file_handle *, struct dictionary **, struct pfm_read_info *); -int pfm_read_case (struct pfm_reader *, struct ccase *); +bool pfm_read_case (struct pfm_reader *, struct ccase *); void pfm_close_reader (struct pfm_reader *); #endif /* pfm-read.h */ diff --git a/src/print.c b/src/print.c index 8cb7b1d4..25bf9351 100644 --- a/src/print.c +++ b/src/print.c @@ -478,6 +478,21 @@ fail: return 0; } +/* Verifies that FORMAT doesn't need a variable wider than WIDTH. + Returns true iff that is the case. */ +static bool +check_string_width (const struct fmt_spec *format, const struct variable *v) +{ + if (get_format_var_width (format) > v->width) + { + msg (SE, _("Variable %s has width %d so it cannot be output " + "as format %s."), + v->name, v->width, fmt_to_string (format)); + return false; + } + return true; +} + /* Parses a column specification for parse_specs(). */ static int fixed_parse_compatible (void) @@ -599,27 +614,14 @@ fixed_parse_compatible (void) dividend = (fx.lc - fx.fc + 1) / fx.nv; fx.spec.u.v.f.w = dividend; - if (!check_output_specifier (&fx.spec.u.v.f, 1)) + if (!check_output_specifier (&fx.spec.u.v.f, true) + || !check_specifier_type (&fx.spec.u.v.f, type, true)) return 0; - if ((type == ALPHA) ^ (formats[fx.spec.u.v.f.type].cat & FCAT_STRING)) - { - msg (SE, _("%s variables cannot be displayed with format %s."), - type == ALPHA ? _("String") : _("Numeric"), - fmt_to_string (&fx.spec.u.v.f)); - return 0; - } - - /* Check that, for string variables, the user didn't specify a width - longer than an actual string width. */ if (type == ALPHA) { - /* Minimum width of all the string variables specified. */ - int min_len = fx.v[0]->width; - - for (i = 1; i < fx.nv; i++) - min_len = min (min_len, fx.v[i]->width); - if (!check_string_specifier (&fx.spec.u.v.f, min_len)) - return 0; + for (i = 0; i < fx.nv; i++) + if (!check_string_width (&fx.spec.u.v.f, fx.v[i])) + return false; } fx.spec.type = PRT_VAR; @@ -688,15 +690,10 @@ dump_fmt_list (struct fmt_list * f) } v = fx.v[fx.cv++]; - if ((v->type == ALPHA) ^ (formats[f->f.type].cat & FCAT_STRING)) - { - msg (SE, _("Display format %s may not be used with a " - "%s variable."), fmt_to_string (&f->f), - v->type == ALPHA ? _("string") : _("numeric")); - return 0; - } - if (!check_string_specifier (&f->f, v->width)) - return 0; + if (!check_output_specifier (&f->f, true) + || !check_specifier_type (&f->f, v->type, true) + || !check_string_width (&f->f, v)) + return false; fx.spec.type = PRT_VAR; fx.spec.u.v.v = v; diff --git a/src/str.c b/src/str.c index bb49d0f3..db17cea6 100644 --- a/src/str.c +++ b/src/str.c @@ -194,6 +194,31 @@ st_pad_copy (char *dest, const char *src, size_t n) dest[n - 1] = 0; } } + +/* Copies SRC to DST, truncating DST to N-1 characters if + necessary. Always appends a null character. */ +void +st_trim_copy (char *dst, const char *src, size_t n) +{ + size_t len = strlen (src); + assert (n > 0); + if (len + 1 < n) + memcpy (dst, src, len + 1); + else + { + memcpy (dst, src, n - 1); + dst[n - 1] = '\0'; + } +} + + +/* Converts each character in S to uppercase. */ +void +st_uppercase (char *s) +{ + for (; *s != '\0'; s++) + *s = toupper ((unsigned char) *s); +} /* Initializes ST with initial contents S. */ void diff --git a/src/str.h b/src/str.h index 861d9a80..1af2ea69 100644 --- a/src/str.h +++ b/src/str.h @@ -126,6 +126,8 @@ char *st_spaces (int); void st_bare_pad_copy (char *dest, const char *src, size_t n); void st_bare_pad_len_copy (char *dest, const char *src, size_t n, size_t len); void st_pad_copy (char *dest, const char *src, size_t n); +void st_trim_copy (char *dest, const char *src, size_t n); +void st_uppercase (char *); /* Fixed-length strings. */ struct fixed_string diff --git a/src/var.h b/src/var.h index fadda249..680a4c3c 100644 --- a/src/var.h +++ b/src/var.h @@ -21,6 +21,7 @@ #define var_h 1 #include +#include "bool.h" #include "format.h" #include "val.h" @@ -84,6 +85,7 @@ struct variable void (*aux_dtor) (struct variable *); }; +bool var_is_valid_name (const char *, bool issue_error); int compare_var_names (const void *, const void *, void *); unsigned hash_var_name (const void *, void *); int compare_var_ptr_names (const void *, const void *, void *); diff --git a/src/vars-atr.c b/src/vars-atr.c index 9f2baf0e..697518c2 100644 --- a/src/vars-atr.c +++ b/src/vars-atr.c @@ -28,6 +28,7 @@ #include "expressions/public.h" #include "file-handle.h" #include "hash.h" +#include "lexer.h" #include "misc.h" #include "str.h" #include "value-labels.h" @@ -230,6 +231,60 @@ is_user_missing (const union value *val, const struct variable *v) abort (); } +/* Returns true if NAME is an acceptable name for a variable, + false otherwise. If ISSUE_ERROR is true, issues an + explanatory error message on failure. */ +bool +var_is_valid_name (const char *name, bool issue_error) +{ + size_t length, i; + + assert (name != NULL); + + length = strlen (name); + if (length < 1) + { + if (issue_error) + msg (SE, _("Variable names must be at least 1 character long.")); + return false; + } + else if (length > MAX_VAR_NAME_LEN) + { + if (issue_error) + msg (SE, _("Variable name %s exceeds %d-character limit."), + (int) MAX_VAR_NAME_LEN); + return false; + } + + for (i = 0; i < length; i++) + if (!CHAR_IS_IDN (name[i])) + { + if (issue_error) + msg (SE, _("Character `%c' (in %s) may not appear in " + "a variable name."), + name); + return false; + } + + if (!CHAR_IS_ID1 (name[0])) + { + if (issue_error) + msg (SE, _("Character `%c' (in %s), may not appear " + "as the first character in a variable name."), name); + return false; + } + + if (lex_id_to_token (name, strlen (name)) != T_ID) + { + if (issue_error) + msg (SE, _("%s may not be used as a variable name because it " + "is a reserved word."), name); + return false; + } + + return true; +} + /* A hsh_compare_func that orders variables A and B by their names. */ int diff --git a/tests/ChangeLog b/tests/ChangeLog index 0a8ef20b..3a9a93da 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,7 @@ +Fri Mar 11 10:40:41 2005 Ben Pfaff + + * expressions/expressions.sh: Add another test. + Sun Mar 6 19:30:14 2005 Ben Pfaff * expressions/vectors.sh: New test. diff --git a/tests/expressions/expressions.sh b/tests/expressions/expressions.sh index 4cac5571..c4e48f40 100755 --- a/tests/expressions/expressions.sh +++ b/tests/expressions/expressions.sh @@ -665,6 +665,7 @@ number("123", f3.0) => 123.00 number(" 123", f3.0) => 12.00 number("123", f3.1) => 12.30 number(" ", f3.1) => sysmis +number("123", a8) => error number("123", cca1.2) => error # CCA is not an input format ltrim(' abc') => "abc"