X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fpfm-read.c;h=f398747584ae9b7e48c5ffeee25bebfb66581fc1;hb=053e7ff6e0a45a25d5604b211e9c950fff50e75d;hp=b587fbcafa937f3ad183df9f65d7f4b27c8a5b02;hpb=7b98b3a4f58f6dc5a8e9cbc188b627966d5e652d;p=pspp-builds.git diff --git a/src/pfm-read.c b/src/pfm-read.c index b587fbca..f3987475 100644 --- a/src/pfm-read.c +++ b/src/pfm-read.c @@ -1,6 +1,8 @@ /* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. Written by Ben Pfaff . + Code for parsing floating-point numbers adapted from GNU C + library. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -14,309 +16,236 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ #include -#include "pfm.h" -#include +#include "pfm-read.h" +#include "error.h" #include #include #include #include #include #include +#include #include "alloc.h" +#include +#include "case.h" +#include "dictionary.h" #include "file-handle.h" #include "format.h" -#include "getline.h" +#include "getl.h" #include "hash.h" #include "magic.h" #include "misc.h" +#include "pool.h" #include "str.h" #include "value-labels.h" #include "var.h" +#include "gettext.h" +#define _(msgid) gettext (msgid) + #include "debug-print.h" -/* pfm's file_handle extension. */ -struct pfm_fhuser_ext +/* portable_to_local[PORTABLE] translates the given portable + character into the local character set. */ +static const char portable_to_local[256] = { - FILE *file; /* Actual file. */ - - struct dictionary *dict; /* File's dictionary. */ - int weight_index; /* 0-based index of weight variable, or -1. */ - - unsigned char *trans; /* 256-byte character set translation table. */ - - int nvars; /* Number of variables. */ - int *vars; /* Variable widths, 0 for numeric. */ - int case_size; /* Number of `value's per case. */ - - unsigned char buf[83]; /* Input buffer. */ - unsigned char *bp; /* Buffer pointer. */ - int cc; /* Current character. */ + " " + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ." + "<(+|&[]!$*);^-/|,%_>?`:$@'=\" ~- 0123456789 -() {}\\ " + " " }; -static struct fh_ext_class pfm_r_class; - -static int -corrupt_msg (struct file_handle *h, const char *format,...) - PRINTF_FORMAT (2, 3); - -/* Displays a corruption error. */ -static int -corrupt_msg (struct file_handle *h, const char *format, ...) -{ - struct pfm_fhuser_ext *ext = h->ext; - char buf[1024]; - +/* Portable file reader. */ +struct pfm_reader { - va_list args; + struct pool *pool; /* All the portable file state. */ - va_start (args, format); - vsnprintf (buf, 1024, format, args); - va_end (args); - } - - { - char *title; - struct error e; + jmp_buf bail_out; /* longjmp() target for error handling. */ - e.class = ME; - getl_location (&e.where.filename, &e.where.line_number); - e.title = title = local_alloc (strlen (h->fn) + 80); - sprintf (title, _("portable file %s corrupt at offset %ld: "), - h->fn, ftell (ext->file) - (82 - (long) (ext->bp - ext->buf))); - e.text = buf; - - err_vmsg (&e); + struct file_handle *fh; /* File handle. */ + FILE *file; /* File stream. */ + char cc; /* Current character. */ + char *trans; /* 256-byte character set translation table. */ + int var_cnt; /* Number of variables. */ + int weight_index; /* 0-based index of weight variable, or -1. */ + int *widths; /* Variable widths, 0 for numeric. */ + int value_cnt; /* Number of `value's per case. */ + }; - local_free (title); - } - - return 0; -} +static void +error (struct pfm_reader *r, const char *msg,...) + PRINTF_FORMAT (2, 3); -/* Closes a portable file after we're done with it. */ +/* Displays MSG as an error message and aborts reading the + portable file via longjmp(). */ static void -pfm_close (struct file_handle * h) +error (struct pfm_reader *r, const char *msg, ...) { - struct pfm_fhuser_ext *ext = h->ext; - - if (EOF == fclose (ext->file)) - msg (ME, _("%s: Closing portable file: %s."), h->fn, strerror (errno)); - free (ext->vars); - free (ext->trans); - free (h->ext); + struct error e; + const char *filename; + char *title; + va_list args; + + e.class = ME; + getl_location (&e.where.filename, &e.where.line_number); + filename = fh_get_filename (r->fh); + e.title = title = pool_alloc (r->pool, strlen (filename) + 80); + sprintf (title, _("portable file %s corrupt at offset %ld: "), + filename, ftell (r->file)); + + va_start (args, msg); + err_vmsg (&e, msg, args); + va_end (args); + + longjmp (r->bail_out, 1); } -/* Displays the message X with corrupt_msg, then jumps to the lossage - label. */ -#define lose(X) \ - do \ - { \ - corrupt_msg X; \ - goto lossage; \ - } \ - while (0) - -/* Read an 80-character line into handle H's buffer. Return - success. */ -static int -fill_buf (struct file_handle *h) +/* Closes portable file reader R, after we're done with it. */ +void +pfm_close_reader (struct pfm_reader *r) { - struct pfm_fhuser_ext *ext = h->ext; - - if (80 != fread (ext->buf, 1, 80, ext->file)) - lose ((h, _("Unexpected end of file."))); - - /* PORTME: line ends. */ - { - int c; - - c = getc (ext->file); - if (c != '\n' && c != '\r') - lose ((h, _("Bad line end."))); - - c = getc (ext->file); - if (c != '\n' && c != '\r') - ungetc (c, ext->file); - } - - if (ext->trans) - { - int i; - - for (i = 0; i < 80; i++) - ext->buf[i] = ext->trans[ext->buf[i]]; - } - - ext->bp = ext->buf; - - return 1; - - lossage: - return 0; + if (r != NULL) + pool_destroy (r->pool); } -/* Read a single character into cur_char. Return success; */ -static int -read_char (struct file_handle *h) +/* Read a single character into cur_char. */ +static void +advance (struct pfm_reader *r) { - struct pfm_fhuser_ext *ext = h->ext; + int c; - if (ext->bp >= &ext->buf[80] && !fill_buf (h)) - return 0; - ext->cc = *ext->bp++; - return 1; -} + while ((c = getc (r->file)) == '\r' || c == '\n') + continue; + if (c == EOF) + error (r, _("unexpected end of file")); -/* Advance a single character. */ -#define advance() if (!read_char (h)) goto lossage + if (r->trans != NULL) + c = r->trans[c]; + r->cc = c; +} /* Skip a single character if present, and return whether it was skipped. */ -static inline int -skip_char (struct file_handle *h, int c) +static inline bool +match (struct pfm_reader *r, int c) { - struct pfm_fhuser_ext *ext = h->ext; - - if (ext->cc == c) + if (r->cc == c) { - advance (); - return 1; + advance (r); + return true; } - lossage: - return 0; + else + return false; } -/* Skip a single character if present, and return whether it was - skipped. */ -#define match(C) skip_char (h, C) - -static int read_header (struct file_handle *h); -static int read_version_data (struct file_handle *h, struct pfm_read_info *inf); -static int read_variables (struct file_handle *h); -static int read_value_label (struct file_handle *h); -void dump_dictionary (struct dictionary *dict); +static void read_header (struct pfm_reader *); +static void read_version_data (struct pfm_reader *, struct pfm_read_info *); +static void read_variables (struct pfm_reader *, struct dictionary *); +static void read_value_label (struct pfm_reader *, struct dictionary *); +void dump_dictionary (struct dictionary *); /* Reads the dictionary from file with handle H, and returns it in a dictionary structure. This dictionary may be modified in order to rename, reorder, and delete variables, etc. */ -struct dictionary * -pfm_read_dictionary (struct file_handle *h, struct pfm_read_info *inf) +struct pfm_reader * +pfm_open_reader (struct file_handle *fh, struct dictionary **dict, + struct pfm_read_info *info) { - /* The file handle extension record. */ - struct pfm_fhuser_ext *ext; - - /* Check whether the file is already open. */ - if (h->class == &pfm_r_class) - { - ext = h->ext; - return ext->dict; - } - else if (h->class != NULL) - { - msg (ME, _("Cannot read file %s as portable file: already opened " - "for %s."), - fh_handle_name (h), h->class->name); - return NULL; - } - - msg (VM (1), _("%s: Opening portable-file handle %s for reading."), - fh_handle_filename (h), fh_handle_name (h)); - - /* Open the physical disk file. */ - ext = xmalloc (sizeof (struct pfm_fhuser_ext)); - ext->file = fopen (h->norm_fn, "rb"); - if (ext->file == NULL) + struct pool *volatile pool = NULL; + struct pfm_reader *volatile r = NULL; + + *dict = dict_create (); + if (!fh_open (fh, FH_REF_FILE, "portable file", "rs")) + goto error; + + /* Create and initialize reader. */ + pool = pool_create (); + r = pool_alloc (pool, sizeof *r); + r->pool = pool; + if (setjmp (r->bail_out)) + goto error; + r->fh = fh; + r->file = pool_fopen (r->pool, fh_get_filename (r->fh), "rb"); + r->weight_index = -1; + r->trans = NULL; + r->var_cnt = 0; + r->widths = NULL; + r->value_cnt = 0; + + /* Check that file open succeeded, prime reading. */ + if (r->file == NULL) { msg (ME, _("An error occurred while opening \"%s\" for reading " - "as a portable file: %s."), h->fn, strerror (errno)); + "as a portable file: %s."), + fh_get_filename (r->fh), strerror (errno)); err_cond_fail (); - free (ext); - return NULL; + goto error; } - - /* Initialize the sfm_fhuser_ext structure. */ - h->class = &pfm_r_class; - h->ext = ext; - ext->dict = NULL; - ext->trans = NULL; - if (!fill_buf (h)) - goto lossage; - advance (); - - /* Read the header. */ - if (!read_header (h)) - goto lossage; - /* Read version, date info, product identification. */ - if (!read_version_data (h, inf)) - goto lossage; + /* Read header, version, date info, product id, variables. */ + read_header (r); + read_version_data (r, info); + read_variables (r, *dict); - /* Read variables. */ - if (!read_variables (h)) - goto lossage; + /* Read value labels. */ + while (match (r, 'D')) + read_value_label (r, *dict); - /* Value labels. */ - while (match (77 /* D */)) - if (!read_value_label (h)) - goto lossage; + /* Check that we've made it to the data. */ + if (!match (r, 'F')) + error (r, _("Data record expected.")); - if (!match (79 /* F */)) - lose ((h, _("Data record expected."))); + return r; - msg (VM (2), _("Read portable-file dictionary successfully.")); - -#if DEBUGGING - dump_dictionary (ext->dict); -#endif - return ext->dict; - - lossage: - /* Come here on unsuccessful completion. */ - msg (VM (1), _("Error reading portable-file dictionary.")); - - fclose (ext->file); - if (ext && ext->dict) - dict_destroy (ext->dict); - free (ext); - h->class = NULL; - h->ext = NULL; + error: + pfm_close_reader (r); + dict_destroy (*dict); + *dict = NULL; return NULL; } -/* Read a floating point value and return its value, or - second_lowest_value on error. */ +/* Returns the value of base-30 digit C, + or -1 if C is not a base-30 digit. */ +static int +base_30_value (unsigned char c) +{ + static const char base_30_digits[] = "0123456789ABCDEFGHIJKLMNOPQRST"; + const char *p = strchr (base_30_digits, c); + return p != NULL ? p - base_30_digits : -1; +} + +/* Read a floating point value and return its value. */ static double -read_float (struct file_handle *h) +read_float (struct pfm_reader *r) { - struct pfm_fhuser_ext *ext = h->ext; double num = 0.; - int got_dot = 0; - int got_digit = 0; int exponent = 0; - int neg = 0; + bool got_dot = false; /* Seen a decimal point? */ + bool got_digit = false; /* Seen any digits? */ + bool negative = false; /* Number is negative? */ /* Skip leading spaces. */ - while (match (126 /* space */)) - ; + while (match (r, ' ')) + continue; - if (match (137 /* * */)) + /* `*' indicates system-missing. */ + if (match (r, '*')) { - advance (); /* Probably a dot (.) but doesn't appear to matter. */ + advance (r); /* Probably a dot (.) but doesn't appear to matter. */ return SYSMIS; } - else if (match (141 /* - */)) - neg = 1; + negative = match (r, '-'); for (;;) { - if (ext->cc >= 64 /* 0 */ && ext->cc <= 93 /* T */) + int digit = base_30_value (r->cc); + if (digit != -1) { - got_digit++; + got_digit = true; /* Make sure that multiplication by 30 will not overflow. */ if (num > DBL_MAX * (1. / 30.)) @@ -329,573 +258,346 @@ read_float (struct file_handle *h) digit so that we can multiply by 10 later. */ ++exponent; else - num = (num * 30.0) + (ext->cc - 64); + num = (num * 30.0) + digit; /* Keep track of the number of digits after the decimal point. If we just divided by 30 here, we would lose precision. */ if (got_dot) --exponent; } - else if (!got_dot && ext->cc == 127 /* . */) + else if (!got_dot && r->cc == '.') /* Record that we have found the decimal point. */ got_dot = 1; else /* Any other character terminates the number. */ break; - advance (); + advance (r); } + /* Check that we had some digits. */ if (!got_digit) - lose ((h, "Number expected.")); - - if (ext->cc == 130 /* + */ || ext->cc == 141 /* - */) + error (r, "Number expected."); + + /* Get exponent if any. */ + if (r->cc == '+' || r->cc == '-') { - /* Get the exponent. */ long int exp = 0; - int neg_exp = ext->cc == 141 /* - */; + bool negative_exponent = r->cc == '-'; + int digit; - for (;;) + for (advance (r); (digit = base_30_value (r->cc)) != -1; advance (r)) { - advance (); - - if (ext->cc < 64 /* 0 */ || ext->cc > 93 /* T */) - break; - if (exp > LONG_MAX / 30) - goto overflow; - exp = exp * 30 + (ext->cc - 64); + { + exp = LONG_MAX; + break; + } + exp = exp * 30 + digit; } /* We don't check whether there were actually any digits, but we probably should. */ - if (neg_exp) + if (negative_exponent) exp = -exp; exponent += exp; } - - if (!match (142 /* / */)) - lose ((h, _("Missing numeric terminator."))); - /* Multiply NUM by 30 to the EXPONENT power, checking for overflow. */ + /* Numbers must end with `/'. */ + if (!match (r, '/')) + error (r, _("Missing numeric terminator.")); + /* Multiply `num' by 30 to the `exponent' power, checking for + overflow. */ if (exponent < 0) num *= pow (30.0, (double) exponent); else if (exponent > 0) { if (num > DBL_MAX * pow (30.0, (double) -exponent)) - goto overflow; - num *= pow (30.0, (double) exponent); + num = DBL_MAX; + else + num *= pow (30.0, (double) exponent); } - if (neg) - return -num; - else - return num; - - overflow: - if (neg) - return -DBL_MAX / 10.; - else - return DBL_MAX / 10; - - lossage: - return second_lowest_value; + return negative ? -num : num; } -/* Read an integer and return its value, or NOT_INT on failure. */ +/* Read an integer and return its value. */ static int -read_int (struct file_handle *h) +read_int (struct pfm_reader *r) { - double f = read_float (h); - - if (f == second_lowest_value) - goto lossage; + double f = read_float (r); if (floor (f) != f || f >= INT_MAX || f <= INT_MIN) - lose ((h, _("Bad integer format."))); + error (r, _("Invalid integer.")); return f; - - lossage: - return NOT_INT; } -/* Reads a string and returns its value in a static buffer, or NULL on - failure. The buffer can be deallocated by calling with a NULL - argument. */ -static unsigned char * -read_string (struct file_handle *h) +/* Reads a string into BUF, which must have room for 256 + characters. */ +static void +read_string (struct pfm_reader *r, char *buf) { - struct pfm_fhuser_ext *ext = h->ext; - static char *buf; - int n; + int n = read_int (r); + if (n < 0 || n > 255) + error (r, _("Bad string length %d."), n); - if (h == NULL) + while (n-- > 0) { - free (buf); - buf = NULL; - return NULL; + *buf++ = r->cc; + advance (r); } - else if (buf == NULL) - buf = xmalloc (256); - - n = read_int (h); - if (n == NOT_INT) - return NULL; - if (n < 0 || n > 255) - lose ((h, _("Bad string length %d."), n)); - - { - int i; - - for (i = 0; i < n; i++) - { - buf[i] = ext->cc; - advance (); - } - } - - buf[n] = 0; - return buf; + *buf = '\0'; +} - lossage: - return NULL; +/* Reads a string and returns a copy of it allocated from R's + pool. */ +static char * +read_pool_string (struct pfm_reader *r) +{ + char string[256]; + read_string (r, string); + return pool_strdup (r->pool, string); } /* Reads the 464-byte file header. */ -int -read_header (struct file_handle *h) +static void +read_header (struct pfm_reader *r) { - struct pfm_fhuser_ext *ext = h->ext; - - /* For now at least, just ignore the vanity splash strings. */ - { - int i; + char *trans; + int i; - for (i = 0; i < 200; i++) - advance (); - } + /* Read and ignore vanity splash strings. */ + for (i = 0; i < 200; i++) + advance (r); - { - unsigned char src[256]; - int trans_temp[256]; - int i; - - for (i = 0; i < 256; i++) - { - src[i] = (unsigned char) ext->cc; - advance (); - } + /* Skip the first 64 characters of the translation table. + We don't care about these. They are probably all set to + '0', marking them as untranslatable, and that would screw + up our actual translation of the real '0'. */ + for (i = 0; i < 64; i++) + advance (r); + + /* Read the rest of the translation table. */ + trans = pool_malloc (r->pool, 256); + memset (trans, 0, 256); + for (; i < 256; i++) + { + unsigned char c; - for (i = 0; i < 256; i++) - trans_temp[i] = -1; - - /* 0 is used to mark untranslatable characters, so we have to mark - it specially. */ - trans_temp[src[64]] = 64; - for (i = 0; i < 256; i++) - if (trans_temp[src[i]] == -1) - trans_temp[src[i]] = i; - - ext->trans = xmalloc (256); - for (i = 0; i < 256; i++) - ext->trans[i] = trans_temp[i] == -1 ? 0 : trans_temp[i]; - - /* Translate the input buffer. */ - for (i = 0; i < 80; i++) - ext->buf[i] = ext->trans[ext->buf[i]]; - ext->cc = ext->trans[ext->cc]; - } - - { - unsigned char sig[8] = {92, 89, 92, 92, 89, 88, 91, 93}; - int i; + advance (r); - for (i = 0; i < 8; i++) - if (!match (sig[i])) - lose ((h, "Missing SPSSPORT signature.")); - } + c = r->cc; + if (trans[c] == 0) + trans[c] = portable_to_local[i]; + } - return 1; + /* Set up the translation table, then read the first + translated character. */ + r->trans = trans; + advance (r); - lossage: - return 0; + /* Skip and verify signature. */ + for (i = 0; i < 8; i++) + if (!match (r, "SPSSPORT"[i])) + { + msg (SE, _("%s: Not a portable file."), fh_get_filename (r->fh)); + longjmp (r->bail_out, 1); + } } /* Reads the version and date info record, as well as product and subproduct identification records if present. */ -int -read_version_data (struct file_handle *h, struct pfm_read_info *inf) +static void +read_version_data (struct pfm_reader *r, struct pfm_read_info *info) { - struct pfm_fhuser_ext *ext = h->ext; - - /* Version. */ - if (!match (74 /* A */)) - lose ((h, "Unrecognized version code %d.", ext->cc)); - - /* Date. */ - { - static const int map[] = {6, 7, 8, 9, 3, 4, 0, 1}; - char *date = read_string (h); - int i; - - if (!date) - return 0; - if (strlen (date) != 8) - lose ((h, _("Bad date string length %d."), strlen (date))); - for (i = 0; i < 8; i++) - { - if (date[i] < 64 /* 0 */ || date[i] > 73 /* 9 */) - lose ((h, _("Bad character in date."))); - if (inf) - inf->creation_date[map[i]] = date[i] - 64 /* 0 */ + '0'; - } - if (inf) - { - inf->creation_date[2] = inf->creation_date[5] = ' '; - inf->creation_date[10] = 0; - } - } - - /* Time. */ - { - static const int map[] = {0, 1, 3, 4, 6, 7}; - char *time = read_string (h); - int i; - - if (!time) - return 0; - if (strlen (time) != 6) - lose ((h, _("Bad time string length %d."), strlen (time))); - for (i = 0; i < 6; i++) - { - if (time[i] < 64 /* 0 */ || time[i] > 73 /* 9 */) - lose ((h, _("Bad character in time."))); - if (inf) - inf->creation_time[map[i]] = time[i] - 64 /* 0 */ + '0'; - } - if (inf) - { - inf->creation_time[2] = inf->creation_time[5] = ' '; - inf->creation_time[8] = 0; - } - } + static char empty_string[] = ""; + char *date, *time, *product, *author, *subproduct; + int i; - /* Product. */ - if (match (65 /* 1 */)) + /* Read file. */ + if (!match (r, 'A')) + error (r, "Unrecognized version code `%c'.", r->cc); + date = read_pool_string (r); + time = read_pool_string (r); + product = match (r, '1') ? read_pool_string (r) : empty_string; + author = match (r, '2') ? read_pool_string (r) : empty_string; + subproduct = match (r, '3') ? read_pool_string (r) : empty_string; + + /* Validate file. */ + if (strlen (date) != 8) + error (r, _("Bad date string length %d."), strlen (date)); + if (strlen (time) != 6) + error (r, _("Bad time string length %d."), strlen (time)); + + /* Save file info. */ + if (info != NULL) { - char *product; - - product = read_string (h); - if (product == NULL) - return 0; - if (inf) - strncpy (inf->product, product, 61); - } - else if (inf) - inf->product[0] = 0; + /* Date. */ + for (i = 0; i < 8; i++) + { + static const int map[] = {6, 7, 8, 9, 3, 4, 0, 1}; + info->creation_date[map[i]] = date[i]; + } + info->creation_date[2] = info->creation_date[5] = ' '; + info->creation_date[10] = 0; - /* Subproduct. */ - if (match (67 /* 3 */)) - { - char *subproduct; + /* Time. */ + for (i = 0; i < 6; i++) + { + static const int map[] = {0, 1, 3, 4, 6, 7}; + info->creation_time[map[i]] = time[i]; + } + info->creation_time[2] = info->creation_time[5] = ' '; + info->creation_time[8] = 0; - subproduct = read_string (h); - if (subproduct == NULL) - return 0; - if (inf) - strncpy (inf->subproduct, subproduct, 61); + /* Product. */ + str_copy_trunc (info->product, sizeof info->product, product); + str_copy_trunc (info->subproduct, sizeof info->subproduct, subproduct); } - else if (inf) - inf->subproduct[0] = 0; - return 1; - - lossage: - return 0; } -static int -convert_format (struct file_handle *h, int fmt[3], struct fmt_spec *v, - struct variable *vv) -{ - if (fmt[0] < 0 - || (size_t) fmt[0] >= sizeof translate_fmt / sizeof *translate_fmt) - lose ((h, _("%s: Bad format specifier byte (%d)."), vv->name, fmt[0])); - - v->type = translate_fmt[fmt[0]]; - v->w = fmt[1]; - v->d = fmt[2]; - - /* FIXME? Should verify the resulting specifier more thoroughly. */ - - if (v->type == -1) - lose ((h, _("%s: Bad format specifier byte (%d)."), vv->name, fmt[0])); - if ((vv->type == ALPHA) ^ ((formats[v->type].cat & FCAT_STRING) != 0)) - lose ((h, _("%s variable %s has %s format specifier %s."), - vv->type == ALPHA ? _("String") : _("Numeric"), - vv->name, - formats[v->type].cat & FCAT_STRING ? _("string") : _("numeric"), - formats[v->type].name)); - return 1; - - lossage: - return 0; -} - -/* Translation table from SPSS character code to this computer's - native character code (which is probably ASCII). */ -static const unsigned char spss2ascii[256] = - { - " " - "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ." - "<(+|&[]!$*);^-/|,%_>?`:$@'=\" ~- 0123456789 -() {}\\ " - " " - }; - -/* Translate string S into ASCII. */ +/* Translates a format specification read from portable file R as + the three integers INTS into a normal format specifier FORMAT, + checking that the format is appropriate for variable V. */ static void -asciify (char *s) +convert_format (struct pfm_reader *r, const int portable_format[3], + struct fmt_spec *format, struct variable *v) { - for (; *s; s++) - *s = spss2ascii[(unsigned char) *s]; + format->type = translate_fmt (portable_format[0]); + if (format->type == -1) + error (r, _("%s: Bad format specifier byte (%d)."), + v->name, portable_format[0]); + format->w = portable_format[1]; + format->d = portable_format[2]; + + if (!check_output_specifier (format, false) + || !check_specifier_width (format, v->width, false)) + error (r, _("%s variable %s has invalid format specifier %s."), + v->type == NUMERIC ? _("Numeric") : _("String"), + v->name, fmt_to_string (format)); } -static int parse_value (struct file_handle *, union value *, struct variable *); +static union value parse_value (struct pfm_reader *, struct variable *); /* Read information on all the variables. */ -static int -read_variables (struct file_handle *h) +static void +read_variables (struct pfm_reader *r, struct dictionary *dict) { - struct pfm_fhuser_ext *ext = h->ext; char *weight_name = NULL; int i; - if (!match (68 /* 4 */)) - lose ((h, _("Expected variable count record."))); + if (!match (r, '4')) + error (r, _("Expected variable count record.")); - ext->nvars = read_int (h); - if (ext->nvars <= 0 || ext->nvars == NOT_INT) - lose ((h, _("Invalid number of variables %d."), ext->nvars)); - ext->vars = xmalloc (sizeof *ext->vars * ext->nvars); + r->var_cnt = read_int (r); + if (r->var_cnt <= 0 || r->var_cnt == NOT_INT) + error (r, _("Invalid number of variables %d."), r->var_cnt); + r->widths = pool_nalloc (r->pool, r->var_cnt, sizeof *r->widths); /* Purpose of this value is unknown. It is typically 161. */ - { - int x = read_int (h); - - if (x == NOT_INT) - goto lossage; - if (x != 161) - corrupt_msg (h, _("Unexpected flag value %d."), x); - } + read_int (r); - ext->dict = dict_create (); - - if (match (70 /* 6 */)) + if (match (r, '6')) { - weight_name = read_string (h); - if (!weight_name) - goto lossage; - - asciify (weight_name); - if (strlen (weight_name) > 8) - { - corrupt_msg (h, _("Weight variable name (%s) truncated."), - weight_name); - weight_name[8] = '\0'; - } + weight_name = read_pool_string (r); + if (strlen (weight_name) > SHORT_NAME_LEN) + error (r, _("Weight variable name (%s) truncated."), weight_name); } - for (i = 0; i < ext->nvars; i++) + for (i = 0; i < r->var_cnt; i++) { int width; - unsigned char *name; + char name[256]; int fmt[6]; struct variable *v; int j; - if (!match (71 /* 7 */)) - lose ((h, _("Expected variable record."))); + if (!match (r, '7')) + error (r, _("Expected variable record.")); - width = read_int (h); - if (width == NOT_INT) - goto lossage; + width = read_int (r); if (width < 0) - lose ((h, _("Invalid variable width %d."), width)); - ext->vars[i] = width; - - name = read_string (h); - if (name == NULL) - goto lossage; - for (j = 0; j < 6; j++) - { - fmt[j] = read_int (h); - if (fmt[j] == NOT_INT) - goto lossage; - } + error (r, _("Invalid variable width %d."), width); + r->widths[i] = width; - /* Verify first character of variable name. - - Weirdly enough, there is no # character in the SPSS portable - character set, so we can't check for it. */ - if (strlen (name) > 8) - lose ((h, _("position %d: Variable name has %u characters."), - i, strlen (name))); - if ((name[0] < 74 /* A */ || name[0] > 125 /* Z */) - && name[0] != 152 /* @ */) - lose ((h, _("position %d: Variable name begins with invalid " - "character."), i)); - if (name[0] >= 100 /* a */ && name[0] <= 125 /* z */) - { - corrupt_msg (h, _("position %d: Variable name begins with " - "lowercase letter %c."), - i, name[0] - 100 + 'a'); - name[0] -= 26 /* a - A */; - } + read_string (r, name); + for (j = 0; j < 6; j++) + fmt[j] = read_int (r); - /* Verify remaining characters of variable name. */ - for (j = 1; j < (int) strlen (name); j++) - { - int c = name[j]; - - if (c >= 100 /* a */ && c <= 125 /* z */) - { - corrupt_msg (h, _("position %d: Variable name character %d " - "is lowercase letter %c."), - i, j + 1, c - 100 + 'a'); - name[j] -= 26 /* z - Z */; - } - else if ((c >= 64 /* 0 */ && c <= 99 /* Z */) - || c == 127 /* . */ || c == 152 /* @ */ - || c == 136 /* $ */ || c == 146 /* _ */) - name[j] = c; - else - lose ((h, _("position %d: character `\\%03o' is not " - "valid in a variable name."), i, c)); - } + if (!var_is_valid_name (name, false) || *name == '#' || *name == '$') + error (r, _("position %d: Invalid variable name `%s'."), i, name); + str_uppercase (name); - asciify (name); if (width < 0 || width > 255) - lose ((h, "Bad width %d for variable %s.", width, name)); + error (r, "Bad width %d for variable %s.", width, name); - v = dict_create_var (ext->dict, name, width); - v->get.fv = v->fv; + v = dict_create_var (dict, name, width); if (v == NULL) - lose ((h, _("Duplicate variable name %s."), name)); - if (!convert_format (h, &fmt[0], &v->print, v)) - goto lossage; - if (!convert_format (h, &fmt[3], &v->write, v)) - goto lossage; + error (r, _("Duplicate variable name %s."), name); + + convert_format (r, &fmt[0], &v->print, v); + convert_format (r, &fmt[3], &v->write, v); /* Range missing values. */ - if (match (75 /* B */)) - { - v->miss_type = MISSING_RANGE; - if (!parse_value (h, &v->missing[0], v) - || !parse_value (h, &v->missing[1], v)) - goto lossage; - } - else if (match (74 /* A */)) - { - v->miss_type = MISSING_HIGH; - if (!parse_value (h, &v->missing[0], v)) - goto lossage; - } - else if (match (73 /* 9 */)) - { - v->miss_type = MISSING_LOW; - if (!parse_value (h, &v->missing[0], v)) - goto lossage; - } + if (match (r, 'B')) + { + double x = read_float (r); + double y = read_float (r); + mv_add_num_range (&v->miss, x, y); + } + else if (match (r, 'A')) + mv_add_num_range (&v->miss, read_float (r), HIGHEST); + else if (match (r, '9')) + mv_add_num_range (&v->miss, LOWEST, read_float (r)); /* Single missing values. */ - while (match (72 /* 8 */)) - { - static const int map_next[MISSING_COUNT] = - { - MISSING_1, MISSING_2, MISSING_3, -1, - MISSING_RANGE_1, MISSING_LOW_1, MISSING_HIGH_1, - -1, -1, -1, - }; - - static const int map_ofs[MISSING_COUNT] = - { - -1, 0, 1, 2, -1, -1, -1, 2, 1, 1, - }; - - v->miss_type = map_next[v->miss_type]; - if (v->miss_type == -1) - lose ((h, _("Bad missing values for %s."), v->name)); - - assert (map_ofs[v->miss_type] != -1); - if (!parse_value (h, &v->missing[map_ofs[v->miss_type]], v)) - goto lossage; - } - - if (match (76 /* C */)) - { - char *label = read_string (h); - - if (label == NULL) - goto lossage; + while (match (r, '8')) + { + union value value = parse_value (r, v); + mv_add_value (&v->miss, &value); + } - v->label = xstrdup (label); - asciify (v->label); - } + if (match (r, 'C')) + { + char label[256]; + read_string (r, label); + v->label = xstrdup (label); + } } if (weight_name != NULL) { - struct variable *weight_var = dict_lookup_var (ext->dict, weight_name); + struct variable *weight_var = dict_lookup_var (dict, weight_name); if (weight_var == NULL) - lose ((h, _("Weighting variable %s not present in dictionary."), - weight_name)); - free (weight_name); + error (r, _("Weighting variable %s not present in dictionary."), + weight_name); - dict_set_weight (ext->dict, weight_var); + dict_set_weight (dict, weight_var); } - - return 1; - - lossage: - free (weight_name); - return 0; } -/* Parse a value for variable VV into value V. Returns success. */ -static int -parse_value (struct file_handle *h, union value *v, struct variable *vv) +/* Parse a value for variable VV into value V. */ +static union value +parse_value (struct pfm_reader *r, struct variable *vv) { - if (vv->type == ALPHA) + union value v; + + if (vv->type == ALPHA) { - char *mv = read_string (h); - int j; - - if (mv == NULL) - return 0; - - strncpy (v->s, mv, 8); - for (j = 0; j < 8; j++) - if (v->s[j]) - v->s[j] = spss2ascii[v->s[j]]; - else - /* Value labels are always padded with spaces. */ - v->s[j] = ' '; + char string[256]; + read_string (r, string); + buf_copy_str_rpad (v.s, 8, string); } else - { - v->f = read_float (h); - if (v->f == second_lowest_value) - return 0; - } + v.f = read_float (r); - return 1; + return v; } /* Parse a value label record and return success. */ -static int -read_value_label (struct file_handle *h) +static void +read_value_label (struct pfm_reader *r, struct dictionary *dict) { - struct pfm_fhuser_ext *ext = h->ext; - /* Variables. */ int nv; struct variable **v; @@ -905,46 +607,32 @@ read_value_label (struct file_handle *h) int i; - nv = read_int (h); - if (nv == NOT_INT) - return 0; - - v = xmalloc (sizeof *v * nv); + nv = read_int (r); + v = pool_nalloc (r->pool, nv, sizeof *v); for (i = 0; i < nv; i++) { - char *name = read_string (h); - if (name == NULL) - goto lossage; - asciify (name); + char name[256]; + read_string (r, name); - v[i] = dict_lookup_var (ext->dict, name); + v[i] = dict_lookup_var (dict, name); if (v[i] == NULL) - lose ((h, _("Unknown variable %s while parsing value labels."), name)); + error (r, _("Unknown variable %s while parsing value labels."), name); if (v[0]->width != v[i]->width) - lose ((h, _("Cannot assign value labels to %s and %s, which " + error (r, _("Cannot assign value labels to %s and %s, which " "have different variable types or widths."), - v[0]->name, v[i]->name)); + v[0]->name, v[i]->name); } - n_labels = read_int (h); - if (n_labels == NOT_INT) - goto lossage; - + n_labels = read_int (r); for (i = 0; i < n_labels; i++) { union value val; - char *label; - + char label[256]; int j; - - if (!parse_value (h, &val, v[0])) - goto lossage; - - label = read_string (h); - if (label == NULL) - goto lossage; - asciify (label); + + val = parse_value (r, v[0]); + read_string (r, label); /* Assign the value_label's to each variable. */ for (j = 0; j < nv; j++) @@ -955,89 +643,82 @@ read_value_label (struct file_handle *h) continue; if (var->type == NUMERIC) - lose ((h, _("Duplicate label for value %g for variable %s."), - val.f, var->name)); + error (r, _("Duplicate label for value %g for variable %s."), + val.f, var->name); else - lose ((h, _("Duplicate label for value `%.*s' for variable %s."), - var->width, val.s, var->name)); + error (r, _("Duplicate label for value `%.*s' for variable %s."), + var->width, val.s, var->name); } } - free (v); - return 1; - - lossage: - free (v); - return 0; } -/* Reads one case from portable file H into the value array PERM - according to the instuctions given in associated dictionary DICT, - which must have the get.fv elements appropriately set. Returns - nonzero only if successful. */ -int -pfm_read_case (struct file_handle *h, union value *perm, struct dictionary *dict) +/* Reads one case from portable file R into C. */ +bool +pfm_read_case (struct pfm_reader *r, struct ccase *c) { - struct pfm_fhuser_ext *ext = h->ext; - - union value *temp, *tp; - int i; + size_t i; + size_t idx; - /* Check for end of file. */ - if (ext->cc == 99 /* Z */) - return 0; + if (setjmp (r->bail_out)) + return false; - /* The first concern is to obtain a full case relative to the data - file. (Cases in the data file have no particular relationship to - cases in the active file.) */ - tp = temp = local_alloc (sizeof *tp * ext->case_size); - for (tp = temp, i = 0; i < ext->nvars; i++) - if (ext->vars[i] == 0) - { - tp->f = read_float (h); - if (tp->f == second_lowest_value) - goto unexpected_eof; - tp++; - } - else - { - char *s = read_string (h); - if (s == NULL) - goto unexpected_eof; - asciify (s); - - st_bare_pad_copy (tp->s, s, ext->vars[i]); - tp += DIV_RND_UP (ext->vars[i], MAX_SHORT_STRING); - } + /* Check for end of file. */ + if (r->cc == 'Z') + return false; - /* Translate a case in data file format to a case in active file - format. */ - for (i = 0; i < dict_get_var_cnt (dict); i++) + idx = 0; + for (i = 0; i < r->var_cnt; i++) { - struct variable *v = dict_get_var (dict, i); - - if (v->get.fv == -1) - continue; + int width = r->widths[i]; - if (v->type == NUMERIC) - perm[v->fv].f = temp[v->get.fv].f; + if (width == 0) + { + case_data_rw (c, idx)->f = read_float (r); + idx++; + } else - memcpy (&perm[v->fv].s, &temp[v->get.fv], v->width); + { + char string[256]; + read_string (r, string); + buf_copy_str_rpad (case_data_rw (c, idx)->s, width, string); + idx += DIV_RND_UP (width, MAX_SHORT_STRING); + } } + + return true; +} - local_free (temp); - return 1; +/* Returns true if FILE is an SPSS portable file, + false otherwise. */ +bool +pfm_detect (FILE *file) +{ + unsigned char header[464]; + char trans[256]; + int cooked_cnt, raw_cnt; + int i; - unexpected_eof: - lose ((h, _("End of file midway through case."))); + cooked_cnt = raw_cnt = 0; + while (cooked_cnt < sizeof header) + { + int c = getc (file); + if (c == EOF || raw_cnt++ > 512) + return false; + else if (c != '\n' && c != '\r') + header[cooked_cnt++] = c; + } - lossage: - local_free (temp); - return 0; -} + memset (trans, 0, 256); + for (i = 64; i < 256; i++) + { + unsigned char c = header[i + 200]; + if (trans[c] == 0) + trans[c] = portable_to_local[i]; + } -static struct fh_ext_class pfm_r_class = -{ - 5, - N_("reading as a portable file"), - pfm_close, -}; + for (i = 0; i < 8; i++) + if (trans[header[i + 456]] != "SPSSPORT"[i]) + return false; + + return true; +}