/* PSPP - computes sample statistics.
Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
Written by Ben Pfaff <blp@gnu.org>.
+ Code for parsing floating-point numbers adapted from GNU C
+ library.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA. */
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
-/* AIX requires this to be the first thing in the file. */
#include <config.h>
-#if __GNUC__
-#define alloca __builtin_alloca
-#else
-#if HAVE_ALLOCA_H
-#include <alloca.h>
-#else
-#ifdef _AIX
-#pragma alloca
-#else
-#ifndef alloca /* predefined by HP cc +Olibcalls */
-char *alloca ();
-#endif
-#endif
-#endif
-#endif
-
-#include <assert.h>
+#include "pfm-read.h"
+#include "error.h"
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <errno.h>
#include <math.h>
+#include <setjmp.h>
#include "alloc.h"
-#include "avl.h"
+#include <stdbool.h>
+#include "case.h"
+#include "dictionary.h"
#include "file-handle.h"
#include "format.h"
-#include "getline.h"
+#include "getl.h"
+#include "hash.h"
#include "magic.h"
#include "misc.h"
-#include "pfm.h"
+#include "pool.h"
#include "str.h"
+#include "value-labels.h"
#include "var.h"
-#undef DEBUGGING
-/*#define DEBUGGING 1*/
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
#include "debug-print.h"
-/* pfm's file_handle extension. */
-struct pfm_fhuser_ext
+/* Portable file reader. */
+struct pfm_reader
{
- FILE *file; /* Actual file. */
-
- struct dictionary *dict; /* File's dictionary. */
- int weight_index; /* 0-based index of weight variable, or -1. */
+ struct pool *pool; /* All the portable file state. */
- unsigned char *trans; /* 256-byte character set translation table. */
+ jmp_buf bail_out; /* longjmp() target for error handling. */
- int nvars; /* Number of variables. */
- int *vars; /* Variable widths, 0 for numeric. */
- int case_size; /* Number of `value's per case. */
-
- unsigned char buf[83]; /* Input buffer. */
- unsigned char *bp; /* Buffer pointer. */
- int cc; /* Current character. */
+ struct file_handle *fh; /* File handle. */
+ FILE *file; /* File stream. */
+ char cc; /* Current character. */
+ char *trans; /* 256-byte character set translation table. */
+ int var_cnt; /* Number of variables. */
+ int weight_index; /* 0-based index of weight variable, or -1. */
+ int *widths; /* Variable widths, 0 for numeric. */
+ int value_cnt; /* Number of `value's per case. */
};
-static struct fh_ext_class pfm_r_class;
-
-static int
-corrupt_msg (struct file_handle *h, const char *format,...)
- __attribute__ ((format (printf, 2, 3)));
-
-/* Displays a corruption error. */
-static int
-corrupt_msg (struct file_handle *h, const char *format, ...)
-{
- struct pfm_fhuser_ext *ext = h->ext;
- char buf[1024];
-
- {
- va_list args;
-
- va_start (args, format);
- vsnprintf (buf, 1024, format, args);
- va_end (args);
- }
-
- {
- char *title;
- struct error e;
-
- e.class = ME;
- getl_location (&e.where.filename, &e.where.line_number);
- e.title = title = local_alloc (strlen (h->fn) + 80);
- sprintf (title, _("portable file %s corrupt at offset %ld: "),
- h->fn, ftell (ext->file) - (82 - (long) (ext->bp - ext->buf)));
- e.text = buf;
-
- err_vmsg (&e);
-
- local_free (title);
- }
-
- return 0;
-}
+static void
+error (struct pfm_reader *r, const char *msg,...)
+ PRINTF_FORMAT (2, 3);
-/* Closes a portable file after we're done with it. */
+/* Displays MSG as an error message and aborts reading the
+ portable file via longjmp(). */
static void
-pfm_close (struct file_handle * h)
+error (struct pfm_reader *r, const char *msg, ...)
{
- struct pfm_fhuser_ext *ext = h->ext;
-
- if (EOF == fclose (ext->file))
- msg (ME, _("%s: Closing portable file: %s."), h->fn, strerror (errno));
- free (ext->vars);
- free (ext->trans);
- free (h->ext);
+ struct error e;
+ const char *filename;
+ char *title;
+ va_list args;
+
+ e.class = ME;
+ getl_location (&e.where.filename, &e.where.line_number);
+ filename = fh_get_filename (r->fh);
+ e.title = title = pool_alloc (r->pool, strlen (filename) + 80);
+ sprintf (title, _("portable file %s corrupt at offset %ld: "),
+ filename, ftell (r->file));
+
+ va_start (args, msg);
+ err_vmsg (&e, msg, args);
+ va_end (args);
+
+ longjmp (r->bail_out, 1);
}
-/* Displays the message X with corrupt_msg, then jumps to the lossage
- label. */
-#define lose(X) \
- do \
- { \
- corrupt_msg X; \
- goto lossage; \
- } \
- while (0)
-
-/* Read an 80-character line into handle H's buffer. Return
- success. */
-static int
-fill_buf (struct file_handle *h)
+/* Closes portable file reader R, after we're done with it. */
+void
+pfm_close_reader (struct pfm_reader *r)
{
- struct pfm_fhuser_ext *ext = h->ext;
-
- if (80 != fread (ext->buf, 1, 80, ext->file))
- lose ((h, _("Unexpected end of file.")));
-
- /* PORTME: line ends. */
- {
- int c;
-
- c = getc (ext->file);
- if (c != '\n' && c != '\r')
- lose ((h, _("Bad line end.")));
-
- c = getc (ext->file);
- if (c != '\n' && c != '\r')
- ungetc (c, ext->file);
- }
-
- if (ext->trans)
- {
- int i;
-
- for (i = 0; i < 80; i++)
- ext->buf[i] = ext->trans[ext->buf[i]];
- }
-
- ext->bp = ext->buf;
-
- return 1;
-
- lossage:
- return 0;
+ if (r != NULL)
+ pool_destroy (r->pool);
}
-/* Read a single character into cur_char. Return success; */
-static int
-read_char (struct file_handle *h)
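+/* Reads the next character from R's file into R->cc, skipping
+ carriage returns and newlines and applying R's translation
+ table if one is set. Reports an error on end of file. */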
+static void
+advance (struct pfm_reader *r)
{
- struct pfm_fhuser_ext *ext = h->ext;
+ int c;
- if (ext->bp >= &ext->buf[80] && !fill_buf (h))
- return 0;
- ext->cc = *ext->bp++;
- return 1;
-}
+ while ((c = getc (r->file)) == '\r' || c == '\n')
+ continue;
+ if (c == EOF)
+ error (r, _("unexpected end of file"));
-/* Advance a single character. */
-#define advance() if (!read_char (h)) goto lossage
+ if (r->trans != NULL)
+ c = r->trans[c];
+ r->cc = c;
+}
/* Skip a single character if present, and return whether it was
skipped. */
-static inline int
-skip_char (struct file_handle *h, int c)
+static inline bool
+match (struct pfm_reader *r, int c)
{
- struct pfm_fhuser_ext *ext = h->ext;
-
- if (ext->cc == c)
+ if (r->cc == c)
{
- advance ();
- return 1;
+ advance (r);
+ return true;
}
- lossage:
- return 0;
+ else
+ return false;
}
-/* Skip a single character if present, and return whether it was
- skipped. */
-#define match(C) skip_char (h, C)
-
-static int read_header (struct file_handle *h);
-static int read_version_data (struct file_handle *h, struct pfm_read_info *inf);
-static int read_variables (struct file_handle *h);
-static int read_value_label (struct file_handle *h);
-void dump_dictionary (struct dictionary *dict);
+static void read_header (struct pfm_reader *);
+static void read_version_data (struct pfm_reader *, struct pfm_read_info *);
+static void read_variables (struct pfm_reader *, struct dictionary *);
+static void read_value_label (struct pfm_reader *, struct dictionary *);
+void dump_dictionary (struct dictionary *);
/* Reads the dictionary from file with handle H, and returns it in a
dictionary structure. This dictionary may be modified in order to
rename, reorder, and delete variables, etc. */
-struct dictionary *
-pfm_read_dictionary (struct file_handle *h, struct pfm_read_info *inf)
+struct pfm_reader *
+pfm_open_reader (struct file_handle *fh, struct dictionary **dict,
+ struct pfm_read_info *info)
{
- /* The file handle extension record. */
- struct pfm_fhuser_ext *ext;
-
- /* Check whether the file is already open. */
- if (h->class == &pfm_r_class)
- {
- ext = h->ext;
- return ext->dict;
- }
- else if (h->class != NULL)
- {
- msg (ME, _("Cannot read file %s as portable file: already opened "
- "for %s."),
- fh_handle_name (h), h->class->name);
- return NULL;
- }
-
- msg (VM (1), _("%s: Opening portable-file handle %s for reading."),
- fh_handle_filename (h), fh_handle_name (h));
-
- /* Open the physical disk file. */
- ext = xmalloc (sizeof (struct pfm_fhuser_ext));
- ext->file = fopen (h->norm_fn, "rb");
- if (ext->file == NULL)
+ struct pool *volatile pool = NULL;
+ struct pfm_reader *volatile r = NULL;
+
+ *dict = dict_create ();
+ if (!fh_open (fh, "portable file", "rs"))
+ goto error;
+
+ /* Create and initialize reader. */
+ pool = pool_create ();
+ r = pool_alloc (pool, sizeof *r);
+ r->pool = pool;
+ if (setjmp (r->bail_out))
+ goto error;
+ r->fh = fh;
+ r->file = pool_fopen (r->pool, fh_get_filename (r->fh), "rb");
+ r->weight_index = -1;
+ r->trans = NULL;
+ r->var_cnt = 0;
+ r->widths = NULL;
+ r->value_cnt = 0;
+
+ /* Check that file open succeeded, prime reading. */
+ if (r->file == NULL)
{
msg (ME, _("An error occurred while opening \"%s\" for reading "
- "as a portable file: %s."), h->fn, strerror (errno));
+ "as a portable file: %s."),
+ fh_get_filename (r->fh), strerror (errno));
err_cond_fail ();
- free (ext);
- return NULL;
+ goto error;
}
-
- /* Initialize the sfm_fhuser_ext structure. */
- h->class = &pfm_r_class;
- h->ext = ext;
- ext->dict = NULL;
- ext->trans = NULL;
- if (!fill_buf (h))
- goto lossage;
- advance ();
-
- /* Read the header. */
- if (!read_header (h))
- goto lossage;
- /* Read version, date info, product identification. */
- if (!read_version_data (h, inf))
- goto lossage;
-
- /* Read variables. */
- if (!read_variables (h))
- goto lossage;
+ /* Read header, version, date info, product id, variables. */
+ read_header (r);
+ read_version_data (r, info);
+ read_variables (r, *dict);
- /* Value labels. */
- while (match (77 /* D */))
- if (!read_value_label (h))
- goto lossage;
+ /* Read value labels. */
+ while (match (r, 'D'))
+ read_value_label (r, *dict);
- if (!match (79 /* F */))
- lose ((h, _("Data record expected.")));
+ /* Check that we've made it to the data. */
+ if (!match (r, 'F'))
+ error (r, _("Data record expected."));
- msg (VM (2), _("Read portable-file dictionary successfully."));
+ return r;
-#if DEBUGGING
- dump_dictionary (ext->dict);
-#endif
- return ext->dict;
-
- lossage:
- /* Come here on unsuccessful completion. */
- msg (VM (1), _("Error reading portable-file dictionary."));
-
- fclose (ext->file);
- if (ext && ext->dict)
- free_dictionary (ext->dict);
- free (ext);
- h->class = NULL;
- h->ext = NULL;
+ error:
+ pfm_close_reader (r);
+ dict_destroy (*dict);
+ *dict = NULL;
return NULL;
}
\f
-/* Read a floating point value and return its value, or
- second_lowest_value on error. */
+/* Returns the value of base-30 digit C,
+ or -1 if C is not a base-30 digit. */
+static int
+base_30_value (unsigned char c)
+{
+ static const char base_30_digits[] = "0123456789ABCDEFGHIJKLMNOPQRST";
+ const char *p = strchr (base_30_digits, c);
+ return p != NULL ? p - base_30_digits : -1;
+}
+
+/* Read a floating point value and return its value. */
static double
-read_float (struct file_handle *h)
+read_float (struct pfm_reader *r)
{
- struct pfm_fhuser_ext *ext = h->ext;
double num = 0.;
- int got_dot = 0;
- int got_digit = 0;
int exponent = 0;
- int neg = 0;
+ bool got_dot = false; /* Seen a decimal point? */
+ bool got_digit = false; /* Seen any digits? */
+ bool negative = false; /* Number is negative? */
/* Skip leading spaces. */
- while (match (126 /* space */))
- ;
+ while (match (r, ' '))
+ continue;
- if (match (137 /* * */))
+ /* `*' indicates system-missing. */
+ if (match (r, '*'))
{
- advance (); /* Probably a dot (.) but doesn't appear to matter. */
+ advance (r); /* Probably a dot (.) but doesn't appear to matter. */
return SYSMIS;
}
- else if (match (141 /* - */))
- neg = 1;
+ negative = match (r, '-');
for (;;)
{
- if (ext->cc >= 64 /* 0 */ && ext->cc <= 93 /* T */)
+ int digit = base_30_value (r->cc);
+ if (digit != -1)
{
- got_digit++;
+ got_digit = true;
/* Make sure that multiplication by 30 will not overflow. */
if (num > DBL_MAX * (1. / 30.))
digit so that we can multiply by 10 later. */
++exponent;
else
- num = (num * 30.0) + (ext->cc - 64);
+ num = (num * 30.0) + digit;
/* Keep track of the number of digits after the decimal point.
If we just divided by 30 here, we would lose precision. */
if (got_dot)
--exponent;
}
- else if (!got_dot && ext->cc == 127 /* . */)
+ else if (!got_dot && r->cc == '.')
/* Record that we have found the decimal point. */
got_dot = 1;
else
/* Any other character terminates the number. */
break;
- advance ();
+ advance (r);
}
+ /* Check that we had some digits. */
if (!got_digit)
- lose ((h, "Number expected."));
-
- if (ext->cc == 130 /* + */ || ext->cc == 141 /* - */)
+ error (r, "Number expected.");
+
+ /* Get exponent if any. */
+ if (r->cc == '+' || r->cc == '-')
{
- /* Get the exponent. */
long int exp = 0;
- int neg_exp = ext->cc == 141 /* - */;
+ bool negative_exponent = r->cc == '-';
+ int digit;
- for (;;)
+ for (advance (r); (digit = base_30_value (r->cc)) != -1; advance (r))
{
- advance ();
-
- if (ext->cc < 64 /* 0 */ || ext->cc > 93 /* T */)
- break;
-
if (exp > LONG_MAX / 30)
- goto overflow;
- exp = exp * 30 + (ext->cc - 64);
+ {
+ exp = LONG_MAX;
+ break;
+ }
+ exp = exp * 30 + digit;
}
/* We don't check whether there were actually any digits, but we
probably should. */
- if (neg_exp)
+ if (negative_exponent)
exp = -exp;
exponent += exp;
}
-
- if (!match (142 /* / */))
- lose ((h, _("Missing numeric terminator.")));
- /* Multiply NUM by 30 to the EXPONENT power, checking for overflow. */
+ /* Numbers must end with `/'. */
+ if (!match (r, '/'))
+ error (r, _("Missing numeric terminator."));
+ /* Multiply `num' by 30 to the `exponent' power, checking for
+ overflow. */
if (exponent < 0)
num *= pow (30.0, (double) exponent);
else if (exponent > 0)
{
if (num > DBL_MAX * pow (30.0, (double) -exponent))
- goto overflow;
- num *= pow (30.0, (double) exponent);
+ num = DBL_MAX;
+ else
+ num *= pow (30.0, (double) exponent);
}
- if (neg)
- return -num;
- else
- return num;
-
- overflow:
- if (neg)
- return -DBL_MAX / 10.;
- else
- return DBL_MAX / 10;
-
- lossage:
- return second_lowest_value;
+ return negative ? -num : num;
}
-/* Read an integer and return its value, or NOT_INT on failure. */
-int
-read_int (struct file_handle *h)
+/* Read an integer and return its value. */
+static int
+read_int (struct pfm_reader *r)
{
- double f = read_float (h);
-
- if (f == second_lowest_value)
- goto lossage;
+ double f = read_float (r);
if (floor (f) != f || f >= INT_MAX || f <= INT_MIN)
- lose ((h, _("Bad integer format.")));
+ error (r, _("Invalid integer."));
return f;
-
- lossage:
- return NOT_INT;
}
-/* Reads a string and returns its value in a static buffer, or NULL on
- failure. The buffer can be deallocated by calling with a NULL
- argument. */
-static unsigned char *
-read_string (struct file_handle *h)
+/* Reads a string into BUF, which must have room for 256
+ characters. */
+static void
+read_string (struct pfm_reader *r, char *buf)
{
- struct pfm_fhuser_ext *ext = h->ext;
- static char *buf;
- int n;
+ int n = read_int (r);
+ if (n < 0 || n > 255)
+ error (r, _("Bad string length %d."), n);
- if (h == NULL)
+ while (n-- > 0)
{
- free (buf);
- buf = NULL;
- return NULL;
+ *buf++ = r->cc;
+ advance (r);
}
- else if (buf == NULL)
- buf = xmalloc (256);
-
- n = read_int (h);
- if (n == NOT_INT)
- return NULL;
- if (n < 0 || n > 255)
- lose ((h, _("Bad string length %d."), n));
-
- {
- int i;
-
- for (i = 0; i < n; i++)
- {
- buf[i] = ext->cc;
- advance ();
- }
- }
-
- buf[n] = 0;
- return buf;
+ *buf = '\0';
+}
- lossage:
- return NULL;
+/* Reads a string and returns a copy of it allocated from R's
+ pool. */
+static char *
+read_pool_string (struct pfm_reader *r)
+{
+ char string[256];
+ read_string (r, string);
+ return pool_strdup (r->pool, string);
}
\f
/* Reads the 464-byte file header. */
-int
-read_header (struct file_handle *h)
+static void
+read_header (struct pfm_reader *r)
{
- struct pfm_fhuser_ext *ext = h->ext;
+ /* portable_to_local[PORTABLE] translates the given portable
+ character into the local character set. */
+ static const char portable_to_local[256] =
+ {
+ " "
+ "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ."
+ "<(+|&[]!$*);^-/|,%_>?`:$@'=\" ~- 0123456789 -() {}\\ "
+ " "
+ };
- /* For now at least, just ignore the vanity splash strings. */
- {
- int i;
+ char *trans;
+ int i;
- for (i = 0; i < 200; i++)
- advance ();
- }
+ /* Read and ignore vanity splash strings. */
+ for (i = 0; i < 200; i++)
+ advance (r);
- {
- unsigned char src[256];
- int trans_temp[256];
- int i;
-
- for (i = 0; i < 256; i++)
- {
- src[i] = (unsigned char) ext->cc;
- advance ();
- }
+ /* Skip the first 64 characters of the translation table.
+ We don't care about these. They are probably all set to
+ '0', marking them as untranslatable, and that would screw
+ up our actual translation of the real '0'. */
+ for (i = 0; i < 64; i++)
+ advance (r);
+
+ /* Read the rest of the translation table. */
+ trans = pool_malloc (r->pool, 256);
+ memset (trans, 0, 256);
+ for (; i < 256; i++)
+ {
+ unsigned char c;
- for (i = 0; i < 256; i++)
- trans_temp[i] = -1;
-
- /* 0 is used to mark untranslatable characters, so we have to mark
- it specially. */
- trans_temp[src[64]] = 64;
- for (i = 0; i < 256; i++)
- if (trans_temp[src[i]] == -1)
- trans_temp[src[i]] = i;
-
- ext->trans = xmalloc (256);
- for (i = 0; i < 256; i++)
- ext->trans[i] = trans_temp[i] == -1 ? 0 : trans_temp[i];
-
- /* Translate the input buffer. */
- for (i = 0; i < 80; i++)
- ext->buf[i] = ext->trans[ext->buf[i]];
- ext->cc = ext->trans[ext->cc];
- }
-
- {
- unsigned char sig[8] = {92, 89, 92, 92, 89, 88, 91, 93};
- int i;
+ advance (r);
- for (i = 0; i < 8; i++)
- if (!match (sig[i]))
- lose ((h, "Missing SPSSPORT signature."));
- }
+ c = r->cc;
+ if (trans[c] == 0)
+ trans[c] = portable_to_local[i];
+ }
- return 1;
+ /* Set up the translation table, then read the first
+ translated character. */
+ r->trans = trans;
+ advance (r);
- lossage:
- return 0;
+ /* Skip and verify signature. */
+ for (i = 0; i < 8; i++)
+ if (!match (r, "SPSSPORT"[i]))
+ {
+ msg (SE, _("%s: Not a portable file."), fh_get_filename (r->fh));
+ longjmp (r->bail_out, 1);
+ }
}
/* Reads the version and date info record, as well as product and
subproduct identification records if present. */
-int
-read_version_data (struct file_handle *h, struct pfm_read_info *inf)
+static void
+read_version_data (struct pfm_reader *r, struct pfm_read_info *info)
{
- struct pfm_fhuser_ext *ext = h->ext;
-
- /* Version. */
- if (!match (74 /* A */))
- lose ((h, "Unrecognized version code %d.", ext->cc));
-
- /* Date. */
- {
- static const int map[] = {6, 7, 8, 9, 3, 4, 0, 1};
- char *date = read_string (h);
- int i;
-
- if (!date)
- return 0;
- if (strlen (date) != 8)
- lose ((h, _("Bad date string length %d."), strlen (date)));
- for (i = 0; i < 8; i++)
- {
- if (date[i] < 64 /* 0 */ || date[i] > 73 /* 9 */)
- lose ((h, _("Bad character in date.")));
- if (inf)
- inf->creation_date[map[i]] = date[i] - 64 /* 0 */ + '0';
- }
- if (inf)
- {
- inf->creation_date[2] = inf->creation_date[5] = ' ';
- inf->creation_date[10] = 0;
- }
- }
-
- /* Time. */
- {
- static const int map[] = {0, 1, 3, 4, 6, 7};
- char *time = read_string (h);
- int i;
-
- if (!time)
- return 0;
- if (strlen (time) != 6)
- lose ((h, _("Bad time string length %d."), strlen (time)));
- for (i = 0; i < 6; i++)
- {
- if (time[i] < 64 /* 0 */ || time[i] > 73 /* 9 */)
- lose ((h, _("Bad character in time.")));
- if (inf)
- inf->creation_time[map[i]] = time[i] - 64 /* 0 */ + '0';
- }
- if (inf)
- {
- inf->creation_time[2] = inf->creation_time[5] = ' ';
- inf->creation_time[8] = 0;
- }
- }
-
- /* Product. */
- if (match (65 /* 1 */))
- {
- char *product;
-
- product = read_string (h);
- if (product == NULL)
- return 0;
- if (inf)
- strncpy (inf->product, product, 61);
- }
- else if (inf)
- inf->product[0] = 0;
+ static char empty_string[] = "";
+ char *date, *time, *product, *author, *subproduct;
+ int i;
- /* Subproduct. */
- if (match (67 /* 3 */))
+ /* Read version code, date, time, and the optional product,
+ author, and subproduct strings. */
+ if (!match (r, 'A'))
+ error (r, "Unrecognized version code `%c'.", r->cc);
+ date = read_pool_string (r);
+ time = read_pool_string (r);
+ product = match (r, '1') ? read_pool_string (r) : empty_string;
+ author = match (r, '2') ? read_pool_string (r) : empty_string;
+ subproduct = match (r, '3') ? read_pool_string (r) : empty_string;
+
+ /* Validate date and time string lengths. */
+ if (strlen (date) != 8)
+ error (r, _("Bad date string length %d."), strlen (date));
+ if (strlen (time) != 6)
+ error (r, _("Bad time string length %d."), strlen (time));
+
+ /* Save file info. */
+ if (info != NULL)
{
- char *subproduct;
-
- subproduct = read_string (h);
- if (subproduct == NULL)
- return 0;
- if (inf)
- strncpy (inf->subproduct, subproduct, 61);
+ /* Date. */
+ for (i = 0; i < 8; i++)
+ {
+ static const int map[] = {6, 7, 8, 9, 3, 4, 0, 1};
+ info->creation_date[map[i]] = date[i];
+ }
+ info->creation_date[2] = info->creation_date[5] = ' ';
+ info->creation_date[10] = 0;
+
+ /* Time. */
+ for (i = 0; i < 6; i++)
+ {
+ static const int map[] = {0, 1, 3, 4, 6, 7};
+ info->creation_time[map[i]] = time[i];
+ }
+ info->creation_time[2] = info->creation_time[5] = ' ';
+ info->creation_time[8] = 0;
+
+ /* Product and subproduct. */
+ str_copy_trunc (info->product, sizeof info->product, product);
+ str_copy_trunc (info->subproduct, sizeof info->subproduct, subproduct);
}
- else if (inf)
- inf->subproduct[0] = 0;
- return 1;
-
- lossage:
- return 0;
}
-static int
-convert_format (struct file_handle *h, int fmt[3], struct fmt_spec *v,
- struct variable *vv)
-{
- if (fmt[0] < 0
- || (size_t) fmt[0] >= sizeof translate_fmt / sizeof *translate_fmt)
- lose ((h, _("%s: Bad format specifier byte %d."), vv->name, fmt[0]));
-
- v->type = translate_fmt[fmt[0]];
- v->w = fmt[1];
- v->d = fmt[2];
-
- /* FIXME? Should verify the resulting specifier more thoroughly. */
-
- if (v->type == -1)
- lose ((h, _("%s: Bad format specifier byte (%d)."), vv->name, fmt[0]));
- if ((vv->type == ALPHA) ^ ((formats[v->type].cat & FCAT_STRING) != 0))
- lose ((h, _("%s variable %s has %s format specifier %s."),
- vv->type == ALPHA ? _("String") : _("Numeric"),
- vv->name,
- formats[v->type].cat & FCAT_STRING ? _("string") : _("numeric"),
- formats[v->type].name));
- return 1;
-
- lossage:
- return 0;
-}
-
-/* Translation table from SPSS character code to this computer's
- native character code (which is probably ASCII). */
-static const unsigned char spss2ascii[256] =
- {
- " "
- "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ."
- "<(+|&[]!$*);^-/|,%_>?`:$@'=\" ~- 0123456789 -() {}\\ "
- " "
- };
-
-/* Translate string S into ASCII. */
+/* Translates a format specification read from portable file R as
+ the three integers PORTABLE_FORMAT into a normal format
+ specifier FORMAT, checking that the format is appropriate for
+ variable V. */
static void
-asciify (char *s)
+convert_format (struct pfm_reader *r, const int portable_format[3],
+ struct fmt_spec *format, struct variable *v)
{
- for (; *s; s++)
- *s = spss2ascii[(unsigned char) *s];
+ format->type = translate_fmt (portable_format[0]);
+ if (format->type == -1)
+ error (r, _("%s: Bad format specifier byte (%d)."),
+ v->name, portable_format[0]);
+ format->w = portable_format[1];
+ format->d = portable_format[2];
+
+ if (!check_output_specifier (format, false)
+ || !check_specifier_width (format, v->width, false))
+ error (r, _("%s variable %s has invalid format specifier %s."),
+ v->type == NUMERIC ? _("Numeric") : _("String"),
+ v->name, fmt_to_string (format));
}
-static int parse_value (struct file_handle *, union value *, struct variable *);
+static union value parse_value (struct pfm_reader *, struct variable *);
/* Read information on all the variables. */
-static int
-read_variables (struct file_handle *h)
+static void
+read_variables (struct pfm_reader *r, struct dictionary *dict)
{
- struct pfm_fhuser_ext *ext = h->ext;
+ char *weight_name = NULL;
int i;
- if (!match (68 /* 4 */))
- lose ((h, _("Expected variable count record.")));
+ if (!match (r, '4'))
+ error (r, _("Expected variable count record."));
- ext->nvars = read_int (h);
- if (ext->nvars <= 0 || ext->nvars == NOT_INT)
- lose ((h, _("Invalid number of variables %d."), ext->nvars));
- ext->vars = xmalloc (sizeof *ext->vars * ext->nvars);
+ r->var_cnt = read_int (r);
+ if (r->var_cnt <= 0 || r->var_cnt == NOT_INT)
+ error (r, _("Invalid number of variables %d."), r->var_cnt);
+ r->widths = pool_nalloc (r->pool, r->var_cnt, sizeof *r->widths);
/* Purpose of this value is unknown. It is typically 161. */
- {
- int x = read_int (h);
+ read_int (r);
- if (x == NOT_INT)
- goto lossage;
- if (x != 161)
- corrupt_msg (h, _("Unexpected flag value %d."), x);
- }
-
- ext->dict = new_dictionary (0);
-
- if (match (70 /* 6 */))
+ if (match (r, '6'))
{
- char *name = read_string (h);
- if (!name)
- goto lossage;
-
- strcpy (ext->dict->weight_var, name);
- asciify (ext->dict->weight_var);
+ weight_name = read_pool_string (r);
+ if (strlen (weight_name) > SHORT_NAME_LEN)
+ error (r, _("Weight variable name (%s) truncated."), weight_name);
}
- for (i = 0; i < ext->nvars; i++)
+ for (i = 0; i < r->var_cnt; i++)
{
int width;
- unsigned char *name;
+ char name[256];
int fmt[6];
struct variable *v;
int j;
- if (!match (71 /* 7 */))
- lose ((h, _("Expected variable record.")));
+ if (!match (r, '7'))
+ error (r, _("Expected variable record."));
- width = read_int (h);
- if (width == NOT_INT)
- goto lossage;
+ width = read_int (r);
if (width < 0)
- lose ((h, _("Invalid variable width %d."), width));
- ext->vars[i] = width;
-
- name = read_string (h);
- if (name == NULL)
- goto lossage;
- for (j = 0; j < 6; j++)
- {
- fmt[j] = read_int (h);
- if (fmt[j] == NOT_INT)
- goto lossage;
- }
+ error (r, _("Invalid variable width %d."), width);
+ r->widths[i] = width;
- /* Verify first character of variable name.
-
- Weirdly enough, there is no # character in the SPSS portable
- character set, so we can't check for it. */
- if (strlen (name) > 8)
- lose ((h, _("position %d: Variable name has %u characters."),
- i, strlen (name)));
- if ((name[0] < 74 /* A */ || name[0] > 125 /* Z */)
- && name[0] != 152 /* @ */)
- lose ((h, _("position %d: Variable name begins with invalid "
- "character."), i));
- if (name[0] >= 100 /* a */ && name[0] <= 125 /* z */)
- {
- corrupt_msg (h, _("position %d: Variable name begins with "
- "lowercase letter %c."),
- i, name[0] - 100 + 'a');
- name[0] -= 26 /* a - A */;
- }
+ read_string (r, name);
+ for (j = 0; j < 6; j++)
+ fmt[j] = read_int (r);
- /* Verify remaining characters of variable name. */
- for (j = 1; j < (int) strlen (name); j++)
- {
- int c = name[j];
-
- if (c >= 100 /* a */ && c <= 125 /* z */)
- {
- corrupt_msg (h, _("position %d: Variable name character %d "
- "is lowercase letter %c."),
- i, j + 1, c - 100 + 'a');
- name[j] -= 26 /* z - Z */;
- }
- else if ((c >= 64 /* 0 */ && c <= 99 /* Z */)
- || c == 127 /* . */ || c == 152 /* @ */
- || c == 136 /* $ */ || c == 146 /* _ */)
- name[j] = c;
- else
- lose ((h, _("position %d: character `\\%03o' is not "
- "valid in a variable name."), i, c));
- }
+ if (!var_is_valid_name (name, false) || *name == '#' || *name == '$')
+ error (r, _("position %d: Invalid variable name `%s'."), i, name);
+ str_uppercase (name);
- asciify (name);
if (width < 0 || width > 255)
- lose ((h, "Bad width %d for variable %s.", width, name));
+ error (r, "Bad width %d for variable %s.", width, name);
- v = create_variable (ext->dict, name, width ? ALPHA : NUMERIC, width);
- v->get.fv = v->fv;
+ v = dict_create_var (dict, name, width);
if (v == NULL)
- lose ((h, _("Duplicate variable name %s."), name));
- if (!convert_format (h, &fmt[0], &v->print, v))
- goto lossage;
- if (!convert_format (h, &fmt[3], &v->write, v))
- goto lossage;
+ error (r, _("Duplicate variable name %s."), name);
+
+ convert_format (r, &fmt[0], &v->print, v);
+ convert_format (r, &fmt[3], &v->write, v);
/* Range missing values. */
- if (match (75 /* B */))
- {
- v->miss_type = MISSING_RANGE;
- if (!parse_value (h, &v->missing[0], v)
- || !parse_value (h, &v->missing[1], v))
- goto lossage;
- }
- else if (match (74 /* A */))
- {
- v->miss_type = MISSING_HIGH;
- if (!parse_value (h, &v->missing[0], v))
- goto lossage;
- }
- else if (match (73 /* 9 */))
- {
- v->miss_type = MISSING_LOW;
- if (!parse_value (h, &v->missing[0], v))
- goto lossage;
- }
+ if (match (r, 'B'))
+ {
+ double x = read_float (r);
+ double y = read_float (r);
+ mv_add_num_range (&v->miss, x, y);
+ }
+ else if (match (r, 'A'))
+ mv_add_num_range (&v->miss, read_float (r), HIGHEST);
+ else if (match (r, '9'))
+ mv_add_num_range (&v->miss, LOWEST, read_float (r));
/* Single missing values. */
- while (match (72 /* 8 */))
- {
- static const int map_next[MISSING_COUNT] =
- {
- MISSING_1, MISSING_2, MISSING_3, -1,
- MISSING_RANGE_1, MISSING_LOW_1, MISSING_HIGH_1,
- -1, -1, -1,
- };
-
- static const int map_ofs[MISSING_COUNT] =
- {
- -1, 0, 1, 2, -1, -1, -1, 2, 1, 1,
- };
-
- v->miss_type = map_next[v->miss_type];
- if (v->miss_type == -1)
- lose ((h, _("Bad missing values for %s."), v->name));
-
- assert (map_ofs[v->miss_type] != -1);
- if (!parse_value (h, &v->missing[map_ofs[v->miss_type]], v))
- goto lossage;
- }
-
- if (match (76 /* C */))
- {
- char *label = read_string (h);
-
- if (label == NULL)
- goto lossage;
-
- v->label = xstrdup (label);
- asciify (v->label);
- }
+ while (match (r, '8'))
+ {
+ union value value = parse_value (r, v);
+ mv_add_value (&v->miss, &value);
+ }
+
+ if (match (r, 'C'))
+ {
+ char label[256];
+ read_string (r, label);
+ v->label = xstrdup (label);
+ }
}
- ext->case_size = ext->dict->nval;
- if (ext->dict->weight_var[0] != 0
- && !find_dict_variable (ext->dict, ext->dict->weight_var))
- lose ((h, _("Weighting variable %s not present in dictionary."),
- ext->dict->weight_var));
-
- return 1;
+ if (weight_name != NULL)
+ {
+ struct variable *weight_var = dict_lookup_var (dict, weight_name);
+ if (weight_var == NULL)
+ error (r, _("Weighting variable %s not present in dictionary."),
+ weight_name);
- lossage:
- return 0;
+ dict_set_weight (dict, weight_var);
+ }
}
-/* Parse a value for variable VV into value V. Returns success. */
-static int
-parse_value (struct file_handle *h, union value *v, struct variable *vv)
+/* Parses a value for variable VV from portable file R and returns
+ it. A string is read if VV is ALPHA, otherwise a floating-point
+ number. */
+static union value
+parse_value (struct pfm_reader *r, struct variable *vv)
{
- if (vv->type == ALPHA)
+ union value v;
+
+ if (vv->type == ALPHA)
{
- char *mv = read_string (h);
- int j;
-
- if (mv == NULL)
- return 0;
-
- strncpy (v->s, mv, 8);
- for (j = 0; j < 8; j++)
- if (v->s[j])
- v->s[j] = spss2ascii[v->s[j]];
- else
- /* Value labels are always padded with spaces. */
- v->s[j] = ' ';
+ char string[256];
+ read_string (r, string);
+ buf_copy_str_rpad (v.s, 8, string);
}
else
- {
- v->f = read_float (h);
- if (v->f == second_lowest_value)
- return 0;
- }
+ v.f = read_float (r);
- return 1;
+ return v;
}
/* Parse a value label record and return success. */
-static int
-read_value_label (struct file_handle *h)
+static void
+read_value_label (struct pfm_reader *r, struct dictionary *dict)
{
- struct pfm_fhuser_ext *ext = h->ext;
-
/* Variables. */
int nv;
struct variable **v;
int i;
- nv = read_int (h);
- if (nv == NOT_INT)
- return 0;
-
- v = xmalloc (sizeof *v * nv);
+ nv = read_int (r);
+ v = pool_nalloc (r->pool, nv, sizeof *v);
for (i = 0; i < nv; i++)
{
- char *name = read_string (h);
- if (name == NULL)
- goto lossage;
- asciify (name);
+ char name[256];
+ read_string (r, name);
- v[i] = find_dict_variable (ext->dict, name);
+ v[i] = dict_lookup_var (dict, name);
if (v[i] == NULL)
- lose ((h, _("Unknown variable %s while parsing value labels."), name));
+ error (r, _("Unknown variable %s while parsing value labels."), name);
if (v[0]->width != v[i]->width)
- lose ((h, _("Cannot assign value labels to %s and %s, which "
+ error (r, _("Cannot assign value labels to %s and %s, which "
"have different variable types or widths."),
- v[0]->name, v[i]->name));
+ v[0]->name, v[i]->name);
}
- n_labels = read_int (h);
- if (n_labels == NOT_INT)
- goto lossage;
-
+ n_labels = read_int (r);
for (i = 0; i < n_labels; i++)
{
union value val;
- char *label;
- struct value_label *vl;
-
+ char label[256];
int j;
-
- if (!parse_value (h, &val, v[0]))
- goto lossage;
-
- label = read_string (h);
- if (label == NULL)
- goto lossage;
- asciify (label);
- /* Create a label. */
- vl = xmalloc (sizeof *vl);
- vl->v = val;
- vl->s = xstrdup (label);
- vl->ref_count = nv;
+ val = parse_value (r, v[0]);
+ read_string (r, label);
/* Assign the value_label's to each variable. */
for (j = 0; j < nv; j++)
{
struct variable *var = v[j];
- struct value_label *old;
-
- /* Create AVL tree if necessary. */
- if (!var->val_lab)
- var->val_lab = avl_create (NULL, val_lab_cmp,
- (void *) (var->width));
- old = avl_replace (var->val_lab, vl);
- if (old == NULL)
+ if (!val_labs_replace (var->val_labs, val, label))
continue;
if (var->type == NUMERIC)
- lose ((h, _("Duplicate label for value %g for variable %s."),
- vl->v.f, var->name));
+ error (r, _("Duplicate label for value %g for variable %s."),
+ val.f, var->name);
else
- lose ((h, _("Duplicate label for value `%.*s' for variable %s."),
- var->width, vl->v.s, var->name));
-
- free_value_label (old);
+ error (r, _("Duplicate label for value `%.*s' for variable %s."),
+ var->width, val.s, var->name);
}
}
- free (v);
- return 1;
-
- lossage:
- free (v);
- return 0;
}
-/* Reads one case from portable file H into the value array PERM
- according to the instuctions given in associated dictionary DICT,
- which must have the get.fv elements appropriately set. Returns
- nonzero only if successful. */
-int
-pfm_read_case (struct file_handle *h, union value *perm, struct dictionary *dict)
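+/* Reads one case from portable file R into C. Returns true if
+ successful, false if the end-of-file marker `Z' is the current
+ character or if an error is reported while reading. */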
+bool
+pfm_read_case (struct pfm_reader *r, struct ccase *c)
{
- struct pfm_fhuser_ext *ext = h->ext;
-
- union value *temp, *tp;
- int i;
+ size_t i;
+ size_t idx;
- /* Check for end of file. */
- if (ext->cc == 99 /* Z */)
- return 0;
+ if (setjmp (r->bail_out))
+ return false;
- /* The first concern is to obtain a full case relative to the data
- file. (Cases in the data file have no particular relationship to
- cases in the active file.) */
- tp = temp = local_alloc (sizeof *tp * ext->case_size);
- for (tp = temp, i = 0; i < ext->nvars; i++)
- if (ext->vars[i] == 0)
- {
- tp->f = read_float (h);
- if (tp->f == second_lowest_value)
- goto unexpected_eof;
- tp++;
- }
- else
- {
- char *s = read_string (h);
- if (s == NULL)
- goto unexpected_eof;
- asciify (s);
-
- st_bare_pad_copy (tp->s, s, ext->vars[i]);
- tp += DIV_RND_UP (ext->vars[i], MAX_SHORT_STRING);
- }
+ /* Check for end of file. */
+ if (r->cc == 'Z')
+ return false;
- /* Translate a case in data file format to a case in active file
- format. */
- for (i = 0; i < dict->nvar; i++)
+ idx = 0;
+ for (i = 0; i < r->var_cnt; i++)
{
- struct variable *v = dict->var[i];
-
- if (v->get.fv == -1)
- continue;
+ int width = r->widths[i];
- if (v->type == NUMERIC)
- perm[v->fv].f = temp[v->get.fv].f;
+ if (width == 0)
+ {
+ case_data_rw (c, idx)->f = read_float (r);
+ idx++;
+ }
else
- memcpy (&perm[v->fv].s, &temp[v->get.fv], v->width);
+ {
+ char string[256];
+ read_string (r, string);
+ buf_copy_str_rpad (case_data_rw (c, idx)->s, width, string);
+ idx += DIV_RND_UP (width, MAX_SHORT_STRING);
+ }
}
-
- local_free (temp);
- return 1;
-
- unexpected_eof:
- lose ((h, _("End of file midway through case.")));
-
- lossage:
- local_free (temp);
- return 0;
+
+ return true;
}
-
-static struct fh_ext_class pfm_r_class =
-{
- 5,
- N_("reading as a portable file"),
- pfm_close,
-};