You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA. */
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
#include <config.h>
-#include "sfm.h"
+#include "sfm-read.h"
#include "sfmP.h"
-#include <assert.h>
+#include "error.h"
#include <stdlib.h>
#include <ctype.h>
#include <errno.h>
#include <float.h>
+#include <setjmp.h>
#include "alloc.h"
+#include "case.h"
+#include "dictionary.h"
#include "error.h"
#include "file-handle.h"
#include "filename.h"
#include "debug-print.h"
-/* PORTME: This file may require substantial revision for those
- systems that don't meet the typical 32-bit integer/64-bit double
- model. It's kinda hard to tell without having one of them on my
- desk. */
-
-/* sfm's file_handle extension. */
-struct sfm_fhuser_ext
+/* System file reader. */
+struct sfm_reader
{
- FILE *file; /* Actual file. */
- int opened; /* Reference count. */
-
- struct dictionary *dict; /* File's dictionary. */
+ struct file_handle *fh; /* File handle. */
+ FILE *file; /* File stream. */
int reverse_endian; /* 1=file has endianness opposite us. */
- int case_size; /* Number of `values's per case. */
- long ncases; /* Number of cases, -1 if unknown. */
+ int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
+ int value_cnt; /* Number of `union values's per case. */
+ long case_cnt; /* Number of cases, -1 if unknown. */
int compressed; /* 1=compressed, 0=not compressed. */
double bias; /* Compression bias, usually 100.0. */
- int weight_index; /* 0-based index of weighting variable, or -1. */
+ int weight_idx; /* 0-based index of weighting variable, or -1. */
+
+ /* Variables. */
+ struct sfm_var *vars; /* Variables. */
/* File's special constants. */
flt64 sysmis;
flt64 highest;
flt64 lowest;
- /* Uncompression buffer. */
+ /* Decompression buffer. */
flt64 *buf; /* Buffer data. */
flt64 *ptr; /* Current location in buffer. */
flt64 *end; /* End of buffer data. */
/* Compression instruction octet. */
- unsigned char x[sizeof (flt64)];
- /* Current instruction octet. */
+ unsigned char x[8]; /* Current instruction octet. */
unsigned char *y; /* Location in current instruction octet. */
};
-static struct fh_ext_class sfm_r_class;
-
-#if GLOBAL_DEBUGGING
-void dump_dictionary (struct dictionary * dict);
-#endif
+/* A variable in a system file. */
+struct sfm_var
+ {
+ int width; /* 0=numeric, -1=long string continuation, otherwise string width. */
+ int fv; /* Index into case. */
+ };
\f
/* Utilities. */
-/* bswap_int32(): Reverse the byte order of 32-bit integer *X. */
+/* Swap bytes *A and *B. */
static inline void
-bswap_int32 (int32 *x)
+bswap (unsigned char *a, unsigned char *b)
{
- unsigned char *y = (unsigned char *) x;
- unsigned char t;
-
- t = y[0];
- y[0] = y[3];
- y[3] = t;
+ unsigned char t = *a;
+ *a = *b;
+ *b = t;
+}
- t = y[1];
- y[1] = y[2];
- y[2] = t;
+/* bswap_int32(): Reverse the byte order of 32-bit integer *X. */
+static inline void
+bswap_int32 (int32 *x_)
+{
+ unsigned char *x = (unsigned char *) x_;
+ bswap (x + 0, x + 3);
+ bswap (x + 1, x + 2);
}
/* Reverse the byte order of 64-bit floating point *X. */
static inline void
-bswap_flt64 (flt64 *x)
+bswap_flt64 (flt64 *x_)
{
- unsigned char *y = (unsigned char *) x;
- unsigned char t;
-
- t = y[0];
- y[0] = y[7];
- y[7] = t;
-
- t = y[1];
- y[1] = y[6];
- y[6] = t;
-
- t = y[2];
- y[2] = y[5];
- y[5] = t;
-
- t = y[3];
- y[3] = y[4];
- y[4] = t;
+ unsigned char *x = (unsigned char *) x_;
+ bswap (x + 0, x + 7);
+ bswap (x + 1, x + 6);
+ bswap (x + 2, x + 5);
+ bswap (x + 3, x + 4);
}
static void
static void
corrupt_msg (int class, const char *format,...)
{
- char buf[1024];
-
- {
- va_list args;
-
- va_start (args, format);
- vsnprintf (buf, 1024, format, args);
- va_end (args);
- }
-
- {
- struct error e;
+ struct error e;
+ va_list args;
- e.class = class;
- getl_location (&e.where.filename, &e.where.line_number);
- e.title = _("corrupt system file: ");
- e.text = buf;
+ e.class = class;
+ getl_location (&e.where.filename, &e.where.line_number);
+ e.title = _("corrupt system file: ");
- err_vmsg (&e);
- }
+ va_start (args, format);
+ err_vmsg (&e, format, args);
+ va_end (args);
}
/* Closes a system file after we're done with it. */
-static void
-sfm_close (struct file_handle * h)
-{
- struct sfm_fhuser_ext *ext = h->ext;
-
- ext->opened--;
- assert (ext->opened == 0);
- if (EOF == fn_close (h->fn, ext->file))
- msg (ME, _("%s: Closing system file: %s."), h->fn, strerror (errno));
- free (ext->buf);
- free (h->ext);
-}
-
-/* Closes a system file if we're done with it. */
void
-sfm_maybe_close (struct file_handle *h)
+sfm_close_reader (struct sfm_reader *r)
{
- struct sfm_fhuser_ext *ext = h->ext;
+ if (r == NULL)
+ return;
- if (ext->opened == 1)
- fh_close_handle (h);
- else
- ext->opened--;
+ if (r->fh != NULL)
+ fh_close (r->fh, "system file", "rs");
+
+ if ( r->file ) {
+ if (fn_close (handle_get_filename (r->fh), r->file) == EOF)
+ msg (ME, _("%s: Closing system file: %s."),
+ handle_get_filename (r->fh), strerror (errno));
+ r->file = NULL;
+ }
+ free (r->vars);
+ free (r->buf);
+ free (r);
}
\f
/* Dictionary reader. */
-static void *bufread (struct file_handle * handle, void *buf, size_t nbytes,
- size_t minalloc);
+static void buf_unread(struct sfm_reader *r, size_t byte_cnt);
+
+static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
+ size_t min_alloc);
-static int read_header (struct file_handle * h, struct sfm_read_info * inf);
-static int parse_format_spec (struct file_handle * h, int32 s,
- struct fmt_spec * v, struct variable *vv);
-static int read_value_labels (struct file_handle * h, struct variable ** var_by_index);
-static int read_variables (struct file_handle * h, struct variable *** var_by_index);
-static int read_machine_int32_info (struct file_handle * h, int size, int count);
-static int read_machine_flt64_info (struct file_handle * h, int size, int count);
-static int read_documents (struct file_handle * h);
+static int read_header (struct sfm_reader *,
+ struct dictionary *, struct sfm_read_info *);
+static int parse_format_spec (struct sfm_reader *, int32,
+ struct fmt_spec *, struct variable *);
+static int read_value_labels (struct sfm_reader *, struct dictionary *,
+ struct variable **var_by_idx);
+static int read_variables (struct sfm_reader *,
+ struct dictionary *, struct variable ***var_by_idx);
+static int read_machine_int32_info (struct sfm_reader *, int size, int count);
+static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
+static int read_documents (struct sfm_reader *, struct dictionary *);
-/* Displays the message X with corrupt_msg, then jumps to the lossage
+static int fread_ok (struct sfm_reader *, void *, size_t);
+
+/* Displays the message X with corrupt_msg, then jumps to the error
label. */
-#define lose(X) \
- do \
- { \
- corrupt_msg X; \
- goto lossage; \
- } \
- while (0)
-
-/* Calls bufread with the specified arguments, and jumps to lossage if
- the read fails. */
-#define assertive_bufread(a,b,c,d) \
- do \
- { \
- if (!bufread (a,b,c,d)) \
- goto lossage; \
- } \
- while (0)
-
-/* Reads the dictionary from file with handle H, and returns it in a
- dictionary structure. This dictionary may be modified in order to
- rename, reorder, and delete variables, etc. */
-struct dictionary *
-sfm_read_dictionary (struct file_handle * h, struct sfm_read_info * inf)
+#define lose(X) \
+ do { \
+ corrupt_msg X; \
+ goto error; \
+ } while (0)
+
+/* Calls buf_read with the specified arguments, and jumps to
+ error if the read fails. */
+#define assertive_buf_read(a,b,c,d) \
+ do { \
+ if (!buf_read (a,b,c,d)) \
+ goto error; \
+ } while (0)
+
+/* Opens the system file designated by file handle FH for
+ reading. Reads the system file's dictionary into *DICT.
+ If INFO is non-null, then it receives additional info about the
+ system file. */
+struct sfm_reader *
+sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
+ struct sfm_read_info *info)
{
- /* The file handle extension record. */
- struct sfm_fhuser_ext *ext;
+ struct sfm_reader *r = NULL;
+ struct variable **var_by_idx = NULL;
- /* Allows for quick reference to variables according to indexes
- relative to position within a case. */
- struct variable **var_by_index = NULL;
+ *dict = dict_create ();
+ if (!fh_open (fh, "system file", "rs"))
+ goto error;
- /* Check whether the file is already open. */
- if (h->class == &sfm_r_class)
- {
- ext = h->ext;
- ext->opened++;
- return ext->dict;
- }
- else if (h->class != NULL)
- {
- msg (ME, _("Cannot read file %s as system file: already opened for %s."),
- fh_handle_name (h), h->class->name);
- return NULL;
- }
+ /* Create and initialize reader. */
+ r = xmalloc (sizeof *r);
+ r->fh = fh;
+ r->file = fn_open (handle_get_filename (fh), "rb");
- msg (VM (1), _("%s: Opening system-file handle %s for reading."),
- fh_handle_filename (h), fh_handle_name (h));
-
- /* Open the physical disk file. */
- ext = xmalloc (sizeof (struct sfm_fhuser_ext));
- ext->file = fn_open (h->norm_fn, "rb");
- if (ext->file == NULL)
+ r->reverse_endian = 0;
+ r->fix_specials = 0;
+ r->value_cnt = 0;
+ r->case_cnt = 0;
+ r->compressed = 0;
+ r->bias = 100.0;
+ r->weight_idx = -1;
+
+ r->vars = NULL;
+
+ r->sysmis = -FLT64_MAX;
+ r->highest = FLT64_MAX;
+ r->lowest = second_lowest_flt64;
+
+ r->buf = r->ptr = r->end = NULL;
+ r->y = r->x + sizeof r->x;
+
+ /* Check that file open succeeded. */
+ if (r->file == NULL)
{
msg (ME, _("An error occurred while opening \"%s\" for reading "
- "as a system file: %s."), h->fn, strerror (errno));
+ "as a system file: %s."),
+ handle_get_filename (r->fh), strerror (errno));
err_cond_fail ();
- free (ext);
- return NULL;
+ goto error;
}
- /* Initialize the sfm_fhuser_ext structure. */
- h->class = &sfm_r_class;
- h->ext = ext;
- ext->dict = NULL;
- ext->buf = ext->ptr = ext->end = NULL;
- ext->y = ext->x + sizeof ext->x;
- ext->opened = 1;
-
- /* Default special constants. */
- ext->sysmis = -FLT64_MAX;
- ext->highest = FLT64_MAX;
- ext->lowest = second_lowest_flt64;
-
- /* Read the header. */
- if (!read_header (h, inf))
- goto lossage;
+ /* Read header and variables. */
+ if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
+ goto error;
- /* Read about the variables. */
- if (!read_variables (h, &var_by_index))
- goto lossage;
/* Handle weighting. */
- if (ext->weight_index != -1)
+ if (r->weight_idx != -1)
{
- struct variable *wv = var_by_index[ext->weight_index];
+ struct variable *weight_var;
+
+ if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt)
+ lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
+ "and number of elements per case (%d)."),
+ handle_get_filename (r->fh), r->weight_idx, r->value_cnt));
+
+
+ weight_var = var_by_idx[r->weight_idx];
- if (wv == NULL)
- lose ((ME, _("%s: Weighting variable may not be a continuation of "
- "a long string variable."), h->fn));
- else if (wv->type == ALPHA)
+ if (weight_var == NULL)
+ lose ((ME,
+ _("%s: Weighting variable may not be a continuation of "
+ "a long string variable."), handle_get_filename (fh)));
+ else if (weight_var->type == ALPHA)
lose ((ME, _("%s: Weighting variable may not be a string variable."),
- h->fn));
+ handle_get_filename (fh)));
- dict_set_weight (ext->dict, wv);
+ dict_set_weight (*dict, weight_var);
}
else
- dict_set_weight (ext->dict, NULL);
+ dict_set_weight (*dict, NULL);
/* Read records of types 3, 4, 6, and 7. */
for (;;)
{
int32 rec_type;
- assertive_bufread (h, &rec_type, sizeof rec_type, 0);
- if (ext->reverse_endian)
+ assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
+ if (r->reverse_endian)
bswap_int32 (&rec_type);
switch (rec_type)
{
case 3:
- if (!read_value_labels (h, var_by_index))
- goto lossage;
+ if (!read_value_labels (r, *dict, var_by_idx))
+ goto error;
break;
case 4:
lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
- "records must always immediately follow type 3 records."),
- h->fn));
+ "records must always immediately follow type 3 "
+ "records."),
+ handle_get_filename (r->fh)));
case 6:
- if (!read_documents (h))
- goto lossage;
+ if (!read_documents (r, *dict))
+ goto error;
break;
case 7:
int32 count P;
}
data;
+ unsigned long bytes;
int skip = 0;
- assertive_bufread (h, &data, sizeof data, 0);
- if (ext->reverse_endian)
+ assertive_buf_read (r, &data, sizeof data, 0);
+ if (r->reverse_endian)
{
bswap_int32 (&data.subtype);
bswap_int32 (&data.size);
bswap_int32 (&data.count);
}
-
- /*if(data.size != sizeof(int32) && data.size != sizeof(flt64))
- lose((ME, "%s: Element size in record type 7, subtype %d, is "
- "not either the size of IN (%d) or OBS (%d); actual value "
- "is %d.",
- h->fn, data.subtype, sizeof(int32), sizeof(flt64),
- data.size)); */
+ bytes = data.size * data.count;
+ if (bytes < data.size || bytes < data.count)
+ lose ((ME, "%s: Record type %d subtype %d too large.",
+ handle_get_filename (r->fh), rec_type, data.subtype));
switch (data.subtype)
{
case 3:
- if (!read_machine_int32_info (h, data.size, data.count))
- goto lossage;
+ if (!read_machine_int32_info (r, data.size, data.count))
+ goto error;
break;
case 4:
- if (!read_machine_flt64_info (h, data.size, data.count))
- goto lossage;
+ if (!read_machine_flt64_info (r, data.size, data.count))
+ goto error;
break;
case 5:
- case 6:
- case 11: /* ?? Used by SPSS 8.0. */
+ case 6: /* ?? Used by SPSS 8.0. */
skip = 1;
break;
+
+ case 11: /* Variable display parameters */
+ {
+ const int n_vars = data.count / 3 ;
+ int i;
+ if ( data.count % 3 || n_vars > dict_get_var_cnt(*dict) )
+ {
+ msg (MW, _("%s: Invalid subrecord length. "
+ "Record: 7; Subrecord: 11"),
+ handle_get_filename (r->fh));
+ skip = 1;
+ }
+
+ for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i )
+ {
+ struct
+ {
+ int32 measure P;
+ int32 width P;
+ int32 align P;
+ }
+ params;
+
+ struct variable *v;
+
+ assertive_buf_read (r, ¶ms, sizeof(params), 0);
+
+ v = dict_get_var(*dict, i);
+
+ v->measure = params.measure;
+ v->display_width = params.width;
+ v->alignment = params.align;
+ }
+ }
+ break;
+
+ case 13: /* SPSS 12.0 Long variable name map */
+ {
+ char *buf, *short_name, *save_ptr;
+ int idx;
+
+ /* Read data. */
+ buf = xmalloc (bytes + 1);
+ if (!buf_read (r, buf, bytes, 0))
+ {
+ free (buf);
+ goto error;
+ }
+ buf[bytes] = '\0';
+
+ /* Parse data. */
+ for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0;
+ short_name != NULL;
+ short_name = strtok_r (NULL, "=", &save_ptr), idx++)
+ {
+ char *long_name = strtok_r (NULL, "\t", &save_ptr);
+ struct variable *v;
+
+ /* Validate long name. */
+ if (long_name == NULL)
+ {
+ msg (MW, _("%s: Trailing garbage in long variable "
+ "name map."),
+ handle_get_filename (r->fh));
+ break;
+ }
+ if (!var_is_valid_name (long_name, false))
+ {
+ msg (MW, _("%s: Long variable mapping to invalid "
+ "variable name `%s'."),
+ handle_get_filename (r->fh), long_name);
+ break;
+ }
+
+ /* Find variable using short name. */
+ v = dict_lookup_var (*dict, short_name);
+ if (v == NULL)
+ {
+ msg (MW, _("%s: Long variable mapping for "
+ "nonexistent variable %s."),
+ handle_get_filename (r->fh), short_name);
+ break;
+ }
+
+ /* Identify any duplicates. */
+ if ( compare_var_names(short_name, long_name, 0) &&
+ NULL != dict_lookup_var (*dict, long_name))
+ {
+ lose ((ME, _("%s: Duplicate long variable name `%s' "
+ "within system file."),
+ handle_get_filename (r->fh), long_name));
+ break;
+ }
+
+ /* Set long name.
+ Renaming a variable may clear the short
+ name, but we want to retain it, so
+ re-set it explicitly. */
+ dict_rename_var (*dict, v, long_name);
+ var_set_short_name (v, short_name);
+
+ /* For compatibility, make sure dictionary
+ is in long variable name map order. In
+ the common case, this has no effect,
+ because the dictionary and the long
+ variable name map are already in the
+ same order. */
+ dict_reorder_var (*dict, v, idx);
+ }
+
+ /* Free data. */
+ free (buf);
+ }
+ break;
default:
msg (MW, _("%s: Unrecognized record type 7, subtype %d "
- "encountered in system file."), h->fn, data.subtype);
+ "encountered in system file."),
+ handle_get_filename (r->fh), data.subtype);
skip = 1;
}
if (skip)
{
- void *x = bufread (h, NULL, data.size * data.count, 0);
+ void *x = buf_read (r, NULL, data.size * data.count, 0);
if (x == NULL)
- goto lossage;
+ goto error;
free (x);
}
}
{
int32 filler;
- assertive_bufread (h, &filler, sizeof filler, 0);
- goto break_out_of_loop;
+ assertive_buf_read (r, &filler, sizeof filler, 0);
+ goto success;
}
default:
- lose ((ME, _("%s: Unrecognized record type %d."), h->fn, rec_type));
+ corrupt_msg(MW, _("%s: Unrecognized record type %d."),
+ handle_get_filename (r->fh), rec_type);
}
}
-break_out_of_loop:
+success:
/* Come here on successful completion. */
- msg (VM (2), _("Read system-file dictionary successfully."));
-
-#if DEBUGGING
- dump_dictionary (ext->dict);
-#endif
- free (var_by_index);
- return ext->dict;
+ free (var_by_idx);
+ return r;
-lossage:
+error:
/* Come here on unsuccessful completion. */
- msg (VM (1), _("Error reading system-file header."));
-
- free (var_by_index);
- fn_close (h->fn, ext->file);
- if (ext && ext->dict)
- dict_destroy (ext->dict);
- free (ext);
- h->class = NULL;
- h->ext = NULL;
+ sfm_close_reader (r);
+ free (var_by_idx);
+ if (*dict != NULL)
+ {
+ dict_destroy (*dict);
+ *dict = NULL;
+ }
return NULL;
}
/* Read record type 7, subtype 3. */
static int
-read_machine_int32_info (struct file_handle * h, int size, int count)
+read_machine_int32_info (struct sfm_reader *r, int size, int count)
{
- struct sfm_fhuser_ext *ext = h->ext;
-
int32 data[8];
int file_bigendian;
if (size != sizeof (int32) || count != 8)
lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
- "subtype 3. Expected size %d, count 8."),
- h->fn, size, count, sizeof (int32)));
+ "subtype 3. Expected size %d, count 8."),
+ handle_get_filename (r->fh), size, count, sizeof (int32)));
- assertive_bufread (h, data, sizeof data, 0);
- if (ext->reverse_endian)
+ assertive_buf_read (r, data, sizeof data, 0);
+ if (r->reverse_endian)
for (i = 0; i < 8; i++)
bswap_int32 (&data[i]);
- /* PORTME: Check floating-point representation. */
#ifdef FPREP_IEEE754
if (data[4] != 1)
lose ((ME, _("%s: Floating-point representation in system file is not "
"IEEE-754. PSPP cannot convert between floating-point "
- "formats."), h->fn));
+ "formats."),
+ handle_get_filename (r->fh)));
+#else
+#error Add support for your floating-point format.
#endif
- /* PORTME: Check recorded file endianness against intuited file
- endianness. */
#ifdef WORDS_BIGENDIAN
file_bigendian = 1;
#else
file_bigendian = 0;
#endif
- if (ext->reverse_endian)
+ if (r->reverse_endian)
file_bigendian ^= 1;
if (file_bigendian ^ (data[6] == 1))
- lose ((ME, _("%s: File-indicated endianness (%s) does not match endianness "
- "intuited from file header (%s)."),
- h->fn, file_bigendian ? _("big-endian") : _("little-endian"),
+ lose ((ME, _("%s: File-indicated endianness (%s) does not match "
+ "endianness intuited from file header (%s)."),
+ handle_get_filename (r->fh),
+ file_bigendian ? _("big-endian") : _("little-endian"),
data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
: _("unknown"))));
/* PORTME: Character representation code. */
- if (data[7] != 2 && data[7] != 3)
- lose ((ME, _("%s: File-indicated character representation code (%s) is not "
- "ASCII."), h->fn,
- data[7] == 1 ? "EBCDIC" : (data[7] == 4 ? _("DEC Kanji") : _("Unknown"))));
+ if (data[7] != 2 && data[7] != 3)
+ lose ((ME, _("%s: File-indicated character representation code (%s) is "
+ "not ASCII."),
+ handle_get_filename (r->fh),
+ (data[7] == 1 ? "EBCDIC"
+ : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
return 1;
-lossage:
+error:
return 0;
}
/* Read record type 7, subtype 4. */
+/* SIZE and COUNT are the element size and element count taken from
+ the record header; the record must consist of exactly three flt64
+ values. They are byte-swapped if the file's endianness is
+ reversed, and if any of them differs from the internal SYSMIS,
+ FLT64_MAX, or second_lowest_flt64 they are stored in R as the
+ file's sysmis, highest, and lowest values and a warning is
+ issued. Returns 1 on success, 0 on error. */
static int
-read_machine_flt64_info (struct file_handle * h, int size, int count)
+read_machine_flt64_info (struct sfm_reader *r, int size, int count)
{
- struct sfm_fhuser_ext *ext = h->ext;
-
flt64 data[3];
-
int i;
+ /* sizeof yields a size_t, so it is cast to int below to match the
+ %d conversion in the message. */
if (size != sizeof (flt64) || count != 3)
lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
- "subtype 4. Expected size %d, count 8."),
- h->fn, size, count, sizeof (flt64)));
+ "subtype 4. Expected size %d, count 3."),
+ handle_get_filename (r->fh), size, count, (int) sizeof (flt64)));
- assertive_bufread (h, data, sizeof data, 0);
- if (ext->reverse_endian)
+ assertive_buf_read (r, data, sizeof data, 0);
+ if (r->reverse_endian)
for (i = 0; i < 3; i++)
bswap_flt64 (&data[i]);
if (data[0] != SYSMIS || data[1] != FLT64_MAX
|| data[2] != second_lowest_flt64)
{
- ext->sysmis = data[0];
- ext->highest = data[1];
- ext->lowest = data[2];
+ r->sysmis = data[0];
+ r->highest = data[1];
+ r->lowest = data[2];
msg (MW, _("%s: File-indicated value is different from internal value "
"for at least one of the three system values. SYSMIS: "
"indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
"%g, %g."),
- h->fn, (double) data[0], (double) SYSMIS,
+ handle_get_filename (r->fh), (double) data[0], (double) SYSMIS,
(double) data[1], (double) FLT64_MAX,
(double) data[2], (double) second_lowest_flt64);
}
return 1;
-lossage:
+error:
return 0;
}
static int
-read_header (struct file_handle * h, struct sfm_read_info * inf)
+read_header (struct sfm_reader *r,
+ struct dictionary *dict, struct sfm_read_info *info)
{
- struct sfm_fhuser_ext *ext = h->ext; /* File extension strcut. */
struct sysfile_header hdr; /* Disk buffer. */
- struct dictionary *dict; /* File dictionary. */
char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
int skip_amt = 0; /* Amount of product name to omit. */
int i;
- /* Create the dictionary. */
- dict = ext->dict = dict_create ();
-
/* Read header, check magic. */
- assertive_bufread (h, &hdr, sizeof hdr, 0);
- if (0 != strncmp ("$FL2", hdr.rec_type, 4))
+ assertive_buf_read (r, &hdr, sizeof hdr, 0);
+ if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
lose ((ME, _("%s: Bad magic. Proper system files begin with "
"the four characters `$FL2'. This file will not be read."),
- h->fn));
+ handle_get_filename (r->fh)));
/* Check eye-catcher string. */
memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
}
/* Check endianness. */
- /* PORTME: endianness. */
if (hdr.layout_code == 2)
- ext->reverse_endian = 0;
+ r->reverse_endian = 0;
else
{
bswap_int32 (&hdr.layout_code);
if (hdr.layout_code != 2)
lose ((ME, _("%s: File layout code has unexpected value %d. Value "
- "should be 2, in big-endian or little-endian format."),
- h->fn, hdr.layout_code));
+ "should be 2, in big-endian or little-endian format."),
+ handle_get_filename (r->fh), hdr.layout_code));
- ext->reverse_endian = 1;
+ r->reverse_endian = 1;
bswap_int32 (&hdr.case_size);
- bswap_int32 (&hdr.compressed);
- bswap_int32 (&hdr.weight_index);
- bswap_int32 (&hdr.ncases);
+ bswap_int32 (&hdr.compress);
+ bswap_int32 (&hdr.weight_idx);
+ bswap_int32 (&hdr.case_cnt);
bswap_flt64 (&hdr.bias);
}
+
/* Copy basic info and verify correctness. */
- ext->case_size = hdr.case_size;
- if (hdr.case_size <= 0 || ext->case_size > (INT_MAX
- / (int) sizeof (union value) / 2))
- lose ((ME, _("%s: Number of elements per case (%d) is not between 1 "
- "and %d."), h->fn, hdr.case_size, INT_MAX / sizeof (union value) / 2));
-
- ext->compressed = hdr.compressed;
-
- ext->weight_index = hdr.weight_index - 1;
- if (hdr.weight_index < 0 || hdr.weight_index > hdr.case_size)
- lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
- "and number of elements per case (%d)."),
- h->fn, hdr.weight_index, ext->case_size));
-
- ext->ncases = hdr.ncases;
- if (ext->ncases < -1 || ext->ncases > INT_MAX / 2)
- lose ((ME, _("%s: Number of cases in file (%ld) is not between -1 and "
- "%d."), h->fn, (long) ext->ncases, INT_MAX / 2));
-
- ext->bias = hdr.bias;
- if (ext->bias != 100.0)
+ r->value_cnt = hdr.case_size;
+
+ /* If value count is ridiculous, then force it to -1 (a sentinel value) */
+ if ( r->value_cnt < 0 ||
+ r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
+ r->value_cnt = -1;
+
+ r->compressed = hdr.compress;
+
+ r->weight_idx = hdr.weight_idx - 1;
+
+ r->case_cnt = hdr.case_cnt;
+ if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
+ lose ((ME,
+ _("%s: Number of cases in file (%ld) is not between -1 and %d."),
+ handle_get_filename (r->fh), (long) r->case_cnt, INT_MAX / 2));
+
+ r->bias = hdr.bias;
+ if (r->bias != 100.0)
corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
- "value of 100."), h->fn, ext->bias);
+ "value of 100."),
+ handle_get_filename (r->fh), r->bias);
/* Make a file label only on the condition that the given label is
not all spaces or nulls. */
}
}
- if (inf)
+ if (info)
{
char *cp;
- memcpy (inf->creation_date, hdr.creation_date, 9);
- inf->creation_date[9] = 0;
+ memcpy (info->creation_date, hdr.creation_date, 9);
+ info->creation_date[9] = 0;
- memcpy (inf->creation_time, hdr.creation_time, 8);
- inf->creation_time[8] = 0;
+ memcpy (info->creation_time, hdr.creation_time, 8);
+ info->creation_time[8] = 0;
#ifdef WORDS_BIGENDIAN
- inf->bigendian = !ext->reverse_endian;
+ info->big_endian = !r->reverse_endian;
#else
- inf->bigendian = ext->reverse_endian;
+ info->big_endian = r->reverse_endian;
#endif
- inf->compressed = hdr.compressed;
+ info->compressed = hdr.compress;
- inf->ncases = hdr.ncases;
+ info->case_cnt = hdr.case_cnt;
for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
if (isgraph ((unsigned char) *cp))
break;
- strcpy (inf->product, cp);
+ strcpy (info->product, cp);
}
return 1;
-lossage:
+error:
return 0;
}
/* Reads most of the dictionary from file H; also fills in the
- associated VAR_BY_INDEX array. The get.* elements in the
- created dictionary are set to appropriate values to allow the
- file to be read. */
+ associated VAR_BY_IDX array. */
static int
-read_variables (struct file_handle * h, struct variable *** var_by_index)
+read_variables (struct sfm_reader *r,
+ struct dictionary *dict, struct variable ***var_by_idx)
{
int i;
- struct sfm_fhuser_ext *ext = h->ext; /* File extension record. */
- struct dictionary *dict = ext->dict; /* Dictionary being constructed. */
struct sysfile_variable sv; /* Disk buffer. */
int long_string_count = 0; /* # of long string continuation
records still expected. */
int next_value = 0; /* Index to next `value' structure. */
- /* Allocate variables. */
- *var_by_index = xmalloc (sizeof **var_by_index * ext->case_size);
+ assert(r);
+
+ *var_by_idx = 0;
+
+ /* Pre-allocate variables. */
+ if ( r->value_cnt != -1 )
+ {
+ *var_by_idx = xmalloc(r->value_cnt * sizeof (**var_by_idx));
+ r->vars = xmalloc( r->value_cnt * sizeof (*r->vars) );
+ }
+
/* Read in the entry for each variable and use the info to
initialize the dictionary. */
- for (i = 0; i < ext->case_size; i++)
+ for (i = 0; ; ++i)
{
struct variable *vv;
- char name[9];
+ char name[SHORT_NAME_LEN + 1];
+ int nv;
int j;
- assertive_bufread (h, &sv, sizeof sv, 0);
+ if ( r->value_cnt != -1 && i >= r->value_cnt )
+ break;
- if (ext->reverse_endian)
+ assertive_buf_read (r, &sv, sizeof sv, 0);
+
+ if (r->reverse_endian)
{
bswap_int32 (&sv.rec_type);
bswap_int32 (&sv.type);
bswap_int32 (&sv.write);
}
+ /* We've come to the end of the variable entries */
if (sv.rec_type != 2)
- lose ((ME, _("%s: position %d: Bad record type (%d); "
- "the expected value was 2."), h->fn, i, sv.rec_type));
+ {
+ buf_unread(r, sizeof sv);
+ r->value_cnt = i;
+ break;
+ }
+
+ if ( -1 == r->value_cnt )
+ {
+ *var_by_idx = xrealloc (*var_by_idx, sizeof **var_by_idx * (i + 1));
+ r->vars = xrealloc(r->vars, (i + 1) * sizeof (*r->vars) );
+ }
/* If there was a long string previously, make sure that the
continuations are present; otherwise make sure there aren't
{
if (sv.type != -1)
lose ((ME, _("%s: position %d: String variable does not have "
- "proper number of continuation records."), h->fn, i));
+ "proper number of continuation records."),
+ handle_get_filename (r->fh), i));
- (*var_by_index)[i] = NULL;
+
+ r->vars[i].width = -1;
+ (*var_by_idx)[i] = NULL;
long_string_count--;
continue;
}
else if (sv.type == -1)
lose ((ME, _("%s: position %d: Superfluous long string continuation "
- "record."), h->fn, i));
+ "record."),
+ handle_get_filename (r->fh), i));
/* Check fields for validity. */
if (sv.type < 0 || sv.type > 255)
lose ((ME, _("%s: position %d: Bad variable type code %d."),
- h->fn, i, sv.type));
+ handle_get_filename (r->fh), i, sv.type));
if (sv.has_var_label != 0 && sv.has_var_label != 1)
lose ((ME, _("%s: position %d: Variable label indicator field is not "
- "0 or 1."), h->fn, i));
+ "0 or 1."), handle_get_filename (r->fh), i));
if (sv.n_missing_values < -3 || sv.n_missing_values > 3
|| sv.n_missing_values == -1)
lose ((ME, _("%s: position %d: Missing value indicator field is not "
- "-3, -2, 0, 1, 2, or 3."), h->fn, i));
+ "-3, -2, 0, 1, 2, or 3."), handle_get_filename (r->fh), i));
/* Copy first character of variable name. */
if (!isalpha ((unsigned char) sv.name[0])
&& sv.name[0] != '@' && sv.name[0] != '#')
lose ((ME, _("%s: position %d: Variable name begins with invalid "
- "character."), h->fn, i));
+ "character."),
+ handle_get_filename (r->fh), i));
if (islower ((unsigned char) sv.name[0]))
msg (MW, _("%s: position %d: Variable name begins with lowercase letter "
- "%c."), h->fn, i, sv.name[0]);
+ "%c."),
+ handle_get_filename (r->fh), i, sv.name[0]);
if (sv.name[0] == '#')
msg (MW, _("%s: position %d: Variable name begins with octothorpe "
"(`#'). Scratch variables should not appear in system "
- "files."), h->fn, i);
+ "files."),
+ handle_get_filename (r->fh), i);
name[0] = toupper ((unsigned char) (sv.name[0]));
/* Copy remaining characters of variable name. */
- for (j = 1; j < 8; j++)
+ for (j = 1; j < SHORT_NAME_LEN; j++)
{
int c = (unsigned char) sv.name[j];
else if (islower (c))
{
msg (MW, _("%s: position %d: Variable name character %d is "
- "lowercase letter %c."), h->fn, i, j + 1, sv.name[j]);
+ "lowercase letter %c."),
+ handle_get_filename (r->fh), i, j + 1, sv.name[j]);
name[j] = toupper ((unsigned char) (c));
}
else if (isalnum (c) || c == '.' || c == '@'
name[j] = c;
else
lose ((ME, _("%s: position %d: character `\\%03o' (%c) is not valid in a "
- "variable name."), h->fn, i, c, c));
+ "variable name."),
+ handle_get_filename (r->fh), i, c, c));
}
name[j] = 0;
+ if ( ! var_is_valid_name(name, false) )
+ lose ((ME, _("%s: Invalid variable name `%s' within system file."),
+ handle_get_filename (r->fh), name));
+
/* Create variable. */
- vv = (*var_by_index)[i] = dict_create_var (dict, name, sv.type);
+
+ vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
if (vv == NULL)
lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
- h->fn, name));
+ handle_get_filename (r->fh), name));
+
+ var_set_short_name (vv, vv->name);
/* Case reading data. */
- vv->get.fv = next_value;
- if (sv.type == 0)
- vv->get.nv = 1;
- else
- vv->get.nv = DIV_RND_UP (sv.type, sizeof (flt64));
- long_string_count = vv->get.nv - 1;
- next_value += vv->get.nv;
+ nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
+ long_string_count = nv - 1;
+ next_value += nv;
/* Get variable label, if any. */
if (sv.has_var_label == 1)
int32 len;
/* Read length of label. */
- assertive_bufread (h, &len, sizeof len, 0);
- if (ext->reverse_endian)
+ assertive_buf_read (r, &len, sizeof len, 0);
+ if (r->reverse_endian)
bswap_int32 (&len);
/* Check len. */
if (len < 0 || len > 255)
lose ((ME, _("%s: Variable %s indicates variable label of invalid "
- "length %d."), h->fn, vv->name, len));
+ "length %d."),
+ handle_get_filename (r->fh), vv->name, len));
- /* Read label into variable structure. */
- vv->label = bufread (h, NULL, ROUND_UP (len, sizeof (int32)), len + 1);
- if (vv->label == NULL)
- goto lossage;
- vv->label[len] = '\0';
+ if ( len != 0 )
+ {
+ /* Read label into variable structure. */
+ vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32)), len + 1);
+ if (vv->label == NULL)
+ goto error;
+ vv->label[len] = '\0';
+ }
}
/* Set missing values. */
if (vv->width > MAX_SHORT_STRING)
lose ((ME, _("%s: Long string variable %s may not have missing "
- "values."), h->fn, vv->name));
+ "values."),
+ handle_get_filename (r->fh), vv->name));
- assertive_bufread (h, mv, sizeof *mv * abs (sv.n_missing_values), 0);
+ assertive_buf_read (r, mv, sizeof *mv * abs (sv.n_missing_values), 0);
- if (ext->reverse_endian && vv->type == NUMERIC)
+ if (r->reverse_endian && vv->type == NUMERIC)
for (j = 0; j < abs (sv.n_missing_values); j++)
bswap_flt64 (&mv[j]);
if (vv->type == ALPHA)
lose ((ME, _("%s: String variable %s may not have missing "
- "values specified as a range."), h->fn, vv->name));
+ "values specified as a range."),
+ handle_get_filename (r->fh), vv->name));
- if (mv[0] == ext->lowest)
+ if (mv[0] == r->lowest)
{
vv->miss_type = MISSING_LOW;
vv->missing[x++].f = mv[1];
}
- else if (mv[1] == ext->highest)
+ else if (mv[1] == r->highest)
{
vv->miss_type = MISSING_HIGH;
vv->missing[x++].f = mv[0];
else
vv->miss_type = MISSING_NONE;
- if (!parse_format_spec (h, sv.print, &vv->print, vv)
- || !parse_format_spec (h, sv.write, &vv->write, vv))
- goto lossage;
+ if (!parse_format_spec (r, sv.print, &vv->print, vv)
+ || !parse_format_spec (r, sv.write, &vv->write, vv))
+ goto error;
+
+ r->vars[i].width = vv->width;
+ r->vars[i].fv = vv->fv;
+
}
/* Some consistency checks. */
if (long_string_count != 0)
lose ((ME, _("%s: Long string continuation records omitted at end of "
- "dictionary."), h->fn));
- if (next_value != ext->case_size)
- lose ((ME, _("%s: System file header indicates %d variable positions but "
- "%d were read from file."), h->fn, ext->case_size, next_value));
+ "dictionary."),
+ handle_get_filename (r->fh)));
+
+ if (next_value != r->value_cnt)
+ corrupt_msg(MW, _("%s: System file header indicates %d variable positions but "
+ "%d were read from file."),
+ handle_get_filename (r->fh), r->value_cnt, next_value);
- return 1;
-lossage:
- dict_destroy (dict);
- ext->dict = NULL;
+ return 1;
+error:
return 0;
}
/* Translates the format spec from sysfile format to internal
format. */
static int
-parse_format_spec (struct file_handle *h, int32 s, struct fmt_spec *v, struct variable *vv)
+parse_format_spec (struct sfm_reader *r, int32 s,
+ struct fmt_spec *f, struct variable *v)
{
- v->type = translate_fmt ((s >> 16) & 0xff);
- if (v->type == -1)
+ f->type = translate_fmt ((s >> 16) & 0xff);
+ if (f->type == -1)
lose ((ME, _("%s: Bad format specifier byte (%d)."),
- h->fn, (s >> 16) & 0xff));
- v->w = (s >> 8) & 0xff;
- v->d = s & 0xff;
-
- /* FIXME? Should verify the resulting specifier more thoroughly. */
+ handle_get_filename (r->fh), (s >> 16) & 0xff));
+ f->w = (s >> 8) & 0xff;
+ f->d = s & 0xff;
- if (v->type == -1)
- lose ((ME, _("%s: Bad format specifier byte (%d)."),
- h->fn, (s >> 16) & 0xff));
- if ((vv->type == ALPHA) ^ ((formats[v->type].cat & FCAT_STRING) != 0))
+ if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0))
lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
- h->fn, vv->type == ALPHA ? _("String") : _("Numeric"),
- vv->name,
- formats[v->type].cat & FCAT_STRING ? _("string") : _("numeric"),
- formats[v->type].name));
+ handle_get_filename (r->fh),
+ v->type == ALPHA ? _("String") : _("Numeric"),
+ v->name,
+ formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"),
+ formats[f->type].name));
+
+ if (!check_output_specifier (f, false)
+ || !check_specifier_width (f, v->width, false))
+ {
+ msg (ME, _("%s variable %s has invalid format specifier %s."),
+ v->type == NUMERIC ? _("Numeric") : _("String"),
+ v->name, fmt_to_string (f));
+ *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0);
+ }
return 1;
-lossage:
+error:
return 0;
}
/* Reads value labels from sysfile H and inserts them into the
associated dictionary. */
int
-read_value_labels (struct file_handle * h, struct variable ** var_by_index)
+read_value_labels (struct sfm_reader *r,
+ struct dictionary *dict, struct variable **var_by_idx)
{
- struct sfm_fhuser_ext *ext = h->ext; /* File extension record. */
-
struct label
{
unsigned char raw_value[8]; /* Value as uninterpreted bytes. */
don't know yet whether it is of numeric or string type. */
/* Read number of labels. */
- assertive_bufread (h, &n_labels, sizeof n_labels, 0);
- if (ext->reverse_endian)
+ assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
+ if (r->reverse_endian)
bswap_int32 (&n_labels);
+ if ( n_labels >= ((int32) ~0) / sizeof *labels)
+ {
+ corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."),
+ handle_get_filename (r->fh), n_labels);
+ n_labels = 0;
+ }
+
/* Allocate memory. */
- labels = xmalloc (n_labels * sizeof *labels);
+ labels = xcalloc (n_labels , sizeof *labels);
for (i = 0; i < n_labels; i++)
labels[i].label = NULL;
size_t padded_len;
/* Read value. */
- assertive_bufread (h, label->raw_value, sizeof label->raw_value, 0);
+ assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
/* Read label length. */
- assertive_bufread (h, &label_len, sizeof label_len, 0);
+ assertive_buf_read (r, &label_len, sizeof label_len, 0);
padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
/* Read label, padding. */
label->label = xmalloc (padded_len + 1);
- assertive_bufread (h, label->label, padded_len - 1, 0);
+ assertive_buf_read (r, label->label, padded_len - 1, 0);
label->label[label_len] = 0;
}
{
int32 rec_type;
- assertive_bufread (h, &rec_type, sizeof rec_type, 0);
- if (ext->reverse_endian)
+ assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
+ if (r->reverse_endian)
bswap_int32 (&rec_type);
if (rec_type != 4)
lose ((ME, _("%s: Variable index record (type 4) does not immediately "
- "follow value label record (type 3) as it should."), h->fn));
+ "follow value label record (type 3) as it should."),
+ handle_get_filename (r->fh)));
}
/* Read number of variables associated with value label from type 4
record. */
- assertive_bufread (h, &n_vars, sizeof n_vars, 0);
- if (ext->reverse_endian)
+ assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
+ if (r->reverse_endian)
bswap_int32 (&n_vars);
- if (n_vars < 1 || n_vars > dict_get_var_cnt (ext->dict))
+ if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
lose ((ME, _("%s: Number of variables associated with a value label (%d) "
- "is not between 1 and the number of variables (%d)."),
- h->fn, n_vars, dict_get_var_cnt (ext->dict)));
+ "is not between 1 and the number of variables (%d)."),
+ handle_get_filename (r->fh), n_vars, dict_get_var_cnt (dict)));
/* Read the list of variables. */
var = xmalloc (n_vars * sizeof *var);
for (i = 0; i < n_vars; i++)
{
- int32 var_index;
+ int32 var_idx;
struct variable *v;
/* Read variable index, check range. */
- assertive_bufread (h, &var_index, sizeof var_index, 0);
- if (ext->reverse_endian)
- bswap_int32 (&var_index);
- if (var_index < 1 || var_index > ext->case_size)
+ assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
+ if (r->reverse_endian)
+ bswap_int32 (&var_idx);
+ if (var_idx < 1 || var_idx > r->value_cnt)
lose ((ME, _("%s: Variable index associated with value label (%d) is "
- "not between 1 and the number of values (%d)."),
- h->fn, var_index, ext->case_size));
+ "not between 1 and the number of values (%d)."),
+ handle_get_filename (r->fh), var_idx, r->value_cnt));
/* Make sure it's a real variable. */
- v = var_by_index[var_index - 1];
+ v = var_by_idx[var_idx - 1];
if (v == NULL)
lose ((ME, _("%s: Variable index associated with value label (%d) "
"refers to a continuation of a string variable, not to "
- "an actual variable."), h->fn, var_index));
+ "an actual variable."),
+ handle_get_filename (r->fh), var_idx));
if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
lose ((ME, _("%s: Value labels are not allowed on long string "
- "variables (%s)."), h->fn, v->name));
+ "variables (%s)."),
+ handle_get_filename (r->fh), v->name));
/* Add it to the list of variables. */
var[i] = v;
for (i = 1; i < n_vars; i++)
if (var[i]->type != var[0]->type)
lose ((ME, _("%s: Variables associated with value label are not all of "
- "identical type. Variable %s has %s type, but variable %s has "
- "%s type."), h->fn,
+ "identical type. Variable %s has %s type, but variable "
+ "%s has %s type."),
+ handle_get_filename (r->fh),
var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
flt64 f;
assert (sizeof f == sizeof label->raw_value);
memcpy (&f, label->raw_value, sizeof f);
- if (ext->reverse_endian)
+ if (r->reverse_endian)
bswap_flt64 (&f);
label->value.f = f;
}
if (var[0]->type == NUMERIC)
msg (MW, _("%s: File contains duplicate label for value %g for "
- "variable %s."), h->fn, label->value.f, v->name);
+ "variable %s."),
+ handle_get_filename (r->fh), label->value.f, v->name);
else
msg (MW, _("%s: File contains duplicate label for value `%.*s' "
- "for variable %s."),
- h->fn, v->width, label->value.s, v->name);
+ "for variable %s."),
+ handle_get_filename (r->fh), v->width, label->value.s, v->name);
}
}
free (var);
return 1;
-lossage:
+error:
if (labels)
{
for (i = 0; i < n_labels; i++)
return 0;
}
-/* Reads NBYTES bytes from the file represented by H. If BUF is
+/* Reads BYTE_CNT bytes from the file represented by R. If BUF is
non-NULL, uses that as the buffer; otherwise allocates at least
- MINALLOC bytes. Returns a pointer to the buffer on success, NULL
+ MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
on failure. */
static void *
-bufread (struct file_handle * h, void *buf, size_t nbytes, size_t minalloc)
+buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
{
- struct sfm_fhuser_ext *ext = h->ext;
+ assert (r);
+
+ if (buf == NULL && byte_cnt > 0 )
+ buf = xmalloc (max (byte_cnt, min_alloc));
- if (buf == NULL)
- buf = xmalloc (max (nbytes, minalloc));
- if (1 != fread (buf, nbytes, 1, ext->file))
+ if ( byte_cnt == 0 )
+ return buf;
+
+
+ if (1 != fread (buf, byte_cnt, 1, r->file))
{
- if (ferror (ext->file))
- msg (ME, _("%s: Reading system file: %s."), h->fn, strerror (errno));
+ if (ferror (r->file))
+ msg (ME, _("%s: Reading system file: %s."),
+ handle_get_filename (r->fh), strerror (errno));
else
- corrupt_msg (ME, _("%s: Unexpected end of file."), h->fn);
+ corrupt_msg (ME, _("%s: Unexpected end of file."),
+ handle_get_filename (r->fh));
return NULL;
}
return buf;
}
-/* Reads a document record, type 6, from system file H, and sets up
+/* Winds the reader BYTE_CNT bytes back in the reader stream.
+ BYTE_CNT must be positive. On seek failure an error message is
+ issued; no status is returned to the caller. */
+void
+buf_unread(struct sfm_reader *r, size_t byte_cnt)
+{
+ assert(byte_cnt > 0);
+
+ /* Convert to long before negating: negating a size_t yields a
+ huge unsigned value, not a negative offset. */
+ if ( 0 != fseek(r->file, -(long) byte_cnt, SEEK_CUR))
+ {
+ msg (ME, _("%s: Seeking system file: %s."),
+ handle_get_filename (r->fh), strerror (errno));
+ }
+}
+
+/* Reads a document record, type 6, from system file R, and sets up
the documents and n_documents fields in the associated
dictionary. */
static int
-read_documents (struct file_handle * h)
+read_documents (struct sfm_reader *r, struct dictionary *dict)
{
- struct sfm_fhuser_ext *ext = h->ext;
- struct dictionary *dict = ext->dict;
- int32 n_lines;
+ int32 line_cnt;
char *documents;
if (dict_get_documents (dict) != NULL)
- lose ((ME, _("%s: System file contains multiple type 6 (document) records."),
- h->fn));
+ lose ((ME, _("%s: System file contains multiple "
+ "type 6 (document) records."),
+ handle_get_filename (r->fh)));
- assertive_bufread (h, &n_lines, sizeof n_lines, 0);
- if (n_lines <= 0)
- lose ((ME, _("%s: Number of document lines (%ld) must be greater than 0."),
- h->fn, (long) n_lines));
+ assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
+ /* The line count is stored in the file's byte order, like every
+ other int32 field read from the file. */
+ if (r->reverse_endian)
+ bswap_int32 (&line_cnt);
+ if (line_cnt <= 0)
+ lose ((ME, _("%s: Number of document lines (%ld) "
+ "must be greater than 0."),
+ handle_get_filename (r->fh), (long) line_cnt));
- documents = bufread (h, NULL, 80 * n_lines, n_lines * 80 + 1);
+ documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
/* FIXME? Run through asciify. */
if (documents == NULL)
return 0;
- documents[80 * n_lines] = '\0';
+ documents[80 * line_cnt] = '\0';
dict_set_documents (dict, documents);
free (documents);
return 1;
-lossage:
+error:
return 0;
}
-
-#if GLOBAL_DEBUGGING
-#include "debug-print.h"
-/* Displays dictionary DICT on stdout. */
-void
-dump_dictionary (struct dictionary * dict)
-{
- int i;
-
- debug_printf ((_("dictionary:\n")));
- for (i = 0; i < dict->nvar; i++)
- {
- char print[32];
- struct variable *v = dict->var[i];
- int n, j;
-
- debug_printf ((" var %s", v->name));
- debug_printf (("(type:%s,%d)", (v->type == NUMERIC ? _("num")
- : (v->type == ALPHA ? _("str") : "!!!")),
- v->width));
- debug_printf (("(fv:%d,%d)", v->fv, v->nv));
- debug_printf (("(left:%s)(miss:", v->left ? _("left") : _("right")));
-
- switch (v->miss_type)
- {
- case MISSING_NONE:
- n = 0;
- debug_printf ((_("none")));
- break;
- case MISSING_1:
- n = 1;
- debug_printf ((_("one")));
- break;
- case MISSING_2:
- n = 2;
- debug_printf ((_("two")));
- break;
- case MISSING_3:
- n = 3;
- debug_printf ((_("three")));
- break;
- case MISSING_RANGE:
- n = 2;
- debug_printf ((_("range")));
- break;
- case MISSING_LOW:
- n = 1;
- debug_printf ((_("low")));
- break;
- case MISSING_HIGH:
- n = 1;
- debug_printf ((_("high")));
- break;
- case MISSING_RANGE_1:
- n = 3;
- debug_printf ((_("range+1")));
- break;
- case MISSING_LOW_1:
- n = 2;
- debug_printf ((_("low+1")));
- break;
- case MISSING_HIGH_1:
- n = 2;
- debug_printf ((_("high+1")));
- break;
- default:
- assert (0);
- }
- for (j = 0; j < n; j++)
- if (v->type == NUMERIC)
- debug_printf ((",%g", v->missing[j].f));
- else
- debug_printf ((",\"%.*s\"", v->width, v->missing[j].s));
- strcpy (print, fmt_to_string (&v->print));
- debug_printf ((")(fmt:%s,%s)(lbl:%s)\n",
- print, fmt_to_string (&v->write),
- v->label ? v->label : "nolabel"));
- }
-}
-#endif
\f
/* Data reader. */
appropriately. Returns nonzero only if both no errors occur and
data was read. */
static int
-buffer_input (struct file_handle * h)
+buffer_input (struct sfm_reader *r)
{
- struct sfm_fhuser_ext *ext = h->ext;
size_t amt;
- if (ext->buf == NULL)
- ext->buf = xmalloc (sizeof *ext->buf * 128);
- amt = fread (ext->buf, sizeof *ext->buf, 128, ext->file);
- if (ferror (ext->file))
+ if (r->buf == NULL)
+ r->buf = xmalloc (sizeof *r->buf * 128);
+ amt = fread (r->buf, sizeof *r->buf, 128, r->file);
+ if (ferror (r->file))
{
- msg (ME, _("%s: Error reading file: %s."), h->fn, strerror (errno));
+ msg (ME, _("%s: Error reading file: %s."),
+ handle_get_filename (r->fh), strerror (errno));
return 0;
}
- ext->ptr = ext->buf;
- ext->end = &ext->buf[amt];
+ r->ptr = r->buf;
+ r->end = &r->buf[amt];
return amt;
}
-/* Reads a single case consisting of compressed data from system file
- H into the array TEMP[] according to dictionary DICT, and returns
- nonzero only if successful. */
-/* Data in system files is compressed in the following manner:
- data values are grouped into sets of eight; each of the eight has
- one instruction byte, which are output together in an octet; each
- byte gives a value for that byte or indicates that the value can be
- found following the instructions. */
+/* Reads a single case consisting of compressed data from reader
+ R's system file into the array BUF[], and returns nonzero only
+ if successful. */
+/* Data in system files is compressed in this manner. Data
+ values are grouped into sets of eight ("octets"). Each value
+ in an octet has one instruction byte, and the eight instruction
+ bytes are output together. Each instruction byte either encodes
+ its value directly or indicates that the value can be found
+ following the instructions. */
static int
-read_compressed_data (struct file_handle * h, flt64 * temp)
+read_compressed_data (struct sfm_reader *r, flt64 *buf)
{
- struct sfm_fhuser_ext *ext = h->ext;
+ const unsigned char *p_end = r->x + sizeof (flt64);
+ unsigned char *p = r->y;
- const unsigned char *p_end = ext->x + sizeof (flt64);
- unsigned char *p = ext->y;
-
- const flt64 *temp_beg = temp;
- const flt64 *temp_end = &temp[ext->case_size];
+ const flt64 *buf_beg = buf;
+ const flt64 *buf_end = &buf[r->value_cnt];
for (;;)
{
- for (; p < p_end; p++)
+ for (; p < p_end; p++){
switch (*p)
{
case 0:
continue;
case 252:
/* Code 252 is end of file. */
- if (temp_beg != temp)
+ if (buf_beg != buf)
lose ((ME, _("%s: Compressed data is corrupted. Data ends "
- "partway through a case."), h->fn));
- goto lossage;
+ "in partial case."),
+ handle_get_filename (r->fh)));
+ goto error;
case 253:
/* Code 253 indicates that the value is stored explicitly
following the instruction bytes. */
- if (ext->ptr == NULL || ext->ptr >= ext->end)
- if (!buffer_input (h))
+ if (r->ptr == NULL || r->ptr >= r->end)
+ if (!buffer_input (r))
{
- lose ((ME, _("%s: Unexpected end of file."), h->fn));
- goto lossage;
+ lose ((ME, _("%s: Unexpected end of file."),
+ handle_get_filename (r->fh)));
+ goto error;
}
- memcpy (temp++, ext->ptr++, sizeof *temp);
- if (temp >= temp_end)
- goto winnage;
+ memcpy (buf++, r->ptr++, sizeof *buf);
+ if (buf >= buf_end)
+ goto success;
break;
case 254:
/* Code 254 indicates a string that is all blanks. */
- memset (temp++, ' ', sizeof *temp);
- if (temp >= temp_end)
- goto winnage;
+ memset (buf++, ' ', sizeof *buf);
+ if (buf >= buf_end)
+ goto success;
break;
case 255:
/* Code 255 indicates the system-missing value. */
- *temp = ext->sysmis;
- if (ext->reverse_endian)
- bswap_flt64 (temp);
- temp++;
- if (temp >= temp_end)
- goto winnage;
+ *buf = r->sysmis;
+ if (r->reverse_endian)
+ bswap_flt64 (buf);
+ buf++;
+ if (buf >= buf_end)
+ goto success;
break;
default:
/* Codes 1 through 251 inclusive are taken to indicate a
value of (BYTE - BIAS), where BYTE is the byte's value
and BIAS is the compression bias (generally 100.0). */
- *temp = *p - ext->bias;
- if (ext->reverse_endian)
- bswap_flt64 (temp);
- temp++;
- if (temp >= temp_end)
- goto winnage;
+ *buf = *p - r->bias;
+ if (r->reverse_endian)
+ bswap_flt64 (buf);
+ buf++;
+ if (buf >= buf_end)
+ goto success;
break;
}
-
+ }
/* We have reached the end of this instruction octet. Read
another. */
- if (ext->ptr == NULL || ext->ptr >= ext->end)
- if (!buffer_input (h))
+ if (r->ptr == NULL || r->ptr >= r->end)
+ if (!buffer_input (r))
{
- if (temp_beg != temp)
- lose ((ME, _("%s: Unexpected end of file."), h->fn));
- goto lossage;
+ if (buf_beg != buf)
+ lose ((ME, _("%s: Unexpected end of file."),
+ handle_get_filename (r->fh)));
+ goto error;
}
- memcpy (ext->x, ext->ptr++, sizeof *temp);
- p = ext->x;
+ memcpy (r->x, r->ptr++, sizeof *buf);
+ p = r->x;
}
/* Not reached. */
assert (0);
-winnage:
+success:
/* We have filled up an entire record. Update state and return
successfully. */
- ext->y = ++p;
+ r->y = ++p;
return 1;
-lossage:
+error:
/* We have been unsuccessful at filling a record, either through i/o
error or through an end-of-file indication. Update state and
return unsuccessfully. */
return 0;
}
-/* Reads one case from system file H into the value array PERM
- according to the instructions given in associated dictionary DICT,
- which must have the get.* elements appropriately set. Returns
- nonzero only if successful. */
+/* Reads one case from reader R's file into C. Returns nonzero
+ only if successful. */
int
-sfm_read_case (struct file_handle * h, union value * perm, struct dictionary * dict)
+sfm_read_case (struct sfm_reader *r, struct ccase *c)
{
- struct sfm_fhuser_ext *ext = h->ext;
-
- size_t nbytes;
- flt64 *temp;
-
- int i;
-
- /* The first concern is to obtain a full case relative to the data
- file. (Cases in the data file have no particular relationship to
- cases in the active file.) */
- nbytes = sizeof *temp * ext->case_size;
- temp = local_alloc (nbytes);
-
- if (ext->compressed == 0)
+ if (!r->compressed && sizeof (flt64) == sizeof (double))
{
- size_t amt = fread (temp, 1, nbytes, ext->file);
+ /* Fast path: external and internal representations are the
+ same, except possibly for endianness or SYSMIS. Read
+ directly into the case's buffer, then fix up any minor
+ details as needed. */
+ if (!fread_ok (r, case_data_all_rw (c),
+ sizeof (union value) * r->value_cnt))
+ return 0;
+
+ /* Fix up endianness if needed. */
+ if (r->reverse_endian)
+ {
+ int i;
+
+ for (i = 0; i < r->value_cnt; i++)
+ if (r->vars[i].width == 0)
+ bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
+ }
- if (amt != nbytes)
- {
- if (ferror (ext->file))
- msg (ME, _("%s: Reading system file: %s."), h->fn, strerror (errno));
- else if (amt != 0)
- msg (ME, _("%s: Partial record at end of system file."), h->fn);
- goto lossage;
- }
+ /* Fix up SYSMIS values if needed.
+ I don't think this will ever actually kick in, but it
+ can't hurt. */
+ if (r->sysmis != SYSMIS)
+ {
+ int i;
+
+ for (i = 0; i < r->value_cnt; i++)
+ if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
+ case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
+ }
}
- else if (!read_compressed_data (h, temp))
- goto lossage;
-
- /* Translate a case in data file format to a case in active file
- format. */
- for (i = 0; i < dict_get_var_cnt (dict); i++)
+ else
{
- struct variable *v = dict_get_var (dict, i);
-
- if (v->get.fv == -1)
- continue;
-
- if (v->type == NUMERIC)
- {
- flt64 src = temp[v->get.fv];
- if (ext->reverse_endian)
- bswap_flt64 (&src);
- perm[v->fv].f = src == ext->sysmis ? SYSMIS : src;
- }
+ /* Slow path: internal and external representations differ.
+ Read into a bounce buffer, then copy to C. */
+ flt64 *bounce;
+ flt64 *bounce_cur;
+ size_t bounce_size;
+ int read_ok;
+ int i;
+
+ bounce_size = sizeof *bounce * r->value_cnt;
+ bounce = bounce_cur = local_alloc (bounce_size);
+
+ if (!r->compressed)
+ read_ok = fread_ok (r, bounce, bounce_size);
else
- memcpy (&perm[v->fv].s, &temp[v->get.fv], v->width);
- }
+ read_ok = read_compressed_data (r, bounce);
+ if (!read_ok)
+ {
+ local_free (bounce);
+ return 0;
+ }
- local_free (temp);
- return 1;
+ for (i = 0; i < r->value_cnt; i++)
+ {
+ struct sfm_var *v = &r->vars[i];
+
+ if (v->width == 0)
+ {
+ flt64 f = *bounce_cur++;
+ if (r->reverse_endian)
+ bswap_flt64 (&f);
+ case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f;
+ }
+ else if (v->width != -1)
+ {
+ memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
+ bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
+ }
+ }
-lossage:
- local_free (temp);
- return 0;
+ local_free (bounce);
+ }
+ return 1;
}
-static struct fh_ext_class sfm_r_class =
+static int
+fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
{
- 3,
- N_("reading as a system file"),
- sfm_close,
-};
+ size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
+
+ if (read_bytes == byte_cnt)
+ return 1;
+ else
+ {
+ if (ferror (r->file))
+ msg (ME, _("%s: Reading system file: %s."),
+ handle_get_filename (r->fh), strerror (errno));
+ else if (read_bytes != 0)
+ msg (ME, _("%s: Partial record at end of system file."),
+ handle_get_filename (r->fh));
+ return 0;
+ }
+}