02110-1301, USA. */
#include <config.h>
-#include "sys-file-reader.h"
-#include "sfm-private.h"
-#include <libpspp/message.h>
+
#include <stdlib.h>
-#include <ctype.h>
#include <errno.h>
#include <float.h>
-#include <setjmp.h>
+#include <c-ctype.h>
+#include <minmax.h>
+
#include <libpspp/alloc.h>
-#include "case.h"
+#include <libpspp/message.h>
#include <libpspp/compiler.h>
+#include <libpspp/magic.h>
+#include <libpspp/misc.h>
+#include <libpspp/str.h>
+#include <libpspp/hash.h>
+#include <libpspp/array.h>
+
+#include "sys-file-reader.h"
+#include "sfm-private.h"
+#include "case.h"
#include "dictionary.h"
-#include <libpspp/message.h>
#include "file-handle-def.h"
-#include "filename.h"
+#include "file-name.h"
#include "format.h"
-#include <libpspp/magic.h>
-#include <libpspp/misc.h>
#include "value-labels.h"
-#include <libpspp/str.h>
#include "variable.h"
+#include "value.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
-#include <libpspp/debug-print.h>
-
/* System file reader. */
struct sfm_reader
- {
- struct file_handle *fh; /* File handle. */
- FILE *file; /* File stream. */
-
- int reverse_endian; /* 1=file has endianness opposite us. */
- int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
- int value_cnt; /* Number of `union values's per case. */
- long case_cnt; /* Number of cases, -1 if unknown. */
- int compressed; /* 1=compressed, 0=not compressed. */
- double bias; /* Compression bias, usually 100.0. */
- int weight_idx; /* 0-based index of weighting variable, or -1. */
- bool ok; /* False after an I/O error or corrupt data. */
-
- /* Variables. */
- struct sfm_var *vars; /* Variables. */
-
- /* File's special constants. */
- flt64 sysmis;
- flt64 highest;
- flt64 lowest;
-
- /* Decompression buffer. */
- flt64 *buf; /* Buffer data. */
- flt64 *ptr; /* Current location in buffer. */
- flt64 *end; /* End of buffer data. */
-
- /* Compression instruction octet. */
- unsigned char x[8]; /* Current instruction octet. */
- unsigned char *y; /* Location in current instruction octet. */
- };
+{
+ struct file_handle *fh; /* File handle. */
+ FILE *file; /* File stream. */
+
+ int reverse_endian; /* 1=file has endianness opposite us. */
+ int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
+ int value_cnt; /* Number of `union value's per case. */
+ long case_cnt; /* Number of cases, -1 if unknown. */
+ int compressed; /* 1=compressed, 0=not compressed. */
+ double bias; /* Compression bias, usually 100.0. */
+ int weight_idx; /* 0-based index of weighting variable, or -1. */
+ bool ok; /* False after an I/O error or corrupt data. */
+ /* NOTE(review): the visible reader code sets has_vls while handling
+    record 7 subtype 13 (long variable names), not subtype 14 --
+    confirm that this is intended. */
+ bool has_vls; /* True if the file has one or more Very Long Strings. */
+
+ /* Variables. */
+ struct hsh_table *var_hash; /* Variables, hashed and compared by short name
+                                (compare_var_shortnames, hash_var_shortname). */
+ struct variable **svars; /* Set to 0 when the reader is opened; its use is
+                             not visible in this part of the file -- TODO confirm. */
+
+ /* File's special constants. */
+ flt64 sysmis;
+ flt64 highest;
+ flt64 lowest;
+
+ /* Decompression buffer. */
+ flt64 *buf; /* Buffer data. */
+ flt64 *ptr; /* Current location in buffer. */
+ flt64 *end; /* End of buffer data. */
+
+ /* Compression instruction octet. */
+ unsigned char x[8]; /* Current instruction octet. */
+ unsigned char *y; /* Location in current instruction octet. */
+};
/* A variable in a system file. */
struct sfm_var
- {
- int width; /* 0=numeric, otherwise string width. */
- int fv; /* Index into case. */
- };
+{
+ char name[SHORT_NAME_LEN + 1]; /* Name; room for SHORT_NAME_LEN characters
+                                   plus a terminating null (presumably the
+                                   short name -- confirm). */
+ int width; /* 0=numeric, otherwise string width. */
+ int fv; /* Index into case. */
+};
\f
/* Utilities. */
/* Reverse the byte order of 32-bit integer *X. */
static inline void
-bswap_int32 (int32 *x_)
+bswap_int32 (int32_t *x_)
{
char *x = (char *) x_;
bswap (x + 0, x + 3);
corrupt_msg (int class, const char *format,...)
PRINTF_FORMAT (2, 3);
-/* Displays a corrupt sysfile error. */
-static void
-corrupt_msg (int class, const char *format,...)
+ /* Displays a corrupt sysfile error.
+    CLASS is translated into the message's category and severity.
+    FORMAT and the arguments after it are handed to ds_vprintf
+    together with the message text, which starts with a fixed,
+    translated "corrupt system file: " prefix. */
+ static void
+ corrupt_msg (int class, const char *format,...)
{
- struct error e;
+ struct msg m;
va_list args;
+ struct string text;
- e.class = class;
- e.where.filename = NULL;
- e.where.line_number = 0;
- e.title = _("corrupt system file: ");
-
+ /* Start the message text with the fixed prefix. */
+ ds_create (&text, _("corrupt system file: "));
va_start (args, format);
- err_vmsg (&e, format, args);
+ ds_vprintf (&text, format, args);
va_end (args);
+
+ m.category = msg_class_to_category (class);
+ m.severity = msg_class_to_severity (class);
+ /* No source file name or line number is attached to the message. */
+ m.where.file_name = NULL;
+ m.where.line_number = 0;
+ m.text = ds_c_str (&text);
+
+ /* NOTE(review): TEXT is never destroyed in this function -- confirm
+    whether msg_emit takes ownership of m.text. */
+ msg_emit (&m);
}
/* Closes a system file after we're done with it. */
if (r->file)
{
- if (fn_close (fh_get_filename (r->fh), r->file) == EOF)
+ if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
msg (ME, _("%s: Closing system file: %s."),
- fh_get_filename (r->fh), strerror (errno));
+ fh_get_file_name (r->fh), strerror (errno));
r->file = NULL;
}
if (r->fh != NULL)
fh_close (r->fh, "system file", "rs");
-
- free (r->vars);
+
+ hsh_destroy(r->var_hash);
free (r->buf);
free (r);
}
static int read_header (struct sfm_reader *,
struct dictionary *, struct sfm_read_info *);
-static int parse_format_spec (struct sfm_reader *, int32,
+static int parse_format_spec (struct sfm_reader *, int32_t,
struct fmt_spec *, const struct variable *);
static int read_value_labels (struct sfm_reader *, struct dictionary *,
struct variable **var_by_idx);
goto error; \
} while (0)
+
+/* One entry of the short-name -> long-name table.
+   In the visible code both strings are tokens produced by strtok_r
+   inside the long variable name record buffer, so they are borrowed:
+   pair_sn_free releases only the pair itself. */
+struct name_pair
+{
+ char *shortname; /* Short variable name; the hash and comparison key. */
+ char *longname; /* Long variable name that SHORTNAME maps to. */
+};
+
+/* Comparison callback for the short-name -> long-name table.
+   Compares the short names of the two name_pairs _P1 and _P2,
+   looking at no more than SHORT_NAME_LEN characters of each.
+   Returns a strncmp-style negative, zero, or positive result. */
+static int
+pair_sn_compare (const void *_p1, const void *_p2, void *aux UNUSED)
+{
+  const struct name_pair *lhs = _p1;
+  const struct name_pair *rhs = _p2;
+
+  /* strncmp stops at the first null byte or after SHORT_NAME_LEN
+     characters, whichever comes first, so no bounded copies of
+     the names are needed. */
+  return strncmp (lhs->shortname, rhs->shortname, SHORT_NAME_LEN);
+}
+
+/* Hash callback for the short-name -> long-name table.
+   Hashes the short name of the name_pair _P, using at most its
+   first SHORT_NAME_LEN characters.  That is the same prefix that
+   pair_sn_compare examines, so pairs that compare equal always
+   hash alike.
+
+   The length is measured with an explicit bound instead of copying
+   into a fixed buffer and calling strlen: a name with more than
+   SHORT_NAME_LEN characters would otherwise leave the copy without
+   a null terminator and make strlen read past the buffer. */
+static unsigned int
+pair_sn_hash (const void *_p, void *aux UNUSED)
+{
+  const struct name_pair *p = _p;
+  size_t len = 0;
+
+  while (len < SHORT_NAME_LEN && p->shortname[len] != '\0')
+    len++;
+
+  return hsh_hash_bytes (p->shortname, len);
+}
+
+/* Free callback passed to hsh_create for the short-name ->
+   long-name table.  Frees only the object P itself; nothing that
+   P points to is freed here. */
+static void
+pair_sn_free(void *p, void *aux UNUSED)
+{
+ free(p);
+}
+
+
+
+/* A hsh_compare_func that orders variables A and B by their
+   short names, examining at most SHORT_NAME_LEN characters of
+   each. */
+static int
+compare_var_shortnames (const void *a_, const void *b_, void *foo UNUSED)
+{
+  const struct variable *var_a = a_;
+  const struct variable *var_b = b_;
+
+  /* strncmp already stops at a null byte or after SHORT_NAME_LEN
+     characters, so the names can be compared in place. */
+  return strncmp (var_a->short_name, var_b->short_name, SHORT_NAME_LEN);
+}
+
+/* A hsh_hash_func that hashes variable V based on its short name.
+   At most the first SHORT_NAME_LEN characters are hashed, which is
+   the same prefix that compare_var_shortnames examines.
+
+   The length is measured with an explicit bound instead of copying
+   into a fixed buffer and calling strlen, so a short name without a
+   null byte in its first SHORT_NAME_LEN + 1 characters can no
+   longer make strlen run past the end of the copy. */
+static unsigned
+hash_var_shortname (const void *v_, void *foo UNUSED)
+{
+  const struct variable *v = v_;
+  size_t len = 0;
+
+  while (len < SHORT_NAME_LEN && v->short_name[len] != '\0')
+    len++;
+
+  return hsh_hash_bytes (v->short_name, len);
+}
+
+
+
/* Opens the system file designated by file handle FH for
reading. Reads the system file's dictionary into *DICT.
If INFO is non-null, then it receives additional info about the
struct sfm_reader *r = NULL;
struct variable **var_by_idx = NULL;
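+ /* The raw data of record type 7, subtype 13 (the long variable
+    name map), despite this variable's name.  It is kept until this
+    function finishes because the name pairs stored in short_to_long
+    point into it. */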
+ char *subrec14data = 0;
+
+ /* A hash table of long variable names indexed by short name */
+ struct hsh_table *short_to_long = NULL;
+
+
*dict = dict_create ();
if (!fh_open (fh, FH_REF_FILE, "system file", "rs"))
goto error;
/* Create and initialize reader. */
r = xmalloc (sizeof *r);
r->fh = fh;
- r->file = fn_open (fh_get_filename (fh), "rb");
+ r->file = fn_open (fh_get_file_name (fh), "rb");
r->reverse_endian = 0;
r->fix_specials = 0;
r->bias = 100.0;
r->weight_idx = -1;
r->ok = true;
+ r->has_vls = false;
+ r->svars = 0;
- r->vars = NULL;
+ r->var_hash = hsh_create(4, compare_var_shortnames, hash_var_shortname, 0, 0);
r->sysmis = -FLT64_MAX;
r->highest = FLT64_MAX;
{
msg (ME, _("An error occurred while opening \"%s\" for reading "
"as a system file: %s."),
- fh_get_filename (r->fh), strerror (errno));
+ fh_get_file_name (r->fh), strerror (errno));
goto error;
}
if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt)
lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
"and number of elements per case (%d)."),
- fh_get_filename (r->fh), r->weight_idx, r->value_cnt));
+ fh_get_file_name (r->fh), r->weight_idx, r->value_cnt));
weight_var = var_by_idx[r->weight_idx];
if (weight_var == NULL)
lose ((ME,
_("%s: Weighting variable may not be a continuation of "
- "a long string variable."), fh_get_filename (fh)));
+ "a long string variable."), fh_get_file_name (fh)));
else if (weight_var->type == ALPHA)
lose ((ME, _("%s: Weighting variable may not be a string variable."),
- fh_get_filename (fh)));
+ fh_get_file_name (fh)));
dict_set_weight (*dict, weight_var);
}
/* Read records of types 3, 4, 6, and 7. */
for (;;)
{
- int32 rec_type;
+ int32_t rec_type;
assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
if (r->reverse_endian)
bswap_int32 (&rec_type);
+
switch (rec_type)
{
case 3:
lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
"records must always immediately follow type 3 "
"records."),
- fh_get_filename (r->fh)));
+ fh_get_file_name (r->fh)));
case 6:
if (!read_documents (r, *dict))
case 7:
{
struct
- {
- int32 subtype P;
- int32 size P;
- int32 count P;
- }
+ {
+ int32_t subtype P;
+ int32_t size P;
+ int32_t count P;
+ }
data;
unsigned long bytes;
bswap_int32 (&data.count);
}
bytes = data.size * data.count;
+
if (bytes < data.size || bytes < data.count)
lose ((ME, "%s: Record type %d subtype %d too large.",
- fh_get_filename (r->fh), rec_type, data.subtype));
+ fh_get_file_name (r->fh), rec_type, data.subtype));
switch (data.subtype)
{
{
const int n_vars = data.count / 3 ;
int i;
- if ( data.count % 3 || n_vars > dict_get_var_cnt(*dict) )
+ if ( data.count % 3 || n_vars != dict_get_var_cnt(*dict) )
{
msg (MW, _("%s: Invalid subrecord length. "
"Record: 7; Subrecord: 11"),
- fh_get_filename (r->fh));
+ fh_get_file_name (r->fh));
skip = 1;
+ break;
}
for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i )
{
struct
{
- int32 measure P;
- int32 width P;
- int32 align P;
+ int32_t measure P;
+ int32_t width P;
+ int32_t align P;
}
params;
assertive_buf_read (r, ¶ms, sizeof(params), 0);
+ if ( ! measure_is_valid(params.measure)
+ ||
+ ! alignment_is_valid(params.align))
+ {
+ msg(MW,
+ _("Invalid variable display parameters. Default parameters substituted."),
+ fh_get_file_name(r->fh));
+ continue;
+ }
+
v = dict_get_var(*dict, i);
v->measure = params.measure;
case 13: /* SPSS 12.0 Long variable name map */
{
- char *buf, *short_name, *save_ptr;
+ char *short_name, *save_ptr;
int idx;
+ r->has_vls = true;
+
/* Read data. */
- buf = xmalloc (bytes + 1);
- if (!buf_read (r, buf, bytes, 0))
+ subrec14data = xmalloc (bytes + 1);
+ if (!buf_read (r, subrec14data, bytes, 0))
{
- free (buf);
goto error;
}
- buf[bytes] = '\0';
+ subrec14data[bytes] = '\0';
+
+ short_to_long = hsh_create(4,
+ pair_sn_compare,
+ pair_sn_hash,
+ pair_sn_free,
+ 0);
/* Parse data. */
- for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0;
+ for (short_name = strtok_r (subrec14data, "=", &save_ptr), idx = 0;
short_name != NULL;
short_name = strtok_r (NULL, "=", &save_ptr), idx++)
{
+ struct name_pair *pair ;
char *long_name = strtok_r (NULL, "\t", &save_ptr);
struct variable *v;
{
msg (MW, _("%s: Trailing garbage in long variable "
"name map."),
- fh_get_filename (r->fh));
+ fh_get_file_name (r->fh));
break;
}
if (!var_is_valid_name (long_name, false))
{
msg (MW, _("%s: Long variable mapping to invalid "
"variable name `%s'."),
- fh_get_filename (r->fh), long_name);
+ fh_get_file_name (r->fh), long_name);
break;
}
{
msg (MW, _("%s: Long variable mapping for "
"nonexistent variable %s."),
- fh_get_filename (r->fh), short_name);
+ fh_get_file_name (r->fh), short_name);
break;
}
/* Identify any duplicates. */
if ( compare_var_names(short_name, long_name, 0) &&
- NULL != dict_lookup_var (*dict, long_name))
+ NULL != dict_lookup_var (*dict, long_name))
lose ((ME, _("%s: Duplicate long variable name `%s' "
"within system file."),
- fh_get_filename (r->fh), long_name));
+ fh_get_file_name (r->fh), long_name));
/* Set long name.
dict_rename_var (*dict, v, long_name);
var_set_short_name (v, short_name);
+ pair = xmalloc(sizeof *pair);
+ pair->shortname = short_name;
+ pair->longname = long_name;
+ hsh_insert(short_to_long, pair);
+#if 0
+ /* This messes up the processing of subtype 14 (below).
+ I'm not sure if it is needed anyway, so I'm removing it for
+ now. If it's needed, then it will need to be done after all the
+ records have been processed. --- JMD 27 April 2006
+ */
+
+ /* For compatibility, make sure dictionary
is in long variable name map order. In
the common case, this has no effect,
variable name map are already in the
same order. */
dict_reorder_var (*dict, v, idx);
+#endif
}
+
+ }
+ break;
- /* Free data. */
- free (buf);
+ case 14:
+ {
+ int j = 0;
+ bool eq_seen = false;
+ int i;
+
+ /* Read data. */
+ char *buffer = xmalloc (bytes + 1);
+ if (!buf_read (r, buffer, bytes, 0))
+ {
+ free (buffer);
+ goto error;
+ }
+ buffer[bytes] = '\0';
+
+
+ /* Note: SPSS v13 terminates this record with 00,
+ whereas SPSS v14 terminates it with 00 09. We must
+ accept either */
+ for(i = 0; i < bytes ; ++i)
+ {
+ long int length;
+ static char name[SHORT_NAME_LEN + 1] = {0};
+ static char len_str[6] ={0};
+
+ switch( buffer[i] )
+ {
+ case '=':
+ eq_seen = true;
+ j = 0;
+ break;
+ case '\0':
+ length = strtol(len_str, 0, 10);
+ if ( length != LONG_MAX && length != LONG_MIN)
+ {
+ char *lookup_name = name;
+ int l;
+ int idx;
+ struct variable *v;
+
+ if ( short_to_long )
+ {
+ struct name_pair pair;
+ struct name_pair *p;
+
+ pair.shortname = name;
+ p = hsh_find(short_to_long, &pair);
+ if ( p )
+ lookup_name = p->longname;
+ }
+
+ v = dict_lookup_var(*dict, lookup_name);
+ if ( !v )
+ {
+ corrupt_msg(MW,
+ _("%s: No variable called %s but it is listed in length table."),
+ fh_get_file_name (r->fh), lookup_name);
+
+ goto error;
+
+ }
+
+ l = length;
+ if ( v->width > EFFECTIVE_LONG_STRING_LENGTH )
+ l -= EFFECTIVE_LONG_STRING_LENGTH;
+ else
+ l -= v->width;
+
+ idx = v->index;
+ while ( l > 0 )
+ {
+ struct variable *v_next;
+ v_next = dict_get_var(*dict, idx + 1);
+
+ if ( v_next->width > EFFECTIVE_LONG_STRING_LENGTH )
+ l -= EFFECTIVE_LONG_STRING_LENGTH;
+ else
+ l -= v_next->width;
+
+ hsh_delete(r->var_hash, v_next);
+
+ dict_delete_var(*dict, v_next);
+ }
+
+ assert ( length > MAX_LONG_STRING );
+
+ v->width = length;
+ v->print.w = v->width;
+ v->write.w = v->width;
+ v->nv = DIV_RND_UP (length, MAX_SHORT_STRING);
+ }
+ eq_seen = false;
+ memset(name, 0, SHORT_NAME_LEN+1);
+ memset(len_str, 0, 6);
+ j = 0;
+ break;
+ case '\t':
+ break;
+ default:
+ if ( eq_seen )
+ len_str[j] = buffer[i];
+ else
+ name[j] = buffer[i];
+ j++;
+ break;
+ }
+ }
+ free(buffer);
+ dict_compact_values(*dict);
}
break;
default:
msg (MW, _("%s: Unrecognized record type 7, subtype %d "
"encountered in system file."),
- fh_get_filename (r->fh), data.subtype);
+ fh_get_file_name (r->fh), data.subtype);
skip = 1;
}
case 999:
{
- int32 filler;
+ int32_t filler;
assertive_buf_read (r, &filler, sizeof filler, 0);
+
goto success;
}
default:
corrupt_msg(MW, _("%s: Unrecognized record type %d."),
- fh_get_filename (r->fh), rec_type);
+ fh_get_file_name (r->fh), rec_type);
}
}
-success:
+ success:
/* Come here on successful completion. */
+
free (var_by_idx);
+ hsh_destroy(short_to_long);
+ free (subrec14data);
return r;
-error:
+ error:
/* Come here on unsuccessful completion. */
sfm_close_reader (r);
free (var_by_idx);
+ hsh_destroy(short_to_long);
+ free (subrec14data);
if (*dict != NULL)
{
dict_destroy (*dict);
static int
read_machine_int32_info (struct sfm_reader *r, int size, int count)
{
- int32 data[8];
+ int32_t data[8];
int file_bigendian;
int i;
- if (size != sizeof (int32) || count != 8)
+ if (size != sizeof (int32_t) || count != 8)
lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
"subtype 3. Expected size %d, count 8."),
- fh_get_filename (r->fh), size, count, sizeof (int32)));
+ fh_get_file_name (r->fh), size, count, sizeof (int32_t)));
assertive_buf_read (r, data, sizeof data, 0);
if (r->reverse_endian)
lose ((ME, _("%s: Floating-point representation in system file is not "
"IEEE-754. PSPP cannot convert between floating-point "
"formats."),
- fh_get_filename (r->fh)));
+ fh_get_file_name (r->fh)));
#else
#error Add support for your floating-point format.
#endif
if (file_bigendian ^ (data[6] == 1))
lose ((ME, _("%s: File-indicated endianness (%s) does not match "
"endianness intuited from file header (%s)."),
- fh_get_filename (r->fh),
+ fh_get_file_name (r->fh),
file_bigendian ? _("big-endian") : _("little-endian"),
data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
- : _("unknown"))));
+ : _("unknown"))));
/* PORTME: Character representation code. */
if (data[7] != 2 && data[7] != 3)
lose ((ME, _("%s: File-indicated character representation code (%s) is "
"not ASCII."),
- fh_get_filename (r->fh),
+ fh_get_file_name (r->fh),
(data[7] == 1 ? "EBCDIC"
: (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
return 1;
-error:
+ error:
return 0;
}
if (size != sizeof (flt64) || count != 3)
lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
"subtype 4. Expected size %d, count 8."),
- fh_get_filename (r->fh), size, count, sizeof (flt64)));
+ fh_get_file_name (r->fh), size, count, sizeof (flt64)));
assertive_buf_read (r, data, sizeof data, 0);
if (r->reverse_endian)
"for at least one of the three system values. SYSMIS: "
"indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
"%g, %g."),
- fh_get_filename (r->fh), (double) data[0], (double) SYSMIS,
+ fh_get_file_name (r->fh), (double) data[0], (double) SYSMIS,
(double) data[1], (double) FLT64_MAX,
(double) data[2], (double) second_lowest_flt64);
}
return 1;
-error:
+ error:
return 0;
}
if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
lose ((ME, _("%s: Bad magic. Proper system files begin with "
"the four characters `$FL2'. This file will not be read."),
- fh_get_filename (r->fh)));
+ fh_get_file_name (r->fh)));
/* Check eye-catcher string. */
memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
for (i = 0; i < 60; i++)
- if (!isprint ((unsigned char) prod_name[i]))
+ if (!c_isprint ((unsigned char) prod_name[i]))
prod_name[i] = ' ';
for (i = 59; i >= 0; i--)
- if (!isgraph ((unsigned char) prod_name[i]))
+ if (!c_isgraph ((unsigned char) prod_name[i]))
{
prod_name[i] = '\0';
break;
if (hdr.layout_code != 2)
lose ((ME, _("%s: File layout code has unexpected value %d. Value "
"should be 2, in big-endian or little-endian format."),
- fh_get_filename (r->fh), hdr.layout_code));
+ fh_get_file_name (r->fh), hdr.layout_code));
r->reverse_endian = 1;
- bswap_int32 (&hdr.case_size);
+ bswap_int32 (&hdr.nominal_case_size);
bswap_int32 (&hdr.compress);
bswap_int32 (&hdr.weight_idx);
bswap_int32 (&hdr.case_cnt);
/* Copy basic info and verify correctness. */
- r->value_cnt = hdr.case_size;
+ r->value_cnt = hdr.nominal_case_size;
- /* If value count is rediculous, then force it to -1 (a sentinel value) */
+ /* If value count is ridiculous, then force it to -1 (a
+ sentinel value). */
if ( r->value_cnt < 0 ||
r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
r->value_cnt = -1;
if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
lose ((ME,
_("%s: Number of cases in file (%ld) is not between -1 and %d."),
- fh_get_filename (r->fh), (long) r->case_cnt, INT_MAX / 2));
+ fh_get_file_name (r->fh), (long) r->case_cnt, INT_MAX / 2));
r->bias = hdr.bias;
if (r->bias != 100.0)
corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
"value of 100."),
- fh_get_filename (r->fh), r->bias);
+ fh_get_file_name (r->fh), r->bias);
/* Make a file label only on the condition that the given label is
not all spaces or nulls. */
int i;
for (i = sizeof hdr.file_label - 1; i >= 0; i--)
- if (!isspace ((unsigned char) hdr.file_label[i])
- && hdr.file_label[i] != 0)
- {
- char *label = xmalloc (i + 2);
- memcpy (label, hdr.file_label, i + 1);
- label[i + 1] = 0;
- dict_set_label (dict, label);
- free (label);
- break;
- }
+ {
+ if (!c_isspace ((unsigned char) hdr.file_label[i])
+ && hdr.file_label[i] != 0)
+ {
+ char *label = xmalloc (i + 2);
+ memcpy (label, hdr.file_label, i + 1);
+ label[i + 1] = 0;
+ dict_set_label (dict, label);
+ free (label);
+ break;
+ }
+ }
}
if (info)
info->case_cnt = hdr.case_cnt;
for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
- if (isgraph ((unsigned char) *cp))
+ if (c_isgraph ((unsigned char) *cp))
break;
strcpy (info->product, cp);
}
return 1;
-error:
+ error:
return 0;
}
*var_by_idx = 0;
- /* Pre-allocate variables. */
- if (r->value_cnt != -1)
- {
- *var_by_idx = xnmalloc (r->value_cnt, sizeof **var_by_idx);
- r->vars = xnmalloc (r->value_cnt, sizeof *r->vars);
- }
-
/* Read in the entry for each variable and use the info to
initialize the dictionary. */
int nv;
int j;
- if ( r->value_cnt != -1 && i >= r->value_cnt )
- break;
-
assertive_buf_read (r, &sv, sizeof sv, 0);
if (r->reverse_endian)
break;
}
- if ( -1 == r->value_cnt )
- {
- *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx);
- r->vars = xnrealloc (r->vars, i + 1, sizeof *r->vars);
- }
+ *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx);
/* If there was a long string previously, make sure that the
continuations are present; otherwise make sure there aren't
if (sv.type != -1)
lose ((ME, _("%s: position %d: String variable does not have "
"proper number of continuation records."),
- fh_get_filename (r->fh), i));
+ fh_get_file_name (r->fh), i));
- r->vars[i].width = -1;
(*var_by_idx)[i] = NULL;
long_string_count--;
continue;
else if (sv.type == -1)
lose ((ME, _("%s: position %d: Superfluous long string continuation "
"record."),
- fh_get_filename (r->fh), i));
+ fh_get_file_name (r->fh), i));
/* Check fields for validity. */
if (sv.type < 0 || sv.type > 255)
lose ((ME, _("%s: position %d: Bad variable type code %d."),
- fh_get_filename (r->fh), i, sv.type));
+ fh_get_file_name (r->fh), i, sv.type));
if (sv.has_var_label != 0 && sv.has_var_label != 1)
lose ((ME, _("%s: position %d: Variable label indicator field is not "
- "0 or 1."), fh_get_filename (r->fh), i));
+ "0 or 1."), fh_get_file_name (r->fh), i));
if (sv.n_missing_values < -3 || sv.n_missing_values > 3
|| sv.n_missing_values == -1)
lose ((ME, _("%s: position %d: Missing value indicator field is not "
- "-3, -2, 0, 1, 2, or 3."), fh_get_filename (r->fh), i));
+ "-3, -2, 0, 1, 2, or 3."), fh_get_file_name (r->fh), i));
/* Copy first character of variable name. */
- if (!isalpha ((unsigned char) sv.name[0])
- && sv.name[0] != '@' && sv.name[0] != '#')
+ if (sv.name[0] == '@' || sv.name[0] == '#')
lose ((ME, _("%s: position %d: Variable name begins with invalid "
"character."),
- fh_get_filename (r->fh), i));
- if (islower ((unsigned char) sv.name[0]))
- msg (MW, _("%s: position %d: Variable name begins with lowercase letter "
- "%c."),
- fh_get_filename (r->fh), i, sv.name[0]);
- if (sv.name[0] == '#')
- msg (MW, _("%s: position %d: Variable name begins with octothorpe "
- "(`#'). Scratch variables should not appear in system "
- "files."),
- fh_get_filename (r->fh), i);
- name[0] = toupper ((unsigned char) (sv.name[0]));
+ fh_get_file_name (r->fh), i));
+
+ name[0] = sv.name[0];
/* Copy remaining characters of variable name. */
for (j = 1; j < SHORT_NAME_LEN; j++)
{
int c = (unsigned char) sv.name[j];
- if (isspace (c))
+ if (c == ' ')
break;
- else if (islower (c))
- {
- msg (MW, _("%s: position %d: Variable name character %d is "
- "lowercase letter %c."),
- fh_get_filename (r->fh), i, j + 1, sv.name[j]);
- name[j] = toupper ((unsigned char) (c));
- }
- else if (isalnum (c) || c == '.' || c == '@'
- || c == '#' || c == '$' || c == '_')
+ else
name[j] = c;
- else
- lose ((ME, _("%s: position %d: character `\\%03o' (%c) is not valid in a "
- "variable name."),
- fh_get_filename (r->fh), i, c, c));
}
name[j] = 0;
- if ( ! var_is_valid_name(name, false) )
+ if ( ! var_is_plausible_name(name, false) )
lose ((ME, _("%s: Invalid variable name `%s' within system file."),
- fh_get_filename (r->fh), name));
+ fh_get_file_name (r->fh), name));
/* Create variable. */
-
vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
if (vv == NULL)
lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
- fh_get_filename (r->fh), name));
+ fh_get_file_name (r->fh), name));
+ /* Set the short name the same as the long name */
var_set_short_name (vv, vv->name);
/* Case reading data. */
if (sv.has_var_label == 1)
{
/* Disk buffer. */
- int32 len;
+ int32_t len;
/* Read length of label. */
assertive_buf_read (r, &len, sizeof len, 0);
if (len < 0 || len > 255)
lose ((ME, _("%s: Variable %s indicates variable label of invalid "
"length %d."),
- fh_get_filename (r->fh), vv->name, len));
+ fh_get_file_name (r->fh), vv->name, len));
if ( len != 0 )
{
/* Read label into variable structure. */
- vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32)), len + 1);
+ vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32_t)), len + 1);
if (vv->label == NULL)
goto error;
vv->label[len] = '\0';
if (vv->width > MAX_SHORT_STRING)
lose ((ME, _("%s: Long string variable %s may not have missing "
"values."),
- fh_get_filename (r->fh), vv->name));
+ fh_get_file_name (r->fh), vv->name));
assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0);
if (vv->type == ALPHA)
lose ((ME, _("%s: String variable %s may not have missing "
"values specified as a range."),
- fh_get_filename (r->fh), vv->name));
+ fh_get_file_name (r->fh), vv->name));
if (mv[0] == r->lowest)
mv_add_num_range (&vv->miss, LOWEST, mv[1]);
|| !parse_format_spec (r, sv.write, &vv->write, vv))
goto error;
- r->vars[i].width = vv->width;
- r->vars[i].fv = vv->fv;
-
+ if ( vv->width != -1)
+ hsh_insert(r->var_hash, vv);
}
/* Some consistency checks. */
if (long_string_count != 0)
lose ((ME, _("%s: Long string continuation records omitted at end of "
"dictionary."),
- fh_get_filename (r->fh)));
+ fh_get_file_name (r->fh)));
if (next_value != r->value_cnt)
corrupt_msg(MW, _("%s: System file header indicates %d variable positions but "
- "%d were read from file."),
- fh_get_filename (r->fh), r->value_cnt, next_value);
+ "%d were read from file."),
+ fh_get_file_name (r->fh), r->value_cnt, next_value);
return 1;
-error:
+ error:
return 0;
}
/* Translates the format spec from sysfile format to internal
format. */
static int
-parse_format_spec (struct sfm_reader *r, int32 s,
+parse_format_spec (struct sfm_reader *r, int32_t s,
struct fmt_spec *f, const struct variable *v)
{
f->type = translate_fmt ((s >> 16) & 0xff);
if (f->type == -1)
lose ((ME, _("%s: Bad format specifier byte (%d)."),
- fh_get_filename (r->fh), (s >> 16) & 0xff));
+ fh_get_file_name (r->fh), (s >> 16) & 0xff));
f->w = (s >> 8) & 0xff;
f->d = s & 0xff;
if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0))
lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
- fh_get_filename (r->fh),
+ fh_get_file_name (r->fh),
v->type == ALPHA ? _("String") : _("Numeric"),
v->name,
formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"),
}
return 1;
-error:
+ error:
return 0;
}
struct dictionary *dict, struct variable **var_by_idx)
{
struct label
- {
- char raw_value[8]; /* Value as uninterpreted bytes. */
- union value value; /* Value. */
- char *label; /* Null-terminated label string. */
- };
+ {
+ char raw_value[8]; /* Value as uninterpreted bytes. */
+ union value value; /* Value. */
+ char *label; /* Null-terminated label string. */
+ };
struct label *labels = NULL;
- int32 n_labels; /* Number of labels. */
+ int32_t n_labels; /* Number of labels. */
struct variable **var = NULL; /* Associated variables. */
- int32 n_vars; /* Number of associated variables. */
+ int32_t n_vars; /* Number of associated variables. */
int i;
if (r->reverse_endian)
bswap_int32 (&n_labels);
- if ( n_labels >= ((int32) ~0) / sizeof *labels)
+ if ( n_labels >= ((int32_t) ~0) / sizeof *labels)
{
corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."),
- fh_get_filename (r->fh), n_labels);
+ fh_get_file_name (r->fh), n_labels);
n_labels = 0;
}
/* Read record type of type 4 record. */
{
- int32 rec_type;
+ int32_t rec_type;
assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
if (r->reverse_endian)
if (rec_type != 4)
lose ((ME, _("%s: Variable index record (type 4) does not immediately "
"follow value label record (type 3) as it should."),
- fh_get_filename (r->fh)));
+ fh_get_file_name (r->fh)));
}
/* Read number of variables associated with value label from type 4
if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
lose ((ME, _("%s: Number of variables associated with a value label (%d) "
"is not between 1 and the number of variables (%d)."),
- fh_get_filename (r->fh), n_vars, dict_get_var_cnt (dict)));
+ fh_get_file_name (r->fh), n_vars, dict_get_var_cnt (dict)));
/* Read the list of variables. */
var = xnmalloc (n_vars, sizeof *var);
for (i = 0; i < n_vars; i++)
{
- int32 var_idx;
+ int32_t var_idx;
struct variable *v;
/* Read variable index, check range. */
if (var_idx < 1 || var_idx > r->value_cnt)
lose ((ME, _("%s: Variable index associated with value label (%d) is "
"not between 1 and the number of values (%d)."),
- fh_get_filename (r->fh), var_idx, r->value_cnt));
+ fh_get_file_name (r->fh), var_idx, r->value_cnt));
/* Make sure it's a real variable. */
v = var_by_idx[var_idx - 1];
lose ((ME, _("%s: Variable index associated with value label (%d) "
"refers to a continuation of a string variable, not to "
"an actual variable."),
- fh_get_filename (r->fh), var_idx));
+ fh_get_file_name (r->fh), var_idx));
if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
lose ((ME, _("%s: Value labels are not allowed on long string "
"variables (%s)."),
- fh_get_filename (r->fh), v->name));
+ fh_get_file_name (r->fh), v->name));
/* Add it to the list of variables. */
var[i] = v;
lose ((ME, _("%s: Variables associated with value label are not all of "
"identical type. Variable %s has %s type, but variable "
"%s has %s type."),
- fh_get_filename (r->fh),
+ fh_get_file_name (r->fh),
var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
if (var[0]->type == NUMERIC)
msg (MW, _("%s: File contains duplicate label for value %g for "
"variable %s."),
- fh_get_filename (r->fh), label->value.f, v->name);
+ fh_get_file_name (r->fh), label->value.f, v->name);
else
msg (MW, _("%s: File contains duplicate label for value `%.*s' "
"for variable %s."),
- fh_get_filename (r->fh), v->width, label->value.s, v->name);
+ fh_get_file_name (r->fh), v->width, label->value.s, v->name);
}
}
free (var);
return 1;
-error:
+ error:
if (labels)
{
for (i = 0; i < n_labels; i++)
{
if (ferror (r->file))
msg (ME, _("%s: Reading system file: %s."),
- fh_get_filename (r->fh), strerror (errno));
+ fh_get_file_name (r->fh), strerror (errno));
else
corrupt_msg (ME, _("%s: Unexpected end of file."),
- fh_get_filename (r->fh));
+ fh_get_file_name (r->fh));
r->ok = false;
return NULL;
}
+
return buf;
}
if ( 0 != fseek(r->file, -byte_cnt, SEEK_CUR))
{
msg (ME, _("%s: Seeking system file: %s."),
- fh_get_filename (r->fh), strerror (errno));
+ fh_get_file_name (r->fh), strerror (errno));
}
}
/* Reads a type 6 (document) record from R's file.
   Returns 1 on success, 0 on failure.
   NOTE(review): `lose' presumably reports its message and jumps
   to the `error' label; its definition is not visible here --
   confirm. */
static int
read_documents (struct sfm_reader *r, struct dictionary *dict)
{
- int32 line_cnt;
+ int32_t line_cnt;
char *documents;
/* Reject the record if DICT already has documents: only one
   document record is accepted per file. */
if (dict_get_documents (dict) != NULL)
lose ((ME, _("%s: System file contains multiple "
"type 6 (document) records."),
- fh_get_filename (r->fh)));
+ fh_get_file_name (r->fh)));
assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
if (line_cnt <= 0)
lose ((ME, _("%s: Number of document lines (%ld) "
"must be greater than 0."),
- fh_get_filename (r->fh), (long) line_cnt));
+ fh_get_file_name (r->fh), (long) line_cnt));
/* Each document line accounts for 80 bytes of the record.  The
   final argument asks for one byte more than is read,
   presumably room for a null terminator -- confirm against
   buf_read. */
documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
/* FIXME? Run through asciify. */
free (documents);
return 1;
-error:
+ error:
return 0;
}
\f
if (ferror (r->file))
{
msg (ME, _("%s: Error reading file: %s."),
- fh_get_filename (r->fh), strerror (errno));
+ fh_get_file_name (r->fh), strerror (errno));
r->ok = false;
return 0;
}
return 0;
lose ((ME, _("%s: Compressed data is corrupted. Data ends "
"in partial case."),
- fh_get_filename (r->fh)));
+ fh_get_file_name (r->fh)));
case 253:
/* Code 253 indicates that the value is stored explicitly
following the instruction bytes. */
if (r->ptr == NULL || r->ptr >= r->end)
if (!buffer_input (r))
lose ((ME, _("%s: Unexpected end of file."),
- fh_get_filename (r->fh)));
+ fh_get_file_name (r->fh)));
memcpy (buf++, r->ptr++, sizeof *buf);
if (buf >= buf_end)
goto success;
{
if (buf_beg != buf)
lose ((ME, _("%s: Unexpected end of file."),
- fh_get_filename (r->fh)));
+ fh_get_file_name (r->fh)));
else
return 0;
}
abort ();
-success:
+ success:
/* We have filled up an entire record. Update state and return
successfully. */
r->y = ++p;
return 1;
-error:
+ error:
/* I/O error. */
r->ok = false;
return 0;
}
+
+/* Three-way comparison callback for sort().  _V1 and _V2 each
+   point to an array element of type `struct variable *'; the
+   variables are ordered by ascending `index' member.  Returns
+   -1, 0, or 1.  AUX is ignored. */
+static int
+compare_var_index(const void *_v1, const void *_v2, void *aux UNUSED)
+{
+  const struct variable *const *lhs = _v1;
+  const struct variable *const *rhs = _v2;
+
+  if ((*lhs)->index > (*rhs)->index)
+    return 1;
+  if ((*lhs)->index < (*rhs)->index)
+    return -1;
+  return 0;
+}
+
+
/* Reads one case from READER's file into C. Returns nonzero
only if successful. */
int
{
if (!r->ok)
return 0;
-
- if (!r->compressed && sizeof (flt64) == sizeof (double))
+
+ if ( ! r->svars )
+ {
+ r->svars = (struct variable **) hsh_data(r->var_hash);
+ sort(r->svars, hsh_count(r->var_hash),
+ sizeof(*r->svars), compare_var_index, 0);
+ }
+
+ if (!r->compressed && sizeof (flt64) == sizeof (double) && ! r->has_vls)
{
/* Fast path: external and internal representations are the
same, except possibly for endianness or SYSMIS. Read
{
int i;
- for (i = 0; i < r->value_cnt; i++)
- if (r->vars[i].width == 0)
- bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
+ for (i = 0; i < hsh_count(r->var_hash); i++)
+ {
+ struct variable *v = r->svars[i];
+ if (v->width == 0)
+ bswap_flt64 (&case_data_rw (c, v->fv)->f);
+ }
}
/* Fix up SYSMIS values if needed.
if (r->sysmis != SYSMIS)
{
int i;
-
- for (i = 0; i < r->value_cnt; i++)
- if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
- case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
+ for (i = 0; i < hsh_count(r->var_hash); i++)
+ {
+ struct variable *v = r->svars[i];
+ /* I counts variables, not case values, so the value to test
+ is the one at V's own case index, V->fv: the same slot
+ that the byte-swap loop touches and that is rewritten
+ on the next line. */
+ if (v->width == 0 && case_num (c, v->fv) == r->sysmis)
+ case_data_rw (c, v->fv)->f = SYSMIS;
+ }
}
}
else
bounce_size = sizeof *bounce * r->value_cnt;
bounce = bounce_cur = local_alloc (bounce_size);
+ memset(bounce, 0, bounce_size);
+
if (!r->compressed)
read_ok = fread_ok (r, bounce, bounce_size);
else
return 0;
}
- for (i = 0; i < r->value_cnt; i++)
+ for (i = 0; i < hsh_count(r->var_hash); i++)
{
- struct sfm_var *v = &r->vars[i];
+ struct variable *tv = r->svars[i];
- if (v->width == 0)
+ if (tv->width == 0)
{
flt64 f = *bounce_cur++;
if (r->reverse_endian)
bswap_flt64 (&f);
- case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f;
+ case_data_rw (c, tv->fv)->f = f == r->sysmis ? SYSMIS : f;
}
- else if (v->width != -1)
+ else if (tv->width != -1)
{
- memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
- bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
+ flt64 *bc_start = bounce_cur;
+ int ofs = 0;
+ while (ofs < tv->width )
+ {
+ const int chunk = MIN (MAX_LONG_STRING, tv->width - ofs);
+ memcpy (case_data_rw (c, tv->fv)->s + ofs, bounce_cur, chunk);
+
+ bounce_cur += DIV_RND_UP (chunk, sizeof (flt64));
+
+ ofs += chunk;
+ }
+ bounce_cur = bc_start + width_to_bytes(tv->width) / sizeof(flt64);
}
}
if (ferror (r->file))
{
msg (ME, _("%s: Reading system file: %s."),
- fh_get_filename (r->fh), strerror (errno));
+ fh_get_file_name (r->fh), strerror (errno));
r->ok = false;
}
else if (read_bytes != 0)
{
msg (ME, _("%s: Partial record at end of system file."),
- fh_get_filename (r->fh));
+ fh_get_file_name (r->fh));
r->ok = false;
}
return 0;
return false;
return true;
}
+