#include <errno.h>
#include <float.h>
#include <c-ctype.h>
+#include <minmax.h>
#include <libpspp/alloc.h>
+#include <libpspp/assertion.h>
#include <libpspp/message.h>
#include <libpspp/compiler.h>
#include <libpspp/magic.h>
#include <libpspp/misc.h>
#include <libpspp/str.h>
+#include <libpspp/hash.h>
+#include <libpspp/array.h>
#include "sys-file-reader.h"
#include "sfm-private.h"
#include "format.h"
#include "value-labels.h"
#include "variable.h"
+#include "value.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
/* System file reader. */
struct sfm_reader
- {
- struct file_handle *fh; /* File handle. */
- FILE *file; /* File stream. */
-
- int reverse_endian; /* 1=file has endianness opposite us. */
- int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
- int value_cnt; /* Number of `union values's per case. */
- long case_cnt; /* Number of cases, -1 if unknown. */
- int compressed; /* 1=compressed, 0=not compressed. */
- double bias; /* Compression bias, usually 100.0. */
- int weight_idx; /* 0-based index of weighting variable, or -1. */
- bool ok; /* False after an I/O error or corrupt data. */
-
- /* Variables. */
- struct sfm_var *vars; /* Variables. */
-
- /* File's special constants. */
- flt64 sysmis;
- flt64 highest;
- flt64 lowest;
-
- /* Decompression buffer. */
- flt64 *buf; /* Buffer data. */
- flt64 *ptr; /* Current location in buffer. */
- flt64 *end; /* End of buffer data. */
-
- /* Compression instruction octet. */
- unsigned char x[8]; /* Current instruction octet. */
- unsigned char *y; /* Location in current instruction octet. */
- };
+{
+ struct file_handle *fh; /* File handle. */
+ FILE *file; /* File stream. */
+
+ int reverse_endian; /* 1=file has endianness opposite us. */
+ int value_cnt; /* Number of `union value's per case, or -1 if the
+                   header's count was out of range. */
+ long case_cnt; /* Number of cases, -1 if unknown. */
+ int compressed; /* 1=compressed, 0=not compressed. */
+ double bias; /* Compression bias, usually 100.0. */
+ int weight_idx; /* 0-based index of weighting variable, or -1. */
+ bool ok; /* False after an I/O error or corrupt data. */
+ bool has_vls; /* True if the file has one or more very long strings
+                  (set when a record 7 subtype 14 is read); disables
+                  the fast path when reading cases. */
+
+ /* Variables. */
+ struct sfm_var *vars; /* Width and case index of each dictionary
+                          variable, copied from the dictionary. */
+ size_t var_cnt; /* Number of elements in VARS. */
+
+ /* File's special constants. */
+ flt64 sysmis;
+ flt64 highest;
+ flt64 lowest;
+
+ /* Decompression buffer. */
+ flt64 *buf; /* Buffer data. */
+ flt64 *ptr; /* Current location in buffer. */
+ flt64 *end; /* End of buffer data. */
+
+ /* Compression instruction octet. */
+ unsigned char x[8]; /* Current instruction octet. */
+ unsigned char *y; /* Location in current instruction octet. */
+};
/* A variable in a system file. */
struct sfm_var
- {
- int width; /* 0=numeric, otherwise string width. */
- int fv; /* Index into case. */
- };
+{
+ int width; /* 0=numeric, otherwise string width. */
+ int fv; /* Index into case: the value index handed to
+            case_data_rw() when this variable's data is stored. */
+};
\f
/* Utilities. */
corrupt_msg (int class, const char *format,...)
PRINTF_FORMAT (2, 3);
-/* Displays a corrupt sysfile error. */
-static void
-corrupt_msg (int class, const char *format,...)
+ /* Displays a corrupt sysfile error.
+    The message text is "corrupt system file: " followed by the
+    printf-style expansion of FORMAT and the variadic arguments.
+    CLASS is a message class; it is mapped to the message's category
+    and severity. */
+ static void
+ corrupt_msg (int class, const char *format,...)
{
- struct error e;
+ struct msg m;
va_list args;
struct string text;
- ds_create (&text, _("corrupt system file: "));
+ /* Start with the fixed prefix, then append the formatted details. */
+ ds_init_cstr (&text, _("corrupt system file: "));
va_start (args, format);
- ds_vprintf (&text, format, args);
+ ds_put_vformat (&text, format, args);
va_end (args);
- e.category = msg_class_to_category (class);
- e.severity = msg_class_to_severity (class);
- e.where.file_name = NULL;
- e.where.line_number = 0;
- e.text = ds_c_str (&text);
+ m.category = msg_class_to_category (class);
+ m.severity = msg_class_to_severity (class);
+ /* No source location is attached to the message. */
+ m.where.file_name = NULL;
+ m.where.line_number = 0;
+ m.text = ds_cstr (&text);
- err_msg (&e);
+ /* NOTE(review): TEXT is never destroyed in this function; confirm
+    that msg_emit takes ownership of m.text, otherwise the string
+    buffer leaks on every call. */
+ msg_emit (&m);
}
/* Closes a system file after we're done with it. */
if (r->fh != NULL)
fh_close (r->fh, "system file", "rs");
-
+
free (r->vars);
free (r->buf);
free (r);
goto error; \
} while (0)
+
+/* A short variable name paired with its long name; the element type of
+   the short_to_long hash table.  Neither string is owned by the pair:
+   the pair's destructor frees only the pair itself. */
+struct name_pair
+{
+ char *shortname; /* Hash key; compared on at most SHORT_NAME_LEN
+                     characters. */
+ char *longname; /* Long name that SHORTNAME maps to. */
+};
+
+/* Hash table comparison callback for `struct name_pair'.
+   Compares the short names of _P1 and _P2 on at most SHORT_NAME_LEN
+   characters and returns the strncmp() result: negative, zero, or
+   positive as _P1's short name sorts before, equal to, or after
+   _P2's.  AUX is unused. */
+static int
+pair_sn_compare(const void *_p1, const void *_p2, void *aux UNUSED)
+{
+  const struct name_pair *p1 = _p1;
+  const struct name_pair *p2 = _p2;
+
+  /* strncmp() already stops at the first NUL or after SHORT_NAME_LEN
+     characters, whichever comes first, so the names can be compared
+     in place.  Copying them into bounded buffers first added nothing
+     and read one byte beyond the compared prefix. */
+  return strncmp(p1->shortname, p2->shortname, SHORT_NAME_LEN);
+}
+
+/* Hash table hashing callback for `struct name_pair'.
+   Hashes the short name of _P, considering at most SHORT_NAME_LEN
+   characters so that any two pairs that pair_sn_compare() treats as
+   equal also hash identically.  AUX is unused. */
+static unsigned int
+pair_sn_hash(const void *_p, void *aux UNUSED)
+{
+  const struct name_pair *p = _p;
+  size_t len = 0;
+
+  /* Bounded length: never look past SHORT_NAME_LEN characters.  The
+     previous version copied SHORT_NAME_LEN + 1 characters into a
+     buffer of that size and then called strlen() on it, which ran off
+     the end of the buffer whenever the name had no NUL within the
+     first SHORT_NAME_LEN + 1 characters. */
+  while (len < SHORT_NAME_LEN && p->shortname[len] != '\0')
+    len++;
+
+  return hsh_hash_bytes(p->shortname, len);
+}
+
+/* Hash table destructor callback: releases one `struct name_pair'.
+   Only the pair itself is freed; the strings it points to are left
+   untouched.  AUX is unused. */
+static void
+pair_sn_free(void *p, void *aux UNUSED)
+{
+  struct name_pair *pair = p;
+
+  free(pair);
+}
+
+
+
/* Opens the system file designated by file handle FH for
reading. Reads the system file's dictionary into *DICT.
If INFO is non-null, then it receives additional info about the
struct sfm_reader *r = NULL;
struct variable **var_by_idx = NULL;
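+ /* Raw data of record 7 subtype 13 (the long variable name map); the name pairs stored in short_to_long point into this buffer. */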
+ char *subrec14data = 0;
+
+ /* A hash table of long variable names indexed by short name */
+ struct hsh_table *short_to_long = NULL;
+
*dict = dict_create ();
if (!fh_open (fh, FH_REF_FILE, "system file", "rs"))
goto error;
r->file = fn_open (fh_get_file_name (fh), "rb");
r->reverse_endian = 0;
- r->fix_specials = 0;
r->value_cnt = 0;
r->case_cnt = 0;
r->compressed = 0;
r->bias = 100.0;
r->weight_idx = -1;
r->ok = true;
+ r->has_vls = false;
r->vars = NULL;
if (weight_var == NULL)
lose ((ME,
_("%s: Weighting variable may not be a continuation of "
- "a long string variable."), fh_get_file_name (fh)));
+ "a long string variable."), fh_get_file_name (fh)));
else if (weight_var->type == ALPHA)
lose ((ME, _("%s: Weighting variable may not be a string variable."),
fh_get_file_name (fh)));
if (r->reverse_endian)
bswap_int32 (&rec_type);
+
switch (rec_type)
{
case 3:
case 7:
{
struct
- {
- int32_t subtype P;
- int32_t size P;
- int32_t count P;
- }
+ {
+ int32_t subtype P;
+ int32_t size P;
+ int32_t count P;
+ }
data;
unsigned long bytes;
bswap_int32 (&data.count);
}
bytes = data.size * data.count;
+
if (bytes < data.size || bytes < data.count)
lose ((ME, "%s: Record type %d subtype %d too large.",
fh_get_file_name (r->fh), rec_type, data.subtype));
{
const int n_vars = data.count / 3 ;
int i;
- if ( data.count % 3 || n_vars > dict_get_var_cnt(*dict) )
+ if ( data.count % 3 || n_vars != dict_get_var_cnt(*dict) )
{
msg (MW, _("%s: Invalid subrecord length. "
"Record: 7; Subrecord: 11"),
fh_get_file_name (r->fh));
skip = 1;
+ break;
}
for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i )
assertive_buf_read (r, ¶ms, sizeof(params), 0);
+ if ( ! measure_is_valid(params.measure)
+ ||
+ ! alignment_is_valid(params.align))
+ {
+ msg(MW,
+ _("%s: Invalid variable display parameters. Default parameters substituted."),
+ fh_get_file_name(r->fh));
+ continue;
+ }
+
v = dict_get_var(*dict, i);
v->measure = params.measure;
case 13: /* SPSS 12.0 Long variable name map */
{
- char *buf, *short_name, *save_ptr;
+ char *short_name, *save_ptr;
int idx;
/* Read data. */
- buf = xmalloc (bytes + 1);
- if (!buf_read (r, buf, bytes, 0))
+ subrec14data = xmalloc (bytes + 1);
+ if (!buf_read (r, subrec14data, bytes, 0))
{
- free (buf);
goto error;
}
- buf[bytes] = '\0';
+ subrec14data[bytes] = '\0';
+
+ short_to_long = hsh_create(4,
+ pair_sn_compare,
+ pair_sn_hash,
+ pair_sn_free,
+ 0);
/* Parse data. */
- for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0;
+ for (short_name = strtok_r (subrec14data, "=", &save_ptr), idx = 0;
short_name != NULL;
short_name = strtok_r (NULL, "=", &save_ptr), idx++)
{
+ struct name_pair *pair ;
char *long_name = strtok_r (NULL, "\t", &save_ptr);
struct variable *v;
/* Identify any duplicates. */
if ( compare_var_names(short_name, long_name, 0) &&
- NULL != dict_lookup_var (*dict, long_name))
+ NULL != dict_lookup_var (*dict, long_name))
lose ((ME, _("%s: Duplicate long variable name `%s' "
"within system file."),
fh_get_file_name (r->fh), long_name));
dict_rename_var (*dict, v, long_name);
var_set_short_name (v, short_name);
- /* For compatability, make sure dictionary
+ pair = xmalloc(sizeof *pair);
+ pair->shortname = short_name;
+ pair->longname = long_name;
+ hsh_insert(short_to_long, pair);
+#if 0
+ /* This messes up the processing of subtype 14 (below).
+ I'm not sure if it is needed anyway, so I'm removing it for
+ now. If it's needed, then it will need to be done after all the
+ records have been processed. --- JMD 27 April 2006
+ */
+
+ /* For compatibility, make sure dictionary
is in long variable name map order. In
the common case, this has no effect,
because the dictionary and the long
variable name map are already in the
same order. */
dict_reorder_var (*dict, v, idx);
+#endif
}
+
+ }
+ break;
- /* Free data. */
- free (buf);
+ case 14:
+ {
+ int j = 0;
+ bool eq_seen = false;
+ int i;
+
+ /* Read data. */
+ char *buffer = xmalloc (bytes + 1);
+ if (!buf_read (r, buffer, bytes, 0))
+ {
+ free (buffer);
+ goto error;
+ }
+ buffer[bytes] = '\0';
+
+ r->has_vls = true;
+
+ /* Note: SPSS v13 terminates this record with 00,
+ whereas SPSS v14 terminates it with 00 09. We must
+ accept either */
+ for(i = 0; i < bytes ; ++i)
+ {
+ long int length;
+ static char name[SHORT_NAME_LEN + 1] = {0};
+ static char len_str[6] ={0};
+
+ switch( buffer[i] )
+ {
+ case '=':
+ eq_seen = true;
+ j = 0;
+ break;
+ case '\0':
+ length = strtol(len_str, 0, 10);
+ if ( length != LONG_MAX && length != LONG_MIN)
+ {
+ char *lookup_name = name;
+ int l;
+ int idx;
+ struct variable *v;
+
+ if ( short_to_long )
+ {
+ struct name_pair pair;
+ struct name_pair *p;
+
+ pair.shortname = name;
+ p = hsh_find(short_to_long, &pair);
+ if ( p )
+ lookup_name = p->longname;
+ }
+
+ v = dict_lookup_var(*dict, lookup_name);
+ if ( !v )
+ {
+ corrupt_msg(MW,
+ _("%s: No variable called %s but it is listed in length table."),
+ fh_get_file_name (r->fh), lookup_name);
+
+ goto error;
+
+ }
+
+ l = length;
+ if ( v->width > EFFECTIVE_LONG_STRING_LENGTH )
+ l -= EFFECTIVE_LONG_STRING_LENGTH;
+ else
+ l -= v->width;
+
+ idx = v->index;
+ while ( l > 0 )
+ {
+ struct variable *v_next;
+ v_next = dict_get_var(*dict, idx + 1);
+
+ if ( v_next->width > EFFECTIVE_LONG_STRING_LENGTH )
+ l -= EFFECTIVE_LONG_STRING_LENGTH;
+ else
+ l -= v_next->width;
+
+ dict_delete_var(*dict, v_next);
+ }
+
+ assert ( length > MAX_LONG_STRING );
+
+ v->width = length;
+ v->print.w = v->width;
+ v->write.w = v->width;
+ v->nv = DIV_RND_UP (length, MAX_SHORT_STRING);
+ }
+ eq_seen = false;
+ memset(name, 0, SHORT_NAME_LEN+1);
+ memset(len_str, 0, 6);
+ j = 0;
+ break;
+ case '\t':
+ break;
+ default:
+ if ( eq_seen )
+ len_str[j] = buffer[i];
+ else
+ name[j] = buffer[i];
+ j++;
+ break;
+ }
+ }
+ free(buffer);
+ dict_compact_values(*dict);
}
break;
int32_t filler;
assertive_buf_read (r, &filler, sizeof filler, 0);
+
goto success;
}
default:
corrupt_msg(MW, _("%s: Unrecognized record type %d."),
- fh_get_file_name (r->fh), rec_type);
+ fh_get_file_name (r->fh), rec_type);
}
}
-success:
+ success:
/* Come here on successful completion. */
+
+ /* Create an index of dictionary variable widths for
+ sfm_read_case to use. We cannot use the `struct variables'
+ from the dictionary we created, because the caller owns the
+ dictionary and may destroy or modify its variables. */
+ {
+ size_t i;
+
+ r->var_cnt = dict_get_var_cnt (*dict);
+ r->vars = xnmalloc (r->var_cnt, sizeof *r->vars);
+ for (i = 0; i < r->var_cnt; i++)
+ {
+ struct variable *v = dict_get_var (*dict, i);
+ struct sfm_var *sv = &r->vars[i];
+ sv->width = v->width;
+ sv->fv = v->fv;
+ }
+ }
+
free (var_by_idx);
+ hsh_destroy(short_to_long);
+ free (subrec14data);
return r;
-error:
+ error:
/* Come here on unsuccessful completion. */
sfm_close_reader (r);
free (var_by_idx);
+ hsh_destroy(short_to_long);
+ free (subrec14data);
if (*dict != NULL)
{
dict_destroy (*dict);
fh_get_file_name (r->fh),
file_bigendian ? _("big-endian") : _("little-endian"),
data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
- : _("unknown"))));
+ : _("unknown"))));
/* PORTME: Character representation code. */
if (data[7] != 2 && data[7] != 3)
return 1;
-error:
+ error:
return 0;
}
return 1;
-error:
+ error:
return 0;
}
fh_get_file_name (r->fh), hdr.layout_code));
r->reverse_endian = 1;
- bswap_int32 (&hdr.case_size);
+ bswap_int32 (&hdr.nominal_case_size);
bswap_int32 (&hdr.compress);
bswap_int32 (&hdr.weight_idx);
bswap_int32 (&hdr.case_cnt);
/* Copy basic info and verify correctness. */
- r->value_cnt = hdr.case_size;
+ r->value_cnt = hdr.nominal_case_size;
- /* If value count is rediculous, then force it to -1 (a sentinel value) */
+ /* If value count is ridiculous, then force it to -1 (a
+ sentinel value). */
if ( r->value_cnt < 0 ||
r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
r->value_cnt = -1;
return 1;
-error:
+ error:
return 0;
}
*var_by_idx = 0;
- /* Pre-allocate variables. */
- if (r->value_cnt != -1)
- {
- *var_by_idx = xnmalloc (r->value_cnt, sizeof **var_by_idx);
- r->vars = xnmalloc (r->value_cnt, sizeof *r->vars);
- }
-
/* Read in the entry for each variable and use the info to
initialize the dictionary. */
int nv;
int j;
- if ( r->value_cnt != -1 && i >= r->value_cnt )
- break;
-
assertive_buf_read (r, &sv, sizeof sv, 0);
if (r->reverse_endian)
break;
}
- if ( -1 == r->value_cnt )
- {
- *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx);
- r->vars = xnrealloc (r->vars, i + 1, sizeof *r->vars);
- }
+ *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx);
/* If there was a long string previously, make sure that the
continuations are present; otherwise make sure there aren't
fh_get_file_name (r->fh), i));
- r->vars[i].width = -1;
(*var_by_idx)[i] = NULL;
long_string_count--;
continue;
fh_get_file_name (r->fh), i, sv.type));
if (sv.has_var_label != 0 && sv.has_var_label != 1)
lose ((ME, _("%s: position %d: Variable label indicator field is not "
- "0 or 1."), fh_get_file_name (r->fh), i));
+ "0 or 1."), fh_get_file_name (r->fh), i));
if (sv.n_missing_values < -3 || sv.n_missing_values > 3
|| sv.n_missing_values == -1)
lose ((ME, _("%s: position %d: Missing value indicator field is not "
lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
fh_get_file_name (r->fh), name));
+ /* Set the short name the same as the long name */
var_set_short_name (vv, vv->name);
/* Case reading data. */
if (!parse_format_spec (r, sv.print, &vv->print, vv)
|| !parse_format_spec (r, sv.write, &vv->write, vv))
goto error;
-
- r->vars[i].width = vv->width;
- r->vars[i].fv = vv->fv;
-
}
/* Some consistency checks. */
if (next_value != r->value_cnt)
corrupt_msg(MW, _("%s: System file header indicates %d variable positions but "
- "%d were read from file."),
- fh_get_file_name (r->fh), r->value_cnt, next_value);
+ "%d were read from file."),
+ fh_get_file_name (r->fh), r->value_cnt, next_value);
return 1;
-error:
+ error:
return 0;
}
}
return 1;
-error:
+ error:
return 0;
}
struct dictionary *dict, struct variable **var_by_idx)
{
struct label
- {
- char raw_value[8]; /* Value as uninterpreted bytes. */
- union value value; /* Value. */
- char *label; /* Null-terminated label string. */
- };
+ {
+ char raw_value[8]; /* Value as uninterpreted bytes. */
+ union value value; /* Value. */
+ char *label; /* Null-terminated label string. */
+ };
struct label *labels = NULL;
int32_t n_labels; /* Number of labels. */
free (var);
return 1;
-error:
+ error:
if (labels)
{
for (i = 0; i < n_labels; i++)
r->ok = false;
return NULL;
}
+
return buf;
}
free (documents);
return 1;
-error:
+ error:
return 0;
}
\f
p = r->x;
}
- abort ();
+ NOT_REACHED ();
-success:
+ success:
/* We have filled up an entire record. Update state and return
successfully. */
r->y = ++p;
return 1;
-error:
+ error:
/* I/O error. */
r->ok = false;
return 0;
{
if (!r->ok)
return 0;
-
- if (!r->compressed && sizeof (flt64) == sizeof (double))
+
+ if (!r->compressed && sizeof (flt64) == sizeof (double) && ! r->has_vls)
{
/* Fast path: external and internal representations are the
same, except possibly for endianness or SYSMIS. Read
{
int i;
- for (i = 0; i < r->value_cnt; i++)
+ for (i = 0; i < r->var_cnt; i++)
if (r->vars[i].width == 0)
bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
}
{
int i;
- for (i = 0; i < r->value_cnt; i++)
+ for (i = 0; i < r->var_cnt; i++)
if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
}
bounce_size = sizeof *bounce * r->value_cnt;
bounce = bounce_cur = local_alloc (bounce_size);
+ memset(bounce, 0, bounce_size);
+
if (!r->compressed)
read_ok = fread_ok (r, bounce, bounce_size);
else
return 0;
}
- for (i = 0; i < r->value_cnt; i++)
+ for (i = 0; i < r->var_cnt; i++)
{
- struct sfm_var *v = &r->vars[i];
+ struct sfm_var *sv = &r->vars[i];
- if (v->width == 0)
+ if (sv->width == 0)
{
flt64 f = *bounce_cur++;
if (r->reverse_endian)
bswap_flt64 (&f);
- case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f;
+ case_data_rw (c, sv->fv)->f = f == r->sysmis ? SYSMIS : f;
}
- else if (v->width != -1)
+ else
{
- memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
- bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
+ flt64 *bc_start = bounce_cur;
+ int ofs = 0;
+ while (ofs < sv->width )
+ {
+ const int chunk = MIN (MAX_LONG_STRING, sv->width - ofs);
+ memcpy (case_data_rw (c, sv->fv)->s + ofs, bounce_cur, chunk);
+
+ bounce_cur += DIV_RND_UP (chunk, sizeof (flt64));
+
+ ofs += chunk;
+ }
+ bounce_cur = bc_start + width_to_bytes(sv->width) / sizeof(flt64);
}
}