You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA. */
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
#include <config.h>
#include "sfm-read.h"
/* Variables. */
struct sfm_var *vars; /* Variables. */
- size_t var_cnt; /* Number of variables. */
/* File's special constants. */
flt64 sysmis;
\f
/* Dictionary reader. */
+static void buf_unread(struct sfm_reader *r, size_t byte_cnt);
+
static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
size_t min_alloc);
r->weight_idx = -1;
r->vars = NULL;
- r->var_cnt = 0;
r->sysmis = -FLT64_MAX;
r->highest = FLT64_MAX;
if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
goto error;
+
/* Handle weighting. */
if (r->weight_idx != -1)
{
- struct variable *weight_var = var_by_idx[r->weight_idx];
+ struct variable *weight_var;
+
+ if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt)
+ lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
+ "and number of elements per case (%d)."),
+ handle_get_filename (r->fh), r->weight_idx, r->value_cnt));
+
+
+ weight_var = var_by_idx[r->weight_idx];
if (weight_var == NULL)
lose ((ME,
int32 count P;
}
data;
+ unsigned long bytes;
int skip = 0;
bswap_int32 (&data.size);
bswap_int32 (&data.count);
}
+ bytes = data.size * data.count;
+ if (bytes < data.size || bytes < data.count)
+ lose ((ME, "%s: Record type %d subtype %d too large.",
+ handle_get_filename (r->fh), rec_type, data.subtype));
switch (data.subtype)
{
break;
case 5:
- case 6:
- case 11: /* ?? Used by SPSS 8.0. */
+ case 6: /* ?? Used by SPSS 8.0. */
skip = 1;
break;
+
+ case 11: /* Variable display parameters */
+ {
+ const int n_vars = data.count / 3 ;
+ int i;
+ if ( data.count % 3 || n_vars > dict_get_var_cnt(*dict) )
+ {
+ msg (MW, _("%s: Invalid subrecord length. "
+ "Record: 7; Subrecord: 11"),
+ handle_get_filename (r->fh));
+ skip = 1;
+ }
+
+ for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i )
+ {
+ struct
+ {
+ int32 measure P;
+ int32 width P;
+ int32 align P;
+ }
+ params;
+
+ struct variable *v;
+
+ assertive_buf_read (r, ¶ms, sizeof(params), 0);
+
+ v = dict_get_var(*dict, i);
+
+ v->measure = params.measure;
+ v->display_width = params.width;
+ v->alignment = params.align;
+ }
+ }
+ break;
+
+ case 13: /* SPSS 12.0 Long variable name map */
+ {
+ char *buf, *short_name, *save_ptr;
+ int idx;
+
+ /* Read data. */
+ buf = xmalloc (bytes + 1);
+ if (!buf_read (r, buf, bytes, 0))
+ {
+ free (buf);
+ goto error;
+ }
+ buf[bytes] = '\0';
+
+ /* Parse data. */
+ for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0;
+ short_name != NULL;
+ short_name = strtok_r (NULL, "=", &save_ptr), idx++)
+ {
+ char *long_name = strtok_r (NULL, "\t", &save_ptr);
+ struct variable *v;
+
+ /* Validate long name. */
+ if (long_name == NULL)
+ {
+ msg (MW, _("%s: Trailing garbage in long variable "
+ "name map."),
+ handle_get_filename (r->fh));
+ break;
+ }
+ if (!var_is_valid_name (long_name, false))
+ {
+ msg (MW, _("%s: Long variable mapping to invalid "
+ "variable name `%s'."),
+ handle_get_filename (r->fh), long_name);
+ break;
+ }
+
+ /* Find variable using short name. */
+ v = dict_lookup_var (*dict, short_name);
+ if (v == NULL)
+ {
+ msg (MW, _("%s: Long variable mapping for "
+ "nonexistent variable %s."),
+ handle_get_filename (r->fh), short_name);
+ break;
+ }
+
+ /* Identify any duplicates. */
+ if ( compare_var_names(short_name, long_name, 0) &&
+ NULL != dict_lookup_var (*dict, long_name))
+ {
+ lose ((ME, _("%s: Duplicate long variable name `%s' "
+ "within system file."),
+ handle_get_filename (r->fh), long_name));
+ break;
+ }
+
+ /* Set long name.
+ Renaming a variable may clear the short
+ name, but we want to retain it, so
+ re-set it explicitly. */
+ dict_rename_var (*dict, v, long_name);
+ var_set_short_name (v, short_name);
+
+ /* For compatibility, make sure dictionary
+ is in long variable name map order. In
+ the common case, this has no effect,
+ because the dictionary and the long
+ variable name map are already in the
+ same order. */
+ dict_reorder_var (*dict, v, idx);
+ }
+
+ /* Free data. */
+ free (buf);
+ }
+ break;
default:
msg (MW, _("%s: Unrecognized record type 7, subtype %d "
}
default:
- lose ((ME, _("%s: Unrecognized record type %d."),
- handle_get_filename (r->fh), rec_type));
+ corrupt_msg(MW, _("%s: Unrecognized record type %d."),
+ handle_get_filename (r->fh), rec_type);
}
}
bswap_flt64 (&hdr.bias);
}
+
/* Copy basic info and verify correctness. */
r->value_cnt = hdr.case_size;
- if (r->value_cnt <= 0
- || r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
- lose ((ME, _("%s: Number of elements per case (%d) is not between 1 "
- "and %d."),
- handle_get_filename (r->fh), r->value_cnt,
- INT_MAX / sizeof (union value) / 2));
+
+ /* If value count is ridiculous, then force it to -1 (a sentinel value) */
+ if ( r->value_cnt < 0 ||
+ r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
+ r->value_cnt = -1;
r->compressed = hdr.compress;
r->weight_idx = hdr.weight_idx - 1;
- if (hdr.weight_idx < 0 || hdr.weight_idx > r->value_cnt)
- lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
- "and number of elements per case (%d)."),
- handle_get_filename (r->fh), hdr.weight_idx, r->value_cnt));
r->case_cnt = hdr.case_cnt;
if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
int long_string_count = 0; /* # of long string continuation
records still expected. */
int next_value = 0; /* Index to next `value' structure. */
- size_t var_cap = 0;
- /* Allocate variables. */
- *var_by_idx = xmalloc (sizeof **var_by_idx * r->value_cnt);
+ assert(r);
+
+ *var_by_idx = 0;
+
+ /* Pre-allocate variables. */
+ if ( r->value_cnt != -1 )
+ {
+ *var_by_idx = xmalloc(r->value_cnt * sizeof (**var_by_idx));
+ r->vars = xmalloc( r->value_cnt * sizeof (*r->vars) );
+ }
+
/* Read in the entry for each variable and use the info to
initialize the dictionary. */
- for (i = 0; i < r->value_cnt; i++)
+ for (i = 0; ; ++i)
{
struct variable *vv;
- char name[9];
+ char name[SHORT_NAME_LEN + 1];
int nv;
int j;
+ if ( r->value_cnt != -1 && i >= r->value_cnt )
+ break;
+
assertive_buf_read (r, &sv, sizeof sv, 0);
if (r->reverse_endian)
bswap_int32 (&sv.write);
}
+ /* We've come to the end of the variable entries */
if (sv.rec_type != 2)
- lose ((ME, _("%s: position %d: Bad record type (%d); "
- "the expected value was 2."),
- handle_get_filename (r->fh), i, sv.rec_type));
+ {
+ buf_unread(r, sizeof sv);
+ r->value_cnt = i;
+ break;
+ }
+
+ if ( -1 == r->value_cnt )
+ {
+ *var_by_idx = xrealloc (*var_by_idx, sizeof **var_by_idx * (i + 1));
+ r->vars = xrealloc(r->vars, (i + 1) * sizeof (*r->vars) );
+ }
/* If there was a long string previously, make sure that the
continuations are present; otherwise make sure there aren't
"proper number of continuation records."),
handle_get_filename (r->fh), i));
+
+ r->vars[i].width = -1;
(*var_by_idx)[i] = NULL;
long_string_count--;
continue;
name[0] = toupper ((unsigned char) (sv.name[0]));
/* Copy remaining characters of variable name. */
- for (j = 1; j < 8; j++)
+ for (j = 1; j < SHORT_NAME_LEN; j++)
{
int c = (unsigned char) sv.name[j];
}
name[j] = 0;
+ if ( ! var_is_valid_name(name, false) )
+ lose ((ME, _("%s: Invalid variable name `%s' within system file."),
+ handle_get_filename (r->fh), name));
+
/* Create variable. */
+
vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
if (vv == NULL)
lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
handle_get_filename (r->fh), name));
+ var_set_short_name (vv, vv->name);
+
/* Case reading data. */
nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
long_string_count = nv - 1;
"length %d."),
handle_get_filename (r->fh), vv->name, len));
- /* Read label into variable structure. */
- vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32)), len + 1);
- if (vv->label == NULL)
- goto error;
- vv->label[len] = '\0';
+ if ( len != 0 )
+ {
+ /* Read label into variable structure. */
+ vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32)), len + 1);
+ if (vv->label == NULL)
+ goto error;
+ vv->label[len] = '\0';
+ }
}
/* Set missing values. */
|| !parse_format_spec (r, sv.write, &vv->write, vv))
goto error;
- /* Add variable to list. */
- if (var_cap >= r->var_cnt)
- {
- var_cap = 2 + r->var_cnt * 2;
- r->vars = xrealloc (r->vars, var_cap * sizeof *r->vars);
- }
- r->vars[r->var_cnt].width = vv->width;
- r->vars[r->var_cnt].fv = vv->fv;
- r->var_cnt++;
+ r->vars[i].width = vv->width;
+ r->vars[i].fv = vv->fv;
+
}
/* Some consistency checks. */
lose ((ME, _("%s: Long string continuation records omitted at end of "
"dictionary."),
handle_get_filename (r->fh)));
+
if (next_value != r->value_cnt)
- lose ((ME, _("%s: System file header indicates %d variable positions but "
+ corrupt_msg(MW, _("%s: System file header indicates %d variable positions but "
"%d were read from file."),
- handle_get_filename (r->fh), r->value_cnt, next_value));
+ handle_get_filename (r->fh), r->value_cnt, next_value);
+
return 1;
/* Translates the format spec from sysfile format to internal
format. */
static int
-parse_format_spec (struct sfm_reader *r, int32 s, struct fmt_spec *v, struct variable *vv)
+parse_format_spec (struct sfm_reader *r, int32 s,
+ struct fmt_spec *f, struct variable *v)
{
- v->type = translate_fmt ((s >> 16) & 0xff);
- if (v->type == -1)
+ f->type = translate_fmt ((s >> 16) & 0xff);
+ if (f->type == -1)
lose ((ME, _("%s: Bad format specifier byte (%d)."),
handle_get_filename (r->fh), (s >> 16) & 0xff));
- v->w = (s >> 8) & 0xff;
- v->d = s & 0xff;
-
- /* FIXME? Should verify the resulting specifier more thoroughly. */
+ f->w = (s >> 8) & 0xff;
+ f->d = s & 0xff;
- if (v->type == -1)
- lose ((ME, _("%s: Bad format specifier byte (%d)."),
- handle_get_filename (r->fh), (s >> 16) & 0xff));
- if ((vv->type == ALPHA) ^ ((formats[v->type].cat & FCAT_STRING) != 0))
+ if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0))
lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
handle_get_filename (r->fh),
- vv->type == ALPHA ? _("String") : _("Numeric"),
- vv->name,
- formats[v->type].cat & FCAT_STRING ? _("string") : _("numeric"),
- formats[v->type].name));
+ v->type == ALPHA ? _("String") : _("Numeric"),
+ v->name,
+ formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"),
+ formats[f->type].name));
+
+ if (!check_output_specifier (f, false)
+ || !check_specifier_width (f, v->width, false))
+ {
+ msg (ME, _("%s variable %s has invalid format specifier %s."),
+ v->type == NUMERIC ? _("Numeric") : _("String"),
+ v->name, fmt_to_string (f));
+ *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0);
+ }
return 1;
error:
if (r->reverse_endian)
bswap_int32 (&n_labels);
+ if ( n_labels >= ((int32) ~0) / sizeof *labels)
+ {
+ corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."),
+ handle_get_filename (r->fh), n_labels);
+ n_labels = 0;
+ }
+
/* Allocate memory. */
- labels = xmalloc (n_labels * sizeof *labels);
+ labels = xcalloc (n_labels , sizeof *labels);
for (i = 0; i < n_labels; i++)
labels[i].label = NULL;
static void *
buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
{
- if (buf == NULL)
+ assert (r);
+
+ if (buf == NULL && byte_cnt > 0 )
buf = xmalloc (max (byte_cnt, min_alloc));
+
+ if ( byte_cnt == 0 )
+ return buf;
+
+
if (1 != fread (buf, byte_cnt, 1, r->file))
{
if (ferror (r->file))
return buf;
}
+/* Winds the reader R's stream BYTE_CNT bytes back from its current
+   position, so that the same bytes can be read again.
+
+   BYTE_CNT must be positive.  If the seek fails, an error message
+   naming the file is issued via msg(); nothing is returned to the
+   caller, which therefore cannot tell whether the seek succeeded. */
+static void
+buf_unread (struct sfm_reader *r, size_t byte_cnt)
+{
+  assert (r != NULL);
+  assert (byte_cnt > 0);
+
+  /* fseek() takes a signed long offset.  Convert before negating:
+     negating a size_t wraps around to a huge unsigned value instead
+     of yielding a negative offset. */
+  if (fseek (r->file, -(long) byte_cnt, SEEK_CUR) != 0)
+    {
+      msg (ME, _("%s: Seeking system file: %s."),
+           handle_get_filename (r->fh), strerror (errno));
+    }
+}
+
/* Reads a document record, type 6, from system file R, and sets up
the documents and n_documents fields in the associated
dictionary. */
for (;;)
{
- for (; p < p_end; p++)
+ for (; p < p_end; p++){
switch (*p)
{
case 0:
goto success;
break;
}
-
+ }
/* We have reached the end of this instruction octet. Read
another. */
if (r->ptr == NULL || r->ptr >= r->end)
{
int i;
- for (i = 0; i < r->var_cnt; i++)
+ for (i = 0; i < r->value_cnt; i++)
if (r->vars[i].width == 0)
bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
}
{
int i;
- for (i = 0; i < r->var_cnt; i++)
+ for (i = 0; i < r->value_cnt; i++)
if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
}
return 0;
}
- for (i = 0; i < r->var_cnt; i++)
+ for (i = 0; i < r->value_cnt; i++)
{
struct sfm_var *v = &r->vars[i];
flt64 f = *bounce_cur++;
if (r->reverse_endian)
bswap_flt64 (&f);
- case_data_rw (c, i)->f = f == r->sysmis ? SYSMIS : f;
+ case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f;
}
- else
+ else if (v->width != -1)
{
memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));