#include <errno.h>
#include <float.h>
#include <c-ctype.h>
+#include <minmax.h>
#include <libpspp/alloc.h>
+#include <libpspp/assertion.h>
#include <libpspp/message.h>
#include <libpspp/compiler.h>
#include <libpspp/magic.h>
#include <libpspp/misc.h>
#include <libpspp/str.h>
#include <libpspp/hash.h>
+#include <libpspp/array.h>
#include "sys-file-reader.h"
#include "sfm-private.h"
FILE *file; /* File stream. */
int reverse_endian; /* 1=file has endianness opposite us. */
- int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
int value_cnt; /* Number of `union value's per case. */
long case_cnt; /* Number of cases, -1 if unknown. */
int compressed; /* 1=compressed, 0=not compressed. */
- double bias; /* Compression bias, usually 100.0. */
+ double bias; /* Compression bias, usually 100.0. */
int weight_idx; /* 0-based index of weighting variable, or -1. */
bool ok; /* False after an I/O error or corrupt data. */
+ bool has_vls; /* True if the file has one or more Very Long Strings. */
/* Variables. */
- struct sfm_var *vars; /* Variables. */
+ struct sfm_var *vars;
+ size_t var_cnt;
/* File's special constants. */
flt64 sysmis;
va_list args;
struct string text;
- ds_create (&text, _("corrupt system file: "));
+ ds_init_cstr (&text, _("corrupt system file: "));
va_start (args, format);
- ds_vprintf (&text, format, args);
+ ds_put_vformat (&text, format, args);
va_end (args);
m.category = msg_class_to_category (class);
m.severity = msg_class_to_severity (class);
m.where.file_name = NULL;
m.where.line_number = 0;
- m.text = ds_c_str (&text);
+ m.text = ds_cstr (&text);
msg_emit (&m);
}
};
static int
-pair_sn_compare(const void *_p1, const void *_p2, void *aux UNUSED)
+pair_sn_compare(const void *_p1, const void *_p2, const void *aux UNUSED)
{
+ /* Orders two `struct name_pair's by short name; AUX is ignored.
+    Only the first SHORT_NAME_LEN bytes of each name are
+    significant.  strncmp already stops at the first NUL or after
+    SHORT_NAME_LEN bytes, whichever comes first, so the names are
+    compared in place without bounded scratch copies.  Returns a
+    negative, zero, or positive value in the manner of strcmp. */
const struct name_pair *p1 = _p1;
const struct name_pair *p2 = _p2;
-
- return strcmp(p1->shortname, p2->shortname);
+
+ return strncmp(p1->shortname, p2->shortname, SHORT_NAME_LEN);
}
-static unsigned
-pair_sn_hash(const void *_p, void *aux UNUSED)
+/* Hashes the short name of the `struct name_pair' at _P; AUX is
+   ignored.  At most SHORT_NAME_LEN bytes of the name contribute to
+   the hash, matching the SHORT_NAME_LEN bound that pair_sn_compare
+   passes to strncmp, so names that compare equal also hash equal. */
+static unsigned int
+pair_sn_hash(const void *_p, const void *aux UNUSED)
{
const struct name_pair *p = _p;
- return hsh_hash_bytes(p->shortname, strlen(p->shortname));
+ size_t len = 0;
+
+ /* Measure the name in place, never looking past SHORT_NAME_LEN
+    bytes, so that an over-long name cannot lead to a read of an
+    unterminated buffer. */
+ while (len < SHORT_NAME_LEN && p->shortname[len] != '\0')
+   len++;
+
+ return hsh_hash_bytes(p->shortname, len);
}
/* Releases the heap block P with free(); AUX is ignored.
   NOTE(review): presumably installed as the element-free callback of
   the short-name hash table -- confirm at the hsh_create call site. */
static void
-pair_sn_free(void *p, void *aux UNUSED)
+pair_sn_free(void *p, const void *aux UNUSED)
{
free(p);
}
+
/* Opens the system file designated by file handle FH for
reading. Reads the system file's dictionary into *DICT.
If INFO is non-null, then it receives additional info about the
struct sfm_reader *r = NULL;
struct variable **var_by_idx = NULL;
+ /* The data in record 7(14) */
+ char *subrec14data = 0;
+
/* A hash table of long variable names indexed by short name */
struct hsh_table *short_to_long = NULL;
r->file = fn_open (fh_get_file_name (fh), "rb");
r->reverse_endian = 0;
- r->fix_specials = 0;
r->value_cnt = 0;
r->case_cnt = 0;
r->compressed = 0;
r->bias = 100.0;
r->weight_idx = -1;
r->ok = true;
+ r->has_vls = false;
r->vars = NULL;
lose ((ME,
_("%s: Weighting variable may not be a continuation of "
"a long string variable."), fh_get_file_name (fh)));
- else if (weight_var->type == ALPHA)
+ else if (var_is_alpha (weight_var))
lose ((ME, _("%s: Weighting variable may not be a string variable."),
fh_get_file_name (fh)));
{
struct
{
- int32_t subtype P;
- int32_t size P;
- int32_t count P;
- }
+ int32_t subtype ;
+ int32_t size ;
+ int32_t count ;
+ } ATTRIBUTE((packed))
data;
unsigned long bytes;
break;
}
- for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i )
+ for ( i = 0 ; i < MIN(n_vars, dict_get_var_cnt(*dict)) ; ++i )
{
struct
{
- int32_t measure P;
- int32_t width P;
- int32_t align P;
- }
+ int32_t measure ;
+ int32_t width ;
+ int32_t align ;
+ } ATTRIBUTE((packed))
params;
struct variable *v;
assertive_buf_read (r, &params, sizeof(params), 0);
+ if ( ! measure_is_valid(params.measure)
+ ||
+ ! alignment_is_valid(params.align))
+ {
+ msg(MW,
+ _("%s: Invalid variable display parameters. Default parameters substituted."),
+ fh_get_file_name(r->fh));
+ continue;
+ }
+
v = dict_get_var(*dict, i);
- v->measure = params.measure;
- v->display_width = params.width;
- v->alignment = params.align;
+ var_set_measure (v, params.measure);
+ var_set_display_width (v, params.width);
+ var_set_alignment (v, params.align);
}
}
break;
case 13: /* SPSS 12.0 Long variable name map */
{
- char *buf, *short_name, *save_ptr;
+ char *short_name;
+ char *save_ptr = NULL;
int idx;
/* Read data. */
- buf = xmalloc (bytes + 1);
- if (!buf_read (r, buf, bytes, 0))
+ subrec14data = xmalloc (bytes + 1);
+ if (!buf_read (r, subrec14data, bytes, 0))
{
- free (buf);
goto error;
}
- buf[bytes] = '\0';
+ subrec14data[bytes] = '\0';
short_to_long = hsh_create(4,
pair_sn_compare,
0);
/* Parse data. */
- for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0;
+ for (short_name = strtok_r (subrec14data, "=", &save_ptr), idx = 0;
short_name != NULL;
short_name = strtok_r (NULL, "=", &save_ptr), idx++)
{
records have been processed. --- JMD 27 April 2006
*/
- /* For compatability, make sure dictionary
+ /* For compatibility, make sure dictionary
is in long variable name map order. In
the common case, this has no effect,
because the dictionary and the long
#endif
}
-
- /* Free data. */
- free (buf);
}
break;
int i;
/* Read data. */
- char *buf = xmalloc (bytes + 1);
- if (!buf_read (r, buf, bytes, 0))
+ char *buffer = xmalloc (bytes + 1);
+ if (!buf_read (r, buffer, bytes, 0))
{
- free (buf);
+ free (buffer);
goto error;
}
- buf[bytes] = '\0';
+ buffer[bytes] = '\0';
+ r->has_vls = true;
/* Note: SPSS v13 terminates this record with 00,
whereas SPSS v14 terminates it with 00 09. We must
for(i = 0; i < bytes ; ++i)
{
long int length;
- static char name[SHORT_NAME_LEN + 1];
- static char len_str[6];
+ static char name[SHORT_NAME_LEN + 1] = {0};
+ static char len_str[6] ={0};
- switch( buf[i] )
+ switch( buffer[i] )
{
case '=':
eq_seen = true;
lookup_name = p->longname;
}
-
v = dict_lookup_var(*dict, lookup_name);
if ( !v )
{
goto error;
}
-
+
l = length;
- if ( v->width > EFFECTIVE_LONG_STRING_LENGTH )
+ if ( var_get_width (v) > EFFECTIVE_LONG_STRING_LENGTH )
l -= EFFECTIVE_LONG_STRING_LENGTH;
else
- l -= v->width;
+ l -= var_get_width (v);
idx = v->index;
while ( l > 0 )
struct variable *v_next;
v_next = dict_get_var(*dict, idx + 1);
- if ( v_next->width > EFFECTIVE_LONG_STRING_LENGTH )
+ if ( var_get_width (v_next) > EFFECTIVE_LONG_STRING_LENGTH )
l -= EFFECTIVE_LONG_STRING_LENGTH;
else
- l -= v_next->width;
+ l -= var_get_width (v_next);
dict_delete_var(*dict, v_next);
}
-
- v->width = length;
- v->print.w = v->width;
- v->write.w = v->width;
+
+ assert ( length > MAX_LONG_STRING );
+
+ var_set_width (v, length);
}
eq_seen = false;
memset(name, 0, SHORT_NAME_LEN+1);
break;
default:
if ( eq_seen )
- len_str[j] = buf[i];
+ len_str[j] = buffer[i];
else
- name[j] = buf[i];
+ name[j] = buffer[i];
j++;
break;
}
}
- free(buf);
+ free(buffer);
+ dict_compact_values(*dict);
}
break;
success:
/* Come here on successful completion. */
+ /* Create an index of dictionary variable widths for
+ sfm_read_case to use. We cannot use the `struct variable's
+ from the dictionary we created, because the caller owns the
+ dictionary and may destroy or modify its variables. */
+ {
+ size_t i;
+
+ r->var_cnt = dict_get_var_cnt (*dict);
+ r->vars = xnmalloc (r->var_cnt, sizeof *r->vars);
+ for (i = 0; i < r->var_cnt; i++)
+ {
+ struct variable *v = dict_get_var (*dict, i);
+ struct sfm_var *sv = &r->vars[i];
+ sv->width = var_get_width (v);
+ sv->fv = v->fv;
+ }
+ }
free (var_by_idx);
hsh_destroy(short_to_long);
+ free (subrec14data);
return r;
error:
sfm_close_reader (r);
free (var_by_idx);
hsh_destroy(short_to_long);
+ free (subrec14data);
if (*dict != NULL)
{
dict_destroy (*dict);
/* Copy basic info and verify correctness. */
r->value_cnt = hdr.nominal_case_size;
- /* If value count is rediculous, then force it to -1 (a sentinel value) */
+ /* If value count is ridiculous, then force it to -1 (a
+ sentinel value). */
if ( r->value_cnt < 0 ||
r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
r->value_cnt = -1;
*var_by_idx = 0;
- /* Pre-allocate variables. */
- if (r->value_cnt != -1)
- {
- *var_by_idx = xnmalloc (r->value_cnt, sizeof **var_by_idx);
- r->vars = xnmalloc (r->value_cnt, sizeof *r->vars);
- }
-
/* Read in the entry for each variable and use the info to
initialize the dictionary. */
char name[SHORT_NAME_LEN + 1];
int nv;
int j;
+ struct fmt_spec print, write;
+
assertive_buf_read (r, &sv, sizeof sv, 0);
}
*var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx);
- r->vars = xnrealloc (r->vars, i + 1, sizeof *r->vars);
/* If there was a long string previously, make sure that the
continuations are present; otherwise make sure there aren't
fh_get_file_name (r->fh), i));
- r->vars[i].width = -1;
(*var_by_idx)[i] = NULL;
long_string_count--;
continue;
fh_get_file_name (r->fh), name));
/* Set the short name the same as the long name */
- var_set_short_name (vv, vv->name);
+ var_set_short_name (vv, var_get_name (vv));
/* Case reading data. */
nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
if (len < 0 || len > 255)
lose ((ME, _("%s: Variable %s indicates variable label of invalid "
"length %d."),
- fh_get_file_name (r->fh), vv->name, len));
+ fh_get_file_name (r->fh), var_get_name (vv), len));
if ( len != 0 )
{
/* Read label into variable structure. */
- vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32_t)), len + 1);
- if (vv->label == NULL)
- goto error;
- vv->label[len] = '\0';
+ char label[256];
+ assertive_buf_read (r, label, ROUND_UP (len, sizeof (int32_t)),
+ 0);
+ label[len] = '\0';
+ var_set_label (vv, label);
}
}
{
flt64 mv[3];
int mv_cnt = abs (sv.n_missing_values);
+ struct missing_values miss;
- if (vv->width > MAX_SHORT_STRING)
+ if (var_get_width (vv) > MAX_SHORT_STRING)
lose ((ME, _("%s: Long string variable %s may not have missing "
"values."),
- fh_get_file_name (r->fh), vv->name));
+ fh_get_file_name (r->fh), var_get_name (vv)));
+ mv_init (&miss, var_get_width (vv));
assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0);
- if (r->reverse_endian && vv->type == NUMERIC)
+ if (r->reverse_endian && var_is_numeric (vv))
for (j = 0; j < mv_cnt; j++)
bswap_flt64 (&mv[j]);
if (sv.n_missing_values > 0)
{
for (j = 0; j < sv.n_missing_values; j++)
- if (vv->type == NUMERIC)
- mv_add_num (&vv->miss, mv[j]);
+ if (var_is_numeric (vv))
+ mv_add_num (&miss, mv[j]);
else
- mv_add_str (&vv->miss, (char *) &mv[j]);
+ mv_add_str (&miss, (char *) &mv[j]);
}
else
{
- if (vv->type == ALPHA)
+ if (var_is_alpha (vv))
lose ((ME, _("%s: String variable %s may not have missing "
"values specified as a range."),
- fh_get_file_name (r->fh), vv->name));
+ fh_get_file_name (r->fh), var_get_name (vv)));
if (mv[0] == r->lowest)
- mv_add_num_range (&vv->miss, LOWEST, mv[1]);
+ mv_add_num_range (&miss, LOWEST, mv[1]);
else if (mv[1] == r->highest)
- mv_add_num_range (&vv->miss, mv[0], HIGHEST);
+ mv_add_num_range (&miss, mv[0], HIGHEST);
else
- mv_add_num_range (&vv->miss, mv[0], mv[1]);
+ mv_add_num_range (&miss, mv[0], mv[1]);
if (sv.n_missing_values == -3)
- mv_add_num (&vv->miss, mv[2]);
+ mv_add_num (&miss, mv[2]);
}
+ var_set_missing_values (vv, &miss);
}
- if (!parse_format_spec (r, sv.print, &vv->print, vv)
- || !parse_format_spec (r, sv.write, &vv->write, vv))
+ if (!parse_format_spec (r, sv.print, &print, vv)
+ || !parse_format_spec (r, sv.write, &write, vv))
goto error;
- r->vars[i].width = vv->width;
- r->vars[i].fv = vv->fv;
-
+ var_set_print_format (vv, &print);
+ var_set_write_format (vv, &write);
}
/* Some consistency checks. */
parse_format_spec (struct sfm_reader *r, int32_t s,
struct fmt_spec *f, const struct variable *v)
{
- f->type = translate_fmt ((s >> 16) & 0xff);
- if (f->type == -1)
+ bool ok;
+
+ if (!fmt_from_io ((s >> 16) & 0xff, &f->type))
lose ((ME, _("%s: Bad format specifier byte (%d)."),
fh_get_file_name (r->fh), (s >> 16) & 0xff));
f->w = (s >> 8) & 0xff;
f->d = s & 0xff;
- if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0))
+ if (var_is_alpha (v) != fmt_is_string (f->type))
lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
fh_get_file_name (r->fh),
- v->type == ALPHA ? _("String") : _("Numeric"),
- v->name,
- formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"),
- formats[f->type].name));
-
- if (!check_output_specifier (f, false)
- || !check_specifier_width (f, v->width, false))
+ var_is_alpha (v) ? _("String") : _("Numeric"),
+ var_get_name (v),
+ fmt_is_string (f->type) ? _("string") : _("numeric"),
+ fmt_name (f->type)));
+
+ msg_disable ();
+ ok = fmt_check_output (f) && fmt_check_width_compat (f, var_get_width (v));
+ msg_enable ();
+
+ if (!ok)
{
+ char fmt_string[FMT_STRING_LEN_MAX + 1];
msg (ME, _("%s variable %s has invalid format specifier %s."),
- v->type == NUMERIC ? _("Numeric") : _("String"),
- v->name, fmt_to_string (f));
- *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0);
+ var_is_numeric (v) ? _("Numeric") : _("String"),
+ var_get_name (v), fmt_to_string (f, fmt_string));
+ *f = (var_is_numeric (v)
+ ? fmt_for_output (FMT_F, 8, 2)
+ : fmt_for_output (FMT_A, var_get_width (v), 0));
}
return 1;
"refers to a continuation of a string variable, not to "
"an actual variable."),
fh_get_file_name (r->fh), var_idx));
- if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
+ if (var_is_long_string (v))
lose ((ME, _("%s: Value labels are not allowed on long string "
"variables (%s)."),
- fh_get_file_name (r->fh), v->name));
+ fh_get_file_name (r->fh), var_get_name (v)));
/* Add it to the list of variables. */
var[i] = v;
/* Type check the variables. */
for (i = 1; i < n_vars; i++)
- if (var[i]->type != var[0]->type)
+ if (var_get_type (var[i]) != var_get_type (var[0]))
lose ((ME, _("%s: Variables associated with value label are not all of "
"identical type. Variable %s has %s type, but variable "
"%s has %s type."),
fh_get_file_name (r->fh),
- var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
- var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
+ var_get_name (var[0]),
+ var_is_alpha (var[0]) ? _("string") : _("numeric"),
+ var_get_name (var[i]),
+ var_is_alpha (var[i]) ? _("string") : _("numeric")));
/* Fill in labels[].value, now that we know the desired type. */
for (i = 0; i < n_labels; i++)
{
struct label *label = labels + i;
- if (var[0]->type == ALPHA)
+ if (var_is_alpha (var[0]))
{
- const int copy_len = min (sizeof label->raw_value,
+ const int copy_len = MIN (sizeof label->raw_value,
sizeof label->label);
memcpy (label->value.s, label->raw_value, copy_len);
} else {
if (!val_labs_replace (v->val_labs, label->value, label->label))
continue;
- if (var[0]->type == NUMERIC)
+ if (var_is_numeric (var[0]))
msg (MW, _("%s: File contains duplicate label for value %g for "
"variable %s."),
- fh_get_file_name (r->fh), label->value.f, v->name);
+ fh_get_file_name (r->fh), label->value.f, var_get_name (v));
else
msg (MW, _("%s: File contains duplicate label for value `%.*s' "
"for variable %s."),
- fh_get_file_name (r->fh), v->width, label->value.s, v->name);
+ fh_get_file_name (r->fh), var_get_width (v),
+ label->value.s, var_get_name (v));
}
}
assert (r);
if (buf == NULL && byte_cnt > 0 )
- buf = xmalloc (max (byte_cnt, min_alloc));
+ buf = xmalloc (MAX (byte_cnt, min_alloc));
if ( byte_cnt == 0 )
return buf;
p = r->x;
}
- abort ();
+ NOT_REACHED ();
success:
/* We have filled up an entire record. Update state and return
{
if (!r->ok)
return 0;
-
- if (!r->compressed && sizeof (flt64) == sizeof (double))
+
+ if (!r->compressed && sizeof (flt64) == sizeof (double) && ! r->has_vls)
{
/* Fast path: external and internal representations are the
same, except possibly for endianness or SYSMIS. Read
{
int i;
- for (i = 0; i < r->value_cnt; i++)
+ for (i = 0; i < r->var_cnt; i++)
if (r->vars[i].width == 0)
bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
}
{
int i;
- for (i = 0; i < r->value_cnt; i++)
+ for (i = 0; i < r->var_cnt; i++)
if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
}
bounce_size = sizeof *bounce * r->value_cnt;
bounce = bounce_cur = local_alloc (bounce_size);
+ memset(bounce, 0, bounce_size);
+
if (!r->compressed)
read_ok = fread_ok (r, bounce, bounce_size);
else
return 0;
}
- for (i = 0; i < r->value_cnt; i++)
+ for (i = 0; i < r->var_cnt; i++)
{
- struct sfm_var *v = &r->vars[i];
+ struct sfm_var *sv = &r->vars[i];
- if (v->width == 0)
+ if (sv->width == 0)
{
flt64 f = *bounce_cur++;
if (r->reverse_endian)
bswap_flt64 (&f);
- case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f;
+ case_data_rw (c, sv->fv)->f = f == r->sysmis ? SYSMIS : f;
}
- else if (v->width != -1)
+ else
{
- memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
- bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
+ flt64 *bc_start = bounce_cur;
+ int ofs = 0;
+ while (ofs < sv->width )
+ {
+ const int chunk = MIN (MAX_LONG_STRING, sv->width - ofs);
+ memcpy (case_data_rw (c, sv->fv)->s + ofs, bounce_cur, chunk);
+
+ bounce_cur += DIV_RND_UP (chunk, sizeof (flt64));
+
+ ofs += chunk;
+ }
+ bounce_cur = bc_start + width_to_bytes(sv->width) / sizeof(flt64);
}
}