#include <errno.h>
#include <float.h>
#include <c-ctype.h>
+#include <minmax.h>
#include <libpspp/alloc.h>
#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <libpspp/str.h>
#include <libpspp/hash.h>
+#include <libpspp/array.h>
#include "sys-file-reader.h"
#include "sfm-private.h"
FILE *file; /* File stream. */
int reverse_endian; /* 1=file has endianness opposite us. */
- int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
+ int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
int value_cnt; /* Number of `union values's per case. */
long case_cnt; /* Number of cases, -1 if unknown. */
int compressed; /* 1=compressed, 0=not compressed. */
- double bias; /* Compression bias, usually 100.0. */
+ double bias; /* Compression bias, usually 100.0. */
int weight_idx; /* 0-based index of weighting variable, or -1. */
bool ok; /* False after an I/O error or corrupt data. */
+ bool has_vls; /* True if the file has one or more Very Long Strings*/
/* Variables. */
- struct sfm_var *vars; /* Variables. */
+ struct hsh_table *var_hash;
+ struct variable **svars;
/* File's special constants. */
flt64 sysmis;
/* A variable in a system file. */
struct sfm_var
{
+ char name[SHORT_NAME_LEN + 1]; /* Variable name; the buffer holds SHORT_NAME_LEN + 1 bytes (presumably NUL-terminated -- confirm). */
int width; /* 0=numeric, otherwise string width. */
int fv; /* Index into case. */
};
va_list args;
struct string text;
- ds_create (&text, _("corrupt system file: "));
+ ds_init_cstr (&text, _("corrupt system file: "));
va_start (args, format);
- ds_vprintf (&text, format, args);
+ ds_put_vformat (&text, format, args);
va_end (args);
m.category = msg_class_to_category (class);
m.severity = msg_class_to_severity (class);
m.where.file_name = NULL;
m.where.line_number = 0;
- m.text = ds_c_str (&text);
+ m.text = ds_cstr (&text);
msg_emit (&m);
}
if (r->fh != NULL)
fh_close (r->fh, "system file", "rs");
- free (r->vars);
+ hsh_destroy(r->var_hash);
free (r->buf);
free (r);
}
static int
pair_sn_compare(const void *_p1, const void *_p2, void *aux UNUSED)
{
+ int i;
+
const struct name_pair *p1 = _p1;
const struct name_pair *p2 = _p2;
-
- return strcmp(p1->shortname, p2->shortname);
+
+ char buf1[SHORT_NAME_LEN + 1];
+ char buf2[SHORT_NAME_LEN + 1];
+
+ memset(buf1, 0, SHORT_NAME_LEN + 1);
+ memset(buf2, 0, SHORT_NAME_LEN + 1);
+
+ for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
+ {
+ buf1[i] = p1->shortname[i];
+ if ( '\0' == buf1[i])
+ break;
+ }
+
+ for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
+ {
+ buf2[i] = p2->shortname[i];
+ if ( '\0' == buf2[i])
+ break;
+ }
+
+ return strncmp(buf1, buf2, SHORT_NAME_LEN);
}
-static unsigned
+static unsigned int
pair_sn_hash(const void *_p, void *aux UNUSED)
{
+ int i;
const struct name_pair *p = _p;
- return hsh_hash_bytes(p->shortname, strlen(p->shortname));
+ char buf[SHORT_NAME_LEN + 1];
+
+ memset(buf, 0, SHORT_NAME_LEN + 1);
+ for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
+ {
+ buf[i] = p->shortname[i];
+ if ( '\0' == buf[i])
+ break;
+ }
+
+ return hsh_hash_bytes(buf, strlen(buf));
}
static void
}
+
+/* A hsh_compare_func that orders variables A and B by their
+   short names.  At most SHORT_NAME_LEN characters of each short
+   name are examined.  Returns a negative, zero, or positive
+   value, as strncmp does.  FOO is unused. */
+static int
+compare_var_shortnames (const void *a_, const void *b_, void *foo UNUSED)
+{
+  const struct variable *a = a_;
+  const struct variable *b = b_;
+
+  /* strncmp already stops at the first NUL or after
+     SHORT_NAME_LEN characters, so the names are compared in
+     place rather than through bounded temporary copies. */
+  return strncmp (a->short_name, b->short_name, SHORT_NAME_LEN);
+}
+
+/* A hsh_hash_func that hashes variable V based on its short
+   name.  Only the characters before the first NUL, and never
+   more than SHORT_NAME_LEN of them, contribute to the hash,
+   which matches the prefix that compare_var_shortnames
+   examines.  FOO is unused. */
+static unsigned
+hash_var_shortname (const void *v_, void *foo UNUSED)
+{
+  const struct variable *v = v_;
+  size_t len = 0;
+
+  /* Bound the length explicitly instead of calling strlen, so
+     that a short name lacking a terminator within
+     SHORT_NAME_LEN bytes cannot cause an over-read. */
+  while (len < SHORT_NAME_LEN && v->short_name[len] != '\0')
+    len++;
+
+  return hsh_hash_bytes (v->short_name, len);
+}
+
+
+
/* Opens the system file designated by file handle FH for
reading. Reads the system file's dictionary into *DICT.
If INFO is non-null, then it receives additional info about the
struct sfm_reader *r = NULL;
struct variable **var_by_idx = NULL;
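+ /* Raw data of the long variable name map record (filled in under "case 13" below, despite the "14" in the name); freed on both the success and error exits. */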
+ char *subrec14data = 0;
+
/* A hash table of long variable names indexed by short name */
struct hsh_table *short_to_long = NULL;
+
*dict = dict_create ();
if (!fh_open (fh, FH_REF_FILE, "system file", "rs"))
goto error;
r->bias = 100.0;
r->weight_idx = -1;
r->ok = true;
+ r->has_vls = false;
+ r->svars = 0;
- r->vars = NULL;
+ r->var_hash = hsh_create(4, compare_var_shortnames, hash_var_shortname, 0, 0);
r->sysmis = -FLT64_MAX;
r->highest = FLT64_MAX;
assertive_buf_read (r, ¶ms, sizeof(params), 0);
+ if ( ! measure_is_valid(params.measure)
+ ||
+ ! alignment_is_valid(params.align))
+ {
+ msg(MW,
+ _("%s: Invalid variable display parameters. Default parameters substituted."),
+ fh_get_file_name(r->fh));
+ continue;
+ }
+
v = dict_get_var(*dict, i);
v->measure = params.measure;
case 13: /* SPSS 12.0 Long variable name map */
{
- char *buf, *short_name, *save_ptr;
+ char *short_name, *save_ptr;
int idx;
+ r->has_vls = true;
+
/* Read data. */
- buf = xmalloc (bytes + 1);
- if (!buf_read (r, buf, bytes, 0))
+ subrec14data = xmalloc (bytes + 1);
+ if (!buf_read (r, subrec14data, bytes, 0))
{
- free (buf);
goto error;
}
- buf[bytes] = '\0';
+ subrec14data[bytes] = '\0';
short_to_long = hsh_create(4,
pair_sn_compare,
0);
/* Parse data. */
- for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0;
+ for (short_name = strtok_r (subrec14data, "=", &save_ptr), idx = 0;
short_name != NULL;
short_name = strtok_r (NULL, "=", &save_ptr), idx++)
{
#endif
}
-
- /* Free data. */
- free (buf);
}
break;
int i;
/* Read data. */
- char *buf = xmalloc (bytes + 1);
- if (!buf_read (r, buf, bytes, 0))
+ char *buffer = xmalloc (bytes + 1);
+ if (!buf_read (r, buffer, bytes, 0))
{
- free (buf);
+ free (buffer);
goto error;
}
- buf[bytes] = '\0';
+ buffer[bytes] = '\0';
/* Note: SPSS v13 terminates this record with 00,
for(i = 0; i < bytes ; ++i)
{
long int length;
- static char name[SHORT_NAME_LEN + 1];
- static char len_str[6];
+ static char name[SHORT_NAME_LEN + 1] = {0};
+ static char len_str[6] ={0};
- switch( buf[i] )
+ switch( buffer[i] )
{
case '=':
eq_seen = true;
lookup_name = p->longname;
}
-
v = dict_lookup_var(*dict, lookup_name);
if ( !v )
{
goto error;
}
-
+
l = length;
if ( v->width > EFFECTIVE_LONG_STRING_LENGTH )
l -= EFFECTIVE_LONG_STRING_LENGTH;
else
l -= v_next->width;
+ hsh_delete(r->var_hash, v_next);
+
dict_delete_var(*dict, v_next);
}
-
+
+ assert ( length > MAX_LONG_STRING );
+
v->width = length;
v->print.w = v->width;
v->write.w = v->width;
+ v->nv = DIV_RND_UP (length, MAX_SHORT_STRING);
}
eq_seen = false;
memset(name, 0, SHORT_NAME_LEN+1);
break;
default:
if ( eq_seen )
- len_str[j] = buf[i];
+ len_str[j] = buffer[i];
else
- name[j] = buf[i];
+ name[j] = buffer[i];
j++;
break;
}
}
- free(buf);
+ free(buffer);
+ dict_compact_values(*dict);
}
break;
success:
/* Come here on successful completion. */
-
free (var_by_idx);
hsh_destroy(short_to_long);
+ free (subrec14data);
return r;
error:
sfm_close_reader (r);
free (var_by_idx);
hsh_destroy(short_to_long);
+ free (subrec14data);
if (*dict != NULL)
{
dict_destroy (*dict);
/* Copy basic info and verify correctness. */
r->value_cnt = hdr.nominal_case_size;
- /* If value count is rediculous, then force it to -1 (a sentinel value) */
+ /* If value count is ridiculous, then force it to -1 (a
+ sentinel value). */
if ( r->value_cnt < 0 ||
r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
r->value_cnt = -1;
*var_by_idx = 0;
- /* Pre-allocate variables. */
- if (r->value_cnt != -1)
- {
- *var_by_idx = xnmalloc (r->value_cnt, sizeof **var_by_idx);
- r->vars = xnmalloc (r->value_cnt, sizeof *r->vars);
- }
-
/* Read in the entry for each variable and use the info to
initialize the dictionary. */
}
*var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx);
- r->vars = xnrealloc (r->vars, i + 1, sizeof *r->vars);
/* If there was a long string previously, make sure that the
continuations are present; otherwise make sure there aren't
fh_get_file_name (r->fh), i));
- r->vars[i].width = -1;
(*var_by_idx)[i] = NULL;
long_string_count--;
continue;
|| !parse_format_spec (r, sv.write, &vv->write, vv))
goto error;
- r->vars[i].width = vv->width;
- r->vars[i].fv = vv->fv;
-
+ if ( vv->width != -1)
+ hsh_insert(r->var_hash, vv);
}
/* Some consistency checks. */
return 0;
}
+
+/* Comparison callback for sort(): _V1 and _V2 each point to an
+   array element of type `struct variable *'.  Orders the two
+   variables by ascending `index' member, returning -1, 0, or 1.
+   AUX is unused. */
+static int
+compare_var_index(const void *_v1, const void *_v2, void *aux UNUSED)
+{
+  const struct variable *lhs = *(const struct variable *const *) _v1;
+  const struct variable *rhs = *(const struct variable *const *) _v2;
+
+  /* Three-way comparison without branching: each relational
+     test yields 0 or 1. */
+  return (lhs->index > rhs->index) - (lhs->index < rhs->index);
+}
+
+
/* Reads one case from READER's file into C. Returns nonzero
only if successful. */
int
{
if (!r->ok)
return 0;
-
- if (!r->compressed && sizeof (flt64) == sizeof (double))
+
+ if ( ! r->svars )
+ {
+ r->svars = (struct variable **) hsh_data(r->var_hash);
+ sort(r->svars, hsh_count(r->var_hash),
+ sizeof(*r->svars), compare_var_index, 0);
+ }
+
+ if (!r->compressed && sizeof (flt64) == sizeof (double) && ! r->has_vls)
{
/* Fast path: external and internal representations are the
same, except possibly for endianness or SYSMIS. Read
{
int i;
- for (i = 0; i < r->value_cnt; i++)
- if (r->vars[i].width == 0)
- bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
+ for (i = 0; i < hsh_count(r->var_hash); i++)
+ {
+ struct variable *v = r->svars[i];
+ if (v->width == 0)
+ bswap_flt64 (&case_data_rw (c, v->fv)->f);
+ }
}
/* Fix up SYSMIS values if needed.
if (r->sysmis != SYSMIS)
{
int i;
-
- for (i = 0; i < r->value_cnt; i++)
- if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
- case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
+ for (i = 0; i < hsh_count(r->var_hash); i++)
+ {
+ struct variable *v = r->svars[i];
+ if (v->width == 0 && case_num (c, i) == r->sysmis)
+ case_data_rw (c, v->fv)->f = SYSMIS;
+ }
}
}
else
bounce_size = sizeof *bounce * r->value_cnt;
bounce = bounce_cur = local_alloc (bounce_size);
+ memset(bounce, 0, bounce_size);
+
if (!r->compressed)
read_ok = fread_ok (r, bounce, bounce_size);
else
return 0;
}
- for (i = 0; i < r->value_cnt; i++)
+ for (i = 0; i < hsh_count(r->var_hash); i++)
{
- struct sfm_var *v = &r->vars[i];
+ struct variable *tv = r->svars[i];
- if (v->width == 0)
+ if (tv->width == 0)
{
flt64 f = *bounce_cur++;
if (r->reverse_endian)
bswap_flt64 (&f);
- case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f;
+ case_data_rw (c, tv->fv)->f = f == r->sysmis ? SYSMIS : f;
}
- else if (v->width != -1)
+ else if (tv->width != -1)
{
- memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
- bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
+ flt64 *bc_start = bounce_cur;
+ int ofs = 0;
+ while (ofs < tv->width )
+ {
+ const int chunk = MIN (MAX_LONG_STRING, tv->width - ofs);
+ memcpy (case_data_rw (c, tv->fv)->s + ofs, bounce_cur, chunk);
+
+ bounce_cur += DIV_RND_UP (chunk, sizeof (flt64));
+
+ ofs += chunk;
+ }
+ bounce_cur = bc_start + width_to_bytes(tv->width) / sizeof(flt64);
}
}