Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA. */
-/* AIX requires this to be the first thing in the file. */
#include <config.h>
-#if __GNUC__
-#define alloca __builtin_alloca
-#else
-#if HAVE_ALLOCA_H
-#include <alloca.h>
-#else
-#ifdef _AIX
-#pragma alloca
-#else
-#ifndef alloca /* predefined by HP cc +Olibcalls */
-char *alloca ();
-#endif
-#endif
-#endif
-#endif
-
+#include "sfm.h"
+#include "sfmP.h"
#include <assert.h>
#include <stdlib.h>
#include <ctype.h>
#include <errno.h>
#include <float.h>
#include "alloc.h"
-#include "avl.h"
#include "error.h"
#include "file-handle.h"
#include "filename.h"
#include "format.h"
#include "getline.h"
+#include "hash.h"
#include "magic.h"
#include "misc.h"
-#include "sfm.h"
-#include "sfmP.h"
+#include "value-labels.h"
#include "str.h"
#include "var.h"
/* Utilities. */
/* bswap_int32(): Reverse the byte order of 32-bit integer *X. */
-#if __linux__
-#include <asm/byteorder.h>
-#include <netinet/in.h>
static inline void
-bswap_int32 (int32 * x)
+bswap_int32 (int32 *x)
{
- *x = ntohl (*x);
-}
-#else /* not Linux */
-static inline void
-bswap_int32 (int32 * x)
-{
- unsigned char *y = (char *) x;
+ unsigned char *y = (unsigned char *) x; /* Byte-wise view of *X. */
unsigned char t;
+ /* Exchange the outer pair of bytes, 0 and 3. */
t = y[0];
y[0] = y[3];
y[3] = t;
+ /* Exchange the inner pair of bytes, 1 and 2. */
t = y[1];
y[1] = y[2];
y[2] = t;
}
-#endif /* not Linux */
/* Reverse the byte order of 64-bit floating point *X. */
static inline void
-bswap_flt64 (flt64 * x)
+bswap_flt64 (flt64 *x)
{
- /* Note that under compilers of any quality, half of this function
- should optimize out as dead code. */
- unsigned char *y = (char *) x;
+ unsigned char *y = (unsigned char *) x; /* Byte-wise view of *X; bytes 0..7 are accessed, so flt64 must be at least 8 bytes wide. */
+ unsigned char t;
- if (sizeof (flt64) == 8)
- {
- unsigned char t;
- t = y[0];
- y[0] = y[7];
- y[7] = t;
- t = y[1];
- y[1] = y[6];
- y[6] = t;
- t = y[2];
- y[2] = y[5];
- y[5] = t;
- t = y[3];
- y[3] = y[4];
- y[4] = t;
- }
- else
- {
- unsigned char t;
- size_t x;
+ t = y[0]; /* Exchange bytes 0 and 7. */
+ y[0] = y[7];
+ y[7] = t;
- for (x = 0; x < sizeof (flt64) / 2; x++)
- {
- t = y[x];
- y[x] = y[sizeof (flt64) - x];
- y[sizeof (flt64) - x] = t;
- }
- }
+ t = y[1]; /* Exchange bytes 1 and 6. */
+ y[1] = y[6];
+ y[6] = t;
+
+ t = y[2]; /* Exchange bytes 2 and 5. */
+ y[2] = y[5];
+ y[5] = t;
+
+ t = y[3]; /* Exchange bytes 3 and 4. */
+ y[3] = y[4];
+ y[4] = t;
}
static void
lose ((ME, _("%s: Weighting variable may not be a string variable."),
h->fn));
- strcpy (ext->dict->weight_var, wv->name);
+ dict_set_weight (ext->dict, wv);
}
else
- ext->dict->weight_var[0] = 0;
+ dict_set_weight (ext->dict, NULL);
/* Read records of types 3, 4, 6, and 7. */
for (;;)
free (var_by_index);
fn_close (h->fn, ext->file);
if (ext && ext->dict)
- free_dictionary (ext->dict);
+ dict_destroy (ext->dict);
free (ext);
h->class = NULL;
h->ext = NULL;
int i;
/* Create the dictionary. */
- dict = ext->dict = xmalloc (sizeof *dict);
- dict->var = NULL;
- dict->var_by_name = NULL;
- dict->nvar = 0;
- dict->N = 0;
- dict->nval = -1; /* Unknown. */
- dict->n_splits = 0;
- dict->splits = NULL;
- dict->weight_var[0] = 0;
- dict->weight_index = -1;
- dict->filter_var[0] = 0;
- dict->label = NULL;
- dict->n_documents = 0;
- dict->documents = NULL;
+ dict = ext->dict = dict_create ();
/* Read header, check magic. */
assertive_bufread (h, &hdr, sizeof hdr, 0);
{
int i;
- dict->label = NULL;
for (i = sizeof hdr.file_label - 1; i >= 0; i--)
if (!isspace ((unsigned char) hdr.file_label[i])
&& hdr.file_label[i] != 0)
{
- dict->label = xmalloc (i + 2);
- memcpy (dict->label, hdr.file_label, i + 1);
- dict->label[i + 1] = 0;
+ char *label = xmalloc (i + 2);
+ memcpy (label, hdr.file_label, i + 1);
+ label[i + 1] = 0;
+ dict_set_label (dict, label);
+ free (label);
break;
}
}
}
/* Reads most of the dictionary from file H; also fills in the
- associated VAR_BY_INDEX array.
-
- Note: the dictionary returned by this function has an invalid NVAL
- element, also the VAR[] array does not have the FV and LV elements
- set, however the NV elements *are* set. This is because the caller
- will probably modify the dictionary before reading it in from the
- file. Also, the get.* elements are set to appropriate values to
- allow the file to be read. */
+ associated VAR_BY_INDEX array. The get.* elements in the
+ created dictionary are set to appropriate values to allow the
+ file to be read. */
static int
read_variables (struct file_handle * h, struct variable *** var_by_index)
{
int next_value = 0; /* Index to next `value' structure. */
/* Allocate variables. */
- dict->var = xmalloc (sizeof *dict->var * ext->case_size);
*var_by_index = xmalloc (sizeof **var_by_index * ext->case_size);
/* Read in the entry for each variable and use the info to
for (i = 0; i < ext->case_size; i++)
{
struct variable *vv;
+ char name[9];
int j;
assertive_bufread (h, &sv, sizeof sv, 0);
lose ((ME, _("%s: position %d: Missing value indicator field is not "
"-3, -2, 0, 1, 2, or 3."), h->fn, i));
- /* Construct internal variable structure, initialize critical bits. */
- vv = (*var_by_index)[i] = dict->var[dict->nvar++] = xmalloc (sizeof *vv);
- vv->index = dict->nvar - 1;
- vv->foo = -1;
- vv->label = NULL;
- vv->val_lab = NULL;
-
/* Copy first character of variable name. */
if (!isalpha ((unsigned char) sv.name[0])
&& sv.name[0] != '@' && sv.name[0] != '#')
msg (MW, _("%s: position %d: Variable name begins with octothorpe "
"(`#'). Scratch variables should not appear in system "
"files."), h->fn, i);
- vv->name[0] = toupper ((unsigned char) (sv.name[0]));
+ name[0] = toupper ((unsigned char) (sv.name[0]));
/* Copy remaining characters of variable name. */
for (j = 1; j < 8; j++)
{
msg (MW, _("%s: position %d: Variable name character %d is "
"lowercase letter %c."), h->fn, i, j + 1, sv.name[j]);
- vv->name[j] = toupper ((unsigned char) (c));
+ name[j] = toupper ((unsigned char) (c));
}
else if (isalnum (c) || c == '.' || c == '@'
|| c == '#' || c == '$' || c == '_')
- vv->name[j] = c;
+ name[j] = c;
else
lose ((ME, _("%s: position %d: character `\\%03o' (%c) is not valid in a "
"variable name."), h->fn, i, c, c));
}
- vv->name[j] = 0;
-
- /* Set type, width, and `left' fields and allocate `value'
- indices. */
- if (sv.type == 0)
- {
- vv->type = NUMERIC;
- vv->width = 0;
- vv->get.nv = 1;
- vv->get.fv = next_value++;
- vv->nv = 1;
- }
+ name[j] = 0;
+
+ /* Create variable. */
+ vv = (*var_by_index)[i] = dict_create_var (dict, name, sv.type);
+ if (vv == NULL)
+ lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
+ h->fn, name));
+
+ /* Case reading data. */
+ vv->get.fv = next_value;
+ if (sv.type == 0)
+ vv->get.nv = 1;
else
- {
- vv->type = ALPHA;
- vv->width = sv.type;
- vv->nv = DIV_RND_UP (vv->width, MAX_SHORT_STRING);
- vv->get.nv = DIV_RND_UP (vv->width, sizeof (flt64));
- vv->get.fv = next_value;
- next_value += vv->get.nv;
- long_string_count = vv->get.nv - 1;
- }
- vv->left = (vv->name[0] == '#');
+ vv->get.nv = DIV_RND_UP (sv.type, sizeof (flt64));
+ long_string_count = vv->get.nv - 1;
+ next_value += vv->get.nv;
/* Get variable label, if any. */
if (sv.has_var_label == 1)
if (next_value != ext->case_size)
lose ((ME, _("%s: System file header indicates %d variable positions but "
"%d were read from file."), h->fn, ext->case_size, next_value));
- dict->var = xrealloc (dict->var, sizeof *dict->var * dict->nvar);
-
- /* Construct AVL tree of dictionary in order to speed up later
- processing and to check for duplicate varnames. */
- dict->var_by_name = avl_create (NULL, cmp_variable, NULL);
- for (i = 0; i < dict->nvar; i++)
- if (NULL != avl_insert (dict->var_by_name, dict->var[i]))
- lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
- h->fn, dict->var[i]->name));
return 1;
lossage:
- for (i = 0; i < dict->nvar; i++)
- {
- free (dict->var[i]->label);
- free (dict->var[i]);
- }
- free (dict->var);
- if (dict->var_by_name)
- avl_destroy (dict->var_by_name, NULL);
- free (dict);
+ dict_destroy (dict);
ext->dict = NULL;
return 0;
{
struct sfm_fhuser_ext *ext = h->ext; /* File extension record. */
- flt64 *raw_label = NULL; /* Array of raw label values. */
- struct value_label **cooked_label = NULL; /* Array of cooked labels. */
+ struct label
+ {
+ unsigned char raw_value[8]; /* Value as uninterpreted bytes. */
+ union value value; /* Value. */
+ char *label; /* Null-terminated label string. */
+ };
+
+ struct label *labels = NULL;
int32 n_labels; /* Number of labels. */
struct variable **var = NULL; /* Associated variables. */
bswap_int32 (&n_labels);
/* Allocate memory. */
- raw_label = xmalloc (sizeof *raw_label * n_labels);
- cooked_label = xmalloc (sizeof *cooked_label * n_labels);
+ labels = xmalloc (n_labels * sizeof *labels);
for (i = 0; i < n_labels; i++)
- cooked_label[i] = NULL;
+ labels[i].label = NULL;
- /* Read each value/label tuple. */
+ /* Read each value/label tuple into labels[]. */
for (i = 0; i < n_labels; i++)
{
- flt64 value;
+ struct label *label = labels + i;
unsigned char label_len;
+ size_t padded_len;
- int rem;
-
- /* Read value, label length. */
- assertive_bufread (h, &value, sizeof value, 0);
- assertive_bufread (h, &label_len, 1, 0);
- memcpy (&raw_label[i], &value, sizeof value);
+ /* Read value. */
+ assertive_bufread (h, label->raw_value, sizeof label->raw_value, 0);
- /* Read label. */
- cooked_label[i] = xmalloc (sizeof **cooked_label);
- cooked_label[i]->s = xmalloc (label_len + 1);
- assertive_bufread (h, cooked_label[i]->s, label_len, 0);
- cooked_label[i]->s[label_len] = 0;
+ /* Read label length. */
+ assertive_bufread (h, &label_len, sizeof label_len, 0);
+ padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
- /* Skip padding. */
- rem = REM_RND_UP (label_len + 1, sizeof (flt64));
- if (rem)
- assertive_bufread (h, &value, rem, 0);
+ /* Read label, padding. */
+ label->label = xmalloc (padded_len + 1);
+ assertive_bufread (h, label->label, padded_len - 1, 0);
+ label->label[label_len] = 0;
}
/* Second step: Read the type 4 record that has the list of
if (rec_type != 4)
lose ((ME, _("%s: Variable index record (type 4) does not immediately "
- "follow value label record (type 3) as it ought."), h->fn));
+ "follow value label record (type 3) as it should."), h->fn));
}
/* Read number of variables associated with value label from type 4
assertive_bufread (h, &n_vars, sizeof n_vars, 0);
if (ext->reverse_endian)
bswap_int32 (&n_vars);
- if (n_vars < 1 || n_vars > ext->dict->nvar)
+ if (n_vars < 1 || n_vars > dict_get_var_cnt (ext->dict))
lose ((ME, _("%s: Number of variables associated with a value label (%d) "
"is not between 1 and the number of variables (%d)."),
- h->fn, n_vars, ext->dict->nvar));
-
- /* Allocate storage. */
- var = xmalloc (sizeof *var * n_vars);
+ h->fn, n_vars, dict_get_var_cnt (ext->dict)));
/* Read the list of variables. */
+ var = xmalloc (n_vars * sizeof *var);
for (i = 0; i < n_vars; i++)
{
int32 var_index;
/* Make sure it's a real variable. */
v = var_by_index[var_index - 1];
if (v == NULL)
- lose ((ME, _("%s: Variable index associated with value label (%d) refers "
- "to a continuation of a string variable, not to an actual "
- "variable."), h->fn, var_index));
+ lose ((ME, _("%s: Variable index associated with value label (%d) "
+ "refers to a continuation of a string variable, not to "
+ "an actual variable."), h->fn, var_index));
if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
- lose ((ME, _("%s: Value labels are not allowed on long string variables "
- "(%s)."), h->fn, v->name));
+ lose ((ME, _("%s: Value labels are not allowed on long string "
+ "variables (%s)."), h->fn, v->name));
/* Add it to the list of variables. */
var[i] = v;
var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
- /* Create a value_label for each value/label tuple, now that we know
- the desired type. */
- for (i = 0; i < n_labels; i++)
+ /* Fill in labels[].value, now that we know the desired type. */
+ for (i = 0; i < n_labels; i++)
{
+ struct label *label = labels + i;
+
if (var[0]->type == ALPHA)
- {
- const int copy_len = min (sizeof (flt64), MAX_SHORT_STRING);
- memcpy (cooked_label[i]->v.s, (char *) &raw_label[i], copy_len);
- if (MAX_SHORT_STRING > copy_len)
- memset (&cooked_label[i]->v.s[copy_len], ' ',
- MAX_SHORT_STRING - copy_len);
- } else {
- cooked_label[i]->v.f = raw_label[i];
- if (ext->reverse_endian)
- bswap_flt64 (&cooked_label[i]->v.f);
- }
- cooked_label[i]->ref_count = n_vars;
+ {
+ /* Bound the copy by the short-string width, as the code this
+ replaces did.  `label->label' is a char *, so its sizeof is
+ the size of a pointer, not the length of any buffer, and
+ must not be used as the limit here. */
+ const int copy_len = min (sizeof (label->raw_value),
+ MAX_SHORT_STRING);
+ memcpy (label->value.s, label->raw_value, copy_len);
+ /* Pad the rest of the short string with spaces so that no
+ byte of the value is left uninitialized. */
+ if (MAX_SHORT_STRING > copy_len)
+ memset (&label->value.s[copy_len], ' ',
+ MAX_SHORT_STRING - copy_len);
+ } else {
+ flt64 f;
+ assert (sizeof f == sizeof label->raw_value);
+ memcpy (&f, label->raw_value, sizeof f);
+ if (ext->reverse_endian)
+ bswap_flt64 (&f);
+ label->value.f = f;
+ }
}
-
+
/* Assign the value_label's to each variable. */
for (i = 0; i < n_vars; i++)
{
struct variable *v = var[i];
int j;
- /* Create AVL tree if necessary. */
- if (!v->val_lab)
- v->val_lab = avl_create (NULL, val_lab_cmp, (void *) (v->width));
-
/* Add each label to the variable. */
for (j = 0; j < n_labels; j++)
{
- struct value_label *old = avl_replace (v->val_lab, cooked_label[j]);
- if (old == NULL)
+ struct label *label = labels + j;
+ if (!val_labs_replace (v->val_labs, label->value, label->label))
continue;
if (var[0]->type == NUMERIC)
msg (MW, _("%s: File contains duplicate label for value %g for "
- "variable %s."), h->fn, cooked_label[j]->v.f, v->name);
+ "variable %s."), h->fn, label->value.f, v->name);
else
msg (MW, _("%s: File contains duplicate label for value `%.*s' "
- "for variable %s."), h->fn, v->width,
- cooked_label[j]->v.s, v->name);
-
- free_value_label (old);
+ "for variable %s."),
+ h->fn, v->width, label->value.s, v->name);
}
}
- free (cooked_label);
- free (raw_label);
+ for (i = 0; i < n_labels; i++)
+ free (labels[i].label);
+ free (labels);
free (var);
return 1;
lossage:
- if (cooked_label)
- for (i = 0; i < n_labels; i++)
- if (cooked_label[i])
- {
- free (cooked_label[i]->s);
- free (cooked_label[i]);
- }
- free (raw_label);
+ if (labels)
+ {
+ for (i = 0; i < n_labels; i++)
+ free (labels[i].label);
+ free (labels);
+ }
free (var);
return 0;
}
struct sfm_fhuser_ext *ext = h->ext;
struct dictionary *dict = ext->dict;
int32 n_lines;
+ char *documents;
- if (dict->documents != NULL)
+ if (dict_get_documents (dict) != NULL)
lose ((ME, _("%s: System file contains multiple type 6 (document) records."),
h->fn));
assertive_bufread (h, &n_lines, sizeof n_lines, 0);
+ /* The line count is stored in the file's byte order; convert it
+ before validating or using it, as is done for the other int32
+ counts read from the file. */
+ if (ext->reverse_endian)
+ bswap_int32 (&n_lines);
- dict->n_documents = n_lines;
- if (dict->n_documents <= 0)
+ if (n_lines <= 0)
lose ((ME, _("%s: Number of document lines (%ld) must be greater than 0."),
- h->fn, (long) dict->n_documents));
+ h->fn, (long) n_lines));
- dict->documents = bufread (h, NULL, 80 * n_lines, 0);
- if (dict->documents == NULL)
+ /* Read 80 bytes per line, asking for one extra byte of buffer so
+ the text can be null-terminated below. */
+ documents = bufread (h, NULL, 80 * n_lines, n_lines * 80 + 1);
+ /* FIXME? Run through asciify. */
+ if (documents == NULL)
return 0;
+ documents[80 * n_lines] = '\0';
+ /* The buffer is freed right after this call, so dict_set_documents()
+ presumably keeps its own copy -- confirm against its definition. */
+ dict_set_documents (dict, documents);
+ free (documents);
return 1;
lossage:
int n, j;
debug_printf ((" var %s", v->name));
- /*debug_printf (("(indices:%d,%d)", v->index, v->foo));*/
debug_printf (("(type:%s,%d)", (v->type == NUMERIC ? _("num")
: (v->type == ALPHA ? _("str") : "!!!")),
v->width));
debug_printf (("(fv:%d,%d)", v->fv, v->nv));
- /*debug_printf (("(get.fv:%d,%d)", v->get.fv, v->get.nv));*/
debug_printf (("(left:%s)(miss:", v->left ? _("left") : _("right")));
switch (v->miss_type)
int i;
- /* Make sure the caller remembered to finish polishing the
- dictionary returned by sfm_read_dictionary(). */
- assert (dict->nval > 0);
-
/* The first concern is to obtain a full case relative to the data
file. (Cases in the data file have no particular relationship to
cases in the active file.) */
/* Translate a case in data file format to a case in active file
format. */
- for (i = 0; i < dict->nvar; i++)
+ for (i = 0; i < dict_get_var_cnt (dict); i++)
{
- struct variable *v = dict->var[i];
+ struct variable *v = dict_get_var (dict, i);
if (v->get.fv == -1)
continue;