/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-2000, 2006-2007, 2009-2014 Free Software Foundation, Inc.
+ Copyright (C) 1997-2000, 2006-2007, 2009-2016 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <config.h>
-#include "data/sys-file-reader.h"
#include "data/sys-file-private.h"
#include <errno.h>
#include <sys/stat.h>
#include <zlib.h>
+#include "data/any-reader.h"
#include "data/attributes.h"
#include "data/case.h"
#include "data/casereader-provider.h"
#include "libpspp/assertion.h"
#include "libpspp/compiler.h"
#include "libpspp/i18n.h"
+#include "libpspp/ll.h"
#include "libpspp/message.h"
#include "libpspp/misc.h"
#include "libpspp/pool.h"
int weight_idx; /* 0 if unweighted, otherwise a var index. */
int nominal_case_size; /* Number of var positions. */
- /* These correspond to the members of struct sfm_file_info or a dictionary
+ /* These correspond to the members of struct any_file_info or a dictionary
but in the system file's encoding rather than ASCII. */
char creation_date[10]; /* "dd mmm yy". */
char creation_time[9]; /* "hh:mm:ss". */
{
off_t pos;
int width;
- char name[8];
+ char name[9];
int print_format;
int write_format;
int missing_value_code;
struct sfm_extension_record
{
+ struct ll ll; /* In struct sfm_reader 'var_attrs' list (only subtype 18
+ records are linked there; a file may hold several). */
int subtype; /* Record subtype. */
off_t pos; /* Starting offset in file. */
- size_t size; /* Size of data elements. */
- size_t count; /* Number of data elements. */
+ unsigned int size; /* Size of each data element. */
+ unsigned int count; /* Number of data elements; the contents span
+ size * count bytes. NOTE(review): that product is
+ now evaluated in unsigned int and could wrap
+ before being widened -- confirm the callers. */
void *data; /* Contents. */
};
/* System file reader. */
struct sfm_reader
{
+ struct any_reader any_reader;
+
/* Resource tracking. */
struct pool *pool; /* All system file state. */
/* File data. */
- struct sfm_read_info info;
+ struct any_read_info info;
struct sfm_header_record header;
struct sfm_var_record *vars;
size_t n_vars;
struct sfm_mrset *mrsets;
size_t n_mrsets;
struct sfm_extension_record *extensions[32];
+ struct ll_list var_attrs; /* Contains "struct sfm_extension_record"s. */
/* File state. */
struct file_handle *fh; /* File handle. */
const char *encoding; /* String encoding. */
/* Decompression. */
- enum sfm_compression compression;
+ enum any_compression compression;
double bias; /* Compression bias, usually 100.0. */
uint8_t opcodes[8]; /* Current block of opcodes. */
size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
static const struct casereader_class sys_file_casereader_class;
+static struct sfm_reader *
+sfm_reader_cast (const struct any_reader *r_)
+{ /* Converts generic reader R_ into the system file reader that embeds it.
+ R_ must belong to sys_file_reader_class, which the assertion checks. */
+ assert (r_->klass == &sys_file_reader_class);
+ return UP_CAST (r_, struct sfm_reader, any_reader); /* any_reader is a member of struct sfm_reader. */
+}
+
+static bool sfm_close (struct any_reader *);
+
static struct variable *lookup_var_by_index (struct sfm_reader *, off_t,
const struct sfm_var_record *,
size_t n, int idx);
struct sfm_var_record *);
static bool read_value_label_record (struct sfm_reader *,
struct sfm_value_label_record *);
-static struct sfm_document_record *read_document_record (struct sfm_reader *);
+static bool read_document_record (struct sfm_reader *);
static bool read_extension_record (struct sfm_reader *, int subtype,
struct sfm_extension_record **);
static bool skip_extension_record (struct sfm_reader *, int subtype);
struct text_record *,
struct variable **var, char **value);
static void text_warn (struct sfm_reader *r, struct text_record *text,
- const char *format, ...)
- PRINTF_FORMAT (3, 4);
+ const char *format, ...) PRINTF_FORMAT (3, 4);
static char *text_get_token (struct text_record *,
struct substring delimiters, char *delimiter);
static bool text_match (struct text_record *, char c);
static bool read_dictionary (struct sfm_reader *);
static bool read_record (struct sfm_reader *, int type,
size_t *allocated_vars, size_t *allocated_labels);
-static bool read_header (struct sfm_reader *, struct sfm_read_info *,
+static bool read_header (struct sfm_reader *, struct any_read_info *,
struct sfm_header_record *);
static void parse_header (struct sfm_reader *,
const struct sfm_header_record *,
- struct sfm_read_info *, struct dictionary *);
+ struct any_read_info *, struct dictionary *);
static bool parse_variable_records (struct sfm_reader *, struct dictionary *,
struct sfm_var_record *, size_t n);
static void parse_format_spec (struct sfm_reader *, off_t pos,
struct dictionary *);
static bool parse_machine_integer_info (struct sfm_reader *,
const struct sfm_extension_record *,
- struct sfm_read_info *);
+ struct any_read_info *);
static void parse_machine_float_info (struct sfm_reader *,
const struct sfm_extension_record *);
static void parse_extra_product_info (struct sfm_reader *,
const struct sfm_extension_record *,
- struct sfm_read_info *);
+ struct any_read_info *);
static void parse_mrsets (struct sfm_reader *,
const struct sfm_extension_record *,
size_t *allocated_mrsets);
const struct sfm_extension_record *,
struct dictionary *);
static void assign_variable_roles (struct sfm_reader *, struct dictionary *);
-static bool parse_long_string_value_labels (struct sfm_reader *,
+static void parse_long_string_value_labels (struct sfm_reader *,
const struct sfm_extension_record *,
struct dictionary *);
-static bool parse_long_string_missing_values (
+static void parse_long_string_missing_values (
struct sfm_reader *, const struct sfm_extension_record *,
struct dictionary *);
/* Frees the strings inside INFO. */
void
-sfm_read_info_destroy (struct sfm_read_info *info)
+any_read_info_destroy (struct any_read_info *info)
{
if (info)
{
/* Tries to open FH for reading as a system file. Returns an sfm_reader if
successful, otherwise NULL. */
-struct sfm_reader *
+static struct any_reader *
sfm_open (struct file_handle *fh)
{
size_t allocated_mrsets = 0;
/* Create and initialize reader. */
r = xzalloc (sizeof *r);
+ r->any_reader.klass = &sys_file_reader_class;
r->pool = pool_create ();
pool_register (r->pool, free, r);
r->fh = fh_ref (fh);
r->opcode_idx = sizeof r->opcodes;
+ ll_init (&r->var_attrs);
/* TRANSLATORS: this fragment will be interpolated into
messages in fh_lock() that identify types of files. */
if (r->lock == NULL)
goto error;
- r->file = fn_open (fh_get_file_name (fh), "rb");
+ r->file = fn_open (fh, "rb");
if (r->file == NULL)
{
msg (ME, _("Error opening `%s' for reading as a system file: %s."),
if (r->extensions[EXT_MRSETS2] != NULL)
parse_mrsets (r, r->extensions[EXT_MRSETS2], &allocated_mrsets);
- return r;
+ return &r->any_reader;
+
error:
- sfm_close (r);
+ if (r)
+ sfm_close (&r->any_reader);
return NULL;
}
if (!skip_bytes (r, 4))
return false;
- if (r->compression == SFM_COMP_ZLIB && !read_zheader (r))
+ if (r->compression == ANY_COMP_ZLIB && !read_zheader (r))
return false;
return true;
sys_error (r, r->pos, _("Duplicate type 6 (document) record."));
return false;
}
- r->document = read_document_record (r);
- return r->document != NULL;
+ return read_document_record (r);
case 7:
if (!read_int (r, &subtype))
|| subtype >= sizeof r->extensions / sizeof *r->extensions)
{
sys_warn (r, r->pos,
- _("Unrecognized record type 7, subtype %d. Please "
- "send a copy of this file, and the syntax which "
- "created it to %s."),
- subtype, PACKAGE_BUGREPORT);
+ _("Unrecognized record type 7, subtype %d. For help, "
+ "please send this file to %s and mention that you were "
+ "using %s."),
+ subtype, PACKAGE_BUGREPORT, PACKAGE_STRING);
return skip_extension_record (r, subtype);
}
+ else if (subtype == 18)
+ {
+ /* System files written by "Stata 14.1/-savespss- 1.77 by S.Radyakin"
+ put each variable attribute into a separate record with subtype
+ 18. I'm surprised that SPSS puts up with this. */
+ struct sfm_extension_record *ext;
+ bool ok = read_extension_record (r, subtype, &ext);
+ if (ok && ext)
+ ll_push_tail (&r->var_attrs, &ext->ll);
+ return ok;
+ }
else if (r->extensions[subtype] != NULL)
{
sys_warn (r, r->pos,
_("Record type 7, subtype %d found here has the same "
- "type as the record found near offset 0x%llx. "
- "Please send a copy of this file, and the syntax "
- "which created it to %s."),
+ "type as the record found near offset 0x%llx. For "
+ "help, please send this file to %s and mention that "
+ "you were using %s."),
subtype, (long long int) r->extensions[subtype]->pos,
- PACKAGE_BUGREPORT);
+ PACKAGE_BUGREPORT, PACKAGE_STRING);
return skip_extension_record (r, subtype);
}
else
/* Returns the character encoding obtained from R, or a null pointer if R
doesn't have an indication of its character encoding. */
-const char *
+static const char *
sfm_get_encoding (const struct sfm_reader *r)
{
/* The EXT_ENCODING record is the best way to determine dictionary
return NULL;
}
+struct get_strings_aux /* Accumulator used by sfm_get_strings(): three parallel arrays. */
+ {
+ struct pool *pool; /* Pool from which the arrays and string copies are allocated. */
+ char **titles; /* titles[i] describes strings[i]. */
+ char **strings; /* Copies of the collected strings. */
+ bool *ids; /* ids[i] is true if strings[i] was added as an identifier. */
+ size_t allocated; /* Capacity of each of the three arrays. */
+ size_t n; /* Number of entries in use. */
+ };
+
+static void
+add_string__ (struct get_strings_aux *aux,
+ const char *string, bool id, char *title)
+{ /* Appends one entry to AUX: a pool-allocated copy of STRING, the flag ID,
+ and TITLE itself (the TITLE pointer is stored without copying). */
+ if (aux->n >= aux->allocated)
+ { /* Arrays are full: grow all three to the same new capacity. */
+ aux->allocated = 2 * (aux->allocated + 1);
+ aux->titles = pool_realloc (aux->pool, aux->titles,
+ aux->allocated * sizeof *aux->titles);
+ aux->strings = pool_realloc (aux->pool, aux->strings,
+ aux->allocated * sizeof *aux->strings);
+ aux->ids = pool_realloc (aux->pool, aux->ids,
+ aux->allocated * sizeof *aux->ids);
+ }
+
+ aux->titles[aux->n] = title;
+ aux->strings[aux->n] = pool_strdup (aux->pool, string);
+ aux->ids[aux->n] = id;
+ aux->n++;
+}
+
+static void PRINTF_FORMAT (3, 4)
+add_string (struct get_strings_aux *aux,
+ const char *string, const char *title, ...)
+{ /* Adds STRING to AUX as free-form text (not an identifier). TITLE is a
+ printf-style format that, with the following arguments, yields the
+ description stored alongside STRING. */
+ va_list args;
+
+ va_start (args, title);
+ add_string__ (aux, string, false, pool_vasprintf (aux->pool, title, args)); /* false: not an identifier. */
+ va_end (args);
+}
+
+static void PRINTF_FORMAT (3, 4)
+add_id (struct get_strings_aux *aux, const char *id, const char *title, ...)
+{ /* Adds ID to AUX, flagged as an identifier. TITLE is a printf-style format
+ that, with the following arguments, yields the description stored
+ alongside ID. */
+ va_list args;
+
+ va_start (args, title);
+ add_string__ (aux, id, true, pool_vasprintf (aux->pool, title, args)); /* true: an identifier. */
+ va_end (args);
+}
+
+/* Retrieves significant string data from R in its raw format, to allow the
+ caller to try to detect the encoding in use.
+
+ Returns the number of strings retrieved N. Sets each of *TITLESP, *IDSP,
+ and *STRINGSP to an array of N elements allocated from POOL. For each I in
+ 0...N-1, UTF-8 string *TITLESP[I] describes *STRINGSP[I], which is in
+ whatever encoding system file R uses. *IDSP[I] is true if *STRINGSP[I] must
+ be a valid PSPP language identifier, false if *STRINGSP[I] is free-form
+ text.
+
+ Strings are gathered in this order: variable names, variable labels, value
+ labels, the creation date, creation time, product, and file label from the
+ header, extra product info, document lines, and for each MRSET its name,
+ label, and counted value. */
+static size_t
+sfm_get_strings (const struct any_reader *r_, struct pool *pool,
+ char ***titlesp, bool **idsp, char ***stringsp)
+{
+ struct sfm_reader *r = sfm_reader_cast (r_);
+ const struct sfm_mrset *mrset;
+ struct get_strings_aux aux;
+ size_t var_idx;
+ size_t i, j, k;
+
+ aux.pool = pool;
+ aux.titles = NULL;
+ aux.strings = NULL;
+ aux.ids = NULL;
+ aux.allocated = 0;
+ aux.n = 0;
+
+ var_idx = 0; /* First pass: variable names, numbered from 1. */
+ for (i = 0; i < r->n_vars; i++)
+ if (r->vars[i].width != -1) /* NOTE(review): width -1 presumably marks a record that is not a variable of its own -- confirm. */
+ add_id (&aux, r->vars[i].name, _("Variable %zu"), ++var_idx);
+
+ var_idx = 0; /* Second pass: labels, numbered the same way as the names. */
+ for (i = 0; i < r->n_vars; i++)
+ if (r->vars[i].width != -1)
+ {
+ var_idx++;
+ if (r->vars[i].label)
+ add_string (&aux, r->vars[i].label, _("Variable %zu Label"),
+ var_idx);
+ }
+
+ k = 0; /* Running count of value labels across all label records. */
+ for (i = 0; i < r->n_labels; i++)
+ for (j = 0; j < r->labels[i].n_labels; j++)
+ add_string (&aux, r->labels[i].labels[j].label,
+ _("Value Label %zu"), k++); /* NOTE(review): numbered from 0, unlike the 1-based titles elsewhere in this function. */
+
+ add_string (&aux, r->header.creation_date, _("Creation Date"));
+ add_string (&aux, r->header.creation_time, _("Creation Time"));
+ add_string (&aux, r->header.eye_catcher, _("Product"));
+ add_string (&aux, r->header.file_label, _("File Label"));
+
+ if (r->extensions[EXT_PRODUCT_INFO])
+ add_string (&aux, r->extensions[EXT_PRODUCT_INFO]->data,
+ _("Extra Product Info"));
+
+ if (r->document)
+ {
+ size_t i; /* Shadows the outer I. */
+
+ for (i = 0; i < r->document->n_lines; i++)
+ {
+ char line[81];
+
+ memcpy (line, r->document->documents + i * 80, 80); /* Copy one fixed-width 80-byte line... */
+ line[80] = '\0'; /* ...and terminate it so it can be added as a string. */
+
+ add_string (&aux, line, _("Document Line %zu"), i + 1);
+ }
+ }
+
+ for (mrset = r->mrsets; mrset < &r->mrsets[r->n_mrsets]; mrset++)
+ {
+ size_t mrset_idx = mrset - r->mrsets + 1; /* 1-based position of MRSET. */
+
+ add_id (&aux, mrset->name, _("MRSET %zu"), mrset_idx);
+ if (mrset->label[0])
+ add_string (&aux, mrset->label, _("MRSET %zu Label"), mrset_idx);
+
+ /* Skip the variables because they ought to be duplicates. */
+
+ if (mrset->counted)
+ add_string (&aux, mrset->counted, _("MRSET %zu Counted Value"),
+ mrset_idx);
+ }
+
+ /* data file attributes */
+ /* variable attributes */
+ /* long var map */
+ /* long string value labels */
+ /* long string missing values */
+
+ *titlesp = aux.titles;
+ *idsp = aux.ids;
+ *stringsp = aux.strings;
+ return aux.n;
+}
+
/* Decodes the dictionary read from R, saving it into *DICT. Character
strings in R are decoded using ENCODING, or an encoding obtained from R if
ENCODING is null, or the locale encoding if R specifies no encoding.
If INFOP is non-null, then it receives additional info about the system
- file, which the caller must eventually free with sfm_read_info_destroy()
+ file, which the caller must eventually free with any_read_info_destroy()
when it is no longer needed.
This function consumes R. The caller must not use it again later, even to
destroy it with sfm_close(). */
-struct casereader *
-sfm_decode (struct sfm_reader *r, const char *encoding,
- struct dictionary **dictp, struct sfm_read_info *infop)
+static struct casereader *
+sfm_decode (struct any_reader *r_, const char *encoding,
+ struct dictionary **dictp, struct any_read_info *infop)
{
+ struct sfm_reader *r = sfm_reader_cast (r_);
struct dictionary *dict;
size_t i;
{
encoding = sfm_get_encoding (r);
if (encoding == NULL)
- encoding = locale_charset ();
+ {
+ sys_warn (r, -1, _("This system file does not indicate its own "
+ "character encoding. Using default encoding "
+ "%s. For best results, specify an encoding "
+ "explicitly. Use SYSFILE INFO with "
+ "ENCODING=\"DETECT\" to analyze the possible "
+ "encodings."),
+ locale_charset ());
+ encoding = locale_charset ();
+ }
}
dict = dict_create (encoding);
parse_long_var_name_map (r, r->extensions[EXT_LONG_NAMES], dict);
/* The following records use long names, so they need to follow renaming. */
- if (r->extensions[EXT_VAR_ATTRS] != NULL)
+ if (!ll_is_empty (&r->var_attrs))
{
- parse_variable_attributes (r, r->extensions[EXT_VAR_ATTRS], dict);
+ struct sfm_extension_record *ext;
+ ll_for_each (ext, struct sfm_extension_record, ll, &r->var_attrs)
+ parse_variable_attributes (r, ext, dict);
/* Roles use the $@Role attribute. */
assign_variable_roles (r, dict);
}
-
- if (r->extensions[EXT_LONG_LABELS] != NULL
- && !parse_long_string_value_labels (r, r->extensions[EXT_LONG_LABELS],
- dict))
- goto error;
- if (r->extensions[EXT_LONG_MISSING] != NULL
- && !parse_long_string_missing_values (r, r->extensions[EXT_LONG_MISSING],
- dict))
- goto error;
+ if (r->extensions[EXT_LONG_LABELS] != NULL)
+ parse_long_string_value_labels (r, r->extensions[EXT_LONG_LABELS], dict);
+ if (r->extensions[EXT_LONG_MISSING] != NULL)
+ parse_long_string_missing_values (r, r->extensions[EXT_LONG_MISSING],
+ dict);
/* Warn if the actual amount of data per case differs from the
amount that the header claims. SPSS version 13 gets this
&sys_file_casereader_class, r);
error:
- sfm_close (r);
+ sfm_close (r_);
dict_destroy (dict);
*dictp = NULL;
return NULL;
closed with sfm_decode() or this function.
Returns true if an I/O error has occurred on READER, false
otherwise. */
-bool
-sfm_close (struct sfm_reader *r)
+static bool
+sfm_close (struct any_reader *r_)
{
+ struct sfm_reader *r = sfm_reader_cast (r_);
bool error;
- if (r == NULL)
- return true;
-
if (r->file)
{
- if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
+ if (fn_close (r->fh, r->file) == EOF)
{
msg (ME, _("Error closing system file `%s': %s."),
fh_get_file_name (r->fh), strerror (errno));
r->file = NULL;
}
- sfm_read_info_destroy (&r->info);
+ any_read_info_destroy (&r->info);
fh_unlock (r->lock);
fh_unref (r->fh);
sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
{
struct sfm_reader *r = r_;
- sfm_close (r);
+ sfm_close (&r->any_reader);
}
-/* Returns true if FILE is an SPSS system file,
- false otherwise. */
-bool
+/* Detects whether FILE is an SPSS system file. Returns 1 if so, 0 if not, and
+ a negative errno value if there is an error reading FILE. */
+static int
sfm_detect (FILE *file)
{
char magic[5];
+ if (fseek (file, 0, SEEK_SET) != 0)
+ return -errno;
if (fread (magic, 4, 1, file) != 1)
- return false;
+ return ferror (file) ? -errno : 0;
magic[4] = '\0';
return (!strcmp (ASCII_MAGIC, magic)
except for the string fields in *INFO, which parse_header() will initialize
later once the file's encoding is known. */
static bool
-read_header (struct sfm_reader *r, struct sfm_read_info *info,
+read_header (struct sfm_reader *r, struct any_read_info *info,
struct sfm_header_record *header)
{
uint8_t raw_layout_code[4];
if (!zmagic)
{
if (compressed == 0)
- r->compression = SFM_COMP_NONE;
+ r->compression = ANY_COMP_NONE;
else if (compressed == 1)
- r->compression = SFM_COMP_SIMPLE;
+ r->compression = ANY_COMP_SIMPLE;
else if (compressed != 0)
{
sys_error (r, 0, "System file header has invalid compression "
else
{
if (compressed == 2)
- r->compression = SFM_COMP_ZLIB;
+ r->compression = ANY_COMP_ZLIB;
else
{
sys_error (r, 0, "ZLIB-compressed system file header has invalid "
|| !read_int (r, &record->missing_value_code)
|| !read_int (r, &record->print_format)
|| !read_int (r, &record->write_format)
- || !read_bytes (r, record->name, sizeof record->name))
+ || !read_string (r, record->name, sizeof record->name))
return false;
if (has_variable_label == 1)
{
- enum { MAX_LABEL_LEN = 255 };
+ enum { MAX_LABEL_LEN = 65536 };
unsigned int len, read_len;
if (!read_uint (r, &len))
record->pos = r->pos;
if (!read_uint (r, &record->n_labels))
return false;
- if (record->n_labels > SIZE_MAX / sizeof *record->labels)
+ if (record->n_labels > UINT_MAX / sizeof *record->labels)
{
- sys_error (r, r->pos - 4, _("Invalid number of labels %zu."),
+ sys_error (r, r->pos - 4, _("Invalid number of labels %u."),
record->n_labels);
return false;
}
if (record->n_vars < 1 || record->n_vars > r->n_vars)
{
sys_error (r, r->pos - 4,
- _("Number of variables associated with a value label (%zu) "
+ _("Number of variables associated with a value label (%u) "
"is not between 1 and the number of variables (%zu)."),
record->n_vars, r->n_vars);
return false;
return true;
}
-/* Reads a document record from R and returns it. */
-static struct sfm_document_record *
+/* Reads a document record from R. A record that declares zero lines is
+ accepted and leaves R->document unchanged; otherwise the lines are read into
+ a new record allocated from R->pool and stored in R->document. Returns true
+ if successful, false on error. */
+static bool
read_document_record (struct sfm_reader *r)
{
- struct sfm_document_record *record;
int n_lines;
-
- record = pool_malloc (r->pool, sizeof *record);
- record->pos = r->pos;
-
if (!read_int (r, &n_lines))
- return NULL;
- if (n_lines <= 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH)
+ return false;
+ else if (n_lines == 0)
+ return true; /* Empty document record: nothing to store. */
+ else if (n_lines < 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH)
{
- sys_error (r, record->pos,
+ sys_error (r, r->pos,
_("Number of document lines (%d) "
"must be greater than 0 and less than %d."),
n_lines, INT_MAX / DOC_LINE_LENGTH); /* NOTE(review): the message still demands more than 0 lines although 0 is accepted above. */
- return NULL;
+ return false;
}
+ struct sfm_document_record *record;
+ record = pool_malloc (r->pool, sizeof *record);
+ record->pos = r->pos; /* NOTE(review): taken after the line count was read -- confirm this is the intended offset. */
record->n_lines = n_lines;
record->documents = pool_malloc (r->pool, DOC_LINE_LENGTH * n_lines);
if (!read_bytes (r, record->documents, DOC_LINE_LENGTH * n_lines))
- return NULL;
+ return false;
- return record;
+ r->document = record; /* Publish the record only once it has been read completely. */
+ return true;
}
static bool
{
if (type->size > 0 && record->size != type->size)
sys_warn (r, record->pos,
- _("Record type 7, subtype %d has bad size %zu "
+ _("Record type 7, subtype %d has bad size %u "
"(expected %d)."), subtype, record->size, type->size);
else if (type->count > 0 && record->count != type->count)
sys_warn (r, record->pos,
- _("Record type 7, subtype %d has bad count %zu "
+ _("Record type 7, subtype %d has bad count %u "
"(expected %d)."), subtype, record->count, type->count);
else if (type->count == 0 && type->size == 0)
{
}
sys_warn (r, record->pos,
- _("Unrecognized record type 7, subtype %d. Please send a "
- "copy of this file, and the syntax which created it to %s."),
- subtype, PACKAGE_BUGREPORT);
+ _("Unrecognized record type 7, subtype %d. For help, please "
+ "send this file to %s and mention that you were using %s."),
+ subtype, PACKAGE_BUGREPORT, PACKAGE_STRING);
skip:
return skip_bytes (r, n_bytes);
static void
parse_header (struct sfm_reader *r, const struct sfm_header_record *header,
- struct sfm_read_info *info, struct dictionary *dict)
+ struct any_read_info *info, struct dictionary *dict)
{
const char *dict_encoding = dict_get_encoding (dict);
struct substring product;
size_t i;
name = recode_string_pool ("UTF-8", dict_encoding,
- rec->name, 8, r->pool);
+ rec->name, -1, r->pool);
name[strcspn (name, " ")] = '\0';
if (!dict_id_is_valid (dict, name, false)
utf8_label = recode_string_pool ("UTF-8", dict_encoding,
rec->label, -1, r->pool);
- var_set_label (var, utf8_label, false);
+ var_set_label (var, utf8_label);
}
/* Set missing values. */
}
}
else
- {
- union value value;
-
- value_init_pool (r->pool, &value, width);
- value_set_missing (&value, width);
- for (i = 0; i < rec->missing_value_code; i++)
- mv_add_str (&mv, rec->missing + 8 * i, MIN (width, 8));
- }
+ for (i = 0; i < rec->missing_value_code; i++)
+ mv_add_str (&mv, rec->missing + 8 * i, MIN (width, 8));
var_set_missing_values (var, &mv);
}
static bool
parse_machine_integer_info (struct sfm_reader *r,
const struct sfm_extension_record *record,
- struct sfm_read_info *info)
+ struct any_read_info *info)
{
int float_representation, expected_float_format;
int integer_representation, expected_integer_format;
static void
parse_extra_product_info (struct sfm_reader *r,
const struct sfm_extension_record *record,
- struct sfm_read_info *info)
+ struct any_read_info *info)
{
struct text_record *text;
else
{
sys_warn (r, record->pos,
- _("Extension 11 has bad count %zu (for %zu variables)."),
+ _("Extension 11 has bad count %u (for %zu variables)."),
record->count, n_vars);
return;
}
while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
{
/* Validate long name. */
- if (!dict_id_is_valid (dict, long_name, false))
+ if (!dict_id_is_valid (dict, long_name, false)
+ || long_name[0] == '$' || long_name[0] == '#')
{
sys_warn (r, record->pos,
_("Long variable mapping from %s to invalid "
text_warn (r, text, _("Error parsing attribute value %s[%d]."),
key, index);
break;
- }
+ }
length = strlen (value);
- if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
+ if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
{
value[length - 1] = '\0';
- attribute_add_value (attr, value + 1);
+ attribute_add_value (attr, value + 1);
}
- else
+ else
{
text_warn (r, text,
_("Attribute value %s[%d] is not quoted: %s."),
key, index, value);
- attribute_add_value (attr, value);
+ attribute_add_value (attr, value);
}
/* Was this the last value for this attribute? */
size_t end = record->size * record->count;
if (length >= end || ofs + length > end)
{
- sys_error (r, record->pos + end,
- _("Extension record subtype %d ends unexpectedly."),
- record->subtype);
+ sys_warn (r, record->pos + end,
+ _("Extension record subtype %d ends unexpectedly."),
+ record->subtype);
return false;
}
return true;
}
-static bool
+static void
parse_long_string_value_labels (struct sfm_reader *r,
const struct sfm_extension_record *record,
struct dictionary *dict)
/* Parse variable name length. */
if (!check_overflow (r, record, ofs, 4))
- return false;
+ return;
var_name_len = parse_int (r, record->data, ofs);
ofs += 4;
/* Parse variable name, width, and number of labels. */
- if (!check_overflow (r, record, ofs, var_name_len + 8))
- return false;
+ if (!check_overflow (r, record, ofs, var_name_len)
+ || !check_overflow (r, record, ofs, var_name_len + 8))
+ return;
var_name = recode_string_pool ("UTF-8", dict_encoding,
(const char *) record->data + ofs,
var_name_len, r->pool);
/* Parse value length. */
if (!check_overflow (r, record, ofs, 4))
- return false;
+ return;
value_length = parse_int (r, record->data, ofs);
ofs += 4;
/* Parse value. */
if (!check_overflow (r, record, ofs, value_length))
- return false;
+ return;
if (!skip)
{
if (value_length == width)
/* Parse label length. */
if (!check_overflow (r, record, ofs, 4))
- return false;
+ return;
label_length = parse_int (r, record->data, ofs);
ofs += 4;
/* Parse label. */
if (!check_overflow (r, record, ofs, label_length))
- return false;
+ return;
if (!skip)
{
char *label;
ofs += label_length;
}
}
-
- return true;
}
-static bool
+static void
parse_long_string_missing_values (struct sfm_reader *r,
const struct sfm_extension_record *record,
struct dictionary *dict)
/* Parse variable name length. */
if (!check_overflow (r, record, ofs, 4))
- return false;
+ return;
var_name_len = parse_int (r, record->data, ofs);
ofs += 4;
/* Parse variable name. */
- if (!check_overflow (r, record, ofs, var_name_len + 1))
- return false;
+ if (!check_overflow (r, record, ofs, var_name_len)
+ || !check_overflow (r, record, ofs, var_name_len + 1))
+ return;
var_name = recode_string_pool ("UTF-8", dict_encoding,
(const char *) record->data + ofs,
var_name_len, r->pool);
/* Parse value length. */
if (!check_overflow (r, record, ofs, 4))
- return false;
+ return;
value_length = parse_int (r, record->data, ofs);
ofs += 4;
/* Parse value. */
if (!check_overflow (r, record, ofs, value_length))
- return false;
+ return;
if (var != NULL
&& i < 3
&& !mv_add_str (&mv, (const uint8_t *) record->data + ofs,
if (var != NULL)
var_set_missing_values (var, &mv);
}
-
- return true;
}
\f
/* Case reader. */
int retval;
int i;
- if (r->error)
+ if (r->error || !r->sfm_var_cnt)
return NULL;
c = case_create (r->proto);
static bool
read_case_number (struct sfm_reader *r, double *d)
{
- if (r->compression == SFM_COMP_NONE)
+ if (r->compression == ANY_COMP_NONE)
{
uint8_t number[8];
if (!try_read_bytes (r, number, sizeof number))
static int
read_opcode (struct sfm_reader *r)
{
- assert (r->compression != SFM_COMP_NONE);
+ assert (r->compression != ANY_COMP_NONE);
for (;;)
{
int opcode;
read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
{
assert (length % 8 == 0);
- if (r->compression == SFM_COMP_NONE)
+ if (r->compression == ANY_COMP_NONE)
return try_read_bytes (r, s, length);
else
{
{
if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
return false;
-
+
*value = text_get_token (text, ss_buffer ("\t\0", 2), NULL);
if (*value == NULL)
return false;
text_warn (struct sfm_reader *r, struct text_record *text,
const char *format, ...)
{
- if (text->n_warnings++ < MAX_TEXT_WARNINGS)
+ if (text->n_warnings++ < MAX_TEXT_WARNINGS)
{
va_list args;
static bool
text_match (struct text_record *text, char c)
{
- if (text->buffer.string[text->pos] == c)
+ if (text->pos >= text->buffer.length)
+ return false;
+
+ if (text->buffer.string[text->pos] == c)
{
text->pos++;
return true;
va_end (args);
}
-/* Displays an error for the current file position,
- marks it as in an error state,
- and aborts reading it using longjmp. */
+/* Displays an error for the current file position and marks it as in an error
+ state. */
static void
sys_error (struct sfm_reader *r, off_t offset, const char *format, ...)
{
static int
read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
{
- if (r->compression == SFM_COMP_SIMPLE)
+ if (r->compression == ANY_COMP_SIMPLE)
return read_bytes (r, buf, byte_cnt);
else
{
static int
try_read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
{
- if (r->compression == SFM_COMP_SIMPLE)
+ if (r->compression == ANY_COMP_SIMPLE)
return try_read_bytes (r, buf, byte_cnt);
else
return read_bytes_zlib (r, buf, byte_cnt);
NULL,
NULL,
};
+
+const struct any_reader_class sys_file_reader_class = /* Entry points of the system file reader. */
+ {
+ N_("SPSS System File"), /* Name of the file format. */
+ sfm_detect, /* Tests whether a FILE is a system file. */
+ sfm_open, /* Opens a file handle and reads the raw records. */
+ sfm_close, /* Closes a reader that was opened by sfm_open. */
+ sfm_decode, /* Decodes the dictionary and yields a casereader. */
+ sfm_get_strings, /* Collects raw strings for encoding detection. */
+ };