/* Opens the system file designated by file handle FH for reading. Reads the
system file's dictionary into *DICT.
+ Ordinarily the reader attempts to automatically detect the character
+ encoding based on the file's contents. This isn't always possible,
+ especially for files written by old versions of SPSS or PSPP, so specifying
+ a nonnull ENCODING overrides the choice of character encoding.
+
If INFO is non-null, then it receives additional info about the system file,
which the caller must eventually free with sfm_read_info_destroy() when it
is no longer needed. */
struct casereader *
-sfm_open_reader (struct file_handle *fh, struct dictionary **dictp,
- struct sfm_read_info *infop)
+sfm_open_reader (struct file_handle *fh, const char *volatile encoding,
+ struct dictionary **dictp, struct sfm_read_info *infop)
{
struct sfm_reader *volatile r = NULL;
- struct sfm_read_info info;
+ struct sfm_read_info *volatile info;
struct sfm_header_record header;
struct sfm_extension_record *extensions[32];
- struct dictionary *dict = NULL;
+ struct dictionary *volatile dict = NULL;
size_t i;
/* Create and initialize reader. */
r->opcode_idx = sizeof r->opcodes;
r->corruption_warning = false;
- memset (&info, 0, sizeof info);
+ info = infop ? infop : xmalloc (sizeof *info);
+ memset (info, 0, sizeof *info);
/* TRANSLATORS: this fragment will be interpolated into
messages in fh_lock() that identify types of files. */
goto error;
/* Read header. */
- read_header (r, &info, &header);
+ read_header (r, info, &header);
vars = NULL;
n_vars = allocated_vars = 0;
First, figure out the correct character encoding, because this determines
how the rest of the header data is to be interpreted. */
- dict = dict_create (choose_encoding (r, &header, extensions[EXT_INTEGER],
- extensions[EXT_ENCODING]));
+ dict = dict_create (encoding
+ ? encoding
+ : choose_encoding (r, &header, extensions[EXT_INTEGER],
+ extensions[EXT_ENCODING]));
r->encoding = dict_get_encoding (dict);
/* These records don't use variables at all. */
parse_document (dict, document);
if (extensions[EXT_INTEGER] != NULL)
- parse_machine_integer_info (r, extensions[EXT_INTEGER], &info);
+ parse_machine_integer_info (r, extensions[EXT_INTEGER], info);
if (extensions[EXT_FLOAT] != NULL)
parse_machine_float_info (r, extensions[EXT_FLOAT]);
if (extensions[EXT_FILE_ATTRS] != NULL)
parse_data_file_attributes (r, extensions[EXT_FILE_ATTRS], dict);
- parse_header (r, &header, &info, dict);
+ parse_header (r, &header, info, dict);
/* Parse the variable records, the basis of almost everything else. */
parse_variable_records (r, dict, vars, n_vars);
wrong when very long strings are involved, so don't warn in
that case. */
if (header.nominal_case_size != -1 && header.nominal_case_size != n_vars
- && info.version_major != 13)
+ && info->version_major != 13)
sys_warn (r, -1, _("File header claims %d variable positions but "
"%zu were read from file."),
header.nominal_case_size, n_vars);
r->proto = caseproto_ref_pool (dict_get_proto (dict), r->pool);
*dictp = dict;
- if (infop)
- *infop = info;
- else
- sfm_read_info_destroy (&info);
+ if (infop != info)
+ {
+ sfm_read_info_destroy (info);
+ free (info);
+ }
return casereader_create_sequential
(NULL, r->proto,
&sys_file_casereader_class, r);
error:
- sfm_read_info_destroy (&info);
+ if (infop != info)
+ {
+ sfm_read_info_destroy (info);
+ free (info);
+ }
+
close_reader (r);
dict_destroy (dict);
*dictp = NULL;
_("MRSET %s has only %zu variables."), mrset->name,
mrset->n_vars);
mrset_destroy (mrset);
+ stringi_set_destroy (&var_names);
continue;
}
while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
{
/* Validate long name. */
- /* XXX need to reencode name to UTF-8 */
if (!dict_id_is_valid (dict, long_name, false))
{
sys_warn (r, record->pos,
start = text->pos;
n = 0;
- for (;;)
+ while (text->pos < text->buffer.length)
{
int c = text->buffer.string[text->pos];
if (c < '0' || c > '9')
n = (n * 10) + (c - '0');
text->pos++;
}
- if (start == text->pos)
+ if (text->pos >= text->buffer.length || start == text->pos)
{
sys_warn (r, text->start,
_("Expecting digit at offset %zu in MRSETS record."),