#include "data/sys-file-private.h"
#include <errno.h>
+#include <fcntl.h>
#include <float.h>
#include <inttypes.h>
#include <stdlib.h>
#include "libpspp/array.h"
#include "libpspp/assertion.h"
#include "libpspp/compiler.h"
+#include "libpspp/fbuf.h"
#include "libpspp/i18n.h"
#include "libpspp/ll.h"
#include "libpspp/message.h"
/* File state. */
struct file_handle *fh; /* File handle. */
struct fh_lock *lock; /* Mutual exclusion for file handle. */
- FILE *file; /* File stream. */
+ struct fbuf *fbuf; /* File stream. */
off_t pos; /* Position in file. */
bool error; /* I/O or corruption error? */
struct caseproto *proto; /* Format of output cases. */
size_t sfm_var_cnt; /* Number of variables. */
int case_cnt; /* Number of cases */
const char *encoding; /* String encoding. */
+ bool written_by_readstat; /* From https://github.com/WizardMac/ReadStat? */
/* Decompression. */
enum any_compression compression;
if (r->lock == NULL)
goto error;
- r->file = fn_open (fh, "rb");
- if (r->file == NULL)
+ int fd = fn_open (fh, O_RDONLY | O_BINARY, 0);
+ if (fd < 0)
{
msg (ME, _("Error opening `%s' for reading as a system file: %s."),
fh_get_file_name (r->fh), strerror (errno));
goto error;
}
+ r->fbuf = fbuf_open_fd (fd);
if (!read_dictionary (r))
goto error;
amount that the header claims. SPSS version 13 gets this
wrong when very long strings are involved, so don't warn in
that case. */
- if (r->header.nominal_case_size != -1
+ if (r->header.nominal_case_size > 0
&& r->header.nominal_case_size != r->n_vars
&& r->info.version_major != 13)
sys_warn (r, -1, _("File header claims %d variable positions but "
struct sfm_reader *r = sfm_reader_cast (r_);
bool error;
- if (r->file)
+ if (r->fbuf)
{
- if (fn_close (r->fh, r->file) == EOF)
+ int error = fbuf_close (r->fbuf);
+ if (error)
{
msg (ME, _("Error closing system file `%s': %s."),
- fh_get_file_name (r->fh), strerror (errno));
+ fh_get_file_name (r->fh), strerror (error));
r->error = true;
}
- r->file = NULL;
+ r->fbuf = NULL;
}
any_read_info_destroy (&r->info);
if (!read_string (r, header->magic, sizeof header->magic)
|| !read_string (r, header->eye_catcher, sizeof header->eye_catcher))
return false;
+ r->written_by_readstat = strstr (header->eye_catcher,
+ "https://github.com/WizardMac/ReadStat");
if (!strcmp (ASCII_MAGIC, header->magic)
|| !strcmp (EBCDIC_MAGIC, header->magic))
"`%s' to `%s'."),
name, new_name);
var = rec->var = dict_create_var_assert (dict, new_name, rec->width);
+ var_set_short_name (var, 0, new_name);
free (new_name);
}
- /* Set the short name the same as the long name. */
- var_set_short_name (var, 0, name);
+ /* Set the short name the same as the long name (even if we renamed
+ it). */
+ var_set_short_name (var, 0, var_get_name (var));
/* Get variable label, if any. */
if (rec->label)
size_t i;
name = recode_string ("UTF-8", r->encoding, s->name, -1);
- if (name[0] != '$')
+ if (!mrset_is_valid_name (name, dict_get_encoding (dict), false))
{
- sys_warn (r, -1, _("Multiple response set name `%s' does not begin "
- "with `$'."),
+ sys_warn (r, -1, _("Invalid multiple response set name `%s'."),
name);
free (name);
continue;
}
static void
-rename_var_and_save_short_names (struct dictionary *dict, struct variable *var,
- const char *new_name)
+rename_var_and_save_short_names (struct sfm_reader *r, off_t pos,
+ struct dictionary *dict,
+ struct variable *var, const char *new_name)
{
size_t n_short_names;
char **short_names;
}
/* Set long name. */
- dict_rename_var (dict, var, new_name);
+ if (!dict_try_rename_var (dict, var, new_name))
+ sys_warn (r, pos, _("Duplicate long variable name `%s'."), new_name);
/* Restore short names. */
for (i = 0; i < n_short_names; i++)
char *new_name;
new_name = utf8_to_lower (var_get_name (var));
- rename_var_and_save_short_names (dict, var, new_name);
+ rename_var_and_save_short_names (r, -1, dict, var, new_name);
free (new_name);
}
continue;
}
- /* Identify any duplicates. */
- if (utf8_strcasecmp (var_get_short_name (var, 0), long_name)
- && dict_lookup_var (dict, long_name) != NULL)
- {
- sys_warn (r, record->pos,
- _("Duplicate long variable name `%s'."), long_name);
- continue;
- }
-
- rename_var_and_save_short_names (dict, var, long_name);
+ rename_var_and_save_short_names (r, record->pos, dict, var, long_name);
}
close_text_record (r, text);
}
if (!var_add_value_label (var, &value, utf8_labels[j]))
{
- if (var_is_numeric (var))
+ if (r->written_by_readstat)
+ {
+ /* Ignore the problem. ReadStat is buggy and emits value
+ labels whose values are longer than string variables'
+ widths, that are identical in the actual width of the
+ variable, e.g. both values "ABC123" and "ABC456" for a
+ string variable with width 3. */
+ }
+ else if (var_is_numeric (var))
sys_warn (r, record->pos,
_("Duplicate value label for %g on %s."),
value.f, var_get_name (var));
break;
}
if (attrs != NULL)
- attrset_add (attrs, attr);
+ {
+ if (!attrset_try_add (attrs, attr))
+ {
+ text_warn (r, text, _("Duplicate attribute %s."),
+ attribute_get_name (attr));
+ attribute_destroy (attr);
+ }
+ }
else
attribute_destroy (attr);
}
}
/* Closes TEXT, frees its storage, and issues a final warning
- about suppressed warnings if necesary. */
+ about suppressed warnings if necessary. */
static void
close_text_record (struct sfm_reader *r, struct text_record *text)
{
read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
void *buf, size_t byte_cnt)
{
- size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
+ size_t bytes_read = fbuf_read (r->fbuf, buf, byte_cnt);
r->pos += bytes_read;
if (bytes_read == byte_cnt)
return 1;
- else if (ferror (r->file))
+
+ int status = fbuf_get_status (r->fbuf);
+ if (status > 0)
{
sys_error (r, r->pos, _("System error: %s."), strerror (errno));
return -1;
static void
seek (struct sfm_reader *r, off_t offset)
{
- if (fseeko (r->file, offset, SEEK_SET))
+ int error = fbuf_seek (r->fbuf, offset);
+ if (error)
sys_error (r, 0, _("%s: seek failed (%s)."),
- fh_get_file_name (r->fh), strerror (errno));
+ fh_get_file_name (r->fh), strerror (error));
r->pos = offset;
}
unsigned int block_size;
unsigned int n_blocks;
unsigned int i;
- struct stat s;
- if (fstat (fileno (r->file), &s))
+ int seekable = fbuf_is_seekable (r->fbuf);
+ if (seekable < 0)
{
- sys_error (ME, 0, _("%s: stat failed (%s)."),
- fh_get_file_name (r->fh), strerror (errno));
+ sys_error (r, 0, _("%s: stat failed (%s)."),
+ fh_get_file_name (r->fh), strerror (-seekable));
return false;
}
-
- if (!S_ISREG (s.st_mode))
+ else if (!seekable)
{
/* We can't seek to the trailer and then back to the data in this file,
so skip doing extra checks. */
return true;
}
- if (r->ztrailer_ofs + ztrailer_len != s.st_size)
+ off_t size = fbuf_get_size (r->fbuf);
+ if (size >= 0 && r->ztrailer_ofs + ztrailer_len != size)
sys_warn (r, r->pos,
_("End of ZLIB trailer (0x%llx) is not file size (0x%llx)."),
- r->ztrailer_ofs + ztrailer_len, (long long int) s.st_size);
+ r->ztrailer_ofs + ztrailer_len, (long long int) size);
seek (r, r->ztrailer_ofs);