#include <config.h>
-#include "data/sys-file-reader.h"
#include "data/sys-file-private.h"
#include <errno.h>
#include <sys/stat.h>
#include <zlib.h>
+#include "data/any-reader.h"
#include "data/attributes.h"
#include "data/case.h"
#include "data/casereader-provider.h"
int weight_idx; /* 0 if unweighted, otherwise a var index. */
int nominal_case_size; /* Number of var positions. */
- /* These correspond to the members of struct sfm_file_info or a dictionary
+ /* These correspond to the members of struct any_file_info or a dictionary
but in the system file's encoding rather than ASCII. */
char creation_date[10]; /* "dd mmm yy". */
char creation_time[9]; /* "hh:mm:ss". */
/* System file reader. */
struct sfm_reader
{
+ struct any_reader any_reader;
+
/* Resource tracking. */
struct pool *pool; /* All system file state. */
/* File data. */
- struct sfm_read_info info;
+ struct any_read_info info;
struct sfm_header_record header;
struct sfm_var_record *vars;
size_t n_vars;
const char *encoding; /* String encoding. */
/* Decompression. */
- enum sfm_compression compression;
+ enum any_compression compression;
double bias; /* Compression bias, usually 100.0. */
uint8_t opcodes[8]; /* Current block of opcodes. */
size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
static const struct casereader_class sys_file_casereader_class;
+/* Returns the struct sfm_reader that UP_CAST derives from R_ through its
+   any_reader member.  R_ must belong to sys_file_reader_class; this is
+   checked with an assertion. */
+static struct sfm_reader *
+sfm_reader_cast (const struct any_reader *r_)
+{
+  struct sfm_reader *r;
+
+  assert (r_->klass == &sys_file_reader_class);
+  r = UP_CAST (r_, struct sfm_reader, any_reader);
+  return r;
+}
+
+static bool sfm_close (struct any_reader *);
+
static struct variable *lookup_var_by_index (struct sfm_reader *, off_t,
const struct sfm_var_record *,
size_t n, int idx);
static bool read_dictionary (struct sfm_reader *);
static bool read_record (struct sfm_reader *, int type,
size_t *allocated_vars, size_t *allocated_labels);
-static bool read_header (struct sfm_reader *, struct sfm_read_info *,
+static bool read_header (struct sfm_reader *, struct any_read_info *,
struct sfm_header_record *);
static void parse_header (struct sfm_reader *,
const struct sfm_header_record *,
- struct sfm_read_info *, struct dictionary *);
+ struct any_read_info *, struct dictionary *);
static bool parse_variable_records (struct sfm_reader *, struct dictionary *,
struct sfm_var_record *, size_t n);
static void parse_format_spec (struct sfm_reader *, off_t pos,
struct dictionary *);
static bool parse_machine_integer_info (struct sfm_reader *,
const struct sfm_extension_record *,
- struct sfm_read_info *);
+ struct any_read_info *);
static void parse_machine_float_info (struct sfm_reader *,
const struct sfm_extension_record *);
static void parse_extra_product_info (struct sfm_reader *,
const struct sfm_extension_record *,
- struct sfm_read_info *);
+ struct any_read_info *);
static void parse_mrsets (struct sfm_reader *,
const struct sfm_extension_record *,
size_t *allocated_mrsets);
/* Frees the strings inside INFO. */
void
-sfm_read_info_destroy (struct sfm_read_info *info)
+any_read_info_destroy (struct any_read_info *info)
{
if (info)
{
/* Tries to open FH for reading as a system file. Returns the new reader's
embedded any_reader if successful, otherwise NULL. */
-struct sfm_reader *
+static struct any_reader *
sfm_open (struct file_handle *fh)
{
size_t allocated_mrsets = 0;
/* Create and initialize reader. */
r = xzalloc (sizeof *r);
+ r->any_reader.klass = &sys_file_reader_class;
r->pool = pool_create ();
pool_register (r->pool, free, r);
r->fh = fh_ref (fh);
if (r->extensions[EXT_MRSETS2] != NULL)
parse_mrsets (r, r->extensions[EXT_MRSETS2], &allocated_mrsets);
- return r;
+ return &r->any_reader;
+
error:
- sfm_close (r);
+ if (r)
+ sfm_close (&r->any_reader);
return NULL;
}
if (!skip_bytes (r, 4))
return false;
- if (r->compression == SFM_COMP_ZLIB && !read_zheader (r))
+ if (r->compression == ANY_COMP_ZLIB && !read_zheader (r))
return false;
return true;
whatever encoding system file R uses. *IDS[I] is true if *STRINGSP[I] must
be a valid PSPP language identifier, false if *STRINGSP[I] is free-form
text. */
-size_t
-sfm_get_strings (const struct sfm_reader *r, struct pool *pool,
+static size_t
+sfm_get_strings (const struct any_reader *r_, struct pool *pool,
char ***titlesp, bool **idsp, char ***stringsp)
{
+ struct sfm_reader *r = sfm_reader_cast (r_);
const struct sfm_mrset *mrset;
struct get_strings_aux aux;
size_t var_idx;
ENCODING is null, or the locale encoding if R specifies no encoding.
If INFOP is non-null, then it receives additional info about the system
- file, which the caller must eventually free with sfm_read_info_destroy()
+ file, which the caller must eventually free with any_read_info_destroy()
when it is no longer needed.
This function consumes R. The caller must not use it again later, even to
destroy it with sfm_close(). */
-struct casereader *
-sfm_decode (struct sfm_reader *r, const char *encoding,
- struct dictionary **dictp, struct sfm_read_info *infop)
+static struct casereader *
+sfm_decode (struct any_reader *r_, const char *encoding,
+ struct dictionary **dictp, struct any_read_info *infop)
{
+ struct sfm_reader *r = sfm_reader_cast (r_);
struct dictionary *dict;
size_t i;
&sys_file_casereader_class, r);
error:
- sfm_close (r);
+ sfm_close (r_);
dict_destroy (dict);
*dictp = NULL;
return NULL;
closed with sfm_decode() or this function.
Returns true if an I/O error has occurred on READER, false
otherwise. */
-bool
-sfm_close (struct sfm_reader *r)
+static bool
+sfm_close (struct any_reader *r_)
{
+ struct sfm_reader *r = sfm_reader_cast (r_);
bool error;
- if (r == NULL)
- return true;
-
if (r->file)
{
if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
r->file = NULL;
}
- sfm_read_info_destroy (&r->info);
+ any_read_info_destroy (&r->info);
fh_unlock (r->lock);
fh_unref (r->fh);
sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
{
struct sfm_reader *r = r_;
- sfm_close (r);
+ sfm_close (&r->any_reader);
}
-/* Returns true if FILE is an SPSS system file,
- false otherwise. */
-bool
+/* Returns 1 if FILE is an SPSS system file,
+ 0 if it is not,
+ otherwise a negative errno value. */
+static int
sfm_detect (FILE *file)
{
char magic[5];
+ if (fseek (file, 0, SEEK_SET) != 0)
+ return -errno;
if (fread (magic, 4, 1, file) != 1)
- return false;
+ return feof (file) ? 0 : -errno;
magic[4] = '\0';
return (!strcmp (ASCII_MAGIC, magic)
except for the string fields in *INFO, which parse_header() will initialize
later once the file's encoding is known. */
static bool
-read_header (struct sfm_reader *r, struct sfm_read_info *info,
+read_header (struct sfm_reader *r, struct any_read_info *info,
struct sfm_header_record *header)
{
uint8_t raw_layout_code[4];
if (!zmagic)
{
if (compressed == 0)
- r->compression = SFM_COMP_NONE;
+ r->compression = ANY_COMP_NONE;
else if (compressed == 1)
- r->compression = SFM_COMP_SIMPLE;
+ r->compression = ANY_COMP_SIMPLE;
else if (compressed != 0)
{
sys_error (r, 0, "System file header has invalid compression "
else
{
if (compressed == 2)
- r->compression = SFM_COMP_ZLIB;
+ r->compression = ANY_COMP_ZLIB;
else
{
sys_error (r, 0, "ZLIB-compressed system file header has invalid "
if (has_variable_label == 1)
{
- enum { MAX_LABEL_LEN = 255 };
+ enum { MAX_LABEL_LEN = 65536 };
unsigned int len, read_len;
if (!read_uint (r, &len))
return false;
if (record->n_labels > UINT_MAX / sizeof *record->labels)
{
- sys_error (r, r->pos - 4, _("Invalid number of labels %zu."),
+ sys_error (r, r->pos - 4, _("Invalid number of labels %u."),
record->n_labels);
return false;
}
{
if (type->size > 0 && record->size != type->size)
sys_warn (r, record->pos,
- _("Record type 7, subtype %d has bad size %zu "
+ _("Record type 7, subtype %d has bad size %u "
"(expected %d)."), subtype, record->size, type->size);
else if (type->count > 0 && record->count != type->count)
sys_warn (r, record->pos,
- _("Record type 7, subtype %d has bad count %zu "
+ _("Record type 7, subtype %d has bad count %u "
"(expected %d)."), subtype, record->count, type->count);
else if (type->count == 0 && type->size == 0)
{
static void
parse_header (struct sfm_reader *r, const struct sfm_header_record *header,
- struct sfm_read_info *info, struct dictionary *dict)
+ struct any_read_info *info, struct dictionary *dict)
{
const char *dict_encoding = dict_get_encoding (dict);
struct substring product;
utf8_label = recode_string_pool ("UTF-8", dict_encoding,
rec->label, -1, r->pool);
- var_set_label (var, utf8_label, false);
+ var_set_label (var, utf8_label);
}
/* Set missing values. */
}
}
else
- {
- union value value;
-
- value_init_pool (r->pool, &value, width);
- value_set_missing (&value, width);
- for (i = 0; i < rec->missing_value_code; i++)
- mv_add_str (&mv, rec->missing + 8 * i, MIN (width, 8));
- }
+ for (i = 0; i < rec->missing_value_code; i++)
+ mv_add_str (&mv, rec->missing + 8 * i, MIN (width, 8));
var_set_missing_values (var, &mv);
}
static bool
parse_machine_integer_info (struct sfm_reader *r,
const struct sfm_extension_record *record,
- struct sfm_read_info *info)
+ struct any_read_info *info)
{
int float_representation, expected_float_format;
int integer_representation, expected_integer_format;
static void
parse_extra_product_info (struct sfm_reader *r,
const struct sfm_extension_record *record,
- struct sfm_read_info *info)
+ struct any_read_info *info)
{
struct text_record *text;
else
{
sys_warn (r, record->pos,
- _("Extension 11 has bad count %zu (for %zu variables)."),
+ _("Extension 11 has bad count %u (for %zu variables)."),
record->count, n_vars);
return;
}
int retval;
int i;
- if (r->error)
+ if (r->error || !r->sfm_var_cnt)
return NULL;
c = case_create (r->proto);
static bool
read_case_number (struct sfm_reader *r, double *d)
{
- if (r->compression == SFM_COMP_NONE)
+ if (r->compression == ANY_COMP_NONE)
{
uint8_t number[8];
if (!try_read_bytes (r, number, sizeof number))
static int
read_opcode (struct sfm_reader *r)
{
- assert (r->compression != SFM_COMP_NONE);
+ assert (r->compression != ANY_COMP_NONE);
for (;;)
{
int opcode;
read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
{
assert (length % 8 == 0);
- if (r->compression == SFM_COMP_NONE)
+ if (r->compression == ANY_COMP_NONE)
return try_read_bytes (r, s, length);
else
{
va_end (args);
}
-/* Displays an error for the current file position,
- marks it as in an error state,
- and aborts reading it using longjmp. */
+/* Displays an error for the current file position and marks it as in an error
+ state. */
static void
sys_error (struct sfm_reader *r, off_t offset, const char *format, ...)
{
static int
read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
{
- if (r->compression == SFM_COMP_SIMPLE)
+ if (r->compression == ANY_COMP_SIMPLE)
return read_bytes (r, buf, byte_cnt);
else
{
static int
try_read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
{
- if (r->compression == SFM_COMP_SIMPLE)
+ if (r->compression == ANY_COMP_SIMPLE)
return try_read_bytes (r, buf, byte_cnt);
else
return read_bytes_zlib (r, buf, byte_cnt);
NULL,
NULL,
};
+
+const struct any_reader_class sys_file_reader_class = /* Reader class that sfm_open() stores in any_reader.klass. */
+ { /* Positional initializer: entries presumably must follow the member order of struct any_reader_class (not visible here) -- TODO confirm. */
+ N_("SPSS System File"), /* Format name; N_() presumably marks it for translation -- confirm. */
+ sfm_detect, /* Checks whether a FILE starts with the system file magic. */
+ sfm_open, /* Tries to open a file handle for reading as a system file. */
+ sfm_close, /* Closes the reader and releases its resources. */
+ sfm_decode, /* Yields the dictionary and a casereader; consumes the reader. */
+ sfm_get_strings, /* Retrieves titles, identifier flags and strings from the file. */
+ };