/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
-#include "por-file-reader.h"
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
-#include <data/casereader-provider.h>
-#include <data/casereader.h>
-#include <data/dictionary.h>
-#include <data/file-handle-def.h>
-#include <data/file-name.h>
-#include <data/format.h>
-#include <data/missing-values.h>
-#include <data/short-names.h>
-#include <data/value-labels.h>
-#include <data/variable.h>
-#include <libpspp/compiler.h>
-#include <libpspp/hash.h>
-#include <libpspp/message.h>
-#include <libpspp/misc.h>
-#include <libpspp/pool.h>
-#include <libpspp/str.h>
-
-#include "xalloc.h"
+#include "data/any-reader.h"
+#include "data/casereader-provider.h"
+#include "data/casereader.h"
+#include "data/dictionary.h"
+#include "data/file-handle-def.h"
+#include "data/file-name.h"
+#include "data/format.h"
+#include "data/missing-values.h"
+#include "data/short-names.h"
+#include "data/value-labels.h"
+#include "data/variable.h"
+#include "libpspp/compiler.h"
+#include "libpspp/i18n.h"
+#include "libpspp/message.h"
+#include "libpspp/misc.h"
+#include "libpspp/pool.h"
+#include "libpspp/str.h"
+
+#include "gl/minmax.h"
+#include "gl/xalloc.h"
+#include "gl/xmemdup0.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
/* Portable file reader. */
struct pfm_reader
{
+ struct any_reader any_reader; /* Generic reader base; pfm_reader_cast() maps it back to this struct. */
struct pool *pool; /* All the portable file state. */
jmp_buf bail_out; /* longjmp() target for error handling. */
+ struct dictionary *dict; /* Dictionary filled in by pfm_open(); handed to the caller by pfm_decode(). */
+ struct any_read_info info; /* File metadata filled in by read_version_data(). */
struct file_handle *fh; /* File handle. */
struct fh_lock *lock; /* Read lock for file. */
FILE *file; /* File stream. */
int line_length; /* Number of characters so far on this line. */
char cc; /* Current character. */
char *trans; /* 256-byte character set translation table. */
- int var_cnt; /* Number of variables. */
+ int n_vars; /* Number of variables. */
int weight_index; /* 0-based index of weight variable, or -1. */
- int *widths; /* Variable widths, 0 for numeric. */
- size_t value_cnt; /* Number of `value's per case. */
+ struct caseproto *proto; /* Format of output cases. */
bool ok; /* Set false on I/O error. */
};
static const struct casereader_class por_file_casereader_class;
+/* Returns the portable file reader that contains R_ as its embedded
+   any_reader member.  R_ must have por_file_reader_class as its class. */
+static struct pfm_reader *
+pfm_reader_cast (const struct any_reader *r_)
+{
+  struct pfm_reader *reader;
+
+  assert (r_->klass == &por_file_reader_class);
+  reader = UP_CAST (r_, struct pfm_reader, any_reader);
+  return reader;
+}
+
static void
error (struct pfm_reader *r, const char *msg,...)
PRINTF_FORMAT (2, 3)
static void
error (struct pfm_reader *r, const char *msg, ...)
{
- struct msg m;
struct string text;
va_list args;
ds_init_empty (&text);
- ds_put_format (&text, _("portable file %s corrupt at offset 0x%lx: "),
- fh_get_file_name (r->fh), ftell (r->file));
+ ds_put_format (&text, _("portable file %s corrupt at offset 0x%llx: "),
+ fh_get_file_name (r->fh), (long long int) ftello (r->file));
va_start (args, msg);
ds_put_vformat (&text, msg, args);
va_end (args);
- m.category = MSG_GENERAL;
- m.severity = MSG_ERROR;
- m.where.file_name = NULL;
- m.where.line_number = 0;
- m.text = ds_cstr (&text);
-
- msg_emit (&m);
+ struct msg *m = xmalloc (sizeof *m);
+ *m = (struct msg) {
+ .category = MSG_C_GENERAL,
+ .severity = MSG_S_ERROR,
+ .text = ds_steal_cstr (&text),
+ };
+ msg_emit (m);
r->ok = false;
static void
warning (struct pfm_reader *r, const char *msg, ...)
{
- struct msg m;
struct string text;
va_list args;
ds_init_empty (&text);
- ds_put_format (&text, _("reading portable file %s at offset 0x%lx: "),
- fh_get_file_name (r->fh), ftell (r->file));
+ ds_put_format (&text, _("reading portable file %s at offset 0x%llx: "),
+ fh_get_file_name (r->fh), (long long int) ftello (r->file)); /* Prefix: file name and current offset; MSG's formatted text is appended below. */
va_start (args, msg);
ds_put_vformat (&text, msg, args);
va_end (args);
- m.category = MSG_GENERAL;
- m.severity = MSG_WARNING;
- m.where.file_name = NULL;
- m.where.line_number = 0;
- m.text = ds_cstr (&text);
-
- msg_emit (&m);
+ struct msg *m = xmalloc (sizeof *m); /* Neither M nor its text is freed here; msg_emit() presumably takes ownership -- TODO confirm. */
+ *m = (struct msg) {
+ .category = MSG_C_GENERAL,
+ .severity = MSG_S_WARNING,
+ .text = ds_steal_cstr (&text),
+ };
+ msg_emit (m); /* Unlike error(), R->ok is left untouched. */
}
/* Close and destroy R.
Returns false if an error was detected on R, true otherwise. */
static bool
-close_reader (struct pfm_reader *r)
+pfm_close (struct any_reader *r_)
{
+ struct pfm_reader *r = pfm_reader_cast (r_);
bool ok;
- if (r == NULL)
- return true;
+ dict_unref (r->dict);
+ any_read_info_destroy (&r->info);
if (r->file)
{
- if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
+ if (fn_close (r->fh, r->file) == EOF)
{
- msg (ME, _("Error closing portable file \"%s\": %s."),
+ msg (ME, _("Error closing portable file `%s': %s."),
fh_get_file_name (r->fh), strerror (errno));
r->ok = false;
}
por_file_casereader_destroy (struct casereader *reader, void *r_)
{
struct pfm_reader *r = r_;
- if (!close_reader (r))
+ if (!pfm_close (&r->any_reader))
casereader_force_error (reader);
}
}
static void read_header (struct pfm_reader *);
-static void read_version_data (struct pfm_reader *, struct pfm_read_info *);
+static void read_version_data (struct pfm_reader *, struct any_read_info *);
static void read_variables (struct pfm_reader *, struct dictionary *);
static void read_value_label (struct pfm_reader *, struct dictionary *);
static void read_documents (struct pfm_reader *, struct dictionary *);
/* Reads the dictionary from file with handle H, and returns it in a
dictionary structure. This dictionary may be modified in order to
rename, reorder, and delete variables, etc. */
-struct casereader *
-pfm_open_reader (struct file_handle *fh, struct dictionary **dict,
- struct pfm_read_info *info)
+static struct any_reader *
+pfm_open (struct file_handle *fh)
{
struct pool *volatile pool = NULL;
struct pfm_reader *volatile r = NULL;
- *dict = dict_create ();
-
/* Create and initialize reader. */
pool = pool_create ();
r = pool_alloc (pool, sizeof *r);
+ r->any_reader.klass = &por_file_reader_class;
+ r->dict = dict_create (get_default_encoding ());
+ memset (&r->info, 0, sizeof r->info);
r->pool = pool;
r->fh = fh_ref (fh);
r->lock = NULL;
r->line_length = 0;
r->weight_index = -1;
r->trans = NULL;
- r->var_cnt = 0;
- r->widths = NULL;
- r->value_cnt = 0;
+ r->n_vars = 0;
+ r->proto = NULL;
r->ok = true;
if (setjmp (r->bail_out))
goto error;
goto error;
/* Open file. */
- r->file = fn_open (fh_get_file_name (r->fh), "rb");
+ r->file = fn_open (r->fh, "rb");
if (r->file == NULL)
{
- msg (ME, _("An error occurred while opening \"%s\" for reading "
+ msg (ME, _("An error occurred while opening `%s' for reading "
"as a portable file: %s."),
fh_get_file_name (r->fh), strerror (errno));
goto error;
/* Read header, version, date info, product id, variables. */
read_header (r);
- read_version_data (r, info);
- read_variables (r, *dict);
+ read_version_data (r, &r->info);
+ read_variables (r, r->dict);
/* Read value labels. */
while (match (r, 'D'))
- read_value_label (r, *dict);
+ read_value_label (r, r->dict);
/* Read documents. */
if (match (r, 'E'))
- read_documents (r, *dict);
+ read_documents (r, r->dict);
/* Check that we've made it to the data. */
if (!match (r, 'F'))
error (r, _("Data record expected."));
- r->value_cnt = dict_get_next_value_idx (*dict);
- return casereader_create_sequential (NULL, r->value_cnt, CASENUMBER_MAX,
- &por_file_casereader_class, r);
+ r->proto = caseproto_ref_pool (dict_get_proto (r->dict), r->pool);
+ return &r->any_reader;
error:
- close_reader (r);
- dict_destroy (*dict);
- *dict = NULL;
+ pfm_close (&r->any_reader);
return NULL;
}
+
+/* Hands the dictionary read by pfm_open() to the caller through DICTP and,
+   if INFO is nonnull, the file metadata through INFO.  ENCODING is unused.
+
+   Returns a sequential casereader for the data, using R_'s case
+   prototype. */
+static struct casereader *
+pfm_decode (struct any_reader *r_, const char *encoding UNUSED,
+            struct dictionary **dictp, struct any_read_info *info)
+{
+  struct pfm_reader *reader = pfm_reader_cast (r_);
+
+  /* The caller now holds the dictionary; drop our pointer to it. */
+  *dictp = reader->dict;
+  reader->dict = NULL;
+
+  /* Likewise for the metadata.  Our copy is zeroed, presumably so that the
+     any_read_info_destroy() call in pfm_close() leaves the caller's copy
+     alone. */
+  if (info != NULL)
+    {
+      *info = reader->info;
+      memset (&reader->info, 0, sizeof reader->info);
+    }
+
+  return casereader_create_sequential (NULL, reader->proto, CASENUMBER_MAX,
+                                       &por_file_casereader_class, reader);
+}
\f
/* Returns the value of base-30 digit C,
or -1 if C is not a base-30 digit. */
*buf = '\0';
}
+
+/* Reads a length-prefixed string of bytes from R into BUF.  The length
+   must be in the range 0...255, so at most 255 bytes are stored (the
+   callers in this file pass 256-byte buffers).  No null terminator is
+   stored.
+
+   Returns the number of bytes stored into BUF. */
+static size_t
+read_bytes (struct pfm_reader *r, uint8_t *buf)
+{
+  int n = read_int (r);
+  if (n < 0 || n > 255)
+    error (r, _("Bad string length %d."), n);   /* Presumably does not return. */
+
+  /* Count upward instead of running N down to -1, so that N still holds
+     the length when it is returned. */
+  for (int i = 0; i < n; i++)
+    {
+      buf[i] = r->cc;
+      advance (r);
+    }
+  return n;
+}
+
+
+
/* Reads a string and returns a copy of it allocated from R's
pool. */
static char *
/* Reads the version and date info record, as well as product and
subproduct identification records if present. */
static void
-read_version_data (struct pfm_reader *r, struct pfm_read_info *info)
+read_version_data (struct pfm_reader *r, struct any_read_info *info)
{
static const char empty_string[] = "";
char *date, *time;
- const char *product, *author, *subproduct;
+ const char *product, *subproduct;
int i;
/* Read file. */
date = read_pool_string (r);
time = read_pool_string (r);
product = match (r, '1') ? read_pool_string (r) : empty_string;
- author = match (r, '2') ? read_pool_string (r) : empty_string;
+ if (match (r, '2'))
+ {
+ /* Skip "author" field. */
+ read_pool_string (r);
+ }
subproduct = match (r, '3') ? read_pool_string (r) : empty_string;
/* Validate file. */
/* Save file info. */
if (info != NULL)
{
+ memset (info, 0, sizeof *info);
+
+ info->float_format = FLOAT_NATIVE_DOUBLE;
+ info->integer_format = INTEGER_NATIVE;
+ info->compression = ANY_COMP_NONE;
+ info->n_cases = -1;
+
/* Date. */
+ info->creation_date = xmalloc (11);
for (i = 0; i < 8; i++)
{
static const int map[] = {6, 7, 8, 9, 3, 4, 0, 1};
info->creation_date[map[i]] = date[i];
}
info->creation_date[2] = info->creation_date[5] = ' ';
- info->creation_date[10] = 0;
+ info->creation_date[10] = '\0';
/* Time. */
+ info->creation_time = xmalloc (9);
for (i = 0; i < 6; i++)
{
static const int map[] = {0, 1, 3, 4, 6, 7};
info->creation_time[8] = 0;
/* Product. */
- str_copy_trunc (info->product, sizeof info->product, product);
- str_copy_trunc (info->subproduct, sizeof info->subproduct, subproduct);
+ info->product = xstrdup (product);
+ info->product_ext = xstrdup (subproduct);
}
}
return fmt_default_for_width (var_get_width (v));
}
-static union value parse_value (struct pfm_reader *, struct variable *);
+static void parse_value (struct pfm_reader *, int width, union value *);
/* Read information on all the variables. */
static void
if (!match (r, '4'))
error (r, _("Expected variable count record."));
- r->var_cnt = read_int (r);
- if (r->var_cnt <= 0)
- error (r, _("Invalid number of variables %d."), r->var_cnt);
- r->widths = pool_nalloc (r->pool, r->var_cnt, sizeof *r->widths);
+ r->n_vars = read_int (r);
+ if (r->n_vars <= 0)
+ error (r, _("Invalid number of variables %d."), r->n_vars);
- /* Purpose of this value is unknown. It is typically 161. */
- read_int (r);
+ if (match (r, '5'))
+ read_int (r);
if (match (r, '6'))
{
error (r, _("Weight variable name (%s) truncated."), weight_name);
}
- for (i = 0; i < r->var_cnt; i++)
+ for (i = 0; i < r->n_vars; i++)
{
int width;
char name[256];
width = read_int (r);
if (width < 0)
error (r, _("Invalid variable width %d."), width);
- r->widths[i] = width;
read_string (r, name);
for (j = 0; j < 6; j++)
fmt[j] = read_int (r);
- if (!var_is_valid_name (name, false) || *name == '#' || *name == '$')
+ if (!dict_id_is_valid (dict, name, false)
+ || *name == '#' || *name == '$')
error (r, _("Invalid variable name `%s' in position %d."), name, i);
str_uppercase (name);
v = dict_create_var (dict, name, width);
if (v == NULL)
{
- int i;
- for (i = 1; i < 100000; i++)
+ /* Named SUFFIX, not I, so that it does not shadow the enclosing
+ loop's position index, which the warning below prints with %d. */
+ unsigned long int suffix;
+ for (suffix = 1; ; suffix++)
{
- char try_name[VAR_NAME_LEN + 1];
- sprintf (try_name, "%.*s_%d", VAR_NAME_LEN - 6, name, i);
+ char *try_name = xasprintf ("%s_%lu", name, suffix);
v = dict_create_var (dict, try_name, width);
+ free (try_name);
if (v != NULL)
break;
}
- if (v == NULL)
- error (r, _("Duplicate variable name %s in position %d."), name, i);
warning (r, _("Duplicate variable name %s in position %d renamed "
"to %s."), name, i, var_get_name (v));
}
var_set_write_format (v, &write);
/* Range missing values. */
- mv_init (&miss, var_get_width (v));
+ mv_init (&miss, width);
if (match (r, 'B'))
{
double x = read_float (r);
/* Single missing values. */
while (match (r, '8'))
{
- union value value = parse_value (r, v);
+ int mv_width = MIN (width, 8);
+ union value value;
+
+ parse_value (r, mv_width, &value);
+ value_resize (&value, mv_width, width);
mv_add_value (&miss, &value);
+ value_destroy (&value, width);
}
var_set_missing_values (v, &miss);
+ mv_destroy (&miss);
if (match (r, 'C'))
{
char label[256];
read_string (r, label);
- var_set_label (v, label);
+ var_set_label (v, label); /* XXX */
}
}
}
}
-/* Parse a value for variable VV into value V. */
-static union value
-parse_value (struct pfm_reader *r, struct variable *vv)
+/* Parses a value of width WIDTH from R into V.  V is initialized here with
+ value_init(); the callers in this file release it with value_destroy().
+ A WIDTH of 0 reads a number.  A positive WIDTH reads a byte string and
+ copies it into V, padded on the right with spaces. */
+static void
+parse_value (struct pfm_reader *r, int width, union value *v)
{
- union value v;
-
- if (var_is_alpha (vv))
+ value_init (v, width);
+ if (width > 0)
{
- char string[256];
- read_string (r, string);
- buf_copy_str_rpad (v.s, 8, string);
+ uint8_t buf[256];
+ size_t n_bytes = read_bytes (r, buf);
+ value_copy_buf_rpad (v, width, buf, n_bytes, ' ');
}
else
- v.f = read_float (r);
-
- return v;
+ v->f = read_float (r);
}
/* Parse a value label record and return success. */
char label[256];
int j;
- val = parse_value (r, v[0]);
+ parse_value (r, var_get_width (v[0]), &val);
read_string (r, label);
/* Assign the value label to each variable. */
for (j = 0; j < nv; j++)
- {
- struct variable *var = v[j];
+ var_replace_value_label (v[j], &val, label);
- if (!var_is_long_string (var))
- var_replace_value_label (var, &val, label);
- }
+ value_destroy (&val, var_get_width (v[0]));
}
}
static void
read_documents (struct pfm_reader *r, struct dictionary *dict)
{
- int line_cnt;
- int i;
-
- line_cnt = read_int (r);
- for (i = 0; i < line_cnt; i++)
+ int n_lines = read_int (r); /* The record starts with the number of document lines that follow. */
+ for (int i = 0; i < n_lines; i++)
{
char line[256];
read_string (r, line);
- dict_add_document_line (dict, line);
+ dict_add_document_line (dict, line, false);
}
}
-/* Reads one case from portable file R into C. */
-static bool
-por_file_casereader_read (struct casereader *reader, void *r_, struct ccase *c)
+/* Reads and returns one case from portable file R.  Returns a null
+ pointer at end of file (current character 'Z') or if R has been marked
+ as failed, in which case an error is also forced on READER. */
+static struct ccase *
+por_file_casereader_read (struct casereader *reader, void *r_)
{
struct pfm_reader *r = r_;
+ struct ccase *volatile c; /* volatile: read again after a longjmp() back to the setjmp() below. */
size_t i;
- size_t idx;
- case_create (c, casereader_get_value_cnt (reader));
+ c = case_create (r->proto); /* Created before setjmp() so that the failure path can release it. */
setjmp (r->bail_out);
if (!r->ok)
{
casereader_force_error (reader);
- case_destroy (c);
- return false;
+ case_unref (c);
+ return NULL;
}
/* Check for end of file. */
if (r->cc == 'Z')
{
- case_destroy (c);
- return false;
+ case_unref (c);
+ return NULL;
}
- idx = 0;
- for (i = 0; i < r->var_cnt; i++)
+ for (i = 0; i < r->n_vars; i++)
{
- int width = r->widths[i];
+ int width = caseproto_get_width (r->proto, i); /* 0 selects the numeric branch; otherwise a string is read. */
if (width == 0)
- {
- case_data_rw_idx (c, idx)->f = read_float (r);
- idx++;
- }
+ *case_num_rw_idx (c, i) = read_float (r);
else
{
- char string[256];
- read_string (r, string);
- buf_copy_str_rpad (case_data_rw_idx (c, idx)->s, width, string);
- idx += DIV_RND_UP (width, MAX_SHORT_STRING);
+ uint8_t buf[256];
+ size_t n_bytes = read_bytes (r, buf);
+ u8_buf_copy_rpad (case_str_rw_idx (c, i), width, buf, n_bytes, ' ');
}
}
- return true;
+ return c;
}
-/* Returns true if FILE is an SPSS portable file,
- false otherwise. */
-bool
+/* Detects whether FILE is an SPSS portable file. Returns 1 if so, 0 if not,
+ and a negative errno value if there is an error reading FILE. */
+static int
pfm_detect (FILE *file)
{
unsigned char header[464];
char trans[256];
- int cooked_cnt, raw_cnt;
+ int n_cooked, n_raws, line_len;
int i;
- cooked_cnt = raw_cnt = 0;
- while (cooked_cnt < sizeof header)
+ n_cooked = n_raws = 0;
+ line_len = 0;
+ while (n_cooked < sizeof header)
{
int c = getc (file);
- if (c == EOF || raw_cnt++ > 512)
- return false;
- else if (c != '\n' && c != '\r')
- header[cooked_cnt++] = c;
+ if (c == EOF || n_raws++ > 512)
+ return ferror (file) ? -errno : 0;
+ else if (c == '\n')
+ {
+ while (line_len < 80 && n_cooked < sizeof header)
+ {
+ header[n_cooked++] = ' ';
+ line_len++;
+ }
+ line_len = 0;
+ }
+ else if (c != '\r')
+ {
+ header[n_cooked++] = c;
+ line_len++;
+ }
}
memset (trans, 0, 256);
for (i = 0; i < 8; i++)
if (trans[header[i + 456]] != "SPSSPORT"[i])
- return false;
+ return 0;
- return true;
+ return 1;
}
static const struct casereader_class por_file_casereader_class =
NULL,
NULL,
};
+
+const struct any_reader_class por_file_reader_class = /* Reader class for SPSS portable files. */
+ {
+ N_("SPSS Portable File"),
+ pfm_detect, /* Checks whether a FILE is a portable file. */
+ pfm_open, /* Opens a file handle and reads its dictionary. */
+ pfm_close, /* Closes and destroys the reader. */
+ pfm_decode, /* Hands over dictionary and info; returns the casereader. */
+ NULL, /* get_strings */
+ };