/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
-#include "por-file-reader.h"
+
+#include "data/por-file-reader.h"
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
-#include <data/casereader-provider.h>
-#include <data/casereader.h>
-#include <data/dictionary.h>
-#include <data/file-handle-def.h>
-#include <data/file-name.h>
-#include <data/format.h>
-#include <data/missing-values.h>
-#include <data/value-labels.h>
-#include <data/variable.h>
-#include <libpspp/compiler.h>
-#include <libpspp/hash.h>
-#include <libpspp/message.h>
-#include <libpspp/misc.h>
-#include <libpspp/pool.h>
-#include <libpspp/str.h>
-
-#include "xalloc.h"
+#include "data/casereader-provider.h"
+#include "data/casereader.h"
+#include "data/dictionary.h"
+#include "data/file-handle-def.h"
+#include "data/file-name.h"
+#include "data/format.h"
+#include "data/missing-values.h"
+#include "data/short-names.h"
+#include "data/value-labels.h"
+#include "data/variable.h"
+#include "libpspp/compiler.h"
+#include "libpspp/i18n.h"
+#include "libpspp/message.h"
+#include "libpspp/misc.h"
+#include "libpspp/pool.h"
+#include "libpspp/str.h"
+
+#include "gl/intprops.h"
+#include "gl/minmax.h"
+#include "gl/xalloc.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
+#define N_(msgid) (msgid)
/* portable_to_local[PORTABLE] translates the given portable
character into the local character set. */
jmp_buf bail_out; /* longjmp() target for error handling. */
struct file_handle *fh; /* File handle. */
+ struct fh_lock *lock; /* Read lock for file. */
FILE *file; /* File stream. */
int line_length; /* Number of characters so far on this line. */
char cc; /* Current character. */
char *trans; /* 256-byte character set translation table. */
int var_cnt; /* Number of variables. */
int weight_index; /* 0-based index of weight variable, or -1. */
- int *widths; /* Variable widths, 0 for numeric. */
- size_t value_cnt; /* Number of `value's per case. */
+ struct caseproto *proto; /* Format of output cases. */
bool ok; /* Set false on I/O error. */
};
-static struct casereader_class por_file_casereader_class;
+static const struct casereader_class por_file_casereader_class;
static void
error (struct pfm_reader *r, const char *msg,...)
va_list args;
ds_init_empty (&text);
- ds_put_format (&text, _("portable file %s corrupt at offset 0x%lx: "),
- fh_get_file_name (r->fh), ftell (r->file));
+ ds_put_format (&text, _("portable file %s corrupt at offset 0x%llx: "),
+ fh_get_file_name (r->fh), (long long int) ftello (r->file));
va_start (args, msg);
ds_put_vformat (&text, msg, args);
va_end (args);
- m.category = MSG_GENERAL;
- m.severity = MSG_ERROR;
- m.where.file_name = NULL;
- m.where.line_number = 0;
+ m.category = MSG_C_GENERAL;
+ m.severity = MSG_S_ERROR;
+ m.file_name = NULL;
+ m.first_line = 0;
+ m.last_line = 0;
+ m.first_column = 0;
+ m.last_column = 0;
m.text = ds_cstr (&text);
msg_emit (&m);
va_list args;
ds_init_empty (&text);
- ds_put_format (&text, _("reading portable file %s at offset 0x%lx: "),
- fh_get_file_name (r->fh), ftell (r->file));
+ ds_put_format (&text, _("reading portable file %s at offset 0x%llx: "),
+ fh_get_file_name (r->fh), (long long int) ftello (r->file));
va_start (args, msg);
ds_put_vformat (&text, msg, args);
va_end (args);
- m.category = MSG_GENERAL;
- m.severity = MSG_WARNING;
- m.where.file_name = NULL;
- m.where.line_number = 0;
+ m.category = MSG_C_GENERAL;
+ m.severity = MSG_S_WARNING;
+ m.file_name = NULL;
+ m.first_line = 0;
+ m.last_line = 0;
+ m.first_column = 0;
+ m.last_column = 0;
m.text = ds_cstr (&text);
msg_emit (&m);
{
if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
{
- msg (ME, _("Error closing portable file \"%s\": %s."),
+ msg (ME, _("Error closing portable file `%s': %s."),
fh_get_file_name (r->fh), strerror (errno));
r->ok = false;
}
r->file = NULL;
}
- if (r->fh != NULL)
- fh_close (r->fh, "portable file", "rs");
+ fh_unlock (r->lock);
+ fh_unref (r->fh);
ok = r->ok;
pool_destroy (r->pool);
struct pool *volatile pool = NULL;
struct pfm_reader *volatile r = NULL;
- *dict = dict_create ();
- if (!fh_open (fh, FH_REF_FILE, "portable file", "rs"))
- goto error;
+ *dict = dict_create (get_default_encoding ());
/* Create and initialize reader. */
pool = pool_create ();
r = pool_alloc (pool, sizeof *r);
r->pool = pool;
- r->fh = fh;
- r->file = fn_open (fh_get_file_name (r->fh), "rb");
+ r->fh = fh_ref (fh);
+ r->lock = NULL;
+ r->file = NULL;
r->line_length = 0;
r->weight_index = -1;
r->trans = NULL;
r->var_cnt = 0;
- r->widths = NULL;
- r->value_cnt = 0;
+ r->proto = NULL;
r->ok = true;
-
if (setjmp (r->bail_out))
goto error;
- /* Check that file open succeeded. */
+ /* Lock file. */
+ /* TRANSLATORS: this fragment will be interpolated into
+ messages in fh_lock() that identify types of files. */
+ r->lock = fh_lock (fh, FH_REF_FILE, N_("portable file"), FH_ACC_READ, false);
+ if (r->lock == NULL)
+ goto error;
+
+ /* Open file. */
+ r->file = fn_open (fh_get_file_name (r->fh), "rb");
if (r->file == NULL)
{
- msg (ME, _("An error occurred while opening \"%s\" for reading "
+ msg (ME, _("An error occurred while opening `%s' for reading "
"as a portable file: %s."),
fh_get_file_name (r->fh), strerror (errno));
goto error;
if (!match (r, 'F'))
error (r, _("Data record expected."));
- r->value_cnt = dict_get_next_value_idx (*dict);
- return casereader_create_sequential (NULL, r->value_cnt, CASENUMBER_MAX,
+ r->proto = caseproto_ref_pool (dict_get_proto (*dict), r->pool);
+ return casereader_create_sequential (NULL, r->proto, CASENUMBER_MAX,
&por_file_casereader_class, r);
error:
*buf = '\0';
}
+
+/* Reads a counted string from R into BUF, which must have room
+   for 256 characters.  The length is read first with read_int(),
+   then that many bytes are stored in BUF exactly as read; no null
+   terminator is appended.
+   Returns the number of bytes read. */
+static size_t
+read_bytes (struct pfm_reader *r, uint8_t *buf)
+{
+  int n = read_int (r);
+  int i;
+
+  if (n < 0 || n > 255)
+    error (r, _("Bad string length %d."), n);
+
+  /* Iterate with a separate index so that N still holds the length
+     afterward.  Counting N itself down to exit the loop would leave
+     it at -1, which the size_t return type turns into SIZE_MAX. */
+  for (i = 0; i < n; i++)
+    {
+      *buf++ = r->cc;
+      advance (r);
+    }
+  return n;
+}
+
+
+
/* Reads a string and returns a copy of it allocated from R's
pool. */
static char *
static void
read_version_data (struct pfm_reader *r, struct pfm_read_info *info)
{
- static char empty_string[] = "";
- char *date, *time, *product, *author, *subproduct;
+ static const char empty_string[] = "";
+ char *date, *time;
+ const char *product, *subproduct;
int i;
/* Read file. */
date = read_pool_string (r);
time = read_pool_string (r);
product = match (r, '1') ? read_pool_string (r) : empty_string;
- author = match (r, '2') ? read_pool_string (r) : empty_string;
+ if (match (r, '2'))
+ {
+ /* Skip "author" field. */
+ read_pool_string (r);
+ }
subproduct = match (r, '3') ? read_pool_string (r) : empty_string;
/* Validate file. */
if (strlen (date) != 8)
- error (r, _("Bad date string length %d."), (int) strlen (date));
+ error (r, _("Bad date string length %zu."), strlen (date));
if (strlen (time) != 6)
- error (r, _("Bad time string length %d."), (int) strlen (time));
+ error (r, _("Bad time string length %zu."), strlen (time));
/* Save file info. */
if (info != NULL)
return fmt_default_for_width (var_get_width (v));
}
-static union value parse_value (struct pfm_reader *, struct variable *);
+static void parse_value (struct pfm_reader *, int width, union value *);
/* Read information on all the variables. */
static void
r->var_cnt = read_int (r);
if (r->var_cnt <= 0)
error (r, _("Invalid number of variables %d."), r->var_cnt);
- r->widths = pool_nalloc (r->pool, r->var_cnt, sizeof *r->widths);
- /* Purpose of this value is unknown. It is typically 161. */
- read_int (r);
+ if (match (r, '5'))
+ read_int (r);
if (match (r, '6'))
{
width = read_int (r);
if (width < 0)
error (r, _("Invalid variable width %d."), width);
- r->widths[i] = width;
read_string (r, name);
for (j = 0; j < 6; j++)
fmt[j] = read_int (r);
- if (!var_is_valid_name (name, false) || *name == '#' || *name == '$')
+ if (!dict_id_is_valid (dict, name, false)
+ || *name == '#' || *name == '$')
error (r, _("Invalid variable name `%s' in position %d."), name, i);
str_uppercase (name);
v = dict_create_var (dict, name, width);
if (v == NULL)
{
- int i;
- for (i = 1; i < 100000; i++)
+ unsigned long int i;
+ for (i = 1; ; i++)
{
- char try_name[LONG_NAME_LEN + 1];
- sprintf (try_name, "%.*s_%d", LONG_NAME_LEN - 6, name, i);
+ char try_name[8 + 1 + INT_STRLEN_BOUND (i) + 1];
+ sprintf (try_name, "%s_%lu", name, i);
v = dict_create_var (dict, try_name, width);
if (v != NULL)
break;
}
- if (v == NULL)
- error (r, _("Duplicate variable name %s in position %d."), name, i);
warning (r, _("Duplicate variable name %s in position %d renamed "
"to %s."), name, i, var_get_name (v));
}
var_set_write_format (v, &write);
/* Range missing values. */
- mv_init (&miss, var_get_width (v));
+ mv_init (&miss, width);
if (match (r, 'B'))
{
double x = read_float (r);
double y = read_float (r);
- mv_add_num_range (&miss, x, y);
+ mv_add_range (&miss, x, y);
}
else if (match (r, 'A'))
- mv_add_num_range (&miss, read_float (r), HIGHEST);
+ mv_add_range (&miss, read_float (r), HIGHEST);
else if (match (r, '9'))
- mv_add_num_range (&miss, LOWEST, read_float (r));
+ mv_add_range (&miss, LOWEST, read_float (r));
/* Single missing values. */
while (match (r, '8'))
{
- union value value = parse_value (r, v);
+ int mv_width = MIN (width, 8);
+ union value value;
+
+ parse_value (r, mv_width, &value);
+ value_resize (&value, mv_width, width);
mv_add_value (&miss, &value);
+ value_destroy (&value, width);
}
var_set_missing_values (v, &miss);
+ mv_destroy (&miss);
if (match (r, 'C'))
{
char label[256];
read_string (r, label);
- var_set_label (v, label);
+ var_set_label (v, label); /* XXX */
}
}
}
}
-/* Parse a value for variable VV into value V. */
-static union value
-parse_value (struct pfm_reader *r, struct variable *vv)
+/* Parses a value of width WIDTH from R into V.  V is initialized
+   here with value_init(); the callers in this file release it
+   with value_destroy() when they are done with it.
+   When WIDTH is positive, the value is read as a byte string with
+   read_bytes() and copied into V by value_copy_buf_rpad() with
+   ' ' as the pad byte; otherwise it is read with read_float() and
+   stored in V->f. */
+static void
+parse_value (struct pfm_reader *r, int width, union value *v)
{
- union value v;
-
- if (var_is_alpha (vv))
+ value_init (v, width);
+ if (width > 0)
{
- char string[256];
- read_string (r, string);
- buf_copy_str_rpad (v.s, 8, string);
+ uint8_t buf[256];
+ size_t n_bytes = read_bytes (r, buf);
+ value_copy_buf_rpad (v, width, buf, n_bytes, ' ');
}
else
- v.f = read_float (r);
-
- return v;
+ v->f = read_float (r);
}
/* Parse a value label record and return success. */
char label[256];
int j;
- val = parse_value (r, v[0]);
+ parse_value (r, var_get_width (v[0]), &val);
read_string (r, label);
/* Assign the value label to each variable. */
for (j = 0; j < nv; j++)
- {
- struct variable *var = v[j];
+ var_replace_value_label (v[j], &val, label);
- if (!var_is_long_string (var))
- var_replace_value_label (var, &val, label);
- }
+ value_destroy (&val, var_get_width (v[0]));
}
}
{
char line[256];
read_string (r, line);
- dict_add_document_line (dict, line);
+ dict_add_document_line (dict, line, false);
}
}
-/* Reads one case from portable file R into C. */
-static bool
-por_file_casereader_read (struct casereader *reader, void *r_, struct ccase *c)
+/* Reads one case from portable file R (passed as R_) and returns
+   it.  The case is created from R's case prototype; each field
+   whose prototype width is 0 is read with read_float(), and every
+   other field is read with read_bytes() and copied into the case
+   by u8_buf_copy_rpad() with ' ' as the pad byte.
+
+   Returns a null pointer, after unreferencing the case, either
+   when R is not OK (READER is then also forced into an error
+   state) or when the current character is the end-of-file marker
+   'Z'.
+
+   NOTE(review): R->bail_out is the longjmp() target for error
+   handling, so the setjmp() below presumably catches read errors
+   raised while the case is being filled in -- confirm against
+   error(). */
+static struct ccase *
+por_file_casereader_read (struct casereader *reader, void *r_)
{
struct pfm_reader *r = r_;
+ struct ccase *volatile c;
size_t i;
- size_t idx;
- case_create (c, casereader_get_value_cnt (reader));
+ c = case_create (r->proto);
setjmp (r->bail_out);
if (!r->ok)
{
casereader_force_error (reader);
- case_destroy (c);
- return false;
+ case_unref (c);
+ return NULL;
}
/* Check for end of file. */
if (r->cc == 'Z')
{
- case_destroy (c);
- return false;
+ case_unref (c);
+ return NULL;
}
- idx = 0;
for (i = 0; i < r->var_cnt; i++)
{
- int width = r->widths[i];
+ int width = caseproto_get_width (r->proto, i);
if (width == 0)
- {
- case_data_rw_idx (c, idx)->f = read_float (r);
- idx++;
- }
+ case_data_rw_idx (c, i)->f = read_float (r);
else
{
- char string[256];
- read_string (r, string);
- buf_copy_str_rpad (case_data_rw_idx (c, idx)->s, width, string);
- idx += DIV_RND_UP (width, MAX_SHORT_STRING);
+ uint8_t buf[256];
+ size_t n_bytes = read_bytes (r, buf);
+ u8_buf_copy_rpad (case_str_rw_idx (c, i), width, buf, n_bytes, ' ');
}
}
- return true;
+ return c;
}
/* Returns true if FILE is an SPSS portable file,
{
unsigned char header[464];
char trans[256];
- int cooked_cnt, raw_cnt;
+ int cooked_cnt, raw_cnt, line_len;
int i;
cooked_cnt = raw_cnt = 0;
+ line_len = 0;
while (cooked_cnt < sizeof header)
{
int c = getc (file);
if (c == EOF || raw_cnt++ > 512)
return false;
- else if (c != '\n' && c != '\r')
- header[cooked_cnt++] = c;
+ else if (c == '\n')
+ {
+ while (line_len < 80 && cooked_cnt < sizeof header)
+ {
+ header[cooked_cnt++] = ' ';
+ line_len++;
+ }
+ line_len = 0;
+ }
+ else if (c != '\r')
+ {
+ header[cooked_cnt++] = c;
+ line_len++;
+ }
}
memset (trans, 0, 256);
return true;
}
-static struct casereader_class por_file_casereader_class =
+static const struct casereader_class por_file_casereader_class =
{
por_file_casereader_read,
por_file_casereader_destroy,