-/* PSPP - computes sample statistics.
- Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
- Code for parsing floating-point numbers adapted from GNU C
- library.
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#include "por-file-reader.h"
#include <data/casereader.h>
#include <data/dictionary.h>
#include <data/file-handle-def.h>
+#include <data/file-name.h>
#include <data/format.h>
#include <data/missing-values.h>
+#include <data/short-names.h>
#include <data/value-labels.h>
#include <data/variable.h>
-#include <libpspp/alloc.h>
#include <libpspp/compiler.h>
#include <libpspp/hash.h>
-#include <libpspp/magic.h>
#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <libpspp/pool.h>
#include <libpspp/str.h>
+#include "xalloc.h"
+
#include "gettext.h"
#define _(msgid) gettext (msgid)
+#define N_(msgid) (msgid)
/* portable_to_local[PORTABLE] translates the given portable
character into the local character set. */
jmp_buf bail_out; /* longjmp() target for error handling. */
struct file_handle *fh; /* File handle. */
+ struct fh_lock *lock; /* Read lock for file. */
FILE *file; /* File stream. */
+ int line_length; /* Number of characters so far on this line. */
char cc; /* Current character. */
char *trans; /* 256-byte character set translation table. */
int var_cnt; /* Number of variables. */
bool ok; /* Set false on I/O error. */
};
-static struct casereader_class por_file_casereader_class;
+static const struct casereader_class por_file_casereader_class;
static void
error (struct pfm_reader *r, const char *msg,...)
va_list args;
ds_init_empty (&text);
- ds_put_format (&text, _("portable file %s corrupt at offset %ld: "),
+ ds_put_format (&text, _("portable file %s corrupt at offset 0x%lx: "),
fh_get_file_name (r->fh), ftell (r->file));
va_start (args, msg);
ds_put_vformat (&text, msg, args);
longjmp (r->bail_out, 1);
}
+/* Emits a warning about portable file reader R.  The message is
+   the printf-style format MSG expanded with the remaining
+   arguments, preceded by the name of R's file and the stream
+   offset reported by ftell(), shown in hexadecimal. */
+static void
+warning (struct pfm_reader *r, const char *msg, ...)
+{
+  struct string buf;
+  struct msg report;
+  va_list ap;
+
+  /* Location prefix first, then the caller's formatted text. */
+  ds_init_empty (&buf);
+  ds_put_format (&buf, _("reading portable file %s at offset 0x%lx: "),
+                 fh_get_file_name (r->fh), ftell (r->file));
+
+  va_start (ap, msg);
+  ds_put_vformat (&buf, msg, ap);
+  va_end (ap);
+
+  /* A general-category warning with no file name or line number attached. */
+  report.text = ds_cstr (&buf);
+  report.where.line_number = 0;
+  report.where.file_name = NULL;
+  report.severity = MSG_WARNING;
+  report.category = MSG_GENERAL;
+
+  msg_emit (&report);
+}
+
+/* Shuts down portable file reader R and releases what it holds:
+   the open stream (if any), the file lock, the file handle
+   reference, and R's memory pool.  A null R is accepted and
+   treated as success.
+
+   Returns R's "ok" flag as it stood just before destruction; a
+   failure to close the stream clears that flag first. */
+static bool
+close_reader (struct pfm_reader *r)
+{
+  bool succeeded;
+
+  if (r == NULL)
+    return true;
+
+  if (r->file != NULL)
+    {
+      int status = fn_close (fh_get_file_name (r->fh), r->file);
+      r->file = NULL;
+      if (status == EOF)
+        {
+          msg (ME, _("Error closing portable file \"%s\": %s."),
+               fh_get_file_name (r->fh), strerror (errno));
+          r->ok = false;
+        }
+    }
+
+  fh_unlock (r->lock);
+  fh_unref (r->fh);
+
+  /* R itself lives in the pool, so read the flag before the pool goes away. */
+  succeeded = r->ok;
+  pool_destroy (r->pool);
+
+  return succeeded;
+}
+
/* Closes portable file reader R, after we're done with it. */
static void
-por_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
+por_file_casereader_destroy (struct casereader *reader, void *r_)
{
struct pfm_reader *r = r_;
- pool_destroy (r->pool);
+ if (!close_reader (r)) /* False means R recorded an error, possibly while closing its file. */
+ casereader_force_error (reader); /* Propagate the failure to READER via casereader_force_error(). */
}
/* Read a single character into cur_char. */
{
int c;
- while ((c = getc (r->file)) == '\r' || c == '\n')
- continue;
+ /* Read the next character from the file.
+ Ignore carriage returns entirely.
+ Mostly ignore new-lines, but if a new-line occurs before the
+ line has reached 80 bytes in length, then treat the
+ "missing" bytes as spaces. */
+ for (;;)
+ {
+ while ((c = getc (r->file)) == '\r')
+ continue;
+ if (c != '\n')
+ break;
+
+ if (r->line_length < 80)
+ {
+ c = ' ';
+ ungetc ('\n', r->file);
+ break;
+ }
+ r->line_length = 0;
+ }
if (c == EOF)
error (r, _("unexpected end of file"));
if (r->trans != NULL)
c = r->trans[c];
r->cc = c;
+ r->line_length++;
}
/* Skip a single character if present, and return whether it was
static void read_version_data (struct pfm_reader *, struct pfm_read_info *);
static void read_variables (struct pfm_reader *, struct dictionary *);
static void read_value_label (struct pfm_reader *, struct dictionary *);
-void dump_dictionary (struct dictionary *);
+static void read_documents (struct pfm_reader *, struct dictionary *);
/* Reads the dictionary from file with handle H, and returns it in a
dictionary structure. This dictionary may be modified in order to
struct pfm_reader *volatile r = NULL;
*dict = dict_create ();
- if (!fh_open (fh, FH_REF_FILE, "portable file", "rs"))
- goto error;
/* Create and initialize reader. */
pool = pool_create ();
r = pool_alloc (pool, sizeof *r);
r->pool = pool;
- if (setjmp (r->bail_out))
- goto error;
- r->fh = fh;
- r->file = pool_fopen (r->pool, fh_get_file_name (r->fh), "rb");
+ r->fh = fh_ref (fh);
+ r->lock = NULL;
+ r->file = NULL;
+ r->line_length = 0;
r->weight_index = -1;
r->trans = NULL;
r->var_cnt = 0;
r->widths = NULL;
r->value_cnt = 0;
r->ok = true;
+ if (setjmp (r->bail_out))
+ goto error;
- /* Check that file open succeeded, prime reading. */
+ /* Lock file. */
+ /* TRANSLATORS: this fragment will be interpolated into
+ messages in fh_lock() that identify types of files. */
+ r->lock = fh_lock (fh, FH_REF_FILE, N_("portable file"), FH_ACC_READ, false);
+ if (r->lock == NULL)
+ goto error;
+
+ /* Open file. */
+ r->file = fn_open (fh_get_file_name (r->fh), "rb");
if (r->file == NULL)
{
msg (ME, _("An error occurred while opening \"%s\" for reading "
while (match (r, 'D'))
read_value_label (r, *dict);
+ /* Read documents. */
+ if (match (r, 'E'))
+ read_documents (r, *dict);
+
/* Check that we've made it to the data. */
if (!match (r, 'F'))
error (r, _("Data record expected."));
&por_file_casereader_class, r);
error:
- pool_destroy (r->pool);
+ close_reader (r);
dict_destroy (*dict);
*dict = NULL;
return NULL;
static void
read_version_data (struct pfm_reader *r, struct pfm_read_info *info)
{
- static char empty_string[] = "";
- char *date, *time, *product, *author, *subproduct;
+ static const char empty_string[] = "";
+ char *date, *time;
+ const char *product, *author, *subproduct;
int i;
/* Read file. */
/* Validate file. */
if (strlen (date) != 8)
- error (r, _("Bad date string length %d."), (int) strlen (date));
+ error (r, _("Bad date string length %zu."), strlen (date));
if (strlen (time) != 6)
- error (r, _("Bad time string length %d."), (int) strlen (time));
+ error (r, _("Bad time string length %zu."), strlen (time));
/* Save file info. */
if (info != NULL)
checking that the format is appropriate for variable V. */
static struct fmt_spec
convert_format (struct pfm_reader *r, const int portable_format[3],
- struct variable *v)
+ struct variable *v, bool *report_error)
{
struct fmt_spec format;
bool ok;
if (!fmt_from_io (portable_format[0], &format.type))
- error (r, _("%s: Bad format specifier byte (%d)."),
- var_get_name (v), portable_format[0]);
+ {
+ if (*report_error)
+ warning (r, _("%s: Bad format specifier byte (%d). Variable "
+ "will be assigned a default format."),
+ var_get_name (v), portable_format[0]);
+ goto assign_default;
+ }
+
format.w = portable_format[1];
format.d = portable_format[2];
if (!ok)
{
- char fmt_string[FMT_STRING_LEN_MAX + 1];
- error (r, _("%s variable %s has invalid format specifier %s."),
- var_is_numeric (v) ? _("Numeric") : _("String"),
- var_get_name (v), fmt_to_string (&format, fmt_string));
- format = fmt_default_for_width (var_get_width (v));
+ if (*report_error)
+ {
+ char fmt_string[FMT_STRING_LEN_MAX + 1];
+ fmt_to_string (&format, fmt_string);
+ if (var_is_numeric (v))
+ warning (r, _("Numeric variable %s has invalid format "
+ "specifier %s."),
+ var_get_name (v), fmt_string);
+ else
+ warning (r, _("String variable %s with width %d has "
+ "invalid format specifier %s."),
+ var_get_name (v), var_get_width (v), fmt_string);
+ }
+ goto assign_default;
}
return format;
+
+assign_default:
+ *report_error = false;
+ return fmt_default_for_width (var_get_width (v));
}
static union value parse_value (struct pfm_reader *, struct variable *);
error (r, _("Expected variable count record."));
r->var_cnt = read_int (r);
- if (r->var_cnt <= 0 || r->var_cnt == NOT_INT)
+ if (r->var_cnt <= 0)
error (r, _("Invalid number of variables %d."), r->var_cnt);
r->widths = pool_nalloc (r->pool, r->var_cnt, sizeof *r->widths);
struct variable *v;
struct missing_values miss;
struct fmt_spec print, write;
+ bool report_error = true;
int j;
if (!match (r, '7'))
fmt[j] = read_int (r);
if (!var_is_valid_name (name, false) || *name == '#' || *name == '$')
- error (r, _("position %d: Invalid variable name `%s'."), i, name);
+ error (r, _("Invalid variable name `%s' in position %d."), name, i);
str_uppercase (name);
if (width < 0 || width > 255)
v = dict_create_var (dict, name, width);
if (v == NULL)
- error (r, _("Duplicate variable name %s."), name);
+        {
+          /* dict_create_var() refused NAME, which the messages below
+             treat as a duplicate.  Rather than reject the file, try
+             NAME_1, NAME_2, ... until a name is accepted.
+
+             The counter has its own identifier on purpose: a local
+             named I here would hide the enclosing I, and the
+             "position" printed below would then be the suffix
+             counter instead of the variable's position. */
+          int suffix;
+          for (suffix = 1; suffix < 100000; suffix++)
+            {
+              /* NAME is cut to VAR_NAME_LEN - 6 bytes, leaving room
+                 for '_' and at most five digits, so the result
+                 always fits in TRY_NAME. */
+              char try_name[VAR_NAME_LEN + 1];
+              sprintf (try_name, "%.*s_%d", VAR_NAME_LEN - 6, name, suffix);
+              v = dict_create_var (dict, try_name, width);
+              if (v != NULL)
+                break;
+            }
+          /* error() does not return, so V is non-null at the warning. */
+          if (v == NULL)
+            error (r, _("Duplicate variable name %s in position %d."), name, i);
+          warning (r, _("Duplicate variable name %s in position %d renamed "
+                        "to %s."), name, i, var_get_name (v));
+        }
- print = convert_format (r, &fmt[0], v);
- write = convert_format (r, &fmt[3], v);
+ print = convert_format (r, &fmt[0], v, &report_error);
+ write = convert_format (r, &fmt[3], v, &report_error);
var_set_print_format (v, &print);
var_set_write_format (v, &write);
{
double x = read_float (r);
double y = read_float (r);
- mv_add_num_range (&miss, x, y);
+ mv_add_range (&miss, x, y);
}
else if (match (r, 'A'))
- mv_add_num_range (&miss, read_float (r), HIGHEST);
+ mv_add_range (&miss, read_float (r), HIGHEST);
else if (match (r, '9'))
- mv_add_num_range (&miss, LOWEST, read_float (r));
+ mv_add_range (&miss, LOWEST, read_float (r));
/* Single missing values. */
while (match (r, '8'))
if (v[i] == NULL)
error (r, _("Unknown variable %s while parsing value labels."), name);
- if (var_get_width (v[0]) != var_get_width (v[i]))
+ if (var_get_type (v[0]) != var_get_type (v[i]))
error (r, _("Cannot assign value labels to %s and %s, which "
- "have different variable types or widths."),
+ "have different variable types."),
var_get_name (v[0]), var_get_name (v[i]));
}
val = parse_value (r, v[0]);
read_string (r, label);
- /* Assign the value_label's to each variable. */
+ /* Assign the value label to each variable. */
for (j = 0; j < nv; j++)
{
struct variable *var = v[j];
- if (!var_add_value_label (var, &val, label))
- continue;
-
- if (var_is_numeric (var))
- error (r, _("Duplicate label for value %g for variable %s."),
- val.f, var_get_name (var));
- else
- error (r, _("Duplicate label for value `%.*s' for variable %s."),
- var_get_width (var), val.s, var_get_name (var));
+ if (!var_is_long_string (var))
+ var_replace_value_label (var, &val, label);
}
}
}
-/* Reads one case from portable file R into C. */
-static bool
-por_file_casereader_read (struct casereader *reader, void *r_, struct ccase *c)
+/* Reads the document record from portable file R: a line count
+   followed by that many strings, each of which is added to DICT
+   as a document line.  A count of zero or less adds nothing. */
+static void
+read_documents (struct pfm_reader *r, struct dictionary *dict)
+{
+  int remaining;
+
+  for (remaining = read_int (r); remaining > 0; remaining--)
+    {
+      char text[256];
+
+      read_string (r, text);
+      dict_add_document_line (dict, text);
+    }
+}
+
+/* Reads and returns one case from portable file R. Returns a
+ null pointer on failure. */
+static struct ccase *
+por_file_casereader_read (struct casereader *reader, void *r_)
{
struct pfm_reader *r = r_;
+ struct ccase *volatile c;
size_t i;
size_t idx;
- case_create (c, casereader_get_value_cnt (reader));
+ c = case_create (casereader_get_value_cnt (reader));
setjmp (r->bail_out);
if (!r->ok)
{
casereader_force_error (reader);
- case_destroy (c);
- return false;
+ case_unref (c);
+ return NULL;
}
/* Check for end of file. */
if (r->cc == 'Z')
{
- case_destroy (c);
- return false;
+ case_unref (c);
+ return NULL;
}
idx = 0;
}
}
- return true;
+ return c;
}
/* Returns true if FILE is an SPSS portable file,
return true;
}
-static struct casereader_class por_file_casereader_class =
+static const struct casereader_class por_file_casereader_class =
{
por_file_casereader_read,
por_file_casereader_destroy,