X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fpor-file-reader.c;h=4636d5fb806cf82b81124f0515c96a9f89feb965;hb=9f087e7aa4cdff1d5d46d5e188c0017a9d2d0029;hp=a9b1de8193f18e34b6b30f3aee0077e8ce85e45d;hpb=847f28dc2b47bda50561ff1547af42053a56eb78;p=pspp-builds.git
diff --git a/src/data/por-file-reader.c b/src/data/por-file-reader.c
index a9b1de81..4636d5fb 100644
--- a/src/data/por-file-reader.c
+++ b/src/data/por-file-reader.c
@@ -1,48 +1,48 @@
-/* PSPP - computes sample statistics.
+/* PSPP - a program for statistical analysis.
Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
- Code for parsing floating-point numbers adapted from GNU C
- library.
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include
#include "por-file-reader.h"
-#include
-#include
-#include
-#include
+
#include
#include
#include
#include
-#include
+#include
#include
-#include "case.h"
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
#include
-#include "dictionary.h"
-#include "file-handle-def.h"
-#include "format.h"
-#include "missing-values.h"
#include
-#include
+#include
#include
#include
#include
-#include "value-labels.h"
-#include "variable.h"
+
+#include "xalloc.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
@@ -66,15 +66,18 @@ struct pfm_reader
struct file_handle *fh; /* File handle. */
FILE *file; /* File stream. */
+ int line_length; /* Number of characters so far on this line. */
char cc; /* Current character. */
char *trans; /* 256-byte character set translation table. */
int var_cnt; /* Number of variables. */
int weight_index; /* 0-based index of weight variable, or -1. */
int *widths; /* Variable widths, 0 for numeric. */
- int value_cnt; /* Number of `value's per case. */
+ size_t value_cnt; /* Number of `value's per case. */
bool ok; /* Set false on I/O error. */
};
+static struct casereader_class por_file_casereader_class;
+
static void
error (struct pfm_reader *r, const char *msg,...)
PRINTF_FORMAT (2, 3)
@@ -90,7 +93,7 @@ error (struct pfm_reader *r, const char *msg, ...)
va_list args;
ds_init_empty (&text);
- ds_put_format (&text, _("portable file %s corrupt at offset %ld: "),
+ ds_put_format (&text, _("portable file %s corrupt at offset 0x%lx: "),
fh_get_file_name (r->fh), ftell (r->file));
va_start (args, msg);
ds_put_vformat (&text, msg, args);
@@ -101,7 +104,7 @@ error (struct pfm_reader *r, const char *msg, ...)
m.where.file_name = NULL;
m.where.line_number = 0;
m.text = ds_cstr (&text);
-
+
msg_emit (&m);
r->ok = false;
@@ -109,12 +112,67 @@ error (struct pfm_reader *r, const char *msg, ...)
longjmp (r->bail_out, 1);
}
+/* Displays MSG as a warning for the current position in
+ portable file reader R. */
+static void
+warning (struct pfm_reader *r, const char *msg, ...)
+{
+ struct msg m;
+ struct string text;
+ va_list args;
+
+ ds_init_empty (&text);
+ ds_put_format (&text, _("reading portable file %s at offset 0x%lx: "),
+ fh_get_file_name (r->fh), ftell (r->file));
+ va_start (args, msg);
+ ds_put_vformat (&text, msg, args);
+ va_end (args);
+
+ m.category = MSG_GENERAL;
+ m.severity = MSG_WARNING;
+ m.where.file_name = NULL;
+ m.where.line_number = 0;
+ m.text = ds_cstr (&text);
+
+ msg_emit (&m);
+}
+
+/* Close and destroy R.
+ Returns false if an error was detected on R, true otherwise. */
+static bool
+close_reader (struct pfm_reader *r)
+{
+ bool ok;
+ if (r == NULL)
+ return true;
+
+ if (r->file)
+ {
+ if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
+ {
+ msg (ME, _("Error closing portable file \"%s\": %s."),
+ fh_get_file_name (r->fh), strerror (errno));
+ r->ok = false;
+ }
+ r->file = NULL;
+ }
+
+ if (r->fh != NULL)
+ fh_close (r->fh, "portable file", "rs");
+
+ ok = r->ok;
+ pool_destroy (r->pool);
+
+ return ok;
+}
+
/* Closes portable file reader R, after we're done with it. */
-void
-pfm_close_reader (struct pfm_reader *r)
+static void
+por_file_casereader_destroy (struct casereader *reader, void *r_)
{
- if (r != NULL)
- pool_destroy (r->pool);
+ struct pfm_reader *r = r_;
+ if (!close_reader (r))
+ casereader_force_error (reader);
}
/* Read a single character into cur_char. */
@@ -123,14 +181,33 @@ advance (struct pfm_reader *r)
{
int c;
- while ((c = getc (r->file)) == '\r' || c == '\n')
- continue;
+ /* Read the next character from the file.
+ Ignore carriage returns entirely.
+ Mostly ignore new-lines, but if a new-line occurs before the
+ line has reached 80 bytes in length, then treat the
+ "missing" bytes as spaces. */
+ for (;;)
+ {
+ while ((c = getc (r->file)) == '\r')
+ continue;
+ if (c != '\n')
+ break;
+
+ if (r->line_length < 80)
+ {
+ c = ' ';
+ ungetc ('\n', r->file);
+ break;
+ }
+ r->line_length = 0;
+ }
if (c == EOF)
- error (r, _("unexpected end of file"));
+ error (r, _("unexpected end of file"));
if (r->trans != NULL)
- c = r->trans[c];
+ c = r->trans[c];
r->cc = c;
+ r->line_length++;
}
/* Skip a single character if present, and return whether it was
@@ -151,12 +228,12 @@ static void read_header (struct pfm_reader *);
static void read_version_data (struct pfm_reader *, struct pfm_read_info *);
static void read_variables (struct pfm_reader *, struct dictionary *);
static void read_value_label (struct pfm_reader *, struct dictionary *);
-void dump_dictionary (struct dictionary *);
+static void read_documents (struct pfm_reader *, struct dictionary *);
/* Reads the dictionary from file with handle H, and returns it in a
dictionary structure. This dictionary may be modified in order to
rename, reorder, and delete variables, etc. */
-struct pfm_reader *
+struct casereader *
pfm_open_reader (struct file_handle *fh, struct dictionary **dict,
struct pfm_read_info *info)
{
@@ -171,10 +248,9 @@ pfm_open_reader (struct file_handle *fh, struct dictionary **dict,
pool = pool_create ();
r = pool_alloc (pool, sizeof *r);
r->pool = pool;
- if (setjmp (r->bail_out))
- goto error;
r->fh = fh;
- r->file = pool_fopen (r->pool, fh_get_file_name (r->fh), "rb");
+ r->file = fn_open (fh_get_file_name (r->fh), "rb");
+ r->line_length = 0;
r->weight_index = -1;
r->trans = NULL;
r->var_cnt = 0;
@@ -182,7 +258,10 @@ pfm_open_reader (struct file_handle *fh, struct dictionary **dict,
r->value_cnt = 0;
r->ok = true;
- /* Check that file open succeeded, prime reading. */
+ if (setjmp (r->bail_out))
+ goto error;
+
+ /* Check that file open succeeded. */
if (r->file == NULL)
{
msg (ME, _("An error occurred while opening \"%s\" for reading "
@@ -190,7 +269,7 @@ pfm_open_reader (struct file_handle *fh, struct dictionary **dict,
fh_get_file_name (r->fh), strerror (errno));
goto error;
}
-
+
/* Read header, version, date info, product id, variables. */
read_header (r);
read_version_data (r, info);
@@ -200,14 +279,20 @@ pfm_open_reader (struct file_handle *fh, struct dictionary **dict,
while (match (r, 'D'))
read_value_label (r, *dict);
+ /* Read documents. */
+ if (match (r, 'E'))
+ read_documents (r, *dict);
+
/* Check that we've made it to the data. */
if (!match (r, 'F'))
error (r, _("Data record expected."));
- return r;
+ r->value_cnt = dict_get_next_value_idx (*dict);
+ return casereader_create_sequential (NULL, r->value_cnt, CASENUMBER_MAX,
+ &por_file_casereader_class, r);
error:
- pfm_close_reader (r);
+ close_reader (r);
dict_destroy (*dict);
*dict = NULL;
return NULL;
@@ -216,7 +301,7 @@ pfm_open_reader (struct file_handle *fh, struct dictionary **dict,
/* Returns the value of base-30 digit C,
or -1 if C is not a base-30 digit. */
static int
-base_30_value (unsigned char c)
+base_30_value (unsigned char c)
{
static const char base_30_digits[] = "0123456789ABCDEFGHIJKLMNOPQRST";
const char *p = strchr (base_30_digits, c);
@@ -326,7 +411,7 @@ read_float (struct pfm_reader *r)
return negative ? -num : num;
}
-
+
/* Read an integer and return its value. */
static int
read_int (struct pfm_reader *r)
@@ -345,7 +430,7 @@ read_string (struct pfm_reader *r, char *buf)
int n = read_int (r);
if (n < 0 || n > 255)
error (r, _("Bad string length %d."), n);
-
+
while (n-- > 0)
{
*buf++ = r->cc;
@@ -357,7 +442,7 @@ read_string (struct pfm_reader *r, char *buf)
/* Reads a string and returns a copy of it allocated from R's
pool. */
static char *
-read_pool_string (struct pfm_reader *r)
+read_pool_string (struct pfm_reader *r)
{
char string[256];
read_string (r, string);
@@ -374,7 +459,7 @@ read_header (struct pfm_reader *r)
/* Read and ignore vanity splash strings. */
for (i = 0; i < 200; i++)
advance (r);
-
+
/* Skip the first 64 characters of the translation table.
We don't care about these. They are probably all set to
'0', marking them as untranslatable, and that would screw
@@ -385,7 +470,7 @@ read_header (struct pfm_reader *r)
/* Read the rest of the translation table. */
trans = pool_malloc (r->pool, 256);
memset (trans, 0, 256);
- for (; i < 256; i++)
+ for (; i < 256; i++)
{
unsigned char c;
@@ -399,11 +484,11 @@ read_header (struct pfm_reader *r)
/* Set up the translation table, then read the first
translated character. */
r->trans = trans;
- advance (r);
+ advance (r);
/* Skip and verify signature. */
- for (i = 0; i < 8; i++)
- if (!match (r, "SPSSPORT"[i]))
+ for (i = 0; i < 8; i++)
+ if (!match (r, "SPSSPORT"[i]))
{
msg (SE, _("%s: Not a portable file."), fh_get_file_name (r->fh));
longjmp (r->bail_out, 1);
@@ -435,13 +520,13 @@ read_version_data (struct pfm_reader *r, struct pfm_read_info *info)
error (r, _("Bad time string length %d."), (int) strlen (time));
/* Save file info. */
- if (info != NULL)
+ if (info != NULL)
{
/* Date. */
- for (i = 0; i < 8; i++)
+ for (i = 0; i < 8; i++)
{
static const int map[] = {6, 7, 8, 9, 3, 4, 0, 1};
- info->creation_date[map[i]] = date[i];
+ info->creation_date[map[i]] = date[i];
}
info->creation_date[2] = info->creation_date[5] = ' ';
info->creation_date[10] = 0;
@@ -466,14 +551,20 @@ read_version_data (struct pfm_reader *r, struct pfm_read_info *info)
checking that the format is appropriate for variable V. */
static struct fmt_spec
convert_format (struct pfm_reader *r, const int portable_format[3],
- struct variable *v)
+ struct variable *v, bool *report_error)
{
struct fmt_spec format;
bool ok;
if (!fmt_from_io (portable_format[0], &format.type))
- error (r, _("%s: Bad format specifier byte (%d)."),
- var_get_name (v), portable_format[0]);
+ {
+ if (*report_error)
+ warning (r, _("%s: Bad format specifier byte (%d). Variable "
+ "will be assigned a default format."),
+ var_get_name (v), portable_format[0]);
+ goto assign_default;
+ }
+
format.w = portable_format[1];
format.d = portable_format[2];
@@ -484,14 +575,27 @@ convert_format (struct pfm_reader *r, const int portable_format[3],
if (!ok)
{
- char fmt_string[FMT_STRING_LEN_MAX + 1];
- error (r, _("%s variable %s has invalid format specifier %s."),
- var_is_numeric (v) ? _("Numeric") : _("String"),
- var_get_name (v), fmt_to_string (&format, fmt_string));
- format = fmt_default_for_width (var_get_width (v));
+ if (*report_error)
+ {
+ char fmt_string[FMT_STRING_LEN_MAX + 1];
+ fmt_to_string (&format, fmt_string);
+ if (var_is_numeric (v))
+ warning (r, _("Numeric variable %s has invalid format "
+ "specifier %s."),
+ var_get_name (v), fmt_string);
+ else
+ warning (r, _("String variable %s with width %d has "
+ "invalid format specifier %s."),
+ var_get_name (v), var_get_width (v), fmt_string);
+ }
+ goto assign_default;
}
return format;
+
+assign_default:
+ *report_error = false;
+ return fmt_default_for_width (var_get_width (v));
}
static union value parse_value (struct pfm_reader *, struct variable *);
@@ -502,12 +606,12 @@ read_variables (struct pfm_reader *r, struct dictionary *dict)
{
char *weight_name = NULL;
int i;
-
+
if (!match (r, '4'))
error (r, _("Expected variable count record."));
-
+
r->var_cnt = read_int (r);
- if (r->var_cnt <= 0 || r->var_cnt == NOT_INT)
+ if (r->var_cnt <= 0)
error (r, _("Invalid number of variables %d."), r->var_cnt);
r->widths = pool_nalloc (r->pool, r->var_cnt, sizeof *r->widths);
@@ -517,10 +621,10 @@ read_variables (struct pfm_reader *r, struct dictionary *dict)
if (match (r, '6'))
{
weight_name = read_pool_string (r);
- if (strlen (weight_name) > SHORT_NAME_LEN)
+ if (strlen (weight_name) > SHORT_NAME_LEN)
error (r, _("Weight variable name (%s) truncated."), weight_name);
}
-
+
for (i = 0; i < r->var_cnt; i++)
{
int width;
@@ -529,6 +633,7 @@ read_variables (struct pfm_reader *r, struct dictionary *dict)
struct variable *v;
struct missing_values miss;
struct fmt_spec print, write;
+ bool report_error = true;
int j;
if (!match (r, '7'))
@@ -544,7 +649,7 @@ read_variables (struct pfm_reader *r, struct dictionary *dict)
fmt[j] = read_int (r);
if (!var_is_valid_name (name, false) || *name == '#' || *name == '$')
- error (r, _("position %d: Invalid variable name `%s'."), i, name);
+ error (r, _("Invalid variable name `%s' in position %d."), name, i);
str_uppercase (name);
if (width < 0 || width > 255)
@@ -552,16 +657,30 @@ read_variables (struct pfm_reader *r, struct dictionary *dict)
v = dict_create_var (dict, name, width);
if (v == NULL)
- error (r, _("Duplicate variable name %s."), name);
+ {
+ int i;
+ for (i = 1; i < 100000; i++)
+ {
+ char try_name[LONG_NAME_LEN + 1];
+ sprintf (try_name, "%.*s_%d", LONG_NAME_LEN - 6, name, i);
+ v = dict_create_var (dict, try_name, width);
+ if (v != NULL)
+ break;
+ }
+ if (v == NULL)
+ error (r, _("Duplicate variable name %s in position %d."), name, i);
+ warning (r, _("Duplicate variable name %s in position %d renamed "
+ "to %s."), name, i, var_get_name (v));
+ }
- print = convert_format (r, &fmt[0], v);
- write = convert_format (r, &fmt[3], v);
+ print = convert_format (r, &fmt[0], v, &report_error);
+ write = convert_format (r, &fmt[3], v, &report_error);
var_set_print_format (v, &print);
var_set_write_format (v, &write);
/* Range missing values. */
mv_init (&miss, var_get_width (v));
- if (match (r, 'B'))
+ if (match (r, 'B'))
{
double x = read_float (r);
double y = read_float (r);
@@ -573,15 +692,15 @@ read_variables (struct pfm_reader *r, struct dictionary *dict)
mv_add_num_range (&miss, LOWEST, read_float (r));
/* Single missing values. */
- while (match (r, '8'))
+ while (match (r, '8'))
{
union value value = parse_value (r, v);
- mv_add_value (&miss, &value);
+ mv_add_value (&miss, &value);
}
var_set_missing_values (v, &miss);
- if (match (r, 'C'))
+ if (match (r, 'C'))
{
char label[256];
read_string (r, label);
@@ -589,7 +708,7 @@ read_variables (struct pfm_reader *r, struct dictionary *dict)
}
}
- if (weight_name != NULL)
+ if (weight_name != NULL)
{
struct variable *weight_var = dict_lookup_var (dict, weight_name);
if (weight_var == NULL)
@@ -605,12 +724,12 @@ static union value
parse_value (struct pfm_reader *r, struct variable *vv)
{
union value v;
-
- if (var_is_alpha (vv))
+
+ if (var_is_alpha (vv))
{
char string[256];
read_string (r, string);
- buf_copy_str_rpad (v.s, 8, string);
+ buf_copy_str_rpad (v.s, 8, string);
}
else
v.f = read_float (r);
@@ -642,9 +761,9 @@ read_value_label (struct pfm_reader *r, struct dictionary *dict)
if (v[i] == NULL)
error (r, _("Unknown variable %s while parsing value labels."), name);
- if (var_get_width (v[0]) != var_get_width (v[i]))
+ if (var_get_type (v[0]) != var_get_type (v[i]))
error (r, _("Cannot assign value labels to %s and %s, which "
- "have different variable types or widths."),
+ "have different variable types."),
var_get_name (v[0]), var_get_name (v[i]));
}
@@ -658,44 +777,62 @@ read_value_label (struct pfm_reader *r, struct dictionary *dict)
val = parse_value (r, v[0]);
read_string (r, label);
- /* Assign the value_label's to each variable. */
+ /* Assign the value label to each variable. */
for (j = 0; j < nv; j++)
{
struct variable *var = v[j];
- if (!var_add_value_label (var, &val, label))
- continue;
-
- if (var_is_numeric (var))
- error (r, _("Duplicate label for value %g for variable %s."),
- val.f, var_get_name (var));
- else
- error (r, _("Duplicate label for value `%.*s' for variable %s."),
- var_get_width (var), val.s, var_get_name (var));
+ if (!var_is_long_string (var))
+ var_replace_value_label (var, &val, label);
}
}
}
+/* Reads a set of documents from portable file R into DICT. */
+static void
+read_documents (struct pfm_reader *r, struct dictionary *dict)
+{
+ int line_cnt;
+ int i;
+
+ line_cnt = read_int (r);
+ for (i = 0; i < line_cnt; i++)
+ {
+ char line[256];
+ read_string (r, line);
+ dict_add_document_line (dict, line);
+ }
+}
+
/* Reads one case from portable file R into C. */
-bool
-pfm_read_case (struct pfm_reader *r, struct ccase *c)
+static bool
+por_file_casereader_read (struct casereader *reader, void *r_, struct ccase *c)
{
+ struct pfm_reader *r = r_;
size_t i;
size_t idx;
+ case_create (c, casereader_get_value_cnt (reader));
setjmp (r->bail_out);
if (!r->ok)
- return false;
-
+ {
+ casereader_force_error (reader);
+ case_destroy (c);
+ return false;
+ }
+
/* Check for end of file. */
if (r->cc == 'Z')
- return false;
+ {
+ case_destroy (c);
+ return false;
+ }
idx = 0;
- for (i = 0; i < r->var_cnt; i++)
+ for (i = 0; i < r->var_cnt; i++)
{
int width = r->widths[i];
-
+
if (width == 0)
{
case_data_rw_idx (c, idx)->f = read_float (r);
@@ -709,22 +846,14 @@ pfm_read_case (struct pfm_reader *r, struct ccase *c)
idx += DIV_RND_UP (width, MAX_SHORT_STRING);
}
}
-
- return true;
-}
-/* Returns true if an I/O error has occurred on READER, false
- otherwise. */
-bool
-pfm_read_error (const struct pfm_reader *reader)
-{
- return !reader->ok;
+ return true;
}
/* Returns true if FILE is an SPSS portable file,
false otherwise. */
bool
-pfm_detect (FILE *file)
+pfm_detect (FILE *file)
{
unsigned char header[464];
char trans[256];
@@ -737,21 +866,29 @@ pfm_detect (FILE *file)
int c = getc (file);
if (c == EOF || raw_cnt++ > 512)
return false;
- else if (c != '\n' && c != '\r')
+ else if (c != '\n' && c != '\r')
header[cooked_cnt++] = c;
}
memset (trans, 0, 256);
- for (i = 64; i < 256; i++)
+ for (i = 64; i < 256; i++)
{
unsigned char c = header[i + 200];
if (trans[c] == 0)
trans[c] = portable_to_local[i];
}
- for (i = 0; i < 8; i++)
- if (trans[header[i + 456]] != "SPSSPORT"[i])
- return false;
+ for (i = 0; i < 8; i++)
+ if (trans[header[i + 456]] != "SPSSPORT"[i])
+ return false;
return true;
}
+
+static struct casereader_class por_file_casereader_class =
+ {
+ por_file_casereader_read,
+ por_file_casereader_destroy,
+ NULL,
+ NULL,
+ };