/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2007, 2009 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <libpspp/hash.h>
#include <libpspp/array.h>
+#include <data/attributes.h>
#include <data/case.h>
#include <data/casereader-provider.h>
#include <data/casereader.h>
#include <data/file-name.h>
#include <data/format.h>
#include <data/missing-values.h>
+#include <data/short-names.h>
#include <data/value-labels.h>
#include <data/variable.h>
#include <data/value.h>
/* File state. */
struct file_handle *fh; /* File handle. */
+ struct fh_lock *lock; /* Mutual exclusion for file handle. */
FILE *file; /* File stream. */
bool error; /* I/O or corruption error? */
size_t value_cnt; /* Number of "union value"s in struct case. */
size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
};
-static struct casereader_class sys_file_casereader_class;
+static const struct casereader_class sys_file_casereader_class;
static bool close_reader (struct sfm_reader *);
struct variable **,
int value_idx);
+static void sys_msg (struct sfm_reader *r, int class,
+ const char *format, va_list args)
+ PRINTF_FORMAT (3, 0);
static void sys_warn (struct sfm_reader *, const char *, ...)
PRINTF_FORMAT (2, 3);
-
static void sys_error (struct sfm_reader *, const char *, ...)
PRINTF_FORMAT (2, 3)
NO_RETURN;
static void read_string (struct sfm_reader *, char *, size_t);
static void skip_bytes (struct sfm_reader *, size_t);
-static struct variable_to_value_map *open_variable_to_value_map (
- struct sfm_reader *, size_t size);
-static void close_variable_to_value_map (struct sfm_reader *r,
- struct variable_to_value_map *);
-static bool read_variable_to_value_map (struct sfm_reader *,
- struct dictionary *,
- struct variable_to_value_map *,
- struct variable **var, char **value,
- int *warning_cnt);
+static struct text_record *open_text_record (struct sfm_reader *, size_t size);
+static void close_text_record (struct sfm_reader *r,
+ struct text_record *);
+static bool read_variable_to_value_pair (struct sfm_reader *,
+ struct dictionary *,
+ struct text_record *,
+ struct variable **var, char **value);
+static void text_warn (struct sfm_reader *r, struct text_record *text,
+ const char *format, ...)
+ PRINTF_FORMAT (3, 4);
+static char *text_get_token (struct text_record *,
+ struct substring delimiters);
+static bool text_match (struct text_record *, char c);
+static bool text_read_short_name (struct sfm_reader *, struct dictionary *,
+ struct text_record *,
+ struct substring delimiters,
+ struct variable **);
static bool close_reader (struct sfm_reader *r);
\f
struct sfm_read_info *);
static void read_machine_integer_info (struct sfm_reader *,
size_t size, size_t count,
- struct sfm_read_info *);
+ struct sfm_read_info *,
+ struct dictionary *
+ );
static void read_machine_float_info (struct sfm_reader *,
size_t size, size_t count);
static void read_display_parameters (struct sfm_reader *,
static void read_long_string_map (struct sfm_reader *,
size_t size, size_t count,
struct dictionary *);
-
+static void read_data_file_attributes (struct sfm_reader *,
+ size_t size, size_t count,
+ struct dictionary *);
+static void read_variable_attributes (struct sfm_reader *,
+ size_t size, size_t count,
+ struct dictionary *);
/* Opens the system file designated by file handle FH for
reading. Reads the system file's dictionary into *DICT.
int claimed_oct_cnt;
int rec_type;
- if (!fh_open (fh, FH_REF_FILE, "system file", "rs"))
- return NULL;
-
*dict = dict_create ();
/* Create and initialize reader. */
r = pool_create_container (struct sfm_reader, pool);
- r->fh = fh;
- r->file = fn_open (fh_get_file_name (fh), "rb");
+ r->fh = fh_ref (fh);
+ r->lock = NULL;
+ r->file = NULL;
r->error = false;
r->oct_cnt = 0;
r->has_long_var_names = false;
r->opcode_idx = sizeof r->opcodes;
+ /* TRANSLATORS: this fragment will be interpolated into
+ messages in fh_lock() that identify types of files. */
+ r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false);
+ if (r->lock == NULL)
+ goto error;
+
+ r->file = fn_open (fh_get_file_name (fh), "rb");
+ if (r->file == NULL)
+ {
+ msg (ME, _("Error opening \"%s\" for reading as a system file: %s."),
+ fh_get_file_name (r->fh), strerror (errno));
+ goto error;
+ }
+
/* Initialize info. */
if (info == NULL)
info = &local_info;
memset (info, 0, sizeof *info);
if (setjmp (r->bail_out))
- {
- close_reader (r);
- dict_destroy (*dict);
- *dict = NULL;
- return NULL;
- }
+ goto error;
- if (r->file == NULL)
- {
- msg (ME, _("Error opening \"%s\" for reading as a system file: %s."),
- fh_get_file_name (r->fh), strerror (errno));
- longjmp (r->bail_out, 1);
- }
/* Read header. */
read_header (r, *dict, &weight_idx, &claimed_oct_cnt, info);
for (i = 0; i < dict_get_var_cnt (*dict); i++)
{
struct variable *var = dict_get_var (*dict, i);
- char short_name [SHORT_NAME_LEN + 1];
- char long_name [SHORT_NAME_LEN + 1];
+ char short_name[SHORT_NAME_LEN + 1];
+ char long_name[SHORT_NAME_LEN + 1];
strcpy (short_name, var_get_name (var));
(NULL, r->value_cnt,
r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
&sys_file_casereader_class, r);
+
+error:
+ close_reader (r);
+ dict_destroy (*dict);
+ *dict = NULL;
+ return NULL;
}
/* Closes a system file after we're done with it.
r->file = NULL;
}
- if (r->fh != NULL)
- fh_close (r->fh, "system file", "rs");
+ fh_unlock (r->lock);
+ fh_unref (r->fh);
error = r->error;
pool_destroy (r->pool);
read_bytes (r, raw_bias, sizeof raw_bias);
if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
{
- sys_warn (r, _("Compression bias (%g) is not the usual "
+ sys_warn (r, _("Compression bias is not the usual "
"value of 100, or system file uses unrecognized "
- "floating-point format."),
- r->bias);
+ "floating-point format."));
if (r->integer_format == INTEGER_MSB_FIRST)
r->float_format = FLOAT_IEEE_DOUBLE_BE;
else
{
double low = read_float (r);
double high = read_float (r);
- mv_add_num_range (&mv, low, high);
+ mv_add_range (&mv, low, high);
missing_value_code = -missing_value_code - 2;
}
for (i = 0; i < missing_value_code; i++)
mv_add_num (&mv, read_float (r));
}
- else if (var_get_width (var) <= MAX_SHORT_STRING)
+ else
{
if (missing_value_code < 1 || missing_value_code > 3)
sys_error (r, _("String missing value indicator field is not "
"0, 1, 2, or 3."));
+ if (var_is_long_string (var))
+ sys_warn (r, _("Ignoring missing values on long string variable "
+ "%s, which PSPP does not yet support."), name);
for (i = 0; i < missing_value_code; i++)
{
char string[9];
mv_add_str (&mv, string);
}
}
- else
- sys_error (r, _("Long string variable %s may not have missing "
- "values."), name);
- var_set_missing_values (var, &mv);
+ if (!var_is_long_string (var))
+ var_set_missing_values (var, &mv);
}
/* Set formats. */
switch (subtype)
{
case 3:
- read_machine_integer_info (r, size, count, info);
+ read_machine_integer_info (r, size, count, info, dict);
return;
case 4:
break;
case 7:
- /* Unknown purpose. */
+ /* Used by the MRSETS command. */
+ break;
+
+ case 8:
+ /* Used by the SPSS Data Entry software. */
break;
case 11:
break;
case 17:
- /* Text field that defines variable attributes. New in
- SPSS 14. */
+ read_data_file_attributes (r, size, count, dict);
+ return;
+
+ case 18:
+ read_variable_attributes (r, size, count, dict);
+ return;
+
+ case 20:
+ /* New in SPSS 16. Contains a single string that describes
+ the character encoding, e.g. "windows-1252". */
+ {
+ char *encoding = xcalloc (size, count + 1);
+ read_string (r, encoding, count + 1);
+ dict_set_encoding (dict, encoding);
+ return;
+ }
+
+ case 21:
+ /* New in SPSS 16. Encodes value labels for long string
+ variables. */
+ sys_warn (r, _("Ignoring value labels for long string variables, "
+ "which PSPP does not yet support."));
break;
default:
- sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype);
+ sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send a copy of this file, and the syntax which created it to %s"),
+ subtype, PACKAGE_BUGREPORT);
break;
}
/* Read record type 7, subtype 3. */
static void
read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count,
- struct sfm_read_info *info)
+ struct sfm_read_info *info,
+ struct dictionary *dict)
{
int version_major = read_int (r);
int version_minor = read_int (r);
int float_representation = read_int (r);
int compression_code UNUSED = read_int (r);
int integer_representation = read_int (r);
- int character_code UNUSED = read_int (r);
+ int character_code = read_int (r);
int expected_float_format;
int expected_integer_format;
NOT_REACHED ();
if (integer_representation != expected_integer_format)
{
- static const char *endian[] = {N_("little-endian"), N_("big-endian")};
+ static const char *const endian[] = {N_("little-endian"), N_("big-endian")};
sys_warn (r, _("Integer format indicated by system file (%s) "
"differs from expected (%s)."),
gettext (endian[integer_representation == 1]),
gettext (endian[expected_integer_format == 1]));
}
+
+
+ /*
+ Record 7 (20) provides a much more reliable way of
+ setting the encoding.
+ The character_code is used as a fallback only.
+ */
+ if ( NULL == dict_get_encoding (dict))
+ {
+ switch (character_code)
+ {
+ case 1:
+ dict_set_encoding (dict, "EBCDIC-US");
+ break;
+ case 2:
+ case 3:
+ /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
+ respectively. However, there are known to be many files
+ in the wild with character code 2, yet have data which are
+ clearly not ascii.
+ Therefore we ignore these values.
+ */
+ return;
+ case 4:
+ dict_set_encoding (dict, "MS_KANJI");
+ break;
+ case 65000:
+ dict_set_encoding (dict, "UTF-7");
+ break;
+ case 65001:
+ dict_set_encoding (dict, "UTF-8");
+ break;
+ default:
+ {
+ char enc[100];
+ snprintf (enc, 100, "CP%d", character_code);
+ dict_set_encoding (dict, enc);
+ }
+ break;
+ };
+ }
}
/* Read record type 7, subtype 4. */
read_display_parameters (struct sfm_reader *r, size_t size, size_t count,
struct dictionary *dict)
{
- const size_t n_vars = count / 3 ;
+ size_t n_vars;
+ bool includes_width;
bool warned = false;
- int i;
+ size_t i;
- if (count % 3 || n_vars != dict_get_var_cnt (dict))
- sys_error (r, _("Bad size (%zu) or count (%zu) on extension 11."),
- size, count);
+ if (size != 4)
+ {
+ sys_warn (r, _("Bad size %zu on extension 11."), size);
+ skip_bytes (r, size * count);
+ return;
+ }
+
+ n_vars = dict_get_var_cnt (dict);
+ if (count == 3 * n_vars)
+ includes_width = true;
+ else if (count == 2 * n_vars)
+ includes_width = false;
+ else
+ {
+ sys_warn (r, _("Extension 11 has bad count %zu (for %zu variables)."),
+ count, n_vars);
+ skip_bytes (r, size * count);
+ return;
+ }
for (i = 0; i < n_vars; ++i)
{
struct variable *v = dict_get_var (dict, i);
int measure = read_int (r);
- int width = read_int (r);
+ int width = includes_width ? read_int (r) : 0;
int align = read_int (r);
/* SPSS 14 sometimes seems to set string variables' measure
if (0 == measure && var_is_alpha (v))
measure = 1;
- /* Older versions (SPSS 9.0) sometimes set the display width
- to zero. This causes confusion especially in the GUI */
- if (0 == width)
- width = 8;
-
if (measure < 1 || measure > 3 || align < 0 || align > 2)
{
if (!warned)
- sys_warn (r, _("Invalid variable display parameters. "
- "Default parameters substituted."));
+ sys_warn (r, _("Invalid variable display parameters "
+ "for variable %zu (%s). "
+ "Default parameters substituted."),
+ i, var_get_name (v));
warned = true;
continue;
}
var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
: measure == 2 ? MEASURE_ORDINAL
: MEASURE_SCALE));
- var_set_display_width (v, width);
var_set_alignment (v, (align == 0 ? ALIGN_LEFT
: align == 1 ? ALIGN_RIGHT
: ALIGN_CENTRE));
+
+ /* Older versions (SPSS 9.0) sometimes set the display
+ width to zero. This causes confusion in the GUI, so
+ only set the width if it is nonzero. */
+ if (width > 0)
+ var_set_display_width (v, width);
}
}
read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count,
struct dictionary *dict)
{
- struct variable_to_value_map *map;
+ struct text_record *text;
struct variable *var;
char *long_name;
- int warning_cnt = 0;
- map = open_variable_to_value_map (r, size * count);
- while (read_variable_to_value_map (r, dict, map, &var, &long_name,
- &warning_cnt))
+ text = open_text_record (r, size * count);
+ while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
{
char **short_names;
size_t short_name_cnt;
}
free (short_names);
}
- close_variable_to_value_map (r, map);
+ close_text_record (r, text);
r->has_long_var_names = true;
}
read_long_string_map (struct sfm_reader *r, size_t size, size_t count,
struct dictionary *dict)
{
- struct variable_to_value_map *map;
+ struct text_record *text;
struct variable *var;
char *length_s;
- int warning_cnt = 0;
- map = open_variable_to_value_map (r, size * count);
- while (read_variable_to_value_map (r, dict, map, &var, &length_s,
- &warning_cnt))
+ text = open_text_record (r, size * count);
+ while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
{
size_t idx = var_get_dict_index (var);
long int length;
dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
var_set_width (var, length);
}
- close_variable_to_value_map (r, map);
+ close_text_record (r, text);
dict_compact_values (dict);
}
pool_destroy (subpool);
}
+
+/* Reads a set of custom attributes from TEXT into ATTRS.
+ ATTRS may be a null pointer, in which case the attributes are
+ read but discarded.
+
+ Each attribute is parsed as a key token delimited by "(",
+ followed by one or more value tokens delimited by new-lines;
+ a ')' found right after a value ends that attribute's value
+ list. Parsing stops when no further key token can be read
+ or when a '/' is found right after an attribute. Problems
+ are reported through text_warn(), so they count against
+ TEXT's warning limit. */
+static void
+read_attributes (struct sfm_reader *r, struct text_record *text,
+ struct attrset *attrs)
+{
+ do
+ {
+ struct attribute *attr;
+ char *key;
+ int index; /* 1-based value number, used only in warnings. */
+
+ /* Parse the key. Stop if no token could be read. */
+ key = text_get_token (text, ss_cstr ("("));
+ if (key == NULL)
+ return;
+
+ attr = attribute_create (key);
+ for (index = 1; ; index++)
+ {
+ /* Parse the value. */
+ char *value;
+ size_t length;
+
+ value = text_get_token (text, ss_cstr ("\n"));
+ if (value == NULL)
+ {
+ text_warn (r, text, _("Error parsing attribute value %s[%d]"),
+ key, index);
+ break;
+ }
+
+ length = strlen (value);
+ if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
+ {
+ value[length - 1] = '\0'; /* Drop the closing quote; VALUE + 1 skips the opening one. */
+ attribute_add_value (attr, value + 1);
+ }
+ else
+ {
+ text_warn (r, text,
+ _("Attribute value %s[%d] is not quoted: %s"),
+ key, index, value);
+ attribute_add_value (attr, value); /* Unquoted values are stored as-is. */
+ }
+
+ /* Was this the last value for this attribute? */
+ if (text_match (text, ')'))
+ break;
+ }
+ if (attrs != NULL) /* Reached even when a value failed to parse above. */
+ attrset_add (attrs, attr);
+ else
+ attribute_destroy (attr);
+ }
+ while (!text_match (text, '/'));
+}
+
+/* Reads record type 7, subtype 17, which lists custom
+ attributes on the data file.
+
+ The record's SIZE * COUNT bytes are loaded as a text record
+ and parsed by read_attributes() into the attribute set that
+ dict_get_attributes() returns for DICT; the text record is
+ closed afterward. */
+static void
+read_data_file_attributes (struct sfm_reader *r,
+ size_t size, size_t count,
+ struct dictionary *dict)
+{
+ struct text_record *text = open_text_record (r, size * count);
+ read_attributes (r, text, dict_get_attributes (dict));
+ close_text_record (r, text);
+}
+
+/* Reads record type 7, subtype 18, which lists custom
+ attributes on individual variables.
+
+ The record's SIZE * COUNT bytes are loaded as a text record.
+ Each entry is a short variable name delimited by ":" followed
+ by that variable's attributes. When the name does not match
+ a variable in DICT, the attributes are still parsed but are
+ discarded. The loop ends when no further name can be read. */
+static void
+read_variable_attributes (struct sfm_reader *r,
+ size_t size, size_t count,
+ struct dictionary *dict)
+{
+ struct text_record *text = open_text_record (r, size * count);
+ for (;;)
+ {
+ struct variable *var;
+ if (!text_read_short_name (r, dict, text, ss_cstr (":"), &var))
+ break;
+ read_attributes (r, text, var != NULL ? var_get_attributes (var) : NULL); /* Null ATTRS: parse and discard. */
+ }
+ close_text_record (r, text);
+}
+
\f
/* Case reader. */
static bool read_whole_strings (struct sfm_reader *, char *, size_t);
static bool skip_whole_strings (struct sfm_reader *, size_t);
-/* Reads one case from READER's file into C. Returns true only
- if successful. */
-static bool
-sys_file_casereader_read (struct casereader *reader, void *r_,
- struct ccase *c)
+/* Reads and returns one case from READER's file. Returns a null
+ pointer if not successful. */
+static struct ccase *
+sys_file_casereader_read (struct casereader *reader, void *r_)
{
struct sfm_reader *r = r_;
+ struct ccase *volatile c;
int i;
if (r->error)
- return false;
+ return NULL;
- case_create (c, r->value_cnt);
+ c = case_create (r->value_cnt);
if (setjmp (r->bail_out))
{
casereader_force_error (reader);
- case_destroy (c);
- return false;
+ case_unref (c);
+ return NULL;
}
for (i = 0; i < r->sfm_var_cnt; i++)
partial_record (r);
}
}
- return true;
+ return c;
eof:
- case_destroy (c);
+ case_unref (c);
if (i != 0)
partial_record (r);
if (r->case_cnt != -1)
read_error (reader, r);
- return false;
+ return NULL;
}
/* Issues an error that R ends in a partial record. */
return NULL;
}
\f
-/* Helpers for reading records that contain "variable=value"
- pairs. */
+/* Helpers for reading records that contain structured text
+ strings. */
+
+/* Maximum number of warnings to issue for a single text
+ record. */
+#define MAX_TEXT_WARNINGS 5
/* State. */
-struct variable_to_value_map
+struct text_record /* Buffer plus parse position for one text record being tokenized. */
{
struct substring buffer; /* Record contents. */
size_t pos; /* Current position in buffer. */
+ int n_warnings; /* Number of warnings issued or suppressed: text_warn() increments it on every call, and close_text_record() compares it against MAX_TEXT_WARNINGS. */
};
-/* Reads SIZE bytes into a "variable=value" map for R,
- and returns the map. */
-static struct variable_to_value_map *
-open_variable_to_value_map (struct sfm_reader *r, size_t size)
+/* Reads SIZE bytes into a text record for R,
+ and returns the new text record.
+
+ Both the record and its SIZE + 1 byte buffer are allocated
+ from R's pool. Only SIZE bytes are read into the buffer; the
+ extra byte is not initialized here (presumably it leaves room
+ for the terminator that text_get_token() writes -- confirm).
+ The parse position and warning count start at zero. */
+static struct text_record *
+open_text_record (struct sfm_reader *r, size_t size)
{
- struct variable_to_value_map *map = pool_alloc (r->pool, sizeof *map);
+ struct text_record *text = pool_alloc (r->pool, sizeof *text);
char *buffer = pool_malloc (r->pool, size + 1);
read_bytes (r, buffer, size);
- map->buffer = ss_buffer (buffer, size);
- map->pos = 0;
- return map;
+ text->buffer = ss_buffer (buffer, size);
+ text->pos = 0;
+ text->n_warnings = 0;
+ return text;
}
-/* Closes MAP and frees its storage.
- Not really needed, because the pool will free the map anyway,
- but can be used to free it earlier. */
+/* Closes TEXT, frees its storage, and issues a final warning
+ about suppressed warnings if necessary.
+
+ The summary warning is issued only when more than
+ MAX_TEXT_WARNINGS warnings were requested for TEXT. Only
+ TEXT's buffer is returned to R's pool here; the struct
+ text_record itself is not freed by this function. */
static void
-close_variable_to_value_map (struct sfm_reader *r,
- struct variable_to_value_map *map)
+close_text_record (struct sfm_reader *r, struct text_record *text)
{
- pool_free (r->pool, ss_data (map->buffer));
+ if (text->n_warnings > MAX_TEXT_WARNINGS)
+ sys_warn (r, _("Suppressed %d additional related warnings."),
+ text->n_warnings - MAX_TEXT_WARNINGS);
+ pool_free (r->pool, ss_data (text->buffer));
}
-/* Reads the next variable=value pair from MAP.
+/* Reads a variable=value pair from TEXT.
Looks up the variable in DICT and stores it into *VAR.
Stores a null-terminated value into *VALUE. */
static bool
-read_variable_to_value_map (struct sfm_reader *r, struct dictionary *dict,
- struct variable_to_value_map *map,
- struct variable **var, char **value,
- int *warning_cnt)
+read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
+ struct text_record *text,
+ struct variable **var, char **value) /* Returns true once a pair naming
+ a known variable has been read, false when no further name or value
+ token can be read. Pairs that name an unknown variable are skipped
+ (text_read_short_name() has already warned about them). */
{
- int max_warnings = 5;
-
for (;;)
{
- struct substring short_name_ss, value_ss;
+ if (!text_read_short_name (r, dict, text, ss_cstr ("="), var)) /* Name is delimited by "=". */
+ return false;
+
+ *value = text_get_token (text, ss_buffer ("\t\0", 2)); /* Value is delimited by a tab or a null byte. */
+ if (*value == NULL)
+ return false;
- if (!ss_tokenize (map->buffer, ss_cstr ("="), &map->pos, &short_name_ss)
- || !ss_tokenize (map->buffer, ss_buffer ("\t\0", 2), &map->pos,
- &value_ss))
- {
- if (*warning_cnt > max_warnings)
- sys_warn (r, _("Suppressed %d additional variable map warnings."),
- *warning_cnt - max_warnings);
- return false;
- }
+ text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX), /* Skip any run of tabs/null bytes before the next pair. */
+ ss_buffer ("\t\0", 2));
- map->pos += ss_span (ss_substr (map->buffer, map->pos, SIZE_MAX),
- ss_buffer ("\t\0", 2));
+ if (*var != NULL) /* Unknown variable: loop to try the next pair. */
+ return true;
+ }
+}
- ss_data (short_name_ss)[ss_length (short_name_ss)] = '\0';
- *var = lookup_var_by_short_name (dict, ss_data (short_name_ss));
- if (*var == NULL)
- {
- if (++*warning_cnt <= max_warnings)
- sys_warn (r, _("Variable map refers to unknown variable %s."),
- ss_data (short_name_ss));
- continue;
- }
+static bool /* Reads the next token of TEXT, delimited by DELIMITERS, as a
+ short variable name and stores the result of looking it up in DICT
+ into *VAR. Returns false only when no token can be read. Returns
+ true otherwise, even if the name is unknown; in that case a warning
+ is issued through text_warn() and *VAR is null, so callers must
+ check *VAR. */
+text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
+ struct text_record *text, struct substring delimiters,
+ struct variable **var)
+{
+ char *short_name = text_get_token (text, delimiters);
+ if (short_name == NULL)
+ return false;
+
+ *var = lookup_var_by_short_name (dict, short_name);
+ if (*var == NULL)
+ text_warn (r, text, _("Variable map refers to unknown variable %s."),
+ short_name);
+ return true;
+}
+
+/* Displays a warning for the current file position, limiting the
+ number to MAX_TEXT_WARNINGS for TEXT.
+
+ FORMAT and the following arguments are printf-style and are
+ passed to sys_msg() with class MW. TEXT's warning count is
+ incremented on every call, whether or not the message is
+ actually shown, so that close_text_record() can report how
+ many warnings were suppressed. */
+static void
+text_warn (struct sfm_reader *r, struct text_record *text,
+ const char *format, ...)
+{
+ if (text->n_warnings++ < MAX_TEXT_WARNINGS)
+ {
+ va_list args;
+
+ va_start (args, format);
+ sys_msg (r, MW, format, args);
+ va_end (args);
+ }
+}
- ss_data (value_ss)[ss_length (value_ss)] = '\0';
- *value = ss_data (value_ss);
+static char * /* Extracts the next token from TEXT using ss_tokenize() with
+ DELIMITERS, advancing TEXT's position. Returns a null pointer if
+ ss_tokenize() reports no token. Otherwise null-terminates the
+ token by writing '\0' to the byte just past it and returns a
+ pointer to the token's data (presumably inside TEXT's own buffer,
+ so the delimiter byte is overwritten -- confirm against
+ ss_tokenize()). */
+text_get_token (struct text_record *text, struct substring delimiters)
+{
+ struct substring token;
+ if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token))
+ return NULL;
+ ss_data (token)[ss_length (token)] = '\0';
+ return ss_data (token);
+}
+
+/* If the byte at TEXT's current position is C, advances past it
+ and returns true; otherwise leaves the position unchanged and
+ returns false.
+
+ Returns false without touching the buffer once the position
+ has reached or passed the end of the record. The buffer holds
+ only ss_length (TEXT->buffer) bytes of record data, so reading
+ at or beyond that offset would examine an uninitialized byte
+ or memory outside the allocation. */
+static bool
+text_match (struct text_record *text, char c)
+{
+ if (text->pos < ss_length (text->buffer)
+ && text->buffer.string[text->pos] == c)
+ {
+ text->pos++;
return true;
}
+ else
+ return false;
}
\f
/* Messages. */
}
}
\f
-static struct casereader_class sys_file_casereader_class =
+static const struct casereader_class sys_file_casereader_class =
{
sys_file_casereader_read,
sys_file_casereader_destroy,