/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2007, 2009 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
struct sfm_read_info *);
static void read_machine_integer_info (struct sfm_reader *,
size_t size, size_t count,
- struct sfm_read_info *);
+ struct sfm_read_info *,
+ struct dictionary *
+ );
static void read_machine_float_info (struct sfm_reader *,
size_t size, size_t count);
static void read_display_parameters (struct sfm_reader *,
switch (subtype)
{
case 3:
- read_machine_integer_info (r, size, count, info);
+ read_machine_integer_info (r, size, count, info, dict);
return;
case 4:
break;
case 7:
- /* Unknown purpose. */
+ /* Used by the MRSETS command. */
+ break;
+
+ case 8:
+ /* Used by the SPSS Data Entry software. */
break;
case 11:
case 20:
/* New in SPSS 16. Contains a single string that describes
the character encoding, e.g. "windows-1252". */
- break;
+ {
+ char *encoding = xcalloc (size, count + 1);
+ read_string (r, encoding, count + 1);
+ dict_set_encoding (dict, encoding);
+ return;
+ }
case 21:
/* New in SPSS 16. Encodes value labels for long string
break;
default:
- sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype);
+ sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send a copy of this file, and the syntax which created it to %s"),
+ subtype, PACKAGE_BUGREPORT);
break;
}
/* Read record type 7, subtype 3. */
static void
read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count,
- struct sfm_read_info *info)
+ struct sfm_read_info *info,
+ struct dictionary *dict)
{
int version_major = read_int (r);
int version_minor = read_int (r);
int float_representation = read_int (r);
int compression_code UNUSED = read_int (r);
int integer_representation = read_int (r);
- int character_code UNUSED = read_int (r);
+ int character_code = read_int (r);
int expected_float_format;
int expected_integer_format;
gettext (endian[integer_representation == 1]),
gettext (endian[expected_integer_format == 1]));
}
+
+
+ /*
+ Record 7 (20) provides a much more reliable way of
+ setting the encoding.
+ The character_code is used as a fallback only.
+ */
+ if ( NULL == dict_get_encoding (dict))
+ {
+ switch (character_code)
+ {
+ case 1:
+ dict_set_encoding (dict, "EBCDIC-US");
+ break;
+ case 2:
+ case 3:
+ /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
+ respectively. However, there are known to be many files
+ in the wild with character code 2, yet have data which are
+ clearly not ascii.
+ Therefore we ignore these values.
+ */
+ return;
+ case 4:
+ dict_set_encoding (dict, "MS_KANJI");
+ break;
+ case 65000:
+ dict_set_encoding (dict, "UTF-7");
+ break;
+ case 65001:
+ dict_set_encoding (dict, "UTF-8");
+ break;
+ default:
+ {
+ char enc[100];
+ snprintf (enc, 100, "CP%d", character_code);
+ dict_set_encoding (dict, enc);
+ }
+ break;
+ };
+ }
}
/* Read record type 7, subtype 4. */
static bool read_whole_strings (struct sfm_reader *, char *, size_t);
static bool skip_whole_strings (struct sfm_reader *, size_t);
-/* Reads one case from READER's file into C. Returns true only
- if successful. */
-static bool
-sys_file_casereader_read (struct casereader *reader, void *r_,
- struct ccase *c)
+/* Reads and returns one case from READER's file. Returns a null
+ pointer if not successful. */
+static struct ccase *
+sys_file_casereader_read (struct casereader *reader, void *r_)
{
struct sfm_reader *r = r_;
+ struct ccase *volatile c;
int i;
if (r->error)
- return false;
+ return NULL;
- case_create (c, r->value_cnt);
+ c = case_create (r->value_cnt);
if (setjmp (r->bail_out))
{
casereader_force_error (reader);
- case_destroy (c);
- return false;
+ case_unref (c);
+ return NULL;
}
for (i = 0; i < r->sfm_var_cnt; i++)
partial_record (r);
}
}
- return true;
+ return c;
eof:
- case_destroy (c);
+ case_unref (c);
if (i != 0)
partial_record (r);
if (r->case_cnt != -1)
read_error (reader, r);
- return false;
+ return NULL;
}
/* Issues an error that R ends in a partial record. */