struct fh_lock *lock; /* Mutual exclusion for file handle. */
FILE *file; /* File stream. */
bool error; /* I/O or corruption error? */
- size_t value_cnt; /* Number of "union value"s in struct case. */
+ struct caseproto *proto; /* Format of output cases. */
/* File format. */
enum integer_format integer_format; /* On-disk integer format. */
struct sfm_read_info *);
static void read_machine_integer_info (struct sfm_reader *,
size_t size, size_t count,
- struct sfm_read_info *);
+ struct sfm_read_info *,
+ struct dictionary *
+ );
static void read_machine_float_info (struct sfm_reader *,
size_t size, size_t count);
static void read_display_parameters (struct sfm_reader *,
dictionary and may destroy or modify its variables. */
sfm_dictionary_to_sfm_vars (*dict, &r->sfm_vars, &r->sfm_var_cnt);
pool_register (r->pool, free, r->sfm_vars);
+ r->proto = caseproto_ref_pool (dict_get_proto (*dict), r->pool);
pool_free (r->pool, var_by_value_idx);
- r->value_cnt = dict_get_next_value_idx (*dict);
return casereader_create_sequential
- (NULL, r->value_cnt,
+ (NULL, r->proto,
r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
&sys_file_casereader_class, r);
switch (subtype)
{
case 3:
- read_machine_integer_info (r, size, count, info);
+ read_machine_integer_info (r, size, count, info, dict);
return;
case 4:
break;
case 7:
- /* Unknown purpose. */
+ /* Used by the MRSETS command. */
+ break;
+
+ case 8:
+ /* Used by the SPSS Data Entry software. */
break;
case 11:
case 20:
/* New in SPSS 16. Contains a single string that describes
the character encoding, e.g. "windows-1252". */
- break;
+ {
+ char *encoding = xcalloc (size, count + 1);
+ read_string (r, encoding, count + 1);
+ dict_set_encoding (dict, encoding);
+ return;
+ }
case 21:
/* New in SPSS 16. Encodes value labels for long string
break;
default:
- sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype);
+ sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send a copy of this file, and the syntax which created it to %s"),
+ subtype, PACKAGE_BUGREPORT);
break;
}
/* Read record type 7, subtype 3. */
static void
read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count,
- struct sfm_read_info *info)
+ struct sfm_read_info *info,
+ struct dictionary *dict)
{
int version_major = read_int (r);
int version_minor = read_int (r);
int float_representation = read_int (r);
int compression_code UNUSED = read_int (r);
int integer_representation = read_int (r);
- int character_code UNUSED = read_int (r);
+ int character_code = read_int (r);
int expected_float_format;
int expected_integer_format;
gettext (endian[integer_representation == 1]),
gettext (endian[expected_integer_format == 1]));
}
+
+
+ /*
+ Record type 7, subtype 20 provides a much more reliable way of
+ setting the encoding, so the character_code read here is used
+ only as a fallback when no encoding has been set yet.
+ */
+ if ( NULL == dict_get_encoding (dict))
+ {
+ switch (character_code)
+ {
+ case 1:
+ dict_set_encoding (dict, "EBCDIC-US");
+ break;
+ case 2:
+ case 3:
+ /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
+ respectively. However, many files in the wild are known to
+ have character code 2 yet contain data that is clearly not
+ ASCII.
+ Therefore we ignore these values.
+ */
+ return;
+ case 4:
+ dict_set_encoding (dict, "MS_KANJI");
+ break;
+ case 65000:
+ dict_set_encoding (dict, "UTF-7");
+ break;
+ case 65001:
+ dict_set_encoding (dict, "UTF-8");
+ break;
+ default:
+ {
+ char enc[100];
+ snprintf (enc, 100, "CP%d", character_code);
+ dict_set_encoding (dict, enc);
+ }
+ break;
+ };
+ }
}
/* Read record type 7, subtype 4. */
struct variable **var = NULL; /* Associated variables. */
int var_cnt; /* Number of associated variables. */
+ int max_width; /* Maximum width of string variables. */
int i;
/* Read the list of variables. */
var = pool_nalloc (subpool, var_cnt, sizeof *var);
+ max_width = 0;
for (i = 0; i < var_cnt; i++)
{
var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int (r));
if (var_is_long_string (var[i]))
sys_error (r, _("Value labels are not allowed on long string "
"variables (%s)."), var_get_name (var[i]));
+ max_width = MAX (max_width, var_get_width (var[i]));
}
/* Type check the variables. */
{
struct label *label = labels + i;
+ value_init_pool (subpool, &label->value, max_width);
if (var_is_alpha (var[0]))
- buf_copy_rpad (label->value.s, sizeof label->value.s,
- label->raw_value, sizeof label->raw_value);
+ buf_copy_rpad (value_str_rw (&label->value, max_width), max_width,
+ label->raw_value, sizeof label->raw_value, ' ');
else
label->value.f = float_get_double (r->float_format, label->raw_value);
}
label->value.f, var_get_name (v));
else
sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
- var_get_width (v), label->value.s,
+ max_width, value_str (&label->value, max_width),
var_get_name (v));
}
}
if (r->error)
return NULL;
- c = case_create (r->value_cnt);
+ c = case_create (r->proto);
if (setjmp (r->bail_out))
{
casereader_force_error (reader);
struct sfm_var *sv = &r->sfm_vars[i];
union value *v = case_data_rw_idx (c, sv->case_index);
- if (sv->width == 0)
+ if (sv->var_width == 0)
{
if (!read_case_number (r, &v->f))
goto eof;
}
else
{
- if (!read_case_string (r, v->s + sv->offset, sv->width))
+ char *s = value_str_rw (v, sv->var_width);
+ if (!read_case_string (r, s + sv->offset, sv->segment_width))
goto eof;
if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8)))
partial_record (r);