+ for (rec = var_recs; rec < &var_recs[n_var_recs]; )
+ {
+ struct variable *var;
+ size_t n_values;
+ char *name;
+ size_t i;
+
+ name = recode_string_pool ("UTF-8", dict_encoding,
+ rec->name, 8, r->pool);
+ name[strcspn (name, " ")] = '\0';
+
+ if (!dict_id_is_valid (dict, name, false)
+ || name[0] == '$' || name[0] == '#')
+ sys_error (r, rec->pos, _("Invalid variable name `%s'."), name);
+
+ if (rec->width < 0 || rec->width > 255)
+ sys_error (r, rec->pos,
+ _("Bad width %d for variable %s."), rec->width, name);
+
+ var = rec->var = dict_create_var (dict, name, rec->width);
+ if (var == NULL)
+ sys_error (r, rec->pos, _("Duplicate variable name `%s'."), name);
+
+ /* Set the short name the same as the long name. */
+ var_set_short_name (var, 0, name);
+
+ /* Get variable label, if any. */
+ if (rec->label)
+ {
+ char *utf8_label;
+
+ utf8_label = recode_string_pool ("UTF-8", dict_encoding,
+ rec->label, -1, r->pool);
+ var_set_label (var, utf8_label, false);
+ }
+
+ /* Set missing values. */
+ if (rec->missing_value_code != 0)
+ {
+ int width = var_get_width (var);
+ struct missing_values mv;
+
+ mv_init_pool (r->pool, &mv, width);
+ if (var_is_numeric (var))
+ {
+ bool has_range = rec->missing_value_code < 0;
+ int n_discrete = (has_range
+ ? rec->missing_value_code == -3
+ : rec->missing_value_code);
+ int ofs = 0;
+
+ if (has_range)
+ {
+ double low = parse_float (r, rec->missing, 0);
+ double high = parse_float (r, rec->missing, 8);
+
+ /* Deal with SPSS 21 change in representation. */
+ if (low == SYSMIS)
+ low = LOWEST;
+
+ mv_add_range (&mv, low, high);
+ ofs += 16;
+ }
+
+ for (i = 0; i < n_discrete; i++)
+ {
+ mv_add_num (&mv, parse_float (r, rec->missing, ofs));
+ ofs += 8;
+ }
+ }
+ else
+ {
+ union value value;
+
+ value_init_pool (r->pool, &value, width);
+ value_set_missing (&value, width);
+ for (i = 0; i < rec->missing_value_code; i++)
+ mv_add_str (&mv, rec->missing + 8 * i, MIN (width, 8));
+ }
+ var_set_missing_values (var, &mv);
+ }
+
+ /* Set formats. */
+ parse_format_spec (r, rec->pos + 12, rec->print_format,
+ PRINT_FORMAT, var, &n_warnings);
+ parse_format_spec (r, rec->pos + 16, rec->write_format,
+ WRITE_FORMAT, var, &n_warnings);
+
+ /* Account for values.
+ Skip long string continuation records, if any. */
+ n_values = rec->width == 0 ? 1 : DIV_RND_UP (rec->width, 8);
+ for (i = 1; i < n_values; i++)
+ if (i + (rec - var_recs) >= n_var_recs || rec[i].width != -1)
+ sys_error (r, rec->pos, _("Missing string continuation record."));
+ rec += n_values;
+ }
+}
+
+/* Translates the format spec from sysfile format to internal
+ format. */
+static void
+parse_format_spec (struct sfm_reader *r, off_t pos, unsigned int format,
+ enum which_format which, struct variable *v,
+ int *n_warnings)
+{
+ const int max_warnings = 8;
+ uint8_t raw_type = format >> 16;
+ uint8_t w = format >> 8;
+ uint8_t d = format;
+ struct fmt_spec f;
+ bool ok;
+
+ f.w = w;
+ f.d = d;
+
+ msg_disable ();
+ ok = (fmt_from_io (raw_type, &f.type)
+ && fmt_check_output (&f)
+ && fmt_check_width_compat (&f, var_get_width (v)));
+ msg_enable ();
+
+ if (ok)
+ {
+ if (which == PRINT_FORMAT)
+ var_set_print_format (v, &f);
+ else
+ var_set_write_format (v, &f);
+ }
+ else if (format == 0)
+ {
+ /* Actually observed in the wild. No point in warning about it. */
+ }
+ else if (++*n_warnings <= max_warnings)
+ {
+ if (which == PRINT_FORMAT)
+ sys_warn (r, pos, _("Variable %s with width %d has invalid print "
+ "format 0x%x."),
+ var_get_name (v), var_get_width (v), format);
+ else
+ sys_warn (r, pos, _("Variable %s with width %d has invalid write "
+ "format 0x%x."),
+ var_get_name (v), var_get_width (v), format);
+
+ if (*n_warnings == max_warnings)
+ sys_warn (r, -1, _("Suppressing further invalid format warnings."));
+ }
+}
+
+static void
+parse_document (struct dictionary *dict, struct sfm_document_record *record)
+{
+ const char *p;
+
+ for (p = record->documents;
+ p < record->documents + DOC_LINE_LENGTH * record->n_lines;
+ p += DOC_LINE_LENGTH)
+ {
+ struct substring line;
+
+ line = recode_substring_pool ("UTF-8", dict_get_encoding (dict),
+ ss_buffer (p, DOC_LINE_LENGTH), NULL);
+ ss_rtrim (&line, ss_cstr (" "));
+ line.string[line.length] = '\0';
+
+ dict_add_document_line (dict, line.string, false);
+
+ ss_dealloc (&line);
+ }
+}
+
+/* Parses record type 7, subtype 3. */
+static void
+parse_machine_integer_info (struct sfm_reader *r,
+ const struct sfm_extension_record *record,
+ struct sfm_read_info *info)
+{
+ int float_representation, expected_float_format;
+ int integer_representation, expected_integer_format;
+
+ /* Save version info. */
+ info->version_major = parse_int (r, record->data, 0);
+ info->version_minor = parse_int (r, record->data, 4);
+ info->version_revision = parse_int (r, record->data, 8);
+
+ /* Check floating point format. */
+ float_representation = parse_int (r, record->data, 16);
+ if (r->float_format == FLOAT_IEEE_DOUBLE_BE
+ || r->float_format == FLOAT_IEEE_DOUBLE_LE)
+ expected_float_format = 1;
+ else if (r->float_format == FLOAT_Z_LONG)
+ expected_float_format = 2;
+ else if (r->float_format == FLOAT_VAX_G || r->float_format == FLOAT_VAX_D)
+ expected_float_format = 3;
+ else
+ NOT_REACHED ();
+ if (float_representation != expected_float_format)
+ sys_error (r, record->pos, _("Floating-point representation indicated by "
+ "system file (%d) differs from expected (%d)."),
+ float_representation, expected_float_format);
+
+ /* Check integer format. */
+ integer_representation = parse_int (r, record->data, 24);
+ if (r->integer_format == INTEGER_MSB_FIRST)
+ expected_integer_format = 1;
+ else if (r->integer_format == INTEGER_LSB_FIRST)
+ expected_integer_format = 2;
+ else
+ NOT_REACHED ();
+ if (integer_representation != expected_integer_format)
+ sys_warn (r, record->pos,
+ _("Integer format indicated by system file (%d) "
+ "differs from expected (%d)."),
+ integer_representation, expected_integer_format);
+
+}
+
+static const char *
+choose_encoding (struct sfm_reader *r,
+ const struct sfm_header_record *header,
+ const struct sfm_extension_record *ext_integer,
+ const struct sfm_extension_record *ext_encoding)
+{
+ /* The EXT_ENCODING record is a more reliable way to determine dictionary
+ encoding. */
+ if (ext_encoding)
+ return ext_encoding->data;
+
+ /* But EXT_INTEGER is better than nothing as a fallback. */
+ if (ext_integer)
+ {
+ int codepage = parse_int (r, ext_integer->data, 7 * 4);
+ const char *encoding;
+
+ switch (codepage)
+ {
+ case 1:
+ return "EBCDIC-US";
+
+ case 2:
+ case 3:
+ /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
+ respectively. However, there are known to be many files in the wild
+ with character code 2, yet have data which are clearly not ASCII.
+ Therefore we ignore these values. */
+ break;
+
+ case 4:
+ return "MS_KANJI";
+
+ default:
+ encoding = sys_get_encoding_from_codepage (codepage);
+ if (encoding != NULL)
+ return encoding;
+ break;
+ }
+ }
+
+ /* If the file magic number is EBCDIC then its character data is too. */
+ if (!strcmp (header->magic, EBCDIC_MAGIC))
+ return "EBCDIC-US";
+
+ return locale_charset ();
+}
+
+/* Parses record type 7, subtype 4. */
+static void
+parse_machine_float_info (struct sfm_reader *r,
+ const struct sfm_extension_record *record)
+{
+ double sysmis = parse_float (r, record->data, 0);
+ double highest = parse_float (r, record->data, 8);
+ double lowest = parse_float (r, record->data, 16);
+
+ if (sysmis != SYSMIS)
+ sys_warn (r, record->pos,
+ _("File specifies unexpected value %g (%a) as %s, "
+ "instead of %g (%a)."),
+ sysmis, sysmis, "SYSMIS", SYSMIS, SYSMIS);