+ /* Set formats. */
+ parse_format_spec (r, rec->pos + 12, rec->print_format,
+ PRINT_FORMAT, var, &n_warnings);
+ parse_format_spec (r, rec->pos + 16, rec->write_format,
+ WRITE_FORMAT, var, &n_warnings);
+
+ /* Account for values.
+ Skip long string continuation records, if any. */
+ n_values = rec->width == 0 ? 1 : DIV_RND_UP (rec->width, 8);
+ for (i = 1; i < n_values; i++)
+ if (i + (rec - var_recs) >= n_var_recs || rec[i].width != -1)
+ sys_error (r, rec->pos, _("Missing string continuation record."));
+ rec += n_values;
+ }
+}
+
+/* Decodes FORMAT, a format specification as stored in a system file, and
+   installs it as V's print or write format, as selected by WHICH.
+
+   FORMAT carries the raw type code in bits 16-23, the width in bits 8-15,
+   and the number of decimals in bits 0-7; bits above 23 take no part in the
+   decoding.
+
+   A spec that fails validation is not installed.  Unless FORMAT is zero, the
+   failure is counted in *N_WARNINGS and a warning citing POS is issued for
+   each of the first 8 failures counted; later failures are counted
+   silently. */
+static void
+parse_format_spec (struct sfm_reader *r, off_t pos, unsigned int format,
+                   enum which_format which, struct variable *v,
+                   int *n_warnings)
+{
+  const int max_warnings = 8;
+  const bool is_print = which == PRINT_FORMAT;
+  uint8_t raw_type = format >> 16;
+  struct fmt_spec spec;
+  bool valid;
+
+  spec.w = (uint8_t) (format >> 8);
+  spec.d = (uint8_t) format;
+
+  /* Validate with messages disabled, so that any diagnostics come from the
+     warnings below. */
+  msg_disable ();
+  valid = fmt_from_io (raw_type, &spec.type);
+  if (valid)
+    valid = fmt_check_output (&spec);
+  if (valid)
+    valid = fmt_check_width_compat (&spec, var_get_width (v));
+  msg_enable ();
+
+  if (valid)
+    {
+      if (is_print)
+        var_set_print_format (v, &spec);
+      else
+        var_set_write_format (v, &spec);
+      return;
+    }
+
+  /* An all-zero format has actually been observed in the wild.  No point in
+     warning about it. */
+  if (format == 0)
+    return;
+
+  /* Count every failure, but report only the first MAX_WARNINGS. */
+  if (++*n_warnings > max_warnings)
+    return;
+
+  if (is_print)
+    sys_warn (r, pos, _("Variable %s with width %d has invalid print "
+                        "format 0x%x."),
+              var_get_name (v), var_get_width (v), format);
+  else
+    sys_warn (r, pos, _("Variable %s with width %d has invalid write "
+                        "format 0x%x."),
+              var_get_name (v), var_get_width (v), format);
+
+  if (*n_warnings == max_warnings)
+    sys_warn (r, -1, _("Suppressing further invalid format warnings."));
+}
+
+/* Adds the document lines held in RECORD to DICT.
+
+   RECORD->documents is walked in fixed-size chunks of DOC_LINE_LENGTH bytes,
+   RECORD->n_lines of them.  Each chunk is passed through
+   recode_substring_pool() together with "UTF-8" and DICT's encoding, has its
+   trailing spaces trimmed, and is then appended to DICT as one document
+   line. */
+static void
+parse_document (struct dictionary *dict, struct sfm_document_record *record)
+{
+  const char *const end = (record->documents
+                           + DOC_LINE_LENGTH * record->n_lines);
+  const char *chunk = record->documents;
+
+  while (chunk < end)
+    {
+      struct substring text
+        = recode_substring_pool ("UTF-8", dict_get_encoding (dict),
+                                 ss_buffer (chunk, DOC_LINE_LENGTH), NULL);
+
+      /* Drop the space padding, then terminate the text in place so that it
+         can be handed over as a C string. */
+      ss_rtrim (&text, ss_cstr (" "));
+      text.string[text.length] = '\0';
+
+      dict_add_document_line (dict, text.string, false);
+
+      ss_dealloc (&text);
+      chunk += DOC_LINE_LENGTH;
+    }
+}
+
+/* Parses record type 7, subtype 3.
+
+   Stores the three version numbers found at offsets 0, 4, and 8 of RECORD's
+   data into INFO.  Then compares the floating-point representation code at
+   offset 16 and the integer representation code at offset 24 against the
+   codes implied by R's float and integer formats: a floating-point mismatch
+   is reported with sys_error(), an integer mismatch with sys_warn(). */
+static void
+parse_machine_integer_info (struct sfm_reader *r,
+                            const struct sfm_extension_record *record,
+                            struct sfm_read_info *info)
+{
+  int float_representation, expected_float_format;
+  int integer_representation, expected_integer_format;
+
+  /* Save version info. */
+  info->version_major = parse_int (r, record->data, 0);
+  info->version_minor = parse_int (r, record->data, 4);
+  info->version_revision = parse_int (r, record->data, 8);
+
+  /* Check floating point format. */
+  float_representation = parse_int (r, record->data, 16);
+  switch (r->float_format)
+    {
+    case FLOAT_IEEE_DOUBLE_BE:
+    case FLOAT_IEEE_DOUBLE_LE:
+      expected_float_format = 1;
+      break;
+
+    case FLOAT_Z_LONG:
+      expected_float_format = 2;
+      break;
+
+    case FLOAT_VAX_G:
+    case FLOAT_VAX_D:
+      expected_float_format = 3;
+      break;
+
+    default:
+      NOT_REACHED ();
+    }
+  if (float_representation != expected_float_format)
+    sys_error (r, record->pos, _("Floating-point representation indicated by "
+                                 "system file (%d) differs from expected (%d)."),
+               float_representation, expected_float_format);
+
+  /* Check integer format. */
+  integer_representation = parse_int (r, record->data, 24);
+  switch (r->integer_format)
+    {
+    case INTEGER_MSB_FIRST:
+      expected_integer_format = 1;
+      break;
+
+    case INTEGER_LSB_FIRST:
+      expected_integer_format = 2;
+      break;
+
+    default:
+      NOT_REACHED ();
+    }
+  if (integer_representation != expected_integer_format)
+    sys_warn (r, record->pos,
+              _("Integer format indicated by system file (%d) "
+                "differs from expected (%d)."),
+              integer_representation, expected_integer_format);
+}
+
+/* Picks the name of the character encoding to use for the dictionary.
+
+   The sources are tried in this order, and the first one that yields a name
+   wins:
+
+     1. EXT_ENCODING's data, whenever that record is present.
+     2. The character code at offset 28 in EXT_INTEGER's data, if that
+        record is present and the code maps to an encoding.
+     3. "EBCDIC-US", if HEADER's magic number equals EBCDIC_MAGIC.
+     4. Whatever locale_charset() reports. */
+static const char *
+choose_encoding (struct sfm_reader *r,
+                 const struct sfm_header_record *header,
+                 const struct sfm_extension_record *ext_integer,
+                 const struct sfm_extension_record *ext_encoding)
+{
+  /* The EXT_ENCODING record is a more reliable way to determine dictionary
+     encoding. */
+  if (ext_encoding != NULL)
+    return ext_encoding->data;
+
+  /* But EXT_INTEGER is better than nothing as a fallback. */
+  if (ext_integer != NULL)
+    {
+      int codepage = parse_int (r, ext_integer->data, 7 * 4);
+
+      if (codepage == 1)
+        return "EBCDIC-US";
+      else if (codepage == 4)
+        return "MS_KANJI";
+      else if (codepage != 2 && codepage != 3)
+        {
+          const char *name = sys_get_encoding_from_codepage (codepage);
+          if (name != NULL)
+            return name;
+        }
+
+      /* Codes 2 and 3 ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
+         respectively.  However, there are known to be many files in the wild
+         with character code 2, yet have data which are clearly not ASCII.
+         Therefore those values are ignored and we fall through, as we also
+         do for any code that could not be mapped to an encoding. */
+    }
+
+  /* If the file magic number is EBCDIC then its character data is too. */
+  if (strcmp (header->magic, EBCDIC_MAGIC) == 0)
+    return "EBCDIC-US";
+
+  return locale_charset ();
+}
+
+/* Parses record type 7, subtype 4.
+
+   Reads three floating-point values from offsets 0, 8, and 16 of RECORD's
+   data and compares them with SYSMIS, HIGHEST, and LOWEST respectively.  Each
+   value that differs from what is expected produces a warning; nothing else
+   is done with the values. */
+static void
+parse_machine_float_info (struct sfm_reader *r,
+                          const struct sfm_extension_record *record)
+{
+  double file_sysmis = parse_float (r, record->data, 0);
+  double file_highest = parse_float (r, record->data, 8);
+  double file_lowest = parse_float (r, record->data, 16);
+
+  if (file_sysmis != SYSMIS)
+    {
+      sys_warn (r, record->pos,
+                _("File specifies unexpected value %g (%a) as %s, "
+                  "instead of %g (%a)."),
+                file_sysmis, file_sysmis, "SYSMIS", SYSMIS, SYSMIS);
+    }
+
+  if (file_highest != HIGHEST)
+    {
+      sys_warn (r, record->pos,
+                _("File specifies unexpected value %g (%a) as %s, "
+                  "instead of %g (%a)."),
+                file_highest, file_highest, "HIGHEST", HIGHEST, HIGHEST);
+    }
+
+  /* SPSS before version 21 used a unique value just bigger than SYSMIS as
+     LOWEST.  SPSS 21 uses SYSMIS for LOWEST, which is OK because LOWEST only
+     appears in a context (missing values) where SYSMIS cannot.  So either
+     value is accepted here. */
+  if (!(file_lowest == LOWEST || file_lowest == SYSMIS))
+    {
+      sys_warn (r, record->pos,
+                _("File specifies unexpected value %g (%a) as %s, "
+                  "instead of %g (%a) or %g (%a)."),
+                file_lowest, file_lowest, "LOWEST", LOWEST, LOWEST,
+                SYSMIS, SYSMIS);
+    }
+}
+
+/* Parses record type 7, subtype 10.
+
+   Opens RECORD as a text record, passes its entire text through
+   fix_line_ends(), and stores the result in INFO->product_ext. */
+static void
+parse_extra_product_info (struct sfm_reader *r,
+                          const struct sfm_extension_record *record,
+                          struct sfm_read_info *info)
+{
+  struct text_record *text = open_text_record (r, record, true);
+
+  info->product_ext = fix_line_ends (text_get_all (text));
+
+  close_text_record (r, text);
+}
+
+/* Parses record type 7, subtype 7 or 19. */
+static void
+parse_mrsets (struct sfm_reader *r, const struct sfm_extension_record *record,
+ struct dictionary *dict)
+{
+ struct text_record *text;
+ struct mrset *mrset;
+
+ text = open_text_record (r, record, false);
+ for (;;)
+ {
+ const char *counted = NULL;
+ const char *name;
+ const char *label;
+ struct stringi_set var_names;
+ size_t allocated_vars;
+ char delimiter;
+ int width;
+
+ mrset = xzalloc (sizeof *mrset);
+
+ name = text_get_token (text, ss_cstr ("="), NULL);
+ if (name == NULL)
+ break;
+ mrset->name = recode_string ("UTF-8", r->encoding, name, -1);
+
+ if (mrset->name[0] != '$')
+ {
+ sys_warn (r, record->pos,
+ _("`%s' does not begin with `$' at offset %zu "
+ "in MRSETS record."), mrset->name, text_pos (text));
+ break;
+ }
+
+ if (text_match (text, 'C'))
+ {
+ mrset->type = MRSET_MC;
+ if (!text_match (text, ' '))
+ {
+ sys_warn (r, record->pos,
+ _("Missing space following `%c' at offset %zu "
+ "in MRSETS record."), 'C', text_pos (text));
+ break;
+ }
+ }
+ else if (text_match (text, 'D'))
+ {
+ mrset->type = MRSET_MD;
+ mrset->cat_source = MRSET_VARLABELS;
+ }
+ else if (text_match (text, 'E'))
+ {
+ char *number;
+
+ mrset->type = MRSET_MD;
+ mrset->cat_source = MRSET_COUNTEDVALUES;
+ if (!text_match (text, ' '))
+ {
+ sys_warn (r, record->pos,
+ _("Missing space following `%c' at offset %zu "
+ "in MRSETS record."), 'E', text_pos (text));
+ break;
+ }
+
+ number = text_get_token (text, ss_cstr (" "), NULL);
+ if (!strcmp (number, "11"))
+ mrset->label_from_var_label = true;
+ else if (strcmp (number, "1"))
+ sys_warn (r, record->pos,
+ _("Unexpected label source value `%s' following `E' "
+ "at offset %zu in MRSETS record."),
+ number, text_pos (text));
+ }
+ else
+ {
+ sys_warn (r, record->pos,
+ _("Missing `C', `D', or `E' at offset %zu "
+ "in MRSETS record."),
+ text_pos (text));
+ break;
+ }
+
+ if (mrset->type == MRSET_MD)
+ {
+ counted = text_parse_counted_string (r, text);
+ if (counted == NULL)
+ break;
+ }
+
+ label = text_parse_counted_string (r, text);
+ if (label == NULL)
+ break;
+ if (label[0] != '\0')
+ mrset->label = recode_string ("UTF-8", r->encoding, label, -1);
+
+ stringi_set_init (&var_names);
+ allocated_vars = 0;
+ width = INT_MAX;
+ do
+ {
+ const char *raw_var_name;
+ struct variable *var;
+ char *var_name;
+
+ raw_var_name = text_get_token (text, ss_cstr (" \n"), &delimiter);
+ if (raw_var_name == NULL)
+ {
+ sys_warn (r, record->pos,
+ _("Missing new-line parsing variable names "
+ "at offset %zu in MRSETS record."),
+ text_pos (text));
+ break;
+ }
+ var_name = recode_string ("UTF-8", r->encoding, raw_var_name, -1);
+
+ var = dict_lookup_var (dict, var_name);
+ if (var == NULL)
+ {
+ free (var_name);
+ continue;
+ }
+ if (!stringi_set_insert (&var_names, var_name))
+ {
+ sys_warn (r, record->pos,
+ _("Duplicate variable name %s "
+ "at offset %zu in MRSETS record."),
+ var_name, text_pos (text));
+ free (var_name);
+ continue;
+ }
+ free (var_name);
+
+ if (mrset->label == NULL && mrset->label_from_var_label
+ && var_has_label (var))
+ mrset->label = xstrdup (var_get_label (var));
+
+ if (mrset->n_vars
+ && var_get_type (var) != var_get_type (mrset->vars[0]))
+ {
+ sys_warn (r, record->pos,
+ _("MRSET %s contains both string and "
+ "numeric variables."), name);
+ continue;
+ }
+ width = MIN (width, var_get_width (var));
+
+ if (mrset->n_vars >= allocated_vars)
+ mrset->vars = x2nrealloc (mrset->vars, &allocated_vars,
+ sizeof *mrset->vars);
+ mrset->vars[mrset->n_vars++] = var;
+ }
+ while (delimiter != '\n');
+
+ if (mrset->n_vars < 2)
+ {
+ sys_warn (r, record->pos,
+ _("MRSET %s has only %zu variables."), mrset->name,
+ mrset->n_vars);
+ mrset_destroy (mrset);
+ stringi_set_destroy (&var_names);
+ continue;
+ }
+
+ if (mrset->type == MRSET_MD)
+ {
+ mrset->width = width;
+ value_init (&mrset->counted, width);
+ if (width == 0)
+ mrset->counted.f = c_strtod (counted, NULL);
+ else
+ value_copy_str_rpad (&mrset->counted, width,
+ (const uint8_t *) counted, ' ');
+ }
+
+ dict_add_mrset (dict, mrset);
+ mrset = NULL;
+ stringi_set_destroy (&var_names);
+ }
+ mrset_destroy (mrset);
+ close_text_record (r, text);