+/* Decodes one compressed 8-byte string segment from R into DST.
+
+   Returns 1 if DST was filled in, 0 if read_opcode() yields -1 or 252
+   (no segment is stored in that case), or -1 if the 8 literal bytes
+   requested by opcode 253 could not be read. */
+static int
+read_compressed_string (struct sfm_reader *r, uint8_t *dst)
+{
+  const int opcode = read_opcode (r);
+
+  /* Nothing more to decode. */
+  if (opcode == -1 || opcode == 252)
+    return 0;
+
+  /* Opcode 253: the segment is read via read_compressed_bytes(). */
+  if (opcode == 253)
+    return read_compressed_bytes (r, dst, 8) == 1 ? 1 : -1;
+
+  /* Opcode 254: the segment is eight spaces. */
+  if (opcode == 254)
+    {
+      memset (dst, ' ', 8);
+      return 1;
+    }
+
+  /* Any other opcode is treated as a compressed integer.  Store its value in
+     the file's floating-point format anyway.
+
+     A value of zero has actually been seen "in the wild".  The submitter of
+     that file showed that the contents decoded as spaces, but they were at
+     the end of the field so it's possible that the null bytes just acted as
+     null terminators.  Hence zero draws no warning; anything else warns
+     once per reader. */
+  double value = opcode - r->bias;
+  float_convert (FLOAT_NATIVE_DOUBLE, &value, r->float_format, dst);
+  if (value != 0.0 && !r->corruption_warning)
+    {
+      r->corruption_warning = true;
+      sys_warn (r, r->pos,
+                _("Possible compressed data corruption: "
+                  "string contains compressed integer (opcode %d)."),
+                opcode);
+    }
+  return 1;
+}
+
+/* Fills S with LENGTH string bytes from R, where LENGTH is a multiple of 8.
+   For an uncompressed file the bytes are read directly; otherwise they are
+   decoded one 8-byte segment at a time.
+
+   Returns 1 if successful, 0 if end of file is reached immediately, or -1
+   for some kind of error (including data that ends after at least one
+   segment has been decoded). */
+static int
+read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
+{
+  assert (length % 8 == 0);
+
+  if (r->compression == ANY_COMP_NONE)
+    return try_read_bytes (r, s, length);
+
+  size_t done = 0;
+  while (done < length)
+    {
+      int status = read_compressed_string (r, s + done);
+      if (status == 1)
+        {
+          done += 8;
+          continue;
+        }
+
+      /* A failure on the very first segment is passed through unchanged. */
+      if (done == 0)
+        return status;
+
+      /* Otherwise the record stopped partway through. */
+      partial_record (r);
+      return -1;
+    }
+  return 1;
+}
+
+/* Skips LENGTH string bytes from R.
+   LENGTH must be a multiple of 8.
+   (LENGTH is also limited to 1024, the size of the scratch buffer, but
+   that's only because the current caller never needs more than that many
+   bytes.)
+   Returns true if successful, false if end of file is reached immediately
+   or if an error occurs. */
+static bool
+skip_whole_strings (struct sfm_reader *r, size_t length)
+{
+  uint8_t buffer[1024];
+  assert (length <= sizeof buffer);
+
+  /* read_whole_strings() returns -1 on error.  Converting that directly to
+     bool would yield true, so compare against the success code instead. */
+  return read_whole_strings (r, buffer, length) == 1;
+}
+\f
+/* Helpers for reading records that contain structured text
+ strings. */
+
+/* Maximum number of warnings to display for a single text
+   record.  text_warn() keeps counting warnings beyond this limit
+   without displaying them. */
+#define MAX_TEXT_WARNINGS 5
+
+/* Parsing state for one text record, as set up by open_text_record(). */
+struct text_record
+ {
+ struct substring buffer; /* Record contents (the UTF-8 copy if recoded). */
+ off_t start; /* Starting offset in file. */
+ size_t pos; /* Current position in buffer. */
+ int n_warnings; /* Number of warnings issued or suppressed. */
+ bool recoded; /* Recoded into UTF-8? */
+ };
+
+/* Returns a new text_record, allocated from R's pool, for parsing the
+   contents of RECORD from its beginning.  If RECODE_TO_UTF8 is true, the
+   contents are first passed through recode_substring_pool() to convert them
+   from R's encoding to UTF-8; otherwise RECORD's data is used directly. */
+static struct text_record *
+open_text_record (struct sfm_reader *r,
+                  const struct sfm_extension_record *record,
+                  bool recode_to_utf8)
+{
+  struct text_record *text = pool_alloc (r->pool, sizeof *text);
+
+  struct substring contents = ss_buffer (record->data,
+                                         record->size * record->count);
+  if (recode_to_utf8)
+    contents = recode_substring_pool ("UTF-8", r->encoding, contents,
+                                      r->pool);
+
+  *text = (struct text_record) {
+    .buffer = contents,
+    .start = record->pos,
+    .pos = 0,
+    .n_warnings = 0,
+    .recoded = recode_to_utf8,
+  };
+  return text;
+}
+
+/* Finishes with TEXT.  If more than MAX_TEXT_WARNINGS warnings were counted
+   for it, reports how many were suppressed.  If TEXT's buffer is a recoded
+   copy, releases that copy back to R's pool. */
+static void
+close_text_record (struct sfm_reader *r, struct text_record *text)
+{
+  if (text->n_warnings > MAX_TEXT_WARNINGS)
+    {
+      int n_suppressed = text->n_warnings - MAX_TEXT_WARNINGS;
+      sys_warn (r, -1, _("Suppressed %d additional related warnings."),
+                n_suppressed);
+    }
+
+  if (!text->recoded)
+    return;
+  pool_free (r->pool, ss_data (text->buffer));
+}
+
+/* Reads variable=value pairs from TEXT until one names a variable that
+   exists in DICT, skipping pairs whose variable is unknown.
+   Stores the variable into *VAR and the null-terminated value into *VALUE.
+   Returns true if such a pair was found, false if TEXT ran out of names or
+   values first. */
+static bool
+read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
+                             struct text_record *text,
+                             struct variable **var, char **value)
+{
+  do
+    {
+      if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
+        return false;
+
+      *value = text_get_token (text, ss_buffer ("\t\0", 2), NULL);
+      if (*value == NULL)
+        return false;
+
+      /* Step over any run of tabs and null bytes after the value. */
+      struct substring rest = ss_substr (text->buffer, text->pos, SIZE_MAX);
+      text->pos += ss_span (rest, ss_buffer ("\t\0", 2));
+    }
+  while (*var == NULL);
+
+  return true;
+}
+
+/* Reads a token ended by one of DELIMITERS from TEXT and looks it up as a
+   variable name in DICT, storing the result in *VAR.
+   Returns true only if a token was read and it names a variable in DICT.
+   An unknown name draws a warning and a false return. */
+static bool
+text_read_variable_name (struct sfm_reader *r, struct dictionary *dict,
+                         struct text_record *text, struct substring delimiters,
+                         struct variable **var)
+{
+  char *token = text_get_token (text, delimiters, NULL);
+  if (token == NULL)
+    return false;
+
+  *var = dict_lookup_var (dict, token);
+  if (*var == NULL)
+    {
+      text_warn (r, text,
+                 _("Dictionary record refers to unknown variable %s."),
+                 token);
+      return false;
+    }
+  return true;
+}
+
+
+/* Reads a token ended by one of DELIMITERS from TEXT and looks it up as a
+   variable name in DICT, storing the result (possibly null) in *VAR.
+   Returns false only if no token could be read.  An unknown name draws a
+   warning but still returns true, with *VAR set to null. */
+static bool
+text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
+                      struct text_record *text, struct substring delimiters,
+                      struct variable **var)
+{
+  char *name = text_get_token (text, delimiters, NULL);
+  if (name == NULL)
+    return false;
+
+  struct variable *found = dict_lookup_var (dict, name);
+  *var = found;
+  if (found == NULL)
+    text_warn (r, text, _("Dictionary record refers to unknown variable %s."),
+               name);
+  return true;
+}
+
+/* Counts a warning against TEXT and, unless MAX_TEXT_WARNINGS warnings have
+   already been counted for it, displays the warning formatted from FORMAT
+   at TEXT's current position in the file. */
+static void
+text_warn (struct sfm_reader *r, struct text_record *text,
+           const char *format, ...)
+{
+  int n_before = text->n_warnings++;
+  if (n_before >= MAX_TEXT_WARNINGS)
+    return;
+
+  va_list ap;
+  va_start (ap, format);
+  sys_msg (r, text->start + text->pos, MW, format, ap);
+  va_end (ap);
+}
+
+/* Obtains the next token from TEXT via ss_tokenize(), using DELIMITERS, and
+   advances TEXT's position.  The token is null-terminated in place, so the
+   returned pointer refers into TEXT's buffer and must not be freed.
+   Returns a null pointer if ss_tokenize() finds no token.
+
+   If DELIMITER is nonnull, it receives the byte that followed the token
+   (before that byte is overwritten by the null terminator).  When no token
+   is found it receives the byte just before TEXT's position, or '\0' if the
+   position is still at the start of the buffer. */
+static char *
+text_get_token (struct text_record *text, struct substring delimiters,
+                char *delimiter)
+{
+  struct substring token;
+  char *end;
+
+  if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token))
+    {
+      if (delimiter != NULL)
+        {
+          /* Guard against indexing before the start of the buffer when the
+             position has not advanced at all. */
+          *delimiter = (text->pos > 0
+                        ? ss_data (text->buffer)[text->pos - 1]
+                        : '\0');
+        }
+      return NULL;
+    }
+
+  end = &ss_data (token)[ss_length (token)];
+  if (delimiter != NULL)
+    *delimiter = *end;
+  *end = '\0';
+  return ss_data (token);
+}
+
+/* Reads an integer value expressed in decimal, then a space, then a string
+   that consists of exactly as many bytes as specified by the integer, then a
+   space, from TEXT.  Returns the string, null-terminated, as a subset of
+   TEXT's buffer (so the caller should not free the string).
+
+   On malformed input, issues a warning and returns a null pointer.  The
+   count comes from file contents, so it is parsed and bounds-checked without
+   allowing size_t arithmetic to wrap around. */
+static const char *
+text_parse_counted_string (struct sfm_reader *r, struct text_record *text)
+{
+  size_t start;
+  size_t n;
+  char *s;
+
+  start = text->pos;
+  n = 0;
+  while (text->pos < text->buffer.length)
+    {
+      int c = text->buffer.string[text->pos];
+      if (c < '0' || c > '9')
+        break;
+
+      /* Saturate at SIZE_MAX instead of wrapping, so that an absurdly long
+         count is always rejected by the length check below. */
+      size_t digit = c - '0';
+      if (n > (SIZE_MAX - digit) / 10)
+        n = SIZE_MAX;
+      else
+        n = (n * 10) + digit;
+      text->pos++;
+    }
+  if (text->pos >= text->buffer.length || start == text->pos)
+    {
+      sys_warn (r, text->start,
+                _("Expecting digit at offset %zu in MRSETS record."),
+                text->pos);
+      return NULL;
+    }
+
+  if (!text_match (text, ' '))
+    {
+      sys_warn (r, text->start,
+                _("Expecting space at offset %zu in MRSETS record."),
+                text->pos);
+      return NULL;
+    }
+
+  /* text->pos cannot exceed the buffer length here, so the subtraction is
+     safe, whereas `text->pos + n' could wrap around for a huge N. */
+  if (n > text->buffer.length - text->pos)
+    {
+      sys_warn (r, text->start,
+                _("%zu-byte string starting at offset %zu "
+                  "exceeds record length %zu."),
+                n, text->pos, text->buffer.length);
+      return NULL;
+    }
+
+  /* NOTE(review): when the string ends exactly at the record length, s[n]
+     is the byte at index buffer.length; presumably the buffer carries a
+     terminator there -- confirm against the code that fills it. */
+  s = &text->buffer.string[text->pos];
+  if (s[n] != ' ')
+    {
+      sys_warn (r, text->start,
+                _("Expecting space at offset %zu following %zu-byte string."),
+                text->pos + n, n);
+      return NULL;
+    }
+  s[n] = '\0';
+  text->pos += n + 1;
+  return s;
+}
+
+/* If the byte at TEXT's current position is C, advances past it and returns
+   true.  Otherwise, including when TEXT is already at its end, leaves the
+   position alone and returns false. */
+static bool
+text_match (struct text_record *text, char c)
+{
+  bool matched = (text->pos < text->buffer.length
+                  && text->buffer.string[text->pos] == c);
+  if (matched)
+    text->pos++;
+  return matched;
+}
+
+/* Returns TEXT's current position, a byte offset into its buffer (that is,
+   into the UTF-8 form of the record, if it was converted). */
+static size_t
+text_pos (const struct text_record *text)
+{
+  const size_t offset = text->pos;
+  return offset;
+}
+
+/* Returns a pointer to the first byte of TEXT's buffer, regardless of TEXT's
+   current position. */
+static const char *
+text_get_all (const struct text_record *text)
+{
+  const char *contents = text->buffer.string;
+  return contents;
+}
+\f
+/* Messages. */
+
+/* Emits a message of the given CLASS about R's file.  The text is the file's
+   name, then OFFSET in hexadecimal if OFFSET is nonnegative, then FORMAT
+   expanded with ARGS. */
+static void
+sys_msg (struct sfm_reader *r, off_t offset,
+         int class, const char *format, va_list args)
+{
+  struct string buf;
+  ds_init_empty (&buf);
+
+  /* Prefix: which file and, when known, where in it. */
+  const char *file_name = fh_get_file_name (r->fh);
+  if (offset < 0)
+    ds_put_format (&buf, _("`%s': "), file_name);
+  else
+    ds_put_format (&buf, _("`%s' near offset 0x%llx: "),
+                   file_name, (long long int) offset);
+
+  /* The caller's message proper. */
+  ds_put_vformat (&buf, format, args);
+
+  struct msg *m = xmalloc (sizeof *m);
+  *m = (struct msg) {
+    .category = msg_class_to_category (class),
+    .severity = msg_class_to_severity (class),
+    .text = ds_steal_cstr (&buf),
+  };
+  msg_emit (m);
+}
+
+/* Issues a warning (message class MW), formatted from FORMAT and the
+   arguments after it, for offset OFFSET in R's file. */
+static void
+sys_warn (struct sfm_reader *r, off_t offset, const char *format, ...)
+{
+  va_list ap;
+
+  va_start (ap, format);
+  sys_msg (r, offset, MW, format, ap);
+  va_end (ap);
+}
+
+/* Displays an error for the current file position and marks it as in an error
+ state. */
+static void
+sys_error (struct sfm_reader *r, off_t offset, const char *format, ...)
+{
+ va_list args;
+
+ va_start (args, format);
+ sys_msg (r, offset, ME, format, args);
+ va_end (args);
+
+ r->error = true;
+}
+\f
+/* Reads N_BYTES bytes from R's file into BUF, advancing R's position by
+   however many bytes were actually read.
+   Returns 1 if exactly N_BYTES bytes are successfully read.
+   Returns 0 if end of file is reached before any byte is read and EOF_IS_OK
+   is true.
+   Returns -1, after reporting an error, for an I/O error, a partial read,
+   or an immediate end of file when EOF_IS_OK is false. */
+static inline int
+read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
+                     void *buf, size_t n_bytes)
+{
+  size_t n_read = fread (buf, 1, n_bytes, r->file);
+  r->pos += n_read;
+
+  if (n_read == n_bytes)
+    return 1;
+
+  if (ferror (r->file))
+    {
+      sys_error (r, r->pos, _("System error: %s."), strerror (errno));
+      return -1;
+    }
+
+  /* Short read without an I/O error: end of file.  That is acceptable only
+     if the caller allows it and nothing at all was read. */
+  if (eof_is_ok && n_read == 0)
+    return 0;
+
+  sys_error (r, r->pos, _("Unexpected end of file."));
+  return -1;
+}
+
+/* Reads BYTE_CNT into BUF.
+ Returns true if successful.
+ Returns false upon I/O error or if end-of-file is encountered. */
+static bool
+read_bytes (struct sfm_reader *r, void *buf, size_t n_bytes)
+{
+ return read_bytes_internal (r, false, buf, n_bytes) == 1;
+}
+
+/* Reads N_BYTES bytes from R into BUF, tolerating an immediate end of file.
+   Returns 1 if exactly N_BYTES bytes are successfully read.
+   Returns 0 if an immediate end-of-file is encountered.
+   Returns -1 if an I/O error or a partial read occurs. */
+static int
+try_read_bytes (struct sfm_reader *r, void *buf, size_t n_bytes)
+{
+  const bool eof_is_ok = true;
+  return read_bytes_internal (r, eof_is_ok, buf, n_bytes);
+}
+
+/* Reads 4 bytes from R, interprets them as an integer in R's integer format,
+   and stores the value in *X.  Returns true if successful; otherwise
+   returns false and leaves *X untouched. */
+static bool
+read_int (struct sfm_reader *r, int *x)
+{
+  uint8_t raw[4];
+  bool ok = read_bytes (r, raw, sizeof raw);
+  if (ok)
+    *x = integer_get (r->integer_format, raw, sizeof raw);
+  return ok;
+}
+
+/* Reads a 32-bit integer from R with read_int() and stores it, converted to
+   unsigned, in *X.  Returns true if successful, otherwise false, in which
+   case *X is set to 0. */
+static bool
+read_uint (struct sfm_reader *r, unsigned int *x)
+{
+  /* read_int() does not write its output on failure, so start from a
+     defined value rather than copying an uninitialized one into *X. */
+  int y = 0;
+  bool ok = read_int (r, &y);
+
+  *x = y;
+  return ok;
+}
+
+/* Reads 8 bytes from R, interprets them as an integer in R's integer format,
+   and stores the value in *X.  Returns true if successful; otherwise
+   returns false and leaves *X untouched. */
+static bool
+read_int64 (struct sfm_reader *r, long long int *x)
+{
+  uint8_t raw[8];
+  bool ok = read_bytes (r, raw, sizeof raw);
+  if (ok)
+    *x = integer_get (r->integer_format, raw, sizeof raw);
+  return ok;
+}
+
+/* Reads a 64-bit signed integer from R and returns its value in
+ host format. */