Merge commit 'origin/stable'

author John Darrington <john@darrington.wattle.id.au>

Wed, 16 Dec 2009 20:09:55 +0000 (21:09 +0100)

committer John Darrington <john@darrington.wattle.id.au>

Wed, 16 Dec 2009 20:09:55 +0000 (21:09 +0100)
author John Darrington <john@darrington.wattle.id.au>
Wed, 16 Dec 2009 20:09:55 +0000 (21:09 +0100)
committer John Darrington <john@darrington.wattle.id.au>
Wed, 16 Dec 2009 20:09:55 +0000 (21:09 +0100)
diff --combined NEWS

index 1aac688c8736a3b66322cf993fbc0f195ee9920f,f99fb17374fd2baf74b409df9b4d37fdbaf1a306..149d1deadbad2b44269d9a6b787ac9ed9ed10c80
--- 1/NEWS
--- 2/NEWS
+++ b/NEWS
@@@ -1,61 -1,11 +1,60 @@@
   PSPP NEWS -- history of user-visible changes.
- Time-stamp: <2009-09-08 21:08:29 blp>
+ Time-stamp: <2009-10-06 20:46:21 blp>
   Copyright (C) 1996-9, 2000, 2008, 2009 Free Software Foundation, Inc.
   See the end for copying conditions.
   
   Please send PSPP bug reports to bug-gnu-pspp@gnu.org.
   
- -Changes from 0.6.1 to 0.6.2-rc1:
+ +Changes from 0.7.1 to 0.7.2:
+ +
+ + * Updated Perl module interface.
+ +
+ + * Value labels for long string variables are now supported.
+ +
+ + * Missing values for long string variables are now supported.
+ +
+ +Changes from 0.7.0 to 0.7.1:
+ +
+ + *  Added a Perl module to facilitate reading and writing of PSPP system 
+ +    files from Perl programs.
+ +
+ +Changes from 0.6.2-pre6 to 0.7.0:
+ +
+ +  * Custom variable and data file attributes are now supported.
+ +    Commands VARIABLE ATTRIBUTE and DATAFILE ATTRIBUTE have been added
+ +    for setting and clearing attributes.  Support for attributes has also
+ +    been added to commands that read and write system files, such as
+ +    SAVE and GET, as well as to the DISPLAY command.
+ +
+ +  * Numerous improvements to the Graphical User Interface have
+ +    been made.  Notable improvements include:
+ +
+ +    - Non-Ascii characters in strings, labels and variable names are
+ +      now supported.
+ +
+ +    - A "Split Window" function is available, which makes it easier to
+ +      see different parts of a large data file.
+ +
+ +    - Data files can now be opened by specifying their name as the first
+ +      argument.  This means that on a properly configured desktop, double
+ +      clicking on an icon will open the file.
+ +    
+ +
+ +  * New statistical procedures:
+ +    - CORRELATIONS
+ +    - ROC
+ +    - RELIABILITY
+ +
+ +    NPAR TESTS now supports the WILCOXON and SIGN subcommands.
+ +
+ +    The CROSSTABS command has been completely re-implemented to fix numerous bugs.
+ +
+ +  * Three new commands to combine data files have been added: MATCH FILES,
+ +   UPDATE and  ADD FILES.
+ +
+ +  * A tutorial chapter has been added to the user manual.
+ +
- 
- Changes from 0.6.1 to 0.6.2-pre6:
++Changes from 0.6.1 to 0.6.2:
   
     * New translations:
   
@@@ -77,6 -27,8 +76,8 @@@
   
     * Build fixes and changes:
   
+     - Fix build with GTK+ 2.17.4 and later.
+ 
       - Make running "make" after running "configure" with different
         settings reliably rebuild version.c.
   
@@@ -100,6 -52,9 +101,9 @@@
   
       - Fix writing corrupted .sav files on Windows.
   
+     - Fix writing variable labels longer than 252 bytes to save files.
+       Thanks to Robert Westlund for reporting this bug.
+ 
       - Fix writing corrupted .por files (bug #26034).
   
       - Fix reading .por files whose initial lines are not padded out
diff --combined doc/dev/system-file-format.texi

index a404d0d6ce3961eed3ca1ec134a2ff9f6140adac,b1be385334bf5ef3f8aa1adfcfdecb10d9adaf3d..c1d1e42129a02c5e7fb7dfbf9f6daa528c2550dc
--- 1/doc/dev/system-file-format.texi
--- 2/doc/dev/system-file-format.texi
+++ b/doc/dev/system-file-format.texi
@@@ -96,9 -96,6 +96,9 @@@ Each type of record is described separa
   * Variable Display Parameter Record::
   * Long Variable Names Record::
   * Very Long String Record::
+ +* Character Encoding Record::
+ +* Long String Value Labels Record::
+ +* Data File and Variable Attributes Records::
   * Miscellaneous Informational Records::
   * Dictionary Termination Record::
   * Data Record::
@@@ -289,20 -286,15 +289,20 @@@ length @code{label_len}, rounded up to 
   The first @code{label_len} characters are the variable's variable label.
   
   @item flt64 missing_values[];
- -This field is present only if @code{n_missing_values} is not 0.  It has
- -the same number of elements as the absolute value of
- -@code{n_missing_values}.  For discrete missing values, each element
- -represents one missing value.  When a range is present, the first
- -element denotes the minimum value in the range, and the second element
- -denotes the maximum value in the range.  When a range plus a value are
- -present, the third element denotes the additional discrete missing
- -value.  HIGHEST and LOWEST are indicated as described in the chapter
- -introduction.
+ +This field is present only if @code{n_missing_values} is nonzero.  It
+ +has the same number of 8-byte elements as the absolute value of
+ +@code{n_missing_values}.  Each element is interpreted as a number for
+ +numeric variables (with HIGHEST and LOWEST indicated as described in
+ +the chapter introduction).  For string variables of width less than 8
+ +bytes, elements are right-padded with spaces; for string variables
+ +wider than 8 bytes, only the first 8 bytes of each missing value are
+ +specified, with the remainder implicitly all spaces.
+ +
+ +For discrete missing values, each element represents one missing
+ +value.  When a range is present, the first element denotes the minimum
+ +value in the range, and the second element denotes the maximum value
+ +in the range.  When a range plus a value are present, the third
+ +element denotes the additional discrete missing value.
   @end table
   
   The @code{print} and @code{write} members of sysfile_variable are output
@@@ -404,11 -396,6 +404,11 @@@ Format types are defined as follows
   @node Value Labels Records
   @section Value Labels Records
   
+ +The value label records documented in this section are used for
+ +numeric and short string variables only.  Long string variables may
+ +have value labels, but their value labels are recorded using a
+ +different record type (@pxref{Long String Value Labels Record}).
+ +
   The value label record has the following format:
   
   @example
@@@ -469,7 -456,7 +469,7 @@@ A list of dictionary indexes of variabl
   labels (@pxref{Dictionary Index}).  There are @code{var_count}
   elements.
   
- -String variables wider than 8 bytes may not have value labels.
+ +String variables wider than 8 bytes may not be specified in this list.
   @end table
   
   @node Document Record
@@@ -558,14 -545,9 +558,14 @@@ Compression code.  Always set to 1
   Machine endianness.  1 indicates big-endian, 2 indicates little-endian.
   
   @item int32 character_code;
+ +@anchor{character-code}
   Character code.  1 indicates EBCDIC, 2 indicates 7-bit ASCII, 3
   indicates 8-bit ASCII, 4 indicates DEC Kanji.
   Windows code page numbers are also valid.
+ +
+ +Experience has shown that in many files, this field is ignored or incorrect.
+ +For a more reliable indication of the file's character encoding
+ +see @ref{Character Encoding Record}.
   @end table
   
   @node Machine Floating-Point Info Record
@@@ -809,197 -791,6 +809,197 @@@ After the last tuple, there may be a si
   The total length is @code{count} bytes.
   @end table
   
+ +@node Character Encoding Record
+ +@section Character Encoding Record
+ +
+ +This record, if present, indicates the character encoding for string data,
+ +long variable names, variable labels, value labels and other strings in the
+ +file.
+ +
+ +@example
+ +/* @r{Header.} */
+ +int32               rec_type;
+ +int32               subtype;
+ +int32               size;
+ +int32               count;
+ +
+ +/* @r{Exactly @code{count} bytes of data.} */
+ +char                encoding[];
+ +@end example
+ +
+ +@table @code
+ +@item int32 rec_type;
+ +Record type.  Always set to 7.
+ +
+ +@item int32 subtype;
+ +Record subtype.  Always set to 20.
+ +
+ +@item int32 size;
+ +The size of each element in the @code{encoding} member. Always set to 1.
+ +
+ +@item int32 count;
+ +The total number of bytes in @code{encoding}.
+ +
+ +@item char encoding[];
+ +The name of the character encoding.  Normally this will be an official IANA character set name or alias.
+ +See @url{http://www.iana.org/assignments/character-sets}.
+ +@end table
+ +
+ +This record is not present in files generated by older software.
+ +See also @ref{character-code}.
+ +
+ +@node Long String Value Labels Record
+ +@section Long String Value Labels Record
+ +
+ +This record, if present, specifies value labels for long string
+ +variables.
+ +
+ +@example
+ +/* @r{Header.} */
+ +int32               rec_type;
+ +int32               subtype;
+ +int32               size;
+ +int32               count;
+ +
+ +/* @r{Repeated up to exactly @code{count} bytes.} */
+ +int32               var_name_len;
+ +char                var_name[];
+ +int32               var_width;
+ +int32               n_labels;
+ +long_string_label   labels[];
+ +@end example
+ +
+ +@table @code
+ +@item int32 rec_type;
+ +Record type.  Always set to 7.
+ +
+ +@item int32 subtype;
+ +Record subtype.  Always set to 21.
+ +
+ +@item int32 size;
+ +Always set to 1.
+ +
+ +@item int32 count;
+ +The number of bytes following the header until the next header.
+ +
+ +@item int32 var_name_len;
+ +@itemx char var_name[];
+ +The number of bytes in the name of the variable that has long string
+ +value labels, plus the variable name itself, which consists of exactly
+ +@code{var_name_len} bytes.  The variable name is not padded to any
+ +particular boundary, nor is it null-terminated.
+ +
+ +@item int32 var_width;
+ +The width of the variable, in bytes, which will be between 9 and
+ +32767.
+ +
+ +@item int32 n_labels;
+ +@itemx long_string_label labels[];
+ +The long string labels themselves.  The @code{labels} array contains
+ +exactly @code{n_labels} elements, each of which has the following
+ +substructure:
+ +
+ +@example
+ +int32               value_len;
+ +char                value[];
+ +int32               label_len;
+ +char                label[];
+ +@end example
+ +
+ +@table @code
+ +@item int32 value_len;
+ +@itemx char value[];
+ +The string value being labeled.  @code{value_len} is the number of
+ +bytes in @code{value}; it is equal to @code{var_width}.  The
+ +@code{value} array is not padded or null-terminated.
+ +
+ +@item int32 label_len;
+ +@itemx char label[];
+ +The label for the string value.  @code{label_len}, which must be
+ +between 0 and 120, is the number of bytes in @code{label}.  The
+ +@code{label} array is not padded or null-terminated.
+ +@end table
+ +@end table
+ +
+ +@node Data File and Variable Attributes Records
+ +@section Data File and Variable Attributes Records
+ +
+ +The data file and variable attributes records represent custom
+ +attributes for the system file or for individual variables in the
+ +system file, as defined on the DATAFILE ATTRIBUTE (@pxref{DATAFILE
+ +ATTRIBUTE,,,pspp, PSPP Users Guide}) and VARIABLE ATTRIBUTE commands
+ +(@pxref{VARIABLE ATTRIBUTE,,,pspp, PSPP Users Guide}), respectively.
+ +
+ +@example
+ +/* @r{Header.} */
+ +int32               rec_type;
+ +int32               subtype;
+ +int32               size;
+ +int32               count;
+ +
+ +/* @r{Exactly @code{count} bytes of data.} */
+ +char                attributes[];
+ +@end example
+ +
+ +@table @code
+ +@item int32 rec_type;
+ +Record type.  Always set to 7.
+ +
+ +@item int32 subtype;
+ +Record subtype.  Always set to 17 for a data file attribute record or
+ +to 18 for a variable attributes record.
+ +
+ +@item int32 size;
+ +The size of each element in the @code{attributes} member. Always set to 1.
+ +
+ +@item int32 count;
+ +The total number of bytes in @code{attributes}.
+ +
+ +@item char attributes[];
+ +The attributes, in a text-based format.
+ +
+ +In record subtype 17, this field contains a single attribute set.  An
+ +attribute set is a sequence of one or more attributes concatenated
+ +together.  Each attribute consists of a name, which has the same
+ +syntax as a variable name, followed by, inside parentheses, a sequence
+ +of one or more values.  Each value consists of a string enclosed in
+ +single quotes (@code{'}) followed by a line feed (byte 0x0a).  A value
+ +may contain single quote characters, which are not themselves escaped
+ +or quoted or required to be present in pairs.  There is no apparent
+ +way to embed a line feed in a value.  There is no distinction between
+ +an attribute with a single value and an attribute array with one
+ +element.
+ +
+ +In record subtype 18, this field contains a sequence of one or more
+ +variable attribute sets.  If more than one variable attribute set is
+ +present, each one after the first is delimited from the previous by
+ +@code{/}.  Each variable attribute set consists of a variable name,
+ +followed by @code{:}, followed by an attribute set with the same
+ +syntax as on record subtype 17.
+ +
+ +The total length is @code{count} bytes.
+ +@end table
+ +
+ +@subheading Example
+ +
+ +A system file produced with the following VARIABLE ATTRIBUTE commands
+ +in effect:
+ +
+ +@example
+ +VARIABLE ATTRIBUTE VARIABLES=dummy ATTRIBUTE=fred[1]('23') fred[2]('34').
+ +VARIABLE ATTRIBUTE VARIABLES=dummy ATTRIBUTE=bert('123').
+ +@end example
+ +
+ +@noindent
+ +will contain a variable attribute record with the following contents:
+ +
+ +@example
+ +00000000  07 00 00 00 12 00 00 00  01 00 00 00 22 00 00 00  |............"...|
+ +00000010  64 75 6d 6d 79 3a 66 72  65 64 28 27 32 33 27 0a  |dummy:fred('23'.|
+ +00000020  27 33 34 27 0a 29 62 65  72 74 28 27 31 32 33 27  |'34'.)bert('123'|
+ +00000030  0a 29                                             |.)              |
+ +@end example
+ +
   @node Miscellaneous Informational Records
   @section Miscellaneous Informational Records
   
@@@ -1093,6 -884,9 +1093,9 @@@ value @var{code} - @var{bias}, wher
   variable @code{bias} from the file header.  For example,
   code 105 with bias 100.0 (the normal value) indicates a numeric variable
   of value 5.
+ One file has been seen written by SPSS 14 that contained such a code
+ in a @emph{string} field with the value 0 (after the bias is
+ subtracted) as a way of encoding null bytes.
   
   @item 252
   End of file.  This code may or may not appear at the end of the data
diff --combined src/data/sys-file-reader.c

index b024e4f0ef81532d51f7024a9b2ea352efba84f6,8d973e4dbc103d09860e4c14cc12a006fda0bfcd..f63a122fe83b96776b632cef57b51e49c83c4bd1
--- 1/src/data/sys-file-reader.c
--- 2/src/data/sys-file-reader.c
+++ b/src/data/sys-file-reader.c
@@@ -25,7 -25,6 +25,7 @@@
   #include <setjmp.h>
   #include <stdlib.h>
   
+ +#include <libpspp/i18n.h>
   #include <libpspp/assertion.h>
   #include <libpspp/message.h>
   #include <libpspp/compiler.h>
@@@ -35,7 -34,6 +35,7 @@@
   #include <libpspp/hash.h>
   #include <libpspp/array.h>
   
+ +#include <data/attributes.h>
   #include <data/case.h>
   #include <data/casereader-provider.h>
   #include <data/casereader.h>
@@@ -72,7 -70,7 +72,7 @@@ struct sfm_reade
       struct fh_lock *lock;       /* Mutual exclusion for file handle. */
       FILE *file;                 /* File stream. */
       bool error;                 /* I/O or corruption error? */
- -    size_t value_cnt;           /* Number of "union value"s in struct case. */
+ +    struct caseproto *proto;    /* Format of output cases. */
   
       /* File format. */
       enum integer_format integer_format; /* On-disk integer format. */
@@@ -88,6 -86,7 +88,7 @@@
       double bias;              /* Compression bias, usually 100.0. */
       uint8_t opcodes[8];         /* Current block of opcodes. */
       size_t opcode_idx;          /* Next opcode to interpret, 8 if none left. */
+     bool corruption_warning;    /* Warned about possible corruption? */
     };
   
   static const struct casereader_class sys_file_casereader_class;
@@@ -100,11 -99,9 +101,11 @@@ static struct variable *lookup_var_by_v
                                                    struct variable **,
                                                    int value_idx);
   
+ +static void sys_msg (struct sfm_reader *r, int class,
+ +                     const char *format, va_list args)
+ +     PRINTF_FORMAT (3, 0);
   static void sys_warn (struct sfm_reader *, const char *, ...)
        PRINTF_FORMAT (2, 3);
- -
   static void sys_error (struct sfm_reader *, const char *, ...)
        PRINTF_FORMAT (2, 3)
        NO_RETURN;
@@@ -116,23 -113,15 +117,23 @@@ static double read_float (struct sfm_re
   static void read_string (struct sfm_reader *, char *, size_t);
   static void skip_bytes (struct sfm_reader *, size_t);
   
- -static struct variable_to_value_map *open_variable_to_value_map (
- -  struct sfm_reader *, size_t size);
- -static void close_variable_to_value_map (struct sfm_reader *r,
- -                                         struct variable_to_value_map *);
- -static bool read_variable_to_value_map (struct sfm_reader *,
- -                                        struct dictionary *,
- -                                        struct variable_to_value_map *,
- -                                        struct variable **var, char **value,
- -                                        int *warning_cnt);
+ +static struct text_record *open_text_record (struct sfm_reader *, size_t size);
+ +static void close_text_record (struct sfm_reader *r,
+ +                               struct text_record *);
+ +static bool read_variable_to_value_pair (struct sfm_reader *,
+ +                                         struct dictionary *,
+ +                                         struct text_record *,
+ +                                         struct variable **var, char **value);
+ +static void text_warn (struct sfm_reader *r, struct text_record *text,
+ +                       const char *format, ...)
+ +  PRINTF_FORMAT (3, 4);
+ +static char *text_get_token (struct text_record *,
+ +                             struct substring delimiters);
+ +static bool text_match (struct text_record *, char c);
+ +static bool text_read_short_name (struct sfm_reader *, struct dictionary *,
+ +                                  struct text_record *,
+ +                                  struct substring delimiters,
+ +                                  struct variable **);
   
   static bool close_reader (struct sfm_reader *r);
   \f
@@@ -163,9 -152,7 +164,9 @@@ static void read_extension_record (stru
                                      struct sfm_read_info *);
   static void read_machine_integer_info (struct sfm_reader *,
                                          size_t size, size_t count,
- -                                       struct sfm_read_info *);
+ +                                       struct sfm_read_info *,
+ +                                     struct dictionary *
+ +                                     );
   static void read_machine_float_info (struct sfm_reader *,
                                        size_t size, size_t count);
   static void read_display_parameters (struct sfm_reader *,
@@@ -177,71 -164,7 +178,71 @@@ static void read_long_var_name_map (str
   static void read_long_string_map (struct sfm_reader *,
                                     size_t size, size_t count,
                                     struct dictionary *);
+ +static void read_data_file_attributes (struct sfm_reader *,
+ +                                       size_t size, size_t count,
+ +                                       struct dictionary *);
+ +static void read_variable_attributes (struct sfm_reader *,
+ +                                      size_t size, size_t count,
+ +                                      struct dictionary *);
+ +static void read_long_string_value_labels (struct sfm_reader *,
+ +                                         size_t size, size_t count,
+ +                                         struct dictionary *);
+ +
+ +/* Convert all the strings in DICT from the dict encoding to UTF8 */
+ +static void
+ +recode_strings (struct dictionary *dict)
+ +{
+ +  int i;
+ +
+ +  const char *enc = dict_get_encoding (dict);
+ +
+ +  if ( NULL == enc)
+ +    enc = get_default_encoding ();
+ +
+ +  for (i = 0 ; i < dict_get_var_cnt (dict); ++i)
+ +    {
+ +      /* Convert the long variable name */
+ +      struct variable *var = dict_get_var (dict, i);
+ +      const char *native_name = var_get_name (var);
+ +      char *utf8_name = recode_string (UTF8, enc, native_name, -1);
+ +      if ( 0 != strcmp (utf8_name, native_name))
+ +      {
+ +        if ( NULL == dict_lookup_var (dict, utf8_name))
+ +          dict_rename_var (dict, var, utf8_name);
+ +        else
+ +          msg (MW,
+ +           _("Recoded variable name duplicates an existing `%s' within system file."), utf8_name);
+ +    }
+ +
+ +      free (utf8_name);
+ +
+ +      /* Convert the variable label */
+ +      if (var_has_label (var))
+ +      {
+ +        char *utf8_label = recode_string (UTF8, enc, var_get_label (var), -1);
+ +        var_set_label (var, utf8_label);
+ +        free (utf8_label);
+ +      }
+ +
+ +      if (var_has_value_labels (var))
+ +      {
+ +        const struct val_lab *vl = NULL;
+ +        const struct val_labs *vlabs = var_get_value_labels (var);
+ +
+ +        for (vl = val_labs_first (vlabs); vl != NULL; vl = val_labs_next (vlabs, vl))
+ +          {
+ +            const union value *val = val_lab_get_value (vl);
+ +            const char *label = val_lab_get_label (vl);
+ +            char *new_label = NULL;
   
+ +            new_label = recode_string (UTF8, enc, label, -1);
+ +
+ +            var_replace_value_label (var, val, new_label);
+ +            free (new_label);
+ +          }
+ +      }
+ +    }
+ +}
   
   /* Opens the system file designated by file handle FH for
      reading.  Reads the system file's dictionary into *DICT.
@@@ -270,6 -193,7 +271,7 @@@ sfm_open_reader (struct file_handle *fh
     r->oct_cnt = 0;
     r->has_long_var_names = false;
     r->opcode_idx = sizeof r->opcodes;
+   r->corruption_warning = false;
   
     /* TRANSLATORS: this fragment will be interpolated into
        messages in fh_lock() that identify types of files. */
@@@ -360,8 -284,6 +362,8 @@@
         r->has_long_var_names = true;
       }
   
+ +  recode_strings (*dict);
+ +
     /* Read record 999 data, which is just filler. */
     read_int (r);
   
@@@ -381,11 -303,11 +383,11 @@@
        dictionary and may destroy or modify its variables. */
     sfm_dictionary_to_sfm_vars (*dict, &r->sfm_vars, &r->sfm_var_cnt);
     pool_register (r->pool, free, r->sfm_vars);
+ +  r->proto = caseproto_ref_pool (dict_get_proto (*dict), r->pool);
   
     pool_free (r->pool, var_by_value_idx);
- -  r->value_cnt = dict_get_next_value_idx (*dict);
     return casereader_create_sequential
- -    (NULL, r->value_cnt,
+ +    (NULL, r->proto,
        r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
                                          &sys_file_casereader_class, r);
   
@@@ -589,7 -511,7 +591,7 @@@ read_variable_record (struct sfm_reade
   
     /* Create variable. */
     if (width < 0 || width > 255)
- -    sys_error (r, _("Bad variable width %d."), width);
+ +    sys_error (r, _("Bad width %d for variable %s."), width, name);
     var = dict_create_var (dict, name, width);
     if (var == NULL)
       sys_error (r,
@@@ -623,7 -545,7 +625,7 @@@
         struct missing_values mv;
         int i;
   
- -      mv_init (&mv, var_get_width (var));
+ +      mv_init_pool (r->pool, &mv, var_get_width (var));
         if (var_is_numeric (var))
           {
             if (missing_value_code < -3 || missing_value_code > 3
@@@ -642,24 -564,21 +644,24 @@@
           }
         else
           {
+ +          int mv_width = MAX (width, 8);
+ +          union value value;
+ +
             if (missing_value_code < 1 || missing_value_code > 3)
               sys_error (r, _("String missing value indicator field is not "
                               "0, 1, 2, or 3."));
- -          if (var_is_long_string (var))
- -            sys_warn (r, _("Ignoring missing values on long string variable "
- -                           "%s, which PSPP does not yet support."), name);
+ +
+ +          value_init (&value, mv_width);
+ +          value_set_missing (&value, mv_width);
             for (i = 0; i < missing_value_code; i++)
               {
- -              char string[9];
- -              read_string (r, string, sizeof string);
- -              mv_add_str (&mv, string);
+ +              uint8_t *s = value_str_rw (&value, mv_width);
+ +              read_bytes (r, s, 8);
+ +              mv_add_str (&mv, s);
               }
+ +          value_destroy (&value, mv_width);
           }
- -      if (!var_is_long_string (var))
- -        var_set_missing_values (var, &mv);
+ +      var_set_missing_values (var, &mv);
       }
   
     /* Set formats. */
@@@ -804,7 -723,7 +806,7 @@@ read_extension_record (struct sfm_reade
     switch (subtype)
       {
       case 3:
- -      read_machine_integer_info (r, size, count, info);
+ +      read_machine_integer_info (r, size, count, info, dict);
         return;
   
       case 4:
@@@ -847,28 -766,21 +849,28 @@@
         break;
   
       case 17:
- -      /* Text field that defines variable attributes.  New in
- -         SPSS 14. */
- -      break;
+ +      read_data_file_attributes (r, size, count, dict);
+ +      return;
+ +
+ +    case 18:
+ +      read_variable_attributes (r, size, count, dict);
+ +      return;
   
       case 20:
         /* New in SPSS 16.  Contains a single string that describes
            the character encoding, e.g. "windows-1252". */
- -      break;
+ +      {
+ +      char *encoding = pool_calloc (r->pool, size, count + 1);
+ +      read_string (r, encoding, count + 1);
+ +      dict_set_encoding (dict, encoding);
+ +      return;
+ +      }
   
       case 21:
         /* New in SPSS 16.  Encodes value labels for long string
            variables. */
- -      sys_warn (r, _("Ignoring value labels for long string variables, "
- -                     "which PSPP does not yet support."));
- -      break;
+ +      read_long_string_value_labels (r, size, count, dict);
+ +      return;
   
       default:
         sys_warn (r, _("Unrecognized record type 7, subtype %d.  Please send a copy of this file, and the syntax which created it to %s"),
@@@ -882,8 -794,7 +884,8 @@@
   /* Read record type 7, subtype 3. */
   static void
   read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count,
- -                           struct sfm_read_info *info)
+ +                           struct sfm_read_info *info,
+ +                         struct dictionary *dict)
   {
     int version_major = read_int (r);
     int version_minor = read_int (r);
@@@ -892,7 -803,7 +894,7 @@@
     int float_representation = read_int (r);
     int compression_code UNUSED = read_int (r);
     int integer_representation = read_int (r);
- -  int character_code UNUSED = read_int (r);
+ +  int character_code = read_int (r);
   
     int expected_float_format;
     int expected_integer_format;
@@@ -931,53 -842,12 +933,53 @@@
       NOT_REACHED ();
     if (integer_representation != expected_integer_format)
       {
- -      static const char *const endian[] = {N_("little-endian"), N_("big-endian")};
+ +      static const char *const endian[] = {N_("Little Endian"), N_("Big Endian")};
         sys_warn (r, _("Integer format indicated by system file (%s) "
                        "differs from expected (%s)."),
                   gettext (endian[integer_representation == 1]),
                   gettext (endian[expected_integer_format == 1]));
       }
+ +
+ +
+ +  /*
+ +    Record 7 (20) provides a much more reliable way of
+ +    setting the encoding.
+ +    The character_code is used as a fallback only.
+ +  */
+ +  if ( NULL == dict_get_encoding (dict))
+ +    {
+ +      switch (character_code)
+ +      {
+ +      case 1:
+ +        dict_set_encoding (dict, "EBCDIC-US");
+ +        break;
+ +      case 2:
+ +      case 3:
+ +        /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
+ +           respectively.   However, there are known to be many files
+ +           in the wild with character code 2, yet have data which are
+ +           clearly not ascii.
+ +           Therefore we ignore these values.
+ +        */
+ +        return;
+ +      case 4:
+ +        dict_set_encoding (dict, "MS_KANJI");
+ +        break;
+ +      case 65000:
+ +        dict_set_encoding (dict, "UTF-7");
+ +        break;
+ +      case 65001:
+ +        dict_set_encoding (dict, "UTF-8");
+ +        break;
+ +      default:
+ +        {
+ +          char enc[100];
+ +          snprintf (enc, 100, "CP%d", character_code);
+ +          dict_set_encoding (dict, enc);
+ +        }
+ +        break;
+ +      };
+ +    }
   }
   
   /* Read record type 7, subtype 4. */
@@@ -993,16 -863,11 +995,16 @@@ read_machine_float_info (struct sfm_rea
                  size, count);
   
     if (sysmis != SYSMIS)
- -    sys_warn (r, _("File specifies unexpected value %g as SYSMIS."), sysmis);
+ +    sys_warn (r, _("File specifies unexpected value %g as %s."),
+ +              sysmis, "SYSMIS");
+ +
     if (highest != HIGHEST)
- -    sys_warn (r, _("File specifies unexpected value %g as HIGHEST."), highest);
+ +    sys_warn (r, _("File specifies unexpected value %g as %s."),
+ +              highest, "HIGHEST");
+ +
     if (lowest != LOWEST)
- -    sys_warn (r, _("File specifies unexpected value %g as LOWEST."), lowest);
+ +    sys_warn (r, _("File specifies unexpected value %g as %s."),
+ +              lowest, "LOWEST");
   }
   
   /* Read record type 7, subtype 11, which specifies how variables
@@@ -1081,12 -946,14 +1083,12 @@@ static voi
   read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count,
                           struct dictionary *dict)
   {
- -  struct variable_to_value_map *map;
+ +  struct text_record *text;
     struct variable *var;
     char *long_name;
- -  int warning_cnt = 0;
   
- -  map = open_variable_to_value_map (r, size * count);
- -  while (read_variable_to_value_map (r, dict, map, &var, &long_name,
- -                                     &warning_cnt))
+ +  text = open_text_record (r, size * count);
+ +  while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
       {
         char **short_names;
         size_t short_name_cnt;
@@@ -1132,7 -999,7 +1134,7 @@@
           }
         free (short_names);
       }
- -  close_variable_to_value_map (r, map);
+ +  close_text_record (r, text);
     r->has_long_var_names = true;
   }
   
@@@ -1142,12 -1009,14 +1144,12 @@@ static voi
   read_long_string_map (struct sfm_reader *r, size_t size, size_t count,
                         struct dictionary *dict)
   {
- -  struct variable_to_value_map *map;
+ +  struct text_record *text;
     struct variable *var;
     char *length_s;
- -  int warning_cnt = 0;
   
- -  map = open_variable_to_value_map (r, size * count);
- -  while (read_variable_to_value_map (r, dict, map, &var, &length_s,
- -                                     &warning_cnt))
+ +  text = open_text_record (r, size * count);
+ +  while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
       {
         size_t idx = var_get_dict_index (var);
         long int length;
@@@ -1195,7 -1064,7 +1197,7 @@@
         dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
         var_set_width (var, length);
       }
- -  close_variable_to_value_map (r, map);
+ +  close_text_record (r, text);
     dict_compact_values (dict);
   }
   
@@@ -1209,7 -1078,7 +1211,7 @@@ read_value_labels (struct sfm_reader *r
   
     struct label
       {
- -      char raw_value[8];        /* Value as uninterpreted bytes. */
+ +      uint8_t raw_value[8];        /* Value as uninterpreted bytes. */
         union value value;        /* Value. */
         char *label;              /* Null-terminated label string. */
       };
@@@ -1219,7 -1088,6 +1221,7 @@@
   
     struct variable **var = NULL;       /* Associated variables. */
     int var_cnt;                        /* Number of associated variables. */
+ +  int max_width;                /* Maximum width of string variables. */
   
     int i;
   
@@@ -1278,15 -1146,12 +1280,15 @@@
   
     /* Read the list of variables. */
     var = pool_nalloc (subpool, var_cnt, sizeof *var);
+ +  max_width = 0;
     for (i = 0; i < var_cnt; i++)
       {
         var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int (r));
- -      if (var_is_long_string (var[i]))
- -        sys_error (r, _("Value labels are not allowed on long string "
- -                        "variables (%s)."), var_get_name (var[i]));
+ +      if (var_get_width (var[i]) > 8)
+ +        sys_error (r, _("Value labels may not be added to long string "
+ +                        "variables (e.g. %s) using records types 3 and 4."),
+ +                   var_get_name (var[i]));
+ +      max_width = MAX (max_width, var_get_width (var[i]));
       }
   
     /* Type check the variables. */
@@@ -1305,10 -1170,9 +1307,10 @@@
       {
         struct label *label = labels + i;
   
+ +      value_init_pool (subpool, &label->value, max_width);
         if (var_is_alpha (var[0]))
- -        buf_copy_rpad (label->value.s, sizeof label->value.s,
- -                       label->raw_value, sizeof label->raw_value);
+ +        u8_buf_copy_rpad (value_str_rw (&label->value, max_width), max_width,
+ +                       label->raw_value, sizeof label->raw_value, ' ');
         else
           label->value.f = float_get_double (r->float_format, label->raw_value);
       }
@@@ -1330,7 -1194,7 +1332,7 @@@
                             label->value.f, var_get_name (v));
                 else
                   sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
- -                          var_get_width (v), label->value.s,
+ +                          max_width, value_str (&label->value, max_width),
                             var_get_name (v));
               }
         }
@@@ -1338,203 -1202,6 +1340,203 @@@
   
     pool_destroy (subpool);
   }
+ +
+ +/* Reads a set of custom attributes from TEXT into ATTRS.
+ +   ATTRS may be a null pointer, in which case the attributes are
+ +   read but discarded.
+ +
+ +   Each attribute is a key terminated by '(' and followed by one or
+ +   more values, each terminated by a new-line; a ')' immediately
+ +   after a value ends the attribute.  Reading stops when no further
+ +   key can be extracted or when a '/' follows an attribute.
+ +   Missing or unquoted values are reported on R via text_warn(). */
+ +static void
+ +read_attributes (struct sfm_reader *r, struct text_record *text,
+ +                 struct attrset *attrs)
+ +{
+ +  do
+ +    {
+ +      struct attribute *attr;
+ +      char *key;
+ +      int index;
+ +
+ +      /* Parse the key.  When no key can be extracted there is
+ +         nothing more to read, so return. */
+ +      key = text_get_token (text, ss_cstr ("("));
+ +      if (key == NULL)
+ +        return;
+ +
+ +      attr = attribute_create (key);
+ +      for (index = 1; ; index++)        /* INDEX is used only in warnings. */
+ +        {
+ +          /* Parse the value. */
+ +          char *value;
+ +          size_t length;
+ +
+ +          value = text_get_token (text, ss_cstr ("\n"));
+ +          if (value == NULL)
+ +            {
+ +              text_warn (r, text, _("Error parsing attribute value %s[%d]"),
+ +                         key, index);
+ +              break;
+ +            }              
+ +
+ +          length = strlen (value);
+ +          if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'') 
+ +            {
+ +              value[length - 1] = '\0';        /* Drop the closing quote. */
+ +              attribute_add_value (attr, value + 1);  /* Skip the opening quote. */
+ +            }
+ +          else 
+ +            {
+ +              text_warn (r, text,
+ +                         _("Attribute value %s[%d] is not quoted: %s"),
+ +                         key, index, value);
+ +              attribute_add_value (attr, value);      /* Keep the text as is. */
+ +            }
+ +
+ +          /* Was this the last value for this attribute? */
+ +          if (text_match (text, ')'))
+ +            break;
+ +        }
+ +      if (attrs != NULL)        /* A null ATTRS means parse and discard. */
+ +        attrset_add (attrs, attr);
+ +      else
+ +        attribute_destroy (attr);
+ +    }
+ +  while (!text_match (text, '/'));      /* A '/' ends this attribute set. */
+ +}
+ +
+ +/* Reads record type 7, subtype 17: the custom attributes that
+ +   belong to the data file as a whole.  The SIZE * COUNT bytes of
+ +   the record are loaded as a text record and parsed into DICT's
+ +   own attribute set. */
+ +static void
+ +read_data_file_attributes (struct sfm_reader *r,
+ +                           size_t size, size_t count,
+ +                           struct dictionary *dict)
+ +{
+ +  struct text_record *record;
+ +
+ +  record = open_text_record (r, size * count);
+ +  read_attributes (r, record, dict_get_attributes (dict));
+ +  close_text_record (r, record);
+ +}
+ +
+ +/* Skips N_LABELS value/label pairs in R without interpreting
+ +   them.  Each item is stored as a length followed by that many
+ +   bytes. */
+ +static void
+ +skip_long_string_value_labels (struct sfm_reader *r, size_t n_labels)
+ +{
+ +  size_t remaining = n_labels;
+ +
+ +  while (remaining-- > 0)
+ +    {
+ +      size_t n_value_bytes;
+ +      size_t n_label_bytes;
+ +
+ +      /* The value comes first... */
+ +      n_value_bytes = read_int (r);
+ +      skip_bytes (r, n_value_bytes);
+ +
+ +      /* ...then its label. */
+ +      n_label_bytes = read_int (r);
+ +      skip_bytes (r, n_label_bytes);
+ +    }
+ +}
+ +
+ +/* Reads a long string value labels record that occupies
+ +   SIZE * COUNT bytes of R's file and attaches the labels to
+ +   variables in D.
+ +
+ +   The record is a sequence of per-variable groups.  Each group
+ +   gives a variable name, a width, and a label count, followed by
+ +   that many (value, label) pairs, every item preceded by its
+ +   length.  A group that names an unknown variable, a numeric
+ +   variable, or a width different from the variable's width is
+ +   skipped with a warning; so is an individual value whose length
+ +   is not the variable's width.  Label text beyond what fits in
+ +   the local buffer is discarded. */
+ +static void
+ +read_long_string_value_labels (struct sfm_reader *r,
+ +                             size_t size, size_t count,
+ +                             struct dictionary *d)
+ +{
+ +  const off_t start = ftello (r->file);
+ +  while (ftello (r->file) - start < size * count)
+ +    {
+ +      char var_name[VAR_NAME_LEN + 1];
+ +      size_t n_labels, i;
+ +      struct variable *v;
+ +      union value value;
+ +      int var_name_len;
+ +      int width;
+ +
+ +      /* Read header.
+ +         NOTE(review): a negative name length is not rejected
+ +         here; confirm how read_string() treats a size of zero
+ +         or less. */
+ +      var_name_len = read_int (r);
+ +      if (var_name_len > VAR_NAME_LEN)
+ +        sys_error (r, _("Variable name length in long string value label "
+ +                        "record (%d) exceeds %d-byte limit."),
+ +                   var_name_len, VAR_NAME_LEN);
+ +      read_string (r, var_name, var_name_len + 1);
+ +      width = read_int (r);
+ +      n_labels = read_int (r);
+ +
+ +      /* Validate the header against the dictionary.  On any
+ +         mismatch the group's labels are skipped so that reading
+ +         can resume at the next group. */
+ +      v = dict_lookup_var (d, var_name);
+ +      if (v == NULL)
+ +        {
+ +          sys_warn (r, _("Ignoring long string value record for "
+ +                         "unknown variable %s."), var_name);
+ +          skip_long_string_value_labels (r, n_labels);
+ +          continue;
+ +        }
+ +      if (var_is_numeric (v))
+ +        {
+ +          sys_warn (r, _("Ignoring long string value record for "
+ +                         "numeric variable %s."), var_name);
+ +          skip_long_string_value_labels (r, n_labels);
+ +          continue;
+ +        }
+ +      if (width != var_get_width (v))
+ +        {
+ +          sys_warn (r, _("Ignoring long string value record for variable %s "
+ +                         "because the record's width (%d) does not match the "
+ +                         "variable's width (%d)"),
+ +                    var_name, width, var_get_width (v));
+ +          skip_long_string_value_labels (r, n_labels);
+ +          continue;
+ +        }
+ +
+ +      /* Read values. */
+ +      value_init_pool (r->pool, &value, width);
+ +      for (i = 0; i < n_labels; i++)
+ +        {
+ +          size_t value_length, label_length;
+ +          char label[256];
+ +          bool skip = false;
+ +
+ +          /* Read value. */
+ +          value_length = read_int (r);
+ +          if (value_length == width)
+ +            read_bytes (r, value_str_rw (&value, width), width);
+ +          else
+ +            {
+ +              sys_warn (r, _("Ignoring long string value %zu for variable %s, "
+ +                             "with width %d, that has bad value width %zu."),
+ +                        i, var_get_name (v), width, value_length);
+ +              skip_bytes (r, value_length);
+ +              skip = true;
+ +            }
+ +
+ +          /* Read label. */
+ +          label_length = read_int (r);
+ +          read_string (r, label, MIN (sizeof label, label_length + 1));
+ +          if (label_length >= sizeof label)
+ +            {
+ +              /* Skip and silently ignore label text after the
+ +                 first 255 bytes.  The maximum documented length
+ +                 of a label is 120 bytes so this is more than
+ +                 generous.
+ +
+ +                 The number of bytes left over is the full label
+ +                 length minus the sizeof label - 1 bytes already
+ +                 consumed.  The operands must stay in this order:
+ +                 the reverse would wrap around as a size_t. */
+ +              skip_bytes (r, (label_length + 1) - sizeof label);
+ +            }
+ +
+ +          if (!skip && !var_add_value_label (v, &value, label))
+ +            sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
+ +                      width, value_str (&value, width), var_get_name (v));
+ +        }
+ +    }
+ +}
+ +
+ +
+ +/* Reads record type 7, subtype 18: custom attributes attached to
+ +   individual variables.  The SIZE * COUNT bytes of the record are
+ +   loaded as a text record holding a series of variable names,
+ +   each terminated by ':' and followed by that variable's
+ +   attributes.  When the variable lookup yields a null pointer the
+ +   attributes are still parsed, but discarded. */
+ +static void
+ +read_variable_attributes (struct sfm_reader *r,
+ +                          size_t size, size_t count,
+ +                          struct dictionary *dict)
+ +{
+ +  struct text_record *record = open_text_record (r, size * count);
+ +  struct variable *target;
+ +
+ +  while (text_read_short_name (r, dict, record, ss_cstr (":"), &target))
+ +    {
+ +      if (target != NULL)
+ +        read_attributes (r, record, var_get_attributes (target));
+ +      else
+ +        read_attributes (r, record, NULL);
+ +    }
+ +  close_text_record (r, record);
+ +}
+ +
   \f
   /* Case reader. */
   
@@@ -1544,31 -1211,31 +1546,31 @@@ static void partial_record (struct sfm_
   static void read_error (struct casereader *, const struct sfm_reader *);
   
   static bool read_case_number (struct sfm_reader *, double *);
- -static bool read_case_string (struct sfm_reader *, char *, size_t);
+ +static bool read_case_string (struct sfm_reader *, uint8_t *, size_t);
   static int read_opcode (struct sfm_reader *);
   static bool read_compressed_number (struct sfm_reader *, double *);
- -static bool read_compressed_string (struct sfm_reader *, char *);
- -static bool read_whole_strings (struct sfm_reader *, char *, size_t);
+ +static bool read_compressed_string (struct sfm_reader *, uint8_t *);
+ +static bool read_whole_strings (struct sfm_reader *, uint8_t *, size_t);
   static bool skip_whole_strings (struct sfm_reader *, size_t);
   
- -/* Reads one case from READER's file into C.  Returns true only
- -   if successful. */
- -static bool
- -sys_file_casereader_read (struct casereader *reader, void *r_,
- -                          struct ccase *c)
+ +/* Reads and returns one case from READER's file.  Returns a null
+ +   pointer if not successful. */
+ +static struct ccase *
+ +sys_file_casereader_read (struct casereader *reader, void *r_)
   {
     struct sfm_reader *r = r_;
+ +  struct ccase *volatile c;
     int i;
   
     if (r->error)
- -    return false;
+ +    return NULL;
   
- -  case_create (c, r->value_cnt);
+ +  c = case_create (r->proto);
     if (setjmp (r->bail_out))
       {
         casereader_force_error (reader);
- -      case_destroy (c);
- -      return false;
+ +      case_unref (c);
+ +      return NULL;
       }
   
     for (i = 0; i < r->sfm_var_cnt; i++)
@@@ -1576,29 -1243,28 +1578,29 @@@
         struct sfm_var *sv = &r->sfm_vars[i];
         union value *v = case_data_rw_idx (c, sv->case_index);
   
- -      if (sv->width == 0)
+ +      if (sv->var_width == 0)
           {
             if (!read_case_number (r, &v->f))
               goto eof;
           }
         else
           {
- -          if (!read_case_string (r, v->s + sv->offset, sv->width))
+ +          uint8_t *s = value_str_rw (v, sv->var_width);
+ +          if (!read_case_string (r, s + sv->offset, sv->segment_width))
               goto eof;
             if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8)))
               partial_record (r);
           }
       }
- -  return true;
+ +  return c;
   
   eof:
- -  case_destroy (c);
+ +  case_unref (c);
     if (i != 0)
       partial_record (r);
     if (r->case_cnt != -1)
       read_error (reader, r);
- -  return false;
+ +  return NULL;
   }
   
   /* Issues an error that R ends in a partial record. */
@@@ -1645,7 -1311,7 +1647,7 @@@ read_case_number (struct sfm_reader *r
      Returns true if successful, false if end of file is
      reached immediately. */
   static bool
- -read_case_string (struct sfm_reader *r, char *s, size_t length)
+ +read_case_string (struct sfm_reader *r, uint8_t *s, size_t length)
   {
     size_t whole = ROUND_DOWN (length, 8);
     size_t partial = length % 8;
@@@ -1658,7 -1324,7 +1660,7 @@@
   
     if (partial)
       {
- -      char bounce[8];
+ +      uint8_t bounce[8];
         if (!read_whole_strings (r, bounce, sizeof bounce))
           {
             if (whole)
@@@ -1710,7 -1376,14 +1712,14 @@@ read_compressed_number (struct sfm_read
         break;
   
       case 254:
-       sys_error (r, _("Compressed data is corrupt."));
+       float_convert (r->float_format, "        ", FLOAT_NATIVE_DOUBLE, d);
+       if (!r->corruption_warning)
+         {
+           r->corruption_warning = true;
+           sys_warn (r, _("Possible compressed data corruption: "
+                          "compressed spaces appear in numeric field."));
+         }
+       break;
   
       case 255:
         *d = SYSMIS;
@@@ -1729,9 -1402,10 +1738,10 @@@
      Returns true if successful, false if end of file is
      reached immediately. */
   static bool
- -read_compressed_string (struct sfm_reader *r, char *dst)
+ +read_compressed_string (struct sfm_reader *r, uint8_t *dst)
   {
-   switch (read_opcode (r))
+   int opcode = read_opcode (r);
+   switch (opcode)
       {
       case -1:
       case 252:
@@@ -1746,7 -1420,25 +1756,25 @@@
         break;
   
       default:
-       sys_error (r, _("Compressed data is corrupt."));
+       {
+         double value = opcode - r->bias;
+         float_convert (FLOAT_NATIVE_DOUBLE, &value, r->float_format, dst);
+         if (value == 0.0)
+           {
+             /* This has actually been seen "in the wild".  The submitter of the
+                file that showed that the contents decoded as spaces, but they
+                were at the end of the field so it's possible that the null
+                bytes just acted as null terminators. */
+           }
+         else if (!r->corruption_warning)
+           {
+             r->corruption_warning = true;
+             sys_warn (r, _("Possible compressed data corruption: "
+                            "string contains compressed integer (opcode %d)"),
+                       opcode);
+           }
+       }
+       break;
       }
   
     return true;
@@@ -1758,7 -1450,7 +1786,7 @@@
      Returns true if successful, false if end of file is
      reached immediately. */
   static bool
- -read_whole_strings (struct sfm_reader *r, char *s, size_t length)
+ +read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
   {
     assert (length % 8 == 0);
     if (!r->compressed)
@@@ -1786,7 -1478,7 +1814,7 @@@
   static bool
   skip_whole_strings (struct sfm_reader *r, size_t length)
   {
- -  char buffer[1024];
+ +  uint8_t buffer[1024];
     assert (length < sizeof buffer);
     return read_whole_strings (r, buffer, length);
   }
@@@ -1871,124 -1563,82 +1899,124 @@@ lookup_var_by_short_name (struct dictio
     return NULL;
   }
   \f
- -/* Helpers for reading records that contain "variable=value"
- -   pairs. */
+ +/* Helpers for reading records that contain structured text
+ +   strings. */
+ +
+ +/* Maximum number of warnings to issue for a single text
+ +   record. */
+ +#define MAX_TEXT_WARNINGS 5
   
   /* State. */
- -struct variable_to_value_map
+ +struct text_record
     {
       struct substring buffer;    /* Record contents. */
       size_t pos;                 /* Current position in buffer. */
+ +    int n_warnings;             /* Number of warnings issued or suppressed. */
     };
   
- -/* Reads SIZE bytes into a "variable=value" map for R,
- -   and returns the map. */
- -static struct variable_to_value_map *
- -open_variable_to_value_map (struct sfm_reader *r, size_t size)
+ +/* Reads SIZE bytes into a text record for R,
+ +   and returns the new text record. */
+ +static struct text_record *
+ +open_text_record (struct sfm_reader *r, size_t size)
   {
- -  struct variable_to_value_map *map = pool_alloc (r->pool, sizeof *map);
+ +  struct text_record *text = pool_alloc (r->pool, sizeof *text);
     char *buffer = pool_malloc (r->pool, size + 1);
     read_bytes (r, buffer, size);
- -  map->buffer = ss_buffer (buffer, size);
- -  map->pos = 0;
- -  return map;
+ +  text->buffer = ss_buffer (buffer, size);
+ +  text->pos = 0;
+ +  text->n_warnings = 0;
+ +  return text;
   }
   
- -/* Closes MAP and frees its storage.
- -   Not really needed, because the pool will free the map anyway,
- -   but can be used to free it earlier. */
+ +/* Closes TEXT, frees its storage, and issues a final warning
+ +   about suppressed warnings if necessary. */
   static void
- -close_variable_to_value_map (struct sfm_reader *r,
- -                             struct variable_to_value_map *map)
+ +close_text_record (struct sfm_reader *r, struct text_record *text)
   {
- -  pool_free (r->pool, ss_data (map->buffer));
+ +  if (text->n_warnings > MAX_TEXT_WARNINGS)
+ +    sys_warn (r, _("Suppressed %d additional related warnings."),
+ +              text->n_warnings - MAX_TEXT_WARNINGS);
+ +  pool_free (r->pool, ss_data (text->buffer));
   }
   
- -/* Reads the next variable=value pair from MAP.
+ +/* Reads a variable=value pair from TEXT.
      Looks up the variable in DICT and stores it into *VAR.
      Stores a null-terminated value into *VALUE. */
   static bool
- -read_variable_to_value_map (struct sfm_reader *r, struct dictionary *dict,
- -                            struct variable_to_value_map *map,
- -                            struct variable **var, char **value,
- -                            int *warning_cnt)
+ +read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
+ +                             struct text_record *text,
+ +                             struct variable **var, char **value)
   {
- -  int max_warnings = 5;
- -
     for (;;)
       {
- -      struct substring short_name_ss, value_ss;
+ +      if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
+ +        return false;
+ +      
+ +      *value = text_get_token (text, ss_buffer ("\t\0", 2));
+ +      if (*value == NULL)
+ +        return false;
   
- -      if (!ss_tokenize (map->buffer, ss_cstr ("="), &map->pos, &short_name_ss)
- -          || !ss_tokenize (map->buffer, ss_buffer ("\t\0", 2), &map->pos,
- -                           &value_ss))
- -        {
- -          if (*warning_cnt > max_warnings)
- -            sys_warn (r, _("Suppressed %d additional variable map warnings."),
- -                      *warning_cnt - max_warnings);
- -          return false;
- -        }
+ +      text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX),
+ +                            ss_buffer ("\t\0", 2));
   
- -      map->pos += ss_span (ss_substr (map->buffer, map->pos, SIZE_MAX),
- -                           ss_buffer ("\t\0", 2));
+ +      if (*var != NULL)
+ +        return true;
+ +    }
+ +}
   
- -      ss_data (short_name_ss)[ss_length (short_name_ss)] = '\0';
- -      *var = lookup_var_by_short_name (dict, ss_data (short_name_ss));
- -      if (*var == NULL)
- -        {
- -          if (++*warning_cnt <= max_warnings)
- -            sys_warn (r, _("Variable map refers to unknown variable %s."),
- -                      ss_data (short_name_ss));
- -          continue;
- -        }
+ +static bool
+ +text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
+ +                      struct text_record *text, struct substring delimiters,
+ +                      struct variable **var)
+ +{
+ +  char *short_name = text_get_token (text, delimiters);
+ +  if (short_name == NULL)
+ +    return false;
   
- -      ss_data (value_ss)[ss_length (value_ss)] = '\0';
- -      *value = ss_data (value_ss);
+ +  *var = lookup_var_by_short_name (dict, short_name);
+ +  if (*var == NULL)
+ +    text_warn (r, text, _("Variable map refers to unknown variable %s."),
+ +               short_name);
+ +  return true;
+ +}
+ +
+ +/* Issues a warning about TEXT on behalf of R.  FORMAT and the
+ +   arguments that follow it are handed to sys_msg().  Every call
+ +   is counted in TEXT, but only the first MAX_TEXT_WARNINGS calls
+ +   actually produce a message. */
+ +static void
+ +text_warn (struct sfm_reader *r, struct text_record *text,
+ +           const char *format, ...)
+ +{
+ +  va_list ap;
+ +  int already_issued = text->n_warnings++;
  
+ +  /* Past the limit the warning is only counted. */
+ +  if (already_issued >= MAX_TEXT_WARNINGS)
+ +    return;
+ +
+ +  va_start (ap, format);
+ +  sys_msg (r, MW, format, ap);
+ +  va_end (ap);
+ +}
+ +
+ +/* Extracts the next token from TEXT, as delimited by any of the
+ +   bytes in DELIMITERS, and advances TEXT's position.  The token
+ +   is null-terminated in place inside TEXT's buffer and a pointer
+ +   to it is returned.  Returns a null pointer if no token could be
+ +   extracted. */
+ +static char *
+ +text_get_token (struct text_record *text, struct substring delimiters)
+ +{
+ +  struct substring tok;
+ +
+ +  if (ss_tokenize (text->buffer, delimiters, &text->pos, &tok))
+ +    {
+ +      ss_data (tok)[ss_length (tok)] = '\0';
+ +      return ss_data (tok);
+ +    }
+ +  return NULL;
+ +}
+ +
+ +/* If the byte at TEXT's current position is C, advances past it
+ +   and returns true.  Otherwise returns false and leaves the
+ +   position unchanged.  Also returns false when the position is
+ +   at or beyond the end of the record: nothing read from the file
+ +   lives there, so there is no byte to compare against. */
+ +static bool
+ +text_match (struct text_record *text, char c)
+ +{
+ +  if (text->pos < ss_length (text->buffer)
+ +      && text->buffer.string[text->pos] == c)
+ +    {
+ +      text->pos++;
        return true;
      }
+ +  else
+ +    return false;
  }
   \f
   /* Messages. */
author	John Darrington <john@darrington.wattle.id.au>
	Wed, 16 Dec 2009 20:09:55 +0000 (21:09 +0100)
committer	John Darrington <john@darrington.wattle.id.au>
	Wed, 16 Dec 2009 20:09:55 +0000 (21:09 +0100)
		1	2
NEWS	patch \|	diff1 \|	diff2 \|	blob \| history
doc/dev/system-file-format.texi	patch \|	diff1 \|	diff2 \|	blob \| history
src/data/sys-file-reader.c	patch \|	diff1 \|	diff2 \|	blob \| history