pintos-os.org Git - pspp/blob - src/language/data-io/data-parser.c

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 2007, 2009, 2010, 2011, 2012, 2013, 2016 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17 #include <config.h>
  18
  19 #include "language/data-io/data-parser.h"
  20
  21 #include <stdint.h>
  22 #include <stdlib.h>
  23
  24 #include "data/casereader-provider.h"
  25 #include "data/data-in.h"
  26 #include "data/dataset.h"
  27 #include "data/dictionary.h"
  28 #include "data/format.h"
  29 #include "data/file-handle-def.h"
  30 #include "data/settings.h"
  31 #include "language/data-io/data-reader.h"
  32 #include "libpspp/intern.h"
  33 #include "libpspp/message.h"
  34 #include "libpspp/str.h"
  35 #include "output/pivot-table.h"
  36
  37 #include "gl/xalloc.h"
  38
  39 #include "gettext.h"
  40 #define N_(msgid) msgid
  41 #define _(msgid) gettext (msgid)
  42
/* Data parser for textual data like that read by DATA LIST.

   TYPE selects between fixed-column (DP_FIXED) and delimited
   (DP_DELIMITED) parsing.  The members below are grouped by the
   kind of parser that consults them. */
struct data_parser
  {
    struct dictionary *dict;    /* Dictionary of destination */
    enum data_parser_type type; /* Type of data to parse. */
    int skip_records;           /* Records to skip before first real data. */

    struct field *fields;       /* Fields to parse. */
    size_t field_cnt;           /* Number of fields. */
    size_t field_allocated;     /* Number of fields that space is allocated for. */

    /* DP_DELIMITED parsers only. */
    bool span;                  /* May cases span multiple records? */
    bool empty_line_has_field;  /* Does an empty line have an (empty) field? */
    bool warn_missing_fields;   /* Warn when a record runs out of fields? */
    struct substring quotes;    /* Characters that can quote separators. */
    bool quote_escape;          /* Doubled quote acts as escape? */
    struct substring soft_seps; /* Two soft separators act like just one. */
    struct substring hard_seps; /* Two hard separators yield empty fields. */
    struct string any_sep;      /* Concatenation of soft_seps and hard_seps. */

    /* DP_FIXED parsers only. */
    int records_per_case;       /* Number of records in each case. */
  };
  67
/* How to parse one variable.

   NAME is a private copy owned by the field (it is duplicated when
   the field is added and freed when the parser is destroyed). */
struct field
  {
    struct fmt_spec format;     /* Input format of this field. */
    int case_idx;               /* First value in case. */
    char *name;                 /* Var name for error messages and tables. */

    /* DP_FIXED only. */
    int record;                 /* Record number (1-based). */
    int first_column;           /* First column in record (1-based). */
  };
  79
  80 static void set_any_sep (struct data_parser *parser);
  81
  82 /* Creates and returns a new data parser. */
  83 struct data_parser *
  84 data_parser_create (struct dictionary *dict)
  85 {
  86   struct data_parser *parser = xmalloc (sizeof *parser);
  87
  88   parser->type = DP_FIXED;
  89   parser->skip_records = 0;
  90
  91   parser->fields = NULL;
  92   parser->field_cnt = 0;
  93   parser->field_allocated = 0;
  94   parser->dict = dict_ref (dict);
  95
  96   parser->span = true;
  97   parser->empty_line_has_field = false;
  98   parser->warn_missing_fields = true;
  99   ss_alloc_substring (&parser->quotes, ss_cstr ("\"'"));
 100   parser->quote_escape = false;
 101   ss_alloc_substring (&parser->soft_seps, ss_cstr (CC_SPACES));
 102   ss_alloc_substring (&parser->hard_seps, ss_cstr (","));
 103   ds_init_empty (&parser->any_sep);
 104   set_any_sep (parser);
 105
 106   parser->records_per_case = 0;
 107
 108   return parser;
 109 }
 110
 111 /* Destroys PARSER. */
 112 void
 113 data_parser_destroy (struct data_parser *parser)
 114 {
 115   if (parser != NULL)
 116     {
 117       size_t i;
 118
 119       dict_unref (parser->dict);
 120       for (i = 0; i < parser->field_cnt; i++)
 121         free (parser->fields[i].name);
 122       free (parser->fields);
 123       ss_dealloc (&parser->quotes);
 124       ss_dealloc (&parser->soft_seps);
 125       ss_dealloc (&parser->hard_seps);
 126       ds_destroy (&parser->any_sep);
 127       free (parser);
 128     }
 129 }
 130
 131 /* Returns the type of PARSER (either DP_DELIMITED or DP_FIXED). */
 132 enum data_parser_type
 133 data_parser_get_type (const struct data_parser *parser)
 134 {
 135   return parser->type;
 136 }
 137
 138 /* Sets the type of PARSER to TYPE (either DP_DELIMITED or
 139    DP_FIXED). */
 140 void
 141 data_parser_set_type (struct data_parser *parser, enum data_parser_type type)
 142 {
 143   assert (parser->field_cnt == 0);
 144   assert (type == DP_FIXED || type == DP_DELIMITED);
 145   parser->type = type;
 146 }
 147
 148 /* Configures PARSER to skip the specified number of
 149    INITIAL_RECORDS_TO_SKIP before parsing any data.  By default,
 150    no records are skipped. */
 151 void
 152 data_parser_set_skip (struct data_parser *parser, int initial_records_to_skip)
 153 {
 154   assert (initial_records_to_skip >= 0);
 155   parser->skip_records = initial_records_to_skip;
 156 }
 157
 158 /* Returns true if PARSER is configured to allow cases to span
 159    multiple records. */
 160 bool
 161 data_parser_get_span (const struct data_parser *parser)
 162 {
 163   return parser->span;
 164 }
 165
 166 /* If MAY_CASES_SPAN_RECORDS is true, configures PARSER to allow
 167    a single case to span multiple records and multiple cases to
 168    occupy a single record.  If MAY_CASES_SPAN_RECORDS is false,
 169    configures PARSER to require each record to contain exactly
 170    one case.
 171
 172    This setting affects parsing of DP_DELIMITED files only. */
 173 void
 174 data_parser_set_span (struct data_parser *parser, bool may_cases_span_records)
 175 {
 176   parser->span = may_cases_span_records;
 177 }
 178
 179 /* If EMPTY_LINE_HAS_FIELD is true, configures PARSER to parse an
 180    empty line as an empty field and to treat a hard delimiter
 181    followed by end-of-line as an empty field.  If
 182    EMPTY_LINE_HAS_FIELD is false, PARSER will skip empty lines
 183    and hard delimiters at the end of lines without emitting empty
 184    fields.
 185
 186    This setting affects parsing of DP_DELIMITED files only. */
 187 void
 188 data_parser_set_empty_line_has_field (struct data_parser *parser,
 189                                       bool empty_line_has_field)
 190 {
 191   parser->empty_line_has_field = empty_line_has_field;
 192 }
 193
 194
 195 /* If WARN_MISSING_FIELDS is true, configures PARSER to emit a warning
 196    and cause an error condition when a missing field is encountered.
 197    If  WARN_MISSING_FIELDS is false, PARSER will silently fill such
 198    fields with the system missing value.
 199
 200    This setting affects parsing of DP_DELIMITED files only. */
 201 void
 202 data_parser_set_warn_missing_fields (struct data_parser *parser,
 203                                      bool warn_missing_fields)
 204 {
 205   parser->warn_missing_fields = warn_missing_fields;
 206 }
 207
 208
 209 /* Sets the characters that may be used for quoting field
 210    contents to QUOTES.  If QUOTES is empty, quoting will be
 211    disabled.
 212
 213    The caller retains ownership of QUOTES.
 214
 215    This setting affects parsing of DP_DELIMITED files only. */
 216 void
 217 data_parser_set_quotes (struct data_parser *parser, struct substring quotes)
 218 {
 219   ss_dealloc (&parser->quotes);
 220   ss_alloc_substring (&parser->quotes, quotes);
 221 }
 222
 223 /* If ESCAPE is false (the default setting), a character used for
 224    quoting cannot itself be embedded within a quoted field.  If
 225    ESCAPE is true, then a quote character can be embedded within
 226    a quoted field by doubling it.
 227
 228    This setting affects parsing of DP_DELIMITED files only, and
 229    only when at least one quote character has been set (with
 230    data_parser_set_quotes). */
 231 void
 232 data_parser_set_quote_escape (struct data_parser *parser, bool escape)
 233 {
 234   parser->quote_escape = escape;
 235 }
 236
 237 /* Sets PARSER's soft delimiters to DELIMITERS.  Soft delimiters
 238    separate fields, but consecutive soft delimiters do not yield
 239    empty fields.  (Ordinarily, only white space characters are
 240    appropriate soft delimiters.)
 241
 242    The caller retains ownership of DELIMITERS.
 243
 244    This setting affects parsing of DP_DELIMITED files only. */
 245 void
 246 data_parser_set_soft_delimiters (struct data_parser *parser,
 247                                  struct substring delimiters)
 248 {
 249   ss_dealloc (&parser->soft_seps);
 250   ss_alloc_substring (&parser->soft_seps, delimiters);
 251   set_any_sep (parser);
 252 }
 253
 254 /* Sets PARSER's hard delimiters to DELIMITERS.  Hard delimiters
 255    separate fields.  A consecutive pair of hard delimiters yield
 256    an empty field.
 257
 258    The caller retains ownership of DELIMITERS.
 259
 260    This setting affects parsing of DP_DELIMITED files only. */
 261 void
 262 data_parser_set_hard_delimiters (struct data_parser *parser,
 263                                  struct substring delimiters)
 264 {
 265   ss_dealloc (&parser->hard_seps);
 266   ss_alloc_substring (&parser->hard_seps, delimiters);
 267   set_any_sep (parser);
 268 }
 269
 270 /* Returns the number of records per case. */
 271 int
 272 data_parser_get_records (const struct data_parser *parser)
 273 {
 274   return parser->records_per_case;
 275 }
 276
 277 /* Sets the number of records per case to RECORDS_PER_CASE.
 278
 279    This setting affects parsing of DP_FIXED files only. */
 280 void
 281 data_parser_set_records (struct data_parser *parser, int records_per_case)
 282 {
 283   assert (records_per_case >= 0);
 284   assert (records_per_case >= parser->records_per_case);
 285   parser->records_per_case = records_per_case;
 286 }
 287
 288 static void
 289 add_field (struct data_parser *p, const struct fmt_spec *format, int case_idx,
 290            const char *name, int record, int first_column)
 291 {
 292   struct field *field;
 293
 294   if (p->field_cnt == p->field_allocated)
 295     p->fields = x2nrealloc (p->fields, &p->field_allocated, sizeof *p->fields);
 296   field = &p->fields[p->field_cnt++];
 297   field->format = *format;
 298   field->case_idx = case_idx;
 299   field->name = xstrdup (name);
 300   field->record = record;
 301   field->first_column = first_column;
 302 }
 303
 304 /* Adds a delimited field to the field parsed by PARSER, which
 305    must be configured as a DP_DELIMITED parser.  The field is
 306    parsed as input format FORMAT.  Its data will be stored into case
 307    index CASE_INDEX.  Errors in input data will be reported
 308    against variable NAME. */
 309 void
 310 data_parser_add_delimited_field (struct data_parser *parser,
 311                                  const struct fmt_spec *format, int case_idx,
 312                                  const char *name)
 313 {
 314   assert (parser->type == DP_DELIMITED);
 315   add_field (parser, format, case_idx, name, 0, 0);
 316 }
 317
 318 /* Adds a fixed field to the field parsed by PARSER, which
 319    must be configured as a DP_FIXED parser.  The field is
 320    parsed as input format FORMAT.  Its data will be stored into case
 321    index CASE_INDEX.  Errors in input data will be reported
 322    against variable NAME.  The field will be drawn from the
 323    FORMAT->w columns in 1-based RECORD starting at 1-based
 324    column FIRST_COLUMN.
 325
 326    RECORD must be at least as great as that of any field already
 327    added; that is, fields must be added in increasing order of
 328    record number.  If RECORD is greater than the current number
 329    of records per case, the number of records per case are
 330    increased as needed.  */
 331 void
 332 data_parser_add_fixed_field (struct data_parser *parser,
 333                              const struct fmt_spec *format, int case_idx,
 334                              const char *name,
 335                              int record, int first_column)
 336 {
 337   assert (parser->type == DP_FIXED);
 338   assert (parser->field_cnt == 0
 339           || record >= parser->fields[parser->field_cnt - 1].record);
 340   if (record > parser->records_per_case)
 341     parser->records_per_case = record;
 342   add_field (parser, format, case_idx, name, record, first_column);
 343 }
 344
 345 /* Returns true if any fields have been added to PARSER, false
 346    otherwise. */
 347 bool
 348 data_parser_any_fields (const struct data_parser *parser)
 349 {
 350   return parser->field_cnt > 0;
 351 }
 352
 353 static void
 354 set_any_sep (struct data_parser *parser)
 355 {
 356   ds_assign_substring (&parser->any_sep, parser->soft_seps);
 357   ds_put_substring (&parser->any_sep, parser->hard_seps);
 358 }
 359 \f
 360 static bool parse_delimited_span (const struct data_parser *,
 361                                   struct dfm_reader *, struct ccase *);
 362 static bool parse_delimited_no_span (const struct data_parser *,
 363                                      struct dfm_reader *, struct ccase *);
 364 static bool parse_fixed (const struct data_parser *,
 365                          struct dfm_reader *, struct ccase *);
 366
 367 /* Reads a case from DFM into C, parsing it with PARSER.  Returns
 368    true if successful, false at end of file or on I/O error.
 369
 370    Case C must not be shared. */
 371 bool
 372 data_parser_parse (struct data_parser *parser, struct dfm_reader *reader,
 373                    struct ccase *c)
 374 {
 375   bool retval;
 376
 377   assert (!case_is_shared (c));
 378   assert (data_parser_any_fields (parser));
 379
 380   /* Skip the requested number of records before reading the
 381      first case. */
 382   for (; parser->skip_records > 0; parser->skip_records--)
 383     {
 384       if (dfm_eof (reader))
 385         return false;
 386       dfm_forward_record (reader);
 387     }
 388
 389   /* Limit cases. */
 390   if (parser->type == DP_DELIMITED)
 391     {
 392       if (parser->span)
 393         retval = parse_delimited_span (parser, reader, c);
 394       else
 395         retval = parse_delimited_no_span (parser, reader, c);
 396     }
 397   else
 398     retval = parse_fixed (parser, reader, c);
 399
 400   return retval;
 401 }
 402
 403 /* Extracts a delimited field from the current position in the
 404    current record according to PARSER, reading data from READER.
 405
 406    *FIELD is set to the field content.  The caller must not or
 407    destroy this constant string.
 408
 409    Sets *FIRST_COLUMN to the 1-based column number of the start of
 410    the extracted field, and *LAST_COLUMN to the end of the extracted
 411    field.
 412
 413    Returns true on success, false on failure. */
 414 static bool
 415 cut_field (const struct data_parser *parser, struct dfm_reader *reader,
 416            int *first_column, int *last_column, struct string *tmp,
 417            struct substring *field)
 418 {
 419   size_t length_before_separators;
 420   struct substring line, p;
 421   bool quoted;
 422
 423   if (dfm_eof (reader))
 424     return false;
 425   if (ss_is_empty (parser->hard_seps))
 426     dfm_expand_tabs (reader);
 427   line = p = dfm_get_record (reader);
 428
 429   /* Skip leading soft separators. */
 430   ss_ltrim (&p, parser->soft_seps);
 431
 432   /* Handle empty or completely consumed lines. */
 433   if (ss_is_empty (p))
 434     {
 435       if (!parser->empty_line_has_field || dfm_columns_past_end (reader) > 0)
 436         return false;
 437       else
 438         {
 439           *field = p;
 440           *first_column = dfm_column_start (reader);
 441           *last_column = *first_column + 1;
 442           dfm_forward_columns (reader, 1);
 443           return true;
 444         }
 445     }
 446
 447   *first_column = dfm_column_start (reader);
 448   quoted = ss_find_byte (parser->quotes, ss_first (p)) != SIZE_MAX;
 449   if (quoted)
 450     {
 451       /* Quoted field. */
 452       int quote = ss_get_byte (&p);
 453       if (!ss_get_until (&p, quote, field))
 454         msg (DW, _("Quoted string extends beyond end of line."));
 455       if (parser->quote_escape && ss_first (p) == quote)
 456         {
 457           ds_assign_substring (tmp, *field);
 458           while (ss_match_byte (&p, quote))
 459             {
 460               struct substring ss;
 461               ds_put_byte (tmp, quote);
 462               if (!ss_get_until (&p, quote, &ss))
 463                 msg (DW, _("Quoted string extends beyond end of line."));
 464               ds_put_substring (tmp, ss);
 465             }
 466           *field = ds_ss (tmp);
 467         }
 468       *last_column = *first_column + (ss_length (line) - ss_length (p));
 469     }
 470   else
 471     {
 472       /* Regular field. */
 473       ss_get_bytes (&p, ss_cspan (p, ds_ss (&parser->any_sep)), field);
 474       *last_column = *first_column + ss_length (*field);
 475     }
 476
 477   /* Skip trailing soft separator and a single hard separator if present. */
 478   length_before_separators = ss_length (p);
 479   ss_ltrim (&p, parser->soft_seps);
 480   if (!ss_is_empty (p)
 481       && ss_find_byte (parser->hard_seps, ss_first (p)) != SIZE_MAX)
 482     {
 483       ss_advance (&p, 1);
 484       ss_ltrim (&p, parser->soft_seps);
 485     }
 486   if (ss_is_empty (p))
 487     dfm_forward_columns (reader, 1);
 488   else if (quoted && length_before_separators == ss_length (p))
 489     msg (DW, _("Missing delimiter following quoted string."));
 490   dfm_forward_columns (reader, ss_length (line) - ss_length (p));
 491
 492   return true;
 493 }
 494
 495 static void
 496 parse_error (const struct dfm_reader *reader, const struct field *field,
 497              int first_column, int last_column, char *error)
 498 {
 499   int line_number = dfm_get_line_number (reader);
 500   struct msg_location *location = xmalloc (sizeof *location);
 501   *location = (struct msg_location) {
 502     .file_name = intern_new (dfm_get_file_name (reader)),
 503     .first_line = line_number,
 504     .last_line = line_number + 1,
 505     .first_column = first_column,
 506     .last_column = last_column,
 507   };
 508   struct msg *m = xmalloc (sizeof *m);
 509   *m = (struct msg) {
 510     .category = MSG_C_DATA,
 511     .severity = MSG_S_WARNING,
 512     .location = location,
 513     .text = xasprintf (_("Data for variable %s is not valid as format %s: %s"),
 514                        field->name, fmt_name (field->format.type), error),
 515   };
 516   msg_emit (m);
 517
 518   free (error);
 519 }
 520
 521 /* Reads a case from READER into C, parsing it according to
 522    fixed-format syntax rules in PARSER.
 523    Returns true if successful, false at end of file or on I/O error. */
 524 static bool
 525 parse_fixed (const struct data_parser *parser, struct dfm_reader *reader,
 526              struct ccase *c)
 527 {
 528   const char *input_encoding = dfm_reader_get_encoding (reader);
 529   const char *output_encoding = dict_get_encoding (parser->dict);
 530   struct field *f;
 531   int row;
 532
 533   if (dfm_eof (reader))
 534     return false;
 535
 536   f = parser->fields;
 537   for (row = 1; row <= parser->records_per_case; row++)
 538     {
 539       struct substring line;
 540
 541       if (dfm_eof (reader))
 542         {
 543           msg (DW, _("Partial case of %d of %d records discarded."),
 544                row - 1, parser->records_per_case);
 545           return false;
 546         }
 547       dfm_expand_tabs (reader);
 548       line = dfm_get_record (reader);
 549
 550       for (; f < &parser->fields[parser->field_cnt] && f->record == row; f++)
 551         {
 552           struct substring s = ss_substr (line, f->first_column - 1,
 553                                           f->format.w);
 554           union value *value = case_data_rw_idx (c, f->case_idx);
 555           char *error = data_in (s, input_encoding, f->format.type,
 556                                  settings_get_fmt_settings (),
 557                                  value, fmt_var_width (&f->format),
 558                                  output_encoding);
 559
 560           if (error == NULL)
 561             data_in_imply_decimals (s, input_encoding, f->format.type,
 562                                     f->format.d, settings_get_fmt_settings (),
 563                                     value);
 564           else
 565             parse_error (reader, f, f->first_column,
 566                          f->first_column + f->format.w, error);
 567         }
 568
 569       dfm_forward_record (reader);
 570     }
 571
 572   return true;
 573 }
 574
 575 /* Reads a case from READER into C, parsing it according to
 576    free-format syntax rules in PARSER.
 577    Returns true if successful, false at end of file or on I/O error. */
 578 static bool
 579 parse_delimited_span (const struct data_parser *parser,
 580                       struct dfm_reader *reader, struct ccase *c)
 581 {
 582   const char *output_encoding = dict_get_encoding (parser->dict);
 583   struct string tmp = DS_EMPTY_INITIALIZER;
 584   struct field *f;
 585
 586   for (f = parser->fields; f < &parser->fields[parser->field_cnt]; f++)
 587     {
 588       struct substring s;
 589       int first_column, last_column;
 590       char *error;
 591
 592       /* Cut out a field and read in a new record if necessary. */
 593       while (!cut_field (parser, reader,
 594                          &first_column, &last_column, &tmp, &s))
 595         {
 596           if (!dfm_eof (reader))
 597             dfm_forward_record (reader);
 598           if (dfm_eof (reader))
 599             {
 600               if (f > parser->fields)
 601                 msg (DW, _("Partial case discarded.  The first variable "
 602                            "missing was %s."), f->name);
 603               ds_destroy (&tmp);
 604               return false;
 605             }
 606         }
 607
 608       const char *input_encoding = dfm_reader_get_encoding (reader);
 609       error = data_in (s, input_encoding, f->format.type,
 610                        settings_get_fmt_settings (),
 611                        case_data_rw_idx (c, f->case_idx),
 612                        fmt_var_width (&f->format), output_encoding);
 613       if (error != NULL)
 614         parse_error (reader, f, first_column, last_column, error);
 615     }
 616   ds_destroy (&tmp);
 617   return true;
 618 }
 619
 620 /* Reads a case from READER into C, parsing it according to
 621    delimited syntax rules with one case per record in PARSER.
 622    Returns true if successful, false at end of file or on I/O error. */
 623 static bool
 624 parse_delimited_no_span (const struct data_parser *parser,
 625                          struct dfm_reader *reader, struct ccase *c)
 626 {
 627   const char *output_encoding = dict_get_encoding (parser->dict);
 628   struct string tmp = DS_EMPTY_INITIALIZER;
 629   struct substring s;
 630   struct field *f, *end;
 631
 632   if (dfm_eof (reader))
 633     return false;
 634
 635   end = &parser->fields[parser->field_cnt];
 636   for (f = parser->fields; f < end; f++)
 637     {
 638       int first_column, last_column;
 639       char *error;
 640
 641       if (!cut_field (parser, reader, &first_column, &last_column, &tmp, &s))
 642         {
 643           if (f < end - 1 && settings_get_undefined () && parser->warn_missing_fields)
 644             msg (DW, _("Missing value(s) for all variables from %s onward.  "
 645                        "These will be filled with the system-missing value "
 646                        "or blanks, as appropriate."),
 647                  f->name);
 648           for (; f < end; f++)
 649             value_set_missing (case_data_rw_idx (c, f->case_idx),
 650                                fmt_var_width (&f->format));
 651           goto exit;
 652         }
 653
 654       const char *input_encoding = dfm_reader_get_encoding (reader);
 655       error = data_in (s, input_encoding, f->format.type,
 656                        settings_get_fmt_settings (),
 657                        case_data_rw_idx (c, f->case_idx),
 658                        fmt_var_width (&f->format), output_encoding);
 659       if (error != NULL)
 660         parse_error (reader, f, first_column, last_column, error);
 661     }
 662
 663   s = dfm_get_record (reader);
 664   ss_ltrim (&s, parser->soft_seps);
 665   if (!ss_is_empty (s))
 666     msg (DW, _("Record ends in data not part of any field."));
 667
 668 exit:
 669   dfm_forward_record (reader);
 670   ds_destroy (&tmp);
 671   return true;
 672 }
 673 \f
 674 /* Displays a table giving information on fixed-format variable
 675    parsing on DATA LIST. */
 676 static void
 677 dump_fixed_table (const struct data_parser *parser,
 678                   const struct file_handle *fh)
 679 {
 680   /* XXX This should not be preformatted. */
 681   char *title = xasprintf (ngettext ("Reading %d record from %s.",
 682                                      "Reading %d records from %s.",
 683                                      parser->records_per_case),
 684                            parser->records_per_case, fh_get_name (fh));
 685   struct pivot_table *table = pivot_table_create__ (
 686     pivot_value_new_user_text (title, -1), "Fixed Data Records");
 687   free (title);
 688
 689   pivot_dimension_create (
 690     table, PIVOT_AXIS_COLUMN, N_("Attributes"),
 691     N_("Record"), N_("Columns"), N_("Format"));
 692
 693   struct pivot_dimension *variables = pivot_dimension_create (
 694     table, PIVOT_AXIS_ROW, N_("Variable"));
 695   variables->root->show_label = true;
 696   for (size_t i = 0; i < parser->field_cnt; i++)
 697     {
 698       struct field *f = &parser->fields[i];
 699
 700       /* XXX It would be better to have the actual variable here. */
 701       int variable_idx = pivot_category_create_leaf (
 702         variables->root, pivot_value_new_user_text (f->name, -1));
 703
 704       pivot_table_put2 (table, 0, variable_idx,
 705                         pivot_value_new_integer (f->record));
 706
 707       int first_column = f->first_column;
 708       int last_column = f->first_column + f->format.w - 1;
 709       char *columns = xasprintf ("%d-%d", first_column, last_column);
 710       pivot_table_put2 (table, 1, variable_idx,
 711                         pivot_value_new_user_text (columns, -1));
 712       free (columns);
 713
 714       char str[FMT_STRING_LEN_MAX + 1];
 715       pivot_table_put2 (table, 2, variable_idx,
 716                         pivot_value_new_user_text (
 717                           fmt_to_string (&f->format, str), -1));
 718
 719     }
 720
 721   pivot_table_submit (table);
 722 }
 723
 724 /* Displays a table giving information on free-format variable parsing
 725    on DATA LIST. */
 726 static void
 727 dump_delimited_table (const struct data_parser *parser,
 728                       const struct file_handle *fh)
 729 {
 730   struct pivot_table *table = pivot_table_create__ (
 731     pivot_value_new_text_format (N_("Reading free-form data from %s."),
 732                                  fh_get_name (fh)),
 733     "Free-Form Data Records");
 734
 735   pivot_dimension_create (
 736     table, PIVOT_AXIS_COLUMN, N_("Attributes"), N_("Format"));
 737
 738   struct pivot_dimension *variables = pivot_dimension_create (
 739     table, PIVOT_AXIS_ROW, N_("Variable"));
 740   variables->root->show_label = true;
 741   for (size_t i = 0; i < parser->field_cnt; i++)
 742     {
 743       struct field *f = &parser->fields[i];
 744
 745       /* XXX It would be better to have the actual variable here. */
 746       int variable_idx = pivot_category_create_leaf (
 747         variables->root, pivot_value_new_user_text (f->name, -1));
 748
 749       char str[FMT_STRING_LEN_MAX + 1];
 750       pivot_table_put2 (table, 0, variable_idx,
 751                         pivot_value_new_user_text (
 752                           fmt_to_string (&f->format, str), -1));
 753     }
 754
 755   pivot_table_submit (table);
 756 }
 757
 758 /* Displays a table giving information on how PARSER will read
 759    data from FH. */
 760 void
 761 data_parser_output_description (struct data_parser *parser,
 762                                 const struct file_handle *fh)
 763 {
 764   if (parser->type == DP_FIXED)
 765     dump_fixed_table (parser, fh);
 766   else
 767     dump_delimited_table (parser, fh);
 768 }
 769 \f
/* Data parser input program.

   Auxiliary data for a casereader that produces cases by parsing
   READER's records with PARSER.  All three members are released
   when the casereader is destroyed. */
struct data_parser_casereader
  {
    struct data_parser *parser; /* Parser. */
    struct dfm_reader *reader;  /* Data file reader. */
    struct caseproto *proto;    /* Format of cases. */
  };
 777
 778 static const struct casereader_class data_parser_casereader_class;
 779
 780 /* Replaces DS's active dataset by an input program that reads data
 781    from READER according to the rules in PARSER, using DICT as
 782    the underlying dictionary.  Ownership of PARSER and READER is
 783    transferred to the input program, and ownership of DICT is
 784    transferred to the dataset. */
 785 void
 786 data_parser_make_active_file (struct data_parser *parser, struct dataset *ds,
 787                                struct dfm_reader *reader,
 788                                struct dictionary *dict,
 789                                struct casereader* (*func)(struct casereader *,
 790                                                           const struct dictionary *,
 791                                                           void *),
 792                                void *ud)
 793 {
 794   struct data_parser_casereader *r;
 795   struct casereader *casereader0;
 796   struct casereader *casereader1;
 797
 798   r = xmalloc (sizeof *r);
 799   r->parser = parser;
 800   r->reader = reader;
 801   r->proto = caseproto_ref (dict_get_proto (dict));
 802   casereader0 = casereader_create_sequential (NULL, r->proto,
 803                                              CASENUMBER_MAX,
 804                                              &data_parser_casereader_class, r);
 805
 806   if (func)
 807     casereader1 = func (casereader0, dict, ud);
 808   else
 809     casereader1 = casereader0;
 810
 811   dataset_set_dict (ds, dict);
 812   dataset_set_source (ds, casereader1);
 813 }
 814
 815
 816 static struct ccase *
 817 data_parser_casereader_read (struct casereader *reader UNUSED, void *r_)
 818 {
 819   struct data_parser_casereader *r = r_;
 820   struct ccase *c = case_create (r->proto);
 821   if (data_parser_parse (r->parser, r->reader, c))
 822     return c;
 823   else
 824     {
 825       case_unref (c);
 826       return NULL;
 827     }
 828 }
 829
 830 static void
 831 data_parser_casereader_destroy (struct casereader *reader, void *r_)
 832 {
 833   struct data_parser_casereader *r = r_;
 834   if (dfm_reader_error (r->reader))
 835     casereader_force_error (reader);
 836   dfm_close_reader (r->reader);
 837   caseproto_unref (r->proto);
 838   data_parser_destroy (r->parser);
 839   free (r);
 840 }
 841
/* Casereader class for the data parser input program.  Only the
   read and destroy callbacks are supplied.
   NOTE(review): the two null members are positional; check struct
   casereader_class to see which optional operations they leave
   out. */
static const struct casereader_class data_parser_casereader_class =
  {
    data_parser_casereader_read,
    data_parser_casereader_destroy,
    NULL,
    NULL,
  };