pintos-os.org Git - pspp/blob - src/language/data-io/data-parser.c

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 2007, 2009, 2010, 2011, 2012, 2013, 2016 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17 #include <config.h>
  18
  19 #include "language/data-io/data-parser.h"
  20
  21 #include <stdint.h>
  22 #include <stdlib.h>
  23
  24 #include "data/casereader-provider.h"
  25 #include "data/data-in.h"
  26 #include "data/dataset.h"
  27 #include "data/dictionary.h"
  28 #include "data/format.h"
  29 #include "data/file-handle-def.h"
  30 #include "data/settings.h"
  31 #include "language/data-io/data-reader.h"
  32 #include "libpspp/message.h"
  33 #include "libpspp/str.h"
  34 #include "output/pivot-table.h"
  35
  36 #include "gl/xalloc.h"
  37
  38 #include "gettext.h"
  39 #define N_(msgid) msgid
  40 #define _(msgid) gettext (msgid)
  41
/* Data parser for textual data like that read by DATA LIST.

   The first group of members applies to every parser; the remaining
   groups are only consulted for the parser type named in their heading. */
struct data_parser
  {
    const struct dictionary *dict; /* Destination dictionary. */
    enum data_parser_type type; /* Type of data to parse. */
    int skip_records;           /* Records to skip before first real data. */

    struct field *fields;       /* Fields to parse. */
    size_t field_cnt;           /* Number of fields. */
    size_t field_allocated;     /* Number of fields space is allocated for. */

    /* DP_DELIMITED parsers only. */
    bool span;                  /* May cases span multiple records? */
    bool empty_line_has_field;  /* Does an empty line have an (empty) field? */
    bool warn_missing_fields;   /* Warn when a record runs out of fields? */
    struct substring quotes;    /* Characters that can quote separators. */
    bool quote_escape;          /* Doubled quote acts as escape? */
    struct substring soft_seps; /* Two soft separators act like just one. */
    struct substring hard_seps; /* Two hard separators yield empty fields. */
    struct string any_sep;      /* Concatenation of soft_seps and hard_seps. */

    /* DP_FIXED parsers only. */
    int records_per_case;       /* Number of records in each case. */
  };
  66
/* How to parse one variable. */
struct field
  {
    struct fmt_spec format;     /* Input format of this field. */
    int case_idx;               /* Index of the case value that receives
                                   this field's data. */
    char *name;                 /* Var name for error messages and tables
                                   (a private copy owned by the field). */

    /* DP_FIXED only (both are 0 for fields added as delimited fields). */
    int record;                 /* Record number (1-based). */
    int first_column;           /* First column in record (1-based). */
  };
  78
/* Forward declaration: rebuilds PARSER's any_sep from its soft and hard
   separators.  Needed because data_parser_create calls it before its
   definition appears. */
static void set_any_sep (struct data_parser *parser);
  80
  81 /* Creates and returns a new data parser. */
  82 struct data_parser *
  83 data_parser_create (const struct dictionary *dict)
  84 {
  85   struct data_parser *parser = xmalloc (sizeof *parser);
  86
  87   parser->type = DP_FIXED;
  88   parser->skip_records = 0;
  89
  90   parser->fields = NULL;
  91   parser->field_cnt = 0;
  92   parser->field_allocated = 0;
  93   parser->dict = dict;
  94
  95   parser->span = true;
  96   parser->empty_line_has_field = false;
  97   parser->warn_missing_fields = true;
  98   ss_alloc_substring (&parser->quotes, ss_cstr ("\"'"));
  99   parser->quote_escape = false;
 100   ss_alloc_substring (&parser->soft_seps, ss_cstr (CC_SPACES));
 101   ss_alloc_substring (&parser->hard_seps, ss_cstr (","));
 102   ds_init_empty (&parser->any_sep);
 103   set_any_sep (parser);
 104
 105   parser->records_per_case = 0;
 106
 107   return parser;
 108 }
 109
 110 /* Destroys PARSER. */
 111 void
 112 data_parser_destroy (struct data_parser *parser)
 113 {
 114   if (parser != NULL)
 115     {
 116       size_t i;
 117
 118       for (i = 0; i < parser->field_cnt; i++)
 119         free (parser->fields[i].name);
 120       free (parser->fields);
 121       ss_dealloc (&parser->quotes);
 122       ss_dealloc (&parser->soft_seps);
 123       ss_dealloc (&parser->hard_seps);
 124       ds_destroy (&parser->any_sep);
 125       free (parser);
 126     }
 127 }
 128
 129 /* Returns the type of PARSER (either DP_DELIMITED or DP_FIXED). */
 130 enum data_parser_type
 131 data_parser_get_type (const struct data_parser *parser)
 132 {
 133   return parser->type;
 134 }
 135
 136 /* Sets the type of PARSER to TYPE (either DP_DELIMITED or
 137    DP_FIXED). */
 138 void
 139 data_parser_set_type (struct data_parser *parser, enum data_parser_type type)
 140 {
 141   assert (parser->field_cnt == 0);
 142   assert (type == DP_FIXED || type == DP_DELIMITED);
 143   parser->type = type;
 144 }
 145
 146 /* Configures PARSER to skip the specified number of
 147    INITIAL_RECORDS_TO_SKIP before parsing any data.  By default,
 148    no records are skipped. */
 149 void
 150 data_parser_set_skip (struct data_parser *parser, int initial_records_to_skip)
 151 {
 152   assert (initial_records_to_skip >= 0);
 153   parser->skip_records = initial_records_to_skip;
 154 }
 155
 156 /* Returns true if PARSER is configured to allow cases to span
 157    multiple records. */
 158 bool
 159 data_parser_get_span (const struct data_parser *parser)
 160 {
 161   return parser->span;
 162 }
 163
 164 /* If MAY_CASES_SPAN_RECORDS is true, configures PARSER to allow
 165    a single case to span multiple records and multiple cases to
 166    occupy a single record.  If MAY_CASES_SPAN_RECORDS is false,
 167    configures PARSER to require each record to contain exactly
 168    one case.
 169
 170    This setting affects parsing of DP_DELIMITED files only. */
 171 void
 172 data_parser_set_span (struct data_parser *parser, bool may_cases_span_records)
 173 {
 174   parser->span = may_cases_span_records;
 175 }
 176
 177 /* If EMPTY_LINE_HAS_FIELD is true, configures PARSER to parse an
 178    empty line as an empty field and to treat a hard delimiter
 179    followed by end-of-line as an empty field.  If
 180    EMPTY_LINE_HAS_FIELD is false, PARSER will skip empty lines
 181    and hard delimiters at the end of lines without emitting empty
 182    fields.
 183
 184    This setting affects parsing of DP_DELIMITED files only. */
 185 void
 186 data_parser_set_empty_line_has_field (struct data_parser *parser,
 187                                       bool empty_line_has_field)
 188 {
 189   parser->empty_line_has_field = empty_line_has_field;
 190 }
 191
 192
 193 /* If WARN_MISSING_FIELDS is true, configures PARSER to emit a warning
 194    and cause an error condition when a missing field is encountered.
 195    If  WARN_MISSING_FIELDS is false, PARSER will silently fill such
 196    fields with the system missing value.
 197
 198    This setting affects parsing of DP_DELIMITED files only. */
 199 void
 200 data_parser_set_warn_missing_fields (struct data_parser *parser,
 201                                      bool warn_missing_fields)
 202 {
 203   parser->warn_missing_fields = warn_missing_fields;
 204 }
 205
 206
 207 /* Sets the characters that may be used for quoting field
 208    contents to QUOTES.  If QUOTES is empty, quoting will be
 209    disabled.
 210
 211    The caller retains ownership of QUOTES.
 212
 213    This setting affects parsing of DP_DELIMITED files only. */
 214 void
 215 data_parser_set_quotes (struct data_parser *parser, struct substring quotes)
 216 {
 217   ss_dealloc (&parser->quotes);
 218   ss_alloc_substring (&parser->quotes, quotes);
 219 }
 220
 221 /* If ESCAPE is false (the default setting), a character used for
 222    quoting cannot itself be embedded within a quoted field.  If
 223    ESCAPE is true, then a quote character can be embedded within
 224    a quoted field by doubling it.
 225
 226    This setting affects parsing of DP_DELIMITED files only, and
 227    only when at least one quote character has been set (with
 228    data_parser_set_quotes). */
 229 void
 230 data_parser_set_quote_escape (struct data_parser *parser, bool escape)
 231 {
 232   parser->quote_escape = escape;
 233 }
 234
 235 /* Sets PARSER's soft delimiters to DELIMITERS.  Soft delimiters
 236    separate fields, but consecutive soft delimiters do not yield
 237    empty fields.  (Ordinarily, only white space characters are
 238    appropriate soft delimiters.)
 239
 240    The caller retains ownership of DELIMITERS.
 241
 242    This setting affects parsing of DP_DELIMITED files only. */
 243 void
 244 data_parser_set_soft_delimiters (struct data_parser *parser,
 245                                  struct substring delimiters)
 246 {
 247   ss_dealloc (&parser->soft_seps);
 248   ss_alloc_substring (&parser->soft_seps, delimiters);
 249   set_any_sep (parser);
 250 }
 251
 252 /* Sets PARSER's hard delimiters to DELIMITERS.  Hard delimiters
 253    separate fields.  A consecutive pair of hard delimiters yield
 254    an empty field.
 255
 256    The caller retains ownership of DELIMITERS.
 257
 258    This setting affects parsing of DP_DELIMITED files only. */
 259 void
 260 data_parser_set_hard_delimiters (struct data_parser *parser,
 261                                  struct substring delimiters)
 262 {
 263   ss_dealloc (&parser->hard_seps);
 264   ss_alloc_substring (&parser->hard_seps, delimiters);
 265   set_any_sep (parser);
 266 }
 267
 268 /* Returns the number of records per case. */
 269 int
 270 data_parser_get_records (const struct data_parser *parser)
 271 {
 272   return parser->records_per_case;
 273 }
 274
 275 /* Sets the number of records per case to RECORDS_PER_CASE.
 276
 277    This setting affects parsing of DP_FIXED files only. */
 278 void
 279 data_parser_set_records (struct data_parser *parser, int records_per_case)
 280 {
 281   assert (records_per_case >= 0);
 282   assert (records_per_case >= parser->records_per_case);
 283   parser->records_per_case = records_per_case;
 284 }
 285
 286 static void
 287 add_field (struct data_parser *p, const struct fmt_spec *format, int case_idx,
 288            const char *name, int record, int first_column)
 289 {
 290   struct field *field;
 291
 292   if (p->field_cnt == p->field_allocated)
 293     p->fields = x2nrealloc (p->fields, &p->field_allocated, sizeof *p->fields);
 294   field = &p->fields[p->field_cnt++];
 295   field->format = *format;
 296   field->case_idx = case_idx;
 297   field->name = xstrdup (name);
 298   field->record = record;
 299   field->first_column = first_column;
 300 }
 301
 302 /* Adds a delimited field to the field parsed by PARSER, which
 303    must be configured as a DP_DELIMITED parser.  The field is
 304    parsed as input format FORMAT.  Its data will be stored into case
 305    index CASE_INDEX.  Errors in input data will be reported
 306    against variable NAME. */
 307 void
 308 data_parser_add_delimited_field (struct data_parser *parser,
 309                                  const struct fmt_spec *format, int case_idx,
 310                                  const char *name)
 311 {
 312   assert (parser->type == DP_DELIMITED);
 313   add_field (parser, format, case_idx, name, 0, 0);
 314 }
 315
 316 /* Adds a fixed field to the field parsed by PARSER, which
 317    must be configured as a DP_FIXED parser.  The field is
 318    parsed as input format FORMAT.  Its data will be stored into case
 319    index CASE_INDEX.  Errors in input data will be reported
 320    against variable NAME.  The field will be drawn from the
 321    FORMAT->w columns in 1-based RECORD starting at 1-based
 322    column FIRST_COLUMN.
 323
 324    RECORD must be at least as great as that of any field already
 325    added; that is, fields must be added in increasing order of
 326    record number.  If RECORD is greater than the current number
 327    of records per case, the number of records per case are
 328    increased as needed.  */
 329 void
 330 data_parser_add_fixed_field (struct data_parser *parser,
 331                              const struct fmt_spec *format, int case_idx,
 332                              const char *name,
 333                              int record, int first_column)
 334 {
 335   assert (parser->type == DP_FIXED);
 336   assert (parser->field_cnt == 0
 337           || record >= parser->fields[parser->field_cnt - 1].record);
 338   if (record > parser->records_per_case)
 339     parser->records_per_case = record;
 340   add_field (parser, format, case_idx, name, record, first_column);
 341 }
 342
 343 /* Returns true if any fields have been added to PARSER, false
 344    otherwise. */
 345 bool
 346 data_parser_any_fields (const struct data_parser *parser)
 347 {
 348   return parser->field_cnt > 0;
 349 }
 350
 351 static void
 352 set_any_sep (struct data_parser *parser)
 353 {
 354   ds_assign_substring (&parser->any_sep, parser->soft_seps);
 355   ds_put_substring (&parser->any_sep, parser->hard_seps);
 356 }
 357 \f
/* Forward declarations of the per-format case parsers defined below.
   data_parser_parse dispatches to exactly one of them according to the
   parser's type and span setting. */
static bool parse_delimited_span (const struct data_parser *,
                                  struct dfm_reader *, struct ccase *);
static bool parse_delimited_no_span (const struct data_parser *,
                                     struct dfm_reader *, struct ccase *);
static bool parse_fixed (const struct data_parser *,
                         struct dfm_reader *, struct ccase *);
 364
 365 /* Reads a case from DFM into C, parsing it with PARSER.  Returns
 366    true if successful, false at end of file or on I/O error.
 367
 368    Case C must not be shared. */
 369 bool
 370 data_parser_parse (struct data_parser *parser, struct dfm_reader *reader,
 371                    struct ccase *c)
 372 {
 373   bool retval;
 374
 375   assert (!case_is_shared (c));
 376   assert (data_parser_any_fields (parser));
 377
 378   /* Skip the requested number of records before reading the
 379      first case. */
 380   for (; parser->skip_records > 0; parser->skip_records--)
 381     {
 382       if (dfm_eof (reader))
 383         return false;
 384       dfm_forward_record (reader);
 385     }
 386
 387   /* Limit cases. */
 388   if (parser->type == DP_DELIMITED)
 389     {
 390       if (parser->span)
 391         retval = parse_delimited_span (parser, reader, c);
 392       else
 393         retval = parse_delimited_no_span (parser, reader, c);
 394     }
 395   else
 396     retval = parse_fixed (parser, reader, c);
 397
 398   return retval;
 399 }
 400
/* Extracts a delimited field from the current position in the
   current record according to PARSER, reading data from READER.

   *FIELD is set to the field content.  The caller must not modify or
   destroy this constant string.  TMP is scratch storage supplied by the
   caller: when a quoted field contains doubled quotes and quote escaping
   is enabled, the unescaped text is assembled in TMP and *FIELD refers
   to TMP's contents, so TMP must stay alive for as long as *FIELD is in
   use.

   Sets *FIRST_COLUMN to the 1-based column number of the start of
   the extracted field, and *LAST_COLUMN to the end of the extracted
   field.

   Advances READER's column position past the field and the separators
   that follow it.

   Returns true on success.  Returns false, without setting any of the
   output parameters, if READER is at end of file or if nothing but soft
   separators remains in the current record (unless PARSER treats an
   empty line as an empty field and the record has not already been
   consumed past its end). */
static bool
cut_field (const struct data_parser *parser, struct dfm_reader *reader,
           int *first_column, int *last_column, struct string *tmp,
           struct substring *field)
{
  size_t length_before_separators;
  struct substring line, p;
  bool quoted;

  if (dfm_eof (reader))
    return false;
  /* Tabs are expanded only when there are no hard separators at all. */
  if (ss_is_empty (parser->hard_seps))
    dfm_expand_tabs (reader);
  /* LINE keeps the record as obtained from the reader; P is the working
     copy that is consumed as parsing proceeds.  The difference of their
     lengths is how far parsing has advanced. */
  line = p = dfm_get_record (reader);

  /* Skip leading soft separators. */
  ss_ltrim (&p, parser->soft_seps);

  /* Handle empty or completely consumed lines. */
  if (ss_is_empty (p))
    {
      if (!parser->empty_line_has_field || dfm_columns_past_end (reader) > 0)
        return false;
      else
        {
          /* Report one empty field and step one column forward, which
             presumably moves the reader past the end of the record so
             that the next call takes the branch above -- confirm against
             the dfm_reader implementation. */
          *field = p;
          *first_column = dfm_column_start (reader);
          *last_column = *first_column + 1;
          dfm_forward_columns (reader, 1);
          return true;
        }
    }

  *first_column = dfm_column_start (reader);
  /* The field is quoted if its first byte is one of the quote
     characters. */
  quoted = ss_find_byte (parser->quotes, ss_first (p)) != SIZE_MAX;
  if (quoted)
    {
      /* Quoted field. */
      int quote = ss_get_byte (&p);
      if (!ss_get_until (&p, quote, field))
        msg (DW, _("Quoted string extends beyond end of line."));
      if (parser->quote_escape && ss_first (p) == quote)
        {
          /* A quote directly after the closing quote is an escaped
             quote: rebuild the field in TMP, adding one quote byte and
             the following segment for each doubled quote. */
          ds_assign_substring (tmp, *field);
          while (ss_match_byte (&p, quote))
            {
              struct substring ss;
              ds_put_byte (tmp, quote);
              if (!ss_get_until (&p, quote, &ss))
                msg (DW, _("Quoted string extends beyond end of line."));
              ds_put_substring (tmp, ss);
            }
          *field = ds_ss (tmp);
        }
      /* The end column is based on the bytes consumed from the record,
         not on the length of the (possibly unescaped) field. */
      *last_column = *first_column + (ss_length (line) - ss_length (p));
    }
  else
    {
      /* Regular field: everything up to the next soft or hard
         separator. */
      ss_get_bytes (&p, ss_cspan (p, ds_ss (&parser->any_sep)), field);
      *last_column = *first_column + ss_length (*field);
    }

  /* Skip trailing soft separator and a single hard separator if present. */
  length_before_separators = ss_length (p);
  ss_ltrim (&p, parser->soft_seps);
  if (!ss_is_empty (p)
      && ss_find_byte (parser->hard_seps, ss_first (p)) != SIZE_MAX)
    {
      ss_advance (&p, 1);
      ss_ltrim (&p, parser->soft_seps);
    }
  if (ss_is_empty (p))
    /* The record is used up: advance one extra column, presumably so
       that the reader is positioned past the end of the record -- confirm
       against the dfm_reader implementation. */
    dfm_forward_columns (reader, 1);
  else if (quoted && length_before_separators == ss_length (p))
    /* Nothing at all was skipped after the closing quote. */
    msg (DW, _("Missing delimiter following quoted string."));
  /* Advance the reader by everything consumed from the record. */
  dfm_forward_columns (reader, ss_length (line) - ss_length (p));

  return true;
}
 492
 493 static void
 494 parse_error (const struct dfm_reader *reader, const struct field *field,
 495              int first_column, int last_column, char *error)
 496 {
 497   struct msg m = {
 498     .category = MSG_C_DATA,
 499     .severity = MSG_S_WARNING,
 500     .file_name = CONST_CAST (char *, dfm_get_file_name (reader)),
 501     .first_line = dfm_get_line_number (reader),
 502     .last_line = m.first_line + 1,
 503     .first_column = first_column,
 504     .last_column = last_column,
 505     .text = xasprintf (_("Data for variable %s is not valid as format %s: %s"),
 506                        field->name, fmt_name (field->format.type), error),
 507   };
 508   msg_emit (&m);
 509
 510   free (error);
 511 }
 512
 513 /* Reads a case from READER into C, parsing it according to
 514    fixed-format syntax rules in PARSER.
 515    Returns true if successful, false at end of file or on I/O error. */
 516 static bool
 517 parse_fixed (const struct data_parser *parser, struct dfm_reader *reader,
 518              struct ccase *c)
 519 {
 520   const char *input_encoding = dfm_reader_get_encoding (reader);
 521   const char *output_encoding = dict_get_encoding (parser->dict);
 522   struct field *f;
 523   int row;
 524
 525   if (dfm_eof (reader))
 526     return false;
 527
 528   f = parser->fields;
 529   for (row = 1; row <= parser->records_per_case; row++)
 530     {
 531       struct substring line;
 532
 533       if (dfm_eof (reader))
 534         {
 535           msg (DW, _("Partial case of %d of %d records discarded."),
 536                row - 1, parser->records_per_case);
 537           return false;
 538         }
 539       dfm_expand_tabs (reader);
 540       line = dfm_get_record (reader);
 541
 542       for (; f < &parser->fields[parser->field_cnt] && f->record == row; f++)
 543         {
 544           struct substring s = ss_substr (line, f->first_column - 1,
 545                                           f->format.w);
 546           union value *value = case_data_rw_idx (c, f->case_idx);
 547           char *error = data_in (s, input_encoding, f->format.type,
 548                                  value, fmt_var_width (&f->format),
 549                                  output_encoding);
 550
 551           if (error == NULL)
 552             data_in_imply_decimals (s, input_encoding, f->format.type,
 553                                     f->format.d, value);
 554           else
 555             parse_error (reader, f, f->first_column,
 556                          f->first_column + f->format.w, error);
 557         }
 558
 559       dfm_forward_record (reader);
 560     }
 561
 562   return true;
 563 }
 564
 565 /* Reads a case from READER into C, parsing it according to
 566    free-format syntax rules in PARSER.
 567    Returns true if successful, false at end of file or on I/O error. */
 568 static bool
 569 parse_delimited_span (const struct data_parser *parser,
 570                       struct dfm_reader *reader, struct ccase *c)
 571 {
 572   const char *output_encoding = dict_get_encoding (parser->dict);
 573   struct string tmp = DS_EMPTY_INITIALIZER;
 574   struct field *f;
 575
 576   for (f = parser->fields; f < &parser->fields[parser->field_cnt]; f++)
 577     {
 578       struct substring s;
 579       int first_column, last_column;
 580       char *error;
 581
 582       /* Cut out a field and read in a new record if necessary. */
 583       while (!cut_field (parser, reader,
 584                          &first_column, &last_column, &tmp, &s))
 585         {
 586           if (!dfm_eof (reader))
 587             dfm_forward_record (reader);
 588           if (dfm_eof (reader))
 589             {
 590               if (f > parser->fields)
 591                 msg (DW, _("Partial case discarded.  The first variable "
 592                            "missing was %s."), f->name);
 593               ds_destroy (&tmp);
 594               return false;
 595             }
 596         }
 597
 598       const char *input_encoding = dfm_reader_get_encoding (reader);
 599       error = data_in (s, input_encoding, f->format.type,
 600                        case_data_rw_idx (c, f->case_idx),
 601                        fmt_var_width (&f->format), output_encoding);
 602       if (error != NULL)
 603         parse_error (reader, f, first_column, last_column, error);
 604     }
 605   ds_destroy (&tmp);
 606   return true;
 607 }
 608
 609 /* Reads a case from READER into C, parsing it according to
 610    delimited syntax rules with one case per record in PARSER.
 611    Returns true if successful, false at end of file or on I/O error. */
 612 static bool
 613 parse_delimited_no_span (const struct data_parser *parser,
 614                          struct dfm_reader *reader, struct ccase *c)
 615 {
 616   const char *output_encoding = dict_get_encoding (parser->dict);
 617   struct string tmp = DS_EMPTY_INITIALIZER;
 618   struct substring s;
 619   struct field *f, *end;
 620
 621   if (dfm_eof (reader))
 622     return false;
 623
 624   end = &parser->fields[parser->field_cnt];
 625   for (f = parser->fields; f < end; f++)
 626     {
 627       int first_column, last_column;
 628       char *error;
 629
 630       if (!cut_field (parser, reader, &first_column, &last_column, &tmp, &s))
 631         {
 632           if (f < end - 1 && settings_get_undefined () && parser->warn_missing_fields)
 633             msg (DW, _("Missing value(s) for all variables from %s onward.  "
 634                        "These will be filled with the system-missing value "
 635                        "or blanks, as appropriate."),
 636                  f->name);
 637           for (; f < end; f++)
 638             value_set_missing (case_data_rw_idx (c, f->case_idx),
 639                                fmt_var_width (&f->format));
 640           goto exit;
 641         }
 642
 643       const char *input_encoding = dfm_reader_get_encoding (reader);
 644       error = data_in (s, input_encoding, f->format.type,
 645                        case_data_rw_idx (c, f->case_idx),
 646                        fmt_var_width (&f->format), output_encoding);
 647       if (error != NULL)
 648         parse_error (reader, f, first_column, last_column, error);
 649     }
 650
 651   s = dfm_get_record (reader);
 652   ss_ltrim (&s, parser->soft_seps);
 653   if (!ss_is_empty (s))
 654     msg (DW, _("Record ends in data not part of any field."));
 655
 656 exit:
 657   dfm_forward_record (reader);
 658   ds_destroy (&tmp);
 659   return true;
 660 }
 661 \f
 662 /* Displays a table giving information on fixed-format variable
 663    parsing on DATA LIST. */
 664 static void
 665 dump_fixed_table (const struct data_parser *parser,
 666                   const struct file_handle *fh)
 667 {
 668   /* XXX This should not be preformatted. */
 669   char *title = xasprintf (ngettext ("Reading %d record from %s.",
 670                                      "Reading %d records from %s.",
 671                                      parser->records_per_case),
 672                            parser->records_per_case, fh_get_name (fh));
 673   struct pivot_table *table = pivot_table_create__ (
 674     pivot_value_new_user_text (title, -1));
 675   free (title);
 676
 677   pivot_dimension_create (
 678     table, PIVOT_AXIS_COLUMN, N_("Attributes"),
 679     N_("Record"), N_("Columns"), N_("Format"));
 680
 681   struct pivot_dimension *variables = pivot_dimension_create (
 682     table, PIVOT_AXIS_ROW, N_("Variable"));
 683   variables->root->show_label = true;
 684   for (size_t i = 0; i < parser->field_cnt; i++)
 685     {
 686       struct field *f = &parser->fields[i];
 687
 688       /* XXX It would be better to have the actual variable here. */
 689       int variable_idx = pivot_category_create_leaf (
 690         variables->root, pivot_value_new_user_text (f->name, -1));
 691
 692       pivot_table_put2 (table, 0, variable_idx,
 693                         pivot_value_new_integer (f->record));
 694
 695       int first_column = f->first_column;
 696       int last_column = f->first_column + f->format.w - 1;
 697       char *columns = xasprintf ("%3d-%3d", first_column, last_column);
 698       pivot_table_put2 (table, 1, variable_idx,
 699                         pivot_value_new_user_text (columns, -1));
 700       free (columns);
 701
 702       char str[FMT_STRING_LEN_MAX + 1];
 703       pivot_table_put2 (table, 2, variable_idx,
 704                         pivot_value_new_user_text (
 705                           fmt_to_string (&f->format, str), -1));
 706
 707     }
 708
 709   pivot_table_submit (table);
 710 }
 711
 712 /* Displays a table giving information on free-format variable parsing
 713    on DATA LIST. */
 714 static void
 715 dump_delimited_table (const struct data_parser *parser,
 716                       const struct file_handle *fh)
 717 {
 718   struct pivot_table *table = pivot_table_create__ (
 719     pivot_value_new_text_format (N_("Reading free-form data from %s."),
 720                                  fh_get_name (fh)));
 721
 722   pivot_dimension_create (
 723     table, PIVOT_AXIS_COLUMN, N_("Attributes"), N_("Format"));
 724
 725   struct pivot_dimension *variables = pivot_dimension_create (
 726     table, PIVOT_AXIS_ROW, N_("Variable"));
 727   variables->root->show_label = true;
 728   for (size_t i = 0; i < parser->field_cnt; i++)
 729     {
 730       struct field *f = &parser->fields[i];
 731
 732       /* XXX It would be better to have the actual variable here. */
 733       int variable_idx = pivot_category_create_leaf (
 734         variables->root, pivot_value_new_user_text (f->name, -1));
 735
 736       char str[FMT_STRING_LEN_MAX + 1];
 737       pivot_table_put2 (table, 0, variable_idx,
 738                         pivot_value_new_user_text (
 739                           fmt_to_string (&f->format, str), -1));
 740     }
 741
 742   pivot_table_submit (table);
 743 }
 744
 745 /* Displays a table giving information on how PARSER will read
 746    data from FH. */
 747 void
 748 data_parser_output_description (struct data_parser *parser,
 749                                 const struct file_handle *fh)
 750 {
 751   if (parser->type == DP_FIXED)
 752     dump_fixed_table (parser, fh);
 753   else
 754     dump_delimited_table (parser, fh);
 755 }
 756 \f
/* Data parser input program: the auxiliary data behind the casereader
   that data_parser_make_active_file creates.  The parser, the reader,
   and the reference to the prototype are all released when the
   casereader is destroyed. */
struct data_parser_casereader
  {
    struct data_parser *parser; /* Parser. */
    struct dfm_reader *reader;  /* Data file reader. */
    struct caseproto *proto;    /* Format of cases. */
  };
 764
/* Forward declaration; the class is defined at the end of the file,
   after the callbacks it refers to. */
static const struct casereader_class data_parser_casereader_class;
 766
 767 /* Replaces DS's active dataset by an input program that reads data
 768    from READER according to the rules in PARSER, using DICT as
 769    the underlying dictionary.  Ownership of PARSER and READER is
 770    transferred to the input program, and ownership of DICT is
 771    transferred to the dataset. */
 772 void
 773 data_parser_make_active_file (struct data_parser *parser, struct dataset *ds,
 774                                struct dfm_reader *reader,
 775                                struct dictionary *dict,
 776                                struct casereader* (*func)(struct casereader *,
 777                                                           const struct dictionary *,
 778                                                           void *),
 779                                void *ud)
 780 {
 781   struct data_parser_casereader *r;
 782   struct casereader *casereader0;
 783   struct casereader *casereader1;
 784
 785   r = xmalloc (sizeof *r);
 786   r->parser = parser;
 787   r->reader = reader;
 788   r->proto = caseproto_ref (dict_get_proto (dict));
 789   casereader0 = casereader_create_sequential (NULL, r->proto,
 790                                              CASENUMBER_MAX,
 791                                              &data_parser_casereader_class, r);
 792
 793   if (func)
 794     casereader1 = func (casereader0, dict, ud);
 795   else
 796     casereader1 = casereader0;
 797
 798   dataset_set_dict (ds, dict);
 799   dataset_set_source (ds, casereader1);
 800 }
 801
 802
 803 static struct ccase *
 804 data_parser_casereader_read (struct casereader *reader UNUSED, void *r_)
 805 {
 806   struct data_parser_casereader *r = r_;
 807   struct ccase *c = case_create (r->proto);
 808   if (data_parser_parse (r->parser, r->reader, c))
 809     return c;
 810   else
 811     {
 812       case_unref (c);
 813       return NULL;
 814     }
 815 }
 816
 817 static void
 818 data_parser_casereader_destroy (struct casereader *reader UNUSED, void *r_)
 819 {
 820   struct data_parser_casereader *r = r_;
 821   if (dfm_reader_error (r->reader))
 822     casereader_force_error (reader);
 823   data_parser_destroy (r->parser);
 824   dfm_close_reader (r->reader);
 825   caseproto_unref (r->proto);
 826   free (r);
 827 }
 828
/* Casereader class for the data parser input program.  The initializer
   is positional: the first two members are the read and destroy
   callbacks defined above, and the last two are left null. */
static const struct casereader_class data_parser_casereader_class =
  {
    data_parser_casereader_read,      /* Reads the next case. */
    data_parser_casereader_destroy,   /* Releases the input program. */
    NULL,     /* Not provided -- presumably an optional callback; confirm
                 against the declaration of struct casereader_class. */
    NULL,     /* Likewise not provided. */
  };