1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 #include <data/case-source.h>
28 #include <data/case.h>
29 #include <data/case-source.h>
30 #include <data/data-in.h>
31 #include <data/dictionary.h>
32 #include <data/format.h>
33 #include <data/procedure.h>
34 #include <data/settings.h>
35 #include <data/transformations.h>
36 #include <data/variable.h>
37 #include <language/command.h>
38 #include <language/data-io/data-reader.h>
39 #include <language/data-io/file-handle.h>
40 #include <language/data-io/inpt-pgm.h>
41 #include <language/data-io/placement-parser.h>
42 #include <language/lexer/format-parser.h>
43 #include <language/lexer/lexer.h>
44 #include <language/lexer/variable-parser.h>
45 #include <libpspp/alloc.h>
46 #include <libpspp/assertion.h>
47 #include <libpspp/compiler.h>
48 #include <libpspp/ll.h>
49 #include <libpspp/message.h>
50 #include <libpspp/misc.h>
51 #include <libpspp/pool.h>
52 #include <libpspp/str.h>
53 #include <output/table.h>
59 #define _(msgid) gettext (msgid)
61 /* Utility function. */
63 /* Describes how to parse one variable. */
66 struct ll ll; /* List element. */
69 struct fmt_spec input; /* Input format of this field. */
70 int fv; /* First value in case. */
71 char name[LONG_NAME_LEN + 1]; /* Var name for error messages and tables. */
73 /* Fixed format only. */
74 int record; /* Record number (1-based). */
75 int first_column; /* Column numbers in record. */
78 static struct dls_var_spec *
79 ll_to_dls_var_spec (struct ll *ll)
81 return ll_data (ll, struct dls_var_spec, ll);
84 /* Constants for DATA LIST type. */
92 /* DATA LIST private data structure. */
95 struct pool *pool; /* Used for all DATA LIST storage. */
96 struct ll_list specs; /* List of dls_var_specs. */
97 struct dfm_reader *reader; /* Data file reader. */
98 enum dls_type type; /* Type of DATA LIST construct. */
99 struct variable *end; /* Variable specified on END subcommand. */
100 int record_cnt; /* Number of records. */
101 struct string delims; /* Field delimiters. */
104 static const struct case_source_class data_list_source_class;
106 static bool parse_fixed (struct pool *tmp_pool, struct data_list_pgm *);
107 static bool parse_free (struct pool *tmp_pool, struct data_list_pgm *);
108 static void dump_fixed_table (const struct ll_list *,
109 const struct file_handle *, int record_cnt);
110 static void dump_free_table (const struct data_list_pgm *,
111 const struct file_handle *);
113 static trns_free_func data_list_trns_free;
114 static trns_proc_func data_list_trns_proc;
119 struct data_list_pgm *dls;
120 int table = -1; /* Print table if nonzero, -1=undecided. */
121 struct file_handle *fh = fh_inline_file ();
122 struct pool *tmp_pool;
125 if (!in_input_program ())
126 discard_variables ();
128 dls = pool_create_container (struct data_list_pgm, pool);
129 ll_init (&dls->specs);
134 ds_init_empty (&dls->delims);
135 ds_register_pool (&dls->delims, dls->pool);
137 tmp_pool = pool_create_subpool (dls->pool);
141 if (lex_match_id ("FILE"))
144 fh = fh_parse (FH_REF_FILE | FH_REF_INLINE);
148 else if (lex_match_id ("RECORDS"))
152 if (!lex_force_int ())
154 dls->record_cnt = lex_integer ();
158 else if (lex_match_id ("END"))
162 msg (SE, _("The END subcommand may only be specified once."));
167 if (!lex_force_id ())
169 dls->end = dict_lookup_var (default_dict, tokid);
171 dls->end = dict_create_var_assert (default_dict, tokid, 0);
174 else if (token == T_ID)
176 if (lex_match_id ("NOTABLE"))
178 else if (lex_match_id ("TABLE"))
183 if (lex_match_id ("FIXED"))
185 else if (lex_match_id ("FREE"))
187 else if (lex_match_id ("LIST"))
197 msg (SE, _("Only one of FIXED, FREE, or LIST may "
203 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
206 while (!lex_match (')'))
210 if (lex_match_id ("TAB"))
212 else if (token == T_STRING && ds_length (&tokstr) == 1)
214 delim = ds_first (&tokstr);
223 ds_put_char (&dls->delims, delim);
237 fh_set_default_handle (fh);
240 dls->type = DLS_FIXED;
243 table = dls->type != DLS_FREE;
245 ok = (dls->type == DLS_FIXED ? parse_fixed : parse_free) (tmp_pool, dls);
249 if (lex_end_of_command () != CMD_SUCCESS)
254 if (dls->type == DLS_FIXED)
255 dump_fixed_table (&dls->specs, fh, dls->record_cnt);
257 dump_free_table (dls, fh);
260 dls->reader = dfm_open_reader (fh);
261 if (dls->reader == NULL)
264 if (in_input_program ())
265 add_transformation (data_list_trns_proc, data_list_trns_free, dls);
267 proc_set_source (create_case_source (&data_list_source_class, dls));
269 pool_destroy (tmp_pool);
274 data_list_trns_free (dls);
275 return CMD_CASCADING_FAILURE;
278 /* Fixed-format parsing. */
280 /* Parses all the variable specifications for DATA LIST FIXED,
281 storing them into DLS. Uses TMP_POOL for data that is not
282 needed once parsing is complete. Returns true only if
285 parse_fixed (struct pool *tmp_pool, struct data_list_pgm *dls)
287 int last_nonempty_record;
294 size_t name_cnt, name_idx;
295 struct fmt_spec *formats, *f;
298 /* Parse everything. */
299 if (!parse_record_placement (&record, &column)
300 || !parse_DATA_LIST_vars_pool (tmp_pool, &names, &name_cnt, PV_NONE)
301 || !parse_var_placements (tmp_pool, name_cnt, &formats, &format_cnt))
304 /* Create variables and var specs. */
306 for (f = formats; f < &formats[format_cnt]; f++)
307 if (!execute_placement_format (f, &record, &column))
312 struct dls_var_spec *spec;
314 name = names[name_idx++];
316 /* Create variable. */
317 width = get_format_var_width (f);
318 v = dict_create_var (default_dict, name, width);
322 struct fmt_spec output;
323 convert_fmt_ItoO (f, &output);
330 This can be acceptable if we're in INPUT
331 PROGRAM, but only if the existing variable has
332 the same width as the one we would have
334 if (!in_input_program ())
336 msg (SE, _("%s is a duplicate variable name."), name);
340 v = dict_lookup_var_assert (default_dict, name);
341 if ((width != 0) != (v->width != 0))
343 msg (SE, _("There is already a variable %s of a "
348 if (width != 0 && width != v->width)
350 msg (SE, _("There is already a string variable %s of a "
351 "different width."), name);
356 /* Create specifier for parsing the variable. */
357 spec = pool_alloc (dls->pool, sizeof *spec);
360 spec->record = record;
361 spec->first_column = column;
362 strcpy (spec->name, v->name);
363 ll_push_tail (&dls->specs, &spec->ll);
367 assert (name_idx == name_cnt);
369 if (ll_is_empty (&dls->specs))
371 msg (SE, _("At least one variable must be specified."));
375 last_nonempty_record = ll_to_dls_var_spec (ll_tail (&dls->specs))->record;
376 if (dls->record_cnt && last_nonempty_record > dls->record_cnt)
378 msg (SE, _("Variables are specified on records that "
379 "should not exist according to RECORDS subcommand."));
382 else if (!dls->record_cnt)
383 dls->record_cnt = last_nonempty_record;
388 /* Displays a table giving information on fixed-format variable
389 parsing on DATA LIST. */
391 dump_fixed_table (const struct ll_list *specs,
392 const struct file_handle *fh, int record_cnt)
396 struct dls_var_spec *spec;
399 spec_cnt = ll_count (specs);
400 t = tab_create (4, spec_cnt + 1, 0);
401 tab_columns (t, TAB_COL_DOWN, 1);
402 tab_headers (t, 0, 0, 1, 0);
403 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
404 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
405 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
406 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
407 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, spec_cnt);
408 tab_hline (t, TAL_2, 0, 3, 1);
409 tab_dim (t, tab_natural_dimensions);
412 ll_for_each (spec, struct dls_var_spec, ll, specs)
414 tab_text (t, 0, row, TAB_LEFT, spec->name);
415 tab_text (t, 1, row, TAT_PRINTF, "%d", spec->record);
416 tab_text (t, 2, row, TAT_PRINTF, "%3d-%3d",
417 spec->first_column, spec->first_column + spec->input.w - 1);
418 tab_text (t, 3, row, TAB_LEFT | TAB_FIX, fmt_to_string (&spec->input));
422 tab_title (t, ngettext ("Reading %d record from %s.",
423 "Reading %d records from %s.", record_cnt),
424 record_cnt, fh_get_name (fh));
428 /* Free-format parsing. */
430 /* Parses variable specifications for DATA LIST FREE and adds
431 them to DLS. Uses TMP_POOL for data that is not needed once
432 parsing is complete. Returns true only if successful. */
434 parse_free (struct pool *tmp_pool, struct data_list_pgm *dls)
439 struct fmt_spec input, output;
444 if (!parse_DATA_LIST_vars_pool (tmp_pool, &name, &name_cnt, PV_NONE))
449 if (!parse_format_specifier (&input)
450 || !check_input_specifier (&input, 1)
451 || !lex_force_match (')'))
453 convert_fmt_ItoO (&input, &output);
458 input = make_input_format (FMT_F, 8, 0);
459 output = *get_format ();
462 for (i = 0; i < name_cnt; i++)
464 struct dls_var_spec *spec;
467 v = dict_create_var (default_dict, name[i],
468 get_format_var_width (&input));
471 msg (SE, _("%s is a duplicate variable name."), name[i]);
474 v->print = v->write = output;
476 spec = pool_alloc (dls->pool, sizeof *spec);
479 strcpy (spec->name, v->name);
480 ll_push_tail (&dls->specs, &spec->ll);
487 /* Displays a table giving information on free-format variable parsing
490 dump_free_table (const struct data_list_pgm *dls,
491 const struct file_handle *fh)
494 struct dls_var_spec *spec;
498 spec_cnt = ll_count (&dls->specs);
500 t = tab_create (2, spec_cnt + 1, 0);
501 tab_columns (t, TAB_COL_DOWN, 1);
502 tab_headers (t, 0, 0, 1, 0);
503 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
504 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
505 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, spec_cnt);
506 tab_hline (t, TAL_2, 0, 1, 1);
507 tab_dim (t, tab_natural_dimensions);
510 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
512 tab_text (t, 0, row, TAB_LEFT, spec->name);
513 tab_text (t, 1, row, TAB_LEFT | TAB_FIX, fmt_to_string (&spec->input));
517 tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh));
522 /* Input procedure. */
524 /* Extracts a field from the current position in the current
525 record. Fields can be unquoted or quoted with single- or
526 double-quote characters.
528 *FIELD is set to the field content. The caller must not
529 or destroy this constant string.
531 After parsing the field, sets the current position in the
532 record to just past the field and any trailing delimiter.
533 Returns 0 on failure or a 1-based column number indicating the
534 beginning of the field on success. */
536 cut_field (const struct data_list_pgm *dls, struct substring *field)
538 struct substring line, p;
540 if (dfm_eof (dls->reader))
542 if (ds_is_empty (&dls->delims))
543 dfm_expand_tabs (dls->reader);
544 line = p = dfm_get_record (dls->reader);
546 if (ds_is_empty (&dls->delims))
548 bool missing_quote = false;
550 /* Skip leading whitespace. */
551 ss_ltrim (&p, ss_cstr (CC_SPACES));
555 /* Handle actual data, whether quoted or unquoted. */
556 if (ss_match_char (&p, '\''))
557 missing_quote = !ss_get_until (&p, '\'', field);
558 else if (ss_match_char (&p, '"'))
559 missing_quote = !ss_get_until (&p, '"', field);
561 ss_get_chars (&p, ss_cspan (p, ss_cstr ("," CC_SPACES)), field);
563 msg (SW, _("Quoted string extends beyond end of line."));
565 /* Skip trailing whitespace and a single comma if present. */
566 ss_ltrim (&p, ss_cstr (CC_SPACES));
567 ss_match_char (&p, ',');
569 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p));
573 if (!ss_is_empty (p))
574 ss_get_chars (&p, ss_cspan (p, ds_ss (&dls->delims)), field);
575 else if (dfm_columns_past_end (dls->reader) == 0)
577 /* A blank line or a line that ends in a delimiter has a
578 trailing blank field. */
584 /* Advance past the field.
586 Also advance past a trailing delimiter, regardless of
587 whether one actually existed. If we "skip" a delimiter
588 that was not actually there, then we will return
589 end-of-line on our next call, which is what we want. */
590 dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p) + 1);
595 static bool read_from_data_list_fixed (const struct data_list_pgm *,
597 static bool read_from_data_list_free (const struct data_list_pgm *,
599 static bool read_from_data_list_list (const struct data_list_pgm *,
602 /* Reads a case from DLS into C.
603 Returns true if successful, false at end of file or on I/O error. */
605 read_from_data_list (const struct data_list_pgm *dls, struct ccase *c)
609 dfm_push (dls->reader);
613 retval = read_from_data_list_fixed (dls, c);
616 retval = read_from_data_list_free (dls, c);
619 retval = read_from_data_list_list (dls, c);
624 dfm_pop (dls->reader);
629 /* Reads a case from the data file into C, parsing it according
630 to fixed-format syntax rules in DLS.
631 Returns true if successful, false at end of file or on I/O error. */
633 read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c)
635 struct dls_var_spec *spec;
638 if (dfm_eof (dls->reader))
641 spec = ll_to_dls_var_spec (ll_head (&dls->specs));
642 for (row = 1; row <= dls->record_cnt; row++)
644 struct substring line;
646 if (dfm_eof (dls->reader))
648 msg (SW, _("Partial case of %d of %d records discarded."),
649 row - 1, dls->record_cnt);
652 dfm_expand_tabs (dls->reader);
653 line = dfm_get_record (dls->reader);
655 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
659 data_in_finite_line (&di, ss_data (line), ss_length (line),
661 spec->first_column + spec->input.w - 1);
662 di.v = case_data_rw (c, spec->fv);
663 di.flags = DI_IMPLIED_DECIMALS;
664 di.f1 = spec->first_column;
665 di.format = spec->input;
670 dfm_forward_record (dls->reader);
676 /* Reads a case from the data file into C, parsing it according
677 to free-format syntax rules in DLS.
678 Returns true if successful, false at end of file or on I/O error. */
680 read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c)
682 struct dls_var_spec *spec;
684 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
686 struct substring field;
689 /* Cut out a field and read in a new record if necessary. */
690 while (!cut_field (dls, &field))
692 if (!dfm_eof (dls->reader))
693 dfm_forward_record (dls->reader);
694 if (dfm_eof (dls->reader))
696 if (&spec->ll != ll_head (&dls->specs))
697 msg (SW, _("Partial case discarded. The first variable "
698 "missing was %s."), spec->name);
703 di.s = ss_data (field);
704 di.e = ss_end (field);
705 di.v = case_data_rw (c, spec->fv);
707 di.f1 = dfm_get_column (dls->reader, ss_data (field));
708 di.format = spec->input;
714 /* Reads a case from the data file and parses it according to
715 list-format syntax rules.
716 Returns true if successful, false at end of file or on I/O error. */
718 read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c)
720 struct dls_var_spec *spec;
722 if (dfm_eof (dls->reader))
725 ll_for_each (spec, struct dls_var_spec, ll, &dls->specs)
727 struct substring field;
730 if (!cut_field (dls, &field))
732 if (get_undefined ())
733 msg (SW, _("Missing value(s) for all variables from %s onward. "
734 "These will be filled with the system-missing value "
735 "or blanks, as appropriate."),
737 ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs)
739 int width = get_format_var_width (&spec->input);
741 case_data_rw (c, spec->fv)->f = SYSMIS;
743 memset (case_data_rw (c, spec->fv)->s, ' ', width);
748 di.s = ss_data (field);
749 di.e = ss_end (field);
750 di.v = case_data_rw (c, spec->fv);
752 di.f1 = dfm_get_column (dls->reader, ss_data (field));
753 di.format = spec->input;
757 dfm_forward_record (dls->reader);
761 /* Destroys DATA LIST transformation DLS.
762 Returns true if successful, false if an I/O error occurred. */
764 data_list_trns_free (void *dls_)
766 struct data_list_pgm *dls = dls_;
767 dfm_close_reader (dls->reader);
768 pool_destroy (dls->pool);
772 /* Handle DATA LIST transformation DLS, parsing data into C. */
774 data_list_trns_proc (void *dls_, struct ccase *c, casenum_t case_num UNUSED)
776 struct data_list_pgm *dls = dls_;
779 if (read_from_data_list (dls, c))
780 retval = TRNS_CONTINUE;
781 else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1)
783 /* An I/O error, or encountering end of file for a second
784 time, should be escalated into a more serious error. */
788 retval = TRNS_END_FILE;
790 /* If there was an END subcommand handle it. */
791 if (dls->end != NULL)
793 double *end = &case_data_rw (c, dls->end->fv)->f;
794 if (retval == TRNS_DROP_CASE)
797 retval = TRNS_END_FILE;
806 /* Reads all the records from the data file and passes them to
808 Returns true if successful, false if an I/O error occurred. */
810 data_list_source_read (struct case_source *source,
812 write_case_func *write_case, write_case_data wc_data)
814 struct data_list_pgm *dls = source->aux;
820 if (!read_from_data_list (dls, c))
821 return !dfm_reader_error (dls->reader);
823 dfm_push (dls->reader);
824 ok = write_case (wc_data);
825 dfm_pop (dls->reader);
831 /* Destroys the source's internal data. */
833 data_list_source_destroy (struct case_source *source)
835 data_list_trns_free (source->aux);
838 static const struct case_source_class data_list_source_class =
842 data_list_source_read,
843 data_list_source_destroy,