1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 #include <libpspp/alloc.h>
23 #include <data/case.h>
24 #include <language/command.h>
25 #include <libpspp/compiler.h>
26 #include <data/data-in.h>
27 #include <language/data-io/data-reader.h>
28 #include <data/dictionary.h>
29 #include <libpspp/message.h>
30 #include <language/data-io/file-handle.h>
31 #include <data/format.h>
32 #include <language/lexer/lexer.h>
33 #include <libpspp/str.h>
34 #include <data/variable.h>
35 #include <procedure.h>
38 #define _(msgid) gettext (msgid)
40 /* Defines the three types of complex files read by FILE TYPE. */
48 /* Limited variable column specifications. */
/* NOTE(review): the declaration header and braces of this type are not
   visible in this view.  The members below are the ones accessed through
   struct col_spec pointers in parse_col_spec and create_col_var. */
51 char name[LONG_NAME_LEN + 1]; /* Variable name.  cmd_file_type treats name[0] == 0 as subcommand not given. */
52 int fc, nc; /* First column (1-based), # of columns. */
53 int fmt; /* Format type; used as an index into formats[]. */
54 struct variable *v; /* Variable; assigned by create_col_var. */
57 /* RCT_* record type constants. */
/* The values are octal literals, one distinct bit each, so they can be
   combined in the flags member of a record type with |= and tested with &,
   as cmd_record_type does. */
60 RCT_OTHER = 001, /* 1=OTHER. */
61 RCT_SKIP = 002, /* 1=SKIP. */
62 RCT_DUPLICATE = 004, /* DUPLICATE: 0=NOWARN, 1=WARN. */
63 RCT_MISSING = 010, /* MISSING: 0=NOWARN, 1=WARN. */
64 RCT_SPREAD = 020 /* SPREAD: 0=NO, 1=YES. */
67 /* Represents a RECORD TYPE command. */
/* One node is appended to the list in the FILE TYPE program for each
   RECORD TYPE command; file_type_source_read walks the list through next.
   NOTE(review): the struct header and braces are not visible in this view. */
70 struct record_type *next; /* Next record type in the list. */
71 unsigned flags; /* RCT_* constants. */
72 union value *v; /* Vector of values for this record type (member c for string record formats, member f for numeric ones). */
73 int nv; /* Length of vector V. */
74 struct col_spec case_sbc; /* CASE subcommand. */
75 int ft, lt; /* First, last transformation index. */
78 /* Represents a FILE TYPE input program. */
/* Allocated and filled in by cmd_file_type, extended by each RECORD TYPE
   command, given its case size by cmd_end_file_type, and later reached
   through the aux pointer of the case source.
   NOTE(review): the struct header and braces are not visible in this view,
   and the comment that starts on the had_rec_type line has no visible
   terminator of its own. */
81 int type; /* One of the FTY_* constants. */
82 struct dfm_reader *reader; /* Data file to read. */
83 struct col_spec record; /* RECORD subcommand. */
84 struct col_spec case_sbc; /* CASE subcommand. */
85 int wild; /* 0=NOWARN, 1=WARN. */
86 int duplicate; /* 0=NOWARN, 1=WARN. */
87 int missing; /* 0=NOWARN, 1=WARN, 2=CASE. */
88 int ordered; /* 0=NO, 1=YES. */
89 int had_rec_type; /* 1=Had a RECORD TYPE command.
90 RECORD TYPE must precede the first
92 struct record_type *recs_head; /* List of record types. */
93 struct record_type *recs_tail; /* Last in list of record types. */
94 size_t case_size; /* Case size in bytes. */
/* Helpers for column specifications; both are defined later in this file. */
97 static int parse_col_spec (struct col_spec *, const char *);
98 static void create_col_var (struct col_spec *c);
/* Entry point for the FILE TYPE command. */
100 int cmd_file_type (void);
102 /* Parses FILE TYPE command. */
/* Allocates a FILE TYPE program, reads the file type keyword (MIXED,
   GROUPED or NESTED), then the FILE, RECORD, CASE, WILD, DUPLICATE, MISSING
   and ORDERED subcommands, validates the result, opens the data file reader
   and installs the program as vfm_source.
   NOTE(review): the function header, braces and a number of short
   statements are not visible in this view, so the control flow between the
   lines below is only partly shown. */
/* NOTE(review): fty is a function-local static that is overwritten by a
   fresh xmalloc on every call and then handed to create_case_source.
   Nothing visible here needs the static storage; the original FIXME asks
   the same question. */
106 static struct file_type_pgm *fty; /* FIXME: static? WTF? */
/* The data file defaults to the inline file; the FILE subcommand replaces it. */
107 struct file_handle *fh = fh_inline_file ();
110 discard_variables ();
112 fty = xmalloc (sizeof *fty);
/* Empty names mark RECORD and CASE as not yet given; all options start at 0
   and the record type list starts empty. */
114 fty->record.name[0] = 0;
115 fty->case_sbc.name[0] = 0;
116 fty->wild = fty->duplicate = fty->missing = fty->ordered = 0;
117 fty->had_rec_type = 0;
118 fty->recs_head = fty->recs_tail = NULL;
/* File type keyword. */
120 if (lex_match_id ("MIXED"))
121 fty->type = FTY_MIXED;
122 else if (lex_match_id ("GROUPED"))
124 fty->type = FTY_GROUPED;
130 else if (lex_match_id ("NESTED"))
131 fty->type = FTY_NESTED;
134 msg (SE, _("MIXED, GROUPED, or NESTED expected."));
/* Subcommands.  FILE accepts either a file handle or the inline file. */
140 if (lex_match_id ("FILE"))
143 fh = fh_parse (FH_REF_FILE | FH_REF_INLINE);
/* RECORD: column specification of the record identifier; ####RECD is the
   default name handed to parse_col_spec. */
147 else if (lex_match_id ("RECORD"))
150 if (!parse_col_spec (&fty->record, "####RECD"))
/* CASE: rejected for FILE TYPE MIXED; ####CASE is the default name. */
153 else if (lex_match_id ("CASE"))
155 if (fty->type == FTY_MIXED)
157 msg (SE, _("The CASE subcommand is not valid on FILE TYPE "
163 if (!parse_col_spec (&fty->case_sbc, "####CASE"))
166 else if (lex_match_id ("WILD"))
169 if (lex_match_id ("WARN"))
171 else if (lex_match_id ("NOWARN"))
175 msg (SE, _("WARN or NOWARN expected after WILD."));
/* DUPLICATE: rejected for MIXED; the CASE setting is limited to NESTED. */
179 else if (lex_match_id ("DUPLICATE"))
181 if (fty->type == FTY_MIXED)
183 msg (SE, _("The DUPLICATE subcommand is not valid on "
184 "FILE TYPE MIXED."));
189 if (lex_match_id ("WARN"))
191 else if (lex_match_id ("NOWARN"))
193 else if (lex_match_id ("CASE"))
195 if (fty->type != FTY_NESTED)
197 msg (SE, _("DUPLICATE=CASE is only valid on "
198 "FILE TYPE NESTED."));
206 msg (SE, _("WARN%s expected after DUPLICATE."),
207 (fty->type == FTY_NESTED ? _(", NOWARN, or CASE")
/* MISSING: rejected for MIXED. */
212 else if (lex_match_id ("MISSING"))
214 if (fty->type == FTY_MIXED)
216 msg (SE, _("The MISSING subcommand is not valid on "
217 "FILE TYPE MIXED."));
222 if (lex_match_id ("NOWARN"))
224 else if (lex_match_id ("WARN"))
228 msg (SE, _("WARN or NOWARN after MISSING."));
/* ORDERED: accepted only for GROUPED. */
232 else if (lex_match_id ("ORDERED"))
234 if (fty->type != FTY_GROUPED)
236 msg (SE, _("ORDERED is only valid on FILE TYPE GROUPED."));
241 if (lex_match_id ("YES"))
243 else if (lex_match_id ("NO"))
247 msg (SE, _("YES or NO expected after ORDERED."));
253 lex_error (_("while expecting a valid subcommand"));
/* Validation after parsing: an empty RECORD name means RECORD was never given. */
258 if (fty->record.name[0] == 0)
260 msg (SE, _("The required RECORD subcommand was not present."));
/* Checks for FILE TYPE GROUPED: CASE must be present, and the comparison
   below is case-insensitive. */
264 if (fty->type == FTY_GROUPED)
266 if (fty->case_sbc.name[0] == 0)
268 msg (SE, _("The required CASE subcommand was not present."));
272 if (!strcasecmp (fty->case_sbc.name, fty->record.name))
274 msg (SE, _("CASE and RECORD must specify different variable "
280 fty->reader = dfm_open_reader (fh);
281 if (fty->reader == NULL)
283 fh_set_default_handle (fh);
/* Create the dictionary variables for RECORD and, when it was given, CASE. */
285 create_col_var (&fty->record);
286 if (fty->case_sbc.name[0])
287 create_col_var (&fty->case_sbc);
/* Install this program as the active case source. */
288 vfm_source = create_case_source (&file_type_source_class, fty);
/* NOTE(review): presumably the shared error exit; its label and any cleanup
   of fty are not visible in this view -- confirm fty is released there. */
294 return CMD_CASCADING_FAILURE;
297 /* Creates a variable with attributes specified by struct col_spec C, and
298 stores it into C->V. */
/* The variable is created in default_dict under the name C->name.  The
   assignment of width is not visible in this view; presumably it depends on
   whether the format belongs to the string category -- confirm.
   NOTE(review): the result of dict_create_var is stored with no visible
   NULL check.  Confirm that a name clash (for example CASE and RECORD
   sharing a name on a file type other than GROUPED) cannot reach this
   point. */
300 create_col_var (struct col_spec *c)
304 if (formats[c->fmt].cat & FCAT_STRING)
308 c->v = dict_create_var (default_dict, c->name, width);
311 /* Parses variable, column, type specifications for a variable. */
/* Fills in C from the token stream: a name (either the current identifier
   token or DEF_NAME), a first column, an ending column and a format
   specifier name followed by a closing parenthesis.  The return value on
   the success path is whatever check_input_specifier reports for the
   resulting format.
   NOTE(review): the function header line with the return type, the braces
   and the failure returns are not visible in this view. */
313 parse_col_spec (struct col_spec *c, const char *def_name)
315 struct fmt_spec spec;
/* NOTE(review): both strcpy calls write into c->name without a length
   check; this relies on tokid and DEF_NAME fitting in LONG_NAME_LEN + 1
   bytes -- confirm against the lexer. */
320 strcpy (c->name, tokid);
324 strcpy (c->name, def_name);
/* First column. */
327 if (!lex_force_int ())
329 c->fc = lex_integer ();
332 msg (SE, _("Column value must be positive."));
/* Ending column.  NOTE(review): c->nc receives the raw integer here while
   the struct documents nc as a column count; any conversion is on a line
   not visible in this view. */
338 lex_negative_to_dash ();
341 if (!lex_force_int ())
343 c->nc = lex_integer ();
348 msg (SE, _("Ending column precedes beginning column."));
357 /* Format specifier. */
361 if (!lex_force_id ())
363 c->fmt = parse_format_specifier_name (&cp, 0);
368 msg (SE, _("Bad format specifier name."));
372 if (!lex_force_match (')'))
381 return check_input_specifier (&spec, 1);
386 /* Parse the RECORD TYPE command. */
/* Builds a record type from the command (record values or OTHER, then the
   SKIP, CASE, DUPLICATE, MISSING and SPREAD subcommands) and appends a node
   for it to the record type list of the enclosing FILE TYPE program.
   NOTE(review): the return-type line, braces and several short statements
   are not visible in this view. */
388 cmd_record_type (void)
390 struct file_type_pgm *fty;
391 struct record_type *rct;
393 /* Make sure we're inside a FILE TYPE structure. */
394 if (pgm_state != STATE_INPUT
395 || !case_source_is_class (vfm_source, &file_type_source_class))
397 msg (SE, _("This command may only appear within a "
398 "FILE TYPE/END FILE TYPE structure."));
399 return CMD_CASCADING_FAILURE;
402 fty = vfm_source->aux;
404 /* Initialize the record_type structure. */
405 rct = xmalloc (sizeof *rct);
/* The DUPLICATE and MISSING flags are switched on here under conditions
   that are not visible in this view.  The CASE specification is inherited
   from FILE TYPE when one was given there. */
409 rct->flags |= RCT_DUPLICATE;
411 rct->flags |= RCT_MISSING;
415 if (fty->case_sbc.name[0])
416 rct->case_sbc = fty->case_sbc;
/* A previous record type flagged OTHER must have been the last one. */
418 if (fty->recs_tail && (fty->recs_tail->flags & RCT_OTHER))
420 msg (SE, _("OTHER may appear only on the last RECORD TYPE command."));
/* Close the transformation range of the previous record type; unless it is
   flagged SKIP, an empty range (ft == lt) is reported as an error. */
426 fty->recs_tail->lt = n_trns - 1;
427 if (!(fty->recs_tail->flags & RCT_SKIP)
428 && fty->recs_tail->ft == fty->recs_tail->lt)
430 msg (SE, _("No input commands (DATA LIST, REPEATING DATA) "
431 "for above RECORD TYPE."));
436 /* Parse record type values. */
437 if (lex_match_id ("OTHER"))
438 rct->flags |= RCT_OTHER;
/* Otherwise collect numbers or strings into the vector rct->v, which is
   resized with xnrealloc to mv elements. */
443 while (lex_is_number () || token == T_STRING)
448 rct->v = xnrealloc (rct->v, mv, sizeof *rct->v);
/* String record formats store a separately allocated buffer of
   fty->record.nc + 1 bytes per value; numeric ones store tokval. */
451 if (formats[fty->record.fmt].cat & FCAT_STRING)
453 if (!lex_force_string ())
455 rct->v[rct->nv].c = xmalloc (fty->record.nc + 1);
456 buf_copy_str_rpad (rct->v[rct->nv].c, fty->record.nc + 1,
461 if (!lex_force_num ())
463 rct->v[rct->nv].f = tokval;
472 /* Parse the rest of the subcommands. */
475 if (lex_match_id ("SKIP"))
476 rct->flags |= RCT_SKIP;
477 else if (lex_match_id ("CASE"))
479 if (fty->type == FTY_MIXED)
481 msg (SE, _("The CASE subcommand is not allowed on "
482 "the RECORD TYPE command for FILE TYPE MIXED."));
/* The empty default name lets the next test detect a name typed by the user. */
487 if (!parse_col_spec (&rct->case_sbc, ""))
489 if (rct->case_sbc.name[0])
491 msg (SE, _("No variable name may be specified for the "
492 "CASE subcommand on RECORD TYPE."));
/* The XOR compares the format categories of this CASE specification and the
   one from FILE TYPE; the rest of the condition is on a line not visible here. */
496 if ((formats[rct->case_sbc.fmt].cat ^ formats[fty->case_sbc.fmt].cat)
499 msg (SE, _("The CASE column specification on RECORD TYPE "
500 "must give a format specifier that is the "
501 "same type as that of the CASE column "
502 "specification given on FILE TYPE."));
506 else if (lex_match_id ("DUPLICATE"))
509 if (lex_match_id ("WARN"))
510 rct->flags |= RCT_DUPLICATE;
511 else if (lex_match_id ("NOWARN"))
512 rct->flags &= ~RCT_DUPLICATE;
515 msg (SE, _("WARN or NOWARN expected on DUPLICATE "
520 else if (lex_match_id ("MISSING"))
523 if (lex_match_id ("WARN"))
524 rct->flags |= RCT_MISSING;
525 else if (lex_match_id ("NOWARN"))
526 rct->flags &= ~RCT_MISSING;
529 msg (SE, _("WARN or NOWARN expected on MISSING subcommand."));
533 else if (lex_match_id ("SPREAD"))
536 if (lex_match_id ("YES"))
537 rct->flags |= RCT_SPREAD;
538 else if (lex_match_id ("NO"))
539 rct->flags &= ~RCT_SPREAD;
542 msg (SE, _("YES or NO expected on SPREAD subcommand."));
548 lex_error (_("while expecting a valid subcommand"));
/* Append a freshly allocated node to the list (first line: list already has
   a tail; second line: list was empty). */
554 fty->recs_tail = fty->recs_tail->next = xmalloc (sizeof *fty->recs_tail);
556 fty->recs_head = fty->recs_tail = xmalloc (sizeof *fty->recs_tail);
/* NOTE(review): rct is declared above as a pointer and assigned from
   xmalloc, so &rct is the address of the local pointer variable, not of the
   record type it points to.  This memcpy therefore reads
   sizeof *fty->recs_tail bytes starting at that pointer variable, which
   runs past it and does not copy the parsed data.  The source argument
   looks like it should be rct.  The block allocated for rct is also not
   visibly released or linked after the copy -- confirm and fix. */
557 memcpy (fty->recs_tail, &rct, sizeof *fty->recs_tail);
/* NOTE(review): presumably the error exit, which walks the values collected
   so far when the record format is a string format; the loop body and the
   label are not visible in this view. */
562 if (formats[fty->record.fmt].cat & FCAT_STRING)
566 for (i = 0; i < rct->nv; i++)
572 return CMD_CASCADING_FAILURE;
/* Parses END FILE TYPE: records the case size of the dictionary in the FILE
   TYPE program, closes the transformation range of the last record type and
   finishes the command.
   NOTE(review): the return-type line, braces and several short statements
   are not visible in this view. */
577 int cmd_end_file_type (void);
579 cmd_end_file_type (void)
581 struct file_type_pgm *fty;
/* NOTE(review): this guard reports the error when vfm_source IS a FILE TYPE
   source, whereas cmd_record_type issues the same message using the negated
   test (!case_source_is_class).  As written, the aux pointer is only read
   as a FILE TYPE program when the source is not of that class.  The missing
   negation looks like an inverted condition -- confirm and fix. */
583 if (pgm_state != STATE_INPUT
584 || case_source_is_class (vfm_source, &file_type_source_class))
586 msg (SE, _("This command may only appear within a "
587 "FILE TYPE/END FILE TYPE structure."));
588 return CMD_CASCADING_FAILURE;
590 fty = vfm_source->aux;
591 fty->case_size = dict_get_case_size (default_dict);
/* Close the transformation range of the last record type; unless it is
   flagged SKIP, an empty range (ft == lt) is reported as an error.
   NOTE(review): recs_tail is dereferenced here; the test that guards it is
   presumably on a line not visible in this view, given the message about no
   commands further down. */
595 fty->recs_tail->lt = n_trns - 1;
596 if (!(fty->recs_tail->flags & RCT_SKIP)
597 && fty->recs_tail->ft == fty->recs_tail->lt)
599 msg (SE, _("No input commands (DATA LIST, REPEATING DATA) "
600 "on above RECORD TYPE."));
606 msg (SE, _("No commands between FILE TYPE and END FILE TYPE."));
612 return lex_end_of_command ();
/* NOTE(review): the comment below mentions an I/O error, but the visible
   failures above are command syntax and ordering errors. */
615 /* Come here on I/O error. */
616 discard_variables ();
617 return CMD_CASCADING_FAILURE;
620 /* FILE TYPE runtime. */
622 /*static void read_from_file_type_mixed(void);
623 static void read_from_file_type_grouped(void);
624 static void read_from_file_type_nested(void); */
626 /* Reads any number of cases into case C and calls write_case()
627 for each one. Compare data-list.c:read_from_data_list.
628 Returns true if successful, false if an I/O error occurred. */
/* For each record of the data file, the record identifier is extracted
   using the RECORD column specification and looked up in the list of record
   types; an unmatched identifier produces an "unknown record type" warning.
   The return value is the negation of dfm_reader_error for the reader.
   NOTE(review), all to be confirmed against the complete file:
   - write_case and wc_data are marked UNUSED, so nothing visible here calls
     write_case despite the description above;
   - the case C is used below but its parameter declaration is on a line
     that is not visible in this view;
   - the case labels near the end call read_from_file_type_mixed,
     read_from_file_type_grouped and read_from_file_type_nested without
     arguments, while the only visible prototypes for them are commented
     out;
   - the comment that starts with Arrive here has no visible terminator in
     this view. */
630 file_type_source_read (struct case_source *source,
632 write_case_func *write_case UNUSED,
633 write_case_data wc_data UNUSED)
635 struct file_type_pgm *fty = source->aux;
636 struct fmt_spec format;
/* dfm_push here is paired with dfm_pop before the return. */
638 dfm_push (fty->reader);
/* Format used to read the record identifier: the type and width come from
   the RECORD specification. */
640 format.type = fty->record.fmt;
641 format.w = fty->record.nc;
643 while (!dfm_eof (fty->reader))
645 struct fixed_string line;
646 struct record_type *iter;
650 dfm_expand_tabs (fty->reader);
651 dfm_get_record (fty->reader, &line);
/* String record identifiers: the value is read into the string storage of
   the RECORD variable inside the case and compared bytewise over
   fty->record.nc bytes. */
652 if (formats[fty->record.fmt].cat & FCAT_STRING)
656 v.c = case_data_rw (c, fty->record.v->fv)->s;
658 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
659 fty->record.fc, fty->record.fc + fty->record.nc);
660 di.v = (union value *) v.c;
662 di.f1 = fty->record.fc;
666 for (iter = fty->recs_head; iter; iter = iter->next)
668 if (iter->flags & RCT_OTHER)
670 for (i = 0; i < iter->nv; i++)
671 if (!memcmp (iter->v[i].c, v.c, fty->record.nc))
675 msg (SW, _("Unknown record type \"%.*s\"."), fty->record.nc, v.c);
/* Numeric record identifiers: the value is stored into the RECORD variable
   of the case.  NOTE(review): the match below uses exact floating-point
   equality. */
681 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
682 fty->record.fc, fty->record.fc + fty->record.nc);
685 di.f1 = fty->record.fc;
689 case_data_rw (c, fty->record.v->fv)->f = v.f;
690 for (iter = fty->recs_head; iter; iter = iter->next)
692 if (iter->flags & RCT_OTHER)
694 for (i = 0; i < iter->nv; i++)
695 if (iter->v[i].f == v.f)
699 msg (SW, _("Unknown record type %g."), v.f);
701 dfm_forward_record (fty->reader);
705 /* Arrive here if there is a matching record_type, which is in
707 dfm_forward_record (fty->reader);
712 case FTY_MIXED: read_from_file_type_mixed(); break;
713 case FTY_GROUPED: read_from_file_type_grouped(); break;
714 case FTY_NESTED: read_from_file_type_nested(); break;
718 dfm_pop (fty->reader);
720 return !dfm_reader_error (fty->reader);
/* Destroys a FILE TYPE case source: cancels the transformations, closes the
   data file reader and walks the list of record types.
   NOTE(review): the loop body is not visible in this view.  Because the
   loop advances through the local next rather than iter->next, it
   presumably saves iter->next and then releases each node -- confirm, and
   confirm that the value vectors and the program itself are released as
   well. */
724 file_type_source_destroy (struct case_source *source)
726 struct file_type_pgm *fty = source->aux;
727 struct record_type *iter, *next;
729 cancel_transformations ();
730 dfm_close_reader (fty->reader);
731 for (iter = fty->recs_head; iter; iter = next)
/* Case source class for FILE TYPE input.  The initializer lists the read
   and destroy functions defined above; the parsing commands use the address
   of this object to recognize a FILE TYPE source.
   NOTE(review): the remaining initializer entries and the closing brace are
   not visible in this view. */
738 const struct case_source_class file_type_source_class =
742 file_type_source_read,
743 file_type_source_destroy,