1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 #include "dictionary.h"
29 #include "file-handle.h"
36 /* Defines the three types of complex files read by FILE TYPE. */
44 /* Limited variable column specifications. */
/* Fields of the column specification: parse_col_spec stores into name, fc,
   nc and fmt, and create_col_var stores the created variable into v.
   NOTE(review): the enclosing struct header and closing brace are not
   visible in this excerpt. */
47 char name[LONG_NAME_LEN + 1]; /* Variable name. */
48 int fc, nc; /* First column (1-based), # of columns. */
49 int fmt; /* Format type. */
50 struct variable *v; /* Variable. */
53 /* RCT_* record type constants. */
/* Each constant is a distinct bit (written in octal) so that several can be
   combined in record_type.flags with |= and cleared with &= ~, as
   cmd_record_type does. */
56 RCT_OTHER = 001, /* 1=OTHER. */
57 RCT_SKIP = 002, /* 1=SKIP. */
58 RCT_DUPLICATE = 004, /* DUPLICATE: 0=NOWARN, 1=WARN. */
59 RCT_MISSING = 010, /* MISSING: 0=NOWARN, 1=WARN. */
60 RCT_SPREAD = 020 /* SPREAD: 0=NO, 1=YES. */
63 /* Represents a RECORD TYPE command. */
/* One node of the singly linked list whose ends are kept in
   file_type_pgm.recs_head and file_type_pgm.recs_tail. */
66 struct record_type *next;
67 unsigned flags; /* RCT_* constants. */
68 union value *v; /* Vector of values for this record type. */
69 int nv; /* Length of vector V. */
70 struct col_spec case_sbc; /* CASE subcommand. */
71 int ft, lt; /* First, last transformation index. */
/* lt is assigned n_trns - 1 when the following RECORD TYPE or END FILE TYPE
   command is parsed; a record type without RCT_SKIP whose ft equals lt is
   reported there as having no input commands. */
74 /* Represents a FILE TYPE input program. Does not contain a
75 trns_header because it's never submitted as a transformation. */
/* Allocated by cmd_file_type and passed to create_case_source as the
   source's auxiliary data; the later commands fetch it back from
   vfm_source->aux. */
78 int type; /* One of the FTY_* constants. */
79 struct dfm_reader *reader; /* Data file to read. */
80 struct col_spec record; /* RECORD subcommand. */
81 struct col_spec case_sbc; /* CASE subcommand. */
82 int wild; /* 0=NOWARN, 1=WARN. */
83 int duplicate; /* 0=NOWARN, 1=WARN. */
/* NOTE(review): cmd_file_type accepts the CASE keyword after DUPLICATE, not
   after MISSING, so the "2=CASE" value documented on `missing' below may
   really belong to `duplicate' -- confirm against the assignments, which
   are not visible in this excerpt. */
84 int missing; /* 0=NOWARN, 1=WARN, 2=CASE. */
85 int ordered; /* 0=NO, 1=YES. */
86 int had_rec_type; /* 1=Had a RECORD TYPE command.
87 RECORD TYPE must precede the first
89 struct record_type *recs_head; /* List of record types. */
90 struct record_type *recs_tail; /* Last in list of record types. */
91 size_t case_size; /* Case size in bytes. */
/* File-local helpers used by the FILE TYPE and RECORD TYPE parsers; both are
   defined further down in this file. */
94 static int parse_col_spec (struct col_spec *, const char *);
95 static void create_col_var (struct col_spec *c);
97 int cmd_file_type (void);
99 /* Parses FILE TYPE command. */
/* Calls discard_variables, allocates and zero-initializes a file_type_pgm,
   reads the file type keyword (MIXED, GROUPED or NESTED) and then the
   subcommands FILE, RECORD, CASE, WILD, DUPLICATE, MISSING and ORDERED.
   After the required-subcommand checks it opens the data file reader,
   creates the RECORD variable (and the CASE variable if one was named) and
   installs the program as vfm_source.
   NOTE(review): braces, return statements and most of the assignments made
   by the subcommand branches are not visible in this excerpt, so the exact
   error paths and return values cannot be described here. */
/* NOTE(review): fty is declared static although it is reassigned from
   xmalloc on every call; the original FIXME already questions this. */
103 static struct file_type_pgm *fty; /* FIXME: static? WTF? */
104 struct file_handle *fh = NULL;
107 discard_variables ();
109 fty = xmalloc (sizeof *fty);
/* An empty name marks a column specification as "not given"; the checks
   after parsing rely on this. */
111 fty->record.name[0] = 0;
112 fty->case_sbc.name[0] = 0;
113 fty->wild = fty->duplicate = fty->missing = fty->ordered = 0;
114 fty->had_rec_type = 0;
115 fty->recs_head = fty->recs_tail = NULL;
117 if (lex_match_id ("MIXED"))
118 fty->type = FTY_MIXED;
119 else if (lex_match_id ("GROUPED"))
121 fty->type = FTY_GROUPED;
127 else if (lex_match_id ("NESTED"))
128 fty->type = FTY_NESTED;
131 msg (SE, _("MIXED, GROUPED, or NESTED expected."));
137 if (lex_match_id ("FILE"))
144 else if (lex_match_id ("RECORD"))
/* "####RECD" is the default variable name handed to parse_col_spec. */
147 if (!parse_col_spec (&fty->record, "####RECD"))
150 else if (lex_match_id ("CASE"))
152 if (fty->type == FTY_MIXED)
154 msg (SE, _("The CASE subcommand is not valid on FILE TYPE "
160 if (!parse_col_spec (&fty->case_sbc, "####CASE"))
163 else if (lex_match_id ("WILD"))
166 if (lex_match_id ("WARN"))
168 else if (lex_match_id ("NOWARN"))
172 msg (SE, _("WARN or NOWARN expected after WILD."));
176 else if (lex_match_id ("DUPLICATE"))
178 if (fty->type == FTY_MIXED)
180 msg (SE, _("The DUPLICATE subcommand is not valid on "
181 "FILE TYPE MIXED."));
186 if (lex_match_id ("WARN"))
188 else if (lex_match_id ("NOWARN"))
190 else if (lex_match_id ("CASE"))
192 if (fty->type != FTY_NESTED)
194 msg (SE, _("DUPLICATE=CASE is only valid on "
195 "FILE TYPE NESTED."));
203 msg (SE, _("WARN%s expected after DUPLICATE."),
204 (fty->type == FTY_NESTED ? _(", NOWARN, or CASE")
209 else if (lex_match_id ("MISSING"))
211 if (fty->type == FTY_MIXED)
213 msg (SE, _("The MISSING subcommand is not valid on "
214 "FILE TYPE MIXED."));
219 if (lex_match_id ("NOWARN"))
221 else if (lex_match_id ("WARN"))
/* NOTE(review): unlike the sibling diagnostics for WILD and ORDERED, this
   message lacks the word "expected". */
225 msg (SE, _("WARN or NOWARN after MISSING."));
229 else if (lex_match_id ("ORDERED"))
231 if (fty->type != FTY_GROUPED)
233 msg (SE, _("ORDERED is only valid on FILE TYPE GROUPED."));
238 if (lex_match_id ("YES"))
240 else if (lex_match_id ("NO"))
244 msg (SE, _("YES or NO expected after ORDERED."));
250 lex_error (_("while expecting a valid subcommand"));
/* Post-parse validation: RECORD is always required; GROUPED additionally
   requires CASE. */
255 if (fty->record.name[0] == 0)
257 msg (SE, _("The required RECORD subcommand was not present."));
261 if (fty->type == FTY_GROUPED)
263 if (fty->case_sbc.name[0] == 0)
265 msg (SE, _("The required CASE subcommand was not present."));
/* The two variable names are compared without regard to case. */
269 if (!strcasecmp (fty->case_sbc.name, fty->record.name))
271 msg (SE, _("CASE and RECORD must specify different variable "
/* NOTE(review): fh still holds its NULL initializer here unless the FILE
   subcommand branch, whose body is not visible in this excerpt, assigned
   it -- confirm how dfm_open_reader treats a null handle. */
277 fty->reader = dfm_open_reader (fh);
278 if (fty->reader == NULL)
282 create_col_var (&fty->record);
283 if (fty->case_sbc.name[0])
284 create_col_var (&fty->case_sbc);
285 vfm_source = create_case_source (&file_type_source_class, fty);
294 /* Creates a variable with attributes specified by struct col_spec C, and
295 stores it into C->V. */
/* The variable is created in default_dict under C->name.  The width passed
   to dict_create_var is chosen according to whether C's format belongs to
   the string category (FCAT_STRING).
   NOTE(review): the declaration of `width' and the statements that assign
   it are not visible in this excerpt. */
297 create_col_var (struct col_spec *c)
301 if (formats[c->fmt].cat & FCAT_STRING)
305 c->v = dict_create_var (default_dict, c->name, width);
308 /* Parses variable, column, type specifications for a variable. */
/* Fills in C->name (from the current identifier token or, failing that,
   from DEF_NAME), C->fc, C->nc and C->fmt, and returns the result of
   check_input_specifier on the assembled format specification.
   NOTE(review): the conditions selecting between the two strcpy calls, the
   range checks that trigger the diagnostics, and the early returns are not
   visible in this excerpt. */
310 parse_col_spec (struct col_spec *c, const char *def_name)
312 struct fmt_spec spec;
/* NOTE(review): both copies are unbounded; the destination is
   name[LONG_NAME_LEN + 1].  Confirm that tokid and every DEF_NAME passed by
   callers are guaranteed to fit. */
317 strcpy (c->name, tokid);
321 strcpy (c->name, def_name);
324 if (!lex_force_int ())
326 c->fc = lex_integer ();
329 msg (SE, _("Column value must be positive."));
335 lex_negative_to_dash ();
338 if (!lex_force_int ())
/* NOTE(review): the integer read here is diagnosed below as an "ending
   column", while the field is documented as a column count; presumably it
   is converted on a line not visible here -- confirm. */
340 c->nc = lex_integer ();
345 msg (SE, _("Ending column precedes beginning column."));
354 /* Format specifier. */
358 if (!lex_force_id ())
360 c->fmt = parse_format_specifier_name (&cp, 0);
365 msg (SE, _("Bad format specifier name."));
369 if (!lex_force_match (')'))
378 return check_input_specifier (&spec, 1);
383 /* Parse the RECORD TYPE command. */
/* Requires that the active source is a FILE TYPE program, closes off the
   previous record type's transformation range, reads the record type values
   (or OTHER), then the SKIP, CASE, DUPLICATE, MISSING and SPREAD
   subcommands, and finally appends a node to the program's list of record
   types.
   NOTE(review): braces, return statements and several initializations are
   not visible in this excerpt. */
385 cmd_record_type (void)
387 struct file_type_pgm *fty;
388 struct record_type *rct;
390 /* Make sure we're inside a FILE TYPE structure. */
391 if (pgm_state != STATE_INPUT
392 || !case_source_is_class (vfm_source, &file_type_source_class))
394 msg (SE, _("This command may only appear within a "
395 "FILE TYPE/END FILE TYPE structure."));
399 fty = vfm_source->aux;
401 /* Initialize the record_type structure. */
402 rct = xmalloc (sizeof *rct);
406 rct->flags |= RCT_DUPLICATE;
408 rct->flags |= RCT_MISSING;
/* Inherit the FILE TYPE command's CASE specification when one was given. */
412 if (fty->case_sbc.name[0])
413 rct->case_sbc = fty->case_sbc;
415 if (fty->recs_tail && (fty->recs_tail->flags & RCT_OTHER))
417 msg (SE, _("OTHER may appear only on the last RECORD TYPE command."));
/* NOTE(review): this dereferences fty->recs_tail, which is NULL until the
   first record type has been appended; presumably a guard sits on a line
   not visible here -- confirm. */
423 fty->recs_tail->lt = n_trns - 1;
424 if (!(fty->recs_tail->flags & RCT_SKIP)
425 && fty->recs_tail->ft == fty->recs_tail->lt)
427 msg (SE, _("No input commands (DATA LIST, REPEATING DATA) "
428 "for above RECORD TYPE."));
433 /* Parse record type values. */
434 if (lex_match_id ("OTHER"))
435 rct->flags |= RCT_OTHER;
440 while (lex_is_number () || token == T_STRING)
445 rct->v = xrealloc (rct->v, mv * sizeof *rct->v);
/* The RECORD column's format category decides whether each value is taken
   as a string or as a number. */
448 if (formats[fty->record.fmt].cat & FCAT_STRING)
450 if (!lex_force_string ())
/* String values get their own buffer of record.nc + 1 bytes, filled by
   buf_copy_str_rpad. */
452 rct->v[rct->nv].c = xmalloc (fty->record.nc + 1);
453 buf_copy_str_rpad (rct->v[rct->nv].c, fty->record.nc + 1,
458 if (!lex_force_num ())
460 rct->v[rct->nv].f = tokval;
469 /* Parse the rest of the subcommands. */
472 if (lex_match_id ("SKIP"))
473 rct->flags |= RCT_SKIP;
474 else if (lex_match_id ("CASE"))
476 if (fty->type == FTY_MIXED)
478 msg (SE, _("The CASE subcommand is not allowed on "
479 "the RECORD TYPE command for FILE TYPE MIXED."));
/* An empty default name is passed so that a non-empty name afterwards means
   the user supplied one, which is rejected below. */
484 if (!parse_col_spec (&rct->case_sbc, ""))
486 if (rct->case_sbc.name[0])
488 msg (SE, _("No variable name may be specified for the "
489 "CASE subcommand on RECORD TYPE."));
/* XOR of the two format category masks exposes the bits in which they
   differ; the remainder of the condition is not visible in this excerpt. */
493 if ((formats[rct->case_sbc.fmt].cat ^ formats[fty->case_sbc.fmt].cat)
496 msg (SE, _("The CASE column specification on RECORD TYPE "
497 "must give a format specifier that is the "
498 "same type as that of the CASE column "
499 "specification given on FILE TYPE."));
503 else if (lex_match_id ("DUPLICATE"))
506 if (lex_match_id ("WARN"))
507 rct->flags |= RCT_DUPLICATE;
508 else if (lex_match_id ("NOWARN"))
509 rct->flags &= ~RCT_DUPLICATE;
512 msg (SE, _("WARN or NOWARN expected on DUPLICATE "
517 else if (lex_match_id ("MISSING"))
520 if (lex_match_id ("WARN"))
521 rct->flags |= RCT_MISSING;
522 else if (lex_match_id ("NOWARN"))
523 rct->flags &= ~RCT_MISSING;
526 msg (SE, _("WARN or NOWARN expected on MISSING subcommand."));
530 else if (lex_match_id ("SPREAD"))
533 if (lex_match_id ("YES"))
534 rct->flags |= RCT_SPREAD;
535 else if (lex_match_id ("NO"))
536 rct->flags &= ~RCT_SPREAD;
539 msg (SE, _("YES or NO expected on SPREAD subcommand."));
545 lex_error (_("while expecting a valid subcommand"));
/* Append a freshly allocated node to the list (first line: non-empty list,
   second line: empty list) and copy the parsed record type into it.
   NOTE(review): rct is declared above as `struct record_type *', so &rct is
   the address of the pointer variable rather than of the structure it
   points to; copying sizeof *fty->recs_tail bytes from there reads beyond
   the pointer object.  The source operand was probably meant to be `rct'.
   No statement releasing the structure allocated at "Initialize the
   record_type structure" is visible either -- confirm. */
551 fty->recs_tail = fty->recs_tail->next = xmalloc (sizeof *fty->recs_tail);
553 fty->recs_head = fty->recs_tail = xmalloc (sizeof *fty->recs_tail);
554 memcpy (fty->recs_tail, &rct, sizeof *fty->recs_tail);
559 if (formats[fty->record.fmt].cat & FCAT_STRING)
563 for (i = 0; i < rct->nv; i++)
574 int cmd_end_file_type (void);
/* Parses END FILE TYPE: records the dictionary's case size in the FILE TYPE
   program, closes off the last record type's transformation range, and
   returns the result of lex_end_of_command on success.
   NOTE(review): braces, the guard around the recs_tail accesses and the
   statements of the failure path other than discard_variables are not
   visible in this excerpt. */
576 cmd_end_file_type (void)
578 struct file_type_pgm *fty;
/* NOTE(review): the identical check in cmd_record_type negates
   case_source_is_class; here the `!' is absent, so as written the error is
   raised when the active source IS a FILE TYPE source.  This looks
   inverted -- confirm. */
580 if (pgm_state != STATE_INPUT
581 || case_source_is_class (vfm_source, &file_type_source_class))
583 msg (SE, _("This command may only appear within a "
584 "FILE TYPE/END FILE TYPE structure."));
587 fty = vfm_source->aux;
588 fty->case_size = dict_get_case_size (default_dict);
592 fty->recs_tail->lt = n_trns - 1;
593 if (!(fty->recs_tail->flags & RCT_SKIP)
594 && fty->recs_tail->ft == fty->recs_tail->lt)
596 msg (SE, _("No input commands (DATA LIST, REPEATING DATA) "
597 "on above RECORD TYPE."));
603 msg (SE, _("No commands between FILE TYPE and END FILE TYPE."));
609 return lex_end_of_command ();
612 /* Come here on discovering catastrophic error. */
614 discard_variables ();
618 /* FILE TYPE runtime. */
620 /*static void read_from_file_type_mixed(void);
621 static void read_from_file_type_grouped(void);
622 static void read_from_file_type_nested(void); */
624 /* Reads any number of cases into case C and calls write_case()
625 for each one. Compare data-list.c:read_from_data_list. */
/* For every record of the data file, extracts the record type field at the
   RECORD columns, stores it in case C, and searches the program's list of
   record types for a match: string fields are compared with memcmp over
   record.nc bytes, numeric fields with ==.  A record type flagged RCT_OTHER
   is tested for before the value vector is scanned.  Records that match
   nothing produce an "Unknown record type" warning.
   The write_case and wc_data parameters are marked UNUSED.
   NOTE(review): the dispatch near the end of the loop calls
   read_from_file_type_mixed, _grouped and _nested with no arguments, yet
   the only prototypes for them in view are inside the commented-out block
   above -- confirm that they are declared and defined elsewhere.
   NOTE(review): the declarations of c, v, di and i, and the loop exits
   taken on a match, are not visible in this excerpt. */
627 file_type_source_read (struct case_source *source,
629 write_case_func *write_case UNUSED,
630 write_case_data wc_data UNUSED)
632 struct file_type_pgm *fty = source->aux;
633 struct fmt_spec format;
635 dfm_push (fty->reader);
/* The record type field is read with the RECORD subcommand's format type
   and column count. */
637 format.type = fty->record.fmt;
638 format.w = fty->record.nc;
640 while (!dfm_eof (fty->reader))
642 struct fixed_string line;
643 struct record_type *iter;
647 dfm_expand_tabs (fty->reader);
648 dfm_get_record (fty->reader, &line);
649 if (formats[fty->record.fmt].cat & FCAT_STRING)
/* String case: v.c is pointed at the RECORD variable's string storage inside
   the case. */
653 v.c = case_data_rw (c, fty->record.v->fv)->s;
655 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
656 fty->record.fc, fty->record.fc + fty->record.nc);
657 di.v = (union value *) v.c;
659 di.f1 = fty->record.fc;
663 for (iter = fty->recs_head; iter; iter = iter->next)
665 if (iter->flags & RCT_OTHER)
667 for (i = 0; i < iter->nv; i++)
668 if (!memcmp (iter->v[i].c, v.c, fty->record.nc))
672 msg (SW, _("Unknown record type \"%.*s\"."), fty->record.nc, v.c);
/* Numeric case: the parsed value is copied into the case and then compared
   for exact equality. */
678 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
679 fty->record.fc, fty->record.fc + fty->record.nc);
682 di.f1 = fty->record.fc;
686 case_data_rw (c, fty->record.v->fv)->f = v.f;
687 for (iter = fty->recs_head; iter; iter = iter->next)
689 if (iter->flags & RCT_OTHER)
691 for (i = 0; i < iter->nv; i++)
692 if (iter->v[i].f == v.f)
696 msg (SW, _("Unknown record type %g."), v.f);
698 dfm_forward_record (fty->reader);
702 /* Arrive here if there is a matching record_type, which is in
704 dfm_forward_record (fty->reader);
709 case FTY_MIXED: read_from_file_type_mixed(); break;
710 case FTY_GROUPED: read_from_file_type_grouped(); break;
711 case FTY_NESTED: read_from_file_type_nested(); break;
715 dfm_pop (fty->reader);
/* Tears down a FILE TYPE source: cancels the transformations, closes the
   data file reader and walks the list of record types starting at
   recs_head.
   NOTE(review): the body of the loop, which must set `next', is not visible
   in this excerpt. */
719 file_type_source_destroy (struct case_source *source)
721 struct file_type_pgm *fty = source->aux;
722 struct record_type *iter, *next;
724 cancel_transformations ();
725 dfm_close_reader (fty->reader);
726 for (iter = fty->recs_head; iter; iter = next)
/* Case source class for FILE TYPE programs; cmd_file_type passes its address
   to create_case_source, and the RECORD TYPE / END FILE TYPE parsers test
   vfm_source against it with case_source_is_class.
   NOTE(review): initializer members other than the read and destroy
   callbacks are not visible in this excerpt. */
733 const struct case_source_class file_type_source_class =
737 file_type_source_read,
738 file_type_source_destroy,