1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-2004 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
29 #include "file-handle.h"
30 #include "file-handle-def.h"
38 #define _(msgid) gettext (msgid)
40 #include "debug-print.h"
42 /* Flags for DFM readers. */
45 DFM_EOF = 001, /* At end-of-file? */
46 DFM_ADVANCE = 002, /* Read next line on dfm_get_record() call? */
47 DFM_SAW_BEGIN_DATA = 004, /* For inline_file only, whether we've
48 already read a BEGIN DATA line. */
49 DFM_TABS_EXPANDED = 010, /* Tabs have been expanded. */
52 /* Data file reader. */
55 struct file_handle *fh; /* File handle. */
56 struct file_ext file; /* Associated file. */
57 struct file_locator where; /* Current location in data file. */
58 struct string line; /* Current line. */
59 size_t pos; /* Offset in line of current character. */
60 struct string scratch; /* Extra line buffer. */
61 enum dfm_reader_flags flags; /* Zero or more of DFM_*. */
64 static int inline_open_cnt;
65 static struct dfm_reader *inline_file;
67 static void read_record (struct dfm_reader *r);
69 /* Closes reader R opened by dfm_open_reader(). */
71 dfm_close_reader (struct dfm_reader *r)
79 still_open = fh_close (r->fh, "data file", "rs");
82 assert (inline_open_cnt > 0);
83 still_open = --inline_open_cnt;
87 /* Skip any remaining data on the inline file. */
88 if (r->flags & DFM_SAW_BEGIN_DATA)
89 while ((r->flags & DFM_EOF) == 0)
97 if (r->fh != NULL && r->file.file)
99 fn_close_ext (&r->file);
100 free (r->file.filename);
101 r->file.filename = NULL;
103 ds_destroy (&r->line);
104 ds_destroy (&r->scratch);
108 /* Opens the file designated by file handle FH for reading as a
109 data file. Providing a null pointer for FH designates the
110 "inline file", that is, data included inline in the command
111 file between BEGIN DATA and END DATA. Returns nonzero only if
114 dfm_open_reader (struct file_handle *fh)
116 struct dfm_reader *r;
121 rp = fh_open (fh, "data file", "rs");
129 assert (inline_open_cnt >= 0);
130 if (inline_open_cnt++ > 0)
135 r = xmalloc (sizeof *r);
139 r->where.filename = handle_get_filename (fh);
140 r->where.line_number = 0;
143 ds_init (&r->line, 64);
144 ds_init (&r->scratch, 0);
145 r->flags = DFM_ADVANCE;
149 r->file.filename = xstrdup (handle_get_filename (r->fh));
152 r->file.sequence_no = NULL;
153 r->file.param = NULL;
154 r->file.postopen = NULL;
155 r->file.preclose = NULL;
156 if (!fn_open_ext (&r->file))
158 msg (ME, _("Could not open \"%s\" for reading "
159 "as a data file: %s."),
160 handle_get_filename (r->fh), strerror (errno));
162 fh_close (fh,"data file", "rs");
175 read_inline_record (struct dfm_reader *r)
177 if ((r->flags & DFM_SAW_BEGIN_DATA) == 0)
181 r->flags |= DFM_SAW_BEGIN_DATA;
183 /* FIXME: WTF can't this just be done with tokens?
184 Is this really a special case? */
189 if (!getl_read_line ())
191 msg (SE, _("BEGIN DATA expected."));
195 /* Skip leading whitespace, separate out first
196 word, so that S points to a single word reduced
198 s = ds_c_str (&getl_buf);
199 while (isspace ((unsigned char) *s))
201 for (cp = s; isalpha ((unsigned char) *cp); cp++)
202 *cp = tolower ((unsigned char) (*cp));
203 ds_truncate (&getl_buf, cp - s);
207 if (!lex_id_match_len ("begin", 5, s, strcspn (s, " \t\r\v\n")))
209 msg (SE, _("BEGIN DATA expected."));
210 lex_preprocess_line ();
213 getl_prompt = GETL_PRPT_DATA;
216 if (!getl_read_line ())
218 msg (SE, _("Unexpected end-of-file while reading data in BEGIN "
219 "DATA. This probably indicates "
220 "a missing or misformatted END DATA command. "
221 "END DATA must appear by itself on a single line "
222 "with exactly one space between words."));
227 r->where.line_number++;
229 if (ds_length (&getl_buf) >= 8
230 && !strncasecmp (ds_c_str (&getl_buf), "end data", 8))
232 lex_set_prog (ds_c_str (&getl_buf) + ds_length (&getl_buf));
236 ds_replace (&r->line, ds_c_str (&getl_buf));
241 read_file_record (struct dfm_reader *r)
243 assert (r->fh != NULL);
244 if (handle_get_mode (r->fh) == MODE_TEXT)
247 if (!ds_gets (&r->line, r->file.file))
249 if (ferror (r->file.file))
251 msg (ME, _("Error reading file %s: %s."),
252 handle_get_name (r->fh), strerror (errno));
258 else if (handle_get_mode (r->fh) == MODE_BINARY)
260 size_t record_width = handle_get_record_width (r->fh);
263 if (ds_length (&r->line) < record_width)
264 ds_rpad (&r->line, record_width, 0);
266 amt = fread (ds_c_str (&r->line), 1, record_width,
268 if (record_width != amt)
270 if (ferror (r->file.file))
271 msg (ME, _("Error reading file %s: %s."),
272 handle_get_name (r->fh), strerror (errno));
274 msg (ME, _("%s: Partial record at end of file."),
275 handle_get_name (r->fh));
286 r->where.line_number++;
291 /* Reads a record from R, setting the current position to the
292 start of the line. If an error occurs or end-of-file is
293 encountered, the current line is set to null. */
295 read_record (struct dfm_reader *r)
297 int success = r->fh != NULL ? read_file_record (r) : read_inline_record (r);
304 /* Returns nonzero if end of file has been reached on reader R.
305 Reads forward in R's file, if necessary to tell. */
307 dfm_eof (struct dfm_reader *r)
309 if (r->flags & DFM_ADVANCE)
311 r->flags &= ~DFM_ADVANCE;
312 if ((r->flags & DFM_EOF) == 0)
317 msg (SE, _("Attempt to read beyond end-of-file on file %s."),
318 handle_get_name (r->fh));
320 msg (SE, _("Attempt to read beyond END DATA."));
325 return (r->flags & DFM_EOF) != 0;
328 /* Returns the current record in the file corresponding to
329 reader R. Aborts if reading from the file is necessary or at
330 end of file, so call dfm_eof() first. Sets *LINE to the line,
331 which is not null-terminated. The caller must not free or
332 modify the returned string. */
334 dfm_get_record (struct dfm_reader *r, struct fixed_string *line)
336 assert ((r->flags & DFM_ADVANCE) == 0);
337 assert ((r->flags & DFM_EOF) == 0);
338 assert (r->pos <= ds_length (&r->line));
340 line->string = ds_data (&r->line) + r->pos;
341 line->length = ds_length (&r->line) - r->pos;
344 /* Expands tabs in the current line into the equivalent number of
345 spaces, if appropriate for this kind of file. Aborts if
346 reading from the file is necessary or at end of file, so call
349 dfm_expand_tabs (struct dfm_reader *r)
352 size_t ofs, new_pos, tab_width;
354 assert ((r->flags & DFM_ADVANCE) == 0);
355 assert ((r->flags & DFM_EOF) == 0);
356 assert (r->pos <= ds_length (&r->line));
358 if (r->flags & DFM_TABS_EXPANDED)
360 r->flags |= DFM_TABS_EXPANDED;
363 && (handle_get_mode (r->fh) == MODE_BINARY
364 || handle_get_tab_width (r->fh) == 0
365 || memchr (ds_c_str (&r->line), '\t', ds_length (&r->line)) == NULL))
368 /* Expand tabs from r->line into r->scratch, and figure out
369 new value for r->pos. */
370 tab_width = r->fh != NULL ? handle_get_tab_width (r->fh) : 8;
371 ds_clear (&r->scratch);
373 for (ofs = 0; ofs < ds_length (&r->line); ofs++)
378 new_pos = ds_length (&r->scratch);
380 c = ds_c_str (&r->line)[ofs];
382 ds_putc (&r->scratch, c);
386 ds_putc (&r->scratch, ' ');
387 while (ds_length (&r->scratch) % tab_width != 0);
391 /* Swap r->line and r->scratch and set new r->pos. */
393 r->line = r->scratch;
398 /* Causes dfm_eof() to read in the next record the next time it
399 is executed on reader R. */
401 dfm_forward_record (struct dfm_reader *r)
403 r->flags |= DFM_ADVANCE;
406 /* Cancels the effect of any previous dfm_forward_record() executed
407 on reader R. Sets the current line to begin in the 1-based
410 dfm_reread_record (struct dfm_reader *r, size_t column)
412 r->flags &= ~DFM_ADVANCE;
415 else if (column > ds_length (&r->line))
416 r->pos = ds_length (&r->line);
421 /* Sets the current line to begin COLUMNS characters following
422 the current start. */
424 dfm_forward_columns (struct dfm_reader *r, size_t columns)
426 dfm_reread_record (r, (r->pos + 1) + columns);
429 /* Returns the 1-based column to which the line pointer in reader R
430 is set. Unless dfm_reread_record() or dfm_forward_columns()
431 have been called, this is 1. */
433 dfm_column_start (struct dfm_reader *r)
438 /* Pushes the filename and line number on the fn/ln stack. */
440 dfm_push (struct dfm_reader *r)
443 err_push_file_locator (&r->where);
446 /* Pops the filename and line number from the fn/ln stack. */
448 dfm_pop (struct dfm_reader *r)
451 err_pop_file_locator (&r->where);
454 /* BEGIN DATA...END DATA procedure. */
456 /* Perform BEGIN DATA...END DATA as a procedure in itself. */
458 cmd_begin_data (void)
460 struct dfm_reader *r;
462 /* FIXME: figure out the *exact* conditions, not these really
463 lenient conditions. */
464 if (vfm_source == NULL
465 || case_source_is_class (vfm_source, &storage_source_class))
467 msg (SE, _("This command is not valid here since the current "
468 "input program does not access the inline file."));
473 /* Open inline file. */
474 r = dfm_open_reader (NULL);
475 r->flags |= DFM_SAW_BEGIN_DATA;
477 /* Input procedure reads from inline file. */
478 getl_prompt = GETL_PRPT_DATA;
479 procedure (NULL, NULL);
481 dfm_close_reader (r);