1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-2004 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
29 #include "file-handle.h"
36 #include "debug-print.h"
38 /* Flags for DFM readers. */
/* The values are octal bit masks; the flags member of struct
   dfm_reader holds zero or more of them OR'd together. */
41 DFM_EOF = 001, /* At end-of-file? */
42 DFM_ADVANCE = 002, /* Read next record on the next dfm_eof() call? */
43 DFM_SAW_BEGIN_DATA = 004, /* For inline_file only, whether we've
44 already read a BEGIN DATA line. */
45 DFM_TABS_EXPANDED = 010, /* Tabs have been expanded. */
48 /* Data file reader. */
51 struct file_handle *fh; /* File handle; null for the inline file. */
52 struct file_ext file; /* Associated file. */
53 struct file_locator where; /* Current location in data file. */
54 struct string line; /* Current line. */
55 size_t pos; /* 0-based offset in line of current character. */
56 struct string scratch; /* Extra line buffer, used by dfm_expand_tabs(). */
57 enum dfm_reader_flags flags; /* Zero or more of DFM_*. */
/* Open count for the inline file: incremented in dfm_open_reader()
   and decremented in dfm_close_reader(). */
60 static int inline_open_cnt;
/* NOTE(review): presumably the reader shared by every opener of the
   inline file; its assignment is not visible here -- confirm. */
61 static struct dfm_reader *inline_file;
/* Defined later in this file. */
63 static void read_record (struct dfm_reader *r);
65 /* Closes reader R opened by dfm_open_reader(). */
67 dfm_close_reader (struct dfm_reader *r)
/* STILL_OPEN receives either fh_close()'s result or the decremented
   inline-file open count.  NOTE(review): the condition that selects
   between the two assignments, and what is done with STILL_OPEN
   afterward, are not visible here; presumably nonzero means the
   reader must be kept alive for other users -- confirm. */
75 still_open = fh_close (r->fh, "data file", "rs");
78 assert (inline_open_cnt > 0);
79 still_open = --inline_open_cnt;
83 /* Skip any remaining data on the inline file. */
84 while ((r->flags & DFM_EOF) == 0)
/* For a reader on a real file that was actually opened, close the
   file and free the file name that dfm_open_reader() copied with
   xstrdup(). */
92 if (r->fh != NULL && r->file.file)
94 fn_close_ext (&r->file);
95 free (r->file.filename);
96 r->file.filename = NULL;
/* Release both line buffers. */
98 ds_destroy (&r->line);
99 ds_destroy (&r->scratch);
103 /* Opens the file designated by file handle FH for reading as a
104 data file. Providing a null pointer for FH designates the
105 "inline file", that is, data included inline in the command
106 file between BEGIN DATA and END DATA. Returns nonzero only if
109 dfm_open_reader (struct file_handle *fh)
111 struct dfm_reader *r;
/* NOTE(review): the test that separates the file-handle case (fh_open)
   from the inline-file case (inline_open_cnt) is not visible here. */
116 rp = fh_open (fh, "data file", "rs");
124 assert (inline_open_cnt >= 0);
125 if (inline_open_cnt++ > 0)
/* Build a fresh reader, positioned before the first record. */
130 r = xmalloc (sizeof *r);
134 r->where.filename = handle_get_filename (fh);
135 r->where.line_number = 0;
138 ds_init (&r->line, 64);
139 ds_init (&r->scratch, 0);
/* DFM_ADVANCE starts out set, so the first dfm_eof() call takes its
   advance path and fetches the first record. */
140 r->flags = DFM_ADVANCE;
/* The reader keeps its own copy of the file name;
   dfm_close_reader() frees it. */
144 r->file.filename = xstrdup (handle_get_filename (r->fh));
147 r->file.sequence_no = NULL;
148 r->file.param = NULL;
149 r->file.postopen = NULL;
150 r->file.preclose = NULL;
/* On failure, report the reason from errno and give back the handle
   that fh_open() acquired above. */
151 if (!fn_open_ext (&r->file))
153 msg (ME, _("Could not open \"%s\" for reading "
154 "as a data file: %s."),
155 handle_get_filename (r->fh), strerror (errno));
157 fh_close (fh,"data file", "rs");
170 read_inline_record (struct dfm_reader *r)
/* The first time through on a reader, the BEGIN DATA line has not
   been seen yet, so it is looked for before any data is read.
   cmd_begin_data() sets DFM_SAW_BEGIN_DATA itself, which skips this
   step. */
172 if ((r->flags & DFM_SAW_BEGIN_DATA) == 0)
176 r->flags |= DFM_SAW_BEGIN_DATA;
178 /* FIXME: WTF can't this just be done with tokens?
179 Is this really a special case? */
184 if (!getl_read_line ())
186 msg (SE, _("BEGIN DATA expected."));
190 /* Skip leading whitespace, separate out first
191 word, so that S points to a single word reduced
193 s = ds_c_str (&getl_buf);
194 while (isspace ((unsigned char) *s))
196 for (cp = s; isalpha ((unsigned char) *cp); cp++)
197 *cp = tolower ((unsigned char) (*cp));
198 ds_truncate (&getl_buf, cp - s);
202 if (!lex_id_match_len ("begin", 5, s, strcspn (s, " \t\r\v\n")))
204 msg (SE, _("BEGIN DATA expected."));
205 lex_preprocess_line ();
208 getl_prompt = GETL_PRPT_DATA;
211 if (!getl_read_line ())
213 msg (SE, _("Unexpected end-of-file while reading data in BEGIN "
214 "DATA. This probably indicates "
215 "a missing or misformatted END DATA command. "
216 "END DATA must appear by itself on a single line "
217 "with exactly one space between words."));
222 r->where.line_number++;
224 if (ds_length (&getl_buf) >= 8
225 && !strncasecmp (ds_c_str (&getl_buf), "end data", 8))
227 lex_set_prog (ds_c_str (&getl_buf) + ds_length (&getl_buf));
231 ds_replace (&r->line, ds_c_str (&getl_buf));
236 read_file_record (struct dfm_reader *r)
/* Only readers backed by a file handle come here; the inline file is
   handled by read_inline_record(). */
238 assert (r->fh != NULL);
/* Text mode: the record is fetched with ds_gets() (presumably one
   line of the file -- confirm against ds_gets()). */
239 if (handle_get_mode (r->fh) == MODE_TEXT)
242 if (!ds_gets (&r->line, r->file.file))
/* A message is issued only when the stream's error indicator is
   set. */
244 if (ferror (r->file.file))
246 msg (ME, _("Error reading file %s: %s."),
247 handle_get_name (r->fh), strerror (errno));
/* Binary mode: records are a fixed number of bytes, given by the
   handle's record width. */
253 else if (handle_get_mode (r->fh) == MODE_BINARY)
255 size_t record_width = handle_get_record_width (r->fh);
/* Make sure the line buffer is at least one record long before
   fread() writes into it. */
258 if (ds_length (&r->line) < record_width)
259 ds_rpad (&r->line, record_width, 0);
/* A count other than RECORD_WIDTH means a short read, which is
   diagnosed below. */
261 amt = fread (ds_c_str (&r->line), 1, record_width,
263 if (record_width != amt)
265 if (ferror (r->file.file))
266 msg (ME, _("Error reading file %s: %s."),
267 handle_get_name (r->fh), strerror (errno));
269 msg (ME, _("%s: Partial record at end of file."),
270 handle_get_name (r->fh));
/* Count the record for error locations. */
281 r->where.line_number++;
286 /* Reads a record from R, setting the current position to the
287 start of the line. If an error occurs or end-of-file is
288 encountered, the current line is set to null. */
290 read_record (struct dfm_reader *r)
/* A null file handle designates the inline file, so dispatch on
   it. */
292 int success = r->fh != NULL ? read_file_record (r) : read_inline_record (r);
299 /* Returns nonzero if end of file has been reached on reader R.
300 Reads forward in R's file, if necessary to tell. */
302 dfm_eof (struct dfm_reader *r)
/* DFM_ADVANCE is set by dfm_forward_record() (and initially by
   dfm_open_reader()); it is consumed here. */
304 if (r->flags & DFM_ADVANCE)
306 r->flags &= ~DFM_ADVANCE;
307 if ((r->flags & DFM_EOF) == 0)
/* The two messages below report an attempt to advance past the end
   of input: the first for a data file, the second for the inline
   file. */
312 msg (SE, _("Attempt to read beyond end-of-file on file %s."),
313 handle_get_name (r->fh));
315 msg (SE, _("Attempt to read beyond END DATA."));
320 return (r->flags & DFM_EOF) != 0;
323 /* Returns the current record in the file corresponding to
324 HANDLE. Aborts if reading from the file is necessary or at
325 end of file, so call dfm_eof() first. Sets *LINE to the line,
326 which is not null-terminated. The caller must not free or
327 modify the returned string. */
329 dfm_get_record (struct dfm_reader *r, struct len_string *line)
/* Preconditions: no advance is pending, the reader is not at end of
   file, and the position lies within the current line. */
331 assert ((r->flags & DFM_ADVANCE) == 0);
332 assert ((r->flags & DFM_EOF) == 0);
333 assert (r->pos <= ds_length (&r->line));
/* Hand back the part of the line from the current position onward;
   *LINE points into the reader's own buffer. */
335 line->string = ds_data (&r->line) + r->pos;
336 line->length = ds_length (&r->line) - r->pos;
339 /* Expands tabs in the current line into the equivalent number of
340 spaces, if appropriate for this kind of file. Aborts if
341 reading from the file is necessary or at end of file, so call
344 dfm_expand_tabs (struct dfm_reader *r)
347 size_t ofs, new_pos, tab_width;
/* Same preconditions as dfm_get_record(): a current record must
   exist. */
349 assert ((r->flags & DFM_ADVANCE) == 0);
350 assert ((r->flags & DFM_EOF) == 0);
351 assert (r->pos <= ds_length (&r->line));
/* DFM_TABS_EXPANDED records that this has already been done. */
353 if (r->flags & DFM_TABS_EXPANDED)
355 r->flags |= DFM_TABS_EXPANDED;
/* The conditions below cover the cases with nothing to expand: a
   binary file, a tab width of 0, or a line without any tab. */
358 && (handle_get_mode (r->fh) == MODE_BINARY
359 || handle_get_tab_width (r->fh) == 0
360 || memchr (ds_c_str (&r->line), '\t', ds_length (&r->line)) == NULL))
363 /* Expand tabs from r->line into r->scratch, and figure out
364 new value for r->pos. */
/* The inline file has no handle, so it uses a tab width of 8. */
365 tab_width = r->fh != NULL ? handle_get_tab_width (r->fh) : 8;
366 ds_clear (&r->scratch);
368 for (ofs = 0; ofs < ds_length (&r->line); ofs++)
/* NEW_POS is taken from the current length of the expanded text. */
373 new_pos = ds_length (&r->scratch);
375 c = ds_c_str (&r->line)[ofs];
377 ds_putc (&r->scratch, c);
/* Pad with spaces until the expanded length is a multiple of
   TAB_WIDTH. */
381 ds_putc (&r->scratch, ' ');
382 while (ds_length (&r->scratch) % tab_width != 0);
386 /* Swap r->line and r->scratch and set new r->pos. */
388 r->line = r->scratch;
393 /* Causes dfm_eof() to read in the next record the next time it
394 is executed on reader R. */
396 dfm_forward_record (struct dfm_reader *r)
/* Only sets the flag; nothing is read here.  dfm_eof() acts on the
   flag the next time it is called. */
398 r->flags |= DFM_ADVANCE;
401 /* Cancels the effect of any previous dfm_forward_record() executed
402 on reader R. Sets the current line to begin in the 1-based
405 dfm_reread_record (struct dfm_reader *r, size_t column)
/* Clearing DFM_ADVANCE keeps the current record in place for the
   next dfm_eof() call. */
407 r->flags &= ~DFM_ADVANCE;
/* A column beyond the end of the line is clamped to the end of the
   line. */
410 else if (column > ds_length (&r->line))
411 r->pos = ds_length (&r->line);
416 /* Sets the current line to begin COLUMNS characters following
417 the current start. */
419 dfm_forward_columns (struct dfm_reader *r, size_t columns)
/* r->pos is a 0-based offset while dfm_reread_record() takes a
   1-based column, hence the + 1. */
421 dfm_reread_record (r, (r->pos + 1) + columns);
424 /* Returns the 1-based column to which the line pointer in reader R
425 is set. Unless dfm_reread_record() or dfm_forward_columns()
426 have been called, this is 1. */
428 dfm_column_start (struct dfm_reader *r)
433 /* Pushes the filename and line number on the fn/ln stack. */
435 dfm_push (struct dfm_reader *r)
/* R->where holds the file name set in dfm_open_reader() and the line
   number that the record-reading functions increment. */
438 err_push_file_locator (&r->where);
441 /* Pops the filename and line number from the fn/ln stack. */
443 dfm_pop (struct dfm_reader *r)
/* Counterpart of dfm_push(): passes the same R->where locator. */
446 err_pop_file_locator (&r->where);
449 /* BEGIN DATA...END DATA procedure. */
451 /* Perform BEGIN DATA...END DATA as a procedure in itself. */
453 cmd_begin_data (void)
455 struct dfm_reader *r;
457 /* FIXME: figure out the *exact* conditions, not these really
458 lenient conditions. */
459 if (vfm_source == NULL
460 || case_source_is_class (vfm_source, &storage_source_class))
462 msg (SE, _("This command is not valid here since the current "
463 "input program does not access the inline file."));
468 /* Open inline file. */
469 r = dfm_open_reader (NULL);
/* Setting the flag here makes read_inline_record() skip its search
   for a BEGIN DATA line.  NOTE(review): presumably because that line
   has already been consumed as this command -- confirm. */
470 r->flags |= DFM_SAW_BEGIN_DATA;
472 /* Input procedure reads from inline file. */
473 getl_prompt = GETL_PRPT_DATA;
474 procedure (NULL, NULL);
/* Matches the dfm_open_reader() call above. */
476 dfm_close_reader (r);