1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-2004, 2006 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21 #include <language/data-io/data-reader.h>
28 #include <data/file-handle-def.h>
29 #include <data/file-name.h>
30 #include <data/procedure.h>
31 #include <language/command.h>
32 #include <language/data-io/file-handle.h>
33 #include <language/lexer/lexer.h>
34 #include <language/prompt.h>
35 #include <libpspp/alloc.h>
36 #include <libpspp/assertion.h>
37 #include <libpspp/message.h>
38 #include <libpspp/str.h>
/* Shorthand used around user-visible message strings: passes MSGID
   through gettext(). */
44 #define _(msgid) gettext (msgid)
46 /* Flags for DFM readers.  The values are octal bit masks that are
   combined in the flags member of struct dfm_reader. */
49 DFM_ADVANCE = 002, /* Read the next record on the next dfm_eof() call? */
50 DFM_SAW_BEGIN_DATA = 004, /* For inline_file only, whether we've
51 already read a BEGIN DATA line. */
52 DFM_TABS_EXPANDED = 010, /* Tabs have been expanded. */
55 /* Data file reader. */
58 struct file_handle *fh; /* File handle. */
59 struct msg_locator where; /* Current location in data file. */
60 struct string line; /* Current line. */
61 struct string scratch; /* Extra line buffer, used by dfm_expand_tabs(). */
62 enum dfm_reader_flags flags; /* Zero or more of DFM_*. */
63 FILE *file; /* Associated file; opened only when FH is not the inline file. */
64 size_t pos; /* 0-based offset in line of current character. */
65 unsigned eof_cnt; /* # of attempts to advance past EOF. */
66 struct lexer *lexer; /* The lexer reading the file; supplies inline data lines. */
69 /* Closes reader R opened by dfm_open_reader(). */
71 dfm_close_reader (struct dfm_reader *r)
80 is_inline = r->fh == fh_inline_file ();
/* Take a private copy of the file name before the handle is closed; the
   inline file has no name to copy.  NOTE(review): presumably the name
   returned by fh_get_file_name() is not safe to use once fh_close() has
   run -- confirm. */
81 file_name = is_inline ? NULL : xstrdup (fh_get_file_name (r->fh));
82 still_open = fh_close (r->fh, "data file", "rs");
90 fn_close (file_name, r->file);
93 /* Skip any remaining data on the inline file. */
94 if (r->flags & DFM_SAW_BEGIN_DATA)
/* dfm_reread_record (r, 0) clears any pending advance and puts the
   position back at column 1. */
96 dfm_reread_record (r, 0);
98 dfm_forward_record (r);
/* Release both line buffers owned by the reader. */
102 ds_destroy (&r->line);
103 ds_destroy (&r->scratch);
108 /* Opens the file designated by file handle FH for reading as a
109 data file. Providing fh_inline_file() for FH designates the
110 "inline file", that is, data included inline in the command
111 file between BEGIN DATA and END DATA. Returns a reader if
112 successful, or a null pointer otherwise.

   LEXER is stored in the reader; read_inline_record() uses it to fetch
   inline data lines. */
114 dfm_open_reader (struct file_handle *fh, struct lexer *lexer)
116 struct dfm_reader *r;
119 rp = fh_open (fh, FH_REF_FILE | FH_REF_INLINE, "data file", "rs");
125 r = xmalloc (sizeof *r);
128 ds_init_empty (&r->line);
129 ds_init_empty (&r->scratch);
/* Start with DFM_ADVANCE set so that the first dfm_eof() call reads the
   first record. */
130 r->flags = DFM_ADVANCE;
/* Only a non-inline handle has a file to open and a file name and line
   number to track. */
132 if (fh != fh_inline_file ())
134 r->where.file_name = fh_get_file_name (fh);
135 r->where.line_number = 0;
/* The stream is opened in binary mode ("rb") whatever the handle's mode. */
136 r->file = fn_open (fh_get_file_name (fh), "rb");
139 msg (ME, _("Could not open \"%s\" for reading as a data file: %s."),
140 fh_get_file_name (r->fh), strerror (errno));
/* Undo the fh_open() above, using the same type and mode strings. */
141 fh_close (fh,"data file", "rs");
151 /* Returns true if an I/O error occurred on reader R, false otherwise.
   Only a reader whose handle refers to a file (FH_REF_FILE) can report
   an error; for any other referent the result is always false and
   r->file is not examined. */
153 dfm_reader_error (const struct dfm_reader *r)
155 return fh_get_referent (r->fh) == FH_REF_FILE && ferror (r->file);
158 /* Reads a record from the inline file into R.
159 Returns true if successful, false on failure. */
161 read_inline_record (struct dfm_reader *r)
/* First read on this reader: the BEGIN DATA command itself has not been
   consumed yet, so match it here and remember that it was seen. */
163 if ((r->flags & DFM_SAW_BEGIN_DATA) == 0)
165 r->flags |= DFM_SAW_BEGIN_DATA;
/* Loops while the current token is a command terminator ('.'). */
167 while (lex_token (r->lexer) == '.')
169 if (!lex_force_match_id (r->lexer, "BEGIN") || !lex_force_match_id (r->lexer, "DATA"))
171 prompt_set_style (PROMPT_DATA);
/* Fetch the next raw line; failure here is reported as end-of-file
   inside the data. */
174 if (!lex_get_line_raw (r->lexer, NULL))
176 msg (SE, _("Unexpected end-of-file while reading data in BEGIN "
177 "DATA. This probably indicates "
178 "a missing or misformatted END DATA command. "
179 "END DATA must appear by itself on a single line "
180 "with exactly one space between words."));
/* END DATA detection: a case-insensitive match on the first 8 characters
   of the line only.  Anything following "end data" on the same line is
   not examined by this test. */
184 if (ds_length (lex_entire_line_ds (r->lexer) ) >= 8
185 && !strncasecmp (lex_entire_line (r->lexer), "end data", 8))
187 lex_discard_line (r->lexer);
/* An ordinary data line: copy it into the reader's line buffer. */
191 ds_assign_string (&r->line, lex_entire_line_ds (r->lexer) );
196 /* Reads a record from a disk file into R.
197 Returns true if successful, false on failure.

   R must not be the inline-file reader (asserted).  How a record is
   delimited depends on the handle's mode: FH_MODE_TEXT reads one line,
   FH_MODE_BINARY reads fh_get_record_width() bytes. */
199 read_file_record (struct dfm_reader *r)
201 assert (r->fh != fh_inline_file ());
203 if (fh_get_mode (r->fh) == FH_MODE_TEXT)
205 if (!ds_read_line (&r->line, r->file))
/* Only a stream error produces a message here. */
207 if (ferror (r->file))
208 msg (ME, _("Error reading file %s: %s."),
209 fh_get_name (r->fh), strerror (errno));
/* Drop the trailing new-line, if any, from the record. */
212 ds_chomp (&r->line, '\n');
214 else if (fh_get_mode (r->fh) == FH_MODE_BINARY)
216 size_t record_width = fh_get_record_width (r->fh);
217 size_t amt = ds_read_stream (&r->line, 1, record_width, r->file);
/* A short read: fewer bytes than one full record were available. */
218 if (record_width != amt)
220 if (ferror (r->file))
221 msg (ME, _("Error reading file %s: %s."),
222 fh_get_name (r->fh), strerror (errno));
224 msg (ME, _("%s: Partial record at end of file."),
225 fh_get_name (r->fh));
/* Advance the line number kept in the reader's message locator. */
233 r->where.line_number++;
238 /* Reads a record from R by dispatching on the kind of handle:
239 read_file_record() when the handle's referent is FH_REF_FILE,
240 read_inline_record() otherwise.  Returns that helper's result.

   NOTE(review): an earlier version of this comment said that the
   current position is set to the start of the line and that the line is
   set to null on error or end-of-file; the return expression below does
   neither itself -- confirm where that happens. */
242 read_record (struct dfm_reader *r)
244 return (fh_get_referent (r->fh) == FH_REF_FILE
245 ? read_file_record (r)
246 : read_inline_record (r));
249 /* Returns the number of attempts, thus far, to advance past
250 end-of-file in reader R. Reads forward in R's file, if
251 necessary, to find out.

253 Normally, the user stops attempting to read from the file the
254 first time EOF is reached (a return value of 1). If the user
255 tries to read past EOF again (a return value of 2 or more),
256 an error message is issued, and the caller should more
257 forcibly abort to avoid an infinite loop. */
259 dfm_eof (struct dfm_reader *r)
/* A read is needed only when an advance was requested (by
   dfm_forward_record() or at open time); the request is consumed here. */
261 if (r->flags & DFM_ADVANCE)
263 r->flags &= ~DFM_ADVANCE;
/* Once EOF has been hit (eof_cnt != 0), no further read is attempted. */
265 if (r->eof_cnt == 0 && read_record (r) )
/* The wording of the message depends on whether R reads a file or the
   inline file. */
274 if (r->fh != fh_inline_file ())
275 msg (ME, _("Attempt to read beyond end-of-file on file %s."),
276 fh_get_name (r->fh));
278 msg (ME, _("Attempt to read beyond END DATA."));
285 /* Returns the current record of reader R, starting at the current
286 position (r->pos) and running to the end of the line. Aborts if
   reading from the file is necessary or at
287 end of file, so call dfm_eof() first. */
289 dfm_get_record (struct dfm_reader *r)
/* No advance may be pending and EOF must not have been reached. */
291 assert ((r->flags & DFM_ADVANCE) == 0);
292 assert (r->eof_cnt == 0);
294 return ds_substr (&r->line, r->pos, SIZE_MAX);
297 /* Expands tabs in the current line into the equivalent number of
298 spaces, if appropriate for this kind of file. Aborts if
299 reading from the file is necessary or at end of file, so call dfm_eof() first. */
302 dfm_expand_tabs (struct dfm_reader *r)
304 size_t ofs, new_pos, tab_width;
/* Same preconditions as dfm_get_record(): no pending advance, not at EOF. */
306 assert ((r->flags & DFM_ADVANCE) == 0);
307 assert (r->eof_cnt == 0);
/* DFM_TABS_EXPANDED records that this function has already handled the
   line, so the flag is tested first and then set. */
309 if (r->flags & DFM_TABS_EXPANDED)
311 r->flags |= DFM_TABS_EXPANDED;
/* For a non-inline handle there is nothing to do in binary mode, with a
   tab width of 0, or when the line contains no tab. */
313 if (r->fh != fh_inline_file ()
314 && (fh_get_mode (r->fh) == FH_MODE_BINARY
315 || fh_get_tab_width (r->fh) == 0
316 || ds_find_char (&r->line, '\t') == SIZE_MAX))
319 /* Expand tabs from r->line into r->scratch, and figure out
320 new value for r->pos. */
/* NOTE(review): the test above checks the tab width only for non-inline
   handles.  If fh_get_tab_width() can return 0 for the inline file, the
   modulo in the loop below divides by zero -- confirm. */
321 tab_width = fh_get_tab_width (r->fh);
322 ds_clear (&r->scratch);
324 for (ofs = 0; ofs < ds_length (&r->line); ofs++)
/* new_pos is taken from the current length of the expanded text. */
329 new_pos = ds_length (&r->scratch);
331 c = ds_data (&r->line)[ofs];
333 ds_put_char (&r->scratch, c);
/* A tab becomes spaces up to the next multiple of tab_width. */
337 ds_put_char (&r->scratch, ' ');
338 while (ds_length (&r->scratch) % tab_width != 0);
/* SIZE_MAX means the loop never assigned new_pos. */
341 if (new_pos == SIZE_MAX)
343 /* Maintain the same relationship between position and line
344 length that we had before. DATA LIST uses a
345 beyond-the-end position to deal with an empty field at
346 the end of the line. */
347 assert (r->pos >= ds_length (&r->line));
348 new_pos = (r->pos - ds_length (&r->line)) + ds_length (&r->scratch);
351 /* Swap r->line and r->scratch and set new r->pos. */
352 ds_swap (&r->line, &r->scratch);
356 /* Causes dfm_get_record() or dfm_get_whole_record() to read in
357 the next record the next time it is executed on reader R. */
360 dfm_forward_record (struct dfm_reader *r)
/* Only requests the advance; dfm_eof() tests and clears this flag and
   performs the actual read. */
362 r->flags |= DFM_ADVANCE;
365 /* Cancels the effect of any previous dfm_forward_record() executed
366 on reader R. Sets the current line to begin in the 1-based
column COLUMN. */
369 dfm_reread_record (struct dfm_reader *r, size_t column)
/* Drop any pending request to advance to the next record. */
371 r->flags &= ~DFM_ADVANCE;
/* Convert the 1-based COLUMN into the 0-based r->pos.  A COLUMN of 0 is
   treated like 1, which also keeps the unsigned subtraction from
   wrapping. */
372 r->pos = MAX (column, 1) - 1;
375 /* Sets the current line to begin COLUMNS characters following
376 the current start.  Because this goes through dfm_reread_record(),
   it also cancels any pending dfm_forward_record(). */
378 dfm_forward_columns (struct dfm_reader *r, size_t columns)
/* r->pos is 0-based and dfm_reread_record() takes a 1-based column,
   hence the + 1. */
380 dfm_reread_record (r, (r->pos + 1) + columns);
383 /* Returns the 1-based column to which the line pointer in reader R
384 is set. Unless dfm_reread_record() or dfm_forward_columns()
385 have been called, this is 1. */
387 dfm_column_start (const struct dfm_reader *r)
392 /* Returns the number of columns we are currently beyond the end
393 of the line. At or before end-of-line, this is 0; one column
394 after end-of-line, this is 1; and so on. */
396 dfm_columns_past_end (const struct dfm_reader *r)
/* r->pos is a 0-based offset into the line, so the overshoot is the
   position minus the line length.  The operand order matters: r->pos is
   an unsigned size_t, and subtracting it from the (smaller or equal)
   length would wrap around to a huge value instead of 1, 2, ... */
398 return r->pos < ds_length (&r->line) ? 0 : r->pos - ds_length (&r->line);
401 /* Returns the 1-based column within the current line that P points to. */
404 dfm_get_column (const struct dfm_reader *r, const char *p)
/* The + 1 turns the position reported for P within r->line into a
   1-based column.  NOTE(review): presumably P must point into the
   current contents of r->line -- confirm against callers. */
406 return ds_pointer_to_position (&r->line, p) + 1;
409 /* Pushes the file name and line number of reader R (its message
   locator, r->where) on the fn/ln stack.  Does nothing when R reads the
   inline file. */
411 dfm_push (struct dfm_reader *r)
413 if (r->fh != fh_inline_file ())
414 msg_push_msg_locator (&r->where);
417 /* Pops the file name and line number of reader R (its message
   locator, r->where) from the fn/ln stack.  Does nothing when R reads
   the inline file, mirroring dfm_push(). */
419 dfm_pop (struct dfm_reader *r)
421 if (r->fh != fh_inline_file ())
422 msg_pop_msg_locator (&r->where);
425 /* BEGIN DATA...END DATA procedure. */

427 /* Perform BEGIN DATA...END DATA as a procedure in itself.
   Returns CMD_SUCCESS if the procedure succeeds, CMD_CASCADING_FAILURE
   if it fails or if the inline file is not open. */
429 cmd_begin_data (struct lexer *lexer, struct dataset *ds)
431 struct dfm_reader *r;
/* The command only makes sense when the inline file is already open. */
434 if (!fh_is_open (fh_inline_file ()))
436 msg (SE, _("This command is not valid here since the current "
437 "input program does not access the inline file."));
438 return CMD_CASCADING_FAILURE;
441 /* Open inline file. */
/* NOTE(review): dfm_open_reader() is documented to return a null pointer
   on failure, but R is dereferenced on the next line without a check --
   confirm that failure is impossible once fh_is_open() has succeeded. */
442 r = dfm_open_reader (fh_inline_file (), lexer);
/* Mark BEGIN DATA as already seen so that read_inline_record() does not
   try to match the command again. */
443 r->flags |= DFM_SAW_BEGIN_DATA;
445 /* Input procedure reads from inline file. */
446 prompt_set_style (PROMPT_DATA);
447 ok = procedure (ds, NULL, NULL);
449 dfm_close_reader (r);
451 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;