1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-2004, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
22 #include <language/data-io/data-reader.h>
29 #include <data/file-handle-def.h>
30 #include <data/file-name.h>
31 #include <data/procedure.h>
32 #include <language/command.h>
33 #include <language/data-io/file-handle.h>
34 #include <language/lexer/lexer.h>
35 #include <language/line-buffer.h>
36 #include <libpspp/alloc.h>
37 #include <libpspp/message.h>
38 #include <libpspp/str.h>
44 #define _(msgid) gettext (msgid)
46 /* Flags for DFM readers.  Each value is a distinct octal bit mask; they
are combined with bitwise OR in a reader's `flags' member. */
49 DFM_ADVANCE = 002, /* Read the next record on the next dfm_eof() call? */
50 DFM_SAW_BEGIN_DATA = 004, /* For the inline file only, whether we've
51 already read a BEGIN DATA line. */
52 DFM_TABS_EXPANDED = 010, /* Tabs in the current line have been expanded. */
55 /* Data file reader. */
58 struct file_handle *fh; /* File handle. */
59 struct msg_locator where; /* Current location in data file (file name and line number). */
60 struct string line; /* Current line. */
61 struct string scratch; /* Extra line buffer; dfm_expand_tabs() builds the expanded line here. */
62 enum dfm_reader_flags flags; /* Zero or more of DFM_*. */
63 FILE *file; /* Associated stdio stream, opened with fn_open() for handles other than the inline file. */
64 size_t pos; /* 0-based offset in line of current character. */
65 unsigned eof_cnt; /* # of attempts to advance past EOF. */
68 /* Closes reader R opened by dfm_open_reader(): closes its file handle
with fh_close(), skips any unread inline data, and destroys its line
buffers. */
70 dfm_close_reader (struct dfm_reader *r)
79 is_inline = r->fh == fh_inline_file ();
/* Take a private copy of the file name before the handle is closed; the
   inline file has no name, so NULL is used for it. */
80 file_name = is_inline ? NULL : xstrdup (fh_get_file_name (r->fh));
81 still_open = fh_close (r->fh, "data file", "rs");
89 fn_close (file_name, r->file);
92 /* Skip any remaining data on the inline file.  This only applies once
BEGIN DATA has actually been seen. */
93 if (r->flags & DFM_SAW_BEGIN_DATA)
95 dfm_reread_record (r, 0);
97 dfm_forward_record (r);
101 ds_destroy (&r->line);
102 ds_destroy (&r->scratch);
107 /* Opens the file designated by file handle FH for reading as a
108 data file. Providing fh_inline_file() for FH designates the
109 "inline file", that is, data included inline in the command
110 file between BEGIN DATA and END DATA. Returns a reader if
111 successful, or a null pointer otherwise. */
113 dfm_open_reader (struct file_handle *fh)
115 struct dfm_reader *r;
118 rp = fh_open (fh, FH_REF_FILE | FH_REF_INLINE, "data file", "rs");
124 r = xmalloc (sizeof *r);
126 ds_init_empty (&r->line);
127 ds_init_empty (&r->scratch);
128 r->flags = DFM_ADVANCE; /* So that the first dfm_eof() call reads the first record. */
130 if (fh != fh_inline_file ())
132 r->where.file_name = fh_get_file_name (fh);
133 r->where.line_number = 0;
134 r->file = fn_open (fh_get_file_name (fh), "rb"); /* The stream is always opened with mode "rb". */
/* On failure, report the system error and give the handle back. */
137 msg (ME, _("Could not open \"%s\" for reading as a data file: %s."),
138 fh_get_file_name (r->fh), strerror (errno));
139 fh_close (fh,"data file", "rs");
149 /* Returns true if an I/O error occurred on R's stream, false otherwise.
The stream is only consulted when R's handle refers to a file
(FH_REF_FILE); for any other kind of handle the result is false. */
151 dfm_reader_error (const struct dfm_reader *r)
153 return fh_get_referent (r->fh) == FH_REF_FILE && ferror (r->file);
156 /* Reads a record from the inline file into R.
157 Returns true if successful, false on failure. */
159 read_inline_record (struct dfm_reader *r)
/* On the first read from the inline file, the BEGIN DATA command itself
   must be matched before any data lines are read. */
161 if ((r->flags & DFM_SAW_BEGIN_DATA) == 0)
163 r->flags |= DFM_SAW_BEGIN_DATA;
167 if (!lex_force_match_id ("BEGIN") || !lex_force_match_id ("DATA"))
169 getl_set_prompt_style (GETL_PROMPT_DATA);
172 if (!getl_read_line (NULL))
174 msg (SE, _("Unexpected end-of-file while reading data in BEGIN "
175 "DATA. This probably indicates "
176 "a missing or misformatted END DATA command. "
177 "END DATA must appear by itself on a single line "
178 "with exactly one space between words."));
/* Recognize the END DATA line: only the first 8 characters are compared,
   case-insensitively, against "end data". */
182 if (ds_length (&getl_buf) >= 8
183 && !strncasecmp (ds_cstr (&getl_buf), "end data", 8))
185 lex_set_prog (ds_end (&getl_buf));
/* An ordinary data line: it becomes the reader's current line. */
189 ds_assign_string (&r->line, &getl_buf);
193 /* Reads a record from a disk file into R.
194 Returns true if successful, false on failure.  In text mode a record
is one line; in binary mode it is a fixed number of bytes given by the
handle's record width. */
196 read_file_record (struct dfm_reader *r)
198 assert (r->fh != fh_inline_file ());
200 if (fh_get_mode (r->fh) == FH_MODE_TEXT)
202 if (!ds_read_line (&r->line, r->file))
/* Only a genuine I/O error is reported here. */
204 if (ferror (r->file))
205 msg (ME, _("Error reading file %s: %s."),
206 fh_get_name (r->fh), strerror (errno));
210 else if (fh_get_mode (r->fh) == FH_MODE_BINARY)
212 size_t record_width = fh_get_record_width (r->fh);
213 size_t amt = ds_read_stream (&r->line, 1, record_width, r->file);
/* A short read is either an I/O error or a truncated final record. */
214 if (record_width != amt)
216 if (ferror (r->file))
217 msg (ME, _("Error reading file %s: %s."),
218 fh_get_name (r->fh), strerror (errno));
220 msg (ME, _("%s: Partial record at end of file."),
221 fh_get_name (r->fh));
229 r->where.line_number++; /* Keep the location used in messages in step with the records read. */
234 /* Reads the next record into R, using read_file_record() when R's
235 handle refers to a disk file and read_inline_record() otherwise.
236 Returns true if successful, false on failure. */
238 read_record (struct dfm_reader *r)
240 return (fh_get_referent (r->fh) == FH_REF_FILE
241 ? read_file_record (r)
242 : read_inline_record (r));
245 /* Returns the number of attempts, thus far, to advance past
246 end-of-file in reader R. Reads forward in R's file, if
247 necessary, to find out.
249 Normally, the user stops attempting to read from the file the
250 first time EOF is reached (a return value of 1). If the user
251 tries to read past EOF again (a return value of 2 or more),
252 an error message is issued, and the caller should more
253 forcibly abort to avoid an infinite loop. */
255 dfm_eof (struct dfm_reader *r)
/* A read is attempted only when an advance has been requested. */
257 if (r->flags & DFM_ADVANCE)
259 r->flags &= ~DFM_ADVANCE;
261 if (r->eof_cnt == 0 && read_record (r)) /* Once EOF has been counted, no further read is attempted. */
/* The message names the file, or END DATA for the inline file. */
270 if (r->fh != fh_inline_file ())
271 msg (ME, _("Attempt to read beyond end-of-file on file %s."),
272 fh_get_name (r->fh));
274 msg (ME, _("Attempt to read beyond END DATA."));
281 /* Returns the current record in reader R, starting from R's current
282 position within the line. Aborts if reading from the file is necessary or at
283 end of file, so call dfm_eof() first. */
285 dfm_get_record (struct dfm_reader *r)
287 assert ((r->flags & DFM_ADVANCE) == 0); /* No read may be pending. */
288 assert (r->eof_cnt == 0); /* Must not be at end of file. */
290 return ds_substr (&r->line, r->pos, SIZE_MAX);
293 /* Expands tabs in the current line into the equivalent number of
294 spaces, if appropriate for this kind of file. Aborts if
295 reading from the file is necessary or at end of file, so call dfm_eof() first. */
298 dfm_expand_tabs (struct dfm_reader *r)
300 size_t ofs, new_pos, tab_width;
302 assert ((r->flags & DFM_ADVANCE) == 0);
303 assert (r->eof_cnt == 0);
/* DFM_TABS_EXPANDED records that the current line has already been
   through this function. */
305 if (r->flags & DFM_TABS_EXPANDED)
307 r->flags |= DFM_TABS_EXPANDED;
/* For a disk file, these are the cases tested before any expansion:
   binary mode, a tab width of 0, or a line with no tab characters.
   NOTE(review): the tests are bypassed for the inline file, so the
   `% tab_width' below relies on fh_get_tab_width() being nonzero for
   it -- confirm. */
309 if (r->fh != fh_inline_file ()
310 && (fh_get_mode (r->fh) == FH_MODE_BINARY
311 || fh_get_tab_width (r->fh) == 0
312 || ds_find_char (&r->line, '\t') == SIZE_MAX))
315 /* Expand tabs from r->line into r->scratch, and figure out
316 new value for r->pos. */
317 tab_width = fh_get_tab_width (r->fh);
318 ds_clear (&r->scratch);
320 for (ofs = 0; ofs < ds_length (&r->line); ofs++)
325 new_pos = ds_length (&r->scratch);
327 c = ds_data (&r->line)[ofs];
329 ds_put_char (&r->scratch, c);
333 ds_put_char (&r->scratch, ' ');
334 while (ds_length (&r->scratch) % tab_width != 0); /* Pad with spaces up to the next multiple of tab_width. */
/* SIZE_MAX means no new position was found while scanning the line. */
337 if (new_pos == SIZE_MAX)
339 /* Maintain the same relationship between position and line
340 length that we had before. DATA LIST uses a
341 beyond-the-end position to deal with an empty field at
342 the end of the line. */
343 assert (r->pos >= ds_length (&r->line));
344 new_pos = (r->pos - ds_length (&r->line)) + ds_length (&r->scratch);
347 /* Swap r->line and r->scratch and set new r->pos. */
348 ds_swap (&r->line, &r->scratch);
352 /* Causes dfm_eof() to read in the next record the next time it is
353 executed on reader R. */
356 dfm_forward_record (struct dfm_reader *r)
358 r->flags |= DFM_ADVANCE; /* Only records the request; the read itself happens in dfm_eof(). */
361 /* Cancels the effect of any previous dfm_forward_record() executed
362 on reader R. Sets the current line to begin in the 1-based column COLUMN. */
365 dfm_reread_record (struct dfm_reader *r, size_t column)
367 r->flags &= ~DFM_ADVANCE; /* Keep the current record instead of advancing. */
368 r->pos = MAX (column, 1) - 1; /* Stored as a 0-based offset; a COLUMN of 0 is treated as column 1. */
371 /* Sets the current line to begin COLUMNS characters following
372 the current start.  Because this goes through dfm_reread_record(), it
also cancels any pending advance to the next record. */
374 dfm_forward_columns (struct dfm_reader *r, size_t columns)
376 dfm_reread_record (r, (r->pos + 1) + columns); /* r->pos + 1 is the current 1-based column. */
379 /* Returns the 1-based column to which the line pointer in reader R
380 is set. Unless dfm_reread_record() or dfm_forward_columns()
381 have been called, this is 1. */
383 dfm_column_start (const struct dfm_reader *r)
388 /* Returns the number of columns we are currently beyond the end
389 of the line. At or before end-of-line, this is 0; one column
390 after end-of-line, this is 1; and so on.  The subtraction is only
performed when the position is at or past the line length, so the
unsigned result cannot wrap around. */
392 dfm_columns_past_end (const struct dfm_reader *r)
394 return r->pos < ds_length (&r->line) ? 0 : r->pos - ds_length (&r->line);
397 /* Returns the 1-based column within the current line that P designates. */
400 dfm_get_column (const struct dfm_reader *r, const char *p)
/* NOTE(review): the + 1 assumes ds_pointer_to_position() yields a 0-based
   offset into the line -- confirm. */
402 return ds_pointer_to_position (&r->line, p) + 1;
405 /* Pushes R's file name and line number on the fn/ln stack.  Does
nothing when R reads the inline file. */
407 dfm_push (struct dfm_reader *r)
409 if (r->fh != fh_inline_file ())
410 msg_push_msg_locator (&r->where);
413 /* Pops R's file name and line number from the fn/ln stack.  Does
nothing when R reads the inline file, mirroring dfm_push(). */
415 dfm_pop (struct dfm_reader *r)
417 if (r->fh != fh_inline_file ())
418 msg_pop_msg_locator (&r->where);
421 /* BEGIN DATA...END DATA procedure. */
423 /* Perform BEGIN DATA...END DATA as a procedure in itself. */
425 cmd_begin_data (void)
427 struct dfm_reader *r;
430 if (!fh_is_open (fh_inline_file ()))
432 msg (SE, _("This command is not valid here since the current "
433 "input program does not access the inline file."));
434 return CMD_CASCADING_FAILURE;
437 /* Open inline file. */
438 r = dfm_open_reader (fh_inline_file ());
439 r->flags |= DFM_SAW_BEGIN_DATA;
441 /* Input procedure reads from inline file. */
442 getl_set_prompt_style (GETL_PROMPT_DATA);
443 ok = procedure (NULL, NULL);
445 dfm_close_reader (r);
447 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;