1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-2004, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21 #include <language/data-io/data-reader.h>
26 #include <libpspp/alloc.h>
27 #include <language/command.h>
28 #include <libpspp/message.h>
29 #include <language/data-io/file-handle.h>
30 #include <data/file-handle-def.h>
31 #include <data/filename.h>
32 #include <language/line-buffer.h>
33 #include <language/lexer/lexer.h>
34 #include <libpspp/str.h>
35 #include <procedure.h>
38 #define _(msgid) gettext (msgid)
40 #include <libpspp/debug-print.h>
42 /* Flags for DFM readers.  The values are distinct octal bit masks;
   struct dfm_reader stores zero or more of them OR-ed together in its
   flags member. */
45 DFM_ADVANCE = 002, /* Read next line on the next dfm_eof() call?
   (dfm_get_record() asserts that this flag is clear.) */
46 DFM_SAW_BEGIN_DATA = 004, /* For inline_file only, whether we've
47 already read a BEGIN DATA line. */
48 DFM_TABS_EXPANDED = 010, /* Tabs have been expanded; tested and set by
   dfm_expand_tabs(). */
51 /* Data file reader.  One of these is allocated by dfm_open_reader()
   and torn down by dfm_close_reader(). */
54 struct file_handle *fh; /* File handle; compared against
   fh_inline_file() to tell the inline file from a disk file. */
55 struct file_locator where; /* Current location in data file (file name
   and line number), used by dfm_push() and dfm_pop(). */
56 struct string line; /* Current line. */
57 struct string scratch; /* Extra line buffer; dfm_expand_tabs() builds
   the tab-expanded line in it. */
58 enum dfm_reader_flags flags; /* Zero or more of DFM_*. */
59 FILE *file; /* Associated file; opened by dfm_open_reader() only when
   FH is not the inline file. */
60 size_t pos; /* Offset in line of current character.  dfm_forward_columns()
   adds 1 to it to obtain a 1-based column. */
61 unsigned eof_cnt; /* # of attempts to advance past EOF; see dfm_eof(). */
64 /* Closes reader R opened by dfm_open_reader().
   The visible steps are: release the file handle with fh_close(), close
   the disk file with fn_close(), skip any unread inline data, and destroy
   both line buffers.  NOTE(review): the statements that decide which of
   these steps run are not all visible here; check them before relying on
   this summary. */
66 dfm_close_reader (struct dfm_reader *r)
/* Remember whether R reads the inline file; no file name is copied for
   the inline file. */
75 is_inline = r->fh == fh_inline_file ();
/* Take a private copy of the file name before the handle is closed.
   NOTE(review): presumably the name may not be safe to use after
   fh_close() -- confirm. */
76 file_name = is_inline ? NULL : xstrdup (fh_get_filename (r->fh));
/* Same type and mode strings that dfm_open_reader() passes to fh_open(). */
77 still_open = fh_close (r->fh, "data file", "rs");
85 fn_close (file_name, r->file);
88 /* Skip any remaining data on the inline file. */
89 if (r->flags & DFM_SAW_BEGIN_DATA)
91 dfm_reread_record (r, 0);
93 dfm_forward_record (r);
/* Release the storage held by the two line buffers. */
97 ds_destroy (&r->line);
98 ds_destroy (&r->scratch);
103 /* Opens the file designated by file handle FH for reading as a
104 data file. Providing fh_inline_file() for FH designates the
105 "inline file", that is, data included inline in the command
106 file between BEGIN DATA and END DATA. Returns a reader if
107 successful, or a null pointer otherwise. */
109 dfm_open_reader (struct file_handle *fh)
111 struct dfm_reader *r;
/* Accept handles that refer either to a disk file or to the inline file. */
114 rp = fh_open (fh, FH_REF_FILE | FH_REF_INLINE, "data file", "rs");
120 r = xmalloc (sizeof *r);
122 ds_init (&r->line, 64);
123 ds_init (&r->scratch, 0);
/* Request a read on the first dfm_eof() call. */
124 r->flags = DFM_ADVANCE;
/* Only a disk file has a name, a line counter and a stdio stream. */
126 if (fh != fh_inline_file ())
128 r->where.filename = fh_get_filename (fh);
129 r->where.line_number = 0;
/* The stdio stream is opened in binary mode ("rb"). */
130 r->file = fn_open (fh_get_filename (fh), "rb");
133 msg (ME, _("Could not open \"%s\" for reading as a data file: %s."),
134 fh_get_filename (r->fh), strerror (errno));
/* Close the handle again, with the same type and mode strings that were
   given to fh_open() above. */
135 fh_close (fh,"data file", "rs");
145 /* Returns true if an I/O error occurred on reader R, false otherwise.
   Only readers whose handle refers to a disk file (FH_REF_FILE) are
   checked with ferror(); for any other referent the result is false. */
147 dfm_reader_error (const struct dfm_reader *r)
149 return fh_get_referent (r->fh) == FH_REF_FILE && ferror (r->file);
152 /* Reads a record from the inline file into R.
153 Returns true if successful, false on failure.
   On the first call for a reader (DFM_SAW_BEGIN_DATA clear) the BEGIN
   DATA command itself is matched first.  A line that starts with
   "end data" (compared case-insensitively) is treated as the end of the
   inline data rather than as a record. */
155 read_inline_record (struct dfm_reader *r)
/* BEGIN DATA has not been consumed yet for this reader. */
157 if ((r->flags & DFM_SAW_BEGIN_DATA) == 0)
159 r->flags |= DFM_SAW_BEGIN_DATA;
163 if (!lex_force_match_id ("BEGIN") || !lex_force_match_id ("DATA"))
165 getl_set_prompt_style (GETL_PROMPT_DATA);
/* Fetch the next line of the command file into getl_buf. */
168 if (!getl_read_line (NULL))
170 msg (SE, _("Unexpected end-of-file while reading data in BEGIN "
171 "DATA. This probably indicates "
172 "a missing or misformatted END DATA command. "
173 "END DATA must appear by itself on a single line "
174 "with exactly one space between words."));
/* 8 is the length of "end data"; only the first 8 characters of the line
   are compared, ignoring case. */
178 if (ds_length (&getl_buf) >= 8
179 && !strncasecmp (ds_c_str (&getl_buf), "end data", 8))
/* Hand the lexer a pointer to the end of the line.  NOTE(review):
   presumably so that nothing on the END DATA line is parsed as a
   command -- confirm against lex_set_prog(). */
181 lex_set_prog (ds_c_str (&getl_buf) + ds_length (&getl_buf));
/* Ordinary data line: copy it into the line buffer of R. */
185 ds_assign_string (&r->line, &getl_buf);
189 /* Reads a record from a disk file into R.
190 Returns true if successful, false on failure.
   In text mode a record is one line read with ds_gets(); in binary mode
   a record is exactly fh_get_record_width() bytes read with fread().
   The line counter in R->where is incremented for a record that was
   read. */
192 read_file_record (struct dfm_reader *r)
/* The inline file is handled by read_inline_record() instead. */
194 assert (r->fh != fh_inline_file ());
195 if (fh_get_mode (r->fh) == FH_MODE_TEXT)
198 if (!ds_gets (&r->line, r->file))
/* ds_gets() failed: report it only if it was a real I/O error. */
200 if (ferror (r->file))
201 msg (ME, _("Error reading file %s: %s."),
202 fh_get_name (r->fh), strerror (errno));
206 else if (fh_get_mode (r->fh) == FH_MODE_BINARY)
208 size_t record_width = fh_get_record_width (r->fh);
/* Make sure the line buffer is at least RECORD_WIDTH bytes long, so that
   fread() below can store the record directly into it. */
211 if (ds_length (&r->line) < record_width)
212 ds_rpad (&r->line, record_width, 0);
214 amt = fread (ds_c_str (&r->line), 1, record_width, r->file);
/* Short read: either an I/O error or the end of the file. */
215 if (record_width != amt)
217 if (ferror (r->file))
218 msg (ME, _("Error reading file %s: %s."),
219 fh_get_name (r->fh), strerror (errno));
221 msg (ME, _("%s: Partial record at end of file."),
222 fh_get_name (r->fh));
/* Keep the location used for error reporting in step with the file. */
230 r->where.line_number++;
235 /* Reads a record from R by dispatching on the referent of its file
236 handle: read_file_record() for a disk file (FH_REF_FILE),
237 read_inline_record() otherwise.  Returns the result of the function
   called; both are documented to return true if successful, false on
   failure.  NOTE(review): this comment used to say that the position is
   reset to the start of the line and that the line is set to null on
   failure; the visible statements here do neither, so check the callers
   if that behavior matters. */
239 read_record (struct dfm_reader *r)
241 return (fh_get_referent (r->fh) == FH_REF_FILE
242 ? read_file_record (r)
243 : read_inline_record (r));
246 /* Returns the number of attempts, thus far, to advance past
247 end-of-file in reader R. Reads forward in the file of R, if
248 necessary, to find out.
250 Normally, the user stops attempting to read from the file the
251 first time EOF is reached (a return value of 1). If the user
252 tries to read past EOF again (a return value of 2 or more),
253 an error message is issued, and the caller should more
254 forcibly abort to avoid an infinite loop. */
256 dfm_eof (struct dfm_reader *r)
/* A new record is wanted only if dfm_forward_record() (or the initial
   state set by dfm_open_reader()) has requested an advance. */
258 if (r->flags & DFM_ADVANCE)
260 r->flags &= ~DFM_ADVANCE;
/* Try to read only while end-of-file has not been reached before. */
262 if (r->eof_cnt == 0 && read_record (r))
/* Word the message according to whether R reads a disk file or the
   inline file. */
271 if (r->fh != fh_inline_file ())
272 msg (ME, _("Attempt to read beyond end-of-file on file %s."),
273 fh_get_name (r->fh));
275 msg (ME, _("Attempt to read beyond END DATA."));
282 /* Retrieves the current record of reader R.
283 Aborts (via assert) if reading from the file is necessary or at
284 end of file, so call dfm_eof() first. Sets *LINE to the part of the
   line that starts at the current position R->pos,
285 which is not null-terminated. The caller must not free or
286 modify the returned string. */
288 dfm_get_record (struct dfm_reader *r, struct fixed_string *line)
/* No advance may be pending, EOF must not have been hit, and the current
   position must lie within the line. */
290 assert ((r->flags & DFM_ADVANCE) == 0);
291 assert (r->eof_cnt == 0);
292 assert (r->pos <= ds_length (&r->line));
/* Expose the tail of the line buffer without copying it. */
294 line->string = ds_data (&r->line) + r->pos;
295 line->length = ds_length (&r->line) - r->pos;
298 /* Expands tabs in the current line into the equivalent number of
299 spaces, if appropriate for this kind of file. Aborts if
300 reading from the file is necessary or at end of file, so call
303 dfm_expand_tabs (struct dfm_reader *r)
306 size_t ofs, new_pos, tab_width;
308 assert ((r->flags & DFM_ADVANCE) == 0);
309 assert (r->eof_cnt == 0);
310 assert (r->pos <= ds_length (&r->line));
312 if (r->flags & DFM_TABS_EXPANDED)
314 r->flags |= DFM_TABS_EXPANDED;
316 if (r->fh != fh_inline_file ()
317 && (fh_get_mode (r->fh) == FH_MODE_BINARY
318 || fh_get_tab_width (r->fh) == 0
319 || memchr (ds_c_str (&r->line), '\t', ds_length (&r->line)) == NULL))
322 /* Expand tabs from r->line into r->scratch, and figure out
323 new value for r->pos. */
324 tab_width = fh_get_tab_width (r->fh);
/* Start from an empty scratch buffer; the expanded line is built in it. */
325 ds_clear (&r->scratch);
327 for (ofs = 0; ofs < ds_length (&r->line); ofs++)
/* NEW_POS records a length of the scratch buffer, that is, an offset in
   the expanded line.  NOTE(review): the condition under which it is
   recorded is not visible here; presumably it is when OFS reaches
   r->pos.  Confirm. */
332 new_pos = ds_length (&r->scratch);
334 c = ds_c_str (&r->line)[ofs];
336 ds_putc (&r->scratch, c);
/* Emit spaces until the scratch length is a multiple of TAB_WIDTH, that
   is, up to the next tab stop.  NOTE(review): this divides by TAB_WIDTH,
   and the zero-width test earlier in this function applies only when R
   is not the inline file; confirm that the inline handle never reports a
   tab width of 0. */
340 ds_putc (&r->scratch, ' ');
341 while (ds_length (&r->scratch) % tab_width != 0);
345 /* Swap r->line and r->scratch and set new r->pos. */
347 r->line = r->scratch;
352 /* Causes dfm_eof() to read in the next record the next time it
353 is executed on reader R, by setting the DFM_ADVANCE flag.
   (dfm_get_record() itself never reads; it asserts that the flag is
   clear.) */
355 dfm_forward_record (struct dfm_reader *r)
357 r->flags |= DFM_ADVANCE;
360 /* Cancels the effect of any previous dfm_forward_record() executed
361 on reader R. Sets the current line to begin in the 1-based
364 dfm_reread_record (struct dfm_reader *r, size_t column)
366 r->flags &= ~DFM_ADVANCE;
369 else if (column > ds_length (&r->line))
370 r->pos = ds_length (&r->line);
375 /* Sets the current line to begin COLUMNS characters following
376 the current start.  R->pos is an offset, so (R->pos + 1) is the
   current 1-based column; adding COLUMNS gives the 1-based column that
   is passed to dfm_reread_record().  Because the work is done by
   dfm_reread_record(), this also clears DFM_ADVANCE. */
378 dfm_forward_columns (struct dfm_reader *r, size_t columns)
380 dfm_reread_record (r, (r->pos + 1) + columns);
383 /* Returns the 1-based column to which the line pointer in reader R
384 is set. Unless dfm_reread_record() or dfm_forward_columns()
385 have been called, this is 1. */
387 dfm_column_start (struct dfm_reader *r)
392 /* Pushes the filename and line number of reader R (R->where) on the
   fn/ln stack.  Does nothing when R reads the inline file. */
394 dfm_push (struct dfm_reader *r)
396 if (r->fh != fh_inline_file ())
397 err_push_file_locator (&r->where);
400 /* Pops the filename and line number of reader R (R->where) from the
   fn/ln stack.  Does nothing when R reads the inline file, matching
   dfm_push(). */
402 dfm_pop (struct dfm_reader *r)
404 if (r->fh != fh_inline_file ())
405 err_pop_file_locator (&r->where);
408 /* BEGIN DATA...END DATA procedure. */
410 /* Perform BEGIN DATA...END DATA as a procedure in itself.
   Returns CMD_SUCCESS if procedure() reports success, otherwise
   CMD_CASCADING_FAILURE.  CMD_CASCADING_FAILURE is also returned, after
   an error message, when the inline file is not open. */
412 cmd_begin_data (void)
414 struct dfm_reader *r;
/* Refuse to run unless the inline file has already been opened. */
417 if (!fh_is_open (fh_inline_file ()))
419 msg (SE, _("This command is not valid here since the current "
420 "input program does not access the inline file."));
421 return CMD_CASCADING_FAILURE;
424 /* Open inline file.
   NOTE(review): dfm_open_reader() is documented to return a null pointer
   on failure, and R is dereferenced on the next line without a check;
   confirm that failure is impossible once fh_is_open() has succeeded. */
425 r = dfm_open_reader (fh_inline_file ());
/* Mark BEGIN DATA as already seen, so that read_inline_record() does not
   try to match it again. */
426 r->flags |= DFM_SAW_BEGIN_DATA;
428 /* Input procedure reads from inline file. */
429 getl_set_prompt_style (GETL_PROMPT_DATA);
430 ok = procedure (NULL, NULL);
/* Closing the reader also skips any inline data that was left unread. */
432 dfm_close_reader (r);
434 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;