1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-2004, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include <language/data-io/data-reader.h>
26 #include <libpspp/alloc.h>
27 #include <language/command.h>
28 #include <libpspp/message.h>
29 #include <language/data-io/file-handle.h>
30 #include <data/file-handle-def.h>
31 #include <data/file-name.h>
32 #include <language/line-buffer.h>
33 #include <language/lexer/lexer.h>
34 #include <libpspp/str.h>
35 #include <procedure.h>
38 #define _(msgid) gettext (msgid)
40 /* Flags for DFM readers. The values are octal literals, one bit
each, so they can be OR'ed together in a reader's `flags' member. */
43 DFM_ADVANCE = 002, /* Read next line on dfm_eof() call? */
44 DFM_SAW_BEGIN_DATA = 004, /* For inline_file only, whether we've
45 already read a BEGIN DATA line. */
46 DFM_TABS_EXPANDED = 010, /* Tabs have been expanded. */
49 /* Data file reader. Holds the state needed to read one data file,
or the inline file, one record at a time. */
52 struct file_handle *fh; /* File handle. */
53 struct msg_locator where; /* Current location in data file (file name and line number). */
54 struct string line; /* Current line. */
55 struct string scratch; /* Extra line buffer, used by dfm_expand_tabs(). */
56 enum dfm_reader_flags flags; /* Zero or more of DFM_*. */
57 FILE *file; /* Associated file; set by dfm_open_reader() for non-inline handles. */
58 size_t pos; /* 0-based offset in line of current character. */
59 unsigned eof_cnt; /* # of attempts to advance past EOF. */
62 /* Closes reader R opened by dfm_open_reader().
Releases R's file handle with fh_close(), closes the stream of a
disk file with fn_close(), and destroys R's line buffers. For the
inline file, if BEGIN DATA has been seen, any data that remains is
skipped first. */
64 dfm_close_reader (struct dfm_reader *r)
73 is_inline = r->fh == fh_inline_file ();
74 file_name = is_inline ? NULL : xstrdup (fh_get_file_name (r->fh)); /* Private copy, taken before fh_close(); presumably the handle's name may not outlive it -- confirm. */
75 still_open = fh_close (r->fh, "data file", "rs");
83 fn_close (file_name, r->file);
86 /* Skip any remaining data on the inline file. */
87 if (r->flags & DFM_SAW_BEGIN_DATA)
89 dfm_reread_record (r, 0);
91 dfm_forward_record (r);
95 ds_destroy (&r->line);
96 ds_destroy (&r->scratch);
101 /* Opens the file designated by file handle FH for reading as a
102 data file. Providing fh_inline_file() for FH designates the
103 "inline file", that is, data included inline in the command
104 file between BEGIN DATA and END DATA. Returns a reader if
105 successful, or a null pointer otherwise. */
107 dfm_open_reader (struct file_handle *fh)
109 struct dfm_reader *r;
112 rp = fh_open (fh, FH_REF_FILE | FH_REF_INLINE, "data file", "rs");
118 r = xmalloc (sizeof *r);
120 ds_init (&r->line, 64);
121 ds_init (&r->scratch, 0);
122 r->flags = DFM_ADVANCE; /* An advance is pending, so the first dfm_eof() call reads the first record. */
124 if (fh != fh_inline_file ())
126 r->where.file_name = fh_get_file_name (fh);
127 r->where.line_number = 0; /* Incremented by read_file_record() for each record read. */
128 r->file = fn_open (fh_get_file_name (fh), "rb");
131 msg (ME, _("Could not open \"%s\" for reading as a data file: %s."),
132 fh_get_file_name (r->fh), strerror (errno));
133 fh_close (fh,"data file", "rs"); /* Give back the handle opened by fh_open() above. */
143 /* Returns true if an I/O error occurred on reader R, false otherwise.
Only a reader whose handle refers to a file (FH_REF_FILE) is checked,
using ferror() on its stream; for any other referent the result is
false. */
145 dfm_reader_error (const struct dfm_reader *r)
147 return fh_get_referent (r->fh) == FH_REF_FILE && ferror (r->file);
150 /* Reads a record from the inline file into R.
151 Returns true if successful, false on failure.
On the first call for R (DFM_SAW_BEGIN_DATA not yet set) the flag is
set and the BEGIN DATA tokens are matched. Each record is then
obtained with getl_read_line(). A line whose first 8 characters
match "end data", ignoring case, is not copied into R; instead the
lexer is pointed at the end of that line.
NOTE(review): the "end data" test is a prefix match only, so a line
such as "end datax" also matches -- confirm that this is intended. */
153 read_inline_record (struct dfm_reader *r)
155 if ((r->flags & DFM_SAW_BEGIN_DATA) == 0)
157 r->flags |= DFM_SAW_BEGIN_DATA;
161 if (!lex_force_match_id ("BEGIN") || !lex_force_match_id ("DATA"))
163 getl_set_prompt_style (GETL_PROMPT_DATA);
166 if (!getl_read_line (NULL))
168 msg (SE, _("Unexpected end-of-file while reading data in BEGIN "
169 "DATA. This probably indicates "
170 "a missing or misformatted END DATA command. "
171 "END DATA must appear by itself on a single line "
172 "with exactly one space between words."));
176 if (ds_length (&getl_buf) >= 8
177 && !strncasecmp (ds_c_str (&getl_buf), "end data", 8))
179 lex_set_prog (ds_c_str (&getl_buf) + ds_length (&getl_buf));
183 ds_assign_string (&r->line, &getl_buf);
187 /* Reads a record from a disk file into R.
188 Returns true if successful, false on failure.
In text mode (FH_MODE_TEXT) a record is one line read with ds_gets().
In binary mode (FH_MODE_BINARY) a record is exactly
fh_get_record_width() bytes read with fread(); a short read is
reported either as a read error or as a partial record at end of
file. R's line number is incremented for the record read. Must not
be called for the inline file. */
190 read_file_record (struct dfm_reader *r)
192 assert (r->fh != fh_inline_file ());
193 if (fh_get_mode (r->fh) == FH_MODE_TEXT)
196 if (!ds_gets (&r->line, r->file))
198 if (ferror (r->file))
199 msg (ME, _("Error reading file %s: %s."),
200 fh_get_name (r->fh), strerror (errno));
204 else if (fh_get_mode (r->fh) == FH_MODE_BINARY)
206 size_t record_width = fh_get_record_width (r->fh);
209 if (ds_length (&r->line) < record_width)
210 ds_rpad (&r->line, record_width, 0); /* Make sure the buffer holds record_width bytes for fread() to fill. */
212 amt = fread (ds_c_str (&r->line), 1, record_width, r->file);
213 if (record_width != amt)
215 if (ferror (r->file))
216 msg (ME, _("Error reading file %s: %s."),
217 fh_get_name (r->fh), strerror (errno));
219 msg (ME, _("%s: Partial record at end of file."),
220 fh_get_name (r->fh));
228 r->where.line_number++;
233 /* Reads a record from R, setting the current position to the
234 start of the line. If an error occurs or end-of-file is
235 encountered, the current line is set to null.
Dispatches on the referent of R's handle: read_file_record() for
FH_REF_FILE, read_inline_record() otherwise. The result of that
function is returned unchanged. */
237 read_record (struct dfm_reader *r)
239 return (fh_get_referent (r->fh) == FH_REF_FILE
240 ? read_file_record (r)
241 : read_inline_record (r));
244 /* Returns the number of attempts, thus far, to advance past
245 end-of-file in reader R. Reads forward in R's file, if
246 necessary, to find out.
248 Normally, the user stops attempting to read from the file the
249 first time EOF is reached (a return value of 1). If the user
250 tries to read past EOF again (a return value of 2 or more),
251 an error message is issued, and the caller should more
252 forcibly abort to avoid an infinite loop. */
254 dfm_eof (struct dfm_reader *r)
256 if (r->flags & DFM_ADVANCE) /* A new record was requested, e.g. by dfm_forward_record(). */
258 r->flags &= ~DFM_ADVANCE;
260 if (r->eof_cnt == 0 && read_record (r)) /* Never read again once EOF has been counted. */
269 if (r->fh != fh_inline_file ())
270 msg (ME, _("Attempt to read beyond end-of-file on file %s."),
271 fh_get_name (r->fh));
273 msg (ME, _("Attempt to read beyond END DATA."));
280 /* Returns the current record in the file corresponding to
281 reader R. Aborts if reading from the file is necessary or at
282 end of file, so call dfm_eof() first. Sets *LINE to the line,
283 which is not null-terminated. The caller must not free or
284 modify the returned string. *LINE starts at R's current position
within the record, not necessarily at the record's first character. */
286 dfm_get_record (struct dfm_reader *r, struct fixed_string *line)
288 assert ((r->flags & DFM_ADVANCE) == 0); /* No read may be pending. */
289 assert (r->eof_cnt == 0); /* Must not be at end of file. */
290 assert (r->pos <= ds_length (&r->line));
292 line->string = ds_data (&r->line) + r->pos; /* Points into R's own line buffer. */
293 line->length = ds_length (&r->line) - r->pos;
296 /* Expands tabs in the current line into the equivalent number of
297 spaces, if appropriate for this kind of file. Aborts if
298 reading from the file is necessary or at end of file, so call dfm_eof() first. */
301 dfm_expand_tabs (struct dfm_reader *r)
304 size_t ofs, new_pos, tab_width;
306 assert ((r->flags & DFM_ADVANCE) == 0);
307 assert (r->eof_cnt == 0);
308 assert (r->pos <= ds_length (&r->line));
310 if (r->flags & DFM_TABS_EXPANDED)
312 r->flags |= DFM_TABS_EXPANDED;
314 if (r->fh != fh_inline_file ()
315 && (fh_get_mode (r->fh) == FH_MODE_BINARY
316 || fh_get_tab_width (r->fh) == 0
317 || memchr (ds_c_str (&r->line), '\t', ds_length (&r->line)) == NULL))
320 /* Expand tabs from r->line into r->scratch, and figure out
321 new value for r->pos. */
322 tab_width = fh_get_tab_width (r->fh); /* NOTE(review): the zero-width test above is skipped for the inline file, so a tab width of 0 there would reach the modulo below -- confirm it cannot be 0. */
323 ds_clear (&r->scratch);
325 for (ofs = 0; ofs < ds_length (&r->line); ofs++)
330 new_pos = ds_length (&r->scratch);
332 c = ds_c_str (&r->line)[ofs];
334 ds_putc (&r->scratch, c);
338 ds_putc (&r->scratch, ' ');
339 while (ds_length (&r->scratch) % tab_width != 0); /* Pad with spaces up to the next multiple of tab_width. */
343 /* Swap r->line and r->scratch and set new r->pos. */
345 r->line = r->scratch;
350 /* Causes dfm_eof() to read in the next record the next time it
351 is executed on reader R, by setting DFM_ADVANCE in R's flags. */
353 dfm_forward_record (struct dfm_reader *r)
355 r->flags |= DFM_ADVANCE;
358 /* Cancels the effect of any previous dfm_forward_record() executed
359 on reader R. Sets the current line to begin in the 1-based column COLUMN. */
362 dfm_reread_record (struct dfm_reader *r, size_t column)
364 r->flags &= ~DFM_ADVANCE; /* Keep the current record: drop any pending advance request. */
367 else if (column > ds_length (&r->line)) /* COLUMN lies beyond the end of the line: clamp to the end. */
368 r->pos = ds_length (&r->line);
373 /* Sets the current line to begin COLUMNS characters following
374 the current start. Done through dfm_reread_record(), so a pending
advance on R is cancelled as well. */
376 dfm_forward_columns (struct dfm_reader *r, size_t columns)
378 dfm_reread_record (r, (r->pos + 1) + columns); /* r->pos + 1 is the current 1-based column. */
381 /* Returns the 1-based column to which the line pointer in reader R
382 is set. Unless dfm_reread_record() or dfm_forward_columns()
383 have been called, this is 1. */
385 dfm_column_start (struct dfm_reader *r)
390 /* Pushes the file name and line number on the fn/ln stack.
Does nothing when R reads the inline file. */
392 dfm_push (struct dfm_reader *r)
394 if (r->fh != fh_inline_file ())
395 msg_push_msg_locator (&r->where);
398 /* Pops the file name and line number from the fn/ln stack.
Does nothing when R reads the inline file, matching dfm_push(). */
400 dfm_pop (struct dfm_reader *r)
402 if (r->fh != fh_inline_file ())
403 msg_pop_msg_locator (&r->where);
406 /* BEGIN DATA...END DATA procedure. */
408 /* Perform BEGIN DATA...END DATA as a procedure in itself.
Fails with CMD_CASCADING_FAILURE unless the inline file is already
open. Otherwise opens a reader on the inline file, runs the
procedure, closes the reader, and returns CMD_SUCCESS if the
procedure succeeded or CMD_CASCADING_FAILURE if it did not. */
410 cmd_begin_data (void)
412 struct dfm_reader *r;
415 if (!fh_is_open (fh_inline_file ()))
417 msg (SE, _("This command is not valid here since the current "
418 "input program does not access the inline file."));
419 return CMD_CASCADING_FAILURE;
422 /* Open inline file. */
423 r = dfm_open_reader (fh_inline_file ()); /* NOTE(review): dfm_open_reader() is documented to return a null pointer on failure and R is dereferenced unchecked on the next line -- confirm it cannot fail here. */
424 r->flags |= DFM_SAW_BEGIN_DATA; /* Keeps read_inline_record() from trying to match BEGIN DATA again. */
426 /* Input procedure reads from inline file. */
427 getl_set_prompt_style (GETL_PROMPT_DATA);
428 ok = procedure (NULL, NULL);
430 dfm_close_reader (r);
432 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;