1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-2004, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include "data-reader.h"
29 #include "file-handle.h"
30 #include "file-handle-def.h"
32 #include "line-buffer.h"
35 #include "procedure.h"
38 #define _(msgid) gettext (msgid)
40 #include "debug-print.h"
42 /* Flags for DFM readers. */
45 DFM_ADVANCE = 002, /* Set when the next dfm_eof() call must read a new record. */
46 DFM_SAW_BEGIN_DATA = 004, /* For inline_file only, whether we've
47 already read a BEGIN DATA line. */
48 DFM_TABS_EXPANDED = 010, /* Tabs have been expanded. */
51 /* Data file reader. */
54 struct file_handle *fh; /* File handle. */
55 struct file_locator where; /* Current location in data file (filename and line number). */
56 struct string line; /* Current line. */
57 struct string scratch; /* Extra line buffer; dfm_expand_tabs() builds the expanded line here. */
58 enum dfm_reader_flags flags; /* Zero or more of DFM_*. */
59 struct file_ext file; /* Associated file. */
60 size_t pos; /* 0-based offset in line of current character. */
61 unsigned eof_cnt; /* # of attempts to advance past EOF. */
64 /* Closes reader R opened by dfm_open_reader().

   Drops R's reference to its file handle via fh_close().  The
   remaining visible steps close the underlying file and free the
   reader's copy of its filename, skip unread inline data, and
   destroy both line buffers.
   NOTE(review): the conditions that select between these steps are
   not all visible in this view -- confirm before relying on them. */
66 dfm_close_reader (struct dfm_reader *r)
/* Record whether R reads the inline file before the handle
   reference is released. */
74 is_inline = r->fh == fh_inline_file ();
75 still_open = fh_close (r->fh, "data file", "rs");
/* Close the underlying file and release the filename copy made by
   dfm_open_reader(). */
81 fn_close_ext (&r->file);
82 free (r->file.filename);
83 r->file.filename = NULL;
87 /* Skip any remaining data on the inline file. */
88 if (r->flags & DFM_SAW_BEGIN_DATA)
90 dfm_reread_record (r, 0);
92 dfm_forward_record (r);
/* Release both line buffers owned by R. */
96 ds_destroy (&r->line);
97 ds_destroy (&r->scratch);
101 /* Opens the file designated by file handle FH for reading as a
102 data file. Providing fh_inline_file() for FH designates the
103 "inline file", that is, data included inline in the command
104 file between BEGIN DATA and END DATA. Returns a reader if
105 successful, or a null pointer otherwise. */
107 dfm_open_reader (struct file_handle *fh)
109 struct dfm_reader *r;
112 rp = fh_open (fh, FH_REF_FILE | FH_REF_INLINE, "data file", "rs");
118 r = xmalloc (sizeof *r);
120 ds_init (&r->line, 64);
121 ds_init (&r->scratch, 0);
/* Start with DFM_ADVANCE set so that the first dfm_eof() call reads
   the first record. */
122 r->flags = DFM_ADVANCE;
/* Readers on anything other than the inline file track their own
   location, starting before the first line. */
124 if (fh != fh_inline_file ())
126 r->where.filename = fh_get_filename (fh);
127 r->where.line_number = 0;
/* Keep a private copy of the filename; dfm_close_reader() frees
   it. */
129 r->file.filename = xstrdup (fh_get_filename (r->fh));
132 r->file.sequence_no = NULL;
133 r->file.param = NULL;
134 r->file.postopen = NULL;
135 r->file.preclose = NULL;
/* If the file cannot be opened, report why and drop the handle
   reference taken by fh_open(). */
136 if (!fn_open_ext (&r->file))
138 msg (ME, _("Could not open \"%s\" for reading as a data file: %s."),
139 fh_get_filename (r->fh), strerror (errno));
140 fh_close (fh,"data file", "rs");
150 /* Returns true if an I/O error occurred on R, false otherwise.
   Only a reader whose handle refers to a file (FH_REF_FILE) is
   checked with ferror(); any other reader always yields false. */
152 dfm_reader_error (const struct dfm_reader *r)
154 return fh_get_referent (r->fh) == FH_REF_FILE && ferror (r->file.file);
157 /* Reads a record from the inline file into R.
158 Returns true if successful, false on failure.

   On the first call for a reader without DFM_SAW_BEGIN_DATA, sets
   that flag, requires the BEGIN and DATA identifiers, and switches
   to the data prompt style.  A line of at least 8 characters whose
   first 8 characters match "end data" (ignoring case) is treated
   as END DATA rather than as a record. */
160 read_inline_record (struct dfm_reader *r)
162 if ((r->flags & DFM_SAW_BEGIN_DATA) == 0)
164 r->flags |= DFM_SAW_BEGIN_DATA;
168 if (!lex_force_match_id ("BEGIN") || !lex_force_match_id ("DATA"))
170 getl_set_prompt_style (GETL_PROMPT_DATA);
/* Running out of input before END DATA is a syntax error. */
173 if (!getl_read_line (NULL))
175 msg (SE, _("Unexpected end-of-file while reading data in BEGIN "
176 "DATA. This probably indicates "
177 "a missing or misformatted END DATA command. "
178 "END DATA must appear by itself on a single line "
179 "with exactly one space between words."));
/* This is a prefix match only: any line that begins with these 8
   characters is taken as END DATA, and the lexer is then pointed
   at the end of that line. */
183 if (ds_length (&getl_buf) >= 8
184 && !strncasecmp (ds_c_str (&getl_buf), "end data", 8))
186 lex_set_prog (ds_c_str (&getl_buf) + ds_length (&getl_buf));
/* An ordinary data line becomes R's current line. */
190 ds_replace (&r->line, ds_c_str (&getl_buf));
194 /* Reads a record from a disk file into R.
195 Returns true if successful, false on failure.

   A text-mode handle reads one line with ds_gets(); a binary-mode
   handle reads fh_get_record_width() bytes with fread().  Must not
   be called for the inline file. */
197 read_file_record (struct dfm_reader *r)
199 assert (r->fh != fh_inline_file ());
200 if (fh_get_mode (r->fh) == FH_MODE_TEXT)
203 if (!ds_gets (&r->line, r->file.file))
205 if (ferror (r->file.file))
206 msg (ME, _("Error reading file %s: %s."),
207 fh_get_name (r->fh), strerror (errno));
211 else if (fh_get_mode (r->fh) == FH_MODE_BINARY)
213 size_t record_width = fh_get_record_width (r->fh);
/* Grow the line buffer to at least RECORD_WIDTH bytes so that
   fread() can store a whole record directly into it. */
216 if (ds_length (&r->line) < record_width)
217 ds_rpad (&r->line, record_width, 0);
219 amt = fread (ds_c_str (&r->line), 1, record_width,
/* Short read: an I/O error is reported when ferror() is set; the
   partial-record message is the other visible outcome.
   NOTE(review): the exact condition guarding the partial-record
   message is not visible in this view. */
221 if (record_width != amt)
223 if (ferror (r->file.file))
224 msg (ME, _("Error reading file %s: %s."),
225 fh_get_name (r->fh), strerror (errno));
227 msg (ME, _("%s: Partial record at end of file."),
228 fh_get_name (r->fh));
/* Keep the location used in error messages in step with the
   records read. */
236 r->where.line_number++;
241 /* Reads a record from R, setting the current position to the
242 start of the line. If an error occurs or end-of-file is
243 encountered, the current line is set to null.

   Dispatches on the handle's referent: FH_REF_FILE readers use
   read_file_record(), all others use read_inline_record(). */
245 read_record (struct dfm_reader *r)
247 return (fh_get_referent (r->fh) == FH_REF_FILE
248 ? read_file_record (r)
249 : read_inline_record (r));
252 /* Returns the number of attempts, thus far, to advance past
253 end-of-file in reader R. Reads forward in R's file, if
254 necessary, to find out.
256 Normally, the user stops attempting to read from the file the
257 first time EOF is reached (a return value of 1). If the user
258 tries to read past EOF again (a return value of 2 or more),
259 an error message is issued, and the caller should more
260 forcibly abort to avoid an infinite loop. */
262 dfm_eof (struct dfm_reader *r)
/* A pending advance is consumed here: the flag is cleared and,
   unless EOF has already been hit, the next record is read. */
264 if (r->flags & DFM_ADVANCE)
266 r->flags &= ~DFM_ADVANCE;
268 if (r->eof_cnt == 0 && read_record (r))
/* The wording of the complaint depends on whether R reads a real
   file or the inline file. */
277 if (r->fh != fh_inline_file ())
278 msg (ME, _("Attempt to read beyond end-of-file on file %s."),
279 fh_get_name (r->fh));
281 msg (ME, _("Attempt to read beyond END DATA."));
288 /* Returns the current record of reader R.
289 Aborts if reading from the file is necessary or at
290 end of file, so call dfm_eof() first. Sets *LINE to the line,
291 which is not null-terminated. The caller must not free or
292 modify the returned string. */
294 dfm_get_record (struct dfm_reader *r, struct fixed_string *line)
/* No advance may be pending, EOF must not have been reached, and
   the current position must lie within the line. */
296 assert ((r->flags & DFM_ADVANCE) == 0);
297 assert (r->eof_cnt == 0);
298 assert (r->pos <= ds_length (&r->line));
/* Expose only the part of the line from the current position
   onward; *LINE points into R's own buffer. */
300 line->string = ds_data (&r->line) + r->pos;
301 line->length = ds_length (&r->line) - r->pos;
304 /* Expands tabs in the current line into the equivalent number of
305 spaces, if appropriate for this kind of file. Aborts if
306 reading from the file is necessary or at end of file, so call
309 dfm_expand_tabs (struct dfm_reader *r)
312 size_t ofs, new_pos, tab_width;
314 assert ((r->flags & DFM_ADVANCE) == 0);
315 assert (r->eof_cnt == 0);
316 assert (r->pos <= ds_length (&r->line));
318 if (r->flags & DFM_TABS_EXPANDED)
320 r->flags |= DFM_TABS_EXPANDED;
322 if (r->fh != fh_inline_file ()
323 && (fh_get_mode (r->fh) == FH_MODE_BINARY
324 || fh_get_tab_width (r->fh) == 0
325 || memchr (ds_c_str (&r->line), '\t', ds_length (&r->line)) == NULL))
328 /* Expand tabs from r->line into r->scratch, and figure out
329 new value for r->pos. */
/* NOTE(review): the tab-width-of-zero test above applies only to
   readers that are not on the inline file, while the padding loop
   below computes a remainder modulo TAB_WIDTH.  Confirm that the
   inline file handle can never report a tab width of 0. */
330 tab_width = fh_get_tab_width (r->fh);
331 ds_clear (&r->scratch);
/* Copy the line one character at a time; ordinary characters are
   appended unchanged and spaces are appended until the scratch
   length is a multiple of TAB_WIDTH.
   NOTE(review): the branches selecting between these cases are not
   visible in this view. */
333 for (ofs = 0; ofs < ds_length (&r->line); ofs++)
338 new_pos = ds_length (&r->scratch);
340 c = ds_c_str (&r->line)[ofs];
342 ds_putc (&r->scratch, c);
346 ds_putc (&r->scratch, ' ');
347 while (ds_length (&r->scratch) % tab_width != 0);
351 /* Swap r->line and r->scratch and set new r->pos. */
353 r->line = r->scratch;
358 /* Causes dfm_eof() to read in the next record the next time it
359 is executed on reader R.  dfm_get_record() itself never reads; it
   asserts that no advance is pending. */
361 dfm_forward_record (struct dfm_reader *r)
363 r->flags |= DFM_ADVANCE;
366 /* Cancels the effect of any previous dfm_forward_record() executed
367 on reader R. Sets the current line to begin in the 1-based
370 dfm_reread_record (struct dfm_reader *r, size_t column)
372 r->flags &= ~DFM_ADVANCE;
375 else if (column > ds_length (&r->line))
376 r->pos = ds_length (&r->line);
381 /* Sets the current line to begin COLUMNS characters following
382 the current start.  Delegates to dfm_reread_record(), so any
   pending advance on R is cancelled as well; r->pos + 1 converts
   the 0-based position to the column number that function expects. */
384 dfm_forward_columns (struct dfm_reader *r, size_t columns)
386 dfm_reread_record (r, (r->pos + 1) + columns);
389 /* Returns the 1-based column to which the line pointer in R
390 is set. Unless dfm_reread_record() or dfm_forward_columns()
391 have been called, this is 1. */
393 dfm_column_start (struct dfm_reader *r)
398 /* Pushes R's filename and line number on the fn/ln stack.
   Does nothing when R reads the inline file. */
400 dfm_push (struct dfm_reader *r)
402 if (r->fh != fh_inline_file ())
403 err_push_file_locator (&r->where);
406 /* Pops R's filename and line number from the fn/ln stack.
   Does nothing when R reads the inline file, mirroring dfm_push(). */
408 dfm_pop (struct dfm_reader *r)
410 if (r->fh != fh_inline_file ())
411 err_pop_file_locator (&r->where);
414 /* BEGIN DATA...END DATA procedure. */
416 /* Perform BEGIN DATA...END DATA as a procedure in itself.

   Fails with CMD_CASCADING_FAILURE unless the inline file is
   already open.  Otherwise runs procedure() with a reader open on
   the inline file and maps its result to CMD_SUCCESS or
   CMD_CASCADING_FAILURE. */
418 cmd_begin_data (void)
420 struct dfm_reader *r;
423 if (!fh_is_open (fh_inline_file ()))
425 msg (SE, _("This command is not valid here since the current "
426 "input program does not access the inline file."));
427 return CMD_CASCADING_FAILURE;
430 /* Open inline file. */
431 r = dfm_open_reader (fh_inline_file ());
/* Mark BEGIN DATA as already seen so that read_inline_record()
   does not try to match it again.
   NOTE(review): dfm_open_reader() is documented to return a null
   pointer on failure, and no check is visible before this
   dereference -- confirm that failure is impossible here. */
432 r->flags |= DFM_SAW_BEGIN_DATA;
434 /* Input procedure reads from inline file. */
435 getl_set_prompt_style (GETL_PROMPT_DATA);
436 ok = procedure (NULL, NULL);
/* Release this command's reader before reporting the result. */
438 dfm_close_reader (r);
440 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;