1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2004, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <language/data-io/data-reader.h>
27 #include <data/casereader.h>
28 #include <data/file-handle-def.h>
29 #include <data/file-name.h>
30 #include <data/procedure.h>
31 #include <language/command.h>
32 #include <language/data-io/file-handle.h>
33 #include <language/lexer/lexer.h>
34 #include <language/prompt.h>
35 #include <libpspp/assertion.h>
36 #include <libpspp/message.h>
37 #include <libpspp/str.h>
43 #define _(msgid) gettext (msgid)
44 #define N_(msgid) (msgid)
46 /* Flags for DFM readers.  Each value is a distinct (octal) bit that
   is OR'ed into the `flags' member of the reader. */
49 DFM_ADVANCE = 002, /* Read next record on the next dfm_eof() call? */
50 DFM_SAW_BEGIN_DATA = 004, /* For inline_file only, whether we've
51 already read a BEGIN DATA line. */
52 DFM_TABS_EXPANDED = 010, /* Tabs have been expanded. */
55 /* Data file reader. */
58 struct file_handle *fh; /* File handle. */
59 struct fh_lock *lock; /* Mutual exclusion lock for file. */
60 struct msg_locator where; /* Current location in data file. */
61 struct string line; /* Current line. */
62 struct string scratch; /* Extra line buffer (tab-expansion target). */
63 enum dfm_reader_flags flags; /* Zero or more of DFM_*. */
64 FILE *file; /* Associated file. */
65 size_t pos; /* 0-based offset in line of current character. */
66 unsigned eof_cnt; /* # of attempts to advance past EOF. */
67 struct lexer *lexer; /* The lexer reading the file. */
70 /* Closes reader R opened by dfm_open_reader().  The underlying file
   is only closed once fh_unlock() reports that no other client still
   holds the lock; R's line buffers are destroyed at that point. */
72 dfm_close_reader (struct dfm_reader *r)
77 if (fh_unlock (r->lock))
79 /* File is still locked by another client. */
83 /* This was the last client, so close the underlying file. */
84 if (fh_get_referent (r->fh) != FH_REF_INLINE)
85 fn_close (fh_get_file_name (r->fh), r->file);
88 /* Skip any remaining data on the inline file. */
89 if (r->flags & DFM_SAW_BEGIN_DATA)
91 dfm_reread_record (r, 0);
93 dfm_forward_record (r);
/* Release both line buffers owned by the reader. */
98 ds_destroy (&r->line);
99 ds_destroy (&r->scratch);
103 /* Opens the file designated by file handle FH for reading as a
104 data file. Providing fh_inline_file() for FH designates the
105 "inline file", that is, data included inline in the command
106 file between BEGIN DATA and END DATA. Returns a reader if
107 successful, or a null pointer otherwise. */
109 dfm_open_reader (struct file_handle *fh, struct lexer *lexer)
/* NOTE(review): LEXER is presumably saved in the reader for later use
   by read_inline_record(); the assignment is not visible here --
   confirm. */
111 struct dfm_reader *r;
112 struct fh_lock *lock;
114 /* TRANSLATORS: this fragment will be interpolated into
115 messages in fh_lock() that identify types of files. */
116 lock = fh_lock (fh, FH_REF_FILE | FH_REF_INLINE, N_("data file"),
/* A reader may already be attached to the lock as its aux data. */
121 r = fh_lock_get_aux (lock);
125 r = xmalloc (sizeof *r);
129 ds_init_empty (&r->line);
130 ds_init_empty (&r->scratch);
/* Start with DFM_ADVANCE set so that the first dfm_eof() call reads
   the first record. */
131 r->flags = DFM_ADVANCE;
/* Only a non-inline handle has a disk file to open and a file name to
   report in messages. */
133 if (fh_get_referent (fh) != FH_REF_INLINE)
135 r->where.file_name = fh_get_file_name (fh);
136 r->where.line_number = 0;
137 r->file = fn_open (fh_get_file_name (fh), "rb");
140 msg (ME, _("Could not open \"%s\" for reading as a data file: %s."),
141 fh_get_file_name (r->fh), strerror (errno));
/* Attach the new reader to the lock so that fh_lock_get_aux() can
   return it later. */
148 fh_lock_set_aux (lock, r);
153 /* Returns true if an I/O error occurred on reader R, false
   otherwise.  Only readers whose handle refers to a file
   (FH_REF_FILE) are checked, so other readers always yield false. */
155 dfm_reader_error (const struct dfm_reader *r)
157 return fh_get_referent (r->fh) == FH_REF_FILE && ferror (r->file);
160 /* Reads a record from the inline file into R.
161 Returns true if successful, false on failure. */
163 read_inline_record (struct dfm_reader *r)
/* First call for this reader: the BEGIN DATA command itself has not
   been consumed yet, so parse it from the lexer before reading raw
   data lines. */
165 if ((r->flags & DFM_SAW_BEGIN_DATA) == 0)
167 r->flags |= DFM_SAW_BEGIN_DATA;
169 while (lex_token (r->lexer) == '.')
171 if (!lex_force_match_id (r->lexer, "BEGIN") || !lex_force_match_id (r->lexer, "DATA"))
173 prompt_set_style (PROMPT_DATA);
176 if (!lex_get_line_raw (r->lexer))
178 msg (SE, _("Unexpected end-of-file while reading data in BEGIN "
179 "DATA. This probably indicates "
180 "a missing or misformatted END DATA command. "
181 "END DATA must appear by itself on a single line "
182 "with exactly one space between words."));
/* A line of at least 8 characters whose first 8 characters are
   "end data" (compared case-insensitively) terminates the data.
   NOTE(review): this is a prefix match only; anything following
   "end data" on the line is not examined here. */
186 if (ds_length (lex_entire_line_ds (r->lexer) ) >= 8
187 && !strncasecmp (lex_entire_line (r->lexer), "end data", 8))
189 lex_discard_line (r->lexer);
/* Otherwise the whole raw line becomes the current record. */
193 ds_assign_string (&r->line, lex_entire_line_ds (r->lexer) );
198 /* Reads a record from a disk file into R.
199 Returns true if successful, false on failure.
   Must not be called for the inline file. */
201 read_file_record (struct dfm_reader *r)
203 assert (r->fh != fh_inline_file ());
/* Text mode: a record is one line, with its trailing newline removed. */
205 if (fh_get_mode (r->fh) == FH_MODE_TEXT)
207 if (!ds_read_line (&r->line, r->file))
209 if (ferror (r->file))
210 msg (ME, _("Error reading file %s: %s."),
211 fh_get_name (r->fh), strerror (errno));
214 ds_chomp (&r->line, '\n');
/* Binary mode: a record is exactly record_width bytes; reading fewer
   is either an I/O error or a partial record at end of file. */
216 else if (fh_get_mode (r->fh) == FH_MODE_BINARY)
218 size_t record_width = fh_get_record_width (r->fh);
219 size_t amt = ds_read_stream (&r->line, 1, record_width, r->file);
220 if (record_width != amt)
222 if (ferror (r->file))
223 msg (ME, _("Error reading file %s: %s."),
224 fh_get_name (r->fh), strerror (errno));
226 msg (ME, _("%s: Partial record at end of file."),
227 fh_get_name (r->fh));
/* Keep the location used in messages in step with the records read. */
235 r->where.line_number++;
240 /* Reads a record from R, setting the current position to the
241 start of the line. If an error occurs or end-of-file is
242 encountered, the current line is set to null.
   Dispatches to read_file_record() when R's handle refers to a file
   (FH_REF_FILE) and to read_inline_record() otherwise, returning that
   function's result. */
244 read_record (struct dfm_reader *r)
246 return (fh_get_referent (r->fh) == FH_REF_FILE
247 ? read_file_record (r)
248 : read_inline_record (r));
251 /* Returns the number of attempts, thus far, to advance past
252 end-of-file in reader R. Reads forward in R's file, if
253 necessary, to find out.
255 Normally, the user stops attempting to read from the file the
256 first time EOF is reached (a return value of 1). If the user
257 tries to read past EOF again (a return value of 2 or more),
258 an error message is issued, and the caller should more
259 forcibly abort to avoid an infinite loop. */
261 dfm_eof (struct dfm_reader *r)
/* A pending advance request (see dfm_forward_record()) is consumed
   here: this is where the next record is actually read. */
263 if (r->flags & DFM_ADVANCE)
265 r->flags &= ~DFM_ADVANCE;
/* No further read is attempted once end-of-file has been counted. */
267 if (r->eof_cnt == 0 && read_record (r) )
/* The message wording depends on whether R reads a named file or the
   inline file. */
276 if (r->fh != fh_inline_file ())
277 msg (ME, _("Attempt to read beyond end-of-file on file %s."),
278 fh_get_name (r->fh));
280 msg (ME, _("Attempt to read beyond END DATA."));
287 /* Returns the current record of reader R, starting at the current
288 position within the line. Aborts if reading from the file is
289 necessary or at end of file, so call dfm_eof() first. */
291 dfm_get_record (struct dfm_reader *r)
/* Both preconditions are established by a dfm_eof() call that found a
   record: no advance pending and no end-of-file seen. */
293 assert ((r->flags & DFM_ADVANCE) == 0);
294 assert (r->eof_cnt == 0);
296 return ds_substr (&r->line, r->pos, SIZE_MAX);
299 /* Expands tabs in the current line into the equivalent number of
300 spaces, if appropriate for this kind of file. Aborts if
301 reading from the file is necessary or at end of file, so call
302 dfm_eof() first. */
304 dfm_expand_tabs (struct dfm_reader *r)
306 size_t ofs, new_pos, tab_width;
/* Same preconditions as dfm_get_record(): a current record must be
   available. */
308 assert ((r->flags & DFM_ADVANCE) == 0);
309 assert (r->eof_cnt == 0);
/* DFM_TABS_EXPANDED records that expansion was already handled for the
   current line. */
311 if (r->flags & DFM_TABS_EXPANDED)
313 r->flags |= DFM_TABS_EXPANDED;
/* For a non-inline file, binary mode, a tab width of 0, or a line
   without any tab means there is nothing to expand.
   NOTE(review): these checks are not applied to the inline file, so
   the `% tab_width' below relies on the inline file's tab width being
   nonzero -- confirm. */
315 if (r->fh != fh_inline_file ()
316 && (fh_get_mode (r->fh) == FH_MODE_BINARY
317 || fh_get_tab_width (r->fh) == 0
318 || ds_find_char (&r->line, '\t') == SIZE_MAX))
321 /* Expand tabs from r->line into r->scratch, and figure out
322 new value for r->pos. */
323 tab_width = fh_get_tab_width (r->fh);
324 ds_clear (&r->scratch);
326 for (ofs = 0; ofs < ds_length (&r->line); ofs++)
331 new_pos = ds_length (&r->scratch);
333 c = ds_data (&r->line)[ofs];
335 ds_put_char (&r->scratch, c);
/* Pad with spaces until the output length is a multiple of tab_width. */
339 ds_put_char (&r->scratch, ' ');
340 while (ds_length (&r->scratch) % tab_width != 0);
/* new_pos still at SIZE_MAX means the loop never reached r->pos. */
343 if (new_pos == SIZE_MAX)
345 /* Maintain the same relationship between position and line
346 length that we had before. DATA LIST uses a
347 beyond-the-end position to deal with an empty field at
348 the end of the line. */
349 assert (r->pos >= ds_length (&r->line));
350 new_pos = (r->pos - ds_length (&r->line)) + ds_length (&r->scratch);
353 /* Swap r->line and r->scratch and set new r->pos. */
354 ds_swap (&r->line, &r->scratch);
358 /* Causes dfm_get_record() or dfm_get_whole_record() to read in
359 the next record the next time it is executed on reader R. */
362 dfm_forward_record (struct dfm_reader *r)
/* Only records the request; the record itself is read by the next
   dfm_eof() call, which tests and clears DFM_ADVANCE. */
364 r->flags |= DFM_ADVANCE;
367 /* Cancels the effect of any previous dfm_forward_record() executed
368 on reader R. Sets the current line to begin in the 1-based
369 column COLUMN (a COLUMN of 0 is treated as column 1). */
371 dfm_reread_record (struct dfm_reader *r, size_t column)
/* Drop any pending advance so that the current record stays current. */
373 r->flags &= ~DFM_ADVANCE;
/* Convert the 1-based COLUMN to the 0-based r->pos; MAX keeps a COLUMN
   of 0 from wrapping around in the subtraction. */
374 r->pos = MAX (column, 1) - 1;
377 /* Sets the current line to begin COLUMNS characters following
378 the current start.  Implemented through dfm_reread_record(), so
   any pending DFM_ADVANCE request is cancelled as well. */
380 dfm_forward_columns (struct dfm_reader *r, size_t columns)
/* r->pos is 0-based while dfm_reread_record() takes a 1-based column,
   hence the + 1. */
382 dfm_reread_record (r, (r->pos + 1) + columns);
385 /* Returns the 1-based column to which the line pointer in reader R
386 is set. Unless dfm_reread_record() or dfm_forward_columns()
387 have been called, this is 1. */
389 dfm_column_start (const struct dfm_reader *r)
394 /* Returns the number of columns we are currently beyond the end
395 of the line. At or before end-of-line, this is 0; one column
396 after end-of-line, this is 1; and so on. */
398 dfm_columns_past_end (const struct dfm_reader *r)
/* The second branch is only taken when r->pos >= the line length, so
   the length must be subtracted from the position.  The reverse
   difference would be zero or negative there (wrapping around for an
   unsigned type) instead of the documented 1, 2, ... */
400 return r->pos < ds_length (&r->line) ? 0 : r->pos - ds_length (&r->line);
403 /* Returns the 1-based column within the current line that P
404 points to. */
406 dfm_get_column (const struct dfm_reader *r, const char *p)
/* ds_pointer_to_position() presumably yields P's 0-based offset within
   r->line (confirm); adding 1 makes the result 1-based. */
408 return ds_pointer_to_position (&r->line, p) + 1;
411 /* Pushes the file name and line number on the fn/ln stack.
   Does nothing when R reads the inline file. */
413 dfm_push (struct dfm_reader *r)
415 if (r->fh != fh_inline_file ())
416 msg_push_msg_locator (&r->where);
419 /* Pops the file name and line number from the fn/ln stack.
   Does nothing when R reads the inline file, mirroring dfm_push(). */
421 dfm_pop (struct dfm_reader *r)
423 if (r->fh != fh_inline_file ())
424 msg_pop_msg_locator (&r->where);
427 /* BEGIN DATA...END DATA procedure. */
429 /* Perform BEGIN DATA...END DATA as a procedure in itself.
   Returns CMD_SUCCESS if the procedure commits successfully and
   CMD_CASCADING_FAILURE otherwise, including when nothing currently
   holds a read lock on the inline file. */
431 cmd_begin_data (struct lexer *lexer, struct dataset *ds)
433 struct dfm_reader *r;
436 if (!fh_is_locked (fh_inline_file (), FH_ACC_READ))
438 msg (SE, _("This command is not valid here since the current "
439 "input program does not access the inline file."));
440 return CMD_CASCADING_FAILURE;
443 /* Open inline file. */
/* NOTE(review): dfm_open_reader() is documented to return a null
   pointer on failure, but R is dereferenced on the next line without
   a check -- confirm that failure is impossible here. */
444 r = dfm_open_reader (fh_inline_file (), lexer);
/* The BEGIN DATA command has already been parsed by the time this
   command runs, so read_inline_record() must not look for it again. */
445 r->flags |= DFM_SAW_BEGIN_DATA;
447 /* Input procedure reads from inline file. */
448 prompt_set_style (PROMPT_DATA);
449 casereader_destroy (proc_open (ds));
450 ok = proc_commit (ds);
451 dfm_close_reader (r);
453 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;