1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA. */
29 #include "file-handle.h"
37 #include "debug-print.h"
39 /* file_handle extension structure. */
42 struct file_ext file; /* Associated file. */
44 struct file_locator where; /* Current location in data file. */
45 char *line; /* Current line, not null-terminated. */
46 size_t size; /* Number of bytes allocated for line. */
47 size_t len; /* Length of line. */
49 char *ptr; /* Pointer into line that is returned by
51 int advance; /* Nonzero=dfm_get_record() reads a new
52 record; otherwise returns current record. */
53 int saw_begin_data; /* For inline_file only, whether we've
54 already read a BEGIN DATA line. */
/* Forward declarations of the two class descriptors.  The open and
   close routines below compare H->CLASS against their addresses, so
   they must be declared before their initializers appear. */
57 /* These are defined at the end of this file. */
58 static struct fh_ext_class dfm_r_class;
59 static struct fh_ext_class dfm_w_class;
/* read_record() is defined further down in this file; this prototype
   lets code that precedes the definition call it. */
61 static void read_record (struct file_handle *h);
63 /* Internal (low level). */
/* dfm_close: releases the data-file state attached to handle H.
   NOTE(review): this listing has lost lines of the function (its
   return type, its braces, the body of the loop and whatever follows
   the last statement shown), so the comments added here describe only
   the statements that are visible. */
65 /* Closes the file handle H which was opened by open_file_r() or
68 dfm_close (struct file_handle *h)
70 struct dfm_fhuser_ext *ext = h->ext;
72 /* Skip any remaining data on the inline file. */
/* Loops until the current line becomes NULL, which is how
   read_record() reports end of file or an error.  The loop body is
   not visible here; presumably it calls read_record (h) -- confirm. */
74 while (ext->line != NULL)
/* Reports, with message class VM (2), the file name and handle name
   being closed. */
77 msg (VM (2), _("%s: Closing data-file handle %s."),
78 handle_get_filename (h), handle_get_name (h));
/* The handle must currently be open through this module, either for
   reading or for writing. */
79 assert (h->class == &dfm_r_class || h->class == &dfm_w_class);
/* Close the underlying file, release the file name copy that the
   open routines made with xstrdup(), and clear the pointer so that it
   cannot be freed twice. */
82 fn_close_ext (&ext->file);
83 free (ext->file.filename);
84 ext->file.filename = NULL;
/* dfm_open_for_reading: attaches a fresh reader extension to handle H.
   NOTE(review): this listing has lost lines of the function (return
   type, braces, branch bodies, part of the field initialization and
   the exit paths); the comments added here describe only what is
   visible. */
90 /* Opens a file handle for reading as a data file. Returns
91 nonzero only if successful. */
93 dfm_open_for_reading (struct file_handle *h)
95 struct dfm_fhuser_ext *ext;
/* A handle whose class is already dfm_r_class is open for reading
   through this module.  What the branch does is not visible here. */
99 if (h->class == &dfm_r_class)
/* A handle owned by some other class cannot be reopened for reading;
   the message names what it is currently open for. */
103 msg (ME, _("Cannot read from file %s already opened for %s."),
104 handle_get_name (h), gettext (h->class->name));
/* Build the extension: locator at line 0 of the handle's file, no
   file opened yet, and a 128-byte initial line buffer.
   NOTE(review): the assignments to SIZE, LEN, PTR and ADVANCE are not
   visible in this listing -- confirm they exist in the full file. */
109 ext = xmalloc (sizeof *ext);
110 ext->where.filename = handle_get_filename (h);
111 ext->where.line_number = 0;
112 ext->file.file = NULL;
113 ext->line = xmalloc (128);
118 ext->saw_begin_data = 0;
120 msg (VM (1), _("%s: Opening data-file handle %s for reading."),
121 handle_get_filename (h), handle_get_name (h));
/* Only real files are opened here; the inline file has no file_ext
   to set up. */
124 if (h != inline_file)
/* The file name is duplicated so that the extension owns its copy
   (dfm_close() frees it).  The file is opened in binary read mode and
   none of the optional file_ext hooks are used. */
126 ext->file.filename = xstrdup (handle_get_filename (h));
127 ext->file.mode = "rb";
128 ext->file.file = NULL;
129 ext->file.sequence_no = NULL;
130 ext->file.param = NULL;
131 ext->file.postopen = NULL;
132 ext->file.preclose = NULL;
/* NOTE(review): EXT and EXT->LINE are already allocated at this
   point; whether the failure path releases them is not visible. */
133 if (!fn_open_ext (&ext->file))
135 msg (ME, _("Could not open \"%s\" for reading "
136 "as a data file: %s."),
137 handle_get_filename (h), strerror (errno));
/* Mark the handle as owned by the reader class. */
142 h->class = &dfm_r_class;
/* dfm_open_for_writing: attaches a fresh writer extension to handle H.
   NOTE(review): this listing has lost lines of the function (return
   type, braces, branch bodies and the exit paths); the comments added
   here describe only what is visible. */
152 /* Opens a file handle for writing as a data file. */
154 dfm_open_for_writing (struct file_handle *h)
156 struct dfm_fhuser_ext *ext;
/* A non-null class means that the handle is already open. */
158 if (h->class != NULL)
/* Already open for writing through this module.  What the branch
   does is not visible here. */
160 if (h->class == &dfm_w_class)
/* Open for some other purpose: report what it is open for. */
164 msg (ME, _("Cannot write to file %s already opened for %s."),
165 handle_get_name (h), gettext (h->class->name));
/* Build the extension: locator at line 0 of the handle's file and no
   file opened yet. */
171 ext = xmalloc (sizeof *ext);
172 ext->where.filename = handle_get_filename (h);
173 ext->where.line_number = 0;
174 ext->file.file = NULL;
181 msg (VM (1), _("%s: Opening data-file handle %s for writing."),
182 handle_get_filename (h), handle_get_name (h));
/* The inline file is input only.
   NOTE(review): EXT has already been allocated when this check runs;
   whether the error path frees it is not visible -- confirm. */
185 if (h == inline_file)
187 msg (ME, _("Cannot open the inline file for writing."));
/* The file name is duplicated so that the extension owns its copy
   (dfm_close() frees it).  The file is opened in binary write mode
   and none of the optional file_ext hooks are used. */
191 ext->file.filename = xstrdup (handle_get_filename (h));
192 ext->file.mode = "wb";
193 ext->file.file = NULL;
194 ext->file.sequence_no = NULL;
195 ext->file.param = NULL;
196 ext->file.postopen = NULL;
197 ext->file.preclose = NULL;
199 if (!fn_open_ext (&ext->file))
201 msg (ME, _("An error occurred while opening \"%s\" for writing "
202 "as a data file: %s."),
203 handle_get_filename (h), strerror (errno));
/* Mark the handle as owned by the writer class. */
207 h->class = &dfm_w_class;
/* Ensures that the line buffer in the file handle extension named
   EXT in the invoking scope is big enough to hold a line of
   EXT->LEN characters plus one extra byte for a null terminator.

   The macro refers to a variable named `ext' in the caller, so it
   can only be used where such a variable is in scope.  The buffer
   may move, so any pointer into EXT->LINE must be recomputed after
   invoking it.

   The new size is twice the required size.  It is computed from
   EXT->LEN + 1 rather than EXT->LEN so that an empty line still
   yields a nonzero allocation. */
#define force_line_buffer_expansion()				\
	do							\
	  {							\
	    if (ext->len + 1 > ext->size)			\
	      {							\
		ext->size = (ext->len + 1) * 2;			\
		ext->line = xrealloc (ext->line, ext->size);	\
	      }							\
	  }							\
	while (0)
/* Counts the number of tabs in string S of length LEN.

   S need not be null-terminated; exactly LEN bytes are examined.
   Returns 0 when LEN is 0. */
static int
count_tabs (const char *s, size_t len)
{
  int n_tabs = 0;

  for (;;)
    {
      const char *cp = memchr (s, '\t', len);
      if (cp == NULL)
	return n_tabs;

      /* Resume the search just past the tab that was found. */
      n_tabs++;
      len -= cp - s + 1;
      s = cp + 1;
    }
}
248 /* Converts all the tabs in H->EXT->LINE to an equivalent number of
249 spaces, if necessary. */
251 tabs_to_spaces (struct file_handle *h)
253 struct dfm_fhuser_ext *ext = h->ext;
255 char *first_tab; /* Location of first tab (if any). */
256 char *second_tab; /* Location of second tab (if any). */
257 size_t orig_len; /* Line length at function entry. */
259 /* If there aren't any tabs then there's nothing to do. */
260 first_tab = memchr (ext->line, '\t', ext->len);
261 if (first_tab == NULL)
265 /* If there's just one tab then expand it inline. Otherwise do a
266 full string copy to another buffer. */
267 second_tab = memchr (first_tab + 1, '\t',
268 ext->len - (first_tab - ext->line + 1));
269 if (second_tab == NULL)
271 int n_spaces = 8 - (first_tab - ext->line) % 8;
273 ext->len += n_spaces - 1;
275 /* Expand the line if necessary, keeping the first_tab pointer
278 size_t ofs = first_tab - ext->line;
279 force_line_buffer_expansion ();
280 first_tab = ext->line + ofs;
283 memmove (first_tab + n_spaces, first_tab + 1,
284 orig_len - (first_tab - ext->line + 1));
285 memset (first_tab, ' ', n_spaces);
287 /* Make a local copy of original text. */
288 char *orig_line = local_alloc (ext->len + 1);
289 memcpy (orig_line, ext->line, ext->len);
291 /* Allocate memory assuming we need to add 8 spaces for every tab. */
292 ext->len += 2 + count_tabs (second_tab + 1,
293 ext->len - (second_tab - ext->line + 1));
295 /* Expand the line if necessary, keeping the first_tab pointer
298 size_t ofs = first_tab - ext->line;
299 force_line_buffer_expansion ();
300 first_tab = ext->line + ofs;
303 /* Walk through orig_line, expanding tabs into ext->line. */
305 char *src_p = orig_line + (first_tab - ext->line);
306 char *dest_p = first_tab;
308 for (; src_p < orig_line + orig_len; src_p++)
310 /* Most characters simply pass through untouched. */
317 /* Tabs are expanded into an equivalent number of
320 int n_spaces = 8 - (dest_p - ext->line) % 8;
322 memset (dest_p, ' ', n_spaces);
327 /* Supply null terminator and actual string length. */
329 ext->len = dest_p - ext->line;
332 local_free (orig_line);
/* read_record: fetches the next record for handle H, either from the
   command stream (inline file) or from the handle's own file.
   NOTE(review): this listing has lost lines of the function (return
   type, braces, local declarations, several branch bodies and the
   jumps to the cleanup code at the end); the comments added here
   describe only what is visible. */
336 /* Reads a record from H->EXT->FILE into H->EXT->LINE, setting
337 H->EXT->PTR to H->EXT->LINE, and setting H->EXT-LEN to the length
338 of the line. The line is not null-terminated. If an error occurs
339 or end-of-file is encountered, H->EXT->LINE is set to NULL. */
341 read_record (struct file_handle *h)
343 struct dfm_fhuser_ext *ext = h->ext;
/* Inline data arrives through the command reader (getl_read_line and
   getl_buf) rather than through a file_ext. */
345 if (h == inline_file)
/* First read only: the BEGIN DATA line itself has to be consumed
   before any data lines. */
347 if (!ext->saw_begin_data)
351 ext->saw_begin_data = 1;
353 /* FIXME: WTF can't this just be done with tokens?
354 Is this really a special case? */
359 if (!getl_read_line ())
361 msg (SE, _("BEGIN DATA expected."));
365 /* Skip leading whitespace, separate out first word, so that
366 S points to a single word reduced to lowercase. */
367 s = ds_value (&getl_buf);
368 while (isspace ((unsigned char) *s))
370 for (cp = s; isalpha ((unsigned char) *cp); cp++)
371 *cp = tolower ((unsigned char) (*cp));
372 ds_truncate (&getl_buf, cp - s);
/* The first word must match the identifier "begin"; its length is
   taken up to the first whitespace character. */
376 if (!lex_id_match_len ("begin", 5, s, strcspn (s, " \t\r\v\n")))
378 msg (SE, _("BEGIN DATA expected."));
379 lex_preprocess_line ();
/* Switch the command reader to the data prompt while data lines are
   being read. */
382 getl_prompt = GETL_PRPT_DATA;
/* Fetch the next inline data line. */
385 if (!getl_read_line ())
387 msg (SE, _("Unexpected end-of-file while reading data in BEGIN "
388 "DATA. This probably indicates "
389 "a missing or misformatted END DATA command. "
390 "END DATA must appear by itself on a single line "
391 "with exactly one space between words."));
395 ext->where.line_number++;
/* A line whose first 8 characters are "end data", compared without
   regard to case, ends the inline data. */
397 if (ds_length (&getl_buf) >= 8
398 && !strncasecmp (ds_value (&getl_buf), "end data", 8))
/* Hands the lexer a pointer to the end of the buffer. */
400 lex_set_prog (ds_value (&getl_buf) + ds_length (&getl_buf));
/* Copy the line into the handle's own buffer.  strcpy() writes
   LEN + 1 bytes including the terminator, which is the extra byte
   that force_line_buffer_expansion() reserves. */
404 ext->len = ds_length (&getl_buf);
405 force_line_buffer_expansion ();
406 strcpy (ext->line, ds_value (&getl_buf));
/* Text mode: one record per line of the file. */
410 if (handle_get_mode (h) == MODE_TEXT)
412 /* PORTME: here you should adapt the routine to your
413 system's concept of a "line" of text. */
/* getline() may reallocate EXT->LINE and update EXT->SIZE.
   NOTE(review): POSIX getline() returns ssize_t; holding the result
   in an int could truncate the length of a very long line --
   consider ssize_t. */
414 int read_len = getline (&ext->line, &ext->size, ext->file.file);
/* Distinguishes a read error from plain end of file. */
418 if (ferror (ext->file.file))
420 msg (ME, _("Error reading file %s: %s."),
421 handle_get_name (h), strerror (errno));
426 ext->len = (size_t) read_len;
/* Binary mode: fixed-width records of handle_get_record_width (h)
   bytes. */
428 else if (handle_get_mode (h) == MODE_BINARY)
430 size_t record_width = handle_get_record_width (h);
/* NOTE(review): when the buffer is too small a new one is obtained
   with xmalloc() and the previous EXT->LINE is not freed in the
   visible code -- this looks like a leak; xrealloc(), or free()
   first, would avoid it.  Confirm against the full file. */
433 if (ext->size < record_width)
435 ext->size = record_width;
436 ext->line = xmalloc (ext->size);
/* A short read is either an I/O error or a truncated final record. */
438 amt = fread (ext->line, 1, record_width, ext->file.file);
439 if (record_width != amt)
441 if (ferror (ext->file.file))
442 msg (ME, _("Error reading file %s: %s."),
443 handle_get_name (h), strerror (errno));
445 msg (ME, _("%s: Partial record at end of file."),
446 handle_get_name (h));
457 ext->where.line_number++;
460 /* Strip trailing whitespace, I forget why. But there's a good
461 reason, I'm sure. I'm too scared to eliminate this code. */
462 if (handle_get_mode (h) == MODE_TEXT)
464 /* while (ext->len && isspace ((unsigned char) ext->line[ext->len - 1]))
467 /* Convert tabs to spaces. */
/* A freshly read record is returned from its first character. */
469 ext->ptr = ext->line;
474 /* Hit eof or an error, clean up everything. */
/* A NULL line is the end-of-data signal that callers test for. */
478 ext->line = ext->ptr = NULL;
482 /* Public (high level). */
/* dfm_get_record: returns the unread part of the current record of
   handle H and stores its length in *LEN.
   NOTE(review): this listing has lost lines of the function (the end
   of its header comment, return type, braces, the logic that decides
   whether to read a new record, and the return statements).  Visible
   behavior: the handle must be open for reading and have an
   extension; an attempt to read beyond end of file is reported with
   an SE message; *LEN is set to the number of bytes between
   EXT->PTR and the end of the line. */
484 /* Returns the current record in the file corresponding to HANDLE.
485 Opens files and reads records, etc., as necessary. Sets *LEN to
486 the length of the line. The line returned is not null-terminated.
487 Returns NULL at end of file. Calls fail() on attempt to read past
490 dfm_get_record (struct file_handle *h, int *len)
492 struct dfm_fhuser_ext *ext;
495 assert (h->class == &dfm_r_class);
496 assert (h->ext != NULL);
505 msg (SE, _("Attempt to read beyond end-of-file on file %s."),
506 handle_get_name (h));
513 *len = ext->len - (ext->ptr - ext->line);
517 /* Come here on reading beyond eof or reading from a file already
518 open for something else. */
524 /* Causes dfm_get_record() to read in the next record the next time it
525 is executed on file HANDLE. */
527 dfm_fwd_record (struct file_handle *h)
529 struct dfm_fhuser_ext *ext = h->ext;
531 assert (h->class == &dfm_r_class);
535 /* Cancels the effect of any previous dfm_fwd_record() executed on
536 file HANDLE. Sets the current line to begin in the 1-based column
537 COLUMN, as with dfm_set_record but based on a column number instead
538 of a character pointer. */
540 dfm_bkwd_record (struct file_handle *h, int column)
542 struct dfm_fhuser_ext *ext = h->ext;
544 assert (h->class == &dfm_r_class);
546 ext->ptr = ext->line + min ((int) ext->len + 1, column) - 1;
549 /* Sets the current line in HANDLE to NEW_LINE, which must point
550 somewhere in the line last returned by dfm_get_record(). Used by
551 DATA LIST FREE to strip the leading portion off the current line. */
553 dfm_set_record (struct file_handle *h, char *new_line)
555 struct dfm_fhuser_ext *ext = h->ext;
557 assert (h->class == &dfm_r_class);
561 /* Returns the 0-based current column to which the line pointer in
562 HANDLE is set. Unless dfm_set_record() or dfm_bkwd_record() have
563 been called, this is 0. */
565 dfm_get_cur_col (struct file_handle *h)
567 struct dfm_fhuser_ext *ext = h->ext;
569 assert (h->class == &dfm_r_class);
570 return ext->ptr - ext->line;
/* dfm_put_record: writes one record to the file behind handle H.
   NOTE(review): this listing has lost lines of the function (the end
   of its header comment, return type, braces, local declarations, the
   branch taken when no padding is needed, cleanup and the return
   statements).  Visible behavior: the handle must be open for writing
   and have an extension; in binary mode a record shorter than the
   handle's record width is copied into a buffer obtained with
   local_alloc() and padded with zero bytes up to that width; the
   record is written with a single fwrite() call and a failure is
   reported with an ME message that includes strerror (errno). */
573 /* Writes record REC having length LEN to the file corresponding to
574 HANDLE. REC is not null-terminated. Returns nonzero on success,
577 dfm_put_record (struct file_handle *h, const char *rec, size_t len)
579 struct dfm_fhuser_ext *ext;
584 assert (h->class == &dfm_w_class);
585 assert (h->ext != NULL);
588 if (handle_get_mode (h) == MODE_BINARY && len < handle_get_record_width (h))
590 amt = handle_get_record_width (h);
591 ptr = local_alloc (amt);
592 memcpy (ptr, rec, len);
593 memset (&ptr[len], 0, amt - len);
601 if (1 != fwrite (ptr, amt, 1, ext->file.file))
603 msg (ME, _("Error writing file %s: %s."),
604 handle_get_name (h), strerror (errno));
615 /* Pushes the filename and line number on the fn/ln stack. */
617 dfm_push (struct file_handle *h)
619 struct dfm_fhuser_ext *ext = h->ext;
621 assert (h->class == &dfm_r_class || h->class == &dfm_w_class);
622 assert (ext != NULL);
623 if (h != inline_file)
624 err_push_file_locator (&ext->where);
627 /* Pops the filename and line number from the fn/ln stack. */
629 dfm_pop (struct file_handle *h)
631 struct dfm_fhuser_ext *ext = h->ext;
633 assert (h->class == &dfm_r_class || h->class == &dfm_w_class);
634 assert (ext != NULL);
635 if (h != inline_file)
636 err_pop_file_locator (&ext->where);
639 /* BEGIN DATA...END DATA procedure. */
/* cmd_begin_data: command handler for BEGIN DATA.
   NOTE(review): this listing has lost lines of the function (return
   type, braces, the body of the draining loop and the return
   statements); the comments added here describe only what is
   visible. */
641 /* Perform BEGIN DATA...END DATA as a procedure in itself. */
643 cmd_begin_data (void)
645 struct dfm_fhuser_ext *ext;
647 /* FIXME: figure out the *exact* conditions, not these really
648 lenient conditions. */
/* Rejects the command when there is no case source at all, or when
   the source belongs to the storage or sort source classes. */
649 if (vfm_source == NULL
650 || case_source_is_class (vfm_source, &storage_source_class)
651 || case_source_is_class (vfm_source, &sort_source_class))
653 msg (SE, _("This command is not valid here since the current "
654 "input program does not access the inline file."));
659 /* Initialize inline_file. */
660 msg (VM (1), _("inline file: Opening for reading."));
/* NOTE(review): dfm_open_for_reading() is documented as returning
   nonzero only on success, but its result is ignored here and the
   extension is dereferenced on the next lines -- confirm that the
   open cannot fail for the inline file. */
661 dfm_open_for_reading (inline_file);
662 ext = inline_file->ext;
/* The BEGIN DATA line has already been consumed as this command, so
   read_record() must not look for it again. */
663 ext->saw_begin_data = 1;
665 /* We don't actually read from the inline file. The input procedure
666 is what reads from it. */
667 getl_prompt = GETL_PRPT_DATA;
668 procedure (NULL, NULL);
/* Re-read the extension pointer after the procedure has run, and
   drain any data lines that it left unread. */
670 ext = inline_file->ext;
671 if (ext && ext->line)
673 msg (MW, _("Skipping remaining inline data."));
/* Keeps reading until read_record() sets the line to NULL. */
674 for (read_record (inline_file); ext->line; read_record (inline_file))
/* By this point the inline file is expected to have been closed and
   its extension detached. */
677 assert (inline_file->ext == NULL);
/* Class descriptor that marks a handle as open for reading by this
   module.  The string is marked with N_() and is translated with
   gettext() where it is used, in the "already opened for %s"
   messages of the open routines.
   NOTE(review): the other initializers of this structure are not
   visible in this listing. */
682 static struct fh_ext_class dfm_r_class =
685 N_("reading as a data file"),
/* Class descriptor that marks a handle as open for writing by this
   module.  The string is marked with N_() and is translated with
   gettext() where it is used, in the "already opened for %s"
   messages of the open routines.
   NOTE(review): the other initializers of this structure are not
   visible in this listing. */
689 static struct fh_ext_class dfm_w_class =
692 N_("writing as a data file"),