X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fdata-reader.c;h=7176aed43a03f7c763a86747cf889cc69c9c10e2;hb=68f08c4bb53fcde16035b622bdb6e9529f9cf3ae;hp=24b27b4048732b870d3ae15045537a5359bfcecf;hpb=dcf9b154cbcaa35c3d8459a201b77eec8bcb30bd;p=pspp-builds.git diff --git a/src/language/data-io/data-reader.c b/src/language/data-io/data-reader.c index 24b27b40..7176aed4 100644 --- a/src/language/data-io/data-reader.c +++ b/src/language/data-io/data-reader.c @@ -1,49 +1,52 @@ -/* PSPP - computes sample statistics. +/* PSPP - a program for statistical analysis. Copyright (C) 1997-2004, 2006 Free Software Foundation, Inc. - Written by Ben Pfaff . - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . 
*/ #include -#include "data-reader.h" + +#include + #include #include #include #include -#include "alloc.h" -#include "command.h" -#include "message.h" -#include "file-handle.h" -#include "file-handle-def.h" -#include "filename.h" -#include "line-buffer.h" -#include "lexer.h" -#include "str.h" -#include "procedure.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "minmax.h" +#include "size_max.h" #include "gettext.h" #define _(msgid) gettext (msgid) -#include "debug-print.h" - /* Flags for DFM readers. */ enum dfm_reader_flags { DFM_ADVANCE = 002, /* Read next line on dfm_get_record() call? */ - DFM_SAW_BEGIN_DATA = 004, /* For inline_file only, whether we've + DFM_SAW_BEGIN_DATA = 004, /* For inline_file only, whether we've already read a BEGIN DATA line. */ DFM_TABS_EXPANDED = 010, /* Tabs have been expanded. */ }; @@ -52,13 +55,14 @@ enum dfm_reader_flags struct dfm_reader { struct file_handle *fh; /* File handle. */ - struct file_locator where; /* Current location in data file. */ + struct msg_locator where; /* Current location in data file. */ struct string line; /* Current line. */ struct string scratch; /* Extra line buffer. */ enum dfm_reader_flags flags; /* Zero or more of DFM_*. */ - struct file_ext file; /* Associated file. */ + FILE *file; /* Associated file. */ size_t pos; /* Offset in line of current character. */ unsigned eof_cnt; /* # of attempts to advance past EOF. */ + struct lexer *lexer; /* The lexer reading the file */ }; /* Closes reader R opened by dfm_open_reader(). */ @@ -67,35 +71,37 @@ dfm_close_reader (struct dfm_reader *r) { int still_open; bool is_inline; + char *file_name; if (r == NULL) return; is_inline = r->fh == fh_inline_file (); + file_name = is_inline ? 
NULL : xstrdup (fh_get_file_name (r->fh)); still_open = fh_close (r->fh, "data file", "rs"); if (still_open) - return; - - if (!is_inline) { - fn_close_ext (&r->file); - free (r->file.filename); - r->file.filename = NULL; + free (file_name); + return; } + + if (!is_inline) + fn_close (file_name, r->file); else { /* Skip any remaining data on the inline file. */ - if (r->flags & DFM_SAW_BEGIN_DATA) + if (r->flags & DFM_SAW_BEGIN_DATA) { dfm_reread_record (r, 0); while (!dfm_eof (r)) - dfm_forward_record (r); + dfm_forward_record (r); } } ds_destroy (&r->line); ds_destroy (&r->scratch); free (r); + free (file_name); } /* Opens the file designated by file handle FH for reading as a @@ -104,7 +110,7 @@ dfm_close_reader (struct dfm_reader *r) file between BEGIN FILE and END FILE. Returns a reader if successful, or a null pointer otherwise. */ struct dfm_reader * -dfm_open_reader (struct file_handle *fh) +dfm_open_reader (struct file_handle *fh, struct lexer *lexer) { struct dfm_reader *r; void **rp; @@ -113,30 +119,24 @@ dfm_open_reader (struct file_handle *fh) if (rp == NULL) return NULL; if (*rp != NULL) - return *rp; - + return *rp; + r = xmalloc (sizeof *r); r->fh = fh; - ds_init (&r->line, 64); - ds_init (&r->scratch, 0); + r->lexer = lexer ; + ds_init_empty (&r->line); + ds_init_empty (&r->scratch); r->flags = DFM_ADVANCE; r->eof_cnt = 0; - if (fh != fh_inline_file ()) + if (fh != fh_inline_file ()) { - r->where.filename = fh_get_filename (fh); - r->where.line_number = 0; - r->file.file = NULL; - r->file.filename = xstrdup (fh_get_filename (r->fh)); - r->file.mode = "rb"; - r->file.file = NULL; - r->file.sequence_no = NULL; - r->file.param = NULL; - r->file.postopen = NULL; - r->file.preclose = NULL; - if (!fn_open_ext (&r->file)) + r->where.file_name = fh_get_file_name (fh); + r->where.line_number = 0; + r->file = fn_open (fh_get_file_name (fh), "rb"); + if (r->file == NULL) { msg (ME, _("Could not open \"%s\" for reading as a data file: %s."), - fh_get_filename 
(r->fh), strerror (errno)); + fh_get_file_name (r->fh), strerror (errno)); fh_close (fh,"data file", "rs"); free (r); return NULL; @@ -149,9 +149,9 @@ dfm_open_reader (struct file_handle *fh) /* Returns true if an I/O error occurred on READER, false otherwise. */ bool -dfm_reader_error (const struct dfm_reader *r) +dfm_reader_error (const struct dfm_reader *r) { - return fh_get_referent (r->fh) == FH_REF_FILE && ferror (r->file.file); + return fh_get_referent (r->fh) == FH_REF_FILE && ferror (r->file); } /* Reads a record from the inline file into R. @@ -163,14 +163,14 @@ read_inline_record (struct dfm_reader *r) { r->flags |= DFM_SAW_BEGIN_DATA; - while (token == '.') - lex_get (); - if (!lex_force_match_id ("BEGIN") || !lex_force_match_id ("DATA")) + while (lex_token (r->lexer) == '.') + lex_get (r->lexer); + if (!lex_force_match_id (r->lexer, "BEGIN") || !lex_force_match_id (r->lexer, "DATA")) return false; - getl_set_prompt_style (GETL_PROMPT_DATA); + prompt_set_style (PROMPT_DATA); } - - if (!getl_read_line (NULL)) + + if (!lex_get_line_raw (r->lexer)) { msg (SE, _("Unexpected end-of-file while reading data in BEGIN " "DATA. 
This probably indicates " @@ -180,14 +180,15 @@ read_inline_record (struct dfm_reader *r) return false; } - if (ds_length (&getl_buf) >= 8 - && !strncasecmp (ds_c_str (&getl_buf), "end data", 8)) + if (ds_length (lex_entire_line_ds (r->lexer) ) >= 8 + && !strncasecmp (lex_entire_line (r->lexer), "end data", 8)) { - lex_set_prog (ds_c_str (&getl_buf) + ds_length (&getl_buf)); + lex_discard_line (r->lexer); return false; } - ds_replace (&r->line, ds_c_str (&getl_buf)); + ds_assign_string (&r->line, lex_entire_line_ds (r->lexer) ); + return true; } @@ -197,30 +198,25 @@ static bool read_file_record (struct dfm_reader *r) { assert (r->fh != fh_inline_file ()); + ds_clear (&r->line); if (fh_get_mode (r->fh) == FH_MODE_TEXT) { - ds_clear (&r->line); - if (!ds_gets (&r->line, r->file.file)) + if (!ds_read_line (&r->line, r->file)) { - if (ferror (r->file.file)) + if (ferror (r->file)) msg (ME, _("Error reading file %s: %s."), fh_get_name (r->fh), strerror (errno)); return false; } + ds_chomp (&r->line, '\n'); } else if (fh_get_mode (r->fh) == FH_MODE_BINARY) { size_t record_width = fh_get_record_width (r->fh); - size_t amt; - - if (ds_length (&r->line) < record_width) - ds_rpad (&r->line, record_width, 0); - - amt = fread (ds_c_str (&r->line), 1, record_width, - r->file.file); + size_t amt = ds_read_stream (&r->line, 1, record_width, r->file); if (record_width != amt) { - if (ferror (r->file.file)) + if (ferror (r->file)) msg (ME, _("Error reading file %s: %s."), fh_get_name (r->fh), strerror (errno)); else if (amt != 0) @@ -231,7 +227,7 @@ read_file_record (struct dfm_reader *r) } } else - abort (); + NOT_REACHED (); r->where.line_number++; @@ -259,16 +255,16 @@ read_record (struct dfm_reader *r) an error message is issued, and the caller should more forcibly abort to avoid an infinite loop. 
*/ unsigned -dfm_eof (struct dfm_reader *r) +dfm_eof (struct dfm_reader *r) { if (r->flags & DFM_ADVANCE) { r->flags &= ~DFM_ADVANCE; - if (r->eof_cnt == 0 && read_record (r)) + if (r->eof_cnt == 0 && read_record (r) ) { r->pos = 0; - return 0; + return 0; } r->eof_cnt++; @@ -287,18 +283,14 @@ dfm_eof (struct dfm_reader *r) /* Returns the current record in the file corresponding to HANDLE. Aborts if reading from the file is necessary or at - end of file, so call dfm_eof() first. Sets *LINE to the line, - which is not null-terminated. The caller must not free or - modify the returned string. */ -void -dfm_get_record (struct dfm_reader *r, struct fixed_string *line) + end of file, so call dfm_eof() first. */ +struct substring +dfm_get_record (struct dfm_reader *r) { assert ((r->flags & DFM_ADVANCE) == 0); assert (r->eof_cnt == 0); - assert (r->pos <= ds_length (&r->line)); - line->string = ds_data (&r->line) + r->pos; - line->length = ds_length (&r->line) - r->pos; + return ds_substr (&r->line, r->pos, SIZE_MAX); } /* Expands tabs in the current line into the equivalent number of @@ -306,14 +298,12 @@ dfm_get_record (struct dfm_reader *r, struct fixed_string *line) reading from the file is necessary or at end of file, so call dfm_eof() first.*/ void -dfm_expand_tabs (struct dfm_reader *r) +dfm_expand_tabs (struct dfm_reader *r) { - struct string temp; size_t ofs, new_pos, tab_width; assert ((r->flags & DFM_ADVANCE) == 0); assert (r->eof_cnt == 0); - assert (r->pos <= ds_length (&r->line)); if (r->flags & DFM_TABS_EXPANDED) return; @@ -322,41 +312,49 @@ dfm_expand_tabs (struct dfm_reader *r) if (r->fh != fh_inline_file () && (fh_get_mode (r->fh) == FH_MODE_BINARY || fh_get_tab_width (r->fh) == 0 - || memchr (ds_c_str (&r->line), '\t', ds_length (&r->line)) == NULL)) + || ds_find_char (&r->line, '\t') == SIZE_MAX)) return; /* Expand tabs from r->line into r->scratch, and figure out new value for r->pos. 
*/ tab_width = fh_get_tab_width (r->fh); ds_clear (&r->scratch); - new_pos = 0; + new_pos = SIZE_MAX; for (ofs = 0; ofs < ds_length (&r->line); ofs++) { unsigned char c; - + if (ofs == r->pos) new_pos = ds_length (&r->scratch); - c = ds_c_str (&r->line)[ofs]; + c = ds_data (&r->line)[ofs]; if (c != '\t') - ds_putc (&r->scratch, c); - else + ds_put_char (&r->scratch, c); + else { do - ds_putc (&r->scratch, ' '); + ds_put_char (&r->scratch, ' '); while (ds_length (&r->scratch) % tab_width != 0); } } + if (new_pos == SIZE_MAX) + { + /* Maintain the same relationship between position and line + length that we had before. DATA LIST uses a + beyond-the-end position to deal with an empty field at + the end of the line. */ + assert (r->pos >= ds_length (&r->line)); + new_pos = (r->pos - ds_length (&r->line)) + ds_length (&r->scratch); + } /* Swap r->line and r->scratch and set new r->pos. */ - temp = r->line; - r->line = r->scratch; - r->scratch = temp; + ds_swap (&r->line, &r->scratch); r->pos = new_pos; } -/* Causes dfm_get_record() to read in the next record the next time it - is executed on file HANDLE. */ +/* Causes dfm_get_record() or dfm_get_whole_record() to read in + the next record the next time it is executed on file + HANDLE. */ void dfm_forward_record (struct dfm_reader *r) { @@ -370,12 +368,7 @@ void dfm_reread_record (struct dfm_reader *r, size_t column) { r->flags &= ~DFM_ADVANCE; - if (column < 1) - r->pos = 0; - else if (column > ds_length (&r->line)) - r->pos = ds_length (&r->line); - else - r->pos = column - 1; + r->pos = MAX (column, 1) - 1; } /* Sets the current line to begin COLUMNS characters following @@ -390,32 +383,49 @@ dfm_forward_columns (struct dfm_reader *r, size_t columns) is set. Unless dfm_reread_record() or dfm_forward_columns() have been called, this is 1. */ size_t -dfm_column_start (struct dfm_reader *r) +dfm_column_start (const struct dfm_reader *r) { return r->pos + 1; } -/* Pushes the filename and line number on the fn/ln stack. 
*/
+/* Returns the number of columns we are currently beyond the end
+   of the line. At or before end-of-line, this is 0; one column
+   after end-of-line, this is 1; and so on. */
+size_t
+dfm_columns_past_end (const struct dfm_reader *r)
+{
+  return r->pos < ds_length (&r->line) ? 0 : r->pos - ds_length (&r->line);
+}
+
+/* Returns the 1-based column within the current line that P
+   designates. */
+size_t
+dfm_get_column (const struct dfm_reader *r, const char *p)
+{
+  return ds_pointer_to_position (&r->line, p) + 1;
+}
+
+/* Pushes the file name and line number on the fn/ln stack. */
 void
 dfm_push (struct dfm_reader *r)
 {
   if (r->fh != fh_inline_file ())
-    err_push_file_locator (&r->where);
+    msg_push_msg_locator (&r->where);
 }
 
-/* Pops the filename and line number from the fn/ln stack. */
+/* Pops the file name and line number from the fn/ln stack. */
 void
 dfm_pop (struct dfm_reader *r)
 {
   if (r->fh != fh_inline_file ())
-    err_pop_file_locator (&r->where);
+    msg_pop_msg_locator (&r->where);
 }
 
 /* BEGIN DATA...END DATA procedure. */
 
 /* Perform BEGIN DATA...END DATA as a procedure in itself. */
 int
-cmd_begin_data (void)
+cmd_begin_data (struct lexer *lexer, struct dataset *ds)
 {
   struct dfm_reader *r;
   bool ok;
@@ -428,13 +438,13 @@ cmd_begin_data (void)
     }
 
   /* Open inline file. */
-  r = dfm_open_reader (fh_inline_file ());
+  r = dfm_open_reader (fh_inline_file (), lexer);
   r->flags |= DFM_SAW_BEGIN_DATA;
 
   /* Input procedure reads from inline file. */
-  getl_set_prompt_style (GETL_PROMPT_DATA);
-  ok = procedure (NULL, NULL);
-
+  prompt_set_style (PROMPT_DATA);
+  casereader_destroy (proc_open (ds));
+  ok = proc_commit (ds);
   dfm_close_reader (r);
 
   return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;