X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fdata-reader.c;h=2cec413b759e24fdf9aa8554b4e02f0f323168de;hb=81579d9e9f994fb2908f50af41c3eb033d216e58;hp=e3ef8ca15be6b841de239d6d578501b5c7d26b47;hpb=d0371553a98cd169353bf6d211e375e5ffc3a3bd;p=pspp-builds.git diff --git a/src/language/data-io/data-reader.c b/src/language/data-io/data-reader.c index e3ef8ca1..2cec413b 100644 --- a/src/language/data-io/data-reader.c +++ b/src/language/data-io/data-reader.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-2004, 2006 Free Software Foundation, Inc. + Copyright (C) 1997-2004, 2006, 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,29 +16,31 @@ #include -#include +#include "language/data-io/data-reader.h" #include #include #include #include #include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "minmax.h" -#include "xalloc.h" +#include + +#include "data/casereader.h" +#include "data/file-handle-def.h" +#include "data/file-name.h" +#include "data/procedure.h" +#include "language/command.h" +#include "language/data-io/file-handle.h" +#include "language/lexer/lexer.h" +#include "language/prompt.h" +#include "libpspp/assertion.h" +#include "libpspp/cast.h" +#include "libpspp/integer-format.h" +#include "libpspp/message.h" +#include "libpspp/str.h" + +#include "gl/minmax.h" +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -63,6 +65,7 @@ struct dfm_reader struct string scratch; /* Extra line buffer. */ enum dfm_reader_flags flags; /* Zero or more of DFM_*. */ FILE *file; /* Associated file. */ + off_t file_size; /* File size, or -1 if unavailable. */ size_t pos; /* Offset in line of current character. */ unsigned eof_cnt; /* # of attempts to advance past EOF. */ struct lexer *lexer; /* The lexer reading the file */ @@ -137,20 +140,23 @@ dfm_open_reader (struct file_handle *fh, struct lexer *lexer) r->block_left = 0; if (fh_get_referent (fh) != FH_REF_INLINE) { - r->where.file_name = fh_get_file_name (fh); + struct stat s; + r->where.file_name = CONST_CAST (char *, fh_get_file_name (fh)); r->where.line_number = 0; - r->file = fn_open (fh_get_file_name (fh), - fh_get_mode (fh) == FH_MODE_TEXT ? "r" : "rb"); + r->file = fn_open (fh_get_file_name (fh), "rb"); if (r->file == NULL) { - msg (ME, _("Could not open \"%s\" for reading as a data file: %s."), + msg (ME, _("Could not open `%s' for reading as a data file: %s."), fh_get_file_name (r->fh), strerror (errno)); fh_unlock (r->lock); fh_unref (fh); free (r); return NULL; } + r->file_size = fstat (fileno (r->file), &s) == 0 ? s.st_size : -1; } + else + r->file_size = -1; fh_lock_set_aux (lock, r); return r; @@ -172,7 +178,7 @@ read_inline_record (struct dfm_reader *r) { r->flags |= DFM_SAW_BEGIN_DATA; - while (lex_token (r->lexer) == '.') + while (lex_token (r->lexer) == T_ENDCMD) lex_get (r->lexer); if (!lex_force_match_id (r->lexer, "BEGIN") || !lex_force_match_id (r->lexer, "DATA")) return false; @@ -181,9 +187,10 @@ read_inline_record (struct dfm_reader *r) if (!lex_get_line_raw (r->lexer)) { + lex_discard_line (r->lexer); msg (SE, _("Unexpected end-of-file while reading data in BEGIN " "DATA. This probably indicates " - "a missing or misformatted END DATA command. " + "a missing or incorrectly formatted END DATA command. " "END DATA must appear by itself on a single line " "with exactly one space between words.")); return false; @@ -334,7 +341,7 @@ read_file_record (struct dfm_reader *r) switch (fh_get_mode (r->fh)) { case FH_MODE_TEXT: - if (ds_read_line (&r->line, r->file)) + if (ds_read_line (&r->line, r->file, SIZE_MAX)) { ds_chomp (&r->line, '\n'); return true; @@ -549,7 +556,7 @@ dfm_expand_tabs (struct dfm_reader *r) if (r->fh != fh_inline_file () && (fh_get_mode (r->fh) != FH_MODE_TEXT || fh_get_tab_width (r->fh) == 0 - || ds_find_char (&r->line, '\t') == SIZE_MAX)) + || ds_find_byte (&r->line, '\t') == SIZE_MAX)) return; /* Expand tabs from r->line into r->scratch, and figure out @@ -566,11 +573,11 @@ dfm_expand_tabs (struct dfm_reader *r) c = ds_data (&r->line)[ofs]; if (c != '\t') - ds_put_char (&r->scratch, c); + ds_put_byte (&r->scratch, c); else { do - ds_put_char (&r->scratch, ' '); + ds_put_byte (&r->scratch, ' '); while (ds_length (&r->scratch) % tab_width != 0); } } @@ -590,12 +597,33 @@ dfm_expand_tabs (struct dfm_reader *r) } /* Returns the legacy character encoding of data read from READER. */ -enum legacy_encoding +const char * dfm_reader_get_legacy_encoding (const struct dfm_reader *reader) { return fh_get_legacy_encoding (reader->fh); } +/* Returns a number between 0 and 100 that approximates the + percentage of the data in READER that has already been read, + or -1 if this value cannot be estimated. + + ftello is slow in glibc (it flushes the read buffer), so don't + call this function unless you need to. */ +int +dfm_get_percent_read (const struct dfm_reader *reader) +{ + if (reader->file_size >= 0) + { + off_t position = ftello (reader->file); + if (position >= 0) + { + double p = 100.0 * position / reader->file_size; + return p < 0 ? 0 : p > 100 ? 100 : p; + } + } + return -1; +} + /* Causes dfm_get_record() or dfm_get_whole_record() to read in the next record the next time it is executed on file HANDLE. */ @@ -649,20 +677,16 @@ dfm_get_column (const struct dfm_reader *r, const char *p) return ds_pointer_to_position (&r->line, p) + 1; } -/* Pushes the file name and line number on the fn/ln stack. */ -void -dfm_push (struct dfm_reader *r) +const char * +dfm_get_file_name (const struct dfm_reader *r) { - if (r->fh != fh_inline_file ()) - msg_push_msg_locator (&r->where); + return fh_get_referent (r->fh) == FH_REF_FILE ? r->where.file_name : NULL; } -/* Pops the file name and line number from the fn/ln stack. */ -void -dfm_pop (struct dfm_reader *r) +int +dfm_get_line_number (const struct dfm_reader *r) { - if (r->fh != fh_inline_file ()) - msg_pop_msg_locator (&r->where); + return fh_get_referent (r->fh) == FH_REF_FILE ? r->where.line_number : -1; } /* BEGIN DATA...END DATA procedure. */