/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-2004, 2006 Free Software Foundation, Inc.
+ Copyright (C) 1997-2004, 2006, 2010, 2011 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <config.h>
-#include <language/data-io/data-reader.h>
+#include "language/data-io/data-reader.h"
#include <ctype.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
-
-#include <data/casereader.h>
-#include <data/file-handle-def.h>
-#include <data/file-name.h>
-#include <data/procedure.h>
-#include <language/command.h>
-#include <language/data-io/file-handle.h>
-#include <language/lexer/lexer.h>
-#include <language/prompt.h>
-#include <libpspp/assertion.h>
-#include <libpspp/integer-format.h>
-#include <libpspp/message.h>
-#include <libpspp/str.h>
-
-#include "minmax.h"
-#include "xalloc.h"
+#include <sys/stat.h>
+
+#include "data/casereader.h"
+#include "data/file-handle-def.h"
+#include "data/file-name.h"
+#include "data/procedure.h"
+#include "language/command.h"
+#include "language/data-io/file-handle.h"
+#include "language/lexer/lexer.h"
+#include "language/prompt.h"
+#include "libpspp/assertion.h"
+#include "libpspp/cast.h"
+#include "libpspp/integer-format.h"
+#include "libpspp/message.h"
+#include "libpspp/str.h"
+
+#include "gl/minmax.h"
+#include "gl/xalloc.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
struct string scratch; /* Extra line buffer. */
enum dfm_reader_flags flags; /* Zero or more of DFM_*. */
FILE *file; /* Associated file. */
+ off_t file_size; /* File size, or -1 if unavailable. */
size_t pos; /* Offset in line of current character. */
unsigned eof_cnt; /* # of attempts to advance past EOF. */
struct lexer *lexer; /* The lexer reading the file */
r->block_left = 0;
if (fh_get_referent (fh) != FH_REF_INLINE)
{
- r->where.file_name = fh_get_file_name (fh);
+ struct stat s;
+ r->where.file_name = CONST_CAST (char *, fh_get_file_name (fh));
r->where.line_number = 0;
- r->file = fn_open (fh_get_file_name (fh),
- fh_get_mode (fh) == FH_MODE_TEXT ? "r" : "rb");
+ r->file = fn_open (fh_get_file_name (fh), "rb");
if (r->file == NULL)
{
- msg (ME, _("Could not open \"%s\" for reading as a data file: %s."),
+ msg (ME, _("Could not open `%s' for reading as a data file: %s."),
fh_get_file_name (r->fh), strerror (errno));
fh_unlock (r->lock);
fh_unref (fh);
free (r);
return NULL;
}
+ r->file_size = fstat (fileno (r->file), &s) == 0 ? s.st_size : -1;
}
+ else
+ r->file_size = -1;
fh_lock_set_aux (lock, r);
return r;
{
r->flags |= DFM_SAW_BEGIN_DATA;
- while (lex_token (r->lexer) == '.')
+ while (lex_token (r->lexer) == T_ENDCMD)
lex_get (r->lexer);
if (!lex_force_match_id (r->lexer, "BEGIN") || !lex_force_match_id (r->lexer, "DATA"))
return false;
if (!lex_get_line_raw (r->lexer))
{
+ lex_discard_line (r->lexer);
msg (SE, _("Unexpected end-of-file while reading data in BEGIN "
"DATA. This probably indicates "
- "a missing or misformatted END DATA command. "
+ "a missing or incorrectly formatted END DATA command. "
"END DATA must appear by itself on a single line "
"with exactly one space between words."));
return false;
switch (fh_get_mode (r->fh))
{
case FH_MODE_TEXT:
- if (ds_read_line (&r->line, r->file))
+ if (ds_read_line (&r->line, r->file, SIZE_MAX))
{
ds_chomp (&r->line, '\n');
return true;
read_error (r);
return false;
}
- return true;
case FH_MODE_FIXED:
if (ds_read_stream (&r->line, 1, fh_get_record_width (r->fh), r->file))
partial_record (r);
return false;
}
- return true;
case FH_MODE_VARIABLE:
{
if (r->fh != fh_inline_file ()
&& (fh_get_mode (r->fh) != FH_MODE_TEXT
|| fh_get_tab_width (r->fh) == 0
- || ds_find_char (&r->line, '\t') == SIZE_MAX))
+ || ds_find_byte (&r->line, '\t') == SIZE_MAX))
return;
/* Expand tabs from r->line into r->scratch, and figure out
c = ds_data (&r->line)[ofs];
if (c != '\t')
- ds_put_char (&r->scratch, c);
+ ds_put_byte (&r->scratch, c);
else
{
do
- ds_put_char (&r->scratch, ' ');
+ ds_put_byte (&r->scratch, ' ');
while (ds_length (&r->scratch) % tab_width != 0);
}
}
}
/* Returns the legacy character encoding of data read from READER. */
-enum legacy_encoding
+/* The value is whatever fh_get_legacy_encoding() reports for
+   READER's file handle. */
+const char *
dfm_reader_get_legacy_encoding (const struct dfm_reader *reader)
{
return fh_get_legacy_encoding (reader->fh);
}
+/* Returns a number between 0 and 100 that approximates the
+   percentage of the data in READER that has already been read,
+   or -1 if this value cannot be estimated (the file size is
+   unknown or the current file position cannot be obtained).
+
+   An empty file, or a position at or beyond the recorded file
+   size, is reported as 100.
+
+   ftello is slow in glibc (it flushes the read buffer), so don't
+   call this function unless you need to. */
+int
+dfm_get_percent_read (const struct dfm_reader *reader)
+{
+  if (reader->file_size >= 0)
+    {
+      off_t position = ftello (reader->file);
+      if (position >= 0)
+        {
+          /* Checking this first keeps the division below well
+             defined: it is only reached with
+             0 <= position < file_size, so file_size is nonzero
+             and the quotient lies in [0, 100).  Dividing 0 by a
+             zero file size would yield NaN, and converting NaN to
+             int is undefined. */
+          if (position >= reader->file_size)
+            return 100;
+
+          return (int) (100.0 * position / reader->file_size);
+        }
+    }
+  return -1;
+}
+
/* Causes dfm_get_record() or dfm_get_whole_record() to read in
the next record the next time it is executed on file
HANDLE. */
return ds_pointer_to_position (&r->line, p) + 1;
}
-/* Pushes the file name and line number on the fn/ln stack. */
-void
-dfm_push (struct dfm_reader *r)
+/* Returns the file name recorded for R (R->where.file_name) when
+   R's file handle refers to a file (FH_REF_FILE), or NULL for any
+   other kind of referent. */
+const char *
+dfm_get_file_name (const struct dfm_reader *r)
{
- if (r->fh != fh_inline_file ())
- msg_push_msg_locator (&r->where);
+ return fh_get_referent (r->fh) == FH_REF_FILE ? r->where.file_name : NULL;
}
-/* Pops the file name and line number from the fn/ln stack. */
-void
-dfm_pop (struct dfm_reader *r)
+/* Returns the line number recorded for R (R->where.line_number)
+   when R's file handle refers to a file (FH_REF_FILE), or -1 for
+   any other kind of referent. */
+int
+dfm_get_line_number (const struct dfm_reader *r)
{
- if (r->fh != fh_inline_file ())
- msg_pop_msg_locator (&r->where);
+ return fh_get_referent (r->fh) == FH_REF_FILE ? r->where.line_number : -1;
}
\f
/* BEGIN DATA...END DATA procedure. */