/* PSPP - a program for statistical analysis.
- Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc.
+ Copyright (C) 2007, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <config.h>
-#include <language/data-io/data-parser.h>
+#include "language/data-io/data-parser.h"
#include <stdint.h>
#include <stdlib.h>
-#include <data/casereader-provider.h>
-#include <data/data-in.h>
-#include <data/dictionary.h>
-#include <data/format.h>
-#include <data/file-handle-def.h>
-#include <data/procedure.h>
-#include <data/settings.h>
-#include <language/data-io/data-reader.h>
-#include <libpspp/message.h>
-#include <libpspp/str.h>
-#include <output/tab.h>
+#include "data/casereader-provider.h"
+#include "data/data-in.h"
+#include "data/dataset.h"
+#include "data/dictionary.h"
+#include "data/format.h"
+#include "data/file-handle-def.h"
+#include "data/settings.h"
+#include "language/data-io/data-reader.h"
+#include "libpspp/message.h"
+#include "libpspp/str.h"
+#include "output/tab.h"
-#include "xalloc.h"
+#include "gl/xalloc.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
int *first_column, int *last_column, struct string *tmp,
struct substring *field)
{
+ size_t length_before_separators;
struct substring line, p;
+ bool quoted;
if (dfm_eof (reader))
return false;
}
*first_column = dfm_column_start (reader);
- if (ss_find_byte (parser->quotes, ss_first (p)) != SIZE_MAX)
+ quoted = ss_find_byte (parser->quotes, ss_first (p)) != SIZE_MAX;
+ if (quoted)
{
/* Quoted field. */
int quote = ss_get_byte (&p);
if (!ss_get_until (&p, quote, field))
- msg (SW, _("Quoted string extends beyond end of line."));
+ msg (DW, _("Quoted string extends beyond end of line."));
if (parser->quote_escape && ss_first (p) == quote)
{
ds_assign_substring (tmp, *field);
struct substring ss;
ds_put_byte (tmp, quote);
if (!ss_get_until (&p, quote, &ss))
- msg (SW, _("Quoted string extends beyond end of line."));
+ msg (DW, _("Quoted string extends beyond end of line."));
ds_put_substring (tmp, ss);
}
*field = ds_ss (tmp);
}
*last_column = *first_column + (ss_length (line) - ss_length (p));
-
- /* Skip trailing soft separator and a single hard separator
- if present. */
- ss_ltrim (&p, parser->soft_seps);
- if (!ss_is_empty (p)
- && ss_find_byte (parser->hard_seps, ss_first (p)) != SIZE_MAX)
- ss_advance (&p, 1);
}
else
{
/* Regular field. */
ss_get_bytes (&p, ss_cspan (p, ds_ss (&parser->any_sep)), field);
*last_column = *first_column + ss_length (*field);
+ }
- if (!ss_ltrim (&p, parser->soft_seps) || ss_is_empty (p)
- || ss_find_byte (parser->hard_seps, p.string[0]) != SIZE_MAX)
- {
- /* Advance past a trailing hard separator,
- regardless of whether one actually existed. If
- we "skip" a delimiter that was not actually
- there, then we will return end-of-line on our
- next call, which is what we want. */
- dfm_forward_columns (reader, 1);
- }
+ /* Skip trailing soft separators, then a single hard separator if present, then any soft separators that follow it. */
+ length_before_separators = ss_length (p);
+ ss_ltrim (&p, parser->soft_seps);
+ if (!ss_is_empty (p)
+ && ss_find_byte (parser->hard_seps, ss_first (p)) != SIZE_MAX)
+ {
+ ss_advance (&p, 1);
+ ss_ltrim (&p, parser->soft_seps);
}
+ if (ss_is_empty (p))
+ dfm_forward_columns (reader, 1);
+ else if (quoted && length_before_separators == ss_length (p))
+ msg (DW, _("Missing delimiter following quoted string."));
dfm_forward_columns (reader, ss_length (line) - ss_length (p));
return true;
m.category = MSG_C_DATA;
m.severity = MSG_S_WARNING;
- m.where.file_name = CONST_CAST (char *, dfm_get_file_name (reader));
- m.where.line_number = dfm_get_line_number (reader);
- m.where.first_column = first_column;
- m.where.last_column = last_column;
+ m.file_name = CONST_CAST (char *, dfm_get_file_name (reader));
+ m.first_line = dfm_get_line_number (reader);
+ m.last_line = m.first_line + 1;
+ m.first_column = first_column;
+ m.last_column = last_column;
m.text = xasprintf (_("Data for variable %s is not valid as format %s: %s"),
field->name, fmt_name (field->format.type), error);
msg_emit (&m);
parse_fixed (const struct data_parser *parser, struct dfm_reader *reader,
struct ccase *c)
{
- const char *input_encoding = dfm_reader_get_legacy_encoding (reader);
+ const char *input_encoding = dfm_reader_get_encoding (reader);
const char *output_encoding = dict_get_encoding (parser->dict);
struct field *f;
int row;
if (dfm_eof (reader))
{
- msg (SW, _("Partial case of %d of %d records discarded."),
+ msg (DW, _("Partial case of %d of %d records discarded."),
row - 1, parser->records_per_case);
return false;
}
parse_delimited_span (const struct data_parser *parser,
struct dfm_reader *reader, struct ccase *c)
{
- const char *input_encoding = dfm_reader_get_legacy_encoding (reader);
+ const char *input_encoding = dfm_reader_get_encoding (reader);
const char *output_encoding = dict_get_encoding (parser->dict);
struct string tmp = DS_EMPTY_INITIALIZER;
struct field *f;
if (dfm_eof (reader))
{
if (f > parser->fields)
- msg (SW, _("Partial case discarded. The first variable "
+ msg (DW, _("Partial case discarded. The first variable "
"missing was %s."), f->name);
ds_destroy (&tmp);
return false;
parse_delimited_no_span (const struct data_parser *parser,
struct dfm_reader *reader, struct ccase *c)
{
- const char *input_encoding = dfm_reader_get_legacy_encoding (reader);
+ const char *input_encoding = dfm_reader_get_encoding (reader);
const char *output_encoding = dict_get_encoding (parser->dict);
struct string tmp = DS_EMPTY_INITIALIZER;
struct substring s;
if (!cut_field (parser, reader, &first_column, &last_column, &tmp, &s))
{
if (f < end - 1 && settings_get_undefined ())
- msg (SW, _("Missing value(s) for all variables from %s onward. "
+ msg (DW, _("Missing value(s) for all variables from %s onward. "
"These will be filled with the system-missing value "
"or blanks, as appropriate."),
f->name);
s = dfm_get_record (reader);
ss_ltrim (&s, parser->soft_seps);
if (!ss_is_empty (s))
- msg (SW, _("Record ends in data not part of any field."));
+ msg (DW, _("Record ends in data not part of any field."));
exit:
dfm_forward_record (reader);
static const struct casereader_class data_parser_casereader_class;
-/* Replaces DS's active file by an input program that reads data
+/* Replaces DS's active dataset by an input program that reads data
from READER according to the rules in PARSER, using DICT as
the underlying dictionary. Ownership of PARSER and READER is
transferred to the input program, and ownership of DICT is
casereader = casereader_create_sequential (NULL, r->proto,
CASENUMBER_MAX,
&data_parser_casereader_class, r);
- proc_set_active_file (ds, casereader, dict);
+ dataset_set_dict (ds, dict);
+ dataset_set_source (ds, casereader);
}
static struct ccase *