/* PSPP - a program for statistical analysis.
- Copyright (C) 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 2007, 2009, 2010, 2011, 2012, 2013, 2016 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
const struct dictionary *dict; /*Dictionary of destination */
enum data_parser_type type; /* Type of data to parse. */
int skip_records; /* Records to skip before first real data. */
- casenumber max_cases; /* Max number of cases to read. */
- int percent_cases; /* Approximate percent of cases to read. */
struct field *fields; /* Fields to parse. */
size_t field_cnt; /* Number of fields. */
/* DP_DELIMITED parsers only. */
bool span; /* May cases span multiple records? */
bool empty_line_has_field; /* Does an empty line have an (empty) field? */
+ bool warn_missing_fields; /* Should missing fields be considered errors? */
struct substring quotes; /* Characters that can quote separators. */
bool quote_escape; /* Doubled quote acts as escape? */
struct substring soft_seps; /* Two soft separators act like just one. */
parser->type = DP_FIXED;
parser->skip_records = 0;
- parser->max_cases = -1;
- parser->percent_cases = 100;
parser->fields = NULL;
parser->field_cnt = 0;
parser->span = true;
parser->empty_line_has_field = false;
+ parser->warn_missing_fields = true;
ss_alloc_substring (&parser->quotes, ss_cstr ("\"'"));
parser->quote_escape = false;
ss_alloc_substring (&parser->soft_seps, ss_cstr (CC_SPACES));
parser->skip_records = initial_records_to_skip;
}
-/* Sets the maximum number of cases parsed by PARSER to
- MAX_CASES. The default is -1, meaning no limit. */
-void
-data_parser_set_case_limit (struct data_parser *parser, casenumber max_cases)
-{
- parser->max_cases = max_cases;
-}
-
-/* Sets the percentage of cases that PARSER should read from the
- input file to PERCENT_CASES. By default, all cases are
- read. */
-void
-data_parser_set_case_percent (struct data_parser *parser, int percent_cases)
-{
- assert (percent_cases >= 0 && percent_cases <= 100);
- parser->percent_cases = percent_cases;
-}
-
/* Returns true if PARSER is configured to allow cases to span
multiple records. */
bool
parser->empty_line_has_field = empty_line_has_field;
}
+
+/* If WARN_MISSING_FIELDS is true, configures PARSER to emit a warning
+ and cause an error condition when a missing field is encountered.
+ If WARN_MISSING_FIELDS is false, PARSER will silently fill such
+ fields with the system-missing value or blanks, as appropriate.
+
+ This setting affects parsing of DP_DELIMITED files only. */
+void
+data_parser_set_warn_missing_fields (struct data_parser *parser,
+ bool warn_missing_fields)
+{
+ parser->warn_missing_fields = warn_missing_fields;
+}
+
+
/* Sets the characters that may be used for quoting field
contents to QUOTES. If QUOTES is empty, quoting will be
disabled.
}
/* Limit cases. */
- if (parser->max_cases != -1 && parser->max_cases-- == 0)
- return false;
- if (parser->percent_cases < 100
- && dfm_get_percent_read (reader) >= parser->percent_cases)
- return false;
-
if (parser->type == DP_DELIMITED)
{
if (parser->span)
*FIELD is set to the field content. The caller must not modify or
destroy this constant string.
- After parsing the field, sets the current position in the
- record to just past the field and any trailing delimiter.
- Returns 0 on failure or a 1-based column number indicating the
- beginning of the field on success. */
+ Sets *FIRST_COLUMN to the 1-based column number of the start of
+ the extracted field, and *LAST_COLUMN to the end of the extracted
+ field.
+
+ Returns true on success, false on failure. */
static bool
cut_field (const struct data_parser *parser, struct dfm_reader *reader,
int *first_column, int *last_column, struct string *tmp,
struct substring *field)
{
+ size_t length_before_separators;
struct substring line, p;
+ bool quoted;
if (dfm_eof (reader))
return false;
}
*first_column = dfm_column_start (reader);
- if (ss_find_byte (parser->quotes, ss_first (p)) != SIZE_MAX)
+ quoted = ss_find_byte (parser->quotes, ss_first (p)) != SIZE_MAX;
+ if (quoted)
{
/* Quoted field. */
int quote = ss_get_byte (&p);
if (!ss_get_until (&p, quote, field))
- msg (SW, _("Quoted string extends beyond end of line."));
+ msg (DW, _("Quoted string extends beyond end of line."));
if (parser->quote_escape && ss_first (p) == quote)
{
ds_assign_substring (tmp, *field);
struct substring ss;
ds_put_byte (tmp, quote);
if (!ss_get_until (&p, quote, &ss))
- msg (SW, _("Quoted string extends beyond end of line."));
+ msg (DW, _("Quoted string extends beyond end of line."));
ds_put_substring (tmp, ss);
}
*field = ds_ss (tmp);
}
*last_column = *first_column + (ss_length (line) - ss_length (p));
-
- /* Skip trailing soft separator and a single hard separator
- if present. */
- ss_ltrim (&p, parser->soft_seps);
- if (!ss_is_empty (p)
- && ss_find_byte (parser->hard_seps, ss_first (p)) != SIZE_MAX)
- ss_advance (&p, 1);
}
else
{
/* Regular field. */
ss_get_bytes (&p, ss_cspan (p, ds_ss (&parser->any_sep)), field);
*last_column = *first_column + ss_length (*field);
+ }
- if (!ss_ltrim (&p, parser->soft_seps) || ss_is_empty (p)
- || ss_find_byte (parser->hard_seps, p.string[0]) != SIZE_MAX)
- {
- /* Advance past a trailing hard separator,
- regardless of whether one actually existed. If
- we "skip" a delimiter that was not actually
- there, then we will return end-of-line on our
- next call, which is what we want. */
- dfm_forward_columns (reader, 1);
- }
+ /* Skip trailing soft separator and a single hard separator if present. */
+ length_before_separators = ss_length (p);
+ ss_ltrim (&p, parser->soft_seps);
+ if (!ss_is_empty (p)
+ && ss_find_byte (parser->hard_seps, ss_first (p)) != SIZE_MAX)
+ {
+ ss_advance (&p, 1);
+ ss_ltrim (&p, parser->soft_seps);
}
+ if (ss_is_empty (p))
+ dfm_forward_columns (reader, 1);
+ else if (quoted && length_before_separators == ss_length (p))
+ msg (DW, _("Missing delimiter following quoted string."));
dfm_forward_columns (reader, ss_length (line) - ss_length (p));
return true;
if (dfm_eof (reader))
{
- msg (SW, _("Partial case of %d of %d records discarded."),
+ msg (DW, _("Partial case of %d of %d records discarded."),
row - 1, parser->records_per_case);
return false;
}
parse_delimited_span (const struct data_parser *parser,
struct dfm_reader *reader, struct ccase *c)
{
- const char *input_encoding = dfm_reader_get_encoding (reader);
const char *output_encoding = dict_get_encoding (parser->dict);
struct string tmp = DS_EMPTY_INITIALIZER;
struct field *f;
if (dfm_eof (reader))
{
if (f > parser->fields)
- msg (SW, _("Partial case discarded. The first variable "
+ msg (DW, _("Partial case discarded. The first variable "
"missing was %s."), f->name);
ds_destroy (&tmp);
return false;
}
}
+ const char *input_encoding = dfm_reader_get_encoding (reader);
error = data_in (s, input_encoding, f->format.type,
case_data_rw_idx (c, f->case_idx),
fmt_var_width (&f->format), output_encoding);
parse_delimited_no_span (const struct data_parser *parser,
struct dfm_reader *reader, struct ccase *c)
{
- const char *input_encoding = dfm_reader_get_encoding (reader);
const char *output_encoding = dict_get_encoding (parser->dict);
struct string tmp = DS_EMPTY_INITIALIZER;
struct substring s;
if (!cut_field (parser, reader, &first_column, &last_column, &tmp, &s))
{
- if (f < end - 1 && settings_get_undefined ())
- msg (SW, _("Missing value(s) for all variables from %s onward. "
+ if (f < end - 1 && settings_get_undefined () && parser->warn_missing_fields)
+ msg (DW, _("Missing value(s) for all variables from %s onward. "
"These will be filled with the system-missing value "
"or blanks, as appropriate."),
f->name);
goto exit;
}
+ const char *input_encoding = dfm_reader_get_encoding (reader);
error = data_in (s, input_encoding, f->format.type,
case_data_rw_idx (c, f->case_idx),
fmt_var_width (&f->format), output_encoding);
s = dfm_get_record (reader);
ss_ltrim (&s, parser->soft_seps);
if (!ss_is_empty (s))
- msg (SW, _("Record ends in data not part of any field."));
+ msg (DW, _("Record ends in data not part of any field."));
exit:
dfm_forward_record (reader);
transferred to the dataset. */
void
data_parser_make_active_file (struct data_parser *parser, struct dataset *ds,
- struct dfm_reader *reader,
- struct dictionary *dict)
+ struct dfm_reader *reader,
+ struct dictionary *dict,
+ struct casereader* (*func)(struct casereader *,
+ const struct dictionary *,
+ void *),
+ void *ud)
{
struct data_parser_casereader *r;
- struct casereader *casereader;
+ struct casereader *casereader0;
+ struct casereader *casereader1;
r = xmalloc (sizeof *r);
r->parser = parser;
r->reader = reader;
r->proto = caseproto_ref (dict_get_proto (dict));
- casereader = casereader_create_sequential (NULL, r->proto,
+ casereader0 = casereader_create_sequential (NULL, r->proto,
CASENUMBER_MAX,
&data_parser_casereader_class, r);
+
+ if (func)
+ casereader1 = func (casereader0, dict, ud);
+ else
+ casereader1 = casereader0;
+
dataset_set_dict (ds, dict);
- dataset_set_source (ds, casereader);
+ dataset_set_source (ds, casereader1);
}
+
static struct ccase *
data_parser_casereader_read (struct casereader *reader UNUSED, void *r_)
{