/* PSPP - a program for statistical analysis.
- Copyright (C) 2007 Free Software Foundation, Inc.
+ Copyright (C) 2007, 2009 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
bool span; /* May cases span multiple records? */
bool empty_line_has_field; /* Does an empty line have an (empty) field? */
struct substring quotes; /* Characters that can quote separators. */
+ bool quote_escape; /* Doubled quote acts as escape? */
struct substring soft_seps; /* Two soft separators act like just one. */
struct substring hard_seps; /* Two hard separators yield empty fields. */
struct string any_sep; /* Concatenation of soft_seps and hard_seps. */
parser->span = true;
parser->empty_line_has_field = false;
ss_alloc_substring (&parser->quotes, ss_cstr ("\"'"));
+ parser->quote_escape = false;
ss_alloc_substring (&parser->soft_seps, ss_cstr (CC_SPACES));
ss_alloc_substring (&parser->hard_seps, ss_cstr (","));
ds_init_empty (&parser->any_sep);
ss_alloc_substring (&parser->quotes, quotes);
}
+/* Sets whether a doubled quote character acts as an escape
+ within a quoted field parsed by PARSER.
+
+ If ESCAPE is false (the default setting), a character used for
+ quoting cannot itself be embedded within a quoted field. If
+ ESCAPE is true, then a quote character can be embedded within
+ a quoted field by doubling it.
+
+ This setting affects parsing of DP_DELIMITED files only, and
+ only when at least one quote character has been set (with
+ data_parser_set_quotes). */
+void
+data_parser_set_quote_escape (struct data_parser *parser, bool escape)
+{
+ parser->quote_escape = escape;
+}
+
/* Sets PARSER's soft delimiters to DELIMITERS. Soft delimiters
separate fields, but consecutive soft delimiters do not yield
empty fields. (Ordinarily, only white space characters are
static bool parse_fixed (const struct data_parser *,
struct dfm_reader *, struct ccase *);
-/* Reads a case from DFM into C, parsing it with PARSER.
- Returns true if successful, false at end of file or on I/O error. */
+/* Reads a case from DFM into C, parsing it with PARSER. Returns
+ true if successful, false at end of file or on I/O error.
+
+ Case C must not be shared. */
bool
data_parser_parse (struct data_parser *parser, struct dfm_reader *reader,
struct ccase *c)
{
bool retval;
+ assert (!case_is_shared (c));
assert (data_parser_any_fields (parser));
/* Skip the requested number of records before reading the
beginning of the field on success. */
static bool
cut_field (const struct data_parser *parser, struct dfm_reader *reader,
+ int *first_column, int *last_column, struct string *tmp,
struct substring *field)
{
struct substring line, p;
else
{
*field = p;
+ *first_column = dfm_column_start (reader);
+ *last_column = *first_column + 1;
dfm_forward_columns (reader, 1);
return true;
}
}
+ *first_column = dfm_column_start (reader);
if (ss_find_char (parser->quotes, ss_first (p)) != SIZE_MAX)
{
/* Quoted field. */
- if (!ss_get_until (&p, ss_get_char (&p), field))
+ int quote = ss_get_char (&p);
+ if (!ss_get_until (&p, quote, field))
msg (SW, _("Quoted string extends beyond end of line."));
+ if (parser->quote_escape && ss_first (p) == quote)
+ {
+ ds_assign_substring (tmp, *field);
+ while (ss_match_char (&p, quote))
+ {
+ struct substring ss;
+ ds_put_char (tmp, quote);
+ if (!ss_get_until (&p, quote, &ss))
+ msg (SW, _("Quoted string extends beyond end of line."));
+ ds_put_substring (tmp, ss);
+ }
+ *field = ds_ss (tmp);
+ }
+ *last_column = dfm_column_start (reader);
/* Skip trailing soft separator and a single hard separator
if present. */
{
/* Regular field. */
ss_get_chars (&p, ss_cspan (p, ds_ss (&parser->any_sep)), field);
+ *last_column = dfm_column_start (reader);
if (!ss_ltrim (&p, parser->soft_seps) || ss_is_empty (p))
{
/* Advance past a trailing hard separator,
data_in (ss_substr (line, f->first_column - 1,
f->format.w),
encoding, f->format.type, f->format.d,
- f->first_column, case_data_rw_idx (c, f->case_idx),
+ f->first_column, f->first_column + f->format.w,
+ case_data_rw_idx (c, f->case_idx),
fmt_var_width (&f->format));
dfm_forward_record (reader);
struct dfm_reader *reader, struct ccase *c)
{
enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (reader);
+ struct string tmp = DS_EMPTY_INITIALIZER;
struct field *f;
for (f = parser->fields; f < &parser->fields[parser->field_cnt]; f++)
{
struct substring s;
+ int first_column, last_column;
/* Cut out a field and read in a new record if necessary. */
- while (!cut_field (parser, reader, &s))
+ while (!cut_field (parser, reader,
+ &first_column, &last_column, &tmp, &s))
{
if (!dfm_eof (reader))
dfm_forward_record (reader);
if (f > parser->fields)
msg (SW, _("Partial case discarded. The first variable "
"missing was %s."), f->name);
+ ds_destroy (&tmp);
return false;
}
}
data_in (s, encoding, f->format.type, 0,
- dfm_get_column (reader, ss_data (s)),
+ first_column, last_column,
case_data_rw_idx (c, f->case_idx),
fmt_var_width (&f->format));
}
+ ds_destroy (&tmp);
return true;
}
struct dfm_reader *reader, struct ccase *c)
{
enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (reader);
+ struct string tmp = DS_EMPTY_INITIALIZER;
struct substring s;
struct field *f;
for (f = parser->fields; f < &parser->fields[parser->field_cnt]; f++)
{
- if (!cut_field (parser, reader, &s))
+ int first_column, last_column;
+ if (!cut_field (parser, reader, &first_column, &last_column, &tmp, &s))
{
if (settings_get_undefined ())
msg (SW, _("Missing value(s) for all variables from %s onward. "
"or blanks, as appropriate."),
f->name);
for (; f < &parser->fields[parser->field_cnt]; f++)
- {
- int width = fmt_var_width (&f->format);
- if (width == 0)
- case_data_rw_idx (c, f->case_idx)->f = SYSMIS;
- else
- memset (case_data_rw_idx (c, f->case_idx)->s, ' ', width);
- }
+ value_set_missing (case_data_rw_idx (c, f->case_idx),
+ fmt_var_width (&f->format));
goto exit;
}
data_in (s, encoding, f->format.type, 0,
- dfm_get_column (reader, ss_data (s)),
+ first_column, last_column,
case_data_rw_idx (c, f->case_idx),
fmt_var_width (&f->format));
}
exit:
dfm_forward_record (reader);
+ ds_destroy (&tmp);
return true;
}
\f
proc_set_active_file (ds, casereader, dict);
}
-static bool
-data_parser_casereader_read (struct casereader *reader UNUSED, void *r_,
- struct ccase *c)
+static struct ccase *
+data_parser_casereader_read (struct casereader *reader UNUSED, void *r_)
{
struct data_parser_casereader *r = r_;
- bool ok;
-
- case_create (c, r->value_cnt);
- ok = data_parser_parse (r->parser, r->reader, c);
- if (!ok)
- case_destroy (c);
- return ok;
+ struct ccase *c = case_create (r->value_cnt); /* New case with R's value count, for the parser to fill. */
+ if (data_parser_parse (r->parser, r->reader, c))
+ return c; /* Parsed successfully: the case is handed to the caller. */
+ else
+ {
+ case_unref (c); /* Parse failed (end of file or I/O error): drop the case created above. */
+ return NULL; /* NULL tells the caller that no case was read. */
+ }
}
static void