#include "libpspp/intern.h"
#include "libpspp/message.h"
#include "libpspp/str.h"
+#include "libpspp/string-array.h"
#include "output/pivot-table.h"
#include "gl/xalloc.h"
/* Data parser for textual data like that read by DATA LIST. */
struct data_parser
{
- struct dictionary *dict; /* Dictionary of destination */
enum data_parser_type type; /* Type of data to parse. */
int skip_records; /* Records to skip before first real data. */
/* Creates and returns a new data parser. */
struct data_parser *
-data_parser_create (struct dictionary *dict)
+data_parser_create (void)
{
struct data_parser *parser = xmalloc (sizeof *parser);
parser->fields = NULL;
parser->n_fields = 0;
parser->field_allocated = 0;
- parser->dict = dict_ref (dict);
parser->span = true;
parser->empty_line_has_field = false;
{
size_t i;
- dict_unref (parser->dict);
for (i = 0; i < parser->n_fields; i++)
free (parser->fields[i].name);
free (parser->fields);
}
\f
static bool parse_delimited_span (const struct data_parser *,
- struct dfm_reader *, struct ccase *);
+ struct dfm_reader *,
+ struct dictionary *, struct ccase *);
static bool parse_delimited_no_span (const struct data_parser *,
- struct dfm_reader *, struct ccase *);
-static bool parse_fixed (const struct data_parser *,
- struct dfm_reader *, struct ccase *);
+ struct dfm_reader *,
+ struct dictionary *, struct ccase *);
+static bool parse_fixed (const struct data_parser *, struct dfm_reader *,
+ struct dictionary *, struct ccase *);
-/* Reads a case from DFM into C, parsing it with PARSER. Returns
- true if successful, false at end of file or on I/O error.
+/* Reads a case from READER into C, which matches dictionary DICT, parsing it
+ with PARSER.  Dispatches on PARSER's type: delimited data is handed to
+ parse_delimited_span or parse_delimited_no_span according to PARSER's span
+ flag, and every other type is handed to parse_fixed.  Returns true if
+ successful, false at end of file or on I/O error.
Case C must not be shared. */
bool
data_parser_parse (struct data_parser *parser, struct dfm_reader *reader,
- struct ccase *c)
+ struct dictionary *dict, struct ccase *c)
{
bool retval;
if (parser->type == DP_DELIMITED)
{
if (parser->span)
- retval = parse_delimited_span (parser, reader, c);
+ retval = parse_delimited_span (parser, reader, dict, c);
else
- retval = parse_delimited_no_span (parser, reader, c);
+ retval = parse_delimited_no_span (parser, reader, dict, c);
}
else
- retval = parse_fixed (parser, reader, c);
+ retval = parse_fixed (parser, reader, dict, c);
return retval;
}
+static void
+cut_field__ (const struct data_parser *parser, const struct substring *line,
+ struct substring *p, size_t *n_columns,
+ struct string *tmp, struct substring *field)
+{ /* Cuts one field from the front of *P using PARSER's quote and separator
+ settings, then advances *P past the separators that follow the field.
+ *FIELD receives the field's text; when quote escaping applies, the text
+ is assembled in TMP and *FIELD is set from TMP.  *N_COLUMNS receives a
+ width: for a quoted field, the length of *LINE minus the length of *P
+ once the quoted text has been consumed; for an unquoted field, the
+ length of *FIELD.  Emits warnings for an unterminated quoted string and
+ for a quoted string with no delimiter after it. */
+ bool quoted = ss_find_byte (parser->quotes, ss_first (*p)) != SIZE_MAX;
+ if (quoted)
+ {
+ /* Quoted field.  The byte that opens the field is also the byte that is
+ searched for to close it. */
+ int quote = ss_get_byte (p);
+ if (!ss_get_until (p, quote, field))
+ msg (DW, _("Quoted string extends beyond end of line."));
+ if (parser->quote_escape && ss_first (*p) == quote)
+ { /* Quote escaping is enabled and another quote follows at once:
+ rebuild the field in TMP, adding one quote byte plus the next
+ segment for each such quote. */
+ ds_assign_substring (tmp, *field);
+ while (ss_match_byte (p, quote))
+ {
+ struct substring ss;
+ ds_put_byte (tmp, quote);
+ if (!ss_get_until (p, quote, &ss))
+ msg (DW, _("Quoted string extends beyond end of line."));
+ ds_put_substring (tmp, ss);
+ }
+ *field = ds_ss (tmp);
+ }
+ *n_columns = ss_length (*line) - ss_length (*p);
+ }
+ else
+ {
+ /* Regular field: takes the leading run of *P whose length ss_cspan
+ reports against PARSER's any_sep set. */
+ ss_get_bytes (p, ss_cspan (*p, ds_ss (&parser->any_sep)), field);
+ *n_columns = ss_length (*field);
+ }
+
+ /* Skip trailing soft separator and a single hard separator if present.
+ The length of *P is recorded first so that the check at the end can
+ tell whether anything at all was skipped after a quoted field. */
+ size_t length_before_separators = ss_length (*p);
+ ss_ltrim (p, parser->soft_seps);
+ if (!ss_is_empty (*p)
+ && ss_find_byte (parser->hard_seps, ss_first (*p)) != SIZE_MAX)
+ {
+ ss_advance (p, 1);
+ ss_ltrim (p, parser->soft_seps);
+ }
+
+ if (!ss_is_empty (*p) && quoted && length_before_separators == ss_length (*p))
+ msg (DW, _("Missing delimiter following quoted string."));
+}
+
/* Extracts a delimited field from the current position in the
current record according to PARSER, reading data from READER.
int *first_column, int *last_column, struct string *tmp,
struct substring *field)
{
- size_t length_before_separators;
struct substring line, p;
- bool quoted;
if (dfm_eof (reader))
return false;
}
}
+ size_t n_columns;
+ cut_field__ (parser, &line, &p, &n_columns, tmp, field);
*first_column = dfm_column_start (reader);
- quoted = ss_find_byte (parser->quotes, ss_first (p)) != SIZE_MAX;
- if (quoted)
- {
- /* Quoted field. */
- int quote = ss_get_byte (&p);
- if (!ss_get_until (&p, quote, field))
- msg (DW, _("Quoted string extends beyond end of line."));
- if (parser->quote_escape && ss_first (p) == quote)
- {
- ds_assign_substring (tmp, *field);
- while (ss_match_byte (&p, quote))
- {
- struct substring ss;
- ds_put_byte (tmp, quote);
- if (!ss_get_until (&p, quote, &ss))
- msg (DW, _("Quoted string extends beyond end of line."));
- ds_put_substring (tmp, ss);
- }
- *field = ds_ss (tmp);
- }
- *last_column = *first_column + (ss_length (line) - ss_length (p));
- }
- else
- {
- /* Regular field. */
- ss_get_bytes (&p, ss_cspan (p, ds_ss (&parser->any_sep)), field);
- *last_column = *first_column + ss_length (*field);
- }
+ *last_column = *first_column + n_columns;
- /* Skip trailing soft separator and a single hard separator if present. */
- length_before_separators = ss_length (p);
- ss_ltrim (&p, parser->soft_seps);
- if (!ss_is_empty (p)
- && ss_find_byte (parser->hard_seps, ss_first (p)) != SIZE_MAX)
- {
- ss_advance (&p, 1);
- ss_ltrim (&p, parser->soft_seps);
- }
if (ss_is_empty (p))
dfm_forward_columns (reader, 1);
- else if (quoted && length_before_separators == ss_length (p))
- msg (DW, _("Missing delimiter following quoted string."));
dfm_forward_columns (reader, ss_length (line) - ss_length (p));
return true;
free (error);
}
-/* Reads a case from READER into C, parsing it according to
- fixed-format syntax rules in PARSER.
- Returns true if successful, false at end of file or on I/O error. */
+/* Reads a case from READER into C, which matches DICT, parsing it according to
+ fixed-format syntax rules in PARSER. Returns true if successful, false at
+ end of file or on I/O error. */
static bool
parse_fixed (const struct data_parser *parser, struct dfm_reader *reader,
- struct ccase *c)
+ struct dictionary *dict, struct ccase *c)
{
const char *input_encoding = dfm_reader_get_encoding (reader);
- const char *output_encoding = dict_get_encoding (parser->dict);
+ const char *output_encoding = dict_get_encoding (dict);
struct field *f;
int row;
return true;
}
-/* Reads a case from READER into C, parsing it according to
- free-format syntax rules in PARSER.
- Returns true if successful, false at end of file or on I/O error. */
+/* Splits the data line in LINE into individual text fields and returns the
+ number of fields. If SA is nonnull, appends each field to SA; the caller
+ retains ownership of SA and its contents.
+
+ Each field is cut with cut_field__ under PARSER's quote and separator
+ settings, with msg_disable/msg_enable bracketing the call.  Leading soft
+ separators are trimmed before every field, and the loop ends as soon as
+ nothing but soft separators remains.  A copy of each field (ss_xstrdup) is
+ what gets appended to SA. */
+size_t
+data_parser_split (const struct data_parser *parser,
+ struct substring line, struct string_array *sa)
+{
+ size_t n = 0;
+
+ struct string tmp = DS_EMPTY_INITIALIZER;
+ for (;;)
+ {
+ struct substring p = line;
+ ss_ltrim (&p, parser->soft_seps);
+ if (ss_is_empty (p))
+ {
+ ds_destroy (&tmp);
+ return n;
+ }
+
+ size_t n_columns;
+ struct substring field;
+
+ msg_disable (); /* NOTE(review): presumably silences cut_field__'s warnings -- confirm. */
+ cut_field__ (parser, &line, &p, &n_columns, &tmp, &field);
+ msg_enable ();
+
+ if (sa)
+ string_array_append_nocopy (sa, ss_xstrdup (field));
+ n++;
+ line = p; /* Continue with whatever follows the field just cut. */
+ }
+}
+
+/* Reads a case from READER into C, which matches dictionary DICT, parsing it
+ according to free-format syntax rules in PARSER. Returns true if
+ successful, false at end of file or on I/O error. */
static bool
parse_delimited_span (const struct data_parser *parser,
- struct dfm_reader *reader, struct ccase *c)
+ struct dfm_reader *reader,
+ struct dictionary *dict, struct ccase *c)
{
- const char *output_encoding = dict_get_encoding (parser->dict);
+ const char *output_encoding = dict_get_encoding (dict);
struct string tmp = DS_EMPTY_INITIALIZER;
struct field *f;
return true;
}
-/* Reads a case from READER into C, parsing it according to
- delimited syntax rules with one case per record in PARSER.
+/* Reads a case from READER into C, which matches dictionary DICT, parsing it
+ according to delimited syntax rules with one case per record in PARSER.
Returns true if successful, false at end of file or on I/O error. */
static bool
parse_delimited_no_span (const struct data_parser *parser,
- struct dfm_reader *reader, struct ccase *c)
+ struct dfm_reader *reader,
+ struct dictionary *dict, struct ccase *c)
{
- const char *output_encoding = dict_get_encoding (parser->dict);
+ const char *output_encoding = dict_get_encoding (dict);
struct string tmp = DS_EMPTY_INITIALIZER;
struct substring s;
struct field *f, *end;
struct data_parser_casereader
{
struct data_parser *parser; /* Parser. */
+ struct dictionary *dict; /* Dictionary passed to data_parser_parse for
+ each case; obtained with dict_ref and
+ released with dict_unref. */
struct dfm_reader *reader; /* Data file reader. */
struct caseproto *proto; /* Format of cases. */
};
void
data_parser_make_active_file (struct data_parser *parser, struct dataset *ds,
struct dfm_reader *reader,
- struct dictionary *dict,
+ struct dictionary *dict,
struct casereader* (*func)(struct casereader *,
const struct dictionary *,
void *),
r = xmalloc (sizeof *r);
r->parser = parser;
+ r->dict = dict_ref (dict);
r->reader = reader;
r->proto = caseproto_ref (dict_get_proto (dict));
casereader0 = casereader_create_sequential (NULL, r->proto,
{
struct data_parser_casereader *r = r_;
struct ccase *c = case_create (r->proto);
- if (data_parser_parse (r->parser, r->reader, c))
+ if (data_parser_parse (r->parser, r->reader, r->dict, c))
return c;
else
{
casereader_force_error (reader);
dfm_close_reader (r->reader);
caseproto_unref (r->proto);
+ dict_unref (r->dict);
data_parser_destroy (r->parser);
free (r);
}