/* PSPP - a program for statistical analysis.
- Copyright (C) 2007, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+ Copyright (C) 2007, 2009, 2010, 2011, 2012, 2013, 2016 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "data/file-handle-def.h"
#include "data/settings.h"
#include "language/data-io/data-reader.h"
+#include "libpspp/intern.h"
#include "libpspp/message.h"
#include "libpspp/str.h"
-#include "output/tab.h"
+#include "libpspp/string-array.h"
+#include "output/pivot-table.h"
#include "gl/xalloc.h"
#include "gettext.h"
+#define N_(msgid) msgid
#define _(msgid) gettext (msgid)
/* Data parser for textual data like that read by DATA LIST. */
struct data_parser
{
- const struct dictionary *dict; /*Dictionary of destination */
enum data_parser_type type; /* Type of data to parse. */
int skip_records; /* Records to skip before first real data. */
- casenumber max_cases; /* Max number of cases to read. */
- int percent_cases; /* Approximate percent of cases to read. */
struct field *fields; /* Fields to parse. */
- size_t field_cnt; /* Number of fields. */
+ size_t n_fields; /* Number of fields. */
size_t field_allocated; /* Number of fields spaced allocated for. */
/* DP_DELIMITED parsers only. */
bool span; /* May cases span multiple records? */
bool empty_line_has_field; /* Does an empty line have an (empty) field? */
+ bool warn_missing_fields; /* Should missing fields be considered errors? */
struct substring quotes; /* Characters that can quote separators. */
bool quote_escape; /* Doubled quote acts as escape? */
struct substring soft_seps; /* Two soft separators act like just one. */
/* Creates and returns a new data parser. */
struct data_parser *
-data_parser_create (const struct dictionary *dict)
+data_parser_create (void)
{
struct data_parser *parser = xmalloc (sizeof *parser);
parser->type = DP_FIXED;
parser->skip_records = 0;
- parser->max_cases = -1;
- parser->percent_cases = 100;
parser->fields = NULL;
- parser->field_cnt = 0;
+ parser->n_fields = 0;
parser->field_allocated = 0;
- parser->dict = dict;
parser->span = true;
parser->empty_line_has_field = false;
+ parser->warn_missing_fields = true;
ss_alloc_substring (&parser->quotes, ss_cstr ("\"'"));
parser->quote_escape = false;
ss_alloc_substring (&parser->soft_seps, ss_cstr (CC_SPACES));
{
size_t i;
- for (i = 0; i < parser->field_cnt; i++)
+ for (i = 0; i < parser->n_fields; i++)
free (parser->fields[i].name);
free (parser->fields);
ss_dealloc (&parser->quotes);
void
data_parser_set_type (struct data_parser *parser, enum data_parser_type type)
{
+ /* The type may only be changed while PARSER has no fields yet. */
- assert (parser->field_cnt == 0);
+ assert (parser->n_fields == 0);
assert (type == DP_FIXED || type == DP_DELIMITED);
parser->type = type;
}
parser->skip_records = initial_records_to_skip;
}
-/* Sets the maximum number of cases parsed by PARSER to
- MAX_CASES. The default is -1, meaning no limit. */
-void
-data_parser_set_case_limit (struct data_parser *parser, casenumber max_cases)
-{
- parser->max_cases = max_cases;
-}
-
-/* Sets the percentage of cases that PARSER should read from the
- input file to PERCENT_CASES. By default, all cases are
- read. */
-void
-data_parser_set_case_percent (struct data_parser *parser, int percent_cases)
-{
- assert (percent_cases >= 0 && percent_cases <= 100);
- parser->percent_cases = percent_cases;
-}
-
/* Returns true if PARSER is configured to allow cases to span
multiple records. */
bool
parser->empty_line_has_field = empty_line_has_field;
}
+
+/* If WARN_MISSING_FIELDS is true (the default set by data_parser_create()),
+ configures PARSER to emit a warning and cause an error condition when a
+ missing field is encountered. If WARN_MISSING_FIELDS is false, PARSER
+ will silently fill such fields with the system-missing value or blanks,
+ as appropriate.
+
+ Where this setting is consulted, the warning is also suppressed when
+ settings_get_undefined () returns false or when only the last field is
+ missing.
+
+ This setting affects parsing of DP_DELIMITED files only. */
+void
+data_parser_set_warn_missing_fields (struct data_parser *parser,
+ bool warn_missing_fields)
+{
+ parser->warn_missing_fields = warn_missing_fields;
+}
+
+
/* Sets the characters that may be used for quoting field
contents to QUOTES. If QUOTES is empty, quoting will be
disabled.
{
struct field *field;
- if (p->field_cnt == p->field_allocated)
+ if (p->n_fields == p->field_allocated)
p->fields = x2nrealloc (p->fields, &p->field_allocated, sizeof *p->fields);
- field = &p->fields[p->field_cnt++];
+ field = &p->fields[p->n_fields++];
field->format = *format;
field->case_idx = case_idx;
field->name = xstrdup (name);
int record, int first_column)
{
assert (parser->type == DP_FIXED);
- assert (parser->field_cnt == 0
- || record >= parser->fields[parser->field_cnt - 1].record);
+ assert (parser->n_fields == 0
+ || record >= parser->fields[parser->n_fields - 1].record);
if (record > parser->records_per_case)
parser->records_per_case = record;
add_field (parser, format, case_idx, name, record, first_column);
bool
data_parser_any_fields (const struct data_parser *parser)
{
+ /* True once at least one field has been added to PARSER. */
- return parser->field_cnt > 0;
+ return parser->n_fields > 0;
}
static void
}
\f
static bool parse_delimited_span (const struct data_parser *,
- struct dfm_reader *, struct ccase *);
+ struct dfm_reader *,
+ struct dictionary *, struct ccase *);
static bool parse_delimited_no_span (const struct data_parser *,
- struct dfm_reader *, struct ccase *);
-static bool parse_fixed (const struct data_parser *,
- struct dfm_reader *, struct ccase *);
+ struct dfm_reader *,
+ struct dictionary *, struct ccase *);
+static bool parse_fixed (const struct data_parser *, struct dfm_reader *,
+ struct dictionary *, struct ccase *);
-/* Reads a case from DFM into C, parsing it with PARSER. Returns
- true if successful, false at end of file or on I/O error.
+/* Reads a case from DFM into C, which matches dictionary DICT, parsing it with
+ PARSER. Returns true if successful, false at end of file or on I/O error.
Case C must not be shared. */
bool
data_parser_parse (struct data_parser *parser, struct dfm_reader *reader,
- struct ccase *c)
+ struct dictionary *dict, struct ccase *c)
{
bool retval;
}
/* Limit cases. */
- if (parser->max_cases != -1 && parser->max_cases-- == 0)
- return false;
- if (parser->percent_cases < 100
- && dfm_get_percent_read (reader) >= parser->percent_cases)
- return false;
-
if (parser->type == DP_DELIMITED)
{
if (parser->span)
- retval = parse_delimited_span (parser, reader, c);
+ retval = parse_delimited_span (parser, reader, dict, c);
else
- retval = parse_delimited_no_span (parser, reader, c);
+ retval = parse_delimited_no_span (parser, reader, dict, c);
}
else
- retval = parse_fixed (parser, reader, c);
+ retval = parse_fixed (parser, reader, dict, c);
return retval;
}
+/* Extracts one delimited field from the front of *P according to PARSER,
+ stores it in *FIELD, and advances *P past the field, any soft separators
+ that follow it, and at most one hard separator (plus any soft separators
+ after that hard separator).
+
+ LINE is used only to compute *N_COLUMNS for a quoted field, as
+ ss_length (*LINE) - ss_length (*P) once the quoted string has been
+ consumed; presumably *P is a suffix of *LINE (confirm against callers).
+ For an unquoted field, *N_COLUMNS is the length of the field itself.
+
+ When PARSER treats doubled quotes as escapes and the quoted field contains
+ one, the unescaped content is built up in TMP and *FIELD is set to TMP's
+ contents. Otherwise TMP is not touched.
+
+ Reports, via msg (DW, ...), a quoted string that runs to the end of the
+ line without a closing quote and a quoted string that is followed by
+ further data without an intervening separator. */
+static void
+cut_field__ (const struct data_parser *parser, const struct substring *line,
+ struct substring *p, size_t *n_columns,
+ struct string *tmp, struct substring *field)
+{
+ bool quoted = ss_find_byte (parser->quotes, ss_first (*p)) != SIZE_MAX;
+ if (quoted)
+ {
+ /* Quoted field. */
+ int quote = ss_get_byte (p);
+ if (!ss_get_until (p, quote, field))
+ msg (DW, _("Quoted string extends beyond end of line."));
+ /* A quote character directly after the closing quote is an escaped
+ quote: rebuild the field in TMP, inserting one quote character
+ between each pair of adjacent segments. */
+ if (parser->quote_escape && ss_first (*p) == quote)
+ {
+ ds_assign_substring (tmp, *field);
+ while (ss_match_byte (p, quote))
+ {
+ struct substring ss;
+ ds_put_byte (tmp, quote);
+ if (!ss_get_until (p, quote, &ss))
+ msg (DW, _("Quoted string extends beyond end of line."));
+ ds_put_substring (tmp, ss);
+ }
+ *field = ds_ss (tmp);
+ }
+ *n_columns = ss_length (*line) - ss_length (*p);
+ }
+ else
+ {
+ /* Regular field. */
+ ss_get_bytes (p, ss_cspan (*p, ds_ss (&parser->any_sep)), field);
+ *n_columns = ss_length (*field);
+ }
+
+ /* Skip trailing soft separator and a single hard separator if present. */
+ size_t length_before_separators = ss_length (*p);
+ ss_ltrim (p, parser->soft_seps);
+ if (!ss_is_empty (*p)
+ && ss_find_byte (parser->hard_seps, ss_first (*p)) != SIZE_MAX)
+ {
+ ss_advance (p, 1);
+ ss_ltrim (p, parser->soft_seps);
+ }
+
+ /* If nothing was skipped above and data remains, the quoted string ran
+ straight into the next field. */
+ if (!ss_is_empty (*p) && quoted && length_before_separators == ss_length (*p))
+ msg (DW, _("Missing delimiter following quoted string."));
+}
+
/* Extracts a delimited field from the current position in the
current record according to PARSER, reading data from READER.
*FIELD is set to the field content. The caller must not modify or
destroy this constant string.
- After parsing the field, sets the current position in the
- record to just past the field and any trailing delimiter.
- Returns 0 on failure or a 1-based column number indicating the
- beginning of the field on success. */
+ Sets *FIRST_COLUMN to the 1-based column number of the start of
+ the extracted field, and *LAST_COLUMN to the end of the extracted
+ field.
+
+ Returns true on success, false on failure. */
static bool
cut_field (const struct data_parser *parser, struct dfm_reader *reader,
int *first_column, int *last_column, struct string *tmp,
}
}
+ size_t n_columns;
+ cut_field__ (parser, &line, &p, &n_columns, tmp, field);
*first_column = dfm_column_start (reader);
- if (ss_find_byte (parser->quotes, ss_first (p)) != SIZE_MAX)
- {
- /* Quoted field. */
- int quote = ss_get_byte (&p);
- if (!ss_get_until (&p, quote, field))
- msg (SW, _("Quoted string extends beyond end of line."));
- if (parser->quote_escape && ss_first (p) == quote)
- {
- ds_assign_substring (tmp, *field);
- while (ss_match_byte (&p, quote))
- {
- struct substring ss;
- ds_put_byte (tmp, quote);
- if (!ss_get_until (&p, quote, &ss))
- msg (SW, _("Quoted string extends beyond end of line."));
- ds_put_substring (tmp, ss);
- }
- *field = ds_ss (tmp);
- }
- *last_column = *first_column + (ss_length (line) - ss_length (p));
-
- /* Skip trailing soft separator and a single hard separator
- if present. */
- if (!ss_is_empty (p))
- {
- size_t n_seps = ss_ltrim (&p, parser->soft_seps);
- if (!ss_is_empty (p)
- && ss_find_byte (parser->hard_seps, ss_first (p)) != SIZE_MAX)
- {
- ss_advance (&p, 1);
- n_seps++;
- }
- if (!n_seps)
- msg (SW, _("Missing delimiter following quoted string."));
- }
- }
- else
- {
- /* Regular field. */
- ss_get_bytes (&p, ss_cspan (p, ds_ss (&parser->any_sep)), field);
- *last_column = *first_column + ss_length (*field);
+ *last_column = *first_column + n_columns;
- if (!ss_ltrim (&p, parser->soft_seps) || ss_is_empty (p)
- || ss_find_byte (parser->hard_seps, p.string[0]) != SIZE_MAX)
- {
- /* Advance past a trailing hard separator,
- regardless of whether one actually existed. If
- we "skip" a delimiter that was not actually
- there, then we will return end-of-line on our
- next call, which is what we want. */
- dfm_forward_columns (reader, 1);
- }
- }
+ if (ss_is_empty (p))
+ dfm_forward_columns (reader, 1);
dfm_forward_columns (reader, ss_length (line) - ss_length (p));
return true;
parse_error (const struct dfm_reader *reader, const struct field *field,
int first_column, int last_column, char *error)
{
- struct msg m;
-
- m.category = MSG_C_DATA;
- m.severity = MSG_S_WARNING;
- m.file_name = CONST_CAST (char *, dfm_get_file_name (reader));
- m.first_line = dfm_get_line_number (reader);
- m.last_line = m.first_line + 1;
- m.first_column = first_column;
- m.last_column = last_column;
- m.text = xasprintf (_("Data for variable %s is not valid as format %s: %s"),
- field->name, fmt_name (field->format.type), error);
- msg_emit (&m);
+ int line_number = dfm_get_line_number (reader);
+ struct msg_location *location = xmalloc (sizeof *location);
+ *location = (struct msg_location) {
+ .file_name = intern_new (dfm_get_file_name (reader)),
+ .start = { .line = line_number, .column = first_column },
+ .end = { .line = line_number, .column = last_column - 1 },
+ };
+ struct msg *m = xmalloc (sizeof *m);
+ *m = (struct msg) {
+ .category = MSG_C_DATA,
+ .severity = MSG_S_WARNING,
+ .location = location,
+ .text = xasprintf (_("Data for variable %s is not valid as format %s: %s"),
+ field->name, fmt_name (field->format.type), error),
+ };
+ msg_emit (m);
free (error);
}
-/* Reads a case from READER into C, parsing it according to
- fixed-format syntax rules in PARSER.
- Returns true if successful, false at end of file or on I/O error. */
+/* Reads a case from READER into C, which matches DICT, parsing it according to
+ fixed-format syntax rules in PARSER. Returns true if successful, false at
+ end of file or on I/O error. */
static bool
parse_fixed (const struct data_parser *parser, struct dfm_reader *reader,
- struct ccase *c)
+ struct dictionary *dict, struct ccase *c)
{
const char *input_encoding = dfm_reader_get_encoding (reader);
- const char *output_encoding = dict_get_encoding (parser->dict);
+ const char *output_encoding = dict_get_encoding (dict);
struct field *f;
int row;
if (dfm_eof (reader))
{
- msg (SW, _("Partial case of %d of %d records discarded."),
+ msg (DW, _("Partial case of %d of %d records discarded."),
row - 1, parser->records_per_case);
return false;
}
dfm_expand_tabs (reader);
line = dfm_get_record (reader);
- for (; f < &parser->fields[parser->field_cnt] && f->record == row; f++)
+ for (; f < &parser->fields[parser->n_fields] && f->record == row; f++)
{
struct substring s = ss_substr (line, f->first_column - 1,
f->format.w);
union value *value = case_data_rw_idx (c, f->case_idx);
char *error = data_in (s, input_encoding, f->format.type,
+ settings_get_fmt_settings (),
value, fmt_var_width (&f->format),
output_encoding);
if (error == NULL)
data_in_imply_decimals (s, input_encoding, f->format.type,
- f->format.d, value);
+ f->format.d, settings_get_fmt_settings (),
+ value);
else
parse_error (reader, f, f->first_column,
f->first_column + f->format.w, error);
return true;
}
-/* Reads a case from READER into C, parsing it according to
- free-format syntax rules in PARSER.
- Returns true if successful, false at end of file or on I/O error. */
+/* Breaks the data line LINE into its text fields according to PARSER and
+   returns how many fields were found.  When SA is nonnull, a copy of each
+   field is appended to it; SA and whatever it holds remain the caller's
+   responsibility. */
+size_t
+data_parser_split (const struct data_parser *parser,
+                   struct substring line, struct string_array *sa)
+{
+  struct string scratch = DS_EMPTY_INITIALIZER;
+  size_t n_split = 0;
+
+  /* REST is LINE with leading soft separators dropped.  LINE itself is kept
+     untrimmed because cut_field__ is handed both. */
+  struct substring rest = line;
+  ss_ltrim (&rest, parser->soft_seps);
+  while (!ss_is_empty (rest))
+    {
+      struct substring piece;
+      size_t width;
+
+      /* Splitting is silent: suppress any messages cut_field__ issues. */
+      msg_disable ();
+      cut_field__ (parser, &line, &rest, &width, &scratch, &piece);
+      msg_enable ();
+
+      if (sa != NULL)
+        string_array_append_nocopy (sa, ss_xstrdup (piece));
+      n_split++;
+
+      /* Continue from wherever cut_field__ stopped. */
+      line = rest;
+      ss_ltrim (&rest, parser->soft_seps);
+    }
+
+  ds_destroy (&scratch);
+  return n_split;
+}
+
+/* Reads a case from READER into C, which matches dictionary DICT, parsing it
+ according to free-format syntax rules in PARSER. Returns true if
+ successful, false at end of file or on I/O error. */
static bool
parse_delimited_span (const struct data_parser *parser,
- struct dfm_reader *reader, struct ccase *c)
+ struct dfm_reader *reader,
+ struct dictionary *dict, struct ccase *c)
{
- const char *input_encoding = dfm_reader_get_encoding (reader);
- const char *output_encoding = dict_get_encoding (parser->dict);
+ const char *output_encoding = dict_get_encoding (dict);
struct string tmp = DS_EMPTY_INITIALIZER;
struct field *f;
- for (f = parser->fields; f < &parser->fields[parser->field_cnt]; f++)
+ for (f = parser->fields; f < &parser->fields[parser->n_fields]; f++)
{
struct substring s;
int first_column, last_column;
if (dfm_eof (reader))
{
if (f > parser->fields)
- msg (SW, _("Partial case discarded. The first variable "
+ msg (DW, _("Partial case discarded. The first variable "
"missing was %s."), f->name);
ds_destroy (&tmp);
return false;
}
}
+ const char *input_encoding = dfm_reader_get_encoding (reader);
error = data_in (s, input_encoding, f->format.type,
+ settings_get_fmt_settings (),
case_data_rw_idx (c, f->case_idx),
fmt_var_width (&f->format), output_encoding);
if (error != NULL)
return true;
}
-/* Reads a case from READER into C, parsing it according to
- delimited syntax rules with one case per record in PARSER.
+/* Reads a case from READER into C, which matches dictionary DICT, parsing it
+ according to delimited syntax rules with one case per record in PARSER.
Returns true if successful, false at end of file or on I/O error. */
static bool
parse_delimited_no_span (const struct data_parser *parser,
- struct dfm_reader *reader, struct ccase *c)
+ struct dfm_reader *reader,
+ struct dictionary *dict, struct ccase *c)
{
- const char *input_encoding = dfm_reader_get_encoding (reader);
- const char *output_encoding = dict_get_encoding (parser->dict);
+ const char *output_encoding = dict_get_encoding (dict);
struct string tmp = DS_EMPTY_INITIALIZER;
struct substring s;
struct field *f, *end;
if (dfm_eof (reader))
return false;
- end = &parser->fields[parser->field_cnt];
+ end = &parser->fields[parser->n_fields];
for (f = parser->fields; f < end; f++)
{
int first_column, last_column;
if (!cut_field (parser, reader, &first_column, &last_column, &tmp, &s))
{
- if (f < end - 1 && settings_get_undefined ())
- msg (SW, _("Missing value(s) for all variables from %s onward. "
+ if (f < end - 1 && settings_get_undefined () && parser->warn_missing_fields)
+ msg (DW, _("Missing value(s) for all variables from %s onward. "
"These will be filled with the system-missing value "
"or blanks, as appropriate."),
f->name);
goto exit;
}
+ const char *input_encoding = dfm_reader_get_encoding (reader);
error = data_in (s, input_encoding, f->format.type,
+ settings_get_fmt_settings (),
case_data_rw_idx (c, f->case_idx),
fmt_var_width (&f->format), output_encoding);
if (error != NULL)
s = dfm_get_record (reader);
ss_ltrim (&s, parser->soft_seps);
if (!ss_is_empty (s))
- msg (SW, _("Record ends in data not part of any field."));
+ msg (DW, _("Record ends in data not part of any field."));
exit:
dfm_forward_record (reader);
dump_fixed_table (const struct data_parser *parser,
const struct file_handle *fh)
{
- struct tab_table *t;
- size_t i;
-
- t = tab_create (4, parser->field_cnt + 1);
- tab_headers (t, 0, 0, 1, 0);
- tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
- tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
- tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
- tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
- tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, parser->field_cnt);
- tab_hline (t, TAL_2, 0, 3, 1);
-
- for (i = 0; i < parser->field_cnt; i++)
+ /* XXX This should not be preformatted. */
+ char *title = xasprintf (ngettext ("Reading %d record from %s.",
+ "Reading %d records from %s.",
+ parser->records_per_case),
+ parser->records_per_case, fh_get_name (fh));
+ struct pivot_table *table = pivot_table_create__ (
+ pivot_value_new_user_text (title, -1), "Fixed Data Records");
+ free (title);
+
+ pivot_dimension_create (
+ table, PIVOT_AXIS_COLUMN, N_("Attributes"),
+ N_("Record"), N_("Columns"), N_("Format"));
+
+ struct pivot_dimension *variables = pivot_dimension_create (
+ table, PIVOT_AXIS_ROW, N_("Variable"));
+ variables->root->show_label = true;
+ for (size_t i = 0; i < parser->n_fields; i++)
{
struct field *f = &parser->fields[i];
- char fmt_string[FMT_STRING_LEN_MAX + 1];
- int row = i + 1;
-
- tab_text (t, 0, row, TAB_LEFT, f->name);
- tab_text_format (t, 1, row, 0, "%d", f->record);
- tab_text_format (t, 2, row, 0, "%3d-%3d",
- f->first_column, f->first_column + f->format.w - 1);
- tab_text (t, 3, row, TAB_LEFT | TAB_FIX,
- fmt_to_string (&f->format, fmt_string));
+
+ /* XXX It would be better to have the actual variable here. */
+ int variable_idx = pivot_category_create_leaf (
+ variables->root, pivot_value_new_user_text (f->name, -1));
+
+ pivot_table_put2 (table, 0, variable_idx,
+ pivot_value_new_integer (f->record));
+
+ int first_column = f->first_column;
+ int last_column = f->first_column + f->format.w - 1;
+ char *columns = xasprintf ("%d-%d", first_column, last_column);
+ pivot_table_put2 (table, 1, variable_idx,
+ pivot_value_new_user_text (columns, -1));
+ free (columns);
+
+ char str[FMT_STRING_LEN_MAX + 1];
+ pivot_table_put2 (table, 2, variable_idx,
+ pivot_value_new_user_text (
+ fmt_to_string (&f->format, str), -1));
+
}
- tab_title (t, ngettext ("Reading %d record from %s.",
- "Reading %d records from %s.",
- parser->records_per_case),
- parser->records_per_case, fh_get_name (fh));
- tab_submit (t);
+ pivot_table_submit (table);
}
/* Displays a table giving information on free-format variable parsing
dump_delimited_table (const struct data_parser *parser,
const struct file_handle *fh)
{
- struct tab_table *t;
- size_t i;
+ struct pivot_table *table = pivot_table_create__ (
+ pivot_value_new_text_format (N_("Reading free-form data from %s."),
+ fh_get_name (fh)),
+ "Free-Form Data Records");
- t = tab_create (2, parser->field_cnt + 1);
- tab_headers (t, 0, 0, 1, 0);
- tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
- tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
- tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, parser->field_cnt);
- tab_hline (t, TAL_2, 0, 1, 1);
+ pivot_dimension_create (
+ table, PIVOT_AXIS_COLUMN, N_("Attributes"), N_("Format"));
- for (i = 0; i < parser->field_cnt; i++)
+ struct pivot_dimension *variables = pivot_dimension_create (
+ table, PIVOT_AXIS_ROW, N_("Variable"));
+ variables->root->show_label = true;
+ for (size_t i = 0; i < parser->n_fields; i++)
{
struct field *f = &parser->fields[i];
- char str[FMT_STRING_LEN_MAX + 1];
- int row = i + 1;
- tab_text (t, 0, row, TAB_LEFT, f->name);
- tab_text (t, 1, row, TAB_LEFT | TAB_FIX,
- fmt_to_string (&f->format, str));
- }
+ /* XXX It would be better to have the actual variable here. */
+ int variable_idx = pivot_category_create_leaf (
+ variables->root, pivot_value_new_user_text (f->name, -1));
- tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh));
+ char str[FMT_STRING_LEN_MAX + 1];
+ pivot_table_put2 (table, 0, variable_idx,
+ pivot_value_new_user_text (
+ fmt_to_string (&f->format, str), -1));
+ }
- tab_submit (t);
+ pivot_table_submit (table);
}
/* Displays a table giving information on how PARSER will read
struct data_parser_casereader
{
struct data_parser *parser; /* Parser. */
+ struct dictionary *dict; /* Dictionary passed to the parser (reference held). */
struct dfm_reader *reader; /* Data file reader. */
struct caseproto *proto; /* Format of cases. */
};
transferred to the dataset. */
void
data_parser_make_active_file (struct data_parser *parser, struct dataset *ds,
- struct dfm_reader *reader,
- struct dictionary *dict)
+ struct dfm_reader *reader,
+ struct dictionary *dict,
+ struct casereader* (*func)(struct casereader *,
+ const struct dictionary *,
+ void *),
+ void *ud)
{
struct data_parser_casereader *r;
- struct casereader *casereader;
+ struct casereader *casereader0;
+ struct casereader *casereader1;
r = xmalloc (sizeof *r);
r->parser = parser;
+ /* Keep a reference to DICT for data_parser_casereader_read(); it is
+ released in data_parser_casereader_destroy(). */
+ r->dict = dict_ref (dict);
r->reader = reader;
r->proto = caseproto_ref (dict_get_proto (dict));
- casereader = casereader_create_sequential (NULL, r->proto,
+ casereader0 = casereader_create_sequential (NULL, r->proto,
CASENUMBER_MAX,
&data_parser_casereader_class, r);
+
+ /* If the caller supplied FUNC, give it the parsing casereader together
+ with DICT and the caller's opaque UD; whatever FUNC returns becomes
+ the dataset's source. With a null FUNC the parsing casereader is
+ used directly. */
+ if (func)
+ casereader1 = func (casereader0, dict, ud);
+ else
+ casereader1 = casereader0;
+
dataset_set_dict (ds, dict);
- dataset_set_source (ds, casereader);
+ dataset_set_source (ds, casereader1);
}
+
static struct ccase *
data_parser_casereader_read (struct casereader *reader UNUSED, void *r_)
{
struct data_parser_casereader *r = r_;
struct ccase *c = case_create (r->proto);
- if (data_parser_parse (r->parser, r->reader, c))
+ if (data_parser_parse (r->parser, r->reader, r->dict, c))
return c;
else
{
}
static void
-data_parser_casereader_destroy (struct casereader *reader UNUSED, void *r_)
+data_parser_casereader_destroy (struct casereader *reader, void *r_)
{
struct data_parser_casereader *r = r_;
+ /* Pass an error on the data file reader on to the casereader. */
if (dfm_reader_error (r->reader))
casereader_force_error (reader);
+ /* Release what data_parser_make_active_file() stored in R, then R. */
- data_parser_destroy (r->parser);
dfm_close_reader (r->reader);
caseproto_unref (r->proto);
+ dict_unref (r->dict);
+ data_parser_destroy (r->parser);
free (r);
}