&& dfm_get_percent_read (reader) >= parser->percent_cases)
return false;
- dfm_push (reader);
if (parser->type == DP_DELIMITED)
{
if (parser->span)
}
else
retval = parse_fixed (parser, reader, c);
- dfm_pop (reader);
return retval;
}
}
*first_column = dfm_column_start (reader);
- if (ss_find_char (parser->quotes, ss_first (p)) != SIZE_MAX)
+ if (ss_find_byte (parser->quotes, ss_first (p)) != SIZE_MAX)
{
/* Quoted field. */
- int quote = ss_get_char (&p);
+ int quote = ss_get_byte (&p);
if (!ss_get_until (&p, quote, field))
msg (SW, _("Quoted string extends beyond end of line."));
if (parser->quote_escape && ss_first (p) == quote)
{
ds_assign_substring (tmp, *field);
- while (ss_match_char (&p, quote))
+ while (ss_match_byte (&p, quote))
{
struct substring ss;
- ds_put_char (tmp, quote);
+ ds_put_byte (tmp, quote);
if (!ss_get_until (&p, quote, &ss))
msg (SW, _("Quoted string extends beyond end of line."));
ds_put_substring (tmp, ss);
if present. */
ss_ltrim (&p, parser->soft_seps);
if (!ss_is_empty (p)
- && ss_find_char (parser->hard_seps, ss_first (p)) != SIZE_MAX)
+ && ss_find_byte (parser->hard_seps, ss_first (p)) != SIZE_MAX)
ss_advance (&p, 1);
}
else
{
/* Regular field. */
- ss_get_chars (&p, ss_cspan (p, ds_ss (&parser->any_sep)), field);
+ ss_get_bytes (&p, ss_cspan (p, ds_ss (&parser->any_sep)), field);
*last_column = *first_column + ss_length (*field);
if (!ss_ltrim (&p, parser->soft_seps) || ss_is_empty (p)
- || ss_find_char (parser->hard_seps, p.string[0]) != SIZE_MAX)
+ || ss_find_byte (parser->hard_seps, p.string[0]) != SIZE_MAX)
{
/* Advance past a trailing hard separator,
regardless of whether one actually existed. If
return true;
}
+/* Emits a data warning reporting that the contents of FIELD could not
+   be parsed according to FIELD's format.
+
+   The warning is located at READER's current file name and line
+   number, with FIRST_COLUMN and LAST_COLUMN stored as the column range
+   exactly as supplied by the caller.  Its text names the variable
+   (FIELD->name) and the format (via fmt_name), followed by ERROR.
+
+   ERROR is passed to free() before returning, so the caller hands over
+   the string and must not use it afterward. */
+static void
+parse_error (const struct dfm_reader *reader, const struct field *field,
+ int first_column, int last_column, char *error)
+{
+ struct msg m;
+
+ /* NOTE(review): only the members assigned below are set; any other
+    members of struct msg are left uninitialized -- confirm that
+    msg_emit reads none of them. */
+ m.category = MSG_C_DATA;
+ m.severity = MSG_S_WARNING;
+ /* The file name is cast to `char *' to be stored in the message's
+    location. */
+ m.where.file_name = CONST_CAST (char *, dfm_get_file_name (reader));
+ m.where.line_number = dfm_get_line_number (reader);
+ m.where.first_column = first_column;
+ m.where.last_column = last_column;
+ /* NOTE(review): m.text is not released in this function; presumably
+    msg_emit takes ownership of it -- confirm. */
+ m.text = xasprintf (_("Data for variable %s is not valid as format %s: %s"),
+ field->name, fmt_name (field->format.type), error);
+ msg_emit (&m);
+
+ /* ERROR was only needed to build the message text above. */
+ free (error);
+}
+
/* Reads a case from READER into C, parsing it according to
fixed-format syntax rules in PARSER.
Returns true if successful, false at end of file or on I/O error. */
parse_fixed (const struct data_parser *parser, struct dfm_reader *reader,
struct ccase *c)
{
- const char *encoding = dfm_reader_get_legacy_encoding (reader);
+ const char *input_encoding = dfm_reader_get_legacy_encoding (reader);
+ const char *output_encoding = dict_get_encoding (parser->dict);
struct field *f;
int row;
struct substring s = ss_substr (line, f->first_column - 1,
f->format.w);
union value *value = case_data_rw_idx (c, f->case_idx);
-
- data_in (s, encoding, f->format.type,
- f->first_column, f->first_column + f->format.w,
- parser->dict, value, fmt_var_width (&f->format));
-
- data_in_imply_decimals (s, encoding, f->format.type, f->format.d,
- value);
+ char *error = data_in (s, input_encoding, f->format.type,
+ value, fmt_var_width (&f->format),
+ output_encoding);
+
+ if (error == NULL)
+ data_in_imply_decimals (s, input_encoding, f->format.type,
+ f->format.d, value);
+ else
+ parse_error (reader, f, f->first_column,
+ f->first_column + f->format.w, error);
}
dfm_forward_record (reader);
parse_delimited_span (const struct data_parser *parser,
struct dfm_reader *reader, struct ccase *c)
{
- const char *encoding = dfm_reader_get_legacy_encoding (reader);
+ const char *input_encoding = dfm_reader_get_legacy_encoding (reader);
+ const char *output_encoding = dict_get_encoding (parser->dict);
struct string tmp = DS_EMPTY_INITIALIZER;
struct field *f;
{
struct substring s;
int first_column, last_column;
+ char *error;
/* Cut out a field and read in a new record if necessary. */
while (!cut_field (parser, reader,
}
}
- data_in (s, encoding, f->format.type, first_column, last_column,
- parser->dict,
- case_data_rw_idx (c, f->case_idx),
- fmt_var_width (&f->format));
+ error = data_in (s, input_encoding, f->format.type,
+ case_data_rw_idx (c, f->case_idx),
+ fmt_var_width (&f->format), output_encoding);
+ if (error != NULL)
+ parse_error (reader, f, first_column, last_column, error);
}
ds_destroy (&tmp);
return true;
parse_delimited_no_span (const struct data_parser *parser,
struct dfm_reader *reader, struct ccase *c)
{
- const char *encoding = dfm_reader_get_legacy_encoding (reader);
+ const char *input_encoding = dfm_reader_get_legacy_encoding (reader);
+ const char *output_encoding = dict_get_encoding (parser->dict);
struct string tmp = DS_EMPTY_INITIALIZER;
struct substring s;
struct field *f, *end;
for (f = parser->fields; f < end; f++)
{
int first_column, last_column;
+ char *error;
+
if (!cut_field (parser, reader, &first_column, &last_column, &tmp, &s))
{
if (f < end - 1 && settings_get_undefined ())
goto exit;
}
- data_in (s, encoding, f->format.type, first_column, last_column,
- parser->dict,
- case_data_rw_idx (c, f->case_idx),
- fmt_var_width (&f->format));
+ error = data_in (s, input_encoding, f->format.type,
+ case_data_rw_idx (c, f->case_idx),
+ fmt_var_width (&f->format), output_encoding);
+ if (error != NULL)
+ parse_error (reader, f, first_column, last_column, error);
}
s = dfm_get_record (reader);