X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fdata-parser.c;h=24a671ee67a265babbf5dad51b8ff30c93ce524d;hb=2be9bee9da6a2ce27715e58128569594319abfa2;hp=eb578e270d3e9fee03e5b9c50e3fb688abad7b2f;hpb=f550aee00a62fe1d8baf62d83cd7efef6cc2ee92;p=pspp-builds.git diff --git a/src/language/data-io/data-parser.c b/src/language/data-io/data-parser.c index eb578e27..24a671ee 100644 --- a/src/language/data-io/data-parser.c +++ b/src/language/data-io/data-parser.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007, 2009 Free Software Foundation, Inc. + Copyright (C) 2007, 2009, 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,24 +16,24 @@ #include -#include +#include "language/data-io/data-parser.h" #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "data/casereader-provider.h" +#include "data/data-in.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/file-handle-def.h" +#include "data/settings.h" +#include "language/data-io/data-reader.h" +#include "libpspp/message.h" +#include "libpspp/str.h" +#include "output/tab.h" -#include "xalloc.h" +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -395,7 +395,6 @@ data_parser_parse (struct data_parser *parser, struct dfm_reader *reader, && dfm_get_percent_read (reader) >= parser->percent_cases) return false; - dfm_push (reader); if (parser->type == DP_DELIMITED) { if (parser->span) @@ -405,7 +404,6 @@ data_parser_parse (struct data_parser *parser, struct dfm_reader *reader, } else retval = parse_fixed (parser, reader, c); - dfm_pop (reader); return retval; } @@ -452,41 +450,42 @@ cut_field (const struct data_parser *parser, struct dfm_reader *reader, } *first_column = dfm_column_start (reader); - if (ss_find_char (parser->quotes, ss_first (p)) != SIZE_MAX) + if (ss_find_byte (parser->quotes, ss_first (p)) != SIZE_MAX) { /* Quoted field. */ - int quote = ss_get_char (&p); + int quote = ss_get_byte (&p); if (!ss_get_until (&p, quote, field)) msg (SW, _("Quoted string extends beyond end of line.")); if (parser->quote_escape && ss_first (p) == quote) { ds_assign_substring (tmp, *field); - while (ss_match_char (&p, quote)) + while (ss_match_byte (&p, quote)) { struct substring ss; - ds_put_char (tmp, quote); + ds_put_byte (tmp, quote); if (!ss_get_until (&p, quote, &ss)) msg (SW, _("Quoted string extends beyond end of line.")); ds_put_substring (tmp, ss); } *field = ds_ss (tmp); } - *last_column = dfm_column_start (reader); + *last_column = *first_column + (ss_length (line) - ss_length (p)); /* Skip trailing soft separator and a single hard separator if present. */ ss_ltrim (&p, parser->soft_seps); if (!ss_is_empty (p) - && ss_find_char (parser->hard_seps, ss_first (p)) != SIZE_MAX) + && ss_find_byte (parser->hard_seps, ss_first (p)) != SIZE_MAX) ss_advance (&p, 1); } else { /* Regular field. */ - ss_get_chars (&p, ss_cspan (p, ds_ss (&parser->any_sep)), field); - *last_column = dfm_column_start (reader); + ss_get_bytes (&p, ss_cspan (p, ds_ss (&parser->any_sep)), field); + *last_column = *first_column + ss_length (*field); + if (!ss_ltrim (&p, parser->soft_seps) || ss_is_empty (p) - || ss_find_char (parser->hard_seps, p.string[0]) != SIZE_MAX) + || ss_find_byte (parser->hard_seps, p.string[0]) != SIZE_MAX) { /* Advance past a trailing hard separator, regardless of whether one actually existed. If @@ -501,6 +500,26 @@ cut_field (const struct data_parser *parser, struct dfm_reader *reader, return true; } +static void +parse_error (const struct dfm_reader *reader, const struct field *field, + int first_column, int last_column, char *error) +{ + struct msg m; + + m.category = MSG_C_DATA; + m.severity = MSG_S_WARNING; + m.file_name = CONST_CAST (char *, dfm_get_file_name (reader)); + m.first_line = dfm_get_line_number (reader); + m.last_line = m.first_line + 1; + m.first_column = first_column; + m.last_column = last_column; + m.text = xasprintf (_("Data for variable %s is not valid as format %s: %s"), + field->name, fmt_name (field->format.type), error); + msg_emit (&m); + + free (error); +} + /* Reads a case from READER into C, parsing it according to fixed-format syntax rules in PARSER. Returns true if successful, false at end of file or on I/O error. */ @@ -508,7 +527,8 @@ static bool parse_fixed (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - const char *encoding = dfm_reader_get_legacy_encoding (reader); + const char *input_encoding = dfm_reader_get_legacy_encoding (reader); + const char *output_encoding = dict_get_encoding (parser->dict); struct field *f; int row; @@ -530,13 +550,21 @@ parse_fixed (const struct data_parser *parser, struct dfm_reader *reader, line = dfm_get_record (reader); for (; f < &parser->fields[parser->field_cnt] && f->record == row; f++) - data_in (ss_substr (line, f->first_column - 1, - f->format.w), - encoding, f->format.type, f->format.d, - f->first_column, f->first_column + f->format.w, - parser->dict, - case_data_rw_idx (c, f->case_idx), - fmt_var_width (&f->format)); + { + struct substring s = ss_substr (line, f->first_column - 1, + f->format.w); + union value *value = case_data_rw_idx (c, f->case_idx); + char *error = data_in (s, input_encoding, f->format.type, + value, fmt_var_width (&f->format), + output_encoding); + + if (error == NULL) + data_in_imply_decimals (s, input_encoding, f->format.type, + f->format.d, value); + else + parse_error (reader, f, f->first_column, + f->first_column + f->format.w, error); + } dfm_forward_record (reader); } @@ -551,7 +579,8 @@ static bool parse_delimited_span (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - const char *encoding = dfm_reader_get_legacy_encoding (reader); + const char *input_encoding = dfm_reader_get_legacy_encoding (reader); + const char *output_encoding = dict_get_encoding (parser->dict); struct string tmp = DS_EMPTY_INITIALIZER; struct field *f; @@ -559,6 +588,7 @@ parse_delimited_span (const struct data_parser *parser, { struct substring s; int first_column, last_column; + char *error; /* Cut out a field and read in a new record if necessary. */ while (!cut_field (parser, reader, @@ -576,11 +606,11 @@ parse_delimited_span (const struct data_parser *parser, } } - data_in (s, encoding, f->format.type, 0, - first_column, last_column, - parser->dict, - case_data_rw_idx (c, f->case_idx), - fmt_var_width (&f->format)); + error = data_in (s, input_encoding, f->format.type, + case_data_rw_idx (c, f->case_idx), + fmt_var_width (&f->format), output_encoding); + if (error != NULL) + parse_error (reader, f, first_column, last_column, error); } ds_destroy (&tmp); return true; @@ -593,35 +623,39 @@ static bool parse_delimited_no_span (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - const char *encoding = dfm_reader_get_legacy_encoding (reader); + const char *input_encoding = dfm_reader_get_legacy_encoding (reader); + const char *output_encoding = dict_get_encoding (parser->dict); struct string tmp = DS_EMPTY_INITIALIZER; struct substring s; - struct field *f; + struct field *f, *end; if (dfm_eof (reader)) return false; - for (f = parser->fields; f < &parser->fields[parser->field_cnt]; f++) + end = &parser->fields[parser->field_cnt]; + for (f = parser->fields; f < end; f++) { int first_column, last_column; + char *error; + if (!cut_field (parser, reader, &first_column, &last_column, &tmp, &s)) { - if (settings_get_undefined ()) + if (f < end - 1 && settings_get_undefined ()) msg (SW, _("Missing value(s) for all variables from %s onward. " "These will be filled with the system-missing value " "or blanks, as appropriate."), f->name); - for (; f < &parser->fields[parser->field_cnt]; f++) + for (; f < end; f++) value_set_missing (case_data_rw_idx (c, f->case_idx), fmt_var_width (&f->format)); goto exit; } - data_in (s, encoding, f->format.type, 0, - first_column, last_column, - parser->dict, - case_data_rw_idx (c, f->case_idx), - fmt_var_width (&f->format)); + error = data_in (s, input_encoding, f->format.type, + case_data_rw_idx (c, f->case_idx), + fmt_var_width (&f->format), output_encoding); + if (error != NULL) + parse_error (reader, f, first_column, last_column, error); } s = dfm_get_record (reader);