X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fdata-parser.c;h=960505a5ce25f322931f1e8050161f72f0698ede;hb=1b5c2d8fe129a5f8db76c42250a9c199deccb773;hp=aea3bbd0374693ca6bd1846aaa96ce9cfb041ca3;hpb=fe8dc2171009e90d2335f159d05f7e6660e24780;p=pspp diff --git a/src/language/data-io/data-parser.c b/src/language/data-io/data-parser.c index aea3bbd037..960505a5ce 100644 --- a/src/language/data-io/data-parser.c +++ b/src/language/data-io/data-parser.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007, 2009, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 2007, 2009, 2010, 2011, 2012, 2013, 2016 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -44,8 +44,6 @@ struct data_parser const struct dictionary *dict; /*Dictionary of destination */ enum data_parser_type type; /* Type of data to parse. */ int skip_records; /* Records to skip before first real data. */ - casenumber max_cases; /* Max number of cases to read. */ - int percent_cases; /* Approximate percent of cases to read. */ struct field *fields; /* Fields to parse. */ size_t field_cnt; /* Number of fields. */ @@ -86,8 +84,6 @@ data_parser_create (const struct dictionary *dict) parser->type = DP_FIXED; parser->skip_records = 0; - parser->max_cases = -1; - parser->percent_cases = 100; parser->fields = NULL; parser->field_cnt = 0; @@ -154,24 +150,6 @@ data_parser_set_skip (struct data_parser *parser, int initial_records_to_skip) parser->skip_records = initial_records_to_skip; } -/* Sets the maximum number of cases parsed by PARSER to - MAX_CASES. The default is -1, meaning no limit. */ -void -data_parser_set_case_limit (struct data_parser *parser, casenumber max_cases) -{ - parser->max_cases = max_cases; -} - -/* Sets the percentage of cases that PARSER should read from the - input file to PERCENT_CASES. By default, all cases are - read. */ -void -data_parser_set_case_percent (struct data_parser *parser, int percent_cases) -{ - assert (percent_cases >= 0 && percent_cases <= 100); - parser->percent_cases = percent_cases; -} - /* Returns true if PARSER is configured to allow cases to span multiple records. */ bool @@ -389,12 +367,6 @@ data_parser_parse (struct data_parser *parser, struct dfm_reader *reader, } /* Limit cases. */ - if (parser->max_cases != -1 && parser->max_cases-- == 0) - return false; - if (parser->percent_cases < 100 - && dfm_get_percent_read (reader) >= parser->percent_cases) - return false; - if (parser->type == DP_DELIMITED) { if (parser->span) @@ -423,7 +395,9 @@ cut_field (const struct data_parser *parser, struct dfm_reader *reader, int *first_column, int *last_column, struct string *tmp, struct substring *field) { + size_t length_before_separators; struct substring line, p; + bool quoted; if (dfm_eof (reader)) return false; @@ -450,12 +424,13 @@ cut_field (const struct data_parser *parser, struct dfm_reader *reader, } *first_column = dfm_column_start (reader); - if (ss_find_byte (parser->quotes, ss_first (p)) != SIZE_MAX) + quoted = ss_find_byte (parser->quotes, ss_first (p)) != SIZE_MAX; + if (quoted) { /* Quoted field. */ int quote = ss_get_byte (&p); if (!ss_get_until (&p, quote, field)) - msg (SW, _("Quoted string extends beyond end of line.")); + msg (DW, _("Quoted string extends beyond end of line.")); if (parser->quote_escape && ss_first (p) == quote) { ds_assign_substring (tmp, *field); @@ -464,37 +439,33 @@ cut_field (const struct data_parser *parser, struct dfm_reader *reader, struct substring ss; ds_put_byte (tmp, quote); if (!ss_get_until (&p, quote, &ss)) - msg (SW, _("Quoted string extends beyond end of line.")); + msg (DW, _("Quoted string extends beyond end of line.")); ds_put_substring (tmp, ss); } *field = ds_ss (tmp); } *last_column = *first_column + (ss_length (line) - ss_length (p)); - - /* Skip trailing soft separator and a single hard separator - if present. */ - ss_ltrim (&p, parser->soft_seps); - if (!ss_is_empty (p) - && ss_find_byte (parser->hard_seps, ss_first (p)) != SIZE_MAX) - ss_advance (&p, 1); } else { /* Regular field. */ ss_get_bytes (&p, ss_cspan (p, ds_ss (&parser->any_sep)), field); *last_column = *first_column + ss_length (*field); + } - if (!ss_ltrim (&p, parser->soft_seps) || ss_is_empty (p) - || ss_find_byte (parser->hard_seps, p.string[0]) != SIZE_MAX) - { - /* Advance past a trailing hard separator, - regardless of whether one actually existed. If - we "skip" a delimiter that was not actually - there, then we will return end-of-line on our - next call, which is what we want. */ - dfm_forward_columns (reader, 1); - } + /* Skip trailing soft separator and a single hard separator if present. */ + length_before_separators = ss_length (p); + ss_ltrim (&p, parser->soft_seps); + if (!ss_is_empty (p) + && ss_find_byte (parser->hard_seps, ss_first (p)) != SIZE_MAX) + { + ss_advance (&p, 1); + ss_ltrim (&p, parser->soft_seps); } + if (ss_is_empty (p)) + dfm_forward_columns (reader, 1); + else if (quoted && length_before_separators == ss_length (p)) + msg (DW, _("Missing delimiter following quoted string.")); dfm_forward_columns (reader, ss_length (line) - ss_length (p)); return true; @@ -527,7 +498,7 @@ static bool parse_fixed (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - const char *input_encoding = dfm_reader_get_legacy_encoding (reader); + const char *input_encoding = dfm_reader_get_encoding (reader); const char *output_encoding = dict_get_encoding (parser->dict); struct field *f; int row; @@ -542,7 +513,7 @@ parse_fixed (const struct data_parser *parser, struct dfm_reader *reader, if (dfm_eof (reader)) { - msg (SW, _("Partial case of %d of %d records discarded."), + msg (DW, _("Partial case of %d of %d records discarded."), row - 1, parser->records_per_case); return false; } @@ -579,7 +550,6 @@ static bool parse_delimited_span (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - const char *input_encoding = dfm_reader_get_legacy_encoding (reader); const char *output_encoding = dict_get_encoding (parser->dict); struct string tmp = DS_EMPTY_INITIALIZER; struct field *f; @@ -599,13 +569,14 @@ parse_delimited_span (const struct data_parser *parser, if (dfm_eof (reader)) { if (f > parser->fields) - msg (SW, _("Partial case discarded. The first variable " + msg (DW, _("Partial case discarded. The first variable " "missing was %s."), f->name); ds_destroy (&tmp); return false; } } + const char *input_encoding = dfm_reader_get_encoding (reader); error = data_in (s, input_encoding, f->format.type, case_data_rw_idx (c, f->case_idx), fmt_var_width (&f->format), output_encoding); @@ -623,7 +594,6 @@ static bool parse_delimited_no_span (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - const char *input_encoding = dfm_reader_get_legacy_encoding (reader); const char *output_encoding = dict_get_encoding (parser->dict); struct string tmp = DS_EMPTY_INITIALIZER; struct substring s; @@ -641,7 +611,7 @@ parse_delimited_no_span (const struct data_parser *parser, if (!cut_field (parser, reader, &first_column, &last_column, &tmp, &s)) { if (f < end - 1 && settings_get_undefined ()) - msg (SW, _("Missing value(s) for all variables from %s onward. " + msg (DW, _("Missing value(s) for all variables from %s onward. " "These will be filled with the system-missing value " "or blanks, as appropriate."), f->name); @@ -651,6 +621,7 @@ parse_delimited_no_span (const struct data_parser *parser, goto exit; } + const char *input_encoding = dfm_reader_get_encoding (reader); error = data_in (s, input_encoding, f->format.type, case_data_rw_idx (c, f->case_idx), fmt_var_width (&f->format), output_encoding); @@ -661,7 +632,7 @@ parse_delimited_no_span (const struct data_parser *parser, s = dfm_get_record (reader); ss_ltrim (&s, parser->soft_seps); if (!ss_is_empty (s)) - msg (SW, _("Record ends in data not part of any field.")); + msg (DW, _("Record ends in data not part of any field.")); exit: dfm_forward_record (reader);