X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=inline;f=src%2Flanguage%2Fdata-io%2Fdata-parser.c;h=63f9f0eda64afd0cf228a8bb4c43010870f15dc2;hb=cc0b5800fcdde6126c4fc65b656f39c1459bf17c;hp=55c62e08c0e0612868313e8647a1f42fb46f0785;hpb=254183e467ae32e6581fafc798a057f55e34773e;p=pspp diff --git a/src/language/data-io/data-parser.c b/src/language/data-io/data-parser.c index 55c62e08c0..63f9f0eda6 100644 --- a/src/language/data-io/data-parser.c +++ b/src/language/data-io/data-parser.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. + Copyright (C) 2007, 2009, 2010, 2011, 2012, 2013, 2016 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -44,8 +44,6 @@ struct data_parser const struct dictionary *dict; /*Dictionary of destination */ enum data_parser_type type; /* Type of data to parse. */ int skip_records; /* Records to skip before first real data. */ - casenumber max_cases; /* Max number of cases to read. */ - int percent_cases; /* Approximate percent of cases to read. */ struct field *fields; /* Fields to parse. */ size_t field_cnt; /* Number of fields. */ @@ -54,6 +52,7 @@ struct data_parser /* DP_DELIMITED parsers only. */ bool span; /* May cases span multiple records? */ bool empty_line_has_field; /* Does an empty line have an (empty) field? */ + bool warn_missing_fields; /* Should missing fields be considered errors? */ struct substring quotes; /* Characters that can quote separators. */ bool quote_escape; /* Doubled quote acts as escape? */ struct substring soft_seps; /* Two soft separators act like just one. */ @@ -86,8 +85,6 @@ data_parser_create (const struct dictionary *dict) parser->type = DP_FIXED; parser->skip_records = 0; - parser->max_cases = -1; - parser->percent_cases = 100; parser->fields = NULL; parser->field_cnt = 0; @@ -96,6 +93,7 @@ data_parser_create (const struct dictionary *dict) parser->span = true; parser->empty_line_has_field = false; + parser->warn_missing_fields = true; ss_alloc_substring (&parser->quotes, ss_cstr ("\"'")); parser->quote_escape = false; ss_alloc_substring (&parser->soft_seps, ss_cstr (CC_SPACES)); @@ -154,24 +152,6 @@ data_parser_set_skip (struct data_parser *parser, int initial_records_to_skip) parser->skip_records = initial_records_to_skip; } -/* Sets the maximum number of cases parsed by PARSER to - MAX_CASES. The default is -1, meaning no limit. */ -void -data_parser_set_case_limit (struct data_parser *parser, casenumber max_cases) -{ - parser->max_cases = max_cases; -} - -/* Sets the percentage of cases that PARSER should read from the - input file to PERCENT_CASES. By default, all cases are - read. */ -void -data_parser_set_case_percent (struct data_parser *parser, int percent_cases) -{ - assert (percent_cases >= 0 && percent_cases <= 100); - parser->percent_cases = percent_cases; -} - /* Returns true if PARSER is configured to allow cases to span multiple records. */ bool @@ -208,6 +188,21 @@ data_parser_set_empty_line_has_field (struct data_parser *parser, parser->empty_line_has_field = empty_line_has_field; } + +/* If WARN_MISSING_FIELDS is true, configures PARSER to emit a warning + and cause an error condition when a missing field is encountered. + If WARN_MISSING_FIELDS is false, PARSER will silently fill such + fields with the system missing value. + + This setting affects parsing of DP_DELIMITED files only. */ +void +data_parser_set_warn_missing_fields (struct data_parser *parser, + bool warn_missing_fields) +{ + parser->warn_missing_fields = warn_missing_fields; +} + + /* Sets the characters that may be used for quoting field contents to QUOTES. If QUOTES is empty, quoting will be disabled. @@ -389,12 +384,6 @@ data_parser_parse (struct data_parser *parser, struct dfm_reader *reader, } /* Limit cases. */ - if (parser->max_cases != -1 && parser->max_cases-- == 0) - return false; - if (parser->percent_cases < 100 - && dfm_get_percent_read (reader) >= parser->percent_cases) - return false; - if (parser->type == DP_DELIMITED) { if (parser->span) @@ -414,10 +403,11 @@ data_parser_parse (struct data_parser *parser, struct dfm_reader *reader, *FIELD is set to the field content. The caller must not or destroy this constant string. - After parsing the field, sets the current position in the - record to just past the field and any trailing delimiter. - Returns 0 on failure or a 1-based column number indicating the - beginning of the field on success. */ + Sets *FIRST_COLUMN to the 1-based column number of the start of + the extracted field, and *LAST_COLUMN to the end of the extracted + field. + + Returns true on success, false on failure. */ static bool cut_field (const struct data_parser *parser, struct dfm_reader *reader, int *first_column, int *last_column, struct string *tmp, @@ -578,7 +568,6 @@ static bool parse_delimited_span (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - const char *input_encoding = dfm_reader_get_encoding (reader); const char *output_encoding = dict_get_encoding (parser->dict); struct string tmp = DS_EMPTY_INITIALIZER; struct field *f; @@ -605,6 +594,7 @@ parse_delimited_span (const struct data_parser *parser, } } + const char *input_encoding = dfm_reader_get_encoding (reader); error = data_in (s, input_encoding, f->format.type, case_data_rw_idx (c, f->case_idx), fmt_var_width (&f->format), output_encoding); @@ -622,7 +612,6 @@ static bool parse_delimited_no_span (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - const char *input_encoding = dfm_reader_get_encoding (reader); const char *output_encoding = dict_get_encoding (parser->dict); struct string tmp = DS_EMPTY_INITIALIZER; struct substring s; @@ -639,7 +628,7 @@ parse_delimited_no_span (const struct data_parser *parser, if (!cut_field (parser, reader, &first_column, &last_column, &tmp, &s)) { - if (f < end - 1 && settings_get_undefined ()) + if (f < end - 1 && settings_get_undefined () && parser->warn_missing_fields) msg (DW, _("Missing value(s) for all variables from %s onward. " "These will be filled with the system-missing value " "or blanks, as appropriate."), @@ -650,6 +639,7 @@ parse_delimited_no_span (const struct data_parser *parser, goto exit; } + const char *input_encoding = dfm_reader_get_encoding (reader); error = data_in (s, input_encoding, f->format.type, case_data_rw_idx (c, f->case_idx), fmt_var_width (&f->format), output_encoding); @@ -768,23 +758,35 @@ static const struct casereader_class data_parser_casereader_class; transferred to the dataset. */ void data_parser_make_active_file (struct data_parser *parser, struct dataset *ds, - struct dfm_reader *reader, - struct dictionary *dict) + struct dfm_reader *reader, + struct dictionary *dict, + struct casereader* (*func)(struct casereader *, + const struct dictionary *, + void *), + void *ud) { struct data_parser_casereader *r; - struct casereader *casereader; + struct casereader *casereader0; + struct casereader *casereader1; r = xmalloc (sizeof *r); r->parser = parser; r->reader = reader; r->proto = caseproto_ref (dict_get_proto (dict)); - casereader = casereader_create_sequential (NULL, r->proto, + casereader0 = casereader_create_sequential (NULL, r->proto, CASENUMBER_MAX, &data_parser_casereader_class, r); + + if (func) + casereader1 = func (casereader0, dict, ud); + else + casereader1 = casereader0; + dataset_set_dict (ds, dict); - dataset_set_source (ds, casereader); + dataset_set_source (ds, casereader1); } + static struct ccase * data_parser_casereader_read (struct casereader *reader UNUSED, void *r_) {