From 4f3cf328723cf70c06702904e85768d6601918a2 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 27 May 2022 17:15:02 -0700 Subject: [PATCH] data-parser: Make data parser not require a dictionary. An upcoming commit will make the GUI's text import use this code too, in a context where a dictionary isn't yet available. The data parser only uses the dictionary for one specific purpose that isn't relevant for the GUI, so remove the need for it. --- src/language/data-io/data-list.c | 7 +++- src/language/data-io/data-parser.c | 64 ++++++++++++++++-------------- src/language/data-io/data-parser.h | 7 ++-- src/language/data-io/get-data.c | 2 +- 4 files changed, 44 insertions(+), 36 deletions(-) diff --git a/src/language/data-io/data-list.c b/src/language/data-io/data-list.c index 1ade0dc617..e464db1223 100644 --- a/src/language/data-io/data-list.c +++ b/src/language/data-io/data-list.c @@ -58,6 +58,7 @@ struct data_list_trns { struct data_parser *parser; /* Parser. */ + struct dictionary *dict; /* Dictionary. */ struct dfm_reader *reader; /* Data file reader. */ struct variable *end; /* Variable specified on END subcommand. */ }; @@ -88,7 +89,7 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) dict = (in_input_program () ? dataset_dict (ds) : dict_create (get_default_encoding ())); - parser = data_parser_create (dict); + parser = data_parser_create (); reader = NULL; table = -1; /* Print table if nonzero, -1=undecided. */ @@ -292,6 +293,7 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) { struct data_list_trns *trns = xmalloc (sizeof *trns); trns->parser = parser; + trns->dict = dict_ref (dict); trns->reader = reader; trns->end = end; add_transformation (ds, &data_list_trns_class, trns); @@ -507,6 +509,7 @@ data_list_trns_free (void *trns_) struct data_list_trns *trns = trns_; data_parser_destroy (trns->parser); dfm_close_reader (trns->reader); + dict_unref (trns->dict); free (trns); return true; } @@ -519,7 +522,7 @@ data_list_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED) enum trns_result retval; *c = case_unshare (*c); - if (data_parser_parse (trns->parser, trns->reader, *c)) + if (data_parser_parse (trns->parser, trns->reader, trns->dict, *c)) retval = TRNS_CONTINUE; else if (dfm_reader_error (trns->reader) || dfm_eof (trns->reader) > 1) { diff --git a/src/language/data-io/data-parser.c b/src/language/data-io/data-parser.c index 359f2df64c..e7896f6bbe 100644 --- a/src/language/data-io/data-parser.c +++ b/src/language/data-io/data-parser.c @@ -43,7 +43,6 @@ /* Data parser for textual data like that read by DATA LIST. */ struct data_parser { - struct dictionary *dict; /* Dictionary of destination */ enum data_parser_type type; /* Type of data to parse. */ int skip_records; /* Records to skip before first real data. */ @@ -81,7 +80,7 @@ static void set_any_sep (struct data_parser *parser); /* Creates and returns a new data parser. */ struct data_parser * -data_parser_create (struct dictionary *dict) +data_parser_create (void) { struct data_parser *parser = xmalloc (sizeof *parser); @@ -91,7 +90,6 @@ data_parser_create (struct dictionary *dict) parser->fields = NULL; parser->n_fields = 0; parser->field_allocated = 0; - parser->dict = dict_ref (dict); parser->span = true; parser->empty_line_has_field = false; @@ -116,7 +114,6 @@ data_parser_destroy (struct data_parser *parser) { size_t i; - dict_unref (parser->dict); for (i = 0; i < parser->n_fields; i++) free (parser->fields[i].name); free (parser->fields); @@ -358,19 +355,21 @@ set_any_sep (struct data_parser *parser) } static bool parse_delimited_span (const struct data_parser *, - struct dfm_reader *, struct ccase *); + struct dfm_reader *, + struct dictionary *, struct ccase *); static bool parse_delimited_no_span (const struct data_parser *, - struct dfm_reader *, struct ccase *); -static bool parse_fixed (const struct data_parser *, - struct dfm_reader *, struct ccase *); + struct dfm_reader *, + struct dictionary *, struct ccase *); +static bool parse_fixed (const struct data_parser *, struct dfm_reader *, + struct dictionary *, struct ccase *); -/* Reads a case from DFM into C, parsing it with PARSER. Returns - true if successful, false at end of file or on I/O error. +/* Reads a case from DFM into C, which matches dictionary DICT, parsing it with + PARSER. Returns true if successful, false at end of file or on I/O error. Case C must not be shared. */ bool data_parser_parse (struct data_parser *parser, struct dfm_reader *reader, - struct ccase *c) + struct dictionary *dict, struct ccase *c) { bool retval; @@ -390,12 +389,12 @@ data_parser_parse (struct data_parser *parser, struct dfm_reader *reader, if (parser->type == DP_DELIMITED) { if (parser->span) - retval = parse_delimited_span (parser, reader, c); + retval = parse_delimited_span (parser, reader, dict, c); else - retval = parse_delimited_no_span (parser, reader, c); + retval = parse_delimited_no_span (parser, reader, dict, c); } else - retval = parse_fixed (parser, reader, c); + retval = parse_fixed (parser, reader, dict, c); return retval; } @@ -516,15 +515,15 @@ parse_error (const struct dfm_reader *reader, const struct field *field, free (error); } -/* Reads a case from READER into C, parsing it according to - fixed-format syntax rules in PARSER. - Returns true if successful, false at end of file or on I/O error. */ +/* Reads a case from READER into C, which matches DICT, parsing it according to + fixed-format syntax rules in PARSER. Returns true if successful, false at + end of file or on I/O error. */ static bool parse_fixed (const struct data_parser *parser, struct dfm_reader *reader, - struct ccase *c) + struct dictionary *dict, struct ccase *c) { const char *input_encoding = dfm_reader_get_encoding (reader); - const char *output_encoding = dict_get_encoding (parser->dict); + const char *output_encoding = dict_get_encoding (dict); struct field *f; int row; @@ -570,14 +569,15 @@ parse_fixed (const struct data_parser *parser, struct dfm_reader *reader, return true; } -/* Reads a case from READER into C, parsing it according to - free-format syntax rules in PARSER. - Returns true if successful, false at end of file or on I/O error. */ +/* Reads a case from READER into C, which matches dictionary DICT, parsing it + according to free-format syntax rules in PARSER. Returns true if + successful, false at end of file or on I/O error. */ static bool parse_delimited_span (const struct data_parser *parser, - struct dfm_reader *reader, struct ccase *c) + struct dfm_reader *reader, + struct dictionary *dict, struct ccase *c) { - const char *output_encoding = dict_get_encoding (parser->dict); + const char *output_encoding = dict_get_encoding (dict); struct string tmp = DS_EMPTY_INITIALIZER; struct field *f; @@ -615,14 +615,15 @@ parse_delimited_span (const struct data_parser *parser, return true; } -/* Reads a case from READER into C, parsing it according to - delimited syntax rules with one case per record in PARSER. +/* Reads a case from READER into C, which matches dictionary DICT, parsing it + according to delimited syntax rules with one case per record in PARSER. Returns true if successful, false at end of file or on I/O error. */ static bool parse_delimited_no_span (const struct data_parser *parser, - struct dfm_reader *reader, struct ccase *c) + struct dfm_reader *reader, + struct dictionary *dict, struct ccase *c) { - const char *output_encoding = dict_get_encoding (parser->dict); + const char *output_encoding = dict_get_encoding (dict); struct string tmp = DS_EMPTY_INITIALIZER; struct substring s; struct field *f, *end; @@ -769,6 +770,7 @@ data_parser_output_description (struct data_parser *parser, struct data_parser_casereader { struct data_parser *parser; /* Parser. */ + struct dictionary *dict; /* Dictionary. */ struct dfm_reader *reader; /* Data file reader. */ struct caseproto *proto; /* Format of cases. */ }; @@ -783,7 +785,7 @@ static const struct casereader_class data_parser_casereader_class; void data_parser_make_active_file (struct data_parser *parser, struct dataset *ds, struct dfm_reader *reader, - struct dictionary *dict, + struct dictionary *dict, struct casereader* (*func)(struct casereader *, const struct dictionary *, void *), @@ -795,6 +797,7 @@ data_parser_make_active_file (struct data_parser *parser, struct dataset *ds, r = xmalloc (sizeof *r); r->parser = parser; + r->dict = dict_ref (dict); r->reader = reader; r->proto = caseproto_ref (dict_get_proto (dict)); casereader0 = casereader_create_sequential (NULL, r->proto, @@ -816,7 +819,7 @@ data_parser_casereader_read (struct casereader *reader UNUSED, void *r_) { struct data_parser_casereader *r = r_; struct ccase *c = case_create (r->proto); - if (data_parser_parse (r->parser, r->reader, c)) + if (data_parser_parse (r->parser, r->reader, r->dict, c)) return c; else { @@ -833,6 +836,7 @@ data_parser_casereader_destroy (struct casereader *reader, void *r_) casereader_force_error (reader); dfm_close_reader (r->reader); caseproto_unref (r->proto); + dict_unref (r->dict); data_parser_destroy (r->parser); free (r); } diff --git a/src/language/data-io/data-parser.h b/src/language/data-io/data-parser.h index a932c7eb69..caef721d10 100644 --- a/src/language/data-io/data-parser.h +++ b/src/language/data-io/data-parser.h @@ -38,7 +38,7 @@ enum data_parser_type }; /* Creating and configuring any parser. */ -struct data_parser *data_parser_create (struct dictionary *dict); +struct data_parser *data_parser_create (void); void data_parser_destroy (struct data_parser *); enum data_parser_type data_parser_get_type (const struct data_parser *); @@ -73,8 +73,9 @@ void data_parser_add_fixed_field (struct data_parser *, const char *name, int record, int first_column); bool data_parser_any_fields (const struct data_parser *); -bool data_parser_parse (struct data_parser *, - struct dfm_reader *, struct ccase *); +bool data_parser_parse (struct data_parser *, struct dfm_reader *, + struct dictionary *, struct ccase *); + /* Uses for a configured parser. */ void data_parser_output_description (struct data_parser *, diff --git a/src/language/data-io/get-data.c b/src/language/data-io/get-data.c index a37e3f0326..0c60bd3d63 100644 --- a/src/language/data-io/get-data.c +++ b/src/language/data-io/get-data.c @@ -383,7 +383,7 @@ parse_get_txt (struct lexer *lexer, struct dataset *ds) if (fh == NULL) goto error; - parser = data_parser_create (dict); + parser = data_parser_create (); has_type = false; data_parser_set_type (parser, DP_DELIMITED); data_parser_set_span (parser, false); -- 2.30.2