X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fget-data.c;h=32202babdea8848f5c3a06b6431b0db3c4b007ff;hb=a5097a183f00ab2d2dc538ba7094a4696e2fea04;hp=36ea2e7a448f802ec00508f13c428320c7ef1227;hpb=b51bc9ca536e41bea78ffdb2d6b9b4935dddf82e;p=pspp-builds.git diff --git a/src/language/data-io/get-data.c b/src/language/data-io/get-data.c index 36ea2e7a..32202bab 100644 --- a/src/language/data-io/get-data.c +++ b/src/language/data-io/get-data.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007 Free Software Foundation, Inc. + Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,20 +16,33 @@ #include +#include -#include #include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include #include -#include -#include +#include + +#include "xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) #define N_(msgid) (msgid) static int parse_get_gnm (struct lexer *lexer, struct dataset *); +static int parse_get_txt (struct lexer *lexer, struct dataset *); +static int parse_get_psql (struct lexer *lexer, struct dataset *); int cmd_get_data (struct lexer *lexer, struct dataset *ds) @@ -43,11 +56,87 @@ cmd_get_data (struct lexer *lexer, struct dataset *ds) if (lex_match_id (lexer, "GNM")) return parse_get_gnm (lexer, ds); + else if (lex_match_id (lexer, "TXT")) + return parse_get_txt (lexer, ds); + else if (lex_match_id (lexer, "PSQL")) + return parse_get_psql (lexer, ds); msg (SE, _("Unsupported TYPE %s"), lex_tokid (lexer)); return CMD_FAILURE; } +static int +parse_get_psql (struct lexer *lexer, struct dataset *ds) +{ + struct psql_read_info psql; + psql.allow_clear = false; + psql.conninfo = NULL; + psql.str_width = -1; + psql.bsize = -1; + ds_init_empty (&psql.sql); + + lex_force_match (lexer, '/'); + + if (!lex_force_match_id (lexer, "CONNECT")) + goto error; + + lex_force_match (lexer, '='); + + if (!lex_force_string (lexer)) + goto error; + + psql.conninfo = xstrdup (ds_cstr (lex_tokstr (lexer))); + + lex_get (lexer); + + while (lex_match (lexer, '/') ) + { + if ( lex_match_id (lexer, "ASSUMEDSTRWIDTH")) + { + lex_match (lexer, '='); + psql.str_width = lex_integer (lexer); + lex_get (lexer); + } + else if ( lex_match_id (lexer, "BSIZE")) + { + lex_match (lexer, '='); + psql.bsize = lex_integer (lexer); + lex_get (lexer); + } + else if ( lex_match_id (lexer, "UNENCRYPTED")) + { + psql.allow_clear = true; + } + else if (lex_match_id (lexer, "SQL")) + { + lex_match (lexer, '='); + if ( ! lex_force_string (lexer) ) + goto error; + + ds_put_substring (&psql.sql, lex_tokstr (lexer)->ss); + lex_get (lexer); + } + } + { + struct dictionary *dict = NULL; + struct casereader *reader = psql_open_reader (&psql, &dict); + + if ( reader ) + proc_set_active_file (ds, reader, dict); + } + + ds_destroy (&psql.sql); + free (psql.conninfo); + + return CMD_SUCCESS; + + error: + + ds_destroy (&psql.sql); + free (psql.conninfo); + + return CMD_FAILURE; +} static int parse_get_gnm (struct lexer *lexer, struct dataset *ds) @@ -64,7 +153,7 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds) if (!lex_force_string (lexer)) goto error; - gri.file_name = strdup (ds_cstr (lex_tokstr (lexer))); + gri.file_name = xstrdup (ds_cstr (lex_tokstr (lexer))); lex_get (lexer); @@ -83,7 +172,7 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds) if ( ! lex_force_string (lexer) ) goto error; - gri.sheet_name = strdup (ds_cstr (lex_tokstr (lexer))); + gri.sheet_name = xstrdup (ds_cstr (lex_tokstr (lexer))); gri.sheet_index = -1; } else if (lex_match_id (lexer, "INDEX")) @@ -107,7 +196,7 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds) if ( ! lex_force_string (lexer) ) goto error; - gri.cell_range = strdup (ds_cstr (lex_tokstr (lexer))); + gri.cell_range = xstrdup (ds_cstr (lex_tokstr (lexer))); } else goto error; @@ -156,3 +245,314 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds) free (gri.cell_range); return CMD_FAILURE; } + +static bool +set_type (struct data_parser *parser, const char *subcommand, + enum data_parser_type type, bool *has_type) +{ + if (!*has_type) + { + data_parser_set_type (parser, type); + *has_type = true; + } + else if (type != data_parser_get_type (parser)) + { + msg (SE, _("%s is allowed only with %s arrangement, but %s arrangement " + "was stated or implied earlier in this command."), + subcommand, + type == DP_FIXED ? "FIXED" : "DELIMITED", + type == DP_FIXED ? "DELIMITED" : "FIXED"); + return false; + } + return true; +} + +static int +parse_get_txt (struct lexer *lexer, struct dataset *ds) +{ + struct data_parser *parser = NULL; + struct dictionary *dict = dict_create (); + struct file_handle *fh = NULL; + struct dfm_reader *reader = NULL; + + int record; + enum data_parser_type type; + bool has_type; + + lex_force_match (lexer, '/'); + + if (!lex_force_match_id (lexer, "FILE")) + goto error; + lex_force_match (lexer, '='); + fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE); + if (fh == NULL) + goto error; + + parser = data_parser_create (dict); + has_type = false; + data_parser_set_type (parser, DP_DELIMITED); + data_parser_set_span (parser, false); + data_parser_set_quotes (parser, ss_empty ()); + data_parser_set_empty_line_has_field (parser, true); + + for (;;) + { + if (!lex_force_match (lexer, '/')) + goto error; + + if (lex_match_id (lexer, "ARRANGEMENT")) + { + bool ok; + + lex_match (lexer, '='); + if (lex_match_id (lexer, "FIXED")) + ok = set_type (parser, "ARRANGEMENT=FIXED", DP_FIXED, &has_type); + else if (lex_match_id (lexer, "DELIMITED")) + ok = set_type (parser, "ARRANGEMENT=DELIMITED", + DP_DELIMITED, &has_type); + else + { + lex_error (lexer, _("expecting FIXED or DELIMITED")); + goto error; + } + if (!ok) + goto error; + } + else if (lex_match_id (lexer, "FIRSTCASE")) + { + lex_match (lexer, '='); + if (!lex_force_int (lexer)) + goto error; + if (lex_integer (lexer) < 1) + { + msg (SE, _("Value of FIRSTCASE must be 1 or greater.")); + goto error; + } + data_parser_set_skip (parser, lex_integer (lexer) - 1); + lex_get (lexer); + } + else if (lex_match_id_n (lexer, "DELCASE", 4)) + { + if (!set_type (parser, "DELCASE", DP_DELIMITED, &has_type)) + goto error; + lex_match (lexer, '='); + if (lex_match_id (lexer, "LINE")) + data_parser_set_span (parser, false); + else if (lex_match_id (lexer, "VARIABLES")) + { + data_parser_set_span (parser, true); + + /* VARIABLES takes an integer argument, but for no + good reason. We just ignore it. */ + if (!lex_force_int (lexer)) + goto error; + lex_get (lexer); + } + else + { + lex_error (lexer, _("expecting LINE or VARIABLES")); + goto error; + } + } + else if (lex_match_id (lexer, "FIXCASE")) + { + if (!set_type (parser, "FIXCASE", DP_FIXED, &has_type)) + goto error; + lex_match (lexer, '='); + if (!lex_force_int (lexer)) + goto error; + if (lex_integer (lexer) < 1) + { + msg (SE, _("Value of FIXCASE must be at least 1.")); + goto error; + } + data_parser_set_records (parser, lex_integer (lexer)); + lex_get (lexer); + } + else if (lex_match_id (lexer, "IMPORTCASES")) + { + lex_match (lexer, '='); + if (lex_match (lexer, T_ALL)) + { + data_parser_set_case_limit (parser, -1); + data_parser_set_case_percent (parser, 100); + } + else if (lex_match_id (lexer, "FIRST")) + { + if (!lex_force_int (lexer)) + goto error; + if (lex_integer (lexer) < 1) + { + msg (SE, _("Value of FIRST must be at least 1.")); + goto error; + } + data_parser_set_case_limit (parser, lex_integer (lexer)); + lex_get (lexer); + } + else if (lex_match_id (lexer, "PERCENT")) + { + if (!lex_force_int (lexer)) + goto error; + if (lex_integer (lexer) < 1 || lex_integer (lexer) > 100) + { + msg (SE, _("Value of PERCENT must be between 1 and 100.")); + goto error; + } + data_parser_set_case_percent (parser, lex_integer (lexer)); + lex_get (lexer); + } + } + else if (lex_match_id_n (lexer, "DELIMITERS", 4)) + { + struct string hard_seps = DS_EMPTY_INITIALIZER; + const char *soft_seps = ""; + struct substring s; + int c; + + if (!set_type (parser, "DELIMITERS", DP_DELIMITED, &has_type)) + goto error; + lex_match (lexer, '='); + + if (!lex_force_string (lexer)) + goto error; + + s = ds_ss (lex_tokstr (lexer)); + if (ss_match_string (&s, ss_cstr ("\\t"))) + ds_put_cstr (&hard_seps, "\t"); + if (ss_match_string (&s, ss_cstr ("\\\\"))) + ds_put_cstr (&hard_seps, "\\"); + while ((c = ss_get_char (&s)) != EOF) + if (c == ' ') + soft_seps = " "; + else + ds_put_char (&hard_seps, c); + data_parser_set_soft_delimiters (parser, ss_cstr (soft_seps)); + data_parser_set_hard_delimiters (parser, ds_ss (&hard_seps)); + ds_destroy (&hard_seps); + + lex_get (lexer); + } + else if (lex_match_id (lexer, "QUALIFIERS")) + { + if (!set_type (parser, "QUALIFIERS", DP_DELIMITED, &has_type)) + goto error; + lex_match (lexer, '='); + + if (!lex_force_string (lexer)) + goto error; + + if (settings_get_syntax () == COMPATIBLE + && ds_length (lex_tokstr (lexer)) != 1) + { + msg (SE, _("In compatible syntax mode, the QUALIFIER string " + "must contain exactly one character.")); + goto error; + } + + data_parser_set_quotes (parser, ds_ss (lex_tokstr (lexer))); + lex_get (lexer); + } + else if (settings_get_syntax () == ENHANCED + && lex_match_id (lexer, "ESCAPE")) + data_parser_set_quote_escape (parser, true); + else if (lex_match_id (lexer, "VARIABLES")) + break; + else + { + lex_error (lexer, _("expecting VARIABLES")); + goto error; + } + } + lex_match (lexer, '='); + + + record = 1; + type = data_parser_get_type (parser); + do + { + char name[VAR_NAME_LEN + 1]; + struct fmt_spec input, output; + int fc, lc; + struct variable *v; + + while (type == DP_FIXED && lex_match (lexer, '/')) + { + if (!lex_force_int (lexer)) + goto error; + if (lex_integer (lexer) < record) + { + msg (SE, _("The record number specified, %ld, is at or " + "before the previous record, %d. Data " + "fields must be listed in order of " + "increasing record number."), + lex_integer (lexer), record); + goto error; + } + if (lex_integer (lexer) > data_parser_get_records (parser)) + { + msg (SE, _("The record number specified, %ld, exceeds " + "the number of records per case specified " + "on FIXCASE, %d."), + lex_integer (lexer), data_parser_get_records (parser)); + goto error; + } + record = lex_integer (lexer); + lex_get (lexer); + } + + if (!lex_force_id (lexer)) + goto error; + strcpy (name, lex_tokid (lexer)); + lex_get (lexer); + + if (type == DP_DELIMITED) + { + if (!parse_format_specifier (lexer, &input) + || !fmt_check_input (&input)) + goto error; + } + else + { + if (!parse_column_range (lexer, 0, &fc, &lc, NULL)) + goto error; + if (!parse_format_specifier_name (lexer, &input.type)) + goto error; + input.w = lc - fc + 1; + input.d = 0; + if (!fmt_check_input (&input)) + goto error; + } + output = fmt_for_output_from_input (&input); + + v = dict_create_var (dict, name, fmt_var_width (&input)); + if (v == NULL) + { + msg (SE, _("%s is a duplicate variable name."), name); + goto error; + } + var_set_both_formats (v, &output); + + if (type == DP_DELIMITED) + data_parser_add_delimited_field (parser, &input, + var_get_case_index (v), + name); + else + data_parser_add_fixed_field (parser, &input, var_get_case_index (v), + name, record, fc); + } + while (lex_token (lexer) != '.'); + + reader = dfm_open_reader (fh, lexer); + if (reader == NULL) + goto error; + + data_parser_make_active_file (parser, ds, reader, dict); + fh_unref (fh); + return CMD_SUCCESS; + + error: + data_parser_destroy (parser); + dict_destroy (dict); + fh_unref (fh); + return CMD_CASCADING_FAILURE; +}