X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fcombine-files.c;h=21736da8c645239dec3dadcde4cae59765d0f712;hb=f481fd69631024bcdc7dc2369bbc1592d7a43ac7;hp=aa6d3b98ec319b60f1efd31b3fcf299b7ed87654;hpb=55e6e7ba37a30570f5a31e2d78c22dfa7b61a36f;p=pspp diff --git a/src/language/data-io/combine-files.c b/src/language/data-io/combine-files.c index aa6d3b98ec..21736da8c6 100644 --- a/src/language/data-io/combine-files.c +++ b/src/language/data-io/combine-files.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,28 +18,30 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xalloc.h" +#include "data/any-reader.h" +#include "data/case-matcher.h" +#include "data/case.h" +#include "data/casereader.h" +#include "data/casewriter.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/subcase.h" +#include "data/variable.h" +#include "language/command.h" +#include "language/data-io/file-handle.h" +#include "language/data-io/trim.h" +#include "language/lexer/lexer.h" +#include "language/lexer/variable-parser.h" +#include "language/stats/sort-criteria.h" +#include "libpspp/assertion.h" +#include "libpspp/i18n.h" +#include "libpspp/message.h" +#include "libpspp/string-array.h" +#include "libpspp/taint.h" +#include "math/sort.h" + +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -78,7 +80,7 @@ struct comb_file bool is_sorted; /* Is file presorted on the BY variables? */ /* IN subcommand. */ - char in_name[VAR_NAME_LEN + 1]; + char *in_name; struct variable *in_var; }; @@ -147,8 +149,8 @@ combine_files (enum comb_command_type command, bool saw_sort = false; struct casereader *active_file = NULL; - char first_name[VAR_NAME_LEN + 1] = ""; - char last_name[VAR_NAME_LEN + 1] = ""; + char *first_name = NULL; + char *last_name = NULL; struct taint *taint = NULL; @@ -159,7 +161,7 @@ combine_files (enum comb_command_type command, proc.files = NULL; proc.n_files = 0; - proc.dict = dict_create (); + proc.dict = dict_create (get_default_encoding ()); proc.output = NULL; proc.matcher = NULL; subcase_init_empty (&proc.by_vars); @@ -200,32 +202,32 @@ combine_files (enum comb_command_type command, file->reader = NULL; file->data = NULL; file->is_sorted = true; - file->in_name[0] = '\0'; + file->in_name = NULL; file->in_var = NULL; if (lex_match (lexer, T_ASTERISK)) { - if (!proc_has_active_file (ds)) + if (!dataset_has_source (ds)) { - msg (SE, _("Cannot specify the active file since no active " - "file has been defined.")); + msg (SE, _("Cannot specify the active dataset since none " + "has been defined.")); goto error; } if (proc_make_temporary_transformations_permanent (ds)) msg (SE, _("This command may not be used after TEMPORARY when " - "the active file is an input source. " + "the active dataset is an input source. " "Temporary transformations will be made permanent.")); file->dict = dict_clone (dataset_dict (ds)); } else { - file->handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH); + file->handle = fh_parse (lexer, FH_REF_FILE, dataset_session (ds)); if (file->handle == NULL) goto error; - file->reader = any_reader_open (file->handle, &file->dict); + file->reader = any_reader_open (file->handle, NULL, &file->dict); if (file->reader == NULL) goto error; } @@ -245,13 +247,13 @@ combine_files (enum comb_command_type command, goto error; } - if (file->in_name[0]) + if (file->in_name) { msg (SE, _("Multiple IN subcommands for a single FILE or " "TABLE.")); goto error; } - strcpy (file->in_name, lex_tokcstr (lexer)); + file->in_name = xstrdup (lex_tokcstr (lexer)); lex_get (lexer); } else if (lex_match_id (lexer, "SORT")) @@ -302,7 +304,8 @@ combine_files (enum comb_command_type command, msg (SE, _("File %s lacks BY variable %s."), fh_get_name (file->handle), name); else - msg (SE, _("Active file lacks BY variable %s."), name); + msg (SE, _("Active dataset lacks BY variable %s."), + name); ok = false; } } @@ -316,7 +319,7 @@ combine_files (enum comb_command_type command, } else if (command != COMB_UPDATE && lex_match_id (lexer, "FIRST")) { - if (first_name[0] != '\0') + if (first_name != NULL) { lex_sbc_only_once ("FIRST"); goto error; @@ -325,12 +328,12 @@ combine_files (enum comb_command_type command, lex_match (lexer, T_EQUALS); if (!lex_force_id (lexer)) goto error; - strcpy (first_name, lex_tokcstr (lexer)); + first_name = xstrdup (lex_tokcstr (lexer)); lex_get (lexer); } else if (command != COMB_UPDATE && lex_match_id (lexer, "LAST")) { - if (last_name[0] != '\0') + if (last_name != NULL) { lex_sbc_only_once ("LAST"); goto error; @@ -339,7 +342,7 @@ combine_files (enum comb_command_type command, lex_match (lexer, T_EQUALS); if (!lex_force_id (lexer)) goto error; - strcpy (last_name, lex_tokcstr (lexer)); + last_name = xstrdup (lex_tokcstr (lexer)); lex_get (lexer); } else if (lex_match_id (lexer, "MAP")) @@ -373,7 +376,7 @@ combine_files (enum comb_command_type command, { if (command == COMB_UPDATE) { - msg (SE, _("The BY subcommand is required.")); + lex_sbc_missing ("BY"); goto error; } if (n_tables) @@ -465,12 +468,16 @@ combine_files (enum comb_command_type command, if (active_file != NULL) proc_commit (ds); - proc_set_active_file (ds, casewriter_make_reader (proc.output), proc.dict); + dataset_set_dict (ds, proc.dict); + dataset_set_source (ds, casewriter_make_reader (proc.output)); proc.dict = NULL; proc.output = NULL; free_comb_proc (&proc); + free (first_name); + free (last_name); + return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE; error: @@ -478,6 +485,8 @@ combine_files (enum comb_command_type command, proc_commit (ds); free_comb_proc (&proc); taint_destroy (taint); + free (first_name); + free (last_name); return CMD_CASCADING_FAILURE; } @@ -486,9 +495,8 @@ static bool merge_dictionary (struct dictionary *const m, struct comb_file *f) { struct dictionary *d = f->dict; - const char *d_docs, *m_docs; + const struct string_array *d_docs, *m_docs; int i; - const char *file_encoding; if (dict_get_label (m) == NULL) dict_set_label (m, dict_get_label (d)); @@ -502,17 +510,9 @@ merge_dictionary (struct dictionary *const m, struct comb_file *f) The correct thing to do would be to convert to an encoding which can cope with all the input files (eg UTF-8). */ - file_encoding = dict_get_encoding (f->dict); - if ( file_encoding != NULL) - { - if ( dict_get_encoding (m) == NULL) - dict_set_encoding (m, file_encoding); - else if ( 0 != strcmp (file_encoding, dict_get_encoding (m))) - { - msg (MW, - _("Combining files with incompatible encodings. String data may not be represented correctly.")); - } - } + if ( 0 != strcmp (dict_get_encoding (f->dict), dict_get_encoding (m))) + msg (MW, _("Combining files with incompatible encodings. String data may " + "not be represented correctly.")); if (d_docs != NULL) { @@ -520,9 +520,19 @@ merge_dictionary (struct dictionary *const m, struct comb_file *f) dict_set_documents (m, d_docs); else { - char *new_docs = xasprintf ("%s%s", m_docs, d_docs); - dict_set_documents (m, new_docs); - free (new_docs); + struct string_array new_docs; + size_t i; + + new_docs.n = m_docs->n + d_docs->n; + new_docs.strings = xmalloc (new_docs.n * sizeof *new_docs.strings); + for (i = 0; i < m_docs->n; i++) + new_docs.strings[i] = m_docs->strings[i]; + for (i = 0; i < d_docs->n; i++) + new_docs.strings[m_docs->n + i] = d_docs->strings[i]; + + dict_set_documents (m, &new_docs); + + free (new_docs.strings); } } @@ -572,7 +582,7 @@ merge_dictionary (struct dictionary *const m, struct comb_file *f) if (var_has_missing_values (dv) && !var_has_missing_values (mv)) var_set_missing_values (mv, var_get_missing_values (dv)); if (var_get_label (dv) && !var_get_label (mv)) - var_set_label (mv, var_get_label (dv)); + var_set_label (mv, var_get_label (dv), false); } else mv = dict_clone_var_assert (m, dv); @@ -581,18 +591,19 @@ merge_dictionary (struct dictionary *const m, struct comb_file *f) return true; } -/* If VAR_NAME is a non-empty string, attempts to create a +/* If VAR_NAME is non-NULL, attempts to create a variable named VAR_NAME, with format F1.0, in DICT, and stores a pointer to the variable in *VAR. Returns true if successful, false if the variable name is a duplicate (in which case a message saying that the variable specified on the - given SUBCOMMAND is a duplicate is emitted). Also returns - true, without doing anything, if VAR_NAME is null or empty. */ + given SUBCOMMAND is a duplicate is emitted). + + Does nothing and returns true if VAR_NAME is null. */ static bool create_flag_var (const char *subcommand, const char *var_name, struct dictionary *dict, struct variable **var) { - if (var_name[0] != '\0') + if (var_name != NULL) { struct fmt_spec format = fmt_for_output (FMT_F, 1, 0); *var = dict_create_var (dict, var_name, 0); @@ -626,6 +637,7 @@ close_all_comb_files (struct comb_proc *proc) dict_destroy (file->dict); casereader_destroy (file->reader); case_unref (file->data); + free (file->in_name); } free (proc->files); proc->files = NULL;