X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fcombine-files.c;h=a661d7c6cde3d55afabd245f582b643b479ba303;hb=2be9bee9da6a2ce27715e58128569594319abfa2;hp=ccbe7679d15a6331c93c7f4054ac168cae9f3c00;hpb=a992bf1121d3e1eef76fd6184b95fe079bb91558;p=pspp-builds.git diff --git a/src/language/data-io/combine-files.c b/src/language/data-io/combine-files.c index ccbe7679..a661d7c6 100644 --- a/src/language/data-io/combine-files.c +++ b/src/language/data-io/combine-files.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,28 +18,29 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xalloc.h" +#include "data/any-reader.h" +#include "data/case-matcher.h" +#include "data/case.h" +#include "data/casereader.h" +#include "data/casewriter.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/subcase.h" +#include "data/variable.h" +#include "language/command.h" +#include "language/data-io/file-handle.h" +#include "language/data-io/trim.h" +#include "language/lexer/lexer.h" +#include "language/lexer/variable-parser.h" +#include "language/stats/sort-criteria.h" +#include "libpspp/assertion.h" +#include "libpspp/message.h" +#include "libpspp/string-array.h" +#include "libpspp/taint.h" +#include "math/sort.h" + +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -78,7 +79,7 @@ struct comb_file bool is_sorted; /* Is file presorted on the BY variables? */ /* IN subcommand. */ - char in_name[VAR_NAME_LEN + 1]; + char *in_name; struct variable *in_var; }; @@ -147,8 +148,8 @@ combine_files (enum comb_command_type command, bool saw_sort = false; struct casereader *active_file = NULL; - char first_name[VAR_NAME_LEN + 1] = ""; - char last_name[VAR_NAME_LEN + 1] = ""; + char *first_name = NULL; + char *last_name = NULL; struct taint *taint = NULL; @@ -170,7 +171,7 @@ combine_files (enum comb_command_type command, dict_set_case_limit (proc.dict, dict_get_case_limit (dataset_dict (ds))); - lex_match (lexer, '/'); + lex_match (lexer, T_SLASH); for (;;) { struct comb_file *file; @@ -185,7 +186,7 @@ combine_files (enum comb_command_type command, } else break; - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); if (proc.n_files >= allocated_files) proc.files = x2nrealloc (proc.files, &allocated_files, @@ -200,10 +201,10 @@ combine_files (enum comb_command_type command, file->reader = NULL; file->data = NULL; file->is_sorted = true; - file->in_name[0] = '\0'; + file->in_name = NULL; file->in_var = NULL; - if (lex_match (lexer, '*')) + if (lex_match (lexer, T_ASTERISK)) { if (!proc_has_active_file (ds)) { @@ -230,7 +231,7 @@ combine_files (enum comb_command_type command, goto error; } - while (lex_match (lexer, '/')) + while (lex_match (lexer, T_SLASH)) if (lex_match_id (lexer, "RENAME")) { if (!parse_dict_rename (lexer, file->dict)) @@ -238,20 +239,20 @@ combine_files (enum comb_command_type command, } else if (lex_match_id (lexer, "IN")) { - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); if (lex_token (lexer) != T_ID) { lex_error (lexer, NULL); goto error; } - if (file->in_name[0]) + if (file->in_name) { msg (SE, _("Multiple IN subcommands for a single FILE or " "TABLE.")); goto error; } - strcpy (file->in_name, lex_tokid (lexer)); + file->in_name = xstrdup (lex_tokcstr (lexer)); lex_get (lexer); } else if (lex_match_id (lexer, "SORT")) @@ -263,7 +264,7 @@ combine_files (enum comb_command_type command, merge_dictionary (proc.dict, file); } - while (lex_token (lexer) != '.') + while (lex_token (lexer) != T_ENDCMD) { if (lex_match (lexer, T_BY)) { @@ -278,7 +279,7 @@ combine_files (enum comb_command_type command, } saw_by = true; - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); if (!parse_sort_criteria (lexer, proc.dict, &proc.by_vars, &by_vars, NULL)) goto error; @@ -289,7 +290,7 @@ combine_files (enum comb_command_type command, struct comb_file *file = &proc.files[i]; size_t j; - for (j = 0; j < subcase_get_n_values (&proc.by_vars); j++) + for (j = 0; j < subcase_get_n_fields (&proc.by_vars); j++) { const char *name = var_get_name (by_vars[j]); struct variable *var = dict_lookup_var (file->dict, name); @@ -316,30 +317,30 @@ combine_files (enum comb_command_type command, } else if (command != COMB_UPDATE && lex_match_id (lexer, "FIRST")) { - if (first_name[0] != '\0') + if (first_name != NULL) { lex_sbc_only_once ("FIRST"); goto error; } - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); if (!lex_force_id (lexer)) goto error; - strcpy (first_name, lex_tokid (lexer)); + first_name = xstrdup (lex_tokcstr (lexer)); lex_get (lexer); } else if (command != COMB_UPDATE && lex_match_id (lexer, "LAST")) { - if (last_name[0] != '\0') + if (last_name != NULL) { lex_sbc_only_once ("LAST"); goto error; } - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); if (!lex_force_id (lexer)) goto error; - strcpy (last_name, lex_tokid (lexer)); + last_name = xstrdup (lex_tokcstr (lexer)); lex_get (lexer); } else if (lex_match_id (lexer, "MAP")) @@ -362,7 +363,7 @@ combine_files (enum comb_command_type command, goto error; } - if (!lex_match (lexer, '/') && lex_token (lexer) != '.') + if (!lex_match (lexer, T_SLASH) && lex_token (lexer) != T_ENDCMD) { lex_end_of_command (lexer); goto error; @@ -378,12 +379,12 @@ combine_files (enum comb_command_type command, } if (n_tables) { - msg (SE, _("BY is required when TABLE is specified.")); + msg (SE, _("BY is required when %s is specified."), "TABLE"); goto error; } if (saw_sort) { - msg (SE, _("BY is required when SORT is specified.")); + msg (SE, _("BY is required when %s is specified."), "SORT"); goto error; } } @@ -423,7 +424,7 @@ combine_files (enum comb_command_type command, } } - proc.output = autopaging_writer_create (dict_get_next_value_idx (proc.dict)); + proc.output = autopaging_writer_create (dict_get_proto (proc.dict)); taint = taint_clone (casewriter_get_taint (proc.output)); /* Set up case matcher. */ @@ -471,6 +472,9 @@ combine_files (enum comb_command_type command, free_comb_proc (&proc); + free (first_name); + free (last_name); + return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE; error: @@ -478,6 +482,8 @@ combine_files (enum comb_command_type command, proc_commit (ds); free_comb_proc (&proc); taint_destroy (taint); + free (first_name); + free (last_name); return CMD_CASCADING_FAILURE; } @@ -486,23 +492,53 @@ static bool merge_dictionary (struct dictionary *const m, struct comb_file *f) { struct dictionary *d = f->dict; - const char *d_docs, *m_docs; + const struct string_array *d_docs, *m_docs; int i; + const char *file_encoding; if (dict_get_label (m) == NULL) dict_set_label (m, dict_get_label (d)); d_docs = dict_get_documents (d); m_docs = dict_get_documents (m); + + + /* FIXME: If the input files have different encodings, then + the result is undefined. + The correct thing to do would be to convert to an encoding + which can cope with all the input files (eg UTF-8). + */ + file_encoding = dict_get_encoding (f->dict); + if ( file_encoding != NULL) + { + if ( dict_get_encoding (m) == NULL) + dict_set_encoding (m, file_encoding); + else if ( 0 != strcmp (file_encoding, dict_get_encoding (m))) + { + msg (MW, + _("Combining files with incompatible encodings. String data may not be represented correctly.")); + } + } + if (d_docs != NULL) { if (m_docs == NULL) dict_set_documents (m, d_docs); else { - char *new_docs = xasprintf ("%s%s", m_docs, d_docs); - dict_set_documents (m, new_docs); - free (new_docs); + struct string_array new_docs; + size_t i; + + new_docs.n = m_docs->n + d_docs->n; + new_docs.strings = xmalloc (new_docs.n * sizeof *new_docs.strings); + for (i = 0; i < m_docs->n; i++) + new_docs.strings[i] = m_docs->strings[i]; + for (i = 0; i < d_docs->n; i++) + new_docs.strings[m_docs->n + i] = d_docs->strings[i]; + + dict_set_documents (m, &new_docs); + + free (new_docs.strings); } } @@ -542,7 +578,7 @@ merge_dictionary (struct dictionary *const m, struct comb_file *f) ds_put_format (&s, _("In an earlier file, %s was a string " "variable with width %d."), var_name, var_get_width (mv)); - msg (SE, ds_cstr (&s)); + msg (SE, "%s", ds_cstr (&s)); ds_destroy (&s); return false; } @@ -552,27 +588,28 @@ merge_dictionary (struct dictionary *const m, struct comb_file *f) if (var_has_missing_values (dv) && !var_has_missing_values (mv)) var_set_missing_values (mv, var_get_missing_values (dv)); if (var_get_label (dv) && !var_get_label (mv)) - var_set_label (mv, var_get_label (dv)); + var_set_label (mv, var_get_label (dv), file_encoding, false); } else - mv = dict_clone_var_assert (m, dv, var_get_name (dv)); + mv = dict_clone_var_assert (m, dv); } return true; } -/* If VAR_NAME is a non-empty string, attempts to create a +/* If VAR_NAME is non-NULL, attempts to create a variable named VAR_NAME, with format F1.0, in DICT, and stores a pointer to the variable in *VAR. Returns true if successful, false if the variable name is a duplicate (in which case a message saying that the variable specified on the - given SUBCOMMAND is a duplicate is emitted). Also returns - true, without doing anything, if VAR_NAME is null or empty. */ + given SUBCOMMAND is a duplicate is emitted). + + Does nothing and returns true if VAR_NAME is null. */ static bool create_flag_var (const char *subcommand, const char *var_name, struct dictionary *dict, struct variable **var) { - if (var_name[0] != '\0') + if (var_name != NULL) { struct fmt_spec format = fmt_for_output (FMT_F, 1, 0); *var = dict_create_var (dict, var_name, 0); @@ -606,6 +643,7 @@ close_all_comb_files (struct comb_proc *proc) dict_destroy (file->dict); casereader_destroy (file->reader); case_unref (file->data); + free (file->in_name); } free (proc->files); proc->files = NULL; @@ -620,9 +658,14 @@ free_comb_proc (struct comb_proc *proc) dict_destroy (proc->dict); casewriter_destroy (proc->output); case_matcher_destroy (proc->matcher); + if (proc->prev_BY) + { + caseproto_destroy_values (subcase_get_proto (&proc->by_vars), + proc->prev_BY); + free (proc->prev_BY); + } subcase_destroy (&proc->by_vars); case_unref (proc->buffered_case); - free (proc->prev_BY); } static bool scan_table (struct comb_file *, union value by[]); @@ -769,7 +812,7 @@ create_output_case (const struct comb_proc *proc) struct ccase *output; size_t i; - output = case_create (dict_get_next_value_idx (proc->dict)); + output = case_create (dict_get_proto (proc->dict)); for (i = 0; i < n_vars; i++) { struct variable *v = dict_get_var (proc->dict, i); @@ -841,11 +884,15 @@ output_case (struct comb_proc *proc, struct ccase *output, union value by[]) if (new_BY) { - size_t n = (subcase_get_n_values (&proc->by_vars) - * sizeof (union value)); + size_t n_values = subcase_get_n_fields (&proc->by_vars); + const struct caseproto *proto = subcase_get_proto (&proc->by_vars); if (proc->prev_BY == NULL) - proc->prev_BY = xmalloc (n); - memcpy (proc->prev_BY, by, n); + { + proc->prev_BY = xmalloc (n_values * sizeof *proc->prev_BY); + caseproto_init_values (proto, proc->prev_BY); + } + caseproto_copy (subcase_get_proto (&proc->by_vars), 0, n_values, + proc->prev_BY, by); } } }