X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fcombine-files.c;h=6d9ed5d43b0d628ca41390121837eaf9c2a38f7a;hb=4b2eb0d1ce8df60aa43e2fe37628ae00b80d1115;hp=5f82d1512be3a821dd01783f73bdc2049c0f93a6;hpb=fd2104e10011b87d6558e8623d629da4cee82b25;p=pspp diff --git a/src/language/data-io/combine-files.c b/src/language/data-io/combine-files.c index 5f82d1512b..6d9ed5d43b 100644 --- a/src/language/data-io/combine-files.c +++ b/src/language/data-io/combine-files.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -69,6 +69,7 @@ struct comb_file /* Variables. */ struct subcase by_vars; /* BY variables in this input file. */ struct subcase src, dst; /* Data to copy to output; where to put it. */ + const struct missing_values **mv; /* Each variable's missing values. */ /* Input files. */ struct file_handle *handle; /* Input file handle. */ @@ -197,6 +198,7 @@ combine_files (enum comb_command_type command, subcase_init_empty (&file->by_vars); subcase_init_empty (&file->src); subcase_init_empty (&file->dst); + file->mv = NULL; file->handle = NULL; file->dict = NULL; file->reader = NULL; @@ -223,11 +225,12 @@ combine_files (enum comb_command_type command, } else { - file->handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH); + file->handle = fh_parse (lexer, FH_REF_FILE, dataset_session (ds)); if (file->handle == NULL) goto error; - file->reader = any_reader_open (file->handle, &file->dict); + file->reader = any_reader_open_and_decode (file->handle, NULL, + &file->dict, NULL); if (file->reader == NULL) goto error; } @@ -262,7 +265,8 @@ combine_files (enum comb_command_type command, saw_sort = true; } - merge_dictionary (proc.dict, file); + if (!merge_dictionary (proc.dict, file)) + goto error; } while (lex_token (lexer) != T_ENDCMD) @@ -376,7 +380,7 @@ combine_files (enum comb_command_type command, { if (command == COMB_UPDATE) { - msg (SE, _("The BY subcommand is required.")); + lex_sbc_missing ("BY"); goto error; } if (n_tables) @@ -413,6 +417,7 @@ combine_files (enum comb_command_type command, size_t src_var_cnt = dict_get_var_cnt (file->dict); size_t j; + file->mv = xnmalloc (src_var_cnt, sizeof *file->mv); for (j = 0; j < src_var_cnt; j++) { struct variable *src_var = dict_get_var (file->dict, j); @@ -420,6 +425,8 @@ combine_files (enum comb_command_type command, var_get_name (src_var)); if (dst_var != NULL) { + size_t n = subcase_get_n_fields (&file->src); + file->mv[n] = var_get_missing_values (src_var); subcase_add_var (&file->src, src_var, SC_ASCEND); subcase_add_var (&file->dst, dst_var, SC_ASCEND); } @@ -439,7 +446,7 @@ combine_files (enum comb_command_type command, if (active_file == NULL) { proc_discard_output (ds); - file->reader = active_file = proc_open (ds); + file->reader = active_file = proc_open_filtering (ds, false); } else file->reader = casereader_clone (active_file); @@ -549,8 +556,10 @@ merge_dictionary (struct dictionary *const m, struct comb_file *f) if (var_get_width (mv) != var_get_width (dv)) { const char *var_name = var_get_name (dv); - const char *file_name = fh_get_name (f->handle); struct string s = DS_EMPTY_INITIALIZER; + const char *file_name; + + file_name = f->handle ? fh_get_name (f->handle) : "*"; ds_put_format (&s, _("Variable %s in file %s has different " "type or width from the same variable in " @@ -582,7 +591,7 @@ merge_dictionary (struct dictionary *const m, struct comb_file *f) if (var_has_missing_values (dv) && !var_has_missing_values (mv)) var_set_missing_values (mv, var_get_missing_values (dv)); if (var_get_label (dv) && !var_get_label (mv)) - var_set_label (mv, var_get_label (dv), false); + var_set_label (mv, var_get_label (dv)); } else mv = dict_clone_var_assert (m, dv); @@ -633,6 +642,7 @@ close_all_comb_files (struct comb_proc *proc) subcase_destroy (&file->by_vars); subcase_destroy (&file->src); subcase_destroy (&file->dst); + free (file->mv); fh_unref (file->handle); dict_destroy (file->dict); casereader_destroy (file->reader); @@ -665,8 +675,8 @@ free_comb_proc (struct comb_proc *proc) static bool scan_table (struct comb_file *, union value by[]); static struct ccase *create_output_case (const struct comb_proc *); static void apply_case (const struct comb_file *, struct ccase *); -static void apply_file_case_and_advance (struct comb_file *, struct ccase *, - union value by[]); +static void apply_nonmissing_case (const struct comb_file *, struct ccase *); +static void advance_file (struct comb_file *, union value by[]); static void output_case (struct comb_proc *, struct ccase *, union value by[]); static void output_buffered_case (struct comb_proc *); @@ -686,7 +696,8 @@ execute_add_files (struct comb_proc *proc) while (file->is_minimal) { struct ccase *output = create_output_case (proc); - apply_file_case_and_advance (file, output, by); + apply_case (file, output); + advance_file (file, by); output_case (proc, output, by); } } @@ -712,7 +723,10 @@ execute_match_files (struct comb_proc *proc) if (file->type == COMB_FILE) { if (file->is_minimal) - apply_file_case_and_advance (file, output, NULL); + { + apply_case (file, output); + advance_file (file, NULL); + } } else { @@ -743,7 +757,8 @@ execute_update (struct comb_proc *proc) for (first = &proc->files[0]; ; first++) if (first->is_minimal) break; - apply_file_case_and_advance (first, output, by); + apply_case (first, output); + advance_file (first, by); /* Read additional cases and update the output case from them. (Don't update the output case from any duplicate @@ -752,7 +767,10 @@ execute_update (struct comb_proc *proc) file < &proc->files[proc->n_files]; file++) { while (file->is_minimal) - apply_file_case_and_advance (file, output, by); + { + apply_nonmissing_case (file, output); + advance_file (file, by); + } } casewriter_write (proc->output, output); @@ -764,7 +782,8 @@ execute_update (struct comb_proc *proc) while (first->is_minimal) { output = create_output_case (proc); - apply_file_case_and_advance (first, output, by); + apply_case (first, output); + advance_file (first, by); casewriter_write (proc->output, output); } } @@ -821,25 +840,53 @@ create_output_case (const struct comb_proc *proc) return output; } +static void +mark_file_used (const struct comb_file *file, struct ccase *output) +{ + if (file->in_var != NULL) + case_data_rw (output, file->in_var)->f = true; +} + /* Copies the data from FILE's case into output case OUTPUT. If FILE has an IN variable, then it is set to 1 in OUTPUT. */ static void apply_case (const struct comb_file *file, struct ccase *output) { subcase_copy (&file->src, file->data, &file->dst, output); - if (file->in_var != NULL) - case_data_rw (output, file->in_var)->f = true; + mark_file_used (file, output); +} + +/* Copies the data from FILE's case into output case OUTPUT, + skipping values that are missing or all spaces. + + If FILE has an IN variable, then it is set to 1 in OUTPUT. */ +static void +apply_nonmissing_case (const struct comb_file *file, struct ccase *output) +{ + size_t i; + + for (i = 0; i < subcase_get_n_fields (&file->src); i++) + { + const struct subcase_field *src_field = &file->src.fields[i]; + const struct subcase_field *dst_field = &file->dst.fields[i]; + const union value *src_value + = case_data_idx (file->data, src_field->case_index); + int width = src_field->width; + + if (!mv_is_value_missing (file->mv[i], src_value, MV_ANY) + && !(width > 0 && value_is_spaces (src_value, width))) + value_copy (case_data_rw_idx (output, dst_field->case_index), + src_value, width); + } + mark_file_used (file, output); } -/* Like apply_case() above, but also advances FILE to its next - case. Also, if BY is nonnull, then FILE's is_minimal member - is updated based on whether the new case's BY values still - match those in BY. */ +/* Advances FILE to its next case. If BY is nonnull, then FILE's is_minimal + member is updated based on whether the new case's BY values still match + those in BY. */ static void -apply_file_case_and_advance (struct comb_file *file, struct ccase *output, - union value by[]) +advance_file (struct comb_file *file, union value by[]) { - apply_case (file, output); case_unref (file->data); file->data = casereader_read (file->reader); if (by)