X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fcombine-files.c;h=6eb9a3181a5f741281f480e9254baf4e48111130;hb=5f9212b9af772575bd3026ca9643684ce6493b3c;hp=21736da8c645239dec3dadcde4cae59765d0f712;hpb=21559edd9991628d96df331e5b391ca6bade3497;p=pspp diff --git a/src/language/data-io/combine-files.c b/src/language/data-io/combine-files.c index 21736da8c6..6eb9a3181a 100644 --- a/src/language/data-io/combine-files.c +++ b/src/language/data-io/combine-files.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -69,6 +69,7 @@ struct comb_file /* Variables. */ struct subcase by_vars; /* BY variables in this input file. */ struct subcase src, dst; /* Data to copy to output; where to put it. */ + const struct missing_values **mv; /* Each variable's missing values. */ /* Input files. */ struct file_handle *handle; /* Input file handle. */ @@ -197,6 +198,7 @@ combine_files (enum comb_command_type command, subcase_init_empty (&file->by_vars); subcase_init_empty (&file->src); subcase_init_empty (&file->dst); + file->mv = NULL; file->handle = NULL; file->dict = NULL; file->reader = NULL; @@ -262,7 +264,8 @@ combine_files (enum comb_command_type command, saw_sort = true; } - merge_dictionary (proc.dict, file); + if (!merge_dictionary (proc.dict, file)) + goto error; } while (lex_token (lexer) != T_ENDCMD) @@ -413,6 +416,7 @@ combine_files (enum comb_command_type command, size_t src_var_cnt = dict_get_var_cnt (file->dict); size_t j; + file->mv = xnmalloc (src_var_cnt, sizeof *file->mv); for (j = 0; j < src_var_cnt; j++) { struct variable *src_var = dict_get_var (file->dict, j); @@ -420,6 +424,8 @@ combine_files (enum comb_command_type command, var_get_name (src_var)); if (dst_var != NULL) { + size_t n = subcase_get_n_fields (&file->src); + file->mv[n] = var_get_missing_values (src_var); subcase_add_var (&file->src, src_var, SC_ASCEND); subcase_add_var (&file->dst, dst_var, SC_ASCEND); } @@ -439,7 +445,7 @@ combine_files (enum comb_command_type command, if (active_file == NULL) { proc_discard_output (ds); - file->reader = active_file = proc_open (ds); + file->reader = active_file = proc_open_filtering (ds, false); } else file->reader = casereader_clone (active_file); @@ -549,8 +555,10 @@ merge_dictionary (struct dictionary *const m, struct comb_file *f) if (var_get_width (mv) != var_get_width (dv)) { const char *var_name = var_get_name (dv); - const char *file_name = fh_get_name (f->handle); struct string s = DS_EMPTY_INITIALIZER; + const char *file_name; + + file_name = f->handle ? fh_get_name (f->handle) : "*"; ds_put_format (&s, _("Variable %s in file %s has different " "type or width from the same variable in " @@ -582,7 +590,7 @@ merge_dictionary (struct dictionary *const m, struct comb_file *f) if (var_has_missing_values (dv) && !var_has_missing_values (mv)) var_set_missing_values (mv, var_get_missing_values (dv)); if (var_get_label (dv) && !var_get_label (mv)) - var_set_label (mv, var_get_label (dv), false); + var_set_label (mv, var_get_label (dv)); } else mv = dict_clone_var_assert (m, dv); @@ -633,6 +641,7 @@ close_all_comb_files (struct comb_proc *proc) subcase_destroy (&file->by_vars); subcase_destroy (&file->src); subcase_destroy (&file->dst); + free (file->mv); fh_unref (file->handle); dict_destroy (file->dict); casereader_destroy (file->reader); @@ -665,8 +674,8 @@ free_comb_proc (struct comb_proc *proc) static bool scan_table (struct comb_file *, union value by[]); static struct ccase *create_output_case (const struct comb_proc *); static void apply_case (const struct comb_file *, struct ccase *); -static void apply_file_case_and_advance (struct comb_file *, struct ccase *, - union value by[]); +static void apply_nonmissing_case (const struct comb_file *, struct ccase *); +static void advance_file (struct comb_file *, union value by[]); static void output_case (struct comb_proc *, struct ccase *, union value by[]); static void output_buffered_case (struct comb_proc *); @@ -686,7 +695,8 @@ execute_add_files (struct comb_proc *proc) while (file->is_minimal) { struct ccase *output = create_output_case (proc); - apply_file_case_and_advance (file, output, by); + apply_case (file, output); + advance_file (file, by); output_case (proc, output, by); } } @@ -712,7 +722,10 @@ execute_match_files (struct comb_proc *proc) if (file->type == COMB_FILE) { if (file->is_minimal) - apply_file_case_and_advance (file, output, NULL); + { + apply_case (file, output); + advance_file (file, NULL); + } } else { @@ -743,7 +756,8 @@ execute_update (struct comb_proc *proc) for (first = &proc->files[0]; ; first++) if (first->is_minimal) break; - apply_file_case_and_advance (first, output, by); + apply_case (first, output); + advance_file (first, by); /* Read additional cases and update the output case from them. (Don't update the output case from any duplicate @@ -752,7 +766,10 @@ execute_update (struct comb_proc *proc) file < &proc->files[proc->n_files]; file++) { while (file->is_minimal) - apply_file_case_and_advance (file, output, by); + { + apply_nonmissing_case (file, output); + advance_file (file, by); + } } casewriter_write (proc->output, output); @@ -764,7 +781,8 @@ execute_update (struct comb_proc *proc) while (first->is_minimal) { output = create_output_case (proc); - apply_file_case_and_advance (first, output, by); + apply_case (first, output); + advance_file (first, by); casewriter_write (proc->output, output); } } @@ -821,25 +839,53 @@ create_output_case (const struct comb_proc *proc) return output; } +static void +mark_file_used (const struct comb_file *file, struct ccase *output) +{ + if (file->in_var != NULL) + case_data_rw (output, file->in_var)->f = true; +} + /* Copies the data from FILE's case into output case OUTPUT. If FILE has an IN variable, then it is set to 1 in OUTPUT. */ static void apply_case (const struct comb_file *file, struct ccase *output) { subcase_copy (&file->src, file->data, &file->dst, output); - if (file->in_var != NULL) - case_data_rw (output, file->in_var)->f = true; + mark_file_used (file, output); +} + +/* Copies the data from FILE's case into output case OUTPUT, + skipping values that are missing or all spaces. + + If FILE has an IN variable, then it is set to 1 in OUTPUT. */ +static void +apply_nonmissing_case (const struct comb_file *file, struct ccase *output) +{ + size_t i; + + for (i = 0; i < subcase_get_n_fields (&file->src); i++) + { + const struct subcase_field *src_field = &file->src.fields[i]; + const struct subcase_field *dst_field = &file->dst.fields[i]; + const union value *src_value + = case_data_idx (file->data, src_field->case_index); + int width = src_field->width; + + if (!mv_is_value_missing (file->mv[i], src_value, MV_ANY) + && !(width > 0 && value_is_spaces (src_value, width))) + value_copy (case_data_rw_idx (output, dst_field->case_index), + src_value, width); + } + mark_file_used (file, output); } -/* Like apply_case() above, but also advances FILE to its next - case. Also, if BY is nonnull, then FILE's is_minimal member - is updated based on whether the new case's BY values still - match those in BY. */ +/* Advances FILE to its next case. If BY is nonnull, then FILE's is_minimal + member is updated based on whether the new case's BY values still match + those in BY. */ static void -apply_file_case_and_advance (struct comb_file *file, struct ccase *output, - union value by[]) +advance_file (struct comb_file *file, union value by[]) { - apply_case (file, output); case_unref (file->data); file->data = casereader_read (file->reader); if (by)