X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fcombine-files.c;h=8322f5c03307f7e82597bcc5975a90621c0c7186;hb=1f37b5a7411fc50113943c6d30a1d95ccfc4b311;hp=5f82d1512be3a821dd01783f73bdc2049c0f93a6;hpb=fd2104e10011b87d6558e8623d629da4cee82b25;p=pspp diff --git a/src/language/data-io/combine-files.c b/src/language/data-io/combine-files.c index 5f82d1512b..8322f5c033 100644 --- a/src/language/data-io/combine-files.c +++ b/src/language/data-io/combine-files.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -69,6 +69,7 @@ struct comb_file /* Variables. */ struct subcase by_vars; /* BY variables in this input file. */ struct subcase src, dst; /* Data to copy to output; where to put it. */ + const struct missing_values **mv; /* Each variable's missing values. */ /* Input files. */ struct file_handle *handle; /* Input file handle. */ @@ -197,6 +198,7 @@ combine_files (enum comb_command_type command, subcase_init_empty (&file->by_vars); subcase_init_empty (&file->src); subcase_init_empty (&file->dst); + file->mv = NULL; file->handle = NULL; file->dict = NULL; file->reader = NULL; @@ -223,11 +225,12 @@ combine_files (enum comb_command_type command, } else { - file->handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH); + file->handle = fh_parse (lexer, FH_REF_FILE, dataset_session (ds)); if (file->handle == NULL) goto error; - file->reader = any_reader_open (file->handle, &file->dict); + file->reader = any_reader_open_and_decode (file->handle, NULL, + &file->dict, NULL); if (file->reader == NULL) goto error; } @@ -235,7 +238,7 @@ combine_files (enum comb_command_type command, while (lex_match (lexer, T_SLASH)) if (lex_match_id (lexer, "RENAME")) { - if (!parse_dict_rename (lexer, file->dict)) + if (!parse_dict_rename (lexer, file->dict, false)) goto error; } else if (lex_match_id (lexer, "IN")) @@ -262,7 +265,8 @@ combine_files (enum comb_command_type command, saw_sort = true; } - merge_dictionary (proc.dict, file); + if (!merge_dictionary (proc.dict, file)) + goto error; } while (lex_token (lexer) != T_ENDCMD) @@ -376,7 +380,7 @@ combine_files (enum comb_command_type command, { if (command == COMB_UPDATE) { - msg (SE, _("The BY subcommand is required.")); + lex_sbc_missing ("BY"); goto error; } if (n_tables) @@ -410,16 +414,19 @@ combine_files (enum comb_command_type command, for (i = 0; i < proc.n_files; i++) { struct comb_file *file = &proc.files[i]; - size_t src_var_cnt = dict_get_var_cnt (file->dict); + size_t src_n_vars = dict_get_n_vars (file->dict); size_t j; - for (j = 0; j < src_var_cnt; j++) + file->mv = xnmalloc (src_n_vars, sizeof *file->mv); + for (j = 0; j < src_n_vars; j++) { struct variable *src_var = dict_get_var (file->dict, j); struct variable *dst_var = dict_lookup_var (proc.dict, var_get_name (src_var)); if (dst_var != NULL) { + size_t n = subcase_get_n_fields (&file->src); + file->mv[n] = var_get_missing_values (src_var); subcase_add_var (&file->src, src_var, SC_ASCEND); subcase_add_var (&file->dst, dst_var, SC_ASCEND); } @@ -439,7 +446,7 @@ combine_files (enum comb_command_type command, if (active_file == NULL) { proc_discard_output (ds); - file->reader = active_file = proc_open (ds); + file->reader = active_file = proc_open_filtering (ds, false); } else file->reader = casereader_clone (active_file); @@ -510,7 +517,7 @@ merge_dictionary (struct dictionary *const m, struct comb_file *f) The correct thing to do would be to convert to an encoding which can cope with all the input files (eg UTF-8). */ - if ( 0 != strcmp (dict_get_encoding (f->dict), dict_get_encoding (m))) + if (0 != strcmp (dict_get_encoding (f->dict), dict_get_encoding (m))) msg (MW, _("Combining files with incompatible encodings. String data may " "not be represented correctly.")); @@ -536,7 +543,7 @@ merge_dictionary (struct dictionary *const m, struct comb_file *f) } } - for (i = 0; i < dict_get_var_cnt (d); i++) + for (i = 0; i < dict_get_n_vars (d); i++) { struct variable *dv = dict_get_var (d, i); struct variable *mv = dict_lookup_var (m, var_get_name (dv)); @@ -549,8 +556,10 @@ merge_dictionary (struct dictionary *const m, struct comb_file *f) if (var_get_width (mv) != var_get_width (dv)) { const char *var_name = var_get_name (dv); - const char *file_name = fh_get_name (f->handle); struct string s = DS_EMPTY_INITIALIZER; + const char *file_name; + + file_name = f->handle ? fh_get_name (f->handle) : "*"; ds_put_format (&s, _("Variable %s in file %s has different " "type or width from the same variable in " @@ -582,7 +591,7 @@ merge_dictionary (struct dictionary *const m, struct comb_file *f) if (var_has_missing_values (dv) && !var_has_missing_values (mv)) var_set_missing_values (mv, var_get_missing_values (dv)); if (var_get_label (dv) && !var_get_label (mv)) - var_set_label (mv, var_get_label (dv), false); + var_set_label (mv, var_get_label (dv)); } else mv = dict_clone_var_assert (m, dv); @@ -633,8 +642,9 @@ close_all_comb_files (struct comb_proc *proc) subcase_destroy (&file->by_vars); subcase_destroy (&file->src); subcase_destroy (&file->dst); + free (file->mv); fh_unref (file->handle); - dict_destroy (file->dict); + dict_unref (file->dict); casereader_destroy (file->reader); case_unref (file->data); free (file->in_name); @@ -649,7 +659,7 @@ static void free_comb_proc (struct comb_proc *proc) { close_all_comb_files (proc); - dict_destroy (proc->dict); + dict_unref (proc->dict); casewriter_destroy (proc->output); case_matcher_destroy (proc->matcher); if (proc->prev_BY) @@ -665,8 +675,8 @@ free_comb_proc (struct comb_proc *proc) static bool scan_table (struct comb_file *, union value by[]); static struct ccase *create_output_case (const struct comb_proc *); static void apply_case (const struct comb_file *, struct ccase *); -static void apply_file_case_and_advance (struct comb_file *, struct ccase *, - union value by[]); +static void apply_nonmissing_case (const struct comb_file *, struct ccase *); +static void advance_file (struct comb_file *, union value by[]); static void output_case (struct comb_proc *, struct ccase *, union value by[]); static void output_buffered_case (struct comb_proc *); @@ -686,7 +696,8 @@ execute_add_files (struct comb_proc *proc) while (file->is_minimal) { struct ccase *output = create_output_case (proc); - apply_file_case_and_advance (file, output, by); + apply_case (file, output); + advance_file (file, by); output_case (proc, output, by); } } @@ -706,13 +717,16 @@ execute_match_files (struct comb_proc *proc) size_t i; output = create_output_case (proc); - for (i = proc->n_files; i-- > 0; ) + for (i = proc->n_files; i-- > 0;) { struct comb_file *file = &proc->files[i]; if (file->type == COMB_FILE) { if (file->is_minimal) - apply_file_case_and_advance (file, output, NULL); + { + apply_case (file, output); + advance_file (file, NULL); + } } else { @@ -743,7 +757,8 @@ execute_update (struct comb_proc *proc) for (first = &proc->files[0]; ; first++) if (first->is_minimal) break; - apply_file_case_and_advance (first, output, by); + apply_case (first, output); + advance_file (first, by); /* Read additional cases and update the output case from them. (Don't update the output case from any duplicate @@ -752,7 +767,10 @@ execute_update (struct comb_proc *proc) file < &proc->files[proc->n_files]; file++) { while (file->is_minimal) - apply_file_case_and_advance (file, output, by); + { + apply_nonmissing_case (file, output); + advance_file (file, by); + } } casewriter_write (proc->output, output); @@ -764,7 +782,8 @@ execute_update (struct comb_proc *proc) while (first->is_minimal) { output = create_output_case (proc); - apply_file_case_and_advance (first, output, by); + apply_case (first, output); + advance_file (first, by); casewriter_write (proc->output, output); } } @@ -802,7 +821,7 @@ scan_table (struct comb_file *file, union value by[]) static struct ccase * create_output_case (const struct comb_proc *proc) { - size_t n_vars = dict_get_var_cnt (proc->dict); + size_t n_vars = dict_get_n_vars (proc->dict); struct ccase *output; size_t i; @@ -816,30 +835,58 @@ create_output_case (const struct comb_proc *proc) { struct comb_file *file = &proc->files[i]; if (file->in_var != NULL) - case_data_rw (output, file->in_var)->f = false; + *case_num_rw (output, file->in_var) = false; } return output; } +static void +mark_file_used (const struct comb_file *file, struct ccase *output) +{ + if (file->in_var != NULL) + *case_num_rw (output, file->in_var) = true; +} + /* Copies the data from FILE's case into output case OUTPUT. If FILE has an IN variable, then it is set to 1 in OUTPUT. */ static void apply_case (const struct comb_file *file, struct ccase *output) { subcase_copy (&file->src, file->data, &file->dst, output); - if (file->in_var != NULL) - case_data_rw (output, file->in_var)->f = true; + mark_file_used (file, output); +} + +/* Copies the data from FILE's case into output case OUTPUT, + skipping values that are missing or all spaces. + + If FILE has an IN variable, then it is set to 1 in OUTPUT. */ +static void +apply_nonmissing_case (const struct comb_file *file, struct ccase *output) +{ + size_t i; + + for (i = 0; i < subcase_get_n_fields (&file->src); i++) + { + const struct subcase_field *src_field = &file->src.fields[i]; + const struct subcase_field *dst_field = &file->dst.fields[i]; + const union value *src_value + = case_data_idx (file->data, src_field->case_index); + int width = src_field->width; + + if (!mv_is_value_missing (file->mv[i], src_value) + && !(width > 0 && value_is_spaces (src_value, width))) + value_copy (case_data_rw_idx (output, dst_field->case_index), + src_value, width); + } + mark_file_used (file, output); } -/* Like apply_case() above, but also advances FILE to its next - case. Also, if BY is nonnull, then FILE's is_minimal member - is updated based on whether the new case's BY values still - match those in BY. */ +/* Advances FILE to its next case. If BY is nonnull, then FILE's is_minimal + member is updated based on whether the new case's BY values still match + those in BY. */ static void -apply_file_case_and_advance (struct comb_file *file, struct ccase *output, - union value by[]) +advance_file (struct comb_file *file, union value by[]) { - apply_case (file, output); case_unref (file->data); file->data = casereader_read (file->reader); if (by) @@ -866,7 +913,7 @@ output_case (struct comb_proc *proc, struct ccase *output, union value by[]) { new_BY = !subcase_equal_xx (&proc->by_vars, proc->prev_BY, by); if (proc->last != NULL) - case_data_rw (proc->buffered_case, proc->last)->f = new_BY; + *case_num_rw (proc->buffered_case, proc->last) = new_BY; casewriter_write (proc->output, proc->buffered_case); } else @@ -874,7 +921,7 @@ output_case (struct comb_proc *proc, struct ccase *output, union value by[]) proc->buffered_case = output; if (proc->first != NULL) - case_data_rw (proc->buffered_case, proc->first)->f = new_BY; + *case_num_rw (proc->buffered_case, proc->first) = new_BY; if (new_BY) { @@ -899,7 +946,7 @@ output_buffered_case (struct comb_proc *proc) if (proc->prev_BY != NULL) { if (proc->last != NULL) - case_data_rw (proc->buffered_case, proc->last)->f = 1.0; + *case_num_rw (proc->buffered_case, proc->last) = 1.0; casewriter_write (proc->output, proc->buffered_case); proc->buffered_case = NULL; }