1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 #include <data/any-reader.h>
24 #include <data/any-writer.h>
25 #include <data/case.h>
26 #include <data/casereader.h>
27 #include <data/casewriter.h>
28 #include <data/format.h>
29 #include <data/dictionary.h>
30 #include <data/por-file-writer.h>
31 #include <data/procedure.h>
32 #include <data/settings.h>
33 #include <data/sys-file-writer.h>
34 #include <data/transformations.h>
35 #include <data/value-labels.h>
36 #include <data/variable.h>
37 #include <language/command.h>
38 #include <language/data-io/file-handle.h>
39 #include <language/lexer/lexer.h>
40 #include <language/lexer/variable-parser.h>
41 #include <libpspp/alloc.h>
42 #include <libpspp/assertion.h>
43 #include <libpspp/compiler.h>
44 #include <libpspp/hash.h>
45 #include <libpspp/message.h>
46 #include <libpspp/misc.h>
47 #include <libpspp/str.h>
48 #include <libpspp/taint.h>
51 #define _(msgid) gettext (msgid)
53 /* Rearranging and reducing a dictionary. */
54 static void start_case_map (struct dictionary *);
55 static struct case_map *finish_case_map (struct dictionary *);
56 static void map_case (const struct case_map *,
57 const struct ccase *, struct ccase *);
58 static void destroy_case_map (struct case_map *);
60 static bool parse_dict_trim (struct lexer *, struct dictionary *);
62 /* Reading system and portable files. */
64 /* Type of command. */
71 static void get_translate_case (const struct ccase *, struct ccase *,
73 static bool get_destroy_case_map (void *map_);
75 /* Parses a GET or IMPORT command. */
/* Visible steps: obtain a file handle from the FILE subcommand (or a
   bare string token), accept the TYPE subcommand for IMPORT only, open
   the file with any_reader_open(), let parse_dict_trim() edit the
   dictionary between start_case_map() and finish_case_map(), and hand
   the reader and dictionary to proc_set_active_file().
   NOTE(review): this extract elides some lines of the function (the
   embedded numbering has gaps), so the conditions guarding individual
   statements, e.g. whether the translator is created only for a
   non-null map, are not shown here. */
77 parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command type)
79 struct casereader *reader = NULL;
80 struct file_handle *fh = NULL;
81 struct dictionary *dict = NULL;
82 struct case_map *map = NULL;
86 lex_match (lexer, '/');
88 if (lex_match_id (lexer, "FILE") || lex_token (lexer) == T_STRING)
90 lex_match (lexer, '=');
92 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
96 else if (type == IMPORT_CMD && lex_match_id (lexer, "TYPE"))
98 lex_match (lexer, '=');
100 if (lex_match_id (lexer, "COMM"))
102 else if (lex_match_id (lexer, "TAPE"))
106 lex_error (lexer, _("expecting COMM or TAPE"));
116 lex_sbc_missing (lexer, "FILE");
120 reader = any_reader_open (fh, &dict);
/* Record the dictionary's layout as read from the file before any
   trimming subcommand changes it. */
124 start_case_map (dict);
126 while (lex_token (lexer) != '.')
128 lex_match (lexer, '/');
129 if (!parse_dict_trim (lexer, dict))
133 map = finish_case_map (dict);
135 reader = casereader_create_translator (reader,
136 dict_get_next_value_idx (dict),
138 get_destroy_case_map,
141 proc_set_active_file (ds, reader, dict);
/* Visible failure handling: the reader is destroyed and a cascading
   failure is reported (presumably the shared error exit -- the label
   itself is not visible in this extract). */
146 casereader_destroy (reader);
149 return CMD_CASCADING_FAILURE;
/* Case translation callback: treats MAP_ as a struct case_map and
   applies it with map_case(), reading from INPUT and storing into
   OUTPUT. */
153 get_translate_case (const struct ccase *input, struct ccase *output,
156 struct case_map *map = map_;
157 map_case (map, input, output);
/* Destructor callback: treats MAP_ as a struct case_map and destroys
   it with destroy_case_map().  The prototype declares a bool result;
   the return statement is not visible in this extract. */
161 get_destroy_case_map (void *map_)
163 struct case_map *map = map_;
164 destroy_case_map (map);
/* GET command entry point: delegates to parse_read_command() with
   GET_CMD and returns its result. */
170 cmd_get (struct lexer *lexer, struct dataset *ds)
172 return parse_read_command (lexer, ds, GET_CMD);
/* IMPORT command entry point: delegates to parse_read_command() with
   IMPORT_CMD, which additionally enables the TYPE subcommand there. */
177 cmd_import (struct lexer *lexer, struct dataset *ds)
179 return parse_read_command (lexer, ds, IMPORT_CMD);
182 /* Writing system and portable files. */
184 /* Type of output file. */
/* Passed as WRITER_TYPE to parse_write_command(), which accepts the
   COMPRESSED, UNCOMPRESSED and VERSION subcommands only for
   SYSFILE_WRITER, and TYPE and DIGITS only for PORFILE_WRITER. */
187 SYSFILE_WRITER, /* System file. */
188 PORFILE_WRITER /* Portable file. */
191 /* Type of a command. */
/* Passed as COMMAND_TYPE to parse_write_command(), which accepts the
   UNSELECTED subcommand, and asserts a non-null RETAIN_UNSELECTED
   argument, only for PROC_CMD. */
194 XFORM_CMD, /* Transformation. */
195 PROC_CMD /* Procedure. */
198 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
199 WRITER_TYPE identifies the type of file to write,
200 and COMMAND_TYPE identifies the type of command.
202 On success, returns a writer.
203 For procedures only, sets *RETAIN_UNSELECTED to true if cases
204 that would otherwise be excluded by FILTER or USE should be
207 On failure, returns a null pointer. */
208 static struct casewriter *
209 parse_write_command (struct lexer *lexer, struct dataset *ds,
210 enum writer_type writer_type,
211 enum command_type command_type,
212 bool *retain_unselected)
215 struct file_handle *handle; /* Output file. */
216 struct dictionary *dict; /* Dictionary for output file. */
217 struct casewriter *writer; /* Writer. */
218 struct case_map *map; /* Map from input data to data for writer. */
220 /* Common options. */
221 bool print_map; /* Print map? TODO. */
222 bool print_short_names; /* Print long-to-short name map. TODO. */
223 struct sfm_write_options sysfile_opts;
224 struct pfm_write_options porfile_opts;
226 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
227 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
/* RETAIN_UNSELECTED must be supplied exactly when parsing a
   procedure. */
228 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
/* Procedures default to retaining unselected cases; UNSELECTED=DELETE
   below turns this off. */
230 if (command_type == PROC_CMD)
231 *retain_unselected = true;
/* Work on a clone, so the subcommands below edit DICT rather than the
   dictionary returned by dataset_dict(). */
234 dict = dict_clone (dataset_dict (ds));
238 print_short_names = false;
239 sysfile_opts = sfm_writer_default_options ();
240 porfile_opts = pfm_writer_default_options ();
/* The case map is started before scratch variables are deleted, so
   finish_case_map() sees that deletion along with every later
   change. */
242 start_case_map (dict);
243 dict_delete_scratch_vars (dict);
245 lex_match (lexer, '/');
248 if (lex_match_id (lexer, "OUTFILE"))
252 lex_sbc_only_once ("OUTFILE");
256 lex_match (lexer, '=');
258 handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
262 else if (lex_match_id (lexer, "NAMES"))
263 print_short_names = true;
264 else if (lex_match_id (lexer, "PERMISSIONS"))
268 lex_match (lexer, '=');
269 if (lex_match_id (lexer, "READONLY"))
271 else if (lex_match_id (lexer, "WRITEABLE"))
275 lex_error (lexer, _("expecting %s or %s"), "READONLY", "WRITEABLE");
/* The permission setting applies to both kinds of writer options. */
278 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
280 else if (command_type == PROC_CMD && lex_match_id (lexer, "UNSELECTED"))
282 lex_match (lexer, '=');
283 if (lex_match_id (lexer, "RETAIN"))
284 *retain_unselected = true;
285 else if (lex_match_id (lexer, "DELETE"))
286 *retain_unselected = false;
289 lex_error (lexer, _("expecting %s or %s"), "RETAIN", "DELETE");
293 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "COMPRESSED"))
294 sysfile_opts.compress = true;
295 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "UNCOMPRESSED"))
296 sysfile_opts.compress = false;
297 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "VERSION"))
299 lex_match (lexer, '=');
300 if (!lex_force_int (lexer))
302 sysfile_opts.version = lex_integer (lexer);
305 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "TYPE"))
307 lex_match (lexer, '=');
308 if (lex_match_id (lexer, "COMMUNICATIONS"))
309 porfile_opts.type = PFM_COMM;
310 else if (lex_match_id (lexer, "TAPE"))
311 porfile_opts.type = PFM_TAPE;
/* NOTE(review): the keyword matched above is "COMMUNICATIONS" but the
   message names "COMM"; confirm whether lex_match_id() accepts
   abbreviations, otherwise the message is misleading. */
314 lex_error (lexer, _("expecting %s or %s"), "COMM", "TAPE");
318 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "DIGITS"))
320 lex_match (lexer, '=');
321 if (!lex_force_int (lexer))
323 porfile_opts.digits = lex_integer (lexer);
/* Anything not recognized above is tried as a dictionary trimming
   subcommand. */
326 else if (!parse_dict_trim (lexer, dict))
329 if (!lex_match (lexer, '/'))
332 if (lex_end_of_command (lexer) != CMD_SUCCESS)
337 lex_sbc_missing (lexer, "OUTFILE");
/* NOTE(review): dict_compact_values() presumably renumbers case
   indexes so that the written cases carry no gaps -- confirm against
   its definition.  The case map is finished only after this call. */
341 dict_compact_values (dict);
343 if (fh_get_referent (handle) == FH_REF_FILE)
348 writer = sfm_open_writer (handle, dict, sysfile_opts);
351 writer = pfm_open_writer (handle, dict, porfile_opts);
356 writer = any_writer_open (handle, dict);
360 map = finish_case_map (dict);
362 writer = casewriter_create_translator (writer,
364 get_destroy_case_map,
/* Visible cleanup calls; the control flow that reaches them is elided
   from this extract. */
371 casewriter_destroy (writer);
373 destroy_case_map (map);
377 /* SAVE and EXPORT. */
379 /* Parses and performs the SAVE or EXPORT procedure. */
/* Returns CMD_CASCADING_FAILURE if parsing fails, and otherwise
   CMD_SUCCESS only when OK ends up true, that is, when both
   casewriter_destroy() and proc_commit() report success. */
381 parse_output_proc (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
383 bool retain_unselected;
384 struct variable *saved_filter_variable;
385 struct casewriter *output;
388 output = parse_write_command (lexer, ds, writer_type, PROC_CMD,
391 return CMD_CASCADING_FAILURE;
/* Remember the filter variable: it is cleared while the cases are
   written when unselected cases are to be retained, and it is put back
   once the procedure has been committed. */
393 saved_filter_variable = dict_get_filter (dataset_dict (ds));
394 if (retain_unselected)
395 dict_set_filter (dataset_dict (ds), NULL);
397 casereader_transfer (proc_open (ds), output);
398 ok = casewriter_destroy (output);
/* proc_commit() is the left operand, so it runs even when destroying
   the writer reported a failure. */
399 ok = proc_commit (ds) && ok;
401 dict_set_filter (dataset_dict (ds), saved_filter_variable);
403 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
/* SAVE command entry point: runs parse_output_proc() with
   SYSFILE_WRITER. */
407 cmd_save (struct lexer *lexer, struct dataset *ds)
409 return parse_output_proc (lexer, ds, SYSFILE_WRITER);
/* EXPORT command entry point: runs parse_output_proc() with
   PORFILE_WRITER. */
413 cmd_export (struct lexer *lexer, struct dataset *ds)
415 return parse_output_proc (lexer, ds, PORFILE_WRITER);
418 /* XSAVE and XEXPORT. */
420 /* Transformation. */
/* State for one XSAVE or XEXPORT transformation: allocated in
   parse_output_trns(), used by output_trns_proc() to write cases and by
   output_trns_free() to destroy the writer. */
423 struct casewriter *writer; /* Writer. */
426 static trns_proc_func output_trns_proc;
427 static trns_free_func output_trns_free;
429 /* Parses the XSAVE or XEXPORT transformation command. */
/* The writer comes from parse_write_command() in XFORM_CMD mode, which
   is why RETAIN_UNSELECTED is passed as NULL.  A null writer yields
   CMD_CASCADING_FAILURE; otherwise the new output_trns is registered
   with add_transformation(), with output_trns_proc and
   output_trns_free as its callbacks. */
431 parse_output_trns (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
433 struct output_trns *t = xmalloc (sizeof *t);
434 t->writer = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL);
435 if (t->writer == NULL)
438 return CMD_CASCADING_FAILURE;
441 add_transformation (ds, output_trns_proc, output_trns_free, t);
445 /* Writes case C to the system file specified on XSAVE or XEXPORT. */
/* TRNS_ is the struct output_trns registered by parse_output_trns().
   CASE_NUM is unused.  Always returns TRNS_CONTINUE. */
447 output_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED)
449 struct output_trns *t = trns_;
/* A clone of C is written rather than C itself; presumably
   casewriter_write() takes over the case it is given -- TODO confirm. */
451 case_clone (&tmp, c);
452 casewriter_write (t->writer, &tmp);
453 return TRNS_CONTINUE;
456 /* Frees an XSAVE or XEXPORT transformation.
457 Returns true if successful, false if an I/O error occurred. */
/* TRNS_ is the struct output_trns registered by parse_output_trns().
   The result of casewriter_destroy() is kept in OK; the statements
   that release T and return are not visible in this extract. */
459 output_trns_free (void *trns_)
461 struct output_trns *t = trns_;
462 bool ok = casewriter_destroy (t->writer);
/* XSAVE command entry point: runs parse_output_trns() with
   SYSFILE_WRITER. */
469 cmd_xsave (struct lexer *lexer, struct dataset *ds)
471 return parse_output_trns (lexer, ds, SYSFILE_WRITER);
474 /* XEXPORT command. */
/* Entry point: runs parse_output_trns() with PORFILE_WRITER. */
476 cmd_xexport (struct lexer *lexer, struct dataset *ds)
478 return parse_output_trns (lexer, ds, PORFILE_WRITER);
481 static bool rename_variables (struct lexer *lexer, struct dictionary *dict);
482 static bool drop_variables (struct lexer *, struct dictionary *dict);
483 static bool keep_variables (struct lexer *, struct dictionary *dict);
485 /* Commands that read and write system files share a great deal
486 of common syntactic structure for rearranging and dropping
487 variables. This function parses this syntax and modifies DICT
488 appropriately. Returns true on success, false on failure. */
/* DROP, KEEP and RENAME are delegated to drop_variables(),
   keep_variables() and rename_variables() respectively.  MAP is
   recognized as well, but the statements that handle it are not
   visible in this extract.  Any other token is reported through
   lex_error(). */
490 parse_dict_trim (struct lexer *lexer, struct dictionary *dict)
492 if (lex_match_id (lexer, "MAP"))
497 else if (lex_match_id (lexer, "DROP"))
498 return drop_variables (lexer, dict);
499 else if (lex_match_id (lexer, "KEEP"))
500 return keep_variables (lexer, dict);
501 else if (lex_match_id (lexer, "RENAME"))
502 return rename_variables (lexer, dict);
505 lex_error (lexer, _("expecting a valid subcommand"));
510 /* Parses and performs the RENAME subcommand of GET and SAVE. */
/* Two syntaxes are visible.  When the token after the optional '=' is
   not '(', a single "old = new" pair is parsed and applied with
   dict_rename_var(), after checking that the new name is not already
   in DICT.  Otherwise each parenthesized group appends to the old
   variable list and the new name list (PV_APPEND), and the collected
   renames are applied together with dict_rename_vars().
   NOTE(review): declarations and several statements are elided from
   this extract, so the cleanup of V and NEW_NAMES is only partly
   visible (the final loop over NN). */
512 rename_variables (struct lexer *lexer, struct dictionary *dict)
525 lex_match (lexer, '=');
526 if (lex_token (lexer) != '(')
530 v = parse_variable (lexer, dict);
533 if (!lex_force_match (lexer, '=')
534 || !lex_force_id (lexer))
536 if (dict_lookup_var (dict, lex_tokid (lexer)) != NULL)
538 msg (SE, _("Cannot rename %s as %s because there already exists "
539 "a variable named %s. To rename variables with "
540 "overlapping names, use a single RENAME subcommand "
541 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
542 "\"/RENAME (A B C=B C A)\"."),
543 var_get_name (v), lex_tokid (lexer), lex_tokid (lexer));
547 dict_rename_var (dict, v, lex_tokid (lexer));
556 while (lex_match (lexer, '('))
560 if (!parse_variables (lexer, dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
562 if (!lex_match (lexer, '='))
564 msg (SE, _("`=' expected after variable list."));
567 if (!parse_DATA_LIST_vars (lexer, &new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
/* NOTE(review): the two counts are cast to unsigned but printed with
   %d; %u would match the argument type.  Both counts are computed
   relative to OLD_NV, whose definition is not visible here. */
571 msg (SE, _("Number of variables on left side of `=' (%d) does not "
572 "match number of variables on right side (%d), in "
573 "parenthesized group %d of RENAME subcommand."),
574 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
577 if (!lex_force_match (lexer, ')'))
582 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
584 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
590 for (i = 0; i < nn; i++)
598 /* Parses and performs the DROP subcommand of GET and SAVE.
599 Returns true if successful, false on failure.*/
/* The listed variables are deleted from DICT first; only afterwards is
   the dictionary checked for being empty, so by the time the "Cannot
   DROP all variables" error is issued the deletion has already
   happened. */
601 drop_variables (struct lexer *lexer, struct dictionary *dict)
606 lex_match (lexer, '=');
607 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
609 dict_delete_vars (dict, v, nv);
612 if (dict_get_var_cnt (dict) == 0)
614 msg (SE, _("Cannot DROP all variables from dictionary."));
620 /* Parses and performs the KEEP subcommand of GET and SAVE.
621 Returns true if successful, false on failure.*/
/* Implemented as reorder-then-delete: the kept variables are moved to
   the front of DICT, and every variable from index NV onward is then
   deleted. */
623 keep_variables (struct lexer *lexer, struct dictionary *dict)
629 lex_match (lexer, '=');
630 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
633 /* Move the specified variables to the beginning. */
634 dict_reorder_vars (dict, v, nv);
636 /* Delete the remaining variables. */
/* V is reused: it is resized to hold the variables that are not kept
   and refilled from dictionary positions NV and up. */
637 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
638 for (i = nv; i < dict_get_var_cnt (dict); i++)
639 v[i - nv] = dict_get_var (dict, i);
640 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
/* Kinds of MATCH FILES input, stored in the type member of
   struct mtf_file. */
651 MTF_FILE, /* Specified on FILE= subcommand. */
652 MTF_TABLE /* Specified on TABLE= subcommand. */
655 /* One of the files on MATCH FILES. */
/* Files form a doubly linked list through next and prev, headed by the
   head and tail members of struct mtf_proc.  next_min threads the
   temporary chains that mtf_processing() builds for each output
   record. */
658 struct mtf_file *next, *prev; /* Next, previous in the list of files. */
659 struct mtf_file *next_min; /* Next in the chain of minimums. */
661 int type; /* One of MTF_*. */
662 const struct variable **by; /* List of BY variables for this file. */
663 struct file_handle *handle; /* File handle. */
664 struct casereader *reader; /* File reader. */
665 struct dictionary *dict; /* Dictionary from system file. */
666 bool active_file; /* Active file? */
669 char *in_name; /* Variable name. */
670 struct variable *in_var; /* Variable (in master dictionary). */
672 struct ccase input; /* Input record. */
675 /* MATCH FILES procedure. */
/* NOTE(review): mtf_processing() compares seq_nums[i] with seq_num to
   tell whether master variable I has already been stored for the
   current output record; the trailing comment on seq_num below does
   not describe that use and looks misplaced. */
678 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
679 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
681 bool ok; /* False if I/O error occurs. */
683 size_t by_cnt; /* Number of variables on BY subcommand. */
685 /* Names of FIRST, LAST variables. */
686 char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
688 struct dictionary *dict; /* Dictionary of output file. */
689 struct casewriter *output; /* MATCH FILES output. */
690 struct ccase mtf_case; /* Case used for output. */
692 unsigned seq_num; /* Have we initialized this variable? */
693 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
696 static bool mtf_free (struct mtf_proc *);
697 static bool mtf_close_file (struct mtf_file *);
698 static bool mtf_close_all_files (struct mtf_proc *);
699 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
700 static bool mtf_read_records (struct mtf_proc *);
701 static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
703 static bool mtf_processing (struct mtf_proc *);
705 static char *var_type_description (struct variable *);
707 static void set_master (struct variable *, struct variable *master);
708 static struct variable *get_master (struct variable *);
710 /* Parse and execute the MATCH FILES command. */
/* Phases visible below:
   1. Parse each FILE or TABLE subcommand, linking a new mtf_file into
      the list, opening its reader (or marking it as the active file),
      handling its RENAME and IN subcommands, and merging its
      dictionary into mtf.dict.
   2. Parse BY, FIRST, LAST, MAP, DROP and KEEP.
   3. Look up the master variable for each file variable and create
      the IN variables in the master dictionary.
   4. Run the merge and install the output as the active file.
   NOTE(review): many lines of this function are elided from this
   extract (the embedded numbering has gaps), including most error
   exits, so guarding conditions are only partly shown. */
712 cmd_match_files (struct lexer *lexer, struct dataset *ds)
715 struct mtf_file *first_table = NULL;
716 struct mtf_file *iter;
718 bool used_active_file = false;
719 bool saw_table = false;
721 bool open_active_file = false;
723 mtf.head = mtf.tail = NULL;
727 mtf.dict = dict_create ();
729 case_nullify (&mtf.mtf_case);
732 dict_set_case_limit (mtf.dict, dict_get_case_limit (dataset_dict (ds)));
734 lex_match (lexer, '/');
735 while (lex_token (lexer) == T_ID
736 && (lex_id_match (ss_cstr ("FILE"), ss_cstr (lex_tokid (lexer)))
737 || lex_id_match (ss_cstr ("TABLE"), ss_cstr (lex_tokid (lexer)))))
739 struct mtf_file *file = xmalloc (sizeof *file);
741 if (lex_match_id (lexer, "FILE"))
742 file->type = MTF_FILE;
743 else if (lex_match_id (lexer, "TABLE"))
745 file->type = MTF_TABLE;
750 lex_match (lexer, '=');
756 file->in_name = NULL;
758 file->active_file = false;
759 case_nullify (&file->input);
761 /* FILEs go first, then TABLEs. */
762 if (file->type == MTF_TABLE || first_table == NULL)
765 file->prev = mtf.tail;
767 mtf.tail->next = file;
769 if (mtf.head == NULL)
771 if (file->type == MTF_TABLE && first_table == NULL)
776 assert (file->type == MTF_FILE);
777 file->next = first_table;
778 file->prev = first_table->prev;
779 if (first_table->prev)
780 first_table->prev->next = file;
783 first_table->prev = file;
786 if (lex_match (lexer, '*'))
791 if (used_active_file)
793 msg (SE, _("The active file may not be specified more "
797 used_active_file = true;
799 if (!proc_has_active_file (ds))
801 msg (SE, _("Cannot specify the active file since no active "
802 "file has been defined."));
806 if (proc_make_temporary_transformations_permanent (ds))
808 _("MATCH FILES may not be used after TEMPORARY when "
809 "the active file is an input source. "
810 "Temporary transformations will be made permanent."));
/* For the active file the dataset's own dictionary is used directly,
   which is why mtf_close_file() must not destroy it. */
812 file->dict = dataset_dict (ds);
813 file->active_file = true;
817 file->handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
818 if (file->handle == NULL)
821 file->reader = any_reader_open (file->handle, &file->dict);
822 if (file->reader == NULL)
826 while (lex_match (lexer, '/'))
827 if (lex_match_id (lexer, "RENAME"))
829 if (!rename_variables (lexer, file->dict))
832 else if (lex_match_id (lexer, "IN"))
834 lex_match (lexer, '=');
835 if (lex_token (lexer) != T_ID)
837 lex_error (lexer, NULL);
841 if (file->in_name != NULL)
843 msg (SE, _("Multiple IN subcommands for a single FILE or "
847 file->in_name = xstrdup (lex_tokid (lexer));
852 mtf_merge_dictionary (mtf.dict, file);
855 while (lex_token (lexer) != '.')
857 if (lex_match (lexer, T_BY))
859 const struct variable **by;
863 msg (SE, _("BY may appear at most once."));
867 lex_match (lexer, '=');
868 if (!parse_variables_const (lexer, mtf.dict, &by, &mtf.by_cnt,
869 PV_NO_DUPLICATE | PV_NO_SCRATCH))
872 for (iter = mtf.head; iter != NULL; iter = iter->next)
876 iter->by = xnmalloc (mtf.by_cnt, sizeof *iter->by);
878 for (i = 0; i < mtf.by_cnt; i++)
880 iter->by[i] = dict_lookup_var (iter->dict,
881 var_get_name (by[i]));
882 if (iter->by[i] == NULL)
884 msg (SE, _("File %s lacks BY variable %s."),
885 iter->handle ? fh_get_name (iter->handle) : "*",
886 var_get_name (by[i]));
894 else if (lex_match_id (lexer, "FIRST"))
896 if (mtf.first[0] != '\0')
898 msg (SE, _("FIRST may appear at most once."));
902 lex_match (lexer, '=');
903 if (!lex_force_id (lexer))
/* NOTE(review): unchecked strcpy() into a buffer of LONG_NAME_LEN + 1
   bytes; this relies on lex_tokid() never yielding a longer
   identifier -- TODO confirm.  The same applies to mtf.last below. */
905 strcpy (mtf.first, lex_tokid (lexer));
908 else if (lex_match_id (lexer, "LAST"))
910 if (mtf.last[0] != '\0')
912 msg (SE, _("LAST may appear at most once."));
916 lex_match (lexer, '=');
917 if (!lex_force_id (lexer))
919 strcpy (mtf.last, lex_tokid (lexer));
922 else if (lex_match_id (lexer, "MAP"))
926 else if (lex_match_id (lexer, "DROP"))
928 if (!drop_variables (lexer, mtf.dict))
931 else if (lex_match_id (lexer, "KEEP"))
933 if (!keep_variables (lexer, mtf.dict))
938 lex_error (lexer, NULL);
942 if (!lex_match (lexer, '/') && lex_token (lexer) != '.')
944 lex_end_of_command (lexer);
953 msg (SE, _("BY is required when TABLE is specified."));
958 msg (SE, _("BY is required when IN is specified."));
963 /* Set up mapping from each file's variables to master
965 for (iter = mtf.head; iter != NULL; iter = iter->next)
967 struct dictionary *d = iter->dict;
970 for (i = 0; i < dict_get_var_cnt (d); i++)
972 struct variable *v = dict_get_var (d, i);
973 struct variable *mv = dict_lookup_var (mtf.dict, var_get_name (v));
979 /* Add IN variables to master dictionary. */
980 for (iter = mtf.head; iter != NULL; iter = iter->next)
981 if (iter->in_name != NULL)
983 struct fmt_spec format = fmt_for_output (FMT_F, 1, 0);
984 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
/* NOTE(review): on this branch iter->in_var is NULL, yet the message
   below passes it to var_get_name(); iter->in_name looks like the
   intended argument.  As written this hands a null pointer to
   var_get_name() whenever the IN name duplicates an existing
   variable. */
985 if (iter->in_var == NULL)
987 msg (SE, _("IN variable name %s duplicates an "
988 "existing variable name."),
989 var_get_name (iter->in_var));
992 var_set_both_formats (iter->in_var, &format);
995 /* MATCH FILES performs an n-way merge on all its input files.
998 1. Read one input record from every input FILE.
1000 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
1002 3. Find the FILE input record(s) that have minimum BY
1003 values. Store all the values from these input records into
1006 4. For every TABLE, read another record as long as the BY values
1007 on the TABLE's input record are less than the FILEs' BY values.
1008 If an exact match is found, store all the values from the TABLE
1009 input record into the output record.
1011 5. Write the output record.
1013 6. Read another record from each input file FILE and TABLE that
1014 we stored values from above. If we come to the end of one of the
1015 input files, remove it from the list of input files.
1017 7. Repeat from step 2.
1019 FIXME: For merging large numbers of files (more than 10?) a
1020 better algorithm would use a heap for finding minimum
1023 if (used_active_file)
1025 proc_discard_output (ds);
1026 for (iter = mtf.head; iter != NULL; iter = iter->next)
1027 if (iter->reader == NULL)
1028 iter->reader = proc_open (ds);
1029 open_active_file = true;
1032 dict_compact_values (mtf.dict);
1033 mtf.output = autopaging_writer_create (dict_get_next_value_idx (mtf.dict));
1034 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1035 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1037 if (!mtf_read_records (&mtf))
1039 while (mtf.head && mtf.head->type == MTF_FILE)
1040 if (!mtf_processing (&mtf))
1042 if (!mtf_close_all_files (&mtf))
1044 if (open_active_file)
1047 proc_set_active_file (ds, casewriter_make_reader (mtf.output), mtf.dict);
1051 return mtf_free (&mtf) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1054 if (open_active_file)
1057 return CMD_CASCADING_FAILURE;
1060 /* Return a string in an allocated buffer describing V's variable
// Yields "numeric" for a numeric variable and otherwise
// "string with width N", N being var_get_width (v).  Both results come
// from allocating calls (xstrdup, xasprintf).
// NOTE(review): mtf_merge_dictionary() passes the results straight to
// msg(), so nothing visible releases them.
1063 var_type_description (struct variable *v)
1065 if (var_is_numeric (v))
1066 return xstrdup ("numeric");
1068 return xasprintf ("string with width %d", var_get_width (v));
1071 /* Closes FILE and frees its associated data.
1072 Returns true if successful, false if an I/O error
1073 occurred on FILE. */
/* OK is the result of destroying the reader.  The dictionary is
   destroyed only when FILE is not the active file, because
   cmd_match_files() points the active file's dict member at the
   dataset's own dictionary.  The statements that release FILE itself
   and return are not visible in this extract. */
1075 mtf_close_file (struct mtf_file *file)
1077 bool ok = casereader_destroy (file->reader);
1079 if (!file->active_file)
1080 dict_destroy (file->dict);
1081 free (file->in_name);
1082 case_destroy (&file->input);
/* Walks MTF's file list from the head and closes each file with
   mtf_close_file().  The assertion states that no input file may share
   the master dictionary, which mtf_free() destroys separately.  How a
   failed close is recorded and returned is elided from this
   extract. */
1088 mtf_close_all_files (struct mtf_proc *mtf)
1090 struct mtf_file *iter, *next;
1093 for (iter = mtf->head; iter; iter = next)
1096 assert (iter->dict != mtf->dict);
1097 if (!mtf_close_file (iter))
1104 /* Free all the data for the MATCH FILES procedure.
1105 Returns true if successful, false if an I/O error
// Closes all remaining input files, then destroys the output writer,
// the master dictionary and the output case, and frees the
// sequence-number array.  OK holds the result of
// mtf_close_all_files(); the return statement is not visible in this
// extract.
1108 mtf_free (struct mtf_proc *mtf)
1112 ok = mtf_close_all_files (mtf);
1114 casewriter_destroy (mtf->output);
1115 dict_destroy (mtf->dict);
1116 case_destroy (&mtf->mtf_case);
1117 free (mtf->seq_nums);
1122 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1123 file in the chain, or to NULL if it was the last in the chain.
1124 Returns true if successful, false if an I/O error occurred. */
/* Unlinks F (the file *FILE points to) from the doubly linked list,
   adjusting the neighbours' links and mtf->head / mtf->tail through
   the visible assignments (their guarding conditions are elided from
   this extract), resets what F contributed to the output case, and
   finally closes F, returning the result of mtf_close_file(). */
1126 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1128 struct mtf_file *f = *file;
1132 f->prev->next = f->next;
1134 f->next->prev = f->prev;
1136 mtf->head = f->next;
1138 mtf->tail = f->prev;
/* F supplies no more data, so its IN variable becomes 0 in the output
   case. */
1141 if (f->in_var != NULL)
1142 case_data_rw (&mtf->mtf_case, f->in_var)->f = 0.;
1143 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1145 struct variable *v = dict_get_var (f->dict, i);
1146 struct variable *mv = get_master (v);
1149 union value *out = case_data_rw (&mtf->mtf_case, mv);
/* String values are blanked with spaces; the numeric branch's
   assignment is not visible in this extract. */
1151 if (var_is_numeric (v))
1154 memset (out->s, ' ', var_get_width (v));
1158 return mtf_close_file (f);
1161 /* Read a record from every input file.
1162 Returns true if successful, false if an I/O error occurred. */
/* A file whose first read fails is removed from the list with
   mtf_delete_file_in_place().  The loop stops early once OK becomes
   false.  NEXT presumably caches the successor before ITER can be
   deleted; the assignment is not visible in this extract -- TODO
   confirm. */
1164 mtf_read_records (struct mtf_proc *mtf)
1166 struct mtf_file *iter, *next;
1169 for (iter = mtf->head; ok && iter != NULL; iter = next)
1172 if (!casereader_read (iter->reader, &iter->input))
1174 if (!mtf_delete_file_in_place (mtf, &iter))
1181 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1182 if A == B, 1 if A > B. */
/* Thin wrapper around case_compare_2dict(): the current input records
   of A and B are compared on each file's own BY variable array, over
   mtf->by_cnt variables. */
1184 mtf_compare_BY_values (struct mtf_proc *mtf,
1185 struct mtf_file *a, struct mtf_file *b)
1187 return case_compare_2dict (&a->input, &b->input, a->by, b->by, mtf->by_cnt);
1190 /* Perform one iteration of steps 3...7 above.
1191 Returns true if successful, false if an I/O error occurred. */
/* Files are sorted into two chains threaded through next_min: the
   "min" chain holds files whose current record carries the minimum BY
   values and the "max" chain holds the rest.  Values from the min
   chain are copied into mtf->mtf_case, variables supplied only by the
   max chain are blanked, the case is written, and the FILEs on the min
   chain are advanced.
   NOTE(review): many lines are elided from this extract, including the
   branch conditions on CMP, so the exact chain manipulation is only
   partly visible. */
1193 mtf_processing (struct mtf_proc *mtf)
1195 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1196 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1197 struct mtf_file *iter, *next;
1198 struct ccase out_case;
1200 /* 3. Find the FILE input record(s) that have minimum BY
1201 values. Store all the values from these input records into
1202 the output record. */
1203 min_head = min_tail = mtf->head;
1204 max_head = max_tail = NULL;
1205 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1208 int cmp = mtf_compare_BY_values (mtf, min_head, iter);
1212 max_tail = max_tail->next_min = iter;
1214 max_head = max_tail = iter;
1217 min_tail = min_tail->next_min = iter;
1222 max_tail->next_min = min_head;
1223 max_tail = min_tail;
1227 max_head = min_head;
1228 max_tail = min_tail;
1230 min_head = min_tail = iter;
1234 /* 4. For every TABLE, read another record as long as the BY
1235 values on the TABLE's input record are less than the FILEs'
1236 BY values. If an exact match is found, store all the values
1237 from the TABLE input record into the output record. */
1238 for (; iter != NULL; iter = next)
1240 assert (iter->type == MTF_TABLE);
1245 int cmp = mtf_compare_BY_values (mtf, min_head, iter);
1249 max_tail = max_tail->next_min = iter;
1251 max_head = max_tail = iter;
1254 min_tail = min_tail->next_min = iter;
1257 case_destroy (&iter->input);
1258 if (casereader_read (iter->reader, &iter->input))
1260 if (!mtf_delete_file_in_place (mtf, &iter))
1267 /* Next sequence number. */
1270 /* Store data to all the records we are using. */
1272 min_tail->next_min = NULL;
1273 for (iter = min_head; iter; iter = iter->next_min)
1277 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1279 struct variable *v = dict_get_var (iter->dict, i);
1280 struct variable *mv = get_master (v);
1281 size_t mv_index = mv ? var_get_dict_index (mv) : 0;
/* seq_nums[mv_index] equal to seq_num means an earlier file in this
   pass already stored this master variable, so it is left alone. */
1283 if (mv != NULL && mtf->seq_nums[mv_index] != mtf->seq_num)
1285 union value *out = case_data_rw (&mtf->mtf_case, mv);
1287 mtf->seq_nums[mv_index] = mtf->seq_num;
1288 if (var_is_numeric (v))
1289 out->f = case_num (&iter->input, v);
1291 memcpy (out->s, case_str (&iter->input, v), var_get_width (v));
1294 if (iter->in_var != NULL)
1295 case_data_rw (&mtf->mtf_case, iter->in_var)->f = 1.;
1298 /* Store missing values to all the records we're not using. */
1300 max_tail->next_min = NULL;
1301 for (iter = max_head; iter; iter = iter->next_min)
1305 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1307 struct variable *v = dict_get_var (iter->dict, i);
1308 struct variable *mv = get_master (v);
1309 size_t mv_index = mv ? var_get_dict_index (mv) : 0;
/* Only master variables that no used record supplied in this pass are
   blanked here. */
1311 if (mv != NULL && mtf->seq_nums[mv_index] != mtf->seq_num)
1313 union value *out = case_data_rw (&mtf->mtf_case, mv);
1314 mtf->seq_nums[mv_index] = mtf->seq_num;
1316 if (var_is_numeric (v))
1319 memset (out->s, ' ', var_get_width (v));
1322 if (iter->in_var != NULL)
1323 case_data_rw (&mtf->mtf_case, iter->in_var)->f = 0.;
1326 /* 5. Write the output record. */
/* A clone is written so that mtf_case stays available for the next
   output record; presumably casewriter_write() takes over the case it
   is given -- TODO confirm. */
1327 case_clone (&out_case, &mtf->mtf_case);
1328 casewriter_write (mtf->output, &out_case);
1330 /* 6. Read another record from each input file FILE and TABLE
1331 that we stored values from above. If we come to the end of
1332 one of the input files, remove it from the list of input
1334 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1336 next = iter->next_min;
1337 case_destroy (&iter->input);
1338 if (!casereader_read (iter->reader, &iter->input))
1339 if (!mtf_delete_file_in_place (mtf, &iter))
1345 /* Merge the dictionary for file F into master dictionary M. */
/* Visible behaviour: M takes F's file label when it has none of its
   own; documents are copied from F, or concatenated after M's when
   both have them.  Each variable of F is then looked up by name in M.
   For an existing master variable the widths are compared and a
   mismatch is reported as an error; otherwise value labels, missing
   values and the variable label are filled in from F where M lacks
   them.  A variable not yet in M is cloned into it.  Scratch variable
   names are tested for; the action taken is elided from this extract,
   as are the return statements.
   NOTE(review): f->handle is passed to fh_get_name() unconditionally
   in the error message, whereas cmd_match_files() prints "*" when a
   file's handle is null -- confirm that the handle cannot be null
   here (for example for the active file).
   NOTE(review): the strings returned by var_type_description() are
   allocated and handed directly to msg(), so nothing visible frees
   them. */
1347 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1349 struct dictionary *d = f->dict;
1350 const char *d_docs, *m_docs;
1353 if (dict_get_label (m) == NULL)
1354 dict_set_label (m, dict_get_label (d));
1356 d_docs = dict_get_documents (d);
1357 m_docs = dict_get_documents (m);
1361 dict_set_documents (m, d_docs);
1364 char *new_docs = xasprintf ("%s%s", m_docs, d_docs);
1365 dict_set_documents (m, new_docs);
1370 for (i = 0; i < dict_get_var_cnt (d); i++)
1372 struct variable *dv = dict_get_var (d, i);
1373 struct variable *mv = dict_lookup_var (m, var_get_name (dv));
1375 if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
1380 if (var_get_width (mv) != var_get_width (dv))
1382 msg (SE, _("Variable %s in file %s (%s) has different "
1383 "type or width from the same variable in "
1384 "earlier file (%s)."),
1385 var_get_name (dv), fh_get_name (f->handle),
1386 var_type_description (dv), var_type_description (mv));
1390 if (var_get_width (dv) == var_get_width (mv))
1392 if (var_has_value_labels (dv) && !var_has_value_labels (mv))
1393 var_set_value_labels (mv, var_get_value_labels (dv));
1394 if (var_has_missing_values (dv) && !var_has_missing_values (mv))
1395 var_set_missing_values (mv, var_get_missing_values (dv));
1398 if (var_get_label (dv) && !var_get_label (mv))
1399 var_set_label (mv, var_get_label (dv));
1402 mv = dict_clone_var_assert (m, dv, var_get_name (dv));
1408 /* Marks V's master variable as MASTER. */
/* The link is stored in V's aux slot through var_attach_aux(), with a
   null destructor argument. */
1410 set_master (struct variable *v, struct variable *master)
1412 var_attach_aux (v, master, NULL);
1415 /* Returns the master variable corresponding to V,
1416 as set with set_master(). */
/* Simply reads V's aux slot with var_get_aux(); callers in this file
   test the result against NULL before using it. */
1417 static struct variable *
1418 get_master (struct variable *v)
1420 return var_get_aux (v);
1425 A case map copies data from a case that corresponds to one
1426 dictionary to a case that corresponds to a second dictionary
1427 derived from the first by, optionally, deleting, reordering,
1428 or renaming variables. (No new variables may be created.)
/* Members of a case map.  finish_case_map() fills the map array and
   treats an entry of -1 as a destination slot with no source (see its
   assertion and its trailing-slot trimming loop). */
1434 size_t value_cnt; /* Number of values in map. */
1435 int *map; /* For each destination index, the
1436 corresponding source index. */
1439 /* Prepares dictionary D for producing a case map. Afterward,
1440 the caller may delete, reorder, or rename variables within D
1441 at will before using finish_case_map() to produce the case
1444 Uses D's aux members, which must otherwise not be in use. */
1446 start_case_map (struct dictionary *d)
1448 size_t var_cnt = dict_get_var_cnt (d);
1451 for (i = 0; i < var_cnt; i++)
1453 struct variable *v = dict_get_var (d, i);
/* Remember V's current case index in a small heap cell attached to V
   as aux data; finish_case_map() detaches it and compares it with the
   index V has by then.  var_dtor_free is registered as the aux
   destructor (presumably it frees the cell -- TODO confirm). */
1454 int *src_fv = xmalloc (sizeof *src_fv);
1455 *src_fv = var_get_case_index (v);
1456 var_attach_aux (v, src_fv, var_dtor_free);
1460 /* Produces a case map from dictionary D, which must have been
1461 previously prepared with start_case_map().
1463 Does not retain any reference to D, and clears the aux members
1464 set up by start_case_map().
1466 Returns the new case map, or a null pointer if no mapping is
1467 required (that is, no data has changed position). */
1468 static struct case_map *
1469 finish_case_map (struct dictionary *d)
1471 struct case_map *map;
1472 size_t var_cnt = dict_get_var_cnt (d);
/* The map gets one slot per value in D's current case layout. */
1476 map = xmalloc (sizeof *map);
1477 map->value_cnt = dict_get_next_value_idx (d);
1478 map->map = xnmalloc (map->value_cnt, sizeof *map->map);
/* The body of this loop is elided from this extract; the assertion
   further down expects every slot to hold -1 before it is filled. */
1479 for (i = 0; i < map->value_cnt; i++)
1483 for (i = 0; i < var_cnt; i++)
1485 struct variable *v = dict_get_var (d, i);
1486 size_t value_cnt = var_get_value_cnt (v);
1487 int *src_fv = (int *) var_detach_aux (v);
/* A variable whose case index differs from the recorded one has
   moved; presumably this is what marks the mapping as required (the
   statement that follows is not visible) -- TODO confirm. */
1490 if (var_get_case_index (v) != *src_fv)
1493 for (idx = 0; idx < value_cnt; idx++)
1495 int src_idx = *src_fv + idx;
1496 int dst_idx = var_get_case_index (v) + idx;
/* Each destination slot may be claimed by one variable only. */
1498 assert (map->map[dst_idx] == -1);
1499 map->map[dst_idx] = src_idx;
/* Visible call that discards the map; per the header comment a null
   pointer is returned when no mapping is required. */
1506 destroy_case_map (map);
/* Trailing destination slots with no source are examined here; the
   loop body is elided (presumably it shrinks value_cnt). */
1510 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1516 /* Maps from SRC to DST, applying case map MAP. */
/* For each destination index below map->value_cnt, the value at the
   mapped source index of SRC is copied into DST.  A line between the
   lookup and the copy is elided from this extract; presumably it skips
   entries equal to -1 -- TODO confirm. */
1518 map_case (const struct case_map *map,
1519 const struct ccase *src, struct ccase *dst)
1523 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1525 int src_idx = map->map[dst_idx];
1527 *case_data_rw_idx (dst, dst_idx) = *case_data_idx (src, src_idx);
1531 /* Destroys case map MAP. */
1533 destroy_case_map (struct case_map *map)