1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
24 #include <data/any-reader.h>
25 #include <data/any-writer.h>
26 #include <data/case-sink.h>
27 #include <data/case-source.h>
28 #include <data/case.h>
29 #include <data/dictionary.h>
30 #include <data/por-file-writer.h>
31 #include <data/settings.h>
32 #include <data/storage-stream.h>
33 #include <data/sys-file-writer.h>
34 #include <data/value-labels.h>
35 #include <data/variable.h>
36 #include <language/command.h>
37 #include <language/data-io/file-handle.h>
38 #include <language/lexer/lexer.h>
39 #include <libpspp/alloc.h>
40 #include <libpspp/compiler.h>
41 #include <libpspp/hash.h>
42 #include <libpspp/message.h>
43 #include <libpspp/message.h>
44 #include <libpspp/misc.h>
45 #include <libpspp/str.h>
46 #include <procedure.h>
49 #define _(msgid) gettext (msgid)
51 /* Rearranging and reducing a dictionary. */
52 static void start_case_map (struct dictionary *);
53 static struct case_map *finish_case_map (struct dictionary *);
54 static void map_case (const struct case_map *,
55 const struct ccase *, struct ccase *);
56 static void destroy_case_map (struct case_map *);
58 static bool parse_dict_trim (struct dictionary *);
60 /* Reading system and portable files. */
62 /* Type of command. */
69 /* Case reader input program. */
70 struct case_reader_pgm
72 struct any_reader *reader; /* File reader. */
73 struct case_map *map; /* Map from file dict to active file dict. */
74 struct ccase bounce; /* Bounce buffer. */
77 static const struct case_source_class case_reader_source_class;
79 static void case_reader_pgm_free (struct case_reader_pgm *);
81 /* Parses a GET or IMPORT command. */
83 parse_read_command (enum reader_command type)
85 struct case_reader_pgm *pgm = NULL;
86 struct file_handle *fh = NULL;
87 struct dictionary *dict = NULL;
93 if (lex_match_id ("FILE") || token == T_STRING)
97 fh = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
101 else if (type == IMPORT_CMD && lex_match_id ("TYPE"))
105 if (lex_match_id ("COMM"))
107 else if (lex_match_id ("TAPE"))
111 lex_error (_("expecting COMM or TAPE"));
121 lex_sbc_missing ("FILE");
125 discard_variables ();
127 pgm = xmalloc (sizeof *pgm);
128 pgm->reader = any_reader_open (fh, &dict);
130 case_nullify (&pgm->bounce);
131 if (pgm->reader == NULL)
134 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
136 start_case_map (dict);
141 if (!parse_dict_trim (dict))
145 pgm->map = finish_case_map (dict);
147 dict_destroy (default_dict);
150 vfm_source = create_case_source (&case_reader_source_class, pgm);
155 case_reader_pgm_free (pgm);
158 return CMD_CASCADING_FAILURE;
161 /* Frees a struct case_reader_pgm. */
163 case_reader_pgm_free (struct case_reader_pgm *pgm)
167 any_reader_close (pgm->reader);
168 destroy_case_map (pgm->map);
169 case_destroy (&pgm->bounce);
174 /* Clears internal state related to case reader input procedure. */
176 case_reader_source_destroy (struct case_source *source)
178 struct case_reader_pgm *pgm = source->aux;
179 case_reader_pgm_free (pgm);
182 /* Reads all the cases from the data file into C and passes them
183 to WRITE_CASE one by one, passing WC_DATA.
184 Returns true if successful, false if an I/O error occurred. */
186 case_reader_source_read (struct case_source *source,
188 write_case_func *write_case, write_case_data wc_data)
190 struct case_reader_pgm *pgm = source->aux;
196 if (pgm->map == NULL)
197 got_case = any_reader_read (pgm->reader, c);
200 got_case = any_reader_read (pgm->reader, &pgm->bounce);
202 map_case (pgm->map, &pgm->bounce, c);
207 ok = write_case (wc_data);
211 return ok && !any_reader_error (pgm->reader);
214 static const struct case_source_class case_reader_source_class =
218 case_reader_source_read,
219 case_reader_source_destroy,
226 return parse_read_command (GET_CMD);
233 return parse_read_command (IMPORT_CMD);
236 /* Writing system and portable files. */
238 /* Type of output file. */
241 SYSFILE_WRITER, /* System file. */
242 PORFILE_WRITER /* Portable file. */
245 /* Type of a command. */
248 XFORM_CMD, /* Transformation. */
249 PROC_CMD /* Procedure. */
252 /* File writer plus a case map. */
255 struct any_writer *writer; /* File writer. */
256 struct case_map *map; /* Map to output file dictionary
257 (null pointer for identity mapping). */
258 struct ccase bounce; /* Bounce buffer for mapping (if needed). */
263 case_writer_destroy (struct case_writer *aw)
268 ok = any_writer_close (aw->writer);
269 destroy_case_map (aw->map);
270 case_destroy (&aw->bounce);
276 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
277 WRITER_TYPE identifies the type of file to write,
278 and COMMAND_TYPE identifies the type of command.
280 On success, returns a writer.
281 For procedures only, sets *RETAIN_UNSELECTED to true if cases
282 that would otherwise be excluded by FILTER or USE should be
285 On failure, returns a null pointer. */
286 static struct case_writer *
287 parse_write_command (enum writer_type writer_type,
288 enum command_type command_type,
289 bool *retain_unselected)
292 struct file_handle *handle; /* Output file. */
293 struct dictionary *dict; /* Dictionary for output file. */
294 struct case_writer *aw; /* Writer. */
296 /* Common options. */
297 bool print_map; /* Print map? TODO. */
298 bool print_short_names; /* Print long-to-short name map. TODO. */
299 struct sfm_write_options sysfile_opts;
300 struct pfm_write_options porfile_opts;
302 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
303 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
304 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
306 if (command_type == PROC_CMD)
307 *retain_unselected = true;
310 dict = dict_clone (default_dict);
311 aw = xmalloc (sizeof *aw);
314 case_nullify (&aw->bounce);
316 print_short_names = false;
317 sysfile_opts = sfm_writer_default_options ();
318 porfile_opts = pfm_writer_default_options ();
320 start_case_map (dict);
321 dict_delete_scratch_vars (dict);
326 if (lex_match_id ("OUTFILE"))
330 lex_sbc_only_once ("OUTFILE");
336 handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
340 else if (lex_match_id ("NAMES"))
341 print_short_names = true;
342 else if (lex_match_id ("PERMISSIONS"))
347 if (lex_match_id ("READONLY"))
349 else if (lex_match_id ("WRITEABLE"))
353 lex_error (_("expecting %s or %s"), "READONLY", "WRITEABLE");
356 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
358 else if (command_type == PROC_CMD && lex_match_id ("UNSELECTED"))
361 if (lex_match_id ("RETAIN"))
362 *retain_unselected = true;
363 else if (lex_match_id ("DELETE"))
364 *retain_unselected = false;
367 lex_error (_("expecting %s or %s"), "RETAIN", "DELETE");
371 else if (writer_type == SYSFILE_WRITER && lex_match_id ("COMPRESSED"))
372 sysfile_opts.compress = true;
373 else if (writer_type == SYSFILE_WRITER && lex_match_id ("UNCOMPRESSED"))
374 sysfile_opts.compress = false;
375 else if (writer_type == SYSFILE_WRITER && lex_match_id ("VERSION"))
378 if (!lex_force_int ())
380 sysfile_opts.version = lex_integer ();
383 else if (writer_type == PORFILE_WRITER && lex_match_id ("TYPE"))
386 if (lex_match_id ("COMMUNICATIONS"))
387 porfile_opts.type = PFM_COMM;
388 else if (lex_match_id ("TAPE"))
389 porfile_opts.type = PFM_TAPE;
392 lex_error (_("expecting %s or %s"), "COMM", "TAPE");
396 else if (writer_type == PORFILE_WRITER && lex_match_id ("DIGITS"))
399 if (!lex_force_int ())
401 porfile_opts.digits = lex_integer ();
404 else if (!parse_dict_trim (dict))
407 if (!lex_match ('/'))
410 if (lex_end_of_command () != CMD_SUCCESS)
415 lex_sbc_missing ("OUTFILE");
419 dict_compact_values (dict);
420 aw->map = finish_case_map (dict);
422 case_create (&aw->bounce, dict_get_next_value_idx (dict));
424 if (fh_get_referent (handle) == FH_REF_FILE)
429 aw->writer = any_writer_from_sfm_writer (
430 sfm_open_writer (handle, dict, sysfile_opts));
433 aw->writer = any_writer_from_pfm_writer (
434 pfm_open_writer (handle, dict, porfile_opts));
439 aw->writer = any_writer_open (handle, dict);
445 case_writer_destroy (aw);
450 /* Writes case C to writer AW. */
452 case_writer_write_case (struct case_writer *aw, struct ccase *c)
456 map_case (aw->map, c, &aw->bounce);
459 return any_writer_write (aw->writer, c);
462 /* SAVE and EXPORT. */
464 static bool output_proc (struct ccase *, void *);
466 /* Parses and performs the SAVE or EXPORT procedure. */
468 parse_output_proc (enum writer_type writer_type)
470 bool retain_unselected;
471 struct variable *saved_filter_variable;
472 struct case_writer *aw;
475 aw = parse_write_command (writer_type, PROC_CMD, &retain_unselected);
477 return CMD_CASCADING_FAILURE;
479 saved_filter_variable = dict_get_filter (default_dict);
480 if (retain_unselected)
481 dict_set_filter (default_dict, NULL);
482 ok = procedure (output_proc, aw);
483 dict_set_filter (default_dict, saved_filter_variable);
485 case_writer_destroy (aw);
486 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
489 /* Writes case C to file. */
491 output_proc (struct ccase *c, void *aw_)
493 struct case_writer *aw = aw_;
494 return case_writer_write_case (aw, c);
500 return parse_output_proc (SYSFILE_WRITER);
506 return parse_output_proc (PORFILE_WRITER);
509 /* XSAVE and XEXPORT. */
511 /* Transformation. */
514 struct case_writer *aw; /* Writer. */
517 static trns_proc_func output_trns_proc;
518 static trns_free_func output_trns_free;
520 /* Parses the XSAVE or XEXPORT transformation command. */
522 parse_output_trns (enum writer_type writer_type)
524 struct output_trns *t = xmalloc (sizeof *t);
525 t->aw = parse_write_command (writer_type, XFORM_CMD, NULL);
529 return CMD_CASCADING_FAILURE;
532 add_transformation (output_trns_proc, output_trns_free, t);
536 /* Writes case C to the system or portable file specified on XSAVE or XEXPORT. */
538 output_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
540 struct output_trns *t = trns_;
541 case_writer_write_case (t->aw, c);
542 return TRNS_CONTINUE;
545 /* Frees an XSAVE or XEXPORT transformation.
546 Returns true if successful, false if an I/O error occurred. */
548 output_trns_free (void *trns_)
550 struct output_trns *t = trns_;
555 ok = case_writer_destroy (t->aw);
565 return parse_output_trns (SYSFILE_WRITER);
568 /* XEXPORT command. */
572 return parse_output_trns (PORFILE_WRITER);
575 static bool rename_variables (struct dictionary *dict);
576 static bool drop_variables (struct dictionary *dict);
577 static bool keep_variables (struct dictionary *dict);
579 /* Commands that read and write system files share a great deal
580 of common syntactic structure for rearranging and dropping
581 variables. This function parses this syntax and modifies DICT
582 appropriately. Returns true on success, false on failure. */
584 parse_dict_trim (struct dictionary *dict)
586 if (lex_match_id ("MAP"))
591 else if (lex_match_id ("DROP"))
592 return drop_variables (dict);
593 else if (lex_match_id ("KEEP"))
594 return keep_variables (dict);
595 else if (lex_match_id ("RENAME"))
596 return rename_variables (dict);
599 lex_error (_("expecting a valid subcommand"));
604 /* Parses and performs the RENAME subcommand of GET and SAVE. */
606 rename_variables (struct dictionary *dict)
624 v = parse_dict_variable (dict);
627 if (!lex_force_match ('=')
630 if (dict_lookup_var (dict, tokid) != NULL)
632 msg (SE, _("Cannot rename %s as %s because there already exists "
633 "a variable named %s. To rename variables with "
634 "overlapping names, use a single RENAME subcommand "
635 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
636 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
640 dict_rename_var (dict, v, tokid);
649 while (lex_match ('('))
653 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
655 if (!lex_match ('='))
657 msg (SE, _("`=' expected after variable list."));
660 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
664 msg (SE, _("Number of variables on left side of `=' (%d) does not "
665 "match number of variables on right side (%d), in "
666 "parenthesized group %d of RENAME subcommand."),
667 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
670 if (!lex_force_match (')'))
675 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
677 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
683 for (i = 0; i < nn; i++)
691 /* Parses and performs the DROP subcommand of GET and SAVE.
692 Returns true if successful, false on failure.*/
694 drop_variables (struct dictionary *dict)
700 if (!parse_variables (dict, &v, &nv, PV_NONE))
702 dict_delete_vars (dict, v, nv);
705 if (dict_get_var_cnt (dict) == 0)
707 msg (SE, _("Cannot DROP all variables from dictionary."));
713 /* Parses and performs the KEEP subcommand of GET and SAVE.
714 Returns true if successful, false on failure.*/
716 keep_variables (struct dictionary *dict)
723 if (!parse_variables (dict, &v, &nv, PV_NONE))
726 /* Move the specified variables to the beginning. */
727 dict_reorder_vars (dict, v, nv);
729 /* Delete the remaining variables. */
730 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
731 for (i = nv; i < dict_get_var_cnt (dict); i++)
732 v[i - nv] = dict_get_var (dict, i);
733 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
744 MTF_FILE, /* Specified on FILE= subcommand. */
745 MTF_TABLE /* Specified on TABLE= subcommand. */
748 /* One of the files on MATCH FILES. */
751 struct mtf_file *next, *prev; /* Next, previous in the list of files. */
752 struct mtf_file *next_min; /* Next in the chain of minimums. */
754 int type; /* One of MTF_*. */
755 struct variable **by; /* List of BY variables for this file. */
756 struct file_handle *handle; /* File handle. */
757 struct any_reader *reader; /* File reader. */
758 struct dictionary *dict; /* Dictionary from system file. */
761 char *in_name; /* Variable name. */
762 struct variable *in_var; /* Variable (in master dictionary). */
764 struct ccase input; /* Input record. */
767 /* MATCH FILES procedure. */
770 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
771 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
773 bool ok; /* False if I/O error occurs. */
775 size_t by_cnt; /* Number of variables on BY subcommand. */
777 /* Names of FIRST, LAST variables. */
778 char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
780 struct dictionary *dict; /* Dictionary of output file. */
781 struct case_sink *sink; /* Sink to receive output. */
782 struct ccase mtf_case; /* Case used for output. */
784 unsigned seq_num; /* Have we initialized this variable? */
785 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
788 static bool mtf_free (struct mtf_proc *);
789 static bool mtf_close_file (struct mtf_file *);
790 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
791 static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
793 static bool mtf_read_nonactive_records (void *);
794 static bool mtf_processing_finish (void *);
795 static bool mtf_processing (struct ccase *, void *);
797 static char *var_type_description (struct variable *);
799 static void set_master (struct variable *, struct variable *master);
800 static struct variable *get_master (struct variable *);
802 /* Parse and execute the MATCH FILES command. */
804 cmd_match_files (void)
807 struct mtf_file *first_table = NULL;
808 struct mtf_file *iter;
810 bool used_active_file = false;
811 bool saw_table = false;
816 mtf.head = mtf.tail = NULL;
820 mtf.dict = dict_create ();
822 case_nullify (&mtf.mtf_case);
825 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
829 && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
831 struct mtf_file *file = xmalloc (sizeof *file);
833 if (lex_match_id ("FILE"))
834 file->type = MTF_FILE;
835 else if (lex_match_id ("TABLE"))
837 file->type = MTF_TABLE;
848 file->in_name = NULL;
850 case_nullify (&file->input);
852 /* FILEs go first, then TABLEs. */
853 if (file->type == MTF_TABLE || first_table == NULL)
856 file->prev = mtf.tail;
858 mtf.tail->next = file;
860 if (mtf.head == NULL)
862 if (file->type == MTF_TABLE && first_table == NULL)
867 assert (file->type == MTF_FILE);
868 file->next = first_table;
869 file->prev = first_table->prev;
870 if (first_table->prev)
871 first_table->prev->next = file;
874 first_table->prev = file;
882 if (used_active_file)
884 msg (SE, _("The active file may not be specified more "
888 used_active_file = true;
890 if (vfm_source == NULL)
892 msg (SE, _("Cannot specify the active file since no active "
893 "file has been defined."));
900 _("MATCH FILES may not be used after TEMPORARY when "
901 "the active file is an input source. "
902 "Temporary transformations will be made permanent."));
906 file->dict = default_dict;
910 file->handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
911 if (file->handle == NULL)
914 file->reader = any_reader_open (file->handle, &file->dict);
915 if (file->reader == NULL)
918 case_create (&file->input, dict_get_next_value_idx (file->dict));
921 while (lex_match ('/'))
922 if (lex_match_id ("RENAME"))
924 if (!rename_variables (file->dict))
927 else if (lex_match_id ("IN"))
936 if (file->in_name != NULL)
938 msg (SE, _("Multiple IN subcommands for a single FILE or "
942 file->in_name = xstrdup (tokid);
947 mtf_merge_dictionary (mtf.dict, file);
952 if (lex_match (T_BY))
954 struct variable **by;
958 msg (SE, _("BY may appear at most once."));
963 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
964 PV_NO_DUPLICATE | PV_NO_SCRATCH))
967 for (iter = mtf.head; iter != NULL; iter = iter->next)
971 iter->by = xnmalloc (mtf.by_cnt, sizeof *iter->by);
973 for (i = 0; i < mtf.by_cnt; i++)
975 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
976 if (iter->by[i] == NULL)
978 msg (SE, _("File %s lacks BY variable %s."),
979 iter->handle ? fh_get_name (iter->handle) : "*",
988 else if (lex_match_id ("FIRST"))
990 if (mtf.first[0] != '\0')
992 msg (SE, _("FIRST may appear at most once."));
997 if (!lex_force_id ())
999 strcpy (mtf.first, tokid);
1002 else if (lex_match_id ("LAST"))
1004 if (mtf.last[0] != '\0')
1006 msg (SE, _("LAST may appear at most once."));
1011 if (!lex_force_id ())
1013 strcpy (mtf.last, tokid);
1016 else if (lex_match_id ("MAP"))
1020 else if (lex_match_id ("DROP"))
1022 if (!drop_variables (mtf.dict))
1025 else if (lex_match_id ("KEEP"))
1027 if (!keep_variables (mtf.dict))
1036 if (!lex_match ('/') && token != '.')
1038 lex_end_of_command ();
1043 if (mtf.by_cnt == 0)
1047 msg (SE, _("BY is required when TABLE is specified."));
1052 msg (SE, _("BY is required when IN is specified."));
1057 /* Set up mapping from each file's variables to master
1059 for (iter = mtf.head; iter != NULL; iter = iter->next)
1061 struct dictionary *d = iter->dict;
1064 for (i = 0; i < dict_get_var_cnt (d); i++)
1066 struct variable *v = dict_get_var (d, i);
1067 struct variable *mv = dict_lookup_var (mtf.dict, v->name);
1073 /* Add IN variables to master dictionary.  A null in_var is reported as a duplicate name. */
1074 for (iter = mtf.head; iter != NULL; iter = iter->next)
1075 if (iter->in_name != NULL)
1077 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
1078 if (iter->in_var == NULL)
1080 msg (SE, _("IN variable name %s duplicates an "
1081 "existing variable name."),
1082 iter->in_name); /* in_var is null on this path, so report the requested name. */
1085 iter->in_var->print = iter->in_var->write
1086 = make_output_format (FMT_F, 1, 0);
1089 /* MATCH FILES performs an n-way merge on all its input files.
1092 1. Read one input record from every input FILE.
1094 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
1096 3. Find the FILE input record(s) that have minimum BY
1097 values. Store all the values from these input records into
1100 4. For every TABLE, read another record as long as the BY values
1101 on the TABLE's input record are less than the FILEs' BY values.
1102 If an exact match is found, store all the values from the TABLE
1103 input record into the output record.
1105 5. Write the output record.
1107 6. Read another record from each input file FILE and TABLE that
1108 we stored values from above. If we come to the end of one of the
1109 input files, remove it from the list of input files.
1111 7. Repeat from step 2.
1113 Unfortunately, this algorithm can't be implemented in a
1114 straightforward way because there's no function to read a
1115 record from the active file. Instead, it has to be written
1118 FIXME: For merging large numbers of files (more than 10?) a
1119 better algorithm would use a heap for finding minimum
1122 if (!used_active_file)
1123 discard_variables ();
1125 dict_compact_values (mtf.dict);
1126 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
1127 if (mtf.sink->class->open != NULL)
1128 mtf.sink->class->open (mtf.sink);
1130 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1131 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1133 if (!mtf_read_nonactive_records (&mtf))
1136 if (used_active_file)
1137 ok = procedure (mtf_processing, &mtf) && mtf_processing_finish (&mtf);
1139 ok = mtf_processing_finish (&mtf);
1141 free_case_source (vfm_source);
1144 dict_destroy (default_dict);
1145 default_dict = mtf.dict;
1147 vfm_source = mtf.sink->class->make_source (mtf.sink);
1148 free_case_sink (mtf.sink);
1150 if (!mtf_free (&mtf))
1152 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1156 return CMD_CASCADING_FAILURE;
1159 /* Repeats steps 2...7 an arbitrary number of times. */
1161 mtf_processing_finish (void *mtf_)
1163 struct mtf_proc *mtf = mtf_;
1164 struct mtf_file *iter;
1166 /* Find the active file and delete it. */
1167 for (iter = mtf->head; iter; iter = iter->next)
1168 if (iter->handle == NULL)
1170 if (!mtf_delete_file_in_place (mtf, &iter))
1175 while (mtf->head && mtf->head->type == MTF_FILE)
1176 if (!mtf_processing (NULL, mtf))
1182 /* Return a string in a static buffer describing V's variable type and
1185 var_type_description (struct variable *v)
1187 static char buf[2][32];
1194 if (v->type == NUMERIC)
1195 strcpy (s, "numeric");
1198 assert (v->type == ALPHA);
1199 sprintf (s, "string with width %d", v->width);
1204 /* Closes FILE and frees its associated data.
1205 Returns true if successful, false if an I/O error
1206 occurred on FILE. */
1208 mtf_close_file (struct mtf_file *file)
1210 bool ok = file->reader == NULL || !any_reader_error (file->reader);
1212 any_reader_close (file->reader);
1213 if (file->handle != NULL)
1214 dict_destroy (file->dict);
1215 case_destroy (&file->input);
1216 free (file->in_name);
1221 /* Free all the data for the MATCH FILES procedure.
1222 Returns true if successful, false if an I/O error
1225 mtf_free (struct mtf_proc *mtf)
1227 struct mtf_file *iter, *next;
1230 for (iter = mtf->head; iter; iter = next)
1233 assert (iter->dict != mtf->dict);
1234 if (!mtf_close_file (iter))
1239 dict_destroy (mtf->dict);
1240 case_destroy (&mtf->mtf_case);
1241 free (mtf->seq_nums);
1246 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1247 file in the chain, or to NULL if it was the last in the chain.
1248 Returns true if successful, false if an I/O error occurred. */
1250 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1252 struct mtf_file *f = *file;
1256 f->prev->next = f->next;
1258 f->next->prev = f->prev;
1260 mtf->head = f->next;
1262 mtf->tail = f->prev;
1265 if (f->in_var != NULL)
1266 case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
1267 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1269 struct variable *v = dict_get_var (f->dict, i);
1270 struct variable *mv = get_master (v);
1273 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1275 if (v->type == NUMERIC)
1278 memset (out->s, ' ', v->width);
1282 return mtf_close_file (f);
1285 /* Read a record from every input file except the active file.
1286 Returns true if successful, false if an I/O error occurred. */
1288 mtf_read_nonactive_records (void *mtf_)
1290 struct mtf_proc *mtf = mtf_;
1291 struct mtf_file *iter, *next;
1294 for (iter = mtf->head; ok && iter != NULL; iter = next)
1297 if (iter->handle && !any_reader_read (iter->reader, &iter->input))
1298 if (!mtf_delete_file_in_place (mtf, &iter))
1304 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1305 if A == B, 1 if A > B. */
1307 mtf_compare_BY_values (struct mtf_proc *mtf,
1308 struct mtf_file *a, struct mtf_file *b,
1311 struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1312 struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
1313 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1314 return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
1317 /* Perform one iteration of steps 3...7 above.
1318 Returns true if successful, false if an I/O error occurred. */
1320 mtf_processing (struct ccase *c, void *mtf_)
1322 struct mtf_proc *mtf = mtf_;
1324 /* Do we need another record from the active file? */
1325 bool read_active_file;
1327 assert (mtf->head != NULL);
1328 if (mtf->head->type == MTF_TABLE)
1333 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1334 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1335 struct mtf_file *iter, *next;
1337 read_active_file = false;
1339 /* 3. Find the FILE input record(s) that have minimum BY
1340 values. Store all the values from these input records into
1341 the output record. */
1342 min_head = min_tail = mtf->head;
1343 max_head = max_tail = NULL;
1344 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1347 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1351 max_tail = max_tail->next_min = iter;
1353 max_head = max_tail = iter;
1356 min_tail = min_tail->next_min = iter;
1361 max_tail->next_min = min_head;
1362 max_tail = min_tail;
1366 max_head = min_head;
1367 max_tail = min_tail;
1369 min_head = min_tail = iter;
1373 /* 4. For every TABLE, read another record as long as the BY
1374 values on the TABLE's input record are less than the FILEs'
1375 BY values. If an exact match is found, store all the values
1376 from the TABLE input record into the output record. */
1377 for (; iter != NULL; iter = next)
1379 assert (iter->type == MTF_TABLE);
1384 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1388 max_tail = max_tail->next_min = iter;
1390 max_head = max_tail = iter;
1393 min_tail = min_tail->next_min = iter;
1396 if (iter->handle == NULL)
1398 if (any_reader_read (iter->reader, &iter->input))
1400 if (!mtf_delete_file_in_place (mtf, &iter))
1407 /* Next sequence number. */
1410 /* Store data to all the records we are using. */
1412 min_tail->next_min = NULL;
1413 for (iter = min_head; iter; iter = iter->next_min)
1417 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1419 struct variable *v = dict_get_var (iter->dict, i);
1420 struct variable *mv = get_master (v);
1422 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1424 struct ccase *record
1425 = case_is_null (&iter->input) ? c : &iter->input;
1426 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1428 mtf->seq_nums[mv->index] = mtf->seq_num;
1429 if (v->type == NUMERIC)
1430 out->f = case_num (record, v->fv);
1432 memcpy (out->s, case_str (record, v->fv), v->width);
1435 if (iter->in_var != NULL)
1436 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
1438 if (iter->type == MTF_FILE && iter->handle == NULL)
1439 read_active_file = true;
1442 /* Store missing values to all the records we're not
1445 max_tail->next_min = NULL;
1446 for (iter = max_head; iter; iter = iter->next_min)
1450 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1452 struct variable *v = dict_get_var (iter->dict, i);
1453 struct variable *mv = get_master (v);
1455 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1457 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1458 mtf->seq_nums[mv->index] = mtf->seq_num;
1460 if (v->type == NUMERIC)
1463 memset (out->s, ' ', v->width);
1466 if (iter->in_var != NULL)
1467 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1470 /* 5. Write the output record. */
1471 mtf->sink->class->write (mtf->sink, &mtf->mtf_case);
1473 /* 6. Read another record from each input file FILE and TABLE
1474 that we stored values from above. If we come to the end of
1475 one of the input files, remove it from the list of input
1477 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1479 next = iter->next_min;
1480 if (iter->reader != NULL
1481 && !any_reader_read (iter->reader, &iter->input))
1482 if (!mtf_delete_file_in_place (mtf, &iter))
1486 while (!read_active_file
1487 && mtf->head != NULL && mtf->head->type == MTF_FILE);
1492 /* Merge the dictionary for file F into master dictionary M. */
1494 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1496 struct dictionary *d = f->dict;
1497 const char *d_docs, *m_docs;
1500 if (dict_get_label (m) == NULL)
1501 dict_set_label (m, dict_get_label (d));
1503 d_docs = dict_get_documents (d);
1504 m_docs = dict_get_documents (m);
1508 dict_set_documents (m, d_docs);
1514 new_len = strlen (m_docs) + strlen (d_docs);
1515 new_docs = xmalloc (new_len + 1);
1516 strcpy (new_docs, m_docs);
1517 strcat (new_docs, d_docs);
1518 dict_set_documents (m, new_docs);
1523 for (i = 0; i < dict_get_var_cnt (d); i++)
1525 struct variable *dv = dict_get_var (d, i);
1526 struct variable *mv = dict_lookup_var (m, dv->name);
1528 if (dict_class_from_id (dv->name) == DC_SCRATCH)
1533 if (mv->width != dv->width)
1535 msg (SE, _("Variable %s in file %s (%s) has different "
1536 "type or width from the same variable in "
1537 "earlier file (%s)."),
1538 dv->name, fh_get_name (f->handle),
1539 var_type_description (dv), var_type_description (mv));
1543 if (dv->width == mv->width)
1545 if (val_labs_count (dv->val_labs)
1546 && !val_labs_count (mv->val_labs))
1547 mv->val_labs = val_labs_copy (dv->val_labs);
1548 if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
1549 mv_copy (&mv->miss, &dv->miss);
1552 if (dv->label && !mv->label)
1553 mv->label = xstrdup (dv->label);
1556 mv = dict_clone_var_assert (m, dv, dv->name);
1562 /* Marks V's master variable as MASTER. */
1564 set_master (struct variable *v, struct variable *master)
1566 var_attach_aux (v, master, NULL);
1569 /* Returns the master variable corresponding to V,
1570 as set with set_master(). */
1571 static struct variable *
1572 get_master (struct variable *v)
1581 A case map copies data from a case that corresponds to one
1582 dictionary to a case that corresponds to a second dictionary
1583 derived from the first by, optionally, deleting, reordering,
1584 or renaming variables. (No new variables may be created.)
1590 size_t value_cnt; /* Number of values in map. */
1591 int *map; /* For each destination index, the
1592 corresponding source index. */
1595 /* Prepares dictionary D for producing a case map. Afterward,
1596 the caller may delete, reorder, or rename variables within D
1597 at will before using finish_case_map() to produce the case
1600 Uses D's aux members, which must otherwise not be in use. */
1602 start_case_map (struct dictionary *d)
1604 size_t var_cnt = dict_get_var_cnt (d);
1607 for (i = 0; i < var_cnt; i++)
1609 struct variable *v = dict_get_var (d, i);
1610 int *src_fv = xmalloc (sizeof *src_fv);
1612 var_attach_aux (v, src_fv, var_dtor_free);
1616 /* Produces a case map from dictionary D, which must have been
1617 previously prepared with start_case_map().
1619 Does not retain any reference to D, and clears the aux members
1620 set up by start_case_map().
1622 Returns the new case map, or a null pointer if no mapping is
1623 required (that is, no data has changed position). */
1624 static struct case_map *
1625 finish_case_map (struct dictionary *d)
1627 struct case_map *map;
1628 size_t var_cnt = dict_get_var_cnt (d);
1632 map = xmalloc (sizeof *map);
1633 map->value_cnt = dict_get_next_value_idx (d);
1634 map->map = xnmalloc (map->value_cnt, sizeof *map->map);
1635 for (i = 0; i < map->value_cnt; i++)
1639 for (i = 0; i < var_cnt; i++)
1641 struct variable *v = dict_get_var (d, i);
1642 int *src_fv = (int *) var_detach_aux (v);
1645 if (v->fv != *src_fv)
1648 for (idx = 0; idx < v->nv; idx++)
1650 int src_idx = *src_fv + idx;
1651 int dst_idx = v->fv + idx;
1653 assert (map->map[dst_idx] == -1);
1654 map->map[dst_idx] = src_idx;
1661 destroy_case_map (map);
1665 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1671 /* Maps from SRC to DST, applying case map MAP. */
1673 map_case (const struct case_map *map,
1674 const struct ccase *src, struct ccase *dst)
1678 assert (map != NULL);
1679 assert (src != NULL);
1680 assert (dst != NULL);
1681 assert (src != dst);
1683 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1685 int src_idx = map->map[dst_idx];
1687 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1691 /* Destroys case map MAP. */
1693 destroy_case_map (struct case_map *map)