1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "any-reader.h"
25 #include "any-writer.h"
28 #include "dictionary.h"
30 #include "file-handle.h"
34 #include "pfm-write.h"
36 #include "sfm-write.h"
38 #include "value-labels.h"
44 #define _(msgid) gettext (msgid)
46 #include "debug-print.h"
48 /* Rearranging and reducing a dictionary. */
49 static void start_case_map (struct dictionary *);
50 static struct case_map *finish_case_map (struct dictionary *);
51 static void map_case (const struct case_map *,
52 const struct ccase *, struct ccase *);
53 static void destroy_case_map (struct case_map *);
55 static bool parse_dict_trim (struct dictionary *);
57 /* Reading system and portable files. */
59 /* Type of command. */
66 /* Case reader input program. */
67 struct case_reader_pgm
69 struct any_reader *reader; /* File reader. */
70 struct case_map *map; /* Map from file dict to active file dict. */
71 struct ccase bounce; /* Bounce buffer. */
74 static const struct case_source_class case_reader_source_class;
76 static void case_reader_pgm_free (struct case_reader_pgm *);
78 /* Parses a GET or IMPORT command. */
80 parse_read_command (enum reader_command type)
82 struct case_reader_pgm *pgm = NULL;
83 struct file_handle *fh = NULL;
84 struct dictionary *dict = NULL;
90 if (lex_match_id ("FILE") || token == T_STRING)
94 fh = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
98 else if (type == IMPORT_CMD && lex_match_id ("TYPE"))
102 if (lex_match_id ("COMM"))
104 else if (lex_match_id ("TAPE"))
108 lex_error (_("expecting COMM or TAPE"));
118 lex_sbc_missing ("FILE");
122 discard_variables ();
124 pgm = xmalloc (sizeof *pgm);
125 pgm->reader = any_reader_open (fh, &dict);
127 case_nullify (&pgm->bounce);
128 if (pgm->reader == NULL)
131 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
133 start_case_map (dict);
138 if (!parse_dict_trim (dict))
142 pgm->map = finish_case_map (dict);
144 dict_destroy (default_dict);
147 vfm_source = create_case_source (&case_reader_source_class, pgm);
152 case_reader_pgm_free (pgm);
158 /* Frees a struct case_reader_pgm. */
160 case_reader_pgm_free (struct case_reader_pgm *pgm)
164 any_reader_close (pgm->reader);
165 destroy_case_map (pgm->map);
166 case_destroy (&pgm->bounce);
171 /* Clears internal state related to case reader input procedure. */
173 case_reader_source_destroy (struct case_source *source)
175 struct case_reader_pgm *pgm = source->aux;
176 case_reader_pgm_free (pgm);
179 /* Reads all the cases from the data file into C and passes them
180 to WRITE_CASE one by one, passing WC_DATA. */
182 case_reader_source_read (struct case_source *source,
184 write_case_func *write_case, write_case_data wc_data)
186 struct case_reader_pgm *pgm = source->aux;
191 if (pgm->map == NULL)
192 ok = any_reader_read (pgm->reader, c);
195 ok = any_reader_read (pgm->reader, &pgm->bounce);
197 map_case (pgm->map, &pgm->bounce, c);
201 ok = write_case (wc_data);
206 static const struct case_source_class case_reader_source_class =
210 case_reader_source_read,
211 case_reader_source_destroy,
218 return parse_read_command (GET_CMD);
225 return parse_read_command (IMPORT_CMD);
228 /* Writing system and portable files. */
230 /* Type of output file. */
233 SYSFILE_WRITER, /* System file. */
234 PORFILE_WRITER /* Portable file. */
237 /* Type of a command. */
240 XFORM_CMD, /* Transformation. */
241 PROC_CMD /* Procedure. */
244 /* File writer plus a case map. */
247 struct any_writer *writer; /* File writer. */
248 struct case_map *map; /* Map to output file dictionary
249 (null pointer for identity mapping). */
250 struct ccase bounce; /* Bounce buffer for mapping (if needed). */
255 case_writer_destroy (struct case_writer *aw)
259 any_writer_close (aw->writer);
260 destroy_case_map (aw->map);
261 case_destroy (&aw->bounce);
266 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
267 WRITER_TYPE identifies the type of file to write,
268 and COMMAND_TYPE identifies the type of command.
270 On success, returns a writer.
271 For procedures only, sets *RETAIN_UNSELECTED to true if cases
272 that would otherwise be excluded by FILTER or USE should be
275 On failure, returns a null pointer. */
276 static struct case_writer *
277 parse_write_command (enum writer_type writer_type,
278 enum command_type command_type,
279 bool *retain_unselected)
282 struct file_handle *handle; /* Output file. */
283 struct dictionary *dict; /* Dictionary for output file. */
284 struct case_writer *aw; /* Writer. */
286 /* Common options. */
287 bool print_map; /* Print map? TODO. */
288 bool print_short_names; /* Print long-to-short name map. TODO. */
289 struct sfm_write_options sysfile_opts;
290 struct pfm_write_options porfile_opts;
292 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
293 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
294 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
296 if (command_type == PROC_CMD)
297 *retain_unselected = true;
300 dict = dict_clone (default_dict);
301 aw = xmalloc (sizeof *aw);
304 case_nullify (&aw->bounce);
306 print_short_names = false;
307 sysfile_opts = sfm_writer_default_options ();
308 porfile_opts = pfm_writer_default_options ();
310 start_case_map (dict);
311 dict_delete_scratch_vars (dict);
316 if (lex_match_id ("OUTFILE"))
320 lex_sbc_only_once ("OUTFILE");
326 handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
330 else if (lex_match_id ("NAMES"))
331 print_short_names = true;
332 else if (lex_match_id ("PERMISSIONS"))
337 if (lex_match_id ("READONLY"))
339 else if (lex_match_id ("WRITEABLE"))
343 lex_error (_("expecting %s or %s"), "READONLY", "WRITEABLE");
346 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
348 else if (command_type == PROC_CMD && lex_match_id ("UNSELECTED"))
351 if (lex_match_id ("RETAIN"))
352 *retain_unselected = true;
353 else if (lex_match_id ("DELETE"))
354 *retain_unselected = false;
357 lex_error (_("expecting %s or %s"), "RETAIN", "DELETE");
361 else if (writer_type == SYSFILE_WRITER && lex_match_id ("COMPRESSED"))
362 sysfile_opts.compress = true;
363 else if (writer_type == SYSFILE_WRITER && lex_match_id ("UNCOMPRESSED"))
364 sysfile_opts.compress = false;
365 else if (writer_type == SYSFILE_WRITER && lex_match_id ("VERSION"))
368 if (!lex_force_int ())
370 sysfile_opts.version = lex_integer ();
373 else if (writer_type == PORFILE_WRITER && lex_match_id ("TYPE"))
376 if (lex_match_id ("COMMUNICATIONS"))
377 porfile_opts.type = PFM_COMM;
378 else if (lex_match_id ("TAPE"))
379 porfile_opts.type = PFM_TAPE;
382 lex_error (_("expecting %s or %s"), "COMM", "TAPE");
386 else if (writer_type == PORFILE_WRITER && lex_match_id ("DIGITS"))
389 if (!lex_force_int ())
391 porfile_opts.digits = lex_integer ();
394 else if (!parse_dict_trim (dict))
397 if (!lex_match ('/'))
400 if (lex_end_of_command () != CMD_SUCCESS)
405 lex_sbc_missing ("OUTFILE");
409 dict_compact_values (dict);
410 aw->map = finish_case_map (dict);
412 case_create (&aw->bounce, dict_get_next_value_idx (dict));
414 if (fh_get_referent (handle) == FH_REF_FILE)
419 aw->writer = any_writer_from_sfm_writer (
420 sfm_open_writer (handle, dict, sysfile_opts));
423 aw->writer = any_writer_from_pfm_writer (
424 pfm_open_writer (handle, dict, porfile_opts));
429 aw->writer = any_writer_open (handle, dict);
435 case_writer_destroy (aw);
440 /* Writes case C to writer AW. */
442 case_writer_write_case (struct case_writer *aw, struct ccase *c)
446 map_case (aw->map, c, &aw->bounce);
449 any_writer_write (aw->writer, c);
452 /* SAVE and EXPORT. */
454 static int output_proc (struct ccase *, void *);
456 /* Parses and performs the SAVE or EXPORT procedure. */
458 parse_output_proc (enum writer_type writer_type)
460 bool retain_unselected;
461 struct variable *saved_filter_variable;
462 struct case_writer *aw;
464 aw = parse_write_command (writer_type, PROC_CMD, &retain_unselected);
468 saved_filter_variable = dict_get_filter (default_dict);
469 if (retain_unselected)
470 dict_set_filter (default_dict, NULL);
471 procedure (output_proc, aw);
472 dict_set_filter (default_dict, saved_filter_variable);
474 case_writer_destroy (aw);
478 /* Writes case C to file. */
480 output_proc (struct ccase *c, void *aw_)
482 struct case_writer *aw = aw_;
483 case_writer_write_case (aw, c);
490 return parse_output_proc (SYSFILE_WRITER);
496 return parse_output_proc (PORFILE_WRITER);
499 /* XSAVE and XEXPORT. */
501 /* Transformation. */
504 struct case_writer *aw; /* Writer. */
507 static trns_proc_func output_trns_proc;
508 static trns_free_func output_trns_free;
510 /* Parses the XSAVE or XEXPORT transformation command. */
512 parse_output_trns (enum writer_type writer_type)
514 struct output_trns *t = xmalloc (sizeof *t);
515 t->aw = parse_write_command (writer_type, XFORM_CMD, NULL);
522 add_transformation (output_trns_proc, output_trns_free, t);
526 /* Writes case C to the system or portable file specified on XSAVE or XEXPORT. */
528 output_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
530 struct output_trns *t = trns_;
531 case_writer_write_case (t->aw, c);
535 /* Frees an XSAVE or XEXPORT transformation. */
537 output_trns_free (void *trns_)
539 struct output_trns *t = trns_;
543 case_writer_destroy (t->aw);
552 return parse_output_trns (SYSFILE_WRITER);
555 /* XEXPORT command. */
559 return parse_output_trns (PORFILE_WRITER);
562 static bool rename_variables (struct dictionary *dict);
563 static bool drop_variables (struct dictionary *dict);
564 static bool keep_variables (struct dictionary *dict);
566 /* Commands that read and write system files share a great deal
567 of common syntactic structure for rearranging and dropping
568 variables. This function parses this syntax and modifies DICT
569 appropriately. Returns true on success, false on failure. */
571 parse_dict_trim (struct dictionary *dict)
573 if (lex_match_id ("MAP"))
578 else if (lex_match_id ("DROP"))
579 return drop_variables (dict);
580 else if (lex_match_id ("KEEP"))
581 return keep_variables (dict);
582 else if (lex_match_id ("RENAME"))
583 return rename_variables (dict);
586 lex_error (_("expecting a valid subcommand"));
591 /* Parses and performs the RENAME subcommand of GET and SAVE. */
593 rename_variables (struct dictionary *dict)
611 v = parse_dict_variable (dict);
614 if (!lex_force_match ('=')
617 if (dict_lookup_var (dict, tokid) != NULL)
619 msg (SE, _("Cannot rename %s as %s because there already exists "
620 "a variable named %s. To rename variables with "
621 "overlapping names, use a single RENAME subcommand "
622 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
623 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
627 dict_rename_var (dict, v, tokid);
636 while (lex_match ('('))
640 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
642 if (!lex_match ('='))
644 msg (SE, _("`=' expected after variable list."));
647 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
651 msg (SE, _("Number of variables on left side of `=' (%d) does not "
652 "match number of variables on right side (%d), in "
653 "parenthesized group %d of RENAME subcommand."),
654 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
657 if (!lex_force_match (')'))
662 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
664 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
670 for (i = 0; i < nn; i++)
678 /* Parses and performs the DROP subcommand of GET and SAVE.
679 Returns true if successful, false on failure.*/
681 drop_variables (struct dictionary *dict)
687 if (!parse_variables (dict, &v, &nv, PV_NONE))
689 dict_delete_vars (dict, v, nv);
692 if (dict_get_var_cnt (dict) == 0)
694 msg (SE, _("Cannot DROP all variables from dictionary."));
700 /* Parses and performs the KEEP subcommand of GET and SAVE.
701 Returns true if successful, false on failure.*/
703 keep_variables (struct dictionary *dict)
710 if (!parse_variables (dict, &v, &nv, PV_NONE))
713 /* Move the specified variables to the beginning. */
714 dict_reorder_vars (dict, v, nv);
716 /* Delete the remaining variables. */
717 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
718 for (i = nv; i < dict_get_var_cnt (dict); i++)
719 v[i - nv] = dict_get_var (dict, i);
720 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
728 #include "debug-print.h"
733 MTF_FILE, /* Specified on FILE= subcommand. */
734 MTF_TABLE /* Specified on TABLE= subcommand. */
737 /* One of the files on MATCH FILES. */
740 struct mtf_file *next, *prev; /* Next, previous in the list of files. */
741 struct mtf_file *next_min; /* Next in the chain of minimums. */
743 int type; /* One of MTF_*. */
744 struct variable **by; /* List of BY variables for this file. */
745 struct file_handle *handle; /* File handle. */
746 struct any_reader *reader; /* File reader. */
747 struct dictionary *dict; /* Dictionary from system file. */
750 char *in_name; /* Variable name. */
751 struct variable *in_var; /* Variable (in master dictionary). */
753 struct ccase input; /* Input record. */
756 /* MATCH FILES procedure. */
759 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
760 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
762 size_t by_cnt; /* Number of variables on BY subcommand. */
764 /* Names of FIRST, LAST variables. */
765 char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
767 struct dictionary *dict; /* Dictionary of output file. */
768 struct case_sink *sink; /* Sink to receive output. */
769 struct ccase mtf_case; /* Case used for output. */
771 unsigned seq_num; /* Current sequence number; a seq_nums[] entry equal to it marks a variable that has already been stored. */
772 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
775 static void mtf_free (struct mtf_proc *);
776 static void mtf_free_file (struct mtf_file *);
777 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
778 static void mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
780 static void mtf_read_nonactive_records (void *);
781 static void mtf_processing_finish (void *);
782 static int mtf_processing (struct ccase *, void *);
784 static char *var_type_description (struct variable *);
786 static void set_master (struct variable *, struct variable *master);
787 static struct variable *get_master (struct variable *);
789 /* Parse and execute the MATCH FILES command. */
791 cmd_match_files (void)
794 struct mtf_file *first_table = NULL;
795 struct mtf_file *iter;
797 bool used_active_file = false;
798 bool saw_table = false;
801 mtf.head = mtf.tail = NULL;
805 mtf.dict = dict_create ();
807 case_nullify (&mtf.mtf_case);
810 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
814 && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
816 struct mtf_file *file = xmalloc (sizeof *file);
818 if (lex_match_id ("FILE"))
819 file->type = MTF_FILE;
820 else if (lex_match_id ("TABLE"))
822 file->type = MTF_TABLE;
833 file->in_name = NULL;
835 case_nullify (&file->input);
837 /* FILEs go first, then TABLEs. */
838 if (file->type == MTF_TABLE || first_table == NULL)
841 file->prev = mtf.tail;
843 mtf.tail->next = file;
845 if (mtf.head == NULL)
847 if (file->type == MTF_TABLE && first_table == NULL)
852 assert (file->type == MTF_FILE);
853 file->next = first_table;
854 file->prev = first_table->prev;
855 if (first_table->prev)
856 first_table->prev->next = file;
859 first_table->prev = file;
867 if (used_active_file)
869 msg (SE, _("The active file may not be specified more "
873 used_active_file = true;
875 assert (pgm_state != STATE_INPUT);
876 if (pgm_state == STATE_INIT)
878 msg (SE, _("Cannot specify the active file since no active "
879 "file has been defined."));
886 _("MATCH FILES may not be used after TEMPORARY when "
887 "the active file is an input source. "
888 "Temporary transformations will be made permanent."));
892 file->dict = default_dict;
896 file->handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
897 if (file->handle == NULL)
900 file->reader = any_reader_open (file->handle, &file->dict);
901 if (file->reader == NULL)
904 case_create (&file->input, dict_get_next_value_idx (file->dict));
907 while (lex_match ('/'))
908 if (lex_match_id ("RENAME"))
910 if (!rename_variables (file->dict))
913 else if (lex_match_id ("IN"))
922 if (file->in_name != NULL)
924 msg (SE, _("Multiple IN subcommands for a single FILE or "
928 file->in_name = xstrdup (tokid);
933 mtf_merge_dictionary (mtf.dict, file);
938 if (lex_match (T_BY))
940 struct variable **by;
944 msg (SE, _("BY may appear at most once."));
949 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
950 PV_NO_DUPLICATE | PV_NO_SCRATCH))
953 for (iter = mtf.head; iter != NULL; iter = iter->next)
957 iter->by = xnmalloc (mtf.by_cnt, sizeof *iter->by);
959 for (i = 0; i < mtf.by_cnt; i++)
961 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
962 if (iter->by[i] == NULL)
964 msg (SE, _("File %s lacks BY variable %s."),
965 iter->handle ? fh_get_name (iter->handle) : "*",
974 else if (lex_match_id ("FIRST"))
976 if (mtf.first[0] != '\0')
978 msg (SE, _("FIRST may appear at most once."));
983 if (!lex_force_id ())
985 strcpy (mtf.first, tokid);
988 else if (lex_match_id ("LAST"))
990 if (mtf.last[0] != '\0')
992 msg (SE, _("LAST may appear at most once."));
997 if (!lex_force_id ())
999 strcpy (mtf.last, tokid);
1002 else if (lex_match_id ("MAP"))
1006 else if (lex_match_id ("DROP"))
1008 if (!drop_variables (mtf.dict))
1011 else if (lex_match_id ("KEEP"))
1013 if (!keep_variables (mtf.dict))
1022 if (!lex_match ('/') && token != '.')
1024 lex_end_of_command ();
1029 if (mtf.by_cnt == 0)
1033 msg (SE, _("BY is required when TABLE is specified."));
1038 msg (SE, _("BY is required when IN is specified."));
1043 /* Set up mapping from each file's variables to master
1045 for (iter = mtf.head; iter != NULL; iter = iter->next)
1047 struct dictionary *d = iter->dict;
1050 for (i = 0; i < dict_get_var_cnt (d); i++)
1052 struct variable *v = dict_get_var (d, i);
1053 struct variable *mv = dict_lookup_var (mtf.dict, v->name);
1059 /* Add IN variables to master dictionary. */
1060 for (iter = mtf.head; iter != NULL; iter = iter->next)
1061 if (iter->in_name != NULL)
1063 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
1064 if (iter->in_var == NULL)
1066 msg (SE, _("IN variable name %s duplicates an "
1067 "existing variable name."),
1068 iter->in_name); /* in_var is NULL on this path, so report the requested name instead of dereferencing it. */
1071 iter->in_var->print = iter->in_var->write
1072 = make_output_format (FMT_F, 1, 0);
1075 /* MATCH FILES performs an n-way merge on all its input files.
1078 1. Read one input record from every input FILE.
1080 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
1082 3. Find the FILE input record(s) that have minimum BY
1083 values. Store all the values from these input records into
1086 4. For every TABLE, read another record as long as the BY values
1087 on the TABLE's input record are less than the FILEs' BY values.
1088 If an exact match is found, store all the values from the TABLE
1089 input record into the output record.
1091 5. Write the output record.
1093 6. Read another record from each input file FILE and TABLE that
1094 we stored values from above. If we come to the end of one of the
1095 input files, remove it from the list of input files.
1097 7. Repeat from step 2.
1099 Unfortunately, this algorithm can't be implemented in a
1100 straightforward way because there's no function to read a
1101 record from the active file. Instead, it has to be written
1104 FIXME: For merging large numbers of files (more than 10?) a
1105 better algorithm would use a heap for finding minimum
1108 if (!used_active_file)
1109 discard_variables ();
1111 dict_compact_values (mtf.dict);
1112 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
1113 if (mtf.sink->class->open != NULL)
1114 mtf.sink->class->open (mtf.sink);
1116 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1117 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1119 mtf_read_nonactive_records (&mtf);
1120 if (used_active_file)
1121 procedure (mtf_processing, &mtf);
1122 mtf_processing_finish (&mtf);
1124 free_case_source (vfm_source);
1127 dict_destroy (default_dict);
1128 default_dict = mtf.dict;
1130 vfm_source = mtf.sink->class->make_source (mtf.sink);
1131 free_case_sink (mtf.sink);
1141 /* Repeats steps 2...7 of the MATCH FILES algorithm until no FILE inputs remain. */
1143 mtf_processing_finish (void *mtf_)
1145 struct mtf_proc *mtf = mtf_;
1146 struct mtf_file *iter;
1148 /* Find the active file and delete it. */
1149 for (iter = mtf->head; iter; iter = iter->next)
1150 if (iter->handle == NULL)
1152 mtf_delete_file_in_place (mtf, &iter);
1156 while (mtf->head && mtf->head->type == MTF_FILE)
1157 if (!mtf_processing (NULL, mtf))
1161 /* Return a string in a static buffer describing V's variable type and
1164 var_type_description (struct variable *v)
1166 static char buf[2][32];
1173 if (v->type == NUMERIC)
1174 strcpy (s, "numeric");
1177 assert (v->type == ALPHA);
1178 sprintf (s, "string with width %d", v->width);
1183 /* Free FILE and associated data. */
1185 mtf_free_file (struct mtf_file *file)
1188 any_reader_close (file->reader);
1189 if (file->dict != default_dict)
1190 dict_destroy (file->dict);
1191 case_destroy (&file->input);
1192 free (file->in_name);
1196 /* Free all the data for the MATCH FILES procedure. */
1198 mtf_free (struct mtf_proc *mtf)
1200 struct mtf_file *iter, *next;
1202 for (iter = mtf->head; iter; iter = next)
1205 mtf_free_file (iter);
1209 dict_destroy (mtf->dict);
1210 case_destroy (&mtf->mtf_case);
1211 free (mtf->seq_nums);
1214 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1215 file in the chain, or to NULL if it was the last in the chain. */
1217 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1219 struct mtf_file *f = *file;
1223 f->prev->next = f->next;
1225 f->next->prev = f->prev;
1227 mtf->head = f->next;
1229 mtf->tail = f->prev;
1232 if (f->in_var != NULL)
1233 case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
1234 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1236 struct variable *v = dict_get_var (f->dict, i);
1237 struct variable *mv = get_master (v);
1240 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1242 if (v->type == NUMERIC)
1245 memset (out->s, ' ', v->width);
1252 /* Read a record from every input file except the active file. */
1254 mtf_read_nonactive_records (void *mtf_)
1256 struct mtf_proc *mtf = mtf_;
1257 struct mtf_file *iter, *next;
1259 for (iter = mtf->head; iter != NULL; iter = next)
1262 if (iter->handle && !any_reader_read (iter->reader, &iter->input))
1263 mtf_delete_file_in_place (mtf, &iter);
1267 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1268 if A == B, 1 if A > B. */
1270 mtf_compare_BY_values (struct mtf_proc *mtf,
1271 struct mtf_file *a, struct mtf_file *b,
1274 struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1275 struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
1276 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1277 return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
1280 /* Perform one iteration of steps 3...7 above. */
1282 mtf_processing (struct ccase *c, void *mtf_)
1284 struct mtf_proc *mtf = mtf_;
1286 /* Do we need another record from the active file? */
1287 bool read_active_file;
1289 assert (mtf->head != NULL);
1290 if (mtf->head->type == MTF_TABLE)
1295 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1296 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1297 struct mtf_file *iter, *next;
1299 read_active_file = false;
1301 /* 3. Find the FILE input record(s) that have minimum BY
1302 values. Store all the values from these input records into
1303 the output record. */
1304 min_head = min_tail = mtf->head;
1305 max_head = max_tail = NULL;
1306 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1309 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1313 max_tail = max_tail->next_min = iter;
1315 max_head = max_tail = iter;
1318 min_tail = min_tail->next_min = iter;
1323 max_tail->next_min = min_head;
1324 max_tail = min_tail;
1328 max_head = min_head;
1329 max_tail = min_tail;
1331 min_head = min_tail = iter;
1335 /* 4. For every TABLE, read another record as long as the BY
1336 values on the TABLE's input record are less than the FILEs'
1337 BY values. If an exact match is found, store all the values
1338 from the TABLE input record into the output record. */
1339 for (; iter != NULL; iter = next)
1341 assert (iter->type == MTF_TABLE);
1346 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1350 max_tail = max_tail->next_min = iter;
1352 max_head = max_tail = iter;
1355 min_tail = min_tail->next_min = iter;
1358 if (iter->handle == NULL)
1360 if (any_reader_read (iter->reader, &iter->input))
1362 mtf_delete_file_in_place (mtf, &iter);
1368 /* Next sequence number. */
1371 /* Store data to all the records we are using. */
1373 min_tail->next_min = NULL;
1374 for (iter = min_head; iter; iter = iter->next_min)
1378 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1380 struct variable *v = dict_get_var (iter->dict, i);
1381 struct variable *mv = get_master (v);
1383 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1385 struct ccase *record
1386 = case_is_null (&iter->input) ? c : &iter->input;
1387 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1389 mtf->seq_nums[mv->index] = mtf->seq_num;
1390 if (v->type == NUMERIC)
1391 out->f = case_num (record, v->fv);
1393 memcpy (out->s, case_str (record, v->fv), v->width);
1396 if (iter->in_var != NULL)
1397 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
1399 if (iter->type == MTF_FILE && iter->handle == NULL)
1400 read_active_file = true;
1403 /* Store missing values to all the records we're not
1406 max_tail->next_min = NULL;
1407 for (iter = max_head; iter; iter = iter->next_min)
1411 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1413 struct variable *v = dict_get_var (iter->dict, i);
1414 struct variable *mv = get_master (v);
1416 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1418 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1419 mtf->seq_nums[mv->index] = mtf->seq_num;
1421 if (v->type == NUMERIC)
1424 memset (out->s, ' ', v->width);
1427 if (iter->in_var != NULL)
1428 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1431 /* 5. Write the output record. */
1432 mtf->sink->class->write (mtf->sink, &mtf->mtf_case);
1434 /* 6. Read another record from each input file FILE and TABLE
1435 that we stored values from above. If we come to the end of
1436 one of the input files, remove it from the list of input
1438 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1440 next = iter->next_min;
1441 if (iter->reader != NULL
1442 && !any_reader_read (iter->reader, &iter->input))
1443 mtf_delete_file_in_place (mtf, &iter);
1446 while (!read_active_file
1447 && mtf->head != NULL && mtf->head->type == MTF_FILE);
1449 return mtf->head != NULL && mtf->head->type == MTF_FILE;
1452 /* Merge the dictionary for file F into master dictionary M. */
1454 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1456 struct dictionary *d = f->dict;
1457 const char *d_docs, *m_docs;
1460 if (dict_get_label (m) == NULL)
1461 dict_set_label (m, dict_get_label (d));
1463 d_docs = dict_get_documents (d);
1464 m_docs = dict_get_documents (m);
1468 dict_set_documents (m, d_docs);
1474 new_len = strlen (m_docs) + strlen (d_docs);
1475 new_docs = xmalloc (new_len + 1);
1476 strcpy (new_docs, m_docs);
1477 strcat (new_docs, d_docs);
1478 dict_set_documents (m, new_docs);
1483 for (i = 0; i < dict_get_var_cnt (d); i++)
1485 struct variable *dv = dict_get_var (d, i);
1486 struct variable *mv = dict_lookup_var (m, dv->name);
1488 if (dict_class_from_id (dv->name) == DC_SCRATCH)
1493 if (mv->width != dv->width)
1495 msg (SE, _("Variable %s in file %s (%s) has different "
1496 "type or width from the same variable in "
1497 "earlier file (%s)."),
1498 dv->name, fh_get_name (f->handle),
1499 var_type_description (dv), var_type_description (mv));
1503 if (dv->width == mv->width)
1505 if (val_labs_count (dv->val_labs)
1506 && !val_labs_count (mv->val_labs))
1507 mv->val_labs = val_labs_copy (dv->val_labs);
1508 if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
1509 mv_copy (&mv->miss, &dv->miss);
1512 if (dv->label && !mv->label)
1513 mv->label = xstrdup (dv->label);
1516 mv = dict_clone_var_assert (m, dv, dv->name);
1522 /* Marks V's master variable as MASTER. */
1524 set_master (struct variable *v, struct variable *master)
1526 var_attach_aux (v, master, NULL);
1529 /* Returns the master variable corresponding to V,
1530 as set with set_master(). */
1531 static struct variable *
1532 get_master (struct variable *v)
1541 A case map copies data from a case that corresponds to one
1542 dictionary to a case that corresponds to a second dictionary
1543 derived from the first by, optionally, deleting, reordering,
1544 or renaming variables. (No new variables may be created.)
1550 size_t value_cnt; /* Number of values in map. */
1551 int *map; /* For each destination index, the
1552 corresponding source index. */
1555 /* Prepares dictionary D for producing a case map. Afterward,
1556 the caller may delete, reorder, or rename variables within D
1557 at will before using finish_case_map() to produce the case
1560 Uses D's aux members, which must otherwise not be in use. */
1562 start_case_map (struct dictionary *d)
1564 size_t var_cnt = dict_get_var_cnt (d);
1567 for (i = 0; i < var_cnt; i++)
1569 struct variable *v = dict_get_var (d, i);
1570 int *src_fv = xmalloc (sizeof *src_fv);
1572 var_attach_aux (v, src_fv, var_dtor_free);
1576 /* Produces a case map from dictionary D, which must have been
1577 previously prepared with start_case_map().
1579 Does not retain any reference to D, and clears the aux members
1580 set up by start_case_map().
1582 Returns the new case map, or a null pointer if no mapping is
1583 required (that is, no data has changed position). */
1584 static struct case_map *
1585 finish_case_map (struct dictionary *d)
1587 struct case_map *map;
1588 size_t var_cnt = dict_get_var_cnt (d);
1592 map = xmalloc (sizeof *map);
1593 map->value_cnt = dict_get_next_value_idx (d);
1594 map->map = xnmalloc (map->value_cnt, sizeof *map->map);
1595 for (i = 0; i < map->value_cnt; i++)
1599 for (i = 0; i < var_cnt; i++)
1601 struct variable *v = dict_get_var (d, i);
1602 int *src_fv = (int *) var_detach_aux (v);
1605 if (v->fv != *src_fv)
1608 for (idx = 0; idx < v->nv; idx++)
1610 int src_idx = *src_fv + idx;
1611 int dst_idx = v->fv + idx;
1613 assert (map->map[dst_idx] == -1);
1614 map->map[dst_idx] = src_idx;
1621 destroy_case_map (map);
1625 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1631 /* Maps from SRC to DST, applying case map MAP. */
1633 map_case (const struct case_map *map,
1634 const struct ccase *src, struct ccase *dst)
1638 assert (map != NULL);
1639 assert (src != NULL);
1640 assert (dst != NULL);
1641 assert (src != dst);
1643 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1645 int src_idx = map->map[dst_idx];
1647 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1651 /* Destroys case map MAP. */
1653 destroy_case_map (struct case_map *map)