1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
24 #include "any-reader.h"
25 #include "any-writer.h"
28 #include "dictionary.h"
30 #include "file-handle.h"
34 #include "por-file-writer.h"
36 #include "sys-file-writer.h"
38 #include "value-labels.h"
40 #include "procedure.h"
43 #define _(msgid) gettext (msgid)
45 #include "debug-print.h"
47 /* Rearranging and reducing a dictionary. */
/* Case-map helpers, defined near the end of this file.  Dictionary
   edits are bracketed between start_case_map() and finish_case_map();
   the resulting map is applied with map_case() and released with
   destroy_case_map(). */
48 static void start_case_map (struct dictionary *);
49 static struct case_map *finish_case_map (struct dictionary *);
50 static void map_case (const struct case_map *,
51 const struct ccase *, struct ccase *);
52 static void destroy_case_map (struct case_map *);
/* Parses one dictionary-trimming subcommand (MAP, DROP, KEEP or
   RENAME) and applies it to the given dictionary. */
54 static bool parse_dict_trim (struct dictionary *);
56 /* Reading system and portable files. */
58 /* Type of command. */
65 /* Case reader input program: the auxiliary data attached to the
   case source that parse_read_command() creates. */
66 struct case_reader_pgm
68 struct any_reader *reader; /* File reader. */
69 struct case_map *map; /* Map from file dict to active file dict;
   null when cases can be read without rearranging. */
70 struct ccase bounce; /* Bounce buffer: receives the case as read
   from the file before map_case() copies it out. */
73 static const struct case_source_class case_reader_source_class;
75 static void case_reader_pgm_free (struct case_reader_pgm *);
77 /* Parses a GET or IMPORT command.  TYPE says which of the two is
   being parsed; the TYPE subcommand is accepted for IMPORT only.
   On failure the partially built reader program is freed and
   CMD_CASCADING_FAILURE is returned. */
79 parse_read_command (enum reader_command type)
81 struct case_reader_pgm *pgm = NULL;
82 struct file_handle *fh = NULL;
83 struct dictionary *dict = NULL;
/* FILE, or a bare string token, introduces the file to read. */
89 if (lex_match_id ("FILE") || token == T_STRING)
93 fh = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
97 else if (type == IMPORT_CMD && lex_match_id ("TYPE"))
101 if (lex_match_id ("COMM"))
103 else if (lex_match_id ("TAPE"))
107 lex_error (_("expecting COMM or TAPE"));
117 lex_sbc_missing ("FILE");
121 discard_variables ();
/* Open the reader; any_reader_open() hands back the file's
   dictionary through DICT. */
123 pgm = xmalloc (sizeof *pgm);
124 pgm->reader = any_reader_open (fh, &dict);
126 case_nullify (&pgm->bounce);
127 if (pgm->reader == NULL)
/* The bounce case is sized for the file's dictionary as read, before
   any trimming. */
130 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
/* Bracket the dictionary-trimming subcommands between
   start_case_map() and finish_case_map() so that the rearrangement
   they cause is captured in pgm->map. */
132 start_case_map (dict);
137 if (!parse_dict_trim (dict))
141 pgm->map = finish_case_map (dict);
/* Discard the previous active-file dictionary. */
143 dict_destroy (default_dict);
/* Install the reader program as the case source. */
146 vfm_source = create_case_source (&case_reader_source_class, pgm);
/* Failure path. */
151 case_reader_pgm_free (pgm);
154 return CMD_CASCADING_FAILURE;
157 /* Frees a struct case_reader_pgm: closes its reader, destroys its
   case map, and destroys its bounce case. */
159 case_reader_pgm_free (struct case_reader_pgm *pgm)
163 any_reader_close (pgm->reader);
164 destroy_case_map (pgm->map);
165 case_destroy (&pgm->bounce);
170 /* Clears internal state related to case reader input procedure.
   This is the destroy member of case_reader_source_class; the
   struct case_reader_pgm to free is carried in SOURCE->aux. */
172 case_reader_source_destroy (struct case_source *source)
174 struct case_reader_pgm *pgm = source->aux;
175 case_reader_pgm_free (pgm);
178 /* Reads all the cases from the data file into C and passes them
179 to WRITE_CASE one by one, passing WC_DATA.
180 Returns true if successful, false if an I/O error occurred.
   This is the read member of case_reader_source_class; the reader
   program is carried in SOURCE->aux. */
182 case_reader_source_read (struct case_source *source,
184 write_case_func *write_case, write_case_data wc_data)
186 struct case_reader_pgm *pgm = source->aux;
/* With no case map, read straight into C. */
192 if (pgm->map == NULL)
193 got_case = any_reader_read (pgm->reader, c);
/* Otherwise read into the bounce buffer and let map_case() copy the
   values into their positions in C. */
196 got_case = any_reader_read (pgm->reader, &pgm->bounce);
198 map_case (pgm->map, &pgm->bounce, c);
203 ok = write_case (wc_data);
/* Success requires both that WRITE_CASE succeeded and that the
   reader reports no error. */
207 return ok && !any_reader_error (pgm->reader);
/* Case source class for the source that parse_read_command() installs
   in vfm_source. */
210 static const struct case_source_class case_reader_source_class =
214 case_reader_source_read,
215 case_reader_source_destroy,
222 return parse_read_command (GET_CMD);
229 return parse_read_command (IMPORT_CMD);
232 /* Writing system and portable files. */
234 /* Type of output file. */
237 SYSFILE_WRITER, /* System file. */
238 PORFILE_WRITER /* Portable file. */
241 /* Type of a command. */
244 XFORM_CMD, /* Transformation. */
245 PROC_CMD /* Procedure. */
248 /* File writer plus a case map. */
251 struct any_writer *writer; /* File writer. */
252 struct case_map *map; /* Map to output file dictionary
253 (null pointer for identity mapping). */
254 struct ccase bounce; /* Bounce buffer for mapping (if needed). */
/* Closes AW's underlying writer and releases its case map and bounce
   case.  OK receives the result of any_writer_close(); callers such
   as output_trns_free() treat the return value as a success flag. */
259 case_writer_destroy (struct case_writer *aw)
264 ok = any_writer_close (aw->writer);
265 destroy_case_map (aw->map);
266 case_destroy (&aw->bounce);
272 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
273 WRITER_TYPE identifies the type of file to write,
274 and COMMAND_TYPE identifies the type of command.
276 On success, returns a writer.
277 For procedures only, sets *RETAIN_UNSELECTED to true if cases
278 that would otherwise be excluded by FILTER or USE should be
   written anyway (UNSELECTED=RETAIN, which is also the initial
   setting), or to false for UNSELECTED=DELETE.  RETAIN_UNSELECTED
   must be null exactly when COMMAND_TYPE is XFORM_CMD.
281 On failure, returns a null pointer. */
282 static struct case_writer *
283 parse_write_command (enum writer_type writer_type,
284 enum command_type command_type,
285 bool *retain_unselected)
288 struct file_handle *handle; /* Output file. */
289 struct dictionary *dict; /* Dictionary for output file. */
290 struct case_writer *aw; /* Writer. */
292 /* Common options. */
293 bool print_map; /* Print map? TODO. */
294 bool print_short_names; /* Print long-to-short name map. TODO. */
295 struct sfm_write_options sysfile_opts;
296 struct pfm_write_options porfile_opts;
298 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
299 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
300 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
302 if (command_type == PROC_CMD)
303 *retain_unselected = true;
/* Work on a clone of the active file's dictionary. */
306 dict = dict_clone (default_dict);
307 aw = xmalloc (sizeof *aw);
310 case_nullify (&aw->bounce);
312 print_short_names = false;
313 sysfile_opts = sfm_writer_default_options ();
314 porfile_opts = pfm_writer_default_options ();
/* Start recording the case map before anything is deleted from the
   clone, beginning with the scratch variables. */
316 start_case_map (dict);
317 dict_delete_scratch_vars (dict);
/* Subcommands.  Several are accepted only for one writer type or one
   command type, as the conditions below show. */
322 if (lex_match_id ("OUTFILE"))
326 lex_sbc_only_once ("OUTFILE");
332 handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
336 else if (lex_match_id ("NAMES"))
337 print_short_names = true;
338 else if (lex_match_id ("PERMISSIONS"))
343 if (lex_match_id ("READONLY"))
345 else if (lex_match_id ("WRITEABLE"))
349 lex_error (_("expecting %s or %s"), "READONLY", "WRITEABLE");
/* The permission setting applies to both kinds of writer options. */
352 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
354 else if (command_type == PROC_CMD && lex_match_id ("UNSELECTED"))
357 if (lex_match_id ("RETAIN"))
358 *retain_unselected = true;
359 else if (lex_match_id ("DELETE"))
360 *retain_unselected = false;
363 lex_error (_("expecting %s or %s"), "RETAIN", "DELETE");
367 else if (writer_type == SYSFILE_WRITER && lex_match_id ("COMPRESSED"))
368 sysfile_opts.compress = true;
369 else if (writer_type == SYSFILE_WRITER && lex_match_id ("UNCOMPRESSED"))
370 sysfile_opts.compress = false;
371 else if (writer_type == SYSFILE_WRITER && lex_match_id ("VERSION"))
374 if (!lex_force_int ())
376 sysfile_opts.version = lex_integer ();
379 else if (writer_type == PORFILE_WRITER && lex_match_id ("TYPE"))
382 if (lex_match_id ("COMMUNICATIONS"))
383 porfile_opts.type = PFM_COMM;
384 else if (lex_match_id ("TAPE"))
385 porfile_opts.type = PFM_TAPE;
388 lex_error (_("expecting %s or %s"), "COMM", "TAPE");
392 else if (writer_type == PORFILE_WRITER && lex_match_id ("DIGITS"))
395 if (!lex_force_int ())
397 porfile_opts.digits = lex_integer ();
/* Anything else must be a dictionary-trimming subcommand. */
400 else if (!parse_dict_trim (dict))
403 if (!lex_match ('/'))
406 if (lex_end_of_command () != CMD_SUCCESS)
411 lex_sbc_missing ("OUTFILE");
/* Compact the trimmed dictionary and derive the case map from it;
   finish_case_map() returns null if no data changed position. */
415 dict_compact_values (dict);
416 aw->map = finish_case_map (dict);
418 case_create (&aw->bounce, dict_get_next_value_idx (dict));
/* Handles that refer to ordinary files get a system-file or
   portable-file writer configured with the options parsed above;
   any_writer_open() below takes no such options. */
420 if (fh_get_referent (handle) == FH_REF_FILE)
425 aw->writer = any_writer_from_sfm_writer (
426 sfm_open_writer (handle, dict, sysfile_opts));
429 aw->writer = any_writer_from_pfm_writer (
430 pfm_open_writer (handle, dict, porfile_opts));
435 aw->writer = any_writer_open (handle, dict);
/* Failure path. */
441 case_writer_destroy (aw);
446 /* Writes case C to writer AW.  map_case() rearranges C into AW's
   bounce case according to AW's case map; the return value is that
   of any_writer_write(). */
448 case_writer_write_case (struct case_writer *aw, struct ccase *c)
452 map_case (aw->map, c, &aw->bounce);
455 return any_writer_write (aw->writer, c);
458 /* SAVE and EXPORT. */
460 static bool output_proc (struct ccase *, void *);
462 /* Parses and performs the SAVE or EXPORT procedure.
   WRITER_TYPE selects the kind of file.  Returns CMD_SUCCESS if
   procedure() succeeded, otherwise CMD_CASCADING_FAILURE. */
464 parse_output_proc (enum writer_type writer_type)
466 bool retain_unselected;
467 struct variable *saved_filter_variable;
468 struct case_writer *aw;
471 aw = parse_write_command (writer_type, PROC_CMD, &retain_unselected);
473 return CMD_CASCADING_FAILURE;
/* To retain unselected cases, clear the dictionary's filter variable
   for the duration of the procedure, then restore it. */
475 saved_filter_variable = dict_get_filter (default_dict);
476 if (retain_unselected)
477 dict_set_filter (default_dict, NULL);
478 ok = procedure (output_proc, aw);
479 dict_set_filter (default_dict, saved_filter_variable);
/* NOTE(review): the result of case_writer_destroy() is not folded
   into OK, so a failure reported while closing the writer does not
   affect the command's return value -- confirm that is intended. */
481 case_writer_destroy (aw);
482 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
485 /* Writes case C to file.  This is the callback that
   parse_output_proc() passes to procedure(); AW_ is the
   struct case_writer to write to. */
487 output_proc (struct ccase *c, void *aw_)
489 struct case_writer *aw = aw_;
490 return case_writer_write_case (aw, c);
496 return parse_output_proc (SYSFILE_WRITER);
502 return parse_output_proc (PORFILE_WRITER);
505 /* XSAVE and XEXPORT. */
507 /* Transformation. */
510 struct case_writer *aw; /* Writer. */
513 static trns_proc_func output_trns_proc;
514 static trns_free_func output_trns_free;
516 /* Parses the XSAVE or XEXPORT transformation command.
   WRITER_TYPE selects the kind of file. */
518 parse_output_trns (enum writer_type writer_type)
520 struct output_trns *t = xmalloc (sizeof *t);
/* parse_write_command() requires a null RETAIN_UNSELECTED argument
   for XFORM_CMD. */
521 t->aw = parse_write_command (writer_type, XFORM_CMD, NULL);
525 return CMD_CASCADING_FAILURE;
/* Register the transformation; T is the auxiliary data handed to
   output_trns_proc() and output_trns_free(). */
528 add_transformation (output_trns_proc, output_trns_free, t);
532 /* Writes case C to the system file specified on XSAVE or XEXPORT.
   TRNS_ is the struct output_trns registered by parse_output_trns().
   Always returns TRNS_CONTINUE. */
534 output_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
536 struct output_trns *t = trns_;
/* NOTE(review): the result of case_writer_write_case() is discarded,
   so a write failure is not reported from here -- confirm that is
   intended. */
537 case_writer_write_case (t->aw, c);
538 return TRNS_CONTINUE;
541 /* Frees an XSAVE or XEXPORT transformation.
542 Returns true if successful, false if an I/O error occurred.
   TRNS_ is the struct output_trns registered by parse_output_trns(). */
544 output_trns_free (void *trns_)
546 struct output_trns *t = trns_;
/* Destroying the case writer closes the output; its status is kept
   in OK. */
551 ok = case_writer_destroy (t->aw);
561 return parse_output_trns (SYSFILE_WRITER);
564 /* XEXPORT command. */
568 return parse_output_trns (PORFILE_WRITER);
571 static bool rename_variables (struct dictionary *dict);
572 static bool drop_variables (struct dictionary *dict);
573 static bool keep_variables (struct dictionary *dict);
575 /* Commands that read and write system files share a great deal
576 of common syntactic structure for rearranging and dropping
577 variables. This function parses this syntax and modifies DICT
578 appropriately. Returns true on success, false on failure.
   The recognized subcommand keywords are MAP, DROP, KEEP and RENAME;
   DROP, KEEP and RENAME are delegated to drop_variables(),
   keep_variables() and rename_variables() respectively. */
580 parse_dict_trim (struct dictionary *dict)
582 if (lex_match_id ("MAP"))
587 else if (lex_match_id ("DROP"))
588 return drop_variables (dict);
589 else if (lex_match_id ("KEEP"))
590 return keep_variables (dict);
591 else if (lex_match_id ("RENAME"))
592 return rename_variables (dict);
/* None of the keywords matched. */
595 lex_error (_("expecting a valid subcommand"));
600 /* Parses and performs the RENAME subcommand, modifying DICT in
   place.  It is reached through parse_dict_trim() and is also called
   directly for the per-file RENAME of MATCH FILES.

   Two syntaxes are handled: a bare "OLD=NEW" pair, which is refused
   if NEW already names a variable in DICT, and one or more
   parenthesized groups "(old list = new list)", whose renames are
   handed to dict_rename_vars() in a single call. */
602 rename_variables (struct dictionary *dict)
620 v = parse_dict_variable (dict);
623 if (!lex_force_match ('=')
626 if (dict_lookup_var (dict, tokid) != NULL)
628 msg (SE, _("Cannot rename %s as %s because there already exists "
629 "a variable named %s. To rename variables with "
630 "overlapping names, use a single RENAME subcommand "
631 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
632 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
636 dict_rename_var (dict, v, tokid);
/* Parenthesized groups.  Both lists are parsed with PV_APPEND, so NV
   and NN accumulate across groups. */
645 while (lex_match ('('))
649 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
651 if (!lex_match ('='))
653 msg (SE, _("`=' expected after variable list."));
656 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
/* The counts are cast to int so that the arguments match the %d
   conversions in the message; the message text itself is left
   untouched so existing translations keep matching. */
660 msg (SE, _("Number of variables on left side of `=' (%d) does not "
661 "match number of variables on right side (%d), in "
662 "parenthesized group %d of RENAME subcommand."),
663 (int) (nv - old_nv), (int) (nn - old_nv), group);
666 if (!lex_force_match (')'))
/* Apply all the collected renames at once; on failure ERR_NAME is
   the name reported in the message. */
671 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
673 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
/* Loop over the NN collected new names. */
679 for (i = 0; i < nn; i++)
687 /* Parses and performs the DROP subcommand of GET and SAVE.
688 Returns true if successful, false on failure.
   Also called directly for the DROP subcommand of MATCH FILES.
   Dropping every variable in DICT is reported as an error. */
690 drop_variables (struct dictionary *dict)
696 if (!parse_variables (dict, &v, &nv, PV_NONE))
698 dict_delete_vars (dict, v, nv);
701 if (dict_get_var_cnt (dict) == 0)
703 msg (SE, _("Cannot DROP all variables from dictionary."));
709 /* Parses and performs the KEEP subcommand of GET and SAVE.
710 Returns true if successful, false on failure.
   Also called directly for the KEEP subcommand of MATCH FILES. */
712 keep_variables (struct dictionary *dict)
719 if (!parse_variables (dict, &v, &nv, PV_NONE))
722 /* Move the specified variables to the beginning. */
723 dict_reorder_vars (dict, v, nv);
725 /* Delete the remaining variables.  V is resized and reused to
   collect the variables at positions NV and up, that is, those
   that follow the NV variables just moved to the front. */
726 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
727 for (i = nv; i < dict_get_var_cnt (dict); i++)
728 v[i - nv] = dict_get_var (dict, i);
729 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
737 #include "debug-print.h"
742 MTF_FILE, /* Specified on FILE= subcommand. */
743 MTF_TABLE /* Specified on TABLE= subcommand. */
746 /* One of the files on MATCH FILES. */
749 struct mtf_file *next, *prev; /* Next, previous in the list of files. */
750 struct mtf_file *next_min; /* Next in the chain of minimums. */
752 int type; /* One of MTF_*. */
753 struct variable **by; /* List of BY variables for this file. */
754 struct file_handle *handle; /* File handle. */
755 struct any_reader *reader; /* File reader. */
756 struct dictionary *dict; /* Dictionary from system file. */
759 char *in_name; /* Variable name. */
760 struct variable *in_var; /* Variable (in master dictionary). */
762 struct ccase input; /* Input record. */
765 /* MATCH FILES procedure. */
768 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
769 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
771 bool ok; /* False if I/O error occurs. */
773 size_t by_cnt; /* Number of variables on BY subcommand. */
775 /* Names of FIRST, LAST variables. */
776 char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
778 struct dictionary *dict; /* Dictionary of output file. */
779 struct case_sink *sink; /* Sink to receive output. */
780 struct ccase mtf_case; /* Case used for output. */
782 unsigned seq_num; /* Have we initialized this variable? */
783 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
786 static bool mtf_free (struct mtf_proc *);
787 static bool mtf_close_file (struct mtf_file *);
788 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
789 static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
791 static bool mtf_read_nonactive_records (void *);
792 static bool mtf_processing_finish (void *);
793 static bool mtf_processing (struct ccase *, void *);
795 static char *var_type_description (struct variable *);
797 static void set_master (struct variable *, struct variable *master);
798 static struct variable *get_master (struct variable *);
800 /* Parse and execute the MATCH FILES command. */
802 cmd_match_files (void)
805 struct mtf_file *first_table = NULL;
806 struct mtf_file *iter;
808 bool used_active_file = false;
809 bool saw_table = false;
814 mtf.head = mtf.tail = NULL;
818 mtf.dict = dict_create ();
820 case_nullify (&mtf.mtf_case);
823 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
827 && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
829 struct mtf_file *file = xmalloc (sizeof *file);
831 if (lex_match_id ("FILE"))
832 file->type = MTF_FILE;
833 else if (lex_match_id ("TABLE"))
835 file->type = MTF_TABLE;
846 file->in_name = NULL;
848 case_nullify (&file->input);
850 /* FILEs go first, then TABLEs. */
851 if (file->type == MTF_TABLE || first_table == NULL)
854 file->prev = mtf.tail;
856 mtf.tail->next = file;
858 if (mtf.head == NULL)
860 if (file->type == MTF_TABLE && first_table == NULL)
865 assert (file->type == MTF_FILE);
866 file->next = first_table;
867 file->prev = first_table->prev;
868 if (first_table->prev)
869 first_table->prev->next = file;
872 first_table->prev = file;
880 if (used_active_file)
882 msg (SE, _("The active file may not be specified more "
886 used_active_file = true;
888 assert (pgm_state != STATE_INPUT);
889 if (pgm_state == STATE_INIT)
891 msg (SE, _("Cannot specify the active file since no active "
892 "file has been defined."));
899 _("MATCH FILES may not be used after TEMPORARY when "
900 "the active file is an input source. "
901 "Temporary transformations will be made permanent."));
905 file->dict = default_dict;
909 file->handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
910 if (file->handle == NULL)
913 file->reader = any_reader_open (file->handle, &file->dict);
914 if (file->reader == NULL)
917 case_create (&file->input, dict_get_next_value_idx (file->dict));
920 while (lex_match ('/'))
921 if (lex_match_id ("RENAME"))
923 if (!rename_variables (file->dict))
926 else if (lex_match_id ("IN"))
935 if (file->in_name != NULL)
937 msg (SE, _("Multiple IN subcommands for a single FILE or "
941 file->in_name = xstrdup (tokid);
946 mtf_merge_dictionary (mtf.dict, file);
951 if (lex_match (T_BY))
953 struct variable **by;
957 msg (SE, _("BY may appear at most once."));
962 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
963 PV_NO_DUPLICATE | PV_NO_SCRATCH))
966 for (iter = mtf.head; iter != NULL; iter = iter->next)
970 iter->by = xnmalloc (mtf.by_cnt, sizeof *iter->by);
972 for (i = 0; i < mtf.by_cnt; i++)
974 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
975 if (iter->by[i] == NULL)
977 msg (SE, _("File %s lacks BY variable %s."),
978 iter->handle ? fh_get_name (iter->handle) : "*",
987 else if (lex_match_id ("FIRST"))
989 if (mtf.first[0] != '\0')
991 msg (SE, _("FIRST may appear at most once."));
996 if (!lex_force_id ())
998 strcpy (mtf.first, tokid);
1001 else if (lex_match_id ("LAST"))
1003 if (mtf.last[0] != '\0')
1005 msg (SE, _("LAST may appear at most once."));
1010 if (!lex_force_id ())
1012 strcpy (mtf.last, tokid);
1015 else if (lex_match_id ("MAP"))
1019 else if (lex_match_id ("DROP"))
1021 if (!drop_variables (mtf.dict))
1024 else if (lex_match_id ("KEEP"))
1026 if (!keep_variables (mtf.dict))
1035 if (!lex_match ('/') && token != '.')
1037 lex_end_of_command ();
1042 if (mtf.by_cnt == 0)
1046 msg (SE, _("BY is required when TABLE is specified."));
1051 msg (SE, _("BY is required when IN is specified."));
1056 /* Set up mapping from each file's variables to master
1058 for (iter = mtf.head; iter != NULL; iter = iter->next)
1060 struct dictionary *d = iter->dict;
1063 for (i = 0; i < dict_get_var_cnt (d); i++)
1065 struct variable *v = dict_get_var (d, i);
1066 struct variable *mv = dict_lookup_var (mtf.dict, v->name);
1072 /* Add IN variables to master dictionary. */
1073 for (iter = mtf.head; iter != NULL; iter = iter->next)
1074 if (iter->in_name != NULL)
1076 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
1077 if (iter->in_var == NULL)
1079 msg (SE, _("IN variable name %s duplicates an "
1080 "existing variable name."),
1081 iter->in_var->name);
1084 iter->in_var->print = iter->in_var->write
1085 = make_output_format (FMT_F, 1, 0);
1088 /* MATCH FILES performs an n-way merge on all its input files.
1091 1. Read one input record from every input FILE.
1093 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
1095 3. Find the FILE input record(s) that have minimum BY
1096 values. Store all the values from these input records into
1099 4. For every TABLE, read another record as long as the BY values
1100 on the TABLE's input record are less than the FILEs' BY values.
1101 If an exact match is found, store all the values from the TABLE
1102 input record into the output record.
1104 5. Write the output record.
1106 6. Read another record from each input file FILE and TABLE that
1107 we stored values from above. If we come to the end of one of the
1108 input files, remove it from the list of input files.
1110 7. Repeat from step 2.
1112 Unfortunately, this algorithm can't be implemented in a
1113 straightforward way because there's no function to read a
1114 record from the active file. Instead, it has to be written
1117 FIXME: For merging large numbers of files (more than 10?) a
1118 better algorithm would use a heap for finding minimum
1121 if (!used_active_file)
1122 discard_variables ();
1124 dict_compact_values (mtf.dict);
1125 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
1126 if (mtf.sink->class->open != NULL)
1127 mtf.sink->class->open (mtf.sink);
1129 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1130 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1132 if (!mtf_read_nonactive_records (&mtf))
1135 if (used_active_file)
1136 ok = procedure (mtf_processing, &mtf) && mtf_processing_finish (&mtf);
1138 ok = mtf_processing_finish (&mtf);
1140 free_case_source (vfm_source);
1143 dict_destroy (default_dict);
1144 default_dict = mtf.dict;
1146 vfm_source = mtf.sink->class->make_source (mtf.sink);
1147 free_case_sink (mtf.sink);
1149 if (!mtf_free (&mtf))
1151 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1155 return CMD_CASCADING_FAILURE;
1158 /* Repeats 2...7 an arbitrary number of times.
   cmd_match_files() calls this after procedure() has run (or
   directly, when the active file is not an input).  MTF_ is the
   struct mtf_proc. */
1160 mtf_processing_finish (void *mtf_)
1162 struct mtf_proc *mtf = mtf_;
1163 struct mtf_file *iter;
1165 /* Find the active file and delete it.  The active file is the
   entry whose handle is null. */
1166 for (iter = mtf->head; iter; iter = iter->next)
1167 if (iter->handle == NULL)
1169 if (!mtf_delete_file_in_place (mtf, &iter))
/* Keep merging while a FILE remains at the head of the list; a null
   case is passed because there is no active-file case. */
1174 while (mtf->head && mtf->head->type == MTF_FILE)
1175 if (!mtf_processing (NULL, mtf))
1181 /* Return a string in a static buffer describing V's variable type and
   width: "numeric" for a NUMERIC variable, otherwise "string with
   width N" for an ALPHA variable.  The storage is static, so the
   result must not be freed. */
1184 var_type_description (struct variable *v)
/* NOTE(review): two buffers are declared, presumably so that two
   results can be live at once, as in the message in
   mtf_merge_dictionary() -- confirm. */
1186 static char buf[2][32];
1193 if (v->type == NUMERIC)
1194 strcpy (s, "numeric");
1197 assert (v->type == ALPHA);
1198 sprintf (s, "string with width %d", v->width);
1203 /* Closes FILE and frees its associated data.
1204 Returns true if successful, false if an I/O error
1205 occurred on FILE. */
1207 mtf_close_file (struct mtf_file *file)
/* The status is sampled before the reader is closed.  A file with no
   reader counts as successful. */
1209 bool ok = file->reader == NULL || !any_reader_error (file->reader);
1211 any_reader_close (file->reader);
/* Only files with a handle have their dictionary destroyed here; the
   active file (null handle) uses default_dict. */
1212 if (file->handle != NULL)
1213 dict_destroy (file->dict);
1214 case_destroy (&file->input);
1215 free (file->in_name);
1220 /* Free all the data for the MATCH FILES procedure.
1221 Returns true if successful, false if an I/O error
1224 mtf_free (struct mtf_proc *mtf)
1226 struct mtf_file *iter, *next;
1229 for (iter = mtf->head; iter; iter = next)
1232 assert (iter->dict != mtf->dict);
1233 if (!mtf_close_file (iter))
1238 dict_destroy (mtf->dict);
1239 case_destroy (&mtf->mtf_case);
1240 free (mtf->seq_nums);
1245 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1246 file in the chain, or to NULL if it was the last in the chain.
1247 Returns true if successful, false if an I/O error occurred.
   Before the file is closed, the output case's values that come
   from this file are reset. */
1249 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1251 struct mtf_file *f = *file;
/* Unlink F from the doubly linked list, updating head and tail as
   needed. */
1255 f->prev->next = f->next;
1257 f->next->prev = f->prev;
1259 mtf->head = f->next;
1261 mtf->tail = f->prev;
/* The file's IN variable, if any, becomes 0 in the output case. */
1264 if (f->in_var != NULL)
1265 case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
/* Reset the output values of the master variables that correspond to
   F's variables; string values are filled with spaces. */
1266 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1268 struct variable *v = dict_get_var (f->dict, i);
1269 struct variable *mv = get_master (v);
1272 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1274 if (v->type == NUMERIC)
1277 memset (out->s, ' ', v->width);
1281 return mtf_close_file (f);
1284 /* Read a record from every input file except the active file.
1285 Returns true if successful, false if an I/O error occurred.
   MTF_ is the struct mtf_proc.  A file from which no record can be
   read is removed from the list. */
1287 mtf_read_nonactive_records (void *mtf_)
1289 struct mtf_proc *mtf = mtf_;
1290 struct mtf_file *iter, *next;
1293 for (iter = mtf->head; ok && iter != NULL; iter = next)
/* Files with a handle are the non-active files. */
1296 if (iter->handle && !any_reader_read (iter->reader, &iter->input))
1297 if (!mtf_delete_file_in_place (mtf, &iter))
1303 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1304 if A == B, 1 if A > B.
   A file whose own input case is null is compared using case C
   instead. */
1306 mtf_compare_BY_values (struct mtf_proc *mtf,
1307 struct mtf_file *a, struct mtf_file *b,
1310 struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1311 struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
/* NOTE(review): A and B have already been dereferenced by the
   initializers above, and are dereferenced again below, so this
   assertion cannot guard against a null A or B -- confirm which
   invariant it is meant to express. */
1312 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1313 return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
1316 /* Perform one iteration of steps 3...7 above.
1317 Returns true if successful, false if an I/O error occurred.
   C is the current case from the active file, used for any file
   whose own input case is null; mtf_processing_finish() passes a
   null C.  MTF_ is the struct mtf_proc. */
1319 mtf_processing (struct ccase *c, void *mtf_)
1321 struct mtf_proc *mtf = mtf_;
1323 /* Do we need another record from the active file? */
1324 bool read_active_file;
1326 assert (mtf->head != NULL);
1327 if (mtf->head->type == MTF_TABLE)
1332 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1333 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1334 struct mtf_file *iter, *next;
1336 read_active_file = false;
1338 /* 3. Find the FILE input record(s) that have minimum BY
1339 values. Store all the values from these input records into
1340 the output record.  The min and max lists are chained through
   the next_min member. */
1341 min_head = min_tail = mtf->head;
1342 max_head = max_tail = NULL;
1343 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1346 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1350 max_tail = max_tail->next_min = iter;
1352 max_head = max_tail = iter;
1355 min_tail = min_tail->next_min = iter;
/* ITER becomes the new minimum: the old minimum list is moved onto
   the max list. */
1360 max_tail->next_min = min_head;
1361 max_tail = min_tail;
1365 max_head = min_head;
1366 max_tail = min_tail;
1368 min_head = min_tail = iter;
1372 /* 4. For every TABLE, read another record as long as the BY
1373 values on the TABLE's input record are less than the FILEs'
1374 BY values. If an exact match is found, store all the values
1375 from the TABLE input record into the output record. */
1376 for (; iter != NULL; iter = next)
1378 assert (iter->type == MTF_TABLE);
1383 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1387 max_tail = max_tail->next_min = iter;
1389 max_head = max_tail = iter;
1392 min_tail = min_tail->next_min = iter;
1395 if (iter->handle == NULL)
1397 if (any_reader_read (iter->reader, &iter->input))
1399 if (!mtf_delete_file_in_place (mtf, &iter))
1406 /* Next sequence number. */
1409 /* Store data to all the records we are using.  A master variable
   whose seq_nums entry already equals seq_num has been stored
   during this iteration and is not overwritten. */
1411 min_tail->next_min = NULL;
1412 for (iter = min_head; iter; iter = iter->next_min)
1416 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1418 struct variable *v = dict_get_var (iter->dict, i);
1419 struct variable *mv = get_master (v);
1421 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1423 struct ccase *record
1424 = case_is_null (&iter->input) ? c : &iter->input;
1425 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1427 mtf->seq_nums[mv->index] = mtf->seq_num;
1428 if (v->type == NUMERIC)
1429 out->f = case_num (record, v->fv);
1431 memcpy (out->s, case_str (record, v->fv), v->width);
/* The file contributed to this output record, so its IN variable is
   set to 1. */
1434 if (iter->in_var != NULL)
1435 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
1437 if (iter->type == MTF_FILE && iter->handle == NULL)
1438 read_active_file = true;
1441 /* Store missing values to all the records we're not using. */
1444 max_tail->next_min = NULL;
1445 for (iter = max_head; iter; iter = iter->next_min)
1449 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1451 struct variable *v = dict_get_var (iter->dict, i);
1452 struct variable *mv = get_master (v);
1454 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1456 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1457 mtf->seq_nums[mv->index] = mtf->seq_num;
1459 if (v->type == NUMERIC)
1462 memset (out->s, ' ', v->width);
1465 if (iter->in_var != NULL)
1466 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1469 /* 5. Write the output record. */
1470 mtf->sink->class->write (mtf->sink, &mtf->mtf_case);
1472 /* 6. Read another record from each input file FILE and TABLE
1473 that we stored values from above. If we come to the end of
1474 one of the input files, remove it from the list of input files. */
1476 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1478 next = iter->next_min;
1479 if (iter->reader != NULL
1480 && !any_reader_read (iter->reader, &iter->input))
1481 if (!mtf_delete_file_in_place (mtf, &iter))
/* Keep iterating until a new active-file case is needed or no FILE
   remains at the head of the list. */
1485 while (!read_active_file
1486 && mtf->head != NULL && mtf->head->type == MTF_FILE);
1491 /* Merge the dictionary for file F into master dictionary M.
   The file label is taken from F if M has none yet, and F's
   documents are set on M or appended to M's existing documents.
   Each variable of F is then matched by name against M: a variable
   already present in M must have the same width, and supplies value
   labels, missing values and a variable label where M's copy lacks
   them; a variable not yet in M is cloned into it. */
1493 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1495 struct dictionary *d = f->dict;
1496 const char *d_docs, *m_docs;
1499 if (dict_get_label (m) == NULL)
1500 dict_set_label (m, dict_get_label (d));
1502 d_docs = dict_get_documents (d);
1503 m_docs = dict_get_documents (m);
1507 dict_set_documents (m, d_docs);
/* Concatenate M's documents followed by F's. */
1513 new_len = strlen (m_docs) + strlen (d_docs);
1514 new_docs = xmalloc (new_len + 1);
1515 strcpy (new_docs, m_docs);
1516 strcat (new_docs, d_docs);
1517 dict_set_documents (m, new_docs);
1522 for (i = 0; i < dict_get_var_cnt (d); i++)
1524 struct variable *dv = dict_get_var (d, i);
1525 struct variable *mv = dict_lookup_var (m, dv->name);
1527 if (dict_class_from_id (dv->name) == DC_SCRATCH)
1532 if (mv->width != dv->width)
/* F->handle is null when F is the active file, so it is guarded and
   shown as "*", the same way the BY-variable message in
   cmd_match_files() names the active file. */
1534 msg (SE, _("Variable %s in file %s (%s) has different "
1535 "type or width from the same variable in "
1536 "earlier file (%s)."),
1537 dv->name, f->handle ? fh_get_name (f->handle) : "*",
1538 var_type_description (dv), var_type_description (mv));
1542 if (dv->width == mv->width)
1544 if (val_labs_count (dv->val_labs)
1545 && !val_labs_count (mv->val_labs))
1546 mv->val_labs = val_labs_copy (dv->val_labs);
1547 if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
1548 mv_copy (&mv->miss, &dv->miss);
1551 if (dv->label && !mv->label)
1552 mv->label = xstrdup (dv->label);
1555 mv = dict_clone_var_assert (m, dv, dv->name);
1561 /* Marks V's master variable as MASTER, by attaching MASTER to V as
   its aux data with a null third argument. */
1563 set_master (struct variable *v, struct variable *master)
1565 var_attach_aux (v, master, NULL);
1568 /* Returns the master variable corresponding to V,
1569 as set with set_master(). */
1570 static struct variable *
1571 get_master (struct variable *v)
1580 /* A case map copies data from a case that corresponds to one
1581 dictionary to a case that corresponds to a second dictionary
1582 derived from the first by, optionally, deleting, reordering,
1583 or renaming variables. (No new variables may be created.) */
1589 size_t value_cnt; /* Number of values in map. */
1590 int *map; /* For each destination index, the
1591 corresponding source index. */
1594 /* Prepares dictionary D for producing a case map. Afterward,
1595 the caller may delete, reorder, or rename variables within D
1596 at will before using finish_case_map() to produce the case
   map.
1599 Uses D's aux members, which must otherwise not be in use. */
1601 start_case_map (struct dictionary *d)
1603 size_t var_cnt = dict_get_var_cnt (d);
1606 for (i = 0; i < var_cnt; i++)
1608 struct variable *v = dict_get_var (d, i);
/* Each variable gets a heap-allocated int as its aux data, freed
   through var_dtor_free.  finish_case_map() reads it back as the
   variable's source value index. */
1609 int *src_fv = xmalloc (sizeof *src_fv);
1611 var_attach_aux (v, src_fv, var_dtor_free);
1615 /* Produces a case map from dictionary D, which must have been
1616 previously prepared with start_case_map().
1618 Does not retain any reference to D, and clears the aux members
1619 set up by start_case_map().
1621 Returns the new case map, or a null pointer if no mapping is
1622 required (that is, no data has changed position). */
1623 static struct case_map *
1624 finish_case_map (struct dictionary *d)
1626 struct case_map *map;
1627 size_t var_cnt = dict_get_var_cnt (d);
/* The map has one entry per value in D as it stands now. */
1631 map = xmalloc (sizeof *map);
1632 map->value_cnt = dict_get_next_value_idx (d);
1633 map->map = xnmalloc (map->value_cnt, sizeof *map->map);
1634 for (i = 0; i < map->value_cnt; i++)
1638 for (i = 0; i < var_cnt; i++)
1640 struct variable *v = dict_get_var (d, i);
/* Take back the source index that start_case_map() attached. */
1641 int *src_fv = (int *) var_detach_aux (v);
/* True when the variable's first value index differs from the
   recorded one. */
1644 if (v->fv != *src_fv)
/* Record the source index for each of the variable's NV values.
   The assertion expects each destination slot to hold -1 until it is
   assigned, so no slot is assigned twice. */
1647 for (idx = 0; idx < v->nv; idx++)
1649 int src_idx = *src_fv + idx;
1650 int dst_idx = v->fv + idx;
1652 assert (map->map[dst_idx] == -1);
1653 map->map[dst_idx] = src_idx;
1660 destroy_case_map (map);
/* Loop over trailing entries that are still -1. */
1664 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1670 /* Maps from SRC to DST, applying case map MAP.
   MAP, SRC and DST must all be non-null, and SRC and DST must be
   different cases. */
1672 map_case (const struct case_map *map,
1673 const struct ccase *src, struct ccase *dst)
1677 assert (map != NULL);
1678 assert (src != NULL);
1679 assert (dst != NULL);
1680 assert (src != dst);
/* For each destination index, MAP->map gives the source index whose
   value is copied there. */
1682 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1684 int src_idx = map->map[dst_idx];
1686 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1690 /* Destroys case map MAP. */
1692 destroy_case_map (struct case_map *map)