1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "any-reader.h"
25 #include "any-writer.h"
29 #include "dictionary.h"
31 #include "file-handle.h"
35 #include "por-file-writer.h"
37 #include "sys-file-writer.h"
39 #include "value-labels.h"
41 #include "procedure.h"
44 #define _(msgid) gettext (msgid)
46 #include "debug-print.h"
48 /* Rearranging and reducing a dictionary. */
49 static void start_case_map (struct dictionary *);
50 static struct case_map *finish_case_map (struct dictionary *);
51 static void map_case (const struct case_map *,
52 const struct ccase *, struct ccase *);
53 static void destroy_case_map (struct case_map *);
55 static bool parse_dict_trim (struct dictionary *);
57 /* Reading system and portable files. */
59 /* Type of command. */
/* NOTE(review): as used by case_reader_source_read(), a null MAP
   means cases are read straight into the destination case;
   otherwise each case is read into BOUNCE and then copied through
   MAP by map_case(). */
66 /* Case reader input program. */
67 struct case_reader_pgm
69 struct any_reader *reader; /* File reader. */
70 struct case_map *map; /* Map from file dict to active file dict. */
71 struct ccase bounce; /* Bounce buffer. */
74 static const struct case_source_class case_reader_source_class;
76 static void case_reader_pgm_free (struct case_reader_pgm *);
/* Shared implementation of GET and IMPORT; TYPE says which command
   is being parsed.

   Visible flow: accept FILE (or a bare string token) and parse a
   file handle with fh_parse(); for IMPORT only, also accept TYPE
   followed by COMM or TAPE; report a missing FILE subcommand.
   Then call discard_variables(), open the file with
   any_reader_open() (which is handed the address of DICT), size
   the bounce case from DICT, and bracket parse_dict_trim() with
   start_case_map()/finish_case_map() so the effect of the
   trimming subcommands ends up in PGM->map.  Finally the old
   default dictionary is destroyed and a case source backed by PGM
   is stored in vfm_source.

   The failure path frees PGM with case_reader_pgm_free() and
   returns CMD_CASCADING_FAILURE.
   NOTE(review): the success return and the loop/brace structure
   are not visible in this listing -- confirm against the full
   file. */
78 /* Parses a GET or IMPORT command. */
80 parse_read_command (enum reader_command type)
82 struct case_reader_pgm *pgm = NULL;
83 struct file_handle *fh = NULL;
84 struct dictionary *dict = NULL;
90 if (lex_match_id ("FILE") || token == T_STRING)
94 fh = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
98 else if (type == IMPORT_CMD && lex_match_id ("TYPE"))
102 if (lex_match_id ("COMM"))
104 else if (lex_match_id ("TAPE"))
108 lex_error (_("expecting COMM or TAPE"));
118 lex_sbc_missing ("FILE");
122 discard_variables ();
124 pgm = xmalloc (sizeof *pgm);
125 pgm->reader = any_reader_open (fh, &dict);
/* Nullify the bounce case before the failure check so that the
   cleanup path can destroy it unconditionally. */
127 case_nullify (&pgm->bounce);
128 if (pgm->reader == NULL)
131 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
133 start_case_map (dict);
138 if (!parse_dict_trim (dict))
142 pgm->map = finish_case_map (dict);
144 dict_destroy (default_dict);
147 vfm_source = create_case_source (&case_reader_source_class, pgm);
152 case_reader_pgm_free (pgm);
155 return CMD_CASCADING_FAILURE;
/* Releases what PGM owns, in the order visible here: closes the
   reader, destroys the case map, destroys the bounce case.
   NOTE(review): any null check on PGM and the release of PGM
   itself are not visible in this listing. */
158 /* Frees a struct case_reader_pgm. */
160 case_reader_pgm_free (struct case_reader_pgm *pgm)
164 any_reader_close (pgm->reader);
165 destroy_case_map (pgm->map);
166 case_destroy (&pgm->bounce);
/* Destroy callback for the GET/IMPORT case source: the
   struct case_reader_pgm is carried in SOURCE->aux and is released
   through case_reader_pgm_free(). */
171 /* Clears internal state related to case reader input procedure. */
173 case_reader_source_destroy (struct case_source *source)
175 struct case_reader_pgm *pgm = source->aux;
176 case_reader_pgm_free (pgm);
/* Read callback for the GET/IMPORT case source.  The reader state
   is taken from SOURCE->aux. */
179 /* Reads all the cases from the data file into C and passes them
180 to WRITE_CASE one by one, passing WC_DATA.
181 Returns true if successful, false if an I/O error occurred. */
183 case_reader_source_read (struct case_source *source,
185 write_case_func *write_case, write_case_data wc_data)
187 struct case_reader_pgm *pgm = source->aux;
/* With no case map, read straight into C; otherwise read into the
   bounce buffer and translate it into C through the map. */
193 if (pgm->map == NULL)
194 got_case = any_reader_read (pgm->reader, c);
197 got_case = any_reader_read (pgm->reader, &pgm->bounce);
199 map_case (pgm->map, &pgm->bounce, c);
204 ok = write_case (wc_data);
/* Success needs both a good write_case() result and a reader that
   reports no error. */
208 return ok && !any_reader_error (pgm->reader);
/* Case source class used for GET and IMPORT; it lists
   case_reader_source_read and case_reader_source_destroy as
   callbacks.
   NOTE(review): other initializer members are not visible in this
   listing. */
211 static const struct case_source_class case_reader_source_class =
215 case_reader_source_read,
216 case_reader_source_destroy,
223 return parse_read_command (GET_CMD);
230 return parse_read_command (IMPORT_CMD);
233 /* Writing system and portable files. */
235 /* Type of output file. */
238 SYSFILE_WRITER, /* System file. */
239 PORFILE_WRITER /* Portable file. */
242 /* Type of a command. */
245 XFORM_CMD, /* Transformation. */
246 PROC_CMD /* Procedure. */
249 /* File writer plus a case map. */
252 struct any_writer *writer; /* File writer. */
253 struct case_map *map; /* Map to output file dictionary
254 (null pointer for identity mapping). */
255 struct ccase bounce; /* Bounce buffer for mapping (if needed). */
/* Tears down AW: closes the underlying writer (recording its
   result in OK), then destroys the case map and the bounce case.
   NOTE(review): callers treat the return value as a success flag
   (see output_trns_free()); the return statement and the release
   of AW itself are not visible in this listing. */
260 case_writer_destroy (struct case_writer *aw)
265 ok = any_writer_close (aw->writer);
266 destroy_case_map (aw->map);
267 case_destroy (&aw->bounce);
/* Summary of the subcommands handled below:
     OUTFILE       - output handle (file or scratch), once only
     NAMES         - sets print_short_names (marked TODO)
     PERMISSIONS   - READONLY or WRITEABLE, applied to both the
                     system-file and portable-file options
     UNSELECTED    - RETAIN or DELETE, procedures only
     COMPRESSED, UNCOMPRESSED, VERSION - system files only
     TYPE, DIGITS  - portable files only
   Anything else is passed to parse_dict_trim().

   The output dictionary is a clone of default_dict with scratch
   variables deleted; start_case_map()/finish_case_map() bracket
   the parsing so that the resulting layout change is captured in
   AW->map. */
273 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
274 WRITER_TYPE identifies the type of file to write,
275 and COMMAND_TYPE identifies the type of command.
277 On success, returns a writer.
278 For procedures only, sets *RETAIN_UNSELECTED to true if cases
279 that would otherwise be excluded by FILTER or USE should be
282 On failure, returns a null pointer. */
283 static struct case_writer *
284 parse_write_command (enum writer_type writer_type,
285 enum command_type command_type,
286 bool *retain_unselected)
289 struct file_handle *handle; /* Output file. */
290 struct dictionary *dict; /* Dictionary for output file. */
291 struct case_writer *aw; /* Writer. */
293 /* Common options. */
294 bool print_map; /* Print map? TODO. */
295 bool print_short_names; /* Print long-to-short name map. TODO. */
296 struct sfm_write_options sysfile_opts;
297 struct pfm_write_options porfile_opts;
/* RETAIN_UNSELECTED must be supplied exactly when parsing a
   procedure. */
299 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
300 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
301 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
/* Retaining unselected cases is the default for procedures. */
303 if (command_type == PROC_CMD)
304 *retain_unselected = true;
307 dict = dict_clone (default_dict);
308 aw = xmalloc (sizeof *aw);
311 case_nullify (&aw->bounce);
313 print_short_names = false;
314 sysfile_opts = sfm_writer_default_options ();
315 porfile_opts = pfm_writer_default_options ();
317 start_case_map (dict);
318 dict_delete_scratch_vars (dict);
323 if (lex_match_id ("OUTFILE"))
327 lex_sbc_only_once ("OUTFILE");
333 handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
337 else if (lex_match_id ("NAMES"))
338 print_short_names = true;
339 else if (lex_match_id ("PERMISSIONS"))
344 if (lex_match_id ("READONLY"))
346 else if (lex_match_id ("WRITEABLE"))
350 lex_error (_("expecting %s or %s"), "READONLY", "WRITEABLE");
353 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
355 else if (command_type == PROC_CMD && lex_match_id ("UNSELECTED"))
358 if (lex_match_id ("RETAIN"))
359 *retain_unselected = true;
360 else if (lex_match_id ("DELETE"))
361 *retain_unselected = false;
364 lex_error (_("expecting %s or %s"), "RETAIN", "DELETE");
368 else if (writer_type == SYSFILE_WRITER && lex_match_id ("COMPRESSED"))
369 sysfile_opts.compress = true;
370 else if (writer_type == SYSFILE_WRITER && lex_match_id ("UNCOMPRESSED"))
371 sysfile_opts.compress = false;
372 else if (writer_type == SYSFILE_WRITER && lex_match_id ("VERSION"))
375 if (!lex_force_int ())
377 sysfile_opts.version = lex_integer ();
380 else if (writer_type == PORFILE_WRITER && lex_match_id ("TYPE"))
/* NOTE(review): the keyword matched here is COMMUNICATIONS while
   the diagnostic below names COMM -- confirm which spelling users
   are meant to see. */
383 if (lex_match_id ("COMMUNICATIONS"))
384 porfile_opts.type = PFM_COMM;
385 else if (lex_match_id ("TAPE"))
386 porfile_opts.type = PFM_TAPE;
389 lex_error (_("expecting %s or %s"), "COMM", "TAPE");
393 else if (writer_type == PORFILE_WRITER && lex_match_id ("DIGITS"))
396 if (!lex_force_int ())
398 porfile_opts.digits = lex_integer ();
401 else if (!parse_dict_trim (dict))
404 if (!lex_match ('/'))
407 if (lex_end_of_command () != CMD_SUCCESS)
412 lex_sbc_missing ("OUTFILE");
/* Compact the dictionary's values, derive the case map from the
   result, and size the bounce case for the output dictionary. */
416 dict_compact_values (dict);
417 aw->map = finish_case_map (dict);
419 case_create (&aw->bounce, dict_get_next_value_idx (dict));
/* A handle that refers to a file gets a system-file or
   portable-file writer; any other referent goes through
   any_writer_open().
   NOTE(review): the selection between the two file writers is not
   visible in this listing; presumably it follows WRITER_TYPE. */
421 if (fh_get_referent (handle) == FH_REF_FILE)
426 aw->writer = any_writer_from_sfm_writer (
427 sfm_open_writer (handle, dict, sysfile_opts));
430 aw->writer = any_writer_from_pfm_writer (
431 pfm_open_writer (handle, dict, porfile_opts));
436 aw->writer = any_writer_open (handle, dict);
442 case_writer_destroy (aw);
/* The visible statements map C into AW's bounce case through
   AW->map and then hand a case to any_writer_write(), whose result
   is returned.
   NOTE(review): the guard around the mapping (a null map means
   identity, per the struct comment) and any redirection of C to
   the bounce case are not visible in this listing. */
447 /* Writes case C to writer AW. */
449 case_writer_write_case (struct case_writer *aw, struct ccase *c)
453 map_case (aw->map, c, &aw->bounce);
456 return any_writer_write (aw->writer, c);
459 /* SAVE and EXPORT. */
461 static bool output_proc (struct ccase *, void *);
/* Parses the command with parse_write_command() in PROC_CMD mode,
   then runs procedure() with output_proc() as the per-case
   callback and destroys the writer.

   While the procedure runs, the default dictionary's filter
   variable is cleared if unselected cases are to be retained; the
   saved filter variable is put back afterwards in either case.

   Returns CMD_SUCCESS if OK is true after the steps above,
   CMD_CASCADING_FAILURE otherwise (including a parse failure). */
463 /* Parses and performs the SAVE or EXPORT procedure. */
465 parse_output_proc (enum writer_type writer_type)
467 bool retain_unselected;
468 struct variable *saved_filter_variable;
469 struct case_writer *aw;
472 aw = parse_write_command (writer_type, PROC_CMD, &retain_unselected);
474 return CMD_CASCADING_FAILURE;
476 saved_filter_variable = dict_get_filter (default_dict);
477 if (retain_unselected)
478 dict_set_filter (default_dict, NULL);
479 ok = procedure (output_proc, aw);
480 dict_set_filter (default_dict, saved_filter_variable);
482 case_writer_destroy (aw);
483 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
/* Per-case callback passed to procedure() by parse_output_proc().
   AW_ is the struct case_writer supplied there; the result of
   case_writer_write_case() is returned unchanged. */
486 /* Writes case C to file. */
488 output_proc (struct ccase *c, void *aw_)
490 struct case_writer *aw = aw_;
491 return case_writer_write_case (aw, c);
497 return parse_output_proc (SYSFILE_WRITER);
503 return parse_output_proc (PORFILE_WRITER);
506 /* XSAVE and XEXPORT. */
508 /* Transformation. */
511 struct case_writer *aw; /* Writer. */
514 static trns_proc_func output_trns_proc;
515 static trns_free_func output_trns_free;
/* Allocates a struct output_trns, parses the command with
   parse_write_command() in XFORM_CMD mode (so no RETAIN_UNSELECTED
   pointer is passed), and registers output_trns_proc() and
   output_trns_free() as a transformation over it.  A failure
   returns CMD_CASCADING_FAILURE.
   NOTE(review): the failure test, the release of T on failure,
   and the success return are not visible in this listing. */
517 /* Parses the XSAVE or XEXPORT transformation command. */
519 parse_output_trns (enum writer_type writer_type)
521 struct output_trns *t = xmalloc (sizeof *t);
522 t->aw = parse_write_command (writer_type, XFORM_CMD, NULL);
526 return CMD_CASCADING_FAILURE;
529 add_transformation (output_trns_proc, output_trns_free, t);
/* Transformation callback: TRNS_ is the struct output_trns set up
   by parse_output_trns(); CASE_NUM is unused.
   NOTE(review): the result of case_writer_write_case() is
   discarded and TRNS_CONTINUE is returned unconditionally, so a
   write failure is not reported from here -- confirm that it is
   surfaced when the writer is closed. */
533 /* Writes case C to the system file specified on XSAVE or XEXPORT. */
535 output_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
537 struct output_trns *t = trns_;
538 case_writer_write_case (t->aw, c);
539 return TRNS_CONTINUE;
/* Transformation destructor: TRNS_ is the struct output_trns; its
   writer is destroyed with case_writer_destroy() and that result
   is kept in OK.
   NOTE(review): the release of T and the return statement are not
   visible in this listing. */
542 /* Frees an XSAVE or XEXPORT transformation.
543 Returns true if successful, false if an I/O error occurred. */
545 output_trns_free (void *trns_)
547 struct output_trns *t = trns_;
552 ok = case_writer_destroy (t->aw);
562 return parse_output_trns (SYSFILE_WRITER);
565 /* XEXPORT command. */
569 return parse_output_trns (PORFILE_WRITER);
572 static bool rename_variables (struct dictionary *dict);
573 static bool drop_variables (struct dictionary *dict);
574 static bool keep_variables (struct dictionary *dict);
/* Dispatches on the next identifier: MAP, DROP, KEEP or RENAME.
   DROP, KEEP and RENAME return the result of the matching helper;
   any other token produces the "expecting a valid subcommand"
   error.
   NOTE(review): the handling of MAP and the return values of the
   MAP and error branches are not visible in this listing. */
576 /* Commands that read and write system files share a great deal
577 of common syntactic structure for rearranging and dropping
578 variables. This function parses this syntax and modifies DICT
579 appropriately. Returns true on success, false on failure. */
581 parse_dict_trim (struct dictionary *dict)
583 if (lex_match_id ("MAP"))
588 else if (lex_match_id ("DROP"))
589 return drop_variables (dict);
590 else if (lex_match_id ("KEEP"))
591 return keep_variables (dict);
592 else if (lex_match_id ("RENAME"))
593 return rename_variables (dict);
596 lex_error (_("expecting a valid subcommand"));
/* Two syntactic forms are visible below:

   - A single "OLD = NEW" pair: the variable is looked up with
     parse_dict_variable(), the new name (tokid) must not already
     exist in DICT, and the rename is done with dict_rename_var().

   - One or more parenthesized groups "(old-list = new-list)":
     variables and new names are accumulated across groups
     (PV_APPEND) and applied together with dict_rename_vars(),
     which reports a duplicated name through ERR_NAME.

   NOTE(review): the count-mismatch message formats values cast to
   unsigned with %d, and the right-hand count is computed as
   NN - OLD_NV; confirm both are intended. */
601 /* Parses and performs the RENAME subcommand of GET and SAVE. */
603 rename_variables (struct dictionary *dict)
621 v = parse_dict_variable (dict);
624 if (!lex_force_match ('=')
627 if (dict_lookup_var (dict, tokid) != NULL)
629 msg (SE, _("Cannot rename %s as %s because there already exists "
630 "a variable named %s. To rename variables with "
631 "overlapping names, use a single RENAME subcommand "
632 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
633 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
637 dict_rename_var (dict, v, tokid);
646 while (lex_match ('('))
650 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
652 if (!lex_match ('='))
654 msg (SE, _("`=' expected after variable list."));
657 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
661 msg (SE, _("Number of variables on left side of `=' (%d) does not "
662 "match number of variables on right side (%d), in "
663 "parenthesized group %d of RENAME subcommand."),
664 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
667 if (!lex_force_match (')'))
672 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
674 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
680 for (i = 0; i < nn; i++)
/* Parses a variable list from DICT and deletes those variables
   with dict_delete_vars().  Leaving the dictionary with no
   variables is reported as an error. */
688 /* Parses and performs the DROP subcommand of GET and SAVE.
689 Returns true if successful, false on failure.*/
691 drop_variables (struct dictionary *dict)
697 if (!parse_variables (dict, &v, &nv, PV_NONE))
699 dict_delete_vars (dict, v, nv);
702 if (dict_get_var_cnt (dict) == 0)
704 msg (SE, _("Cannot DROP all variables from dictionary."));
/* Parses a variable list from DICT, reorders the dictionary so the
   NV listed variables come first, and then deletes every variable
   from index NV onward.  The array V is resized and reused to hold
   the variables that are to be deleted. */
710 /* Parses and performs the KEEP subcommand of GET and SAVE.
711 Returns true if successful, false on failure.*/
713 keep_variables (struct dictionary *dict)
720 if (!parse_variables (dict, &v, &nv, PV_NONE))
723 /* Move the specified variables to the beginning. */
724 dict_reorder_vars (dict, v, nv);
726 /* Delete the remaining variables. */
727 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
728 for (i = nv; i < dict_get_var_cnt (dict); i++)
729 v[i - nv] = dict_get_var (dict, i);
730 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
738 #include "debug-print.h"
743 MTF_FILE, /* Specified on FILE= subcommand. */
744 MTF_TABLE /* Specified on TABLE= subcommand. */
747 /* One of the files on MATCH FILES. */
750 struct mtf_file *next, *prev; /* Next, previous in the list of files. */
751 struct mtf_file *next_min; /* Next in the chain of minimums. */
753 int type; /* One of MTF_*. */
754 struct variable **by; /* List of BY variables for this file. */
755 struct file_handle *handle; /* File handle. */
756 struct any_reader *reader; /* File reader. */
757 struct dictionary *dict; /* Dictionary from system file. */
760 char *in_name; /* Variable name. */
761 struct variable *in_var; /* Variable (in master dictionary). */
763 struct ccase input; /* Input record. */
766 /* MATCH FILES procedure. */
769 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
770 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
772 bool ok; /* False if I/O error occurs. */
774 size_t by_cnt; /* Number of variables on BY subcommand. */
776 /* Names of FIRST, LAST variables. */
777 char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
779 struct dictionary *dict; /* Dictionary of output file. */
780 struct case_sink *sink; /* Sink to receive output. */
781 struct ccase mtf_case; /* Case used for output. */
783 unsigned seq_num; /* Have we initialized this variable? */
784 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
787 static bool mtf_free (struct mtf_proc *);
788 static bool mtf_close_file (struct mtf_file *);
789 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
790 static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
792 static bool mtf_read_nonactive_records (void *);
793 static bool mtf_processing_finish (void *);
794 static bool mtf_processing (struct ccase *, void *);
796 static char *var_type_description (struct variable *);
798 static void set_master (struct variable *, struct variable *master);
799 static struct variable *get_master (struct variable *);
/* MATCH FILES entry point.

   Parsing, as far as it is visible here:
   - Each FILE or TABLE subcommand allocates a struct mtf_file and
     links it into the list so that all FILEs precede all TABLEs.
     The input is either the active file (at most once, with
     file->dict set to default_dict) or a handle opened with
     any_reader_open().  Per-file RENAME and IN subcommands follow,
     and the file's dictionary is merged into the master
     dictionary with mtf_merge_dictionary().
   - BY (at most once) resolves each BY variable in every file's
     own dictionary.  FIRST and LAST (at most once each) record a
     variable name.  MAP, DROP and KEEP are also accepted.
   - BY is required when TABLE or IN is used.

   Afterwards the IN variables are created in the master
   dictionary, a storage sink is opened, one record is read from
   every non-active input, and the merge is run, through
   procedure() when the active file takes part.  On success the
   master dictionary replaces default_dict and the sink becomes
   the new case source.

   Returns CMD_SUCCESS or CMD_CASCADING_FAILURE.

   NOTE(review): the result of mtf_merge_dictionary() is not
   checked at the visible call site; confirm whether a width
   mismatch is meant to abort the command. */
801 /* Parse and execute the MATCH FILES command. */
803 cmd_match_files (void)
806 struct mtf_file *first_table = NULL;
807 struct mtf_file *iter;
809 bool used_active_file = false;
810 bool saw_table = false;
815 mtf.head = mtf.tail = NULL;
819 mtf.dict = dict_create ();
821 case_nullify (&mtf.mtf_case);
824 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
828 && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
830 struct mtf_file *file = xmalloc (sizeof *file);
832 if (lex_match_id ("FILE"))
833 file->type = MTF_FILE;
834 else if (lex_match_id ("TABLE"))
836 file->type = MTF_TABLE;
847 file->in_name = NULL;
849 case_nullify (&file->input);
851 /* FILEs go first, then TABLEs. */
852 if (file->type == MTF_TABLE || first_table == NULL)
855 file->prev = mtf.tail;
857 mtf.tail->next = file;
859 if (mtf.head == NULL)
861 if (file->type == MTF_TABLE && first_table == NULL)
866 assert (file->type == MTF_FILE);
867 file->next = first_table;
868 file->prev = first_table->prev;
869 if (first_table->prev)
870 first_table->prev->next = file;
873 first_table->prev = file;
881 if (used_active_file)
883 msg (SE, _("The active file may not be specified more "
887 used_active_file = true;
889 assert (pgm_state != STATE_INPUT);
890 if (pgm_state == STATE_INIT)
892 msg (SE, _("Cannot specify the active file since no active "
893 "file has been defined."));
900 _("MATCH FILES may not be used after TEMPORARY when "
901 "the active file is an input source. "
902 "Temporary transformations will be made permanent."));
906 file->dict = default_dict;
910 file->handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
911 if (file->handle == NULL)
914 file->reader = any_reader_open (file->handle, &file->dict);
915 if (file->reader == NULL)
918 case_create (&file->input, dict_get_next_value_idx (file->dict));
921 while (lex_match ('/'))
922 if (lex_match_id ("RENAME"))
924 if (!rename_variables (file->dict))
927 else if (lex_match_id ("IN"))
936 if (file->in_name != NULL)
938 msg (SE, _("Multiple IN subcommands for a single FILE or "
942 file->in_name = xstrdup (tokid);
947 mtf_merge_dictionary (mtf.dict, file);
952 if (lex_match (T_BY))
954 struct variable **by;
958 msg (SE, _("BY may appear at most once."));
963 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
964 PV_NO_DUPLICATE | PV_NO_SCRATCH))
967 for (iter = mtf.head; iter != NULL; iter = iter->next)
971 iter->by = xnmalloc (mtf.by_cnt, sizeof *iter->by);
973 for (i = 0; i < mtf.by_cnt; i++)
975 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
976 if (iter->by[i] == NULL)
978 msg (SE, _("File %s lacks BY variable %s."),
979 iter->handle ? fh_get_name (iter->handle) : "*",
988 else if (lex_match_id ("FIRST"))
990 if (mtf.first[0] != '\0')
992 msg (SE, _("FIRST may appear at most once."));
997 if (!lex_force_id ())
999 strcpy (mtf.first, tokid);
1002 else if (lex_match_id ("LAST"))
1004 if (mtf.last[0] != '\0')
1006 msg (SE, _("LAST may appear at most once."));
1011 if (!lex_force_id ())
1013 strcpy (mtf.last, tokid);
1016 else if (lex_match_id ("MAP"))
1020 else if (lex_match_id ("DROP"))
1022 if (!drop_variables (mtf.dict))
1025 else if (lex_match_id ("KEEP"))
1027 if (!keep_variables (mtf.dict))
1036 if (!lex_match ('/') && token != '.')
1038 lex_end_of_command ();
1043 if (mtf.by_cnt == 0)
1047 msg (SE, _("BY is required when TABLE is specified."));
1052 msg (SE, _("BY is required when IN is specified."));
1057 /* Set up mapping from each file's variables to master
1059 for (iter = mtf.head; iter != NULL; iter = iter->next)
1061 struct dictionary *d = iter->dict;
1064 for (i = 0; i < dict_get_var_cnt (d); i++)
1066 struct variable *v = dict_get_var (d, i);
1067 struct variable *mv = dict_lookup_var (mtf.dict, v->name);
1073 /* Add IN variables to master dictionary. */
1074 for (iter = mtf.head; iter != NULL; iter = iter->next)
1075 if (iter->in_name != NULL)
1077 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
1078 if (iter->in_var == NULL)
/* in_var is a null pointer on this path, so the diagnostic has to
   name the variable through in_name; reading in_var->name here
   would dereference a null pointer. */
1080 msg (SE, _("IN variable name %s duplicates an "
1081 "existing variable name."),
1082 iter->in_name);
1085 iter->in_var->print = iter->in_var->write
1086 = make_output_format (FMT_F, 1, 0);
1089 /* MATCH FILES performs an n-way merge on all its input files.
1092 1. Read one input record from every input FILE.
1094 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
1096 3. Find the FILE input record(s) that have minimum BY
1097 values. Store all the values from these input records into
1100 4. For every TABLE, read another record as long as the BY values
1101 on the TABLE's input record are less than the FILEs' BY values.
1102 If an exact match is found, store all the values from the TABLE
1103 input record into the output record.
1105 5. Write the output record.
1107 6. Read another record from each input file FILE and TABLE that
1108 we stored values from above. If we come to the end of one of the
1109 input files, remove it from the list of input files.
1111 7. Repeat from step 2.
1113 Unfortunately, this algorithm can't be implemented in a
1114 straightforward way because there's no function to read a
1115 record from the active file. Instead, it has to be written
1118 FIXME: For merging large numbers of files (more than 10?) a
1119 better algorithm would use a heap for finding minimum
1122 if (!used_active_file)
1123 discard_variables ();
1125 dict_compact_values (mtf.dict);
1126 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
1127 if (mtf.sink->class->open != NULL)
1128 mtf.sink->class->open (mtf.sink);
1130 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1131 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1133 if (!mtf_read_nonactive_records (&mtf))
1136 if (used_active_file)
1137 ok = procedure (mtf_processing, &mtf) && mtf_processing_finish (&mtf);
1139 ok = mtf_processing_finish (&mtf);
1141 free_case_source (vfm_source);
1144 dict_destroy (default_dict);
1145 default_dict = mtf.dict;
1147 vfm_source = mtf.sink->class->make_source (mtf.sink);
1148 free_case_sink (mtf.sink);
1150 if (!mtf_free (&mtf))
1152 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1156 return CMD_CASCADING_FAILURE;
/* Called once no more active-file cases will arrive.  MTF_ is the
   struct mtf_proc.  The active file (the entry whose handle is
   null) is removed from the list, and then mtf_processing() is
   called with a null case for as long as the head of the list is
   still a FILE.
   NOTE(review): the return statements are not visible in this
   listing. */
1159 /* Repeats 2...7 an arbitrary number of times. */
1161 mtf_processing_finish (void *mtf_)
1163 struct mtf_proc *mtf = mtf_;
1164 struct mtf_file *iter;
1166 /* Find the active file and delete it. */
1167 for (iter = mtf->head; iter; iter = iter->next)
1168 if (iter->handle == NULL)
1170 if (!mtf_delete_file_in_place (mtf, &iter))
1175 while (mtf->head && mtf->head->type == MTF_FILE)
1176 if (!mtf_processing (NULL, mtf))
/* Formats either "numeric" or "string with width N" into one of
   two static 32-byte buffers.  Two buffers exist so that two
   results can be live at once, as in the single msg() call in
   mtf_merge_dictionary() that describes two variables.
   NOTE(review): how the buffer S is chosen between calls is not
   visible in this listing. */
1182 /* Return a string in a static buffer describing V's variable type and
1185 var_type_description (struct variable *v)
1187 static char buf[2][32];
1194 if (v->type == NUMERIC)
1195 strcpy (s, "numeric");
1198 assert (v->type == ALPHA);
1199 sprintf (s, "string with width %d", v->width);
/* The success flag is sampled before the reader is closed: it is
   true when there is no reader or the reader reports no error.
   The dictionary is destroyed only when the file has a handle; a
   file without one is the active file, whose dictionary is
   default_dict (see cmd_match_files()) and is not owned here. */
1204 /* Closes FILE and frees its associated data.
1205 Returns true if successful, false if an I/O error
1206 occurred on FILE. */
1208 mtf_close_file (struct mtf_file *file)
1210 bool ok = file->reader == NULL || !any_reader_error (file->reader);
1212 any_reader_close (file->reader);
1213 if (file->handle != NULL)
1214 dict_destroy (file->dict);
1215 case_destroy (&file->input);
1216 free (file->in_name);
/* Walks the file list closing each entry with mtf_close_file(),
   then destroys the master dictionary, the output case and the
   sequence-number array.  Each file's dictionary is asserted to
   be distinct from the master dictionary before it is closed.
   NOTE(review): the capture of NEXT before each close and the
   accumulation of the result are not visible in this listing. */
1221 /* Free all the data for the MATCH FILES procedure.
1222 Returns true if successful, false if an I/O error
1225 mtf_free (struct mtf_proc *mtf)
1227 struct mtf_file *iter, *next;
1230 for (iter = mtf->head; iter; iter = next)
1233 assert (iter->dict != mtf->dict);
1234 if (!mtf_close_file (iter))
1239 dict_destroy (mtf->dict);
1240 case_destroy (&mtf->mtf_case);
1241 free (mtf->seq_nums);
/* Besides unlinking F from the doubly linked list (fixing up the
   head and tail of MTF where needed), this resets F's contribution
   to the output case: the IN variable, if any, is set to 0, and F's
   variables that have a master variable are reset in the output
   case (string values are filled with spaces).  F is then closed
   with mtf_close_file(), whose result is returned.
   NOTE(review): the conditions guarding each unlink step and the
   value stored for numeric variables are not visible in this
   listing. */
1246 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1247 file in the chain, or to NULL if it was the last in the chain.
1248 Returns true if successful, false if an I/O error occurred. */
1250 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1252 struct mtf_file *f = *file;
1256 f->prev->next = f->next;
1258 f->next->prev = f->prev;
1260 mtf->head = f->next;
1262 mtf->tail = f->prev;
1265 if (f->in_var != NULL)
1266 case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
1267 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1269 struct variable *v = dict_get_var (f->dict, i);
1270 struct variable *mv = get_master (v);
1273 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1275 if (v->type == NUMERIC)
1278 memset (out->s, ' ', v->width);
1282 return mtf_close_file (f);
/* MTF_ is the struct mtf_proc.  Only entries with a handle are
   read; the active file has none and is skipped.  An input that
   yields no record is removed from the list with
   mtf_delete_file_in_place().  The loop stops early once OK
   becomes false.
   NOTE(review): the capture of NEXT and the update of OK are not
   visible in this listing. */
1285 /* Read a record from every input file except the active file.
1286 Returns true if successful, false if an I/O error occurred. */
1288 mtf_read_nonactive_records (void *mtf_)
1290 struct mtf_proc *mtf = mtf_;
1291 struct mtf_file *iter, *next;
1294 for (iter = mtf->head; ok && iter != NULL; iter = next)
1297 if (iter->handle && !any_reader_read (iter->reader, &iter->input))
1298 if (!mtf_delete_file_in_place (mtf, &iter))
/* A file whose input case is null has no record of its own; C is
   used as that file's record instead.  The comparison itself is
   delegated to case_compare_2dict() over each file's BY variables.
   NOTE(review): the assertion runs only after A and B have been
   dereferenced, and it tolerates one null argument even though
   A->by and B->by are read unconditionally -- confirm the
   intended precondition. */
1304 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1305 if A == B, 1 if A > B. */
1307 mtf_compare_BY_values (struct mtf_proc *mtf,
1308 struct mtf_file *a, struct mtf_file *b,
1311 struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1312 struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
1313 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1314 return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
/* C is the current active-file case, or a null pointer when called
   from mtf_processing_finish(); MTF_ is the struct mtf_proc.

   Files are threaded through their next_min pointers into two
   chains: "min" (files whose BY values equal the current minimum)
   and "max" (all others).  Values from the min chain are copied
   into the output case, and variables supplied only by max-chain
   files are reset.  mtf->seq_nums records which master variables
   have already been stored for the current mtf->seq_num, so each
   one is written at most once per output record.

   The loop repeats until a record from the active file is needed
   (READ_ACTIVE_FILE) or the head of the list is no longer a FILE.
   NOTE(review): the branch bodies selected by CMP, the numeric
   missing-value store, and the return statements are not visible
   in this listing. */
1317 /* Perform one iteration of steps 3...7 above.
1318 Returns true if successful, false if an I/O error occurred. */
1320 mtf_processing (struct ccase *c, void *mtf_)
1322 struct mtf_proc *mtf = mtf_;
1324 /* Do we need another record from the active file? */
1325 bool read_active_file;
1327 assert (mtf->head != NULL);
1328 if (mtf->head->type == MTF_TABLE)
1333 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1334 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1335 struct mtf_file *iter, *next;
1337 read_active_file = false;
1339 /* 3. Find the FILE input record(s) that have minimum BY
1340 values. Store all the values from these input records into
1341 the output record. */
1342 min_head = min_tail = mtf->head;
1343 max_head = max_tail = NULL;
1344 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1347 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1351 max_tail = max_tail->next_min = iter;
1353 max_head = max_tail = iter;
1356 min_tail = min_tail->next_min = iter;
1361 max_tail->next_min = min_head;
1362 max_tail = min_tail;
1366 max_head = min_head;
1367 max_tail = min_tail;
1369 min_head = min_tail = iter;
1373 /* 4. For every TABLE, read another record as long as the BY
1374 values on the TABLE's input record are less than the FILEs'
1375 BY values. If an exact match is found, store all the values
1376 from the TABLE input record into the output record. */
1377 for (; iter != NULL; iter = next)
1379 assert (iter->type == MTF_TABLE);
1384 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1388 max_tail = max_tail->next_min = iter;
1390 max_head = max_tail = iter;
1393 min_tail = min_tail->next_min = iter;
1396 if (iter->handle == NULL)
1398 if (any_reader_read (iter->reader, &iter->input))
1400 if (!mtf_delete_file_in_place (mtf, &iter))
1407 /* Next sequence number. */
1410 /* Store data to all the records we are using. */
1412 min_tail->next_min = NULL;
1413 for (iter = min_head; iter; iter = iter->next_min)
1417 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1419 struct variable *v = dict_get_var (iter->dict, i);
1420 struct variable *mv = get_master (v);
1422 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1424 struct ccase *record
1425 = case_is_null (&iter->input) ? c : &iter->input;
1426 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1428 mtf->seq_nums[mv->index] = mtf->seq_num;
1429 if (v->type == NUMERIC)
1430 out->f = case_num (record, v->fv);
1432 memcpy (out->s, case_str (record, v->fv), v->width);
1435 if (iter->in_var != NULL)
1436 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
1438 if (iter->type == MTF_FILE && iter->handle == NULL)
1439 read_active_file = true;
1442 /* Store missing values to all the records we're not
1445 max_tail->next_min = NULL;
1446 for (iter = max_head; iter; iter = iter->next_min)
1450 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1452 struct variable *v = dict_get_var (iter->dict, i);
1453 struct variable *mv = get_master (v);
1455 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1457 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1458 mtf->seq_nums[mv->index] = mtf->seq_num;
1460 if (v->type == NUMERIC)
1463 memset (out->s, ' ', v->width);
1466 if (iter->in_var != NULL)
1467 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1470 /* 5. Write the output record. */
1471 mtf->sink->class->write (mtf->sink, &mtf->mtf_case);
1473 /* 6. Read another record from each input file FILE and TABLE
1474 that we stored values from above. If we come to the end of
1475 one of the input files, remove it from the list of input
1477 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1479 next = iter->next_min;
1480 if (iter->reader != NULL
1481 && !any_reader_read (iter->reader, &iter->input))
1482 if (!mtf_delete_file_in_place (mtf, &iter))
1486 while (!read_active_file
1487 && mtf->head != NULL && mtf->head->type == MTF_FILE);
/* Folds the dictionary of input F into the master dictionary M:

   - M takes F's file label if M has none yet.
   - Documents are merged: F's documents are installed directly, or
     concatenated after M's existing ones.
   - Each variable of F is looked up in M by name.  Scratch
     variables are singled out by the DC_SCRATCH test.  A variable
     that already exists in M must have the same width, otherwise
     an error is reported; with equal widths M's variable inherits
     value labels, missing values and the variable label from F
     where M's copy has none.  A variable that is new to M is
     cloned into it.

   NOTE(review): the conditions selecting between these branches
   and the return statements are not visible in this listing. */
1492 /* Merge the dictionary for file F into master dictionary M. */
1494 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1496 struct dictionary *d = f->dict;
1497 const char *d_docs, *m_docs;
1500 if (dict_get_label (m) == NULL)
1501 dict_set_label (m, dict_get_label (d));
1503 d_docs = dict_get_documents (d);
1504 m_docs = dict_get_documents (m);
1508 dict_set_documents (m, d_docs);
1514 new_len = strlen (m_docs) + strlen (d_docs);
1515 new_docs = xmalloc (new_len + 1);
1516 strcpy (new_docs, m_docs);
1517 strcat (new_docs, d_docs);
1518 dict_set_documents (m, new_docs);
1523 for (i = 0; i < dict_get_var_cnt (d); i++)
1525 struct variable *dv = dict_get_var (d, i);
1526 struct variable *mv = dict_lookup_var (m, dv->name);
1528 if (dict_class_from_id (dv->name) == DC_SCRATCH)
1533 if (mv->width != dv->width)
/* The active file has no handle, so name it "*" exactly as the BY
   diagnostic in cmd_match_files() does, rather than passing a null
   handle to fh_get_name(). */
1535 msg (SE, _("Variable %s in file %s (%s) has different "
1536 "type or width from the same variable in "
1537 "earlier file (%s)."),
1538 dv->name, f->handle ? fh_get_name (f->handle) : "*",
1539 var_type_description (dv), var_type_description (mv));
1543 if (dv->width == mv->width)
1545 if (val_labs_count (dv->val_labs)
1546 && !val_labs_count (mv->val_labs))
1547 mv->val_labs = val_labs_copy (dv->val_labs);
1548 if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
1549 mv_copy (&mv->miss, &dv->miss);
1552 if (dv->label && !mv->label)
1553 mv->label = xstrdup (dv->label);
1556 mv = dict_clone_var_assert (m, dv, dv->name);
/* The association is stored in V's aux slot via var_attach_aux(),
   with no destructor registered (the third argument is NULL). */
1562 /* Marks V's master variable as MASTER. */
1564 set_master (struct variable *v, struct variable *master)
1566 var_attach_aux (v, master, NULL);
1569 /* Returns the master variable corresponding to V,
1570 as set with set_master(). */
1571 static struct variable *
1572 get_master (struct variable *v)
1581 A case map copies data from a case that corresponds to one
1582 dictionary to a case that corresponds to a second dictionary
1583 derived from the first by, optionally, deleting, reordering,
1584 or renaming variables. (No new variables may be created.)
1590 size_t value_cnt; /* Number of values in map. */
1591 int *map; /* For each destination index, the
1592 corresponding source index. */
/* For every variable in D, a heap-allocated int is attached as the
   variable's aux data, with var_dtor_free registered as its
   destructor.  finish_case_map() later reads that int as the
   variable's source value index.
   NOTE(review): the store that initializes *SRC_FV is not visible
   in this listing; presumably it records the variable's current
   value index. */
1595 /* Prepares dictionary D for producing a case map. Afterward,
1596 the caller may delete, reorder, or rename variables within D
1597 at will before using finish_case_map() to produce the case
1600 Uses D's aux members, which must otherwise not be in use. */
1602 start_case_map (struct dictionary *d)
1604 size_t var_cnt = dict_get_var_cnt (d);
1607 for (i = 0; i < var_cnt; i++)
1609 struct variable *v = dict_get_var (d, i);
1610 int *src_fv = xmalloc (sizeof *src_fv);
1612 var_attach_aux (v, src_fv, var_dtor_free);
/* Implementation notes: the map holds one entry per value slot of
   D.  For each variable, the source index detached from its aux
   data is compared with its current index (v->fv), and each of the
   variable's NV slots is mapped from source to destination.  A
   destination slot is asserted to still hold -1 before it is
   assigned, and trailing entries equal to -1 are trimmed from
   value_cnt.
   NOTE(review): the initialization of the map entries, the
   identity bookkeeping that leads to destroy_case_map(), the
   release of SRC_FV and the return statements are not visible in
   this listing. */
1616 /* Produces a case map from dictionary D, which must have been
1617 previously prepared with start_case_map().
1619 Does not retain any reference to D, and clears the aux members
1620 set up by start_case_map().
1622 Returns the new case map, or a null pointer if no mapping is
1623 required (that is, no data has changed position). */
1624 static struct case_map *
1625 finish_case_map (struct dictionary *d)
1627 struct case_map *map;
1628 size_t var_cnt = dict_get_var_cnt (d);
1632 map = xmalloc (sizeof *map);
1633 map->value_cnt = dict_get_next_value_idx (d);
1634 map->map = xnmalloc (map->value_cnt, sizeof *map->map);
1635 for (i = 0; i < map->value_cnt; i++)
1639 for (i = 0; i < var_cnt; i++)
1641 struct variable *v = dict_get_var (d, i);
1642 int *src_fv = (int *) var_detach_aux (v);
1645 if (v->fv != *src_fv)
1648 for (idx = 0; idx < v->nv; idx++)
1650 int src_idx = *src_fv + idx;
1651 int dst_idx = v->fv + idx;
1653 assert (map->map[dst_idx] == -1);
1654 map->map[dst_idx] = src_idx;
1661 destroy_case_map (map);
1665 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
/* MAP, SRC and DST must all be non-null, and SRC and DST must be
   different cases.  For each destination index below
   MAP->value_cnt, the value at the mapped source index in SRC is
   copied into DST.
   NOTE(review): any guard that skips unmapped (-1) entries is not
   visible in this listing. */
1671 /* Maps from SRC to DST, applying case map MAP. */
1673 map_case (const struct case_map *map,
1674 const struct ccase *src, struct ccase *dst)
1678 assert (map != NULL);
1679 assert (src != NULL);
1680 assert (dst != NULL);
1681 assert (src != dst);
1683 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1685 int src_idx = map->map[dst_idx];
1687 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1691 /* Destroys case map MAP. */
1693 destroy_case_map (struct case_map *map)