1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 #include "dictionary.h"
28 #include "file-handle.h"
33 #include "pfm-write.h"
36 #include "sfm-write.h"
38 #include "value-labels.h"
43 #include "debug-print.h"
45 /* Rearranging and reducing a dictionary. */
46 static void start_case_map (struct dictionary *);
47 static struct case_map *finish_case_map (struct dictionary *);
48 static void map_case (const struct case_map *,
49 const struct ccase *, struct ccase *);
50 static void destroy_case_map (struct case_map *);
55 OP_READ, /* GET or IMPORT. */
56 OP_SAVE, /* SAVE or XSAVE. */
57 OP_EXPORT /* EXPORT. */
60 static bool trim_dictionary (struct dictionary *,
61 enum operation, int *compress);
63 /* GET input program. */
66 struct sfm_reader *reader; /* System file reader. */
67 struct case_map *map; /* Map from system file to active file dict. */
68 struct ccase bounce; /* Bounce buffer. */
71 static void get_pgm_free (struct get_pgm *);
73 /* Parses the GET command. */
77 struct get_pgm *pgm = NULL;
78 struct file_handle *fh;
79 struct dictionary *dict = NULL;
81 pgm = xmalloc (sizeof *pgm);
84 case_nullify (&pgm->bounce);
89 if (lex_match_id ("FILE"))
95 pgm->reader = sfm_open_reader (fh, &dict, NULL);
96 if (pgm->reader == NULL)
98 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
100 start_case_map (dict);
101 if (!trim_dictionary (dict, OP_READ, NULL))
103 pgm->map = finish_case_map (dict);
105 dict_destroy (default_dict);
108 vfm_source = create_case_source (&get_source_class, pgm);
119 /* Frees a struct get_pgm. */
121 get_pgm_free (struct get_pgm *pgm)
125 sfm_close_reader (pgm->reader);
126 destroy_case_map (pgm->map);
127 case_destroy (&pgm->bounce);
132 /* Clears internal state related to GET input procedure. */
134 get_source_destroy (struct case_source *source)
136 struct get_pgm *pgm = source->aux;
140 /* Reads all the cases from the data file into C and passes them
141 to WRITE_CASE one by one, passing WC_DATA. */
143 get_source_read (struct case_source *source,
145 write_case_func *write_case, write_case_data wc_data)
147 struct get_pgm *pgm = source->aux;
152 if (pgm->map == NULL)
153 ok = sfm_read_case (pgm->reader, c);
156 ok = sfm_read_case (pgm->reader, &pgm->bounce);
158 map_case (pgm->map, &pgm->bounce, c);
162 ok = write_case (wc_data);
167 const struct case_source_class get_source_class =
175 /* XSAVE transformation and SAVE procedure. */
178 struct trns_header h;
179 struct sfm_writer *writer; /* System file writer. */
180 struct case_map *map; /* Map from active file to system file dict. */
181 struct ccase bounce; /* Bounce buffer. */
184 static int save_write_case_func (struct ccase *, void *);
185 static trns_proc_func save_trns_proc;
186 static trns_free_func save_trns_free;
188 /* Parses the SAVE or XSAVE command
189 and returns the parsed transformation. */
190 static struct save_trns *
191 cmd_save_internal (void)
193 struct file_handle *fh = NULL;
194 struct dictionary *dict = NULL;
195 struct save_trns *t = NULL;
196 int compress = get_scompression ();
197 const int default_version = 3;
198 int version = default_version;
199 short no_name_table = 0;
201 t = xmalloc (sizeof *t);
202 t->h.proc = save_trns_proc;
203 t->h.free = save_trns_free;
206 case_nullify (&t->bounce);
209 /* Read most of the subcommands. */
212 if (lex_match_id ("VERSION"))
215 if (lex_force_int ())
217 version = lex_integer ();
220 if (lex_match_id ("X"))
224 else if (lex_match_id ("OUTFILE"))
233 if ( ! lex_match('/') )
240 lex_error (_("expecting end of command"));
246 msg ( ME, _("The required %s subcommand was not present"), "OUTFILE");
250 if ( version != default_version )
252 msg (MW, _("Unsupported sysfile version: %d. Using version %d instead."),
253 version, default_version);
255 version = default_version;
258 dict = dict_clone (default_dict);
259 start_case_map (dict);
260 if (!trim_dictionary (dict, OP_SAVE, &compress))
262 t->map = finish_case_map (dict);
264 case_create (&t->bounce, dict_get_next_value_idx (dict));
266 t->writer = sfm_open_writer (fh, dict, compress, no_name_table);
267 if (t->writer == NULL)
277 save_trns_free (&t->h);
281 /* Parses and performs the SAVE procedure. */
285 struct save_trns *t = cmd_save_internal ();
288 procedure (save_write_case_func, t);
289 save_trns_free (&t->h);
297 /* Parses the XSAVE transformation command. */
301 struct save_trns *t = cmd_save_internal ();
304 add_transformation (&t->h);
311 /* Writes the given C to the file specified by T. */
313 do_write_case (struct save_trns *t, struct ccase *c)
316 sfm_write_case (t->writer, c);
319 map_case (t->map, c, &t->bounce);
320 sfm_write_case (t->writer, &t->bounce);
324 /* Writes case C to the system file specified on SAVE.
   AUX is forwarded unchanged to do_write_case() as its struct save_trns
   argument.
   NOTE(review): AUX is tagged UNUSED in the parameter list even though it
   is used below; the tag looks stale -- confirm and drop it. */
326 save_write_case_func (struct ccase *c, void *aux UNUSED)
328 do_write_case (aux, c);
332 /* Writes case C to the system file specified on XSAVE. */
334 save_trns_proc (struct trns_header *h, struct ccase *c, int case_num UNUSED)
336 struct save_trns *t = (struct save_trns *) h;
337 do_write_case (t, c);
341 /* Frees a SAVE transformation. */
343 save_trns_free (struct trns_header *t_)
345 struct save_trns *t = (struct save_trns *) t_;
349 sfm_close_writer (t->writer);
350 destroy_case_map (t->map);
351 case_destroy (&t->bounce);
355 static bool rename_variables (struct dictionary *dict);
356 static bool drop_variables (struct dictionary *dict);
357 static bool keep_variables (struct dictionary *dict);
359 /* Commands that read and write system files share a great deal
360 of common syntactic structure for rearranging and dropping
361 variables. This function parses this syntax and modifies DICT
364 OP is the operation being performed. For operations that
365 write a system file, *COMPRESS is set to 1 if the system file
366 should be compressed, 0 otherwise.
368 Returns true on success, false on failure. */
370 trim_dictionary (struct dictionary *dict, enum operation op, int *compress)
372 assert ((compress != NULL) == (op == OP_SAVE));
373 if (get_scompression())
376 if (op == OP_SAVE || op == OP_EXPORT)
378 /* Delete all the scratch variables. */
383 v = xmalloc (sizeof *v * dict_get_var_cnt (dict));
385 for (i = 0; i < dict_get_var_cnt (dict); i++)
386 if (dict_class_from_id (dict_get_var (dict, i)->name) == DC_SCRATCH)
387 v[nv++] = dict_get_var (dict, i);
388 dict_delete_vars (dict, v, nv);
392 while (lex_match ('/'))
396 if (op == OP_SAVE && lex_match_id ("COMPRESSED"))
398 else if (op == OP_SAVE && lex_match_id ("UNCOMPRESSED"))
400 else if (lex_match_id ("DROP"))
401 ok = drop_variables (dict);
402 else if (lex_match_id ("KEEP"))
403 ok = keep_variables (dict);
404 else if (lex_match_id ("RENAME"))
405 ok = rename_variables (dict);
408 lex_error (_("expecting a valid subcommand"));
416 if (!lex_end_of_command ())
419 dict_compact_values (dict);
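423 /* Parses and performs the RENAME subcommand of GET and SAVE; MATCH FILES
   also calls it on each input file's dictionary.  Returns true if
   successful, false on failure. */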
425 rename_variables (struct dictionary *dict)
443 v = parse_dict_variable (dict);
446 if (!lex_force_match ('=')
449 if (dict_lookup_var (dict, tokid) != NULL)
451 msg (SE, _("Cannot rename %s as %s because there already exists "
452 "a variable named %s. To rename variables with "
453 "overlapping names, use a single RENAME subcommand "
454 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
455 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
459 dict_rename_var (dict, v, tokid);
468 while (lex_match ('('))
472 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
474 if (!lex_match ('='))
476 msg (SE, _("`=' expected after variable list."));
479 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
483 msg (SE, _("Number of variables on left side of `=' (%d) does not "
484 "match number of variables on right side (%d), in "
485 "parenthesized group %d of RENAME subcommand."),
486 nv - old_nv, nn - old_nv, group);
489 if (!lex_force_match (')'))
494 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
496 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
502 for (i = 0; i < nn; i++)
510 /* Parses and performs the DROP subcommand of GET and SAVE; MATCH FILES
   also calls it on its master dictionary.
511 Returns true if successful, false on failure. */
513 drop_variables (struct dictionary *dict)
519 if (!parse_variables (dict, &v, &nv, PV_NONE))
521 dict_delete_vars (dict, v, nv);
524 if (dict_get_var_cnt (dict) == 0)
526 msg (SE, _("Cannot DROP all variables from dictionary."));
532 /* Parses and performs the KEEP subcommand of GET and SAVE; MATCH FILES
   also calls it on its master dictionary.
   The variables named on KEEP are moved to the front of DICT, in the order
   parsed, and every other variable is deleted.
533 Returns true if successful, false on failure. */
535 keep_variables (struct dictionary *dict)
542 if (!parse_variables (dict, &v, &nv, PV_NONE))
545 /* Move the specified variables to the beginning. */
546 dict_reorder_vars (dict, v, nv);
548 /* Delete the remaining variables.
   V is reused for this: it is resized to hold the variables that now sit
   at indexes NV and up, refilled with them, and passed to
   dict_delete_vars().
   NOTE(review): if every variable was kept, the requested size is 0;
   what happens then depends on xrealloc() -- confirm. */
549 v = xrealloc (v, (dict_get_var_cnt (dict) - nv) * sizeof *v);
550 for (i = nv; i < dict_get_var_cnt (dict); i++)
551 v[i - nv] = dict_get_var (dict, i);
552 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
558 /* EXPORT procedure. */
561 struct pfm_writer *writer; /* Portable file writer. */
562 struct case_map *map; /* Map from active file to portable file dict,
   or null if cases are written without remapping. */
563 struct ccase bounce; /* Bounce buffer; created only when MAP is non-null. */
566 static int export_write_case_func (struct ccase *, void *);
567 static void export_proc_free (struct export_proc *);
569 /* Parses the EXPORT command. */
570 /* FIXME: same as cmd_save_internal(). */
574 struct file_handle *fh;
575 struct dictionary *dict;
576 struct export_proc *proc;
578 proc = xmalloc (sizeof *proc);
581 case_nullify (&proc->bounce);
584 if (lex_match_id ("OUTFILE"))
590 dict = dict_clone (default_dict);
591 start_case_map (dict);
592 if (!trim_dictionary (dict, OP_EXPORT, NULL))
594 proc->map = finish_case_map (dict);
595 if (proc->map != NULL)
596 case_create (&proc->bounce, dict_get_next_value_idx (dict));
598 proc->writer = pfm_open_writer (fh, dict);
599 if (proc->writer == NULL)
604 procedure (export_write_case_func, proc);
605 export_proc_free (proc);
612 export_proc_free (proc);
617 /* Writes case C to the EXPORT file. */
619 export_write_case_func (struct ccase *c, void *aux)
621 struct export_proc *proc = aux;
622 if (proc->map == NULL)
623 pfm_write_case (proc->writer, c);
626 map_case (proc->map, c, &proc->bounce);
627 pfm_write_case (proc->writer, &proc->bounce);
633 export_proc_free (struct export_proc *proc)
637 pfm_close_writer (proc->writer);
638 destroy_case_map (proc->map);
639 case_destroy (&proc->bounce);
645 #include "debug-print.h"
650 MTF_FILE, /* Specified on FILE= subcommand. */
651 MTF_TABLE /* Specified on TABLE= subcommand. */
654 /* One of the files on MATCH FILES. */
657 struct mtf_file *next, *prev;
658 /* Next, previous in the list of files. */
659 struct mtf_file *next_min; /* Next in the chain of minimums. */
661 int type; /* One of MTF_*. */
662 struct variable **by; /* List of BY variables for this file. */
663 struct file_handle *handle; /* File handle. */
664 struct sfm_reader *reader; /* System file reader. */
665 struct dictionary *dict; /* Dictionary from system file. */
668 char *in_name; /* Variable name. */
669 struct variable *in_var; /* Variable (in master dictionary). */
671 struct ccase input; /* Input record. */
674 /* MATCH FILES procedure. */
677 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
678 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
680 size_t by_cnt; /* Number of variables on BY subcommand. */
682 /* Names of FIRST, LAST variables. */
683 char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
685 struct dictionary *dict; /* Dictionary of output file. */
686 struct case_sink *sink; /* Sink to receive output. */
687 struct ccase mtf_case; /* Case used for output. */
689 unsigned seq_num; /* Current sequence number.  A variable whose SEQ_NUMS
   entry equals this value has already been stored
   into MTF_CASE for the current output record. */
690 unsigned *seq_nums; /* Sequence numbers for each var in dict, indexed
   by the variable's index in DICT. */
693 static void mtf_free (struct mtf_proc *);
694 static void mtf_free_file (struct mtf_file *);
695 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
696 static void mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
698 static void mtf_read_nonactive_records (void *);
699 static void mtf_processing_finish (void *);
700 static int mtf_processing (struct ccase *, void *);
702 static char *var_type_description (struct variable *);
704 static void set_master (struct variable *, struct variable *master);
705 static struct variable *get_master (struct variable *);
707 /* Parse and execute the MATCH FILES command. */
709 cmd_match_files (void)
712 struct mtf_file *first_table = NULL;
713 struct mtf_file *iter;
715 bool used_active_file = false;
716 bool saw_table = false;
719 mtf.head = mtf.tail = NULL;
723 mtf.dict = dict_create ();
725 case_nullify (&mtf.mtf_case);
728 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
732 && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
734 struct mtf_file *file = xmalloc (sizeof *file);
736 if (lex_match_id ("FILE"))
737 file->type = MTF_FILE;
738 else if (lex_match_id ("TABLE"))
740 file->type = MTF_TABLE;
751 file->in_name = NULL;
753 case_nullify (&file->input);
755 /* FILEs go first, then TABLEs. */
756 if (file->type == MTF_TABLE || first_table == NULL)
759 file->prev = mtf.tail;
761 mtf.tail->next = file;
763 if (mtf.head == NULL)
765 if (file->type == MTF_TABLE && first_table == NULL)
770 assert (file->type == MTF_FILE);
771 file->next = first_table;
772 file->prev = first_table->prev;
773 if (first_table->prev)
774 first_table->prev->next = file;
777 first_table->prev = file;
785 if (used_active_file)
787 msg (SE, _("The active file may not be specified more "
791 used_active_file = true;
793 assert (pgm_state != STATE_INPUT);
794 if (pgm_state == STATE_INIT)
796 msg (SE, _("Cannot specify the active file since no active "
797 "file has been defined."));
804 _("MATCH FILES may not be used after TEMPORARY when "
805 "the active file is an input source. "
806 "Temporary transformations will be made permanent."));
810 file->dict = default_dict;
814 file->handle = fh_parse ();
815 if (file->handle == NULL)
818 file->reader = sfm_open_reader (file->handle, &file->dict, NULL);
819 if (file->reader == NULL)
822 case_create (&file->input, dict_get_next_value_idx (file->dict));
825 while (lex_match ('/'))
826 if (lex_match_id ("RENAME"))
828 if (!rename_variables (file->dict))
831 else if (lex_match_id ("IN"))
840 if (file->in_name != NULL)
842 msg (SE, _("Multiple IN subcommands for a single FILE or "
846 file->in_name = xstrdup (tokid);
851 mtf_merge_dictionary (mtf.dict, file);
856 if (lex_match (T_BY))
858 struct variable **by;
862 msg (SE, _("BY may appear at most once."));
867 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
868 PV_NO_DUPLICATE | PV_NO_SCRATCH))
871 for (iter = mtf.head; iter != NULL; iter = iter->next)
875 iter->by = xmalloc (sizeof *iter->by * mtf.by_cnt);
877 for (i = 0; i < mtf.by_cnt; i++)
879 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
880 if (iter->by[i] == NULL)
882 msg (SE, _("File %s lacks BY variable %s."),
883 iter->handle ? handle_get_name (iter->handle) : "*",
892 else if (lex_match_id ("FIRST"))
894 if (mtf.first[0] != '\0')
896 msg (SE, _("FIRST may appear at most once."));
901 if (!lex_force_id ())
903 strcpy (mtf.first, tokid);
906 else if (lex_match_id ("LAST"))
908 if (mtf.last[0] != '\0')
910 msg (SE, _("LAST may appear at most once."));
915 if (!lex_force_id ())
917 strcpy (mtf.last, tokid);
920 else if (lex_match_id ("MAP"))
924 else if (lex_match_id ("DROP"))
926 if (!drop_variables (mtf.dict))
929 else if (lex_match_id ("KEEP"))
931 if (!keep_variables (mtf.dict))
940 if (!lex_match ('/') && token != '.')
942 lex_end_of_command ();
951 msg (SE, _("BY is required when TABLE is specified."));
956 msg (SE, _("BY is required when IN is specified."));
961 /* Set up mapping from each file's variables to master
963 for (iter = mtf.head; iter != NULL; iter = iter->next)
965 struct dictionary *d = iter->dict;
968 for (i = 0; i < dict_get_var_cnt (d); i++)
970 struct variable *v = dict_get_var (d, i);
971 struct variable *mv = dict_lookup_var (mtf.dict, v->name);
977 /* Add IN variables to master dictionary. */
978 for (iter = mtf.head; iter != NULL; iter = iter->next)
979 if (iter->in_name != NULL)
981 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
982 if (iter->in_var == NULL)
984 msg (SE, _("IN variable name %s duplicates an "
985 "existing variable name."),
989 iter->in_var->print = iter->in_var->write
990 = make_output_format (FMT_F, 1, 0);
993 /* MATCH FILES performs an n-way merge on all its input files.
996 1. Read one input record from every input FILE.
998 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
1000 3. Find the FILE input record(s) that have minimum BY
1001 values. Store all the values from these input records into
   the output record.
1004 4. For every TABLE, read another record as long as the BY values
1005 on the TABLE's input record are less than the FILEs' BY values.
1006 If an exact match is found, store all the values from the TABLE
1007 input record into the output record.
1009 5. Write the output record.
1011 6. Read another record from each input file FILE and TABLE that
1012 we stored values from above. If we come to the end of one of the
1013 input files, remove it from the list of input files.
1015 7. Repeat from step 2.
1017 Unfortunately, this algorithm can't be implemented in a
1018 straightforward way because there's no function to read a
1019 record from the active file. Instead, it has to be written
1022 FIXME: For merging large numbers of files (more than 10?) a
1023 better algorithm would use a heap for finding minimum
1026 if (!used_active_file)
1027 discard_variables ();
1029 dict_compact_values (mtf.dict);
1030 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
1031 if (mtf.sink->class->open != NULL)
1032 mtf.sink->class->open (mtf.sink);
1034 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1035 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1037 mtf_read_nonactive_records (&mtf);
1038 if (used_active_file)
1039 procedure (mtf_processing, &mtf);
1040 mtf_processing_finish (&mtf);
1042 free_case_source (vfm_source);
1045 dict_destroy (default_dict);
1046 default_dict = mtf.dict;
1048 vfm_source = mtf.sink->class->make_source (mtf.sink);
1049 free_case_sink (mtf.sink);
1059 /* Repeats 2...7 an arbitrary number of times.
   Called once the active file (if any) has been run through
   mtf_processing(): removes the active file's entry from the file list,
   then keeps calling mtf_processing() with a null case while a FILE
   remains at the head of the list.  MTF_ is the struct mtf_proc. */
1061 mtf_processing_finish (void *mtf_)
1063 struct mtf_proc *mtf = mtf_;
1064 struct mtf_file *iter;
1066 /* Find the active file and delete it.
   The active file is the entry whose file handle is null. */
1067 for (iter = mtf->head; iter; iter = iter->next)
1068 if (iter->handle == NULL)
1070 mtf_delete_file_in_place (mtf, &iter);
/* Drain the remaining FILEs; TABLEs alone do not keep the loop going. */
1074 while (mtf->head && mtf->head->type == MTF_FILE)
1075 if (!mtf_processing (NULL, mtf))
1079 /* Return a string in a static buffer describing V's variable type and
1082 var_type_description (struct variable *v)
1084 static char buf[2][32];
1091 if (v->type == NUMERIC)
1092 strcpy (s, "numeric");
1095 assert (v->type == ALPHA);
1096 sprintf (s, "string with width %d", v->width);
1101 /* Free FILE and associated data. */
1103 mtf_free_file (struct mtf_file *file)
1106 sfm_close_reader (file->reader);
1107 if (file->dict != default_dict)
1108 dict_destroy (file->dict);
1109 case_destroy (&file->input);
1110 free (file->in_name);
1114 /* Free all the data for the MATCH FILES procedure. */
1116 mtf_free (struct mtf_proc *mtf)
1118 struct mtf_file *iter, *next;
1120 for (iter = mtf->head; iter; iter = next)
1123 mtf_free_file (iter);
1127 dict_destroy (mtf->dict);
1128 case_destroy (&mtf->mtf_case);
1129 free (mtf->seq_nums);
1132 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1133 file in the chain, or to NULL if it was the last in the chain.
   The removed file's contribution to MTF's output case is also reset:
   its IN variable, if any, is set to 0, and the master variables fed by
   its dictionary are cleared. */
1135 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1137 struct mtf_file *f = *file;
/* Unlink F from the doubly linked list, fixing up the neighbors' links
   and MTF's head and tail pointers. */
1141 f->prev->next = f->next;
1143 f->next->prev = f->prev;
1145 mtf->head = f->next;
1147 mtf->tail = f->prev;
/* This file no longer supplies data, so its IN flag reads 0 from now on. */
1150 if (f->in_var != NULL)
1151 case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
/* Clear the output-case slot of each master variable that F's variables
   map to. */
1152 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1154 struct variable *v = dict_get_var (f->dict, i);
1155 struct variable *mv = get_master (v);
1158 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1160 if (v->type == NUMERIC)
/* String values are cleared by filling them with spaces. */
1163 memset (out->s, ' ', v->width);
1170 /* Read a record from every input file except the active file. */
1172 mtf_read_nonactive_records (void *mtf_)
1174 struct mtf_proc *mtf = mtf_;
1175 struct mtf_file *iter, *next;
1177 for (iter = mtf->head; iter != NULL; iter = next)
1180 if (iter->handle && !sfm_read_case (iter->reader, &iter->input))
1181 mtf_delete_file_in_place (mtf, &iter);
1185 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1186 if A == B, 1 if A > B.
   For a file whose own input case is null (per case_is_null()), case C
   is compared in its place. */
1188 mtf_compare_BY_values (struct mtf_proc *mtf,
1189 struct mtf_file *a, struct mtf_file *b,
1192 struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1193 struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
/* NOTE(review): this assertion runs only after A and B have been
   dereferenced by the two initializers above, so it cannot protect those
   accesses.  It also tolerates one of A or B being null even though both
   are dereferenced unconditionally here and in the return statement.
   Consider moving it to the top and tightening it. */
1194 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1195 return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
1198 /* Perform one iteration of steps 3...7 above. */
1200 mtf_processing (struct ccase *c, void *mtf_)
1202 struct mtf_proc *mtf = mtf_;
1204 /* Do we need another record from the active file? */
1205 bool read_active_file;
1207 assert (mtf->head != NULL);
1208 if (mtf->head->type == MTF_TABLE)
1213 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1214 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1215 struct mtf_file *iter, *next;
1217 read_active_file = false;
1219 /* 3. Find the FILE input record(s) that have minimum BY
1220 values. Store all the values from these input records into
1221 the output record. */
1222 min_head = min_tail = mtf->head;
1223 max_head = max_tail = NULL;
1224 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1227 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1231 max_tail = max_tail->next_min = iter;
1233 max_head = max_tail = iter;
1236 min_tail = min_tail->next_min = iter;
1241 max_tail->next_min = min_head;
1242 max_tail = min_tail;
1246 max_head = min_head;
1247 max_tail = min_tail;
1249 min_head = min_tail = iter;
1253 /* 4. For every TABLE, read another record as long as the BY
1254 values on the TABLE's input record are less than the FILEs'
1255 BY values. If an exact match is found, store all the values
1256 from the TABLE input record into the output record. */
1257 for (; iter != NULL; iter = next)
1259 assert (iter->type == MTF_TABLE);
1264 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1268 max_tail = max_tail->next_min = iter;
1270 max_head = max_tail = iter;
1273 min_tail = min_tail->next_min = iter;
1276 if (iter->handle == NULL)
1278 if (sfm_read_case (iter->reader, &iter->input))
1280 mtf_delete_file_in_place (mtf, &iter);
1286 /* Next sequence number. */
1289 /* Store data to all the records we are using. */
1291 min_tail->next_min = NULL;
1292 for (iter = min_head; iter; iter = iter->next_min)
1296 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1298 struct variable *v = dict_get_var (iter->dict, i);
1299 struct variable *mv = get_master (v);
1301 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1303 struct ccase *record
1304 = case_is_null (&iter->input) ? c : &iter->input;
1305 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1307 mtf->seq_nums[mv->index] = mtf->seq_num;
1308 if (v->type == NUMERIC)
1309 out->f = case_num (record, v->fv);
1311 memcpy (out->s, case_str (record, v->fv), v->width);
1314 if (iter->in_var != NULL)
1315 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
1317 if (iter->type == MTF_FILE && iter->handle == NULL)
1318 read_active_file = true;
1321 /* Store missing values to all the records we're not
1324 max_tail->next_min = NULL;
1325 for (iter = max_head; iter; iter = iter->next_min)
1329 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1331 struct variable *v = dict_get_var (iter->dict, i);
1332 struct variable *mv = get_master (v);
1334 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1336 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1337 mtf->seq_nums[mv->index] = mtf->seq_num;
1339 if (v->type == NUMERIC)
1342 memset (out->s, ' ', v->width);
1345 if (iter->in_var != NULL)
1346 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1349 /* 5. Write the output record. */
1350 mtf->sink->class->write (mtf->sink, &mtf->mtf_case);
1352 /* 6. Read another record from each input file FILE and TABLE
1353 that we stored values from above. If we come to the end of
1354 one of the input files, remove it from the list of input
1356 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1358 next = iter->next_min;
1359 if (iter->reader != NULL
1360 && !sfm_read_case (iter->reader, &iter->input))
1361 mtf_delete_file_in_place (mtf, &iter);
1364 while (!read_active_file
1365 && mtf->head != NULL && mtf->head->type == MTF_FILE);
1367 return mtf->head != NULL && mtf->head->type == MTF_FILE;
1370 /* Merge the dictionary for file F into master dictionary M.
   M takes F's file label if it has none yet, F's documents are set on or
   appended to M's, and each of F's variables is either reconciled with
   the same-named variable already in M or cloned into M. */
1372 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1374 struct dictionary *d = f->dict;
1375 const char *d_docs, *m_docs;
/* The first file that has a label supplies the master's label. */
1378 if (dict_get_label (m) == NULL)
1379 dict_set_label (m, dict_get_label (d));
1381 d_docs = dict_get_documents (d);
1382 m_docs = dict_get_documents (m);
1386 dict_set_documents (m, d_docs);
/* Both dictionaries have documents: build "M's documents followed by
   D's" in a freshly allocated buffer and install that. */
1392 new_len = strlen (m_docs) + strlen (d_docs);
1393 new_docs = xmalloc (new_len + 1);
1394 strcpy (new_docs, m_docs);
1395 strcat (new_docs, d_docs);
1396 dict_set_documents (m, new_docs);
1401 for (i = 0; i < dict_get_var_cnt (d); i++)
1403 struct variable *dv = dict_get_var (d, i);
1404 struct variable *mv = dict_lookup_var (m, dv->name);
1406 if (dict_class_from_id (dv->name) == DC_SCRATCH)
1411 if (mv->width != dv->width)
/* NOTE(review): F->handle is passed to handle_get_name() unguarded.
   Elsewhere in this file a null handle denotes the active file and the
   BY-variable message substitutes "*" for it; the same guard is probably
   needed here -- confirm. */
1413 msg (SE, _("Variable %s in file %s (%s) has different "
1414 "type or width from the same variable in "
1415 "earlier file (%s)."),
1416 dv->name, handle_get_name (f->handle),
1417 var_type_description (dv), var_type_description (mv));
1421 if (dv->width == mv->width)
/* Value labels and missing values are inherited from DV only when MV
   has none of its own.
   NOTE(review): MV's previous (empty) val_labs pointer is overwritten
   with no visible release -- check for a leak. */
1423 if (val_labs_count (dv->val_labs)
1424 && !val_labs_count (mv->val_labs))
1425 mv->val_labs = val_labs_copy (dv->val_labs);
1426 if (dv->miss_type != MISSING_NONE
1427 && mv->miss_type == MISSING_NONE)
1428 copy_missing_values (mv, dv);
/* Likewise the variable label: first file to have one wins. */
1431 if (dv->label && !mv->label)
1432 mv->label = xstrdup (dv->label);
/* Add a copy of DV to M under the same name. */
1435 mv = dict_clone_var_assert (m, dv, dv->name);
1441 /* Marks V's master variable as MASTER. */
1443 set_master (struct variable *v, struct variable *master)
1445 var_attach_aux (v, master, NULL);
1448 /* Returns the master variable corresponding to V,
1449 as set with set_master(). */
1450 static struct variable *
1451 get_master (struct variable *v)
1456 /* IMPORT command. */
1458 /* IMPORT input program. */
1461 struct pfm_reader *reader; /* Portable file reader. */
1462 struct case_map *map; /* Map from portable file to active file dict,
   or null if cases are read without remapping. */
1463 struct ccase bounce; /* Bounce buffer, read into when MAP is non-null. */
1466 static void import_pgm_free (struct import_pgm *);
1468 /* Parses the IMPORT command. */
1472 struct import_pgm *pgm = NULL;
1473 struct file_handle *fh = NULL;
1474 struct dictionary *dict = NULL;
1477 pgm = xmalloc (sizeof *pgm);
1480 case_nullify (&pgm->bounce);
1486 if (lex_match_id ("FILE") || token == T_STRING)
1494 else if (lex_match_id ("TYPE"))
1498 if (lex_match_id ("COMM"))
1500 else if (lex_match_id ("TAPE"))
1504 lex_error (_("expecting COMM or TAPE"));
1510 if (!lex_match ('/') && token != '.')
1516 discard_variables ();
1518 pgm->reader = pfm_open_reader (fh, &dict, NULL);
1519 if (pgm->reader == NULL)
1521 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
1523 start_case_map (dict);
1524 if (!trim_dictionary (dict, OP_READ, NULL))
1526 pgm->map = finish_case_map (dict);
1528 dict_destroy (default_dict);
1529 default_dict = dict;
1531 vfm_source = create_case_source (&import_source_class, pgm);
1536 import_pgm_free (pgm);
1538 dict_destroy (dict);
1542 /* Frees a struct import_pgm. */
1544 import_pgm_free (struct import_pgm *pgm)
1548 pfm_close_reader (pgm->reader);
1549 destroy_case_map (pgm->map);
1550 case_destroy (&pgm->bounce);
1555 /* Clears internal state related to IMPORT input procedure. */
1557 import_source_destroy (struct case_source *source)
1559 struct import_pgm *pgm = source->aux;
1560 import_pgm_free (pgm);
1563 /* Reads all the cases from the data file into C and passes them
1564 to WRITE_CASE one by one, passing WC_DATA. */
1566 import_source_read (struct case_source *source,
1568 write_case_func *write_case, write_case_data wc_data)
1570 struct import_pgm *pgm = source->aux;
1575 if (pgm->map == NULL)
1576 ok = pfm_read_case (pgm->reader, c);
1579 ok = pfm_read_case (pgm->reader, &pgm->bounce);
1581 map_case (pgm->map, &pgm->bounce, c);
1585 ok = write_case (wc_data);
1590 const struct case_source_class import_source_class =
1595 import_source_destroy,
1601 A case map copies data from a case that corresponds to one
1602 dictionary to a case that corresponds to a second dictionary
1603 derived from the first by, optionally, deleting, reordering,
1604 or renaming variables. (No new variables may be created.)
1610 size_t value_cnt; /* Number of values in map. */
1611 int *map; /* For each destination index, the
1612 corresponding source index. */
1615 /* Prepares dictionary D for producing a case map. Afterward,
1616 the caller may delete, reorder, or rename variables within D
1617 at will before using finish_case_map() to produce the case
   map.
   Each variable in D gets a separately allocated int attached as its aux
   data; finish_case_map() later detaches it and uses it as the
   variable's source value index.
1620 Uses D's aux members, which may not otherwise be in use. */
1622 start_case_map (struct dictionary *d)
1624 size_t var_cnt = dict_get_var_cnt (d);
1627 for (i = 0; i < var_cnt; i++)
1629 struct variable *v = dict_get_var (d, i);
1630 int *src_fv = xmalloc (sizeof *src_fv);
/* var_dtor_free is registered as the aux destructor for the allocated
   int. */
1632 var_attach_aux (v, src_fv, var_dtor_free);
1636 /* Produces a case map from dictionary D, which must have been
1637 previously prepared with start_case_map().
1639 Does not retain any reference to D, and clears the aux members
1640 set up by start_case_map().
1642 Returns the new case map, or a null pointer if no mapping is
1643 required (that is, no data has changed position). */
1644 static struct case_map *
1645 finish_case_map (struct dictionary *d)
1647 struct case_map *map;
1648 size_t var_cnt = dict_get_var_cnt (d);
/* Allocate a map with one slot per value index currently in use in D. */
1652 map = xmalloc (sizeof *map);
1653 map->value_cnt = dict_get_next_value_idx (d);
1654 map->map = xmalloc (sizeof *map->map * map->value_cnt);
/* -1 marks a destination slot that has no source; the assertion in the
   fill loop below relies on every slot starting out that way. */
1655 for (i = 0; i < map->value_cnt; i++)
1659 for (i = 0; i < var_cnt; i++)
1661 struct variable *v = dict_get_var (d, i);
/* Take back the aux data attached by start_case_map(); the int it
   points to is the base source index for V's values. */
1662 int *src_fv = (int *) var_detach_aux (v);
/* A variable whose current first value index differs from its source
   index has changed position. */
1665 if (v->fv != *src_fv)
/* Record, for each of V's NV values, which source slot feeds which
   destination slot.  No destination slot may be claimed twice. */
1668 for (idx = 0; idx < v->nv; idx++)
1670 int src_idx = *src_fv + idx;
1671 int dst_idx = v->fv + idx;
1673 assert (map->map[dst_idx] == -1);
1674 map->map[dst_idx] = src_idx;
/* NOTE(review): presumably the "no mapping is required" path described
   in the header comment -- confirm the guarding condition. */
1681 destroy_case_map (map);
/* Shrink the map so that it does not end in slots without a source. */
1685 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1691 /* Maps from SRC to DST, applying case map MAP.
   For each destination index below MAP's value count, the value at the
   mapped source index in SRC is copied into that index of DST.
   MAP, SRC, and DST must all be non-null, and SRC and DST must be
   different cases; all four conditions are asserted. */
1693 map_case (const struct case_map *map,
1694 const struct ccase *src, struct ccase *dst)
1698 assert (map != NULL);
1699 assert (src != NULL);
1700 assert (dst != NULL);
1701 assert (src != dst);
1703 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
/* MAP->map holds, for each destination index, the source index. */
1705 int src_idx = map->map[dst_idx];
1707 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1711 /* Destroys case map MAP. */
1713 destroy_case_map (struct case_map *map)