1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 #include "dictionary.h"
28 #include "file-handle.h"
33 #include "pfm-write.h"
36 #include "sfm-write.h"
38 #include "value-labels.h"
43 #include "debug-print.h"
45 /* Rearranging and reducing a dictionary. */
46 static void start_case_map (struct dictionary *);
47 static struct case_map *finish_case_map (struct dictionary *);
48 static void map_case (const struct case_map *,
49 const struct ccase *, struct ccase *);
50 static void destroy_case_map (struct case_map *);
55 OP_READ, /* GET or IMPORT. */
56 OP_SAVE, /* SAVE or XSAVE. */
57 OP_EXPORT /* EXPORT. */
60 static bool trim_dictionary (struct dictionary *,
61 enum operation, int *compress);
63 /* GET input program. */
66 struct sfm_reader *reader; /* System file reader. */
67 struct case_map *map; /* Map from system file to active file dict. */
68 struct ccase bounce; /* Bounce buffer. */
71 static void get_pgm_free (struct get_pgm *);
73 /* Parses the GET command. */
77 struct get_pgm *pgm = NULL;
78 struct file_handle *fh;
79 struct dictionary *dict = NULL;
81 pgm = xmalloc (sizeof *pgm);
84 case_nullify (&pgm->bounce);
89 if (lex_match_id ("FILE"))
95 pgm->reader = sfm_open_reader (fh, &dict, NULL);
96 if (pgm->reader == NULL)
98 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
100 start_case_map (dict);
101 if (!trim_dictionary (dict, OP_READ, NULL))
103 pgm->map = finish_case_map (dict);
105 dict_destroy (default_dict);
108 vfm_source = create_case_source (&get_source_class, pgm);
119 /* Frees a struct get_pgm. */
121 get_pgm_free (struct get_pgm *pgm)
125 sfm_close_reader (pgm->reader);
126 destroy_case_map (pgm->map);
127 case_destroy (&pgm->bounce);
132 /* Clears internal state related to GET input procedure. */
134 get_source_destroy (struct case_source *source)
136 struct get_pgm *pgm = source->aux;
140 /* Reads all the cases from the data file into C and passes them
141 to WRITE_CASE one by one, passing WC_DATA. */
143 get_source_read (struct case_source *source,
145 write_case_func *write_case, write_case_data wc_data)
147 struct get_pgm *pgm = source->aux;
152 if (pgm->map == NULL)
153 ok = sfm_read_case (pgm->reader, c);
156 ok = sfm_read_case (pgm->reader, &pgm->bounce);
158 map_case (pgm->map, &pgm->bounce, c);
162 ok = write_case (wc_data);
167 const struct case_source_class get_source_class =
175 /* XSAVE transformation and SAVE procedure. */
178 struct trns_header h;
179 struct sfm_writer *writer; /* System file writer. */
180 struct case_map *map; /* Map from active file to system file dict. */
181 struct ccase bounce; /* Bounce buffer. */
184 static int save_write_case_func (struct ccase *, void *);
185 static trns_proc_func save_trns_proc;
186 static trns_free_func save_trns_free;
188 /* Parses the SAVE or XSAVE command
189 and returns the parsed transformation. */
190 static struct save_trns *
191 cmd_save_internal (void)
193 struct file_handle *fh = NULL;
194 struct dictionary *dict = NULL;
195 struct save_trns *t = NULL;
196 int compress = get_scompression ();
197 const int default_version = 3;
198 int version = default_version;
199 short no_name_table = 0;
201 t = xmalloc (sizeof *t);
202 t->h.proc = save_trns_proc;
203 t->h.free = save_trns_free;
206 case_nullify (&t->bounce);
209 /* Read most of the subcommands. */
212 if (lex_match_id ("VERSION"))
215 if ( lex_force_num() )
220 if ( 0 == strncasecmp (tokid,"x", 1) )
228 else if (lex_match_id ("OUTFILE"))
237 if ( ! lex_match('/') )
244 lex_error (_("expecting end of command"));
250 msg ( ME, _("The required %s subcommand was not present"), "OUTFILE");
254 if ( version != default_version )
256 msg (MW, _("Unsupported sysfile version: %d. Using version %d instead."),
257 version, default_version);
259 version = default_version;
262 dict = dict_clone (default_dict);
263 start_case_map (dict);
264 if (!trim_dictionary (dict, OP_SAVE, &compress))
266 t->map = finish_case_map (dict);
268 case_create (&t->bounce, dict_get_next_value_idx (dict));
270 t->writer = sfm_open_writer (fh, dict, compress, no_name_table);
271 if (t->writer == NULL)
281 save_trns_free (&t->h);
285 /* Parses and performs the SAVE procedure. */
289 struct save_trns *t = cmd_save_internal ();
292 procedure (save_write_case_func, t);
293 save_trns_free (&t->h);
301 /* Parses the XSAVE transformation command. */
305 struct save_trns *t = cmd_save_internal ();
308 add_transformation (&t->h);
315 /* Writes the given C to the file specified by T. */
317 do_write_case (struct save_trns *t, struct ccase *c)
320 sfm_write_case (t->writer, c);
323 map_case (t->map, c, &t->bounce);
324 sfm_write_case (t->writer, &t->bounce);
328 /* Writes case C to the system file specified on SAVE.
    AUX is the struct save_trns that the SAVE command passes to
    procedure() along with this callback.
    NOTE(review): AUX is annotated UNUSED but is in fact used below. */
330 save_write_case_func (struct ccase *c, void *aux UNUSED)
332 do_write_case (aux, c);
336 /* Writes case C to the system file specified on XSAVE. */
338 save_trns_proc (struct trns_header *h, struct ccase *c, int case_num UNUSED)
340 struct save_trns *t = (struct save_trns *) h;
341 do_write_case (t, c);
345 /* Frees a SAVE transformation. */
347 save_trns_free (struct trns_header *t_)
349 struct save_trns *t = (struct save_trns *) t_;
353 sfm_close_writer (t->writer);
354 destroy_case_map (t->map);
355 case_destroy (&t->bounce);
359 static bool rename_variables (struct dictionary *dict);
360 static bool drop_variables (struct dictionary *dict);
361 static bool keep_variables (struct dictionary *dict);
363 /* Commands that read and write system files share a great deal
364 of common syntactic structure for rearranging and dropping
365 variables. This function parses this syntax and modifies DICT
368 OP is the operation being performed. For operations that
369 write a system file, *COMPRESS is set to 1 if the system file
370 should be compressed, 0 otherwise.
372 Returns true on success, false on failure. */
374 trim_dictionary (struct dictionary *dict, enum operation op, int *compress)
376 assert ((compress != NULL) == (op == OP_SAVE));
377 if (get_scompression())
380 if (op == OP_SAVE || op == OP_EXPORT)
382 /* Delete all the scratch variables. */
387 v = xmalloc (sizeof *v * dict_get_var_cnt (dict));
389 for (i = 0; i < dict_get_var_cnt (dict); i++)
390 if (dict_class_from_id (dict_get_var (dict, i)->name) == DC_SCRATCH)
391 v[nv++] = dict_get_var (dict, i);
392 dict_delete_vars (dict, v, nv);
396 while (lex_match ('/'))
400 if (op == OP_SAVE && lex_match_id ("COMPRESSED"))
402 else if (op == OP_SAVE && lex_match_id ("UNCOMPRESSED"))
404 else if (lex_match_id ("DROP"))
405 ok = drop_variables (dict);
406 else if (lex_match_id ("KEEP"))
407 ok = keep_variables (dict);
408 else if (lex_match_id ("RENAME"))
409 ok = rename_variables (dict);
412 lex_error (_("expecting a valid subcommand"));
420 if (!lex_end_of_command ())
423 dict_compact_values (dict);
427 /* Parses and performs the RENAME subcommand of GET and SAVE. */
429 rename_variables (struct dictionary *dict)
447 v = parse_dict_variable (dict);
450 if (!lex_force_match ('=')
453 if (!strncmp (tokid, v->name, SHORT_NAME_LEN))
455 if (dict_lookup_var (dict, tokid) != NULL)
457 msg (SE, _("Cannot rename %s as %s because there already exists "
458 "a variable named %s. To rename variables with "
459 "overlapping names, use a single RENAME subcommand "
460 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
461 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
465 dict_rename_var (dict, v, tokid);
474 while (lex_match ('('))
478 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
480 if (!lex_match ('='))
482 msg (SE, _("`=' expected after variable list."));
485 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
489 msg (SE, _("Number of variables on left side of `=' (%d) does not "
490 "match number of variables on right side (%d), in "
491 "parenthesized group %d of RENAME subcommand."),
492 nv - old_nv, nn - old_nv, group);
495 if (!lex_force_match (')'))
500 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
502 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
508 for (i = 0; i < nn; i++)
516 /* Parses and performs the DROP subcommand of GET and SAVE.
517 Returns true if successful, false on failure.*/
519 drop_variables (struct dictionary *dict)
525 if (!parse_variables (dict, &v, &nv, PV_NONE))
527 dict_delete_vars (dict, v, nv);
530 if (dict_get_var_cnt (dict) == 0)
532 msg (SE, _("Cannot DROP all variables from dictionary."));
538 /* Parses and performs the KEEP subcommand of GET and SAVE.
539 Returns true if successful, false on failure.*/
541 keep_variables (struct dictionary *dict)
548 if (!parse_variables (dict, &v, &nv, PV_NONE))
551 /* Move the specified variables to the beginning. */
552 dict_reorder_vars (dict, v, nv);
554 /* Delete the remaining variables. */
555 v = xrealloc (v, (dict_get_var_cnt (dict) - nv) * sizeof *v);
556 for (i = nv; i < dict_get_var_cnt (dict); i++)
557 v[i - nv] = dict_get_var (dict, i);
558 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
564 /* EXPORT procedure. */
567 struct pfm_writer *writer; /* Portable file writer. */
568 struct case_map *map; /* Map from active file to portable file dict. */
569 struct ccase bounce; /* Bounce buffer, used only when MAP is non-null. */
572 static int export_write_case_func (struct ccase *, void *);
573 static void export_proc_free (struct export_proc *);
575 /* Parses the EXPORT command. */
576 /* FIXME: same as cmd_save_internal(). */
580 struct file_handle *fh;
581 struct dictionary *dict;
582 struct export_proc *proc;
584 proc = xmalloc (sizeof *proc);
587 case_nullify (&proc->bounce);
590 if (lex_match_id ("OUTFILE"))
596 dict = dict_clone (default_dict);
597 start_case_map (dict);
598 if (!trim_dictionary (dict, OP_EXPORT, NULL))
600 proc->map = finish_case_map (dict);
601 if (proc->map != NULL)
602 case_create (&proc->bounce, dict_get_next_value_idx (dict));
604 proc->writer = pfm_open_writer (fh, dict);
605 if (proc->writer == NULL)
610 procedure (export_write_case_func, proc);
611 export_proc_free (proc);
618 export_proc_free (proc);
623 /* Writes case C to the EXPORT file. */
625 export_write_case_func (struct ccase *c, void *aux)
627 struct export_proc *proc = aux;
628 if (proc->map == NULL)
629 pfm_write_case (proc->writer, c);
632 map_case (proc->map, c, &proc->bounce);
633 pfm_write_case (proc->writer, &proc->bounce);
639 export_proc_free (struct export_proc *proc)
643 pfm_close_writer (proc->writer);
644 destroy_case_map (proc->map);
645 case_destroy (&proc->bounce);
651 #include "debug-print.h"
656 MTF_FILE, /* Specified on FILE= subcommand. */
657 MTF_TABLE /* Specified on TABLE= subcommand. */
660 /* One of the files on MATCH FILES. */
663 struct mtf_file *next, *prev;
664 /* Next, previous in the list of files. */
665 struct mtf_file *next_min; /* Next in the chain of minimums. */
667 int type; /* One of MTF_*. */
668 struct variable **by; /* List of BY variables for this file. */
669 struct file_handle *handle; /* File handle. */
670 struct sfm_reader *reader; /* System file reader. */
671 struct dictionary *dict; /* Dictionary from system file. */
674 char *in_name; /* Variable name. */
675 struct variable *in_var; /* Variable (in master dictionary). */
677 struct ccase input; /* Input record. */
680 /* MATCH FILES procedure. */
683 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
684 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
686 size_t by_cnt; /* Number of variables on BY subcommand. */
688 /* Names of FIRST, LAST variables. */
689 char first[SHORT_NAME_LEN + 1], last[SHORT_NAME_LEN + 1];
691 struct dictionary *dict; /* Dictionary of output file. */
692 struct case_sink *sink; /* Sink to receive output. */
693 struct ccase mtf_case; /* Case used for output. */
695 unsigned seq_num; /* Current sequence number; a variable whose
                         seq_nums[] entry equals it has already been
                         stored (see mtf_processing). */
696 unsigned *seq_nums; /* Sequence numbers for each var in dict,
                           indexed by the master variable's index. */
699 static void mtf_free (struct mtf_proc *);
700 static void mtf_free_file (struct mtf_file *);
701 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
702 static void mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
704 static void mtf_read_nonactive_records (void *);
705 static void mtf_processing_finish (void *);
706 static int mtf_processing (struct ccase *, void *);
708 static char *var_type_description (struct variable *);
710 static void set_master (struct variable *, struct variable *master);
711 static struct variable *get_master (struct variable *);
713 /* Parse and execute the MATCH FILES command. */
715 cmd_match_files (void)
718 struct mtf_file *first_table = NULL;
719 struct mtf_file *iter;
721 bool used_active_file = false;
722 bool saw_table = false;
725 mtf.head = mtf.tail = NULL;
729 mtf.dict = dict_create ();
731 case_nullify (&mtf.mtf_case);
734 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
737 while (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid))
739 struct mtf_file *file = xmalloc (sizeof *file);
741 if (lex_match_id ("FILE"))
742 file->type = MTF_FILE;
743 else if (lex_match_id ("TABLE"))
745 file->type = MTF_TABLE;
755 file->in_name = NULL;
757 case_nullify (&file->input);
759 /* FILEs go first, then TABLEs. */
760 if (file->type == MTF_TABLE || first_table == NULL)
763 file->prev = mtf.tail;
765 mtf.tail->next = file;
767 if (mtf.head == NULL)
769 if (file->type == MTF_TABLE && first_table == NULL)
774 assert (file->type == MTF_FILE);
775 file->next = first_table;
776 file->prev = first_table->prev;
777 if (first_table->prev)
778 first_table->prev->next = file;
781 first_table->prev = file;
791 if (used_active_file)
793 msg (SE, _("The active file may not be specified more "
797 used_active_file = true;
799 assert (pgm_state != STATE_INPUT);
800 if (pgm_state == STATE_INIT)
802 msg (SE, _("Cannot specify the active file since no active "
803 "file has been defined."));
810 _("MATCH FILES may not be used after TEMPORARY when "
811 "the active file is an input source. "
812 "Temporary transformations will be made permanent."));
816 file->dict = default_dict;
820 file->handle = fh_parse ();
821 if (file->handle == NULL)
824 file->reader = sfm_open_reader (file->handle, &file->dict, NULL);
825 if (file->reader == NULL)
828 case_create (&file->input, dict_get_next_value_idx (file->dict));
831 while (lex_match ('/'))
832 if (lex_match_id ("RENAME"))
834 if (!rename_variables (file->dict))
837 else if (lex_match_id ("IN"))
846 if (file->in_name != NULL)
848 msg (SE, _("Multiple IN subcommands for a single FILE or "
852 file->in_name = xstrdup (tokid);
857 mtf_merge_dictionary (mtf.dict, file);
862 if (lex_match (T_BY))
864 struct variable **by;
868 msg (SE, _("BY may appear at most once."));
873 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
874 PV_NO_DUPLICATE | PV_NO_SCRATCH))
877 for (iter = mtf.head; iter != NULL; iter = iter->next)
881 iter->by = xmalloc (sizeof *iter->by * mtf.by_cnt);
883 for (i = 0; i < mtf.by_cnt; i++)
885 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
886 if (iter->by[i] == NULL)
888 msg (SE, _("File %s lacks BY variable %s."),
889 iter->handle ? handle_get_name (iter->handle) : "*",
897 else if (lex_match_id ("FIRST"))
899 if (mtf.first[0] != '\0')
901 msg (SE, _("FIRST may appear at most once."));
906 if (!lex_force_id ())
/* NOTE(review): unbounded copy into mtf.first, which holds only
   SHORT_NAME_LEN + 1 bytes -- confirm tokid can never be longer than
   SHORT_NAME_LEN here. */
908 strcpy (mtf.first, tokid);
911 else if (lex_match_id ("LAST"))
913 if (mtf.last[0] != '\0')
915 msg (SE, _("LAST may appear at most once."));
920 if (!lex_force_id ())
/* NOTE(review): same unbounded copy as for FIRST; mtf.last is also
   SHORT_NAME_LEN + 1 bytes. */
922 strcpy (mtf.last, tokid);
925 else if (lex_match_id ("MAP"))
929 else if (lex_match_id ("DROP"))
931 if (!drop_variables (mtf.dict))
934 else if (lex_match_id ("KEEP"))
936 if (!keep_variables (mtf.dict))
945 if (!lex_match ('/') && token != '.')
947 lex_end_of_command ();
956 msg (SE, _("BY is required when TABLE is specified."));
961 msg (SE, _("BY is required when IN is specified."));
966 for (iter = mtf.head; iter != NULL; iter = iter->next)
968 struct dictionary *d = iter->dict;
971 for (i = 0; i < dict_get_var_cnt (d); i++)
973 struct variable *v = dict_get_var (d, i);
974 struct variable *mv = dict_lookup_var (mtf.dict, v->name);
980 for (iter = mtf.head; iter != NULL; iter = iter->next)
981 if (iter->in_name != NULL)
983 static const struct fmt_spec f1_0 = {FMT_F, 1, 0};
985 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
986 if (iter->in_var == NULL)
988 msg (SE, _("IN variable name %s duplicates an "
989 "existing variable name."),
993 iter->in_var->print = iter->in_var->write = f1_0;
996 /* MATCH FILES performs an n-way merge on all its input files.
999 1. Read one input record from every input FILE.
1001 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
1003 3. Find the FILE input record(s) that have minimum BY
1004 values. Store all the values from these input records into
1007 4. For every TABLE, read another record as long as the BY values
1008 on the TABLE's input record are less than the FILEs' BY values.
1009 If an exact match is found, store all the values from the TABLE
1010 input record into the output record.
1012 5. Write the output record.
1014 6. Read another record from each input file FILE and TABLE that
1015 we stored values from above. If we come to the end of one of the
1016 input files, remove it from the list of input files.
1018 7. Repeat from step 2.
1020 Unfortunately, this algorithm can't be implemented in a
1021 straightforward way because there's no function to read a
1022 record from the active file. Instead, it has to be written
1025 FIXME: For merging large numbers of files (more than 10?) a
1026 better algorithm would use a heap for finding minimum
1029 if (!used_active_file)
1030 discard_variables ();
1032 dict_compact_values (mtf.dict);
1033 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
1034 if (mtf.sink->class->open != NULL)
1035 mtf.sink->class->open (mtf.sink);
1037 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict) * sizeof *mtf.seq_nums);
1038 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1040 mtf_read_nonactive_records (&mtf);
1041 if (used_active_file)
1042 procedure (mtf_processing, &mtf);
1043 mtf_processing_finish (&mtf);
1045 dict_destroy (default_dict);
1046 default_dict = mtf.dict;
1048 vfm_source = mtf.sink->class->make_source (mtf.sink);
1049 free_case_sink (mtf.sink);
1059 /* Repeats 2...7 an arbitrary number of times. */
1061 mtf_processing_finish (void *mtf_)
1063 struct mtf_proc *mtf = mtf_;
1064 struct mtf_file *iter;
1066 /* Find the active file and delete it. */
1067 for (iter = mtf->head; iter; iter = iter->next)
1068 if (iter->handle == NULL)
1070 mtf_delete_file_in_place (mtf, &iter);
1074 while (mtf->head && mtf->head->type == MTF_FILE)
1075 if (!mtf_processing (NULL, mtf))
1079 /* Return a string in a static buffer describing V's variable type and
1082 var_type_description (struct variable *v)
1084 static char buf[2][32];
1091 if (v->type == NUMERIC)
1092 strcpy (s, "numeric");
1095 assert (v->type == ALPHA);
1096 sprintf (s, "string with width %d", v->width);
1101 /* Free FILE and associated data. */
1103 mtf_free_file (struct mtf_file *file)
1106 sfm_close_reader (file->reader);
1107 if (file->dict != default_dict)
1108 dict_destroy (file->dict);
1109 case_destroy (&file->input);
1110 free (file->in_name);
1114 /* Free all the data for the MATCH FILES procedure. */
1116 mtf_free (struct mtf_proc *mtf)
1118 struct mtf_file *iter, *next;
1120 for (iter = mtf->head; iter; iter = next)
1123 mtf_free_file (iter);
1127 dict_destroy (mtf->dict);
1128 case_destroy (&mtf->mtf_case);
1129 free (mtf->seq_nums);
1132 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1133 file in the chain, or to NULL if it was the last in the chain. */
1135 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1137 struct mtf_file *f = *file;
1141 f->prev->next = f->next;
1143 f->next->prev = f->prev;
1145 mtf->head = f->next;
1147 mtf->tail = f->prev;
1150 if (f->in_var != NULL)
1151 case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
1152 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1154 struct variable *v = dict_get_var (f->dict, i);
1155 struct variable *mv = get_master (v);
1158 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1160 if (v->type == NUMERIC)
1163 memset (out->s, ' ', v->width);
1170 /* Read a record from every input file except the active file. */
1172 mtf_read_nonactive_records (void *mtf_)
1174 struct mtf_proc *mtf = mtf_;
1175 struct mtf_file *iter, *next;
1177 for (iter = mtf->head; iter != NULL; iter = next)
1180 if (iter->handle && !sfm_read_case (iter->reader, &iter->input))
1181 mtf_delete_file_in_place (mtf, &iter);
1185 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1186 if A == B, 1 if A > B. */
1188 mtf_compare_BY_values (struct mtf_proc *mtf,
1189 struct mtf_file *a, struct mtf_file *b,
/* A file whose own input case is null takes its data from C instead. */
1192 struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1193 struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
/* NOTE(review): this check runs only after A and B have already been
   dereferenced above, so it cannot catch a null A or B. */
1194 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1195 return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
1198 /* Perform one iteration of steps 3...7 above. */
1200 mtf_processing (struct ccase *c, void *mtf_)
1202 struct mtf_proc *mtf = mtf_;
1204 /* Do we need another record from the active file? */
1205 bool read_active_file;
1207 assert (mtf->head != NULL);
1208 assert (mtf->head->type == MTF_FILE);
1211 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1212 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1213 struct mtf_file *iter, *next;
1215 read_active_file = false;
1217 /* 3. Find the FILE input record(s) that have minimum BY
1218 values. Store all the values from these input records into
1219 the output record. */
1220 min_head = min_tail = mtf->head;
1221 max_head = max_tail = NULL;
1222 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1224 switch (mtf_compare_BY_values (mtf, min_head, iter, c))
1228 max_tail = max_tail->next_min = iter;
1230 max_head = max_tail = iter;
1234 min_tail = min_tail->next_min = iter;
1240 max_tail->next_min = min_head;
1241 max_tail = min_tail;
1245 max_head = min_head;
1246 max_tail = min_tail;
1248 min_head = min_tail = iter;
1255 /* 4. For every TABLE, read another record as long as the BY
1256 values on the TABLE's input record are less than the FILEs'
1257 BY values. If an exact match is found, store all the values
1258 from the TABLE input record into the output record. */
1259 for (; iter != NULL; iter = next)
1261 assert (iter->type == MTF_TABLE);
1266 switch (mtf_compare_BY_values (mtf, min_head, iter, c))
1270 max_tail = max_tail->next_min = iter;
1272 max_head = max_tail = iter;
1276 min_tail = min_tail->next_min = iter;
1280 if (iter->handle == NULL)
1282 if (sfm_read_case (iter->reader, &iter->input))
1284 mtf_delete_file_in_place (mtf, &iter);
1292 /* Next sequence number. */
1295 /* Store data to all the records we are using. */
1297 min_tail->next_min = NULL;
1298 for (iter = min_head; iter; iter = iter->next_min)
1302 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1304 struct variable *v = dict_get_var (iter->dict, i);
1305 struct variable *mv = get_master (v);
1307 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1309 struct ccase *record
1310 = case_is_null (&iter->input) ? c : &iter->input;
1311 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1313 mtf->seq_nums[mv->index] = mtf->seq_num;
1314 if (v->type == NUMERIC)
1315 out->f = case_num (record, v->fv);
1317 memcpy (out->s, case_str (record, v->fv), v->width);
1320 if (iter->in_var != NULL)
1321 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
1323 if (iter->type == MTF_FILE && iter->handle == NULL)
1324 read_active_file = true;
1327 /* Store missing values to all the records we're not
1330 max_tail->next_min = NULL;
1331 for (iter = max_head; iter; iter = iter->next_min)
1335 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1337 struct variable *v = dict_get_var (iter->dict, i);
1338 struct variable *mv = get_master (v);
1340 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1342 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1343 mtf->seq_nums[mv->index] = mtf->seq_num;
1345 if (v->type == NUMERIC)
1348 memset (out->s, ' ', v->width);
1351 if (iter->in_var != NULL)
1352 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1355 /* 5. Write the output record. */
1356 mtf->sink->class->write (mtf->sink, &mtf->mtf_case);
1358 /* 6. Read another record from each input file FILE and TABLE
1359 that we stored values from above. If we come to the end of
1360 one of the input files, remove it from the list of input
1362 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1364 next = iter->next_min;
1365 if (iter->reader != NULL
1366 && !sfm_read_case (iter->reader, &iter->input))
1367 mtf_delete_file_in_place (mtf, &iter);
1370 while (!read_active_file
1371 && mtf->head != NULL && mtf->head->type == MTF_FILE);
1373 return mtf->head != NULL && mtf->head->type == MTF_FILE;
1376 /* Merge the dictionary for file F into master dictionary M. */
1378 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1380 struct dictionary *d = f->dict;
1381 const char *d_docs, *m_docs;
1384 if (dict_get_label (m) == NULL)
1385 dict_set_label (m, dict_get_label (d));
1387 d_docs = dict_get_documents (d);
1388 m_docs = dict_get_documents (m);
1392 dict_set_documents (m, d_docs);
1398 new_len = strlen (m_docs) + strlen (d_docs);
1399 new_docs = xmalloc (new_len + 1);
1400 strcpy (new_docs, m_docs);
1401 strcat (new_docs, d_docs);
1402 dict_set_documents (m, new_docs);
1407 dict_compact_values (d);
1409 for (i = 0; i < dict_get_var_cnt (d); i++)
1411 struct variable *dv = dict_get_var (d, i);
1412 struct variable *mv = dict_lookup_var (m, dv->name);
1416 if (mv->width != dv->width)
1418 msg (SE, _("Variable %s in file %s (%s) has different "
1419 "type or width from the same variable in "
1420 "earlier file (%s)."),
1421 dv->name, handle_get_name (f->handle),
1422 var_type_description (dv), var_type_description (mv));
1426 if (dv->width == mv->width)
1428 if (val_labs_count (dv->val_labs)
1429 && !val_labs_count (mv->val_labs))
1430 mv->val_labs = val_labs_copy (dv->val_labs);
1431 if (dv->miss_type != MISSING_NONE
1432 && mv->miss_type == MISSING_NONE)
1433 copy_missing_values (mv, dv);
1436 if (dv->label && !mv->label)
1437 mv->label = xstrdup (dv->label);
1441 mv = dict_clone_var (m, dv, dv->name, dv->longname);
1442 assert (mv != NULL);
1449 /* Marks V's master variable as MASTER. */
1451 set_master (struct variable *v, struct variable *master)
1453 var_attach_aux (v, master, NULL);
1456 /* Returns the master variable corresponding to V,
1457 as set with set_master(). */
1458 static struct variable *
1459 get_master (struct variable *v)
1464 /* IMPORT command. */
1466 /* IMPORT input program. */
1469 struct pfm_reader *reader; /* Portable file reader. */
1470 struct case_map *map; /* Map from portable file to active file dict. */
1471 struct ccase bounce; /* Bounce buffer: cases are read into it, then mapped. */
1474 static void import_pgm_free (struct import_pgm *);
1476 /* Parses the IMPORT command. */
1480 struct import_pgm *pgm = NULL;
1481 struct file_handle *fh = NULL;
1482 struct dictionary *dict = NULL;
1485 pgm = xmalloc (sizeof *pgm);
1488 case_nullify (&pgm->bounce);
1494 if (lex_match_id ("FILE") || token == T_STRING)
1502 else if (lex_match_id ("TYPE"))
1506 if (lex_match_id ("COMM"))
1508 else if (lex_match_id ("TAPE"))
1512 lex_error (_("expecting COMM or TAPE"));
1518 if (!lex_match ('/') && token != '.')
1524 discard_variables ();
1526 pgm->reader = pfm_open_reader (fh, &dict, NULL);
1527 if (pgm->reader == NULL)
1529 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
1531 start_case_map (dict);
1532 if (!trim_dictionary (dict, OP_READ, NULL))
1534 pgm->map = finish_case_map (dict);
1536 dict_destroy (default_dict);
1537 default_dict = dict;
1539 vfm_source = create_case_source (&import_source_class, pgm);
1544 import_pgm_free (pgm);
1546 dict_destroy (dict);
1550 /* Frees a struct import_pgm. */
1552 import_pgm_free (struct import_pgm *pgm)
1556 pfm_close_reader (pgm->reader);
1557 destroy_case_map (pgm->map);
1558 case_destroy (&pgm->bounce);
1563 /* Clears internal state related to IMPORT input procedure. */
1565 import_source_destroy (struct case_source *source)
1567 struct import_pgm *pgm = source->aux;
1568 import_pgm_free (pgm);
1571 /* Reads all the cases from the data file into C and passes them
1572 to WRITE_CASE one by one, passing WC_DATA. */
1574 import_source_read (struct case_source *source,
1576 write_case_func *write_case, write_case_data wc_data)
1578 struct import_pgm *pgm = source->aux;
1583 if (pgm->map == NULL)
1584 ok = pfm_read_case (pgm->reader, c);
1587 ok = pfm_read_case (pgm->reader, &pgm->bounce);
1589 map_case (pgm->map, &pgm->bounce, c);
1593 ok = write_case (wc_data);
1598 const struct case_source_class import_source_class =
1603 import_source_destroy,
1609 A case map copies data from a case that corresponds to one
1610 dictionary to a case that corresponds to a second dictionary
1611 derived from the first by, optionally, deleting, reordering,
1612 or renaming variables. (No new variables may be created.)
1618 size_t value_cnt; /* Number of values in map. */
1619 int *map; /* For each destination index, the
1620 corresponding source index. */
1623 /* Prepares dictionary D for producing a case map. Afterward,
1624 the caller may delete, reorder, or rename variables within D
1625 at will before using finish_case_map() to produce the case
1628 Uses D's aux members, which may not otherwise be in use. */
1630 start_case_map (struct dictionary *d)
1632 size_t var_cnt = dict_get_var_cnt (d);
1635 for (i = 0; i < var_cnt; i++)
1637 struct variable *v = dict_get_var (d, i);
1638 int *src_fv = xmalloc (sizeof *src_fv);
1640 var_attach_aux (v, src_fv, var_dtor_free);
1644 /* Produces a case map from dictionary D, which must have been
1645 previously prepared with start_case_map().
1647 Does not retain any reference to D, and clears the aux members
1648 set up by start_case_map().
1650 Returns the new case map, or a null pointer if no mapping is
1651 required (that is, no data has changed position). */
1652 static struct case_map *
1653 finish_case_map (struct dictionary *d)
1655 struct case_map *map;
1656 size_t var_cnt = dict_get_var_cnt (d);
1660 map = xmalloc (sizeof *map);
1661 map->value_cnt = dict_get_next_value_idx (d);
1662 map->map = xmalloc (sizeof *map->map * map->value_cnt);
1663 for (i = 0; i < map->value_cnt; i++)
1667 for (i = 0; i < var_cnt; i++)
1669 struct variable *v = dict_get_var (d, i);
1670 int *src_fv = (int *) var_detach_aux (v);
1673 if (v->fv != *src_fv)
1676 for (idx = 0; idx < v->nv; idx++)
1678 int src_idx = *src_fv + idx;
1679 int dst_idx = v->fv + idx;
1681 assert (map->map[dst_idx] == -1);
1682 map->map[dst_idx] = src_idx;
1689 destroy_case_map (map);
1693 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1699 /* Maps from SRC to DST, applying case map MAP. */
1701 map_case (const struct case_map *map,
1702 const struct ccase *src, struct ccase *dst)
1706 assert (map != NULL);
1707 assert (src != NULL);
1708 assert (dst != NULL);
1709 assert (src != dst);
1711 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1713 int src_idx = map->map[dst_idx];
1715 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1719 /* Destroys case map MAP. */
1721 destroy_case_map (struct case_map *map)