1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 #include "dictionary.h"
28 #include "file-handle.h"
33 #include "pfm-write.h"
36 #include "sfm-write.h"
38 #include "value-labels.h"
44 #define _(msgid) gettext (msgid)
46 #include "debug-print.h"
48 /* Rearranging and reducing a dictionary. */
49 static void start_case_map (struct dictionary *);
50 static struct case_map *finish_case_map (struct dictionary *);
51 static void map_case (const struct case_map *,
52 const struct ccase *, struct ccase *);
53 static void destroy_case_map (struct case_map *);
58 OP_READ, /* GET or IMPORT. */
59 OP_SAVE, /* SAVE or XSAVE. */
60 OP_EXPORT /* EXPORT. */
63 static bool trim_dictionary (struct dictionary *,
64 enum operation, int *compress);
66 /* GET input program. */
69 struct sfm_reader *reader; /* System file reader. */
70 struct case_map *map; /* Map from system file to active file dict. */
71 struct ccase bounce; /* Bounce buffer. */
74 static void get_pgm_free (struct get_pgm *);
76 /* Parses the GET command. */
80 struct get_pgm *pgm = NULL;
81 struct file_handle *fh;
82 struct dictionary *dict = NULL;
84 pgm = xmalloc (sizeof *pgm);
87 case_nullify (&pgm->bounce);
92 if (lex_match_id ("FILE"))
98 pgm->reader = sfm_open_reader (fh, &dict, NULL);
99 if (pgm->reader == NULL)
101 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
103 start_case_map (dict);
104 if (!trim_dictionary (dict, OP_READ, NULL))
106 pgm->map = finish_case_map (dict);
108 dict_destroy (default_dict);
111 vfm_source = create_case_source (&get_source_class, pgm);
122 /* Frees a struct get_pgm. */
124 get_pgm_free (struct get_pgm *pgm)
128 sfm_close_reader (pgm->reader);
129 destroy_case_map (pgm->map);
130 case_destroy (&pgm->bounce);
135 /* Clears internal state related to GET input procedure. */
137 get_source_destroy (struct case_source *source)
139 struct get_pgm *pgm = source->aux;
143 /* Reads all the cases from the data file into C and passes them
144 to WRITE_CASE one by one, passing WC_DATA. */
146 get_source_read (struct case_source *source,
148 write_case_func *write_case, write_case_data wc_data)
150 struct get_pgm *pgm = source->aux;
155 if (pgm->map == NULL)
156 ok = sfm_read_case (pgm->reader, c);
159 ok = sfm_read_case (pgm->reader, &pgm->bounce);
161 map_case (pgm->map, &pgm->bounce, c);
165 ok = write_case (wc_data);
170 const struct case_source_class get_source_class =
178 /* XSAVE transformation and SAVE procedure. */
181 struct trns_header h;
182 struct sfm_writer *writer; /* System file writer. */
183 struct case_map *map; /* Map from active file to system file dict. */
184 struct ccase bounce; /* Bounce buffer. */
187 static int save_write_case_func (struct ccase *, void *);
188 static trns_proc_func save_trns_proc;
189 static trns_free_func save_trns_free;
191 /* Parses the SAVE or XSAVE command
192 and returns the parsed transformation. */
193 static struct save_trns *
194 cmd_save_internal (void)
196 struct file_handle *fh = NULL;
197 struct dictionary *dict = NULL;
198 struct save_trns *t = NULL;
199 int compress = get_scompression ();
200 const int default_version = 3;
201 int version = default_version;
202 short no_name_table = 0;
204 t = xmalloc (sizeof *t);
205 t->h.proc = save_trns_proc;
206 t->h.free = save_trns_free;
209 case_nullify (&t->bounce);
212 /* Read most of the subcommands. */
215 if (lex_match_id ("VERSION"))
218 if (lex_force_int ())
220 version = lex_integer ();
223 if (lex_match_id ("X"))
227 else if (lex_match_id ("OUTFILE"))
236 if ( ! lex_match('/') )
243 lex_error (_("expecting end of command"));
249 msg ( ME, _("The required %s subcommand was not present"), "OUTFILE");
253 if ( version != default_version )
255 msg (MW, _("Unsupported sysfile version: %d. Using version %d instead."),
256 version, default_version);
258 version = default_version;
261 dict = dict_clone (default_dict);
262 start_case_map (dict);
263 if (!trim_dictionary (dict, OP_SAVE, &compress))
265 t->map = finish_case_map (dict);
267 case_create (&t->bounce, dict_get_next_value_idx (dict));
269 t->writer = sfm_open_writer (fh, dict, compress, no_name_table);
270 if (t->writer == NULL)
280 save_trns_free (&t->h);
284 /* Parses and performs the SAVE procedure. */
288 struct save_trns *t = cmd_save_internal ();
291 procedure (save_write_case_func, t);
292 save_trns_free (&t->h);
300 /* Parses the XSAVE transformation command. */
304 struct save_trns *t = cmd_save_internal ();
307 add_transformation (&t->h);
314 /* Writes the given C to the file specified by T. */
316 do_write_case (struct save_trns *t, struct ccase *c)
319 sfm_write_case (t->writer, c);
322 map_case (t->map, c, &t->bounce);
323 sfm_write_case (t->writer, &t->bounce);
327 /* Writes case C to the system file specified on SAVE. */
329 save_write_case_func (struct ccase *c, void *aux UNUSED)
331 do_write_case (aux, c);
335 /* Writes case C to the system file specified on XSAVE. */
337 save_trns_proc (struct trns_header *h, struct ccase *c, int case_num UNUSED)
339 struct save_trns *t = (struct save_trns *) h;
340 do_write_case (t, c);
344 /* Frees a SAVE transformation. */
346 save_trns_free (struct trns_header *t_)
348 struct save_trns *t = (struct save_trns *) t_;
352 sfm_close_writer (t->writer);
353 destroy_case_map (t->map);
354 case_destroy (&t->bounce);
358 static bool rename_variables (struct dictionary *dict);
359 static bool drop_variables (struct dictionary *dict);
360 static bool keep_variables (struct dictionary *dict);
362 /* Commands that read and write system files share a great deal
363 of common syntactic structure for rearranging and dropping
364 variables. This function parses this syntax and modifies DICT
367 OP is the operation being performed. For operations that
368 write a system file, *COMPRESS is set to 1 if the system file
369 should be compressed, 0 otherwise.
371 Returns true on success, false on failure. */
373 trim_dictionary (struct dictionary *dict, enum operation op, int *compress)
375 assert ((compress != NULL) == (op == OP_SAVE));
376 if (get_scompression())
379 if (op == OP_SAVE || op == OP_EXPORT)
381 /* Delete all the scratch variables. */
386 v = xmalloc (sizeof *v * dict_get_var_cnt (dict));
388 for (i = 0; i < dict_get_var_cnt (dict); i++)
389 if (dict_class_from_id (dict_get_var (dict, i)->name) == DC_SCRATCH)
390 v[nv++] = dict_get_var (dict, i);
391 dict_delete_vars (dict, v, nv);
395 while (lex_match ('/'))
399 if (op == OP_SAVE && lex_match_id ("COMPRESSED"))
401 else if (op == OP_SAVE && lex_match_id ("UNCOMPRESSED"))
403 else if (lex_match_id ("DROP"))
404 ok = drop_variables (dict);
405 else if (lex_match_id ("KEEP"))
406 ok = keep_variables (dict);
407 else if (lex_match_id ("RENAME"))
408 ok = rename_variables (dict);
411 lex_error (_("expecting a valid subcommand"));
419 if (!lex_end_of_command ())
422 dict_compact_values (dict);
426 /* Parses and performs the RENAME subcommand of GET and SAVE. */
428 rename_variables (struct dictionary *dict)
446 v = parse_dict_variable (dict);
449 if (!lex_force_match ('=')
452 if (dict_lookup_var (dict, tokid) != NULL)
454 msg (SE, _("Cannot rename %s as %s because there already exists "
455 "a variable named %s. To rename variables with "
456 "overlapping names, use a single RENAME subcommand "
457 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
458 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
462 dict_rename_var (dict, v, tokid);
471 while (lex_match ('('))
475 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
477 if (!lex_match ('='))
479 msg (SE, _("`=' expected after variable list."));
482 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
486 msg (SE, _("Number of variables on left side of `=' (%d) does not "
487 "match number of variables on right side (%d), in "
488 "parenthesized group %d of RENAME subcommand."),
489 nv - old_nv, nn - old_nv, group);
492 if (!lex_force_match (')'))
497 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
499 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
505 for (i = 0; i < nn; i++)
513 /* Parses and performs the DROP subcommand of GET and SAVE.
514 Returns true if successful, false on failure.*/
516 drop_variables (struct dictionary *dict)
522 if (!parse_variables (dict, &v, &nv, PV_NONE))
524 dict_delete_vars (dict, v, nv);
527 if (dict_get_var_cnt (dict) == 0)
529 msg (SE, _("Cannot DROP all variables from dictionary."));
535 /* Parses and performs the KEEP subcommand of GET and SAVE.
536 Returns true if successful, false on failure.*/
538 keep_variables (struct dictionary *dict)
545 if (!parse_variables (dict, &v, &nv, PV_NONE))
548 /* Move the specified variables to the beginning. */
549 dict_reorder_vars (dict, v, nv);
551 /* Delete the remaining variables. */
552 v = xrealloc (v, (dict_get_var_cnt (dict) - nv) * sizeof *v);
553 for (i = nv; i < dict_get_var_cnt (dict); i++)
554 v[i - nv] = dict_get_var (dict, i);
555 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
561 /* EXPORT procedure. */
564 struct pfm_writer *writer; /* Portable file writer. */
565 struct case_map *map; /* Map from active file to portable file dict. */
566 struct ccase bounce; /* Bounce buffer. */
569 static int export_write_case_func (struct ccase *, void *);
570 static void export_proc_free (struct export_proc *);
572 /* Parses the EXPORT command. */
573 /* FIXME: same as cmd_save_internal(). */
577 struct file_handle *fh;
578 struct dictionary *dict;
579 struct export_proc *proc;
581 proc = xmalloc (sizeof *proc);
584 case_nullify (&proc->bounce);
587 if (lex_match_id ("OUTFILE"))
593 dict = dict_clone (default_dict);
594 start_case_map (dict);
595 if (!trim_dictionary (dict, OP_EXPORT, NULL))
597 proc->map = finish_case_map (dict);
598 if (proc->map != NULL)
599 case_create (&proc->bounce, dict_get_next_value_idx (dict));
601 proc->writer = pfm_open_writer (fh, dict);
602 if (proc->writer == NULL)
607 procedure (export_write_case_func, proc);
608 export_proc_free (proc);
615 export_proc_free (proc);
620 /* Writes case C to the EXPORT file. */
622 export_write_case_func (struct ccase *c, void *aux)
624 struct export_proc *proc = aux;
625 if (proc->map == NULL)
626 pfm_write_case (proc->writer, c);
629 map_case (proc->map, c, &proc->bounce);
630 pfm_write_case (proc->writer, &proc->bounce);
636 export_proc_free (struct export_proc *proc)
640 pfm_close_writer (proc->writer);
641 destroy_case_map (proc->map);
642 case_destroy (&proc->bounce);
648 #include "debug-print.h"
653 MTF_FILE, /* Specified on FILE= subcommand. */
654 MTF_TABLE /* Specified on TABLE= subcommand. */
657 /* One of the files on MATCH FILES. */
660 struct mtf_file *next, *prev;
661 /* Next, previous in the list of files. */
662 struct mtf_file *next_min; /* Next in the chain of minimums. */
664 int type; /* One of MTF_*. */
665 struct variable **by; /* List of BY variables for this file. */
666 struct file_handle *handle; /* File handle. */
667 struct sfm_reader *reader; /* System file reader. */
668 struct dictionary *dict; /* Dictionary from system file. */
671 char *in_name; /* Variable name. */
672 struct variable *in_var; /* Variable (in master dictionary). */
674 struct ccase input; /* Input record. */
677 /* MATCH FILES procedure. */
680 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
681 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
683 size_t by_cnt; /* Number of variables on BY subcommand. */
685 /* Names of FIRST, LAST variables. */
686 char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
688 struct dictionary *dict; /* Dictionary of output file. */
689 struct case_sink *sink; /* Sink to receive output. */
690 struct ccase mtf_case; /* Case used for output. */
692 unsigned seq_num; /* Have we initialized this variable? */
693 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
696 static void mtf_free (struct mtf_proc *);
697 static void mtf_free_file (struct mtf_file *);
698 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
699 static void mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
701 static void mtf_read_nonactive_records (void *);
702 static void mtf_processing_finish (void *);
703 static int mtf_processing (struct ccase *, void *);
705 static char *var_type_description (struct variable *);
707 static void set_master (struct variable *, struct variable *master);
708 static struct variable *get_master (struct variable *);
710 /* Parse and execute the MATCH FILES command. */
712 cmd_match_files (void)
715 struct mtf_file *first_table = NULL;
716 struct mtf_file *iter;
718 bool used_active_file = false;
719 bool saw_table = false;
722 mtf.head = mtf.tail = NULL;
726 mtf.dict = dict_create ();
728 case_nullify (&mtf.mtf_case);
731 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
735 && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
737 struct mtf_file *file = xmalloc (sizeof *file);
739 if (lex_match_id ("FILE"))
740 file->type = MTF_FILE;
741 else if (lex_match_id ("TABLE"))
743 file->type = MTF_TABLE;
754 file->in_name = NULL;
756 case_nullify (&file->input);
758 /* FILEs go first, then TABLEs. */
759 if (file->type == MTF_TABLE || first_table == NULL)
762 file->prev = mtf.tail;
764 mtf.tail->next = file;
766 if (mtf.head == NULL)
768 if (file->type == MTF_TABLE && first_table == NULL)
773 assert (file->type == MTF_FILE);
774 file->next = first_table;
775 file->prev = first_table->prev;
776 if (first_table->prev)
777 first_table->prev->next = file;
780 first_table->prev = file;
788 if (used_active_file)
790 msg (SE, _("The active file may not be specified more "
794 used_active_file = true;
796 assert (pgm_state != STATE_INPUT);
797 if (pgm_state == STATE_INIT)
799 msg (SE, _("Cannot specify the active file since no active "
800 "file has been defined."));
807 _("MATCH FILES may not be used after TEMPORARY when "
808 "the active file is an input source. "
809 "Temporary transformations will be made permanent."));
813 file->dict = default_dict;
817 file->handle = fh_parse ();
818 if (file->handle == NULL)
821 file->reader = sfm_open_reader (file->handle, &file->dict, NULL);
822 if (file->reader == NULL)
825 case_create (&file->input, dict_get_next_value_idx (file->dict));
828 while (lex_match ('/'))
829 if (lex_match_id ("RENAME"))
831 if (!rename_variables (file->dict))
834 else if (lex_match_id ("IN"))
843 if (file->in_name != NULL)
845 msg (SE, _("Multiple IN subcommands for a single FILE or "
849 file->in_name = xstrdup (tokid);
854 mtf_merge_dictionary (mtf.dict, file);
859 if (lex_match (T_BY))
861 struct variable **by;
865 msg (SE, _("BY may appear at most once."));
870 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
871 PV_NO_DUPLICATE | PV_NO_SCRATCH))
874 for (iter = mtf.head; iter != NULL; iter = iter->next)
878 iter->by = xmalloc (sizeof *iter->by * mtf.by_cnt);
880 for (i = 0; i < mtf.by_cnt; i++)
882 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
883 if (iter->by[i] == NULL)
885 msg (SE, _("File %s lacks BY variable %s."),
886 iter->handle ? handle_get_name (iter->handle) : "*",
895 else if (lex_match_id ("FIRST"))
897 if (mtf.first[0] != '\0')
899 msg (SE, _("FIRST may appear at most once."));
904 if (!lex_force_id ())
906 strcpy (mtf.first, tokid);
909 else if (lex_match_id ("LAST"))
911 if (mtf.last[0] != '\0')
913 msg (SE, _("LAST may appear at most once."));
918 if (!lex_force_id ())
920 strcpy (mtf.last, tokid);
923 else if (lex_match_id ("MAP"))
927 else if (lex_match_id ("DROP"))
929 if (!drop_variables (mtf.dict))
932 else if (lex_match_id ("KEEP"))
934 if (!keep_variables (mtf.dict))
943 if (!lex_match ('/') && token != '.')
945 lex_end_of_command ();
954 msg (SE, _("BY is required when TABLE is specified."));
959 msg (SE, _("BY is required when IN is specified."));
964 /* Set up mapping from each file's variables to master
966 for (iter = mtf.head; iter != NULL; iter = iter->next)
968 struct dictionary *d = iter->dict;
971 for (i = 0; i < dict_get_var_cnt (d); i++)
973 struct variable *v = dict_get_var (d, i);
974 struct variable *mv = dict_lookup_var (mtf.dict, v->name);
980 /* Add IN variables to master dictionary. */
981 for (iter = mtf.head; iter != NULL; iter = iter->next)
982 if (iter->in_name != NULL)
984 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
985 if (iter->in_var == NULL)
987 msg (SE, _("IN variable name %s duplicates an "
988 "existing variable name."),
992 iter->in_var->print = iter->in_var->write
993 = make_output_format (FMT_F, 1, 0);
996 /* MATCH FILES performs an n-way merge on all its input files.
999 1. Read one input record from every input FILE.
1001 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
1003 3. Find the FILE input record(s) that have minimum BY
1004 values. Store all the values from these input records into
1007 4. For every TABLE, read another record as long as the BY values
1008 on the TABLE's input record are less than the FILEs' BY values.
1009 If an exact match is found, store all the values from the TABLE
1010 input record into the output record.
1012 5. Write the output record.
1014 6. Read another record from each input file FILE and TABLE that
1015 we stored values from above. If we come to the end of one of the
1016 input files, remove it from the list of input files.
1018 7. Repeat from step 2.
1020 Unfortunately, this algorithm can't be implemented in a
1021 straightforward way because there's no function to read a
1022 record from the active file. Instead, it has to be written
1025 FIXME: For merging large numbers of files (more than 10?) a
1026 better algorithm would use a heap for finding minimum
1029 if (!used_active_file)
1030 discard_variables ();
1032 dict_compact_values (mtf.dict);
1033 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
1034 if (mtf.sink->class->open != NULL)
1035 mtf.sink->class->open (mtf.sink);
1037 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1038 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1040 mtf_read_nonactive_records (&mtf);
1041 if (used_active_file)
1042 procedure (mtf_processing, &mtf);
1043 mtf_processing_finish (&mtf);
1045 free_case_source (vfm_source);
1048 dict_destroy (default_dict);
1049 default_dict = mtf.dict;
1051 vfm_source = mtf.sink->class->make_source (mtf.sink);
1052 free_case_sink (mtf.sink);
1062 /* Repeats 2...7 an arbitrary number of times. */
1064 mtf_processing_finish (void *mtf_)
1066 struct mtf_proc *mtf = mtf_;
1067 struct mtf_file *iter;
1069 /* Find the active file and delete it. */
1070 for (iter = mtf->head; iter; iter = iter->next)
1071 if (iter->handle == NULL)
1073 mtf_delete_file_in_place (mtf, &iter);
1077 while (mtf->head && mtf->head->type == MTF_FILE)
1078 if (!mtf_processing (NULL, mtf))
1082 /* Return a string in a static buffer describing V's variable type and
1085 var_type_description (struct variable *v)
1087 static char buf[2][32];
1094 if (v->type == NUMERIC)
1095 strcpy (s, "numeric");
1098 assert (v->type == ALPHA);
1099 sprintf (s, "string with width %d", v->width);
1104 /* Free FILE and associated data. */
1106 mtf_free_file (struct mtf_file *file)
1109 sfm_close_reader (file->reader);
1110 if (file->dict != default_dict)
1111 dict_destroy (file->dict);
1112 case_destroy (&file->input);
1113 free (file->in_name);
1117 /* Free all the data for the MATCH FILES procedure. */
1119 mtf_free (struct mtf_proc *mtf)
1121 struct mtf_file *iter, *next;
1123 for (iter = mtf->head; iter; iter = next)
1126 mtf_free_file (iter);
1130 dict_destroy (mtf->dict);
1131 case_destroy (&mtf->mtf_case);
1132 free (mtf->seq_nums);
1135 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1136 file in the chain, or to NULL if it was the last in the chain. */
1138 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1140 struct mtf_file *f = *file;
1144 f->prev->next = f->next;
1146 f->next->prev = f->prev;
1148 mtf->head = f->next;
1150 mtf->tail = f->prev;
1153 if (f->in_var != NULL)
1154 case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
1155 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1157 struct variable *v = dict_get_var (f->dict, i);
1158 struct variable *mv = get_master (v);
1161 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1163 if (v->type == NUMERIC)
1166 memset (out->s, ' ', v->width);
1173 /* Read a record from every input file except the active file. */
1175 mtf_read_nonactive_records (void *mtf_)
1177 struct mtf_proc *mtf = mtf_;
1178 struct mtf_file *iter, *next;
1180 for (iter = mtf->head; iter != NULL; iter = next)
1183 if (iter->handle && !sfm_read_case (iter->reader, &iter->input))
1184 mtf_delete_file_in_place (mtf, &iter);
1188 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1189 if A == B, 1 if A > B. */
1191 mtf_compare_BY_values (struct mtf_proc *mtf,
1192 struct mtf_file *a, struct mtf_file *b,
1195 struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1196 struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
1197 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1198 return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
1201 /* Perform one iteration of steps 3...7 above. */
1203 mtf_processing (struct ccase *c, void *mtf_)
1205 struct mtf_proc *mtf = mtf_;
1207 /* Do we need another record from the active file? */
1208 bool read_active_file;
1210 assert (mtf->head != NULL);
1211 if (mtf->head->type == MTF_TABLE)
1216 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1217 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1218 struct mtf_file *iter, *next;
1220 read_active_file = false;
1222 /* 3. Find the FILE input record(s) that have minimum BY
1223 values. Store all the values from these input records into
1224 the output record. */
1225 min_head = min_tail = mtf->head;
1226 max_head = max_tail = NULL;
1227 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1230 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1234 max_tail = max_tail->next_min = iter;
1236 max_head = max_tail = iter;
1239 min_tail = min_tail->next_min = iter;
1244 max_tail->next_min = min_head;
1245 max_tail = min_tail;
1249 max_head = min_head;
1250 max_tail = min_tail;
1252 min_head = min_tail = iter;
1256 /* 4. For every TABLE, read another record as long as the BY
1257 values on the TABLE's input record are less than the FILEs'
1258 BY values. If an exact match is found, store all the values
1259 from the TABLE input record into the output record. */
1260 for (; iter != NULL; iter = next)
1262 assert (iter->type == MTF_TABLE);
1267 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1271 max_tail = max_tail->next_min = iter;
1273 max_head = max_tail = iter;
1276 min_tail = min_tail->next_min = iter;
1279 if (iter->handle == NULL)
1281 if (sfm_read_case (iter->reader, &iter->input))
1283 mtf_delete_file_in_place (mtf, &iter);
1289 /* Next sequence number. */
1292 /* Store data to all the records we are using. */
1294 min_tail->next_min = NULL;
1295 for (iter = min_head; iter; iter = iter->next_min)
1299 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1301 struct variable *v = dict_get_var (iter->dict, i);
1302 struct variable *mv = get_master (v);
1304 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1306 struct ccase *record
1307 = case_is_null (&iter->input) ? c : &iter->input;
1308 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1310 mtf->seq_nums[mv->index] = mtf->seq_num;
1311 if (v->type == NUMERIC)
1312 out->f = case_num (record, v->fv);
1314 memcpy (out->s, case_str (record, v->fv), v->width);
1317 if (iter->in_var != NULL)
1318 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
1320 if (iter->type == MTF_FILE && iter->handle == NULL)
1321 read_active_file = true;
1324 /* Store missing values to all the records we're not
1327 max_tail->next_min = NULL;
1328 for (iter = max_head; iter; iter = iter->next_min)
1332 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1334 struct variable *v = dict_get_var (iter->dict, i);
1335 struct variable *mv = get_master (v);
1337 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1339 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1340 mtf->seq_nums[mv->index] = mtf->seq_num;
1342 if (v->type == NUMERIC)
1345 memset (out->s, ' ', v->width);
1348 if (iter->in_var != NULL)
1349 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1352 /* 5. Write the output record. */
1353 mtf->sink->class->write (mtf->sink, &mtf->mtf_case);
1355 /* 6. Read another record from each input file FILE and TABLE
1356 that we stored values from above. If we come to the end of
1357 one of the input files, remove it from the list of input
1359 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1361 next = iter->next_min;
1362 if (iter->reader != NULL
1363 && !sfm_read_case (iter->reader, &iter->input))
1364 mtf_delete_file_in_place (mtf, &iter);
1367 while (!read_active_file
1368 && mtf->head != NULL && mtf->head->type == MTF_FILE);
1370 return mtf->head != NULL && mtf->head->type == MTF_FILE;
1373 /* Merge the dictionary for file F into master dictionary M. */
1375 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1377 struct dictionary *d = f->dict;
1378 const char *d_docs, *m_docs;
1381 if (dict_get_label (m) == NULL)
1382 dict_set_label (m, dict_get_label (d));
1384 d_docs = dict_get_documents (d);
1385 m_docs = dict_get_documents (m);
1389 dict_set_documents (m, d_docs);
1395 new_len = strlen (m_docs) + strlen (d_docs);
1396 new_docs = xmalloc (new_len + 1);
1397 strcpy (new_docs, m_docs);
1398 strcat (new_docs, d_docs);
1399 dict_set_documents (m, new_docs);
1404 for (i = 0; i < dict_get_var_cnt (d); i++)
1406 struct variable *dv = dict_get_var (d, i);
1407 struct variable *mv = dict_lookup_var (m, dv->name);
1409 if (dict_class_from_id (dv->name) == DC_SCRATCH)
1414 if (mv->width != dv->width)
1416 msg (SE, _("Variable %s in file %s (%s) has different "
1417 "type or width from the same variable in "
1418 "earlier file (%s)."),
1419 dv->name, handle_get_name (f->handle),
1420 var_type_description (dv), var_type_description (mv));
1424 if (dv->width == mv->width)
1426 if (val_labs_count (dv->val_labs)
1427 && !val_labs_count (mv->val_labs))
1428 mv->val_labs = val_labs_copy (dv->val_labs);
1429 if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
1430 mv_copy (&mv->miss, &dv->miss);
1433 if (dv->label && !mv->label)
1434 mv->label = xstrdup (dv->label);
1437 mv = dict_clone_var_assert (m, dv, dv->name);
1443 /* Marks V's master variable as MASTER. */
1445 set_master (struct variable *v, struct variable *master)
1447 var_attach_aux (v, master, NULL);
1450 /* Returns the master variable corresponding to V,
1451 as set with set_master(). */
1452 static struct variable *
1453 get_master (struct variable *v)
1458 /* IMPORT command. */
1460 /* IMPORT input program. */
1463 struct pfm_reader *reader; /* Portable file reader. */
1464 struct case_map *map; /* Map from portable file to active file dict. */
1465 struct ccase bounce; /* Bounce buffer. */
1468 static void import_pgm_free (struct import_pgm *);
1470 /* Parses the IMPORT command. */
1474 struct import_pgm *pgm = NULL;
1475 struct file_handle *fh = NULL;
1476 struct dictionary *dict = NULL;
1479 pgm = xmalloc (sizeof *pgm);
1482 case_nullify (&pgm->bounce);
1488 if (lex_match_id ("FILE") || token == T_STRING)
1496 else if (lex_match_id ("TYPE"))
1500 if (lex_match_id ("COMM"))
1502 else if (lex_match_id ("TAPE"))
1506 lex_error (_("expecting COMM or TAPE"));
1512 if (!lex_match ('/') && token != '.')
1518 discard_variables ();
1520 pgm->reader = pfm_open_reader (fh, &dict, NULL);
1521 if (pgm->reader == NULL)
1523 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
1525 start_case_map (dict);
1526 if (!trim_dictionary (dict, OP_READ, NULL))
1528 pgm->map = finish_case_map (dict);
1530 dict_destroy (default_dict);
1531 default_dict = dict;
1533 vfm_source = create_case_source (&import_source_class, pgm);
1538 import_pgm_free (pgm);
1540 dict_destroy (dict);
1544 /* Frees a struct import_pgm. */
1546 import_pgm_free (struct import_pgm *pgm)
1550 pfm_close_reader (pgm->reader);
1551 destroy_case_map (pgm->map);
1552 case_destroy (&pgm->bounce);
1557 /* Clears internal state related to IMPORT input procedure. */
1559 import_source_destroy (struct case_source *source)
1561 struct import_pgm *pgm = source->aux;
1562 import_pgm_free (pgm);
1565 /* Reads all the cases from the data file into C and passes them
1566 to WRITE_CASE one by one, passing WC_DATA. */
1568 import_source_read (struct case_source *source,
1570 write_case_func *write_case, write_case_data wc_data)
1572 struct import_pgm *pgm = source->aux;
1577 if (pgm->map == NULL)
1578 ok = pfm_read_case (pgm->reader, c);
1581 ok = pfm_read_case (pgm->reader, &pgm->bounce);
1583 map_case (pgm->map, &pgm->bounce, c);
1587 ok = write_case (wc_data);
1592 const struct case_source_class import_source_class =
1597 import_source_destroy,
1603 A case map copies data from a case that corresponds to one
1604 dictionary to a case that corresponds to a second dictionary
1605 derived from the first by, optionally, deleting, reordering,
1606 or renaming variables. (No new variables may be created.)
1612 size_t value_cnt; /* Number of values in map. */
1613 int *map; /* For each destination index, the
1614 corresponding source index. */
1617 /* Prepares dictionary D for producing a case map. Afterward,
1618 the caller may delete, reorder, or rename variables within D
1619 at will before using finish_case_map() to produce the case
1622 Uses D's aux members, which may not otherwise be in use. */
1624 start_case_map (struct dictionary *d)
1626 size_t var_cnt = dict_get_var_cnt (d);
1629 for (i = 0; i < var_cnt; i++)
1631 struct variable *v = dict_get_var (d, i);
1632 int *src_fv = xmalloc (sizeof *src_fv);
1634 var_attach_aux (v, src_fv, var_dtor_free);
1638 /* Produces a case map from dictionary D, which must have been
1639 previously prepared with start_case_map().
1641 Does not retain any reference to D, and clears the aux members
1642 set up by start_case_map().
1644 Returns the new case map, or a null pointer if no mapping is
1645 required (that is, no data has changed position). */
1646 static struct case_map *
1647 finish_case_map (struct dictionary *d)
1649 struct case_map *map;
1650 size_t var_cnt = dict_get_var_cnt (d);
1654 map = xmalloc (sizeof *map);
1655 map->value_cnt = dict_get_next_value_idx (d);
1656 map->map = xmalloc (sizeof *map->map * map->value_cnt);
1657 for (i = 0; i < map->value_cnt; i++)
1661 for (i = 0; i < var_cnt; i++)
1663 struct variable *v = dict_get_var (d, i);
1664 int *src_fv = (int *) var_detach_aux (v);
1667 if (v->fv != *src_fv)
1670 for (idx = 0; idx < v->nv; idx++)
1672 int src_idx = *src_fv + idx;
1673 int dst_idx = v->fv + idx;
1675 assert (map->map[dst_idx] == -1);
1676 map->map[dst_idx] = src_idx;
1683 destroy_case_map (map);
1687 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1693 /* Maps from SRC to DST, applying case map MAP. */
1695 map_case (const struct case_map *map,
1696 const struct ccase *src, struct ccase *dst)
1700 assert (map != NULL);
1701 assert (src != NULL);
1702 assert (dst != NULL);
1703 assert (src != dst);
1705 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1707 int src_idx = map->map[dst_idx];
1709 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1713 /* Destroys case map MAP. */
1715 destroy_case_map (struct case_map *map)