1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
26 #include "dictionary.h"
28 #include "file-handle.h"
33 #include "pfm-write.h"
36 #include "sfm-write.h"
38 #include "value-labels.h"
43 #include "debug-print.h"
45 /* Rearranging and reducing a dictionary. */
/* Forward declarations of the case-map helpers defined at the end of
   this file.  start_case_map() and finish_case_map() bracket a series
   of dictionary edits, map_case() copies a case through the resulting
   map, and destroy_case_map() destroys the map. */
46 static void start_case_map (struct dictionary *);
47 static struct case_map *finish_case_map (struct dictionary *);
48 static void map_case (const struct case_map *,
49 const struct ccase *, struct ccase *);
50 static void destroy_case_map (struct case_map *);
/* Values for the OP argument of trim_dictionary(), naming the command
   on whose behalf the dictionary is being trimmed. */
55 OP_READ, /* GET or IMPORT. */
56 OP_SAVE, /* SAVE or XSAVE. */
57 OP_EXPORT, /* EXPORT. */
58 OP_MATCH /* MATCH FILES. */
/* Parses the shared DROP/KEEP/RENAME syntax and edits the dictionary
   accordingly; defined further down in this file. */
61 static int trim_dictionary (struct dictionary *,
62 enum operation, int *compress);
64 /* GET input program. */
/* State shared between the GET parser and the get_source_* callbacks. */
67 struct sfm_reader *reader; /* System file reader. */
68 struct case_map *map; /* Map from system file to active file dict; null when cases are read directly. */
69 struct ccase bounce; /* Bounce buffer: receives each raw case when MAP is non-null. */
/* Closes the reader and destroys the map and bounce buffer of a
   struct get_pgm; defined below. */
72 static void get_pgm_free (struct get_pgm *);
74 /* Parses the GET command.

   Visible steps: allocate the program state, open a system file reader
   for FH (which also yields the file's dictionary in DICT), size the
   bounce buffer from that dictionary, let trim_dictionary() apply the
   user's variable edits between start_case_map() and
   finish_case_map(), destroy the old default dictionary, and register
   a case source backed by get_source_class. */
78 struct get_pgm *pgm = NULL;
79 struct file_handle *fh;
80 struct dictionary *dict = NULL;
82 pgm = xmalloc (sizeof *pgm);
/* Start with a null bounce case; presumably this keeps get_pgm_free()
   safe on an error path taken before case_create() -- confirm. */
85 case_nullify (&pgm->bounce);
90 if (lex_match_id ("FILE"))
96 pgm->reader = sfm_open_reader (fh, &dict, NULL);
97 if (pgm->reader == NULL)
/* The bounce buffer is sized before trimming, i.e. for the full
   dictionary as read from the file. */
99 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
101 start_case_map (dict);
/* OP_READ carries no compression flag, hence the null COMPRESS
   (trim_dictionary() asserts exactly this pairing). */
102 if (!trim_dictionary (dict, OP_READ, NULL))
/* May be null when no value changed position. */
104 pgm->map = finish_case_map (dict);
106 dict_destroy (default_dict);
109 vfm_source = create_case_source (&get_source_class, pgm);
120 /* Frees a struct get_pgm: closes its reader, destroys its case map,
   and destroys its bounce buffer.

   NOTE(review): PGM->map can be null (finish_case_map() returns a null
   pointer when no mapping is required), so destroy_case_map() has to
   accept a null argument; its body is not visible here -- confirm. */
122 get_pgm_free (struct get_pgm *pgm)
126 sfm_close_reader (pgm->reader);
127 destroy_case_map (pgm->map);
128 case_destroy (&pgm->bounce);
133 /* Clears internal state related to GET input procedure.
   SOURCE->aux is treated as the struct get_pgm for this source
   (presumably the pointer handed to create_case_source() by the GET
   parser). */
135 get_source_destroy (struct case_source *source)
137 struct get_pgm *pgm = source->aux;
141 /* Reads all the cases from the data file into C and passes them
142 to WRITE_CASE one by one, passing WC_DATA.

   When the program has no case map, each case is read straight into
   C.  Otherwise it is read into the bounce buffer and then copied into
   C through map_case(). */
144 get_source_read (struct case_source *source,
146 write_case_func *write_case, write_case_data wc_data)
148 struct get_pgm *pgm = source->aux;
/* No map: the file's layout already matches C. */
153 if (pgm->map == NULL)
154 ok = sfm_read_case (pgm->reader, c);
/* Map present: read in file layout, then rearrange into C. */
157 ok = sfm_read_case (pgm->reader, &pgm->bounce);
159 map_case (pgm->map, &pgm->bounce, c);
163 ok = write_case (wc_data);
/* Case source class used by GET (passed to create_case_source() by
   the GET parser).  Its initializer is not visible in this listing. */
168 const struct case_source_class get_source_class =
176 /* XSAVE transformation and SAVE procedure. */
/* save_trns_proc() and save_trns_free() cast a struct trns_header
   pointer back to struct save_trns, which relies on H being the first
   member. */
179 struct trns_header h; /* Transformation header (proc and free callbacks). */
180 struct sfm_writer *writer; /* System file writer. */
181 struct case_map *map; /* Map from active file to system file dict. */
182 struct ccase bounce; /* Bounce buffer: destination of map_case() before writing. */
/* Per-case callback for SAVE, and the transformation callbacks that
   cmd_save_internal() installs in the header for XSAVE. */
185 static int save_write_case_func (struct ccase *, void *);
186 static trns_proc_func save_trns_proc;
187 static trns_free_func save_trns_free;
189 /* Parses the SAVE or XSAVE command
190 and returns the parsed transformation.

   The transformation is allocated here with its proc/free callbacks
   set to save_trns_proc and save_trns_free.  After the VERSION and
   OUTFILE subcommands are read, a clone of the default dictionary is
   trimmed by trim_dictionary(), a case map is derived from it, and a
   system file writer is opened on FH.  On the visible failure path the
   transformation is released with save_trns_free(). */
191 static struct save_trns *
192 cmd_save_internal (void)
194 struct file_handle *fh = NULL;
195 struct dictionary *dict = NULL;
196 struct save_trns *t = NULL;
/* Compression defaults to the global setting; trim_dictionary() may
   override it through the pointer passed below. */
197 int compress = get_scompression ();
198 const int default_version = 3;
199 int version = default_version;
200 short no_name_table = 0;
202 t = xmalloc (sizeof *t);
203 t->h.proc = save_trns_proc;
204 t->h.free = save_trns_free;
207 case_nullify (&t->bounce);
210 /* Read most of the subcommands. */
213 if (lex_match_id ("VERSION"))
216 if ( lex_force_num() )
/* Tests whether the current identifier starts with "x" or "X"; the
   action taken is not visible here (presumably it sets NO_NAME_TABLE
   -- confirm). */
221 if ( 0 == strncasecmp (tokid,"x", 1) )
229 else if (lex_match_id ("OUTFILE"))
238 if ( ! lex_match('/') )
245 lex_error (_("expecting end of command"));
251 msg ( ME, _("The required %s subcommand was not present"), "OUTFILE");
/* Only the default version is supported: warn and fall back. */
255 if ( version != default_version )
257 msg (MW, _("Unsupported sysfile version: %d. Using version %d instead."),
258 version, default_version);
260 version = default_version;
/* Work on a clone so the active file's dictionary is left untouched. */
263 dict = dict_clone (default_dict);
264 start_case_map (dict);
265 if (!trim_dictionary (dict, OP_SAVE, &compress))
/* May be null when no value changed position. */
267 t->map = finish_case_map (dict);
/* Unlike the EXPORT parser, the bounce buffer is created here even
   when T->map is null. */
269 case_create (&t->bounce, dict_get_next_value_idx (dict));
271 t->writer = sfm_open_writer (fh, dict, compress, no_name_table);
272 if (t->writer == NULL)
282 save_trns_free (&t->h);
286 /* Parses and performs the SAVE procedure.
   Builds the transformation with cmd_save_internal(), runs the
   procedure with save_write_case_func() as the per-case callback, and
   then frees the transformation.
   NOTE(review): any check for a failed cmd_save_internal() falls on
   lines not visible here -- confirm it exists. */
290 struct save_trns *t = cmd_save_internal ();
293 procedure (save_write_case_func, t);
294 save_trns_free (&t->h);
302 /* Parses the XSAVE transformation command.
   Builds the transformation with cmd_save_internal() and hands its
   header to add_transformation() instead of running a procedure.
   NOTE(review): any check for a failed cmd_save_internal() falls on
   lines not visible here -- confirm it exists. */
306 struct save_trns *t = cmd_save_internal ();
309 add_transformation (&t->h);
316 /* Writes the given C to the file specified by T.
   Two paths are visible: C is written as-is, or it is first copied
   into T's bounce buffer through T->map and the bounce buffer is
   written.  The test that selects between them is not visible here
   (presumably T->map == NULL, as in export_write_case_func()). */
318 do_write_case (struct save_trns *t, struct ccase *c)
321 sfm_write_case (t->writer, c);
324 map_case (t->map, c, &t->bounce);
325 sfm_write_case (t->writer, &t->bounce);
329 /* Writes case C to the system file specified on SAVE.
   AUX is the struct save_trns passed to procedure() by the SAVE
   command.
   NOTE(review): AUX is annotated UNUSED although it is used below. */
331 save_write_case_func (struct ccase *c, void *aux UNUSED)
333 do_write_case (aux, c);
337 /* Writes case C to the system file specified on XSAVE.
   H is the header embedded in a struct save_trns; CASE_NUM is
   ignored. */
339 save_trns_proc (struct trns_header *h, struct ccase *c, int case_num UNUSED)
/* Recover the enclosing transformation from its header. */
341 struct save_trns *t = (struct save_trns *) h;
342 do_write_case (t, c);
346 /* Frees a SAVE transformation: closes the writer, destroys the case
   map, and destroys the bounce buffer.
   NOTE(review): cmd_save_internal() calls this on its failure path, so
   the hidden lines presumably guard against a null or
   partly-initialized T -- confirm. */
348 save_trns_free (struct trns_header *t_)
350 struct save_trns *t = (struct save_trns *) t_;
354 sfm_close_writer (t->writer);
355 destroy_case_map (t->map);
356 case_destroy (&t->bounce);
/* Parser for the RENAME subcommand, used by trim_dictionary(). */
360 static int rename_variables (struct dictionary *dict);
362 /* Commands that read and write system files share a great deal
363 of common syntactic structure for rearranging and dropping
364 variables. This function parses this syntax and modifies DICT
367 OP is the operation being performed. For operations that
368 write a system file, *COMPRESS is set to 1 if the system file
369 should be compressed, 0 otherwise.
371 Returns nonzero on success, zero on failure. */
372 /* FIXME: IN, FIRST, LAST, MAP. */
374 trim_dictionary (struct dictionary *dict, enum operation op, int *compress)
/* COMPRESS must be non-null for OP_SAVE and null for every other
   operation. */
376 assert ((compress != NULL) == (op == OP_SAVE));
/* NOTE(review): the statement controlled by this test is not visible.
   If it stores through COMPRESS it would dereference a null pointer
   for OP_READ, OP_EXPORT and OP_MATCH whenever compression is enabled
   globally -- confirm. */
377 if (get_scompression())
380 if (op == OP_SAVE || op == OP_EXPORT)
382 /* Delete all the scratch variables. */
/* V is sized for the worst case in which every variable is scratch. */
387 v = xmalloc (sizeof *v * dict_get_var_cnt (dict));
389 for (i = 0; i < dict_get_var_cnt (dict); i++)
390 if (dict_class_from_id (dict_get_var (dict, i)->name) == DC_SCRATCH)
391 v[nv++] = dict_get_var (dict, i);
392 dict_delete_vars (dict, v, nv);
/* For OP_MATCH the loop body is entered without consuming a '/';
   every other operation requires one before each subcommand. */
396 while (op == OP_MATCH || lex_match ('/'))
/* COMPRESSED and UNCOMPRESSED are recognized only for OP_SAVE. */
398 if (op == OP_SAVE && lex_match_id ("COMPRESSED"))
400 else if (op == OP_SAVE && lex_match_id ("UNCOMPRESSED"))
402 else if (lex_match_id ("DROP"))
408 if (!parse_variables (dict, &v, &nv, PV_NONE))
410 dict_delete_vars (dict, v, nv);
413 else if (lex_match_id ("KEEP"))
420 if (!parse_variables (dict, &v, &nv, PV_NONE))
423 /* Move the specified variables to the beginning. */
424 dict_reorder_vars (dict, v, nv);
426 /* Delete the remaining variables. */
/* The kept variables now occupy indexes 0 through NV - 1, so V is
   reused to list everything from index NV onward. */
427 v = xrealloc (v, (dict_get_var_cnt (dict) - nv) * sizeof *v);
428 for (i = nv; i < dict_get_var_cnt (dict); i++)
429 v[i - nv] = dict_get_var (dict, i);
430 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
433 else if (lex_match_id ("RENAME"))
435 if (!rename_variables (dict))
440 lex_error (_("while expecting a valid subcommand"));
/* An empty dictionary is reported as an error. */
444 if (dict_get_var_cnt (dict) == 0)
446 msg (SE, _("All variables deleted from system file dictionary."));
456 lex_error (_("expecting end of command"));
462 dict_compact_values (dict);
466 /* Parses and performs the RENAME subcommand of GET and SAVE.

   Two forms are handled: a single OLD=NEW pair, renamed with
   dict_rename_var(), and one or more parenthesized groups whose old
   and new names are accumulated across groups and applied together
   with dict_rename_vars(). */
468 rename_variables (struct dictionary *dict)
/* Single-pair form. */
486 v = parse_dict_variable (dict);
489 if (!lex_force_match ('=')
/* True when the new name equals the old one over SHORT_NAME_LEN
   characters; the action taken is not visible here. */
492 if (!strncmp (tokid, v->name, SHORT_NAME_LEN))
/* Reject a new name that some variable in DICT already has. */
494 if (dict_lookup_var (dict, tokid) != NULL)
496 msg (SE, _("Cannot rename %s as %s because there already exists "
497 "a variable named %s. To rename variables with "
498 "overlapping names, use a single RENAME subcommand "
499 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
500 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
504 dict_rename_var (dict, v, tokid);
/* Parenthesized form: PV_APPEND makes each group add to V and
   NEW_NAMES rather than replace them. */
513 while (lex_match ('('))
517 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
519 if (!lex_match ('='))
521 msg (SE, _("`=' expected after variable list."));
524 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
/* The message reports per-group counts as differences from OLD_NV. */
528 msg (SE, _("Number of variables on left side of `=' (%d) does not "
529 "match number of variables on right side (%d), in "
530 "parenthesized group %d of RENAME subcommand."),
531 nv - old_nv, nn - old_nv, group);
534 if (!lex_force_match (')'))
/* Apply all accumulated renames in one step. */
539 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
541 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
/* Walks the NN collected new names (presumably to free them; the loop
   body is not visible here). */
547 for (i = 0; i < nn; i++)
555 /* EXPORT procedure. */
558 struct pfm_writer *writer; /* Portable file writer. */
559 struct case_map *map; /* Map from active file to portable file dict; null when no mapping is needed. */
560 struct ccase bounce; /* Bounce buffer: created only when MAP is non-null. */
/* Per-case callback and cleanup routine for EXPORT; defined below. */
563 static int export_write_case_func (struct ccase *, void *);
564 static void export_proc_free (struct export_proc *);
566 /* Parses the EXPORT command.
   A clone of the default dictionary is trimmed by trim_dictionary(),
   a case map is derived from it, a portable file writer is opened on
   FH, and the procedure is run with export_write_case_func() as the
   per-case callback.  The state is released with export_proc_free()
   after the procedure, and again at the second call site below
   (presumably the error path -- its label is not visible here). */
567 /* FIXME: same as cmd_save_internal(). */
571 struct file_handle *fh;
572 struct dictionary *dict;
573 struct export_proc *proc;
575 proc = xmalloc (sizeof *proc);
578 case_nullify (&proc->bounce);
581 if (lex_match_id ("OUTFILE"))
587 dict = dict_clone (default_dict);
588 start_case_map (dict);
/* OP_EXPORT carries no compression flag, hence the null COMPRESS. */
589 if (!trim_dictionary (dict, OP_EXPORT, NULL))
591 proc->map = finish_case_map (dict);
/* The bounce buffer is needed only when cases must be rearranged. */
592 if (proc->map != NULL)
593 case_create (&proc->bounce, dict_get_next_value_idx (dict));
595 proc->writer = pfm_open_writer (fh, dict);
596 if (proc->writer == NULL)
601 procedure (export_write_case_func, proc);
602 export_proc_free (proc);
609 export_proc_free (proc);
614 /* Writes case C to the EXPORT file.
   AUX is the struct export_proc passed to procedure().  Without a
   case map C is written directly; with one, C is first copied into
   the bounce buffer through the map and the bounce buffer is
   written. */
616 export_write_case_func (struct ccase *c, void *aux)
618 struct export_proc *proc = aux;
619 if (proc->map == NULL)
620 pfm_write_case (proc->writer, c);
623 map_case (proc->map, c, &proc->bounce);
624 pfm_write_case (proc->writer, &proc->bounce);
/* Releases an EXPORT procedure's state: closes the portable file
   writer, destroys the case map, and destroys the bounce buffer.
   NOTE(review): PROC->map may be null and PROC->bounce may never have
   been created (see the EXPORT parser), so destroy_case_map() and
   case_destroy() must accept those states -- confirm. */
630 export_proc_free (struct export_proc *proc)
634 pfm_close_writer (proc->writer);
635 destroy_case_map (proc->map);
636 case_destroy (&proc->bounce);
642 #include "debug-print.h"
/* Values for the `type' member of struct mtf_file. */
647 MTF_FILE, /* Specified on FILE= subcommand. */
648 MTF_TABLE /* Specified on TABLE= subcommand. */
651 /* One of the files on MATCH FILES. */
654 struct mtf_file *next, *prev;
655 /* Next, previous in the list of files. */
656 struct mtf_file *next_min; /* Next in the chain of minimums (also links the chain of non-minimums in mtf_processing()). */
658 int type; /* One of MTF_*. */
659 struct variable **by; /* List of BY variables for this file. */
660 struct file_handle *handle; /* File handle; null identifies the active file. */
661 struct sfm_reader *reader; /* System file reader. */
662 struct dictionary *dict; /* Dictionary from system file, or default_dict for the active file. */
663 char in[SHORT_NAME_LEN + 1]; /* Name of the variable from IN=. */
664 char first[SHORT_NAME_LEN + 1];
665 char last[SHORT_NAME_LEN + 1]; /* Name of the variables from FIRST=, LAST=. */
666 struct ccase input; /* Input record; when it is a null case, the processing code uses the procedure's case instead. */
669 /* MATCH FILES procedure. */
672 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
673 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
675 struct variable **by; /* Variables on the BY subcommand. */
676 size_t by_cnt; /* Number of variables on BY subcommand. */
678 struct dictionary *dict; /* Dictionary of output file. */
679 struct case_sink *sink; /* Sink to receive output. */
680 struct ccase *mtf_case; /* Case used for output. */
682 unsigned seq_num; /* Sequence number of the output record being built. */
683 unsigned *seq_nums; /* Sequence numbers for each var in dict; an entry equal to seq_num marks a variable already stored for the current record. */
/* Forward declarations for the MATCH FILES implementation. */
686 static void mtf_free (struct mtf_proc *);
687 static void mtf_free_file (struct mtf_file *);
688 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
689 static void mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
/* These three take the struct mtf_proc as an untyped pointer. */
691 static void mtf_read_nonactive_records (void *);
692 static void mtf_processing_finish (void *);
693 static int mtf_processing (struct ccase *, void *);
695 static char *var_type_description (struct variable *);
/* Link a file variable to its counterpart in the output dictionary. */
697 static void set_master (struct variable *, struct variable *master);
698 static struct variable *get_master (struct variable *);
700 /* Parse and execute the MATCH FILES command.

   The subcommand loop builds a doubly linked list of input files with
   every FILE ahead of every TABLE, records the BY variables, and lets
   trim_dictionary() edit the dictionary of the most recent file.  The
   file dictionaries are then merged into the output dictionary, each
   file's BY variables are looked up, and the merge itself is driven
   through procedure().

   NOTE(review): near the end this function calls
   mtf_read_nonactive_records (NULL), procedure (mtf_processing, NULL)
   and mtf_processing_finish (NULL), yet each of those routines
   converts its argument to a struct mtf_proc pointer and dereferences
   it.  Confirm whether the address of the local MTF was intended. */
702 cmd_match_files (void)
705 struct mtf_file *first_table = NULL;
706 struct mtf_file *iter;
710 mtf.head = mtf.tail = NULL;
713 mtf.dict = dict_create ();
/* The output dictionary inherits the active file's case limit. */
718 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
724 if (lex_match (T_BY))
728 msg (SE, _("The BY subcommand may be given once at most."));
734 if (!parse_variables (mtf.dict, &mtf.by, &mtf.by_cnt,
735 PV_NO_DUPLICATE | PV_NO_SCRATCH))
738 else if (token != T_ID)
743 else if (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid))
745 struct mtf_file *file = xmalloc (sizeof *file);
747 if (lex_match_id ("FILE"))
748 file->type = MTF_FILE;
749 else if (lex_match_id ("TABLE"))
751 file->type = MTF_TABLE;
762 file->first[0] = '\0';
763 file->last[0] = '\0';
764 case_nullify (&file->input);
766 /* FILEs go first, then TABLEs. */
/* A TABLE, or any file seen before the first TABLE, is appended at
   the tail; a FILE seen after a TABLE is spliced in just ahead of
   FIRST_TABLE. */
767 if (file->type == MTF_TABLE || first_table == NULL)
770 file->prev = mtf.tail;
772 mtf.tail->next = file;
774 if (mtf.head == NULL)
776 if (file->type == MTF_TABLE && first_table == NULL)
781 assert (file->type == MTF_FILE);
782 file->next = first_table;
783 file->prev = first_table->prev;
784 if (first_table->prev)
785 first_table->prev->next = file;
788 first_table->prev = file;
800 msg (SE, _("The active file may not be specified more "
806 assert (pgm_state != STATE_INPUT);
807 if (pgm_state == STATE_INIT)
809 msg (SE, _("Cannot specify the active file since no active "
810 "file has been defined."));
817 _("MATCH FILES may not be used after TEMPORARY when "
818 "the active file is an input source. "
819 "Temporary transformations will be made permanent."));
/* The active file shares the default dictionary instead of owning one. */
823 file->dict = default_dict;
827 file->handle = fh_parse ();
828 if (file->handle == NULL)
831 file->reader = sfm_open_reader (file->handle, &file->dict, NULL);
832 if (file->reader == NULL)
835 case_create (&file->input, dict_get_next_value_idx (file->dict));
838 else if (lex_id_match ("IN", tokid)
839 || lex_id_match ("FIRST", tokid)
840 || lex_id_match ("LAST", tokid))
/* IN, FIRST and LAST apply to the most recently named file. */
845 if (mtf.tail == NULL)
847 msg (SE, _("IN, FIRST, and LAST subcommands may not occur "
848 "before the first FILE or TABLE."));
852 if (lex_match_id ("IN"))
857 else if (lex_match_id ("FIRST"))
859 name = mtf.tail->first;
862 else if (lex_match_id ("LAST"))
864 name = mtf.tail->last;
882 msg (SE, _("Multiple %s subcommands for a single FILE or "
887 strcpy (name, tokid);
890 if (!dict_create_var (mtf.dict, name, 0))
892 msg (SE, _("Duplicate variable name %s while creating %s "
898 else if (lex_id_match ("RENAME", tokid)
899 || lex_id_match ("KEEP", tokid)
900 || lex_id_match ("DROP", tokid))
902 if (mtf.tail == NULL)
904 msg (SE, _("RENAME, KEEP, and DROP subcommands may not occur "
905 "before the first FILE or TABLE."));
909 if (!trim_dictionary (mtf.tail->dict, OP_MATCH, NULL))
912 else if (lex_match_id ("MAP"))
922 while (token != '.');
/* NOTE(review): mtf_merge_dictionary() returns int, but its result is
   ignored here -- confirm a failed merge should not abort. */
924 for (iter = mtf.head; iter != NULL; iter = iter->next)
925 mtf_merge_dictionary (mtf.dict, iter);
931 msg (SE, _("The BY subcommand is required when a TABLE subcommand "
/* Resolve every BY variable by name in each file's own dictionary. */
939 for (iter = mtf.head; iter != NULL; iter = iter->next)
943 iter->by = xmalloc (sizeof *iter->by * mtf.by_cnt);
945 for (i = 0; i < mtf.by_cnt; i++)
947 iter->by[i] = dict_lookup_var (iter->dict, mtf.by[i]->name);
948 if (iter->by[i] == NULL)
950 msg (SE, _("File %s lacks BY variable %s."),
951 iter->handle ? handle_get_name (iter->handle) : "*",
959 /* MATCH FILES performs an n-way merge on all its input files.
962 1. Read one input record from every input FILE.
964 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
966 3. Find the FILE input record with minimum BY values. Store all
967 the values from this input record into the output record.
969 4. Find all the FILE input records with BY values identical to
970 the minimums. Store all the values from these input records into
973 5. For every TABLE, read another record as long as the BY values
974 on the TABLE's input record are less than the FILEs' BY values.
975 If an exact match is found, store all the values from the TABLE
976 input record into the output record.
978 6. Write the output record.
980 7. Read another record from each input file FILE and TABLE that
981 we stored values from above. If we come to the end of one of the
982 input files, remove it from the list of input files.
984 8. Repeat from step 2.
986 Unfortunately, this algorithm can't be directly implemented
987 because there's no function to read a record from the active
988 file; instead, it has to be done using callbacks.
990 FIXME: For merging large numbers of files (more than 10?) a
991 better algorithm would use a heap for finding minimum
995 discard_variables ();
997 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
999 mtf.seq_nums = xmalloc (dict_get_var_cnt (mtf.dict)
1000 * sizeof *mtf.seq_nums);
1001 memset (mtf.seq_nums, 0,
1002 dict_get_var_cnt (mtf.dict) * sizeof *mtf.seq_nums);
1003 mtf.mtf_case = xmalloc (dict_get_case_size (mtf.dict));
1005 mtf_read_nonactive_records (NULL);
1007 procedure (mtf_processing, NULL);
1008 mtf_processing_finish (NULL);
1010 dict_destroy (default_dict);
1011 default_dict = mtf.dict;
1013 vfm_source = mtf.sink->class->make_source (mtf.sink);
1014 free_case_sink (mtf.sink);
1024 /* Repeats 2...8 an arbitrary number of times.
   MTF_ is the struct mtf_proc.  The active file (the entry whose
   handle is null) is first removed from the chain; mtf_processing() is
   then called with a null case for as long as the head of the chain is
   an MTF_FILE, stopping early when it returns zero (the statement
   taken in that case is not visible here). */
1026 mtf_processing_finish (void *mtf_)
1028 struct mtf_proc *mtf = mtf_;
1029 struct mtf_file *iter;
1031 /* Find the active file and delete it. */
1032 for (iter = mtf->head; iter; iter = iter->next)
1033 if (iter->handle == NULL)
1035 mtf_delete_file_in_place (mtf, &iter);
1039 while (mtf->head && mtf->head->type == MTF_FILE)
1040 if (!mtf_processing (NULL, mtf))
/* Writes a description of V's type into one slot of a two-entry
   static buffer: "numeric" for a NUMERIC variable, or
   "string with width N" for an ALPHA one.
   NOTE(review): how the slot S is chosen is not visible here;
   presumably the two slots alternate so that two descriptions can
   appear in one msg() call, as mtf_merge_dictionary() does --
   confirm. */
1044 /* Return a string in a static buffer describing V's variable type and
1047 var_type_description (struct variable *v)
1049 static char buf[2][32];
1056 if (v->type == NUMERIC)
1057 strcpy (s, "numeric");
1060 assert (v->type == ALPHA);
1061 sprintf (s, "string with width %d", v->width);
1066 /* Free FILE and associated data.
   The reader is closed and the input case destroyed.  The dictionary
   is destroyed only when it is not the default dictionary, which the
   active file merely borrows. */
1068 mtf_free_file (struct mtf_file *file)
1071 sfm_close_reader (file->reader);
1072 if (file->dict != default_dict)
1073 dict_destroy (file->dict);
1074 case_destroy (&file->input);
1078 /* Free all the data for the MATCH FILES procedure.
   Every file still on the chain is freed with mtf_free_file(); NEXT
   carries the successor across each call.  The output dictionary and
   the sequence-number array are then released. */
1080 mtf_free (struct mtf_proc *mtf)
1082 struct mtf_file *iter, *next;
1084 for (iter = mtf->head; iter; iter = next)
1088 mtf_free_file (iter);
1093 dict_destroy (mtf->dict);
1094 free (mtf->seq_nums);
1097 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1098 file in the chain, or to NULL if it was the last in the chain.

   After unlinking, the output case's values for each of the file's
   variables are reset: string values are filled with spaces, and
   numeric values are assigned on a line not visible here (presumably
   the system-missing value -- confirm). */
1100 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1102 struct mtf_file *f = *file;
/* Unlink F; the conditions guarding these four updates are not
   visible here. */
1105 f->prev->next = f->next;
1107 f->next->prev = f->prev;
1109 mtf->head = f->next;
1111 mtf->tail = f->prev;
1117 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1119 struct variable *v = dict_get_var (f->dict, i);
/* The output slot belongs to V's master variable in the output
   dictionary. */
1120 union value *out = case_data_rw (mtf->mtf_case, get_master (v)->fv);
1122 if (v->type == NUMERIC)
1125 memset (out->s, ' ', v->width);
1132 /* Read a record from every input file except the active file.
   MTF_ is the struct mtf_proc.  A file whose read fails is removed
   from the chain with mtf_delete_file_in_place(), which also advances
   ITER; this is why the for statement has no increment clause.
   NOTE(review): MTF_ is annotated UNUSED although it is used below. */
1134 mtf_read_nonactive_records (void *mtf_ UNUSED)
1136 struct mtf_proc *mtf = mtf_;
1137 struct mtf_file *iter;
1139 for (iter = mtf->head; iter; )
1143 if (!sfm_read_case (iter->reader, &iter->input))
1144 mtf_delete_file_in_place (mtf, &iter);
1153 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1154 if A == B, 1 if A > B.

   A file whose own input case is null is compared using case C
   instead.  Numeric BY variables are compared as doubles and string
   BY variables with memcmp(). */
1156 mtf_compare_BY_values (struct mtf_proc *mtf,
1157 struct mtf_file *a, struct mtf_file *b,
1160 struct ccase *a_input, *b_input;
/* NOTE(review): this assertion tolerates a null A or B, but the next
   two lines dereference both unconditionally -- confirm callers never
   pass a null file. */
1163 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1164 a_input = case_is_null (&a->input) ? c : &a->input;
1165 b_input = case_is_null (&b->input) ? c : &b->input;
1166 for (i = 0; i < mtf->by_cnt; i++)
/* Corresponding BY variables must agree in type and width. */
1168 assert (a->by[i]->type == b->by[i]->type);
1169 assert (a->by[i]->width == b->by[i]->width);
1171 if (a->by[i]->type == NUMERIC)
1173 double af = case_num (a_input, a->by[i]->fv);
1174 double bf = case_num (b_input, b->by[i]->fv);
1185 assert (a->by[i]->type == ALPHA);
1186 result = memcmp (case_str (a_input, a->by[i]->fv),
1187 case_str (b_input, b->by[i]->fv),
1191 else if (result > 0)
1198 /* Perform one iteration of steps 3...7 above.

   C is the active file's current case, or null when called from
   mtf_processing_finish().  MTF_ is the struct mtf_proc.  Files are
   sorted into a "min" chain (those holding the minimum BY values) and
   a "max" chain (everything else), both linked through next_min.
   Values from the min chain are copied into the output case, the
   variables of the max chain are reset, the record is written to the
   sink, and the min-chain files are advanced.

   NOTE(review): MTF_ is annotated UNUSED although it is used below. */
1200 mtf_processing (struct ccase *c, void *mtf_ UNUSED)
1202 struct mtf_proc *mtf = mtf_;
1203 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1204 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BY values. */
1205 struct mtf_file *iter; /* Iterator. */
1209 /* If the active file doesn't have the minimum BY values, don't
1210 return because that would cause a record to be skipped. */
1213 if (mtf->head->type == MTF_TABLE)
1216 /* 3. Find the FILE input record with minimum BY values. Store
1217 all the values from this input record into the output record.
1219 4. Find all the FILE input records with BY values identical
1220 to the minimums. Store all the values from these input
1221 records into the output record. */
/* Start with the head file as the only minimum and compare each
   remaining FILE against it. */
1222 min_head = min_tail = mtf->head;
1223 max_head = max_tail = NULL;
1224 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1226 switch (mtf_compare_BY_values (mtf, min_head, iter, c))
/* ITER joins the max chain. */
1230 max_tail = max_tail->next_min = iter;
1232 max_head = max_tail = iter;
/* ITER joins the min chain. */
1236 min_tail = min_tail->next_min = iter;
/* The whole current min chain moves onto the max chain and ITER
   becomes the sole minimum. */
1242 max_tail->next_min = min_head;
1243 max_tail = min_tail;
1247 max_head = min_head;
1248 max_tail = min_tail;
1250 min_head = min_tail = iter;
1257 /* 5. For every TABLE, read another record as long as the BY
1258 values on the TABLE's input record are less than the FILEs'
1259 BY values. If an exact match is found, store all the values
1260 from the TABLE input record into the output record. */
1263 struct mtf_file *next = iter->next;
1265 assert (iter->type == MTF_TABLE);
1267 if (iter->handle == NULL)
1271 switch (mtf_compare_BY_values (mtf, min_head, iter, c))
1275 max_tail = max_tail->next_min = iter;
1277 max_head = max_tail = iter;
1281 min_tail = min_tail->next_min = iter;
1285 if (iter->handle == NULL)
1287 if (sfm_read_case (iter->reader, &iter->input))
1289 mtf_delete_file_in_place (mtf, &iter);
1299 /* Next sequence number. */
1302 /* Store data to all the records we are using. */
1304 min_tail->next_min = NULL;
1305 for (iter = min_head; iter; iter = iter->next_min)
1309 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1311 struct variable *v = dict_get_var (iter->dict, i);
1312 struct ccase *record;
/* A matching sequence number means this master variable was already
   handled for the current record. */
1315 if (mtf->seq_nums[get_master (v)->index] == mtf->seq_num)
1317 mtf->seq_nums[get_master (v)->index] = mtf->seq_num;
/* Files without an input case of their own take their data from C. */
1319 record = case_is_null (&iter->input) ? c : &iter->input;
1321 assert (v->type == NUMERIC || v->type == ALPHA);
1322 out = case_data_rw (mtf->mtf_case, get_master (v)->fv);
1323 if (v->type == NUMERIC)
1324 out->f = case_num (record, v->fv);
1326 memcpy (out->s, case_str (record, v->fv), v->width);
1330 /* Store missing values to all the records we're not using. */
1332 max_tail->next_min = NULL;
1333 for (iter = max_head; iter; iter = iter->next_min)
1337 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1339 struct variable *v = dict_get_var (iter->dict, i);
1342 if (mtf->seq_nums[get_master (v)->index] == mtf->seq_num)
1344 mtf->seq_nums[get_master (v)->index] = mtf->seq_num;
1346 out = case_data_rw (mtf->mtf_case, get_master (v)->fv);
1347 if (v->type == NUMERIC)
1350 memset (out->s, ' ', v->width);
1353 if (iter->handle == NULL)
1357 /* 6. Write the output record. */
1358 mtf->sink->class->write (mtf->sink, mtf->mtf_case);
1360 /* 7. Read another record from each input file FILE and TABLE
1361 that we stored values from above. If we come to the end of
1362 one of the input files, remove it from the list of input
1364 for (iter = min_head; iter && iter->type == MTF_FILE; )
1366 struct mtf_file *next = iter->next_min;
1368 if (iter->reader != NULL)
1370 if (!sfm_read_case (iter->reader, &iter->input))
1371 mtf_delete_file_in_place (mtf, &iter);
1381 return (mtf->head && mtf->head->type != MTF_TABLE);
1384 /* Merge the dictionary for file F into master dictionary M.

   M takes F's file label if it has none, and F's documents are set on
   or appended to M's.  Each variable of F is then matched by name
   against M: a variable that M lacks is cloned into M, a variable
   whose width disagrees is reported as an error, and a variable of
   equal width donates its value labels, missing values and label where
   M's copy has none.  Finally each of F's variables is linked to its
   master in M with set_master(). */
1386 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1388 struct dictionary *d = f->dict;
1389 const char *d_docs, *m_docs;
1391 if (dict_get_label (m) == NULL)
1392 dict_set_label (m, dict_get_label (d));
1394 d_docs = dict_get_documents (d);
1395 m_docs = dict_get_documents (m);
1399 dict_set_documents (m, d_docs);
/* Both dictionaries have documents: concatenate M's, then D's. */
1405 new_len = strlen (m_docs) + strlen (d_docs);
1406 new_docs = xmalloc (new_len + 1);
1407 strcpy (new_docs, m_docs);
1408 strcat (new_docs, d_docs);
1409 dict_set_documents (m, new_docs);
1414 dict_compact_values (d);
1419 for (i = 0; i < dict_get_var_cnt (d); i++)
1421 struct variable *dv = dict_get_var (d, i);
1422 struct variable *mv = dict_lookup_var (m, dv->name);
/* Any variable that is not ALPHA must have width 0; that is what lets
   a width comparison stand in for a type-and-width comparison below. */
1424 assert (dv->type == ALPHA || dv->width == 0);
1425 assert (!mv || mv->type == ALPHA || mv->width == 0);
1426 if (mv && dv->width == mv->width)
/* NOTE(review): MV's existing (empty) label set is overwritten without
   being destroyed first -- confirm this does not leak. */
1428 if (val_labs_count (dv->val_labs)
1429 && !val_labs_count (mv->val_labs))
1430 mv->val_labs = val_labs_copy (dv->val_labs);
1431 if (dv->miss_type != MISSING_NONE
1432 && mv->miss_type == MISSING_NONE)
1433 copy_missing_values (mv, dv);
1435 if (mv && dv->label && !mv->label)
1436 mv->label = xstrdup (dv->label);
1439 mv = dict_clone_var (m, dv, dv->name, dv->longname);
1440 assert (mv != NULL);
1442 else if (mv->width != dv->width)
/* NOTE(review): F->handle is null for the active file, and
   cmd_match_files guards its own handle_get_name() call for that case;
   this call is unguarded -- confirm handle_get_name() accepts null. */
1444 msg (SE, _("Variable %s in file %s (%s) has different "
1445 "type or width from the same variable in "
1446 "earlier file (%s)."),
1447 dv->name, handle_get_name (f->handle),
1448 var_type_description (dv), var_type_description (mv));
1451 set_master (dv, mv);
1458 /* Marks V's master variable as MASTER.
   The link is kept in V's aux data; a null destructor is passed to
   var_attach_aux(). */
1460 set_master (struct variable *v, struct variable *master)
1462 var_attach_aux (v, master, NULL);
1465 /* Returns the master variable corresponding to V,
1466 as set with set_master().
   V must already have aux data attached; this is asserted. */
1467 static struct variable *
1468 get_master (struct variable *v)
1470 assert (v->aux != NULL);
1474 /* IMPORT command. */
1476 /* IMPORT input program. */
1479 struct pfm_reader *reader; /* Portable file reader. */
1480 struct case_map *map; /* Map from portable file to active file dict; null when cases are read directly. */
1481 struct ccase bounce; /* Bounce buffer: receives each raw case when MAP is non-null. */
/* Closes the reader and destroys the map and bounce buffer of a
   struct import_pgm; defined below. */
1484 static void import_pgm_free (struct import_pgm *);
1486 /* Parses the IMPORT command.

   Accepts a FILE subcommand (or a bare string token) and a TYPE
   subcommand whose value must be COMM or TAPE.  The active file's
   variables are then discarded, a portable file reader is opened on
   FH (which also yields the file's dictionary in DICT), the dictionary
   is trimmed by trim_dictionary() between start_case_map() and
   finish_case_map(), and the result replaces the default dictionary.
   The last two statements release PGM and DICT (presumably the error
   path -- its label is not visible here). */
1490 struct import_pgm *pgm = NULL;
1491 struct file_handle *fh = NULL;
1492 struct dictionary *dict = NULL;
1495 pgm = xmalloc (sizeof *pgm);
1498 case_nullify (&pgm->bounce);
1504 if (lex_match_id ("FILE") || token == T_STRING)
1512 else if (lex_match_id ("TYPE"))
1516 if (lex_match_id ("COMM"))
1518 else if (lex_match_id ("TAPE"))
1522 lex_error (_("expecting COMM or TAPE"));
1528 if (!lex_match ('/') && token != '.')
1534 discard_variables ();
1536 pgm->reader = pfm_open_reader (fh, &dict, NULL);
1537 if (pgm->reader == NULL)
/* The bounce buffer is sized before trimming, i.e. for the full
   dictionary as read from the file. */
1539 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
1541 start_case_map (dict);
/* OP_READ carries no compression flag, hence the null COMPRESS. */
1542 if (!trim_dictionary (dict, OP_READ, NULL))
1544 pgm->map = finish_case_map (dict);
1546 dict_destroy (default_dict);
1547 default_dict = dict;
1549 vfm_source = create_case_source (&import_source_class, pgm);
1554 import_pgm_free (pgm);
1556 dict_destroy (dict);
1560 /* Frees a struct import_pgm: closes its reader, destroys its case
   map, and destroys its bounce buffer.
   NOTE(review): the IMPORT parser calls this on a path where the
   reader or map may still be unset, so the hidden lines presumably
   guard against that -- confirm. */
1562 import_pgm_free (struct import_pgm *pgm)
1566 pfm_close_reader (pgm->reader);
1567 destroy_case_map (pgm->map);
1568 case_destroy (&pgm->bounce);
1573 /* Clears internal state related to IMPORT input procedure.
   SOURCE->aux is treated as the struct import_pgm for this source and
   is released with import_pgm_free(). */
1575 import_source_destroy (struct case_source *source)
1577 struct import_pgm *pgm = source->aux;
1578 import_pgm_free (pgm);
1581 /* Reads all the cases from the data file into C and passes them
1582 to WRITE_CASE one by one, passing WC_DATA.

   When the program has no case map, each case is read straight into
   C.  Otherwise it is read into the bounce buffer and then copied into
   C through map_case(). */
1584 import_source_read (struct case_source *source,
1586 write_case_func *write_case, write_case_data wc_data)
1588 struct import_pgm *pgm = source->aux;
/* No map: the file's layout already matches C. */
1593 if (pgm->map == NULL)
1594 ok = pfm_read_case (pgm->reader, c);
/* Map present: read in file layout, then rearrange into C. */
1597 ok = pfm_read_case (pgm->reader, &pgm->bounce);
1599 map_case (pgm->map, &pgm->bounce, c);
1603 ok = write_case (wc_data);
/* Case source class used by IMPORT (passed to create_case_source() by
   the IMPORT parser).  Only the destroy member of its initializer is
   visible in this listing. */
1608 const struct case_source_class import_source_class =
1613 import_source_destroy,
1619 A case map copies data from a case that corresponds to one
1620 dictionary to a case that corresponds to a second dictionary
1621 derived from the first by, optionally, deleting, reordering,
1622 or renaming variables. (No new variables may be created.)
1628 size_t value_cnt; /* Number of values in map. */
1629 int *map; /* For each destination index, the
1630 corresponding source index; finish_case_map() asserts
   that an entry is -1 before it assigns a source index. */
1633 /* Prepares dictionary D for producing a case map. Afterward,
1634 the caller may delete, reorder, or rename variables within D
1635 at will before using finish_case_map() to produce the case
1638 Uses D's aux members, which may not otherwise be in use.

   Each variable receives a heap-allocated int as its aux data, freed
   through var_dtor_free.  finish_case_map() reads that int back as the
   variable's original value index, so it is presumably set to V->fv on
   a line not visible here -- confirm. */
1640 start_case_map (struct dictionary *d)
1642 size_t var_cnt = dict_get_var_cnt (d);
1645 for (i = 0; i < var_cnt; i++)
1647 struct variable *v = dict_get_var (d, i);
1648 int *src_fv = xmalloc (sizeof *src_fv);
1650 var_attach_aux (v, src_fv, var_dtor_free);
1654 /* Produces a case map from dictionary D, which must have been
1655 previously prepared with start_case_map().
1657 Does not retain any reference to D, and clears the aux members
1658 set up by start_case_map().
1660 Returns the new case map, or a null pointer if no mapping is
1661 required (that is, no data has changed position). */
1662 static struct case_map *
1663 finish_case_map (struct dictionary *d)
1665 struct case_map *map;
1666 size_t var_cnt = dict_get_var_cnt (d);
/* One map entry per value in D's current layout. */
1670 map = xmalloc (sizeof *map);
1671 map->value_cnt = dict_get_next_value_idx (d);
1672 map->map = xmalloc (sizeof *map->map * map->value_cnt);
/* Initializes every entry; the assigned value is not visible here, but
   the assertion below implies it is -1. */
1673 for (i = 0; i < map->value_cnt; i++)
1677 for (i = 0; i < var_cnt; i++)
1679 struct variable *v = dict_get_var (d, i);
/* Take back the source index saved by start_case_map(). */
1680 int *src_fv = (int *) var_detach_aux (v);
/* A variable whose value index moved; the statement this controls is
   not visible here. */
1683 if (v->fv != *src_fv)
/* Map each of the variable's NV values from its old slot to its new
   one. */
1686 for (idx = 0; idx < v->nv; idx++)
1688 int src_idx = *src_fv + idx;
1689 int dst_idx = v->fv + idx;
/* No destination slot may be claimed twice. */
1691 assert (map->map[dst_idx] == -1);
1692 map->map[dst_idx] = src_idx;
/* The map is discarded here (presumably on the no-mapping-needed path
   that returns a null pointer). */
1699 destroy_case_map (map);
/* Scans back over trailing entries that are still -1 (presumably
   shrinking value_cnt; the loop body is not visible here). */
1703 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1709 /* Maps from SRC to DST, applying case map MAP.
   MAP, SRC and DST must all be non-null and SRC must differ from DST;
   these are asserted.  For each destination index the value at the
   corresponding source index is copied.
   NOTE(review): entries left at -1 are presumably skipped by a test on
   a line not visible here -- confirm. */
1711 map_case (const struct case_map *map,
1712 const struct ccase *src, struct ccase *dst)
1716 assert (map != NULL);
1717 assert (src != NULL);
1718 assert (dst != NULL);
1719 assert (src != dst);
1721 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1723 int src_idx = map->map[dst_idx];
1725 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1729 /* Destroys case map MAP. */
1731 destroy_case_map (struct case_map *map)