1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
26 #include "dictionary.h"
28 #include "file-handle.h"
33 #include "pfm-write.h"
36 #include "sfm-write.h"
38 #include "value-labels.h"
43 #include "debug-print.h"
45 /* Rearranging and reducing a dictionary. */
46 static void start_case_map (struct dictionary *);
47 static struct case_map *finish_case_map (struct dictionary *);
48 static void map_case (const struct case_map *,
49 const struct ccase *, struct ccase *);
50 static void destroy_case_map (struct case_map *);
55 OP_READ, /* GET or IMPORT. */
56 OP_SAVE, /* SAVE or XSAVE. */
57 OP_EXPORT, /* EXPORT. */
58 OP_MATCH /* MATCH FILES. */
61 static int trim_dictionary (struct dictionary *,
62 enum operation, int *compress);
64 /* GET input program. */
67 struct sfm_reader *reader; /* System file reader. */
68 struct case_map *map; /* Map from system file to active file dict. */
69 struct ccase bounce; /* Bounce buffer. */
72 static void get_pgm_free (struct get_pgm *);
74 /* Parses the GET command. */
78 struct get_pgm *pgm = NULL;
79 struct file_handle *fh;
80 struct dictionary *dict = NULL;
82 pgm = xmalloc (sizeof *pgm);
85 case_nullify (&pgm->bounce);
90 if (lex_match_id ("FILE"))
96 pgm->reader = sfm_open_reader (fh, &dict, NULL);
97 if (pgm->reader == NULL)
99 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
101 start_case_map (dict);
102 if (!trim_dictionary (dict, OP_READ, NULL))
104 pgm->map = finish_case_map (dict);
106 dict_destroy (default_dict);
109 vfm_source = create_case_source (&get_source_class, default_dict, pgm);
120 /* Frees a struct get_pgm. */
122 get_pgm_free (struct get_pgm *pgm)
126 sfm_close_reader (pgm->reader);
127 destroy_case_map (pgm->map);
128 case_destroy (&pgm->bounce);
133 /* Clears internal state related to GET input procedure. */
135 get_source_destroy (struct case_source *source)
137 struct get_pgm *pgm = source->aux;
141 /* Reads all the cases from the data file into C and passes them
142 to WRITE_CASE one by one, passing WC_DATA. */
144 get_source_read (struct case_source *source,
146 write_case_func *write_case, write_case_data wc_data)
148 struct get_pgm *pgm = source->aux;
153 if (pgm->map == NULL)
154 ok = sfm_read_case (pgm->reader, c);
157 ok = sfm_read_case (pgm->reader, &pgm->bounce);
159 map_case (pgm->map, &pgm->bounce, c);
163 ok = write_case (wc_data);
168 const struct case_source_class get_source_class =
176 /* XSAVE transformation and SAVE procedure. */
179 struct trns_header h;
180 struct sfm_writer *writer; /* System file writer. */
181 struct case_map *map; /* Map from active file to system file dict. */
182 struct ccase bounce; /* Bounce buffer. */
185 static int save_write_case_func (struct ccase *, void *);
186 static trns_proc_func save_trns_proc;
187 static trns_free_func save_trns_free;
189 /* Parses the SAVE or XSAVE command
190 and returns the parsed transformation. */
191 static struct save_trns *
192 cmd_save_internal (void)
194 struct file_handle *fh;
195 struct dictionary *dict = NULL;
196 struct save_trns *t = NULL;
197 int compress = get_scompression ();
199 t = xmalloc (sizeof *t);
200 t->h.proc = save_trns_proc;
201 t->h.free = save_trns_free;
204 case_nullify (&t->bounce);
207 if (lex_match_id ("OUTFILE"))
213 dict = dict_clone (default_dict);
214 start_case_map (dict);
215 if (!trim_dictionary (dict, OP_SAVE, &compress))
217 t->map = finish_case_map (dict);
219 case_create (&t->bounce, dict_get_next_value_idx (dict));
221 t->writer = sfm_open_writer (fh, dict, compress);
222 if (t->writer == NULL)
232 save_trns_free (&t->h);
236 /* Parses and performs the SAVE procedure. */
240 struct save_trns *t = cmd_save_internal ();
243 procedure (save_write_case_func, t);
244 save_trns_free (&t->h);
251 /* Parses the XSAVE transformation command. */
255 struct save_trns *t = cmd_save_internal ();
258 add_transformation (&t->h);
265 /* Writes the given C to the file specified by T. */
267 do_write_case (struct save_trns *t, struct ccase *c)
270 sfm_write_case (t->writer, c);
273 map_case (t->map, c, &t->bounce);
274 sfm_write_case (t->writer, &t->bounce);
278 /* Writes case C to the system file specified on SAVE.
   AUX is forwarded to do_write_case() as the struct save_trns to
   write through, so it is in fact used despite the UNUSED marker. */
280 save_write_case_func (struct ccase *c, void *aux UNUSED)
282 do_write_case (aux, c);
286 /* Writes case C to the system file specified on XSAVE. */
288 save_trns_proc (struct trns_header *h, struct ccase *c, int case_num UNUSED)
290 struct save_trns *t = (struct save_trns *) h;
291 do_write_case (t, c);
295 /* Frees a SAVE transformation. */
297 save_trns_free (struct trns_header *t_)
299 struct save_trns *t = (struct save_trns *) t_;
303 sfm_close_writer (t->writer);
304 destroy_case_map (t->map);
305 case_destroy (&t->bounce);
309 static int rename_variables (struct dictionary *dict);
311 /* Commands that read and write system files share a great deal
312 of common syntactic structure for rearranging and dropping
313 variables. This function parses this syntax and modifies DICT
316 OP is the operation being performed. For operations that
317 write a system file, *COMPRESS is set to 1 if the system file
318 should be compressed, 0 otherwise.
320 Returns nonzero on success, zero on failure. */
321 /* FIXME: IN, FIRST, LAST, MAP. */
323 trim_dictionary (struct dictionary *dict, enum operation op, int *compress)
325 assert ((compress != NULL) == (op == OP_SAVE));
326 if (get_scompression())
329 if (op == OP_SAVE || op == OP_EXPORT)
331 /* Delete all the scratch variables. */
336 v = xmalloc (sizeof *v * dict_get_var_cnt (dict));
338 for (i = 0; i < dict_get_var_cnt (dict); i++)
339 if (dict_class_from_id (dict_get_var (dict, i)->name) == DC_SCRATCH)
340 v[nv++] = dict_get_var (dict, i);
341 dict_delete_vars (dict, v, nv);
345 while (op == OP_MATCH || lex_match ('/'))
347 if (op == OP_SAVE && lex_match_id ("COMPRESSED"))
349 else if (op == OP_SAVE && lex_match_id ("UNCOMPRESSED"))
351 else if (lex_match_id ("DROP"))
357 if (!parse_variables (dict, &v, &nv, PV_NONE))
359 dict_delete_vars (dict, v, nv);
362 else if (lex_match_id ("KEEP"))
369 if (!parse_variables (dict, &v, &nv, PV_NONE))
372 /* Move the specified variables to the beginning. */
373 dict_reorder_vars (dict, v, nv);
375 /* Delete the remaining variables. */
376 v = xrealloc (v, (dict_get_var_cnt (dict) - nv) * sizeof *v);
377 for (i = nv; i < dict_get_var_cnt (dict); i++)
378 v[i - nv] = dict_get_var (dict, i);
379 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
382 else if (lex_match_id ("RENAME"))
384 if (!rename_variables (dict))
389 lex_error (_("while expecting a valid subcommand"));
393 if (dict_get_var_cnt (dict) == 0)
395 msg (SE, _("All variables deleted from system file dictionary."));
405 lex_error (_("expecting end of command"));
411 dict_compact_values (dict);
415 /* Parses and performs the RENAME subcommand of GET and SAVE. */
417 rename_variables (struct dictionary *dict)
435 v = parse_dict_variable (dict);
438 if (!lex_force_match ('=')
441 if (!strncmp (tokid, v->name, 8))
443 if (dict_lookup_var (dict, tokid) != NULL)
445 msg (SE, _("Cannot rename %s as %s because there already exists "
446 "a variable named %s. To rename variables with "
447 "overlapping names, use a single RENAME subcommand "
448 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
449 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
453 dict_rename_var (dict, v, tokid);
462 while (lex_match ('('))
466 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
468 if (!lex_match ('='))
470 msg (SE, _("`=' expected after variable list."));
473 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
477 msg (SE, _("Number of variables on left side of `=' (%d) does not "
478 "match number of variables on right side (%d), in "
479 "parenthesized group %d of RENAME subcommand."),
480 nv - old_nv, nn - old_nv, group);
483 if (!lex_force_match (')'))
488 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
490 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
496 for (i = 0; i < nn; i++)
504 /* EXPORT procedure. */
507 struct pfm_writer *writer; /* Portable file writer. */
508 struct case_map *map; /* Map from active file to portable file dict. */
509 struct ccase bounce; /* Bounce buffer. */
512 static int export_write_case_func (struct ccase *, void *);
513 static void export_proc_free (struct export_proc *);
515 /* Parses the EXPORT command. */
516 /* FIXME: same as cmd_save_internal(). */
520 struct file_handle *fh;
521 struct dictionary *dict;
522 struct export_proc *proc;
524 proc = xmalloc (sizeof *proc);
527 case_nullify (&proc->bounce);
530 if (lex_match_id ("OUTFILE"))
536 dict = dict_clone (default_dict);
537 start_case_map (dict);
538 if (!trim_dictionary (dict, OP_EXPORT, NULL))
540 proc->map = finish_case_map (dict);
541 if (proc->map != NULL)
542 case_create (&proc->bounce, dict_get_next_value_idx (dict));
544 proc->writer = pfm_open_writer (fh, dict);
545 if (proc->writer == NULL)
550 procedure (export_write_case_func, proc);
551 export_proc_free (proc);
557 export_proc_free (proc);
561 /* Writes case C to the EXPORT file. */
563 export_write_case_func (struct ccase *c, void *aux)
565 struct export_proc *proc = aux;
566 if (proc->map == NULL)
567 pfm_write_case (proc->writer, c);
570 map_case (proc->map, c, &proc->bounce);
571 pfm_write_case (proc->writer, &proc->bounce);
577 export_proc_free (struct export_proc *proc)
581 pfm_close_writer (proc->writer);
582 destroy_case_map (proc->map);
583 case_destroy (&proc->bounce);
589 #include "debug-print.h"
594 MTF_FILE, /* Specified on FILE= subcommand. */
595 MTF_TABLE /* Specified on TABLE= subcommand. */
598 /* One of the files on MATCH FILES. */
601 struct mtf_file *next, *prev;
602 /* Next, previous in the list of files. */
603 struct mtf_file *next_min; /* Next in the chain of minimums. */
605 int type; /* One of MTF_*. */
606 struct variable **by; /* List of BY variables for this file. */
607 struct file_handle *handle; /* File handle. */
608 struct sfm_reader *reader; /* System file reader. */
609 struct dictionary *dict; /* Dictionary from system file. */
610 char in[9]; /* Name of the variable from IN=. */
611 char first[9], last[9]; /* Name of the variables from FIRST=, LAST=. */
612 struct ccase input; /* Input record. */
615 /* MATCH FILES procedure. */
618 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
619 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
621 struct variable **by; /* Variables on the BY subcommand. */
622 size_t by_cnt; /* Number of variables on BY subcommand. */
624 struct dictionary *dict; /* Dictionary of output file. */
625 struct case_sink *sink; /* Sink to receive output. */
626 struct ccase *mtf_case; /* Case used for output. */
628 unsigned seq_num; /* Current sequence number; mtf_processing()
   compares it with, and stores it into, seq_nums[] entries
   (presumably to skip variables already stored for this case). */
629 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
632 static void mtf_free (struct mtf_proc *);
633 static void mtf_free_file (struct mtf_file *);
634 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
635 static void mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
637 static void mtf_read_nonactive_records (void *);
638 static void mtf_processing_finish (void *);
639 static int mtf_processing (struct ccase *, void *);
641 static char *var_type_description (struct variable *);
643 static void set_master (struct variable *, struct variable *master);
644 static struct variable *get_master (struct variable *);
646 /* Parse and execute the MATCH FILES command. */
648 cmd_match_files (void)
651 struct mtf_file *first_table = NULL;
652 struct mtf_file *iter;
656 mtf.head = mtf.tail = NULL;
659 mtf.dict = dict_create ();
664 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
670 if (lex_match (T_BY))
674 msg (SE, _("The BY subcommand may be given once at most."));
680 if (!parse_variables (mtf.dict, &mtf.by, &mtf.by_cnt,
681 PV_NO_DUPLICATE | PV_NO_SCRATCH))
684 else if (token != T_ID)
689 else if (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid))
691 struct mtf_file *file = xmalloc (sizeof *file);
693 if (lex_match_id ("FILE"))
694 file->type = MTF_FILE;
695 else if (lex_match_id ("TABLE"))
697 file->type = MTF_TABLE;
708 file->first[0] = '\0';
709 file->last[0] = '\0';
710 case_nullify (&file->input);
712 /* FILEs go first, then TABLEs. */
713 if (file->type == MTF_TABLE || first_table == NULL)
716 file->prev = mtf.tail;
718 mtf.tail->next = file;
720 if (mtf.head == NULL)
722 if (file->type == MTF_TABLE && first_table == NULL)
727 assert (file->type == MTF_FILE);
728 file->next = first_table;
729 file->prev = first_table->prev;
730 if (first_table->prev)
731 first_table->prev->next = file;
734 first_table->prev = file;
746 msg (SE, _("The active file may not be specified more "
752 assert (pgm_state != STATE_INPUT);
753 if (pgm_state == STATE_INIT)
755 msg (SE, _("Cannot specify the active file since no active "
756 "file has been defined."));
763 _("MATCH FILES may not be used after TEMPORARY when "
764 "the active file is an input source. "
765 "Temporary transformations will be made permanent."));
769 file->dict = default_dict;
773 file->handle = fh_parse ();
774 if (file->handle == NULL)
777 file->reader = sfm_open_reader (file->handle, &file->dict, NULL);
778 if (file->reader == NULL)
781 case_create (&file->input, dict_get_next_value_idx (file->dict));
784 else if (lex_id_match ("IN", tokid)
785 || lex_id_match ("FIRST", tokid)
786 || lex_id_match ("LAST", tokid))
791 if (mtf.tail == NULL)
793 msg (SE, _("IN, FIRST, and LAST subcommands may not occur "
794 "before the first FILE or TABLE."));
798 if (lex_match_id ("IN"))
803 else if (lex_match_id ("FIRST"))
805 name = mtf.tail->first;
808 else if (lex_match_id ("LAST"))
810 name = mtf.tail->last;
828 msg (SE, _("Multiple %s subcommands for a single FILE or "
833 strcpy (name, tokid);
836 if (!dict_create_var (mtf.dict, name, 0))
838 msg (SE, _("Duplicate variable name %s while creating %s "
844 else if (lex_id_match ("RENAME", tokid)
845 || lex_id_match ("KEEP", tokid)
846 || lex_id_match ("DROP", tokid))
848 if (mtf.tail == NULL)
850 msg (SE, _("RENAME, KEEP, and DROP subcommands may not occur "
851 "before the first FILE or TABLE."));
855 if (!trim_dictionary (mtf.tail->dict, OP_MATCH, NULL))
858 else if (lex_match_id ("MAP"))
868 while (token != '.');
870 for (iter = mtf.head; iter != NULL; iter = iter->next)
871 mtf_merge_dictionary (mtf.dict, iter);
877 msg (SE, _("The BY subcommand is required when a TABLE subcommand "
885 for (iter = mtf.head; iter != NULL; iter = iter->next)
889 iter->by = xmalloc (sizeof *iter->by * mtf.by_cnt);
891 for (i = 0; i < mtf.by_cnt; i++)
893 iter->by[i] = dict_lookup_var (iter->dict, mtf.by[i]->name);
894 if (iter->by[i] == NULL)
896 msg (SE, _("File %s lacks BY variable %s."),
897 iter->handle ? handle_get_name (iter->handle) : "*",
905 /* MATCH FILES performs an n-way merge on all its input files.
908 1. Read one input record from every input FILE.
910 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
912 3. Find the FILE input record with minimum BY values. Store all
913 the values from this input record into the output record.
915 4. Find all the FILE input records with BY values identical to
916 the minimums. Store all the values from these input records into
919 5. For every TABLE, read another record as long as the BY values
920 on the TABLE's input record are less than the FILEs' BY values.
921 If an exact match is found, store all the values from the TABLE
922 input record into the output record.
924 6. Write the output record.
926 7. Read another record from each input file FILE and TABLE that
927 we stored values from above. If we come to the end of one of the
928 input files, remove it from the list of input files.
930 8. Repeat from step 2.
932 Unfortunately, this algorithm can't be directly implemented
933 because there's no function to read a record from the active
934 file; instead, it has to be done using callbacks.
936 FIXME: For merging large numbers of files (more than 10?) a
937 better algorithm would use a heap for finding minimum
941 discard_variables ();
943 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
945 mtf.seq_nums = xmalloc (dict_get_var_cnt (mtf.dict)
946 * sizeof *mtf.seq_nums);
947 memset (mtf.seq_nums, 0,
948 dict_get_var_cnt (mtf.dict) * sizeof *mtf.seq_nums);
949 mtf.mtf_case = xmalloc (dict_get_case_size (mtf.dict));
951 mtf_read_nonactive_records (NULL);
953 procedure (mtf_processing, NULL);
954 mtf_processing_finish (NULL);
956 dict_destroy (default_dict);
957 default_dict = mtf.dict;
959 vfm_source = mtf.sink->class->make_source (mtf.sink);
960 free_case_sink (mtf.sink);
970 /* Repeats 2...8 an arbitrary number of times.
   MTF_ is the struct mtf_proc to operate on; it is dereferenced
   immediately, so it must not be null.
   NOTE(review): cmd_match_files() is seen calling this with NULL --
   confirm against the full file. */
972 mtf_processing_finish (void *mtf_)
974 struct mtf_proc *mtf = mtf_;
975 struct mtf_file *iter;
977 /* Find the active file (the entry with a null handle) and delete it. */
978 for (iter = mtf->head; iter; iter = iter->next)
979 if (iter->handle == NULL)
981 mtf_delete_file_in_place (mtf, &iter);
985 while (mtf->head && mtf->head->type == MTF_FILE)
986 if (!mtf_processing (NULL, mtf))
990 /* Return a string in a static buffer describing V's variable type and
993 var_type_description (struct variable *v)
995 static char buf[2][32];
1002 if (v->type == NUMERIC)
1003 strcpy (s, "numeric");
1006 assert (v->type == ALPHA);
1007 sprintf (s, "string with width %d", v->width);
1012 /* Free FILE and associated data. */
1014 mtf_free_file (struct mtf_file *file)
1017 sfm_close_reader (file->reader);
1018 if (file->dict != default_dict)
1019 dict_destroy (file->dict);
1020 case_destroy (&file->input);
1024 /* Free all the data for the MATCH FILES procedure. */
1026 mtf_free (struct mtf_proc *mtf)
1028 struct mtf_file *iter, *next;
1030 for (iter = mtf->head; iter; iter = next)
1034 mtf_free_file (iter);
1039 dict_destroy (mtf->dict);
1040 free (mtf->seq_nums);
1043 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1044 file in the chain, or to NULL if it was the last in the chain. */
1046 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1048 struct mtf_file *f = *file;
1051 f->prev->next = f->next;
1053 f->next->prev = f->prev;
1055 mtf->head = f->next;
1057 mtf->tail = f->prev;
1063 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1065 struct variable *v = dict_get_var (f->dict, i);
1066 union value *out = case_data_rw (mtf->mtf_case, get_master (v)->fv);
1068 if (v->type == NUMERIC)
1071 memset (out->s, ' ', v->width);
1078 /* Read a record from every input file except the active file.
   A file for which sfm_read_case() returns zero is removed from the
   list with mtf_delete_file_in_place().
   MTF_ is the struct mtf_proc to operate on; it is used despite the
   UNUSED marker and is dereferenced, so it must not be null.
   NOTE(review): cmd_match_files() is seen calling this with NULL --
   confirm against the full file. */
1080 mtf_read_nonactive_records (void *mtf_ UNUSED)
1082 struct mtf_proc *mtf = mtf_;
1083 struct mtf_file *iter;
1085 for (iter = mtf->head; iter; )
1089 if (!sfm_read_case (iter->reader, &iter->input))
1090 mtf_delete_file_in_place (mtf, &iter);
1099 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1100 if A == B, 1 if A > B. */
1102 mtf_compare_BY_values (struct mtf_proc *mtf,
1103 struct mtf_file *a, struct mtf_file *b,
1106 struct ccase *a_input, *b_input;
1109 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1110 a_input = case_is_null (&a->input) ? c : &a->input;
1111 b_input = case_is_null (&b->input) ? c : &b->input;
1112 for (i = 0; i < mtf->by_cnt; i++)
1114 assert (a->by[i]->type == b->by[i]->type);
1115 assert (a->by[i]->width == b->by[i]->width);
1117 if (a->by[i]->type == NUMERIC)
1119 double af = case_num (a_input, a->by[i]->fv);
1120 double bf = case_num (b_input, b->by[i]->fv);
1131 assert (a->by[i]->type == ALPHA);
1132 result = memcmp (case_str (a_input, a->by[i]->fv),
1133 case_str (b_input, b->by[i]->fv),
1137 else if (result > 0)
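1144 /* Perform one iteration of steps 3...7 above.
   C is used in place of the input case of any file whose own input
   case is null. MTF_ is the struct mtf_proc state; it is used
   despite the UNUSED marker. */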
1146 mtf_processing (struct ccase *c, void *mtf_ UNUSED)
1148 struct mtf_proc *mtf = mtf_;
1149 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1150 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BY values. */
1151 struct mtf_file *iter; /* Iterator. */
1155 /* If the active file doesn't have the minimum BY values, don't
1156 return because that would cause a record to be skipped. */
1159 if (mtf->head->type == MTF_TABLE)
1162 /* 3. Find the FILE input record with minimum BY values. Store
1163 all the values from this input record into the output record.
1165 4. Find all the FILE input records with BY values identical
1166 to the minimums. Store all the values from these input
1167 records into the output record. */
1168 min_head = min_tail = mtf->head;
1169 max_head = max_tail = NULL;
1170 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1172 switch (mtf_compare_BY_values (mtf, min_head, iter, c))
1176 max_tail = max_tail->next_min = iter;
1178 max_head = max_tail = iter;
1182 min_tail = min_tail->next_min = iter;
1188 max_tail->next_min = min_head;
1189 max_tail = min_tail;
1193 max_head = min_head;
1194 max_tail = min_tail;
1196 min_head = min_tail = iter;
1203 /* 5. For every TABLE, read another record as long as the BY
1204 values on the TABLE's input record are less than the FILEs'
1205 BY values. If an exact match is found, store all the values
1206 from the TABLE input record into the output record. */
1209 struct mtf_file *next = iter->next;
1211 assert (iter->type == MTF_TABLE);
1213 if (iter->handle == NULL)
1217 switch (mtf_compare_BY_values (mtf, min_head, iter, c))
1221 max_tail = max_tail->next_min = iter;
1223 max_head = max_tail = iter;
1227 min_tail = min_tail->next_min = iter;
1231 if (iter->handle == NULL)
1233 if (sfm_read_case (iter->reader, &iter->input))
1235 mtf_delete_file_in_place (mtf, &iter);
1245 /* Next sequence number. */
1248 /* Store data to all the records we are using. */
1250 min_tail->next_min = NULL;
1251 for (iter = min_head; iter; iter = iter->next_min)
1255 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1257 struct variable *v = dict_get_var (iter->dict, i);
1258 struct ccase *record;
1261 if (mtf->seq_nums[get_master (v)->index] == mtf->seq_num)
1263 mtf->seq_nums[get_master (v)->index] = mtf->seq_num;
1265 record = case_is_null (&iter->input) ? c : &iter->input;
1267 assert (v->type == NUMERIC || v->type == ALPHA);
1268 out = case_data_rw (mtf->mtf_case, get_master (v)->fv);
1269 if (v->type == NUMERIC)
1270 out->f = case_num (record, v->fv);
1272 memcpy (out->s, case_str (record, v->fv), v->width);
1276 /* Store missing values to all the records we're not using. */
1278 max_tail->next_min = NULL;
1279 for (iter = max_head; iter; iter = iter->next_min)
1283 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1285 struct variable *v = dict_get_var (iter->dict, i);
1288 if (mtf->seq_nums[get_master (v)->index] == mtf->seq_num)
1290 mtf->seq_nums[get_master (v)->index] = mtf->seq_num;
1292 out = case_data_rw (mtf->mtf_case, get_master (v)->fv);
1293 if (v->type == NUMERIC)
1296 memset (out->s, ' ', v->width);
1299 if (iter->handle == NULL)
1303 /* 6. Write the output record. */
1304 mtf->sink->class->write (mtf->sink, mtf->mtf_case);
1306 /* 7. Read another record from each input file FILE and TABLE
1307 that we stored values from above. If we come to the end of
1308 one of the input files, remove it from the list of input
1310 for (iter = min_head; iter && iter->type == MTF_FILE; )
1312 struct mtf_file *next = iter->next_min;
1314 if (iter->reader != NULL)
1316 if (!sfm_read_case (iter->reader, &iter->input))
1317 mtf_delete_file_in_place (mtf, &iter);
1327 return (mtf->head && mtf->head->type != MTF_TABLE);
1330 /* Merge the dictionary for file F into master dictionary M. */
1332 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1334 struct dictionary *d = f->dict;
1335 const char *d_docs, *m_docs;
1337 if (dict_get_label (m) == NULL)
1338 dict_set_label (m, dict_get_label (d));
1340 d_docs = dict_get_documents (d);
1341 m_docs = dict_get_documents (m);
1345 dict_set_documents (m, d_docs);
1351 new_len = strlen (m_docs) + strlen (d_docs);
1352 new_docs = xmalloc (new_len + 1);
1353 strcpy (new_docs, m_docs);
1354 strcat (new_docs, d_docs);
1355 dict_set_documents (m, new_docs);
1360 dict_compact_values (d);
1365 for (i = 0; i < dict_get_var_cnt (d); i++)
1367 struct variable *dv = dict_get_var (d, i);
1368 struct variable *mv = dict_lookup_var (m, dv->name);
1370 assert (dv->type == ALPHA || dv->width == 0);
1371 assert (!mv || mv->type == ALPHA || mv->width == 0);
1372 if (mv && dv->width == mv->width)
1374 if (val_labs_count (dv->val_labs)
1375 && !val_labs_count (mv->val_labs))
1376 mv->val_labs = val_labs_copy (dv->val_labs);
1377 if (dv->miss_type != MISSING_NONE
1378 && mv->miss_type == MISSING_NONE)
1379 copy_missing_values (mv, dv);
1381 if (mv && dv->label && !mv->label)
1382 mv->label = xstrdup (dv->label);
1385 mv = dict_clone_var (m, dv, dv->name);
1386 assert (mv != NULL);
1388 else if (mv->width != dv->width)
1390 msg (SE, _("Variable %s in file %s (%s) has different "
1391 "type or width from the same variable in "
1392 "earlier file (%s)."),
1393 dv->name, handle_get_name (f->handle),
1394 var_type_description (dv), var_type_description (mv));
1397 set_master (dv, mv);
1404 /* Marks V's master variable as MASTER. */
1406 set_master (struct variable *v, struct variable *master)
1408 var_attach_aux (v, master, NULL);
1411 /* Returns the master variable corresponding to V,
1412 as set with set_master(). */
1413 static struct variable *
1414 get_master (struct variable *v)
1416 assert (v->aux != NULL);
1420 /* IMPORT command. */
1422 /* IMPORT input program. */
1425 struct pfm_reader *reader; /* Portable file reader. */
1426 struct case_map *map; /* Map from portable file to active file dict. */
1427 struct ccase bounce; /* Bounce buffer. */
1430 static void import_pgm_free (struct import_pgm *);
1432 /* Parses the IMPORT command. */
1436 struct import_pgm *pgm = NULL;
1437 struct file_handle *fh = NULL;
1438 struct dictionary *dict = NULL;
1441 pgm = xmalloc (sizeof *pgm);
1444 case_nullify (&pgm->bounce);
1450 if (lex_match_id ("FILE") || token == T_STRING)
1458 else if (lex_match_id ("TYPE"))
1462 if (lex_match_id ("COMM"))
1464 else if (lex_match_id ("TAPE"))
1468 lex_error (_("expecting COMM or TAPE"));
1474 if (!lex_match ('/') && token != '.')
1480 discard_variables ();
1482 pgm->reader = pfm_open_reader (fh, &dict, NULL);
1483 if (pgm->reader == NULL)
1485 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
1487 start_case_map (dict);
1488 if (!trim_dictionary (dict, OP_READ, NULL))
1490 pgm->map = finish_case_map (dict);
1492 dict_destroy (default_dict);
1493 default_dict = dict;
1495 vfm_source = create_case_source (&import_source_class, default_dict, pgm);
1500 import_pgm_free (pgm);
1502 dict_destroy (dict);
1506 /* Frees a struct import_pgm. */
1508 import_pgm_free (struct import_pgm *pgm)
1512 pfm_close_reader (pgm->reader);
1513 destroy_case_map (pgm->map);
1514 case_destroy (&pgm->bounce);
1519 /* Clears internal state related to IMPORT input procedure. */
1521 import_source_destroy (struct case_source *source)
1523 struct import_pgm *pgm = source->aux;
1524 import_pgm_free (pgm);
1527 /* Reads all the cases from the data file into C and passes them
1528 to WRITE_CASE one by one, passing WC_DATA. */
1530 import_source_read (struct case_source *source,
1532 write_case_func *write_case, write_case_data wc_data)
1534 struct import_pgm *pgm = source->aux;
1539 if (pgm->map == NULL)
1540 ok = pfm_read_case (pgm->reader, c);
1543 ok = pfm_read_case (pgm->reader, &pgm->bounce);
1545 map_case (pgm->map, &pgm->bounce, c);
1549 ok = write_case (wc_data);
1554 const struct case_source_class import_source_class =
1559 import_source_destroy,
1565 A case map copies data from a case that corresponds to one
1566 dictionary to a case that corresponds to a second dictionary
1567 derived from the first by, optionally, deleting, reordering,
1568 or renaming variables. (No new variables may be created.)
1574 size_t value_cnt; /* Number of values in map. */
1575 int *map; /* For each destination index, the
1576 corresponding source index. */
1579 /* Prepares dictionary D for producing a case map. Afterward,
1580 the caller may delete, reorder, or rename variables within D
1581 at will before using finish_case_map() to produce the case
1584 Uses D's aux members, which may not otherwise be in use. */
1586 start_case_map (struct dictionary *d)
1588 size_t var_cnt = dict_get_var_cnt (d);
1591 for (i = 0; i < var_cnt; i++)
1593 struct variable *v = dict_get_var (d, i);
1594 int *src_fv = xmalloc (sizeof *src_fv);
1596 var_attach_aux (v, src_fv, var_dtor_free);
1600 /* Produces a case map from dictionary D, which must have been
1601 previously prepared with start_case_map().
1603 Does not retain any reference to D, and clears the aux members
1604 set up by start_case_map().
1606 Returns the new case map, or a null pointer if no mapping is
1607 required (that is, no data has changed position). */
1608 static struct case_map *
1609 finish_case_map (struct dictionary *d)
1611 struct case_map *map;
1612 size_t var_cnt = dict_get_var_cnt (d);
1616 map = xmalloc (sizeof *map);
1617 map->value_cnt = dict_get_next_value_idx (d);
1618 map->map = xmalloc (sizeof *map->map * map->value_cnt);
1619 for (i = 0; i < map->value_cnt; i++)
1623 for (i = 0; i < var_cnt; i++)
1625 struct variable *v = dict_get_var (d, i);
1626 int src_fv = *(int *) var_detach_aux (v);
1629 if (v->fv != src_fv)
1632 for (idx = 0; idx < v->nv; idx++)
1634 int src_idx = src_fv + idx;
1635 int dst_idx = v->fv + idx;
1637 assert (map->map[dst_idx] == -1);
1638 map->map[dst_idx] = src_idx;
1644 destroy_case_map (map);
1648 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1654 /* Maps from SRC to DST, applying case map MAP. */
1656 map_case (const struct case_map *map,
1657 const struct ccase *src, struct ccase *dst)
1661 assert (map != NULL);
1662 assert (src != NULL);
1663 assert (dst != NULL);
1664 assert (src != dst);
1666 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1668 int src_idx = map->map[dst_idx];
1670 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1674 /* Destroys case map MAP. */
1676 destroy_case_map (struct case_map *map)