1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
26 #include "dictionary.h"
28 #include "file-handle.h"
33 #include "pfm-write.h"
36 #include "sfm-write.h"
38 #include "value-labels.h"
43 #include "debug-print.h"
45 /* Rearranging and reducing a dictionary. */
46 static void start_case_map (struct dictionary *);
47 static struct case_map *finish_case_map (struct dictionary *);
48 static void map_case (const struct case_map *,
49 const struct ccase *, struct ccase *);
50 static void destroy_case_map (struct case_map *);
55 OP_READ, /* GET or IMPORT. */
56 OP_SAVE, /* SAVE or XSAVE. */
57 OP_EXPORT, /* EXPORT. */
58 OP_MATCH /* MATCH FILES. */
61 static int trim_dictionary (struct dictionary *,
62 enum operation, int *compress);
64 /* GET input program. */
67 struct sfm_reader *reader; /* System file reader. */
68 struct case_map *map; /* Map from system file to active file dict. */
69 struct ccase bounce; /* Bounce buffer. */
72 static void get_pgm_free (struct get_pgm *);
74 /* Parses the GET command. */
78 struct get_pgm *pgm = NULL;
79 struct file_handle *fh;
80 struct dictionary *dict = NULL;
82 pgm = xmalloc (sizeof *pgm);
85 case_nullify (&pgm->bounce);
90 if (lex_match_id ("FILE"))
96 pgm->reader = sfm_open_reader (fh, &dict, NULL);
97 if (pgm->reader == NULL)
99 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
101 start_case_map (dict);
102 if (!trim_dictionary (dict, OP_READ, NULL))
104 pgm->map = finish_case_map (dict);
106 dict_destroy (default_dict);
109 vfm_source = create_case_source (&get_source_class, default_dict, pgm);
120 /* Frees a struct get_pgm. */
122 get_pgm_free (struct get_pgm *pgm)
126 sfm_close_reader (pgm->reader);
127 destroy_case_map (pgm->map);
128 case_destroy (&pgm->bounce);
133 /* Clears internal state related to GET input procedure. */
135 get_source_destroy (struct case_source *source)
137 struct get_pgm *pgm = source->aux;
141 /* Reads all the cases from the data file into C and passes them
142 to WRITE_CASE one by one, passing WC_DATA. */
144 get_source_read (struct case_source *source,
146 write_case_func *write_case, write_case_data wc_data)
148 struct get_pgm *pgm = source->aux;
153 if (pgm->map == NULL)
154 ok = sfm_read_case (pgm->reader, c);
157 ok = sfm_read_case (pgm->reader, &pgm->bounce);
159 map_case (pgm->map, &pgm->bounce, c);
163 ok = write_case (wc_data);
168 const struct case_source_class get_source_class =
176 /* XSAVE transformation and SAVE procedure. */
179 struct trns_header h;
180 struct sfm_writer *writer; /* System file writer. */
181 struct case_map *map; /* Map from active file to system file dict. */
182 struct ccase bounce; /* Bounce buffer. */
185 static int save_write_case_func (struct ccase *, void *);
186 static trns_proc_func save_trns_proc;
187 static trns_free_func save_trns_free;
189 /* Parses the SAVE or XSAVE command
190 and returns the parsed transformation. */
191 static struct save_trns *
192 cmd_save_internal (void)
194 struct file_handle *fh;
195 struct dictionary *dict = NULL;
196 struct save_trns *t = NULL;
197 int compress = get_scompression ();
199 t = xmalloc (sizeof *t);
200 t->h.proc = save_trns_proc;
201 t->h.free = save_trns_free;
204 case_nullify (&t->bounce);
207 if (lex_match_id ("OUTFILE"))
213 dict = dict_clone (default_dict);
214 start_case_map (dict);
215 if (!trim_dictionary (dict, OP_SAVE, &compress))
217 t->map = finish_case_map (dict);
219 case_create (&t->bounce, dict_get_next_value_idx (dict));
221 t->writer = sfm_open_writer (fh, dict, compress);
222 if (t->writer == NULL)
232 save_trns_free (&t->h);
236 /* Parses and performs the SAVE procedure. */
240 struct save_trns *t = cmd_save_internal ();
243 procedure (save_write_case_func, t);
244 save_trns_free (&t->h);
252 /* Parses the XSAVE transformation command. */
256 struct save_trns *t = cmd_save_internal ();
259 add_transformation (&t->h);
266 /* Writes the given C to the file specified by T. */
268 do_write_case (struct save_trns *t, struct ccase *c)
271 sfm_write_case (t->writer, c);
274 map_case (t->map, c, &t->bounce);
275 sfm_write_case (t->writer, &t->bounce);
279 /* Writes case C to the system file specified on SAVE.
   AUX is handed to do_write_case() as the struct save_trns to
   write through.

   NOTE(review): AUX is tagged UNUSED even though it is used on
   the very next line; the tag looks stale -- confirm and drop it. */
281 save_write_case_func (struct ccase *c, void *aux UNUSED)
283 do_write_case (aux, c);
287 /* Writes case C to the system file specified on XSAVE. */
289 save_trns_proc (struct trns_header *h, struct ccase *c, int case_num UNUSED)
291 struct save_trns *t = (struct save_trns *) h;
292 do_write_case (t, c);
296 /* Frees a SAVE transformation. */
298 save_trns_free (struct trns_header *t_)
300 struct save_trns *t = (struct save_trns *) t_;
304 sfm_close_writer (t->writer);
305 destroy_case_map (t->map);
306 case_destroy (&t->bounce);
310 static int rename_variables (struct dictionary *dict);
312 /* Commands that read and write system files share a great deal
313 of common syntactic structure for rearranging and dropping
314 variables. This function parses this syntax and modifies DICT
317 OP is the operation being performed. For operations that
318 write a system file, *COMPRESS is set to 1 if the system file
319 should be compressed, 0 otherwise.
321 Returns nonzero on success, zero on failure. */
322 /* FIXME: IN, FIRST, LAST, MAP. */
324 trim_dictionary (struct dictionary *dict, enum operation op, int *compress)
326 assert ((compress != NULL) == (op == OP_SAVE));
327 if (get_scompression())
330 if (op == OP_SAVE || op == OP_EXPORT)
332 /* Delete all the scratch variables. */
337 v = xmalloc (sizeof *v * dict_get_var_cnt (dict));
339 for (i = 0; i < dict_get_var_cnt (dict); i++)
340 if (dict_class_from_id (dict_get_var (dict, i)->name) == DC_SCRATCH)
341 v[nv++] = dict_get_var (dict, i);
342 dict_delete_vars (dict, v, nv);
346 while (op == OP_MATCH || lex_match ('/'))
348 if (op == OP_SAVE && lex_match_id ("COMPRESSED"))
350 else if (op == OP_SAVE && lex_match_id ("UNCOMPRESSED"))
352 else if (lex_match_id ("DROP"))
358 if (!parse_variables (dict, &v, &nv, PV_NONE))
360 dict_delete_vars (dict, v, nv);
363 else if (lex_match_id ("KEEP"))
370 if (!parse_variables (dict, &v, &nv, PV_NONE))
373 /* Move the specified variables to the beginning. */
374 dict_reorder_vars (dict, v, nv);
376 /* Delete the remaining variables. */
377 v = xrealloc (v, (dict_get_var_cnt (dict) - nv) * sizeof *v);
378 for (i = nv; i < dict_get_var_cnt (dict); i++)
379 v[i - nv] = dict_get_var (dict, i);
380 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
383 else if (lex_match_id ("RENAME"))
385 if (!rename_variables (dict))
390 lex_error (_("while expecting a valid subcommand"));
394 if (dict_get_var_cnt (dict) == 0)
396 msg (SE, _("All variables deleted from system file dictionary."));
406 lex_error (_("expecting end of command"));
412 dict_compact_values (dict);
416 /* Parses and performs the RENAME subcommand of GET and SAVE. */
418 rename_variables (struct dictionary *dict)
436 v = parse_dict_variable (dict);
439 if (!lex_force_match ('=')
442 if (!strncmp (tokid, v->name, 8))
444 if (dict_lookup_var (dict, tokid) != NULL)
446 msg (SE, _("Cannot rename %s as %s because there already exists "
447 "a variable named %s. To rename variables with "
448 "overlapping names, use a single RENAME subcommand "
449 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
450 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
454 dict_rename_var (dict, v, tokid);
463 while (lex_match ('('))
467 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
469 if (!lex_match ('='))
471 msg (SE, _("`=' expected after variable list."));
474 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
478 msg (SE, _("Number of variables on left side of `=' (%d) does not "
479 "match number of variables on right side (%d), in "
480 "parenthesized group %d of RENAME subcommand."),
481 nv - old_nv, nn - old_nv, group);
484 if (!lex_force_match (')'))
489 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
491 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
497 for (i = 0; i < nn; i++)
505 /* EXPORT procedure. */
508 struct pfm_writer *writer; /* Portable file writer. */
509 struct case_map *map; /* Map from active file to portable file dict. */
510 struct ccase bounce; /* Bounce buffer. */
513 static int export_write_case_func (struct ccase *, void *);
514 static void export_proc_free (struct export_proc *);
516 /* Parses the EXPORT command. */
517 /* FIXME: same as cmd_save_internal(). */
521 struct file_handle *fh;
522 struct dictionary *dict;
523 struct export_proc *proc;
525 proc = xmalloc (sizeof *proc);
528 case_nullify (&proc->bounce);
531 if (lex_match_id ("OUTFILE"))
537 dict = dict_clone (default_dict);
538 start_case_map (dict);
539 if (!trim_dictionary (dict, OP_EXPORT, NULL))
541 proc->map = finish_case_map (dict);
542 if (proc->map != NULL)
543 case_create (&proc->bounce, dict_get_next_value_idx (dict));
545 proc->writer = pfm_open_writer (fh, dict);
546 if (proc->writer == NULL)
551 procedure (export_write_case_func, proc);
552 export_proc_free (proc);
559 export_proc_free (proc);
564 /* Writes case C to the EXPORT file. */
566 export_write_case_func (struct ccase *c, void *aux)
568 struct export_proc *proc = aux;
569 if (proc->map == NULL)
570 pfm_write_case (proc->writer, c);
573 map_case (proc->map, c, &proc->bounce);
574 pfm_write_case (proc->writer, &proc->bounce);
580 export_proc_free (struct export_proc *proc)
584 pfm_close_writer (proc->writer);
585 destroy_case_map (proc->map);
586 case_destroy (&proc->bounce);
592 #include "debug-print.h"
597 MTF_FILE, /* Specified on FILE= subcommand. */
598 MTF_TABLE /* Specified on TABLE= subcommand. */
601 /* One of the files on MATCH FILES. */
604 struct mtf_file *next, *prev;
605 /* Next, previous in the list of files. */
606 struct mtf_file *next_min; /* Next in the chain of minimums. */
608 int type; /* One of MTF_*. */
609 struct variable **by; /* List of BY variables for this file. */
610 struct file_handle *handle; /* File handle. */
611 struct sfm_reader *reader; /* System file reader. */
612 struct dictionary *dict; /* Dictionary from system file. */
613 char in[9]; /* Name of the variable from IN=. */
614 char first[9], last[9]; /* Name of the variables from FIRST=, LAST=. */
615 struct ccase input; /* Input record. */
618 /* MATCH FILES procedure. */
621 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
622 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
624 struct variable **by; /* Variables on the BY subcommand. */
625 size_t by_cnt; /* Number of variables on BY subcommand. */
627 struct dictionary *dict; /* Dictionary of output file. */
628 struct case_sink *sink; /* Sink to receive output. */
629 struct ccase *mtf_case; /* Case used for output. */
631 unsigned seq_num; /* Current sequence number; mtf_processing() compares it
                         with, and stores it into, the per-variable seq_nums[]
                         entries (presumably to mark variables already filled
                         in for the current output case -- confirm). */
632 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
635 static void mtf_free (struct mtf_proc *);
636 static void mtf_free_file (struct mtf_file *);
637 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
638 static void mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
640 static void mtf_read_nonactive_records (void *);
641 static void mtf_processing_finish (void *);
642 static int mtf_processing (struct ccase *, void *);
644 static char *var_type_description (struct variable *);
646 static void set_master (struct variable *, struct variable *master);
647 static struct variable *get_master (struct variable *);
649 /* Parse and execute the MATCH FILES command. */
651 cmd_match_files (void)
654 struct mtf_file *first_table = NULL;
655 struct mtf_file *iter;
659 mtf.head = mtf.tail = NULL;
662 mtf.dict = dict_create ();
667 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
673 if (lex_match (T_BY))
677 msg (SE, _("The BY subcommand may be given once at most."));
683 if (!parse_variables (mtf.dict, &mtf.by, &mtf.by_cnt,
684 PV_NO_DUPLICATE | PV_NO_SCRATCH))
687 else if (token != T_ID)
692 else if (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid))
694 struct mtf_file *file = xmalloc (sizeof *file);
696 if (lex_match_id ("FILE"))
697 file->type = MTF_FILE;
698 else if (lex_match_id ("TABLE"))
700 file->type = MTF_TABLE;
711 file->first[0] = '\0';
712 file->last[0] = '\0';
713 case_nullify (&file->input);
715 /* FILEs go first, then TABLEs. */
716 if (file->type == MTF_TABLE || first_table == NULL)
719 file->prev = mtf.tail;
721 mtf.tail->next = file;
723 if (mtf.head == NULL)
725 if (file->type == MTF_TABLE && first_table == NULL)
730 assert (file->type == MTF_FILE);
731 file->next = first_table;
732 file->prev = first_table->prev;
733 if (first_table->prev)
734 first_table->prev->next = file;
737 first_table->prev = file;
749 msg (SE, _("The active file may not be specified more "
755 assert (pgm_state != STATE_INPUT);
756 if (pgm_state == STATE_INIT)
758 msg (SE, _("Cannot specify the active file since no active "
759 "file has been defined."));
766 _("MATCH FILES may not be used after TEMPORARY when "
767 "the active file is an input source. "
768 "Temporary transformations will be made permanent."));
772 file->dict = default_dict;
776 file->handle = fh_parse ();
777 if (file->handle == NULL)
780 file->reader = sfm_open_reader (file->handle, &file->dict, NULL);
781 if (file->reader == NULL)
784 case_create (&file->input, dict_get_next_value_idx (file->dict));
787 else if (lex_id_match ("IN", tokid)
788 || lex_id_match ("FIRST", tokid)
789 || lex_id_match ("LAST", tokid))
794 if (mtf.tail == NULL)
796 msg (SE, _("IN, FIRST, and LAST subcommands may not occur "
797 "before the first FILE or TABLE."));
801 if (lex_match_id ("IN"))
806 else if (lex_match_id ("FIRST"))
808 name = mtf.tail->first;
811 else if (lex_match_id ("LAST"))
813 name = mtf.tail->last;
831 msg (SE, _("Multiple %s subcommands for a single FILE or "
836 strcpy (name, tokid);
839 if (!dict_create_var (mtf.dict, name, 0))
841 msg (SE, _("Duplicate variable name %s while creating %s "
847 else if (lex_id_match ("RENAME", tokid)
848 || lex_id_match ("KEEP", tokid)
849 || lex_id_match ("DROP", tokid))
851 if (mtf.tail == NULL)
853 msg (SE, _("RENAME, KEEP, and DROP subcommands may not occur "
854 "before the first FILE or TABLE."));
858 if (!trim_dictionary (mtf.tail->dict, OP_MATCH, NULL))
861 else if (lex_match_id ("MAP"))
871 while (token != '.');
873 for (iter = mtf.head; iter != NULL; iter = iter->next)
874 mtf_merge_dictionary (mtf.dict, iter);
880 msg (SE, _("The BY subcommand is required when a TABLE subcommand "
888 for (iter = mtf.head; iter != NULL; iter = iter->next)
892 iter->by = xmalloc (sizeof *iter->by * mtf.by_cnt);
894 for (i = 0; i < mtf.by_cnt; i++)
896 iter->by[i] = dict_lookup_var (iter->dict, mtf.by[i]->name);
897 if (iter->by[i] == NULL)
899 msg (SE, _("File %s lacks BY variable %s."),
900 iter->handle ? handle_get_name (iter->handle) : "*",
908 /* MATCH FILES performs an n-way merge on all its input files.
911 1. Read one input record from every input FILE.
913 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
915 3. Find the FILE input record with minimum BY values. Store all
916 the values from this input record into the output record.
918 4. Find all the FILE input records with BY values identical to
919 the minimums. Store all the values from these input records into
922 5. For every TABLE, read another record as long as the BY values
923 on the TABLE's input record are less than the FILEs' BY values.
924 If an exact match is found, store all the values from the TABLE
925 input record into the output record.
927 6. Write the output record.
929 7. Read another record from each input file FILE and TABLE that
930 we stored values from above. If we come to the end of one of the
931 input files, remove it from the list of input files.
933 8. Repeat from step 2.
935 Unfortunately, this algorithm can't be directly implemented
936 because there's no function to read a record from the active
937 file; instead, it has to be done using callbacks.
939 FIXME: For merging large numbers of files (more than 10?) a
940 better algorithm would use a heap for finding minimum
944 discard_variables ();
946 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
948 mtf.seq_nums = xmalloc (dict_get_var_cnt (mtf.dict)
949 * sizeof *mtf.seq_nums);
950 memset (mtf.seq_nums, 0,
951 dict_get_var_cnt (mtf.dict) * sizeof *mtf.seq_nums);
952 mtf.mtf_case = xmalloc (dict_get_case_size (mtf.dict));
954 mtf_read_nonactive_records (NULL);
956 procedure (mtf_processing, NULL);
957 mtf_processing_finish (NULL);
959 dict_destroy (default_dict);
960 default_dict = mtf.dict;
962 vfm_source = mtf.sink->class->make_source (mtf.sink);
963 free_case_sink (mtf.sink);
973 /* Repeats 2...8 an arbitrary number of times. */
975 mtf_processing_finish (void *mtf_)
977 struct mtf_proc *mtf = mtf_;
978 struct mtf_file *iter;
980 /* Find the active file and delete it. */
981 for (iter = mtf->head; iter; iter = iter->next)
982 if (iter->handle == NULL)
984 mtf_delete_file_in_place (mtf, &iter);
988 while (mtf->head && mtf->head->type == MTF_FILE)
989 if (!mtf_processing (NULL, mtf))
993 /* Return a string in a static buffer describing V's variable type and
996 var_type_description (struct variable *v)
998 static char buf[2][32];
1005 if (v->type == NUMERIC)
1006 strcpy (s, "numeric");
1009 assert (v->type == ALPHA);
1010 sprintf (s, "string with width %d", v->width);
1015 /* Free FILE and associated data. */
1017 mtf_free_file (struct mtf_file *file)
1020 sfm_close_reader (file->reader);
1021 if (file->dict != default_dict)
1022 dict_destroy (file->dict);
1023 case_destroy (&file->input);
1027 /* Free all the data for the MATCH FILES procedure. */
1029 mtf_free (struct mtf_proc *mtf)
1031 struct mtf_file *iter, *next;
1033 for (iter = mtf->head; iter; iter = next)
1037 mtf_free_file (iter);
1042 dict_destroy (mtf->dict);
1043 free (mtf->seq_nums);
1046 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1047 file in the chain, or to NULL if it was the last in the chain. */
1049 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1051 struct mtf_file *f = *file;
1054 f->prev->next = f->next;
1056 f->next->prev = f->prev;
1058 mtf->head = f->next;
1060 mtf->tail = f->prev;
1066 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1068 struct variable *v = dict_get_var (f->dict, i);
1069 union value *out = case_data_rw (mtf->mtf_case, get_master (v)->fv);
1071 if (v->type == NUMERIC)
1074 memset (out->s, ' ', v->width);
1081 /* Read a record from every input file except the active file.
   MTF_ is converted to the struct mtf_proc whose file chain is
   walked; a file for which sfm_read_case() returns zero is removed
   with mtf_delete_file_in_place().

   NOTE(review): MTF_ is tagged UNUSED although it is dereferenced
   below, and the visible call in cmd_match_files() passes NULL,
   which would make `mtf->head' a null pointer dereference --
   confirm that the caller should pass the address of its
   struct mtf_proc instead. */
1083 mtf_read_nonactive_records (void *mtf_ UNUSED)
1085 struct mtf_proc *mtf = mtf_;
1086 struct mtf_file *iter;
1088 for (iter = mtf->head; iter; )
1092 if (!sfm_read_case (iter->reader, &iter->input))
1093 mtf_delete_file_in_place (mtf, &iter);
1102 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1103 if A == B, 1 if A > B. */
1105 mtf_compare_BY_values (struct mtf_proc *mtf,
1106 struct mtf_file *a, struct mtf_file *b,
1109 struct ccase *a_input, *b_input;
1112 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1113 a_input = case_is_null (&a->input) ? c : &a->input;
1114 b_input = case_is_null (&b->input) ? c : &b->input;
1115 for (i = 0; i < mtf->by_cnt; i++)
1117 assert (a->by[i]->type == b->by[i]->type);
1118 assert (a->by[i]->width == b->by[i]->width);
1120 if (a->by[i]->type == NUMERIC)
1122 double af = case_num (a_input, a->by[i]->fv);
1123 double bf = case_num (b_input, b->by[i]->fv);
1134 assert (a->by[i]->type == ALPHA);
1135 result = memcmp (case_str (a_input, a->by[i]->fv),
1136 case_str (b_input, b->by[i]->fv),
1140 else if (result > 0)
1147 /* Perform one iteration of steps 3...7 above.
   C is the case supplied by the caller (mtf_processing_finish()
   passes NULL for it); MTF_ is converted to the struct mtf_proc
   that holds the file chain, output case, and sink.

   NOTE(review): MTF_ is tagged UNUSED although it is used
   throughout.  cmd_match_files() registers this callback with
   `procedure (mtf_processing, NULL)'; if procedure() forwards that
   aux value, MTF is null here -- confirm that the address of the
   struct mtf_proc should be passed instead. */
1149 mtf_processing (struct ccase *c, void *mtf_ UNUSED)
1151 struct mtf_proc *mtf = mtf_;
1152 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1153 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BY values. */
1154 struct mtf_file *iter; /* Iterator. */
1158 /* If the active file doesn't have the minimum BY values, don't
1159 return because that would cause a record to be skipped. */
1162 if (mtf->head->type == MTF_TABLE)
1165 /* 3. Find the FILE input record with minimum BY values. Store
1166 all the values from this input record into the output record.
1168 4. Find all the FILE input records with BY values identical
1169 to the minimums. Store all the values from these input
1170 records into the output record. */
1171 min_head = min_tail = mtf->head;
1172 max_head = max_tail = NULL;
1173 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1175 switch (mtf_compare_BY_values (mtf, min_head, iter, c))
1179 max_tail = max_tail->next_min = iter;
1181 max_head = max_tail = iter;
1185 min_tail = min_tail->next_min = iter;
1191 max_tail->next_min = min_head;
1192 max_tail = min_tail;
1196 max_head = min_head;
1197 max_tail = min_tail;
1199 min_head = min_tail = iter;
1206 /* 5. For every TABLE, read another record as long as the BY
1207 values on the TABLE's input record are less than the FILEs'
1208 BY values. If an exact match is found, store all the values
1209 from the TABLE input record into the output record. */
1212 struct mtf_file *next = iter->next;
1214 assert (iter->type == MTF_TABLE);
1216 if (iter->handle == NULL)
1220 switch (mtf_compare_BY_values (mtf, min_head, iter, c))
1224 max_tail = max_tail->next_min = iter;
1226 max_head = max_tail = iter;
1230 min_tail = min_tail->next_min = iter;
1234 if (iter->handle == NULL)
1236 if (sfm_read_case (iter->reader, &iter->input))
1238 mtf_delete_file_in_place (mtf, &iter);
1248 /* Next sequence number. */
1251 /* Store data to all the records we are using. */
1253 min_tail->next_min = NULL;
1254 for (iter = min_head; iter; iter = iter->next_min)
1258 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1260 struct variable *v = dict_get_var (iter->dict, i);
1261 struct ccase *record;
1264 if (mtf->seq_nums[get_master (v)->index] == mtf->seq_num)
1266 mtf->seq_nums[get_master (v)->index] = mtf->seq_num;
1268 record = case_is_null (&iter->input) ? c : &iter->input;
1270 assert (v->type == NUMERIC || v->type == ALPHA);
1271 out = case_data_rw (mtf->mtf_case, get_master (v)->fv);
1272 if (v->type == NUMERIC)
1273 out->f = case_num (record, v->fv);
1275 memcpy (out->s, case_str (record, v->fv), v->width);
1279 /* Store missing values to all the records we're not using. */
1281 max_tail->next_min = NULL;
1282 for (iter = max_head; iter; iter = iter->next_min)
1286 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1288 struct variable *v = dict_get_var (iter->dict, i);
1291 if (mtf->seq_nums[get_master (v)->index] == mtf->seq_num)
1293 mtf->seq_nums[get_master (v)->index] = mtf->seq_num;
1295 out = case_data_rw (mtf->mtf_case, get_master (v)->fv);
1296 if (v->type == NUMERIC)
1299 memset (out->s, ' ', v->width);
1302 if (iter->handle == NULL)
1306 /* 6. Write the output record. */
1307 mtf->sink->class->write (mtf->sink, mtf->mtf_case);
1309 /* 7. Read another record from each input file FILE and TABLE
1310 that we stored values from above. If we come to the end of
1311 one of the input files, remove it from the list of input
1313 for (iter = min_head; iter && iter->type == MTF_FILE; )
1315 struct mtf_file *next = iter->next_min;
1317 if (iter->reader != NULL)
1319 if (!sfm_read_case (iter->reader, &iter->input))
1320 mtf_delete_file_in_place (mtf, &iter);
1330 return (mtf->head && mtf->head->type != MTF_TABLE);
1333 /* Merge the dictionary for file F into master dictionary M. */
1335 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1337 struct dictionary *d = f->dict;
1338 const char *d_docs, *m_docs;
1340 if (dict_get_label (m) == NULL)
1341 dict_set_label (m, dict_get_label (d));
1343 d_docs = dict_get_documents (d);
1344 m_docs = dict_get_documents (m);
1348 dict_set_documents (m, d_docs);
1354 new_len = strlen (m_docs) + strlen (d_docs);
1355 new_docs = xmalloc (new_len + 1);
1356 strcpy (new_docs, m_docs);
1357 strcat (new_docs, d_docs);
1358 dict_set_documents (m, new_docs);
1363 dict_compact_values (d);
1368 for (i = 0; i < dict_get_var_cnt (d); i++)
1370 struct variable *dv = dict_get_var (d, i);
1371 struct variable *mv = dict_lookup_var (m, dv->name);
1373 assert (dv->type == ALPHA || dv->width == 0);
1374 assert (!mv || mv->type == ALPHA || mv->width == 0);
1375 if (mv && dv->width == mv->width)
1377 if (val_labs_count (dv->val_labs)
1378 && !val_labs_count (mv->val_labs))
1379 mv->val_labs = val_labs_copy (dv->val_labs);
1380 if (dv->miss_type != MISSING_NONE
1381 && mv->miss_type == MISSING_NONE)
1382 copy_missing_values (mv, dv);
1384 if (mv && dv->label && !mv->label)
1385 mv->label = xstrdup (dv->label);
1388 mv = dict_clone_var (m, dv, dv->name);
1389 assert (mv != NULL);
1391 else if (mv->width != dv->width)
1393 msg (SE, _("Variable %s in file %s (%s) has different "
1394 "type or width from the same variable in "
1395 "earlier file (%s)."),
1396 dv->name, handle_get_name (f->handle),
1397 var_type_description (dv), var_type_description (mv));
1400 set_master (dv, mv);
1407 /* Marks V's master variable as MASTER. */
1409 set_master (struct variable *v, struct variable *master)
1411 var_attach_aux (v, master, NULL);
1414 /* Returns the master variable corresponding to V,
1415 as set with set_master(). */
1416 static struct variable *
1417 get_master (struct variable *v)
1419 assert (v->aux != NULL);
1423 /* IMPORT command. */
1425 /* IMPORT input program. */
1428 struct pfm_reader *reader; /* Portable file reader. */
1429 struct case_map *map; /* Map from portable file to active file dict. */
1430 struct ccase bounce; /* Bounce buffer. */
1433 static void import_pgm_free (struct import_pgm *);
1435 /* Parses the IMPORT command. */
1439 struct import_pgm *pgm = NULL;
1440 struct file_handle *fh = NULL;
1441 struct dictionary *dict = NULL;
1444 pgm = xmalloc (sizeof *pgm);
1447 case_nullify (&pgm->bounce);
1453 if (lex_match_id ("FILE") || token == T_STRING)
1461 else if (lex_match_id ("TYPE"))
1465 if (lex_match_id ("COMM"))
1467 else if (lex_match_id ("TAPE"))
1471 lex_error (_("expecting COMM or TAPE"));
1477 if (!lex_match ('/') && token != '.')
1483 discard_variables ();
1485 pgm->reader = pfm_open_reader (fh, &dict, NULL);
1486 if (pgm->reader == NULL)
1488 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
1490 start_case_map (dict);
1491 if (!trim_dictionary (dict, OP_READ, NULL))
1493 pgm->map = finish_case_map (dict);
1495 dict_destroy (default_dict);
1496 default_dict = dict;
1498 vfm_source = create_case_source (&import_source_class, default_dict, pgm);
1503 import_pgm_free (pgm);
1505 dict_destroy (dict);
1509 /* Frees a struct import_pgm. */
1511 import_pgm_free (struct import_pgm *pgm)
1515 pfm_close_reader (pgm->reader);
1516 destroy_case_map (pgm->map);
1517 case_destroy (&pgm->bounce);
1522 /* Clears internal state related to IMPORT input procedure. */
1524 import_source_destroy (struct case_source *source)
1526 struct import_pgm *pgm = source->aux;
1527 import_pgm_free (pgm);
1530 /* Reads all the cases from the data file into C and passes them
1531 to WRITE_CASE one by one, passing WC_DATA. */
1533 import_source_read (struct case_source *source,
1535 write_case_func *write_case, write_case_data wc_data)
1537 struct import_pgm *pgm = source->aux;
1542 if (pgm->map == NULL)
1543 ok = pfm_read_case (pgm->reader, c);
1546 ok = pfm_read_case (pgm->reader, &pgm->bounce);
1548 map_case (pgm->map, &pgm->bounce, c);
1552 ok = write_case (wc_data);
1557 const struct case_source_class import_source_class =
1562 import_source_destroy,
1568 A case map copies data from a case that corresponds to one
1569 dictionary to a case that corresponds to a second dictionary
1570 derived from the first by, optionally, deleting, reordering,
1571 or renaming variables. (No new variables may be created.)
1577 size_t value_cnt; /* Number of values in map. */
1578 int *map; /* For each destination index, the
1579 corresponding source index. */
1582 /* Prepares dictionary D for producing a case map. Afterward,
1583 the caller may delete, reorder, or rename variables within D
1584 at will before using finish_case_map() to produce the case
1587 Uses D's aux members, which may not otherwise be in use. */
1589 start_case_map (struct dictionary *d)
1591 size_t var_cnt = dict_get_var_cnt (d);
1594 for (i = 0; i < var_cnt; i++)
1596 struct variable *v = dict_get_var (d, i);
1597 int *src_fv = xmalloc (sizeof *src_fv);
1599 var_attach_aux (v, src_fv, var_dtor_free);
1603 /* Produces a case map from dictionary D, which must have been
1604 previously prepared with start_case_map().
1606 Does not retain any reference to D, and clears the aux members
1607 set up by start_case_map().
1609 Returns the new case map, or a null pointer if no mapping is
1610 required (that is, no data has changed position). */
1611 static struct case_map *
1612 finish_case_map (struct dictionary *d)
1614 struct case_map *map;
1615 size_t var_cnt = dict_get_var_cnt (d);
1619 map = xmalloc (sizeof *map);
1620 map->value_cnt = dict_get_next_value_idx (d);
1621 map->map = xmalloc (sizeof *map->map * map->value_cnt);
1622 for (i = 0; i < map->value_cnt; i++)
1626 for (i = 0; i < var_cnt; i++)
1628 struct variable *v = dict_get_var (d, i);
1629 int *src_fv = (int *) var_detach_aux (v);
1632 if (v->fv != *src_fv)
1635 for (idx = 0; idx < v->nv; idx++)
1637 int src_idx = *src_fv + idx;
1638 int dst_idx = v->fv + idx;
1640 assert (map->map[dst_idx] == -1);
1641 map->map[dst_idx] = src_idx;
1648 destroy_case_map (map);
1652 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1658 /* Maps from SRC to DST, applying case map MAP. */
1660 map_case (const struct case_map *map,
1661 const struct ccase *src, struct ccase *dst)
1665 assert (map != NULL);
1666 assert (src != NULL);
1667 assert (dst != NULL);
1668 assert (src != dst);
1670 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1672 int src_idx = map->map[dst_idx];
1674 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1678 /* Destroys case map MAP. */
1680 destroy_case_map (struct case_map *map)