1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
27 #include "file-handle.h"
35 #include "value-labels.h"
40 #include "debug-print.h"
42 /* GET or IMPORT input program. */
45 struct file_handle *handle; /* File to GET or IMPORT from. */
46 size_t case_size; /* Case size in bytes. */
49 /* XSAVE transformation (and related SAVE, EXPORT procedures). */
53 struct file_handle *f; /* Associated system file. */
54 int nvar; /* Number of variables. */
55 struct variable **var; /* Variables. */
56 flt64 *case_buf; /* Case transfer buffer. */
59 /* Options bits set by trim_dictionary(). */
60 #define GTSV_OPT_COMPRESSED 001 /* Compression; (X)SAVE only. */
61 #define GTSV_OPT_SAVE 002 /* The SAVE/XSAVE/EXPORT procedures. */
62 #define GTSV_OPT_MATCH_FILES 004 /* The MATCH FILES procedure. */
63 #define GTSV_OPT_NONE 0
65 static int trim_dictionary (struct dictionary * dict, int *options);
66 static int save_write_case_func (struct ccase *, void *);
67 static trns_proc_func save_trns_proc;
68 static trns_free_func save_trns_free;
70 /* Parses the GET command. */
74 struct file_handle *handle;
75 struct dictionary *dict;
77 int options = GTSV_OPT_NONE;
82 if (lex_match_id ("FILE"))
85 handle = fh_parse_file_handle ();
89 dict = sfm_read_dictionary (handle, NULL);
93 if (0 == trim_dictionary (dict, &options))
95 fh_close_handle (handle);
99 dict_compact_values (dict);
101 dict_destroy (default_dict);
104 pgm = xmalloc (sizeof *pgm);
105 pgm->handle = handle;
106 pgm->case_size = dict_get_case_size (default_dict);
107 vfm_source = create_case_source (&get_source_class, default_dict, pgm);
112 /* SAVE or XSAVE command? */
119 /* Parses the SAVE and XSAVE commands. */
121 cmd_save_internal (enum save_cmd save_cmd)
123 struct file_handle *handle;
124 struct dictionary *dict;
125 int options = GTSV_OPT_SAVE;
128 struct sfm_write_info inf;
133 if (lex_match_id ("OUTFILE"))
136 handle = fh_parse_file_handle ();
140 dict = dict_clone (default_dict);
141 for (i = 0; i < dict_get_var_cnt (dict); i++)
142 dict_get_var (dict, i)->aux = dict_get_var (default_dict, i);
143 if (0 == trim_dictionary (dict, &options))
145 fh_close_handle (handle);
149 /* Write dictionary. */
152 inf.compress = !!(options & GTSV_OPT_COMPRESSED);
153 if (!sfm_write_dictionary (&inf))
156 fh_close_handle (handle);
160 /* Fill in transformation structure. */
161 t = xmalloc (sizeof *t);
162 t->h.proc = save_trns_proc;
163 t->h.free = save_trns_free;
165 t->nvar = dict_get_var_cnt (dict);
166 t->var = xmalloc (sizeof *t->var * t->nvar);
167 for (i = 0; i < t->nvar; i++)
168 t->var[i] = dict_get_var (dict, i)->aux;
169 t->case_buf = xmalloc (sizeof *t->case_buf * inf.case_size);
172 if (save_cmd == CMD_SAVE)
174 procedure (save_write_case_func, t);
175 save_trns_free (&t->h);
179 assert (save_cmd == CMD_XSAVE);
180 add_transformation (&t->h);
186 /* Parses and performs the SAVE procedure. */
190 return cmd_save_internal (CMD_SAVE);
193 /* Parses the XSAVE transformation command. */
197 return cmd_save_internal (CMD_XSAVE);
200 /* Writes the given C to the file specified by T. */
202 do_write_case (struct save_trns *t, struct ccase *c)
204 flt64 *p = t->case_buf;
207 for (i = 0; i < t->nvar; i++)
209 struct variable *v = t->var[i];
210 if (v->type == NUMERIC)
212 double src = case_num (c, v->fv);
220 memcpy (p, case_str (c, v->fv), v->width);
221 memset (&((char *) p)[v->width], ' ',
222 REM_RND_UP (v->width, sizeof *p));
223 p += DIV_RND_UP (v->width, sizeof *p);
227 sfm_write_case (t->f, t->case_buf, p - t->case_buf);
230 /* Writes case C to the system file specified on SAVE. */
232 save_write_case_func (struct ccase *c, void *aux UNUSED)
234 do_write_case (aux, c);
238 /* Writes case C to the system file specified on XSAVE. */
240 save_trns_proc (struct trns_header *h, struct ccase *c, int case_num UNUSED)
242 struct save_trns *t = (struct save_trns *) h;
243 do_write_case (t, c);
247 /* Frees a SAVE transformation. */
249 save_trns_free (struct trns_header *pt)
251 struct save_trns *t = (struct save_trns *) pt;
253 fh_close_handle (t->f);
259 static int rename_variables (struct dictionary * dict);
261 /* The GET and SAVE commands have a common structure after the
262 FILE/OUTFILE subcommand. This function parses this structure and
263 returns nonzero on success, zero on failure. It both reads
264 *OPTIONS, for the GTSV_OPT_SAVE bit, and writes it, for the
265 GTSV_OPT_COMPRESSED bit. */
266 /* FIXME: IN, FIRST, LAST, MAP. */
267 /* FIXME? Should we call dict_compact_values() on dict as a
270 trim_dictionary (struct dictionary *dict, int *options)
272 if (get_scompression())
273 *options |= GTSV_OPT_COMPRESSED;
275 if (*options & GTSV_OPT_SAVE)
277 /* Delete all the scratch variables. */
282 v = xmalloc (sizeof *v * dict_get_var_cnt (dict));
284 for (i = 0; i < dict_get_var_cnt (dict); i++)
285 if (dict_class_from_id (dict_get_var (dict, i)->name) == DC_SCRATCH)
286 v[nv++] = dict_get_var (dict, i);
287 dict_delete_vars (dict, v, nv);
291 while ((*options & GTSV_OPT_MATCH_FILES) || lex_match ('/'))
293 if (!(*options & GTSV_OPT_MATCH_FILES) && lex_match_id ("COMPRESSED"))
294 *options |= GTSV_OPT_COMPRESSED;
295 else if (!(*options & GTSV_OPT_MATCH_FILES) && lex_match_id ("UNCOMPRESSED"))
296 *options &= ~GTSV_OPT_COMPRESSED;
297 else if (lex_match_id ("DROP"))
303 if (!parse_variables (dict, &v, &nv, PV_NONE))
305 dict_delete_vars (dict, v, nv);
308 else if (lex_match_id ("KEEP"))
315 if (!parse_variables (dict, &v, &nv, PV_NONE))
318 /* Move the specified variables to the beginning. */
319 dict_reorder_vars (dict, v, nv);
321 /* Delete the remaining variables. */
322 v = xrealloc (v, (dict_get_var_cnt (dict) - nv) * sizeof *v);
323 for (i = nv; i < dict_get_var_cnt (dict); i++)
324 v[i - nv] = dict_get_var (dict, i);
325 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
328 else if (lex_match_id ("RENAME"))
330 if (!rename_variables (dict))
335 lex_error (_("while expecting a valid subcommand"));
339 if (dict_get_var_cnt (dict) == 0)
341 msg (SE, _("All variables deleted from system file dictionary."));
345 if (*options & GTSV_OPT_MATCH_FILES)
351 lex_error (_("expecting end of command"));
358 /* Parses and performs the RENAME subcommand of GET and SAVE. */
360 rename_variables (struct dictionary * dict)
378 v = parse_dict_variable (dict);
381 if (!lex_force_match ('=')
384 if (!strncmp (tokid, v->name, 8))
386 if (dict_lookup_var (dict, tokid) != NULL)
388 msg (SE, _("Cannot rename %s as %s because there already exists "
389 "a variable named %s. To rename variables with "
390 "overlapping names, use a single RENAME subcommand "
391 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
392 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
396 dict_rename_var (dict, v, tokid);
405 while (lex_match ('('))
409 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
411 if (!lex_match ('='))
413 msg (SE, _("`=' expected after variable list."));
416 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
420 msg (SE, _("Number of variables on left side of `=' (%d) does not "
421 "match number of variables on right side (%d), in "
422 "parenthesized group %d of RENAME subcommand."),
423 nv - old_nv, nn - old_nv, group);
426 if (!lex_force_match (')'))
431 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
433 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
439 for (i = 0; i < nn; i++)
447 /* Clears internal state related to GET input procedure. */
449 get_source_destroy (struct case_source *source)
451 struct get_pgm *pgm = source->aux;
453 /* It is not necessary to destroy the dictionary because if we get
454 to this point then the dictionary is default_dict. */
455 fh_close_handle (pgm->handle);
459 /* Reads all the cases from the data file into C and passes them
460 to WRITE_CASE one by one, passing WC_DATA. */
462 get_source_read (struct case_source *source,
464 write_case_func *write_case, write_case_data wc_data)
466 struct get_pgm *pgm = source->aux;
468 while (sfm_read_case (pgm->handle, c, default_dict)
469 && write_case (wc_data))
473 const struct case_source_class get_source_class =
484 #include "debug-print.h"
489 MTF_FILE, /* Specified on FILE= subcommand. */
490 MTF_TABLE /* Specified on TABLE= subcommand. */
493 /* One of the files on MATCH FILES. */
496 struct mtf_file *next, *prev;
497 /* Next, previous in the list of files. */
498 struct mtf_file *next_min; /* Next in the chain of minimums. */
500 int type; /* One of MTF_*. */
501 struct variable **by; /* List of BY variables for this file. */
502 struct file_handle *handle; /* File handle for the file. */
503 struct dictionary *dict; /* Dictionary from system file. */
504 char in[9]; /* Name of the variable from IN=. */
505 char first[9], last[9]; /* Name of the variables from FIRST=, LAST=. */
506 struct ccase input; /* Input record. */
509 /* MATCH FILES procedure. */
512 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
513 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
515 struct variable **by; /* Variables on the BY subcommand. */
516 size_t by_cnt; /* Number of variables on BY subcommand. */
518 struct dictionary *dict; /* Dictionary of output file. */
519 struct case_sink *sink; /* Sink to receive output. */
520 struct ccase *mtf_case; /* Case used for output. */
522 unsigned seq_num; /* Have we initialized this variable? */
523 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
526 static void mtf_free (struct mtf_proc *);
527 static void mtf_free_file (struct mtf_file *);
528 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
529 static void mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
531 static void mtf_read_nonactive_records (void *);
532 static void mtf_processing_finish (void *);
533 static int mtf_processing (struct ccase *, void *);
535 static char *var_type_description (struct variable *);
537 /* Parse and execute the MATCH FILES command. */
539 cmd_match_files (void)
542 struct mtf_file *first_table = NULL;
546 mtf.head = mtf.tail = NULL;
549 mtf.dict = dict_create ();
554 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
560 if (lex_match (T_BY))
564 msg (SE, _("The BY subcommand may be given once at most."));
570 if (!parse_variables (mtf.dict, &mtf.by, &mtf.by_cnt,
571 PV_NO_DUPLICATE | PV_NO_SCRATCH))
574 else if (token != T_ID)
579 else if (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid))
581 struct mtf_file *file = xmalloc (sizeof *file);
583 file->in[0] = file->first[0] = file->last[0] = '\0';
586 case_nullify (&file->input);
588 if (lex_match_id ("FILE"))
589 file->type = MTF_FILE;
590 else if (lex_match_id ("TABLE"))
592 file->type = MTF_TABLE;
598 /* FILEs go first, then TABLEs. */
599 if (file->type == MTF_TABLE || first_table == NULL)
602 file->prev = mtf.tail;
604 mtf.tail->next = file;
606 if (mtf.head == NULL)
608 if (file->type == MTF_TABLE && first_table == NULL)
613 assert (file->type == MTF_FILE);
614 file->next = first_table;
615 file->prev = first_table->prev;
616 if (first_table->prev)
617 first_table->prev->next = file;
620 first_table->prev = file;
631 msg (SE, _("The active file may not be specified more "
637 assert (pgm_state != STATE_INPUT);
638 if (pgm_state == STATE_INIT)
640 msg (SE, _("Cannot specify the active file since no active "
641 "file has been defined."));
648 _("MATCH FILES may not be used after TEMPORARY when "
649 "the active file is an input source. "
650 "Temporary transformations will be made permanent."));
656 file->handle = fh_parse_file_handle ();
663 file->dict = sfm_read_dictionary (file->handle, NULL);
666 case_create (&file->input, dict_get_next_value_idx (file->dict));
669 file->dict = default_dict;
670 if (!mtf_merge_dictionary (mtf.dict, file))
673 else if (lex_id_match ("IN", tokid)
674 || lex_id_match ("FIRST", tokid)
675 || lex_id_match ("LAST", tokid))
680 if (mtf.tail == NULL)
682 msg (SE, _("IN, FIRST, and LAST subcommands may not occur "
683 "before the first FILE or TABLE."));
687 if (lex_match_id ("IN"))
692 else if (lex_match_id ("FIRST"))
694 name = mtf.tail->first;
697 else if (lex_match_id ("LAST"))
699 name = mtf.tail->last;
717 msg (SE, _("Multiple %s subcommands for a single FILE or "
722 strcpy (name, tokid);
725 if (!dict_create_var (mtf.dict, name, 0))
727 msg (SE, _("Duplicate variable name %s while creating %s "
733 else if (lex_id_match ("RENAME", tokid)
734 || lex_id_match ("KEEP", tokid)
735 || lex_id_match ("DROP", tokid))
737 int options = GTSV_OPT_MATCH_FILES;
739 if (mtf.tail == NULL)
741 msg (SE, _("RENAME, KEEP, and DROP subcommands may not occur "
742 "before the first FILE or TABLE."));
746 if (!trim_dictionary (mtf.tail->dict, &options))
749 else if (lex_match_id ("MAP"))
759 while (token != '.');
765 msg (SE, _("The BY subcommand is required when a TABLE subcommand "
773 struct mtf_file *iter;
775 for (iter = mtf.head; iter; iter = iter->next)
779 iter->by = xmalloc (sizeof *iter->by * mtf.by_cnt);
781 for (i = 0; i < mtf.by_cnt; i++)
783 iter->by[i] = dict_lookup_var (iter->dict, mtf.by[i]->name);
784 if (iter->by[i] == NULL)
786 msg (SE, _("File %s lacks BY variable %s."),
787 iter->handle ? handle_get_name (iter->handle) : "*",
795 /* MATCH FILES performs an n-way merge on all its input files.
798 1. Read one input record from every input FILE.
800 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
802 3. Find the FILE input record with minimum BY values. Store all
803 the values from this input record into the output record.
805 4. Find all the FILE input records with BY values identical to
806 the minimums. Store all the values from these input records into
809 5. For every TABLE, read another record as long as the BY values
810 on the TABLE's input record are less than the FILEs' BY values.
811 If an exact match is found, store all the values from the TABLE
812 input record into the output record.
814 6. Write the output record.
816 7. Read another record from each input file FILE and TABLE that
817 we stored values from above. If we come to the end of one of the
818 input files, remove it from the list of input files.
820 8. Repeat from step 2.
822 Unfortunately, this algorithm can't be directly implemented
823 because there's no function to read a record from the active
824 file; instead, it has to be done using callbacks.
826 FIXME: For merging large numbers of files (more than 10?) a
827 better algorithm would use a heap for finding minimum
831 discard_variables ();
833 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
835 mtf.seq_nums = xmalloc (dict_get_var_cnt (mtf.dict)
836 * sizeof *mtf.seq_nums);
837 memset (mtf.seq_nums, 0,
838 dict_get_var_cnt (mtf.dict) * sizeof *mtf.seq_nums);
839 mtf.mtf_case = xmalloc (dict_get_case_size (mtf.dict));
841 mtf_read_nonactive_records (NULL);
843 procedure (mtf_processing, NULL);
844 mtf_processing_finish (NULL);
846 dict_destroy (default_dict);
847 default_dict = mtf.dict;
849 vfm_source = mtf.sink->class->make_source (mtf.sink);
850 free_case_sink (mtf.sink);
860 /* Repeats 2...8 an arbitrary number of times. */
862 mtf_processing_finish (void *mtf_)
864 struct mtf_proc *mtf = mtf_;
865 struct mtf_file *iter;
867 /* Find the active file and delete it. */
868 for (iter = mtf->head; iter; iter = iter->next)
869 if (iter->handle == NULL)
871 mtf_delete_file_in_place (mtf, &iter);
875 while (mtf->head && mtf->head->type == MTF_FILE)
876 if (!mtf_processing (NULL, mtf))
880 /* Return a string in a static buffer describing V's variable type and
883 var_type_description (struct variable *v)
885 static char buf[2][32];
892 if (v->type == NUMERIC)
893 strcpy (s, "numeric");
896 assert (v->type == ALPHA);
897 sprintf (s, "string with width %d", v->width);
902 /* Free FILE and associated data. */
904 mtf_free_file (struct mtf_file *file)
906 fh_close_handle (file->handle);
907 if (file->dict != NULL && file->dict != default_dict)
908 dict_destroy (file->dict);
911 case_destroy (&file->input);
915 /* Free all the data for the MATCH FILES procedure. */
917 mtf_free (struct mtf_proc *mtf)
919 struct mtf_file *iter, *next;
921 for (iter = mtf->head; iter; iter = next)
925 mtf_free_file (iter);
930 dict_destroy (mtf->dict);
931 free (mtf->seq_nums);
934 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
935 file in the chain, or to NULL if was the last in the chain. */
937 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
939 struct mtf_file *f = *file;
942 f->prev->next = f->next;
944 f->next->prev = f->prev;
954 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
956 struct variable *v = dict_get_var (f->dict, i);
957 union value *out = case_data_rw (mtf->mtf_case, v->p.mtf.master->fv);
959 if (v->type == NUMERIC)
962 memset (out->s, ' ', v->width);
969 /* Read a record from every input file except the active file. */
971 mtf_read_nonactive_records (void *mtf_ UNUSED)
973 struct mtf_proc *mtf = mtf_;
974 struct mtf_file *iter;
976 for (iter = mtf->head; iter; )
980 if (!sfm_read_case (iter->handle, &iter->input, iter->dict))
981 mtf_delete_file_in_place (mtf, &iter);
990 /* Compare the BY variables for files A and B; return -1 if A < B, 0
991 if A == B, 1 if A > B. */
993 mtf_compare_BY_values (struct mtf_proc *mtf,
994 struct mtf_file *a, struct mtf_file *b,
997 struct ccase *a_input, *b_input;
1000 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1001 a_input = case_is_null (&a->input) ? c : &a->input;
1002 b_input = case_is_null (&b->input) ? c : &b->input;
1003 for (i = 0; i < mtf->by_cnt; i++)
1005 assert (a->by[i]->type == b->by[i]->type);
1006 assert (a->by[i]->width == b->by[i]->width);
1008 if (a->by[i]->type == NUMERIC)
1010 double af = case_num (a_input, a->by[i]->fv);
1011 double bf = case_num (b_input, b->by[i]->fv);
1022 assert (a->by[i]->type == ALPHA);
1023 result = memcmp (case_str (a_input, a->by[i]->fv),
1024 case_str (b_input, b->by[i]->fv),
1028 else if (result > 0)
1035 /* Perform one iteration of steps 3...7 above. */
1037 mtf_processing (struct ccase *c, void *mtf_ UNUSED)
1039 struct mtf_proc *mtf = mtf_;
1040 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1041 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BY values. */
1042 struct mtf_file *iter; /* Iterator. */
1046 /* If the active file doesn't have the minimum BY values, don't
1047 return because that would cause a record to be skipped. */
1050 if (mtf->head->type == MTF_TABLE)
1053 /* 3. Find the FILE input record with minimum BY values. Store
1054 all the values from this input record into the output record.
1056 4. Find all the FILE input records with BY values identical
1057 to the minimums. Store all the values from these input
1058 records into the output record. */
1059 min_head = min_tail = mtf->head;
1060 max_head = max_tail = NULL;
1061 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1063 switch (mtf_compare_BY_values (mtf, min_head, iter, c))
1067 max_tail = max_tail->next_min = iter;
1069 max_head = max_tail = iter;
1073 min_tail = min_tail->next_min = iter;
1079 max_tail->next_min = min_head;
1080 max_tail = min_tail;
1084 max_head = min_head;
1085 max_tail = min_tail;
1087 min_head = min_tail = iter;
1094 /* 5. For every TABLE, read another record as long as the BY
1095 values on the TABLE's input record are less than the FILEs'
1096 BY values. If an exact match is found, store all the values
1097 from the TABLE input record into the output record. */
1100 struct mtf_file *next = iter->next;
1102 assert (iter->type == MTF_TABLE);
1104 if (iter->handle == NULL)
1108 switch (mtf_compare_BY_values (mtf, min_head, iter, c))
1112 max_tail = max_tail->next_min = iter;
1114 max_head = max_tail = iter;
1118 min_tail = min_tail->next_min = iter;
1122 if (iter->handle == NULL)
1124 if (sfm_read_case (iter->handle, &iter->input, iter->dict))
1126 mtf_delete_file_in_place (mtf, &iter);
1136 /* Next sequence number. */
1139 /* Store data to all the records we are using. */
1141 min_tail->next_min = NULL;
1142 for (iter = min_head; iter; iter = iter->next_min)
1146 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1148 struct variable *v = dict_get_var (iter->dict, i);
1149 struct ccase *record;
1152 if (mtf->seq_nums[v->p.mtf.master->index] == mtf->seq_num)
1154 mtf->seq_nums[v->p.mtf.master->index] = mtf->seq_num;
1156 record = case_is_null (&iter->input) ? c : &iter->input;
1158 assert (v->type == NUMERIC || v->type == ALPHA);
1159 out = case_data_rw (mtf->mtf_case, v->p.mtf.master->fv);
1160 if (v->type == NUMERIC)
1161 out->f = case_num (record, v->fv);
1163 memcpy (out->s, case_str (record, v->fv), v->width);
1167 /* Store missing values to all the records we're not using. */
1169 max_tail->next_min = NULL;
1170 for (iter = max_head; iter; iter = iter->next_min)
1174 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1176 struct variable *v = dict_get_var (iter->dict, i);
1179 if (mtf->seq_nums[v->p.mtf.master->index] == mtf->seq_num)
1181 mtf->seq_nums[v->p.mtf.master->index] = mtf->seq_num;
1183 out = case_data_rw (mtf->mtf_case, v->p.mtf.master->fv);
1184 if (v->type == NUMERIC)
1187 memset (out->s, ' ', v->width);
1190 if (iter->handle == NULL)
1194 /* 6. Write the output record. */
1195 mtf->sink->class->write (mtf->sink, mtf->mtf_case);
1197 /* 7. Read another record from each input file FILE and TABLE
1198 that we stored values from above. If we come to the end of
1199 one of the input files, remove it from the list of input
1201 for (iter = min_head; iter && iter->type == MTF_FILE; )
1203 struct mtf_file *next = iter->next_min;
1207 if (!sfm_read_case (iter->handle, &iter->input, iter->dict))
1208 mtf_delete_file_in_place (mtf, &iter);
1218 return (mtf->head && mtf->head->type != MTF_TABLE);
1221 /* Merge the dictionary for file F into the master dictionary
1224 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1226 struct dictionary *d = f->dict;
1227 const char *d_docs, *m_docs;
1229 if (dict_get_label (m) == NULL)
1230 dict_set_label (m, dict_get_label (d));
1232 d_docs = dict_get_documents (d);
1233 m_docs = dict_get_documents (m);
1237 dict_set_documents (m, d_docs);
1243 new_len = strlen (m_docs) + strlen (d_docs);
1244 new_docs = xmalloc (new_len + 1);
1245 strcpy (new_docs, m_docs);
1246 strcat (new_docs, d_docs);
1247 dict_set_documents (m, new_docs);
1252 dict_compact_values (d);
1257 for (i = 0; i < dict_get_var_cnt (d); i++)
1259 struct variable *dv = dict_get_var (d, i);
1260 struct variable *mv = dict_lookup_var (m, dv->name);
1262 assert (dv->type == ALPHA || dv->width == 0);
1263 assert (!mv || mv->type == ALPHA || mv->width == 0);
1264 if (mv && dv->width == mv->width)
1266 if (val_labs_count (dv->val_labs)
1267 && !val_labs_count (mv->val_labs))
1268 mv->val_labs = val_labs_copy (dv->val_labs);
1269 if (dv->miss_type != MISSING_NONE
1270 && mv->miss_type == MISSING_NONE)
1271 copy_missing_values (mv, dv);
1273 if (mv && dv->label && !mv->label)
1274 mv->label = xstrdup (dv->label);
1277 mv = dict_clone_var (m, dv, dv->name);
1278 assert (mv != NULL);
1280 else if (mv->width != dv->width)
1282 msg (SE, _("Variable %s in file %s (%s) has different "
1283 "type or width from the same variable in "
1284 "earlier file (%s)."),
1285 dv->name, handle_get_name (f->handle),
1286 var_type_description (dv), var_type_description (mv));
1289 dv->p.mtf.master = mv;
1296 /* IMPORT command. */
1298 /* Parses the IMPORT command. */
1302 struct file_handle *handle = NULL;
1303 struct dictionary *dict;
1304 struct get_pgm *pgm;
1305 int options = GTSV_OPT_NONE;
1312 if (lex_match_id ("FILE") || token == T_STRING)
1316 handle = fh_parse_file_handle ();
1320 else if (lex_match_id ("TYPE"))
1324 if (lex_match_id ("COMM"))
1326 else if (lex_match_id ("TAPE"))
1330 lex_error (_("expecting COMM or TAPE"));
1336 if (!lex_match ('/') && token != '.')
1342 discard_variables ();
1344 dict = pfm_read_dictionary (handle, NULL);
1348 if (0 == trim_dictionary (dict, &options))
1350 fh_close_handle (handle);
1354 dict_compact_values (dict);
1356 dict_destroy (default_dict);
1357 default_dict = dict;
1359 pgm = xmalloc (sizeof *pgm);
1360 pgm->handle = handle;
1361 pgm->case_size = dict_get_case_size (default_dict);
1362 vfm_source = create_case_source (&import_source_class, default_dict, pgm);
1367 /* Reads all the cases from the data file and passes them to
1370 import_source_read (struct case_source *source,
1372 write_case_func *write_case, write_case_data wc_data)
1374 struct get_pgm *pgm = source->aux;
1376 while (pfm_read_case (pgm->handle, c, default_dict))
1377 if (!write_case (wc_data))
1381 const struct case_source_class import_source_class =
1389 static int export_write_case_func (struct ccase *c, void *);
1391 /* Parses the EXPORT command. */
1392 /* FIXME: same as cmd_save_internal(). */
1396 struct file_handle *handle;
1397 struct dictionary *dict;
1398 int options = GTSV_OPT_SAVE;
1400 struct save_trns *t;
1405 if (lex_match_id ("OUTFILE"))
1408 handle = fh_parse_file_handle ();
1412 dict = dict_clone (default_dict);
1413 for (i = 0; i < dict_get_var_cnt (dict); i++)
1414 dict_get_var (dict, i)->aux = dict_get_var (default_dict, i);
1415 if (0 == trim_dictionary (dict, &options))
1417 fh_close_handle (handle);
1421 /* Write dictionary. */
1422 if (!pfm_write_dictionary (handle, dict))
1424 dict_destroy (dict);
1425 fh_close_handle (handle);
1429 /* Fill in transformation structure. */
1430 t = xmalloc (sizeof *t);
1431 t->h.proc = save_trns_proc;
1432 t->h.free = save_trns_free;
1434 t->nvar = dict_get_var_cnt (dict);
1435 t->var = xmalloc (sizeof *t->var * t->nvar);
1436 for (i = 0; i < t->nvar; i++)
1437 t->var[i] = dict_get_var (dict, i)->aux;
1438 t->case_buf = xmalloc (sizeof *t->case_buf * t->nvar);
1439 dict_destroy (dict);
1441 procedure (export_write_case_func, t);
1442 save_trns_free (&t->h);
1447 /* Writes case C to the EXPORT file. */
1449 export_write_case_func (struct ccase *c, void *aux)
1451 struct save_trns *t = aux;
1452 union value *p = (union value *) t->case_buf;
1455 for (i = 0; i < t->nvar; i++)
1457 struct variable *v = t->var[i];
1459 if (v->type == NUMERIC)
1460 (*p++).f = case_num (c, v->fv);
1462 (*p++).c = (char *) case_str (c, v->fv);
1465 pfm_write_case (t->f, (union value *) t->case_buf);