1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
26 #include "file-handle.h"
34 #include "value-labels.h"
39 #include "debug-print.h"
41 /* GET or IMPORT input program. */
/* An instance is allocated by the GET and IMPORT parsers, handed to
   create_case_source() as auxiliary data, and read back through
   source->aux by the case-source callbacks in this file. */
44 struct file_handle *handle; /* File to GET or IMPORT from. */
45 size_t case_size; /* Case size in bytes, taken from dict_get_case_size(). */
48 /* XSAVE transformation (and related SAVE, EXPORT procedures). */
/* Code in this file reaches the embedded transformation header as
   member h and casts a struct trns_header pointer back to this
   structure, so h is presumably the first member -- confirm against
   the full declaration. */
52 struct file_handle *f; /* Associated system file. */
53 int nvar; /* Number of variables. */
54 struct variable **var; /* Variables to write; NVAR entries. */
55 flt64 *case_buf; /* Case transfer buffer, refilled for every case written. */
58 /* Option bits exchanged with trim_dictionary() through *OPTIONS.
   Callers set GTSV_OPT_SAVE or GTSV_OPT_MATCH_FILES before the call;
   trim_dictionary() itself sets or clears GTSV_OPT_COMPRESSED.  The
   values are octal single-bit masks. */
59 #define GTSV_OPT_COMPRESSED 001 /* Compression; (X)SAVE only. */
60 #define GTSV_OPT_SAVE 002 /* The SAVE/XSAVE/EXPORT procedures. */
61 #define GTSV_OPT_MATCH_FILES 004 /* The MATCH FILES procedure. */
62 #define GTSV_OPT_NONE 0
/* Forward declarations for functions defined later in this file. */
64 static int trim_dictionary (struct dictionary * dict, int *options);
65 static int save_write_case_func (struct ccase *, void *);
66 static trns_proc_func save_trns_proc;
67 static trns_free_func save_trns_free;
/* Debugging aid: prints the variable names in a dictionary. */
70 void dump_dict_variables (struct dictionary *);
73 /* Parses the GET command. */
77 struct file_handle *handle;
78 struct dictionary *dict;
80 int options = GTSV_OPT_NONE;
85 if (lex_match_id ("FILE"))
88 handle = fh_parse_file_handle ();
/* Read the dictionary of the system file named by HANDLE. */
92 dict = sfm_read_dictionary (handle, NULL);
97 dump_dict_variables (dict);
/* Apply the subcommands handled by trim_dictionary(), which returns
   zero on failure; the file handle is closed in that case. */
99 if (0 == trim_dictionary (dict, &options))
101 fh_close_handle (handle);
105 dump_dict_variables (dict);
108 dict_compact_values (dict);
/* Debug listing: each variable with its file position (get.fv, get.nv)
   and its in-memory position (fv, nv). */
111 printf (_("GET translation table from file to memory:\n"));
112 for (i = 0; i < dict->nvar; i++)
114 struct variable *v = dict->var[i];
116 printf (_(" %8s from %3d,%3d to %3d,%3d\n"), v->name,
117 v->get.fv, v->get.nv, v->fv, v->nv);
121 dict_destroy (default_dict);
/* Package the file handle and case size for the case source;
   get_source_read() later reads cases through pgm->handle. */
124 pgm = xmalloc (sizeof *pgm);
125 pgm->handle = handle;
126 pgm->case_size = dict_get_case_size (default_dict);
127 vfm_source = create_case_source (&get_source_class, default_dict, pgm);
132 /* SAVE or XSAVE command? */
139 /* Parses the SAVE and XSAVE commands.  SAVE_CMD selects between
   writing all cases immediately as a procedure (CMD_SAVE) and
   registering the writer as a transformation (CMD_XSAVE). */
141 cmd_save_internal (enum save_cmd save_cmd)
143 struct file_handle *handle;
144 struct dictionary *dict;
145 int options = GTSV_OPT_SAVE;
148 struct sfm_write_info inf;
153 if (lex_match_id ("OUTFILE"))
156 handle = fh_parse_file_handle ();
/* Trimming is done on a clone of default_dict, not on default_dict. */
160 dict = dict_clone (default_dict);
162 dump_dict_variables (dict);
/* Point each cloned variable at the default_dict variable with the
   same index; the aux pointers are read back below, after trimming,
   to build the list of variables to write. */
164 for (i = 0; i < dict_get_var_cnt (dict); i++)
165 dict_get_var (dict, i)->aux = dict_get_var (default_dict, i);
166 if (0 == trim_dictionary (dict, &options))
168 fh_close_handle (handle);
173 dump_dict_variables (dict);
176 /* Write dictionary. */
/* The double negation reduces the masked bit to 0 or 1. */
179 inf.compress = !!(options & GTSV_OPT_COMPRESSED);
180 if (!sfm_write_dictionary (&inf))
183 fh_close_handle (handle);
187 /* Fill in transformation structure. */
188 t = xmalloc (sizeof *t);
189 t->h.proc = save_trns_proc;
190 t->h.free = save_trns_free;
192 t->nvar = dict_get_var_cnt (dict);
193 t->var = xmalloc (sizeof *t->var * t->nvar);
194 for (i = 0; i < t->nvar; i++)
195 t->var[i] = dict_get_var (dict, i)->aux;
/* The transfer buffer holds inf.case_size flt64 elements. */
196 t->case_buf = xmalloc (sizeof *t->case_buf * inf.case_size);
/* SAVE runs the writer over every case now and then frees it; XSAVE
   hands the transformation to add_transformation() instead. */
199 if (save_cmd == CMD_SAVE)
201 procedure (save_write_case_func, t);
202 save_trns_free (&t->h);
206 assert (save_cmd == CMD_XSAVE);
207 add_transformation (&t->h);
213 /* Parses and performs the SAVE procedure.  Delegates to
   cmd_save_internal() with CMD_SAVE and returns its result. */
217 return cmd_save_internal (CMD_SAVE);
220 /* Parses the XSAVE transformation command.  Delegates to
   cmd_save_internal() with CMD_XSAVE and returns its result. */
224 return cmd_save_internal (CMD_XSAVE);
227 /* Writes the given C to the file specified by T.  Each of T's
   variables is packed from C into T->case_buf, and the buffer is then
   passed to sfm_write_case() together with the number of flt64
   elements used. */
229 do_write_case (struct save_trns *t, struct ccase *c)
231 flt64 *p = t->case_buf;
234 for (i = 0; i < t->nvar; i++)
236 struct variable *v = t->var[i];
237 if (v->type == NUMERIC)
239 double src = c->data[v->fv].f;
/* String value: copy WIDTH bytes, fill with spaces after them
   (REM_RND_UP presumably gives the distance to the next multiple of
   sizeof *p -- confirm), and advance P by the width rounded up to
   whole flt64 elements. */
247 memcpy (p, c->data[v->fv].s, v->width);
248 memset (&((char *) p)[v->width], ' ',
249 REM_RND_UP (v->width, sizeof *p));
250 p += DIV_RND_UP (v->width, sizeof *p);
254 sfm_write_case (t->f, t->case_buf, p - t->case_buf);
257 /* Writes case C to the system file specified on SAVE. */
/* AUX is the struct save_trns passed to procedure() by
   cmd_save_internal().
   NOTE(review): AUX is marked UNUSED although it is passed on to
   do_write_case(); the annotation looks stale. */
259 save_write_case_func (struct ccase *c, void *aux UNUSED)
261 do_write_case (aux, c);
265 /* Writes case C to the system file specified on XSAVE. */
/* H is the header embedded in a struct save_trns; cmd_save_internal()
   installs this function as h.proc.  CASE_NUM is not used. */
267 save_trns_proc (struct trns_header *h, struct ccase *c, int case_num UNUSED)
269 struct save_trns *t = (struct save_trns *) h;
270 do_write_case (t, c);
274 /* Frees a SAVE transformation. */
/* PT is the header embedded in a struct save_trns.  Besides being
   installed as h.free, this function is called directly after the
   SAVE and EXPORT procedures finish.  The associated file handle is
   closed here. */
276 save_trns_free (struct trns_header *pt)
278 struct save_trns *t = (struct save_trns *) pt;
280 fh_close_handle (t->f);
286 static int rename_variables (struct dictionary * dict);
288 /* The GET and SAVE commands have a common structure after the
289 FILE/OUTFILE subcommand. This function parses this structure and
290 returns nonzero on success, zero on failure. It both reads
291 *OPTIONS, for the GTSV_OPT_SAVE bit, and writes it, for the
292 GTSV_OPT_COMPRESSED bit. */
/* The GTSV_OPT_MATCH_FILES bit of *OPTIONS is read as well: when it is
   set, the subcommand loop runs without requiring a leading slash and
   COMPRESSED and UNCOMPRESSED are not recognized. */
293 /* FIXME: IN, FIRST, LAST, MAP. */
294 /* FIXME? Should we call dict_compact_values() on dict as a
297 trim_dictionary (struct dictionary *dict, int *options)
299 if (get_scompression())
300 *options |= GTSV_OPT_COMPRESSED;
302 if (*options & GTSV_OPT_SAVE)
304 /* Delete all the scratch variables. */
/* V is sized for the worst case in which every variable is scratch. */
309 v = xmalloc (sizeof *v * dict_get_var_cnt (dict));
311 for (i = 0; i < dict_get_var_cnt (dict); i++)
312 if (dict_class_from_id (dict_get_var (dict, i)->name) == DC_SCRATCH)
313 v[nv++] = dict_get_var (dict, i);
314 dict_delete_vars (dict, v, nv);
/* Subcommand loop. */
318 while ((*options & GTSV_OPT_MATCH_FILES) || lex_match ('/'))
320 if (!(*options & GTSV_OPT_MATCH_FILES) && lex_match_id ("COMPRESSED"))
321 *options |= GTSV_OPT_COMPRESSED;
322 else if (!(*options & GTSV_OPT_MATCH_FILES) && lex_match_id ("UNCOMPRESSED"))
323 *options &= ~GTSV_OPT_COMPRESSED;
324 else if (lex_match_id ("DROP"))
330 if (!parse_variables (dict, &v, &nv, PV_NONE))
332 dict_delete_vars (dict, v, nv);
335 else if (lex_match_id ("KEEP"))
342 if (!parse_variables (dict, &v, &nv, PV_NONE))
345 /* Move the specified variables to the beginning. */
346 dict_reorder_vars (dict, v, nv);
348 /* Delete the remaining variables. */
/* V is reused to hold the variables from index NV onward, which are
   the ones not named on KEEP. */
349 v = xrealloc (v, (dict_get_var_cnt (dict) - nv) * sizeof *v);
350 for (i = nv; i < dict_get_var_cnt (dict); i++)
351 v[i - nv] = dict_get_var (dict, i);
352 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
355 else if (lex_match_id ("RENAME"))
357 if (!rename_variables (dict))
362 lex_error (_("while expecting a valid subcommand"));
/* A dictionary left with no variables is reported as an error. */
366 if (dict_get_var_cnt (dict) == 0)
368 msg (SE, _("All variables deleted from system file dictionary."));
372 if (*options & GTSV_OPT_MATCH_FILES)
378 lex_error (_("expecting end of command"));
385 /* Parses and performs the RENAME subcommand of GET and SAVE. */
/* Two forms are visible below: a single variable followed by an equals
   sign and a new name, and one or more parenthesized groups of the
   form (old-list = new-list) that are applied together. */
387 rename_variables (struct dictionary * dict)
/* Single-variable form: the new name is taken from tokid. */
405 v = parse_dict_variable (dict);
408 if (!lex_force_match ('=')
/* The new name is compared with the current one over 8 characters. */
411 if (!strncmp (tokid, v->name, 8))
/* Renaming onto a name that already exists in DICT is rejected. */
413 if (dict_lookup_var (dict, tokid) != NULL)
415 msg (SE, _("Cannot rename %s as %s because there already exists "
416 "a variable named %s. To rename variables with "
417 "overlapping names, use a single RENAME subcommand "
418 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
419 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
423 dict_rename_var (dict, v, tokid);
/* Parenthesized form: PV_APPEND makes each group add to the lists
   V and NEW_NAMES gathered so far. */
432 while (lex_match ('('))
436 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
438 if (!lex_match ('='))
440 msg (SE, _("`=' expected after variable list."));
443 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
447 msg (SE, _("Number of variables on left side of `=' (%d) does not "
448 "match number of variables on right side (%d), in "
449 "parenthesized group %d of RENAME subcommand."),
450 nv - old_nv, nn - old_nv, group);
453 if (!lex_force_match (')'))
/* All collected renames are applied in one call; on failure ERR_NAME
   identifies the duplicated name reported in the message. */
458 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
460 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
466 for (i = 0; i < nn; i++)
/* Prints a heading followed by the name of every variable in DICT,
   separated by commas, using printf().  The command parsers in this
   file call it before and after trim_dictionary(). */
476 dump_dict_variables (struct dictionary * dict)
480 printf (_("\nVariables in dictionary:\n"));
481 for (i = 0; i < dict->nvar; i++)
482 printf ("%s, ", dict->var[i]->name);
487 /* Clears internal state related to GET input procedure. */
/* SOURCE->aux is the struct get_pgm attached when the case source was
   created; its file handle is closed here. */
489 get_source_destroy (struct case_source *source)
491 struct get_pgm *pgm = source->aux;
493 /* It is not necessary to destroy the dictionary because if we get
494 to this point then the dictionary is default_dict. */
495 fh_close_handle (pgm->handle);
499 /* Reads all the cases from the data file into C and passes them
500 to WRITE_CASE one by one, passing WC_DATA. */
/* The loop ends as soon as sfm_read_case() or WRITE_CASE returns
   zero.  Cases are read through the handle stored in SOURCE->aux and
   laid out according to default_dict. */
502 get_source_read (struct case_source *source,
504 write_case_func *write_case, write_case_data wc_data)
506 struct get_pgm *pgm = source->aux;
508 while (sfm_read_case (pgm->handle, c->data, default_dict)
509 && write_case (wc_data))
513 const struct case_source_class get_source_class =
524 #include "debug-print.h"
529 MTF_FILE, /* Specified on FILE= subcommand. */
530 MTF_TABLE /* Specified on TABLE= subcommand. */
533 /* One of the files on MATCH FILES. */
536 struct mtf_file *next, *prev;
537 /* Next, previous in the list of files. */
538 struct mtf_file *next_min; /* Next in the chain of minimums; mtf_processing() also links its non-minimum chain through it. */
540 int type; /* One of MTF_*. */
541 struct variable **by; /* List of BY variables for this file. */
542 struct file_handle *handle; /* File handle for the file; NULL marks the active file. */
543 struct dictionary *dict; /* Dictionary from system file. */
544 char in[9]; /* Name of the variable from IN=. */
545 char first[9], last[9]; /* Name of the variables from FIRST=, LAST=. */
546 union value *input; /* Input record; when NULL, data is taken from the current case instead. */
549 /* MATCH FILES procedure. */
552 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
553 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
555 struct variable **by; /* Variables on the BY subcommand. */
556 size_t by_cnt; /* Number of variables on BY subcommand. */
558 struct dictionary *dict; /* Dictionary of output file. */
559 struct case_sink *sink; /* Sink to receive output. */
560 struct ccase *mtf_case; /* Case used for output. */
562 unsigned seq_num; /* Current sequence number; a variable whose seq_nums[] entry equals it has already been handled for the current output case. */
563 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
/* Forward declarations for the MATCH FILES helpers.  The functions
   that take a bare void pointer convert it to a struct mtf_proc
   pointer in their definitions. */
566 static void mtf_free (struct mtf_proc *);
567 static void mtf_free_file (struct mtf_file *);
568 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
569 static void mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
571 static void mtf_read_nonactive_records (void *);
572 static void mtf_processing_finish (void *);
573 static int mtf_processing (struct ccase *, void *);
575 static char *var_type_description (struct variable *);
577 /* Parse and execute the MATCH FILES command. */
579 cmd_match_files (void)
582 struct mtf_file *first_table = NULL;
586 mtf.head = mtf.tail = NULL;
589 mtf.dict = dict_create ();
594 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
/* BY: allowed once; its variables are looked up in the merged
   dictionary mtf.dict. */
600 if (lex_match (T_BY))
604 msg (SE, _("The BY subcommand may be given once at most."));
610 if (!parse_variables (mtf.dict, &mtf.by, &mtf.by_cnt,
611 PV_NO_DUPLICATE | PV_NO_SCRATCH))
614 else if (token != T_ID)
/* FILE or TABLE: allocate a file record and link it into the list. */
619 else if (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid))
621 struct mtf_file *file = xmalloc (sizeof *file);
623 file->in[0] = file->first[0] = file->last[0] = '\0';
628 if (lex_match_id ("FILE"))
629 file->type = MTF_FILE;
630 else if (lex_match_id ("TABLE"))
632 file->type = MTF_TABLE;
638 /* FILEs go first, then TABLEs. */
639 if (file->type == MTF_TABLE || first_table == NULL)
642 file->prev = mtf.tail;
644 mtf.tail->next = file;
646 if (mtf.head == NULL)
648 if (file->type == MTF_TABLE && first_table == NULL)
/* A FILE seen after the first TABLE is spliced in just ahead of
   first_table. */
653 assert (file->type == MTF_FILE);
654 file->next = first_table;
655 file->prev = first_table->prev;
656 if (first_table->prev)
657 first_table->prev->next = file;
660 first_table->prev = file;
671 msg (SE, _("The active file may not be specified more "
677 assert (pgm_state != STATE_INPUT);
678 if (pgm_state == STATE_INIT)
680 msg (SE, _("Cannot specify the active file since no active "
681 "file has been defined."));
688 _("MATCH FILES may not be used after TEMPORARY when "
689 "the active file is an input source. "
690 "Temporary transformations will be made permanent."));
/* A file with a handle gets its dictionary from the system file;
   default_dict is used for the active file. */
696 file->handle = fh_parse_file_handle ();
703 file->dict = sfm_read_dictionary (file->handle, NULL);
708 file->dict = default_dict;
709 if (!mtf_merge_dictionary (mtf.dict, file))
/* IN, FIRST, LAST: the name is stored in the most recent file record
   and a variable of that name is created in the merged dictionary. */
712 else if (lex_id_match ("IN", tokid)
713 || lex_id_match ("FIRST", tokid)
714 || lex_id_match ("LAST", tokid))
719 if (mtf.tail == NULL)
721 msg (SE, _("IN, FIRST, and LAST subcommands may not occur "
722 "before the first FILE or TABLE."));
726 if (lex_match_id ("IN"))
731 else if (lex_match_id ("FIRST"))
733 name = mtf.tail->first;
736 else if (lex_match_id ("LAST"))
738 name = mtf.tail->last;
753 msg (SE, _("Multiple %s subcommands for a single FILE or "
758 strcpy (name, tokid);
761 if (!dict_create_var (mtf.dict, name, 0))
763 msg (SE, _("Duplicate variable name %s while creating %s "
/* RENAME, KEEP, DROP: handled by trim_dictionary() on the most recent
   file's dictionary, with the MATCH FILES option bit set. */
769 else if (lex_id_match ("RENAME", tokid)
770 || lex_id_match ("KEEP", tokid)
771 || lex_id_match ("DROP", tokid))
773 int options = GTSV_OPT_MATCH_FILES;
775 if (mtf.tail == NULL)
777 msg (SE, _("RENAME, KEEP, and DROP subcommands may not occur "
778 "before the first FILE or TABLE."));
782 if (!trim_dictionary (mtf.tail->dict, &options))
785 else if (lex_match_id ("MAP"))
795 while (token != '.');
801 msg (SE, _("The BY subcommand is required when a TABLE subcommand "
/* Resolve every BY variable by name in each file's own dictionary. */
809 struct mtf_file *iter;
811 for (iter = mtf.head; iter; iter = iter->next)
815 iter->by = xmalloc (sizeof *iter->by * mtf.by_cnt);
817 for (i = 0; i < mtf.by_cnt; i++)
819 iter->by[i] = dict_lookup_var (iter->dict, mtf.by[i]->name);
820 if (iter->by[i] == NULL)
822 msg (SE, _("File %s lacks BY variable %s."),
823 iter->handle ? handle_get_name (iter->handle) : "*",
833 /* From sfm-read.c. */
834 extern void dump_dictionary (struct dictionary *);
836 dump_dictionary (mtf.dict);
/* NOTE(review): the calls that follow the algorithm description below
   pass NULL as the aux argument, yet mtf_read_nonactive_records() and
   mtf_processing_finish() assign that argument to a struct mtf_proc
   pointer and dereference it.  Presumably the address of mtf is
   intended -- confirm. */
840 /* MATCH FILES performs an n-way merge on all its input files.
843 1. Read one input record from every input FILE.
845 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
847 3. Find the FILE input record with minimum BY values. Store all
848 the values from this input record into the output record.
850 4. Find all the FILE input records with BY values identical to
851 the minimums. Store all the values from these input records into
854 5. For every TABLE, read another record as long as the BY values
855 on the TABLE's input record are less than the FILEs' BY values.
856 If an exact match is found, store all the values from the TABLE
857 input record into the output record.
859 6. Write the output record.
861 7. Read another record from each input file FILE and TABLE that
862 we stored values from above. If we come to the end of one of the
863 input files, remove it from the list of input files.
865 8. Repeat from step 2.
867 Unfortunately, this algorithm can't be directly implemented
868 because there's no function to read a record from the active
869 file; instead, it has to be done using callbacks.
871 FIXME: For merging large numbers of files (more than 10?) a
872 better algorithm would use a heap for finding minimum
876 discard_variables ();
878 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
880 mtf.seq_nums = xmalloc (dict_get_var_cnt (mtf.dict)
881 * sizeof *mtf.seq_nums);
882 memset (mtf.seq_nums, 0,
883 dict_get_var_cnt (mtf.dict) * sizeof *mtf.seq_nums);
884 mtf.mtf_case = xmalloc (dict_get_case_size (mtf.dict));
886 mtf_read_nonactive_records (NULL);
888 procedure (mtf_processing, NULL);
889 mtf_processing_finish (NULL);
891 dict_destroy (default_dict);
892 default_dict = mtf.dict;
894 vfm_source = mtf.sink->class->make_source (mtf.sink);
895 free_case_sink (mtf.sink);
905 /* Repeats 2...8 an arbitrary number of times. */
/* MTF_ must point to the struct mtf_proc: it is dereferenced right
   away.
   NOTE(review): cmd_match_files() calls this function with NULL, which
   looks like a bug -- confirm that the address of its mtf structure
   was intended. */
907 mtf_processing_finish (void *mtf_)
909 struct mtf_proc *mtf = mtf_;
910 struct mtf_file *iter;
912 /* Find the active file and delete it. */
913 for (iter = mtf->head; iter; iter = iter->next)
914 if (iter->handle == NULL)
916 mtf_delete_file_in_place (mtf, &iter);
/* Keep merging, with no active-file case, for as long as a FILE
   rather than a TABLE heads the list. */
920 while (mtf->head && mtf->head->type == MTF_FILE)
921 if (!mtf_processing (NULL, mtf))
/* mtf_merge_dictionary() calls this function twice within a single
   msg() call; the two-slot static buffer declared below presumably
   exists so that both results stay valid at once -- confirm against
   the slot-selection code. */
925 /* Return a string in a static buffer describing V's variable type and
928 var_type_description (struct variable *v)
930 static char buf[2][32];
937 if (v->type == NUMERIC)
938 strcpy (s, "numeric");
941 assert (v->type == ALPHA);
942 sprintf (s, "string with width %d", v->width);
947 /* Free FILE and associated data. */
/* The handle is closed, and the dictionary is destroyed unless it is
   NULL or is default_dict, which is left alone. */
949 mtf_free_file (struct mtf_file *file)
951 fh_close_handle (file->handle);
952 if (file->dict != NULL && file->dict != default_dict)
953 dict_destroy (file->dict);
960 /* Free all the data for the MATCH FILES procedure. */
962 mtf_free (struct mtf_proc *mtf)
964 struct mtf_file *iter, *next;
/* NEXT carries the walk forward, presumably because mtf_free_file()
   releases ITER itself -- confirm. */
966 for (iter = mtf->head; iter; iter = next)
970 mtf_free_file (iter);
/* Release the output dictionary and the sequence-number array. */
975 dict_destroy (mtf->dict);
976 free (mtf->seq_nums);
979 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
980 file in the chain, or to NULL if it was the last in the chain. */
982 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
984 struct mtf_file *f = *file;
/* Unlink F from its neighbours in the doubly linked list. */
987 f->prev->next = f->next;
989 f->next->prev = f->prev;
/* Every output variable fed by F is reset in the output case:
   SYSMIS for numeric variables, spaces for string variables. */
999 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1001 struct variable *v = dict_get_var (f->dict, i);
1003 if (v->type == NUMERIC)
1004 mtf->mtf_case->data[v->p.mtf.master->fv].f = SYSMIS;
1006 memset (mtf->mtf_case->data[v->p.mtf.master->fv].s, ' ', v->width);
1013 /* Read a record from every input file except the active file. */
/* NOTE(review): MTF_ is marked UNUSED but is converted to a struct
   mtf_proc pointer and dereferenced below; in addition,
   cmd_match_files() passes NULL for it.  Both look like leftovers --
   confirm. */
1015 mtf_read_nonactive_records (void *mtf_ UNUSED)
1017 struct mtf_proc *mtf = mtf_;
1018 struct mtf_file *iter;
1020 for (iter = mtf->head; iter; )
/* Allocate an input record sized for the file's dictionary and read
   into it; a file whose read fails is removed from the list. */
1024 assert (iter->input == NULL);
1025 iter->input = xmalloc (dict_get_case_size (iter->dict));
1027 if (!sfm_read_case (iter->handle, iter->input, iter->dict))
1028 mtf_delete_file_in_place (mtf, &iter);
1037 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1038 if A == B, 1 if A > B. */
/* Numeric BY values are compared as doubles and string BY values
   with memcmp(). */
1040 mtf_compare_BY_values (struct mtf_proc *mtf,
1041 struct mtf_file *a, struct mtf_file *b,
1044 union value *a_input, *b_input;
/* NOTE(review): the assertion tolerates one NULL among A, B, and C,
   but A and B are both dereferenced unconditionally just below. */
1047 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
/* A file without an input record of its own takes its data from C. */
1048 a_input = a->input != NULL ? a->input : c->data;
1049 b_input = b->input != NULL ? b->input : c->data;
1050 for (i = 0; i < mtf->by_cnt; i++)
1052 assert (a->by[i]->type == b->by[i]->type);
1053 assert (a->by[i]->width == b->by[i]->width);
1055 if (a->by[i]->type == NUMERIC)
1057 double af = a_input[a->by[i]->fv].f;
1058 double bf = b_input[b->by[i]->fv].f;
1069 assert (a->by[i]->type == ALPHA);
1070 result = memcmp (a_input[a->by[i]->fv].s,
1071 b_input[b->by[i]->fv].s,
1075 else if (result > 0)
1082 /* Perform one iteration of steps 3...7 above. */
/* C is the current active-file case and may be NULL:
   mtf_processing_finish() calls this function with a NULL case.  The
   return value is nonzero while the file list is non-empty and is not
   headed by a TABLE.
   NOTE(review): MTF_ is marked UNUSED but is dereferenced through MTF
   below; the annotation looks stale. */
1084 mtf_processing (struct ccase *c, void *mtf_ UNUSED)
1086 struct mtf_proc *mtf = mtf_;
1087 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1088 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BY values. */
1089 struct mtf_file *iter; /* Iterator. */
1093 /* If the active file doesn't have the minimum BY values, don't
1094 return because that would cause a record to be skipped. */
1097 if (mtf->head->type == MTF_TABLE)
1100 /* 3. Find the FILE input record with minimum BY values. Store
1101 all the values from this input record into the output record.
1103 4. Find all the FILE input records with BY values identical
1104 to the minimums. Store all the values from these input
1105 records into the output record. */
1106 min_head = min_tail = mtf->head;
1107 max_head = max_tail = NULL;
1108 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1110 switch (mtf_compare_BY_values (mtf, min_head, iter, c))
1114 max_tail = max_tail->next_min = iter;
1116 max_head = max_tail = iter;
1120 min_tail = min_tail->next_min = iter;
/* The previous minimum chain is appended to (or becomes) the
   non-minimum chain, and ITER starts a new minimum chain. */
1126 max_tail->next_min = min_head;
1127 max_tail = min_tail;
1131 max_head = min_head;
1132 max_tail = min_tail;
1134 min_head = min_tail = iter;
1141 /* 5. For every TABLE, read another record as long as the BY
1142 values on the TABLE's input record are less than the FILEs'
1143 BY values. If an exact match is found, store all the values
1144 from the TABLE input record into the output record. */
1147 struct mtf_file *next = iter->next;
1149 assert (iter->type == MTF_TABLE);
1151 if (iter->handle == NULL)
1155 switch (mtf_compare_BY_values (mtf, min_head, iter, c))
1159 max_tail = max_tail->next_min = iter;
1161 max_head = max_tail = iter;
1165 min_tail = min_tail->next_min = iter;
1169 if (iter->handle == NULL)
1171 if (sfm_read_case (iter->handle, iter->input, iter->dict))
1173 mtf_delete_file_in_place (mtf, &iter);
1183 /* Next sequence number. */
1186 /* Store data to all the records we are using. */
1188 min_tail->next_min = NULL;
1189 for (iter = min_head; iter; iter = iter->next_min)
/* seq_nums[] records, per master variable, the sequence number for
   which the variable was last given a value. */
1193 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1195 struct variable *v = dict_get_var (iter->dict, i);
1196 union value *record;
1198 if (mtf->seq_nums[v->p.mtf.master->index] == mtf->seq_num)
1200 mtf->seq_nums[v->p.mtf.master->index] = mtf->seq_num;
/* Files without their own input record are read from case C. */
1202 record = iter->input != NULL ? iter->input : c->data;
1204 assert (v->type == NUMERIC || v->type == ALPHA);
1205 if (v->type == NUMERIC)
1206 mtf->mtf_case->data[v->p.mtf.master->fv].f = record[v->fv].f;
1208 memcpy (mtf->mtf_case->data[v->p.mtf.master->fv].s,
1209 record[v->fv].s, v->width);
1213 /* Store missing values to all the records we're not using. */
1215 max_tail->next_min = NULL;
1216 for (iter = max_head; iter; iter = iter->next_min)
1220 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1222 struct variable *v = dict_get_var (iter->dict, i);
1224 if (mtf->seq_nums[v->p.mtf.master->index] == mtf->seq_num)
1226 mtf->seq_nums[v->p.mtf.master->index] = mtf->seq_num;
1229 printf ("%s/%s: dest-fv=%d\n",
1230 fh_handle_name (iter->handle),
1232 v->p.mtf.master->fv);
1234 if (v->type == NUMERIC)
1235 mtf->mtf_case->data[v->p.mtf.master->fv].f = SYSMIS;
1237 memset (mtf->mtf_case->data[v->p.mtf.master->fv].s, ' ',
1241 if (iter->handle == NULL)
1245 /* 6. Write the output record. */
1246 mtf->sink->class->write (mtf->sink, mtf->mtf_case);
1248 /* 7. Read another record from each input file FILE and TABLE
1249 that we stored values from above. If we come to the end of
1250 one of the input files, remove it from the list of input
1252 for (iter = min_head; iter && iter->type == MTF_FILE; )
1254 struct mtf_file *next = iter->next_min;
1258 assert (iter->input != NULL);
1260 if (!sfm_read_case (iter->handle, iter->input, iter->dict))
1261 mtf_delete_file_in_place (mtf, &iter);
1271 return (mtf->head && mtf->head->type != MTF_TABLE);
1274 /* Merge the dictionary for file F into the master dictionary
1277 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1279 struct dictionary *d = f->dict;
1280 const char *d_docs, *m_docs;
1282 if (dict_get_label (m) == NULL)
1283 dict_set_label (m, dict_get_label (d));
1285 d_docs = dict_get_documents (d);
1286 m_docs = dict_get_documents (m);
1290 dict_set_documents (m, d_docs);
1296 new_len = strlen (m_docs) + strlen (d_docs);
1297 new_docs = xmalloc (new_len + 1);
1298 strcpy (new_docs, m_docs);
1299 strcat (new_docs, d_docs);
1300 dict_set_documents (m, new_docs);
1305 dict_compact_values (d);
1310 for (i = 0; i < dict_get_var_cnt (d); i++)
1312 struct variable *dv = dict_get_var (d, i);
1313 struct variable *mv = dict_lookup_var (m, dv->name);
1315 assert (dv->type == ALPHA || dv->width == 0);
1316 assert (!mv || mv->type == ALPHA || mv->width == 0);
1317 if (mv && dv->width == mv->width)
1319 if (val_labs_count (dv->val_labs)
1320 && !val_labs_count (mv->val_labs))
1321 mv->val_labs = val_labs_copy (dv->val_labs);
1322 if (dv->miss_type != MISSING_NONE
1323 && mv->miss_type == MISSING_NONE)
1324 copy_missing_values (mv, dv);
1326 if (mv && dv->label && !mv->label)
1327 mv->label = xstrdup (dv->label);
1330 mv = dict_clone_var (m, dv, dv->name);
1331 assert (mv != NULL);
1333 else if (mv->width != dv->width)
1335 msg (SE, _("Variable %s in file %s (%s) has different "
1336 "type or width from the same variable in "
1337 "earlier file (%s)."),
1338 dv->name, handle_get_name (f->handle),
1339 var_type_description (dv), var_type_description (mv));
1342 dv->p.mtf.master = mv;
1349 /* IMPORT command. */
1351 /* Parses the IMPORT command. */
1355 struct file_handle *handle = NULL;
1356 struct dictionary *dict;
1357 struct get_pgm *pgm;
1358 int options = GTSV_OPT_NONE;
/* The file may be introduced by FILE or given directly as a string
   token. */
1365 if (lex_match_id ("FILE") || token == T_STRING)
1369 handle = fh_parse_file_handle ();
/* TYPE accepts COMM or TAPE; anything else is reported as an error. */
1373 else if (lex_match_id ("TYPE"))
1377 if (lex_match_id ("COMM"))
1379 else if (lex_match_id ("TAPE"))
1383 lex_error (_("expecting COMM or TAPE"));
1389 if (!lex_match ('/') && token != '.')
/* discard_variables() runs before the portable-file dictionary is
   read. */
1395 discard_variables ();
1397 dict = pfm_read_dictionary (handle, NULL);
1402 dump_dict_variables (dict);
/* trim_dictionary() returns zero on failure; the handle is closed in
   that case. */
1404 if (0 == trim_dictionary (dict, &options))
1406 fh_close_handle (handle);
1410 dump_dict_variables (dict);
1413 dict_compact_values (dict);
/* Debug listing: each variable with its file position (get.fv, get.nv)
   and its in-memory position (fv, nv). */
1416 printf (_("IMPORT translation table from file to memory:\n"));
1417 for (i = 0; i < dict->nvar; i++)
1419 struct variable *v = dict->var[i];
1421 printf (_(" %8s from %3d,%3d to %3d,%3d\n"), v->name,
1422 v->get.fv, v->get.nv, v->fv, v->nv);
/* The imported dictionary replaces the previous default dictionary. */
1426 dict_destroy (default_dict);
1427 default_dict = dict;
/* Package the handle and case size for the case source;
   import_source_read() later reads cases through pgm->handle. */
1429 pgm = xmalloc (sizeof *pgm);
1430 pgm->handle = handle;
1431 pgm->case_size = dict_get_case_size (default_dict);
1432 vfm_source = create_case_source (&import_source_class, default_dict, pgm);
/* Case-source read callback for IMPORT.  It mirrors get_source_read()
   but reads with pfm_read_case() through the handle stored in
   SOURCE->aux.  The loop ends when no further case can be read and,
   presumably, when WRITE_CASE returns zero -- confirm against the
   statement that follows the if. */
1437 /* Reads all the cases from the data file and passes them to
1440 import_source_read (struct case_source *source,
1442 write_case_func *write_case, write_case_data wc_data)
1444 struct get_pgm *pgm = source->aux;
1446 while (pfm_read_case (pgm->handle, c->data, default_dict))
1447 if (!write_case (wc_data))
1451 const struct case_source_class import_source_class =
1459 static int export_write_case_func (struct ccase *c, void *);
1461 /* Parses the EXPORT command. */
1462 /* FIXME: same as cmd_save_internal(). */
1466 struct file_handle *handle;
1467 struct dictionary *dict;
1468 int options = GTSV_OPT_SAVE;
1470 struct save_trns *t;
1475 if (lex_match_id ("OUTFILE"))
1478 handle = fh_parse_file_handle ();
/* Trimming is done on a clone of default_dict, not on default_dict. */
1482 dict = dict_clone (default_dict);
1484 dump_dict_variables (dict);
/* Point each cloned variable at the default_dict variable with the
   same index; the aux pointers are read back below, after trimming. */
1486 for (i = 0; i < dict_get_var_cnt (dict); i++)
1487 dict_get_var (dict, i)->aux = dict_get_var (default_dict, i);
1488 if (0 == trim_dictionary (dict, &options))
1490 fh_close_handle (handle);
1495 dump_dict_variables (dict);
1498 /* Write dictionary. */
1499 if (!pfm_write_dictionary (handle, dict))
1501 dict_destroy (dict);
1502 fh_close_handle (handle);
1506 /* Fill in transformation structure. */
1507 t = xmalloc (sizeof *t);
1508 t->h.proc = save_trns_proc;
1509 t->h.free = save_trns_free;
1511 t->nvar = dict_get_var_cnt (dict);
1512 t->var = xmalloc (sizeof *t->var * t->nvar);
1513 for (i = 0; i < t->nvar; i++)
1514 t->var[i] = dict_get_var (dict, i)->aux;
/* NOTE(review): the buffer is sized as NVAR flt64 elements, but
   export_write_case_func() fills it as an array of NVAR union value
   objects.  Confirm that union value is no larger than flt64. */
1515 t->case_buf = xmalloc (sizeof *t->case_buf * t->nvar);
/* The clone is no longer needed: T->var holds the aux pointers, which
   refer to default_dict variables. */
1516 dict_destroy (dict);
/* Write every case, then release the transformation. */
1518 procedure (export_write_case_func, t);
1519 save_trns_free (&t->h);
1524 /* Writes case C to the EXPORT file. */
/* AUX is the struct save_trns built by the EXPORT parser.  Its
   case_buf is treated here as an array of union value, one element
   per variable. */
1526 export_write_case_func (struct ccase *c, void *aux)
1528 struct save_trns *t = aux;
1529 union value *p = (union value *) t->case_buf;
1532 for (i = 0; i < t->nvar; i++)
1534 struct variable *v = t->var[i];
/* Numeric values are copied whole; for the other case only a pointer
   to the string data inside C is stored, in member c. */
1536 if (v->type == NUMERIC)
1537 *p++ = c->data[v->fv];
1539 (*p++).c = c->data[v->fv].s;
1542 pfm_write_case (t->f, (union value *) t->case_buf);