1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
26 #include "file-handle.h"
34 #include "value-labels.h"
39 #include "debug-print.h"
41 /* GET or IMPORT input program. */
44 struct file_handle *handle; /* File to GET or IMPORT from. */
45 size_t case_size; /* Case size in bytes. */
48 /* XSAVE transformation (and related SAVE, EXPORT procedures). */
52 struct file_handle *f; /* Associated system file. */
53 int nvar; /* Number of variables. */
54 struct variable **var; /* Variables. */
55 flt64 *case_buf; /* Case transfer buffer. */
58 /* Option bits exchanged with trim_dictionary().  Callers pass in
   GTSV_OPT_SAVE or GTSV_OPT_MATCH_FILES (or GTSV_OPT_NONE);
   trim_dictionary() itself sets or clears GTSV_OPT_COMPRESSED.
   The values are octal literals, one bit each. */
59 #define GTSV_OPT_COMPRESSED 001 /* Compression; (X)SAVE only. */
60 #define GTSV_OPT_SAVE 002 /* The SAVE/XSAVE/EXPORT procedures. */
61 #define GTSV_OPT_MATCH_FILES 004 /* The MATCH FILES procedure. */
62 #define GTSV_OPT_NONE 0 /* No option bits set (GET, IMPORT). */
64 static int trim_dictionary (struct dictionary * dict, int *options);
65 static int save_write_case_func (struct ccase *, void *);
66 static trns_proc_func save_trns_proc;
67 static trns_free_func save_trns_free;
70 void dump_dict_variables (struct dictionary *);
73 /* Parses the GET command. */
/* Visible flow: parse a file handle (the FILE keyword is matched if
   present), read the system file dictionary, let trim_dictionary()
   apply the common subcommands, compact the dictionary's values,
   and install a case source of class get_source_class whose aux
   data carries the handle and the case size. */
77 struct file_handle *handle;
78 struct dictionary *dict;
80 int options = GTSV_OPT_NONE;
85 if (lex_match_id ("FILE"))
88 handle = fh_parse_file_handle ();
92 dict = sfm_read_dictionary (handle, NULL);
97 dump_dict_variables (dict);
/* trim_dictionary() returns zero on failure; the file handle is
   closed on that path. */
99 if (0 == trim_dictionary (dict, &options))
101 fh_close_handle (handle);
105 dump_dict_variables (dict);
108 dict_compact_values (dict);
/* Diagnostic table: for each variable, its location in the file
   (get.fv, get.nv) next to its location in memory (fv, nv). */
111 printf (_("GET translation table from file to memory:\n"));
112 for (i = 0; i < dict->nvar; i++)
114 struct variable *v = dict->var[i];
116 printf (_(" %8s from %3d,%3d to %3d,%3d\n"), v->name,
117 v->get.fv, v->get.nv, v->fv, v->nv);
/* NOTE(review): default_dict is destroyed here and read again just
   below.  The statement that points default_dict at DICT is not
   visible in this excerpt (the IMPORT path assigns it immediately
   after dict_destroy) -- confirm it is present. */
121 dict_destroy (default_dict);
124 pgm = xmalloc (sizeof *pgm);
125 pgm->handle = handle;
126 pgm->case_size = dict_get_case_size (default_dict);
127 vfm_source = create_case_source (&get_source_class, default_dict, pgm);
132 /* SAVE or XSAVE command? */
139 /* Parses the SAVE and XSAVE commands. */
/* SAVE_CMD selects what happens once the dictionary has been
   written: CMD_SAVE writes every case immediately through
   procedure(), CMD_XSAVE installs a transformation instead. */
141 cmd_save_internal (enum save_cmd save_cmd)
143 struct file_handle *handle;
144 struct dictionary *dict;
145 int options = GTSV_OPT_SAVE;
148 struct sfm_write_info inf;
153 if (lex_match_id ("OUTFILE"))
156 handle = fh_parse_file_handle ();
160 dict = dict_clone (default_dict);
162 dump_dict_variables (dict);
/* Tie each variable of the clone to its counterpart in default_dict
   through aux.  After trim_dictionary() has edited the clone, the
   t->var[] fill below follows aux back to the active-file
   variables. */
164 for (i = 0; i < dict_get_var_cnt (dict); i++)
165 dict_get_var (dict, i)->aux = dict_get_var (default_dict, i);
166 if (0 == trim_dictionary (dict, &options))
168 fh_close_handle (handle);
173 dump_dict_variables (dict);
176 /* Write dictionary. */
/* !! reduces the masked option bit to exactly 0 or 1. */
179 inf.compress = !!(options & GTSV_OPT_COMPRESSED);
180 if (!sfm_write_dictionary (&inf))
183 fh_close_handle (handle);
187 /* Fill in transformation structure. */
188 t = xmalloc (sizeof *t);
189 t->h.proc = save_trns_proc;
190 t->h.free = save_trns_free;
192 t->nvar = dict_get_var_cnt (dict);
193 t->var = xmalloc (sizeof *t->var * t->nvar);
194 for (i = 0; i < t->nvar; i++)
195 t->var[i] = dict_get_var (dict, i)->aux;
/* inf.case_size is presumably filled in by sfm_write_dictionary();
   its assignment is not visible here -- confirm. */
196 t->case_buf = xmalloc (sizeof *t->case_buf * inf.case_size);
/* SAVE runs now and releases T itself; XSAVE hands T to the
   transformation chain, which owns it from then on (its free hook
   is save_trns_free). */
199 if (save_cmd == CMD_SAVE)
201 procedure (save_write_case_func, t);
202 save_trns_free (&t->h);
206 assert (save_cmd == CMD_XSAVE);
207 add_transformation (&t->h);
213 /* Parses and performs the SAVE procedure. */
/* Thin wrapper: all the work is done by cmd_save_internal(), which
   for CMD_SAVE writes the cases immediately. */
217 return cmd_save_internal (CMD_SAVE);
220 /* Parses the XSAVE transformation command. */
/* Thin wrapper: cmd_save_internal() installs a transformation for
   CMD_XSAVE rather than writing cases right away. */
224 return cmd_save_internal (CMD_XSAVE);
227 /* Writes the given C to the file specified by T. */
/* The variables listed in T->var are packed from case C into
   T->case_buf; the buffer and the number of flt64 elements used are
   then passed to sfm_write_case(). */
229 do_write_case (struct save_trns *t, struct ccase *c)
231 flt64 *p = t->case_buf;
234 for (i = 0; i < t->nvar; i++)
236 struct variable *v = t->var[i];
237 if (v->type == NUMERIC)
239 double src = c->data[v->fv].f;
/* String value: copy WIDTH bytes, then pad with spaces.
   REM_RND_UP presumably gives the bytes needed to reach the next
   multiple of sizeof *p and DIV_RND_UP the number of flt64 slots
   consumed -- confirm against the macro definitions. */
247 memcpy (p, c->data[v->fv].s, v->width);
248 memset (&((char *) p)[v->width], ' ',
249 REM_RND_UP (v->width, sizeof *p));
250 p += DIV_RND_UP (v->width, sizeof *p);
/* p - t->case_buf is the count of flt64 elements filled above. */
254 sfm_write_case (t->f, t->case_buf, p - t->case_buf);
257 /* Writes case C to the system file specified on SAVE. */
/* AUX is the struct save_trns that cmd_save_internal() hands to
   procedure().
   NOTE(review): AUX is tagged UNUSED although it is forwarded to
   do_write_case(); the tag looks stale. */
259 save_write_case_func (struct ccase *c, void *aux UNUSED)
261 do_write_case (aux, c);
265 /* Writes case C to the system file specified on XSAVE. */
/* Installed as the proc hook of the transformation built by
   cmd_save_internal(); CASE_NUM is not used. */
267 save_trns_proc (struct trns_header *h, struct ccase *c, int case_num UNUSED)
/* The header pointer is cast back to the enclosing save_trns.  This
   assumes the header (member h) sits at the start of the struct,
   which this excerpt does not show -- confirm. */
269 struct save_trns *t = (struct save_trns *) h;
270 do_write_case (t, c);
274 /* Frees a SAVE transformation. */
/* Besides serving as the free hook of the XSAVE transformation,
   this is called directly by the SAVE and EXPORT code once
   procedure() has returned. */
276 save_trns_free (struct trns_header *pt)
278 struct save_trns *t = (struct save_trns *) pt;
/* Close the output file.  Any release of t->var, t->case_buf or T
   itself is not visible in this excerpt. */
280 fh_close_handle (t->f);
286 static int rename_variables (struct dictionary * dict);
288 /* The GET and SAVE commands have a common structure after the
289 FILE/OUTFILE subcommand. This function parses this structure and
290 returns nonzero on success, zero on failure. It both reads
291 *OPTIONS, for the GTSV_OPT_SAVE bit, and writes it, for the
292 GTSV_OPT_COMPRESSED bit. */
293 /* FIXME: IN, FIRST, LAST, MAP. */
294 /* FIXME? Should we call dict_compact_values() on dict as a final step? */
297 trim_dictionary (struct dictionary *dict, int *options)
/* Start from the global setting: a nonzero get_scompression() turns
   compression on before any subcommand is looked at. */
299 if (get_scompression())
300 *options |= GTSV_OPT_COMPRESSED;
302 if (*options & GTSV_OPT_SAVE)
304 /* Delete all the scratch variables. */
/* V is sized for the worst case (every variable is scratch). */
309 v = xmalloc (sizeof *v * dict_get_var_cnt (dict));
311 for (i = 0; i < dict_get_var_cnt (dict); i++)
312 if (dict_class_from_id (dict_get_var (dict, i)->name) == DC_SCRATCH)
313 v[nv++] = dict_get_var (dict, i);
314 dict_delete_vars (dict, v, nv);
/* In MATCH FILES mode the loop body is entered without a leading
   '/', and COMPRESSED/UNCOMPRESSED are not recognized. */
318 while ((*options & GTSV_OPT_MATCH_FILES) || lex_match ('/'))
320 if (!(*options & GTSV_OPT_MATCH_FILES) && lex_match_id ("COMPRESSED"))
321 *options |= GTSV_OPT_COMPRESSED;
322 else if (!(*options & GTSV_OPT_MATCH_FILES) && lex_match_id ("UNCOMPRESSED"))
323 *options &= ~GTSV_OPT_COMPRESSED;
324 else if (lex_match_id ("DROP"))
330 if (!parse_variables (dict, &v, &nv, PV_NONE))
332 dict_delete_vars (dict, v, nv);
335 else if (lex_match_id ("KEEP"))
342 if (!parse_variables (dict, &v, &nv, PV_NONE))
345 /* Move the specified variables to the beginning. */
346 dict_reorder_vars (dict, v, nv);
348 /* Delete the remaining variables. */
/* After the reorder the kept variables occupy indexes 0..nv-1, so
   everything from index nv onward is collected into V and deleted.
   NOTE(review): when KEEP names every variable the requested size
   is zero -- confirm that xrealloc() tolerates a zero size. */
349 v = xrealloc (v, (dict_get_var_cnt (dict) - nv) * sizeof *v);
350 for (i = nv; i < dict_get_var_cnt (dict); i++)
351 v[i - nv] = dict_get_var (dict, i);
352 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
355 else if (lex_match_id ("RENAME"))
357 if (!rename_variables (dict))
362 lex_error (_("while expecting a valid subcommand"));
/* A dictionary left with no variables at all is rejected. */
366 if (dict_get_var_cnt (dict) == 0)
368 msg (SE, _("All variables deleted from system file dictionary."));
/* NOTE(review): the statement guarded by this test is not visible.
   Presumably MATCH FILES mode leaves the loop after one subcommand
   so that cmd_match_files() can resume its own parsing --
   confirm. */
372 if (*options & GTSV_OPT_MATCH_FILES)
378 lex_error (_("expecting end of command"));
385 /* Parses and performs the RENAME subcommand of GET and SAVE. */
/* Two syntaxes are visible: a single OLD=NEW pair, and one or more
   parenthesized groups of the form (OLD... = NEW...).  trim_dictionary()
   treats a zero return as failure. */
387 rename_variables (struct dictionary * dict)
405 v = parse_dict_variable (dict);
408 if (!lex_force_match ('=')
/* A rename to the variable's own current name (compared over the
   first 8 characters) is tested before the clash check against
   other existing variables. */
411 if (!strncmp (tokid, v->name, 8))
413 if (dict_lookup_var (dict, tokid) != NULL)
415 msg (SE, _("Cannot rename %s as %s because there already exists "
416 "a variable named %s. To rename variables with "
417 "overlapping names, use a single RENAME subcommand "
418 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
419 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
423 dict_rename_var (dict, v, tokid);
/* Parenthesized form: every group appends its old variables to V
   and its new names to NEW_NAMES (PV_APPEND).  The counts added by
   a group must agree, and the whole accumulated set is renamed in
   one dict_rename_vars() call, which reports a duplicated name
   through ERR_NAME. */
432 while (lex_match ('('))
436 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
438 if (!lex_match ('='))
440 msg (SE, _("`=' expected after variable list."));
443 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
447 msg (SE, _("Number of variables on left side of `=' (%d) does not "
448 "match number of variables on right side (%d), in "
449 "parenthesized group %d of RENAME subcommand."),
450 nv - old_nv, nn - old_nv, group);
453 if (!lex_force_match (')'))
458 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
460 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
466 for (i = 0; i < nn; i++)
/* Debugging aid: prints the name of every variable in DICT to
   standard output, separated by commas.
   NOTE(review): this reads dict->nvar and dict->var directly, while
   most of this file goes through dict_get_var_cnt() and
   dict_get_var(). */
476 dump_dict_variables (struct dictionary * dict)
480 printf (_("\nVariables in dictionary:\n"));
481 for (i = 0; i < dict->nvar; i++)
482 printf ("%s, ", dict->var[i]->name);
487 /* Clears internal state related to GET input procedure. */
489 get_source_destroy (struct case_source *source)
/* SOURCE->aux is the struct get_pgm attached when the case source
   was created. */
491 struct get_pgm *pgm = source->aux;
493 /* It is not necessary to destroy the dictionary because if we get
494 to this point then the dictionary is default_dict. */
495 fh_close_handle (pgm->handle);
499 /* Reads all the cases from the data file into C and passes them
500 to WRITE_CASE one by one, passing WC_DATA. */
502 get_source_read (struct case_source *source,
504 write_case_func *write_case, write_case_data wc_data)
506 struct get_pgm *pgm = source->aux;
/* The loop ends when sfm_read_case() returns zero (presumably end
   of file or a read error) or as soon as WRITE_CASE returns zero. */
508 while (sfm_read_case (pgm->handle, c->data, default_dict)
509 && write_case (wc_data))
513 const struct case_source_class get_source_class =
524 #include "debug-print.h"
529 MTF_FILE, /* Specified on FILE= subcommand. */
530 MTF_TABLE /* Specified on TABLE= subcommand. */
533 /* One of the files on MATCH FILES. */
536 struct mtf_file *next, *prev;
537 /* Next, previous in the list of files. */
538 struct mtf_file *next_min; /* Next in the chain of minimums. */
540 int type; /* One of MTF_*. */
541 struct variable **by; /* List of BY variables for this file. */
542 struct file_handle *handle; /* File handle for the file. */
543 struct dictionary *dict; /* Dictionary from system file. */
544 char in[9]; /* Name of the variable from IN=. */
545 char first[9], last[9]; /* Name of the variables from FIRST=, LAST=. */
546 union value *input; /* Input record. */
549 /* MATCH FILES procedure. */
552 struct mtf_file *head; /* Head of the file list (FILEs first, then TABLEs). */
553 struct mtf_file *tail; /* Tail of the file list; also the most recently named file. */
555 struct variable **by; /* Variables on the BY subcommand. */
556 size_t by_cnt; /* Number of variables on BY subcommand. */
558 struct dictionary *dict; /* Dictionary of output file. */
559 struct case_sink *sink; /* Sink to receive output. */
560 struct ccase *mtf_case; /* Case used for output. */
562 unsigned seq_num; /* Current sequence number; a seq_nums[] entry equal to it marks a variable already stored in this iteration. */
563 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
566 static void mtf_free (struct mtf_proc *);
567 static void mtf_free_file (struct mtf_file *);
568 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
569 static void mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
571 static void mtf_read_nonactive_records (void *);
572 static void mtf_processing_finish (void *);
573 static int mtf_processing (struct ccase *, void *);
575 static char *var_type_description (struct variable *);
577 /* Parse and execute the MATCH FILES command. */
/* Parsing phase: loops over subcommands until the terminating '.',
   building the list of input files (mtf.head .. mtf.tail) and the
   merged output dictionary (mtf.dict), then resolves the BY
   variables in every input file. */
579 cmd_match_files (void)
582 struct mtf_file *first_table = NULL;
586 mtf.head = mtf.tail = NULL;
589 mtf.dict = dict_create ();
594 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
600 if (lex_match (T_BY))
604 msg (SE, _("The BY subcommand may be given once at most."));
610 if (!parse_variables (mtf.dict, &mtf.by, &mtf.by_cnt,
611 PV_NO_DUPLICATE | PV_NO_SCRATCH))
614 else if (token != T_ID)
619 else if (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid))
621 struct mtf_file *file = xmalloc (sizeof *file);
623 file->in[0] = file->first[0] = file->last[0] = '\0';
628 if (lex_match_id ("FILE"))
629 file->type = MTF_FILE;
630 else if (lex_match_id ("TABLE"))
632 file->type = MTF_TABLE;
638 /* FILEs go first, then TABLEs. */
/* A TABLE, or any file named before the first TABLE, is appended at
   the tail.  A FILE named after a TABLE is spliced in directly
   ahead of first_table, so every FILE precedes every TABLE. */
639 if (file->type == MTF_TABLE || first_table == NULL)
642 file->prev = mtf.tail;
644 mtf.tail->next = file;
646 if (mtf.head == NULL)
648 if (file->type == MTF_TABLE && first_table == NULL)
653 assert (file->type == MTF_FILE);
654 file->next = first_table;
655 file->prev = first_table->prev;
656 if (first_table->prev)
657 first_table->prev->next = file;
660 first_table->prev = file;
671 msg (SE, _("The active file may not be specified more "
677 assert (pgm_state != STATE_INPUT);
678 if (pgm_state == STATE_INIT)
680 msg (SE, _("Cannot specify the active file since no active "
681 "file has been defined."));
688 _("MATCH FILES may not be used after TEMPORARY when "
689 "the active file is an input source. "
690 "Temporary transformations will be made permanent."));
696 file->handle = fh_parse_file_handle ();
703 file->dict = sfm_read_dictionary (file->handle, NULL);
/* NOTE(review): the branch structure around these assignments is
   not visible.  file->dict = default_dict appears to be the
   active-file case, in which file->handle is NULL (the BY check
   below prints the "*" placeholder for a NULL handle). */
708 file->dict = default_dict;
709 if (!mtf_merge_dictionary (mtf.dict, file))
712 else if (lex_id_match ("IN", tokid)
713 || lex_id_match ("FIRST", tokid)
714 || lex_id_match ("LAST", tokid))
719 if (mtf.tail == NULL)
721 msg (SE, _("IN, FIRST, and LAST subcommands may not occur "
722 "before the first FILE or TABLE."));
726 if (lex_match_id ("IN"))
731 else if (lex_match_id ("FIRST"))
733 name = mtf.tail->first;
736 else if (lex_match_id ("LAST"))
738 name = mtf.tail->last;
756 msg (SE, _("Multiple %s subcommands for a single FILE or "
/* NAME points at one of the 9-byte in/first/last fields of the most
   recent file.  tokid is copied with no length check here;
   presumably identifiers never exceed 8 characters, as the 8-byte
   comparison in rename_variables() suggests -- confirm. */
761 strcpy (name, tokid);
764 if (!dict_create_var (mtf.dict, name, 0))
766 msg (SE, _("Duplicate variable name %s while creating %s "
772 else if (lex_id_match ("RENAME", tokid)
773 || lex_id_match ("KEEP", tokid)
774 || lex_id_match ("DROP", tokid))
/* trim_dictionary() is run in its MATCH FILES mode on the
   dictionary of the most recently named file (mtf.tail). */
776 int options = GTSV_OPT_MATCH_FILES;
778 if (mtf.tail == NULL)
780 msg (SE, _("RENAME, KEEP, and DROP subcommands may not occur "
781 "before the first FILE or TABLE."));
785 if (!trim_dictionary (mtf.tail->dict, &options))
788 else if (lex_match_id ("MAP"))
798 while (token != '.');
804 msg (SE, _("The BY subcommand is required when a TABLE subcommand "
/* Look each BY variable up by name in every input file's own
   dictionary; a file that lacks one is reported as an error. */
812 struct mtf_file *iter;
814 for (iter = mtf.head; iter; iter = iter->next)
818 iter->by = xmalloc (sizeof *iter->by * mtf.by_cnt);
820 for (i = 0; i < mtf.by_cnt; i++)
822 iter->by[i] = dict_lookup_var (iter->dict, mtf.by[i]->name);
823 if (iter->by[i] == NULL)
825 msg (SE, _("File %s lacks BY variable %s."),
826 iter->handle ? handle_get_name (iter->handle) : "*",
836 /* From sfm-read.c. */
837 extern void dump_dictionary (struct dictionary *);
839 dump_dictionary (mtf.dict);
843 /* MATCH FILES performs an n-way merge on all its input files.
846 1. Read one input record from every input FILE.
848 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
850 3. Find the FILE input record with minimum BY values. Store all
851 the values from this input record into the output record.
853 4. Find all the FILE input records with BY values identical to
854 the minimums. Store all the values from these input records into the output record.
857 5. For every TABLE, read another record as long as the BY values
858 on the TABLE's input record are less than the FILEs' BY values.
859 If an exact match is found, store all the values from the TABLE
860 input record into the output record.
862 6. Write the output record.
864 7. Read another record from each input file FILE and TABLE that
865 we stored values from above. If we come to the end of one of the
866 input files, remove it from the list of input files.
868 8. Repeat from step 2.
870 Unfortunately, this algorithm can't be directly implemented
871 because there's no function to read a record from the active
872 file; instead, it has to be done using callbacks.
874 FIXME: For merging large numbers of files (more than 10?) a
875 better algorithm would use a heap for finding minimum values. */
879 discard_variables ();
881 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
/* One zero-initialized sequence counter per variable of the output
   dictionary. */
883 mtf.seq_nums = xmalloc (dict_get_var_cnt (mtf.dict)
884 * sizeof *mtf.seq_nums);
885 memset (mtf.seq_nums, 0,
886 dict_get_var_cnt (mtf.dict) * sizeof *mtf.seq_nums);
887 mtf.mtf_case = xmalloc (dict_get_case_size (mtf.dict));
/* NOTE(review): NULL is passed as the auxiliary argument in all
   three calls below, yet mtf_read_nonactive_records(),
   mtf_processing() and mtf_processing_finish() each convert that
   argument to a struct mtf_proc pointer and dereference it.  The
   address of the mtf structure used throughout this function looks
   like the intended argument -- confirm. */
889 mtf_read_nonactive_records (NULL);
891 procedure (mtf_processing, NULL);
892 mtf_processing_finish (NULL);
/* The merged dictionary replaces the old default dictionary, and
   the sink that collected the output cases is turned into the new
   case source before being freed. */
894 dict_destroy (default_dict);
895 default_dict = mtf.dict;
897 vfm_source = mtf.sink->class->make_source (mtf.sink);
898 free_case_sink (mtf.sink);
908 /* Repeats 2...8 an arbitrary number of times. */
/* The step numbers refer to the algorithm description in
   cmd_match_files(), which calls this function after procedure()
   has returned.
   NOTE(review): MTF_ is dereferenced below, but the visible call in
   cmd_match_files() passes NULL -- confirm the intended argument. */
910 mtf_processing_finish (void *mtf_)
912 struct mtf_proc *mtf = mtf_;
913 struct mtf_file *iter;
915 /* Find the active file and delete it. */
/* The active file is the list entry whose handle is NULL. */
916 for (iter = mtf->head; iter; iter = iter->next)
917 if (iter->handle == NULL)
919 mtf_delete_file_in_place (mtf, &iter);
/* Keep merging with no active-file case (C is NULL) for as long as
   a FILE remains at the head of the list. */
923 while (mtf->head && mtf->head->type == MTF_FILE)
924 if (!mtf_processing (NULL, mtf))
928 /* Return a string in a static buffer describing V's variable type and width. */
931 var_type_description (struct variable *v)
/* Two static buffers.  Presumably they are alternated between calls
   so that two descriptions can appear in a single message, as the
   msg() call in mtf_merge_dictionary() requires; the code that
   selects S is not visible here.  The result is overwritten by
   later calls. */
933 static char buf[2][32];
940 if (v->type == NUMERIC)
941 strcpy (s, "numeric");
944 assert (v->type == ALPHA);
/* The fixed text is 18 characters long.  An int of up to 32 bits
   prints in at most 11 characters, so the 32-byte buffer cannot
   overflow under that assumption. */
945 sprintf (s, "string with width %d", v->width);
950 /* Free FILE and associated data. */
952 mtf_free_file (struct mtf_file *file)
/* NOTE(review): file->handle is NULL for the active file; whether
   fh_close_handle() accepts a NULL handle cannot be told from this
   file -- confirm. */
954 fh_close_handle (file->handle);
/* The active file's dictionary is default_dict, which this file
   record does not own, so it is left alone. */
955 if (file->dict != NULL && file->dict != default_dict)
956 dict_destroy (file->dict);
963 /* Free all the data for the MATCH FILES procedure. */
/* NOTE(review): mtf->dict is destroyed here.  On the success path
   cmd_match_files() installs mtf.dict as default_dict, so this must
   not run afterwards unless mtf->dict has been reset; the relevant
   statements are not visible -- confirm. */
965 mtf_free (struct mtf_proc *mtf)
967 struct mtf_file *iter, *next;
/* NEXT is presumably saved from iter->next before the entry is
   freed; that assignment is not visible here. */
969 for (iter = mtf->head; iter; iter = next)
973 mtf_free_file (iter);
978 dict_destroy (mtf->dict);
979 free (mtf->seq_nums);
982 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
983 file in the chain, or to NULL if it was the last in the chain. */
985 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
987 struct mtf_file *f = *file;
/* The two unlink statements are presumably guarded by tests on
   f->prev and f->next that are not visible here. */
990 f->prev->next = f->next;
992 f->next->prev = f->prev;
/* Reset every output value that came from the removed file to
   system-missing (numeric) or spaces (string), presumably so that
   later output cases carry no stale data from it. */
1002 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1004 struct variable *v = dict_get_var (f->dict, i);
1006 if (v->type == NUMERIC)
1007 mtf->mtf_case->data[v->p.mtf.master->fv].f = SYSMIS;
1009 memset (mtf->mtf_case->data[v->p.mtf.master->fv].s, ' ', v->width);
1016 /* Read a record from every input file except the active file. */
/* NOTE(review): MTF_ is tagged UNUSED but is dereferenced below,
   and the visible call in cmd_match_files() passes NULL, which
   would make mtf->head a null-pointer dereference -- confirm the
   intended argument and drop the stale tag. */
1018 mtf_read_nonactive_records (void *mtf_ UNUSED)
1020 struct mtf_proc *mtf = mtf_;
1021 struct mtf_file *iter;
/* The for statement has no increment: mtf_delete_file_in_place()
   advances ITER itself when a file yields no first record; the
   advance for the other case is not visible here. */
1023 for (iter = mtf->head; iter; )
1027 assert (iter->input == NULL);
1028 iter->input = xmalloc (dict_get_case_size (iter->dict));
1030 if (!sfm_read_case (iter->handle, iter->input, iter->dict))
1031 mtf_delete_file_in_place (mtf, &iter);
1040 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1041 if A == B, 1 if A > B. */
1043 mtf_compare_BY_values (struct mtf_proc *mtf,
1044 struct mtf_file *a, struct mtf_file *b,
1047 union value *a_input, *b_input;
/* NOTE(review): the assertion tolerates any one of A, B and C being
   NULL, but A and B are dereferenced unconditionally just below.
   Only C == NULL looks safe, and then only when both files have
   their own input buffers. */
1050 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
/* A file without an input buffer of its own is the active file,
   whose current record is case C. */
1051 a_input = a->input != NULL ? a->input : c->data;
1052 b_input = b->input != NULL ? b->input : c->data;
/* BY variables are compared in order; each file's own by[] entry
   supplies the position (fv) of the value in that file's record. */
1053 for (i = 0; i < mtf->by_cnt; i++)
1055 assert (a->by[i]->type == b->by[i]->type);
1056 assert (a->by[i]->width == b->by[i]->width);
1058 if (a->by[i]->type == NUMERIC)
1060 double af = a_input[a->by[i]->fv].f;
1061 double bf = b_input[b->by[i]->fv].f;
1072 assert (a->by[i]->type == ALPHA);
1073 result = memcmp (a_input[a->by[i]->fv].s,
1074 b_input[b->by[i]->fv].s,
1078 else if (result > 0)
1085 /* Perform one iteration of steps 3...7 above. */
/* The step numbers refer to the algorithm description in
   cmd_match_files().  C is the current active-file case, or NULL
   when called from mtf_processing_finish().
   NOTE(review): MTF_ is tagged UNUSED but is dereferenced below,
   and cmd_match_files() visibly registers this callback with a NULL
   auxiliary pointer -- confirm the intended argument. */
1087 mtf_processing (struct ccase *c, void *mtf_ UNUSED)
1089 struct mtf_proc *mtf = mtf_;
1090 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1091 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BY values. */
1092 struct mtf_file *iter; /* Iterator. */
1096 /* If the active file doesn't have the minimum BY values, don't
1097 return because that would cause a record to be skipped. */
1100 if (mtf->head->type == MTF_TABLE)
1103 /* 3. Find the FILE input record with minimum BY values. Store
1104 all the values from this input record into the output record.
1106 4. Find all the FILE input records with BY values identical
1107 to the minimums. Store all the values from these input
1108 records into the output record. */
/* Files are threaded onto two temporary chains through next_min:
   min_* for those sharing the smallest BY values seen so far, max_*
   for the rest.  When a file with smaller values turns up, the
   current minimum chain is moved onto the max chain and the new
   file starts a fresh minimum chain. */
1109 min_head = min_tail = mtf->head;
1110 max_head = max_tail = NULL;
1111 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1113 switch (mtf_compare_BY_values (mtf, min_head, iter, c))
1117 max_tail = max_tail->next_min = iter;
1119 max_head = max_tail = iter;
1123 min_tail = min_tail->next_min = iter;
1129 max_tail->next_min = min_head;
1130 max_tail = min_tail;
1134 max_head = min_head;
1135 max_tail = min_tail;
1137 min_head = min_tail = iter;
1144 /* 5. For every TABLE, read another record as long as the BY
1145 values on the TABLE's input record are less than the FILEs'
1146 BY values. If an exact match is found, store all the values
1147 from the TABLE input record into the output record. */
1150 struct mtf_file *next = iter->next;
1152 assert (iter->type == MTF_TABLE);
1154 if (iter->handle == NULL)
1158 switch (mtf_compare_BY_values (mtf, min_head, iter, c))
1162 max_tail = max_tail->next_min = iter;
1164 max_head = max_tail = iter;
1168 min_tail = min_tail->next_min = iter;
1172 if (iter->handle == NULL)
1174 if (sfm_read_case (iter->handle, iter->input, iter->dict))
1176 mtf_delete_file_in_place (mtf, &iter);
1186 /* Next sequence number. */
1189 /* Store data to all the records we are using. */
1191 min_tail->next_min = NULL;
1192 for (iter = min_head; iter; iter = iter->next_min)
1196 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1198 struct variable *v = dict_get_var (iter->dict, i);
1199 union value *record;
/* seq_nums[] holds, per output variable, the sequence number of the
   last iteration that stored it.  A variable already stamped with
   the current seq_num was filled by an earlier file in this
   iteration and is presumably skipped (the skip itself is not
   visible here). */
1201 if (mtf->seq_nums[v->p.mtf.master->index] == mtf->seq_num)
1203 mtf->seq_nums[v->p.mtf.master->index] = mtf->seq_num;
1205 record = iter->input != NULL ? iter->input : c->data;
1207 assert (v->type == NUMERIC || v->type == ALPHA);
1208 if (v->type == NUMERIC)
1209 mtf->mtf_case->data[v->p.mtf.master->fv].f = record[v->fv].f;
1211 memcpy (mtf->mtf_case->data[v->p.mtf.master->fv].s,
1212 record[v->fv].s, v->width);
1216 /* Store missing values to all the records we're not using. */
1218 max_tail->next_min = NULL;
1219 for (iter = max_head; iter; iter = iter->next_min)
1223 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1225 struct variable *v = dict_get_var (iter->dict, i);
1227 if (mtf->seq_nums[v->p.mtf.master->index] == mtf->seq_num)
1229 mtf->seq_nums[v->p.mtf.master->index] = mtf->seq_num;
/* NOTE(review): this diagnostic calls fh_handle_name(), whereas the
   rest of the file uses handle_get_name(); it is presumably inside
   a debugging conditional that is not visible -- confirm the name
   still exists. */
1232 printf ("%s/%s: dest-fv=%d\n",
1233 fh_handle_name (iter->handle),
1235 v->p.mtf.master->fv);
1237 if (v->type == NUMERIC)
1238 mtf->mtf_case->data[v->p.mtf.master->fv].f = SYSMIS;
1240 memset (mtf->mtf_case->data[v->p.mtf.master->fv].s, ' ',
1244 if (iter->handle == NULL)
1248 /* 6. Write the output record. */
1249 mtf->sink->class->write (mtf->sink, mtf->mtf_case);
1251 /* 7. Read another record from each input file FILE and TABLE
1252 that we stored values from above. If we come to the end of
1253 one of the input files, remove it from the list of input files. */
/* Walk the chain of files whose records were just used.  NEXT is
   captured first because mtf_delete_file_in_place() changes ITER
   when a file runs out of records. */
1255 for (iter = min_head; iter && iter->type == MTF_FILE; )
1257 struct mtf_file *next = iter->next_min;
1261 assert (iter->input != NULL);
1263 if (!sfm_read_case (iter->handle, iter->input, iter->dict))
1264 mtf_delete_file_in_place (mtf, &iter);
/* FILEs are kept ahead of TABLEs in the list, so a nonzero result
   means at least one FILE is still left to merge. */
1274 return (mtf->head && mtf->head->type != MTF_TABLE);
1277 /* Merge the dictionary for file F into the master dictionary M. Callers treat a zero return as failure. */
1280 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1282 struct dictionary *d = f->dict;
1283 const char *d_docs, *m_docs;
/* The master keeps the first file label it is given. */
1285 if (dict_get_label (m) == NULL)
1286 dict_set_label (m, dict_get_label (d));
1288 d_docs = dict_get_documents (d);
1289 m_docs = dict_get_documents (m);
1293 dict_set_documents (m, d_docs);
/* Presumably reached when both dictionaries have documents: D's are
   appended to M's.
   NOTE(review): whether dict_set_documents() copies NEW_DOCS or
   takes ownership of it is not visible; if it copies, NEW_DOCS
   must be freed afterwards. */
1299 new_len = strlen (m_docs) + strlen (d_docs);
1300 new_docs = xmalloc (new_len + 1);
1301 strcpy (new_docs, m_docs);
1302 strcat (new_docs, d_docs);
1303 dict_set_documents (m, new_docs);
1308 dict_compact_values (d);
1313 for (i = 0; i < dict_get_var_cnt (d); i++)
1315 struct variable *dv = dict_get_var (d, i);
1316 struct variable *mv = dict_lookup_var (m, dv->name);
/* Non-string variables are required to have width 0, so comparing
   widths below also separates numeric from string variables. */
1318 assert (dv->type == ALPHA || dv->width == 0);
1319 assert (!mv || mv->type == ALPHA || mv->width == 0);
/* Same name and same width: the master variable inherits value
   labels and missing values only where it has none of its own.
   NOTE(review): mv->val_labs is overwritten with a copy; the
   previous (empty) label set is not visibly released. */
1320 if (mv && dv->width == mv->width)
1322 if (val_labs_count (dv->val_labs)
1323 && !val_labs_count (mv->val_labs))
1324 mv->val_labs = val_labs_copy (dv->val_labs);
1325 if (dv->miss_type != MISSING_NONE
1326 && mv->miss_type == MISSING_NONE)
1327 copy_missing_values (mv, dv);
1329 if (mv && dv->label && !mv->label)
1330 mv->label = xstrdup (dv->label);
1333 mv = dict_clone_var (m, dv, dv->name);
1334 assert (mv != NULL);
1336 else if (mv->width != dv->width)
/* NOTE(review): f->handle is passed to handle_get_name() unguarded,
   whereas cmd_match_files() substitutes a placeholder when the
   handle is NULL (the active file) -- confirm this message cannot
   be reached for the active file. */
1338 msg (SE, _("Variable %s in file %s (%s) has different "
1339 "type or width from the same variable in "
1340 "earlier file (%s)."),
1341 dv->name, handle_get_name (f->handle),
1342 var_type_description (dv), var_type_description (mv));
/* Remember the master counterpart on the file's own variable;
   mtf_processing() uses it to find the slot in the output case. */
1345 dv->p.mtf.master = mv;
1352 /* IMPORT command. */
1354 /* Parses the IMPORT command. */
/* Visible flow: parse the FILE and TYPE subcommands, read the
   dictionary with pfm_read_dictionary(), let trim_dictionary()
   apply the common subcommands, make the result the default
   dictionary, and install a case source of class
   import_source_class. */
1358 struct file_handle *handle = NULL;
1359 struct dictionary *dict;
1360 struct get_pgm *pgm;
1361 int options = GTSV_OPT_NONE;
/* A bare string token is accepted in place of the FILE keyword. */
1368 if (lex_match_id ("FILE") || token == T_STRING)
1372 handle = fh_parse_file_handle ();
/* TYPE accepts COMM or TAPE; no statement that records which one
   was given is visible here. */
1376 else if (lex_match_id ("TYPE"))
1380 if (lex_match_id ("COMM"))
1382 else if (lex_match_id ("TAPE"))
1386 lex_error (_("expecting COMM or TAPE"));
1392 if (!lex_match ('/') && token != '.')
1398 discard_variables ();
/* NOTE(review): HANDLE starts out NULL and is only assigned in the
   FILE branch; no check that a file was actually named is visible
   before this call -- confirm. */
1400 dict = pfm_read_dictionary (handle, NULL);
1405 dump_dict_variables (dict);
1407 if (0 == trim_dictionary (dict, &options))
1409 fh_close_handle (handle);
1413 dump_dict_variables (dict);
1416 dict_compact_values (dict);
/* Diagnostic table: for each variable, its location in the file
   (get.fv, get.nv) next to its location in memory (fv, nv). */
1419 printf (_("IMPORT translation table from file to memory:\n"));
1420 for (i = 0; i < dict->nvar; i++)
1422 struct variable *v = dict->var[i];
1424 printf (_(" %8s from %3d,%3d to %3d,%3d\n"), v->name,
1425 v->get.fv, v->get.nv, v->fv, v->nv);
1429 dict_destroy (default_dict);
1430 default_dict = dict;
/* The IMPORT source reuses struct get_pgm for its per-source
   state. */
1432 pgm = xmalloc (sizeof *pgm);
1433 pgm->handle = handle;
1434 pgm->case_size = dict_get_case_size (default_dict);
1435 vfm_source = create_case_source (&import_source_class, default_dict, pgm);
1440 /* Reads all the cases from the data file and passes them to WRITE_CASE one by one, passing WC_DATA. */
1443 import_source_read (struct case_source *source,
1445 write_case_func *write_case, write_case_data wc_data)
/* SOURCE->aux is the struct get_pgm set up by the IMPORT command. */
1447 struct get_pgm *pgm = source->aux;
/* Read until pfm_read_case() returns zero; a zero return from
   WRITE_CASE is tested separately (the statement it guards is not
   visible here). */
1449 while (pfm_read_case (pgm->handle, c->data, default_dict))
1450 if (!write_case (wc_data))
1454 const struct case_source_class import_source_class =
1462 static int export_write_case_func (struct ccase *c, void *);
1464 /* Parses the EXPORT command. */
1465 /* FIXME: same as cmd_save_internal(). */
/* The structure mirrors cmd_save_internal(), except that the
   dictionary is written with pfm_write_dictionary() and the cases
   are always written immediately through procedure(). */
1469 struct file_handle *handle;
1470 struct dictionary *dict;
1471 int options = GTSV_OPT_SAVE;
1473 struct save_trns *t;
1478 if (lex_match_id ("OUTFILE"))
1481 handle = fh_parse_file_handle ();
1485 dict = dict_clone (default_dict);
1487 dump_dict_variables (dict);
/* Tie each variable of the clone to its counterpart in default_dict
   through aux, so the mapping can be followed after
   trim_dictionary() has edited the clone. */
1489 for (i = 0; i < dict_get_var_cnt (dict); i++)
1490 dict_get_var (dict, i)->aux = dict_get_var (default_dict, i);
1491 if (0 == trim_dictionary (dict, &options))
1493 fh_close_handle (handle);
1498 dump_dict_variables (dict);
1501 /* Write dictionary. */
1502 if (!pfm_write_dictionary (handle, dict))
1504 dict_destroy (dict);
1505 fh_close_handle (handle);
1509 /* Fill in transformation structure. */
1510 t = xmalloc (sizeof *t);
1511 t->h.proc = save_trns_proc;
1512 t->h.free = save_trns_free;
1514 t->nvar = dict_get_var_cnt (dict);
1515 t->var = xmalloc (sizeof *t->var * t->nvar);
1516 for (i = 0; i < t->nvar; i++)
1517 t->var[i] = dict_get_var (dict, i)->aux;
/* NOTE(review): the buffer is sized as nvar elements of
   sizeof *t->case_buf (an flt64), but export_write_case_func()
   fills it with nvar union value elements.  That is only large
   enough if sizeof (union value) <= sizeof (flt64) -- confirm. */
1518 t->case_buf = xmalloc (sizeof *t->case_buf * t->nvar);
/* The clone is no longer needed: t->var[] points at the
   default_dict variables reached through aux, not at the clone's
   own variables. */
1519 dict_destroy (dict);
1521 procedure (export_write_case_func, t);
1522 save_trns_free (&t->h);
1527 /* Writes case C to the EXPORT file. */
/* AUX is the struct save_trns that the EXPORT command passes to
   procedure(). */
1529 export_write_case_func (struct ccase *c, void *aux)
1531 struct save_trns *t = aux;
/* The flt64 case buffer is reused here as an array of union value.
   NOTE(review): see the sizing of case_buf in the EXPORT command;
   the reinterpretation assumes union value is no larger than
   flt64 -- confirm. */
1532 union value *p = (union value *) t->case_buf;
1535 for (i = 0; i < t->nvar; i++)
1537 struct variable *v = t->var[i];
/* Numeric values are copied whole; for strings only a pointer to
   the character data inside the case is stored in the c member. */
1539 if (v->type == NUMERIC)
1540 *p++ = c->data[v->fv];
1542 (*p++).c = c->data[v->fv].s;
1545 pfm_write_case (t->f, (union value *) t->case_buf);