1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
26 #include "file-handle.h"
34 #include "value-labels.h"
39 #include "debug-print.h"
41 /* GET or IMPORT input program. */
44 struct file_handle *handle; /* File to GET or IMPORT from. */
45 size_t case_size; /* Case size in bytes. */
48 /* XSAVE transformation (and related SAVE, EXPORT procedures). */
52 struct file_handle *f; /* Associated system file. */
53 int nvar; /* Number of variables. */
54 struct variable **var; /* Variables. */
55 flt64 *case_buf; /* Case transfer buffer. */
58 /* Options bits set by trim_dictionary(). */
59 #define GTSV_OPT_COMPRESSED 001 /* Compression; (X)SAVE only. */
60 #define GTSV_OPT_SAVE 002 /* The SAVE/XSAVE/EXPORT procedures. */
61 #define GTSV_OPT_MATCH_FILES 004 /* The MATCH FILES procedure. */
62 #define GTSV_OPT_NONE 0
64 static int trim_dictionary (struct dictionary * dict, int *options);
65 static int save_write_case_func (struct ccase *, void *);
66 static trns_proc_func save_trns_proc;
67 static trns_free_func save_trns_free;
70 void dump_dict_variables (struct dictionary *);
73 /* Parses the GET command. */
77 struct file_handle *handle;
78 struct dictionary *dict;
80 int options = GTSV_OPT_NONE;
86 if (lex_match_id ("FILE"))
89 handle = fh_parse_file_handle ();
93 dict = sfm_read_dictionary (handle, NULL);
98 dump_dict_variables (dict);
100 if (0 == trim_dictionary (dict, &options))
102 fh_close_handle (handle);
106 dump_dict_variables (dict);
109 dict_compact_values (dict);
112 printf (_("GET translation table from file to memory:\n"));
113 for (i = 0; i < dict->nvar; i++)
115 struct variable *v = dict->var[i];
117 printf (_(" %8s from %3d,%3d to %3d,%3d\n"), v->name,
118 v->get.fv, v->get.nv, v->fv, v->nv);
122 dict_destroy (default_dict);
125 pgm = xmalloc (sizeof *pgm);
126 pgm->handle = handle;
127 pgm->case_size = dict_get_case_size (default_dict);
128 vfm_source = create_case_source (&get_source_class, default_dict, pgm);
133 /* SAVE or XSAVE command? */
140 /* Parses the SAVE and XSAVE commands. */
142 cmd_save_internal (enum save_cmd save_cmd)
144 struct file_handle *handle;
145 struct dictionary *dict;
146 int options = GTSV_OPT_SAVE;
149 struct sfm_write_info inf;
153 lex_match_id ("SAVE");
156 if (lex_match_id ("OUTFILE"))
159 handle = fh_parse_file_handle ();
163 dict = dict_clone (default_dict);
165 dump_dict_variables (dict);
167 for (i = 0; i < dict_get_var_cnt (dict); i++)
168 dict_get_var (dict, i)->aux = dict_get_var (default_dict, i);
169 if (0 == trim_dictionary (dict, &options))
171 fh_close_handle (handle);
176 dump_dict_variables (dict);
179 /* Write dictionary. */
182 inf.compress = !!(options & GTSV_OPT_COMPRESSED);
183 if (!sfm_write_dictionary (&inf))
186 fh_close_handle (handle);
190 /* Fill in transformation structure. */
191 t = xmalloc (sizeof *t);
192 t->h.proc = save_trns_proc;
193 t->h.free = save_trns_free;
195 t->nvar = dict_get_var_cnt (dict);
196 t->var = xmalloc (sizeof *t->var * t->nvar);
197 for (i = 0; i < t->nvar; i++)
198 t->var[i] = dict_get_var (dict, i)->aux;
199 t->case_buf = xmalloc (sizeof *t->case_buf * inf.case_size);
202 if (save_cmd == CMD_SAVE)
204 procedure (save_write_case_func, t);
205 save_trns_free (&t->h);
209 assert (save_cmd == CMD_XSAVE);
210 add_transformation (&t->h);
216 /* Parses and performs the SAVE procedure. */
220 return cmd_save_internal (CMD_SAVE);
223 /* Parses the XSAVE transformation command. */
227 return cmd_save_internal (CMD_XSAVE);
230 /* Writes case C to the system file T->f.  Walks T's variables,
   reading each one's value from C at index V->fv; string values are
   copied into T->case_buf through a moving cursor.  The buffer and
   the number of flt64 elements between its start and the cursor are
   then passed to sfm_write_case(). */
232 do_write_case (struct save_trns *t, struct ccase *c)
/* P is the write cursor into the case transfer buffer. */
234 flt64 *p = t->case_buf;
237 for (i = 0; i < t->nvar; i++)
239 struct variable *v = t->var[i];
240 if (v->type == NUMERIC)
/* Numeric value, read from the case at index V->fv. */
242 double src = c->data[v->fv].f;
/* String value: copy V->width bytes, then fill with spaces.
   REM_RND_UP presumably gives the byte count needed to reach the
   next multiple of sizeof *p -- confirm against its definition. */
250 memcpy (p, c->data[v->fv].s, v->width);
251 memset (&((char *) p)[v->width], ' ',
252 REM_RND_UP (v->width, sizeof *p));
/* Advance the cursor by DIV_RND_UP (V->width, sizeof *p) flt64
   elements. */
253 p += DIV_RND_UP (v->width, sizeof *p);
/* The third argument is the number of flt64 elements filled in. */
257 sfm_write_case (t->f, t->case_buf, p - t->case_buf);
260 /* Writes case C to the system file specified on SAVE.  AUX is
   handed straight to do_write_case() as its struct save_trns
   argument.
   NOTE(review): AUX is tagged UNUSED although it is used on the
   next line; the annotation looks stale. */
262 save_write_case_func (struct ccase *c, void *aux UNUSED)
264 do_write_case (aux, c);
268 /* Writes case C to the system file specified on XSAVE.  This is
   the function installed as the `proc' member of the transformation
   header by cmd_save_internal().  CASE_NUM is not used. */
270 save_trns_proc (struct trns_header *h, struct ccase *c, int case_num UNUSED)
/* Recover the full transformation from its header pointer; this
   assumes the header is the first member of struct save_trns --
   TODO confirm against the struct definition. */
272 struct save_trns *t = (struct save_trns *) h;
273 do_write_case (t, c);
277 /* Frees a SAVE transformation.  Installed as the `free' member of
   the transformation header, and also called directly by
   cmd_save_internal() and cmd_export() once procedure() has
   returned.  PT is cast back to the struct save_trns it heads. */
279 save_trns_free (struct trns_header *pt)
281 struct save_trns *t = (struct save_trns *) pt;
/* Close the file that the cases were written to. */
283 fh_close_handle (t->f);
289 static int rename_variables (struct dictionary * dict);
291 /* The GET and SAVE commands have a common structure after the
292 FILE/OUTFILE subcommand. This function parses this structure and
293 returns nonzero on success, zero on failure. It both reads
294 *OPTIONS, for the GTSV_OPT_SAVE bit, and writes it, for the
295 GTSV_OPT_COMPRESSED bit. */
296 /* FIXME: IN, FIRST, LAST, MAP. */
297 /* FIXME? Should we call dict_compact_values() on dict as a
300 trim_dictionary (struct dictionary *dict, int *options)
302 if (get_scompression())
303 *options |= GTSV_OPT_COMPRESSED;
305 if (*options & GTSV_OPT_SAVE)
307 /* Delete all the scratch variables. */
312 v = xmalloc (sizeof *v * dict_get_var_cnt (dict));
314 for (i = 0; i < dict_get_var_cnt (dict); i++)
315 if (dict_class_from_id (dict_get_var (dict, i)->name) == DC_SCRATCH)
316 v[nv++] = dict_get_var (dict, i);
317 dict_delete_vars (dict, v, nv);
321 while ((*options & GTSV_OPT_MATCH_FILES) || lex_match ('/'))
323 if (!(*options & GTSV_OPT_MATCH_FILES) && lex_match_id ("COMPRESSED"))
324 *options |= GTSV_OPT_COMPRESSED;
325 else if (!(*options & GTSV_OPT_MATCH_FILES) && lex_match_id ("UNCOMPRESSED"))
326 *options &= ~GTSV_OPT_COMPRESSED;
327 else if (lex_match_id ("DROP"))
333 if (!parse_variables (dict, &v, &nv, PV_NONE))
335 dict_delete_vars (dict, v, nv);
338 else if (lex_match_id ("KEEP"))
345 if (!parse_variables (dict, &v, &nv, PV_NONE))
348 /* Move the specified variables to the beginning. */
349 dict_reorder_vars (dict, v, nv);
351 /* Delete the remaining variables. */
352 v = xrealloc (v, (dict_get_var_cnt (dict) - nv) * sizeof *v);
353 for (i = nv; i < dict_get_var_cnt (dict); i++)
354 v[i - nv] = dict_get_var (dict, i);
355 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
358 else if (lex_match_id ("RENAME"))
360 if (!rename_variables (dict))
365 lex_error (_("while expecting a valid subcommand"));
369 if (dict_get_var_cnt (dict) == 0)
371 msg (SE, _("All variables deleted from system file dictionary."));
375 if (*options & GTSV_OPT_MATCH_FILES)
381 lex_error (_("expecting end of command"));
388 /* Parses and performs the RENAME subcommand of GET and SAVE. */
390 rename_variables (struct dictionary * dict)
408 v = parse_dict_variable (dict);
411 if (!lex_force_match ('=')
414 if (!strncmp (tokid, v->name, 8))
416 if (dict_lookup_var (dict, tokid) != NULL)
418 msg (SE, _("Cannot rename %s as %s because there already exists "
419 "a variable named %s. To rename variables with "
420 "overlapping names, use a single RENAME subcommand "
421 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
422 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
426 dict_rename_var (dict, v, tokid);
435 while (lex_match ('('))
439 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
441 if (!lex_match ('='))
443 msg (SE, _("`=' expected after variable list."));
446 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
450 msg (SE, _("Number of variables on left side of `=' (%d) does not "
451 "match number of variables on right side (%d), in "
452 "parenthesized group %d of RENAME subcommand."),
453 nv - old_nv, nn - old_nv, group);
456 if (!lex_force_match (')'))
461 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
463 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
469 for (i = 0; i < nn; i++)
/* Debugging aid: prints a heading followed by the name of every
   variable in DICT, separated by ", ", to stdout.
   NOTE(review): this reads DICT->nvar and DICT->var directly, whereas
   most other code in this file goes through dict_get_var_cnt() and
   dict_get_var() -- confirm those members are still accessible. */
479 dump_dict_variables (struct dictionary * dict)
483 printf (_("\nVariables in dictionary:\n"));
484 for (i = 0; i < dict->nvar; i++)
485 printf ("%s, ", dict->var[i]->name);
490 /* Clears internal state related to GET input procedure.  The
   struct get_pgm for the source is found in SOURCE->aux; its file
   handle is closed here. */
492 get_source_destroy (struct case_source *source)
494 struct get_pgm *pgm = source->aux;
496 /* It is not necessary to destroy the dictionary because if we get
497 to this point then the dictionary is default_dict. */
498 fh_close_handle (pgm->handle);
502 /* Reads all the cases from the data file into C and passes them
   to WRITE_CASE one by one, passing WC_DATA.  The file handle comes
   from the struct get_pgm stored in SOURCE->aux, and cases are read
   using default_dict. */
505 get_source_read (struct case_source *source,
507 write_case_func *write_case, write_case_data wc_data)
509 struct get_pgm *pgm = source->aux;
/* Stop as soon as either the read or the write callback returns
   zero. */
511 while (sfm_read_case (pgm->handle, c->data, default_dict)
512 && write_case (wc_data))
516 const struct case_source_class get_source_class =
527 #include "debug-print.h"
532 MTF_FILE, /* Specified on FILE= subcommand. */
533 MTF_TABLE /* Specified on TABLE= subcommand. */
536 /* One of the files on MATCH FILES. */
539 struct mtf_file *next, *prev;
540 /* Next, previous in the list of files. */
541 struct mtf_file *next_min; /* Next in the chain of minimums. */
543 int type; /* One of MTF_*. */
544 struct variable **by; /* List of BY variables for this file. */
545 struct file_handle *handle; /* File handle for the file. */
546 struct dictionary *dict; /* Dictionary from system file. */
547 char in[9]; /* Name of the variable from IN=. */
548 char first[9], last[9]; /* Name of the variables from FIRST=, LAST=. */
549 union value *input; /* Input record. */
552 /* MATCH FILES procedure. */
555 struct mtf_file *head; /* First file in the list.  cmd_match_files() keeps FILEs ahead of TABLEs, so this is not necessarily the first one mentioned. */
556 struct mtf_file *tail; /* Last file in the list. */
558 struct variable **by; /* Variables on the BY subcommand. */
559 size_t by_cnt; /* Number of variables on BY subcommand. */
561 struct dictionary *dict; /* Dictionary of output file. */
562 struct case_sink *sink; /* Sink to receive output. */
563 struct ccase *mtf_case; /* Case used for output. */
565 unsigned seq_num; /* Current sequence number; mtf_processing() compares it with seq_nums[] to tell whether a variable has already been handled for the output case being built. */
566 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
569 static void mtf_free (struct mtf_proc *);
570 static void mtf_free_file (struct mtf_file *);
571 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
572 static void mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
574 static void mtf_read_nonactive_records (void *);
575 static void mtf_processing_finish (void *);
576 static int mtf_processing (struct ccase *, void *);
578 static char *var_type_description (struct variable *);
580 /* Parse and execute the MATCH FILES command. */
582 cmd_match_files (void)
585 struct mtf_file *first_table = NULL;
589 lex_match_id ("MATCH");
590 lex_match_id ("FILES");
592 mtf.head = mtf.tail = NULL;
595 mtf.dict = dict_create ();
600 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
606 if (lex_match (T_BY))
610 msg (SE, _("The BY subcommand may be given once at most."));
616 if (!parse_variables (mtf.dict, &mtf.by, &mtf.by_cnt,
617 PV_NO_DUPLICATE | PV_NO_SCRATCH))
620 else if (token != T_ID)
625 else if (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid))
627 struct mtf_file *file = xmalloc (sizeof *file);
629 file->in[0] = file->first[0] = file->last[0] = '\0';
634 if (lex_match_id ("FILE"))
635 file->type = MTF_FILE;
636 else if (lex_match_id ("TABLE"))
638 file->type = MTF_TABLE;
644 /* FILEs go first, then TABLEs. */
645 if (file->type == MTF_TABLE || first_table == NULL)
648 file->prev = mtf.tail;
650 mtf.tail->next = file;
652 if (mtf.head == NULL)
654 if (file->type == MTF_TABLE && first_table == NULL)
659 assert (file->type == MTF_FILE);
660 file->next = first_table;
661 file->prev = first_table->prev;
662 if (first_table->prev)
663 first_table->prev->next = file;
666 first_table->prev = file;
677 msg (SE, _("The active file may not be specified more "
683 assert (pgm_state != STATE_INPUT);
684 if (pgm_state == STATE_INIT)
686 msg (SE, _("Cannot specify the active file since no active "
687 "file has been defined."));
694 _("MATCH FILES may not be used after TEMPORARY when "
695 "the active file is an input source. "
696 "Temporary transformations will be made permanent."));
702 file->handle = fh_parse_file_handle ();
709 file->dict = sfm_read_dictionary (file->handle, NULL);
714 file->dict = default_dict;
715 if (!mtf_merge_dictionary (mtf.dict, file))
718 else if (lex_id_match ("IN", tokid)
719 || lex_id_match ("FIRST", tokid)
720 || lex_id_match ("LAST", tokid))
725 if (mtf.tail == NULL)
727 msg (SE, _("IN, FIRST, and LAST subcommands may not occur "
728 "before the first FILE or TABLE."));
732 if (lex_match_id ("IN"))
737 else if (lex_match_id ("FIRST"))
739 name = mtf.tail->first;
742 else if (lex_match_id ("LAST"))
744 name = mtf.tail->last;
759 msg (SE, _("Multiple %s subcommands for a single FILE or "
764 strcpy (name, tokid);
767 if (!dict_create_var (mtf.dict, name, 0))
769 msg (SE, _("Duplicate variable name %s while creating %s "
775 else if (lex_id_match ("RENAME", tokid)
776 || lex_id_match ("KEEP", tokid)
777 || lex_id_match ("DROP", tokid))
779 int options = GTSV_OPT_MATCH_FILES;
781 if (mtf.tail == NULL)
783 msg (SE, _("RENAME, KEEP, and DROP subcommands may not occur "
784 "before the first FILE or TABLE."));
788 if (!trim_dictionary (mtf.tail->dict, &options))
791 else if (lex_match_id ("MAP"))
801 while (token != '.');
807 msg (SE, _("The BY subcommand is required when a TABLE subcommand "
815 struct mtf_file *iter;
817 for (iter = mtf.head; iter; iter = iter->next)
821 iter->by = xmalloc (sizeof *iter->by * mtf.by_cnt);
823 for (i = 0; i < mtf.by_cnt; i++)
825 iter->by[i] = dict_lookup_var (iter->dict, mtf.by[i]->name);
826 if (iter->by[i] == NULL)
828 msg (SE, _("File %s lacks BY variable %s."),
829 iter->handle ? fh_handle_name (iter->handle) : "*",
839 /* From sfm-read.c. */
840 extern void dump_dictionary (struct dictionary *);
842 dump_dictionary (mtf.dict);
846 /* MATCH FILES performs an n-way merge on all its input files.
849 1. Read one input record from every input FILE.
851 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
853 3. Find the FILE input record with minimum BY values. Store all
854 the values from this input record into the output record.
856 4. Find all the FILE input records with BY values identical to
857 the minimums. Store all the values from these input records into
860 5. For every TABLE, read another record as long as the BY values
861 on the TABLE's input record are less than the FILEs' BY values.
862 If an exact match is found, store all the values from the TABLE
863 input record into the output record.
865 6. Write the output record.
867 7. Read another record from each input file FILE and TABLE that
868 we stored values from above. If we come to the end of one of the
869 input files, remove it from the list of input files.
871 8. Repeat from step 2.
873 Unfortunately, this algorithm can't be directly implemented
874 because there's no function to read a record from the active
875 file; instead, it has to be done using callbacks.
877 FIXME: For merging large numbers of files (more than 10?) a
878 better algorithm would use a heap for finding minimum
882 discard_variables ();
884 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
886 mtf.seq_nums = xmalloc (dict_get_var_cnt (mtf.dict)
887 * sizeof *mtf.seq_nums);
888 memset (mtf.seq_nums, 0,
889 dict_get_var_cnt (mtf.dict) * sizeof *mtf.seq_nums);
890 mtf.mtf_case = xmalloc (dict_get_case_size (mtf.dict));
892 mtf_read_nonactive_records (NULL);
894 procedure (mtf_processing, NULL);
895 mtf_processing_finish (NULL);
897 dict_destroy (default_dict);
898 default_dict = mtf.dict;
900 vfm_source = mtf.sink->class->make_source (mtf.sink);
901 free_case_sink (mtf.sink);
911 /* Repeats 2...8 an arbitrary number of times.  First removes the
   active file (the entry whose handle is NULL) from the list, then
   keeps calling mtf_processing() with a null case for as long as the
   head of the list is a FILE.
   NOTE(review): the call visible in cmd_match_files() is
   mtf_processing_finish (NULL), but MTF_ is dereferenced below via
   mtf->head -- confirm what the caller is meant to pass. */
913 mtf_processing_finish (void *mtf_)
915 struct mtf_proc *mtf = mtf_;
916 struct mtf_file *iter;
918 /* Find the active file and delete it. */
919 for (iter = mtf->head; iter; iter = iter->next)
920 if (iter->handle == NULL)
922 mtf_delete_file_in_place (mtf, &iter);
/* Drain the remaining FILEs; there is no active-file case any more,
   so NULL is passed as the case. */
926 while (mtf->head && mtf->head->type == MTF_FILE)
927 if (!mtf_processing (NULL, mtf))
931 /* Return a string in a static buffer describing V's variable type and width. */
934 var_type_description (struct variable *v)
936 static char buf[2][32];
943 if (v->type == NUMERIC)
944 strcpy (s, "numeric");
947 assert (v->type == ALPHA);
948 sprintf (s, "string with width %d", v->width);
953 /* Free FILE and associated data: closes FILE's handle and destroys
   its dictionary unless that dictionary is default_dict. */
955 mtf_free_file (struct mtf_file *file)
/* NOTE(review): the active file is represented with a NULL handle
   elsewhere in this file; presumably fh_close_handle() tolerates
   NULL -- confirm. */
957 fh_close_handle (file->handle);
/* The active file's entry borrows default_dict (see
   cmd_match_files()), so that dictionary must not be destroyed
   here. */
958 if (file->dict != NULL && file->dict != default_dict)
959 dict_destroy (file->dict);
966 /* Free all the data for the MATCH FILES procedure: every file
   still on MTF's list, the output dictionary, and the sequence
   number array. */
968 mtf_free (struct mtf_proc *mtf)
970 struct mtf_file *iter, *next;
/* NEXT carries the iteration forward, since ITER itself is handed
   to mtf_free_file(). */
972 for (iter = mtf->head; iter; iter = next)
976 mtf_free_file (iter);
981 dict_destroy (mtf->dict);
982 free (mtf->seq_nums);
985 /* Remove *FILE from the mtf_file chain.  Make *FILE point to the
   next file in the chain, or to NULL if it was the last in the chain.
   Every variable of the removed file is also reset in the output
   case MTF->mtf_case: numeric variables to SYSMIS, string variables
   to spaces. */
988 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
990 struct mtf_file *f = *file;
/* Unlink F from its neighbours in the doubly linked list. */
993 f->prev->next = f->next;
995 f->next->prev = f->prev;
/* Blank out, in the output case, the slot of the master variable
   that each of F's variables maps to. */
1005 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1007 struct variable *v = dict_get_var (f->dict, i);
1009 if (v->type == NUMERIC)
1010 mtf->mtf_case->data[v->p.mtf.master->fv].f = SYSMIS;
1012 memset (mtf->mtf_case->data[v->p.mtf.master->fv].s, ' ', v->width);
1019 /* Read a record from every input file except the active file.
   For each file visited, an input buffer the size of one case of
   that file's dictionary is allocated and the first case is read
   into it; a file whose read fails is removed from the list.
   NOTE(review): MTF_ is tagged UNUSED although it is used on the
   next line, and the call visible in cmd_match_files() passes NULL,
   which would be dereferenced by mtf->head below -- confirm what the
   caller is meant to pass. */
1021 mtf_read_nonactive_records (void *mtf_ UNUSED)
1023 struct mtf_proc *mtf = mtf_;
1024 struct mtf_file *iter;
/* No increment expression: ITER is advanced inside the loop, since
   mtf_delete_file_in_place() moves it on when a file is removed. */
1026 for (iter = mtf->head; iter; )
1030 assert (iter->input == NULL);
1031 iter->input = xmalloc (dict_get_case_size (iter->dict));
1033 if (!sfm_read_case (iter->handle, iter->input, iter->dict))
1034 mtf_delete_file_in_place (mtf, &iter);
1043 /* Compare the BY variables for files A and B; return -1 if A < B, 0
   if A == B, 1 if A > B.  A file whose `input' buffer is NULL has its
   values taken from C->data instead (presumably this is the active
   file, whose record arrives as the case C -- confirm).
   NOTE(review): the assertion below tolerates one of A, B, C being
   NULL, yet A and B are dereferenced unconditionally right after it,
   so only C can safely be NULL. */
1046 mtf_compare_BY_values (struct mtf_proc *mtf,
1047 struct mtf_file *a, struct mtf_file *b,
1050 union value *a_input, *b_input;
1053 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1054 a_input = a->input != NULL ? a->input : c->data;
1055 b_input = b->input != NULL ? b->input : c->data;
/* Compare one BY variable at a time, in BY order. */
1056 for (i = 0; i < mtf->by_cnt; i++)
/* Corresponding BY variables must agree in type and width. */
1058 assert (a->by[i]->type == b->by[i]->type);
1059 assert (a->by[i]->width == b->by[i]->width);
1061 if (a->by[i]->type == NUMERIC)
1063 double af = a_input[a->by[i]->fv].f;
1064 double bf = b_input[b->by[i]->fv].f;
/* String BY variables are compared bytewise with memcmp(). */
1075 assert (a->by[i]->type == ALPHA);
1076 result = memcmp (a_input[a->by[i]->fv].s,
1077 b_input[b->by[i]->fv].s,
1081 else if (result > 0)
1088 /* Perform one iteration of steps 3...7 above. */
1090 mtf_processing (struct ccase *c, void *mtf_ UNUSED)
1092 struct mtf_proc *mtf = mtf_;
1093 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1094 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BY values. */
1095 struct mtf_file *iter; /* Iterator. */
1099 /* If the active file doesn't have the minimum BY values, don't
1100 return because that would cause a record to be skipped. */
1103 if (mtf->head->type == MTF_TABLE)
1106 /* 3. Find the FILE input record with minimum BY values. Store
1107 all the values from this input record into the output record.
1109 4. Find all the FILE input records with BY values identical
1110 to the minimums. Store all the values from these input
1111 records into the output record. */
1112 min_head = min_tail = mtf->head;
1113 max_head = max_tail = NULL;
1114 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1116 switch (mtf_compare_BY_values (mtf, min_head, iter, c))
1120 max_tail = max_tail->next_min = iter;
1122 max_head = max_tail = iter;
1126 min_tail = min_tail->next_min = iter;
1132 max_tail->next_min = min_head;
1133 max_tail = min_tail;
1137 max_head = min_head;
1138 max_tail = min_tail;
1140 min_head = min_tail = iter;
1147 /* 5. For every TABLE, read another record as long as the BY
1148 values on the TABLE's input record are less than the FILEs'
1149 BY values. If an exact match is found, store all the values
1150 from the TABLE input record into the output record. */
1153 struct mtf_file *next = iter->next;
1155 assert (iter->type == MTF_TABLE);
1157 if (iter->handle == NULL)
1161 switch (mtf_compare_BY_values (mtf, min_head, iter, c))
1165 max_tail = max_tail->next_min = iter;
1167 max_head = max_tail = iter;
1171 min_tail = min_tail->next_min = iter;
1175 if (iter->handle == NULL)
1177 if (sfm_read_case (iter->handle, iter->input, iter->dict))
1179 mtf_delete_file_in_place (mtf, &iter);
1189 /* Next sequence number. */
1192 /* Store data to all the records we are using. */
1194 min_tail->next_min = NULL;
1195 for (iter = min_head; iter; iter = iter->next_min)
1199 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1201 struct variable *v = dict_get_var (iter->dict, i);
1202 union value *record;
1204 if (mtf->seq_nums[v->p.mtf.master->index] == mtf->seq_num)
1206 mtf->seq_nums[v->p.mtf.master->index] = mtf->seq_num;
1208 record = iter->input != NULL ? iter->input : c->data;
1210 assert (v->type == NUMERIC || v->type == ALPHA);
1211 if (v->type == NUMERIC)
1212 mtf->mtf_case->data[v->p.mtf.master->fv].f = record[v->fv].f;
1214 memcpy (mtf->mtf_case->data[v->p.mtf.master->fv].s,
1215 record[v->fv].s, v->width);
1219 /* Store missing values to all the records we're not using. */
1221 max_tail->next_min = NULL;
1222 for (iter = max_head; iter; iter = iter->next_min)
1226 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1228 struct variable *v = dict_get_var (iter->dict, i);
1230 if (mtf->seq_nums[v->p.mtf.master->index] == mtf->seq_num)
1232 mtf->seq_nums[v->p.mtf.master->index] = mtf->seq_num;
1235 printf ("%s/%s: dest-fv=%d\n",
1236 fh_handle_name (iter->handle),
1238 v->p.mtf.master->fv);
1240 if (v->type == NUMERIC)
1241 mtf->mtf_case->data[v->p.mtf.master->fv].f = SYSMIS;
1243 memset (mtf->mtf_case->data[v->p.mtf.master->fv].s, ' ',
1247 if (iter->handle == NULL)
1251 /* 6. Write the output record. */
1252 mtf->sink->class->write (mtf->sink, mtf->mtf_case);
1254 /* 7. Read another record from each input file FILE and TABLE
1255 that we stored values from above. If we come to the end of
1256 one of the input files, remove it from the list of input
1258 for (iter = min_head; iter && iter->type == MTF_FILE; )
1260 struct mtf_file *next = iter->next_min;
1264 assert (iter->input != NULL);
1266 if (!sfm_read_case (iter->handle, iter->input, iter->dict))
1267 mtf_delete_file_in_place (mtf, &iter);
1277 return (mtf->head && mtf->head->type != MTF_TABLE);
1280 /* Merge the dictionary for file F into the master dictionary M. */
1283 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1285 struct dictionary *d = f->dict;
1286 const char *d_docs, *m_docs;
1288 if (dict_get_label (m) == NULL)
1289 dict_set_label (m, dict_get_label (d));
1291 d_docs = dict_get_documents (d);
1292 m_docs = dict_get_documents (m);
1296 dict_set_documents (m, d_docs);
1302 new_len = strlen (m_docs) + strlen (d_docs);
1303 new_docs = xmalloc (new_len + 1);
1304 strcpy (new_docs, m_docs);
1305 strcat (new_docs, d_docs);
1306 dict_set_documents (m, new_docs);
1311 dict_compact_values (d);
1316 for (i = 0; i < dict_get_var_cnt (d); i++)
1318 struct variable *dv = dict_get_var (d, i);
1319 struct variable *mv = dict_lookup_var (m, dv->name);
1321 assert (dv->type == ALPHA || dv->width == 0);
1322 assert (!mv || mv->type == ALPHA || mv->width == 0);
1323 if (mv && dv->width == mv->width)
1325 if (val_labs_count (dv->val_labs)
1326 && !val_labs_count (mv->val_labs))
1327 mv->val_labs = val_labs_copy (dv->val_labs);
1328 if (dv->miss_type != MISSING_NONE
1329 && mv->miss_type == MISSING_NONE)
1330 copy_missing_values (mv, dv);
1332 if (mv && dv->label && !mv->label)
1333 mv->label = xstrdup (dv->label);
1336 mv = dict_clone_var (m, dv, dv->name);
1337 assert (mv != NULL);
1339 else if (mv->width != dv->width)
1341 msg (SE, _("Variable %s in file %s (%s) has different "
1342 "type or width from the same variable in "
1343 "earlier file (%s)."),
1344 dv->name, fh_handle_name (f->handle),
1345 var_type_description (dv), var_type_description (mv));
1348 dv->p.mtf.master = mv;
1355 /* IMPORT command. */
1357 /* Parses the IMPORT command. */
1361 struct file_handle *handle = NULL;
1362 struct dictionary *dict;
1363 struct get_pgm *pgm;
1364 int options = GTSV_OPT_NONE;
1367 lex_match_id ("IMPORT");
1373 if (lex_match_id ("FILE") || token == T_STRING)
1377 handle = fh_parse_file_handle ();
1381 else if (lex_match_id ("TYPE"))
1385 if (lex_match_id ("COMM"))
1387 else if (lex_match_id ("TAPE"))
1391 lex_error (_("expecting COMM or TAPE"));
1397 if (!lex_match ('/') && token != '.')
1403 discard_variables ();
1405 dict = pfm_read_dictionary (handle, NULL);
1410 dump_dict_variables (dict);
1412 if (0 == trim_dictionary (dict, &options))
1414 fh_close_handle (handle);
1418 dump_dict_variables (dict);
1421 dict_compact_values (dict);
1424 printf (_("IMPORT translation table from file to memory:\n"));
1425 for (i = 0; i < dict->nvar; i++)
1427 struct variable *v = dict->var[i];
1429 printf (_(" %8s from %3d,%3d to %3d,%3d\n"), v->name,
1430 v->get.fv, v->get.nv, v->fv, v->nv);
1434 dict_destroy (default_dict);
1435 default_dict = dict;
1437 pgm = xmalloc (sizeof *pgm);
1438 pgm->handle = handle;
1439 pgm->case_size = dict_get_case_size (default_dict);
1440 vfm_source = create_case_source (&import_source_class, default_dict, pgm);
1445 /* Reads all the cases from the data file and passes them to
   WRITE_CASE one by one, passing WC_DATA. */
1448 import_source_read (struct case_source *source,
1450 write_case_func *write_case, write_case_data wc_data)
1452 struct get_pgm *pgm = source->aux;
1454 while (pfm_read_case (pgm->handle, c->data, default_dict))
1455 if (!write_case (wc_data))
1459 const struct case_source_class import_source_class =
1467 static int export_write_case_func (struct ccase *c, void *);
1469 /* Parses the EXPORT command. */
1470 /* FIXME: same as cmd_save_internal(). */
1474 struct file_handle *handle;
1475 struct dictionary *dict;
1476 int options = GTSV_OPT_SAVE;
1478 struct save_trns *t;
1482 lex_match_id ("EXPORT");
1485 if (lex_match_id ("OUTFILE"))
1488 handle = fh_parse_file_handle ();
1492 dict = dict_clone (default_dict);
1494 dump_dict_variables (dict);
1496 for (i = 0; i < dict_get_var_cnt (dict); i++)
1497 dict_get_var (dict, i)->aux = dict_get_var (default_dict, i);
1498 if (0 == trim_dictionary (dict, &options))
1500 fh_close_handle (handle);
1505 dump_dict_variables (dict);
1508 /* Write dictionary. */
1509 if (!pfm_write_dictionary (handle, dict))
1511 dict_destroy (dict);
1512 fh_close_handle (handle);
1516 /* Fill in transformation structure. */
1517 t = xmalloc (sizeof *t);
1518 t->h.proc = save_trns_proc;
1519 t->h.free = save_trns_free;
1521 t->nvar = dict_get_var_cnt (dict);
1522 t->var = xmalloc (sizeof *t->var * t->nvar);
1523 for (i = 0; i < t->nvar; i++)
1524 t->var[i] = dict_get_var (dict, i)->aux;
1525 t->case_buf = xmalloc (sizeof *t->case_buf * t->nvar);
1526 dict_destroy (dict);
1528 procedure (export_write_case_func, t);
1529 save_trns_free (&t->h);
1534 /* Writes case C to the EXPORT file.  AUX is the struct save_trns
   set up by cmd_export().  The transformation's case buffer is
   reused as an array of union value with one element per variable,
   which is then handed to pfm_write_case(). */
1536 export_write_case_func (struct ccase *c, void *aux)
1538 struct save_trns *t = aux;
/* NOTE(review): cmd_export() allocates case_buf as T->nvar elements
   of flt64, but it is filled here as T->nvar elements of union value;
   that is only large enough if sizeof (union value) does not exceed
   sizeof (flt64) -- confirm. */
1539 union value *p = (union value *) t->case_buf;
1542 for (i = 0; i < t->nvar; i++)
1544 struct variable *v = t->var[i];
/* Numeric values are copied whole. */
1546 if (v->type == NUMERIC)
1547 *p++ = c->data[v->fv];
/* Otherwise only a pointer to the string data inside the case is
   stored; the characters themselves are not copied. */
1549 (*p++).c = c->data[v->fv].s;
1552 pfm_write_case (t->f, (union value *) t->case_buf);