1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
26 #include "file-handle.h"
34 #include "value-labels.h"
39 #include "debug-print.h"
41 /* XSAVE transformation (and related SAVE, EXPORT procedures). */
45 struct file_handle *f; /* Associated system file. */
46 int nvar; /* Number of variables. */
47 struct variable **var; /* Variables. */
48 flt64 *case_buf; /* Case transfer buffer. */
51 /* Options bits set by trim_dictionary(). */
52 #define GTSV_OPT_COMPRESSED 001 /* Compression; (X)SAVE only. */
53 #define GTSV_OPT_SAVE 002 /* The SAVE/XSAVE/EXPORT procedures. */
54 #define GTSV_OPT_MATCH_FILES 004 /* The MATCH FILES procedure. */
55 #define GTSV_OPT_NONE 0
57 static int trim_dictionary (struct dictionary * dict, int *options);
58 static int save_write_case_func (struct ccase *, void *);
59 static int save_trns_proc (struct trns_header *, struct ccase *);
60 static void save_trns_free (struct trns_header *);
63 void dump_dict_variables (struct dictionary *);
66 /* Parses the GET command. */
/* NOTE(review): this listing omits lines (the function header, braces
   and return statements are not shown), so the notes below describe
   only the statements that are visible. */
70 struct file_handle *handle;
71 struct dictionary *dict;
72 int options = GTSV_OPT_NONE;
78 if (lex_match_id ("FILE"))
81 handle = fh_parse_file_handle ();
/* Fetch the dictionary for the parsed handle. */
85 dict = sfm_read_dictionary (handle, NULL);
90 dump_dict_variables (dict);
/* trim_dictionary() returns zero on failure (see its header comment);
   the fh_close_handle() call that follows appears to be that failure
   path. */
92 if (0 == trim_dictionary (dict, &options))
94 fh_close_handle (handle);
98 dump_dict_variables (dict);
101 dict_compact_values (dict);
/* Diagnostic dump: for each variable, print its name followed by
   v->get.fv, v->get.nv, v->fv and v->nv. */
104 printf (_("GET translation table from file to memory:\n"));
105 for (i = 0; i < dict->nvar; i++)
107 struct variable *v = dict->var[i];
109 printf (_(" %8s from %3d,%3d to %3d,%3d\n"), v->name,
110 v->get.fv, v->get.nv, v->fv, v->nv);
/* The old default dictionary is destroyed here.  The statement that
   installs DICT in its place is not visible in this listing (the
   IMPORT code in this file assigns default_dict = dict right after
   the same call). */
114 dict_destroy (default_dict);
/* Create a case source of class get_source_class; HANDLE is passed as
   the second argument and is read back as source->aux by
   get_source_destroy() and get_source_read(). */
117 vfm_source = create_case_source (&get_source_class, handle);
122 /* SAVE or XSAVE command? */
129 /* Parses the SAVE and XSAVE commands. */
/* SAVE_CMD selects the final step: CMD_SAVE writes every case at once
   through procedure(), anything else must be CMD_XSAVE (asserted) and
   registers a transformation instead.
   NOTE(review): this listing omits lines (return type, braces, return
   statements), so only the visible statements are described. */
131 cmd_save_internal (enum save_cmd save_cmd)
133 struct file_handle *handle;
134 struct dictionary *dict;
/* GTSV_OPT_SAVE makes trim_dictionary() delete scratch variables. */
135 int options = GTSV_OPT_SAVE;
138 struct sfm_write_info inf;
142 lex_match_id ("SAVE");
145 if (lex_match_id ("OUTFILE"))
148 handle = fh_parse_file_handle ();
/* Work on a clone of the default dictionary so that DROP, KEEP and
   RENAME do not alter the active file. */
152 dict = dict_clone (default_dict);
154 dump_dict_variables (dict);
/* Remember, in each cloned variable's aux pointer, the default_dict
   variable at the same index.  The case-writing code later reads
   data through these originals.  This relies on dict_clone()
   preserving variable order -- TODO confirm. */
156 for (i = 0; i < dict_get_var_cnt (dict); i++)
157 dict_get_var (dict, i)->aux = dict_get_var (default_dict, i);
/* trim_dictionary() returns zero on failure; the fh_close_handle()
   call that follows appears to be that failure path. */
158 if (0 == trim_dictionary (dict, &options))
160 fh_close_handle (handle);
165 dump_dict_variables (dict);
168 /* Write dictionary. */
/* !! reduces the flag bit to exactly 0 or 1. */
171 inf.compress = !!(options & GTSV_OPT_COMPRESSED);
172 if (!sfm_write_dictionary (&inf))
175 fh_close_handle (handle);
179 /* Fill in transformation structure. */
180 t = xmalloc (sizeof *t);
181 t->h.proc = save_trns_proc;
182 t->h.free = save_trns_free;
184 t->nvar = dict_get_var_cnt (dict);
185 t->var = xmalloc (sizeof *t->var * t->nvar);
/* t->var[] holds the default_dict variables recorded in aux above,
   in the (possibly trimmed and reordered) output order. */
186 for (i = 0; i < t->nvar; i++)
187 t->var[i] = dict_get_var (dict, i)->aux;
/* NOTE(review): inf.case_size is not assigned anywhere in the visible
   code; presumably sfm_write_dictionary() fills it in -- confirm. */
188 t->case_buf = xmalloc (sizeof *t->case_buf * inf.case_size);
/* SAVE: run the procedure now with save_write_case_func as the
   per-case callback and T as its aux data, then release T. */
191 if (save_cmd == CMD_SAVE)
193 procedure (NULL, save_write_case_func, NULL, t);
194 save_trns_free (&t->h);
/* XSAVE: queue T as a transformation; its proc and free hooks were
   installed above. */
198 assert (save_cmd == CMD_XSAVE);
199 add_transformation (&t->h);
205 /* Parses and performs the SAVE procedure. */
209 return cmd_save_internal (CMD_SAVE);
212 /* Parses the XSAVE transformation command. */
216 return cmd_save_internal (CMD_XSAVE);
219 /* Writes the given C to the file specified by T. */
/* The case is assembled in t->case_buf, one variable at a time in
   t->var[] order, and then handed to sfm_write_case().
   NOTE(review): some lines of this function are missing from the
   listing (return type, braces, the numeric store). */
221 do_write_case (struct save_trns *t, struct ccase *c)
/* P is the write cursor into the case buffer, in flt64 units. */
223 flt64 *p = t->case_buf;
226 for (i = 0; i < t->nvar; i++)
228 struct variable *v = t->var[i];
229 if (v->type == NUMERIC)
/* Numeric value read from the case at index v->fv; the statements
   that store it through P are not visible in this listing. */
231 double src = c->data[v->fv].f;
/* String value: copy v->width bytes, space-fill after them, then
   advance P.  REM_RND_UP and DIV_RND_UP presumably give the padding
   up to, and the number of, whole flt64 units -- TODO confirm
   against the macro definitions. */
239 memcpy (p, c->data[v->fv].s, v->width);
240 memset (&((char *) p)[v->width], ' ',
241 REM_RND_UP (v->width, sizeof *p));
242 p += DIV_RND_UP (v->width, sizeof *p);
/* p - t->case_buf is the number of flt64 elements filled in. */
246 sfm_write_case (t->f, t->case_buf, p - t->case_buf);
/* Per-case callback given to procedure() by the SAVE path: forwards
   case C and AUX to do_write_case().  AUX is expected to be the
   struct save_trns passed as the last argument of procedure().
   NOTE(review): AUX is marked UNUSED in the parameter list but is in
   fact used below -- the attribute looks stale. */
250 save_write_case_func (struct ccase * c, void *aux UNUSED)
252 do_write_case (aux, c);
/* Transformation hook installed as t->h.proc: recovers the struct
   save_trns from its header H and writes case C with do_write_case().
   The cast assumes the trns_header is the first member of struct
   save_trns; the struct declaration is not fully visible here --
   confirm. */
257 save_trns_proc (struct trns_header *h, struct ccase * c)
259 struct save_trns *t = (struct save_trns *) h;
260 do_write_case (t, c);
/* Free hook for the save transformation.  It is installed as
   t->h.free and is also called directly once the SAVE and EXPORT
   procedures have run.  The visible code closes the output file
   handle; any further cleanup is on lines missing from this listing. */
265 save_trns_free (struct trns_header *pt)
267 struct save_trns *t = (struct save_trns *) pt;
269 fh_close_handle (t->f);
275 static int rename_variables (struct dictionary * dict);
277 /* The GET and SAVE commands have a common structure after the
278 FILE/OUTFILE subcommand. This function parses this structure and
279 returns nonzero on success, zero on failure. It both reads
280 *OPTIONS, for the GTSV_OPT_SAVE bit, and writes it, for the
281 GTSV_OPT_COMPRESSED bit. */
282 /* FIXME: IN, FIRST, LAST, MAP. */
283 /* FIXME? Should we call dict_compact_values() on dict as a final step? */
286 trim_dictionary (struct dictionary *dict, int *options)
288 if (set_scompression)
289 *options |= GTSV_OPT_COMPRESSED;
291 if (*options & GTSV_OPT_SAVE)
293 /* Delete all the scratch variables. */
/* A scratch variable is one whose name starts with '#'.  V is sized
   for the worst case (every variable); NV counts those collected. */
298 v = xmalloc (sizeof *v * dict_get_var_cnt (dict));
300 for (i = 0; i < dict_get_var_cnt (dict); i++)
301 if (dict_get_var (dict, i)->name[0] == '#')
302 v[nv++] = dict_get_var (dict, i);
303 dict_delete_vars (dict, v, nv);
/* Subcommand loop.  In MATCH FILES mode the loop is entered without
   requiring a leading '/', and COMPRESSED and UNCOMPRESSED are not
   recognized. */
307 while ((*options & GTSV_OPT_MATCH_FILES) || lex_match ('/'))
309 if (!(*options & GTSV_OPT_MATCH_FILES) && lex_match_id ("COMPRESSED"))
310 *options |= GTSV_OPT_COMPRESSED;
311 else if (!(*options & GTSV_OPT_MATCH_FILES) && lex_match_id ("UNCOMPRESSED"))
312 *options &= ~GTSV_OPT_COMPRESSED;
313 else if (lex_match_id ("DROP"))
319 if (!parse_variables (dict, &v, &nv, PV_NONE))
321 dict_delete_vars (dict, v, nv);
324 else if (lex_match_id ("KEEP"))
331 if (!parse_variables (dict, &v, &nv, PV_NONE))
334 /* Move the specified variables to the beginning. */
335 dict_reorder_vars (dict, v, nv);
337 /* Delete the remaining variables. */
/* After the reorder the kept variables occupy indexes 0..NV-1, so
   everything from index NV onward is collected into V (reused) and
   deleted.  NOTE(review): when KEEP names every variable the size
   requested here is 0 bytes -- confirm xrealloc() tolerates that. */
338 v = xrealloc (v, (dict_get_var_cnt (dict) - nv) * sizeof *v);
339 for (i = nv; i < dict_get_var_cnt (dict); i++)
340 v[i - nv] = dict_get_var (dict, i);
341 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
344 else if (lex_match_id ("RENAME"))
346 if (!rename_variables (dict))
351 lex_error (_("while expecting a valid subcommand"));
/* An empty dictionary is reported as an error. */
355 if (dict_get_var_cnt (dict) == 0)
357 msg (SE, _("All variables deleted from system file dictionary."));
361 if (*options & GTSV_OPT_MATCH_FILES)
367 lex_error (_("expecting end of command"));
374 /* Parses and performs the RENAME subcommand of GET and SAVE. */
/* Two syntaxes are visible below: a single OLD=NEW pair, applied with
   dict_rename_var(), and one or more parenthesized groups, applied
   together with dict_rename_vars().  The test that chooses between
   them, and the return statements, are on lines missing from this
   listing. */
376 rename_variables (struct dictionary * dict)
394 v = parse_dict_variable (dict);
397 if (!lex_force_match ('=')
/* Compares at most 8 characters of the new name with the current
   name; the action taken when they are equal is not visible here. */
400 if (!strncmp (tokid, v->name, 8))
/* Reject a new name that already belongs to another variable. */
402 if (dict_lookup_var (dict, tokid) != NULL)
404 msg (SE, _("Cannot rename %s as %s because there already exists "
405 "a variable named %s. To rename variables with "
406 "overlapping names, use a single RENAME subcommand "
407 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
408 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
412 dict_rename_var (dict, v, tokid);
/* Parenthesized form.  Each group appends its old variables to V and
   its new names to NEW_NAMES (PV_APPEND); nothing is renamed until
   the single dict_rename_vars() call after the loop. */
421 while (lex_match ('('))
425 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
427 if (!lex_match ('='))
429 msg (SE, _("`=' expected after variable list."));
432 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
/* The message reports the sizes of the current group only; OLD_NV is
   presumably the count accumulated before this group -- its
   assignment is not visible here. */
436 msg (SE, _("Number of variables on left side of `=' (%d) does not "
437 "match number of variables on right side (%d), in "
438 "parenthesized group %d of RENAME subcommand."),
439 nv - old_nv, nn - old_nv, group);
442 if (!lex_force_match (')'))
/* On failure dict_rename_vars() supplies the offending name through
   ERR_NAME, which is used in the message. */
447 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
449 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
/* Loop over the NN collected new names; its body is not visible in
   this listing. */
455 for (i = 0; i < nn; i++)
/* Debugging aid: prints a heading and then the name of every variable
   in DICT to standard output, each followed by a comma and a space. */
465 dump_dict_variables (struct dictionary * dict)
469 printf (_("\nVariables in dictionary:\n"));
470 for (i = 0; i < dict->nvar; i++)
471 printf ("%s, ", dict->var[i]->name);
476 /* Clears internal state related to GET input procedure. */
478 get_source_destroy (struct case_source *source)
480 struct file_handle *handle = source->aux;
482 /* It is not necessary to destroy the dictionary because if we get
483 to this point then the dictionary is default_dict. */
484 fh_close_handle (handle);
487 /* Reads all the cases from the data file and passes them to write_case(). */
490 get_source_read (struct case_source *source,
491 write_case_func *write_case, write_case_data wc_data)
493 struct file_handle *handle = source->aux;
495 while (sfm_read_case (handle, temp_case->data, default_dict)
496 && write_case (wc_data))
500 const struct case_source_class get_source_class =
510 #include "debug-print.h"
515 MTF_FILE, /* Specified on FILE= subcommand. */
516 MTF_TABLE /* Specified on TABLE= subcommand. */
519 /* One of the files on MATCH FILES. */
522 struct mtf_file *next, *prev;
523 /* Next, previous in the list of files. */
524 struct mtf_file *next_min; /* Next in the chain of minimums. */
526 int type; /* One of MTF_*. */
527 struct variable **by; /* List of BY variables for this file. */
528 struct file_handle *handle; /* File handle for the file. */
529 struct dictionary *dict; /* Dictionary from system file. */
530 char in[9]; /* Name of the variable from IN=. */
531 char first[9], last[9]; /* Name of the variables from FIRST=, LAST=. */
532 union value *input; /* Input record. */
535 /* All the files mentioned on FILE or TABLE. */
536 static struct mtf_file *mtf_head, *mtf_tail;
538 /* Variables on the BY subcommand. */
539 static struct variable **mtf_by;
542 /* Master dictionary. */
543 static struct dictionary *mtf_master;
545 /* Used to determine whether we've already initialized this variable in the current output case. */
547 static unsigned mtf_seq_num;
549 /* Sequence numbers for each variable in mtf_master. */
550 static unsigned *mtf_seq_nums;
552 static void mtf_free (void);
553 static void mtf_free_file (struct mtf_file *file);
554 static int mtf_merge_dictionary (struct mtf_file *f);
555 static void mtf_delete_file_in_place (struct mtf_file **file);
557 static void mtf_read_nonactive_records (void *);
558 static void mtf_processing_finish (void *);
559 static int mtf_processing (struct ccase *, void *);
561 static char *var_type_description (struct variable *);
563 /* Parse and execute the MATCH FILES command. */
/* NOTE(review): this listing omits many lines of the function (braces,
   error exits, several conditions), so the notes below describe only
   the statements that are visible. */
565 cmd_match_files (void)
/* First TABLE entry in the file list, or NULL if none seen yet. */
567 struct mtf_file *first_table = NULL;
571 lex_match_id ("MATCH");
572 lex_match_id ("FILES");
/* Start with an empty file list and an empty master dictionary that
   takes over the default dictionary's case limit. */
574 mtf_head = mtf_tail = NULL;
577 mtf_master = dict_create ();
580 dict_set_case_limit (mtf_master, dict_get_case_limit (default_dict));
586 if (lex_match (T_BY))
590 msg (SE, _("The BY subcommand may be given once at most."));
/* BY variables are looked up in the master dictionary as merged so
   far. */
596 if (!parse_variables (mtf_master, &mtf_by, &mtf_n_by,
597 PV_NO_DUPLICATE | PV_NO_SCRATCH))
600 else if (token != T_ID)
605 else if (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid))
607 struct mtf_file *file = xmalloc (sizeof *file);
/* No IN, FIRST or LAST variable names yet. */
609 file->in[0] = file->first[0] = file->last[0] = '\0';
614 if (lex_match_id ("FILE"))
615 file->type = MTF_FILE;
616 else if (lex_match_id ("TABLE"))
618 file->type = MTF_TABLE;
624 /* FILEs go first, then TABLEs. */
/* A TABLE, or any file seen before the first TABLE, is appended at
   the tail.  Otherwise the entry must be a FILE (asserted below) and
   is linked in immediately before first_table. */
625 if (file->type == MTF_TABLE || first_table == NULL)
628 file->prev = mtf_tail;
630 mtf_tail->next = file;
632 if (mtf_head == NULL)
634 if (file->type == MTF_TABLE && first_table == NULL)
639 assert (file->type == MTF_FILE);
640 file->next = first_table;
641 file->prev = first_table->prev;
642 if (first_table->prev)
643 first_table->prev->next = file;
646 first_table->prev = file;
657 msg (SE, _("The active file may not be specified more "
663 assert (pgm_state != STATE_INPUT);
664 if (pgm_state == STATE_INIT)
666 msg (SE, _("Cannot specify the active file since no active "
667 "file has been defined."));
/* A file given by handle reads its dictionary from that handle; the
   active file uses default_dict instead.  The tests that separate
   these two paths are not visible in this listing. */
673 file->handle = fh_parse_file_handle ();
680 file->dict = sfm_read_dictionary (file->handle, NULL);
685 file->dict = default_dict;
686 if (!mtf_merge_dictionary (file))
689 else if (lex_id_match ("IN", tokid)
690 || lex_id_match ("FIRST", tokid)
691 || lex_id_match ("LAST", tokid))
696 if (mtf_tail == NULL)
698 msg (SE, _("IN, FIRST, and LAST subcommands may not occur "
699 "before the first FILE or TABLE."));
/* NAME is pointed at the matching name field of the most recently
   listed file (mtf_tail). */
703 if (lex_match_id ("IN"))
708 else if (lex_match_id ("FIRST"))
710 name = mtf_tail->first;
713 else if (lex_match_id ("LAST"))
715 name = mtf_tail->last;
730 msg (SE, _("Multiple %s subcommands for a single FILE or "
/* NOTE(review): the destination fields are 9 bytes (in, first, last),
   so this strcpy is safe only if tokid never exceeds 8 characters
   plus the terminator -- confirm the lexer guarantees that. */
735 strcpy (name, tokid);
/* The new variable goes into the master dictionary; the third
   argument 0 presumably requests a numeric variable -- confirm. */
738 if (!dict_create_var (mtf_master, name, 0))
740 msg (SE, _("Duplicate variable name %s while creating %s "
746 else if (lex_id_match ("RENAME", tokid)
747 || lex_id_match ("KEEP", tokid)
748 || lex_id_match ("DROP", tokid))
/* GTSV_OPT_MATCH_FILES switches trim_dictionary() into its MATCH
   FILES mode. */
750 int options = GTSV_OPT_MATCH_FILES;
752 if (mtf_tail == NULL)
754 msg (SE, _("RENAME, KEEP, and DROP subcommands may not occur "
755 "before the first FILE or TABLE."));
/* The subcommand applies to the most recently listed file's
   dictionary. */
759 if (!trim_dictionary (mtf_tail->dict, &options))
762 else if (lex_match_id ("MAP"))
772 while (token != '.');
778 msg (SE, _("The BY subcommand is required when a TABLE subcommand "
786 struct mtf_file *iter;
/* Resolve every BY variable, by name, in each input file's own
   dictionary; a file that lacks one is an error.  A NULL handle is
   reported as "*" in the message. */
788 for (iter = mtf_head; iter; iter = iter->next)
792 iter->by = xmalloc (sizeof *iter->by * mtf_n_by);
794 for (i = 0; i < mtf_n_by; i++)
796 iter->by[i] = dict_lookup_var (iter->dict, mtf_by[i]->name);
797 if (iter->by[i] == NULL)
799 msg (SE, _("File %s lacks BY variable %s."),
800 iter->handle ? fh_handle_name (iter->handle) : "*",
810 /* From sfm-read.c. */
811 extern void dump_dictionary (struct dictionary *);
813 dump_dictionary (mtf_master);
817 /* MATCH FILES performs an n-way merge on all its input files.
820 1. Read one input record from every input FILE.
822 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
824 3. Find the FILE input record with minimum BY values. Store all
825 the values from this input record into the output record.
827 4. Find all the FILE input records with BY values identical to
828 the minimums. Store all the values from these input records into the output record.
831 5. For every TABLE, read another record as long as the BY values
832 on the TABLE's input record are less than the FILEs' BY values.
833 If an exact match is found, store all the values from the TABLE
834 input record into the output record.
836 6. Write the output record.
838 7. Read another record from each input file FILE and TABLE that
839 we stored values from above. If we come to the end of one of the
840 input files, remove it from the list of input files.
842 8. Repeat from step 2.
844 Unfortunately, this algorithm can't be directly implemented
845 because there's no function to read a record from the active
846 file; instead, it has to be done using callbacks.
848 FIXME: For merging large numbers of files (more than 10?) a
849 better algorithm would use a heap for finding minimum
850 values, or replacement selection, as described by Knuth in
851 _Art of Computer Programming, Vol. 3_. The SORT CASES
852 procedure does this, and perhaps some of its code could be adapted. */
856 discard_variables ();
859 temp_dict = mtf_master;
862 mtf_seq_nums = xmalloc (dict_get_var_cnt (mtf_master)
863 * sizeof *mtf_seq_nums);
864 memset (mtf_seq_nums, 0,
865 dict_get_var_cnt (mtf_master) * sizeof *mtf_seq_nums);
867 process_active_file (mtf_read_nonactive_records, mtf_processing,
868 mtf_processing_finish, NULL);
879 /* Repeats 2...8 an arbitrary number of times. */
/* Third callback handed to process_active_file() by the MATCH FILES
   command; presumably run after the last active-file case -- confirm.
   It removes the active file from the file list and then keeps
   running merge iterations on the files that remain. */
881 mtf_processing_finish (void *aux UNUSED)
883 /* Find the active file and delete it. */
885 struct mtf_file *iter;
/* The active file is the list entry whose handle is NULL. */
887 for (iter = mtf_head; iter; iter = iter->next)
888 if (iter->handle == NULL)
890 mtf_delete_file_in_place (&iter);
/* FILEs precede TABLEs in the list, so a FILE at the head means at
   least one FILE still has records to merge. */
895 while (mtf_head && mtf_head->type == MTF_FILE)
896 if (!mtf_processing (temp_case, NULL))
900 /* Return a string in a static buffer describing V's variable type and width. */
903 var_type_description (struct variable *v)
905 static char buf[2][32];
912 if (v->type == NUMERIC)
913 strcpy (s, "numeric");
916 assert (v->type == ALPHA);
917 sprintf (s, "string with width %d", v->width);
922 /* Free FILE and associated data. */
/* NOTE(review): the active file is identified elsewhere in this file
   by a NULL handle; confirm fh_close_handle() accepts NULL. */
924 mtf_free_file (struct mtf_file *file)
926 fh_close_handle (file->handle);
/* The active file's dict is default_dict itself, which this file does
   not own, so it must not be destroyed here. */
927 if (file->dict != NULL && file->dict != default_dict)
928 dict_destroy (file->dict);
935 /* Free all the data for the MATCH FILES procedure. */
939 struct mtf_file *iter, *next;
941 for (iter = mtf_head; iter; iter = next)
945 mtf_free_file (iter);
950 dict_destroy (mtf_master);
954 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
955 file in the chain, or to NULL if it was the last in the chain. */
/* NOTE(review): several lines are missing from this listing, including
   whatever guards the two unlink assignments and the update of *FILE. */
957 mtf_delete_file_in_place (struct mtf_file **file)
959 struct mtf_file *f = *file;
/* Unlink F from the doubly linked list of files. */
962 f->prev->next = f->next;
964 f->next->prev = f->prev;
/* Reset every output slot fed by this file to a missing value: SYSMIS
   for numeric variables, spaces for string variables.  The slot is
   found through the variable's link to its master-dictionary
   counterpart. */
974 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
976 struct variable *v = dict_get_var (f->dict, i);
978 if (v->type == NUMERIC)
979 compaction_case->data[v->p.mtf.master->fv].f = SYSMIS;
981 memset (compaction_case->data[v->p.mtf.master->fv].s, ' ',
989 /* Read a record from every input file except the active file. */
/* First callback handed to process_active_file() by the MATCH FILES
   command.  AUX is unused. */
991 mtf_read_nonactive_records (void *aux UNUSED)
993 struct mtf_file *iter;
/* The for header has no increment: when a file is deleted,
   mtf_delete_file_in_place() already moves ITER on to the next file.
   The ordinary advance is on a line missing from this listing. */
995 for (iter = mtf_head; iter; )
/* Each file read here gets its own input buffer, sized from its
   dictionary. */
999 assert (iter->input == NULL);
1000 iter->input = xmalloc (dict_get_case_size (iter->dict));
/* A file with no first record is dropped from the list at once. */
1002 if (!sfm_read_case (iter->handle, iter->input, iter->dict))
1003 mtf_delete_file_in_place (&iter);
/* This assignment aliases the input to temp_case's data instead of a
   private buffer; presumably it is the active-file branch, but the
   test selecting it is not visible here. */
1009 iter->input = temp_case->data;
1015 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1016 if A == B, 1 if A > B. */
/* NOTE(review): the numeric comparison and the return statements are
   on lines missing from this listing. */
1018 mtf_compare_BY_values (struct mtf_file *a, struct mtf_file *b)
/* BY variables are compared in order.  Each file has its own by[]
   entries, so the same BY variable may sit at a different case index
   (fv) in A and in B. */
1022 for (i = 0; i < mtf_n_by; i++)
1024 assert (a->by[i]->type == b->by[i]->type);
1025 assert (a->by[i]->width == b->by[i]->width);
1027 if (a->by[i]->type == NUMERIC)
1029 double af = a->input[a->by[i]->fv].f;
1030 double bf = b->input[b->by[i]->fv].f;
/* String BY values are compared bytewise. */
1041 assert (a->by[i]->type == ALPHA);
1042 result = memcmp (a->input[a->by[i]->fv].s,
1043 b->input[b->by[i]->fv].s,
1047 else if (result > 0)
1054 /* Perform one iteration of steps 3...7 above. */
/* Second callback handed to process_active_file(); also called
   directly by mtf_processing_finish().  Both parameters are unused.
   NOTE(review): many lines are missing from this listing, among them
   every case label of the two switch statements, so the notes below
   are limited to what the visible assignments show. */
1056 mtf_processing (struct ccase *c UNUSED, void *aux UNUSED)
1058 /* List of files with minimum BY values. */
1059 struct mtf_file *min_head, *min_tail;
1061 /* List of files with non-minimum BY values. */
1062 struct mtf_file *max_head, *max_tail;
1065 struct mtf_file *iter;
1069 /* If the active file doesn't have the minimum BY values, don't
1070 return because that would cause a record to be skipped. */
1073 if (mtf_head->type == MTF_TABLE)
1076 /* 3. Find the FILE input record with minimum BY values. Store
1077 all the values from this input record into the output record.
1079 4. Find all the FILE input records with BY values identical
1080 to the minimums. Store all the values from these input
1081 records into the output record. */
/* Both lists are singly linked through next_min.  The min list starts
   with the head file and the max list starts empty.  Judging by the
   assignments, the three arms of the switch below (a) append ITER to
   the max list, (b) append ITER to the min list, and (c) move the
   whole current min list onto the end of the max list and restart the
   min list at ITER. */
1082 min_head = min_tail = mtf_head;
1083 max_head = max_tail = NULL;
1084 for (iter = mtf_head->next; iter && iter->type == MTF_FILE;
1086 switch (mtf_compare_BY_values (min_head, iter))
1090 max_tail = max_tail->next_min = iter;
1092 max_head = max_tail = iter;
1096 min_tail = min_tail->next_min = iter;
1102 max_tail->next_min = min_head;
1103 max_tail = min_tail;
1107 max_head = min_head;
1108 max_tail = min_tail;
1110 min_head = min_tail = iter;
1117 /* 5. For every TABLE, read another record as long as the BY
1118 values on the TABLE's input record are less than the FILEs'
1119 BY values. If an exact match is found, store all the values
1120 from the TABLE input record into the output record. */
/* NEXT is saved before ITER is processed because ITER may be removed
   from the list below. */
1123 struct mtf_file *next = iter->next;
1125 assert (iter->type == MTF_TABLE);
1127 if (iter->handle == NULL)
1131 switch (mtf_compare_BY_values (min_head, iter))
1135 max_tail = max_tail->next_min = iter;
1137 max_head = max_tail = iter;
1141 min_tail = min_tail->next_min = iter;
1145 if (iter->handle == NULL)
1147 if (sfm_read_case (iter->handle, iter->input, iter->dict))
1149 mtf_delete_file_in_place (&iter);
1159 /* Next sequence number. */
1162 /* Store data to all the records we are using. */
/* mtf_seq_nums[] holds, for each master variable, the sequence number
   under which it was last written.  A slot that already carries the
   current mtf_seq_num is tested for below; the statement taken in
   that case is not visible, but presumably the variable is skipped so
   that the first file in the list to supply it wins -- confirm. */
1164 min_tail->next_min = NULL;
1165 for (iter = min_head; iter; iter = iter->next_min)
1169 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1171 struct variable *v = dict_get_var (iter->dict, i);
1173 if (mtf_seq_nums[v->p.mtf.master->index] == mtf_seq_num)
1175 mtf_seq_nums[v->p.mtf.master->index] = mtf_seq_num;
1178 printf ("%s/%s: dest-fv=%d, src-fv=%d\n",
1179 fh_handle_name (iter->handle),
1181 v->p.mtf.master->fv, v->fv);
/* Copy the value from the file's input record (index v->fv) to the
   output case (index of the master variable). */
1183 if (v->type == NUMERIC)
1184 compaction_case->data[v->p.mtf.master->fv].f
1185 = iter->input[v->fv].f;
1188 assert (v->type == ALPHA);
1189 memcpy (compaction_case->data[v->p.mtf.master->fv].s,
1190 iter->input[v->fv].s, v->width);
1195 /* Store missing values to all the records we're not using. */
/* Same stamping scheme as above, but the output slot receives SYSMIS
   (numeric) or spaces (string) instead of data. */
1197 max_tail->next_min = NULL;
1198 for (iter = max_head; iter; iter = iter->next_min)
1202 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1204 struct variable *v = dict_get_var (iter->dict, i);
1206 if (mtf_seq_nums[v->p.mtf.master->index] == mtf_seq_num)
1208 mtf_seq_nums[v->p.mtf.master->index] = mtf_seq_num;
1211 printf ("%s/%s: dest-fv=%d\n",
1212 fh_handle_name (iter->handle),
1214 v->p.mtf.master->fv);
1216 if (v->type == NUMERIC)
1217 compaction_case->data[v->p.mtf.master->fv].f = SYSMIS;
1219 memset (compaction_case->data[v->p.mtf.master->fv].s, ' ',
1223 if (iter->handle == NULL)
1227 /* 6. Write the output record. */
1228 process_active_file_output_case ();
1230 /* 7. Read another record from each input file FILE and TABLE
1231 that we stored values from above. If we come to the end of
1232 one of the input files, remove it from the list of input files. */
1234 for (iter = min_head; iter && iter->type == MTF_FILE; )
1236 struct mtf_file *next = iter->next_min;
1240 assert (iter->input != NULL);
1242 if (!sfm_read_case (iter->handle, iter->input, iter->dict))
1243 mtf_delete_file_in_place (&iter);
1253 return (mtf_head && mtf_head->type != MTF_TABLE);
1256 /* Merge the dictionary for file F into the master dictionary mtf_master. */
/* Folds the file label, documents and variables of F's dictionary
   into mtf_master and links each of F's variables to its master
   counterpart.  The caller treats a zero return as failure.
   NOTE(review): several conditions and the return statements are on
   lines missing from this listing. */
1259 mtf_merge_dictionary (struct mtf_file *f)
1261 struct dictionary *const m = mtf_master;
1262 struct dictionary *d = f->dict;
1263 const char *d_docs, *m_docs;
/* The master takes a file label only while it has none of its own. */
1265 if (dict_get_label (m) == NULL)
1266 dict_set_label (m, dict_get_label (d));
/* Documents: one visible path hands D's documents to the master
   unchanged; the other concatenates them, master's text first.  The
   tests choosing between the paths are not visible.
   NOTE(review): whether new_docs is freed after dict_set_documents()
   is also not visible -- check who owns the buffer. */
1268 d_docs = dict_get_documents (d);
1269 m_docs = dict_get_documents (m);
1273 dict_set_documents (m, d_docs);
1279 new_len = strlen (m_docs) + strlen (d_docs);
1280 new_docs = xmalloc (new_len + 1);
1281 strcpy (new_docs, m_docs);
1282 strcat (new_docs, d_docs);
1283 dict_set_documents (m, new_docs);
1288 dict_compact_values (d);
1293 for (i = 0; i < dict_get_var_cnt (d); i++)
1295 struct variable *dv = dict_get_var (d, i);
1296 struct variable *mv = dict_lookup_var (m, dv->name);
/* Asserted invariant: a variable that is not ALPHA has width 0.  The
   width comparisons below therefore also tell numeric variables from
   string ones. */
1298 assert (dv->type == ALPHA || dv->width == 0);
1299 assert (!mv || mv->type == ALPHA || mv->width == 0);
1300 if (mv && dv->width == mv->width)
/* Same width: the master inherits value labels and missing values
   from the file only where it has none of its own.
   NOTE(review): mv->val_labs is overwritten without releasing the
   previous (empty) set -- possible leak; confirm val_labs ownership. */
1302 if (val_labs_count (dv->val_labs)
1303 && !val_labs_count (mv->val_labs))
1304 mv->val_labs = val_labs_copy (dv->val_labs);
1305 if (dv->miss_type != MISSING_NONE
1306 && mv->miss_type == MISSING_NONE)
1307 copy_missing_values (mv, dv);
1309 if (mv && dv->label && !mv->label)
1310 mv->label = xstrdup (dv->label);
/* Clone the file's variable into the master under the same name;
   presumably this is the branch for a variable the master lacks (the
   guarding test is not visible). */
1313 mv = dict_clone_var (m, dv, dv->name);
1314 assert (mv != NULL);
1316 else if (mv->width != dv->width)
/* var_type_description() is called twice in one argument list; it
   declares two static buffers (buf[2][32]), presumably so that both
   results stay valid at once -- confirm. */
1318 msg (SE, _("Variable %s in file %s (%s) has different "
1319 "type or width from the same variable in "
1320 "earlier file (%s)."),
1321 dv->name, fh_handle_name (f->handle),
1322 var_type_description (dv), var_type_description (mv));
/* Record the master counterpart; the merge code uses this link to
   find the destination slot in the output case. */
1325 dv->p.mtf.master = mv;
1332 /* IMPORT command. */
1334 /* Parses the IMPORT command. */
/* NOTE(review): this listing omits lines (the function header, braces
   and return statements are not shown), so the notes below describe
   only the statements that are visible. */
1338 struct file_handle *handle = NULL;
1339 struct dictionary *dict;
1340 int options = GTSV_OPT_NONE;
1343 lex_match_id ("IMPORT");
/* The file may be introduced by the FILE keyword or given directly as
   a string token. */
1349 if (lex_match_id ("FILE") || token == T_STRING)
1353 handle = fh_parse_file_handle ();
/* TYPE accepts COMM or TAPE; what is done with the choice is not
   visible in this listing. */
1357 else if (lex_match_id ("TYPE"))
1361 if (lex_match_id ("COMM"))
1363 else if (lex_match_id ("TAPE"))
1367 lex_error (_("expecting COMM or TAPE"));
1373 if (!lex_match ('/') && token != '.')
1379 discard_variables ();
/* NOTE(review): HANDLE starts as NULL and is assigned only by the FILE
   branch above; unless a missing line checks for it, NULL can reach
   pfm_read_dictionary() when no file is given -- confirm. */
1381 dict = pfm_read_dictionary (handle, NULL);
1386 dump_dict_variables (dict);
/* trim_dictionary() returns zero on failure; the fh_close_handle()
   call that follows appears to be that failure path. */
1388 if (0 == trim_dictionary (dict, &options))
1390 fh_close_handle (handle);
1394 dump_dict_variables (dict);
1397 dict_compact_values (dict);
/* Diagnostic dump: for each variable, print its name followed by
   v->get.fv, v->get.nv, v->fv and v->nv. */
1400 printf (_("IMPORT translation table from file to memory:\n"));
1401 for (i = 0; i < dict->nvar; i++)
1403 struct variable *v = dict->var[i];
1405 printf (_(" %8s from %3d,%3d to %3d,%3d\n"), v->name,
1406 v->get.fv, v->get.nv, v->fv, v->nv);
/* Replace the default dictionary with the one just read. */
1410 dict_destroy (default_dict);
1411 default_dict = dict;
/* Create a case source of class import_source_class; HANDLE is passed
   as the second argument and is read back as source->aux by
   import_source_read(). */
1413 vfm_source = create_case_source (&import_source_class, handle);
1418 /* Reads all the cases from the data file and passes them to write_case(). */
1421 import_source_read (struct case_source *source,
1422 write_case_func *write_case, write_case_data wc_data)
1424 struct file_handle *handle = source->aux;
1425 while (pfm_read_case (handle, temp_case->data, default_dict))
1426 if (!write_case (wc_data))
1430 const struct case_source_class import_source_class =
1437 static int export_write_case_func (struct ccase *c, void *);
1439 /* Parses the EXPORT command. */
1440 /* FIXME: same as cmd_save_internal(). */
/* NOTE(review): this listing omits lines (the function header, braces
   and return statements are not shown), so the notes below describe
   only the statements that are visible. */
1444 struct file_handle *handle;
1445 struct dictionary *dict;
/* GTSV_OPT_SAVE makes trim_dictionary() delete scratch variables. */
1446 int options = GTSV_OPT_SAVE;
1448 struct save_trns *t;
1452 lex_match_id ("EXPORT");
1455 if (lex_match_id ("OUTFILE"))
1458 handle = fh_parse_file_handle ();
/* Work on a clone of the default dictionary, and remember in each
   cloned variable's aux pointer the default_dict variable at the same
   index.  This relies on dict_clone() preserving variable order --
   TODO confirm. */
1462 dict = dict_clone (default_dict);
1464 dump_dict_variables (dict);
1466 for (i = 0; i < dict_get_var_cnt (dict); i++)
1467 dict_get_var (dict, i)->aux = dict_get_var (default_dict, i);
1468 if (0 == trim_dictionary (dict, &options))
1470 fh_close_handle (handle);
1475 dump_dict_variables (dict);
1478 /* Write dictionary. */
1479 if (!pfm_write_dictionary (handle, dict))
1481 dict_destroy (dict);
1482 fh_close_handle (handle);
1486 /* Fill in transformation structure. */
/* The proc and free hooks are filled in as for SAVE, but the visible
   code never queues T as a transformation; it is used only as the aux
   data of the procedure call below and then freed. */
1487 t = xmalloc (sizeof *t);
1488 t->h.proc = save_trns_proc;
1489 t->h.free = save_trns_free;
1491 t->nvar = dict_get_var_cnt (dict);
1492 t->var = xmalloc (sizeof *t->var * t->nvar);
1493 for (i = 0; i < t->nvar; i++)
1494 t->var[i] = dict_get_var (dict, i)->aux;
/* One buffer element per variable.  NOTE(review): the element size is
   that of *t->case_buf (flt64), yet export_write_case_func() fills
   the buffer as an array of union value; this is large enough only if
   sizeof (union value) <= sizeof (flt64) -- confirm. */
1495 t->case_buf = xmalloc (sizeof *t->case_buf * t->nvar);
/* The clone can go before the procedure runs: t->var[] points at the
   default_dict variables saved in aux, not at the clone's variables. */
1496 dict_destroy (dict);
1498 procedure (NULL, export_write_case_func, NULL, t);
1499 save_trns_free (&t->h);
/* Per-case callback given to procedure() by the EXPORT command.  AUX
   is the struct save_trns built there.  Collects one union value per
   variable of T into t->case_buf and passes the buffer to
   pfm_write_case(). */
1505 export_write_case_func (struct ccase *c, void *aux)
1507 struct save_trns *t = aux;
/* The case buffer is declared as flt64 * but is used here as an array
   of union value. */
1508 union value *p = (union value *) t->case_buf;
1511 for (i = 0; i < t->nvar; i++)
1513 struct variable *v = t->var[i];
/* Numeric: copy the whole union value.  String: store a pointer to
   the string bytes inside the case (member c); nothing is copied. */
1515 if (v->type == NUMERIC)
1516 *p++ = c->data[v->fv];
1518 (*p++).c = c->data[v->fv].s;
1521 pfm_write_case (t->f, (union value *) t->case_buf);