1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
   02111-1307, USA. */
26 #include "file-handle.h"
34 #include "value-labels.h"
39 #include "debug-print.h"
41 /* XSAVE transformation (and related SAVE, EXPORT procedures). */
45 struct file_handle *f; /* Associated system file. */
46 int nvar; /* Number of variables. */
47 struct variable **var; /* Variables. */
48 flt64 *case_buf; /* Case transfer buffer. */
51 /* Options bits set by trim_dictionary(). */
52 #define GTSV_OPT_COMPRESSED 001 /* Compression; (X)SAVE only. */
53 #define GTSV_OPT_SAVE 002 /* The SAVE/XSAVE/EXPORT procedures. */
54 #define GTSV_OPT_MATCH_FILES 004 /* The MATCH FILES procedure. */
55 #define GTSV_OPT_NONE 0
57 static int trim_dictionary (struct dictionary * dict, int *options);
58 static int save_write_case_func (struct ccase *, void *);
59 static trns_proc_func save_trns_proc;
60 static trns_free_func save_trns_free;
63 void dump_dict_variables (struct dictionary *);
66 /* Parses the GET command. */
/* Visible flow: obtain a file handle, read a dictionary from it with
   sfm_read_dictionary(), let trim_dictionary() process the remaining
   subcommands, compact the dictionary, and install a case source built
   from get_source_class and the handle.
   NOTE(review): this listing omits lines (the function header, braces,
   declarations and returns among them), so the error paths are only
   partly visible. */
70 struct file_handle *handle;
71 struct dictionary *dict;
72 int options = GTSV_OPT_NONE;
78 if (lex_match_id ("FILE"))
81 handle = fh_parse_file_handle ();
85 dict = sfm_read_dictionary (handle, NULL);
90 dump_dict_variables (dict);
/* trim_dictionary() is documented as returning zero on failure; the
   handle is closed on that path. */
92 if (0 == trim_dictionary (dict, &options))
94 fh_close_handle (handle);
98 dump_dict_variables (dict);
101 dict_compact_values (dict);
/* Prints, for each variable, its name followed by the v->get.fv/nv and
   v->fv/nv pairs. */
104 printf (_("GET translation table from file to memory:\n"));
105 for (i = 0; i < dict->nvar; i++)
107 struct variable *v = dict->var[i];
109 printf (_(" %8s from %3d,%3d to %3d,%3d\n"), v->name,
110 v->get.fv, v->get.nv, v->fv, v->nv);
/* The old default dictionary is destroyed here.  The comment in
   get_source_destroy() says the dictionary is default_dict by the time
   the source is torn down; the assignment that makes it so is not
   visible in this listing. */
114 dict_destroy (default_dict);
117 vfm_source = create_case_source (&get_source_class, handle);
122 /* SAVE or XSAVE command? */
129 /* Parses the SAVE and XSAVE commands. */
/* Shared implementation for both commands; SAVE_CMD selects what happens
   once the dictionary has been written.  With CMD_SAVE the cases are
   written immediately through procedure() and the transformation data is
   released; otherwise SAVE_CMD is asserted to be CMD_XSAVE and the
   transformation is handed to add_transformation().
   NOTE(review): the return type, braces, some declarations and the
   return statements are not visible in this listing. */
131 cmd_save_internal (enum save_cmd save_cmd)
133 struct file_handle *handle;
134 struct dictionary *dict;
135 int options = GTSV_OPT_SAVE;
138 struct sfm_write_info inf;
142 lex_match_id ("SAVE");
145 if (lex_match_id ("OUTFILE"))
148 handle = fh_parse_file_handle ();
/* Work on a clone so that trimming does not touch default_dict. */
152 dict = dict_clone (default_dict);
154 dump_dict_variables (dict);
/* Each variable in the clone records, in aux, the default_dict variable
   at the same index.  The link is read back below, after
   trim_dictionary() may have deleted or reordered the clone's
   variables. */
156 for (i = 0; i < dict_get_var_cnt (dict); i++)
157 dict_get_var (dict, i)->aux = dict_get_var (default_dict, i);
158 if (0 == trim_dictionary (dict, &options))
160 fh_close_handle (handle);
165 dump_dict_variables (dict);
168 /* Write dictionary. */
/* !! reduces the masked option bit to exactly 0 or 1. */
171 inf.compress = !!(options & GTSV_OPT_COMPRESSED);
172 if (!sfm_write_dictionary (&inf))
175 fh_close_handle (handle);
179 /* Fill in transformation structure. */
180 t = xmalloc (sizeof *t);
181 t->h.proc = save_trns_proc;
182 t->h.free = save_trns_free;
184 t->nvar = dict_get_var_cnt (dict);
185 t->var = xmalloc (sizeof *t->var * t->nvar);
/* t->var[] ends up pointing at default_dict variables (via aux), in the
   order of the trimmed clone. */
186 for (i = 0; i < t->nvar; i++)
187 t->var[i] = dict_get_var (dict, i)->aux;
/* The buffer is sized from inf.case_size; presumably that field is filled
   in by sfm_write_dictionary() -- no assignment to it is visible here. */
188 t->case_buf = xmalloc (sizeof *t->case_buf * inf.case_size);
191 if (save_cmd == CMD_SAVE)
/* SAVE: run the procedure now, with save_write_case_func as the per-case
   callback and t as its auxiliary data, then release t. */
193 procedure (NULL, save_write_case_func, NULL, t);
194 save_trns_free (&t->h);
198 assert (save_cmd == CMD_XSAVE);
199 add_transformation (&t->h);
205 /* Parses and performs the SAVE procedure. */
/* Thin wrapper: forwards to cmd_save_internal() with CMD_SAVE and returns
   its result. */
209 return cmd_save_internal (CMD_SAVE);
212 /* Parses the XSAVE transformation command. */
/* Thin wrapper: forwards to cmd_save_internal() with CMD_XSAVE and returns
   its result. */
216 return cmd_save_internal (CMD_XSAVE);
219 /* Writes the given C to the file specified by T. */
/* The values of T's variables are packed into t->case_buf in variable
   order; the buffer and the number of flt64 elements used are then passed
   to sfm_write_case(). */
221 do_write_case (struct save_trns *t, struct ccase *c)
223 flt64 *p = t->case_buf;
226 for (i = 0; i < t->nvar; i++)
228 struct variable *v = t->var[i];
229 if (v->type == NUMERIC)
/* Numeric value: read from the case as a double.  The lines that store it
   through p are not visible in this listing. */
231 double src = c->data[v->fv].f;
/* String value: copy v->width bytes, fill with spaces after them, and
   advance p by a count of flt64 elements.  Presumably REM_RND_UP and
   DIV_RND_UP give the padding up to, and the element count of, the next
   multiple of sizeof *p -- confirm against the macro definitions. */
239 memcpy (p, c->data[v->fv].s, v->width);
240 memset (&((char *) p)[v->width], ' ',
241 REM_RND_UP (v->width, sizeof *p));
242 p += DIV_RND_UP (v->width, sizeof *p);
/* p - t->case_buf is the number of flt64 elements filled in above. */
246 sfm_write_case (t->f, t->case_buf, p - t->case_buf);
/* Per-case callback that cmd_save_internal() passes to procedure() for
   SAVE.  Forwards case C to do_write_case(), using AUX as the save_trns.
   NOTE(review): AUX is tagged UNUSED although it is passed on below. */
250 save_write_case_func (struct ccase * c, void *aux UNUSED)
252 do_write_case (aux, c);
/* Transformation callback installed as t->h.proc.  Converts H back to the
   save_trns that contains it and writes case C with do_write_case();
   CASE_NUM is not used.
   NOTE(review): the cast assumes the trns_header is the first member of
   struct save_trns -- the start of that struct is not visible here. */
257 save_trns_proc (struct trns_header *h, struct ccase * c, int case_num UNUSED)
259 struct save_trns *t = (struct save_trns *) h;
260 do_write_case (t, c);
/* Release callback installed as t->h.free, and also called directly after
   procedure() by the SAVE and EXPORT code.  The visible code closes the
   file handle t->f; any freeing of t->var or t->case_buf is on lines not
   shown in this listing. */
265 save_trns_free (struct trns_header *pt)
267 struct save_trns *t = (struct save_trns *) pt;
269 fh_close_handle (t->f);
275 static int rename_variables (struct dictionary * dict);
277 /* The GET and SAVE commands have a common structure after the
278 FILE/OUTFILE subcommand. This function parses this structure and
279 returns nonzero on success, zero on failure. It both reads
280 *OPTIONS, for the GTSV_OPT_SAVE bit, and writes it, for the
281 GTSV_OPT_COMPRESSED bit. */
282 /* FIXME: IN, FIRST, LAST, MAP. */
283 /* FIXME? Should we call dict_compact_values() on dict as a
   final step? */
286 trim_dictionary (struct dictionary *dict, int *options)
288 if (set_scompression)
289 *options |= GTSV_OPT_COMPRESSED;
291 if (*options & GTSV_OPT_SAVE)
293 /* Delete all the scratch variables. */
/* Scratch variables are recognized by a name starting with '#'.  They are
   collected into v (sized for the whole dictionary) and removed with a
   single dict_delete_vars() call. */
298 v = xmalloc (sizeof *v * dict_get_var_cnt (dict));
300 for (i = 0; i < dict_get_var_cnt (dict); i++)
301 if (dict_get_var (dict, i)->name[0] == '#')
302 v[nv++] = dict_get_var (dict, i);
303 dict_delete_vars (dict, v, nv);
/* Subcommand loop.  With GTSV_OPT_MATCH_FILES set the loop is entered
   without requiring a '/' and the COMPRESSED/UNCOMPRESSED keywords are
   not tried; otherwise each subcommand must follow a '/'. */
307 while ((*options & GTSV_OPT_MATCH_FILES) || lex_match ('/'))
309 if (!(*options & GTSV_OPT_MATCH_FILES) && lex_match_id ("COMPRESSED"))
310 *options |= GTSV_OPT_COMPRESSED;
311 else if (!(*options & GTSV_OPT_MATCH_FILES) && lex_match_id ("UNCOMPRESSED"))
312 *options &= ~GTSV_OPT_COMPRESSED;
313 else if (lex_match_id ("DROP"))
/* DROP: the listed variables are deleted from the dictionary. */
319 if (!parse_variables (dict, &v, &nv, PV_NONE))
321 dict_delete_vars (dict, v, nv);
324 else if (lex_match_id ("KEEP"))
331 if (!parse_variables (dict, &v, &nv, PV_NONE))
334 /* Move the specified variables to the beginning. */
335 dict_reorder_vars (dict, v, nv);
337 /* Delete the remaining variables. */
/* After the reorder the kept variables occupy indexes 0..nv-1, so v is
   resized and refilled with every variable from index nv onward, and
   those are deleted. */
338 v = xrealloc (v, (dict_get_var_cnt (dict) - nv) * sizeof *v);
339 for (i = nv; i < dict_get_var_cnt (dict); i++)
340 v[i - nv] = dict_get_var (dict, i);
341 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
344 else if (lex_match_id ("RENAME"))
346 if (!rename_variables (dict))
351 lex_error (_("while expecting a valid subcommand"));
/* A dictionary left with no variables at all is reported as an error. */
355 if (dict_get_var_cnt (dict) == 0)
357 msg (SE, _("All variables deleted from system file dictionary."));
/* NOTE(review): the lines between this test and the lex_error below are
   not visible; presumably the MATCH FILES caller returns here while the
   other callers go on to require the end of the command. */
361 if (*options & GTSV_OPT_MATCH_FILES)
367 lex_error (_("expecting end of command"));
374 /* Parses and performs the RENAME subcommand of GET and SAVE. */
/* Two syntaxes are visible below: a single unparenthesized rename of one
   variable, and one or more parenthesized groups of the form
   (old-list = new-list) that are applied together.
   NOTE(review): declarations, braces and return statements are not
   visible in this listing. */
376 rename_variables (struct dictionary * dict)
/* Unparenthesized form: one existing variable, then '=', then the new
   name taken from tokid. */
394 v = parse_dict_variable (dict);
397 if (!lex_force_match ('=')
/* Compares at most the first 8 characters of the new and old names; the
   action taken when they are equal is not visible here. */
400 if (!strncmp (tokid, v->name, 8))
/* A new name that already exists in the dictionary is rejected. */
402 if (dict_lookup_var (dict, tokid) != NULL)
404 msg (SE, _("Cannot rename %s as %s because there already exists "
405 "a variable named %s. To rename variables with "
406 "overlapping names, use a single RENAME subcommand "
407 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
408 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
412 dict_rename_var (dict, v, tokid);
/* Parenthesized form.  Both parsers are called with PV_APPEND, so each
   group adds to v and new_names, and the single dict_rename_vars() call
   after the loop receives the variables from every group. */
421 while (lex_match ('('))
425 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
427 if (!lex_match ('='))
429 msg (SE, _("`=' expected after variable list."));
432 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
/* The counts reported are relative to old_nv -- presumably the totals
   before this group was parsed; its assignment is not visible here. */
436 msg (SE, _("Number of variables on left side of `=' (%d) does not "
437 "match number of variables on right side (%d), in "
438 "parenthesized group %d of RENAME subcommand."),
439 nv - old_nv, nn - old_nv, group);
442 if (!lex_force_match (')'))
447 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
449 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
/* Loop over the nn new names; its body is not visible in this listing. */
455 for (i = 0; i < nn; i++)
/* Prints a heading and then the name of every variable in DICT, each
   followed by ", ", using printf. */
465 dump_dict_variables (struct dictionary * dict)
469 printf (_("\nVariables in dictionary:\n"));
470 for (i = 0; i < dict->nvar; i++)
471 printf ("%s, ", dict->var[i]->name);
476 /* Clears internal state related to GET input procedure. */
/* The only cleanup visible here is closing the file handle stored in
   SOURCE's aux field; presumably that is the handle the GET command gave
   to create_case_source(). */
478 get_source_destroy (struct case_source *source)
480 struct file_handle *handle = source->aux;
482 /* It is not necessary to destroy the dictionary because if we get
483 to this point then the dictionary is default_dict. */
484 fh_close_handle (handle);
487 /* Reads all the cases from the data file and passes them to
   write_case. */
490 get_source_read (struct case_source *source,
491 write_case_func *write_case, write_case_data wc_data)
493 struct file_handle *handle = source->aux;
495 while (sfm_read_case (handle, temp_case->data, default_dict)
496 && write_case (wc_data))
500 const struct case_source_class get_source_class =
511 #include "debug-print.h"
516 MTF_FILE, /* Specified on FILE= subcommand. */
517 MTF_TABLE /* Specified on TABLE= subcommand. */
520 /* One of the files on MATCH FILES. */
523 struct mtf_file *next, *prev;
524 /* Next, previous in the list of files. */
525 struct mtf_file *next_min; /* Next in the chain of minimums. */
527 int type; /* One of MTF_*. */
528 struct variable **by; /* List of BY variables for this file. */
529 struct file_handle *handle; /* File handle for the file. */
530 struct dictionary *dict; /* Dictionary from system file. */
531 char in[9]; /* Name of the variable from IN=. */
532 char first[9], last[9]; /* Name of the variables from FIRST=, LAST=. */
533 union value *input; /* Input record. */
536 /* All the files mentioned on FILE or TABLE. */
537 static struct mtf_file *mtf_head, *mtf_tail;
539 /* Variables on the BY subcommand. */
540 static struct variable **mtf_by;
543 /* Master dictionary. */
544 static struct dictionary *mtf_master;
546 /* Used to determine whether we've already initialized this
   variable. */
548 static unsigned mtf_seq_num;
550 /* Sequence numbers for each variable in mtf_master. */
551 static unsigned *mtf_seq_nums;
553 static void mtf_free (void);
554 static void mtf_free_file (struct mtf_file *file);
555 static int mtf_merge_dictionary (struct mtf_file *f);
556 static void mtf_delete_file_in_place (struct mtf_file **file);
558 static void mtf_read_nonactive_records (void *);
559 static void mtf_processing_finish (void *);
560 static int mtf_processing (struct ccase *, void *);
562 static char *var_type_description (struct variable *);
564 /* Parse and execute the MATCH FILES command. */
/* The visible parsing code loops over subcommands until the '.' token:
   BY, FILE/TABLE, IN/FIRST/LAST, RENAME/KEEP/DROP and MAP.  Every FILE or
   TABLE becomes a struct mtf_file linked into the mtf_head/mtf_tail list
   and has its dictionary merged into mtf_master.  After the loop each
   file's BY variables are looked up in that file's own dictionary.
   NOTE(review): many lines of this function (braces, error exits,
   declarations) are not visible in this listing. */
566 cmd_match_files (void)
568 struct mtf_file *first_table = NULL;
572 lex_match_id ("MATCH");
573 lex_match_id ("FILES");
575 mtf_head = mtf_tail = NULL;
578 mtf_master = dict_create ();
/* The master dictionary takes over default_dict's case limit. */
581 dict_set_case_limit (mtf_master, dict_get_case_limit (default_dict));
587 if (lex_match (T_BY))
591 msg (SE, _("The BY subcommand may be given once at most."));
/* BY variables are parsed against the master dictionary. */
597 if (!parse_variables (mtf_master, &mtf_by, &mtf_n_by,
598 PV_NO_DUPLICATE | PV_NO_SCRATCH))
601 else if (token != T_ID)
606 else if (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid))
608 struct mtf_file *file = xmalloc (sizeof *file);
/* Empty strings mean that no IN, FIRST or LAST name has been given. */
610 file->in[0] = file->first[0] = file->last[0] = '\0';
615 if (lex_match_id ("FILE"))
616 file->type = MTF_FILE;
617 else if (lex_match_id ("TABLE"))
619 file->type = MTF_TABLE;
625 /* FILEs go first, then TABLEs. */
/* A TABLE, or any file seen while there is no TABLE yet, is appended at
   the tail of the list.  A FILE that arrives after the first TABLE is
   linked in immediately before first_table instead. */
626 if (file->type == MTF_TABLE || first_table == NULL)
629 file->prev = mtf_tail;
631 mtf_tail->next = file;
633 if (mtf_head == NULL)
635 if (file->type == MTF_TABLE && first_table == NULL)
640 assert (file->type == MTF_FILE);
641 file->next = first_table;
642 file->prev = first_table->prev;
643 if (first_table->prev)
644 first_table->prev->next = file;
647 first_table->prev = file;
658 msg (SE, _("The active file may not be specified more "
664 assert (pgm_state != STATE_INPUT);
665 if (pgm_state == STATE_INIT)
667 msg (SE, _("Cannot specify the active file since no active "
668 "file has been defined."));
/* For a named file the handle is parsed and its dictionary is read from
   the system file.  The assignment of default_dict further down is
   presumably the active-file branch; elsewhere in this file the active
   file is recognized by a NULL handle. */
674 file->handle = fh_parse_file_handle ();
681 file->dict = sfm_read_dictionary (file->handle, NULL);
686 file->dict = default_dict;
687 if (!mtf_merge_dictionary (file))
690 else if (lex_id_match ("IN", tokid)
691 || lex_id_match ("FIRST", tokid)
692 || lex_id_match ("LAST", tokid))
/* IN, FIRST and LAST attach to the most recently listed file, so at least
   one FILE or TABLE must already have been seen. */
697 if (mtf_tail == NULL)
699 msg (SE, _("IN, FIRST, and LAST subcommands may not occur "
700 "before the first FILE or TABLE."));
704 if (lex_match_id ("IN"))
709 else if (lex_match_id ("FIRST"))
711 name = mtf_tail->first;
714 else if (lex_match_id ("LAST"))
716 name = mtf_tail->last;
731 msg (SE, _("Multiple %s subcommands for a single FILE or "
/* NOTE(review): name points at one of the 9-byte buffers in struct
   mtf_file and strcpy is unbounded; this assumes tokid never holds more
   than 8 characters -- confirm. */
736 strcpy (name, tokid);
739 if (!dict_create_var (mtf_master, name, 0))
741 msg (SE, _("Duplicate variable name %s while creating %s "
747 else if (lex_id_match ("RENAME", tokid)
748 || lex_id_match ("KEEP", tokid)
749 || lex_id_match ("DROP", tokid))
/* These subcommands are parsed by trim_dictionary() in its MATCH FILES
   mode and applied to the dictionary of the most recently listed file. */
751 int options = GTSV_OPT_MATCH_FILES;
753 if (mtf_tail == NULL)
755 msg (SE, _("RENAME, KEEP, and DROP subcommands may not occur "
756 "before the first FILE or TABLE."));
760 if (!trim_dictionary (mtf_tail->dict, &options))
763 else if (lex_match_id ("MAP"))
773 while (token != '.');
779 msg (SE, _("The BY subcommand is required when a TABLE subcommand "
787 struct mtf_file *iter;
/* Each file gets an array with its own variables matching the BY names; a
   file that lacks one of them is an error.  The active file, whose handle
   is NULL, is reported as "*". */
789 for (iter = mtf_head; iter; iter = iter->next)
793 iter->by = xmalloc (sizeof *iter->by * mtf_n_by);
795 for (i = 0; i < mtf_n_by; i++)
797 iter->by[i] = dict_lookup_var (iter->dict, mtf_by[i]->name);
798 if (iter->by[i] == NULL)
800 msg (SE, _("File %s lacks BY variable %s."),
801 iter->handle ? fh_handle_name (iter->handle) : "*",
811 /* From sfm-read.c. */
812 extern void dump_dictionary (struct dictionary *);
814 dump_dictionary (mtf_master);
818 /* MATCH FILES performs an n-way merge on all its input files.
821 1. Read one input record from every input FILE.
823 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
825 3. Find the FILE input record with minimum BY values. Store all
826 the values from this input record into the output record.
828 4. Find all the FILE input records with BY values identical to
829 the minimums. Store all the values from these input records into
832 5. For every TABLE, read another record as long as the BY values
833 on the TABLE's input record are less than the FILEs' BY values.
834 If an exact match is found, store all the values from the TABLE
835 input record into the output record.
837 6. Write the output record.
839 7. Read another record from each input file FILE and TABLE that
840 we stored values from above. If we come to the end of one of the
841 input files, remove it from the list of input files.
843 8. Repeat from step 2.
845 Unfortunately, this algorithm can't be directly implemented
846 because there's no function to read a record from the active
847 file; instead, it has to be done using callbacks.
849 FIXME: For merging large numbers of files (more than 10?) a
850 better algorithm would use a heap for finding minimum
851 values, or replacement selection, as described by Knuth in
852 _Art of Computer Programming, Vol. 3_. The SORT CASES
853 procedure does this, and perhaps some of its code could be
   adapted. */
857 discard_variables ();
860 temp_dict = mtf_master;
863 mtf_seq_nums = xmalloc (dict_get_var_cnt (mtf_master)
864 * sizeof *mtf_seq_nums);
865 memset (mtf_seq_nums, 0,
866 dict_get_var_cnt (mtf_master) * sizeof *mtf_seq_nums);
868 process_active_file (mtf_read_nonactive_records, mtf_processing,
869 mtf_processing_finish, NULL);
880 /* Repeats 2...8 an arbitrary number of times. */
/* Passed to process_active_file() as its third argument by
   cmd_match_files().  It first removes the active-file entry (the one
   whose handle is NULL) from the list, then keeps calling
   mtf_processing() for as long as a FILE is at the head of the list.
   The statement run when mtf_processing() returns zero is not visible
   here; presumably it leaves the loop. */
882 mtf_processing_finish (void *aux UNUSED)
884 /* Find the active file and delete it. */
886 struct mtf_file *iter;
888 for (iter = mtf_head; iter; iter = iter->next)
889 if (iter->handle == NULL)
891 mtf_delete_file_in_place (&iter);
896 while (mtf_head && mtf_head->type == MTF_FILE)
897 if (!mtf_processing (temp_case, NULL))
901 /* Return a string in a static buffer describing V's variable type and
   width. */
904 var_type_description (struct variable *v)
906 static char buf[2][32];
913 if (v->type == NUMERIC)
914 strcpy (s, "numeric");
917 assert (v->type == ALPHA);
918 sprintf (s, "string with width %d", v->width);
923 /* Free FILE and associated data. */
/* The visible code closes FILE's handle and destroys its dictionary,
   except when the dictionary is NULL or is default_dict (which the active
   file shares rather than owns).
   NOTE(review): the handle is passed to fh_close_handle() without a NULL
   check, although the active file's handle is NULL elsewhere in this
   file; presumably fh_close_handle() accepts NULL -- confirm.  The lines
   that free FILE itself are not visible in this listing. */
925 mtf_free_file (struct mtf_file *file)
927 fh_close_handle (file->handle);
928 if (file->dict != NULL && file->dict != default_dict)
929 dict_destroy (file->dict);
936 /* Free all the data for the MATCH FILES procedure. */
/* Walks the list from mtf_head, freeing each file with mtf_free_file(),
   and then destroys the master dictionary.  The loop steps through next
   rather than iter->next; presumably next is saved before iter is freed,
   on a line that is not visible here. */
940 struct mtf_file *iter, *next;
942 for (iter = mtf_head; iter; iter = next)
946 mtf_free_file (iter);
951 dict_destroy (mtf_master);
955 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
956 file in the chain, or to NULL if it was the last in the chain. */
/* Besides unlinking the file from the doubly linked list, the visible
   code resets, in compaction_case, the master slot of every variable of
   the removed file: SYSMIS for numeric variables and spaces for strings.
   NOTE(review): the checks guarding f->prev and f->next, the updates of
   mtf_head/mtf_tail and *FILE, and the freeing of the file are on lines
   not visible in this listing. */
958 mtf_delete_file_in_place (struct mtf_file **file)
960 struct mtf_file *f = *file;
963 f->prev->next = f->next;
965 f->next->prev = f->prev;
975 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
977 struct variable *v = dict_get_var (f->dict, i);
979 if (v->type == NUMERIC)
980 compaction_case->data[v->p.mtf.master->fv].f = SYSMIS;
982 memset (compaction_case->data[v->p.mtf.master->fv].s, ' ',
990 /* Read a record from every input file except the active file. */
/* Passed to process_active_file() as its first argument by
   cmd_match_files(); AUX is not used. */
992 mtf_read_nonactive_records (void *aux UNUSED)
994 struct mtf_file *iter;
/* The for statement has no step expression: when a file is deleted,
   mtf_delete_file_in_place() itself moves iter on to the next file.  The
   advance for files that are kept is on a line not visible here. */
996 for (iter = mtf_head; iter; )
/* Each file read here gets its own input buffer, sized from its
   dictionary.  A file whose first read fails is removed from the list. */
1000 assert (iter->input == NULL);
1001 iter->input = xmalloc (dict_get_case_size (iter->dict));
1003 if (!sfm_read_case (iter->handle, iter->input, iter->dict))
1004 mtf_delete_file_in_place (&iter);
/* Presumably the active-file branch: its input record points at
   temp_case->data instead of at an allocated buffer. */
1010 iter->input = temp_case->data;
1016 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1017 if A == B, 1 if A > B. */
/* The BY variables are examined in order.  For each one, A's and B's own
   variable (a->by[i], b->by[i]) gives the position of the value in that
   file's input record; the two variables are asserted to agree in type
   and width.  String values are compared bytewise with memcmp.  The
   numeric comparison and the return statements are on lines not visible
   in this listing. */
1019 mtf_compare_BY_values (struct mtf_file *a, struct mtf_file *b)
1023 for (i = 0; i < mtf_n_by; i++)
1025 assert (a->by[i]->type == b->by[i]->type);
1026 assert (a->by[i]->width == b->by[i]->width);
1028 if (a->by[i]->type == NUMERIC)
1030 double af = a->input[a->by[i]->fv].f;
1031 double bf = b->input[b->by[i]->fv].f;
1042 assert (a->by[i]->type == ALPHA);
1043 result = memcmp (a->input[a->by[i]->fv].s,
1044 b->input[b->by[i]->fv].s,
1048 else if (result > 0)
1055 /* Perform one iteration of steps 3...7 above. */
/* Two temporary lists are built, both chained through next_min: the "min"
   list of files whose BY values equal the current minimum, and the "max"
   list of the remaining files.  Values from the min list are copied into
   compaction_case, variables supplied only by max-list files are set to
   missing, and the record is written out.
   NOTE(review): case labels, braces, gotos and returns of this function
   are not visible in this listing; the comments below describe only what
   the visible statements do. */
1057 mtf_processing (struct ccase *c UNUSED, void *aux UNUSED)
1059 /* List of files with minimum BY values. */
1060 struct mtf_file *min_head, *min_tail;
1062 /* List of files with non-minimum BY values. */
1063 struct mtf_file *max_head, *max_tail;
1066 struct mtf_file *iter;
1070 /* If the active file doesn't have the minimum BY values, don't
1071 return because that would cause a record to be skipped. */
1074 if (mtf_head->type == MTF_TABLE)
1077 /* 3. Find the FILE input record with minimum BY values. Store
1078 all the values from this input record into the output record.
1080 4. Find all the FILE input records with BY values identical
1081 to the minimums. Store all the values from these input
1082 records into the output record. */
/* The first file starts out as the only member of the min list. */
1083 min_head = min_tail = mtf_head;
1084 max_head = max_tail = NULL;
1085 for (iter = mtf_head->next; iter && iter->type == MTF_FILE;
1087 switch (mtf_compare_BY_values (min_head, iter))
1091 max_tail = max_tail->next_min = iter;
1093 max_head = max_tail = iter;
1097 min_tail = min_tail->next_min = iter;
/* The statements below move the whole current min list onto the max list
   (appending it, or making it the max list when that is empty) and start
   a new min list with iter alone. */
1103 max_tail->next_min = min_head;
1104 max_tail = min_tail;
1108 max_head = min_head;
1109 max_tail = min_tail;
1111 min_head = min_tail = iter;
1118 /* 5. For every TABLE, read another record as long as the BY
1119 values on the TABLE's input record are less than the FILEs'
1120 BY values. If an exact match is found, store all the values
1121 from the TABLE input record into the output record. */
/* next is captured before iter can be deleted further down. */
1124 struct mtf_file *next = iter->next;
1126 assert (iter->type == MTF_TABLE);
1128 if (iter->handle == NULL)
1132 switch (mtf_compare_BY_values (min_head, iter))
1136 max_tail = max_tail->next_min = iter;
1138 max_head = max_tail = iter;
1142 min_tail = min_tail->next_min = iter;
/* Advancing a TABLE: another record is read from it, and when the read
   fails the TABLE is removed from the list.  A TABLE with a NULL handle
   is tested for first; what happens then is not visible here. */
1146 if (iter->handle == NULL)
1148 if (sfm_read_case (iter->handle, iter->input, iter->dict))
1150 mtf_delete_file_in_place (&iter);
1160 /* Next sequence number. */
1163 /* Store data to all the records we are using. */
/* mtf_seq_nums[] holds, for each master variable, the sequence number
   with which it was last stamped.  A variable already stamped with the
   current mtf_seq_num is tested for before the stamp is set; presumably
   it is then skipped so that it is stored only once per output record
   (the skipping statement is not visible). */
1165 min_tail->next_min = NULL;
1166 for (iter = min_head; iter; iter = iter->next_min)
1170 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1172 struct variable *v = dict_get_var (iter->dict, i);
1174 if (mtf_seq_nums[v->p.mtf.master->index] == mtf_seq_num)
1176 mtf_seq_nums[v->p.mtf.master->index] = mtf_seq_num;
1179 printf ("%s/%s: dest-fv=%d, src-fv=%d\n",
1180 fh_handle_name (iter->handle),
1182 v->p.mtf.master->fv, v->fv);
/* The value is copied from the file's input record, at the file's own fv,
   to the master variable's fv in compaction_case. */
1184 if (v->type == NUMERIC)
1185 compaction_case->data[v->p.mtf.master->fv].f
1186 = iter->input[v->fv].f;
1189 assert (v->type == ALPHA);
1190 memcpy (compaction_case->data[v->p.mtf.master->fv].s,
1191 iter->input[v->fv].s, v->width);
1196 /* Store missing values to all the records we're not using. */
/* NOTE(review): max_tail is NULL when the max list is empty; the guard
   for that case is presumably on a line not visible here -- confirm. */
1198 max_tail->next_min = NULL;
1199 for (iter = max_head; iter; iter = iter->next_min)
1203 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1205 struct variable *v = dict_get_var (iter->dict, i);
1207 if (mtf_seq_nums[v->p.mtf.master->index] == mtf_seq_num)
1209 mtf_seq_nums[v->p.mtf.master->index] = mtf_seq_num;
1212 printf ("%s/%s: dest-fv=%d\n",
1213 fh_handle_name (iter->handle),
1215 v->p.mtf.master->fv);
/* Missing value: SYSMIS for numeric variables, spaces for strings. */
1217 if (v->type == NUMERIC)
1218 compaction_case->data[v->p.mtf.master->fv].f = SYSMIS;
1220 memset (compaction_case->data[v->p.mtf.master->fv].s, ' ',
1224 if (iter->handle == NULL)
1228 /* 6. Write the output record. */
1229 process_active_file_output_case ();
1231 /* 7. Read another record from each input file FILE and TABLE
1232 that we stored values from above. If we come to the end of
1233 one of the input files, remove it from the list of input
   files. */
1235 for (iter = min_head; iter && iter->type == MTF_FILE; )
1237 struct mtf_file *next = iter->next_min;
1241 assert (iter->input != NULL);
1243 if (!sfm_read_case (iter->handle, iter->input, iter->dict))
1244 mtf_delete_file_in_place (&iter);
1254 return (mtf_head && mtf_head->type != MTF_TABLE);
1257 /* Merge the dictionary for file F into the master dictionary
   mtf_master. */
1260 mtf_merge_dictionary (struct mtf_file *f)
1262 struct dictionary *const m = mtf_master;
1263 struct dictionary *d = f->dict;
1264 const char *d_docs, *m_docs;
1266 if (dict_get_label (m) == NULL)
1267 dict_set_label (m, dict_get_label (d));
1269 d_docs = dict_get_documents (d);
1270 m_docs = dict_get_documents (m);
1274 dict_set_documents (m, d_docs);
1280 new_len = strlen (m_docs) + strlen (d_docs);
1281 new_docs = xmalloc (new_len + 1);
1282 strcpy (new_docs, m_docs);
1283 strcat (new_docs, d_docs);
1284 dict_set_documents (m, new_docs);
1289 dict_compact_values (d);
1294 for (i = 0; i < dict_get_var_cnt (d); i++)
1296 struct variable *dv = dict_get_var (d, i);
1297 struct variable *mv = dict_lookup_var (m, dv->name);
1299 assert (dv->type == ALPHA || dv->width == 0);
1300 assert (!mv || mv->type == ALPHA || mv->width == 0);
1301 if (mv && dv->width == mv->width)
1303 if (val_labs_count (dv->val_labs)
1304 && !val_labs_count (mv->val_labs))
1305 mv->val_labs = val_labs_copy (dv->val_labs);
1306 if (dv->miss_type != MISSING_NONE
1307 && mv->miss_type == MISSING_NONE)
1308 copy_missing_values (mv, dv);
1310 if (mv && dv->label && !mv->label)
1311 mv->label = xstrdup (dv->label);
1314 mv = dict_clone_var (m, dv, dv->name);
1315 assert (mv != NULL);
1317 else if (mv->width != dv->width)
1319 msg (SE, _("Variable %s in file %s (%s) has different "
1320 "type or width from the same variable in "
1321 "earlier file (%s)."),
1322 dv->name, fh_handle_name (f->handle),
1323 var_type_description (dv), var_type_description (mv));
1326 dv->p.mtf.master = mv;
1333 /* IMPORT command. */
1335 /* Parses the IMPORT command. */
/* Visible flow: parse the FILE and TYPE subcommands, discard the current
   variables, read a dictionary from the handle with
   pfm_read_dictionary(), let trim_dictionary() process the remaining
   subcommands, compact the dictionary, make it the default dictionary,
   and install a case source built from import_source_class and the
   handle.
   NOTE(review): the function header, braces, loop structure and return
   statements are not visible in this listing. */
1339 struct file_handle *handle = NULL;
1340 struct dictionary *dict;
1341 int options = GTSV_OPT_NONE;
1344 lex_match_id ("IMPORT");
/* A bare string token is accepted in place of the FILE keyword. */
1350 if (lex_match_id ("FILE") || token == T_STRING)
1354 handle = fh_parse_file_handle ();
1358 else if (lex_match_id ("TYPE"))
/* TYPE must be followed by COMM or TAPE; what is done with the choice is
   not visible here. */
1362 if (lex_match_id ("COMM"))
1364 else if (lex_match_id ("TAPE"))
1368 lex_error (_("expecting COMM or TAPE"));
1374 if (!lex_match ('/') && token != '.')
1380 discard_variables ();
1382 dict = pfm_read_dictionary (handle, NULL);
1387 dump_dict_variables (dict);
/* trim_dictionary() is documented as returning zero on failure; the
   handle is closed on that path. */
1389 if (0 == trim_dictionary (dict, &options))
1391 fh_close_handle (handle);
1395 dump_dict_variables (dict);
1398 dict_compact_values (dict);
/* Prints, for each variable, its name followed by the v->get.fv/nv and
   v->fv/nv pairs. */
1401 printf (_("IMPORT translation table from file to memory:\n"));
1402 for (i = 0; i < dict->nvar; i++)
1404 struct variable *v = dict->var[i];
1406 printf (_(" %8s from %3d,%3d to %3d,%3d\n"), v->name,
1407 v->get.fv, v->get.nv, v->fv, v->nv);
/* The dictionary just read replaces the previous default dictionary. */
1411 dict_destroy (default_dict);
1412 default_dict = dict;
1414 vfm_source = create_case_source (&import_source_class, handle);
1419 /* Reads all the cases from the data file and passes them to
   write_case. */
1422 import_source_read (struct case_source *source,
1423 write_case_func *write_case, write_case_data wc_data)
1425 struct file_handle *handle = source->aux;
1426 while (pfm_read_case (handle, temp_case->data, default_dict))
1427 if (!write_case (wc_data))
1431 const struct case_source_class import_source_class =
1439 static int export_write_case_func (struct ccase *c, void *);
1441 /* Parses the EXPORT command. */
1442 /* FIXME: same as cmd_save_internal(). */
/* Follows the same steps as cmd_save_internal(), with these visible
   differences: the dictionary is written with pfm_write_dictionary(),
   the case buffer has one element per variable (t->nvar) instead of
   inf.case_size elements, the per-case callback is
   export_write_case_func, and the procedure is always run immediately.
   NOTE(review): the function header, braces and return statements are
   not visible in this listing. */
1446 struct file_handle *handle;
1447 struct dictionary *dict;
1448 int options = GTSV_OPT_SAVE;
1450 struct save_trns *t;
1454 lex_match_id ("EXPORT");
1457 if (lex_match_id ("OUTFILE"))
1460 handle = fh_parse_file_handle ();
1464 dict = dict_clone (default_dict);
1466 dump_dict_variables (dict);
/* Each variable in the clone records, in aux, the default_dict variable
   at the same index; the link is read back after trimming. */
1468 for (i = 0; i < dict_get_var_cnt (dict); i++)
1469 dict_get_var (dict, i)->aux = dict_get_var (default_dict, i);
1470 if (0 == trim_dictionary (dict, &options))
1472 fh_close_handle (handle);
1477 dump_dict_variables (dict);
1480 /* Write dictionary. */
/* On failure both the cloned dictionary and the handle are released. */
1481 if (!pfm_write_dictionary (handle, dict))
1483 dict_destroy (dict);
1484 fh_close_handle (handle);
1488 /* Fill in transformation structure. */
1489 t = xmalloc (sizeof *t);
1490 t->h.proc = save_trns_proc;
1491 t->h.free = save_trns_free;
1493 t->nvar = dict_get_var_cnt (dict);
1494 t->var = xmalloc (sizeof *t->var * t->nvar);
1495 for (i = 0; i < t->nvar; i++)
1496 t->var[i] = dict_get_var (dict, i)->aux;
/* NOTE(review): the buffer holds t->nvar elements of *t->case_buf, while
   export_write_case_func() fills it as an array of union value; this
   assumes a union value is no larger than one such element -- confirm. */
1497 t->case_buf = xmalloc (sizeof *t->case_buf * t->nvar);
/* The clone can be destroyed before the cases are written because
   t->var[] holds the aux pointers, which refer to default_dict's
   variables rather than to the clone's. */
1498 dict_destroy (dict);
1500 procedure (NULL, export_write_case_func, NULL, t);
1501 save_trns_free (&t->h);
/* Per-case callback that the EXPORT command passes to procedure().  AUX
   is the save_trns set up by that command.  One union value per variable
   is stored into t->case_buf and the array is then handed to
   pfm_write_case() together with the file handle t->f. */
1507 export_write_case_func (struct ccase *c, void *aux)
1509 struct save_trns *t = aux;
1510 union value *p = (union value *) t->case_buf;
1513 for (i = 0; i < t->nvar; i++)
1515 struct variable *v = t->var[i];
/* A numeric variable's value is copied whole.  Otherwise only the c
   member is set, to point at the string bytes inside the case; the
   string itself is not copied. */
1517 if (v->type == NUMERIC)
1518 *p++ = c->data[v->fv];
1520 (*p++).c = c->data[v->fv].s;
1523 pfm_write_case (t->f, (union value *) t->case_buf);