1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
27 #include "file-handle.h"
39 /*#define DEBUGGING 1*/
40 #include "debug-print.h"
42 /* XSAVE transformation (and related SAVE, EXPORT procedures). */
46 struct file_handle *f; /* Associated system file. */
47 int nvar; /* Number of variables. */
48 int *var; /* Indices of variables. */
49 flt64 *case_buf; /* Case transfer buffer. */
52 /* Option bits read and updated by trim_dictionary(); callers pass the initial set. Values are octal bit masks. */
53 #define GTSV_OPT_COMPRESSED 001 /* Compression; (X)SAVE only. */
54 #define GTSV_OPT_SAVE 002 /* The SAVE/XSAVE/EXPORT procedures. */
55 #define GTSV_OPT_MATCH_FILES 004 /* The MATCH FILES procedure. */
56 #define GTSV_OPT_NONE 0 /* No option bits set. */
58 /* The file being read by the input program; both the GET and IMPORT case readers read from it. */
59 static struct file_handle *get_file;
61 /* The transformation being used by the SAVE procedure; XSAVE and EXPORT set it as well, and the case-writing functions read this global rather than their own argument. */
62 static struct save_trns *trns;
64 static int trim_dictionary (struct dictionary * dict, int *options);
65 static int save_write_case_func (struct ccase *);
66 static int save_trns_proc (struct trns_header *, struct ccase *);
67 static void save_trns_free (struct trns_header *);
70 void dump_dict_variables (struct dictionary *);
73 /* Parses the GET command. */
77 struct file_handle *handle;
78 struct dictionary *dict;
79 int options = GTSV_OPT_NONE;
88 if (lex_match_id ("FILE"))
91 handle = fh_parse_file_handle ();
95 dict = sfm_read_dictionary (handle, NULL);
100 dump_dict_variables (dict);
102 if (0 == trim_dictionary (dict, &options))
104 fh_close_handle (handle);
108 dump_dict_variables (dict);
111 /* Set the fv and lv elements of all variables remaining in the
114 for (i = 0; i < dict->nvar; i++)
116 struct variable *v = dict->var[i];
125 printf (_("GET translation table from file to memory:\n"));
126 for (i = 0; i < dict->nvar; i++)
128 struct variable *v = dict->var[i];
130 printf (_(" %8s from %3d,%3d to %3d,%3d\n"), v->name,
131 v->get.fv, v->get.nv, v->fv, v->nv);
135 restore_dictionary (dict);
137 vfm_source = &get_source;
143 /* Parses the SAVE (for X==0) and XSAVE (for X==1) commands. */
144 /* FIXME: save_dictionary() is too expensive. It would make more
145 sense to copy just the first few fields of each variable (up to
146 `foo'): that's a SMOP. */
148 cmd_save_internal (int x)
150 struct file_handle *handle;
151 struct dictionary *dict;
152 int options = GTSV_OPT_SAVE;
155 struct sfm_write_info inf;
159 lex_match_id ("SAVE");
162 if (lex_match_id ("OUTFILE"))
165 handle = fh_parse_file_handle ();
169 dict = save_dictionary ();
171 dump_dict_variables (dict);
173 for (i = 0; i < dict->nvar; i++)
174 dict->var[i]->foo = i;
175 if (0 == trim_dictionary (dict, &options))
177 fh_close_handle (handle);
182 dump_dict_variables (dict);
185 /* Write dictionary. */
188 inf.compress = !!(options & GTSV_OPT_COMPRESSED);
189 if (!sfm_write_dictionary (&inf))
191 free_dictionary (dict);
192 fh_close_handle (handle);
196 /* Fill in transformation structure. */
197 t = trns = xmalloc (sizeof *t);
198 t->h.proc = save_trns_proc;
199 t->h.free = save_trns_free;
201 t->nvar = dict->nvar;
202 t->var = xmalloc (sizeof *t->var * dict->nvar);
203 for (i = 0; i < dict->nvar; i++)
204 t->var[i] = dict->var[i]->foo;
205 t->case_buf = xmalloc (sizeof *t->case_buf * inf.case_size);
206 free_dictionary (dict);
211 procedure (NULL, save_write_case_func, NULL);
212 save_trns_free ((struct trns_header *) t);
216 add_transformation ((struct trns_header *) t);
221 /* Parses and performs the SAVE procedure. */
225 return cmd_save_internal (0);
228 /* Parses the XSAVE transformation command. */
232 return cmd_save_internal (1);
236 save_write_case_func (struct ccase * c)
238 save_trns_proc ((struct trns_header *) trns, c);
243 save_trns_proc (struct trns_header * t unused, struct ccase * c)
245 flt64 *p = trns->case_buf;
248 for (i = 0; i < trns->nvar; i++)
250 struct variable *v = default_dict.var[trns->var[i]];
251 if (v->type == NUMERIC)
253 double src = c->data[v->fv].f;
261 memcpy (p, c->data[v->fv].s, v->width);
262 memset (&((char *) p)[v->width], ' ',
263 REM_RND_UP (v->width, sizeof *p));
264 p += DIV_RND_UP (v->width, sizeof *p);
268 sfm_write_case (trns->f, trns->case_buf, p - trns->case_buf);
273 save_trns_free (struct trns_header *pt)
275 struct save_trns *t = (struct save_trns *) pt;
277 fh_close_handle (t->f);
283 /* Deletes NV variables from DICT, starting at index FIRST. The
284 variables must have consecutive indices. The variables are cleared
287 dict_delete_run (struct dictionary *dict, int first, int nv)
291 for (i = first; i < first + nv; i++)
293 clear_variable (dict, dict->var[i]);
296 for (i = first; i < dict->nvar - nv; i++)
298 dict->var[i] = dict->var[i + nv];
299 dict->var[i]->index -= nv;
304 static int rename_variables (struct dictionary * dict);
306 /* The GET and SAVE commands have a common structure after the
307 FILE/OUTFILE subcommand. This function parses this structure and
308 returns nonzero on success, zero on failure. It reads *OPTIONS
309 for the GTSV_OPT_SAVE and GTSV_OPT_MATCH_FILES bits, and may set or
310 clear the GTSV_OPT_COMPRESSED bit in it. */
311 /* FIXME: IN, FIRST, LAST, MAP. */
313 trim_dictionary (struct dictionary *dict, int *options)
315 if (set_scompression)
316 *options |= GTSV_OPT_COMPRESSED;
318 if (*options & GTSV_OPT_SAVE)
322 /* Delete all the scratch variables. */
323 for (i = 0; i < dict->nvar; i++)
327 if (dict->var[i]->name[0] != '#')
330 /* Find a run of variables to be deleted. */
331 for (j = i + 1; j < dict->nvar; j++)
332 if (dict->var[j]->name[0] != '#')
335 /* Actually delete 'em. */
336 dict_delete_run (dict, i, j - i);
340 while ((*options & GTSV_OPT_MATCH_FILES) || lex_match ('/'))
342 if (!(*options & GTSV_OPT_MATCH_FILES) && lex_match_id ("COMPRESSED"))
343 *options |= GTSV_OPT_COMPRESSED;
344 else if (!(*options & GTSV_OPT_MATCH_FILES) && lex_match_id ("UNCOMPRESSED"))
345 *options &= ~GTSV_OPT_COMPRESSED;
346 else if (lex_match_id ("DROP"))
353 if (!parse_variables (dict, &v, &nv, PV_NONE))
356 /* Loop through the variables to delete. */
361 /* Find a run of variables to be deleted. */
362 for (j = i + 1; j < nv; j++)
363 if (v[j]->index != v[j - 1]->index + 1)
366 /* Actually delete 'em. */
367 dict_delete_run (dict, v[i]->index, j - i);
371 else if (lex_match_id ("KEEP"))
377 if (!parse_variables (dict, &v, &nv, PV_NONE))
380 /* Reorder the dictionary so that the kept variables are at
385 for (i1 = 0; i1 < nv; i1++)
387 int i2 = v[i1]->index;
389 /* Swap variables with indices i1 and i2. */
390 struct variable *t = dict->var[i1];
391 dict->var[i1] = dict->var[i2];
393 dict->var[i1]->index = i1;
394 dict->var[i2]->index = i2;
400 /* Delete all but the first NV variables from the
404 for (i = nv; i < dict->nvar; i++)
406 clear_variable (dict, dict->var[i]);
410 dict->var = xrealloc (dict->var, sizeof *dict->var * nv);
413 else if (lex_match_id ("RENAME"))
415 if (!rename_variables (dict))
420 lex_error (_("while expecting a valid subcommand"));
426 msg (SE, _("All variables deleted from system file dictionary."));
430 if (*options & GTSV_OPT_MATCH_FILES)
436 lex_error (_("expecting end of command"));
443 /* Parses and performs the RENAME subcommand of GET and SAVE. */
445 rename_variables (struct dictionary * dict)
462 v = parse_dict_variable (dict);
465 if (!lex_force_match ('=')
468 if (!strncmp (tokid, v->name, 8))
470 if (is_dict_varname (dict, tokid))
472 msg (SE, _("Cannot rename %s as %s because there already exists "
473 "a variable named %s. To rename variables with "
474 "overlapping names, use a single RENAME subcommand "
475 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
476 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
480 rename_variable (dict, v, tokid);
489 while (lex_match ('('))
493 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
495 if (!lex_match ('='))
497 msg (SE, _("`=' expected after variable list."));
500 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
504 msg (SE, _("Number of variables on left side of `=' (%d) do not "
505 "match number of variables on right side (%d), in "
506 "parenthesized group %d of RENAME subcommand."),
507 nv - old_nv, nn - old_nv, group);
510 if (!lex_force_match (')'))
515 for (i = 0; i < nv; i++)
516 avl_force_delete (dict->var_by_name, v[i]);
517 for (i = 0; i < nv; i++)
519 strcpy (v[i]->name, new_names[i]);
520 if (NULL != avl_insert (dict->var_by_name, v[i]))
522 msg (SE, _("Duplicate variables name %s."), v[i]->name);
529 /* The label is a bit of a misnomer, we actually come here on any
531 for (i = 0; i < nn; i++)
541 dump_dict_variables (struct dictionary * dict)
545 printf (_("\nVariables in dictionary:\n"));
546 for (i = 0; i < dict->nvar; i++)
547 printf ("%s, ", dict->var[i]->name);
552 /* Clears internal state related to the GET input procedure (also installed as the IMPORT source's destroy hook). */
554 get_source_destroy_source (void)
556 /* It is not necessary to destroy the dictionary because if we get
557 to this point then the dictionary is default_dict. */
558 fh_close_handle (get_file);
561 /* Reads all the cases from the data file and passes them to
564 get_source_read (void)
566 while (sfm_read_case (get_file, temp_case->data, &default_dict)
569 get_source_destroy_source ();
572 struct case_stream get_source =
578 get_source_destroy_source,
587 /*#define DEBUGGING 1*/
588 #include "debug-print.h"
593 MTF_FILE, /* Specified on FILE= subcommand. */
594 MTF_TABLE /* Specified on TABLE= subcommand. */
597 /* One of the files on MATCH FILES. */
600 struct mtf_file *next, *prev;
601 /* Next, previous in the list of files. */
602 struct mtf_file *next_min; /* Next in the chain of minimums. */
604 int type; /* One of MTF_*. */
605 struct variable **by; /* List of BY variables for this file. */
606 struct file_handle *handle; /* File handle for the file. */
607 struct dictionary *dict; /* Dictionary from system file. */
608 char in[9]; /* Name of the variable from IN=. */
609 char first[9], last[9]; /* Name of the variables from FIRST=, LAST=. */
610 union value *input; /* Input record. */
613 /* All the files mentioned on FILE= or TABLE=. */
614 static struct mtf_file *mtf_head, *mtf_tail;
616 /* Variables on the BY subcommand. */
617 static struct variable **mtf_by;
620 /* Master dictionary. */
621 static struct dictionary *mtf_master;
623 static void mtf_free (void);
624 static void mtf_free_file (struct mtf_file *file);
625 static int mtf_merge_dictionary (struct mtf_file *f);
626 static void mtf_delete_file_in_place (struct mtf_file **file);
628 static void mtf_read_nonactive_records (void);
629 static void mtf_processing_finish (void);
630 static int mtf_processing (struct ccase *);
632 static char *var_type_description (struct variable *);
634 /* Parse and execute the MATCH FILES command. */
636 cmd_match_files (void)
638 struct mtf_file *first_table = NULL;
642 lex_match_id ("MATCH");
643 lex_match_id ("FILES");
645 mtf_head = mtf_tail = NULL;
648 mtf_master = new_dictionary (0);
649 mtf_master->N = default_dict.N;
655 if (lex_match (T_BY))
659 msg (SE, _("The BY subcommand may be given once at most."));
665 if (!parse_variables (mtf_master, &mtf_by, &mtf_n_by,
666 PV_NO_DUPLICATE | PV_NO_SCRATCH))
669 else if (token != T_ID)
674 else if (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid))
676 struct mtf_file *file = xmalloc (sizeof *file);
678 file->in[0] = file->first[0] = file->last[0] = '\0';
683 if (lex_match_id ("FILE"))
684 file->type = MTF_FILE;
685 else if (lex_match_id ("TABLE"))
687 file->type = MTF_TABLE;
693 /* FILEs go first, then TABLEs. */
694 if (file->type == MTF_TABLE || first_table == NULL)
697 file->prev = mtf_tail;
699 mtf_tail->next = file;
701 if (mtf_head == NULL)
703 if (file->type == MTF_TABLE && first_table == NULL)
708 assert (file->type == MTF_FILE);
709 file->next = first_table;
710 file->prev = first_table->prev;
711 if (first_table->prev)
712 first_table->prev->next = file;
715 first_table->prev = file;
726 msg (SE, _("The active file may not be specified more "
732 assert (pgm_state != STATE_INPUT);
733 if (pgm_state == STATE_INIT)
735 msg (SE, _("Cannot specify the active file since no active "
736 "file has been defined."));
742 file->handle = fh_parse_file_handle ();
749 file->dict = sfm_read_dictionary (file->handle, NULL);
754 file->dict = &default_dict;
755 if (!mtf_merge_dictionary (file))
758 else if (lex_id_match ("IN", tokid)
759 || lex_id_match ("FIRST", tokid)
760 || lex_id_match ("LAST", tokid))
765 if (mtf_tail == NULL)
767 msg (SE, _("IN, FIRST, and LAST subcommands may not occur "
768 "before the first FILE or TABLE."));
772 if (lex_match_id ("IN"))
777 else if (lex_match_id ("FIRST"))
779 name = mtf_tail->first;
782 else if (lex_match_id ("LAST"))
784 name = mtf_tail->last;
799 msg (SE, _("Multiple %s subcommands for a single FILE or "
804 strcpy (name, tokid);
807 if (!create_variable (mtf_master, name, NUMERIC, 0))
809 msg (SE, _("Duplicate variable name %s while creating %s "
815 else if (lex_id_match ("RENAME", tokid)
816 || lex_id_match ("KEEP", tokid)
817 || lex_id_match ("DROP", tokid))
819 int options = GTSV_OPT_MATCH_FILES;
821 if (mtf_tail == NULL)
823 msg (SE, _("RENAME, KEEP, and DROP subcommands may not occur "
824 "before the first FILE or TABLE."));
828 if (!trim_dictionary (mtf_tail->dict, &options))
831 else if (lex_match_id ("MAP"))
841 while (token != '.');
847 msg (SE, _("The BY subcommand is required when a TABLE subcommand "
855 struct mtf_file *iter;
857 for (iter = mtf_head; iter; iter = iter->next)
861 iter->by = xmalloc (sizeof *iter->by * mtf_n_by);
863 for (i = 0; i < mtf_n_by; i++)
865 iter->by[i] = find_dict_variable (iter->dict, mtf_by[i]->name);
866 if (iter->by[i] == NULL)
868 msg (SE, _("File %s lacks BY variable %s."),
869 iter->handle ? fh_handle_name (iter->handle) : "*",
879 /* From sfm-read.c. */
880 extern void dump_dictionary (struct dictionary *);
882 dump_dictionary (mtf_master);
886 /* MATCH FILES performs an n-way merge on all its input files.
889 1. Read one input record from every input FILE.
891 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
893 3. Find the FILE input record with minimum BY values. Store all
894 the values from this input record into the output record.
896 4. Find all the FILE input records with BY values identical to
897 the minimums. Store all the values from these input records into
900 5. For every TABLE, read another record as long as the BY values
901 on the TABLE's input record are less than the FILEs' BY values.
902 If an exact match is found, store all the values from the TABLE
903 input record into the output record.
905 6. Write the output record.
907 7. Read another record from each input file FILE and TABLE that
908 we stored values from above. If we come to the end of one of the
909 input files, remove it from the list of input files.
911 8. Repeat from step 2.
913 Unfortunately, this algorithm can't be directly implemented
914 because there's no function to read a record from the active
915 file; instead, it has to be done using callbacks.
917 FIXME: A better algorithm would use a heap for finding minimum
918 values, or replacement selection, as described by Knuth in _Art
919 of Computer Programming, Vol. 3_. The SORT CASES procedure does
920 this, and perhaps some of its code could be adapted. */
923 discard_variables ();
926 temp_dict = mtf_master;
929 process_active_file (mtf_read_nonactive_records, mtf_processing,
930 mtf_processing_finish);
941 /* Repeats steps 2...8 an arbitrary number of times. */
943 mtf_processing_finish (void)
945 /* Find the active file and delete it. */
947 struct mtf_file *iter;
949 for (iter = mtf_head; iter; iter = iter->next)
950 if (iter->handle == NULL)
952 mtf_delete_file_in_place (&iter);
957 while (mtf_head && mtf_head->type == MTF_FILE)
958 if (!mtf_processing (temp_case))
962 /* Return a string in a static buffer describing V's variable type and
965 var_type_description (struct variable *v)
967 static char buf[2][32];
974 if (v->type == NUMERIC)
975 strcpy (s, "numeric");
978 assert (v->type == ALPHA);
979 sprintf (s, "string with width %d", v->width);
984 /* Free FILE and associated data. */
986 mtf_free_file (struct mtf_file *file)
988 fh_close_handle (file->handle);
989 if (file->dict && file->dict != &default_dict)
990 free_dictionary (file->dict);
997 /* Free all the data for the MATCH FILES procedure. */
1001 struct mtf_file *iter, *next;
1003 for (iter = mtf_head; iter; iter = next)
1007 mtf_free_file (iter);
1012 free_dictionary (mtf_master);
1015 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1016 file in the chain, or to NULL if it was the last in the chain. */
1018 mtf_delete_file_in_place (struct mtf_file **file)
1020 struct mtf_file *f = *file;
1023 f->prev->next = f->next;
1025 f->next->prev = f->prev;
1035 for (i = 0; i < f->dict->nvar; i++)
1037 struct variable *v = f->dict->var[i];
1039 if (v->type == NUMERIC)
1040 compaction_case->data[v->p.mtf.master->fv].f = SYSMIS;
1042 memset (compaction_case->data[v->p.mtf.master->fv].s, ' ',
1050 /* Read a record from every input file except the active file. */
1052 mtf_read_nonactive_records (void)
1054 struct mtf_file *iter;
1056 for (iter = mtf_head; iter; )
1060 assert (iter->input == NULL);
1061 iter->input = xmalloc (sizeof *iter->input * iter->dict->nval);
1063 if (!sfm_read_case (iter->handle, iter->input, iter->dict))
1064 mtf_delete_file_in_place (&iter);
1070 iter->input = temp_case->data;
1076 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1077 if A == B, 1 if A > B. */
1079 mtf_compare_BY_values (struct mtf_file *a, struct mtf_file *b)
1083 for (i = 0; i < mtf_n_by; i++)
1085 assert (a->by[i]->type == b->by[i]->type);
1086 assert (a->by[i]->width == b->by[i]->width);
1088 if (a->by[i]->type == NUMERIC)
1090 double af = a->input[a->by[i]->fv].f;
1091 double bf = b->input[b->by[i]->fv].f;
1102 assert (a->by[i]->type == ALPHA);
1103 result = memcmp (a->input[a->by[i]->fv].s,
1104 b->input[b->by[i]->fv].s,
1108 else if (result > 0)
1115 /* Used to determine whether we've already initialized this
1117 static int mtf_seq_no = 0;
1119 /* Perform one iteration of steps 3...7 above. */
1121 mtf_processing (struct ccase *c unused)
1123 /* List of files with minimum BY values. */
1124 struct mtf_file *min_head, *min_tail;
1126 /* List of files with non-minimum BY values. */
1127 struct mtf_file *max_head, *max_tail;
1130 struct mtf_file *iter;
1134 /* If the active file doesn't have the minimum BY values, don't
1135 return because that would cause a record to be skipped. */
1138 if (mtf_head->type == MTF_TABLE)
1141 /* 3. Find the FILE input record with minimum BY values. Store
1142 all the values from this input record into the output record.
1144 4. Find all the FILE input records with BY values identical
1145 to the minimums. Store all the values from these input
1146 records into the output record. */
1147 min_head = min_tail = mtf_head;
1148 max_head = max_tail = NULL;
1149 for (iter = mtf_head->next; iter && iter->type == MTF_FILE;
1151 switch (mtf_compare_BY_values (min_head, iter))
1155 max_tail = max_tail->next_min = iter;
1157 max_head = max_tail = iter;
1161 min_tail = min_tail->next_min = iter;
1167 max_tail->next_min = min_head;
1168 max_tail = min_tail;
1172 max_head = min_head;
1173 max_tail = min_tail;
1175 min_head = min_tail = iter;
1182 /* 5. For every TABLE, read another record as long as the BY
1183 values on the TABLE's input record are less than the FILEs'
1184 BY values. If an exact match is found, store all the values
1185 from the TABLE input record into the output record. */
1188 struct mtf_file *next = iter->next;
1190 assert (iter->type == MTF_TABLE);
1192 if (iter->handle == NULL)
1196 switch (mtf_compare_BY_values (min_head, iter))
1200 max_tail = max_tail->next_min = iter;
1202 max_head = max_tail = iter;
1206 min_tail = min_tail->next_min = iter;
1210 if (iter->handle == NULL)
1212 if (sfm_read_case (iter->handle, iter->input, iter->dict))
1214 mtf_delete_file_in_place (&iter);
1224 /* Next sequence number. */
1227 /* Store data to all the records we are using. */
1229 min_tail->next_min = NULL;
1230 for (iter = min_head; iter; iter = iter->next_min)
1234 for (i = 0; i < iter->dict->nvar; i++)
1236 struct variable *v = iter->dict->var[i];
1238 if (v->p.mtf.master->foo == mtf_seq_no)
1240 v->p.mtf.master->foo = mtf_seq_no;
1243 printf ("%s/%s: dest-fv=%d, src-fv=%d\n",
1244 fh_handle_name (iter->handle),
1246 v->p.mtf.master->fv, v->fv);
1248 if (v->type == NUMERIC)
1249 compaction_case->data[v->p.mtf.master->fv].f
1250 = iter->input[v->fv].f;
1253 assert (v->type == ALPHA);
1254 memcpy (compaction_case->data[v->p.mtf.master->fv].s,
1255 iter->input[v->fv].s, v->width);
1257 memset (&compaction_case
1258 ->data[v->p.mtf.master->fv].s[v->width],
1259 0, REM_RND_UP (v->width, MAX_SHORT_STRING));
1265 /* Store missing values to all the records we're not using. */
1267 max_tail->next_min = NULL;
1268 for (iter = max_head; iter; iter = iter->next_min)
1272 for (i = 0; i < iter->dict->nvar; i++)
1274 struct variable *v = iter->dict->var[i];
1276 if (v->p.mtf.master->foo == mtf_seq_no)
1278 v->p.mtf.master->foo = mtf_seq_no;
1281 printf ("%s/%s: dest-fv=%d\n",
1282 fh_handle_name (iter->handle),
1284 v->p.mtf.master->fv);
1286 if (v->type == NUMERIC)
1287 compaction_case->data[v->p.mtf.master->fv].f = SYSMIS;
1290 memset (compaction_case->data[v->p.mtf.master->fv].s, ' ',
1293 memset (&compaction_case
1294 ->data[v->p.mtf.master->fv].s[v->width],
1295 0, REM_RND_UP (v->width, MAX_SHORT_STRING));
1300 if (iter->handle == NULL)
1304 /* 6. Write the output record. */
1305 process_active_file_output_case ();
1307 /* 7. Read another record from each input file FILE and TABLE
1308 that we stored values from above. If we come to the end of
1309 one of the input files, remove it from the list of input
1311 for (iter = min_head; iter && iter->type == MTF_FILE; )
1313 struct mtf_file *next = iter->next_min;
1317 assert (iter->input != NULL);
1319 if (!sfm_read_case (iter->handle, iter->input, iter->dict))
1320 mtf_delete_file_in_place (&iter);
1330 return (mtf_head && mtf_head->type != MTF_TABLE);
1333 /* Merge the dictionary for file F into the master dictionary
1336 mtf_merge_dictionary (struct mtf_file *f)
1338 struct dictionary *const m = mtf_master;
1339 struct dictionary *d = f->dict;
1341 if (d->label && m->label == NULL)
1342 m->label = xstrdup (d->label);
1346 m->documents = xrealloc (m->documents,
1347 80 * (m->n_documents + d->n_documents));
1348 memcpy (&m->documents[80 * m->n_documents],
1349 d->documents, 80 * d->n_documents);
1350 m->n_documents += d->n_documents;
1357 for (i = 0; i < d->nvar; i++)
1359 struct variable *dv = d->var[i];
1360 struct variable *mv = find_dict_variable (m, dv->name);
1365 assert (dv->type == ALPHA || dv->width == 0);
1366 assert (!mv || mv->type == ALPHA || mv->width == 0);
1367 if (mv && dv->width == mv->width)
1369 if (dv->val_lab && !mv->val_lab)
1370 mv->val_lab = copy_value_labels (dv->val_lab);
1371 if (dv->miss_type != MISSING_NONE && mv->miss_type == MISSING_NONE)
1372 copy_missing_values (mv, dv);
1374 if (mv && dv->label && !mv->label)
1375 mv->label = xstrdup (dv->label);
1378 mv = force_dup_variable (m, dv, dv->name);
1380 /* Used to make sure we initialize each variable in the
1381 master dictionary exactly once per case. */
1382 mv->foo = mtf_seq_no;
1384 else if (mv->width != dv->width)
1386 msg (SE, _("Variable %s in file %s (%s) has different "
1387 "type or width from the same variable in "
1388 "earlier file (%s)."),
1389 dv->name, fh_handle_name (f->handle),
1390 var_type_description (dv), var_type_description (mv));
1393 dv->p.mtf.master = mv;
1400 /* IMPORT command. */
1402 /* Parses the IMPORT command. */
1406 struct file_handle *handle = NULL;
1407 struct dictionary *dict;
1408 int options = GTSV_OPT_NONE;
1414 lex_match_id ("IMPORT");
1420 if (lex_match_id ("FILE") || token == T_STRING)
1424 handle = fh_parse_file_handle ();
1428 else if (lex_match_id ("TYPE"))
1432 if (lex_match_id ("COMM"))
1434 else if (lex_match_id ("TAPE"))
1438 lex_error (_("expecting COMM or TAPE"));
1444 if (!lex_match ('/') && token != '.')
1450 discard_variables ();
1452 dict = pfm_read_dictionary (handle, NULL);
1457 dump_dict_variables (dict);
1459 if (0 == trim_dictionary (dict, &options))
1461 fh_close_handle (handle);
1465 dump_dict_variables (dict);
1468 /* Set the fv and lv elements of all variables remaining in the
1471 for (i = 0; i < dict->nvar; i++)
1473 struct variable *v = dict->var[i];
1482 printf (_("IMPORT translation table from file to memory:\n"));
1483 for (i = 0; i < dict->nvar; i++)
1485 struct variable *v = dict->var[i];
1487 printf (_(" %8s from %3d,%3d to %3d,%3d\n"), v->name,
1488 v->get.fv, v->get.nv, v->fv, v->nv);
1492 restore_dictionary (dict);
1494 vfm_source = &import_source;
1500 /* Reads all the cases from the data file and passes them to
1503 import_source_read (void)
1505 while (pfm_read_case (get_file, temp_case->data, &default_dict)
1508 get_source_destroy_source ();
1511 struct case_stream import_source =
1517 get_source_destroy_source,
1522 static int export_write_case_func (struct ccase *c);
1524 /* Parses the EXPORT command. */
1525 /* FIXME: same as cmd_save_internal(). */
1529 struct file_handle *handle;
1530 struct dictionary *dict;
1531 int options = GTSV_OPT_SAVE;
1533 struct save_trns *t;
1537 lex_match_id ("EXPORT");
1540 if (lex_match_id ("OUTFILE"))
1543 handle = fh_parse_file_handle ();
1547 dict = save_dictionary ();
1549 dump_dict_variables (dict);
1551 for (i = 0; i < dict->nvar; i++)
1552 dict->var[i]->foo = i;
1553 if (0 == trim_dictionary (dict, &options))
1555 fh_close_handle (handle);
1560 dump_dict_variables (dict);
1563 /* Write dictionary. */
1564 if (!pfm_write_dictionary (handle, dict))
1566 free_dictionary (dict);
1567 fh_close_handle (handle);
1571 /* Fill in transformation structure. */
1572 t = trns = xmalloc (sizeof *t);
1573 t->h.proc = save_trns_proc;
1574 t->h.free = save_trns_free;
1576 t->nvar = dict->nvar;
1577 t->var = xmalloc (sizeof *t->var * dict->nvar);
1578 for (i = 0; i < dict->nvar; i++)
1579 t->var[i] = dict->var[i]->foo;
1580 t->case_buf = xmalloc (sizeof *t->case_buf * dict->nvar);
1581 free_dictionary (dict);
1583 procedure (NULL, export_write_case_func, NULL);
1584 save_trns_free ((struct trns_header *) t);
1590 export_write_case_func (struct ccase *c)
1592 union value *p = (union value *) trns->case_buf;
1595 for (i = 0; i < trns->nvar; i++)
1597 struct variable *v = default_dict.var[trns->var[i]];
1599 if (v->type == NUMERIC)
1600 *p++ = c->data[v->fv];
1602 (*p++).c = c->data[v->fv].s;
1608 pfm_write_case (trns->f, (union value *) trns->case_buf);