1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
26 #include "dictionary.h"
28 #include "file-handle.h"
33 #include "pfm-write.h"
36 #include "sfm-write.h"
38 #include "value-labels.h"
43 #include "debug-print.h"
45 /* Rearranging and reducing a dictionary. */
46 static void start_case_map (struct dictionary *);
47 static struct case_map *finish_case_map (struct dictionary *);
48 static void map_case (const struct case_map *,
49 const struct ccase *, struct ccase *);
50 static void destroy_case_map (struct case_map *);
55 OP_READ, /* GET or IMPORT. */
56 OP_SAVE, /* SAVE or XSAVE. */
57 OP_EXPORT /* EXPORT. */
60 static bool trim_dictionary (struct dictionary *,
61 enum operation, int *compress);
63 /* GET input program. */
66 struct sfm_reader *reader; /* System file reader. */
67 struct case_map *map; /* Map from system file to active file dict. */
68 struct ccase bounce; /* Bounce buffer. */
71 static void get_pgm_free (struct get_pgm *);
73 /* Parses the GET command. */
77 struct get_pgm *pgm = NULL;
78 struct file_handle *fh;
79 struct dictionary *dict = NULL;
81 pgm = xmalloc (sizeof *pgm);
84 case_nullify (&pgm->bounce);
89 if (lex_match_id ("FILE"))
95 pgm->reader = sfm_open_reader (fh, &dict, NULL);
96 if (pgm->reader == NULL)
98 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
100 start_case_map (dict);
101 if (!trim_dictionary (dict, OP_READ, NULL))
103 pgm->map = finish_case_map (dict);
105 dict_destroy (default_dict);
108 vfm_source = create_case_source (&get_source_class, pgm);
119 /* Frees a struct get_pgm. */
121 get_pgm_free (struct get_pgm *pgm)
125 sfm_close_reader (pgm->reader);
126 destroy_case_map (pgm->map);
127 case_destroy (&pgm->bounce);
132 /* Clears internal state related to GET input procedure. */
134 get_source_destroy (struct case_source *source)
136 struct get_pgm *pgm = source->aux;
140 /* Reads all the cases from the data file into C and passes them
141 to WRITE_CASE one by one, passing WC_DATA. */
143 get_source_read (struct case_source *source,
145 write_case_func *write_case, write_case_data wc_data)
147 struct get_pgm *pgm = source->aux;
152 if (pgm->map == NULL)
153 ok = sfm_read_case (pgm->reader, c);
156 ok = sfm_read_case (pgm->reader, &pgm->bounce);
158 map_case (pgm->map, &pgm->bounce, c);
162 ok = write_case (wc_data);
167 const struct case_source_class get_source_class =
175 /* XSAVE transformation and SAVE procedure. */
178 struct trns_header h;
179 struct sfm_writer *writer; /* System file writer. */
180 struct case_map *map; /* Map from active file to system file dict. */
181 struct ccase bounce; /* Bounce buffer. */
184 static int save_write_case_func (struct ccase *, void *);
185 static trns_proc_func save_trns_proc;
186 static trns_free_func save_trns_free;
188 /* Parses the SAVE or XSAVE command
189 and returns the parsed transformation. */
190 static struct save_trns *
191 cmd_save_internal (void)
193 struct file_handle *fh = NULL;
194 struct dictionary *dict = NULL;
195 struct save_trns *t = NULL;
196 int compress = get_scompression ();
197 const int default_version = 3;
198 int version = default_version;
199 short no_name_table = 0;
201 t = xmalloc (sizeof *t);
202 t->h.proc = save_trns_proc;
203 t->h.free = save_trns_free;
206 case_nullify (&t->bounce);
209 /* Read most of the subcommands. */
212 if (lex_match_id ("VERSION"))
215 if ( lex_force_num() )
220 if ( 0 == strncasecmp (tokid,"x", 1) )
228 else if (lex_match_id ("OUTFILE"))
237 if ( ! lex_match('/') )
244 lex_error (_("expecting end of command"));
250 msg ( ME, _("The required %s subcommand was not present"), "OUTFILE");
254 if ( version != default_version )
256 msg (MW, _("Unsupported sysfile version: %d. Using version %d instead."),
257 version, default_version);
259 version = default_version;
262 dict = dict_clone (default_dict);
263 start_case_map (dict);
264 if (!trim_dictionary (dict, OP_SAVE, &compress))
266 t->map = finish_case_map (dict);
268 case_create (&t->bounce, dict_get_next_value_idx (dict));
270 t->writer = sfm_open_writer (fh, dict, compress, no_name_table);
271 if (t->writer == NULL)
281 save_trns_free (&t->h);
285 /* Parses and performs the SAVE procedure. */
289 struct save_trns *t = cmd_save_internal ();
292 procedure (save_write_case_func, t);
293 save_trns_free (&t->h);
301 /* Parses the XSAVE transformation command. */
305 struct save_trns *t = cmd_save_internal ();
308 add_transformation (&t->h);
315 /* Writes the given C to the file specified by T. */
317 do_write_case (struct save_trns *t, struct ccase *c)
320 sfm_write_case (t->writer, c);
323 map_case (t->map, c, &t->bounce);
324 sfm_write_case (t->writer, &t->bounce);
328 /* Writes case C to the system file specified on SAVE. */
330 save_write_case_func (struct ccase *c, void *aux UNUSED)
332 do_write_case (aux, c);
336 /* Writes case C to the system file specified on XSAVE. */
338 save_trns_proc (struct trns_header *h, struct ccase *c, int case_num UNUSED)
340 struct save_trns *t = (struct save_trns *) h;
341 do_write_case (t, c);
345 /* Frees a SAVE transformation. */
347 save_trns_free (struct trns_header *t_)
349 struct save_trns *t = (struct save_trns *) t_;
353 sfm_close_writer (t->writer);
354 destroy_case_map (t->map);
355 case_destroy (&t->bounce);
359 static int rename_variables (struct dictionary *dict);
361 /* Commands that read and write system files share a great deal
362 of common syntactic structure for rearranging and dropping
363 variables. This function parses this syntax and modifies DICT
366 OP is the operation being performed. For operations that
367 write a system file, *COMPRESS is set to 1 if the system file
368 should be compressed, 0 otherwise.
370 Returns true on success, false on failure. */
372 trim_dictionary (struct dictionary *dict, enum operation op, int *compress)
374 assert ((compress != NULL) == (op == OP_SAVE));
375 if (get_scompression())
378 if (op == OP_SAVE || op == OP_EXPORT)
380 /* Delete all the scratch variables. */
385 v = xmalloc (sizeof *v * dict_get_var_cnt (dict));
387 for (i = 0; i < dict_get_var_cnt (dict); i++)
388 if (dict_class_from_id (dict_get_var (dict, i)->name) == DC_SCRATCH)
389 v[nv++] = dict_get_var (dict, i);
390 dict_delete_vars (dict, v, nv);
394 while (lex_match ('/'))
396 if (op == OP_SAVE && lex_match_id ("COMPRESSED"))
398 else if (op == OP_SAVE && lex_match_id ("UNCOMPRESSED"))
400 else if (lex_match_id ("DROP"))
406 if (!parse_variables (dict, &v, &nv, PV_NONE))
408 dict_delete_vars (dict, v, nv);
411 else if (lex_match_id ("KEEP"))
418 if (!parse_variables (dict, &v, &nv, PV_NONE))
421 /* Move the specified variables to the beginning. */
422 dict_reorder_vars (dict, v, nv);
424 /* Delete the remaining variables. */
425 v = xrealloc (v, (dict_get_var_cnt (dict) - nv) * sizeof *v);
426 for (i = nv; i < dict_get_var_cnt (dict); i++)
427 v[i - nv] = dict_get_var (dict, i);
428 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
431 else if (lex_match_id ("RENAME"))
433 if (!rename_variables (dict))
438 lex_error (_("while expecting a valid subcommand"));
442 if (dict_get_var_cnt (dict) == 0)
444 msg (SE, _("All variables deleted from system file dictionary."));
449 if (!lex_end_of_command ())
452 dict_compact_values (dict);
456 /* Parses and performs the RENAME subcommand of GET and SAVE. */
458 rename_variables (struct dictionary *dict)
476 v = parse_dict_variable (dict);
479 if (!lex_force_match ('=')
482 if (!strncmp (tokid, v->name, SHORT_NAME_LEN))
484 if (dict_lookup_var (dict, tokid) != NULL)
486 msg (SE, _("Cannot rename %s as %s because there already exists "
487 "a variable named %s. To rename variables with "
488 "overlapping names, use a single RENAME subcommand "
489 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
490 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
494 dict_rename_var (dict, v, tokid);
503 while (lex_match ('('))
507 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
509 if (!lex_match ('='))
511 msg (SE, _("`=' expected after variable list."));
514 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
518 msg (SE, _("Number of variables on left side of `=' (%d) does not "
519 "match number of variables on right side (%d), in "
520 "parenthesized group %d of RENAME subcommand."),
521 nv - old_nv, nn - old_nv, group);
524 if (!lex_force_match (')'))
529 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
531 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
537 for (i = 0; i < nn; i++)
545 /* EXPORT procedure. */
548 struct pfm_writer *writer; /* Portable file writer. */
549 struct case_map *map; /* Map from active file to portable file dict. */
550 struct ccase bounce; /* Bounce buffer. */
553 static int export_write_case_func (struct ccase *, void *);
554 static void export_proc_free (struct export_proc *);
556 /* Parses the EXPORT command. */
557 /* FIXME: same as cmd_save_internal(). */
561 struct file_handle *fh;
562 struct dictionary *dict;
563 struct export_proc *proc;
565 proc = xmalloc (sizeof *proc);
568 case_nullify (&proc->bounce);
571 if (lex_match_id ("OUTFILE"))
577 dict = dict_clone (default_dict);
578 start_case_map (dict);
579 if (!trim_dictionary (dict, OP_EXPORT, NULL))
581 proc->map = finish_case_map (dict);
582 if (proc->map != NULL)
583 case_create (&proc->bounce, dict_get_next_value_idx (dict));
585 proc->writer = pfm_open_writer (fh, dict);
586 if (proc->writer == NULL)
591 procedure (export_write_case_func, proc);
592 export_proc_free (proc);
599 export_proc_free (proc);
604 /* Writes case C to the EXPORT file. */
606 export_write_case_func (struct ccase *c, void *aux)
608 struct export_proc *proc = aux;
609 if (proc->map == NULL)
610 pfm_write_case (proc->writer, c);
613 map_case (proc->map, c, &proc->bounce);
614 pfm_write_case (proc->writer, &proc->bounce);
620 export_proc_free (struct export_proc *proc)
624 pfm_close_writer (proc->writer);
625 destroy_case_map (proc->map);
626 case_destroy (&proc->bounce);
632 #include "debug-print.h"
637 MTF_FILE, /* Specified on FILE= subcommand. */
638 MTF_TABLE /* Specified on TABLE= subcommand. */
641 /* One of the files on MATCH FILES. */
644 struct mtf_file *next, *prev;
645 /* Next, previous in the list of files. */
646 struct mtf_file *next_min; /* Next in the chain of minimums. */
648 int type; /* One of MTF_*. */
649 struct variable **by; /* List of BY variables for this file. */
650 struct file_handle *handle; /* File handle. */
651 struct sfm_reader *reader; /* System file reader. */
652 struct dictionary *dict; /* Dictionary from system file. */
653 char in[SHORT_NAME_LEN + 1]; /* Name of the variable from IN=. */
655 struct ccase input; /* Input record. */
658 /* MATCH FILES procedure. */
661 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
662 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
664 size_t by_cnt; /* Number of variables on BY subcommand. */
666 /* Names of FIRST, LAST variables. */
667 char first[SHORT_NAME_LEN + 1], last[SHORT_NAME_LEN + 1];
669 struct dictionary *dict; /* Dictionary of output file. */
670 struct case_sink *sink; /* Sink to receive output. */
671 struct ccase mtf_case; /* Case used for output. */
673 unsigned seq_num; /* Current sequence number; seq_nums[i] equal to it marks var i as already stored. */
674 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
677 static void mtf_free (struct mtf_proc *);
678 static void mtf_free_file (struct mtf_file *);
679 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
680 static void mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
682 static void mtf_read_nonactive_records (void *);
683 static void mtf_processing_finish (void *);
684 static int mtf_processing (struct ccase *, void *);
686 static char *var_type_description (struct variable *);
688 static void set_master (struct variable *, struct variable *master);
689 static struct variable *get_master (struct variable *);
691 /* Parse and execute the MATCH FILES command. */
693 cmd_match_files (void)
696 struct mtf_file *first_table = NULL;
698 bool used_active_file = false;
699 bool saw_table = false;
701 mtf.head = mtf.tail = NULL;
705 mtf.dict = dict_create ();
707 case_nullify (&mtf.mtf_case);
710 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
713 while (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid))
715 struct mtf_file *file = xmalloc (sizeof *file);
717 if (lex_match_id ("FILE"))
718 file->type = MTF_FILE;
719 else if (lex_match_id ("TABLE"))
721 file->type = MTF_TABLE;
732 case_nullify (&file->input);
734 /* FILEs go first, then TABLEs. */
735 if (file->type == MTF_TABLE || first_table == NULL)
738 file->prev = mtf.tail;
740 mtf.tail->next = file;
742 if (mtf.head == NULL)
744 if (file->type == MTF_TABLE && first_table == NULL)
749 assert (file->type == MTF_FILE);
750 file->next = first_table;
751 file->prev = first_table->prev;
752 if (first_table->prev)
753 first_table->prev->next = file;
756 first_table->prev = file;
766 if (used_active_file)
768 msg (SE, _("The active file may not be specified more "
772 used_active_file = true;
774 assert (pgm_state != STATE_INPUT);
775 if (pgm_state == STATE_INIT)
777 msg (SE, _("Cannot specify the active file since no active "
778 "file has been defined."));
785 _("MATCH FILES may not be used after TEMPORARY when "
786 "the active file is an input source. "
787 "Temporary transformations will be made permanent."));
791 file->dict = default_dict;
795 file->handle = fh_parse ();
796 if (file->handle == NULL)
799 file->reader = sfm_open_reader (file->handle, &file->dict, NULL);
800 if (file->reader == NULL)
803 case_create (&file->input, dict_get_next_value_idx (file->dict));
806 while (lex_match ('/'))
807 if (lex_match_id ("RENAME"))
809 if (!rename_variables (file->dict))
812 else if (lex_match_id ("IN"))
823 msg (SE, _("Multiple IN subcommands for a single FILE or "
827 strcpy (file->in, tokid);
831 mtf_merge_dictionary (mtf.dict, file);
836 if (lex_match (T_BY))
838 struct variable **by;
839 struct mtf_file *iter;
843 msg (SE, _("BY may appear at most once."));
848 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
849 PV_NO_DUPLICATE | PV_NO_SCRATCH))
852 for (iter = mtf.head; iter != NULL; iter = iter->next)
856 iter->by = xmalloc (sizeof *iter->by * mtf.by_cnt);
858 for (i = 0; i < mtf.by_cnt; i++)
860 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
861 if (iter->by[i] == NULL)
863 msg (SE, _("File %s lacks BY variable %s."),
864 iter->handle ? handle_get_name (iter->handle) : "*",
872 else if (lex_match_id ("FIRST"))
874 if (mtf.first[0] != '\0')
876 msg (SE, _("FIRST may appear at most once."));
881 if (!lex_force_id ())
883 strcpy (mtf.first, tokid);
886 else if (lex_match_id ("LAST"))
888 if (mtf.last[0] != '\0')
890 msg (SE, _("LAST may appear at most once."));
895 if (!lex_force_id ())
897 strcpy (mtf.last, tokid);
900 else if (lex_match_id ("MAP"))
910 if (!lex_match ('/') && token != '.')
912 lex_end_of_command ();
917 if (mtf.by_cnt == 0 && saw_table)
919 msg (SE, _("BY is required when TABLE is specified."));
923 /* MATCH FILES performs an n-way merge on all its input files.
926 1. Read one input record from every input FILE.
928 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
930 3. Find the FILE input record with minimum BY values. Store all
931 the values from this input record into the output record.
933 4. Find all the FILE input records with BY values identical to
934 the minimums. Store all the values from these input records into
937 5. For every TABLE, read another record as long as the BY values
938 on the TABLE's input record are less than the FILEs' BY values.
939 If an exact match is found, store all the values from the TABLE
940 input record into the output record.
942 6. Write the output record.
944 7. Read another record from each input file FILE and TABLE that
945 we stored values from above. If we come to the end of one of the
946 input files, remove it from the list of input files.
948 8. Repeat from step 2.
950 Unfortunately, this algorithm can't be directly implemented
951 because there's no function to read a record from the active
952 file; instead, it has to be done using callbacks.
954 FIXME: For merging large numbers of files (more than 10?) a
955 better algorithm would use a heap for finding minimum
958 if (!used_active_file)
959 discard_variables ();
961 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
962 if (mtf.sink->class->open != NULL)
963 mtf.sink->class->open (mtf.sink);
965 mtf.seq_nums = xmalloc (dict_get_var_cnt (mtf.dict) * sizeof *mtf.seq_nums);
966 memset (mtf.seq_nums, 0,
967 dict_get_var_cnt (mtf.dict) * sizeof *mtf.seq_nums);
968 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
970 mtf_read_nonactive_records (&mtf);
971 if (used_active_file)
972 procedure (mtf_processing, &mtf);
973 mtf_processing_finish (&mtf);
975 dict_destroy (default_dict);
976 default_dict = mtf.dict;
978 vfm_source = mtf.sink->class->make_source (mtf.sink);
979 free_case_sink (mtf.sink);
989 /* Repeats 2...8 an arbitrary number of times. */
991 mtf_processing_finish (void *mtf_)
993 struct mtf_proc *mtf = mtf_;
994 struct mtf_file *iter;
996 /* Find the active file and delete it. */
997 for (iter = mtf->head; iter; iter = iter->next)
998 if (iter->handle == NULL)
1000 mtf_delete_file_in_place (mtf, &iter);
1004 while (mtf->head && mtf->head->type == MTF_FILE)
1005 if (!mtf_processing (NULL, mtf))
1009 /* Return a string in a static buffer describing V's variable type and
1012 var_type_description (struct variable *v)
1014 static char buf[2][32];
1021 if (v->type == NUMERIC)
1022 strcpy (s, "numeric");
1025 assert (v->type == ALPHA);
1026 sprintf (s, "string with width %d", v->width);
1031 /* Free FILE and associated data. */
1033 mtf_free_file (struct mtf_file *file)
1036 sfm_close_reader (file->reader);
1037 if (file->dict != default_dict)
1038 dict_destroy (file->dict);
1039 case_destroy (&file->input);
1043 /* Free all the data for the MATCH FILES procedure. */
1045 mtf_free (struct mtf_proc *mtf)
1047 struct mtf_file *iter, *next;
1049 for (iter = mtf->head; iter; iter = next)
1053 mtf_free_file (iter);
1057 dict_destroy (mtf->dict);
1058 case_destroy (&mtf->mtf_case);
1059 free (mtf->seq_nums);
1062 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1063 file in the chain, or to NULL if it was the last in the chain. */
1065 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1067 struct mtf_file *f = *file;
1070 f->prev->next = f->next;
1072 f->next->prev = f->prev;
1074 mtf->head = f->next;
1076 mtf->tail = f->prev;
1082 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1084 struct variable *v = dict_get_var (f->dict, i);
1085 union value *out = case_data_rw (&mtf->mtf_case, get_master (v)->fv);
1087 if (v->type == NUMERIC)
1090 memset (out->s, ' ', v->width);
1097 /* Read a record from every input file except the active file. */
1099 mtf_read_nonactive_records (void *mtf_)
1101 struct mtf_proc *mtf = mtf_;
1102 struct mtf_file *iter;
1104 for (iter = mtf->head; iter; )
1108 if (!sfm_read_case (iter->reader, &iter->input))
1109 mtf_delete_file_in_place (mtf, &iter);
1118 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1119 if A == B, 1 if A > B. */
1121 mtf_compare_BY_values (struct mtf_proc *mtf,
1122 struct mtf_file *a, struct mtf_file *b,
1125 struct ccase *a_input, *b_input;
1128 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1129 a_input = case_is_null (&a->input) ? c : &a->input;
1130 b_input = case_is_null (&b->input) ? c : &b->input;
1131 for (i = 0; i < mtf->by_cnt; i++)
1133 assert (a->by[i]->type == b->by[i]->type);
1134 assert (a->by[i]->width == b->by[i]->width);
1136 if (a->by[i]->type == NUMERIC)
1138 double af = case_num (a_input, a->by[i]->fv);
1139 double bf = case_num (b_input, b->by[i]->fv);
1150 assert (a->by[i]->type == ALPHA);
1151 result = memcmp (case_str (a_input, a->by[i]->fv),
1152 case_str (b_input, b->by[i]->fv),
1156 else if (result > 0)
1163 /* Perform one iteration of steps 3...7 above. */
1165 mtf_processing (struct ccase *c, void *mtf_)
1167 struct mtf_proc *mtf = mtf_;
1168 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1169 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BY values. */
1170 struct mtf_file *iter; /* Iterator. */
1174 /* If the active file doesn't have the minimum BY values, don't
1175 return because that would cause a record to be skipped. */
1176 bool advance = true;
1178 if (mtf->head->type == MTF_TABLE)
1181 /* 3. Find the FILE input record with minimum BY values. Store
1182 all the values from this input record into the output record.
1184 4. Find all the FILE input records with BY values identical
1185 to the minimums. Store all the values from these input
1186 records into the output record. */
1187 min_head = min_tail = mtf->head;
1188 max_head = max_tail = NULL;
1189 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1191 switch (mtf_compare_BY_values (mtf, min_head, iter, c))
1195 max_tail = max_tail->next_min = iter;
1197 max_head = max_tail = iter;
1201 min_tail = min_tail->next_min = iter;
1207 max_tail->next_min = min_head;
1208 max_tail = min_tail;
1212 max_head = min_head;
1213 max_tail = min_tail;
1215 min_head = min_tail = iter;
1222 /* 5. For every TABLE, read another record as long as the BY
1223 values on the TABLE's input record are less than the FILEs'
1224 BY values. If an exact match is found, store all the values
1225 from the TABLE input record into the output record. */
1228 struct mtf_file *next = iter->next;
1230 assert (iter->type == MTF_TABLE);
1232 if (iter->handle == NULL)
1236 switch (mtf_compare_BY_values (mtf, min_head, iter, c))
1240 max_tail = max_tail->next_min = iter;
1242 max_head = max_tail = iter;
1246 min_tail = min_tail->next_min = iter;
1250 if (iter->handle == NULL)
1252 if (sfm_read_case (iter->reader, &iter->input))
1254 mtf_delete_file_in_place (mtf, &iter);
1264 /* Next sequence number. */
1267 /* Store data to all the records we are using. */
1269 min_tail->next_min = NULL;
1270 for (iter = min_head; iter; iter = iter->next_min)
1274 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1276 struct variable *v = dict_get_var (iter->dict, i);
1277 struct variable *mv = get_master (v);
1279 if (mtf->seq_nums[mv->index] != mtf->seq_num)
1281 struct ccase *record
1282 = case_is_null (&iter->input) ? c : &iter->input;
1283 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1285 mtf->seq_nums[mv->index] = mtf->seq_num;
1286 if (v->type == NUMERIC)
1287 out->f = case_num (record, v->fv);
1289 memcpy (out->s, case_str (record, v->fv), v->width);
1294 /* Store missing values to all the records we're not using. */
1296 max_tail->next_min = NULL;
1297 for (iter = max_head; iter; iter = iter->next_min)
1301 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1303 struct variable *v = dict_get_var (iter->dict, i);
1304 struct variable *mv = get_master (v);
1306 if (mtf->seq_nums[mv->index] != mtf->seq_num)
1308 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1309 mtf->seq_nums[mv->index] = mtf->seq_num;
1311 if (v->type == NUMERIC)
1314 memset (out->s, ' ', v->width);
1318 if (iter->handle == NULL)
1322 /* 6. Write the output record. */
1323 mtf->sink->class->write (mtf->sink, &mtf->mtf_case);
1325 /* 7. Read another record from each input file FILE and TABLE
1326 that we stored values from above. If we come to the end of
1327 one of the input files, remove it from the list of input
1329 for (iter = min_head; iter && iter->type == MTF_FILE; )
1331 struct mtf_file *next = iter->next_min;
1333 if (iter->reader != NULL)
1335 if (!sfm_read_case (iter->reader, &iter->input))
1336 mtf_delete_file_in_place (mtf, &iter);
1346 return (mtf->head && mtf->head->type != MTF_TABLE);
1349 /* Merge the dictionary for file F into master dictionary M. */
1351 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1353 struct dictionary *d = f->dict;
1354 const char *d_docs, *m_docs;
1357 if (dict_get_label (m) == NULL)
1358 dict_set_label (m, dict_get_label (d));
1360 d_docs = dict_get_documents (d);
1361 m_docs = dict_get_documents (m);
1365 dict_set_documents (m, d_docs);
1371 new_len = strlen (m_docs) + strlen (d_docs);
1372 new_docs = xmalloc (new_len + 1);
1373 strcpy (new_docs, m_docs);
1374 strcat (new_docs, d_docs);
1375 dict_set_documents (m, new_docs);
1380 dict_compact_values (d);
1382 for (i = 0; i < dict_get_var_cnt (d); i++)
1384 struct variable *dv = dict_get_var (d, i);
1385 struct variable *mv = dict_lookup_var (m, dv->name);
1389 if (mv->width != dv->width)
1391 msg (SE, _("Variable %s in file %s (%s) has different "
1392 "type or width from the same variable in "
1393 "earlier file (%s)."),
1394 dv->name, handle_get_name (f->handle),
1395 var_type_description (dv), var_type_description (mv));
1399 if (dv->width == mv->width)
1401 if (val_labs_count (dv->val_labs)
1402 && !val_labs_count (mv->val_labs))
1403 mv->val_labs = val_labs_copy (dv->val_labs);
1404 if (dv->miss_type != MISSING_NONE
1405 && mv->miss_type == MISSING_NONE)
1406 copy_missing_values (mv, dv);
1409 if (dv->label && !mv->label)
1410 mv->label = xstrdup (dv->label);
1414 mv = dict_clone_var (m, dv, dv->name, dv->longname);
1415 assert (mv != NULL);
1418 set_master (dv, mv);
1424 /* Marks V's master variable as MASTER. */
1426 set_master (struct variable *v, struct variable *master)
1428 var_attach_aux (v, master, NULL);
1431 /* Returns the master variable corresponding to V,
1432 as set with set_master(). */
1433 static struct variable *
1434 get_master (struct variable *v)
1436 assert (v->aux != NULL);
1440 /* IMPORT command. */
1442 /* IMPORT input program. */
1445 struct pfm_reader *reader; /* Portable file reader. */
1446 struct case_map *map; /* Map from portable file to active file dict. */
1447 struct ccase bounce; /* Bounce buffer. */
1450 static void import_pgm_free (struct import_pgm *);
1452 /* Parses the IMPORT command. */
1456 struct import_pgm *pgm = NULL;
1457 struct file_handle *fh = NULL;
1458 struct dictionary *dict = NULL;
1461 pgm = xmalloc (sizeof *pgm);
1464 case_nullify (&pgm->bounce);
1470 if (lex_match_id ("FILE") || token == T_STRING)
1478 else if (lex_match_id ("TYPE"))
1482 if (lex_match_id ("COMM"))
1484 else if (lex_match_id ("TAPE"))
1488 lex_error (_("expecting COMM or TAPE"));
1494 if (!lex_match ('/') && token != '.')
1500 discard_variables ();
1502 pgm->reader = pfm_open_reader (fh, &dict, NULL);
1503 if (pgm->reader == NULL)
1505 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
1507 start_case_map (dict);
1508 if (!trim_dictionary (dict, OP_READ, NULL))
1510 pgm->map = finish_case_map (dict);
1512 dict_destroy (default_dict);
1513 default_dict = dict;
1515 vfm_source = create_case_source (&import_source_class, pgm);
1520 import_pgm_free (pgm);
1522 dict_destroy (dict);
1526 /* Frees a struct import_pgm. */
1528 import_pgm_free (struct import_pgm *pgm)
1532 pfm_close_reader (pgm->reader);
1533 destroy_case_map (pgm->map);
1534 case_destroy (&pgm->bounce);
1539 /* Clears internal state related to IMPORT input procedure. */
1541 import_source_destroy (struct case_source *source)
1543 struct import_pgm *pgm = source->aux;
1544 import_pgm_free (pgm);
1547 /* Reads all the cases from the data file into C and passes them
1548 to WRITE_CASE one by one, passing WC_DATA. */
1550 import_source_read (struct case_source *source,
1552 write_case_func *write_case, write_case_data wc_data)
1554 struct import_pgm *pgm = source->aux;
1559 if (pgm->map == NULL)
1560 ok = pfm_read_case (pgm->reader, c);
1563 ok = pfm_read_case (pgm->reader, &pgm->bounce);
1565 map_case (pgm->map, &pgm->bounce, c);
1569 ok = write_case (wc_data);
1574 const struct case_source_class import_source_class =
1579 import_source_destroy,
1585 A case map copies data from a case that corresponds to one
1586 dictionary to a case that corresponds to a second dictionary
1587 derived from the first by, optionally, deleting, reordering,
1588 or renaming variables. (No new variables may be created.)
1594 size_t value_cnt; /* Number of values in map. */
1595 int *map; /* For each destination index, the
1596 corresponding source index. */
1599 /* Prepares dictionary D for producing a case map. Afterward,
1600 the caller may delete, reorder, or rename variables within D
1601 at will before using finish_case_map() to produce the case
1604 Uses D's aux members, which may not otherwise be in use. */
1606 start_case_map (struct dictionary *d)
1608 size_t var_cnt = dict_get_var_cnt (d);
1611 for (i = 0; i < var_cnt; i++)
1613 struct variable *v = dict_get_var (d, i);
1614 int *src_fv = xmalloc (sizeof *src_fv);
1616 var_attach_aux (v, src_fv, var_dtor_free);
1620 /* Produces a case map from dictionary D, which must have been
1621 previously prepared with start_case_map().
1623 Does not retain any reference to D, and clears the aux members
1624 set up by start_case_map().
1626 Returns the new case map, or a null pointer if no mapping is
1627 required (that is, no data has changed position). */
1628 static struct case_map *
1629 finish_case_map (struct dictionary *d)
1631 struct case_map *map;
1632 size_t var_cnt = dict_get_var_cnt (d);
1636 map = xmalloc (sizeof *map);
1637 map->value_cnt = dict_get_next_value_idx (d);
1638 map->map = xmalloc (sizeof *map->map * map->value_cnt);
1639 for (i = 0; i < map->value_cnt; i++)
1643 for (i = 0; i < var_cnt; i++)
1645 struct variable *v = dict_get_var (d, i);
1646 int *src_fv = (int *) var_detach_aux (v);
1649 if (v->fv != *src_fv)
1652 for (idx = 0; idx < v->nv; idx++)
1654 int src_idx = *src_fv + idx;
1655 int dst_idx = v->fv + idx;
1657 assert (map->map[dst_idx] == -1);
1658 map->map[dst_idx] = src_idx;
1665 destroy_case_map (map);
1669 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1675 /* Maps from SRC to DST, applying case map MAP. */
1677 map_case (const struct case_map *map,
1678 const struct ccase *src, struct ccase *dst)
1682 assert (map != NULL);
1683 assert (src != NULL);
1684 assert (dst != NULL);
1685 assert (src != dst);
1687 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1689 int src_idx = map->map[dst_idx];
1691 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1695 /* Destroys case map MAP. */
1697 destroy_case_map (struct case_map *map)