1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 #include "dictionary.h"
28 #include "file-handle.h"
33 #include "pfm-write.h"
36 #include "sfm-write.h"
38 #include "value-labels.h"
44 #define _(msgid) gettext (msgid)
46 #include "debug-print.h"
48 /* Rearranging and reducing a dictionary. */
49 static void start_case_map (struct dictionary *);
50 static struct case_map *finish_case_map (struct dictionary *);
51 static void map_case (const struct case_map *,
52 const struct ccase *, struct ccase *);
53 static void destroy_case_map (struct case_map *);
58 OP_READ, /* GET or IMPORT. */
59 OP_SAVE, /* SAVE or XSAVE. */
60 OP_EXPORT /* EXPORT. */
63 static bool parse_dict_trim (struct dictionary *);
65 /* GET input program. */
68 struct sfm_reader *reader; /* System file reader. */
69 struct case_map *map; /* Map from system file to active file dict. */
70 struct ccase bounce; /* Bounce buffer. */
73 static void get_pgm_free (struct get_pgm *);
75 /* Parses the GET command. */
79 struct get_pgm *pgm = NULL;
80 struct file_handle *fh;
81 struct dictionary *dict = NULL;
83 pgm = xmalloc (sizeof *pgm);
86 case_nullify (&pgm->bounce);
91 if (lex_match_id ("FILE"))
97 pgm->reader = sfm_open_reader (fh, &dict, NULL);
98 if (pgm->reader == NULL)
100 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
102 start_case_map (dict);
103 while (lex_match ('/'))
104 if (!parse_dict_trim (dict))
107 if (!lex_end_of_command ())
110 dict_compact_values (dict);
111 pgm->map = finish_case_map (dict);
113 dict_destroy (default_dict);
116 vfm_source = create_case_source (&get_source_class, pgm);
127 /* Frees a struct get_pgm. */
129 get_pgm_free (struct get_pgm *pgm)
133 sfm_close_reader (pgm->reader);
134 destroy_case_map (pgm->map);
135 case_destroy (&pgm->bounce);
140 /* Clears internal state related to GET input procedure. */
142 get_source_destroy (struct case_source *source)
144 struct get_pgm *pgm = source->aux;
148 /* Reads all the cases from the data file into C and passes them
149 to WRITE_CASE one by one, passing WC_DATA. */
151 get_source_read (struct case_source *source,
153 write_case_func *write_case, write_case_data wc_data)
155 struct get_pgm *pgm = source->aux;
160 if (pgm->map == NULL)
161 ok = sfm_read_case (pgm->reader, c);
164 ok = sfm_read_case (pgm->reader, &pgm->bounce);
166 map_case (pgm->map, &pgm->bounce, c);
170 ok = write_case (wc_data);
175 const struct case_source_class get_source_class =
183 /* Type of output file. */
186 SYSFILE_WRITER, /* System file. */
187 PORFILE_WRITER /* Portable file. */
190 /* Type of a command. */
193 XFORM_CMD, /* Transformation. */
194 PROC_CMD /* Procedure. */
197 /* Portable or system file writer plus a case map. */
200 enum writer_type writer_type;
202 struct case_map *map; /* Map to output file dictionary
203 (null pointer for identity mapping). */
204 struct ccase bounce; /* Bounce buffer for mapping (if needed). */
209 any_writer_destroy (struct any_writer *aw)
213 switch (aw->writer_type)
216 pfm_close_writer (aw->writer);
219 sfm_close_writer (aw->writer);
222 destroy_case_map (aw->map);
223 case_destroy (&aw->bounce);
228 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
229 WRITER_TYPE identifies the type of file to write,
230 and COMMAND_TYPE identifies the type of command.
232 On success, returns a writer.
233 For procedures only, sets *RETAIN_UNSELECTED to true if cases
234 that would otherwise be excluded by FILTER or USE should be
237 On failure, returns a null pointer. */
238 static struct any_writer *
239 parse_write_command (enum writer_type writer_type,
240 enum command_type command_type,
241 bool *retain_unselected)
244 struct file_handle *handle; /* Output file. */
245 struct dictionary *dict; /* Dictionary for output file. */
246 struct any_writer *aw; /* Writer. */
248 /* Common options. */
249 bool print_map; /* Print map? TODO. */
250 bool print_short_names; /* Print long-to-short name map. TODO. */
251 struct sfm_write_options sysfile_opts;
252 struct pfm_write_options porfile_opts;
254 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
255 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
256 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
258 if (command_type == PROC_CMD)
259 *retain_unselected = true;
262 dict = dict_clone (default_dict);
263 aw = xmalloc (sizeof *aw);
264 aw->writer_type = writer_type;
267 case_nullify (&aw->bounce);
269 print_short_names = false;
270 sysfile_opts = sfm_writer_default_options ();
271 porfile_opts = pfm_writer_default_options ();
273 start_case_map (dict);
274 dict_delete_scratch_vars (dict);
279 if (lex_match_id ("OUTFILE"))
283 lex_sbc_only_once ("OUTFILE");
289 handle = fh_parse ();
293 else if (lex_match_id ("NAMES"))
294 print_short_names = true;
295 else if (lex_match_id ("PERMISSIONS"))
300 if (lex_match_id ("READONLY"))
302 else if (lex_match_id ("WRITEABLE"))
306 lex_error (_("expecting %s or %s"), "READONLY", "WRITEABLE");
309 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
311 else if (command_type == PROC_CMD && lex_match_id ("UNSELECTED"))
314 if (lex_match_id ("RETAIN"))
315 *retain_unselected = true;
316 else if (lex_match_id ("DELETE"))
317 *retain_unselected = false;
320 lex_error (_("expecting %s or %s"), "RETAIN", "DELETE");
324 else if (writer_type == SYSFILE_WRITER && lex_match_id ("COMPRESSED"))
325 sysfile_opts.compress = true;
326 else if (writer_type == SYSFILE_WRITER && lex_match_id ("UNCOMPRESSED"))
327 sysfile_opts.compress = false;
328 else if (writer_type == SYSFILE_WRITER && lex_match_id ("VERSION"))
331 if (!lex_force_int ())
333 sysfile_opts.version = lex_integer ();
336 else if (writer_type == PORFILE_WRITER && lex_match_id ("TYPE"))
339 if (lex_match_id ("COMMUNICATIONS"))
340 porfile_opts.type = PFM_COMM;
341 else if (lex_match_id ("TAPE"))
342 porfile_opts.type = PFM_TAPE;
345 lex_error (_("expecting %s or %s"), "COMM", "TAPE");
349 else if (writer_type == PORFILE_WRITER && lex_match_id ("DIGITS"))
352 if (!lex_force_int ())
354 porfile_opts.digits = lex_integer ();
357 else if (!parse_dict_trim (dict))
360 if (!lex_match ('/'))
363 if (lex_end_of_command () != CMD_SUCCESS)
368 lex_sbc_missing ("OUTFILE");
372 dict_compact_values (dict);
373 aw->map = finish_case_map (dict);
375 case_create (&aw->bounce, dict_get_next_value_idx (dict));
380 aw->writer = sfm_open_writer (handle, dict, sysfile_opts);
383 aw->writer = pfm_open_writer (handle, dict, porfile_opts);
390 any_writer_destroy (aw);
395 /* Writes case C to writer AW. */
397 any_writer_write_case (struct any_writer *aw, struct ccase *c)
401 map_case (aw->map, c, &aw->bounce);
405 switch (aw->writer_type)
408 sfm_write_case (aw->writer, c);
411 pfm_write_case (aw->writer, c);
416 /* SAVE and EXPORT. */
418 static int output_proc (struct ccase *, void *);
420 /* Parses and performs the SAVE or EXPORT procedure. */
422 parse_output_proc (enum writer_type writer_type)
424 bool retain_unselected;
425 struct variable *saved_filter_variable;
426 struct any_writer *aw;
428 aw = parse_write_command (writer_type, PROC_CMD, &retain_unselected);
432 saved_filter_variable = dict_get_filter (default_dict);
433 if (retain_unselected)
434 dict_set_filter (default_dict, NULL);
435 procedure (output_proc, aw);
436 dict_set_filter (default_dict, saved_filter_variable);
438 any_writer_destroy (aw);
/* Callback passed to procedure() by SAVE and EXPORT: writes case C
   to the struct any_writer given as AW_.  Always returns 1. */
static int
output_proc (struct ccase *c, void *aw_)
{
  any_writer_write_case (aw_, c);
  return 1;
}
454 return parse_output_proc (SYSFILE_WRITER);
460 return parse_output_proc (PORFILE_WRITER);
463 /* XSAVE and XEXPORT. */
465 /* Transformation. */
468 struct trns_header h; /* Header. */
469 struct any_writer *aw; /* Writer. */
472 static trns_proc_func output_trns_proc;
473 static trns_free_func output_trns_free;
475 /* Parses the XSAVE or XEXPORT transformation command. */
477 parse_output_trns (enum writer_type writer_type)
479 struct output_trns *t = xmalloc (sizeof *t);
480 t->h.proc = output_trns_proc;
481 t->h.free = output_trns_free;
482 t->aw = parse_write_command (writer_type, XFORM_CMD, NULL);
489 add_transformation (&t->h);
493 /* Writes case C to the system file specified on XSAVE or XEXPORT. */
495 output_trns_proc (struct trns_header *h, struct ccase *c, int case_num UNUSED)
497 struct output_trns *t = (struct output_trns *) h;
498 any_writer_write_case (t->aw, c);
502 /* Frees an XSAVE or XEXPORT transformation. */
504 output_trns_free (struct trns_header *h)
506 struct output_trns *t = (struct output_trns *) h;
510 any_writer_destroy (t->aw);
519 return parse_output_trns (SYSFILE_WRITER);
522 /* XEXPORT command. */
526 return parse_output_trns (PORFILE_WRITER);
529 static bool rename_variables (struct dictionary *dict);
530 static bool drop_variables (struct dictionary *dict);
531 static bool keep_variables (struct dictionary *dict);
533 /* Commands that read and write system files share a great deal
534 of common syntactic structure for rearranging and dropping
535 variables. This function parses this syntax and modifies DICT
536 appropriately. Returns true on success, false on failure. */
538 parse_dict_trim (struct dictionary *dict)
540 if (lex_match_id ("MAP"))
545 else if (lex_match_id ("DROP"))
546 return drop_variables (dict);
547 else if (lex_match_id ("KEEP"))
548 return keep_variables (dict);
549 else if (lex_match_id ("RENAME"))
550 return rename_variables (dict);
553 lex_error (_("expecting a valid subcommand"));
558 /* Parses and performs the RENAME subcommand of GET and SAVE. */
560 rename_variables (struct dictionary *dict)
578 v = parse_dict_variable (dict);
581 if (!lex_force_match ('=')
584 if (dict_lookup_var (dict, tokid) != NULL)
586 msg (SE, _("Cannot rename %s as %s because there already exists "
587 "a variable named %s. To rename variables with "
588 "overlapping names, use a single RENAME subcommand "
589 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
590 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
594 dict_rename_var (dict, v, tokid);
603 while (lex_match ('('))
607 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
609 if (!lex_match ('='))
611 msg (SE, _("`=' expected after variable list."));
614 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
618 msg (SE, _("Number of variables on left side of `=' (%d) does not "
619 "match number of variables on right side (%d), in "
620 "parenthesized group %d of RENAME subcommand."),
621 nv - old_nv, nn - old_nv, group);
624 if (!lex_force_match (')'))
629 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
631 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
637 for (i = 0; i < nn; i++)
645 /* Parses and performs the DROP subcommand of GET and SAVE.
646 Returns true if successful, false on failure.*/
648 drop_variables (struct dictionary *dict)
654 if (!parse_variables (dict, &v, &nv, PV_NONE))
656 dict_delete_vars (dict, v, nv);
659 if (dict_get_var_cnt (dict) == 0)
661 msg (SE, _("Cannot DROP all variables from dictionary."));
667 /* Parses and performs the KEEP subcommand of GET and SAVE.
668 Returns true if successful, false on failure.*/
670 keep_variables (struct dictionary *dict)
677 if (!parse_variables (dict, &v, &nv, PV_NONE))
680 /* Move the specified variables to the beginning. */
681 dict_reorder_vars (dict, v, nv);
683 /* Delete the remaining variables. */
684 v = xrealloc (v, (dict_get_var_cnt (dict) - nv) * sizeof *v);
685 for (i = nv; i < dict_get_var_cnt (dict); i++)
686 v[i - nv] = dict_get_var (dict, i);
687 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
695 #include "debug-print.h"
700 MTF_FILE, /* Specified on FILE= subcommand. */
701 MTF_TABLE /* Specified on TABLE= subcommand. */
704 /* One of the files on MATCH FILES. */
707 struct mtf_file *next, *prev; /* Next, previous in the list of files. */
708 struct mtf_file *next_min; /* Next in the chain of minimums. */
710 int type; /* One of MTF_*. */
711 struct variable **by; /* List of BY variables for this file. */
712 struct file_handle *handle; /* File handle. */
713 struct sfm_reader *reader; /* System file reader. */
714 struct dictionary *dict; /* Dictionary from system file. */
717 char *in_name; /* Variable name. */
718 struct variable *in_var; /* Variable (in master dictionary). */
720 struct ccase input; /* Input record. */
723 /* MATCH FILES procedure. */
726 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
727 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
729 int by_cnt; /* Number of variables on BY subcommand. */
731 /* Names of FIRST, LAST variables. */
732 char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
734 struct dictionary *dict; /* Dictionary of output file. */
735 struct case_sink *sink; /* Sink to receive output. */
736 struct ccase mtf_case; /* Case used for output. */
738 unsigned seq_num; /* Have we initialized this variable? */
739 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
742 static void mtf_free (struct mtf_proc *);
743 static void mtf_free_file (struct mtf_file *);
744 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
745 static void mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
747 static void mtf_read_nonactive_records (void *);
748 static void mtf_processing_finish (void *);
749 static int mtf_processing (struct ccase *, void *);
751 static char *var_type_description (struct variable *);
753 static void set_master (struct variable *, struct variable *master);
754 static struct variable *get_master (struct variable *);
756 /* Parse and execute the MATCH FILES command. */
758 cmd_match_files (void)
761 struct mtf_file *first_table = NULL;
762 struct mtf_file *iter;
764 bool used_active_file = false;
765 bool saw_table = false;
768 mtf.head = mtf.tail = NULL;
772 mtf.dict = dict_create ();
774 case_nullify (&mtf.mtf_case);
777 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
781 && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
783 struct mtf_file *file = xmalloc (sizeof *file);
785 if (lex_match_id ("FILE"))
786 file->type = MTF_FILE;
787 else if (lex_match_id ("TABLE"))
789 file->type = MTF_TABLE;
800 file->in_name = NULL;
802 case_nullify (&file->input);
804 /* FILEs go first, then TABLEs. */
805 if (file->type == MTF_TABLE || first_table == NULL)
808 file->prev = mtf.tail;
810 mtf.tail->next = file;
812 if (mtf.head == NULL)
814 if (file->type == MTF_TABLE && first_table == NULL)
819 assert (file->type == MTF_FILE);
820 file->next = first_table;
821 file->prev = first_table->prev;
822 if (first_table->prev)
823 first_table->prev->next = file;
826 first_table->prev = file;
834 if (used_active_file)
836 msg (SE, _("The active file may not be specified more "
840 used_active_file = true;
842 assert (pgm_state != STATE_INPUT);
843 if (pgm_state == STATE_INIT)
845 msg (SE, _("Cannot specify the active file since no active "
846 "file has been defined."));
853 _("MATCH FILES may not be used after TEMPORARY when "
854 "the active file is an input source. "
855 "Temporary transformations will be made permanent."));
859 file->dict = default_dict;
863 file->handle = fh_parse ();
864 if (file->handle == NULL)
867 file->reader = sfm_open_reader (file->handle, &file->dict, NULL);
868 if (file->reader == NULL)
871 case_create (&file->input, dict_get_next_value_idx (file->dict));
874 while (lex_match ('/'))
875 if (lex_match_id ("RENAME"))
877 if (!rename_variables (file->dict))
880 else if (lex_match_id ("IN"))
889 if (file->in_name != NULL)
891 msg (SE, _("Multiple IN subcommands for a single FILE or "
895 file->in_name = xstrdup (tokid);
900 mtf_merge_dictionary (mtf.dict, file);
905 if (lex_match (T_BY))
907 struct variable **by;
911 msg (SE, _("BY may appear at most once."));
916 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
917 PV_NO_DUPLICATE | PV_NO_SCRATCH))
920 for (iter = mtf.head; iter != NULL; iter = iter->next)
924 iter->by = xmalloc (sizeof *iter->by * mtf.by_cnt);
926 for (i = 0; i < mtf.by_cnt; i++)
928 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
929 if (iter->by[i] == NULL)
931 msg (SE, _("File %s lacks BY variable %s."),
932 iter->handle ? handle_get_name (iter->handle) : "*",
941 else if (lex_match_id ("FIRST"))
943 if (mtf.first[0] != '\0')
945 msg (SE, _("FIRST may appear at most once."));
950 if (!lex_force_id ())
952 strcpy (mtf.first, tokid);
955 else if (lex_match_id ("LAST"))
957 if (mtf.last[0] != '\0')
959 msg (SE, _("LAST may appear at most once."));
964 if (!lex_force_id ())
966 strcpy (mtf.last, tokid);
969 else if (lex_match_id ("MAP"))
973 else if (lex_match_id ("DROP"))
975 if (!drop_variables (mtf.dict))
978 else if (lex_match_id ("KEEP"))
980 if (!keep_variables (mtf.dict))
989 if (!lex_match ('/') && token != '.')
991 lex_end_of_command ();
1000 msg (SE, _("BY is required when TABLE is specified."));
1005 msg (SE, _("BY is required when IN is specified."));
1010 /* Set up mapping from each file's variables to master
1012 for (iter = mtf.head; iter != NULL; iter = iter->next)
1014 struct dictionary *d = iter->dict;
1017 for (i = 0; i < dict_get_var_cnt (d); i++)
1019 struct variable *v = dict_get_var (d, i);
1020 struct variable *mv = dict_lookup_var (mtf.dict, v->name);
1026 /* Add IN variables to master dictionary. */
1027 for (iter = mtf.head; iter != NULL; iter = iter->next)
1028 if (iter->in_name != NULL)
1030 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
1031 if (iter->in_var == NULL)
1033 msg (SE, _("IN variable name %s duplicates an "
1034 "existing variable name."),
1035 iter->in_var->name);
1038 iter->in_var->print = iter->in_var->write
1039 = make_output_format (FMT_F, 1, 0);
1042 /* MATCH FILES performs an n-way merge on all its input files.
1045 1. Read one input record from every input FILE.
1047 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
1049 3. Find the FILE input record(s) that have minimum BY
1050 values. Store all the values from these input records into
1053 4. For every TABLE, read another record as long as the BY values
1054 on the TABLE's input record are less than the FILEs' BY values.
1055 If an exact match is found, store all the values from the TABLE
1056 input record into the output record.
1058 5. Write the output record.
1060 6. Read another record from each input file FILE and TABLE that
1061 we stored values from above. If we come to the end of one of the
1062 input files, remove it from the list of input files.
1064 7. Repeat from step 2.
1066 Unfortunately, this algorithm can't be implemented in a
1067 straightforward way because there's no function to read a
1068 record from the active file. Instead, it has to be written
1071 FIXME: For merging large numbers of files (more than 10?) a
1072 better algorithm would use a heap for finding minimum
1075 if (!used_active_file)
1076 discard_variables ();
1078 dict_compact_values (mtf.dict);
1079 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
1080 if (mtf.sink->class->open != NULL)
1081 mtf.sink->class->open (mtf.sink);
1083 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1084 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1086 mtf_read_nonactive_records (&mtf);
1087 if (used_active_file)
1088 procedure (mtf_processing, &mtf);
1089 mtf_processing_finish (&mtf);
1091 free_case_source (vfm_source);
1094 dict_destroy (default_dict);
1095 default_dict = mtf.dict;
1097 vfm_source = mtf.sink->class->make_source (mtf.sink);
1098 free_case_sink (mtf.sink);
1108 /* Repeats 2...7 an arbitrary number of times. */
1110 mtf_processing_finish (void *mtf_)
1112 struct mtf_proc *mtf = mtf_;
1113 struct mtf_file *iter;
1115 /* Find the active file and delete it. */
1116 for (iter = mtf->head; iter; iter = iter->next)
1117 if (iter->handle == NULL)
1119 mtf_delete_file_in_place (mtf, &iter);
1123 while (mtf->head && mtf->head->type == MTF_FILE)
1124 if (!mtf_processing (NULL, mtf))
1128 /* Return a string in a static buffer describing V's variable type and
1131 var_type_description (struct variable *v)
1133 static char buf[2][32];
1140 if (v->type == NUMERIC)
1141 strcpy (s, "numeric");
1144 assert (v->type == ALPHA);
1145 sprintf (s, "string with width %d", v->width);
1150 /* Free FILE and associated data. */
1152 mtf_free_file (struct mtf_file *file)
1155 sfm_close_reader (file->reader);
1156 if (file->dict != default_dict)
1157 dict_destroy (file->dict);
1158 case_destroy (&file->input);
1159 free (file->in_name);
1163 /* Free all the data for the MATCH FILES procedure. */
1165 mtf_free (struct mtf_proc *mtf)
1167 struct mtf_file *iter, *next;
1169 for (iter = mtf->head; iter; iter = next)
1172 mtf_free_file (iter);
1176 dict_destroy (mtf->dict);
1177 case_destroy (&mtf->mtf_case);
1178 free (mtf->seq_nums);
1181 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1182 file in the chain, or to NULL if was the last in the chain. */
1184 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1186 struct mtf_file *f = *file;
1190 f->prev->next = f->next;
1192 f->next->prev = f->prev;
1194 mtf->head = f->next;
1196 mtf->tail = f->prev;
1199 if (f->in_var != NULL)
1200 case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
1201 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1203 struct variable *v = dict_get_var (f->dict, i);
1204 struct variable *mv = get_master (v);
1207 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1209 if (v->type == NUMERIC)
1212 memset (out->s, ' ', v->width);
1219 /* Read a record from every input file except the active file. */
1221 mtf_read_nonactive_records (void *mtf_)
1223 struct mtf_proc *mtf = mtf_;
1224 struct mtf_file *iter, *next;
1226 for (iter = mtf->head; iter != NULL; iter = next)
1229 if (iter->handle && !sfm_read_case (iter->reader, &iter->input))
1230 mtf_delete_file_in_place (mtf, &iter);
1234 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1235 if A == B, 1 if A > B. */
1237 mtf_compare_BY_values (struct mtf_proc *mtf,
1238 struct mtf_file *a, struct mtf_file *b,
1241 struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1242 struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
1243 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1244 return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
1247 /* Perform one iteration of steps 3...7 above. */
1249 mtf_processing (struct ccase *c, void *mtf_)
1251 struct mtf_proc *mtf = mtf_;
1253 /* Do we need another record from the active file? */
1254 bool read_active_file;
1256 assert (mtf->head != NULL);
1257 if (mtf->head->type == MTF_TABLE)
1262 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1263 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1264 struct mtf_file *iter, *next;
1266 read_active_file = false;
1268 /* 3. Find the FILE input record(s) that have minimum BY
1269 values. Store all the values from these input records into
1270 the output record. */
1271 min_head = min_tail = mtf->head;
1272 max_head = max_tail = NULL;
1273 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1276 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1280 max_tail = max_tail->next_min = iter;
1282 max_head = max_tail = iter;
1285 min_tail = min_tail->next_min = iter;
1290 max_tail->next_min = min_head;
1291 max_tail = min_tail;
1295 max_head = min_head;
1296 max_tail = min_tail;
1298 min_head = min_tail = iter;
1302 /* 4. For every TABLE, read another record as long as the BY
1303 values on the TABLE's input record are less than the FILEs'
1304 BY values. If an exact match is found, store all the values
1305 from the TABLE input record into the output record. */
1306 for (; iter != NULL; iter = next)
1308 assert (iter->type == MTF_TABLE);
1313 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1317 max_tail = max_tail->next_min = iter;
1319 max_head = max_tail = iter;
1322 min_tail = min_tail->next_min = iter;
1325 if (iter->handle == NULL)
1327 if (sfm_read_case (iter->reader, &iter->input))
1329 mtf_delete_file_in_place (mtf, &iter);
1335 /* Next sequence number. */
1338 /* Store data to all the records we are using. */
1340 min_tail->next_min = NULL;
1341 for (iter = min_head; iter; iter = iter->next_min)
1345 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1347 struct variable *v = dict_get_var (iter->dict, i);
1348 struct variable *mv = get_master (v);
1350 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1352 struct ccase *record
1353 = case_is_null (&iter->input) ? c : &iter->input;
1354 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1356 mtf->seq_nums[mv->index] = mtf->seq_num;
1357 if (v->type == NUMERIC)
1358 out->f = case_num (record, v->fv);
1360 memcpy (out->s, case_str (record, v->fv), v->width);
1363 if (iter->in_var != NULL)
1364 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
1366 if (iter->type == MTF_FILE && iter->handle == NULL)
1367 read_active_file = true;
1370 /* Store missing values to all the records we're not
1373 max_tail->next_min = NULL;
1374 for (iter = max_head; iter; iter = iter->next_min)
1378 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1380 struct variable *v = dict_get_var (iter->dict, i);
1381 struct variable *mv = get_master (v);
1383 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1385 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1386 mtf->seq_nums[mv->index] = mtf->seq_num;
1388 if (v->type == NUMERIC)
1391 memset (out->s, ' ', v->width);
1394 if (iter->in_var != NULL)
1395 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1398 /* 5. Write the output record. */
1399 mtf->sink->class->write (mtf->sink, &mtf->mtf_case);
1401 /* 6. Read another record from each input file FILE and TABLE
1402 that we stored values from above. If we come to the end of
1403 one of the input files, remove it from the list of input
1405 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1407 next = iter->next_min;
1408 if (iter->reader != NULL
1409 && !sfm_read_case (iter->reader, &iter->input))
1410 mtf_delete_file_in_place (mtf, &iter);
1413 while (!read_active_file
1414 && mtf->head != NULL && mtf->head->type == MTF_FILE);
1416 return mtf->head != NULL && mtf->head->type == MTF_FILE;
1419 /* Merge the dictionary for file F into master dictionary M. */
1421 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1423 struct dictionary *d = f->dict;
1424 const char *d_docs, *m_docs;
1427 if (dict_get_label (m) == NULL)
1428 dict_set_label (m, dict_get_label (d));
1430 d_docs = dict_get_documents (d);
1431 m_docs = dict_get_documents (m);
1435 dict_set_documents (m, d_docs);
1441 new_len = strlen (m_docs) + strlen (d_docs);
1442 new_docs = xmalloc (new_len + 1);
1443 strcpy (new_docs, m_docs);
1444 strcat (new_docs, d_docs);
1445 dict_set_documents (m, new_docs);
1450 for (i = 0; i < dict_get_var_cnt (d); i++)
1452 struct variable *dv = dict_get_var (d, i);
1453 struct variable *mv = dict_lookup_var (m, dv->name);
1455 if (dict_class_from_id (dv->name) == DC_SCRATCH)
1460 if (mv->width != dv->width)
1462 msg (SE, _("Variable %s in file %s (%s) has different "
1463 "type or width from the same variable in "
1464 "earlier file (%s)."),
1465 dv->name, handle_get_name (f->handle),
1466 var_type_description (dv), var_type_description (mv));
1470 if (dv->width == mv->width)
1472 if (val_labs_count (dv->val_labs)
1473 && !val_labs_count (mv->val_labs))
1474 mv->val_labs = val_labs_copy (dv->val_labs);
1475 if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
1476 mv_copy (&mv->miss, &dv->miss);
1479 if (dv->label && !mv->label)
1480 mv->label = xstrdup (dv->label);
1483 mv = dict_clone_var_assert (m, dv, dv->name);
1489 /* Marks V's master variable as MASTER. */
1491 set_master (struct variable *v, struct variable *master)
1493 var_attach_aux (v, master, NULL);
1496 /* Returns the master variable corresponding to V,
1497 as set with set_master(). */
1498 static struct variable *
1499 get_master (struct variable *v)
1504 /* IMPORT command. */
1506 /* IMPORT input program. */
1509 struct pfm_reader *reader; /* Portable file reader. */
1510 struct case_map *map; /* Map from system file to active file dict. */
1511 struct ccase bounce; /* Bounce buffer. */
1514 static void import_pgm_free (struct import_pgm *);
1516 /* Parses the IMPORT command. */
1520 struct import_pgm *pgm = NULL;
1521 struct file_handle *fh = NULL;
1522 struct dictionary *dict = NULL;
1528 if (pgm == NULL && (lex_match_id ("FILE") || token == T_STRING))
1536 else if (pgm == NULL && lex_match_id ("TYPE"))
1540 if (lex_match_id ("COMM"))
1542 else if (lex_match_id ("TAPE"))
1546 lex_error (_("expecting COMM or TAPE"));
1556 lex_sbc_missing ("FILE");
1560 discard_variables ();
1562 pgm = xmalloc (sizeof *pgm);
1563 pgm->reader = pfm_open_reader (fh, &dict, NULL);
1565 case_nullify (&pgm->bounce);
1566 if (pgm->reader == NULL)
1569 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
1571 start_case_map (dict);
1577 if (!parse_dict_trim (dict))
1589 pgm->map = finish_case_map (dict);
1591 dict_destroy (default_dict);
1592 default_dict = dict;
1594 vfm_source = create_case_source (&import_source_class, pgm);
1599 import_pgm_free (pgm);
1601 dict_destroy (dict);
1605 /* Frees a struct import_pgm. */
1607 import_pgm_free (struct import_pgm *pgm)
1611 pfm_close_reader (pgm->reader);
1612 destroy_case_map (pgm->map);
1613 case_destroy (&pgm->bounce);
1618 /* Clears internal state related to IMPORT input procedure. */
1620 import_source_destroy (struct case_source *source)
1622 struct import_pgm *pgm = source->aux;
1623 import_pgm_free (pgm);
1626 /* Reads all the cases from the data file into C and passes them
1627 to WRITE_CASE one by one, passing WC_DATA. */
1629 import_source_read (struct case_source *source,
1631 write_case_func *write_case, write_case_data wc_data)
1633 struct import_pgm *pgm = source->aux;
1638 if (pgm->map == NULL)
1639 ok = pfm_read_case (pgm->reader, c);
1642 ok = pfm_read_case (pgm->reader, &pgm->bounce);
1644 map_case (pgm->map, &pgm->bounce, c);
1648 ok = write_case (wc_data);
1653 const struct case_source_class import_source_class =
1658 import_source_destroy,
   A case map copies data from a case that corresponds to one
   dictionary to a case that corresponds to a second dictionary
   derived from the first by, optionally, deleting, reordering,
   or renaming variables.  (No new variables may be created.)
1673 size_t value_cnt; /* Number of values in map. */
1674 int *map; /* For each destination index, the
1675 corresponding source index. */
1678 /* Prepares dictionary D for producing a case map. Afterward,
1679 the caller may delete, reorder, or rename variables within D
1680 at will before using finish_case_map() to produce the case
1683 Uses D's aux members, which must otherwise not be in use. */
1685 start_case_map (struct dictionary *d)
1687 size_t var_cnt = dict_get_var_cnt (d);
1690 for (i = 0; i < var_cnt; i++)
1692 struct variable *v = dict_get_var (d, i);
1693 int *src_fv = xmalloc (sizeof *src_fv);
1695 var_attach_aux (v, src_fv, var_dtor_free);
1699 /* Produces a case map from dictionary D, which must have been
1700 previously prepared with start_case_map().
1702 Does not retain any reference to D, and clears the aux members
1703 set up by start_case_map().
1705 Returns the new case map, or a null pointer if no mapping is
1706 required (that is, no data has changed position). */
1707 static struct case_map *
1708 finish_case_map (struct dictionary *d)
1710 struct case_map *map;
1711 size_t var_cnt = dict_get_var_cnt (d);
1715 map = xmalloc (sizeof *map);
1716 map->value_cnt = dict_get_next_value_idx (d);
1717 map->map = xmalloc (sizeof *map->map * map->value_cnt);
1718 for (i = 0; i < map->value_cnt; i++)
1722 for (i = 0; i < var_cnt; i++)
1724 struct variable *v = dict_get_var (d, i);
1725 int *src_fv = (int *) var_detach_aux (v);
1728 if (v->fv != *src_fv)
1731 for (idx = 0; idx < v->nv; idx++)
1733 int src_idx = *src_fv + idx;
1734 int dst_idx = v->fv + idx;
1736 assert (map->map[dst_idx] == -1);
1737 map->map[dst_idx] = src_idx;
1744 destroy_case_map (map);
1748 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1754 /* Maps from SRC to DST, applying case map MAP. */
1756 map_case (const struct case_map *map,
1757 const struct ccase *src, struct ccase *dst)
1761 assert (map != NULL);
1762 assert (src != NULL);
1763 assert (dst != NULL);
1764 assert (src != dst);
1766 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1768 int src_idx = map->map[dst_idx];
1770 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1774 /* Destroys case map MAP. */
1776 destroy_case_map (struct case_map *map)