1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 #include "dictionary.h"
28 #include "file-handle.h"
33 #include "pfm-write.h"
36 #include "sfm-write.h"
38 #include "value-labels.h"
44 #define _(msgid) gettext (msgid)
46 #include "debug-print.h"
48 /* Rearranging and reducing a dictionary. */
49 static void start_case_map (struct dictionary *);
50 static struct case_map *finish_case_map (struct dictionary *);
51 static void map_case (const struct case_map *,
52 const struct ccase *, struct ccase *);
53 static void destroy_case_map (struct case_map *);
58 OP_READ, /* GET or IMPORT. */
59 OP_SAVE, /* SAVE or XSAVE. */
60 OP_EXPORT /* EXPORT. */
63 static bool parse_dict_trim (struct dictionary *);
65 /* GET input program. */
68 struct sfm_reader *reader; /* System file reader. */
69 struct case_map *map; /* Map from system file to active file dict. */
70 struct ccase bounce; /* Bounce buffer. */
73 static void get_pgm_free (struct get_pgm *);
75 /* Parses the GET command. */
79 struct get_pgm *pgm = NULL;
80 struct file_handle *fh;
81 struct dictionary *dict = NULL;
83 pgm = xmalloc (sizeof *pgm);
86 case_nullify (&pgm->bounce);
91 if (lex_match_id ("FILE"))
97 pgm->reader = sfm_open_reader (fh, &dict, NULL);
98 if (pgm->reader == NULL)
100 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
102 start_case_map (dict);
103 while (lex_match ('/'))
104 if (!parse_dict_trim (dict))
107 if (!lex_end_of_command ())
110 dict_compact_values (dict);
111 pgm->map = finish_case_map (dict);
113 dict_destroy (default_dict);
116 vfm_source = create_case_source (&get_source_class, pgm);
127 /* Frees a struct get_pgm. */
129 get_pgm_free (struct get_pgm *pgm)
133 sfm_close_reader (pgm->reader);
134 destroy_case_map (pgm->map);
135 case_destroy (&pgm->bounce);
140 /* Clears internal state related to GET input procedure. */
142 get_source_destroy (struct case_source *source)
144 struct get_pgm *pgm = source->aux;
148 /* Reads all the cases from the data file into C and passes them
149 to WRITE_CASE one by one, passing WC_DATA. */
151 get_source_read (struct case_source *source,
153 write_case_func *write_case, write_case_data wc_data)
155 struct get_pgm *pgm = source->aux;
160 if (pgm->map == NULL)
161 ok = sfm_read_case (pgm->reader, c);
164 ok = sfm_read_case (pgm->reader, &pgm->bounce);
166 map_case (pgm->map, &pgm->bounce, c);
170 ok = write_case (wc_data);
175 const struct case_source_class get_source_class =
183 /* Type of output file. */
186 SYSFILE_WRITER, /* System file. */
187 PORFILE_WRITER /* Portable file. */
190 /* Type of a command. */
193 XFORM_CMD, /* Transformation. */
194 PROC_CMD /* Procedure. */
197 /* Portable or system file writer plus a case map. */
200 enum writer_type writer_type;
202 struct case_map *map; /* Map to output file dictionary
203 (null pointer for identity mapping). */
204 struct ccase bounce; /* Bounce buffer for mapping (if needed). */
209 any_writer_destroy (struct any_writer *aw)
213 switch (aw->writer_type)
216 pfm_close_writer (aw->writer);
219 sfm_close_writer (aw->writer);
222 destroy_case_map (aw->map);
223 case_destroy (&aw->bounce);
228 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
229 WRITER_TYPE identifies the type of file to write,
230 and COMMAND_TYPE identifies the type of command.
232 On success, returns a writer.
233 For procedures only, sets *RETAIN_UNSELECTED to true if cases
234 that would otherwise be excluded by FILTER or USE should be
237 On failure, returns a null pointer. */
238 static struct any_writer *
239 parse_write_command (enum writer_type writer_type,
240 enum command_type command_type,
241 bool *retain_unselected)
244 struct file_handle *handle; /* Output file. */
245 struct dictionary *dict; /* Dictionary for output file. */
246 struct any_writer *aw; /* Writer. */
248 /* Common options. */
249 bool print_map; /* Print map? TODO. */
250 bool print_short_names; /* Print long-to-short name map. TODO. */
251 struct sfm_write_options sysfile_opts;
252 struct pfm_write_options porfile_opts;
254 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
255 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
256 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
258 if (command_type == PROC_CMD)
259 *retain_unselected = true;
262 dict = dict_clone (default_dict);
263 aw = xmalloc (sizeof *aw);
264 aw->writer_type = writer_type;
267 case_nullify (&aw->bounce);
269 print_short_names = false;
270 sysfile_opts = sfm_writer_default_options ();
271 porfile_opts = pfm_writer_default_options ();
273 start_case_map (dict);
274 dict_delete_scratch_vars (dict);
279 if (lex_match_id ("OUTFILE"))
283 lex_sbc_only_once ("OUTFILE");
289 handle = fh_parse ();
293 else if (lex_match_id ("NAMES"))
294 print_short_names = true;
295 else if (lex_match_id ("PERMISSIONS"))
300 if (lex_match_id ("READONLY"))
302 else if (lex_match_id ("WRITEABLE"))
306 lex_error (_("expecting %s or %s"), "READONLY", "WRITEABLE");
309 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
311 else if (command_type == PROC_CMD && lex_match_id ("UNSELECTED"))
314 if (lex_match_id ("RETAIN"))
315 *retain_unselected = true;
316 else if (lex_match_id ("DELETE"))
317 *retain_unselected = false;
320 lex_error (_("expecting %s or %s"), "RETAIN", "DELETE");
324 else if (writer_type == SYSFILE_WRITER && lex_match_id ("COMPRESSED"))
325 sysfile_opts.compress = true;
326 else if (writer_type == SYSFILE_WRITER && lex_match_id ("UNCOMPRESSED"))
327 sysfile_opts.compress = false;
328 else if (writer_type == SYSFILE_WRITER && lex_match_id ("VERSION"))
331 if (!lex_force_int ())
333 sysfile_opts.version = lex_integer ();
336 else if (writer_type == PORFILE_WRITER && lex_match_id ("TYPE"))
339 if (lex_match_id ("COMMUNICATIONS"))
340 porfile_opts.type = PFM_COMM;
341 else if (lex_match_id ("TAPE"))
342 porfile_opts.type = PFM_TAPE;
345 lex_error (_("expecting %s or %s"), "COMM", "TAPE");
349 else if (writer_type == PORFILE_WRITER && lex_match_id ("DIGITS"))
352 if (!lex_force_int ())
354 porfile_opts.digits = lex_integer ();
357 else if (!parse_dict_trim (dict))
360 if (!lex_match ('/'))
363 if (lex_end_of_command () != CMD_SUCCESS)
368 lex_sbc_missing ("OUTFILE");
372 dict_compact_values (dict);
373 aw->map = finish_case_map (dict);
375 case_create (&aw->bounce, dict_get_next_value_idx (dict));
380 aw->writer = sfm_open_writer (handle, dict, sysfile_opts);
383 aw->writer = pfm_open_writer (handle, dict, porfile_opts);
392 any_writer_destroy (aw);
397 /* Writes case C to writer AW. */
399 any_writer_write_case (struct any_writer *aw, struct ccase *c)
403 map_case (aw->map, c, &aw->bounce);
407 switch (aw->writer_type)
410 sfm_write_case (aw->writer, c);
413 pfm_write_case (aw->writer, c);
418 /* SAVE and EXPORT. */
420 static int output_proc (struct ccase *, void *);
422 /* Parses and performs the SAVE or EXPORT procedure. */
424 parse_output_proc (enum writer_type writer_type)
426 bool retain_unselected;
427 struct variable *saved_filter_variable;
428 struct any_writer *aw;
430 aw = parse_write_command (writer_type, PROC_CMD, &retain_unselected);
434 saved_filter_variable = dict_get_filter (default_dict);
435 if (retain_unselected)
436 dict_set_filter (default_dict, NULL);
437 procedure (output_proc, aw);
438 dict_set_filter (default_dict, saved_filter_variable);
440 any_writer_destroy (aw);
444 /* Writes case C to file. */
446 output_proc (struct ccase *c, void *aw_)
448 struct any_writer *aw = aw_;
449 any_writer_write_case (aw, c);
456 return parse_output_proc (SYSFILE_WRITER);
462 return parse_output_proc (PORFILE_WRITER);
465 /* XSAVE and XEXPORT. */
467 /* Transformation. */
470 struct any_writer *aw; /* Writer. */
473 static trns_proc_func output_trns_proc;
474 static trns_free_func output_trns_free;
476 /* Parses the XSAVE or XEXPORT transformation command. */
478 parse_output_trns (enum writer_type writer_type)
480 struct output_trns *t = xmalloc (sizeof *t);
481 t->aw = parse_write_command (writer_type, XFORM_CMD, NULL);
488 add_transformation (output_trns_proc, output_trns_free, t);
492 /* Writes case C to the system or portable file specified on XSAVE or XEXPORT. */
494 output_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
496 struct output_trns *t = trns_;
497 any_writer_write_case (t->aw, c);
501 /* Frees an XSAVE or XEXPORT transformation. */
503 output_trns_free (void *trns_)
505 struct output_trns *t = trns_;
509 any_writer_destroy (t->aw);
518 return parse_output_trns (SYSFILE_WRITER);
521 /* XEXPORT command. */
525 return parse_output_trns (PORFILE_WRITER);
528 static bool rename_variables (struct dictionary *dict);
529 static bool drop_variables (struct dictionary *dict);
530 static bool keep_variables (struct dictionary *dict);
532 /* Commands that read and write system files share a great deal
533 of common syntactic structure for rearranging and dropping
534 variables. This function parses this syntax and modifies DICT
535 appropriately. Returns true on success, false on failure. */
537 parse_dict_trim (struct dictionary *dict)
539 if (lex_match_id ("MAP"))
544 else if (lex_match_id ("DROP"))
545 return drop_variables (dict);
546 else if (lex_match_id ("KEEP"))
547 return keep_variables (dict);
548 else if (lex_match_id ("RENAME"))
549 return rename_variables (dict);
552 lex_error (_("expecting a valid subcommand"));
557 /* Parses and performs the RENAME subcommand of GET and SAVE. */
559 rename_variables (struct dictionary *dict)
577 v = parse_dict_variable (dict);
580 if (!lex_force_match ('=')
583 if (dict_lookup_var (dict, tokid) != NULL)
585 msg (SE, _("Cannot rename %s as %s because there already exists "
586 "a variable named %s. To rename variables with "
587 "overlapping names, use a single RENAME subcommand "
588 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
589 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
593 dict_rename_var (dict, v, tokid);
602 while (lex_match ('('))
606 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
608 if (!lex_match ('='))
610 msg (SE, _("`=' expected after variable list."));
613 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
617 msg (SE, _("Number of variables on left side of `=' (%d) does not "
618 "match number of variables on right side (%d), in "
619 "parenthesized group %d of RENAME subcommand."),
620 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
623 if (!lex_force_match (')'))
628 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
630 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
636 for (i = 0; i < nn; i++)
644 /* Parses and performs the DROP subcommand of GET and SAVE.
645 Returns true if successful, false on failure.*/
647 drop_variables (struct dictionary *dict)
653 if (!parse_variables (dict, &v, &nv, PV_NONE))
655 dict_delete_vars (dict, v, nv);
658 if (dict_get_var_cnt (dict) == 0)
660 msg (SE, _("Cannot DROP all variables from dictionary."));
666 /* Parses and performs the KEEP subcommand of GET and SAVE.
667 Returns true if successful, false on failure.*/
669 keep_variables (struct dictionary *dict)
676 if (!parse_variables (dict, &v, &nv, PV_NONE))
679 /* Move the specified variables to the beginning. */
680 dict_reorder_vars (dict, v, nv);
682 /* Delete the remaining variables. */
683 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
684 for (i = nv; i < dict_get_var_cnt (dict); i++)
685 v[i - nv] = dict_get_var (dict, i);
686 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
694 #include "debug-print.h"
699 MTF_FILE, /* Specified on FILE= subcommand. */
700 MTF_TABLE /* Specified on TABLE= subcommand. */
703 /* One of the files on MATCH FILES. */
706 struct mtf_file *next, *prev; /* Next, previous in the list of files. */
707 struct mtf_file *next_min; /* Next in the chain of minimums. */
709 int type; /* One of MTF_*. */
710 struct variable **by; /* List of BY variables for this file. */
711 struct file_handle *handle; /* File handle. */
712 struct sfm_reader *reader; /* System file reader. */
713 struct dictionary *dict; /* Dictionary from system file. */
716 char *in_name; /* Variable name. */
717 struct variable *in_var; /* Variable (in master dictionary). */
719 struct ccase input; /* Input record. */
722 /* MATCH FILES procedure. */
725 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
726 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
728 size_t by_cnt; /* Number of variables on BY subcommand. */
730 /* Names of FIRST, LAST variables. */
731 char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
733 struct dictionary *dict; /* Dictionary of output file. */
734 struct case_sink *sink; /* Sink to receive output. */
735 struct ccase mtf_case; /* Case used for output. */
737 unsigned seq_num; /* Current sequence number, compared with seq_nums[]. */
738 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
741 static void mtf_free (struct mtf_proc *);
742 static void mtf_free_file (struct mtf_file *);
743 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
744 static void mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
746 static void mtf_read_nonactive_records (void *);
747 static void mtf_processing_finish (void *);
748 static int mtf_processing (struct ccase *, void *);
750 static char *var_type_description (struct variable *);
752 static void set_master (struct variable *, struct variable *master);
753 static struct variable *get_master (struct variable *);
755 /* Parse and execute the MATCH FILES command. */
757 cmd_match_files (void)
760 struct mtf_file *first_table = NULL;
761 struct mtf_file *iter;
763 bool used_active_file = false;
764 bool saw_table = false;
767 mtf.head = mtf.tail = NULL;
771 mtf.dict = dict_create ();
773 case_nullify (&mtf.mtf_case);
776 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
780 && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
782 struct mtf_file *file = xmalloc (sizeof *file);
784 if (lex_match_id ("FILE"))
785 file->type = MTF_FILE;
786 else if (lex_match_id ("TABLE"))
788 file->type = MTF_TABLE;
799 file->in_name = NULL;
801 case_nullify (&file->input);
803 /* FILEs go first, then TABLEs. */
804 if (file->type == MTF_TABLE || first_table == NULL)
807 file->prev = mtf.tail;
809 mtf.tail->next = file;
811 if (mtf.head == NULL)
813 if (file->type == MTF_TABLE && first_table == NULL)
818 assert (file->type == MTF_FILE);
819 file->next = first_table;
820 file->prev = first_table->prev;
821 if (first_table->prev)
822 first_table->prev->next = file;
825 first_table->prev = file;
833 if (used_active_file)
835 msg (SE, _("The active file may not be specified more "
839 used_active_file = true;
841 assert (pgm_state != STATE_INPUT);
842 if (pgm_state == STATE_INIT)
844 msg (SE, _("Cannot specify the active file since no active "
845 "file has been defined."));
852 _("MATCH FILES may not be used after TEMPORARY when "
853 "the active file is an input source. "
854 "Temporary transformations will be made permanent."));
858 file->dict = default_dict;
862 file->handle = fh_parse ();
863 if (file->handle == NULL)
866 file->reader = sfm_open_reader (file->handle, &file->dict, NULL);
867 if (file->reader == NULL)
870 case_create (&file->input, dict_get_next_value_idx (file->dict));
873 while (lex_match ('/'))
874 if (lex_match_id ("RENAME"))
876 if (!rename_variables (file->dict))
879 else if (lex_match_id ("IN"))
888 if (file->in_name != NULL)
890 msg (SE, _("Multiple IN subcommands for a single FILE or "
894 file->in_name = xstrdup (tokid);
899 mtf_merge_dictionary (mtf.dict, file);
904 if (lex_match (T_BY))
906 struct variable **by;
910 msg (SE, _("BY may appear at most once."));
915 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
916 PV_NO_DUPLICATE | PV_NO_SCRATCH))
919 for (iter = mtf.head; iter != NULL; iter = iter->next)
923 iter->by = xnmalloc (mtf.by_cnt, sizeof *iter->by);
925 for (i = 0; i < mtf.by_cnt; i++)
927 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
928 if (iter->by[i] == NULL)
930 msg (SE, _("File %s lacks BY variable %s."),
931 iter->handle ? handle_get_name (iter->handle) : "*",
940 else if (lex_match_id ("FIRST"))
942 if (mtf.first[0] != '\0')
944 msg (SE, _("FIRST may appear at most once."));
949 if (!lex_force_id ())
951 strcpy (mtf.first, tokid);
954 else if (lex_match_id ("LAST"))
956 if (mtf.last[0] != '\0')
958 msg (SE, _("LAST may appear at most once."));
963 if (!lex_force_id ())
965 strcpy (mtf.last, tokid);
968 else if (lex_match_id ("MAP"))
972 else if (lex_match_id ("DROP"))
974 if (!drop_variables (mtf.dict))
977 else if (lex_match_id ("KEEP"))
979 if (!keep_variables (mtf.dict))
988 if (!lex_match ('/') && token != '.')
990 lex_end_of_command ();
999 msg (SE, _("BY is required when TABLE is specified."));
1004 msg (SE, _("BY is required when IN is specified."));
1009 /* Set up mapping from each file's variables to master
1011 for (iter = mtf.head; iter != NULL; iter = iter->next)
1013 struct dictionary *d = iter->dict;
1016 for (i = 0; i < dict_get_var_cnt (d); i++)
1018 struct variable *v = dict_get_var (d, i);
1019 struct variable *mv = dict_lookup_var (mtf.dict, v->name);
1025 /* Add IN variables to master dictionary.
   For every file that was given an IN name, a variable of that
   name is created in the master dictionary and its print and
   write formats are set. */
1026 for (iter = mtf.head; iter != NULL; iter = iter->next)
1027 if (iter->in_name != NULL)
1029 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
/* When creation fails in_var is null, so the requested name must
   be reported from in_name, never through in_var. */
1030 if (iter->in_var == NULL)
1032 msg (SE, _("IN variable name %s duplicates an "
1033 "existing variable name."),
1034 iter->in_name);
1037 iter->in_var->print = iter->in_var->write
1038 = make_output_format (FMT_F, 1, 0);
1041 /* MATCH FILES performs an n-way merge on all its input files.
1044 1. Read one input record from every input FILE.
1046 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
1048 3. Find the FILE input record(s) that have minimum BY
1049 values. Store all the values from these input records into
1052 4. For every TABLE, read another record as long as the BY values
1053 on the TABLE's input record are less than the FILEs' BY values.
1054 If an exact match is found, store all the values from the TABLE
1055 input record into the output record.
1057 5. Write the output record.
1059 6. Read another record from each input file FILE and TABLE that
1060 we stored values from above. If we come to the end of one of the
1061 input files, remove it from the list of input files.
1063 7. Repeat from step 2.
1065 Unfortunately, this algorithm can't be implemented in a
1066 straightforward way because there's no function to read a
1067 record from the active file. Instead, it has to be written
1070 FIXME: For merging large numbers of files (more than 10?) a
1071 better algorithm would use a heap for finding minimum
1074 if (!used_active_file)
1075 discard_variables ();
1077 dict_compact_values (mtf.dict);
1078 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
1079 if (mtf.sink->class->open != NULL)
1080 mtf.sink->class->open (mtf.sink);
1082 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1083 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1085 mtf_read_nonactive_records (&mtf);
1086 if (used_active_file)
1087 procedure (mtf_processing, &mtf);
1088 mtf_processing_finish (&mtf);
1090 free_case_source (vfm_source);
1093 dict_destroy (default_dict);
1094 default_dict = mtf.dict;
1096 vfm_source = mtf.sink->class->make_source (mtf.sink);
1097 free_case_sink (mtf.sink);
1107 /* Repeats 2...7 an arbitrary number of times. */
1109 mtf_processing_finish (void *mtf_)
1111 struct mtf_proc *mtf = mtf_;
1112 struct mtf_file *iter;
1114 /* Find the active file and delete it. */
1115 for (iter = mtf->head; iter; iter = iter->next)
1116 if (iter->handle == NULL)
1118 mtf_delete_file_in_place (mtf, &iter);
1122 while (mtf->head && mtf->head->type == MTF_FILE)
1123 if (!mtf_processing (NULL, mtf))
1127 /* Return a string in a static buffer describing V's variable type and
1130 var_type_description (struct variable *v)
1132 static char buf[2][32];
1139 if (v->type == NUMERIC)
1140 strcpy (s, "numeric");
1143 assert (v->type == ALPHA);
1144 sprintf (s, "string with width %d", v->width);
1149 /* Free FILE and associated data. */
1151 mtf_free_file (struct mtf_file *file)
1154 sfm_close_reader (file->reader);
1155 if (file->dict != default_dict)
1156 dict_destroy (file->dict);
1157 case_destroy (&file->input);
1158 free (file->in_name);
1162 /* Free all the data for the MATCH FILES procedure. */
1164 mtf_free (struct mtf_proc *mtf)
1166 struct mtf_file *iter, *next;
1168 for (iter = mtf->head; iter; iter = next)
1171 mtf_free_file (iter);
1175 dict_destroy (mtf->dict);
1176 case_destroy (&mtf->mtf_case);
1177 free (mtf->seq_nums);
1180 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1181 file in the chain, or to NULL if it was the last in the chain. */
1183 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1185 struct mtf_file *f = *file;
1189 f->prev->next = f->next;
1191 f->next->prev = f->prev;
1193 mtf->head = f->next;
1195 mtf->tail = f->prev;
1198 if (f->in_var != NULL)
1199 case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
1200 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1202 struct variable *v = dict_get_var (f->dict, i);
1203 struct variable *mv = get_master (v);
1206 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1208 if (v->type == NUMERIC)
1211 memset (out->s, ' ', v->width);
1218 /* Read a record from every input file except the active file. */
1220 mtf_read_nonactive_records (void *mtf_)
1222 struct mtf_proc *mtf = mtf_;
1223 struct mtf_file *iter, *next;
1225 for (iter = mtf->head; iter != NULL; iter = next)
1228 if (iter->handle && !sfm_read_case (iter->reader, &iter->input))
1229 mtf_delete_file_in_place (mtf, &iter);
1233 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1234 if A == B, 1 if A > B. For a file whose input case is null, case
   C supplies the data in its place. */
1236 mtf_compare_BY_values (struct mtf_proc *mtf,
1237 struct mtf_file *a, struct mtf_file *b,
1240 struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1241 struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
/* NOTE(review): this check runs only after A and B have already been
   dereferenced in the initializers above, so it cannot protect those
   accesses -- confirm what it is meant to guarantee. */
1242 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1243 return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
1246 /* Perform one iteration of steps 3...7 above. */
1248 mtf_processing (struct ccase *c, void *mtf_)
1250 struct mtf_proc *mtf = mtf_;
1252 /* Do we need another record from the active file? */
1253 bool read_active_file;
1255 assert (mtf->head != NULL);
1256 if (mtf->head->type == MTF_TABLE)
1261 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1262 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1263 struct mtf_file *iter, *next;
1265 read_active_file = false;
1267 /* 3. Find the FILE input record(s) that have minimum BY
1268 values. Store all the values from these input records into
1269 the output record. */
1270 min_head = min_tail = mtf->head;
1271 max_head = max_tail = NULL;
1272 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1275 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1279 max_tail = max_tail->next_min = iter;
1281 max_head = max_tail = iter;
1284 min_tail = min_tail->next_min = iter;
1289 max_tail->next_min = min_head;
1290 max_tail = min_tail;
1294 max_head = min_head;
1295 max_tail = min_tail;
1297 min_head = min_tail = iter;
1301 /* 4. For every TABLE, read another record as long as the BY
1302 values on the TABLE's input record are less than the FILEs'
1303 BY values. If an exact match is found, store all the values
1304 from the TABLE input record into the output record. */
1305 for (; iter != NULL; iter = next)
1307 assert (iter->type == MTF_TABLE);
1312 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1316 max_tail = max_tail->next_min = iter;
1318 max_head = max_tail = iter;
1321 min_tail = min_tail->next_min = iter;
1324 if (iter->handle == NULL)
1326 if (sfm_read_case (iter->reader, &iter->input))
1328 mtf_delete_file_in_place (mtf, &iter);
1334 /* Next sequence number. */
1337 /* Store data to all the records we are using. */
1339 min_tail->next_min = NULL;
1340 for (iter = min_head; iter; iter = iter->next_min)
1344 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1346 struct variable *v = dict_get_var (iter->dict, i);
1347 struct variable *mv = get_master (v);
1349 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1351 struct ccase *record
1352 = case_is_null (&iter->input) ? c : &iter->input;
1353 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1355 mtf->seq_nums[mv->index] = mtf->seq_num;
1356 if (v->type == NUMERIC)
1357 out->f = case_num (record, v->fv);
1359 memcpy (out->s, case_str (record, v->fv), v->width);
1362 if (iter->in_var != NULL)
1363 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
1365 if (iter->type == MTF_FILE && iter->handle == NULL)
1366 read_active_file = true;
1369 /* Store missing values to all the records we're not
1372 max_tail->next_min = NULL;
1373 for (iter = max_head; iter; iter = iter->next_min)
1377 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1379 struct variable *v = dict_get_var (iter->dict, i);
1380 struct variable *mv = get_master (v);
1382 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1384 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1385 mtf->seq_nums[mv->index] = mtf->seq_num;
1387 if (v->type == NUMERIC)
1390 memset (out->s, ' ', v->width);
1393 if (iter->in_var != NULL)
1394 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1397 /* 5. Write the output record. */
1398 mtf->sink->class->write (mtf->sink, &mtf->mtf_case);
1400 /* 6. Read another record from each input file FILE and TABLE
1401 that we stored values from above. If we come to the end of
1402 one of the input files, remove it from the list of input
1404 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1406 next = iter->next_min;
1407 if (iter->reader != NULL
1408 && !sfm_read_case (iter->reader, &iter->input))
1409 mtf_delete_file_in_place (mtf, &iter);
1412 while (!read_active_file
1413 && mtf->head != NULL && mtf->head->type == MTF_FILE);
1415 return mtf->head != NULL && mtf->head->type == MTF_FILE;
1418 /* Merge the dictionary for file F into master dictionary M. */
1420 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1422 struct dictionary *d = f->dict;
1423 const char *d_docs, *m_docs;
1426 if (dict_get_label (m) == NULL)
1427 dict_set_label (m, dict_get_label (d));
1429 d_docs = dict_get_documents (d);
1430 m_docs = dict_get_documents (m);
1434 dict_set_documents (m, d_docs);
1440 new_len = strlen (m_docs) + strlen (d_docs);
1441 new_docs = xmalloc (new_len + 1);
1442 strcpy (new_docs, m_docs);
1443 strcat (new_docs, d_docs);
1444 dict_set_documents (m, new_docs);
1449 for (i = 0; i < dict_get_var_cnt (d); i++)
1451 struct variable *dv = dict_get_var (d, i);
1452 struct variable *mv = dict_lookup_var (m, dv->name);
1454 if (dict_class_from_id (dv->name) == DC_SCRATCH)
1459 if (mv->width != dv->width)
1461 msg (SE, _("Variable %s in file %s (%s) has different "
1462 "type or width from the same variable in "
1463 "earlier file (%s)."),
1464 dv->name, handle_get_name (f->handle),
1465 var_type_description (dv), var_type_description (mv));
1469 if (dv->width == mv->width)
1471 if (val_labs_count (dv->val_labs)
1472 && !val_labs_count (mv->val_labs))
1473 mv->val_labs = val_labs_copy (dv->val_labs);
1474 if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
1475 mv_copy (&mv->miss, &dv->miss);
1478 if (dv->label && !mv->label)
1479 mv->label = xstrdup (dv->label);
1482 mv = dict_clone_var_assert (m, dv, dv->name);
1488 /* Marks V's master variable as MASTER. */
1490 set_master (struct variable *v, struct variable *master)
1492 var_attach_aux (v, master, NULL);
1495 /* Returns the master variable corresponding to V,
1496 as set with set_master(). */
1497 static struct variable *
1498 get_master (struct variable *v)
1503 /* IMPORT command. */
1505 /* IMPORT input program. */
1508 struct pfm_reader *reader; /* Portable file reader. */
1509 struct case_map *map; /* Map from portable file to active file dict. */
1510 struct ccase bounce; /* Bounce buffer. */
1513 static void import_pgm_free (struct import_pgm *);
1515 /* Parses the IMPORT command. */
1519 struct import_pgm *pgm = NULL;
1520 struct file_handle *fh = NULL;
1521 struct dictionary *dict = NULL;
1527 if (pgm == NULL && (lex_match_id ("FILE") || token == T_STRING))
1535 else if (pgm == NULL && lex_match_id ("TYPE"))
1539 if (lex_match_id ("COMM"))
1541 else if (lex_match_id ("TAPE"))
1545 lex_error (_("expecting COMM or TAPE"));
1555 lex_sbc_missing ("FILE");
1559 discard_variables ();
1561 pgm = xmalloc (sizeof *pgm);
1562 pgm->reader = pfm_open_reader (fh, &dict, NULL);
1564 case_nullify (&pgm->bounce);
1565 if (pgm->reader == NULL)
1568 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
1570 start_case_map (dict);
1576 if (!parse_dict_trim (dict))
1588 pgm->map = finish_case_map (dict);
1590 dict_destroy (default_dict);
1591 default_dict = dict;
1593 vfm_source = create_case_source (&import_source_class, pgm);
1598 import_pgm_free (pgm);
1600 dict_destroy (dict);
1604 /* Frees a struct import_pgm. */
1606 import_pgm_free (struct import_pgm *pgm)
1610 pfm_close_reader (pgm->reader);
1611 destroy_case_map (pgm->map);
1612 case_destroy (&pgm->bounce);
1617 /* Clears internal state related to IMPORT input procedure. */
1619 import_source_destroy (struct case_source *source)
1621 struct import_pgm *pgm = source->aux;
1622 import_pgm_free (pgm);
1625 /* Reads all the cases from the data file into C and passes them
1626 to WRITE_CASE one by one, passing WC_DATA. */
1628 import_source_read (struct case_source *source,
1630 write_case_func *write_case, write_case_data wc_data)
1632 struct import_pgm *pgm = source->aux;
1637 if (pgm->map == NULL)
1638 ok = pfm_read_case (pgm->reader, c);
1641 ok = pfm_read_case (pgm->reader, &pgm->bounce);
1643 map_case (pgm->map, &pgm->bounce, c);
1647 ok = write_case (wc_data);
1652 const struct case_source_class import_source_class =
1657 import_source_destroy,
1663 A case map copies data from a case that corresponds to one
1664 dictionary to a case that corresponds to a second dictionary
1665 derived from the first by, optionally, deleting, reordering,
1666 or renaming variables. (No new variables may be created.)
1672 size_t value_cnt; /* Number of values in map. */
1673 int *map; /* For each destination index, the
1674 corresponding source index. */
1677 /* Prepares dictionary D for producing a case map. Afterward,
1678 the caller may delete, reorder, or rename variables within D
1679 at will before using finish_case_map() to produce the case
1682 Uses D's aux members, which must otherwise not be in use. */
1684 start_case_map (struct dictionary *d)
1686 size_t var_cnt = dict_get_var_cnt (d);
1689 for (i = 0; i < var_cnt; i++)
1691 struct variable *v = dict_get_var (d, i);
1692 int *src_fv = xmalloc (sizeof *src_fv);
1694 var_attach_aux (v, src_fv, var_dtor_free);
1698 /* Produces a case map from dictionary D, which must have been
1699 previously prepared with start_case_map().
1701 Does not retain any reference to D, and clears the aux members
1702 set up by start_case_map().
1704 Returns the new case map, or a null pointer if no mapping is
1705 required (that is, no data has changed position). */
1706 static struct case_map *
1707 finish_case_map (struct dictionary *d)
1709 struct case_map *map;
1710 size_t var_cnt = dict_get_var_cnt (d);
1714 map = xmalloc (sizeof *map);
1715 map->value_cnt = dict_get_next_value_idx (d);
1716 map->map = xnmalloc (map->value_cnt, sizeof *map->map);
1717 for (i = 0; i < map->value_cnt; i++)
1721 for (i = 0; i < var_cnt; i++)
1723 struct variable *v = dict_get_var (d, i);
1724 int *src_fv = (int *) var_detach_aux (v);
1727 if (v->fv != *src_fv)
1730 for (idx = 0; idx < v->nv; idx++)
1732 int src_idx = *src_fv + idx;
1733 int dst_idx = v->fv + idx;
1735 assert (map->map[dst_idx] == -1);
1736 map->map[dst_idx] = src_idx;
1743 destroy_case_map (map);
1747 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1753 /* Maps from SRC to DST, applying case map MAP. */
1755 map_case (const struct case_map *map,
1756 const struct ccase *src, struct ccase *dst)
1760 assert (map != NULL);
1761 assert (src != NULL);
1762 assert (dst != NULL);
1763 assert (src != dst);
1765 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1767 int src_idx = map->map[dst_idx];
1769 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1773 /* Destroys case map MAP. */
1775 destroy_case_map (struct case_map *map)