AC_LIBTOOL_DLOPEN
AC_PROG_LIBTOOL
PKG_PROG_PKG_CONFIG
+m4_pattern_forbid([PKG_CHECK_MODULES])
AC_ARG_ENABLE(
anachronistic-dependencies,
doc/data-selection.texi \
doc/expressions.texi \
doc/files.texi \
+ doc/combining.texi \
doc/flow-control.texi \
doc/function-index.texi \
doc/installing.texi \
--- /dev/null
+@node Combining Data Files
+@chapter Combining Data Files
+
+This chapter describes commands that allow data from system files,
+portable files, scratch files, and the active file to be combined to
+form a new active file. These commands can combine data files in the
+following ways:
+
+@itemize
+@item
+@cmd{ADD FILES} interleaves or appends the cases from each input file.
+It is used with input files that have variables in common, but
+distinct sets of cases.
+
+@item
+@cmd{MATCH FILES} adds the data together in cases that match across
+multiple input files. It is used with input files that have cases in
+common, but different information about each case.
+
+@item
+@cmd{UPDATE} updates a master data file from data in a set of
+transaction files. Each case in a transaction data file modifies a
+matching case in the master data file, or it adds a new case if no
+matching case can be found.
+@end itemize
+
+These commands share the majority of their syntax, which is described
+in the following section, followed by one section for each command
+that describes its specific syntax and semantics.
+
+@menu
+* Combining Files Common Syntax::
+* ADD FILES:: Interleave cases from multiple files.
+* MATCH FILES:: Merge cases from multiple files.
+* UPDATE:: Update cases using transactional data.
+@end menu
+
+@node Combining Files Common Syntax
+@section Common Syntax
+
+@display
+Per input file:
+ /FILE=@{*,'file-name'@}
+ [/RENAME=(src_names=target_names)@dots{}]
+ [/IN=var_name]
+ [/SORT]
+
+Once per command:
+ /BY var_list[(@{D|A@})] [var_list[(@{D|A@})]]@dots{}
+ [/DROP=var_list]
+ [/KEEP=var_list]
+ [/FIRST=var_name]
+ [/LAST=var_name]
+ [/MAP]
+@end display
+
+This section describes the syntactical features in common among the
+@cmd{ADD FILES}, @cmd{MATCH FILES}, and @cmd{UPDATE} commands. The
+following sections describe details specific to each command.
+
+Each of these commands reads two or more input files and combines
+them. The command's output becomes the new active file. The input
+files are not changed on disk.
+
+The syntax of each command begins with a specification of the files to
+be read as input. For each input file, specify FILE with a system,
+portable, or scratch file's name as a string or a file handle
+(@pxref{File Handles}), or specify an asterisk (@samp{*}) to use the
+active file as input. Use of portable or scratch files on FILE is a
+PSPP extension.
+
+At least two FILE subcommands must be specified. If the active file
+is used as an input source, then @cmd{TEMPORARY} must not be in
+effect.
+
+Each FILE subcommand may be followed by any number of RENAME
+subcommands that specify a parenthesized group or groups of variable
+names as they appear in the input file, followed by those variables'
+new names, separated by an equals sign (@samp{=}),
+e.g. @samp{/RENAME=(OLD1=NEW1)(OLD2=NEW2)}. To rename a single
+variable, the parentheses may be omitted: @samp{/RENAME=OLD=NEW}.
+Within a parenthesized group, variables are renamed simultaneously, so
+that @samp{/RENAME=(A B=B A)} exchanges the names of variables A and
+B. Otherwise, renaming occurs in left-to-right order.
+
+Each FILE subcommand may optionally be followed by a single IN
+subcommand, which creates a numeric variable with the specified name
+and format F1.0. The IN variable takes value 1 in an output case if
+the given input file contributed to that output case, and 0 otherwise.
+The DROP, KEEP, and RENAME subcommands have no effect on IN variables.
+
+If BY is used (see below), the SORT keyword must be specified after a
+FILE if that input file is not already sorted on the BY variables.
+When SORT is specified, PSPP sorts the input file's data on the BY
+variables before it applies it to the command. When SORT is used, BY
+is required. SORT is a PSPP extension.
+
+PSPP merges the dictionaries of all of the input files to form the new
+active file dictionary, like so:
+
+@itemize @bullet
+@item
+The new active file's variables are the union of all the input files'
+variables, matched based on their name. When a single input file
+contains a variable with a given name, the output file will contain
+exactly that variable. When more than one input file contains a
+variable with a given name, those variables must all have the same
+type (numeric or string) and, for string variables, the same width.
+Variables are matched after renaming with the RENAME subcommand.
+Thus, RENAME can be used to resolve conflicts.
+
+@item
+The variable label for each output variable is taken from the first
+specified input file that has a variable label for that variable, and
+similarly for value labels and missing values.
+
+@item
+The new active file's file label (@pxref{FILE LABEL}) is that of the
+first specified FILE that has a file label.
+
+@item
+The new active file's documents (@pxref{DOCUMENT}) are the
+concatenation of all the input files' documents, in the order in which
+the FILE subcommands are specified.
+
+@item
+If all of the input files are weighted on the same variable, then the
+new active file is weighted on that variable. Otherwise, the new
+active file is not weighted.
+@end itemize
+
+The remaining subcommands apply to the output file as a whole, rather
+than to individual input files. They must be specified at the end of
+the command specification, following all of the FILE and related
+subcommands. The most important of these subcommands is BY, which
+specifies a set of one or more variables that may be used to find
+corresponding cases in each of the input files. The variables
+specified on BY must be present in all of the input files.
+Furthermore, if any of the input files are not sorted on the BY
+variables, then SORT must be specified for those input files.
+
+The variables listed on BY may include (A) or (D) annotations to
+specify ascending or descending sort order. @xref{SORT CASES}, for
+more details on this notation. Adding (A) or (D) to the BY subcommand
+specification is a PSPP extension.
+
+The DROP subcommand can be used to specify a list of variables to
+exclude from the output. By contrast, the KEEP subcommand can be used
+to specify variables to include in the output; all variables not
+listed are dropped. DROP and KEEP are executed in left-to-right order
+and may be repeated any number of times. DROP and KEEP do not affect
+variables created by the IN, FIRST, and LAST subcommands, which are
+always included in the new active file, but they can be used to drop
+BY variables.
+
+The FIRST and LAST subcommands are optional. They may only be
+specified on @cmd{MATCH FILES} and @cmd{ADD FILES}, and only when BY
+is used. FIRST and LAST each add a numeric variable to the new
+active file, with the name given as the subcommand's argument and F1.0
+print and write formats. The value of the FIRST variable is 1 in the
+first output case with a given set of values for the BY variables, and
+0 in other cases. Similarly, the LAST variable is 1 in the last case
+with a given set of BY values, and 0 in other cases.
+
+When any of these commands creates an output case, variables that are
+only in files that are not present for the current case are set to the
+system-missing value for numeric variables or spaces for string
+variables.
+
+@node ADD FILES
+@section ADD FILES
+@vindex ADD FILES
+
+@display
+ADD FILES
+
+Per input file:
+ /FILE=@{*,'file-name'@}
+ [/RENAME=(src_names=target_names)@dots{}]
+ [/IN=var_name]
+ [/SORT]
+
+Once per command:
+ [/BY var_list[(@{D|A@})] [var_list[(@{D|A@})]@dots{}]]
+ [/DROP=var_list]
+ [/KEEP=var_list]
+ [/FIRST=var_name]
+ [/LAST=var_name]
+ [/MAP]
+@end display
+
+@cmd{ADD FILES} adds cases from multiple input files. The output,
+which replaces the active file, consists of all of the cases in all of
+the input files.
+
+ADD FILES shares the bulk of its syntax with other PSPP commands for
+combining multiple data files. @xref{Combining Files Common Syntax},
+above, for an explanation of this common syntax.
+
+When BY is not used, the output of ADD FILES consists of all the cases
+from the first input file specified, followed by all the cases from
+the second file specified, and so on. When BY is used, the output is
+additionally sorted on the BY variables.
+
+When ADD FILES creates an output case, variables that are not part of
+the input file from which the case was drawn are set to the
+system-missing value for numeric variables or spaces for string
+variables.
+
+@node MATCH FILES
+@section MATCH FILES
+@vindex MATCH FILES
+
+@display
+MATCH FILES
+
+Per input file:
+ /@{FILE,TABLE@}=@{*,'file-name'@}
+ [/RENAME=(src_names=target_names)@dots{}]
+ [/IN=var_name]
+ [/SORT]
+
+Once per command:
+ /BY var_list[(@{D|A@})] [var_list[(@{D|A@})]@dots{}]
+ [/DROP=var_list]
+ [/KEEP=var_list]
+ [/FIRST=var_name]
+ [/LAST=var_name]
+ [/MAP]
+@end display
+
+@cmd{MATCH FILES} merges sets of corresponding cases in multiple
+input files into single cases in the output, combining their data.
+
+MATCH FILES shares the bulk of its syntax with other PSPP commands for
+combining multiple data files. @xref{Combining Files Common Syntax},
+above, for an explanation of this common syntax.
+
+How MATCH FILES matches up cases from the input files depends on
+whether BY is specified:
+
+@itemize @bullet
+@item
+If BY is not used, MATCH FILES combines the first case from each input
+file to produce the first output case, then the second case from each
+input file for the second output case, and so on. If some input files
+have fewer cases than others, then the shorter files do not contribute
+to cases output after their input has been exhausted.
+
+@item
+If BY is used, MATCH FILES combines cases from each input file that
+have identical values for the BY variables.
+
+When BY is used, TABLE subcommands may be used to introduce @dfn{table
+lookup files}. TABLE has the same syntax as FILE, and the RENAME, IN, and
+SORT subcommands may follow a TABLE in the same way as a FILE.
+Regardless of the number of TABLEs, at least one FILE must be specified.
+Table lookup files are treated in the same way as other input files
+for most purposes and, in particular, table lookup files must be
+sorted on the BY variables or the SORT subcommand must be specified
+for that TABLE.
+
+Cases in table lookup files are not consumed after they have been used
+once. This means that data in table lookup files can correspond to
+any number of cases in FILE input files. Table lookup files are
+analogous to lookup tables in traditional relational database systems.
+
+If a table lookup file contains more than one case with a given set of
+BY variables, only the first case is used.
+@end itemize
+
+When MATCH FILES creates an output case, variables that are only in
+files that are not present for the current case are set to the
+system-missing value for numeric variables or spaces for string
+variables.
+
+@node UPDATE
+@section UPDATE
+@vindex UPDATE
+
+@display
+UPDATE
+
+Per input file:
+ /FILE=@{*,'file-name'@}
+ [/RENAME=(src_names=target_names)@dots{}]
+ [/IN=var_name]
+ [/SORT]
+
+Once per command:
+ /BY var_list[(@{D|A@})] [var_list[(@{D|A@})]]@dots{}
+ [/DROP=var_list]
+ [/KEEP=var_list]
+ [/MAP]
+@end display
+
+@cmd{UPDATE} updates a @dfn{master file} by applying modifications
+from one or more @dfn{transaction files}.
+
+UPDATE shares the bulk of its syntax with other PSPP commands for
+combining multiple data files. @xref{Combining Files Common Syntax},
+above, for an explanation of this common syntax.
+
+At least two FILE subcommands must be specified. The first FILE
+subcommand names the master file, and the rest name transaction files.
+Every input file must either be sorted on the variables named on the
+BY subcommand, or the SORT subcommand must be used just after the FILE
+subcommand for that input file.
+
+UPDATE uses the variables specified on the BY subcommand, which is
+required, to attempt to match each case in a transaction file with a
+case in the master file:
+
+@itemize @bullet
+@item
+When a match is found, then the values of the variables present in the
+transaction file replace those variables' values in the new active
+file. If there are matching cases in more than one transaction file,
+PSPP applies the replacements from the first transaction file, then
+from the second transaction file, and so on. Similarly, if a single
+transaction file has cases with duplicate BY values, then those are
+applied in order to the master file.
+
+When a variable in a transaction file has a missing value or a string
+variable's value is all blanks, that value is never used to update the
+master file.
+
+@item
+If a case in the master file has no matching case in any transaction
+file, then it is copied unchanged to the output.
+
+@item
+If a case in a transaction file has no matching case in the master
+file, then it causes a new case to be added to the output, initialized
+from the values in the transaction file.
+@end itemize
filling the vacated position.
To associate custom attributes with particular variables, instead of
-with the entire active file, use @cmd{VARIABLE ATTRIBUTE} instead.
+with the entire active file, use @cmd{VARIABLE ATTRIBUTE} (@pxref{VARIABLE ATTRIBUTE}) instead.
@cmd{DATAFILE ATTRIBUTE} takes effect immediately. It is not affected
by conditional and looping structures such as @cmd{DO IF} or
-@node System and Portable Files
-@chapter System Files and Portable Files
+@node System and Portable File IO
+@chapter System and Portable File I/O
The commands in this chapter read, write, and examine system files and
portable files.
* GET:: Read from a system file.
* GET DATA:: Read from foreign files.
* IMPORT:: Read from a portable file.
-* MATCH FILES:: Merge system files.
* SAVE:: Write to a system file.
* SYSFILE INFO:: Display system file dictionary.
* XEXPORT:: Write to a portable file, as a transformation.
Use of @cmd{IMPORT} to read a system file or scratch file is a PSPP
extension.
-@node MATCH FILES
-@section MATCH FILES
-@vindex MATCH FILES
-
-@display
-MATCH FILES
- /@{FILE,TABLE@}=@{*,'file-name'@}
- /RENAME=(src_names=target_names)@dots{}
- /IN=var_name
-
- /BY=var_list
- /DROP=var_list
- /KEEP=var_list
- /FIRST=var_name
- /LAST=var_name
- /MAP
-@end display
-
-@cmd{MATCH FILES} merges one or more system, portable, or scratch files,
-optionally
-including the active file. Cases with the same values for BY
-variables are combined into a single case. Cases with different
-values are output in order. Thus, multiple sorted files are
-combined into a single sorted file based on the value of the BY
-variables. The results of the merge become the new active file.
-
-Specify FILE with a system, portable, or scratch file as a file name
-string or file handle
-(@pxref{File Handles}), or with an asterisk (@samp{*}) to
-indicate the current active file. The files specified on FILE are
-merged together based on the BY variables, or combined case-by-case if
-BY is not specified.
-
-Specify TABLE with a file to use it as a @dfn{table
-lookup file}. Cases in table lookup files are not used up after
-they've been used once. This means that data in table lookup files can
-correspond to any number of cases in FILE files. Table lookup files
-correspond to lookup tables in traditional relational database systems.
-If a table lookup file contains more than one case with a given set of
-BY variables, only the first case is used.
-
-Any number of FILE and TABLE subcommands may be specified.
-Ordinarily, at least two FILE subcommands, or one FILE and at least
-one TABLE, should be specified. Each instance of FILE or TABLE can be
-followed by any sequence of RENAME subcommands. These have the same
-form and meaning as the corresponding subcommands of @cmd{GET}
-(@pxref{GET}), but apply only to variables in the given file.
-
-Each FILE or TABLE may optionally be followed by an IN subcommand,
-which creates a numeric variable with the specified name and format
-F1.0. The IN variable takes value 1 in a case if the given file
-contributed a row to the merged file, 0 otherwise. The DROP, KEEP,
-and RENAME subcommands do not affect IN variables.
-
-When more than one FILE or TABLE contains a variable with a given
-name, those variables must all have the same type (numeric or string)
-and, for string variables, the same width. This rules applies to
-variable names after renaming with RENAME; thus, RENAME can be used to
-resolve conflicts.
-
-FILE and TABLE must be specified at the beginning of the command, with
-any RENAME or IN specifications immediately after the corresponding
-FILE or TABLE. These subcommands are followed by BY, DROP, KEEP,
-FIRST, LAST, and MAP.
-
-The BY subcommand specifies a list of variables that are used to match
-cases from each of the files. When TABLE or IN is used, BY is
-required; otherwise, it is optional. When BY is specified, all the
-files named on FILE and TABLE subcommands must be sorted in ascending
-order of the BY variables. Variables belonging to files that are not
-present for the current case are set to the system-missing value for
-numeric variables or spaces for string variables.
-
-The DROP and KEEP subcommands allow variables to be dropped from or
-reordered within the new active file. These subcommands have the same
-form and meaning as the corresponding subcommands of @cmd{GET}
-(@pxref{GET}). They apply to the new active file as a whole, not to
-individual input files. The variable names specified on DROP and KEEP
-are those after any renaming with RENAME.
-
-The optional FIRST and LAST subcommands name variables that @cmd{MATCH
-FILES} adds to the active file. The new variables are numeric with
-print and write format F1.0. The value of the FIRST variable is 1 in
-the first case with a given set of values for the BY variables, and 0
-in other cases. Similarly, the LAST variable is 1 in the last case
-with a given of BY values, and 0 in other cases.
-
-@cmd{MATCH FILES} may not be specified following @cmd{TEMPORARY}
-(@pxref{TEMPORARY}) if the active file is used as an input source.
-
-Use of portable or scratch files on @cmd{MATCH FILES} is a PSPP
-extension.
-
@node SAVE
@section SAVE
@vindex SAVE
* Expressions:: Numeric and string expression syntax.
* Data Input and Output:: Reading data from user files.
-* System and Portable Files:: Dealing with system & portable files.
+* System and Portable File IO:: Reading and writing system & portable files.
+* Combining Data Files:: Combining data from multiple files.
* Variable Attributes:: Adjusting and examining variables.
* Data Manipulation:: Simple operations on data.
* Data Selection:: Select certain cases for analysis.
@include expressions.texi
@include data-io.texi
@include files.texi
+@include combining.texi
@include variables.texi
@include transformation.texi
@include data-selection.texi
deleted element are shifted down, filling the vacated position.
To associate custom attributes with the entire active file, instead of
-with particular variables, use @cmd{DATAFILE ATTRIBUTE} instead.
+with particular variables, use @cmd{DATAFILE ATTRIBUTE} (@pxref{DATAFILE ATTRIBUTE}) instead.
@cmd{VARIABLE ATTRIBUTE} takes effect immediately. It is not affected
by conditional and looping structures such as @cmd{DO IF} or
src/data/calendar.h \
src/data/case-map.c \
src/data/case-map.h \
- src/data/case-ordering.c \
- src/data/case-ordering.h \
+ src/data/case-matcher.c \
+ src/data/case-matcher.h \
src/data/case.c \
src/data/casegrouper.c \
src/data/casegrouper.h \
src/data/short-names.h \
src/data/sparse-cases.c \
src/data/sparse-cases.h \
+ src/data/subcase.c \
+ src/data/subcase.h \
src/data/sys-file-private.c \
src/data/sys-file-private.h \
src/data/sys-file-reader.c \
#include <stdio.h>
#include <stdlib.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
#include <data/dictionary.h>
#include <data/variable.h>
#include <data/case.h>
corresponding source index. */
};
+static void translate_case (struct ccase *, struct ccase *, void *map_);
+static bool destroy_case_map (void *map_);
+
/* Creates and returns an empty map. */
static struct case_map *
create_case_map (size_t n)
return map->value_cnt;
}
+/* Returns a new casereader that yields the cases read from
+   SUBREADER after each has been passed through MAP.  The new
+   casereader has as many `union value's as MAP.  It takes over
+   MAP and destroys it when it is itself destroyed.
+
+   SUBREADER is taken over as well: the caller must never refer
+   to it directly again, and it is destroyed automatically along
+   with the returned casereader. */
+struct casereader *
+case_map_create_input_translator (struct case_map *map,
+                                  struct casereader *subreader)
+{
+  size_t value_cnt = case_map_get_value_cnt (map);
+
+  return casereader_create_translator (subreader, value_cnt,
+                                       translate_case, destroy_case_map,
+                                       map);
+}
+
+/* Returns a new casewriter that passes each case written to it
+   through MAP and writes the result to SUBWRITER.  The new
+   casewriter has as many `union value's as MAP.  It takes over
+   MAP and destroys it when it is itself destroyed.
+
+   SUBWRITER is taken over as well: the caller must never refer
+   to it directly again, and it is destroyed automatically along
+   with the returned casewriter. */
+struct casewriter *
+case_map_create_output_translator (struct case_map *map,
+                                   struct casewriter *subwriter)
+{
+  size_t value_cnt = case_map_get_value_cnt (map);
+
+  return casewriter_create_translator (subwriter, value_cnt,
+                                       translate_case, destroy_case_map,
+                                       map);
+}
+
+/* Casereader/casewriter translation callback.  MAP_ is the
+   case_map registered with the translator; INPUT and OUTPUT are
+   handed to case_map_execute() with it, after which INPUT is
+   destroyed. */
+static void
+translate_case (struct ccase *input, struct ccase *output, void *map_)
+{
+ struct case_map *map = map_;
+ case_map_execute (map, input, output);
+ case_destroy (input); /* INPUT is no longer needed once it has been mapped. */
+}
+
+/* Casereader/casewriter destruction callback.  Destroys the
+   case_map passed as MAP_ and unconditionally returns true. */
+static bool
+destroy_case_map (void *map_)
+{
+ struct case_map *map = map_;
+ case_map_destroy (map);
+ return true;
+}
+
/* Creates and returns a case_map that can be used to compact
cases for dictionary D.
#include <stddef.h>
struct case_map;
-struct dictionary;
+struct casereader;
+struct casewriter;
struct ccase;
+struct dictionary;
struct case_map *case_map_create (void);
void case_map_destroy (struct case_map *);
size_t case_map_get_value_cnt (const struct case_map *);
+struct casereader *case_map_create_input_translator (struct case_map *,
+ struct casereader *);
+struct casewriter *case_map_create_output_translator (struct case_map *,
+ struct casewriter *);
+
/* For mapping cases for one version of a dictionary to those in
a modified version of the same dictionary. */
void case_map_prepare_dict (const struct dictionary *);
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2008 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <data/case-matcher.h>
+
+#include <stdlib.h>
+
+#include <data/case.h>
+#include <data/subcase.h>
+#include <data/value.h>
+#include <libpspp/assertion.h>
+
+#include "xalloc.h"
+
+/* One input registered with case_matcher_add_input(). */
+struct case_matcher_input
+ {
+ struct subcase by_vars; /* Clone of the BY subcase given for this input. */
+ const struct ccase *data; /* Caller's case, examined by case_matcher_match(). */
+ bool *is_minimal; /* Caller's flag, set by case_matcher_match(). */
+ };
+
+/* A case matcher: a set of inputs whose BY values are compared
+   against each other by case_matcher_match(). */
+struct case_matcher
+ {
+ struct case_matcher_input *inputs; /* Array of registered inputs. */
+ size_t n_inputs, allocated_inputs; /* Elements in use, elements allocated. */
+ union value *by_values; /* Buffer for the minimum case's BY values;
+ null until the first input is added. */
+ };
+
+/* Allocates and returns a case matcher that has no inputs yet.
+   Inputs are registered with case_matcher_add_input(). */
+struct case_matcher *
+case_matcher_create (void)
+{
+  struct case_matcher *matcher = xmalloc (sizeof *matcher);
+
+  /* No BY value buffer exists until the first input is added. */
+  matcher->by_values = NULL;
+
+  /* Start with an empty, unallocated array of inputs. */
+  matcher->allocated_inputs = 0;
+  matcher->n_inputs = 0;
+  matcher->inputs = NULL;
+
+  return matcher;
+}
+
+/* Adds a new input file to case matcher CM.
+ case_matcher_match() will compare the variables specified in
+ BY in case DATA and set *IS_MINIMAL appropriately.
+
+ BY is cloned, so the caller keeps ownership of it. The DATA
+ and IS_MINIMAL pointers are stored as given, so the objects
+ they point to must stay valid for as long as
+ case_matcher_match() may be called on CM.
+
+ All of the BY subcases provided to this function for a given
+ CM must be conformable (see subcase_conformable()). */
+void
+case_matcher_add_input (struct case_matcher *cm, const struct subcase *by,
+ const struct ccase *data, bool *is_minimal)
+{
+ struct case_matcher_input *input;
+
+ if (cm->n_inputs == 0) /* First input: size the BY value buffer from it. */
+ cm->by_values = xmalloc (subcase_get_n_values (by)
+ * sizeof *cm->by_values);
+ else /* Later inputs must be conformable with the first one. */
+ assert (subcase_conformable (by, &cm->inputs[0].by_vars));
+
+ if (cm->n_inputs >= cm->allocated_inputs) /* Input array is full: grow it. */
+ cm->inputs = x2nrealloc (cm->inputs, &cm->allocated_inputs,
+ sizeof *cm->inputs);
+ input = &cm->inputs[cm->n_inputs++];
+ subcase_clone (&input->by_vars, by);
+ input->data = data;
+ input->is_minimal = is_minimal;
+}
+
+/* Destroys case matcher CM: the clone of each input's BY
+   subcase, the array of inputs, and the buffer of BY values
+   that case_matcher_match() hands out through its BY argument.
+   Does nothing if CM is a null pointer.
+
+   The cases and IS_MINIMAL flags that were passed to
+   case_matcher_add_input() belong to the caller and are not
+   touched. */
+void
+case_matcher_destroy (struct case_matcher *cm)
+{
+  if (cm != NULL)
+    {
+      size_t i;
+
+      for (i = 0; i < cm->n_inputs; i++)
+        {
+          struct case_matcher_input *input = &cm->inputs[i];
+          subcase_destroy (&input->by_vars);
+        }
+      free (cm->inputs);
+
+      /* Allocated by the first case_matcher_add_input() call.  It
+         is still null, and so harmless to free, if no input was
+         ever added. */
+      free (cm->by_values);
+
+      free (cm);
+    }
+}
+
+/* Compares the BY values of input A's current case with those of
+   input B's current case, each through its own BY subcase, and
+   returns the result of subcase_compare_3way().
+   case_matcher_match() treats a negative result as A ordering
+   before B and a positive result as A ordering after B. */
+static int
+compare_BY_3way (struct case_matcher_input *a, struct case_matcher_input *b)
+{
+ return subcase_compare_3way (&a->by_vars, a->data, &b->by_vars, b->data);
+}
+
+/* Compares the values of the BY variables in all of the nonnull
+ cases provided to case_matcher_add_input() for CM, sets
+ *IS_MINIMAL for each one to true if it has the minimum BY
+ values among those cases or to false if its BY values are
+ greater than the minimum. Also sets *IS_MINIMAL to false for
+ null cases. Sets *BY to the BY values extracted from the
+ minimum case, or to a null pointer if all of the cases are
+ null. (The caller must not free *BY.)
+
+ Returns true if at least one of the cases is nonnull, false
+ if they are all null. */
+bool
+case_matcher_match (struct case_matcher *cm, union value **by)
+{
+ struct case_matcher_input *file, *min;
+
+ min = NULL;
+ for (file = cm->inputs; file < &cm->inputs[cm->n_inputs]; file++)
+ if (!case_is_null (file->data))
+ {
+ /* The first nonnull case always becomes the provisional minimum. */
+ int cmp = min != NULL ? compare_BY_3way (min, file) : 1;
+ if (cmp < 0) /* MIN orders before FILE, so FILE is not minimal. */
+ *file->is_minimal = false;
+ else
+ {
+ *file->is_minimal = true;
+ if (cmp > 0) /* FILE orders before MIN: it is the new minimum. */
+ min = file;
+ }
+ }
+ else
+ *file->is_minimal = false;
+
+ if (min != NULL)
+ {
+ /* MIN only advances to a strictly smaller case, so every input
+ before the final MIN has larger BY values. Clear any flags
+ that were set while those inputs looked minimal. */
+ for (file = cm->inputs; file < min; file++)
+ *file->is_minimal = false;
+ subcase_extract (&min->by_vars, min->data, cm->by_values);
+ *by = cm->by_values;
+ return true;
+ }
+ else
+ {
+ *by = NULL;
+ return false;
+ }
+}
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2008 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef DATA_CASE_MATCHER_H
+#define DATA_CASE_MATCHER_H 1
+
+#include <stdbool.h>
+
+struct ccase;
+struct subcase;
+union value;
+
+struct case_matcher *case_matcher_create (void);
+void case_matcher_add_input (struct case_matcher *, const struct subcase *,
+ const struct ccase *, bool *is_minimal);
+void case_matcher_destroy (struct case_matcher *);
+
+bool case_matcher_match (struct case_matcher *, union value **by);
+
+#endif /* data/case-matcher.h */
+++ /dev/null
-/* PSPP - a program for statistical analysis.
- Copyright (C) 2007 Free Software Foundation, Inc.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>. */
-
-#include <config.h>
-
-#include <data/case-ordering.h>
-
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <data/dictionary.h>
-#include <data/variable.h>
-
-#include "xalloc.h"
-
-/* One key used for sorting. */
-struct sort_key
- {
- const struct variable *var; /* Variable. */
- enum sort_direction dir; /* Sort direction. */
- };
-
-/* A set of criteria for ordering cases. */
-struct case_ordering
- {
- /* Sort keys. */
- struct sort_key *keys;
- size_t key_cnt;
- };
-
-/* Creates and returns a new case ordering for comparing cases
- that represent dictionary DICT. The case ordering initially
- contains no variables, so that all cases will compare as
- equal. */
-struct case_ordering *
-case_ordering_create (void)
-{
- struct case_ordering *co = xmalloc (sizeof *co);
- co->keys = NULL;
- co->key_cnt = 0;
- return co;
-}
-
-/* Creates and returns a clone of case ordering ORIG. */
-struct case_ordering *
-case_ordering_clone (const struct case_ordering *orig)
-{
- struct case_ordering *co = xmalloc (sizeof *co);
- co->keys = xmemdup (orig->keys, orig->key_cnt * sizeof *orig->keys);
- co->key_cnt = orig->key_cnt;
- return co;
-}
-
-/* Destroys case ordering CO. */
-void
-case_ordering_destroy (struct case_ordering *co)
-{
- if (co != NULL)
- {
- free (co->keys);
- free (co);
- }
-}
-
-/* Compares cases A and B given case ordering CO and returns a
- strcmp()-type result. */
-int
-case_ordering_compare_cases (const struct ccase *a, const struct ccase *b,
- const struct case_ordering *co)
-{
- size_t i;
-
- for (i = 0; i < co->key_cnt; i++)
- {
- const struct sort_key *key = &co->keys[i];
- int width = var_get_width (key->var);
- int cmp;
-
- if (width == 0)
- {
- double af = case_num (a, key->var);
- double bf = case_num (b, key->var);
- if (af == bf)
- continue;
- cmp = af > bf ? 1 : -1;
- }
- else
- {
- const char *as = case_str (a, key->var);
- const char *bs = case_str (b, key->var);
- cmp = memcmp (as, bs, width);
- if (cmp == 0)
- continue;
- }
-
- return key->dir == SRT_ASCEND ? cmp : -cmp;
- }
- return 0;
-}
-
-/* Adds VAR to case ordering CO as an additional sort key in sort
- direction DIR. Returns true if successful, false if VAR was
- already part of the ordering for CO. */
-bool
-case_ordering_add_var (struct case_ordering *co,
- const struct variable *var, enum sort_direction dir)
-{
- struct sort_key *key;
- size_t i;
-
- for (i = 0; i < co->key_cnt; i++)
- if (var_get_case_index (co->keys[i].var) == var_get_case_index (var))
- return false;
-
- co->keys = xnrealloc (co->keys, co->key_cnt + 1, sizeof *co->keys);
- key = &co->keys[co->key_cnt++];
- key->var = var;
- key->dir = dir;
- return true;
-}
-
-/* Returns the number of variables used for ordering within
- CO. */
-size_t
-case_ordering_get_var_cnt (const struct case_ordering *co)
-{
- return co->key_cnt;
-}
-
-/* Returns sort variable IDX within CO. An IDX of 0 returns the
- primary sort key (the one added first), an IDX of 1 returns
- the secondary sort key, and so on. IDX must be less than the
- number of sort variables. */
-const struct variable *
-case_ordering_get_var (const struct case_ordering *co, size_t idx)
-{
- assert (idx < co->key_cnt);
- return co->keys[idx].var;
-}
-
-/* Returns the sort direction for sort variable IDX within CO. */
-enum sort_direction
-case_ordering_get_direction (const struct case_ordering *co, size_t idx)
-{
- assert (idx < co->key_cnt);
- return co->keys[idx].dir;
-}
-
-/* Stores an array listing all of the variables used for sorting
- within CO into *VARS and the number of variables into
- *VAR_CNT. The caller is responsible for freeing *VARS when it
- is no longer needed. */
-void
-case_ordering_get_vars (const struct case_ordering *co,
- const struct variable ***vars, size_t *var_cnt)
-{
- size_t i;
-
- *var_cnt = co->key_cnt;
- *vars = xnmalloc (*var_cnt, sizeof **vars);
- for (i = 0; i < co->key_cnt; i++)
- (*vars)[i] = co->keys[i].var;
-}
-
+++ /dev/null
-/* PSPP - a program for statistical analysis.
- Copyright (C) 2007 Free Software Foundation, Inc.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>. */
-
-/* Sort order for comparing cases. */
-
-#ifndef DATA_CASE_ORDERING_H
-#define DATA_CASE_ORDERING_H 1
-
-#include <stddef.h>
-#include <data/case.h>
-
-struct dictionary;
-
-/* Sort direction. */
-enum sort_direction
- {
- SRT_ASCEND, /* A, B, C, ..., X, Y, Z. */
- SRT_DESCEND /* Z, Y, X, ..., C, B, A. */
- };
-
-/* Creation and destruction. */
-struct case_ordering *case_ordering_create (void);
-struct case_ordering *case_ordering_clone (const struct case_ordering *);
-void case_ordering_destroy (struct case_ordering *);
-
-/* Modification. */
-bool case_ordering_add_var (struct case_ordering *,
- const struct variable *, enum sort_direction);
-
-/* Comparing cases. */
-int case_ordering_compare_cases (const struct ccase *, const struct ccase *,
- const struct case_ordering *);
-
-/* Inspection. */
-size_t case_ordering_get_value_cnt (const struct case_ordering *);
-size_t case_ordering_get_var_cnt (const struct case_ordering *);
-const struct variable *case_ordering_get_var (const struct case_ordering *,
- size_t);
-enum sort_direction case_ordering_get_direction (const struct case_ordering *,
- size_t);
-void case_ordering_get_vars (const struct case_ordering *,
- const struct variable ***, size_t *);
-
-#endif /* data/case-ordering.h */
#include <stdlib.h>
-#include <data/case-ordering.h>
#include <data/casereader.h>
#include <data/casewriter.h>
#include <data/dictionary.h>
+#include <data/subcase.h>
#include <libpspp/taint.h>
#include "xalloc.h"
/* Casegrouper based on equal values of variables from case to
case. */
-/* Casegrouper based on equal variables. */
-struct casegrouper_vars
- {
- const struct variable **vars; /* Variables to compare. */
- size_t var_cnt; /* Number of variables. */
- };
-
static bool casegrouper_vars_same_group (const struct ccase *,
const struct ccase *,
void *);
const struct variable *const *vars,
size_t var_cnt)
{
- if (var_cnt > 0)
+ if (var_cnt > 0)
{
- struct casegrouper_vars *cv = xmalloc (sizeof *cv);
- cv->vars = xmemdup (vars, sizeof *vars * var_cnt);
- cv->var_cnt = var_cnt;
- return casegrouper_create_func (reader,
- casegrouper_vars_same_group,
- casegrouper_vars_destroy,
- cv);
+ struct subcase *sc = xmalloc (sizeof *sc);
+ subcase_init_vars (sc, vars, var_cnt);
+ return casegrouper_create_func (reader, casegrouper_vars_same_group,
+ casegrouper_vars_destroy, sc);
}
else
return casegrouper_create_func (reader, NULL, NULL, NULL);
/* Creates and returns a casegrouper that reads data from READER
and breaks it into contiguous groups of cases that have equal
- values for the variables used for sorting in CO. If CO is
- empty (contains no sort keys), then all the cases will be put
+ values for the fields in SC. SC is only cloned, so the caller
+ keeps ownership of it. If SC is empty (contains no fields),
+ then all the cases will be put
into a single group. */
struct casegrouper *
-casegrouper_create_case_ordering (struct casereader *reader,
- const struct case_ordering *co)
+casegrouper_create_subcase (struct casereader *reader,
+                            const struct subcase *sc)
{
- const struct variable **vars;
- size_t var_cnt;
- struct casegrouper *grouper;
-
- case_ordering_get_vars (co, &vars, &var_cnt);
- grouper = casegrouper_create_vars (reader, vars, var_cnt);
- free (vars);
-
- return grouper;
+  if (subcase_get_n_fields (sc) > 0)
+    {
+      /* The grouper gets a private heap copy of SC, sized from the
+         destination pointer; casegrouper_vars_destroy() releases
+         both the clone's fields and the struct itself. */
+      struct subcase *sc_copy = xmalloc (sizeof *sc_copy);
+      subcase_clone (sc_copy, sc);
+      return casegrouper_create_func (reader, casegrouper_vars_same_group,
+                                      casegrouper_vars_destroy, sc_copy);
+    }
+  else
+    return casegrouper_create_func (reader, NULL, NULL, NULL);
}
/* "same_group" function for an equal-variables casegrouper. */
static bool
casegrouper_vars_same_group (const struct ccase *a, const struct ccase *b,
- void *cv_)
+ void *sc_)
{
- struct casegrouper_vars *cv = cv_;
- return case_compare (a, b, cv->vars, cv->var_cnt) == 0;
+ struct subcase *sc = sc_; /* Aux data is the subcase given to casegrouper_create_func(). */
+ return subcase_equal (sc, a, sc, b); /* The same subcase describes both A and B. */
}
/* "destroy" for an equal-variables casegrouper. */
static void
-casegrouper_vars_destroy (void *cv_)
+casegrouper_vars_destroy (void *sc_)
{
- struct casegrouper_vars *cv = cv_;
- free (cv->vars);
- free (cv);
+ struct subcase *sc = sc_;
+ if (sc != NULL) /* Tolerates a null aux pointer. */
+ {
+ subcase_destroy (sc); /* Frees the field array owned by the subcase. */
+ free (sc); /* The struct itself was xmalloc'ed by the creating function. */
+ }
}
-
#include <stdbool.h>
#include <stddef.h>
-struct case_ordering;
struct casereader;
struct ccase;
struct dictionary;
+struct subcase;
struct variable;
struct casegrouper *
size_t var_cnt);
struct casegrouper *casegrouper_create_splits (struct casereader *,
const struct dictionary *);
-struct casegrouper *casegrouper_create_case_ordering (struct casereader *,
- const struct case_ordering *);
+struct casegrouper *casegrouper_create_subcase (struct casereader *,
+ const struct subcase *);
bool casegrouper_get_next_group (struct casegrouper *, struct casereader **);
bool casegrouper_destroy (struct casegrouper *);
{
candidate = obs_vals->vals + i;
assert (candidate != NULL);
- if (!compare_values (candidate, val, v))
+ if (!compare_values_short (candidate, val, v))
{
return i;
}
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2008 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include <data/subcase.h>
+#include <stdlib.h>
+#include <data/case.h>
+#include <data/variable.h>
+#include <libpspp/assertion.h>
+
+#include "xalloc.h"
+
+/* Makes SC an empty subcase: no field array, zero fields, and
+   zero values. */
+void
+subcase_init_empty (struct subcase *sc)
+{
+  sc->n_values = 0;
+  sc->n_fields = 0;
+  sc->fields = NULL;
+}
+
+/* Sets up SC with one ascending field per variable, taken from
+   the N_VARS variables in VARS in the order given.  Unlike
+   subcase_add_var(), no check for duplicate variables is
+   made. */
+void
+subcase_init_vars (struct subcase *sc,
+                   const struct variable *const *vars, size_t n_vars)
+{
+  size_t total_values = 0;
+  size_t idx;
+
+  sc->fields = xnmalloc (n_vars, sizeof *sc->fields);
+  for (idx = 0; idx < n_vars; idx++)
+    {
+      const struct variable *var = vars[idx];
+      const size_t case_index = var_get_case_index (var);
+      const int width = var_get_width (var);
+
+      sc->fields[idx].case_index = case_index;
+      sc->fields[idx].width = width;
+      sc->fields[idx].direction = SC_ASCEND;
+
+      /* Each field may span several union values. */
+      total_values += value_cnt_from_width (width);
+    }
+  sc->n_fields = n_vars;
+  sc->n_values = total_values;
+}
+
+/* Sets up SC with exactly one field, taken from VAR and sorted
+   according to DIRECTION. */
+void
+subcase_init_var (struct subcase *sc, const struct variable *var,
+                  enum subcase_direction direction)
+{
+  /* Start from the empty state, then append the single field. */
+  sc->fields = NULL;
+  sc->n_fields = 0;
+  sc->n_values = 0;
+
+  subcase_add_var (sc, var, direction);
+}
+
+/* Empties SC of all its fields.  The field array pointer is
+   left untouched; subcase_add_var() reallocates it as fields
+   are added again. */
+void
+subcase_clear (struct subcase *sc)
+{
+  sc->n_values = sc->n_fields = 0;
+}
+
+/* Initializes SC as an independent copy of ORIG, with the same
+   fields in the same order.  The copy owns its own field array,
+   which subcase_destroy() frees. */
+void
+subcase_clone (struct subcase *sc, const struct subcase *orig)
+{
+  /* An empty ORIG may have a null field array (see
+     subcase_init_empty()), and a null pointer must not be handed
+     to a memory-copying routine even for a zero-byte copy, so
+     only duplicate when there is something to copy.  A null
+     array is fine for every other subcase function: they never
+     dereference it while n_fields is 0. */
+  sc->fields = (orig->n_fields > 0
+                ? xmemdup (orig->fields,
+                           orig->n_fields * sizeof *orig->fields)
+                : NULL);
+  sc->n_fields = orig->n_fields;
+  sc->n_values = orig->n_values;
+}
+
+/* Releases the field array that SC owns.  SC itself is not
+   freed; that remains the caller's job. */
+void
+subcase_destroy (struct subcase *sc)
+{
+  struct subcase_field *owned = sc->fields;
+
+  free (owned);
+}
+
+/* Appends a field for VAR to SC, sorted according to DIRECTION.
+   A variable is considered already present when an existing
+   field has the same case index.  Returns true if the field was
+   added, false if VAR already had a field in SC (in which case
+   SC is unchanged). */
+bool
+subcase_add_var (struct subcase *sc, const struct variable *var,
+                 enum subcase_direction direction)
+{
+  const size_t case_index = var_get_case_index (var);
+  struct subcase_field *new_field;
+  size_t idx = 0;
+
+  /* Reject duplicates. */
+  while (idx < sc->n_fields)
+    {
+      if (sc->fields[idx].case_index == case_index)
+        return false;
+      idx++;
+    }
+
+  /* Grow the array by one element and fill in the new slot. */
+  sc->fields = xnrealloc (sc->fields, sc->n_fields + 1, sizeof *sc->fields);
+  new_field = &sc->fields[sc->n_fields];
+  new_field->case_index = case_index;
+  new_field->width = var_get_width (var);
+  new_field->direction = direction;
+
+  sc->n_fields++;
+  sc->n_values += value_cnt_from_width (new_field->width);
+  return true;
+}
+
+/* Tests whether A and B are conformable: they must have the
+   same number of fields and the same total number of values,
+   and corresponding fields must have equal widths.  Case
+   indexes and sort directions are not compared.  A subcase is
+   always conformable with itself. */
+bool
+subcase_conformable (const struct subcase *a, const struct subcase *b)
+{
+  if (a != b)
+    {
+      size_t idx;
+
+      if (a->n_fields != b->n_fields || a->n_values != b->n_values)
+        return false;
+
+      for (idx = a->n_fields; idx-- > 0; )
+        if (a->fields[idx].width != b->fields[idx].width)
+          return false;
+    }
+  return true;
+}
+
+/* Gathers the fields of C that SC designates into VALUES,
+   packing them one after another in field order.  VALUES must
+   have room for at least subcase_get_n_values(SC) elements. */
+void
+subcase_extract (const struct subcase *sc, const struct ccase *c,
+                 union value values[])
+{
+  size_t offset = 0;            /* Next free element in VALUES. */
+  size_t idx;
+
+  for (idx = 0; idx < sc->n_fields; idx++)
+    {
+      const size_t case_index = sc->fields[idx].case_index;
+      const int width = sc->fields[idx].width;
+
+      value_copy (&values[offset], case_data_idx (c, case_index), width);
+      offset += value_cnt_from_width (width);
+    }
+}
+
+/* Stores the values in VALUES into the fields of C that SC
+   designates, consuming VALUES in field order.  VALUES must
+   hold at least subcase_get_n_values(SC) elements, and C must
+   be large enough to contain every field in SC. */
+void
+subcase_inject (const struct subcase *sc,
+                const union value values[], struct ccase *c)
+{
+  size_t offset = 0;            /* Next element of VALUES to read. */
+  size_t idx;
+
+  for (idx = 0; idx < sc->n_fields; idx++)
+    {
+      const size_t case_index = sc->fields[idx].case_index;
+      const int width = sc->fields[idx].width;
+
+      value_copy (case_data_rw_idx (c, case_index), &values[offset], width);
+      offset += value_cnt_from_width (width);
+    }
+}
+
+/* Copies each field of SRC designated by SRC_SC into the
+   corresponding field of DST designated by DST_SC, pairing the
+   fields up by position.  SRC_SC and DST_SC must be conformable
+   (as tested by subcase_conformable()). */
+void
+subcase_copy (const struct subcase *src_sc, const struct ccase *src,
+              const struct subcase *dst_sc, struct ccase *dst)
+{
+  const size_t n_fields = src_sc->n_fields;
+  size_t idx;
+
+  expensive_assert (subcase_conformable (src_sc, dst_sc));
+  for (idx = 0; idx < n_fields; idx++)
+    {
+      const size_t src_index = src_sc->fields[idx].case_index;
+      const size_t dst_index = dst_sc->fields[idx].case_index;
+
+      /* Conformability guarantees equal widths, so the source
+         field's width serves for both sides. */
+      value_copy (case_data_rw_idx (dst, dst_index),
+                  case_data_idx (src, src_index),
+                  src_sc->fields[idx].width);
+    }
+}
+
+/* Compares the fields in A specified in A_SC against the fields
+   in B specified in B_SC.  Returns -1, 0, or 1 if A's fields are
+   lexicographically less than, equal to, or greater than B's
+   fields, respectively.  The sort direction of each field is
+   taken from A_SC; a descending field reverses the result.
+
+   A_SC and B_SC must be conformable (as tested by
+   subcase_conformable()). */
+int
+subcase_compare_3way (const struct subcase *a_sc, const struct ccase *a,
+                      const struct subcase *b_sc, const struct ccase *b)
+{
+  size_t i;
+
+  expensive_assert (subcase_conformable (a_sc, b_sc));
+  for (i = 0; i < a_sc->n_fields; i++)
+    {
+      const struct subcase_field *a_field = &a_sc->fields[i];
+      const struct subcase_field *b_field = &b_sc->fields[i];
+      int cmp = value_compare_3way (case_data_idx (a, a_field->case_index),
+                                    case_data_idx (b, b_field->case_index),
+                                    a_field->width);
+      if (cmp != 0)
+        {
+          /* value_compare_3way() only promises a strcmp()-type
+             result (it may forward memcmp()'s value), so reduce
+             it to its sign before applying the direction.  This
+             honors the documented -1/0/1 contract and never
+             negates an arbitrary int. */
+          int sign = cmp > 0 ? 1 : -1;
+          return a_field->direction == SC_ASCEND ? sign : -sign;
+        }
+    }
+  return 0;
+}
+
+/* Compares the values in A against the values in B specified by
+   SC's fields.  A is an array of values packed in SC's field
+   order (as produced by subcase_extract()).  Returns -1, 0, or 1
+   if A's values are lexicographically less than, equal to, or
+   greater than B's values, respectively; a descending field
+   reverses the result. */
+int
+subcase_compare_3way_xc (const struct subcase *sc,
+                         const union value a[], const struct ccase *b)
+{
+  size_t i;
+
+  for (i = 0; i < sc->n_fields; i++)
+    {
+      const struct subcase_field *field = &sc->fields[i];
+      int cmp = value_compare_3way (a, case_data_idx (b, field->case_index),
+                                    field->width);
+      if (cmp != 0)
+        {
+          /* Reduce the strcmp()-type result to its sign so that
+             the documented -1/0/1 contract holds and no
+             arbitrary int is negated. */
+          int sign = cmp > 0 ? 1 : -1;
+          return field->direction == SC_ASCEND ? sign : -sign;
+        }
+      /* Advance past this field's values in the packed array. */
+      a += value_cnt_from_width (field->width);
+    }
+  return 0;
+}
+
+/* Compares the values in A specified by SC's fields against the
+   values in B.  B is an array of values packed in SC's field
+   order.  Returns -1, 0, or 1 if A's values are
+   lexicographically less than, equal to, or greater than B's
+   values, respectively. */
+int
+subcase_compare_3way_cx (const struct subcase *sc,
+                         const struct ccase *a, const union value b[])
+{
+  /* Compare with the operands swapped, then flip the sign.  The
+     sign is extracted explicitly rather than negating the
+     result, so the return value is exactly -1, 0, or 1 whatever
+     magnitude the underlying comparison yields. */
+  int swapped = subcase_compare_3way_xc (sc, b, a);
+  return (swapped < 0) - (swapped > 0);
+}
+
+/* Compares the values in A against the values in B, using SC to
+   obtain the number, width, and sort direction of each value.
+   Both A and B are arrays of values packed in SC's field order.
+   Returns -1, 0, or 1 if A's values are lexicographically less
+   than, equal to, or greater than B's values, respectively. */
+int
+subcase_compare_3way_xx (const struct subcase *sc,
+                         const union value a[], const union value b[])
+{
+  size_t i;
+
+  for (i = 0; i < sc->n_fields; i++)
+    {
+      const struct subcase_field *field = &sc->fields[i];
+      size_t n_values;
+      int cmp;
+
+      cmp = value_compare_3way (a, b, field->width);
+      if (cmp != 0)
+        {
+          /* Reduce the strcmp()-type result to its sign so that
+             the documented -1/0/1 contract holds and no
+             arbitrary int is negated. */
+          int sign = cmp > 0 ? 1 : -1;
+          return field->direction == SC_ASCEND ? sign : -sign;
+        }
+
+      /* Step both packed arrays past this field. */
+      n_values = value_cnt_from_width (field->width);
+      a += n_values;
+      b += n_values;
+    }
+  return 0;
+}
+
+/* Tests the fields in A specified in A_SC against the fields in
+   B specified in B_SC for equality.  Returns true if every pair
+   of corresponding fields holds equal values, false otherwise.
+
+   A_SC and B_SC must be conformable (as tested by
+   subcase_conformable()). */
+bool
+subcase_equal (const struct subcase *a_sc, const struct ccase *a,
+               const struct subcase *b_sc, const struct ccase *b)
+{
+  const int order = subcase_compare_3way (a_sc, a, b_sc, b);
+
+  return !order;
+}
+
+/* Tests the packed values in A against the values in B that
+   SC's fields designate.  Returns true if they are all equal,
+   otherwise false. */
+bool
+subcase_equal_xc (const struct subcase *sc,
+                  const union value a[], const struct ccase *b)
+{
+  const int order = subcase_compare_3way_xc (sc, a, b);
+
+  return !order;
+}
+
+/* Tests the values in A that SC's fields designate against the
+   packed values in B.  Returns true if they are all equal,
+   otherwise false. */
+bool
+subcase_equal_cx (const struct subcase *sc,
+                  const struct ccase *a, const union value b[])
+{
+  const int order = subcase_compare_3way_cx (sc, a, b);
+
+  return !order;
+}
+
+/* Tests the packed values in A against the packed values in B,
+   with SC supplying the number and width of each value.
+   Returns true if they are all equal, otherwise false. */
+bool
+subcase_equal_xx (const struct subcase *sc,
+                  const union value a[], const union value b[])
+{
+  const int order = subcase_compare_3way_xx (sc, a, b);
+
+  return !order;
+}
+
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2008 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef DATA_SUBCASE_H
+#define DATA_SUBCASE_H 1
+
+#include <stdbool.h>
+#include <stddef.h>
+
+struct ccase;
+union value;
+struct variable;
+
+/* Sort order of one subcase field; SC_DESCEND reverses that field's comparison result. */
+enum subcase_direction
+ {
+ SC_ASCEND, /* A, B, C, ..., X, Y, Z. */
+ SC_DESCEND /* Z, Y, X, ..., C, B, A. */
+ };
+
+/* One field of a subcase: a value within a case, plus the direction to sort it in. */
+struct subcase_field
+ {
+ size_t case_index; /* Starting position in the case (from var_get_case_index()). */
+ int width; /* 0=numeric, otherwise string width. */
+ enum subcase_direction direction; /* Sort order. */
+ };
+
+/* A subcase specifies how to draw values from a case: an ordered list of fields. */
+struct subcase
+ {
+ struct subcase_field *fields; /* Array of n_fields fields; null in a freshly emptied subcase. */
+ size_t n_fields; /* Number of fields in use. */
+ size_t n_values; /* Sum of value_cnt_from_width() over all fields. */
+ };
+
+void subcase_init_empty (struct subcase *);
+void subcase_init_vars (struct subcase *,
+ const struct variable *const *, size_t n_vars);
+void subcase_init_var (struct subcase *,
+ const struct variable *, enum subcase_direction);
+void subcase_clone (struct subcase *, const struct subcase *);
+void subcase_clear (struct subcase *);
+void subcase_destroy (struct subcase *); /* Frees the fields, not the struct itself. */
+/* Adding fields. */
+bool subcase_add_var (struct subcase *, const struct variable *,
+ enum subcase_direction);
+/* Inspection (defined inline at the end of this header). */
+static inline bool subcase_is_empty (const struct subcase *);
+static inline size_t subcase_get_n_fields (const struct subcase *);
+static inline size_t subcase_get_n_values (const struct subcase *);
+
+static inline enum subcase_direction subcase_get_direction (
+ const struct subcase *, size_t idx);
+/* Compatibility test required by the two-subcase functions below. */
+bool subcase_conformable (const struct subcase *, const struct subcase *);
+/* Moving field data between cases and packed arrays of values. */
+void subcase_extract (const struct subcase *, const struct ccase *,
+ union value *values);
+void subcase_inject (const struct subcase *,
+ const union value *values, struct ccase *);
+void subcase_copy (const struct subcase *src_sc, const struct ccase *src,
+ const struct subcase *dst_sc, struct ccase *dst);
+/* Comparison; in suffixes, x = array of values and c = case (A, then B). */
+int subcase_compare_3way (const struct subcase *a_sc, const struct ccase *a,
+ const struct subcase *b_sc, const struct ccase *b);
+int subcase_compare_3way_xc (const struct subcase *,
+ const union value *a, const struct ccase *b);
+int subcase_compare_3way_cx (const struct subcase *,
+ const struct ccase *a, const union value *b);
+int subcase_compare_3way_xx (const struct subcase *,
+ const union value *a, const union value *b);
+bool subcase_equal (const struct subcase *a_sc, const struct ccase *a,
+ const struct subcase *b_sc, const struct ccase *b);
+bool subcase_equal_xc (const struct subcase *,
+ const union value *a, const struct ccase *b);
+bool subcase_equal_cx (const struct subcase *,
+ const struct ccase *a, const union value *b);
+bool subcase_equal_xx (const struct subcase *,
+ const union value *a, const union value *b);
+
+/* Returns the sort direction of the field at index IDX in SC.
+   IDX is not range-checked here. */
+static inline enum subcase_direction
+subcase_get_direction (const struct subcase *sc, size_t idx)
+{
+  const struct subcase_field *field = &sc->fields[idx];
+
+  return field->direction;
+}
+
+/* Returns true if SC contains no fields, false otherwise. */
+static inline bool
+subcase_is_empty (const struct subcase *sc)
+{
+  return sc->n_fields > 0 ? false : true;
+}
+
+/* Returns the number of fields in SC. */
+static inline size_t
+subcase_get_n_fields (const struct subcase *sc)
+{
+  const size_t count = sc->n_fields;
+
+  return count;
+}
+
+/* Returns the total number of union values that SC's fields
+   span, which is the array size subcase_extract() needs. */
+static inline size_t
+subcase_get_n_values (const struct subcase *sc)
+{
+  const size_t count = sc->n_values;
+
+  return count;
+}
+
+#endif /* data/subcase.h */
Only the short string portion of longer strings are
compared. */
int
-compare_values (const void *a_, const void *b_, const void *var_)
+compare_values_short (const void *a_, const void *b_, const void *var_)
{
const union value *a = a_;
const union value *b = b_;
const struct variable *var = var_;
int width = var_get_width (var);
- return (width == 0
- ? (a->f < b->f ? -1 : a->f > b->f)
- : memcmp (a->s, b->s, MIN (MAX_SHORT_STRING, width)));
+ return value_compare_3way (a, b, MIN (width, MAX_SHORT_STRING)); /* Clamp so at most MAX_SHORT_STRING bytes of a string are compared. */
}
+
/* Create a hash of V, which has the given WIDTH.
Only the short string portion of a longer string is hashed. */
unsigned
-hash_value (const void *v_, const void *var_)
+hash_value_short (const void *v_, const void *var_)
{
const union value *v = v_;
const struct variable *var = var_;
if (new_width > old_width)
memset (&value->s[old_width], ' ', new_width - old_width);
}
+
+/* Three-way comparison of A and B, both of the given WIDTH
+   (0 for numeric, otherwise a string width in bytes).  Returns
+   a strcmp()-type result: negative, zero, or positive.  For
+   strings the magnitude is whatever memcmp() yields. */
+int
+value_compare_3way (const union value *a, const union value *b, int width)
+{
+  if (width != 0)
+    return memcmp (a->s, b->s, width);
+
+  if (a->f < b->f)
+    return -1;
+  return a->f > b->f;
+}
int compare_values (const void *, const void *, const void *var);
unsigned hash_value (const void *, const void *var);
+int compare_values_short (const void *, const void *, const void *var);
+unsigned hash_value_short (const void *, const void *var);
+
static inline size_t value_cnt_from_width (int width);
void value_copy (union value *, const union value *, int width);
void value_set_missing (union value *, int width);
bool value_is_resizable (const union value *, int old_width, int new_width);
void value_resize (union value *, int old_width, int new_width);
+int value_compare_3way (const union value *, const union value *, int width);
/* Number of "union value"s required for a variable of the given
WIDTH. */
DEF_CMD (S_ANY, F_KEEP_FINAL_TOKEN, "TITLE", cmd_title)
/* Commands that define (or replace) the active file. */
+DEF_CMD (S_INITIAL | S_DATA, 0, "ADD FILES", cmd_add_files)
DEF_CMD (S_INITIAL | S_DATA | S_INPUT_PROGRAM | S_FILE_TYPE, 0, "DATA LIST", cmd_data_list)
DEF_CMD (S_INITIAL | S_DATA, 0, "GET", cmd_get)
DEF_CMD (S_INITIAL | S_DATA, 0, "GET DATA", cmd_get_data)
DEF_CMD (S_INITIAL | S_DATA, 0, "IMPORT", cmd_import)
DEF_CMD (S_INITIAL | S_DATA, 0, "INPUT PROGRAM", cmd_input_program)
+DEF_CMD (S_INITIAL | S_DATA, 0, "MATCH FILES", cmd_match_files)
+DEF_CMD (S_INITIAL | S_DATA, 0, "UPDATE", cmd_update)
/* Transformations and utilities that may appear after active
file definition or within INPUT PROGRAM. */
DEF_CMD (S_DATA, 0, "FLIP", cmd_flip)
DEF_CMD (S_DATA, 0, "FREQUENCIES", cmd_frequencies)
DEF_CMD (S_DATA, 0, "LIST", cmd_list)
-DEF_CMD (S_DATA, 0, "MATCH FILES", cmd_match_files)
DEF_CMD (S_DATA, 0, "MEANS", cmd_means)
DEF_CMD (S_DATA, 0, "MODIFY VARS", cmd_modify_vars)
DEF_CMD (S_DATA, 0, "NPAR TESTS", cmd_npar_tests)
/* Unimplemented commands. */
UNIMPL_CMD ("2SLS", "Two stage least squares regression")
UNIMPL_CMD ("ACF", "Autocorrelation function")
-UNIMPL_CMD ("ADD FILES", "Add files to dictionary")
UNIMPL_CMD ("ALSCAL", "Multidimensional scaling")
UNIMPL_CMD ("ANACOR", "Correspondence analysis")
UNIMPL_CMD ("ANOVA", "Factorial analysis of variance")
UNIMPL_CMD ("TWOSTEP CLUSTER", "Cluster observations")
UNIMPL_CMD ("UNIANOVA", "Univariate analysis")
UNIMPL_CMD ("UNNUMBERED", "obsolete")
-UNIMPL_CMD ("UPDATE", "Update working file")
UNIMPL_CMD ("VALIDATEDATA", "Identify suspicious cases")
UNIMPL_CMD ("VARCOMP", "Estimate variance")
UNIMPL_CMD ("VARSTOCASES", "Restructure complex data")
src/language/data-io/list.c
language_data_io_sources = \
+ src/language/data-io/combine-files.c \
src/language/data-io/data-list.c \
src/language/data-io/data-parser.c \
src/language/data-io/data-parser.h \
- src/language/data-io/get.c \
- src/language/data-io/get-data.c \
- src/language/data-io/inpt-pgm.c \
- src/language/data-io/inpt-pgm.h \
- src/language/data-io/print.c \
- src/language/data-io/print-space.c \
src/language/data-io/data-reader.c \
src/language/data-io/data-reader.h \
src/language/data-io/data-writer.c \
src/language/data-io/data-writer.h \
src/language/data-io/file-handle.h \
+ src/language/data-io/get-data.c \
+ src/language/data-io/get.c \
+ src/language/data-io/inpt-pgm.c \
+ src/language/data-io/inpt-pgm.h \
src/language/data-io/placement-parser.c \
- src/language/data-io/placement-parser.h
+ src/language/data-io/placement-parser.h \
+ src/language/data-io/print-space.c \
+ src/language/data-io/print.c \
+ src/language/data-io/save.c \
+ src/language/data-io/trim.c \
+ src/language/data-io/trim.h
all_q_sources += $(src_language_data_io_built_sources:.c=.q)
EXTRA_DIST += $(src_language_data_io_built_sources:.c=.q)
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 1997-9, 2000, 2006, 2007, 2008 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <stdlib.h>
+
+#include <data/any-reader.h>
+#include <data/case-matcher.h>
+#include <data/case.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
+#include <data/dictionary.h>
+#include <data/format.h>
+#include <data/procedure.h>
+#include <data/subcase.h>
+#include <data/variable.h>
+#include <language/command.h>
+#include <language/data-io/file-handle.h>
+#include <language/data-io/trim.h>
+#include <language/lexer/lexer.h>
+#include <language/lexer/variable-parser.h>
+#include <language/stats/sort-criteria.h>
+#include <libpspp/assertion.h>
+#include <libpspp/message.h>
+#include <libpspp/taint.h>
+#include <math/sort.h>
+
+#include "xalloc.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+enum comb_command_type
+ {
+ COMB_ADD, /* ADD FILES (see cmd_add_files). */
+ COMB_MATCH, /* MATCH FILES (see cmd_match_files). */
+ COMB_UPDATE /* UPDATE (see cmd_update). */
+ };
+
+/* Kinds of input file, according to the subcommand that named the file. */
+enum comb_file_type
+ {
+ COMB_FILE, /* Specified on FILE= subcommand. */
+ COMB_TABLE /* Specified on TABLE= subcommand (accepted for MATCH FILES only). */
+ };
+
+/* One FILE or TABLE subcommand, that is, one input to the combining command. */
+struct comb_file
+ {
+ /* Basics. */
+ enum comb_file_type type; /* COMB_FILE or COMB_TABLE. */
+
+ /* Variables. */
+ struct subcase by_vars; /* BY variables in this input file. */
+ struct subcase src, dst; /* Data to copy to output; where to put it. */
+
+ /* Input files. */
+ struct file_handle *handle; /* Input file handle; null for the active file ("*"). */
+ struct dictionary *dict; /* Input file dictionary (a clone, for the active file). */
+ struct casereader *reader; /* Input data source; for the active file, opened only after parsing. */
+ struct ccase data; /* The current input case. */
+ bool is_minimal; /* Does 'data' have minimum BY values across
+ all input files? */
+ bool is_sorted; /* Is file presorted on the BY variables?  False if SORT was given. */
+
+ /* IN subcommand. */
+ char in_name[VAR_NAME_LEN + 1]; /* Variable name given on IN, or "" if none. */
+ struct variable *in_var; /* Filled in by create_flag_var ("IN", ...); starts out null. */
+ };
+
+struct comb_proc /* State for one run of ADD FILES, MATCH FILES, or UPDATE. */
+ {
+ struct comb_file *files; /* All the files being merged. */
+ size_t n_files; /* Number of files. */
+
+ struct dictionary *dict; /* Dictionary of output file. */
+ struct subcase by_vars; /* BY variables in the output. */
+ struct casewriter *output; /* Destination for output. */
+
+ struct case_matcher *matcher; /* Given each COMB_FILE input's BY subcase and current case. */
+
+ /* FIRST, LAST.
+ Only if "first" or "last" is nonnull are the remaining
+ members used. */
+ struct variable *first; /* Variable specified on FIRST (if any). */
+ struct variable *last; /* Variable specified on LAST (if any). */
+ struct ccase buffered_case; /* Case ready for output except that we don't
+ know the value for the LAST variable yet. */
+ union value *prev_BY; /* Values of BY vars in buffered_case. */
+ };
+
+static int combine_files (enum comb_command_type, struct lexer *,
+ struct dataset *);
+static void free_comb_proc (struct comb_proc *);
+/* Input file handling. */
+static void close_all_comb_files (struct comb_proc *);
+static bool merge_dictionary (struct dictionary *const, struct comb_file *);
+/* One executor per comb_command_type; combine_files() dispatches to one of them. */
+static void execute_update (struct comb_proc *);
+static void execute_match_files (struct comb_proc *);
+static void execute_add_files (struct comb_proc *);
+/* Flag variables (used for IN, FIRST, and LAST) and case output. */
+static bool create_flag_var (const char *subcommand_name, const char *var_name,
+ struct dictionary *, struct variable **);
+static void output_case (struct comb_proc *, struct ccase *, union value *by);
+static void output_buffered_case (struct comb_proc *);
+
+/* Entry point for the ADD FILES command.  Returns the command
+   result produced by combine_files(). */
+int
+cmd_add_files (struct lexer *lexer, struct dataset *ds)
+{
+  const enum comb_command_type command = COMB_ADD;
+
+  return combine_files (command, lexer, ds);
+}
+
+/* Entry point for the MATCH FILES command.  Returns the command
+   result produced by combine_files(). */
+int
+cmd_match_files (struct lexer *lexer, struct dataset *ds)
+{
+  const enum comb_command_type command = COMB_MATCH;
+
+  return combine_files (command, lexer, ds);
+}
+
+/* Entry point for the UPDATE command.  Returns the command
+   result produced by combine_files(). */
+int
+cmd_update (struct lexer *lexer, struct dataset *ds)
+{
+  const enum comb_command_type command = COMB_UPDATE;
+
+  return combine_files (command, lexer, ds);
+}
+
+static int
+combine_files (enum comb_command_type command,
+ struct lexer *lexer, struct dataset *ds)
+{
+ struct comb_proc proc;
+
+ bool saw_by = false;
+ bool saw_sort = false;
+ struct casereader *active_file = NULL;
+
+ char first_name[VAR_NAME_LEN + 1] = "";
+ char last_name[VAR_NAME_LEN + 1] = "";
+
+ struct taint *taint = NULL;
+
+ size_t n_tables = 0;
+ size_t allocated_files = 0;
+
+ size_t i;
+
+ proc.files = NULL;
+ proc.n_files = 0;
+ proc.dict = dict_create ();
+ proc.output = NULL;
+ proc.matcher = NULL;
+ subcase_init_empty (&proc.by_vars);
+ proc.first = NULL;
+ proc.last = NULL;
+ case_nullify (&proc.buffered_case);
+ proc.prev_BY = NULL;
+
+ dict_set_case_limit (proc.dict, dict_get_case_limit (dataset_dict (ds)));
+
+ lex_match (lexer, '/');
+ for (;;)
+ {
+ struct comb_file *file;
+ enum comb_file_type type;
+
+ if (lex_match_id (lexer, "FILE"))
+ type = COMB_FILE;
+ else if (command == COMB_MATCH && lex_match_id (lexer, "TABLE"))
+ {
+ type = COMB_TABLE;
+ n_tables++;
+ }
+ else
+ break;
+ lex_match (lexer, '=');
+
+ if (proc.n_files >= allocated_files)
+ proc.files = x2nrealloc (proc.files, &allocated_files,
+ sizeof *proc.files);
+ file = &proc.files[proc.n_files++];
+ file->type = type;
+ subcase_init_empty (&file->by_vars);
+ subcase_init_empty (&file->src);
+ subcase_init_empty (&file->dst);
+ file->handle = NULL;
+ file->dict = NULL;
+ file->reader = NULL;
+ case_nullify (&file->data);
+ file->is_sorted = true;
+ file->in_name[0] = '\0';
+ file->in_var = NULL;
+
+ if (lex_match (lexer, '*'))
+ {
+ if (!proc_has_active_file (ds))
+ {
+ msg (SE, _("Cannot specify the active file since no active "
+ "file has been defined."));
+ goto error;
+ }
+
+ if (proc_make_temporary_transformations_permanent (ds))
+ msg (SE, _("This command may not be used after TEMPORARY when "
+ "the active file is an input source. "
+ "Temporary transformations will be made permanent."));
+
+ file->dict = dict_clone (dataset_dict (ds));
+ }
+ else
+ {
+ file->handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
+ if (file->handle == NULL)
+ goto error;
+
+ file->reader = any_reader_open (file->handle, &file->dict);
+ if (file->reader == NULL)
+ goto error;
+ }
+
+ while (lex_match (lexer, '/'))
+ if (lex_match_id (lexer, "RENAME"))
+ {
+ if (!parse_dict_rename (lexer, file->dict))
+ goto error;
+ }
+ else if (lex_match_id (lexer, "IN"))
+ {
+ lex_match (lexer, '=');
+ if (lex_token (lexer) != T_ID)
+ {
+ lex_error (lexer, NULL);
+ goto error;
+ }
+
+ if (file->in_name[0])
+ {
+ msg (SE, _("Multiple IN subcommands for a single FILE or "
+ "TABLE."));
+ goto error;
+ }
+ strcpy (file->in_name, lex_tokid (lexer));
+ lex_get (lexer);
+ }
+ else if (lex_match_id (lexer, "SORT"))
+ {
+ file->is_sorted = false;
+ saw_sort = true;
+ }
+
+ merge_dictionary (proc.dict, file);
+ }
+
+ while (lex_token (lexer) != '.')
+ {
+ if (lex_match (lexer, T_BY))
+ {
+ const struct variable **by_vars;
+ size_t i;
+ bool ok;
+
+ if (saw_by)
+ {
+ lex_sbc_only_once ("BY");
+ goto error;
+ }
+ saw_by = true;
+
+ lex_match (lexer, '=');
+ if (!parse_sort_criteria (lexer, proc.dict, &proc.by_vars,
+ &by_vars, NULL))
+ goto error;
+
+ ok = true;
+ for (i = 0; i < proc.n_files; i++)
+ {
+ struct comb_file *file = &proc.files[i];
+ size_t j;
+
+ for (j = 0; j < subcase_get_n_values (&proc.by_vars); j++)
+ {
+ const char *name = var_get_name (by_vars[j]);
+ struct variable *var = dict_lookup_var (file->dict, name);
+ if (var != NULL)
+ subcase_add_var (&file->by_vars, var,
+ subcase_get_direction (&proc.by_vars, j));
+ else
+ {
+ if (file->handle != NULL)
+ msg (SE, _("File %s lacks BY variable %s."),
+ fh_get_name (file->handle), name);
+ else
+ msg (SE, _("Active file lacks BY variable %s."), name);
+ ok = false;
+ }
+ }
+ assert (!ok || subcase_conformable (&file->by_vars,
+ &proc.files[0].by_vars));
+ }
+ free (by_vars);
+
+ if (!ok)
+ goto error;
+ }
+ else if (command != COMB_UPDATE && lex_match_id (lexer, "FIRST"))
+ {
+ if (first_name[0] != '\0')
+ {
+ lex_sbc_only_once ("FIRST");
+ goto error;
+ }
+
+ lex_match (lexer, '=');
+ if (!lex_force_id (lexer))
+ goto error;
+ strcpy (first_name, lex_tokid (lexer));
+ lex_get (lexer);
+ }
+ else if (command != COMB_UPDATE && lex_match_id (lexer, "LAST"))
+ {
+ if (last_name[0] != '\0')
+ {
+ lex_sbc_only_once ("LAST");
+ goto error;
+ }
+
+ lex_match (lexer, '=');
+ if (!lex_force_id (lexer))
+ goto error;
+ strcpy (last_name, lex_tokid (lexer));
+ lex_get (lexer);
+ }
+ else if (lex_match_id (lexer, "MAP"))
+ {
+ /* FIXME. */
+ }
+ else if (lex_match_id (lexer, "DROP"))
+ {
+ if (!parse_dict_drop (lexer, proc.dict))
+ goto error;
+ }
+ else if (lex_match_id (lexer, "KEEP"))
+ {
+ if (!parse_dict_keep (lexer, proc.dict))
+ goto error;
+ }
+ else
+ {
+ lex_error (lexer, NULL);
+ goto error;
+ }
+
+ if (!lex_match (lexer, '/') && lex_token (lexer) != '.')
+ {
+ lex_end_of_command (lexer);
+ goto error;
+ }
+ }
+
+ if (!saw_by)
+ {
+ if (command == COMB_UPDATE)
+ {
+ msg (SE, _("The BY subcommand is required."));
+ goto error;
+ }
+ if (n_tables)
+ {
+ msg (SE, _("BY is required when TABLE is specified."));
+ goto error;
+ }
+ if (saw_sort)
+ {
+ msg (SE, _("BY is required when SORT is specified."));
+ goto error;
+ }
+ }
+
+ /* Add IN, FIRST, and LAST variables to master dictionary. */
+ for (i = 0; i < proc.n_files; i++)
+ {
+ struct comb_file *file = &proc.files[i];
+ if (!create_flag_var ("IN", file->in_name, proc.dict, &file->in_var))
+ goto error;
+ }
+ if (!create_flag_var ("FIRST", first_name, proc.dict, &proc.first)
+ || !create_flag_var ("LAST", last_name, proc.dict, &proc.last))
+ goto error;
+
+ dict_delete_scratch_vars (proc.dict);
+ dict_compact_values (proc.dict);
+
+ /* Set up mapping from each file's variables to master
+ variables. */
+ for (i = 0; i < proc.n_files; i++)
+ {
+ struct comb_file *file = &proc.files[i];
+ size_t src_var_cnt = dict_get_var_cnt (file->dict);
+ size_t j;
+
+ for (j = 0; j < src_var_cnt; j++)
+ {
+ struct variable *src_var = dict_get_var (file->dict, j);
+ struct variable *dst_var = dict_lookup_var (proc.dict,
+ var_get_name (src_var));
+ if (dst_var != NULL)
+ {
+ subcase_add_var (&file->src, src_var, SC_ASCEND);
+ subcase_add_var (&file->dst, dst_var, SC_ASCEND);
+ }
+ }
+ }
+
+ proc.output = autopaging_writer_create (dict_get_next_value_idx (proc.dict));
+ taint = taint_clone (casewriter_get_taint (proc.output));
+
+ /* Set up case matcher. */
+ proc.matcher = case_matcher_create ();
+ for (i = 0; i < proc.n_files; i++)
+ {
+ struct comb_file *file = &proc.files[i];
+ if (file->reader == NULL)
+ {
+ if (active_file == NULL)
+ {
+ proc_discard_output (ds);
+ file->reader = active_file = proc_open (ds);
+ }
+ else
+ file->reader = casereader_clone (active_file);
+ }
+ if (!file->is_sorted)
+ file->reader = sort_execute (file->reader, &file->by_vars);
+ taint_propagate (casereader_get_taint (file->reader), taint);
+ casereader_read (file->reader, &file->data);
+ if (file->type == COMB_FILE)
+ case_matcher_add_input (proc.matcher, &file->by_vars,
+ &file->data, &file->is_minimal);
+ }
+
+ if (command == COMB_ADD)
+ execute_add_files (&proc);
+ else if (command == COMB_MATCH)
+ execute_match_files (&proc);
+ else if (command == COMB_UPDATE)
+ execute_update (&proc);
+ else
+ NOT_REACHED ();
+
+ case_matcher_destroy (proc.matcher);
+ proc.matcher = NULL;
+ close_all_comb_files (&proc);
+ if (active_file != NULL)
+ proc_commit (ds);
+
+ proc_set_active_file (ds, casewriter_make_reader (proc.output), proc.dict);
+ proc.dict = NULL;
+ proc.output = NULL;
+
+ free_comb_proc (&proc);
+
+ return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
+
+ error:
+ if (active_file != NULL)
+ proc_commit (ds);
+ free_comb_proc (&proc);
+ taint_destroy (taint);
+ return CMD_CASCADING_FAILURE;
+}
+
+/* Merges the dictionary for file F into master dictionary M.
+
+   M's file label is taken from F's dictionary if M has none yet,
+   and F's documents are set as, or appended to, M's documents.
+   Then each non-scratch variable in F's dictionary is either
+   cloned into M (if M has no variable by that name) or checked
+   against the same-named variable in M: the widths must agree,
+   and any value labels, missing values, or variable label that
+   M's variable lacks are copied from F's variable.
+
+   Returns true if successful.  Returns false, after issuing an
+   error message, if a variable in F has a different width from
+   the same-named variable already in M. */
+static bool
+merge_dictionary (struct dictionary *const m, struct comb_file *f)
+{
+  struct dictionary *d = f->dict;
+  const char *d_docs, *m_docs;
+  size_t i;
+
+  if (dict_get_label (m) == NULL)
+    dict_set_label (m, dict_get_label (d));
+
+  d_docs = dict_get_documents (d);
+  m_docs = dict_get_documents (m);
+  if (d_docs != NULL)
+    {
+      if (m_docs == NULL)
+        dict_set_documents (m, d_docs);
+      else
+        {
+          char *new_docs = xasprintf ("%s%s", m_docs, d_docs);
+          dict_set_documents (m, new_docs);
+          free (new_docs);
+        }
+    }
+
+  for (i = 0; i < dict_get_var_cnt (d); i++)
+    {
+      struct variable *dv = dict_get_var (d, i);
+      struct variable *mv;
+
+      /* Scratch variables never become part of the master
+         dictionary. */
+      if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
+        continue;
+
+      mv = dict_lookup_var (m, var_get_name (dv));
+      if (mv == NULL)
+        {
+          dict_clone_var_assert (m, dv, var_get_name (dv));
+          continue;
+        }
+
+      if (var_get_width (mv) != var_get_width (dv))
+        {
+          const char *var_name = var_get_name (dv);
+          /* A file without a handle is the active file, which
+             the command syntax spells "*"; do not hand a null
+             handle to fh_get_name(). */
+          const char *file_name = (f->handle != NULL
+                                   ? fh_get_name (f->handle)
+                                   : "*");
+          struct string s = DS_EMPTY_INITIALIZER;
+          ds_put_format (&s,
+                         _("Variable %s in file %s has different "
+                           "type or width from the same variable in "
+                           "earlier file."),
+                         var_name, file_name);
+          ds_put_cstr (&s, " ");
+          if (var_is_numeric (dv))
+            ds_put_format (&s, _("In file %s, %s is numeric."),
+                           file_name, var_name);
+          else
+            ds_put_format (&s, _("In file %s, %s is a string variable "
+                                 "with width %d."),
+                           file_name, var_name, var_get_width (dv));
+          ds_put_cstr (&s, " ");
+          if (var_is_numeric (mv))
+            ds_put_format (&s, _("In an earlier file, %s was numeric."),
+                           var_name);
+          else
+            ds_put_format (&s, _("In an earlier file, %s was a string "
+                                 "variable with width %d."),
+                           var_name, var_get_width (mv));
+          /* The assembled text contains variable and file names,
+             so it must be passed as an argument, never as the
+             format string itself. */
+          msg (SE, "%s", ds_cstr (&s));
+          ds_destroy (&s);
+          return false;
+        }
+
+      /* Fill in only the properties that the master variable is
+         still missing; properties from earlier files win. */
+      if (var_has_value_labels (dv) && !var_has_value_labels (mv))
+        var_set_value_labels (mv, var_get_value_labels (dv));
+      if (var_has_missing_values (dv) && !var_has_missing_values (mv))
+        var_set_missing_values (mv, var_get_missing_values (dv));
+      if (var_get_label (dv) && !var_get_label (mv))
+        var_set_label (mv, var_get_label (dv));
+    }
+
+  return true;
+}
+
+/* If VAR_NAME is a non-empty string, attempts to create a
+   numeric variable named VAR_NAME, with format F1.0, in DICT, and
+   stores a pointer to the variable in *VAR.  Returns true if
+   successful, false if the variable could not be created because
+   its name duplicates an existing one (in which case a message
+   saying that the variable specified on the given SUBCOMMAND is a
+   duplicate is emitted, and *VAR is null).
+
+   If VAR_NAME is null or empty, stores a null pointer in *VAR
+   and returns true without touching DICT. */
+static bool
+create_flag_var (const char *subcommand, const char *var_name,
+                 struct dictionary *dict, struct variable **var)
+{
+  if (var_name != NULL && var_name[0] != '\0')
+    {
+      struct fmt_spec format = fmt_for_output (FMT_F, 1, 0);
+      *var = dict_create_var (dict, var_name, 0);
+      if (*var == NULL)
+        {
+          /* Argument order must follow the format: the variable
+             name first, then the subcommand name. */
+          msg (SE, _("Variable name %s specified on %s subcommand "
+                     "duplicates an existing variable name."),
+               var_name, subcommand);
+          return false;
+        }
+      var_set_both_formats (*var, &format);
+    }
+  else
+    *var = NULL;
+  return true;
+}
+
+/* Releases every input file held by PROC.  For each file, in
+   array order, this destroys its three subcases, drops its file
+   handle reference, and destroys its dictionary, reader, and
+   current case.  Afterward the file array itself is freed and
+   PROC is left with no files. */
+static void
+close_all_comb_files (struct comb_proc *proc)
+{
+  struct comb_file *f = proc->files;
+  size_t n_left;
+
+  for (n_left = proc->n_files; n_left > 0; n_left--, f++)
+    {
+      subcase_destroy (&f->by_vars);
+      subcase_destroy (&f->src);
+      subcase_destroy (&f->dst);
+      fh_unref (f->handle);
+      dict_destroy (f->dict);
+      casereader_destroy (f->reader);
+      case_destroy (&f->data);
+    }
+
+  free (proc->files);
+  proc->n_files = 0;
+  proc->files = NULL;
+}
+
+/* Frees all the data for the procedure.
+
+ Closes every input file with close_all_comb_files(), then
+ destroys PROC's dictionary, output writer, case matcher, BY
+ subcase, and buffered case, and frees the prev_BY buffer.
+ PROC itself is not freed.
+
+ NOTE(review): members may be null when this runs, so this
+ presumably relies on each destroy function accepting a null
+ argument; confirm. */
+static void
+free_comb_proc (struct comb_proc *proc)
+{
+ close_all_comb_files (proc);
+ dict_destroy (proc->dict);
+ casewriter_destroy (proc->output);
+ case_matcher_destroy (proc->matcher);
+ subcase_destroy (&proc->by_vars);
+ case_destroy (&proc->buffered_case);
+ free (proc->prev_BY);
+}
+\f
+static bool scan_table (struct comb_file *, union value by[]);
+static void create_output_case (const struct comb_proc *, struct ccase *);
+static void apply_case (const struct comb_file *, struct ccase *);
+static void apply_file_case_and_advance (struct comb_file *, struct ccase *,
+ union value by[]);
+static void output_case (struct comb_proc *, struct ccase *, union value by[]);
+static void output_buffered_case (struct comb_proc *);
+
+/* Runs the ADD FILES command.
+
+   For every set of BY values that the case matcher reports, the
+   input files are visited in array order, and each file
+   contributes one output case per input case for as long as it
+   remains minimal.  Any case still buffered for FIRST or LAST
+   handling is flushed at the end. */
+static void
+execute_add_files (struct comb_proc *proc)
+{
+  union value *by;
+
+  for (;;)
+    {
+      size_t idx;
+
+      if (!case_matcher_match (proc->matcher, &by))
+        break;
+
+      for (idx = 0; idx < proc->n_files; idx++)
+        {
+          struct comb_file *src = &proc->files[idx];
+          struct ccase c;
+
+          /* Drain every case in SRC that belongs to the current
+             BY group. */
+          while (src->is_minimal)
+            {
+              create_output_case (proc, &c);
+              apply_file_case_and_advance (src, &c, by);
+              output_case (proc, &c, by);
+            }
+        }
+    }
+
+  output_buffered_case (proc);
+}
+
+/* Runs the MATCH FILES command.
+
+   For every set of BY values that the case matcher reports, a
+   single output case is assembled.  The inputs are visited from
+   the last array element to the first, so data applied from
+   earlier elements is written after, and on top of, data from
+   later ones.  An input of type COMB_FILE contributes its current
+   case only if it is minimal, and is then advanced (BY is passed
+   as null, so its is_minimal flag is not recomputed here).  Any
+   other input is treated as a table: it is scanned forward to
+   the current BY values and contributes its case only on an
+   exact match, without being advanced past it.  Any case still
+   buffered for FIRST or LAST handling is flushed at the end. */
+static void
+execute_match_files (struct comb_proc *proc)
+{
+  union value *by;
+
+  while (case_matcher_match (proc->matcher, &by))
+    {
+      struct ccase merged;
+      size_t remaining = proc->n_files;
+
+      create_output_case (proc, &merged);
+      while (remaining > 0)
+        {
+          struct comb_file *input = &proc->files[--remaining];
+
+          if (input->type != COMB_FILE)
+            {
+              if (scan_table (input, by))
+                apply_case (input, &merged);
+            }
+          else if (input->is_minimal)
+            apply_file_case_and_advance (input, &merged, NULL);
+        }
+      output_case (proc, &merged, by);
+    }
+
+  output_buffered_case (proc);
+}
+
+/* Executes the UPDATE command.
+
+ The first element of PROC's file array is treated as the master
+ file and the remaining elements as transaction files. For each
+ set of BY values that the case matcher reports, one output case
+ is built from the first file that is minimal and then updated in
+ turn from every matching case in the later files. Any further
+ master-file cases with the same BY values are written to the
+ output without being updated, and the number of BY groups in
+ which that happened is reported in a warning at the end.
+
+ Cases are written with casewriter_write() directly rather than
+ through output_case(), so no FIRST or LAST handling takes place
+ here. */
+static void
+execute_update (struct comb_proc *proc)
+{
+ union value *by;
+ size_t n_duplicates = 0;
+
+ while (case_matcher_match (proc->matcher, &by))
+ {
+ struct comb_file *first, *file;
+ struct ccase output;
+
+ /* Find first nonnull case in array and make an output case
+ from it. The search loop has no upper bound: it relies on
+ at least one file being minimal whenever
+ case_matcher_match() returns true (NOTE(review): presumably
+ guaranteed by the matcher; confirm). */
+ create_output_case (proc, &output);
+ for (first = &proc->files[0]; ; first++)
+ if (first->is_minimal)
+ break;
+ apply_file_case_and_advance (first, &output, by);
+
+ /* Read additional cases and update the output case from
+ them. (Don't update the output case from any duplicate
+ cases in the master file.) The start expression adds 1
+ when FIRST is the master file, skipping it, but starts at
+ FIRST itself when FIRST is a transaction file, so that any
+ further matching cases in that file are applied too. */
+ for (file = first + (first == proc->files);
+ file < &proc->files[proc->n_files]; file++)
+ {
+ while (file->is_minimal)
+ apply_file_case_and_advance (file, &output, by);
+ }
+ casewriter_write (proc->output, &output);
+
+ /* Write duplicate cases in the master file directly to the
+ output. n_duplicates counts BY groups that contain
+ duplicates, not the individual duplicate cases. */
+ if (first == proc->files && first->is_minimal)
+ {
+ n_duplicates++;
+ while (first->is_minimal)
+ {
+ create_output_case (proc, &output);
+ apply_file_case_and_advance (first, &output, by);
+ casewriter_write (proc->output, &output);
+ }
+ }
+ }
+
+ if (n_duplicates)
+ msg (SW, _("Encountered %zu sets of duplicate cases in the master file."),
+ n_duplicates);
+}
+
+/* Advances FILE, which must be of type COMB_TABLE, by discarding
+   cases for as long as BY compares greater than the current
+   case's BY variables.  Returns true if FILE then holds a case
+   whose BY variables compare equal to BY; returns false if BY
+   compares less than the current case or if FILE has run out of
+   cases. */
+static bool
+scan_table (struct comb_file *file, union value by[])
+{
+  for (;;)
+    {
+      int order;
+
+      if (case_is_null (&file->data))
+        return false;
+
+      order = subcase_compare_3way_xc (&file->by_vars, by, &file->data);
+      if (order <= 0)
+        return order == 0;
+
+      /* BY is still ahead of the table's current case: drop it
+         and read the next one. */
+      case_destroy (&file->data);
+      casereader_read (file->reader, &file->data);
+    }
+}
+
+/* Initializes OUTPUT as a fresh output case for PROC, sized
+   according to PROC's dictionary.  Every dictionary variable is
+   first set to its missing value for its width; then the IN
+   variable of each input file that has one is reset to 0. */
+static void
+create_output_case (const struct comb_proc *proc, struct ccase *output)
+{
+  const size_t var_cnt = dict_get_var_cnt (proc->dict);
+  size_t idx;
+
+  case_create (output, dict_get_next_value_idx (proc->dict));
+
+  idx = 0;
+  while (idx < var_cnt)
+    {
+      struct variable *var = dict_get_var (proc->dict, idx++);
+      value_set_missing (case_data_rw (output, var), var_get_width (var));
+    }
+
+  for (idx = 0; idx < proc->n_files; idx++)
+    {
+      struct variable *in_var = proc->files[idx].in_var;
+      if (in_var == NULL)
+        continue;
+      case_data_rw (output, in_var)->f = false;
+    }
+}
+
+/* Transfers the values of FILE's current case into output case
+   OUTPUT, using FILE's source-to-destination variable mapping.
+   When FILE has an IN variable, that variable is set to 1 in
+   OUTPUT. */
+static void
+apply_case (const struct comb_file *file, struct ccase *output)
+{
+  subcase_copy (&file->src, &file->data, &file->dst, output);
+
+  if (file->in_var == NULL)
+    return;
+  case_data_rw (output, file->in_var)->f = true;
+}
+
+/* Applies FILE's current case to OUTPUT, as apply_case() does,
+   and then replaces FILE's current case with the next one from
+   its reader.  If BY is nonnull, FILE's is_minimal member is
+   recomputed: it becomes true only if a new case was read and
+   its BY values equal those in BY.  If BY is null, is_minimal is
+   left alone. */
+static void
+apply_file_case_and_advance (struct comb_file *file, struct ccase *output,
+                             union value by[])
+{
+  apply_case (file, output);
+
+  case_destroy (&file->data);
+  casereader_read (file->reader, &file->data);
+
+  if (by != NULL)
+    {
+      bool still_in_group = false;
+
+      if (!case_is_null (&file->data))
+        still_in_group = subcase_equal_cx (&file->by_vars, &file->data, by);
+      file->is_minimal = still_in_group;
+    }
+}
+
+/* Writes OUTPUT, whose BY values have been extracted into BY, to
+ PROC's output file, first initializing any FIRST or LAST
+ variables in OUTPUT to the correct values.
+
+ When neither FIRST nor LAST is in use, OUTPUT is written
+ immediately. Otherwise OUTPUT is moved into PROC's
+ buffered_case and the previously buffered case, if any, is
+ written in its place, so the most recent case always remains
+ buffered until output_buffered_case() is called. */
+static void
+output_case (struct comb_proc *proc, struct ccase *output, union value by[])
+{
+ if (proc->first == NULL && proc->last == NULL)
+ casewriter_write (proc->output, output);
+ else
+ {
+ /* It's harder with LAST, because we can't know whether
+ this case is the last in a group until we've prepared
+ the *next* case also. Thus, we buffer the previous
+ output case until the next one is ready.
+
+ prev_BY is allocated the first time a case comes through
+ here, so a nonnull prev_BY also means that buffered_case
+ holds a case. */
+ bool new_BY;
+ if (proc->prev_BY != NULL)
+ {
+ new_BY = !subcase_equal_xx (&proc->by_vars, proc->prev_BY, by);
+ if (proc->last != NULL)
+ case_data_rw (&proc->buffered_case, proc->last)->f = new_BY;
+ casewriter_write (proc->output, &proc->buffered_case);
+ }
+ else
+ new_BY = true; /* No earlier case: this one starts the first group. */
+
+ case_move (&proc->buffered_case, output);
+ if (proc->first != NULL)
+ case_data_rw (&proc->buffered_case, proc->first)->f = new_BY;
+
+ if (new_BY)
+ {
+ size_t n = (subcase_get_n_values (&proc->by_vars)
+ * sizeof (union value)); /* Size in bytes of one set of BY values. */
+ if (proc->prev_BY == NULL)
+ proc->prev_BY = xmalloc (n);
+ memcpy (proc->prev_BY, by, n); /* Remember this group's BY values
+ for comparison with the next case. */
+ }
+ }
+}
+
+/* Flushes the final buffered case, if there is one, to PROC's
+   output.  A case is buffered only when FIRST or LAST is in use.
+   The flushed case is the last case overall, so its LAST
+   variable, if any, is set to 1 before it is written. */
+static void
+output_buffered_case (struct comb_proc *proc)
+{
+  if (proc->prev_BY == NULL)
+    return;
+
+  if (proc->last != NULL)
+    case_data_rw (&proc->buffered_case, proc->last)->f = 1.0;
+  casewriter_write (proc->output, &proc->buffered_case);
+  case_nullify (&proc->buffered_case);
+}
#include <stdlib.h>
#include <data/any-reader.h>
-#include <data/any-writer.h>
#include <data/case.h>
#include <data/case-map.h>
#include <data/casereader.h>
-#include <data/casewriter.h>
-#include <data/format.h>
#include <data/dictionary.h>
#include <data/por-file-writer.h>
#include <data/procedure.h>
-#include <data/settings.h>
-#include <data/sys-file-writer.h>
-#include <data/transformations.h>
-#include <data/value-labels.h>
-#include <data/variable.h>
#include <language/command.h>
#include <language/data-io/file-handle.h>
+#include <language/data-io/trim.h>
#include <language/lexer/lexer.h>
-#include <language/lexer/variable-parser.h>
-#include <libpspp/assertion.h>
#include <libpspp/compiler.h>
-#include <libpspp/hash.h>
-#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <libpspp/str.h>
-#include <libpspp/taint.h>
#include "xalloc.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
-
-static bool parse_dict_trim (struct lexer *, struct dictionary *);
\f
/* Reading system and portable files. */
IMPORT_CMD
};
-static void get_translate_case (struct ccase *, struct ccase *, void *map_);
-static bool get_destroy_case_map (void *map_);
+static int parse_read_command (struct lexer *, struct dataset *,
+ enum reader_command);
+
+/* GET.
+ Command entry point: hands LEXER and DS to parse_read_command()
+ with GET_CMD and returns its result unchanged. */
+int
+cmd_get (struct lexer *lexer, struct dataset *ds)
+{
+ return parse_read_command (lexer, ds, GET_CMD);
+}
+
+/* IMPORT.
+ Command entry point: hands LEXER and DS to parse_read_command()
+ with IMPORT_CMD and returns its result unchanged. */
+int
+cmd_import (struct lexer *lexer, struct dataset *ds)
+{
+ return parse_read_command (lexer, ds, IMPORT_CMD);
+}
/* Parses a GET or IMPORT command. */
static int
map = case_map_from_dict (dict);
if (map != NULL)
- reader = casereader_create_translator (reader,
- dict_get_next_value_idx (dict),
- get_translate_case,
- get_destroy_case_map,
- map);
+ reader = case_map_create_input_translator (map, reader);
proc_set_active_file (ds, reader, dict);
dict_destroy (dict);
return CMD_CASCADING_FAILURE;
}
-
-static void
-get_translate_case (struct ccase *input, struct ccase *output,
- void *map_)
-{
- struct case_map *map = map_;
- case_map_execute (map, input, output);
- case_destroy (input);
-}
-
-static bool
-get_destroy_case_map (void *map_)
-{
- struct case_map *map = map_;
- case_map_destroy (map);
- return true;
-}
-\f
-/* GET. */
-int
-cmd_get (struct lexer *lexer, struct dataset *ds)
-{
- return parse_read_command (lexer, ds, GET_CMD);
-}
-
-/* IMPORT. */
-int
-cmd_import (struct lexer *lexer, struct dataset *ds)
-{
- return parse_read_command (lexer, ds, IMPORT_CMD);
-}
-\f
-/* Writing system and portable files. */
-
-/* Type of output file. */
-enum writer_type
- {
- SYSFILE_WRITER, /* System file. */
- PORFILE_WRITER /* Portable file. */
- };
-
-/* Type of a command. */
-enum command_type
- {
- XFORM_CMD, /* Transformation. */
- PROC_CMD /* Procedure. */
- };
-
-/* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
- WRITER_TYPE identifies the type of file to write,
- and COMMAND_TYPE identifies the type of command.
-
- On success, returns a writer.
- For procedures only, sets *RETAIN_UNSELECTED to true if cases
- that would otherwise be excluded by FILTER or USE should be
- included.
-
- On failure, returns a null pointer. */
-static struct casewriter *
-parse_write_command (struct lexer *lexer, struct dataset *ds,
- enum writer_type writer_type,
- enum command_type command_type,
- bool *retain_unselected)
-{
- /* Common data. */
- struct file_handle *handle; /* Output file. */
- struct dictionary *dict; /* Dictionary for output file. */
- struct casewriter *writer; /* Writer. */
- struct case_map *map; /* Map from input data to data for writer. */
-
- /* Common options. */
- bool print_map; /* Print map? TODO. */
- bool print_short_names; /* Print long-to-short name map. TODO. */
- struct sfm_write_options sysfile_opts;
- struct pfm_write_options porfile_opts;
-
- assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
- assert (command_type == XFORM_CMD || command_type == PROC_CMD);
- assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
-
- if (command_type == PROC_CMD)
- *retain_unselected = true;
-
- handle = NULL;
- dict = dict_clone (dataset_dict (ds));
- writer = NULL;
- map = NULL;
- print_map = false;
- print_short_names = false;
- sysfile_opts = sfm_writer_default_options ();
- porfile_opts = pfm_writer_default_options ();
-
- case_map_prepare_dict (dict);
- dict_delete_scratch_vars (dict);
-
- lex_match (lexer, '/');
- for (;;)
- {
- if (lex_match_id (lexer, "OUTFILE"))
- {
- if (handle != NULL)
- {
- lex_sbc_only_once ("OUTFILE");
- goto error;
- }
-
- lex_match (lexer, '=');
-
- handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
- if (handle == NULL)
- goto error;
- }
- else if (lex_match_id (lexer, "NAMES"))
- print_short_names = true;
- else if (lex_match_id (lexer, "PERMISSIONS"))
- {
- bool cw;
-
- lex_match (lexer, '=');
- if (lex_match_id (lexer, "READONLY"))
- cw = false;
- else if (lex_match_id (lexer, "WRITEABLE"))
- cw = true;
- else
- {
- lex_error (lexer, _("expecting %s or %s"), "READONLY", "WRITEABLE");
- goto error;
- }
- sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
- }
- else if (command_type == PROC_CMD && lex_match_id (lexer, "UNSELECTED"))
- {
- lex_match (lexer, '=');
- if (lex_match_id (lexer, "RETAIN"))
- *retain_unselected = true;
- else if (lex_match_id (lexer, "DELETE"))
- *retain_unselected = false;
- else
- {
- lex_error (lexer, _("expecting %s or %s"), "RETAIN", "DELETE");
- goto error;
- }
- }
- else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "COMPRESSED"))
- sysfile_opts.compress = true;
- else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "UNCOMPRESSED"))
- sysfile_opts.compress = false;
- else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "VERSION"))
- {
- lex_match (lexer, '=');
- if (!lex_force_int (lexer))
- goto error;
- sysfile_opts.version = lex_integer (lexer);
- lex_get (lexer);
- }
- else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "TYPE"))
- {
- lex_match (lexer, '=');
- if (lex_match_id (lexer, "COMMUNICATIONS"))
- porfile_opts.type = PFM_COMM;
- else if (lex_match_id (lexer, "TAPE"))
- porfile_opts.type = PFM_TAPE;
- else
- {
- lex_error (lexer, _("expecting %s or %s"), "COMM", "TAPE");
- goto error;
- }
- }
- else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "DIGITS"))
- {
- lex_match (lexer, '=');
- if (!lex_force_int (lexer))
- goto error;
- porfile_opts.digits = lex_integer (lexer);
- lex_get (lexer);
- }
- else if (!parse_dict_trim (lexer, dict))
- goto error;
-
- if (!lex_match (lexer, '/'))
- break;
- }
- if (lex_end_of_command (lexer) != CMD_SUCCESS)
- goto error;
-
- if (handle == NULL)
- {
- lex_sbc_missing (lexer, "OUTFILE");
- goto error;
- }
-
- dict_delete_scratch_vars (dict);
- dict_compact_values (dict);
-
- if (fh_get_referent (handle) == FH_REF_FILE)
- {
- switch (writer_type)
- {
- case SYSFILE_WRITER:
- writer = sfm_open_writer (handle, dict, sysfile_opts);
- break;
- case PORFILE_WRITER:
- writer = pfm_open_writer (handle, dict, porfile_opts);
- break;
- }
- }
- else
- writer = any_writer_open (handle, dict);
- if (writer == NULL)
- goto error;
-
- map = case_map_from_dict (dict);
- if (map != NULL)
- writer = casewriter_create_translator (writer,
- case_map_get_value_cnt (map),
- get_translate_case,
- get_destroy_case_map,
- map);
- dict_destroy (dict);
-
- fh_unref (handle);
- return writer;
-
- error:
- fh_unref (handle);
- casewriter_destroy (writer);
- dict_destroy (dict);
- case_map_destroy (map);
- return NULL;
-}
-\f
-/* SAVE and EXPORT. */
-
-/* Parses and performs the SAVE or EXPORT procedure. */
-static int
-parse_output_proc (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
-{
- bool retain_unselected;
- struct variable *saved_filter_variable;
- struct casewriter *output;
- bool ok;
-
- output = parse_write_command (lexer, ds, writer_type, PROC_CMD,
- &retain_unselected);
- if (output == NULL)
- return CMD_CASCADING_FAILURE;
-
- saved_filter_variable = dict_get_filter (dataset_dict (ds));
- if (retain_unselected)
- dict_set_filter (dataset_dict (ds), NULL);
-
- casereader_transfer (proc_open (ds), output);
- ok = casewriter_destroy (output);
- ok = proc_commit (ds) && ok;
-
- dict_set_filter (dataset_dict (ds), saved_filter_variable);
-
- return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
-}
-
-int
-cmd_save (struct lexer *lexer, struct dataset *ds)
-{
- return parse_output_proc (lexer, ds, SYSFILE_WRITER);
-}
-
-int
-cmd_export (struct lexer *lexer, struct dataset *ds)
-{
- return parse_output_proc (lexer, ds, PORFILE_WRITER);
-}
-\f
-/* XSAVE and XEXPORT. */
-
-/* Transformation. */
-struct output_trns
- {
- struct casewriter *writer; /* Writer. */
- };
-
-static trns_proc_func output_trns_proc;
-static trns_free_func output_trns_free;
-
-/* Parses the XSAVE or XEXPORT transformation command. */
-static int
-parse_output_trns (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
-{
- struct output_trns *t = xmalloc (sizeof *t);
- t->writer = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL);
- if (t->writer == NULL)
- {
- free (t);
- return CMD_CASCADING_FAILURE;
- }
-
- add_transformation (ds, output_trns_proc, output_trns_free, t);
- return CMD_SUCCESS;
-}
-
-/* Writes case C to the system file specified on XSAVE or XEXPORT. */
-static int
-output_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED)
-{
- struct output_trns *t = trns_;
- struct ccase tmp;
- case_clone (&tmp, c);
- casewriter_write (t->writer, &tmp);
- return TRNS_CONTINUE;
-}
-
-/* Frees an XSAVE or XEXPORT transformation.
- Returns true if successful, false if an I/O error occurred. */
-static bool
-output_trns_free (void *trns_)
-{
- struct output_trns *t = trns_;
- bool ok = casewriter_destroy (t->writer);
- free (t);
- return ok;
-}
-
-/* XSAVE command. */
-int
-cmd_xsave (struct lexer *lexer, struct dataset *ds)
-{
- return parse_output_trns (lexer, ds, SYSFILE_WRITER);
-}
-
-/* XEXPORT command. */
-int
-cmd_xexport (struct lexer *lexer, struct dataset *ds)
-{
- return parse_output_trns (lexer, ds, PORFILE_WRITER);
-}
-\f
-static bool rename_variables (struct lexer *lexer, struct dictionary *dict);
-static bool drop_variables (struct lexer *, struct dictionary *dict);
-static bool keep_variables (struct lexer *, struct dictionary *dict);
-
-/* Commands that read and write system files share a great deal
- of common syntactic structure for rearranging and dropping
- variables. This function parses this syntax and modifies DICT
- appropriately. Returns true on success, false on failure. */
-static bool
-parse_dict_trim (struct lexer *lexer, struct dictionary *dict)
-{
- if (lex_match_id (lexer, "MAP"))
- {
- /* FIXME. */
- return true;
- }
- else if (lex_match_id (lexer, "DROP"))
- return drop_variables (lexer, dict);
- else if (lex_match_id (lexer, "KEEP"))
- return keep_variables (lexer, dict);
- else if (lex_match_id (lexer, "RENAME"))
- return rename_variables (lexer, dict);
- else
- {
- lex_error (lexer, _("expecting a valid subcommand"));
- return false;
- }
-}
-
-/* Parses and performs the RENAME subcommand of GET and SAVE. */
-static bool
-rename_variables (struct lexer *lexer, struct dictionary *dict)
-{
- size_t i;
-
- int success = 0;
-
- struct variable **v;
- char **new_names;
- size_t nv, nn;
- char *err_name;
-
- int group;
-
- lex_match (lexer, '=');
- if (lex_token (lexer) != '(')
- {
- struct variable *v;
-
- v = parse_variable (lexer, dict);
- if (v == NULL)
- return 0;
- if (!lex_force_match (lexer, '=')
- || !lex_force_id (lexer))
- return 0;
- if (dict_lookup_var (dict, lex_tokid (lexer)) != NULL)
- {
- msg (SE, _("Cannot rename %s as %s because there already exists "
- "a variable named %s. To rename variables with "
- "overlapping names, use a single RENAME subcommand "
- "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
- "\"/RENAME (A B C=B C A)\"."),
- var_get_name (v), lex_tokid (lexer), lex_tokid (lexer));
- return 0;
- }
-
- dict_rename_var (dict, v, lex_tokid (lexer));
- lex_get (lexer);
- return 1;
- }
-
- nv = nn = 0;
- v = NULL;
- new_names = 0;
- group = 1;
- while (lex_match (lexer, '('))
- {
- size_t old_nv = nv;
-
- if (!parse_variables (lexer, dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
- goto done;
- if (!lex_match (lexer, '='))
- {
- msg (SE, _("`=' expected after variable list."));
- goto done;
- }
- if (!parse_DATA_LIST_vars (lexer, &new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
- goto done;
- if (nn != nv)
- {
- msg (SE, _("Number of variables on left side of `=' (%zu) does not "
- "match number of variables on right side (%zu), in "
- "parenthesized group %d of RENAME subcommand."),
- nv - old_nv, nn - old_nv, group);
- goto done;
- }
- if (!lex_force_match (lexer, ')'))
- goto done;
- group++;
- }
-
- if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
- {
- msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
- goto done;
- }
- success = 1;
-
- done:
- for (i = 0; i < nn; i++)
- free (new_names[i]);
- free (new_names);
- free (v);
-
- return success;
-}
-
-/* Parses and performs the DROP subcommand of GET and SAVE.
- Returns true if successful, false on failure.*/
-static bool
-drop_variables (struct lexer *lexer, struct dictionary *dict)
-{
- struct variable **v;
- size_t nv;
-
- lex_match (lexer, '=');
- if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
- return false;
- dict_delete_vars (dict, v, nv);
- free (v);
-
- if (dict_get_var_cnt (dict) == 0)
- {
- msg (SE, _("Cannot DROP all variables from dictionary."));
- return false;
- }
- return true;
-}
-
-/* Parses and performs the KEEP subcommand of GET and SAVE.
- Returns true if successful, false on failure.*/
-static bool
-keep_variables (struct lexer *lexer, struct dictionary *dict)
-{
- struct variable **v;
- size_t nv;
- size_t i;
-
- lex_match (lexer, '=');
- if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
- return false;
-
- /* Move the specified variables to the beginning. */
- dict_reorder_vars (dict, v, nv);
-
- /* Delete the remaining variables. */
- v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
- for (i = nv; i < dict_get_var_cnt (dict); i++)
- v[i - nv] = dict_get_var (dict, i);
- dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
- free (v);
-
- return true;
-}
-\f
-/* MATCH FILES. */
-
-/* File types. */
-enum mtf_type
- {
- MTF_FILE, /* Specified on FILE= subcommand. */
- MTF_TABLE /* Specified on TABLE= subcommand. */
- };
-
-/* One of the FILEs or TABLEs on MATCH FILES. */
-struct mtf_file
- {
- struct ll ll; /* In list of all files and tables. */
-
- enum mtf_type type;
- int sequence;
-
- const struct variable **by; /* List of BY variables for this file. */
- struct mtf_variable *vars; /* Variables to copy to output. */
- size_t var_cnt; /* Number of other variables. */
-
- struct file_handle *handle; /* Input file handle. */
- struct dictionary *dict; /* Input file dictionary. */
- struct casereader *reader; /* Input reader. */
- struct ccase input; /* Input record (null at end of file). */
-
- /* IN subcommand. */
- char *in_name; /* Variable name. */
- struct variable *in_var; /* Variable (in master dictionary). */
- };
-
-struct mtf_variable
- {
- struct variable *in_var;
- struct variable *out_var;
- };
-
-/* MATCH FILES procedure. */
-struct mtf_proc
- {
- struct ll_list files; /* List of "struct mtf_file"s. */
- int nonempty_files; /* FILEs that are not at end-of-file. */
-
- bool ok; /* False if I/O error occurs. */
-
- struct dictionary *dict; /* Dictionary of output file. */
- struct casewriter *output; /* MATCH FILES output. */
-
- size_t by_cnt; /* Number of variables on BY subcommand. */
-
- /* FIRST, LAST.
- Only if "first" or "last" is nonnull are the remaining
- members used. */
- struct variable *first; /* Variable specified on FIRST (if any). */
- struct variable *last; /* Variable specified on LAST (if any). */
- struct ccase buffered_case; /* Case ready for output except that we don't
- know the value for the LAST variable yet. */
- struct ccase prev_BY_case; /* Case with values of last set of BY vars. */
- const struct variable **prev_BY; /* Last set of BY variables. */
- };
-
-static void mtf_free (struct mtf_proc *);
-
-static bool mtf_close_all_files (struct mtf_proc *);
-static bool mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
-static bool mtf_read_record (struct mtf_proc *mtf, struct mtf_file *);
-
-static void mtf_process_case (struct mtf_proc *);
-
-static bool create_flag_var (const char *subcommand_name, const char *var_name,
- struct dictionary *, struct variable **);
-static char *var_type_description (struct variable *);
-
-/* Parse and execute the MATCH FILES command. */
-int
-cmd_match_files (struct lexer *lexer, struct dataset *ds)
-{
- struct mtf_proc mtf;
- struct ll *first_table;
- struct mtf_file *file, *next;
-
- bool saw_in = false;
- struct casereader *active_file = NULL;
-
- char first_name[VAR_NAME_LEN + 1] = "";
- char last_name[VAR_NAME_LEN + 1] = "";
-
- struct taint *taint = NULL;
-
- size_t i;
-
- ll_init (&mtf.files);
- mtf.nonempty_files = 0;
- first_table = ll_null (&mtf.files);
- mtf.dict = dict_create ();
- mtf.output = NULL;
- mtf.by_cnt = 0;
- mtf.first = mtf.last = NULL;
- case_nullify (&mtf.buffered_case);
- case_nullify (&mtf.prev_BY_case);
- mtf.prev_BY = NULL;
-
- dict_set_case_limit (mtf.dict, dict_get_case_limit (dataset_dict (ds)));
-
- lex_match (lexer, '/');
- while (lex_token (lexer) == T_ID
- && (lex_id_match (ss_cstr ("FILE"), ss_cstr (lex_tokid (lexer)))
- || lex_id_match (ss_cstr ("TABLE"), ss_cstr (lex_tokid (lexer)))))
- {
- struct mtf_file *file = xmalloc (sizeof *file);
- file->by = NULL;
- file->handle = NULL;
- file->reader = NULL;
- file->dict = NULL;
- file->in_name = NULL;
- file->in_var = NULL;
- file->var_cnt = 0;
- file->vars = NULL;
- case_nullify (&file->input);
-
- if (lex_match_id (lexer, "FILE"))
- {
- file->type = MTF_FILE;
- ll_insert (first_table, &file->ll);
- mtf.nonempty_files++;
- }
- else if (lex_match_id (lexer, "TABLE"))
- {
- file->type = MTF_TABLE;
- ll_push_tail (&mtf.files, &file->ll);
- if (first_table == ll_null (&mtf.files))
- first_table = &file->ll;
- }
- else
- NOT_REACHED ();
- lex_match (lexer, '=');
-
- if (lex_match (lexer, '*'))
- {
- if (!proc_has_active_file (ds))
- {
- msg (SE, _("Cannot specify the active file since no active "
- "file has been defined."));
- goto error;
- }
-
- if (proc_make_temporary_transformations_permanent (ds))
- msg (SE,
- _("MATCH FILES may not be used after TEMPORARY when "
- "the active file is an input source. "
- "Temporary transformations will be made permanent."));
-
- file->dict = dict_clone (dataset_dict (ds));
- }
- else
- {
- file->handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
- if (file->handle == NULL)
- goto error;
-
- file->reader = any_reader_open (file->handle, &file->dict);
- if (file->reader == NULL)
- goto error;
- }
-
- while (lex_match (lexer, '/'))
- if (lex_match_id (lexer, "RENAME"))
- {
- if (!rename_variables (lexer, file->dict))
- goto error;
- }
- else if (lex_match_id (lexer, "IN"))
- {
- lex_match (lexer, '=');
- if (lex_token (lexer) != T_ID)
- {
- lex_error (lexer, NULL);
- goto error;
- }
-
- if (file->in_name != NULL)
- {
- msg (SE, _("Multiple IN subcommands for a single FILE or "
- "TABLE."));
- goto error;
- }
- file->in_name = xstrdup (lex_tokid (lexer));
- lex_get (lexer);
- saw_in = true;
- }
-
- mtf_merge_dictionary (mtf.dict, file);
- }
-
- while (lex_token (lexer) != '.')
- {
- if (lex_match (lexer, T_BY))
- {
- struct mtf_file *file;
- struct variable **by;
- bool ok;
-
- if (mtf.by_cnt)
- {
- lex_sbc_only_once ("BY");
- goto error;
- }
-
- lex_match (lexer, '=');
- if (!parse_variables (lexer, mtf.dict, &by, &mtf.by_cnt,
- PV_NO_DUPLICATE | PV_NO_SCRATCH))
- goto error;
-
- ok = true;
- ll_for_each (file, struct mtf_file, ll, &mtf.files)
- {
- size_t i;
-
- file->by = xnmalloc (mtf.by_cnt, sizeof *file->by);
- for (i = 0; i < mtf.by_cnt; i++)
- {
- const char *var_name = var_get_name (by[i]);
- file->by[i] = dict_lookup_var (file->dict, var_name);
- if (file->by[i] == NULL)
- {
- if (file->handle != NULL)
- msg (SE, _("File %s lacks BY variable %s."),
- fh_get_name (file->handle), var_name);
- else
- msg (SE, _("Active file lacks BY variable %s."),
- var_name);
- ok = false;
- }
- }
- }
- free (by);
-
- if (!ok)
- goto error;
- }
- else if (lex_match_id (lexer, "FIRST"))
- {
- if (first_name[0] != '\0')
- {
- lex_sbc_only_once ("FIRST");
- goto error;
- }
-
- lex_match (lexer, '=');
- if (!lex_force_id (lexer))
- goto error;
- strcpy (first_name, lex_tokid (lexer));
- lex_get (lexer);
- }
- else if (lex_match_id (lexer, "LAST"))
- {
- if (last_name[0] != '\0')
- {
- lex_sbc_only_once ("LAST");
- goto error;
- }
-
- lex_match (lexer, '=');
- if (!lex_force_id (lexer))
- goto error;
- strcpy (last_name, lex_tokid (lexer));
- lex_get (lexer);
- }
- else if (lex_match_id (lexer, "MAP"))
- {
- /* FIXME. */
- }
- else if (lex_match_id (lexer, "DROP"))
- {
- if (!drop_variables (lexer, mtf.dict))
- goto error;
- }
- else if (lex_match_id (lexer, "KEEP"))
- {
- if (!keep_variables (lexer, mtf.dict))
- goto error;
- }
- else
- {
- lex_error (lexer, NULL);
- goto error;
- }
-
- if (!lex_match (lexer, '/') && lex_token (lexer) != '.')
- {
- lex_end_of_command (lexer);
- goto error;
- }
- }
-
- if (mtf.by_cnt == 0)
- {
- if (first_table != ll_null (&mtf.files))
- {
- msg (SE, _("BY is required when TABLE is specified."));
- goto error;
- }
- if (saw_in)
- {
- msg (SE, _("BY is required when IN is specified."));
- goto error;
- }
- }
-
- /* Set up mapping from each file's variables to master
- variables. */
- ll_for_each (file, struct mtf_file, ll, &mtf.files)
- {
- size_t in_var_cnt = dict_get_var_cnt (file->dict);
-
- file->vars = xnmalloc (in_var_cnt, sizeof *file->vars);
- file->var_cnt = 0;
- for (i = 0; i < in_var_cnt; i++)
- {
- struct variable *in_var = dict_get_var (file->dict, i);
- struct variable *out_var = dict_lookup_var (mtf.dict,
- var_get_name (in_var));
-
- if (out_var != NULL)
- {
- struct mtf_variable *mv = &file->vars[file->var_cnt++];
- mv->in_var = in_var;
- mv->out_var = out_var;
- }
- }
- }
-
- /* Add IN, FIRST, and LAST variables to master dictionary. */
- ll_for_each (file, struct mtf_file, ll, &mtf.files)
- if (!create_flag_var ("IN", file->in_name, mtf.dict, &file->in_var))
- goto error;
- if (!create_flag_var ("FIRST", first_name, mtf.dict, &mtf.first)
- || !create_flag_var ("LAST", last_name, mtf.dict, &mtf.last))
- goto error;
-
- dict_delete_scratch_vars (mtf.dict);
- dict_compact_values (mtf.dict);
- mtf.output = autopaging_writer_create (dict_get_next_value_idx (mtf.dict));
- taint = taint_clone (casewriter_get_taint (mtf.output));
-
- ll_for_each (file, struct mtf_file, ll, &mtf.files)
- {
- if (file->reader == NULL)
- {
- if (active_file == NULL)
- {
- proc_discard_output (ds);
- file->reader = active_file = proc_open (ds);
- }
- else
- file->reader = casereader_clone (active_file);
- }
- taint_propagate (casereader_get_taint (file->reader), taint);
- }
-
- ll_for_each_safe (file, next, struct mtf_file, ll, &mtf.files)
- mtf_read_record (&mtf, file);
- while (mtf.nonempty_files > 0)
- mtf_process_case (&mtf);
- if ((mtf.first != NULL || mtf.last != NULL) && mtf.prev_BY != NULL)
- {
- if (mtf.last != NULL)
- case_data_rw (&mtf.buffered_case, mtf.last)->f = 1.0;
- casewriter_write (mtf.output, &mtf.buffered_case);
- case_nullify (&mtf.buffered_case);
- }
- mtf_close_all_files (&mtf);
- if (active_file != NULL)
- proc_commit (ds);
-
- proc_set_active_file (ds, casewriter_make_reader (mtf.output), mtf.dict);
- mtf.dict = NULL;
- mtf.output = NULL;
-
- mtf_free (&mtf);
-
- return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
-
- error:
- if (active_file != NULL)
- proc_commit (ds);
- mtf_free (&mtf);
- taint_destroy (taint);
- return CMD_CASCADING_FAILURE;
-}
-
-/* If VAR_NAME is a nonnull pointer to a non-empty string,
- attempts to create a variable named VAR_NAME, with format
- F1.0, in DICT, and stores a pointer to the variable in *VAR.
- Returns true if successful, false if the variable name is a
- duplicate (in which case a message saying that the variable
- specified on the given SUBCOMMAND is a duplicate is emitted).
- Also returns true, without doing anything, if VAR_NAME is null
- or empty. */
-static bool
-create_flag_var (const char *subcommand, const char *var_name,
- struct dictionary *dict, struct variable **var)
-{
- if (var_name != NULL && var_name[0] != '\0')
- {
- struct fmt_spec format = fmt_for_output (FMT_F, 1, 0);
- *var = dict_create_var (dict, var_name, 0);
- if (*var == NULL)
- {
- msg (SE, _("Variable name %s specified on %s subcommand "
- "duplicates an existing variable name."),
- subcommand, var_name);
- return false;
- }
- var_set_both_formats (*var, &format);
- }
- else
- *var = NULL;
- return true;
-}
-
-/* Return a string in an allocated buffer describing V's variable
- type and width. */
-static char *
-var_type_description (struct variable *v)
-{
- if (var_is_numeric (v))
- return xstrdup ("numeric");
- else
- return xasprintf ("string with width %d", var_get_width (v));
-}
-
-/* Closes all the files in MTF and frees their associated data.
- Returns true if successful, false if an I/O error occurred on
- any of the files. */
-static bool
-mtf_close_all_files (struct mtf_proc *mtf)
-{
- struct mtf_file *file;
- bool ok = true;
-
- ll_for_each_preremove (file, struct mtf_file, ll, &mtf->files)
- {
- fh_unref (file->handle);
- casereader_destroy (file->reader);
- free (file->by);
- dict_destroy (file->dict);
- free (file->in_name);
- case_destroy (&file->input);
- free (file->vars);
- free (file);
- }
-
- return ok;
-}
-
-/* Frees all the data for the MATCH FILES procedure. */
-static void
-mtf_free (struct mtf_proc *mtf)
-{
- mtf_close_all_files (mtf);
- dict_destroy (mtf->dict);
- casewriter_destroy (mtf->output);
- case_destroy (&mtf->buffered_case);
- case_destroy (&mtf->prev_BY_case);
-}
-
-/* Reads the next record into FILE, if possible, and update MTF's
- nonempty_files count if not. */
-static bool
-mtf_read_record (struct mtf_proc *mtf, struct mtf_file *file)
-{
- case_destroy (&file->input);
- if (!casereader_read (file->reader, &file->input))
- {
- mtf->nonempty_files--;
- return false;
- }
- else
- return true;
-}
-
-/* Compare the BY variables for files A and B; return -1 if A <
- B, 0 if A == B, 1 if A > B. (If there are no BY variables,
- then all records are equal.) */
-static inline int
-mtf_compare_BY_values (struct mtf_proc *mtf,
- struct mtf_file *a, struct mtf_file *b)
-{
- return case_compare_2dict (&a->input, &b->input, a->by, b->by, mtf->by_cnt);
-}
-
-/* Processes input files and write one case to the output file. */
-static void
-mtf_process_case (struct mtf_proc *mtf)
-{
- struct ccase c;
- struct mtf_file *min;
- struct mtf_file *file;
- int min_sequence;
- size_t i;
-
- /* Find the set of one or more FILEs whose BY values are
- minimal, as well as the set of zero or more TABLEs whose BY
- values equal those of the minimum FILEs.
-
- After each iteration of the loop, this invariant holds: the
- FILEs with minimum BY values thus far have "sequence"
- members equal to min_sequence, and "min" points to one of
- the mtf_files whose case has those minimum BY values, and
- similarly for TABLEs. */
- min_sequence = 0;
- min = NULL;
- ll_for_each (file, struct mtf_file, ll, &mtf->files)
- if (case_is_null (&file->input))
- file->sequence = -1;
- else if (file->type == MTF_FILE)
- {
- int cmp = min != NULL ? mtf_compare_BY_values (mtf, min, file) : 1;
- if (cmp <= 0)
- file->sequence = cmp < 0 ? -1 : min_sequence;
- else
- {
- file->sequence = ++min_sequence;
- min = file;
- }
- }
- else
- {
- int cmp;
- assert (min != NULL);
- do
- {
- cmp = mtf_compare_BY_values (mtf, min, file);
- }
- while (cmp > 0 && mtf_read_record (mtf, file));
- file->sequence = cmp == 0 ? min_sequence : -1;
- }
-
- /* Form the output case from the input cases. */
- case_create (&c, dict_get_next_value_idx (mtf->dict));
- for (i = 0; i < dict_get_var_cnt (mtf->dict); i++)
- {
- struct variable *v = dict_get_var (mtf->dict, i);
- value_set_missing (case_data_rw (&c, v), var_get_width (v));
- }
- ll_for_each_reverse (file, struct mtf_file, ll, &mtf->files)
- {
- bool include_file = file->sequence == min_sequence;
- if (include_file)
- for (i = 0; i < file->var_cnt; i++)
- {
- const struct mtf_variable *mv = &file->vars[i];
- const union value *in = case_data (&file->input, mv->in_var);
- union value *out = case_data_rw (&c, mv->out_var);
- value_copy (out, in, var_get_width (mv->in_var));
- }
- if (file->in_var != NULL)
- case_data_rw (&c, file->in_var)->f = include_file;
- }
-
- /* Write the output case. */
- if (mtf->first == NULL && mtf->last == NULL)
- {
- /* With no FIRST or LAST variables, it's trivial. */
- casewriter_write (mtf->output, &c);
- }
- else
- {
- /* It's harder with LAST, because we can't know whether
- this case is the last in a group until we've prepared
- the *next* case also. Thus, we buffer the previous
- output case until the next one is ready.
-
- We also have to save a copy of one of the previous input
- cases, so that we can compare the BY variables. We
- can't compare the BY variables between the current
- output case and the saved one because the BY variables
- might not be in the output (the user is allowed to drop
- them). */
- bool new_BY;
- if (mtf->prev_BY != NULL)
- {
- new_BY = case_compare_2dict (&min->input, &mtf->prev_BY_case,
- min->by, mtf->prev_BY,
- mtf->by_cnt);
- if (mtf->last != NULL)
- case_data_rw (&mtf->buffered_case, mtf->last)->f = new_BY;
- casewriter_write (mtf->output, &mtf->buffered_case);
- }
- else
- new_BY = true;
-
- case_move (&mtf->buffered_case, &c);
- if (mtf->first != NULL)
- case_data_rw (&mtf->buffered_case, mtf->first)->f = new_BY;
-
- if (new_BY)
- {
- mtf->prev_BY = min->by;
- case_destroy (&mtf->prev_BY_case);
- case_clone (&mtf->prev_BY_case, &min->input);
- }
- }
-
- /* Read another record from each input file FILE with minimum
- values. */
- ll_for_each (file, struct mtf_file, ll, &mtf->files)
- if (file->type == MTF_FILE)
- {
- if (file->sequence == min_sequence)
- mtf_read_record (mtf, file);
- }
- else
- break;
-}
-
-/* Merge the dictionary for file F into master dictionary M. */
-static bool
-mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
-{
- struct dictionary *d = f->dict;
- const char *d_docs, *m_docs;
- int i;
-
- if (dict_get_label (m) == NULL)
- dict_set_label (m, dict_get_label (d));
-
- d_docs = dict_get_documents (d);
- m_docs = dict_get_documents (m);
- if (d_docs != NULL)
- {
- if (m_docs == NULL)
- dict_set_documents (m, d_docs);
- else
- {
- char *new_docs = xasprintf ("%s%s", m_docs, d_docs);
- dict_set_documents (m, new_docs);
- free (new_docs);
- }
- }
-
- for (i = 0; i < dict_get_var_cnt (d); i++)
- {
- struct variable *dv = dict_get_var (d, i);
- struct variable *mv = dict_lookup_var (m, var_get_name (dv));
-
- if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
- continue;
-
- if (mv != NULL)
- {
- if (var_get_width (mv) != var_get_width (dv))
- {
- char *dv_description = var_type_description (dv);
- char *mv_description = var_type_description (mv);
- msg (SE, _("Variable %s in file %s (%s) has different "
- "type or width from the same variable in "
- "earlier file (%s)."),
- var_get_name (dv), fh_get_name (f->handle),
- dv_description, mv_description);
- free (dv_description);
- free (mv_description);
- return false;
- }
-
- if (var_get_width (dv) == var_get_width (mv))
- {
- if (var_has_value_labels (dv) && !var_has_value_labels (mv))
- var_set_value_labels (mv, var_get_value_labels (dv));
- if (var_has_missing_values (dv) && !var_has_missing_values (mv))
- var_set_missing_values (mv, var_get_missing_values (dv));
- }
-
- if (var_get_label (dv) && !var_get_label (mv))
- var_set_label (mv, var_get_label (dv));
- }
- else
- mv = dict_clone_var_assert (m, dv, var_get_name (dv));
- }
-
- return true;
-}
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 1997-9, 2000, 2006, 2007, 2008 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <stdlib.h>
+
+#include <data/any-reader.h>
+#include <data/case-matcher.h>
+#include <data/case.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
+#include <data/dictionary.h>
+#include <data/format.h>
+#include <data/procedure.h>
+#include <data/subcase.h>
+#include <data/variable.h>
+#include <language/command.h>
+#include <language/data-io/file-handle.h>
+#include <language/data-io/trim.h>
+#include <language/lexer/lexer.h>
+#include <language/lexer/variable-parser.h>
+#include <language/stats/sort-criteria.h>
+#include <libpspp/assertion.h>
+#include <libpspp/message.h>
+#include <libpspp/taint.h>
+#include <math/sort.h>
+
+#include "xalloc.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+/* Which of the file-combining commands is being executed.
+ All three share combine_files() and differ only in which
+ subcommands are accepted and in how cases are combined. */
+enum command_type
+ {
+ ADD_FILES, /* ADD FILES command. */
+ MATCH_FILES, /* MATCH FILES command. */
+ UPDATE /* UPDATE command. */
+ };
+
+/* File types: the subcommand that named an input.
+ TABLE is accepted only by MATCH FILES. */
+enum mtf_type
+ {
+ MTF_FILE, /* Specified on FILE= subcommand. */
+ MTF_TABLE /* Specified on TABLE= subcommand. */
+ };
+
+/* One FILE or TABLE subcommand. */
+struct mtf_file
+ {
+ enum mtf_type type; /* FILE or TABLE. */
+ /* Source of cases. Null for the active file until it is
+ opened just before processing starts. */
+ struct casereader *reader;
+ /* BY variables, looked up by name in this file's own dictionary. */
+ struct subcase by;
+ /* Index among inputs of the same type: FILEs and TABLEs are
+ numbered separately, each starting from 0. */
+ int idx;
+ struct mtf_variable *vars; /* Variables to copy to output. */
+ size_t var_cnt; /* Number of elements in vars. */
+ bool is_sorted; /* Is presorted on the BY variables? */
+
+ struct file_handle *handle; /* Input file handle. */
+ struct dictionary *dict; /* Input file dictionary. */
+
+ /* Used by TABLE: the case most recently read from the reader,
+ or a null case. */
+ struct ccase c;
+
+ /* Name given on the IN subcommand, or an empty string if none. */
+ char in_name[VAR_NAME_LEN + 1];
+ /* Flag variable created in the output dictionary for IN, or null. */
+ struct variable *in_var;
+ };
+
+/* Pairs a variable in an input file's dictionary with the
+ variable of the same name in the output dictionary, so that
+ values can be copied from an input case to the output case. */
+struct mtf_variable
+ {
+ struct variable *in_var; /* Variable in the input file's dictionary. */
+ struct variable *out_var; /* Same-named variable in the output dictionary. */
+ };
+
+/* State shared by the ADD FILES, MATCH FILES, and UPDATE
+ procedures. */
+struct mtf_proc
+ {
+ struct mtf_file **files; /* All the files being merged. */
+ size_t n_files; /* Number of files. */
+
+ struct dictionary *dict; /* Dictionary of output file. */
+ struct casewriter *output; /* Destination for output. */
+
+ /* Supplies groups of cases from the FILE inputs (TABLE inputs
+ are not added to it). Null until processing starts. */
+ struct case_matcher *matcher;
+ /* BY variables, parsed against the output dictionary. */
+ struct subcase by;
+
+ /* FIRST, LAST.
+ Only if "first" or "last" is nonnull are the remaining
+ members used. */
+ struct variable *first; /* Variable specified on FIRST (if any). */
+ struct variable *last; /* Variable specified on LAST (if any). */
+ struct ccase buffered_case; /* Case ready for output except that we don't
+ know the value for the LAST variable yet. */
+ union value *prev_BY; /* Values of BY vars in buffered_case. */
+ };
+
+static int combine_files (enum command_type, struct lexer *, struct dataset *);
+static void mtf_free (struct mtf_proc *);
+
+static bool mtf_close_all_files (struct mtf_proc *);
+static bool mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
+
+static void process_update (struct mtf_proc *);
+static void process_match_files (struct mtf_proc *);
+static void process_add_files (struct mtf_proc *);
+
+static bool create_flag_var (const char *subcommand_name, const char *var_name,
+ struct dictionary *, struct variable **);
+static char *var_type_description (struct variable *);
+static void output_case (struct mtf_proc *, struct ccase *, union value *by);
+static void output_buffered_case (struct mtf_proc *);
+
+/* Parses and executes the ADD FILES command.
+ Returns the command result code from combine_files(). */
+int
+cmd_add_files (struct lexer *lexer, struct dataset *ds)
+{
+ return combine_files (ADD_FILES, lexer, ds);
+}
+
+/* Parses and executes the MATCH FILES command.
+ Returns the command result code from combine_files(). */
+int
+cmd_match_files (struct lexer *lexer, struct dataset *ds)
+{
+ return combine_files (MATCH_FILES, lexer, ds);
+}
+
+/* Parses and executes the UPDATE command.
+ Returns the command result code from combine_files(). */
+int
+cmd_update (struct lexer *lexer, struct dataset *ds)
+{
+ return combine_files (UPDATE, lexer, ds);
+}
+
+/* Parses and executes ADD FILES, MATCH FILES, or UPDATE, as
+   selected by COMMAND, reading syntax from LEXER and operating on
+   dataset DS.
+
+   The per-file subcommands (FILE or TABLE, each optionally followed
+   by RENAME, IN, and SORT) are parsed first, merging each input's
+   dictionary into the output dictionary as it goes.  The
+   once-per-command subcommands (BY, FIRST, LAST, MAP, DROP, KEEP)
+   follow.  TABLE is accepted only for MATCH FILES, and FIRST and
+   LAST are rejected for UPDATE.
+
+   On success the combined data becomes the active file and the
+   return value is CMD_SUCCESS, unless an error tainted one of the
+   data sources or the output, in which case it is
+   CMD_CASCADING_FAILURE.  On a parse or setup error, returns
+   CMD_CASCADING_FAILURE without replacing the active file. */
+static int
+combine_files (enum command_type command,
+               struct lexer *lexer, struct dataset *ds)
+{
+  struct mtf_proc mtf;
+
+  bool saw_by = false;
+  bool saw_sort = false;
+  struct casereader *active_file = NULL;
+
+  char first_name[VAR_NAME_LEN + 1] = "";
+  char last_name[VAR_NAME_LEN + 1] = "";
+
+  struct taint *taint = NULL;
+
+  size_t n_files = 0;
+  size_t n_tables = 0;
+  size_t allocated_files = 0;
+
+  size_t i;
+
+  mtf.files = NULL;
+  mtf.n_files = 0;
+  mtf.dict = dict_create ();
+  mtf.output = NULL;
+  mtf.matcher = NULL;
+  subcase_init_empty (&mtf.by);
+  mtf.first = NULL;
+  mtf.last = NULL;
+  case_nullify (&mtf.buffered_case);
+  mtf.prev_BY = NULL;
+
+  dict_set_case_limit (mtf.dict, dict_get_case_limit (dataset_dict (ds)));
+
+  /* Parse the per-file subcommands. */
+  lex_match (lexer, '/');
+  for (;;)
+    {
+      struct mtf_file *file;
+      enum mtf_type type;
+
+      if (lex_match_id (lexer, "FILE"))
+        type = MTF_FILE;
+      else if (command == MATCH_FILES && lex_match_id (lexer, "TABLE"))
+        type = MTF_TABLE;
+      else
+        break;
+      lex_match (lexer, '=');
+
+      /* Add the new file to mtf.files before anything can fail, so
+         that the error path frees it. */
+      if (mtf.n_files >= allocated_files)
+        mtf.files = x2nrealloc (mtf.files, &allocated_files,
+                                sizeof *mtf.files);
+      mtf.files[mtf.n_files++] = file = xmalloc (sizeof *file);
+      file->type = type;
+      file->reader = NULL;
+      subcase_init_empty (&file->by);
+      file->idx = type == MTF_FILE ? n_files++ : n_tables++;
+      file->vars = NULL;
+      file->var_cnt = 0;
+      file->is_sorted = true;
+      file->handle = NULL;
+      file->dict = NULL;
+      case_nullify (&file->c);
+      file->in_name[0] = '\0';
+      file->in_var = NULL;
+
+      if (lex_match (lexer, '*'))
+        {
+          if (!proc_has_active_file (ds))
+            {
+              msg (SE, _("Cannot specify the active file since no active "
+                         "file has been defined."));
+              goto error;
+            }
+
+          if (proc_make_temporary_transformations_permanent (ds))
+            msg (SE,
+                 _("This command may not be used after TEMPORARY when "
+                   "the active file is an input source.  "
+                   "Temporary transformations will be made permanent."));
+
+          file->dict = dict_clone (dataset_dict (ds));
+        }
+      else
+        {
+          file->handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
+          if (file->handle == NULL)
+            goto error;
+
+          file->reader = any_reader_open (file->handle, &file->dict);
+          if (file->reader == NULL)
+            goto error;
+        }
+
+      while (lex_match (lexer, '/'))
+        if (lex_match_id (lexer, "RENAME"))
+          {
+            if (!parse_dict_rename (lexer, file->dict))
+              goto error;
+          }
+        else if (lex_match_id (lexer, "IN"))
+          {
+            lex_match (lexer, '=');
+            if (lex_token (lexer) != T_ID)
+              {
+                lex_error (lexer, NULL);
+                goto error;
+              }
+
+            if (file->in_name[0])
+              {
+                msg (SE, _("Multiple IN subcommands for a single FILE or "
+                           "TABLE."));
+                goto error;
+              }
+            strcpy (file->in_name, lex_tokid (lexer));
+            lex_get (lexer);
+          }
+        else if (lex_match_id (lexer, "SORT"))
+          {
+            file->is_sorted = false;
+            saw_sort = true;
+          }
+
+      /* A type or width conflict has already been reported by
+         mtf_merge_dictionary(); carrying on would copy values
+         between variables of different widths. */
+      if (!mtf_merge_dictionary (mtf.dict, file))
+        goto error;
+    }
+
+  /* Parse the once-per-command subcommands. */
+  while (lex_token (lexer) != '.')
+    {
+      if (lex_match (lexer, T_BY))
+        {
+          const struct variable **by_vars;
+          bool ok;
+
+          if (saw_by)
+            {
+              lex_sbc_only_once ("BY");
+              goto error;
+            }
+          saw_by = true;
+
+          lex_match (lexer, '=');
+          if (!parse_sort_criteria (lexer, mtf.dict, &mtf.by, &by_vars, NULL))
+            goto error;
+
+          /* Look up each BY variable by name in every input file's
+             own dictionary.  Every missing variable is reported
+             before giving up. */
+          ok = true;
+          for (i = 0; i < mtf.n_files; i++)
+            {
+              struct mtf_file *file = mtf.files[i];
+              size_t j;
+
+              for (j = 0; j < subcase_get_n_values (&mtf.by); j++)
+                {
+                  const char *name = var_get_name (by_vars[j]);
+                  struct variable *var = dict_lookup_var (file->dict, name);
+                  if (var != NULL)
+                    subcase_add_var (&file->by, var,
+                                     subcase_get_direction (&mtf.by, j));
+                  else
+                    {
+                      if (file->handle != NULL)
+                        msg (SE, _("File %s lacks BY variable %s."),
+                             fh_get_name (file->handle), name);
+                      else
+                        msg (SE, _("Active file lacks BY variable %s."), name);
+                      ok = false;
+                    }
+                }
+              assert (!ok || subcase_conformable (&file->by,
+                                                  &mtf.files[0]->by));
+            }
+          free (by_vars);
+
+          if (!ok)
+            goto error;
+        }
+      else if (command != UPDATE && lex_match_id (lexer, "FIRST"))
+        {
+          if (first_name[0] != '\0')
+            {
+              lex_sbc_only_once ("FIRST");
+              goto error;
+            }
+
+          lex_match (lexer, '=');
+          if (!lex_force_id (lexer))
+            goto error;
+          strcpy (first_name, lex_tokid (lexer));
+          lex_get (lexer);
+        }
+      else if (command != UPDATE && lex_match_id (lexer, "LAST"))
+        {
+          if (last_name[0] != '\0')
+            {
+              lex_sbc_only_once ("LAST");
+              goto error;
+            }
+
+          lex_match (lexer, '=');
+          if (!lex_force_id (lexer))
+            goto error;
+          strcpy (last_name, lex_tokid (lexer));
+          lex_get (lexer);
+        }
+      else if (lex_match_id (lexer, "MAP"))
+        {
+          /* FIXME. */
+        }
+      else if (lex_match_id (lexer, "DROP"))
+        {
+          if (!parse_dict_drop (lexer, mtf.dict))
+            goto error;
+        }
+      else if (lex_match_id (lexer, "KEEP"))
+        {
+          if (!parse_dict_keep (lexer, mtf.dict))
+            goto error;
+        }
+      else
+        {
+          lex_error (lexer, NULL);
+          goto error;
+        }
+
+      if (!lex_match (lexer, '/') && lex_token (lexer) != '.')
+        {
+          lex_end_of_command (lexer);
+          goto error;
+        }
+    }
+
+  if (!saw_by)
+    {
+      if (command == UPDATE)
+        {
+          msg (SE, _("The BY subcommand is required."));
+          goto error;
+        }
+      if (n_tables)
+        {
+          msg (SE, _("BY is required when TABLE is specified."));
+          goto error;
+        }
+      if (saw_sort)
+        {
+          msg (SE, _("BY is required when SORT is specified."));
+          goto error;
+        }
+    }
+
+  /* Set up mapping from each file's variables to master
+     variables.  Variables that DROP or KEEP removed from the
+     master dictionary get no mapping and so are not copied. */
+  for (i = 0; i < mtf.n_files; i++)
+    {
+      struct mtf_file *file = mtf.files[i];
+      size_t in_var_cnt = dict_get_var_cnt (file->dict);
+      size_t j;
+
+      file->vars = xnmalloc (in_var_cnt, sizeof *file->vars);
+      file->var_cnt = 0;
+      for (j = 0; j < in_var_cnt; j++)
+        {
+          struct variable *in_var = dict_get_var (file->dict, j);
+          struct variable *out_var = dict_lookup_var (mtf.dict,
+                                                      var_get_name (in_var));
+
+          if (out_var != NULL)
+            {
+              struct mtf_variable *mv = &file->vars[file->var_cnt++];
+              mv->in_var = in_var;
+              mv->out_var = out_var;
+            }
+        }
+    }
+
+  /* Add IN, FIRST, and LAST variables to master dictionary. */
+  for (i = 0; i < mtf.n_files; i++)
+    {
+      struct mtf_file *file = mtf.files[i];
+      if (!create_flag_var ("IN", file->in_name, mtf.dict, &file->in_var))
+        goto error;
+    }
+  if (!create_flag_var ("FIRST", first_name, mtf.dict, &mtf.first)
+      || !create_flag_var ("LAST", last_name, mtf.dict, &mtf.last))
+    goto error;
+
+  dict_delete_scratch_vars (mtf.dict);
+  dict_compact_values (mtf.dict);
+  mtf.output = autopaging_writer_create (dict_get_next_value_idx (mtf.dict));
+  taint = taint_clone (casewriter_get_taint (mtf.output));
+
+  /* No "goto error" may follow this point: the cleanup code treats
+     a nonnull mtf.matcher as meaning that the FILE readers have
+     been handed to the matcher. */
+  mtf.matcher = case_matcher_create ();
+  taint_propagate (case_matcher_get_taint (mtf.matcher), taint);
+  for (i = 0; i < mtf.n_files; i++)
+    {
+      struct mtf_file *file = mtf.files[i];
+      if (file->reader == NULL)
+        {
+          /* The active file is opened once; further references to
+             it read from clones of that reader. */
+          if (active_file == NULL)
+            {
+              proc_discard_output (ds);
+              file->reader = active_file = proc_open (ds);
+            }
+          else
+            file->reader = casereader_clone (active_file);
+        }
+      if (!file->is_sorted)
+        file->reader = sort_execute (file->reader, &file->by);
+      if (file->type == MTF_FILE)
+        case_matcher_add_input (mtf.matcher, file->reader, &file->by);
+      else
+        {
+          /* Tables are scanned by hand; prime each one with its
+             first case. */
+          casereader_read (file->reader, &file->c);
+          taint_propagate (casereader_get_taint (file->reader), taint);
+        }
+    }
+
+  if (command == ADD_FILES)
+    process_add_files (&mtf);
+  else if (command == MATCH_FILES)
+    process_match_files (&mtf);
+  else if (command == UPDATE)
+    process_update (&mtf);
+  else
+    NOT_REACHED ();
+
+  case_matcher_destroy (mtf.matcher);
+  mtf_close_all_files (&mtf);
+  if (active_file != NULL)
+    proc_commit (ds);
+
+  /* The dataset takes over the output dictionary and cases, so
+     detach them from MTF before freeing it. */
+  proc_set_active_file (ds, casewriter_make_reader (mtf.output), mtf.dict);
+  mtf.dict = NULL;
+  mtf.output = NULL;
+
+  mtf_free (&mtf);
+
+  return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
+
+ error:
+  if (active_file != NULL)
+    proc_commit (ds);
+  mtf_free (&mtf);
+  taint_destroy (taint);
+  return CMD_CASCADING_FAILURE;
+}
+
+/* If VAR_NAME is a nonnull pointer to a non-empty string,
+   attempts to create a numeric variable named VAR_NAME, with
+   format F1.0, in DICT, and stores a pointer to the variable in
+   *VAR.  Returns true if successful, false if the variable name
+   is a duplicate (in which case a message saying that the
+   variable specified on the given SUBCOMMAND is a duplicate is
+   emitted).
+
+   If VAR_NAME is null or empty, stores a null pointer in *VAR and
+   returns true without touching DICT. */
+static bool
+create_flag_var (const char *subcommand, const char *var_name,
+                 struct dictionary *dict, struct variable **var)
+{
+  if (var_name != NULL && var_name[0] != '\0')
+    {
+      struct fmt_spec format = fmt_for_output (FMT_F, 1, 0);
+      *var = dict_create_var (dict, var_name, 0);
+      if (*var == NULL)
+        {
+          /* The arguments must follow the order of the %s
+             directives: variable name first, then subcommand. */
+          msg (SE, _("Variable name %s specified on %s subcommand "
+                     "duplicates an existing variable name."),
+               var_name, subcommand);
+          return false;
+        }
+      var_set_both_formats (*var, &format);
+    }
+  else
+    *var = NULL;
+  return true;
+}
+
+/* Describes the type of V, and for a string variable also its
+   width, in a newly allocated string that the caller must
+   free. */
+static char *
+var_type_description (struct variable *v)
+{
+  return (var_is_numeric (v)
+          ? xstrdup ("numeric")
+          : xasprintf ("string with width %d", var_get_width (v)));
+}
+
+/* Closes all the files in MTF and frees their associated data,
+   leaving MTF with an empty file list.  Calling it again is then
+   harmless.
+
+   Always returns true: I/O errors on the inputs are reported
+   through the taint mechanism rather than through this return
+   value. */
+static bool
+mtf_close_all_files (struct mtf_proc *mtf)
+{
+  size_t i;
+
+  for (i = 0; i < mtf->n_files; i++)
+    {
+      struct mtf_file *file = mtf->files[i];
+
+      fh_unref (file->handle);
+      dict_destroy (file->dict);
+      subcase_destroy (&file->by);
+
+      /* TABLE readers always belong to us.  FILE readers are handed
+         to the case matcher once it has been created (presumably it
+         then destroys them -- confirm against case-matcher.h), but
+         if we fail before that point, when mtf->matcher is still
+         null, nothing else will ever close them.  Only the pointer
+         value is examined here, never dereferenced. */
+      if (file->type == MTF_TABLE || mtf->matcher == NULL)
+        casereader_destroy (file->reader);
+
+      /* Release the TABLE case still buffered in the file, if any.
+         For FILEs, and on early error paths, this is a null case. */
+      case_destroy (&file->c);
+
+      free (file->vars);
+      free (file);
+    }
+  free (mtf->files);
+  mtf->files = NULL;
+  mtf->n_files = 0;
+
+  return true;
+}
+
+/* Releases everything that MTF still owns: its input files, the
+   output dictionary and casewriter, the BY specification, and
+   the state kept for FIRST and LAST.  Members whose ownership has
+   been passed elsewhere must be set to null beforehand. */
+static void
+mtf_free (struct mtf_proc *mtf)
+{
+  /* Inputs. */
+  mtf_close_all_files (mtf);
+
+  /* Output. */
+  casewriter_destroy (mtf->output);
+  dict_destroy (mtf->dict);
+
+  /* BY, FIRST, and LAST bookkeeping. */
+  subcase_destroy (&mtf->by);
+  free (mtf->prev_BY);
+  case_destroy (&mtf->buffered_case);
+}
+
+/* Advances TABLE input FILE until its current case's BY values
+   are no longer less than the values in BY (that is, while
+   subcase_compare_3way_xc() reports BY as greater).  Returns true
+   if FILE's current case then matches BY exactly, false if it
+   lies beyond BY or the table is exhausted.
+
+   The current case stays in FILE->c, so the same table case can
+   match several successive BY groups. */
+static bool
+scan_table (struct mtf_file *file, union value *by)
+{
+  while (!case_is_null (&file->c))
+    {
+      int cmp = subcase_compare_3way_xc (&file->by, by, &file->c);
+      if (cmp > 0)
+        {
+          /* casereader_read() stores into FILE->c without looking at
+             what was there, so drop our reference to the old case
+             first; otherwise every skipped table case is leaked. */
+          case_destroy (&file->c);
+          casereader_read (file->reader, &file->c);
+        }
+      else
+        return cmp == 0;
+    }
+  return false;
+}
+
+/* Initializes C as a fresh case for MTF's output dictionary.
+   Every variable starts out missing, and every IN flag variable
+   is then set to 0. */
+static void
+create_output_case (const struct mtf_proc *mtf, struct ccase *c)
+{
+  size_t var_idx;
+  size_t file_idx;
+
+  case_create (c, dict_get_next_value_idx (mtf->dict));
+
+  /* Everything is missing until some input supplies a value. */
+  for (var_idx = 0; var_idx < dict_get_var_cnt (mtf->dict); var_idx++)
+    {
+      struct variable *var = dict_get_var (mtf->dict, var_idx);
+      union value *value = case_data_rw (c, var);
+      value_set_missing (value, var_get_width (var));
+    }
+
+  /* No input has contributed to this case yet. */
+  for (file_idx = 0; file_idx < mtf->n_files; file_idx++)
+    {
+      struct variable *in_var = mtf->files[file_idx]->in_var;
+      if (in_var == NULL)
+        continue;
+      case_data_rw (c, in_var)->f = false;
+    }
+}
+
+/* Copies into output case C the value of each variable that FILE
+   maps to the output dictionary, taking the values from
+   FILE_CASE.  Destroys FILE_CASE afterward, and sets FILE's IN
+   flag variable in C, if it has one, to 1. */
+static void
+apply_case (const struct mtf_file *file, struct ccase *file_case,
+            struct ccase *c)
+{
+  /* XXX subcases */
+  const struct mtf_variable *mv;
+  const struct mtf_variable *end = file->vars + file->var_cnt;
+
+  for (mv = file->vars; mv < end; mv++)
+    value_copy (case_data_rw (c, mv->out_var),
+                case_data (file_case, mv->in_var),
+                var_get_width (mv->in_var));
+
+  case_destroy (file_case);
+
+  if (file->in_var == NULL)
+    return;
+  case_data_rw (c, file->in_var)->f = true;
+}
+
+/* Returns the index of the first case in CASES that is not null.
+   CASES must contain at least one such case, because the search
+   has no upper bound. */
+static size_t
+find_first_match (struct ccase *cases)
+{
+  size_t idx = 0;
+
+  while (case_is_null (&cases[idx]))
+    idx++;
+  return idx;
+}
+
+/* Executes UPDATE. Input 0 is treated specially (presumably it
+ is the master file, with the other inputs as transaction
+ files -- confirm against the command documentation).
+
+ For each group of cases obtained from the matcher, one output
+ case is built from the first input that has a case, and then
+ overlaid with every case from the later inputs, in order.
+ Any further cases that input 0 has in the same group are
+ written out separately, without the other inputs applied. */
+static void
+process_update (struct mtf_proc *mtf)
+{
+ struct ccase *cases;
+ union value *by;
+
+ while (case_matcher_read (mtf->matcher, &cases, &by))
+ {
+ struct mtf_file *min;
+ struct ccase c;
+ size_t min_idx;
+ size_t i;
+
+ /* Start the output case from the lowest-numbered input that
+ has a case in this group. */
+ create_output_case (mtf, &c);
+ min_idx = find_first_match (cases);
+ min = mtf->files[min_idx];
+ apply_case (min, &cases[min_idx], &c);
+ case_matcher_advance (mtf->matcher, min_idx, &cases[min_idx]);
+ /* Apply every remaining case from inputs 1 and up, so that
+ values applied later replace those applied earlier. Input
+ 0 is skipped here even if it has more cases. */
+ for (i = MAX (1, min_idx); i < mtf->n_files; i++)
+ while (!case_is_null (&cases[i]))
+ {
+ apply_case (mtf->files[i], &cases[i], &c);
+ case_matcher_advance (mtf->matcher, i, &cases[i]);
+ }
+ casewriter_write (mtf->output, &c);
+
+ if (min_idx == 0)
+ {
+ size_t n_dups;
+
+ /* Input 0 may have more cases with these BY values; each
+ one is passed through as a separate output case. */
+ for (n_dups = 0; !case_is_null (&cases[0]); n_dups++)
+ {
+ create_output_case (mtf, &c);
+ apply_case (mtf->files[0], &cases[0], &c);
+ case_matcher_advance (mtf->matcher, 0, &cases[0]);
+ casewriter_write (mtf->output, &c);
+ }
+#if 0
+ if (n_dups > 0)
+ msg (SW, _("Encountered %zu duplicates."), n_dups);
+#endif
+ /* XXX warn. That's the whole point; otherwise we
+ don't need the 'if' statement at all. */
+ }
+ }
+}
+
+/* Executes MATCH FILES for key-based matches.
+
+ Each group of cases obtained from the matcher yields one output
+ case, combining at most one case from each FILE with the
+ matching case, if any, from each TABLE. */
+static void
+process_match_files (struct mtf_proc *mtf)
+{
+ union value *by;
+ struct ccase *cases;
+
+ while (case_matcher_read (mtf->matcher, &cases, &by))
+ {
+ struct ccase c;
+ size_t i;
+
+ create_output_case (mtf, &c);
+ /* Apply the inputs from last to first, so that where several
+ inputs supply the same variable, the earliest input's value
+ is the one left in the output case. */
+ for (i = mtf->n_files; i-- > 0; )
+ {
+ struct mtf_file *file = mtf->files[i];
+ struct ccase *file_case;
+ bool include;
+ if (file->type == MTF_FILE)
+ {
+ /* CASES is indexed by position among the FILE inputs only,
+ hence file->idx rather than i. */
+ file_case = &cases[file->idx];
+ include = !case_is_null (file_case);
+ if (include)
+ case_matcher_advance (mtf->matcher, file->idx, NULL);
+ }
+ else
+ {
+ file_case = &file->c;
+ include = scan_table (file, by);
+ /* apply_case() destroys the case it is given, but the table
+ case must stay in file->c for later groups. Cloning the
+ case onto itself presumably just takes an extra
+ reference to balance that -- confirm against case.h. */
+ if (include)
+ case_clone (file_case, file_case);
+ }
+ if (include)
+ apply_case (file, file_case, &c);
+ }
+ output_case (mtf, &c, by);
+ }
+ output_buffered_case (mtf);
+}
+
+/* Executes ADD FILES.
+
+ For each group of cases obtained from the matcher, writes one
+ output case per input case, taking the inputs in the order
+ they were specified and exhausting each input's cases in the
+ group before moving to the next input. */
+static void
+process_add_files (struct mtf_proc *mtf)
+{
+ union value *by;
+ struct ccase *cases;
+
+ while (case_matcher_read (mtf->matcher, &cases, &by))
+ {
+ struct ccase c;
+ size_t i;
+
+ for (i = 0; i < mtf->n_files; i++)
+ {
+ struct mtf_file *file = mtf->files[i];
+ /* Every input case becomes its own output case, in which
+ the variables not in this input are left missing. */
+ while (!case_is_null (&cases[i]))
+ {
+ create_output_case (mtf, &c);
+ apply_case (file, &cases[i], &c);
+ case_matcher_advance (mtf->matcher, i, &cases[i]);
+ output_case (mtf, &c, by);
+ }
+ }
+ }
+ output_buffered_case (mtf);
+}
+
+/* Writes C, whose BY values are BY, to MTF's output, taking
+   ownership of C.
+
+   Without FIRST or LAST the case is written immediately.
+   Otherwise C is held back in MTF->buffered_case until the next
+   call, or until output_buffered_case(), because the LAST flag
+   cannot be known before the following case has been seen.  The
+   case that was buffered before this call is completed and
+   written out here. */
+static void
+output_case (struct mtf_proc *mtf, struct ccase *c, union value *by)
+{
+  if (mtf->first == NULL && mtf->last == NULL)
+    casewriter_write (mtf->output, c);
+  else
+    {
+      /* It's harder with LAST, because we can't know whether
+         this case is the last in a group until we've prepared
+         the *next* case also.  Thus, we buffer the previous
+         output case until the next one is ready. */
+      bool new_BY;
+      if (mtf->prev_BY != NULL)
+        {
+          /* A change in BY values means that the buffered case was
+             the last one in its group. */
+          new_BY = !subcase_equal_xx (&mtf->by, mtf->prev_BY, by);
+          if (mtf->last != NULL)
+            case_data_rw (&mtf->buffered_case, mtf->last)->f = new_BY;
+          casewriter_write (mtf->output, &mtf->buffered_case);
+        }
+      else
+        new_BY = true;
+
+      case_move (&mtf->buffered_case, c);
+      if (mtf->first != NULL)
+        case_data_rw (&mtf->buffered_case, mtf->first)->f = new_BY;
+
+      if (new_BY)
+        {
+          size_t n = subcase_get_n_values (&mtf->by) * sizeof (union value);
+
+          /* A nonnull prev_BY doubles as the "a case is buffered"
+             flag, so it must be nonnull even when there are no BY
+             variables (FIRST or LAST without BY).  A zero-size
+             allocation is allowed to yield a null pointer, which
+             would cause every buffered case to be overwritten
+             without being written, so allocate at least 1 byte. */
+          if (mtf->prev_BY == NULL)
+            mtf->prev_BY = xmalloc (n > 0 ? n : 1);
+          if (n > 0)
+            memcpy (mtf->prev_BY, by, n);
+        }
+    }
+}
+
+/* Flushes the case held back by output_case(), if there is one.
+   It is necessarily the last case in its BY group, so its LAST
+   flag variable, if any, is set to 1 before it is written. */
+static void
+output_buffered_case (struct mtf_proc *mtf)
+{
+  struct ccase *pending = &mtf->buffered_case;
+
+  /* prev_BY is set only once a case has been buffered. */
+  if (mtf->prev_BY == NULL)
+    return;
+
+  if (mtf->last != NULL)
+    case_data_rw (pending, mtf->last)->f = 1.0;
+  casewriter_write (mtf->output, pending);
+  case_nullify (pending);
+}
+
+/* Merges the dictionary for file F into master dictionary M.
+
+   M takes F's file label if M has none yet, and F's documents are
+   appended to M's.  Each non-scratch variable in F is either cloned
+   into M (when M has no variable of that name) or reconciled with
+   the existing variable: the widths must match, and value labels,
+   missing values, and the variable label are copied only where M's
+   variable lacks them.
+
+   Returns true if successful, false (after issuing an error
+   message) if a variable's width conflicts with an earlier file. */
+static bool
+mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
+{
+  struct dictionary *d = f->dict;
+  const char *d_docs, *m_docs;
+  size_t i;
+
+  if (dict_get_label (m) == NULL)
+    dict_set_label (m, dict_get_label (d));
+
+  d_docs = dict_get_documents (d);
+  m_docs = dict_get_documents (m);
+  if (d_docs != NULL)
+    {
+      if (m_docs == NULL)
+        dict_set_documents (m, d_docs);
+      else
+        {
+          char *new_docs = xasprintf ("%s%s", m_docs, d_docs);
+          dict_set_documents (m, new_docs);
+          free (new_docs);
+        }
+    }
+
+  for (i = 0; i < dict_get_var_cnt (d); i++)
+    {
+      struct variable *dv = dict_get_var (d, i);
+      struct variable *mv;
+
+      /* Scratch variables never reach the master dictionary, so
+         skip them before bothering to look them up. */
+      if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
+        continue;
+
+      mv = dict_lookup_var (m, var_get_name (dv));
+      if (mv == NULL)
+        {
+          dict_clone_var_assert (m, dv, var_get_name (dv));
+          continue;
+        }
+
+      if (var_get_width (mv) != var_get_width (dv))
+        {
+          char *dv_description = var_type_description (dv);
+          char *mv_description = var_type_description (mv);
+          msg (SE, _("Variable %s in file %s (%s) has different "
+                     "type or width from the same variable in "
+                     "earlier file (%s)."),
+               var_get_name (dv), fh_get_name (f->handle),
+               dv_description, mv_description);
+          free (dv_description);
+          free (mv_description);
+          return false;
+        }
+
+      /* The earliest file to supply each attribute wins. */
+      if (var_has_value_labels (dv) && !var_has_value_labels (mv))
+        var_set_value_labels (mv, var_get_value_labels (dv));
+      if (var_has_missing_values (dv) && !var_has_missing_values (mv))
+        var_set_missing_values (mv, var_get_missing_values (dv));
+      if (var_get_label (dv) && !var_get_label (mv))
+        var_set_label (mv, var_get_label (dv));
+    }
+
+  return true;
+}
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 1997-9, 2000, 2006, 2007, 2008 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <stdlib.h>
+
+#include <data/any-writer.h>
+#include <data/case-map.h>
+#include <data/case.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
+#include <data/dictionary.h>
+#include <data/por-file-writer.h>
+#include <data/procedure.h>
+#include <data/sys-file-writer.h>
+#include <data/transformations.h>
+#include <data/variable.h>
+#include <language/command.h>
+#include <language/data-io/file-handle.h>
+#include <language/data-io/trim.h>
+#include <language/lexer/lexer.h>
+#include <libpspp/assertion.h>
+#include <libpspp/compiler.h>
+
+#include "xalloc.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+/* Writing system and portable files. */
+
+/* Type of output file, selected by the command being parsed. */
+enum writer_type
+ {
+ SYSFILE_WRITER, /* System file (SAVE, XSAVE). */
+ PORFILE_WRITER /* Portable file (EXPORT, XEXPORT). */
+ };
+
+/* Type of a command: whether it is registered as a transformation
+   or runs as a procedure. */
+enum command_type
+ {
+ XFORM_CMD, /* Transformation (XSAVE, XEXPORT). */
+ PROC_CMD /* Procedure (SAVE, EXPORT). */
+ };
+
+static int parse_output_proc (struct lexer *, struct dataset *,
+ enum writer_type);
+static int parse_output_trns (struct lexer *, struct dataset *,
+ enum writer_type);
+
+/* SAVE: writes a system file as a procedure. */
+int
+cmd_save (struct lexer *lexer, struct dataset *ds)
+{
+ return parse_output_proc (lexer, ds, SYSFILE_WRITER);
+}
+
+/* EXPORT: writes a portable file as a procedure. */
+int
+cmd_export (struct lexer *lexer, struct dataset *ds)
+{
+ return parse_output_proc (lexer, ds, PORFILE_WRITER);
+}
+
+/* XSAVE: writes a system file as a transformation. */
+int
+cmd_xsave (struct lexer *lexer, struct dataset *ds)
+{
+ return parse_output_trns (lexer, ds, SYSFILE_WRITER);
+}
+
+/* XEXPORT: writes a portable file as a transformation. */
+int
+cmd_xexport (struct lexer *lexer, struct dataset *ds)
+{
+ return parse_output_trns (lexer, ds, PORFILE_WRITER);
+}
+\f
+/* State for an XSAVE or XEXPORT transformation, allocated by
+   parse_output_trns and released by output_trns_free. */
+struct output_trns
+ {
+ struct casewriter *writer; /* Writer that receives a clone of each case. */
+ };
+
+static trns_proc_func output_trns_proc;
+static trns_free_func output_trns_free;
+static struct casewriter *parse_write_command (struct lexer *,
+ struct dataset *,
+ enum writer_type,
+ enum command_type,
+ bool *retain_unselected);
+
+/* Parses and performs the SAVE or EXPORT procedure.
+   WRITER_TYPE selects between a system file and a portable file.
+   Returns CMD_SUCCESS if the command parsed and both closing the
+   writer and committing the procedure succeeded, otherwise
+   CMD_CASCADING_FAILURE. */
+static int
+parse_output_proc (struct lexer *lexer, struct dataset *ds,
+ enum writer_type writer_type)
+{
+ bool retain_unselected;
+ struct variable *saved_filter_variable;
+ struct casewriter *output;
+ bool ok;
+
+ output = parse_write_command (lexer, ds, writer_type, PROC_CMD,
+ &retain_unselected);
+ if (output == NULL)
+ return CMD_CASCADING_FAILURE;
+
+ /* When unselected cases are to be retained, drop the filter
+    variable for the duration of the procedure; it is put back
+    below. */
+ saved_filter_variable = dict_get_filter (dataset_dict (ds));
+ if (retain_unselected)
+ dict_set_filter (dataset_dict (ds), NULL);
+
+ /* Hand the cases from proc_open to the writer, then close the
+    writer before committing so that both results are collected. */
+ casereader_transfer (proc_open (ds), output);
+ ok = casewriter_destroy (output);
+ ok = proc_commit (ds) && ok;
+
+ dict_set_filter (dataset_dict (ds), saved_filter_variable);
+
+ return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
+}
+
+/* Handles XSAVE and XEXPORT: parses the command and, if that
+   succeeds, registers a transformation that owns the resulting
+   writer.  Returns CMD_SUCCESS or CMD_CASCADING_FAILURE. */
+static int
+parse_output_trns (struct lexer *lexer, struct dataset *ds,
+                   enum writer_type writer_type)
+{
+  struct casewriter *writer;
+  struct output_trns *trns;
+
+  writer = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL);
+  if (writer == NULL)
+    return CMD_CASCADING_FAILURE;
+
+  trns = xmalloc (sizeof *trns);
+  trns->writer = writer;
+  add_transformation (ds, output_trns_proc, output_trns_free, trns);
+  return CMD_SUCCESS;
+}
+
+/* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
+ WRITER_TYPE identifies the type of file to write,
+ and COMMAND_TYPE identifies the type of command.
+
+ RETAIN_UNSELECTED must be nonnull exactly when COMMAND_TYPE is
+ PROC_CMD (this is asserted).
+
+ On success, returns a writer.
+ For procedures only, sets *RETAIN_UNSELECTED to true if cases
+ that would otherwise be excluded by FILTER or USE should be
+ included.
+
+ On failure, returns a null pointer. */
+static struct casewriter *
+parse_write_command (struct lexer *lexer, struct dataset *ds,
+ enum writer_type writer_type,
+ enum command_type command_type,
+ bool *retain_unselected)
+{
+ /* Common data. */
+ struct file_handle *handle; /* Output file. */
+ struct dictionary *dict; /* Dictionary for output file. */
+ struct casewriter *writer; /* Writer. */
+ struct case_map *map; /* Map from input data to data for writer. */
+
+ /* Common options. */
+ bool print_map; /* Print map? TODO. */
+ bool print_short_names; /* Print long-to-short name map. TODO. */
+ struct sfm_write_options sysfile_opts;
+ struct pfm_write_options porfile_opts;
+
+ assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
+ assert (command_type == XFORM_CMD || command_type == PROC_CMD);
+ assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
+
+ /* Procedures retain unselected cases unless UNSELECTED=DELETE is
+    given below. */
+ if (command_type == PROC_CMD)
+ *retain_unselected = true;
+
+ /* Work on a private clone of the dataset's dictionary so that
+    the trimming subcommands do not touch the dataset itself. */
+ handle = NULL;
+ dict = dict_clone (dataset_dict (ds));
+ writer = NULL;
+ map = NULL;
+ print_map = false;
+ print_short_names = false;
+ sysfile_opts = sfm_writer_default_options ();
+ porfile_opts = pfm_writer_default_options ();
+
+ case_map_prepare_dict (dict);
+ dict_delete_scratch_vars (dict);
+
+ /* Parse subcommands, which are separated by slashes. */
+ lex_match (lexer, '/');
+ for (;;)
+ {
+ if (lex_match_id (lexer, "OUTFILE"))
+ {
+ if (handle != NULL)
+ {
+ lex_sbc_only_once ("OUTFILE");
+ goto error;
+ }
+
+ lex_match (lexer, '=');
+
+ handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
+ if (handle == NULL)
+ goto error;
+ }
+ else if (lex_match_id (lexer, "NAMES"))
+ print_short_names = true;
+ else if (lex_match_id (lexer, "PERMISSIONS"))
+ {
+ bool cw;
+
+ lex_match (lexer, '=');
+ if (lex_match_id (lexer, "READONLY"))
+ cw = false;
+ else if (lex_match_id (lexer, "WRITEABLE"))
+ cw = true;
+ else
+ {
+ lex_error (lexer, _("expecting %s or %s"),
+ "READONLY", "WRITEABLE");
+ goto error;
+ }
+ sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
+ }
+ /* UNSELECTED is recognized only for procedures. */
+ else if (command_type == PROC_CMD && lex_match_id (lexer, "UNSELECTED"))
+ {
+ lex_match (lexer, '=');
+ if (lex_match_id (lexer, "RETAIN"))
+ *retain_unselected = true;
+ else if (lex_match_id (lexer, "DELETE"))
+ *retain_unselected = false;
+ else
+ {
+ lex_error (lexer, _("expecting %s or %s"), "RETAIN", "DELETE");
+ goto error;
+ }
+ }
+ /* COMPRESSED, UNCOMPRESSED, and VERSION are recognized only
+    when writing a system file. */
+ else if (writer_type == SYSFILE_WRITER
+ && lex_match_id (lexer, "COMPRESSED"))
+ sysfile_opts.compress = true;
+ else if (writer_type == SYSFILE_WRITER
+ && lex_match_id (lexer, "UNCOMPRESSED"))
+ sysfile_opts.compress = false;
+ else if (writer_type == SYSFILE_WRITER
+ && lex_match_id (lexer, "VERSION"))
+ {
+ lex_match (lexer, '=');
+ if (!lex_force_int (lexer))
+ goto error;
+ sysfile_opts.version = lex_integer (lexer);
+ lex_get (lexer);
+ }
+ /* TYPE and DIGITS are recognized only when writing a
+    portable file. */
+ else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "TYPE"))
+ {
+ lex_match (lexer, '=');
+ if (lex_match_id (lexer, "COMMUNICATIONS"))
+ porfile_opts.type = PFM_COMM;
+ else if (lex_match_id (lexer, "TAPE"))
+ porfile_opts.type = PFM_TAPE;
+ else
+ {
+ lex_error (lexer, _("expecting %s or %s"), "COMM", "TAPE");
+ goto error;
+ }
+ }
+ else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "DIGITS"))
+ {
+ lex_match (lexer, '=');
+ if (!lex_force_int (lexer))
+ goto error;
+ porfile_opts.digits = lex_integer (lexer);
+ lex_get (lexer);
+ }
+ /* Anything else must be a dictionary-trimming subcommand;
+    parse_dict_trim reports the error if it is not. */
+ else if (!parse_dict_trim (lexer, dict))
+ goto error;
+
+ if (!lex_match (lexer, '/'))
+ break;
+ }
+ if (lex_end_of_command (lexer) != CMD_SUCCESS)
+ goto error;
+
+ if (handle == NULL)
+ {
+ lex_sbc_missing (lexer, "OUTFILE");
+ goto error;
+ }
+
+ dict_delete_scratch_vars (dict);
+ dict_compact_values (dict);
+
+ /* Handles that refer to a file get the writer chosen by
+    WRITER_TYPE; any other handle goes through any_writer_open. */
+ if (fh_get_referent (handle) == FH_REF_FILE)
+ {
+ switch (writer_type)
+ {
+ case SYSFILE_WRITER:
+ writer = sfm_open_writer (handle, dict, sysfile_opts);
+ break;
+ case PORFILE_WRITER:
+ writer = pfm_open_writer (handle, dict, porfile_opts);
+ break;
+ }
+ }
+ else
+ writer = any_writer_open (handle, dict);
+ if (writer == NULL)
+ goto error;
+
+ /* If case_map_from_dict yields a map, wrap the writer in a
+    translator that applies it to each case. */
+ map = case_map_from_dict (dict);
+ if (map != NULL)
+ writer = case_map_create_output_translator (map, writer);
+ dict_destroy (dict);
+
+ fh_unref (handle);
+ return writer;
+
+ /* Shared cleanup for every failure path.  HANDLE, WRITER, and MAP
+    may still be null here. */
+ error:
+ fh_unref (handle);
+ casewriter_destroy (writer);
+ dict_destroy (dict);
+ case_map_destroy (map);
+ return NULL;
+}
+
+/* Transformation callback for XSAVE and XEXPORT: passes a clone of
+   case C to the transformation's writer and lets case processing
+   continue. */
+static int
+output_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED)
+{
+  struct output_trns *trns = trns_;
+  struct ccase copy;
+
+  case_clone (&copy, c);
+  casewriter_write (trns->writer, &copy);
+  return TRNS_CONTINUE;
+}
+
+/* Destroys the writer owned by an XSAVE or XEXPORT transformation
+   and releases the transformation itself.
+   Returns the result of destroying the writer: true if successful,
+   false if an I/O error occurred. */
+static bool
+output_trns_free (void *trns_)
+{
+  struct output_trns *trns = trns_;
+  bool ok;
+
+  ok = casewriter_destroy (trns->writer);
+  free (trns);
+  return ok;
+}
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 1997-9, 2000, 2006, 2007, 2008 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <language/data-io/trim.h>
+
+#include <stdlib.h>
+
+#include <data/dictionary.h>
+#include <data/variable.h>
+#include <language/lexer/lexer.h>
+#include <language/lexer/variable-parser.h>
+#include <libpspp/message.h>
+
+#include "xalloc.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+/* Parses one of the variable-rearranging subcommands shared by the
+   commands that read and write system files (MAP, DROP, KEEP,
+   RENAME) and applies it to DICT.  Returns true on success, false
+   on failure, including when no such subcommand is present. */
+bool
+parse_dict_trim (struct lexer *lexer, struct dictionary *dict)
+{
+  if (lex_match_id (lexer, "MAP"))
+    {
+      /* FIXME. */
+      return true;
+    }
+
+  if (lex_match_id (lexer, "DROP"))
+    return parse_dict_drop (lexer, dict);
+
+  if (lex_match_id (lexer, "KEEP"))
+    return parse_dict_keep (lexer, dict);
+
+  if (lex_match_id (lexer, "RENAME"))
+    return parse_dict_rename (lexer, dict);
+
+  lex_error (lexer, _("expecting a valid subcommand"));
+  return false;
+}
+
+/* Parses and performs the RENAME subcommand of GET, SAVE, and
+   related commands.
+
+   Two forms are accepted: a single unparenthesized "old=new" pair,
+   which is applied immediately, or any number of parenthesized
+   groups "(old... = new...)", whose renames are collected and then
+   applied together with a single dict_rename_vars call.
+
+   Returns true if successful, false on failure. */
+bool
+parse_dict_rename (struct lexer *lexer, struct dictionary *dict)
+{
+  struct variable **v;
+  char **new_names;
+  size_t nv, nn;
+  char *err_name;
+  bool success = false;
+  size_t i;
+  int group;
+
+  lex_match (lexer, '=');
+  if (lex_token (lexer) != '(')
+    {
+      /* Single "old=new" form. */
+      struct variable *var = parse_variable (lexer, dict);
+      if (var == NULL)
+        return false;
+      if (!lex_force_match (lexer, '=')
+          || !lex_force_id (lexer))
+        return false;
+      if (dict_lookup_var (dict, lex_tokid (lexer)) != NULL)
+        {
+          msg (SE, _("Cannot rename %s as %s because there already exists "
+                     "a variable named %s. To rename variables with "
+                     "overlapping names, use a single RENAME subcommand "
+                     "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
+                     "\"/RENAME (A B C=B C A)\"."),
+               var_get_name (var), lex_tokid (lexer), lex_tokid (lexer));
+          return false;
+        }
+
+      dict_rename_var (dict, var, lex_tokid (lexer));
+      lex_get (lexer);
+      return true;
+    }
+
+  /* Parenthesized form: accumulate old variables in V and new names
+     in NEW_NAMES across all groups. */
+  nv = nn = 0;
+  v = NULL;
+  new_names = NULL;
+  group = 1;
+  while (lex_match (lexer, '('))
+    {
+      size_t old_nv = nv;
+
+      if (!parse_variables (lexer, dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
+        goto done;
+      if (!lex_match (lexer, '='))
+        {
+          msg (SE, _("`=' expected after variable list."));
+          goto done;
+        }
+      if (!parse_DATA_LIST_vars (lexer, &new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
+        goto done;
+      if (nn != nv)
+        {
+          /* The totals matched before this group, so the
+             differences below are this group's own counts. */
+          msg (SE, _("Number of variables on left side of `=' (%zu) does not "
+                     "match number of variables on right side (%zu), in "
+                     "parenthesized group %d of RENAME subcommand."),
+               nv - old_nv, nn - old_nv, group);
+          goto done;
+        }
+      if (!lex_force_match (lexer, ')'))
+        goto done;
+      group++;
+    }
+
+  if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
+    {
+      msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
+      goto done;
+    }
+  success = true;
+
+ done:
+  for (i = 0; i < nn; i++)
+    free (new_names[i]);
+  free (new_names);
+  free (v);
+
+  return success;
+}
+
+/* Parses the DROP subcommand of GET, SAVE, and related commands
+   and deletes the listed variables from DICT.
+   Returns true if successful, false on failure, which includes
+   the case where the deletion leaves DICT with no variables. */
+bool
+parse_dict_drop (struct lexer *lexer, struct dictionary *dict)
+{
+  struct variable **doomed;
+  size_t n_doomed;
+
+  lex_match (lexer, '=');
+  if (!parse_variables (lexer, dict, &doomed, &n_doomed, PV_NONE))
+    return false;
+
+  dict_delete_vars (dict, doomed, n_doomed);
+  free (doomed);
+
+  if (dict_get_var_cnt (dict) != 0)
+    return true;
+
+  msg (SE, _("Cannot DROP all variables from dictionary."));
+  return false;
+}
+
+/* Parses the KEEP subcommand of GET, SAVE, and related commands.
+   The listed variables are moved to the front of DICT in the
+   order given, and every other variable is deleted.
+   Returns true if successful, false on failure. */
+bool
+parse_dict_keep (struct lexer *lexer, struct dictionary *dict)
+{
+  struct variable **vars;
+  size_t n_keep;
+  size_t n_rest;
+  size_t idx;
+
+  lex_match (lexer, '=');
+  if (!parse_variables (lexer, dict, &vars, &n_keep, PV_NONE))
+    return false;
+
+  /* Kept variables go first... */
+  dict_reorder_vars (dict, vars, n_keep);
+
+  /* ...so everything after them is what must be deleted.  The
+     array is reused to hold those trailing variables. */
+  n_rest = dict_get_var_cnt (dict) - n_keep;
+  vars = xnrealloc (vars, n_rest, sizeof *vars);
+  for (idx = 0; idx < n_rest; idx++)
+    vars[idx] = dict_get_var (dict, n_keep + idx);
+  dict_delete_vars (dict, vars, n_rest);
+  free (vars);
+
+  return true;
+}
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 1997-9, 2000, 2006, 2007, 2008 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef LANGUAGE_DATA_IO_TRIM_H
+#define LANGUAGE_DATA_IO_TRIM_H
+
+#include <stdbool.h>
+
+struct lexer;
+struct dictionary;
+
+/* Parsers for the variable-rearranging subcommands (MAP, DROP,
+   KEEP, RENAME).  Each modifies the given dictionary and returns
+   true on success, false on failure. */
+bool parse_dict_trim (struct lexer *, struct dictionary *);
+bool parse_dict_rename (struct lexer *, struct dictionary *);
+bool parse_dict_drop (struct lexer *, struct dictionary *);
+bool parse_dict_keep (struct lexer *, struct dictionary *);
+
+#endif /* trim.h */
#include <stdlib.h>
#include <data/any-writer.h>
-#include <data/case-ordering.h>
#include <data/case.h>
#include <data/casegrouper.h>
#include <data/casereader.h>
#include <data/format.h>
#include <data/procedure.h>
#include <data/settings.h>
+#include <data/subcase.h>
#include <data/sys-file-writer.h>
#include <data/variable.h>
#include <language/command.h>
struct agr_proc
{
/* Break variables. */
- struct case_ordering *sort; /* Sort criteria (break variable). */
+ struct subcase sort; /* Sort criteria (break variables). */
const struct variable **break_vars; /* Break variables. */
size_t break_var_cnt; /* Number of break variables. */
struct ccase break_case; /* Last values of break variables. */
agr.dict = dict_create ();
agr.src_dict = dict;
+ subcase_init_empty (&agr.sort);
dict_set_label (agr.dict, dict_get_label (dict));
dict_set_documents (agr.dict, dict_get_documents (dict));
int i;
lex_match (lexer, '=');
- agr.sort = parse_case_ordering (lexer, dict,
-
- &saw_direction);
- if (agr.sort == NULL)
+ if (!parse_sort_criteria (lexer, dict, &agr.sort, &agr.break_vars,
+ &saw_direction))
goto error;
- case_ordering_get_vars (agr.sort,
- &agr.break_vars, &agr.break_var_cnt);
+ agr.break_var_cnt = subcase_get_n_fields (&agr.sort);
for (i = 0; i < agr.break_var_cnt; i++)
dict_clone_var_assert (agr.dict, agr.break_vars[i],
}
input = proc_open (ds);
- if (agr.sort != NULL && !presorted)
+ if (!subcase_is_empty (&agr.sort) && !presorted)
{
- input = sort_execute (input, agr.sort);
- agr.sort = NULL;
+ input = sort_execute (input, &agr.sort);
+ subcase_clear (&agr.sort);
}
for (grouper = casegrouper_create_vars (input, agr.break_vars,
{
struct agr_var *iter, *next;
- case_ordering_destroy (agr->sort);
+ subcase_destroy (&agr->sort);
free (agr->break_vars);
case_destroy (&agr->break_case);
for (iter = agr->agr_vars; iter; iter = next)
break;
case MEDIAN:
{
- struct case_ordering *ordering = case_ordering_create ();
+ struct subcase ordering;
if ( ! iter->subject)
iter->subject = var_create_internal (0);
if ( ! iter->weight)
iter->weight = var_create_internal (1);
- case_ordering_add_var (ordering, iter->subject, SRT_ASCEND);
+ subcase_init_var (&ordering, iter->subject, SC_ASCEND);
+ iter->writer = sort_create_writer (&ordering, 2);
+ subcase_destroy (&ordering);
- iter->writer = sort_create_writer (ordering, 2);
iter->cc = 0;
}
break;
cat1[v].value = value_dup (value, width);
cat1[v].count = w;
}
- else if ( 0 == compare_values (cat1[v].value, value, var))
+ else if ( 0 == compare_values_short (cat1[v].value, value, var))
cat1[v].count += w;
else if ( NULL == cat2[v].value )
{
cat2[v].value = value_dup (value, width);
cat2[v].count = w;
}
- else if ( 0 == compare_values (cat2[v].value, value, var))
+ else if ( 0 == compare_values_short (cat2[v].value, value, var))
cat2[v].count += w;
else if ( bst->category1 == SYSMIS)
msg (ME, _("Variable %s is not dichotomous"), var_get_name (var));
#include <data/casegrouper.h>
#include <data/casereader.h>
#include <data/casewriter.h>
-#include <data/case-ordering.h>
#include <data/dictionary.h>
#include <data/procedure.h>
+#include <data/subcase.h>
#include <data/value-labels.h>
#include <data/variable.h>
#include <language/command.h>
{
/* In this case, we need to sort the data, so we create a sorting
casewriter */
- struct case_ordering *up_ordering = case_ordering_create ();
-
- case_ordering_add_var (up_ordering, dependent_vars[v], SRT_ASCEND);
- writer = sort_create_writer (up_ordering,
+ struct subcase up_ordering;
+ subcase_init_var (&up_ordering, dependent_vars[v], SC_ASCEND);
+ writer = sort_create_writer (&up_ordering,
casereader_get_value_cnt (reader));
+ subcase_destroy (&up_ordering);
}
else
{
struct casereader *group = NULL;
struct casereader *level1;
struct casegrouper *grouper1 = NULL;
- struct case_ordering *ordering1 = case_ordering_create ();
- case_ordering_add_var (ordering1, factor->indep_var[0], SRT_ASCEND);
level1 = casereader_clone (input);
-
- level1 = sort_execute (level1,
- case_ordering_clone (ordering1));
- grouper1 = casegrouper_create_case_ordering (level1, ordering1);
- case_ordering_destroy (ordering1);
+ level1 = sort_execute_1var (level1, factor->indep_var[0]);
+ grouper1 = casegrouper_create_vars (level1, &factor->indep_var[0], 1);
while (casegrouper_get_next_group (grouper1, &group))
{
int n_groups = 0;
struct casereader *group2 = NULL;
struct casegrouper *grouper2 = NULL;
- struct case_ordering *ordering2 = case_ordering_create ();
- case_ordering_add_var (ordering2,
- factor->indep_var[1], SRT_ASCEND);
- group_copy = sort_execute (group_copy,
- case_ordering_clone (ordering2));
- grouper2 =
- casegrouper_create_case_ordering (group_copy, ordering2);
+ group_copy = sort_execute_1var (group_copy,
+ factor->indep_var[1]);
- case_ordering_destroy (ordering2);
+ grouper2 = casegrouper_create_vars (group_copy,
+ &factor->indep_var[1], 1);
while (casegrouper_get_next_group (grouper2, &group2))
{
{
if ( last_value == NULL ||
- compare_values (last_value, result->value[0],
- fctr->indep_var[0]))
+ compare_values_short (last_value, result->value[0],
+ fctr->indep_var[0]))
{
struct string str;
const struct freq *f2 = _f2;
const struct variable *var = _var;
- return compare_values (f1->value, f2->value, var );
+ return compare_values_short (f1->value, f2->value, var );
}
unsigned int
{
const struct freq *f = _f;
- return hash_value (f->value, var);
+ return hash_value_short (f->value, var);
}
/* Free function to be used on FR whose value parameter has been copied */
taint = taint_clone (casereader_get_taint (input));
global_group_hash = hsh_create (4,
- compare_values,
- hash_value,
+ compare_values_short,
+ hash_value_short,
free_value,
indep_var);
#include <limits.h>
#include <math.h>
-#include <data/case-ordering.h>
#include <data/case.h>
#include <data/casegrouper.h>
#include <data/casereader.h>
#include <data/missing-values.h>
#include <data/procedure.h>
#include <data/short-names.h>
+#include <data/subcase.h>
#include <data/variable.h>
#include <language/command.h>
#include <language/stats/sort-criteria.h>
static struct rank_spec *rank_specs;
static size_t n_rank_specs;
-static struct case_ordering *sc;
+static struct subcase sc;
static const struct variable **group_vars;
static size_t n_group_vars;
static bool
-rank_cmd (struct dataset *ds, const struct case_ordering *sc,
+rank_cmd (struct dataset *ds, const struct subcase *sc,
const struct rank_spec *rank_specs, int n_rank_specs)
{
struct dictionary *d = dataset_dict (ds);
bool ok = true;
int i;
- for (i = 0 ; i < case_ordering_get_var_cnt (sc) ; ++i )
+ for (i = 0 ; i < subcase_get_n_fields (sc) ; ++i )
{
/* Rank variable at index I in SC. */
struct casegrouper *split_grouper;
while (casegrouper_get_next_group (split_grouper, &split_group))
{
- struct case_ordering *ordering;
+ struct subcase ordering;
struct casereader *ordered;
struct casegrouper *by_grouper;
struct casereader *by_group;
- int j;
/* Sort this split group by the BY variables as primary
keys and the rank variable as secondary key. */
- ordering = case_ordering_create ();
- for (j = 0; j < n_group_vars; j++)
- case_ordering_add_var (ordering, group_vars[j], SRT_ASCEND);
- case_ordering_add_var (ordering,
- case_ordering_get_var (sc, i),
- case_ordering_get_direction (sc, i));
- ordered = sort_execute (split_group, ordering);
+ subcase_init_vars (&ordering, group_vars, n_group_vars);
+ subcase_add_var (&ordering, src_vars[i],
+ subcase_get_direction (sc, i));
+ ordered = sort_execute (split_group, &ordering);
+ subcase_destroy (&ordering);
/* Rank the rank variable within this split group. */
by_grouper = casegrouper_create_vars (ordered,
rank_specs = NULL;
n_rank_specs = 0;
- case_ordering_destroy (sc);
- sc = NULL;
+ subcase_destroy (&sc);
free (src_vars);
src_vars = NULL;
size_t i;
n_rank_specs = 0;
+ subcase_init_empty (&sc);
if ( !parse_rank (lexer, ds, &cmd, NULL) )
{
rank_cleanup ();
rank_specs = xmalloc (sizeof (*rank_specs));
rank_specs[0].rfunc = RANK;
rank_specs[0].destvars =
- xcalloc (case_ordering_get_var_cnt (sc), sizeof (struct variable *));
+ xcalloc (subcase_get_n_fields (&sc), sizeof (struct variable *));
n_rank_specs = 1;
}
- assert ( case_ordering_get_var_cnt (sc) == n_src_vars);
+ assert ( subcase_get_n_fields (&sc) == n_src_vars);
/* Create variables for all rank destinations which haven't
already been created with INTO.
add_transformation (ds, create_resort_key, 0, order);
/* Do the ranking */
- result = rank_cmd (ds, sc, rank_specs, n_rank_specs);
+ result = rank_cmd (ds, &sc, rank_specs, n_rank_specs);
/* Put the active file back in its original order. Delete
our sort key, which we don't need anymore. */
{
- struct case_ordering *ordering = case_ordering_create ();
struct casereader *sorted;
- case_ordering_add_var (ordering, order, SRT_ASCEND);
+
/* FIXME: loses error conditions. */
+
proc_discard_output (ds);
- sorted = sort_execute (proc_open (ds), ordering);
+ sorted = sort_execute_1var (proc_open (ds), order);
result = proc_commit (ds) && result;
dict_delete_var (dataset_dict (ds), order);
&& lex_token (lexer) != T_ALL)
return 2;
- sc = parse_case_ordering (lexer, dataset_dict (ds), NULL);
- if (sc == NULL)
+ if (!parse_sort_criteria (lexer, dataset_dict (ds), &sc, &src_vars, NULL))
return 0;
- case_ordering_get_vars (sc, &src_vars, &n_src_vars);
+ n_src_vars = subcase_get_n_fields (&sc);
if ( lex_match (lexer, T_BY) )
{
rank_specs[n_rank_specs - 1].destvars = NULL;
rank_specs[n_rank_specs - 1].destvars =
- xcalloc (case_ordering_get_var_cnt (sc),
- sizeof (struct variable *));
+ xcalloc (subcase_get_n_fields (&sc), sizeof (struct variable *));
if (lex_match_id (lexer, "INTO"))
{
msg(SE, _("Variable %s already exists."), lex_tokid (lexer));
return 0;
}
- if ( var_count >= case_ordering_get_var_cnt (sc) )
+ if ( var_count >= subcase_get_n_fields (&sc) )
{
msg(SE, _("Too many variables in INTO clause."));
return 0;
#include <language/command.h>
#include <language/lexer/lexer.h>
#include <libpspp/message.h>
-#include <data/case-ordering.h>
+#include <data/subcase.h>
#include <math/sort.h>
#include <sys/types.h>
int
cmd_sort_cases (struct lexer *lexer, struct dataset *ds)
{
- struct case_ordering *ordering;
+ struct subcase ordering;
struct casereader *output;
bool ok = false;
lex_match (lexer, T_BY);
proc_cancel_temporary_transformations (ds);
- ordering = parse_case_ordering (lexer, dataset_dict (ds), NULL);
- if (ordering == NULL)
+ subcase_init_empty (&ordering);
+ if (!parse_sort_criteria (lexer, dataset_dict (ds), &ordering, NULL, NULL))
return CMD_CASCADING_FAILURE;
if (settings_get_testing_mode () && lex_match (lexer, '/'))
}
proc_discard_output (ds);
- output = sort_execute (proc_open (ds), ordering);
- ordering = NULL;
+ output = sort_execute (proc_open (ds), &ordering);
ok = proc_commit (ds);
ok = proc_set_active_file_data (ds, output) && ok;
min_buffers = 64;
max_buffers = INT_MAX;
- case_ordering_destroy (ordering);
+ subcase_destroy (&ordering);
return ok ? lex_end_of_command (lexer) : CMD_CASCADING_FAILURE;
}
#include <stdlib.h>
-#include <data/case-ordering.h>
#include <data/dictionary.h>
+#include <data/subcase.h>
#include <data/variable.h>
#include <language/lexer/lexer.h>
#include <language/lexer/variable-parser.h>
#include "gettext.h"
#define _(msgid) gettext (msgid)
-/* Parses a list of sort keys and returns a struct sort_criteria
- based on it. Returns a null pointer on error.
+/* Parses a list of sort fields and appends them to ORDERING,
+ which the caller must already have initialized.
+ Returns true if successful, false on error.
If SAW_DIRECTION is nonnull, sets *SAW_DIRECTION to true if at
least one parenthesized sort direction was specified, false
otherwise. */
-struct case_ordering *
-parse_case_ordering (struct lexer *lexer, const struct dictionary *dict,
- bool *saw_direction)
+bool
+parse_sort_criteria (struct lexer *lexer, const struct dictionary *dict,
+ struct subcase *ordering,
+ const struct variable ***vars, bool *saw_direction)
{
- struct case_ordering *ordering = case_ordering_create ();
- const struct variable **vars = NULL;
+ const struct variable **local_vars = NULL;
size_t var_cnt = 0;
- if (saw_direction != NULL)
+ if (vars == NULL)
+ vars = &local_vars;
+ *vars = NULL;
+
+ if (saw_direction != NULL)
*saw_direction = false;
do
{
- enum sort_direction direction;
+ size_t prev_var_cnt = var_cnt;
+ enum subcase_direction direction;
size_t i;
/* Variables. */
- free (vars);
- vars = NULL;
- if (!parse_variables_const (lexer, dict, &vars, &var_cnt, PV_NO_SCRATCH))
+ if (!parse_variables_const (lexer, dict, vars, &var_cnt,
+ PV_APPEND | PV_NO_SCRATCH))
goto error;
/* Sort direction. */
if (lex_match (lexer, '('))
{
if (lex_match_id (lexer, "D") || lex_match_id (lexer, "DOWN"))
- direction = SRT_DESCEND;
+ direction = SC_DESCEND;
else if (lex_match_id (lexer, "A") || lex_match_id (lexer, "UP"))
- direction = SRT_ASCEND;
+ direction = SC_ASCEND;
else
{
msg (SE, _("`A' or `D' expected inside parentheses."));
*saw_direction = true;
}
else
- direction = SRT_ASCEND;
-
- for (i = 0; i < var_cnt; i++)
- if (!case_ordering_add_var (ordering, vars[i], direction))
- msg (SW, _("Variable %s specified twice in sort criteria."),
- var_get_name (vars[i]));
+ direction = SC_ASCEND;
+
+ for (i = prev_var_cnt; i < var_cnt; i++)
+ {
+ const struct variable *var = (*vars)[i];
+ if (!subcase_add_var (ordering, var, direction))
+ msg (SW, _("Variable %s specified twice in sort criteria."),
+ var_get_name (var));
+ }
}
while (lex_token (lexer) == T_ID
&& dict_lookup_var (dict, lex_tokid (lexer)) != NULL);
- free (vars);
- return ordering;
+ free (local_vars);
+ return true;
- error:
- free (vars);
- case_ordering_destroy (ordering);
- return NULL;
+error:
+ free (local_vars);
+ if (vars)
+ *vars = NULL;
+ return false;
}
struct dictionary;
struct lexer;
+struct variable;
+struct subcase;
-struct case_ordering *parse_case_ordering (struct lexer *,
- const struct dictionary *,
- bool *saw_direction);
+bool parse_sort_criteria (struct lexer *, const struct dictionary *,
+ struct subcase *, const struct variable ***vars,
+ bool *saw_direction);
-#endif /* SORT_PRS_H */
+#endif /* sort-criteria.h */
which_group (const struct group_statistics *g,
const struct group_properties *p)
{
- if ( 0 == compare_values (&g->id, &p->v.g_value[0], p->indep_var))
+ if ( 0 == compare_values_short (&g->id, &p->v.g_value[0], p->indep_var))
return 0;
- if ( 0 == compare_values (&g->id, &p->v.g_value[1], p->indep_var))
+ if ( 0 == compare_values_short (&g->id, &p->v.g_value[1], p->indep_var))
return 1;
return 2;
#include <data/variable.h>
#include <data/casereader.h>
#include <data/casewriter.h>
-#include <data/case-ordering.h>
+#include <data/subcase.h>
#include <math/sort.h>
#include <libpspp/message.h>
#include <xalloc.h>
struct casereader *r = casereader_clone (input);
struct casewriter *writer;
struct ccase c;
- struct case_ordering *ordering = case_ordering_create ();
+ struct subcase ordering;
variable_pair *vp = &t2s->pairs[i];
const int reader_width = weight ? 3 : 2;
ws[i].sign = var_create_internal (0);
ws[i].absdiff = var_create_internal (1);
- case_ordering_add_var (ordering, ws[i].absdiff, SRT_ASCEND);
-
-
r = casereader_create_filter_missing (r, *vp, 2,
exclude,
NULL, NULL);
- writer = sort_create_writer (ordering, reader_width);
+ subcase_init_var (&ordering, ws[i].absdiff, SC_ASCEND);
+ writer = sort_create_writer (&ordering, reader_width);
+ subcase_destroy (&ordering);
+
while (casereader_read (r, &c))
{
struct ccase output;
char
get_system_decimal (void)
{
- char *radix_char = NULL;
+ char radix_char;
- char *ol = setlocale (LC_NUMERIC, NULL);
+ /* Keep a private copy of the current LC_NUMERIC name: the string
+ returned by setlocale may be overwritten by a later setlocale
+ call, so the raw pointer cannot be trusted when the locale is
+ restored below. */
+ char *ol = xstrdup (setlocale (LC_NUMERIC, NULL));
setlocale (LC_NUMERIC, "");
#if HAVE_NL_LANGINFO
- radix_char = nl_langinfo (RADIXCHAR);
+ radix_char = nl_langinfo (RADIXCHAR)[0];
#else
{
- char *buf = xmalloc (10);
- snprintf (buf, 10, "%f", 2.5);
- radix_char = &buf[1];
+ char buf[10];
+ snprintf (buf, sizeof buf, "%f", 2.5);
+ radix_char = buf[1];
}
#endif
/* We MUST leave LC_NUMERIC untouched, since it would
otherwise interfere with data_{in,out} */
setlocale (LC_NUMERIC, ol);
+ free (ol);
- return *radix_char;
+ return radix_char;
}
if (val != NULL)
{
j = i;
- while (j < n_coef && compare_values (pspp_coeff_get_value (coefs[j], v),
- val, v) != 0)
+ while (j < n_coef && compare_values_short (pspp_coeff_get_value (coefs[j], v),
+ val, v) != 0)
{
j++;
}
col += i;
y = -1.0 * cat_get_category_count (i, v) / ssize;
tmp_val = cat_subscript_to_value (i, v);
- if (compare_values (tmp_val, val1, v))
+ if (compare_values_short (tmp_val, val1, v))
{
y += -1.0;
}
row += i;
x = -1.0 * cat_get_category_count (i, v1) / ssize;
tmp_val = cat_subscript_to_value (i, v1);
- if (compare_values (tmp_val, val1, v1))
+ if (compare_values_short (tmp_val, val1, v1))
{
x += 1.0;
}
}
if (var_is_numeric (v1) && var_is_alpha (v2))
{
- if (compare_values (val2, c->val2, v2))
+ if (compare_values_short (val2, c->val2, v2))
{
return 0;
}
}
if (var_is_alpha (v1) && var_is_numeric (v2))
{
- if (compare_values (val1, c->val1, v1))
+ if (compare_values_short (val1, c->val1, v1))
{
return 0;
}
}
if (var_is_alpha (v1) && var_is_alpha (v2))
{
- if (compare_values (val1, c->val1, v1))
+ if (compare_values_short (val1, c->val1, v1))
{
- if (compare_values (val2, c->val2, v2))
+ if (compare_values_short (val2, c->val2, v2))
{
return 0;
}
{
i = 0;
tmp_val = cat_subscript_to_value (i, v1);
- while (!compare_values (tmp_val, val1, v1))
+ while (!compare_values_short (tmp_val, val1, v1))
{
i++;
tmp_val = cat_subscript_to_value (i, v1);
col = design_matrix_var_to_column (cov, v2);
i = 0;
tmp_val = cat_subscript_to_value (i, v1);
- while (!compare_values (tmp_val, val1, v1))
+ while (!compare_values_short (tmp_val, val1, v1))
{
i++;
tmp_val = cat_subscript_to_value (i, v1);
{
const struct group_statistics *a = a_;
const struct group_statistics *b = b_;
- return compare_values(&a->id, &b->id, var);
+ return compare_values_short (&a->id, &b->id, var);
}
unsigned id_hash;
const struct group_statistics *g = g_;;
- id_hash = hash_value(&g->id, var);
+ id_hash = hash_value_short (&g->id, var);
return id_hash;
}
#include <math/coefficient.h>
#include <math/linreg.h>
#include <math/coefficient.h>
-#include <math/covariance-matrix.h>
#include <math/design-matrix.h>
#include <src/data/category.h>
#include <src/data/variable.h>
only variables in the model are in the covariance matrix.
*/
static struct design_matrix *
-rearrange_covariance_matrix (const struct design_matrix *cov, pspp_linreg_cache *c)
+rearrange_covariance_matrix (const struct covariance_matrix *cm, pspp_linreg_cache *c)
{
const struct variable **model_vars;
+ struct design_matrix *cov;
struct design_matrix *result;
size_t *permutation;
size_t i;
size_t j;
size_t k;
+ assert (cm != NULL);
+ cov = covariance_to_design (cm);
assert (cov != NULL);
assert (c != NULL);
assert (cov->m->size1 > 0);
set CACHE->N_COEFFS.
*/
void
-pspp_linreg_with_cov (const struct design_matrix *full_cov,
+pspp_linreg_with_cov (const struct covariance_matrix *full_cov,
pspp_linreg_cache * cache)
{
struct design_matrix *cov;
cache_init (cache);
reg_sweep (cov->m);
post_sweep_computations (cache, cov, cov->m);
- covariance_matrix_destroy (cov);
+ design_matrix_destroy (cov);
}
#include <gsl/gsl_vector.h>
#include <gsl/gsl_matrix.h>
#include <src/math/coefficient.h>
+#include <math/covariance-matrix.h>
enum
{
/*
Regression using only the covariance matrix.
*/
-void pspp_linreg_with_cov (const struct design_matrix *, pspp_linreg_cache *);
+void pspp_linreg_with_cov (const struct covariance_matrix *, pspp_linreg_cache *);
#endif
#include <math/merge.h>
-#include <data/case-ordering.h>
#include <data/case.h>
#include <data/casereader.h>
#include <data/casewriter.h>
+#include <data/subcase.h>
#include <libpspp/array.h>
#include <libpspp/assertion.h>
#include <libpspp/taint.h>
struct merge
{
- struct case_ordering *ordering;
+ struct subcase ordering;
struct merge_input inputs[MAX_MERGE_ORDER];
size_t input_cnt;
size_t value_cnt;
static void do_merge (struct merge *m);
struct merge *
-merge_create (const struct case_ordering *ordering, size_t value_cnt)
+merge_create (const struct subcase *ordering, size_t value_cnt)
{
struct merge *m = xmalloc (sizeof *m);
- m->ordering = case_ordering_clone (ordering);
+ subcase_clone (&m->ordering, ordering);
m->input_cnt = 0;
m->value_cnt = value_cnt;
return m;
{
size_t i;
- case_ordering_destroy (m->ordering);
+ subcase_destroy (&m->ordering);
for (i = 0; i < m->input_cnt; i++)
casereader_destroy (m->inputs[i].reader);
free (m);
min = 0;
for (i = 1; i < m->input_cnt; i++)
- if (case_ordering_compare_cases (&m->inputs[i].c, &m->inputs[min].c,
- m->ordering) < 0)
+ if (subcase_compare_3way (&m->ordering, &m->inputs[i].c,
+ &m->ordering, &m->inputs[min].c) < 0)
min = i;
casewriter_write (w, &m->inputs[min].c);
#include <stdbool.h>
#include <stddef.h>
-struct case_ordering;
+struct subcase;
struct casereader;
-struct merge *merge_create (const struct case_ordering *, size_t);
+struct merge *merge_create (const struct subcase *, size_t);
void merge_destroy (struct merge *);
void merge_append (struct merge *, struct casereader *);
struct casereader *merge_make_reader (struct merge *);
#include <stdio.h>
-#include <data/case-ordering.h>
#include <data/case.h>
#include <data/casereader.h>
#include <data/casewriter.h>
#include <data/casewriter-provider.h>
#include <data/settings.h>
+#include <data/subcase.h>
#include <libpspp/array.h>
#include <libpspp/assertion.h>
#include <math/merge.h>
struct sort_writer
{
size_t value_cnt;
- struct case_ordering *ordering;
+ struct subcase ordering;
struct merge *merge;
struct pqueue *pqueue;
static struct casewriter_class sort_casewriter_class;
-static struct pqueue *pqueue_create (const struct case_ordering *, size_t);
+static struct pqueue *pqueue_create (const struct subcase *, size_t);
static void pqueue_destroy (struct pqueue *);
static bool pqueue_is_full (const struct pqueue *);
static bool pqueue_is_empty (const struct pqueue *);
static void output_record (struct sort_writer *);
struct casewriter *
-sort_create_writer (struct case_ordering *ordering, size_t value_cnt)
+sort_create_writer (const struct subcase *ordering, size_t value_cnt)
{
struct sort_writer *sort;
sort = xmalloc (sizeof *sort);
sort->value_cnt = value_cnt;
- sort->ordering = case_ordering_clone (ordering);
+ subcase_clone (&sort->ordering, ordering);
sort->merge = merge_create (ordering, value_cnt);
sort->pqueue = pqueue_create (ordering, value_cnt);
sort->run = NULL;
sort->run_id = 0;
case_nullify (&sort->run_end);
- case_ordering_destroy (ordering);
-
return casewriter_create (value_cnt, &sort_casewriter_class, sort);
}
output_record (sort);
next_run = (case_is_null (&sort->run_end)
- || case_ordering_compare_cases (c, &sort->run_end,
- sort->ordering) < 0);
+ || subcase_compare_3way (&sort->ordering, c,
+ &sort->ordering, &sort->run_end) < 0);
pqueue_push (sort->pqueue, c, sort->run_id + (next_run ? 1 : 0));
}
{
struct sort_writer *sort = sort_;
- case_ordering_destroy (sort->ordering);
+ subcase_destroy (&sort->ordering);
merge_destroy (sort->merge);
pqueue_destroy (sort->pqueue);
casewriter_destroy (sort->run);
};
\f
/* Reads all the cases from INPUT. Sorts the cases according to
- ORDERING. Returns the sorted cases in a new casereader, or a
- null pointer if an I/O error occurs. Both INPUT and ORDERING
- are destroyed upon return, regardless of success. */
+ ORDERING. Returns the sorted cases in a new casereader.
+ INPUT is handed to casereader_transfer. ORDERING is only read:
+ sort_create_writer works on its own clone of it, so the caller
+ keeps ownership of ORDERING. */
struct casereader *
-sort_execute (struct casereader *input, struct case_ordering *ordering)
+sort_execute (struct casereader *input, const struct subcase *ordering)
{
struct casewriter *output =
sort_create_writer (ordering, casereader_get_value_cnt (input));
casereader_transfer (input, output);
return casewriter_make_reader (output);
}
+
+/* Reads all the cases from INPUT. Sorts the cases in ascending
+ order according to VAR. Returns the sorted cases in a new
+ casereader. The single-variable ordering built here is
+ temporary and is destroyed before returning. */
+struct casereader *
+sort_execute_1var (struct casereader *input, const struct variable *var)
+{
+ struct subcase sc;
+ struct casereader *reader;
+
+ subcase_init_var (&sc, var, SC_ASCEND);
+ reader = sort_execute (input, &sc);
+ subcase_destroy (&sc);
+ return reader;
+}
\f
struct pqueue
{
- struct case_ordering *ordering;
+ struct subcase ordering;
struct pqueue_record *records;
size_t record_cnt;
size_t record_cap;
const void *pq_);
static struct pqueue *
-pqueue_create (const struct case_ordering *ordering, size_t value_cnt)
+pqueue_create (const struct subcase *ordering, size_t value_cnt)
{
struct pqueue *pq;
pq = xmalloc (sizeof *pq);
- pq->ordering = case_ordering_clone (ordering);
+ subcase_clone (&pq->ordering, ordering);
pq->record_cap
= settings_get_workspace_cases (value_cnt);
if (pq->record_cap > max_buffers)
pqueue_pop (pq, &c, &id);
case_destroy (&c);
}
- case_ordering_destroy (pq->ordering);
+ subcase_destroy (&pq->ordering);
free (pq->records);
free (pq);
}
const struct pqueue *pq = pq_;
int result = a->id < b->id ? -1 : a->id > b->id;
if (result == 0)
- result = case_ordering_compare_cases (&a->c, &b->c, pq->ordering);
+ result = subcase_compare_3way (&pq->ordering, &a->c,
+ &pq->ordering, &b->c);
if (result == 0)
result = a->idx < b->idx ? -1 : a->idx > b->idx;
return -result;
#include <stddef.h>
#include <stdbool.h>
-struct case_ordering;
+struct subcase;
+struct variable;
extern int min_buffers ;
extern int max_buffers ;
-struct casewriter *sort_create_writer (struct case_ordering *, size_t value_cnt);
-struct casereader *sort_execute (struct casereader *, struct case_ordering *);
+struct casewriter *sort_create_writer (const struct subcase *,
+ size_t value_cnt);
+struct casereader *sort_execute (struct casereader *, const struct subcase *);
+struct casereader *sort_execute_1var (struct casereader *,
+ const struct variable *);
#endif /* math/sort.h */
const union value *v)
{
const struct value_comparator *vc = (const struct value_comparator *) cmptr;
- return 0 == compare_values (v, vc->pattern, cmptr->var);
+ return 0 == value_compare_3way (v, vc->pattern, var_get_width (cmptr->var));
}
goto use_fallback;
gtk_text_view_set_buffer (text_view, text_buffer);
+ gtk_widget_grab_focus (get_widget_assert (message_xml, "close-button"));
gtk_dialog_run ( GTK_DIALOG (message_dialog));
gtk_widget_hide (message_dialog);
<property name="events">GDK_POINTER_MOTION_MASK | GDK_POINTER_MOTION_HINT_MASK | GDK_BUTTON_PRESS_MASK | GDK_BUTTON_RELEASE_MASK</property>
<property name="layout_style">GTK_BUTTONBOX_END</property>
<child>
- <widget class="GtkButton" id="button1">
+ <widget class="GtkButton" id="close-button">
<property name="visible">True</property>
<property name="can_focus">True</property>
<property name="receives_default">True</property>
gtk_widget_set_sensitive (dialog->change_button, FALSE);
repopulate_dialog (dialog);
+ gtk_widget_grab_focus (dialog->value_entry);
return FALSE;
}
gtk_widget_set_sensitive (dialog->add_button, FALSE);
repopulate_dialog (dialog);
+ gtk_widget_grab_focus (dialog->value_entry);
return FALSE;
}
val_labs_remove (dialog->labs, vl->value);
repopulate_dialog (dialog);
+ gtk_widget_grab_focus (dialog->value_entry);
gtk_widget_set_sensitive (dialog->remove_button, FALSE);
gtk_widget_set_sensitive (dialog->change_button, FALSE);
gtk_widget_set_sensitive (dialog->add_button, FALSE);
+ gtk_widget_grab_focus (dialog->value_entry);
+
repopulate_dialog (dialog);
gtk_widget_show (dialog->window);
}
TESTS_ENVIRONMENT += LC_ALL=C
dist_TESTS = \
+ tests/command/add-files.sh \
tests/command/aggregate.sh \
tests/command/attributes.sh \
tests/command/autorecod.sh \
tests/command/t-test-pairs.sh \
tests/command/trimmed-mean.sh \
tests/command/tabs.sh \
+ tests/command/update.sh \
tests/command/use.sh \
tests/command/variable-display.sh \
tests/command/vector.sh \
--- /dev/null
+#!/bin/sh
+
+# This program tests the ADD FILES procedure
+
+TEMPDIR=/tmp/pspp-tst-$$
+TESTFILE=$TEMPDIR/add-files.pspp
+
+
+# ensure that top_builddir is absolute
+if [ -z "$top_builddir" ] ; then top_builddir=. ; fi
+if [ -z "$top_srcdir" ] ; then top_srcdir=. ; fi
+top_builddir=`cd $top_builddir; pwd`
+PSPP=$top_builddir/src/ui/terminal/pspp
+
+# ensure that top_srcdir is absolute
+top_srcdir=`cd $top_srcdir; pwd`
+
+
+STAT_CONFIG_PATH=$top_srcdir/config
+export STAT_CONFIG_PATH
+
+# Remove the scratch directory $TEMPDIR, unless PSPP_TEST_NO_CLEANUP
+# is set to a non-empty value, in which case the directory is kept
+# and a notice is printed instead.
+cleanup()
+{
+ if [ x"$PSPP_TEST_NO_CLEANUP" != x ] ; then
+ echo "NOT cleaning $TEMPDIR"
+ return ;
+ fi
+ # Step out of the scratch directory before deleting it.
+ cd /
+ # Quoted so the path reaches rm as exactly one argument.
+ rm -rf "$TEMPDIR"
+}
+
+
+# Report a failed check: print the current $activity and FAILED,
+# clean up, and terminate the script with exit status 1.
+fail()
+{
+ # Quoted so the activity text is printed verbatim, without word
+ # splitting or filename expansion.
+ echo "$activity"
+ echo FAILED
+ cleanup;
+ exit 1;
+}
+
+
+# Report that the test could not be carried out: print the current
+# $activity and NO RESULT, clean up, and terminate the script with
+# exit status 2.
+no_result()
+{
+ # Quoted so the activity text is printed verbatim, without word
+ # splitting or filename expansion.
+ echo "$activity"
+ echo NO RESULT;
+ cleanup;
+ exit 2;
+}
+
+# Successful outcome: clean up the scratch directory and end the
+# script with exit status 0.
+pass()
+{
+ cleanup
+ exit 0
+}
+
+# Create the scratch directory and enter it. Give up with NO RESULT
+# if either step fails, so that no test files are written anywhere
+# else.
+activity="create scratch directory"
+mkdir -p "$TEMPDIR"
+if [ $? -ne 0 ] ; then no_result ; fi
+
+cd "$TEMPDIR"
+if [ $? -ne 0 ] ; then no_result ; fi
+
+activity="data create"
+cat > a.data <<EOF
+1aB
+8aM
+3aE
+5aG
+0aA
+5aH
+6aI
+7aJ
+2aD
+7aK
+1aC
+7aL
+4aF
+EOF
+if [ $? -ne 0 ] ; then no_result ; fi
+cat > b.data <<EOF
+1bN
+3bO
+4bP
+6bQ
+7bR
+9bS
+EOF
+if [ $? -ne 0 ] ; then no_result ; fi
+
+cat > concatenate.out <<EOF
+A B C D INA INB
+- - - - --- ---
+1 a B 1 0
+8 a M 1 0
+3 a E 1 0
+5 a G 1 0
+0 a A 1 0
+5 a H 1 0
+6 a I 1 0
+7 a J 1 0
+2 a D 1 0
+7 a K 1 0
+1 a C 1 0
+7 a L 1 0
+4 a F 1 0
+1 b N 0 1
+3 b O 0 1
+4 b P 0 1
+6 b Q 0 1
+7 b R 0 1
+9 b S 0 1
+EOF
+
+cat > interleave.out <<EOF
+A B C D INA INB FIRST LAST
+- - - - --- --- ----- ----
+0 a A 1 0 1 1
+1 a B 1 0 1 0
+1 a C 1 0 0 0
+1 b N 0 1 0 1
+2 a D 1 0 1 1
+3 a E 1 0 1 0
+3 b O 0 1 0 1
+4 a F 1 0 1 0
+4 b P 0 1 0 1
+5 a G 1 0 1 0
+5 a H 1 0 0 1
+6 a I 1 0 1 0
+6 b Q 0 1 0 1
+7 a J 1 0 1 0
+7 a K 1 0 0 0
+7 a L 1 0 0 0
+7 b R 0 1 0 1
+8 a M 1 0 1 1
+9 b S 0 1 1 1
+EOF
+
+# Test ADD FILES.
+#
+# Syntax snippets shared by the generated scripts: dla/dlb read
+# a.data/b.data with DATA LIST, sa/sb save the result as a.sys/b.sys.
+dla="data list notable file='a.data' /A B C 1-3 (a)."
+sa="save outfile='a.sys'."
+dlb="data list notable file='b.data' /A B C 1-3 (a)."
+sb="save outfile='b.sys'."
+# Two modes are exercised. "interleave" adds /SORT to the first input
+# and a /BY subcommand with /FIRST and /LAST; "concatenate" uses none
+# of these. The output of each mode is compared with $type.out.
+for type in interleave concatenate; do
+ if test $type = interleave; then
+ by="/by a /first=FIRST /last=LAST"
+ sort="/sort"
+ else
+ by=
+ sort=
+ fi
+ # Input combinations:
+ # ss - both inputs are system files (a.sys, b.sys)
+ # sa - first input is a.sys, second is the active file (*)
+ # as - first input is the active file (*), second is b.sys
+ for sources in ss sa as; do
+ name="$type-$sources"
+ activity="create $name.pspp"
+ {
+ if [ $sources = ss ]; then
+ cat <<EOF
+$dla
+$sa
+$dlb
+$sb
+add files file='a.sys' /in=INA $sort
+ /file='b.sys' /in=INB /rename c=D
+ $by.
+EOF
+ elif [ $sources = sa ]; then
+ cat <<EOF
+$dla
+$sa
+$dlb
+add files file='a.sys' /in=INA $sort
+ /file=* /in=INB /rename c=D
+ $by.
+EOF
+ elif [ $sources = as ]; then
+ cat <<EOF
+$dlb
+$sb
+$dla
+add files file=* /in=INA $sort
+ /file='b.sys' /in=INB /rename c=D
+ $by.
+EOF
+ else
+ activity="internal error"
+ no_result
+ fi
+ # Every generated script ends by listing the combined cases.
+ echo 'list.'
+ } > $name.pspp
+ if [ $? -ne 0 ] ; then no_result ; fi
+
+ activity="run $name.pspp"
+ # NOTE(review): SUPERVISOR is not set in this script; presumably an
+ # optional command prefix supplied through the environment.
+ $SUPERVISOR $PSPP --testing-mode $name.pspp
+ if [ $? -ne 0 ] ; then no_result ; fi
+
+ activity="check $name output"
+ # Delete whitespace-only lines from both files before comparing.
+ # NOTE(review): pspp.list is not created by this script; presumably
+ # it is the listing written by the pspp run above.
+ perl -pi -e 's/^\s*$//g' pspp.list
+ perl -pi -e 's/^\s*$//g' $type.out
+ diff -u -b -w pspp.list $type.out
+ if [ $? -ne 0 ] ; then fail ; fi
+ done
+done
+
+pass;
activity="data create"
cat > a.data <<EOF
-0aA
1aB
-1aC
-2aD
+8aM
3aE
-4aF
5aG
+0aA
5aH
6aI
7aJ
+2aD
7aK
+1aC
7aL
-8aM
+4aF
EOF
if [ $? -ne 0 ] ; then no_result ; fi
cat > b.data <<EOF
$sa
$dlb
$sb
-match files $type1='a.sys' /in=INA /$type2='b.sys' /in=INB /rename c=D /by a
- /first=FIRST /last=LAST.
+match files $type1='a.sys' /in=INA /sort
+ /$type2='b.sys' /in=INB /rename c=D
+ /by a /first=FIRST /last=LAST.
EOF
elif [ $sources = sa ]; then
cat <<EOF
$dla
$sa
$dlb
-match files $type1='a.sys' /in=INA /$type2=* /in=INB /rename c=D /by a
- /first=FIRST /last=LAST.
+match files $type1='a.sys' /in=INA /sort
+ /$type2=* /in=INB /rename c=D
+ /by a /first=FIRST /last=LAST.
EOF
elif [ $sources = as ]; then
cat <<EOF
$dlb
$sb
$dla
-match files $type1=* /in=INA /$type2='b.sys' /in=INB /rename c=D /by a
- /first=FIRST /last=LAST.
+match files $type1=* /in=INA /sort
+ /$type2='b.sys' /in=INB /rename c=D
+ /by a /first=FIRST /last=LAST.
EOF
else
activity="internal error"
diff -b -w - pspp.list <<EOF
A B C D E F
- - - - - -
-0 a A 1 b N
-1 a B 3 b O
-1 a C 4 b P
-2 a D 6 b Q
-3 a E 7 b R
-4 a F 9 b S
-5 a G
-5 a H
+1 a B 1 b N
+8 a M 3 b O
+3 a E 4 b P
+5 a G 6 b Q
+0 a A 7 b R
+5 a H 9 b S
6 a I
7 a J
+2 a D
7 a K
+1 a C
7 a L
-8 a M
+4 a F
EOF
if [ $? -ne 0 ] ; then fail ; fi
--- /dev/null
+#!/bin/sh
+
+# This program tests the UPDATE procedure
+
+TEMPDIR=/tmp/pspp-tst-$$
+TESTFILE=$TEMPDIR/update.pspp
+
+
+# ensure that top_builddir is absolute
+if [ -z "$top_builddir" ] ; then top_builddir=. ; fi
+if [ -z "$top_srcdir" ] ; then top_srcdir=. ; fi
+top_builddir=`cd $top_builddir; pwd`
+PSPP=$top_builddir/src/ui/terminal/pspp
+
+# ensure that top_srcdir is absolute
+top_srcdir=`cd $top_srcdir; pwd`
+
+
+STAT_CONFIG_PATH=$top_srcdir/config
+export STAT_CONFIG_PATH
+
+# Remove the scratch directory $TEMPDIR, unless PSPP_TEST_NO_CLEANUP
+# is set to a non-empty value, in which case the directory is kept
+# and a notice is printed instead.
+cleanup()
+{
+ if [ x"$PSPP_TEST_NO_CLEANUP" != x ] ; then
+ echo "NOT cleaning $TEMPDIR"
+ return ;
+ fi
+ # Step out of the scratch directory before deleting it.
+ cd /
+ # Quoted so the path reaches rm as exactly one argument.
+ rm -rf "$TEMPDIR"
+}
+
+
+# Report a failed check: print the current $activity and FAILED,
+# clean up, and terminate the script with exit status 1.
+fail()
+{
+ # Quoted so the activity text is printed verbatim, without word
+ # splitting or filename expansion.
+ echo "$activity"
+ echo FAILED
+ cleanup;
+ exit 1;
+}
+
+
+# Report that the test could not be carried out: print the current
+# $activity and NO RESULT, clean up, and terminate the script with
+# exit status 2.
+no_result()
+{
+ # Quoted so the activity text is printed verbatim, without word
+ # splitting or filename expansion.
+ echo "$activity"
+ echo NO RESULT;
+ cleanup;
+ exit 2;
+}
+
+# Successful outcome: clean up the scratch directory and end the
+# script with exit status 0.
+pass()
+{
+ cleanup
+ exit 0
+}
+
+# Create the scratch directory and enter it. Give up with NO RESULT
+# if either step fails, so that no test files are written anywhere
+# else.
+activity="create scratch directory"
+mkdir -p "$TEMPDIR"
+if [ $? -ne 0 ] ; then no_result ; fi
+
+cd "$TEMPDIR"
+if [ $? -ne 0 ] ; then no_result ; fi
+
+activity="data create"
+cat > a.data <<EOF
+1aB
+8aM
+3aE
+5aG
+0aA
+5aH
+6aI
+7aJ
+2aD
+7aK
+1aC
+7aL
+4aF
+EOF
+if [ $? -ne 0 ] ; then no_result ; fi
+cat > b.data <<EOF
+1bN
+3bO
+4bP
+6bQ
+7bR
+9bS
+EOF
+if [ $? -ne 0 ] ; then no_result ; fi
+
+cat > update.out <<EOF
+A B C D INA INB
+- - - - --- ---
+0 a A 1 0
+1 b B N 1 1
+1 a C 1 0
+2 a D 1 0
+3 b E O 1 1
+4 b F P 1 1
+5 a G 1 0
+5 a H 1 0
+6 b I Q 1 1
+7 b J R 1 1
+7 a K 1 0
+7 a L 1 0
+8 a M 1 0
+9 b S 0 1
+EOF
+perl -pi -e 's/^\s*$//g' update.out
+
+# Test UPDATE.
+#
+# Syntax snippets shared by the generated scripts: dla/dlb read
+# a.data/b.data with DATA LIST, sa/sb save the result as a.sys/b.sys.
+dla="data list notable file='a.data' /A B C 1-3 (a)."
+sa="save outfile='a.sys'."
+dlb="data list notable file='b.data' /A B C 1-3 (a)."
+sb="save outfile='b.sys'."
+# Input combinations:
+# ss - both inputs are system files (a.sys, b.sys)
+# sa - first input is a.sys, second is the active file (*)
+# as - first input is the active file (*), second is b.sys
+# In every variant the UPDATE command ends on line 8 of the generated
+# script (the sa and as variants contain a blank line in place of the
+# missing SAVE), which is the location named in the expected warning
+# checked at the end of the loop body.
+for sources in ss sa as; do
+ name="$sources"
+ activity="create $name.pspp"
+ {
+ if [ $sources = ss ]; then
+ cat <<EOF
+set errors=terminal.
+$dla
+$sa
+$dlb
+$sb
+update file='a.sys' /in=INA /sort
+ /file='b.sys' /in=INB /rename c=D
+ /by a.
+EOF
+ elif [ $sources = sa ]; then
+ cat <<EOF
+set errors=terminal.
+$dla
+$sa
+$dlb
+
+update file='a.sys' /in=INA /sort
+ /file=* /in=INB /rename c=D
+ /by a.
+EOF
+ elif [ $sources = as ]; then
+ cat <<EOF
+set errors=terminal.
+$dlb
+$sb
+$dla
+
+update file=* /in=INA /sort
+ /file='b.sys' /in=INB /rename c=D
+ /by a.
+EOF
+ else
+ activity="internal error"
+ no_result
+ fi
+ # Every generated script ends by listing the updated cases.
+ echo 'list.'
+ } > $name.pspp
+ if [ $? -ne 0 ] ; then no_result ; fi
+
+ activity="run $name.pspp"
+ # Start from a clean error file so that only messages from this run
+ # are compared below.
+ rm -f errors
+ # NOTE(review): SUPERVISOR is not set in this script; presumably an
+ # optional command prefix supplied through the environment.
+ $SUPERVISOR $PSPP --testing-mode --error-file=errors $name.pspp
+ if [ $? -ne 0 ] ; then no_result ; fi
+
+ activity="check $name output"
+ # Delete whitespace-only lines from the listing before comparing.
+ # NOTE(review): pspp.list is not created by this script; presumably
+ # it is the listing written by the pspp run above.
+ perl -pi -e 's/^\s*$//g' pspp.list
+ diff -c -b -w pspp.list update.out
+ if [ $? -ne 0 ] ; then fail ; fi
+ # The master data (a.data) repeats the key values 1, 5 and 7, hence
+ # the three sets of duplicate cases in the expected warning.
+ diff -c -b -w - errors <<EOF
+$name.pspp:8: warning: UPDATE: Encountered 3 sets of duplicate cases in the master file.
+EOF
+ if [ $? -ne 0 ] ; then fail ; fi
+done
+
+pass;