From 5862de297bb487c81660beb3796d5c612eeb12b7 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 19 Dec 2006 14:21:52 +0000 Subject: [PATCH] Make it possible to pull cases from the active file with a function call, instead of requiring indirection through a callback function. See patch #5641. --- doc/data-io.texi | 103 -- src/data/ChangeLog | 34 + src/data/case-source.c | 12 +- src/data/case-source.h | 21 +- src/data/procedure.c | 248 ++-- src/data/procedure.h | 6 +- src/data/storage-stream.c | 135 +- src/data/transformations.c | 10 +- src/data/transformations.h | 6 +- src/language/ChangeLog | 8 + src/language/command.def | 2 +- src/language/data-io/ChangeLog | 33 + src/language/data-io/automake.mk | 1 - src/language/data-io/data-list.c | 35 +- src/language/data-io/get.c | 435 +++--- src/language/data-io/inpt-pgm.c | 76 +- src/language/data-io/matrix-data.c | 1990 ---------------------------- src/language/stats/ChangeLog | 21 + src/language/stats/aggregate.c | 64 +- src/language/stats/autorecode.c | 57 +- src/language/stats/flip.c | 74 +- 21 files changed, 673 insertions(+), 2698 deletions(-) delete mode 100644 src/language/data-io/matrix-data.c diff --git a/doc/data-io.texi b/doc/data-io.texi index b5e6ca22..8f727e50 100644 --- a/doc/data-io.texi +++ b/doc/data-io.texi @@ -30,7 +30,6 @@ actually be read until a procedure is executed. * FILE HANDLE:: Support for special file formats. * INPUT PROGRAM:: Support for complex input programs. * LIST:: List cases in the active file. -* MATRIX DATA:: Read matrices in text format. * NEW FILE:: Clear the active file and dictionary. * PRINT:: Display values in print formats. * PRINT EJECT:: Eject the current page then print. @@ -695,108 +694,6 @@ cannot fit on a single line, then a multi-line format will be used. @cmd{LIST} is a procedure. It causes the data to be read. -@node MATRIX DATA -@section MATRIX DATA -@vindex MATRIX DATA - -@display -MATRIX DATA - /VARIABLES=var_list - /FILE='file-name' - /FORMAT=@{LIST,FREE@} @{LOWER,UPPER,FULL@} @{DIAGONAL,NODIAGONAL@} - /SPLIT=@{new_var,var_list@} - /FACTORS=var_list - /CELLS=n_cells - /N=n - /CONTENTS=@{N_VECTOR,N_SCALAR,N_MATRIX,MEAN,STDDEV,COUNT,MSE, - DFE,MAT,COV,CORR,PROX@} -@end display - -@cmd{MATRIX DATA} command reads square matrices in one of several textual -formats. @cmd{MATRIX DATA} clears the dictionary and replaces it and -reads a -data file. - -Use VARIABLES to specify the variables that form the rows and columns of -the matrices. You may not specify a variable named @code{VARNAME_}. You -should specify VARIABLES first. - -Specify the file to read on FILE, either as a file name string or a file -handle (@pxref{File Handles}). If FILE is not specified then matrix data -must immediately follow @cmd{MATRIX DATA} with a @cmd{BEGIN -DATA}@dots{}@cmd{END DATA} -construct (@pxref{BEGIN DATA}). - -The FORMAT subcommand specifies how the matrices are formatted. LIST, -the default, indicates that there is one line per row of matrix data; -FREE allows single matrix rows to be broken across multiple lines. This -is analogous to the difference between @cmd{DATA LIST FREE} and -@cmd{DATA LIST LIST} -(@pxref{DATA LIST}). LOWER, the default, indicates that the lower -triangle of the matrix is given; UPPER indicates the upper triangle; and -FULL indicates that the entire matrix is given. DIAGONAL, the default, -indicates that the diagonal is part of the data; NODIAGONAL indicates -that it is omitted. DIAGONAL/NODIAGONAL have no effect when FULL is -specified. - -The SPLIT subcommand is used to specify @cmd{SPLIT FILE} variables for the -input matrices (@pxref{SPLIT FILE}). Specify either a single variable -not specified on VARIABLES, or one or more variables that are specified -on VARIABLES. In the former case, the SPLIT values are not present in -the data and ROWTYPE_ may not be specified on VARIABLES. In the latter -case, the SPLIT values are present in the data. - -Specify a list of factor variables on FACTORS. Factor variables must -also be listed on VARIABLES. Factor variables are used when there are -some variables where, for each possible combination of their values, -statistics on the matrix variables are included in the data. - -If FACTORS is specified and ROWTYPE_ is not specified on VARIABLES, the -CELLS subcommand is required. Specify the number of factor variable -combinations that are given. For instance, if factor variable A has 2 -values and factor variable B has 3 values, specify 6. - -The N subcommand specifies a population number of observations. When N -is specified, one N record is output for each @cmd{SPLIT FILE}. - -Use CONTENTS to specify what sort of information the matrices include. -Each possible option is described in more detail below. When ROWTYPE_ -is specified on VARIABLES, CONTENTS is optional; otherwise, if CONTENTS -is not specified then /CONTENTS=CORR is assumed. - -@table @asis -@item N -@item N_VECTOR -Number of observations as a vector, one value for each variable. -@item N_SCALAR -Number of observations as a single value. -@item N_MATRIX -Matrix of counts. -@item MEAN -Vector of means. -@item STDDEV -Vector of standard deviations. -@item COUNT -Vector of counts. -@item MSE -Vector of mean squared errors. -@item DFE -Vector of degrees of freedom. -@item MAT -Generic matrix. -@item COV -Covariance matrix. -@item CORR -Correlation matrix. -@item PROX -Proximities matrix. -@end table - -The exact semantics of the matrices read by @cmd{MATRIX DATA} are complex. -Right now @cmd{MATRIX DATA} isn't too useful due to a lack of procedures -accepting or producing related data, so these semantics aren't -documented. Later, they'll be described here in detail. - @node NEW FILE @section NEW FILE @vindex NEW FILE diff --git a/src/data/ChangeLog b/src/data/ChangeLog index 470e8969..d324da4e 100644 --- a/src/data/ChangeLog +++ b/src/data/ChangeLog @@ -1,3 +1,37 @@ +Sat Dec 16 22:05:18 2006 Ben Pfaff + + Make it possible to pull cases from the active file with a + function call, instead of requiring indirection through a callback + function. + + * case-source.h (struct case_source_class): Change ->read function + to return a single case, instead of calling a callback function + for each case. Change ->destroy function to return an error + status. + + * case-source.c (free_case_source): Pass along the value returned + by the case_source ->destroy function. + + * procedure.c (struct write_case_data): Removed. + (struct dataset): Added some members to track procedure state. + (procedure): Optimize the trivial case at this level. + (internal_procedure): Re-implement in terms of proc_open, + proc_read, proc_close. + (proc_open) New function. + (proc_read) New function. + (proc_close) New function. + (write_case) Moved into proc_read. + (close_active_file) Moved closing of data source into proc_close. + + * storage-source.c: Rewrote to conform with modified + case_source_class interface. + + * transformations.c (trns_chain_execute): Added argument to allow + starting execution from an arbitrary transformation. Updated + callers. + + * transformations.h (enum TRNS_NEXT_CASE) Renamed TRNS_END_CASE. + Sat Dec 16 14:09:25 2006 Ben Pfaff * sys-file-reader.c (read_display_parameters): Don't assume that diff --git a/src/data/case-source.c b/src/data/case-source.c index 90d709b1..542f3008 100644 --- a/src/data/case-source.c +++ b/src/data/case-source.c @@ -36,17 +36,21 @@ create_case_source (const struct case_source_class *class, return source; } -/* Destroys case source SOURCE. It is the caller's responsible to - call the source's destroy function, if any. */ -void +/* Destroys case source SOURCE. + Returns true if successful, + false if the source encountered an I/O error during + destruction or reading cases. */ +bool free_case_source (struct case_source *source) { + bool ok = true; if (source != NULL) { if (source->class->destroy != NULL) - source->class->destroy (source); + ok = source->class->destroy (source); free (source); } + return ok; } /* Returns true if CLASS is the class of SOURCE. */ diff --git a/src/data/case-source.h b/src/data/case-source.h index 0a30d298..833502cd 100644 --- a/src/data/case-source.h +++ b/src/data/case-source.h @@ -23,9 +23,6 @@ struct ccase; -typedef struct write_case_data *write_case_data; -typedef bool write_case_func (write_case_data); - /* A case source. */ struct case_source { @@ -42,21 +39,21 @@ struct case_source_class WRITE_CASE, if known, or -1 otherwise. */ int (*count) (const struct case_source *); - /* Reads the cases one by one into C and for each one calls - WRITE_CASE passing the given AUX data. - Returns true if successful, false if an I/O error occurred. */ - bool (*read) (struct case_source *, - struct ccase *c, - write_case_func *write_case, write_case_data aux); + /* Reads one case into C. + Returns true if successful, false at end of file or if an + I/O error occurred. */ + bool (*read) (struct case_source *, struct ccase *); - /* Destroys the source. */ - void (*destroy) (struct case_source *); + /* Destroys the source. + Returns true if successful read, false if an I/O occurred + during destruction or previously. */ + bool (*destroy) (struct case_source *); }; struct case_source *create_case_source (const struct case_source_class *, void *); -void free_case_source (struct case_source *); +bool free_case_source (struct case_source *); bool case_source_is_class (const struct case_source *, const struct case_source_class *); diff --git a/src/data/procedure.c b/src/data/procedure.c index 611eead0..baba8fa7 100644 --- a/src/data/procedure.c +++ b/src/data/procedure.c @@ -38,20 +38,6 @@ #include #include -/* Procedure execution data. */ -struct write_case_data - { - /* Function to call for each case. */ - case_func *proc; - void *aux; - - struct dataset *dataset; /* The dataset concerned */ - struct ccase trns_case; /* Case used for transformations. */ - struct ccase sink_case; /* Case written to sink, if - compacting is necessary. */ - size_t cases_written; /* Cases output so far. */ - }; - struct dataset { /* Cases are read from proc_source, pass through permanent_trns_chain (which transforms them into @@ -84,6 +70,13 @@ struct dataset { int lag_head; /* Index where next case will be added. */ struct ccase *lag_queue; /* Array of n_lag ccase * elements. */ + /* Procedure data. */ + bool is_open; /* Procedure open? */ + struct ccase trns_case; /* Case used for transformations. */ + struct ccase sink_case; /* Case written to sink, if + compacting is necessary. */ + size_t cases_written; /* Cases output so far. */ + bool ok; }; /* struct dataset */ @@ -96,7 +89,6 @@ static bool internal_procedure (struct dataset *ds, case_func *, static void update_last_proc_invocation (struct dataset *ds); static void create_trns_case (struct ccase *, struct dictionary *); static void open_active_file (struct dataset *ds); -static bool write_case (struct write_case_data *wc_data); static void lag_case (struct dataset *ds, const struct ccase *c); static void clear_case (const struct dataset *ds, struct ccase *c); static bool close_active_file (struct dataset *ds); @@ -134,6 +126,23 @@ time_of_last_procedure (struct dataset *ds) bool procedure (struct dataset *ds, case_func *cf, void *aux) { + update_last_proc_invocation (ds); + + /* Optimize the trivial case where we're not going to do + anything with the data, by not reading the data at all. */ + if (cf == NULL + && case_source_is_class (ds->proc_source, &storage_source_class) + && ds->proc_sink == NULL + && (ds->temporary_trns_chain == NULL + || trns_chain_is_empty (ds->temporary_trns_chain)) + && trns_chain_is_empty (ds->permanent_trns_chain)) + { + ds->n_lag = 0; + dict_set_case_limit (ds->dict, 0); + dict_clear_vectors (ds->dict); + return true; + } + return internal_procedure (ds, cf, NULL, aux); } @@ -187,7 +196,6 @@ multipass_procedure (struct dataset *ds, casefile_func *proc_func, void *aux) /* Procedure implementation. */ - /* Executes a procedure. Passes each case to CASE_FUNC. Calls END_FUNC after the last case. @@ -198,50 +206,133 @@ internal_procedure (struct dataset *ds, case_func *proc, end_func *end, void *aux) { - struct write_case_data wc_data; + struct ccase *c; bool ok = true; + + proc_open (ds); + while (ok && proc_read (ds, &c)) + if (proc != NULL) + ok = proc (c, aux, ds) && ok; + if (end != NULL) + ok = end (aux, ds) && ok; + return proc_close (ds) && ok; +} +/* Opens dataset DS for reading cases with proc_read. + proc_close must be called when done. */ +void +proc_open (struct dataset *ds) +{ assert (ds->proc_source != NULL); + assert (!ds->is_open); update_last_proc_invocation (ds); - /* Optimize the trivial case where we're not going to do - anything with the data, by not reading the data at all. */ - if (proc == NULL && end == NULL - && case_source_is_class (ds->proc_source, &storage_source_class) - && ds->proc_sink == NULL - && (ds->temporary_trns_chain == NULL - || trns_chain_is_empty (ds->temporary_trns_chain)) - && trns_chain_is_empty (ds->permanent_trns_chain)) + open_active_file (ds); + + ds->is_open = true; + create_trns_case (&ds->trns_case, ds->dict); + case_create (&ds->sink_case, dict_get_compacted_value_cnt (ds->dict)); + ds->cases_written = 0; + ds->ok = true; +} + +/* Reads the next case from dataset DS, which must have been + opened for reading with proc_open. + Returns true if successful, in which case a pointer to the + case is stored in *C. + Return false at end of file or if a read error occurs. In + this case a null pointer is stored in *C. */ +bool +proc_read (struct dataset *ds, struct ccase **c) +{ + enum trns_result retval = TRNS_DROP_CASE; + + assert (ds->is_open); + *c = NULL; + for (;;) { - ds->n_lag = 0; - dict_set_case_limit (ds->dict, 0); - dict_clear_vectors (ds->dict); - return true; - } + size_t case_nr; + + assert (retval == TRNS_DROP_CASE || retval == TRNS_ERROR); + if (retval == TRNS_ERROR) + ds->ok = false; + if (!ds->ok) + return false; + + /* Read a case from proc_source. */ + clear_case (ds, &ds->trns_case); + if (!ds->proc_source->class->read (ds->proc_source, &ds->trns_case)) + return false; + + /* Execute permanent transformations. */ + case_nr = ds->cases_written + 1; + retval = trns_chain_execute (ds->permanent_trns_chain, TRNS_CONTINUE, + &ds->trns_case, &case_nr); + if (retval != TRNS_CONTINUE) + continue; - open_active_file (ds); + /* Write case to LAG queue. */ + if (ds->n_lag) + lag_case (ds, &ds->trns_case); + + /* Write case to replacement active file. */ + ds->cases_written++; + if (ds->proc_sink->class->write != NULL) + { + if (ds->compactor != NULL) + { + dict_compactor_compact (ds->compactor, &ds->sink_case, + &ds->trns_case); + ds->proc_sink->class->write (ds->proc_sink, &ds->sink_case); + } + else + ds->proc_sink->class->write (ds->proc_sink, &ds->trns_case); + } - wc_data.proc = proc; - wc_data.aux = aux; - wc_data.dataset = ds; - create_trns_case (&wc_data.trns_case, ds->dict); - case_create (&wc_data.sink_case, - dict_get_compacted_value_cnt (ds->dict)); - wc_data.cases_written = 0; - - ok = ds->proc_source->class->read (ds->proc_source, - &wc_data.trns_case, - write_case, &wc_data) && ok; - if (end != NULL) - ok = end (aux, ds) && ok; + /* Execute temporary transformations. */ + if (ds->temporary_trns_chain != NULL) + { + retval = trns_chain_execute (ds->temporary_trns_chain, TRNS_CONTINUE, + &ds->trns_case, &ds->cases_written); + if (retval != TRNS_CONTINUE) + continue; + } - case_destroy (&wc_data.sink_case); - case_destroy (&wc_data.trns_case); + *c = &ds->trns_case; + return true; + } +} - ok = close_active_file (ds) && ok; +/* Closes dataset DS for reading. + Returns true if successful, false if an I/O error occurred + while reading or closing the data set. + If DS has not been opened, returns true without doing + anything else. */ +bool +proc_close (struct dataset *ds) +{ + if (!ds->is_open) + return true; - return ok; + /* Drain any remaining cases. */ + while (ds->ok) + { + struct ccase *c; + if (!proc_read (ds, &c)) + break; + } + + ds->ok = free_case_source (ds->proc_source) && ds->ok; + ds->proc_source = NULL; + + case_destroy (&ds->sink_case); + case_destroy (&ds->trns_case); + + ds->ok = close_active_file (ds) && ds->ok; + ds->is_open = false; + + return ds->ok; } /* Updates last_proc_invocation. */ @@ -314,63 +405,6 @@ open_active_file (struct dataset *ds) } } -/* Transforms trns_case and writes it to the replacement active - file if advisable. Returns true if more cases can be - accepted, false otherwise. Do not call this function again - after it has returned false once. */ -static bool -write_case (struct write_case_data *wc_data) -{ - enum trns_result retval; - size_t case_nr; - - struct dataset *ds = wc_data->dataset; - - /* Execute permanent transformations. */ - case_nr = wc_data->cases_written + 1; - retval = trns_chain_execute (ds->permanent_trns_chain, - &wc_data->trns_case, &case_nr); - if (retval != TRNS_CONTINUE) - goto done; - - /* Write case to LAG queue. */ - if (ds->n_lag) - lag_case (ds, &wc_data->trns_case); - - /* Write case to replacement active file. */ - wc_data->cases_written++; - if (ds->proc_sink->class->write != NULL) - { - if (ds->compactor != NULL) - { - dict_compactor_compact (ds->compactor, &wc_data->sink_case, - &wc_data->trns_case); - ds->proc_sink->class->write (ds->proc_sink, &wc_data->sink_case); - } - else - ds->proc_sink->class->write (ds->proc_sink, &wc_data->trns_case); - } - - /* Execute temporary transformations. */ - if (ds->temporary_trns_chain != NULL) - { - retval = trns_chain_execute (ds->temporary_trns_chain, - &wc_data->trns_case, - &wc_data->cases_written); - if (retval != TRNS_CONTINUE) - goto done; - } - - /* Pass case to procedure. */ - if (wc_data->proc != NULL) - if (!wc_data->proc (&wc_data->trns_case, wc_data->aux, ds)) - retval = TRNS_ERROR; - - done: - clear_case (ds, &wc_data->trns_case); - return retval != TRNS_ERROR; -} - /* Add C to the lag queue. */ static void lag_case (struct dataset *ds, const struct ccase *c) @@ -430,10 +464,6 @@ close_active_file (struct dataset *ds) ds->compactor = NULL; } - /* Free data source. */ - free_case_source (ds->proc_source); - ds->proc_source = NULL; - /* Old data sink becomes new data source. */ if (ds->proc_sink->class->make_source != NULL) ds->proc_source = ds->proc_sink->class->make_source (ds->proc_sink); diff --git a/src/data/procedure.h b/src/data/procedure.h index d426c8e4..3b501df9 100644 --- a/src/data/procedure.h +++ b/src/data/procedure.h @@ -92,9 +92,11 @@ bool multipass_procedure_with_splits (struct dataset *ds, void *aux) WARN_UNUSED_RESULT; - - time_t time_of_last_procedure (struct dataset *ds); + +void proc_open (struct dataset *); +bool proc_read (struct dataset *, struct ccase **); +bool proc_close (struct dataset *); struct ccase *lagged_case (const struct dataset *ds, int n_before); diff --git a/src/data/storage-stream.c b/src/data/storage-stream.c index 469d668b..a003e7be 100644 --- a/src/data/storage-stream.c +++ b/src/data/storage-stream.c @@ -31,42 +31,37 @@ #include "xalloc.h" -/* Information about storage sink or source. */ -struct storage_stream_info +/* Storage sink. */ + +/* Information about storage sink. */ +struct storage_sink_info { struct casefile *casefile; /* Storage. */ }; - -/* Storage sink. */ + +static struct storage_sink_info * +get_storage_sink_info (struct case_sink *sink) +{ + assert (sink->class == &storage_sink_class); + return sink->aux; +} /* Initializes a storage sink. */ static void storage_sink_open (struct case_sink *sink) { - struct storage_stream_info *info; + struct storage_sink_info *info; sink->aux = info = xmalloc (sizeof *info); info->casefile = fastfile_create (sink->value_cnt); } -/* Destroys storage stream represented by INFO. */ -static void -destroy_storage_stream_info (struct storage_stream_info *info) -{ - if (info != NULL) - { - casefile_destroy (info->casefile); - free (info); - } -} - /* Writes case C to the storage sink SINK. Returns true if successful, false if an I/O error occurred. */ static bool storage_sink_write (struct case_sink *sink, const struct ccase *c) { - struct storage_stream_info *info = sink->aux; - + struct storage_sink_info *info = get_storage_sink_info (sink); return casefile_append (info->casefile, c); } @@ -74,7 +69,9 @@ storage_sink_write (struct case_sink *sink, const struct ccase *c) static void storage_sink_destroy (struct case_sink *sink) { - destroy_storage_stream_info (sink->aux); + struct storage_sink_info *info = get_storage_sink_info (sink); + casefile_destroy (info->casefile); + free (info); } /* Closes the sink and returns a storage source to read back the @@ -82,9 +79,9 @@ storage_sink_destroy (struct case_sink *sink) static struct case_source * storage_sink_make_source (struct case_sink *sink) { - struct case_source *source - = create_case_source (&storage_source_class, sink->aux); - sink->aux = NULL; + struct storage_sink_info *info = get_storage_sink_info (sink); + struct case_source *source = storage_source_create (info->casefile); + info->casefile = NULL; return source; } @@ -100,65 +97,73 @@ const struct case_sink_class storage_sink_class = /* Storage source. */ +struct storage_source_info + { + struct casefile *casefile; /* Storage. */ + struct casereader *reader; /* Reader. */ + }; + +static struct storage_source_info * +get_storage_source_info (const struct case_source *source) +{ + assert (source->class == &storage_source_class); + return source->aux; +} + /* Returns the number of cases that will be read by storage_source_read(). */ static int storage_source_count (const struct case_source *source) { - struct storage_stream_info *info = source->aux; - + struct storage_source_info *info = get_storage_source_info (source); return casefile_get_case_cnt (info->casefile); } -/* Reads all cases from the storage source and passes them one by one to - write_case(). */ +/* Reads one case into OUTPUT_CASE. + Returns true if successful, false at end of file or if an + I/O error occurred. */ static bool -storage_source_read (struct case_source *source, - struct ccase *output_case, - write_case_func *write_case, write_case_data wc_data) +storage_source_read (struct case_source *source, struct ccase *output_case) { - struct storage_stream_info *info = source->aux; + struct storage_source_info *info = get_storage_source_info (source); struct ccase casefile_case; - struct casereader *reader; - bool ok = true; - for (reader = casefile_get_reader (info->casefile, NULL); - ok && casereader_read (reader, &casefile_case); - case_destroy (&casefile_case)) + if (info->reader == NULL) + info->reader = casefile_get_reader (info->casefile, NULL); + + if (casereader_read (info->reader, &casefile_case)) { case_copy (output_case, 0, &casefile_case, 0, casefile_get_value_cnt (info->casefile)); - ok = write_case (wc_data); + return true; } - casereader_destroy (reader); - - return ok; + else + return false; } -/* Destroys the source's internal data. */ -static void +/* Destroys the source. + Returns true if successful read, false if an I/O occurred + during destruction or previously. */ +static bool storage_source_destroy (struct case_source *source) { - destroy_storage_stream_info (source->aux); + struct storage_source_info *info = get_storage_source_info (source); + bool ok = true; + if (info->casefile) + { + ok = !casefile_error (info->casefile); + casefile_destroy (info->casefile); + } + free (info); + return ok; } -/* Storage source. */ -const struct case_source_class storage_source_class = - { - "storage", - storage_source_count, - storage_source_read, - storage_source_destroy, - }; - /* Returns the casefile encapsulated by SOURCE. */ struct casefile * storage_source_get_casefile (struct case_source *source) { - struct storage_stream_info *info = source->aux; - - assert (source->class == &storage_source_class); + struct storage_source_info *info = get_storage_source_info (source); return info->casefile; } @@ -167,25 +172,33 @@ storage_source_get_casefile (struct case_source *source) struct casefile * storage_source_decapsulate (struct case_source *source) { - struct storage_stream_info *info = source->aux; - struct casefile *casefile; - - assert (source->class == &storage_source_class); - casefile = info->casefile; + struct storage_source_info *info = get_storage_source_info (source); + struct casefile *casefile = info->casefile; + assert (info->reader == NULL); info->casefile = NULL; free_case_source (source); return casefile; } -/* Creates and returns a new storage stream that encapsulates +/* Creates and returns a new storage source that encapsulates CASEFILE. */ struct case_source * storage_source_create (struct casefile *casefile) { - struct storage_stream_info *info; + struct storage_source_info *info; info = xmalloc (sizeof *info); info->casefile = casefile; + info->reader = NULL; return create_case_source (&storage_source_class, info); } + +/* Storage source. */ +const struct case_source_class storage_source_class = + { + "storage", + storage_source_count, + storage_source_read, + storage_source_destroy, + }; diff --git a/src/data/transformations.c b/src/data/transformations.c index b5a6c9d8..f77c24f8 100644 --- a/src/data/transformations.c +++ b/src/data/transformations.c @@ -188,13 +188,13 @@ trns_chain_next (struct trns_chain *chain) terminate, or TRNS_CONTINUE if the transformations finished due to "falling off the end" of the set of transformations. */ enum trns_result -trns_chain_execute (struct trns_chain *chain, struct ccase *c, - const size_t *case_nr) +trns_chain_execute (struct trns_chain *chain, enum trns_result start, + struct ccase *c, const size_t *case_nr) { size_t i; assert (chain->finalized); - for (i = 0; i < chain->trns_cnt; ) + for (i = start < 0 ? 0 : start; i < chain->trns_cnt; ) { struct transformation *trns = &chain->trns[i]; int retval = trns->execute (trns->aux, c, *case_nr); @@ -202,8 +202,8 @@ trns_chain_execute (struct trns_chain *chain, struct ccase *c, i++; else if (retval >= 0) i = retval + trns->idx_ofs; - else - return retval; + else + return retval == TRNS_END_CASE ? i + 1 : retval; } return TRNS_CONTINUE; diff --git a/src/data/transformations.h b/src/data/transformations.h index f9efb24a..866a2fc4 100644 --- a/src/data/transformations.h +++ b/src/data/transformations.h @@ -30,7 +30,7 @@ enum trns_result TRNS_CONTINUE = -1, /* Continue to next transformation. */ TRNS_DROP_CASE = -2, /* Drop this case. */ TRNS_ERROR = -3, /* A serious error, so stop the procedure. */ - TRNS_NEXT_CASE = -4, /* Skip to next case. INPUT PROGRAM only. */ + TRNS_END_CASE = -4, /* Skip to next case. INPUT PROGRAM only. */ TRNS_END_FILE = -5 /* End of input. INPUT PROGRAM only. */ }; @@ -50,8 +50,8 @@ bool trns_chain_is_empty (const struct trns_chain *); void trns_chain_append (struct trns_chain *, trns_finalize_func *, trns_proc_func *, trns_free_func *, void *); size_t trns_chain_next (struct trns_chain *); -enum trns_result trns_chain_execute (struct trns_chain *, struct ccase *, - const size_t *case_nr); +enum trns_result trns_chain_execute (struct trns_chain *, enum trns_result, + struct ccase *, const size_t *case_nr); void trns_chain_splice (struct trns_chain *, struct trns_chain *); diff --git a/src/language/ChangeLog b/src/language/ChangeLog index d3f3d6d8..215740fa 100644 --- a/src/language/ChangeLog +++ b/src/language/ChangeLog @@ -1,3 +1,11 @@ +Sat Dec 16 22:15:55 2006 Ben Pfaff + + Make it possible to pull cases from the active file with a + function call, instead of requiring indirection through a callback + function. + + * command.def: Marked MATRIX DATA as unimplemented. + Sun Dec 3 11:59:10 2006 Ben Pfaff * syntax-file.c (read_syntax_file): Always read GETL_BATCH lines. diff --git a/src/language/command.def b/src/language/command.def index 87fb917a..732d8f2a 100644 --- a/src/language/command.def +++ b/src/language/command.def @@ -44,7 +44,6 @@ DEF_CMD (S_INITIAL | S_DATA | S_INPUT_PROGRAM | S_FILE_TYPE, 0, "DATA LIST", cmd DEF_CMD (S_INITIAL | S_DATA, 0, "GET", cmd_get) DEF_CMD (S_INITIAL | S_DATA, 0, "IMPORT", cmd_import) DEF_CMD (S_INITIAL | S_DATA, 0, "INPUT PROGRAM", cmd_input_program) -DEF_CMD (S_INITIAL | S_DATA, 0, "MATRIX DATA", cmd_matrix_data) /* Transformations and utilities that may appear after active file definition or within INPUT PROGRAM. */ @@ -188,6 +187,7 @@ UNIMPL_CMD ("LOGLINEAR", "General model fitting") UNIMPL_CMD ("MANOVA", "Multivariate analysis of variance") UNIMPL_CMD ("MAPS", "Geographical display") UNIMPL_CMD ("MATRIX", "Matrix processing") +UNIMPL_CMD ("MATRIX DATA", "Matrix data input") UNIMPL_CMD ("MCONVERT", "Convert covariance/correlation matrices") UNIMPL_CMD ("MIXED", "Mixed linear models") UNIMPL_CMD ("MODEL CLOSE ", "Close server connection") diff --git a/src/language/data-io/ChangeLog b/src/language/data-io/ChangeLog index 5b49843a..a0a4da02 100644 --- a/src/language/data-io/ChangeLog +++ b/src/language/data-io/ChangeLog @@ -1,3 +1,36 @@ +Sat Dec 16 22:16:18 2006 Ben Pfaff + + Make it possible to pull cases from the active file with a + function call, instead of requiring indirection through a callback + function. + + * automake.mk: Removed matrix-data.c. + + * matrix-data.c: Removed. + + * data-list.c (data_list_source_read): Conform with new + case_source_class interface. + (data_list_source_destroy): Ditto. + + * get.c (case_reader_source_class): Ditto. + (case_reader_source_destroy): Ditto. + (parse_output_proc): Take advantage of new procedure interface. + (output_proc): Removed. + (struct mtf_file): Add "struct ccase *" member to allow use of new + procedure interface. + (cmd_match_files): Take advantage of new procedure interface. + (mtf_processing_finish): Removed. + (mtf_read_nonactive_records): Renamed mtf_read_records. Now reads + from every file, without any exception for the active file. + (mtf_compare_BY_values): Simplify for new interface. + (mtf_processing): Simplify for new interface. + + * inpt-pgm.c (is_valid_state): New function. + (input_program_source_read): Conform with new case_source_class + interface. + (input_program_source_destroy): Ditto. + (end_case_trns_proc): Now just needs to return TRNS_END_CASE. + Sat Dec 9 18:43:34 2006 Ben Pfaff * list.q (cmd_list): Use new var_create, var_destroy functions. diff --git a/src/language/data-io/automake.mk b/src/language/data-io/automake.mk index c31b0275..df51c631 100644 --- a/src/language/data-io/automake.mk +++ b/src/language/data-io/automake.mk @@ -11,7 +11,6 @@ language_data_io_sources = \ src/language/data-io/inpt-pgm.h \ src/language/data-io/print.c \ src/language/data-io/print-space.c \ - src/language/data-io/matrix-data.c \ src/language/data-io/data-reader.c \ src/language/data-io/data-reader.h \ src/language/data-io/data-writer.c \ diff --git a/src/language/data-io/data-list.c b/src/language/data-io/data-list.c index 00cbb0c9..9594ead0 100644 --- a/src/language/data-io/data-list.c +++ b/src/language/data-io/data-list.c @@ -806,13 +806,11 @@ data_list_trns_proc (void *dls_, struct ccase *c, casenumber case_num UNUSED) return retval; } -/* Reads all the records from the data file and passes them to - write_case(). - Returns true if successful, false if an I/O error occurred. */ +/* Reads one case into OUTPUT_CASE. + Returns true if successful, false at end of file or if an + I/O error occurred. */ static bool -data_list_source_read (struct case_source *source, - struct ccase *c, - write_case_func *write_case, write_case_data wc_data) +data_list_source_read (struct case_source *source, struct ccase *c) { struct data_list_pgm *dls = source->aux; @@ -826,26 +824,19 @@ data_list_source_read (struct case_source *source, dls->skip_records--; } - for (;;) - { - bool ok; - - if (!read_from_data_list (dls, c)) - return !dfm_reader_error (dls->reader); - - dfm_push (dls->reader); - ok = write_case (wc_data); - dfm_pop (dls->reader); - if (!ok) - return false; - } + return read_from_data_list (dls, c); } -/* Destroys the source's internal data. */ -static void +/* Destroys the source. + Returns true if successful read, false if an I/O occurred + during destruction or previously. */ +static bool data_list_source_destroy (struct case_source *source) { - data_list_trns_free (source->aux); + struct data_list_pgm *dls = source->aux; + bool ok = !dfm_reader_error (dls->reader); + data_list_trns_free (dls); + return ok; } static const struct case_source_class data_list_source_class = diff --git a/src/language/data-io/get.c b/src/language/data-io/get.c index 1cd25519..8b03ec95 100644 --- a/src/language/data-io/get.c +++ b/src/language/data-io/get.c @@ -177,44 +177,33 @@ case_reader_pgm_free (struct case_reader_pgm *pgm) } } -/* Clears internal state related to case reader input procedure. */ -static void -case_reader_source_destroy (struct case_source *source) +/* Reads one case into C. + Returns true if successful, false at end of file or if an + I/O error occurred. */ +static bool +case_reader_source_read (struct case_source *source, struct ccase *c) { struct case_reader_pgm *pgm = source->aux; - case_reader_pgm_free (pgm); + if (any_reader_read (pgm->reader, pgm->map == NULL ? c : &pgm->bounce)) + { + if (pgm->map != NULL) + map_case (pgm->map, &pgm->bounce, c); + return true; + } + else + return false; } -/* Reads all the cases from the data file into C and passes them - to WRITE_CASE one by one, passing WC_DATA. - Returns true if successful, false if an I/O error occurred. */ +/* Destroys the source. + Returns true if successful read, false if an I/O occurred + during destruction or previously. */ static bool -case_reader_source_read (struct case_source *source, - struct ccase *c, - write_case_func *write_case, write_case_data wc_data) +case_reader_source_destroy (struct case_source *source) { struct case_reader_pgm *pgm = source->aux; - bool ok = true; - - do - { - bool got_case; - if (pgm->map == NULL) - got_case = any_reader_read (pgm->reader, c); - else - { - got_case = any_reader_read (pgm->reader, &pgm->bounce); - if (got_case) - map_case (pgm->map, &pgm->bounce, c); - } - if (!got_case) - break; - - ok = write_case (wc_data); - } - while (ok); - - return ok && !any_reader_error (pgm->reader); + bool ok = !any_reader_error (pgm->reader); + case_reader_pgm_free (pgm); + return ok; } static const struct case_source_class case_reader_source_class = @@ -470,8 +459,6 @@ case_writer_write_case (struct case_writer *aw, const struct ccase *c) /* SAVE and EXPORT. */ -static bool output_proc (const struct ccase *, void *, const struct dataset *); - /* Parses and performs the SAVE or EXPORT procedure. */ static int parse_output_proc (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type) @@ -479,7 +466,8 @@ parse_output_proc (struct lexer *lexer, struct dataset *ds, enum writer_type wri bool retain_unselected; struct variable *saved_filter_variable; struct case_writer *aw; - bool ok; + struct ccase *c; + bool ok = true; aw = parse_write_command (lexer, ds, writer_type, PROC_CMD, &retain_unselected); if (aw == NULL) @@ -488,21 +476,18 @@ parse_output_proc (struct lexer *lexer, struct dataset *ds, enum writer_type wri saved_filter_variable = dict_get_filter (dataset_dict (ds)); if (retain_unselected) dict_set_filter (dataset_dict (ds), NULL); - ok = procedure (ds, output_proc, aw); + + proc_open (ds); + while (ok && proc_read (ds, &c)) + ok = case_writer_write_case (aw, c); + ok = proc_close (ds) && ok; + dict_set_filter (dataset_dict (ds), saved_filter_variable); case_writer_destroy (aw); return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; } -/* Writes case C to file. */ -static bool -output_proc (const struct ccase *c, void *aw_, const struct dataset *ds UNUSED) -{ - struct case_writer *aw = aw_; - return case_writer_write_case (aw, c); -} - int cmd_save (struct lexer *lexer, struct dataset *ds) { @@ -771,7 +756,8 @@ struct mtf_file char *in_name; /* Variable name. */ struct variable *in_var; /* Variable (in master dictionary). */ - struct ccase input; /* Input record. */ + struct ccase input_storage; /* Input record storage. */ + struct ccase *input; /* Input record. */ }; /* MATCH FILES procedure. */ @@ -798,11 +784,10 @@ struct mtf_proc static bool mtf_free (struct mtf_proc *); static bool mtf_close_file (struct mtf_file *); static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *); +static bool mtf_read_records (struct mtf_proc *, struct dataset *); static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **); -static bool mtf_read_nonactive_records (void *); -static bool mtf_processing_finish (void *, const struct dataset *); -static bool mtf_processing (const struct ccase *, void *, const struct dataset *); +static bool mtf_processing (struct mtf_proc *, struct dataset *); static char *var_type_description (struct variable *); @@ -821,8 +806,6 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds) bool saw_table = false; bool saw_in = false; - bool ok; - mtf.head = mtf.tail = NULL; mtf.by_cnt = 0; mtf.first[0] = '\0'; @@ -858,7 +841,8 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds) file->dict = NULL; file->in_name = NULL; file->in_var = NULL; - case_nullify (&file->input); + case_nullify (&file->input_storage); + file->input = &file->input_storage; /* FILEs go first, then TABLEs. */ if (file->type == MTF_TABLE || first_table == NULL) @@ -923,7 +907,8 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds) if (file->reader == NULL) goto error; - case_create (&file->input, dict_get_next_value_idx (file->dict)); + case_create (&file->input_storage, + dict_get_next_value_idx (file->dict)); } while (lex_match (lexer, '/')) @@ -1119,16 +1104,17 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds) 7. Repeat from step 2. - Unfortunately, this algorithm can't be implemented in a - straightforward way because there's no function to read a - record from the active file. Instead, it has to be written - as a state machine. - FIXME: For merging large numbers of files (more than 10?) a better algorithm would use a heap for finding minimum values. */ - if (!used_active_file) + if (used_active_file) + { + proc_set_sink (ds, create_case_sink (&null_sink_class, + dataset_dict (ds), NULL)); + proc_open (ds); + } + else discard_variables (ds); dict_compact_values (mtf.dict); @@ -1136,20 +1122,13 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds) mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums); case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict)); - if (!mtf_read_nonactive_records (&mtf)) + if (!mtf_read_records (&mtf, ds)) + goto error; + while (mtf.head && mtf.head->type == MTF_FILE) + if (!mtf_processing (&mtf, ds)) + goto error; + if (!proc_close (ds)) goto error; - - if (used_active_file) - { - proc_set_sink (ds, - create_case_sink (&null_sink_class, - dataset_dict (ds), NULL)); - ok = - ( procedure (ds, mtf_processing, &mtf) && - mtf_processing_finish (&mtf, ds) ); - } - else - ok = mtf_processing_finish (&mtf, ds); discard_variables (ds); @@ -1159,38 +1138,14 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds) proc_set_source (ds, storage_source_create (mtf.output)); mtf.output = NULL; - if (!mtf_free (&mtf)) - ok = false; - return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; + return mtf_free (&mtf) ? CMD_SUCCESS : CMD_CASCADING_FAILURE; error: + proc_close (ds); mtf_free (&mtf); return CMD_CASCADING_FAILURE; } -/* Repeats 2...7 an arbitrary number of times. */ -static bool -mtf_processing_finish (void *mtf_, const struct dataset *ds) -{ - struct mtf_proc *mtf = mtf_; - struct mtf_file *iter; - - /* Find the active file and delete it. */ - for (iter = mtf->head; iter; iter = iter->next) - if (iter->handle == NULL) - { - if (!mtf_delete_file_in_place (mtf, &iter)) - NOT_REACHED (); - break; - } - - while (mtf->head && mtf->head->type == MTF_FILE) - if (!mtf_processing (NULL, mtf, ds)) - return false; - - return true; -} - /* Return a string in a static buffer describing V's variable type and width. */ static char * @@ -1221,7 +1176,7 @@ mtf_close_file (struct mtf_file *file) any_reader_close (file->reader); if (file->handle != NULL) dict_destroy (file->dict); - case_destroy (&file->input); + case_destroy (&file->input_storage); free (file->in_name); free (file); return ok; @@ -1291,21 +1246,24 @@ mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file) return mtf_close_file (f); } -/* Read a record from every input file except the active file. +/* Read a record from every input file. Returns true if successful, false if an I/O error occurred. */ static bool -mtf_read_nonactive_records (void *mtf_) +mtf_read_records (struct mtf_proc *mtf, struct dataset *ds) { - struct mtf_proc *mtf = mtf_; struct mtf_file *iter, *next; bool ok = true; for (iter = mtf->head; ok && iter != NULL; iter = next) { next = iter->next; - if (iter->handle && !any_reader_read (iter->reader, &iter->input)) - if (!mtf_delete_file_in_place (mtf, &iter)) - ok = false; + if (iter->handle + ? !any_reader_read (iter->reader, iter->input) + : !proc_read (ds, &iter->input)) + { + if (!mtf_delete_file_in_place (mtf, &iter)) + ok = false; + } } return ok; } @@ -1314,46 +1272,66 @@ mtf_read_nonactive_records (void *mtf_) if A == B, 1 if A > B. */ static inline int mtf_compare_BY_values (struct mtf_proc *mtf, - struct mtf_file *a, struct mtf_file *b, - const struct ccase *c) + struct mtf_file *a, struct mtf_file *b) { - const struct ccase *ca = case_is_null (&a->input) ? c : &a->input; - const struct ccase *cb = case_is_null (&b->input) ? c : &b->input; - assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1); - return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt); + return case_compare_2dict (a->input, b->input, a->by, b->by, mtf->by_cnt); } /* Perform one iteration of steps 3...7 above. Returns true if successful, false if an I/O error occurred. */ static bool -mtf_processing (const struct ccase *c, void *mtf_, const struct dataset *ds UNUSED) +mtf_processing (struct mtf_proc *mtf, struct dataset *ds) { - struct mtf_proc *mtf = mtf_; - - /* Do we need another record from the active file? */ - bool read_active_file; + struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */ + struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */ + struct mtf_file *iter, *next; - assert (mtf->head != NULL); - if (mtf->head->type == MTF_TABLE) - return true; - - do + /* 3. Find the FILE input record(s) that have minimum BY + values. Store all the values from these input records into + the output record. */ + min_head = min_tail = mtf->head; + max_head = max_tail = NULL; + for (iter = mtf->head->next; iter && iter->type == MTF_FILE; + iter = iter->next) { - struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */ - struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */ - struct mtf_file *iter, *next; - - read_active_file = false; + int cmp = mtf_compare_BY_values (mtf, min_head, iter); + if (cmp < 0) + { + if (max_head) + max_tail = max_tail->next_min = iter; + else + max_head = max_tail = iter; + } + else if (cmp == 0) + min_tail = min_tail->next_min = iter; + else /* cmp > 0 */ + { + if (max_head) + { + max_tail->next_min = min_head; + max_tail = min_tail; + } + else + { + max_head = min_head; + max_tail = min_tail; + } + min_head = min_tail = iter; + } + } - /* 3. Find the FILE input record(s) that have minimum BY - values. Store all the values from these input records into - the output record. */ - min_head = min_tail = mtf->head; - max_head = max_tail = NULL; - for (iter = mtf->head->next; iter && iter->type == MTF_FILE; - iter = iter->next) + /* 4. For every TABLE, read another record as long as the BY + values on the TABLE's input record are less than the FILEs' + BY values. If an exact match is found, store all the values + from the TABLE input record into the output record. */ + for (; iter != NULL; iter = next) + { + assert (iter->type == MTF_TABLE); + + next = iter->next; + for (;;) { - int cmp = mtf_compare_BY_values (mtf, min_head, iter, c); + int cmp = mtf_compare_BY_values (mtf, min_head, iter); if (cmp < 0) { if (max_head) @@ -1361,142 +1339,97 @@ mtf_processing (const struct ccase *c, void *mtf_, const struct dataset *ds UNUS else max_head = max_tail = iter; } - else if (cmp == 0) - min_tail = min_tail->next_min = iter; + else if (cmp == 0) + min_tail = min_tail->next_min = iter; else /* cmp > 0 */ { - if (max_head) - { - max_tail->next_min = min_head; - max_tail = min_tail; - } - else - { - max_head = min_head; - max_tail = min_tail; - } - min_head = min_tail = iter; + if (iter->handle + ? any_reader_read (iter->reader, iter->input) + : proc_read (ds, &iter->input)) + continue; + if (!mtf_delete_file_in_place (mtf, &iter)) + return false; } + break; } - - /* 4. For every TABLE, read another record as long as the BY - values on the TABLE's input record are less than the FILEs' - BY values. If an exact match is found, store all the values - from the TABLE input record into the output record. */ - for (; iter != NULL; iter = next) - { - assert (iter->type == MTF_TABLE); - - next = iter->next; - for (;;) - { - int cmp = mtf_compare_BY_values (mtf, min_head, iter, c); - if (cmp < 0) - { - if (max_head) - max_tail = max_tail->next_min = iter; - else - max_head = max_tail = iter; - } - else if (cmp == 0) - min_tail = min_tail->next_min = iter; - else /* cmp > 0 */ - { - if (iter->handle == NULL) - return true; - if (any_reader_read (iter->reader, &iter->input)) - continue; - if (!mtf_delete_file_in_place (mtf, &iter)) - return false; - } - break; - } - } + } - /* Next sequence number. */ - mtf->seq_num++; + /* Next sequence number. */ + mtf->seq_num++; - /* Store data to all the records we are using. */ - if (min_tail) - min_tail->next_min = NULL; - for (iter = min_head; iter; iter = iter->next_min) - { - int i; + /* Store data to all the records we are using. */ + if (min_tail) + min_tail->next_min = NULL; + for (iter = min_head; iter; iter = iter->next_min) + { + int i; - for (i = 0; i < dict_get_var_cnt (iter->dict); i++) - { - struct variable *v = dict_get_var (iter->dict, i); - struct variable *mv = get_master (v); - size_t mv_index = mv ? var_get_dict_index (mv) : 0; + for (i = 0; i < dict_get_var_cnt (iter->dict); i++) + { + struct variable *v = dict_get_var (iter->dict, i); + struct variable *mv = get_master (v); + size_t mv_index = mv ? var_get_dict_index (mv) : 0; - if (mv != NULL && mtf->seq_nums[mv_index] != mtf->seq_num) - { - const struct ccase *record - = case_is_null (&iter->input) ? c : &iter->input; - union value *out = case_data_rw (&mtf->mtf_case, mv); - - mtf->seq_nums[mv_index] = mtf->seq_num; - if (var_is_numeric (v)) - out->f = case_num (record, v); - else - memcpy (out->s, case_str (record, v), var_get_width (v)); - } - } - if (iter->in_var != NULL) - case_data_rw (&mtf->mtf_case, iter->in_var)->f = 1.; + if (mv != NULL && mtf->seq_nums[mv_index] != mtf->seq_num) + { + const struct ccase *record = iter->input; + union value *out = case_data_rw (&mtf->mtf_case, mv); - if (iter->type == MTF_FILE && iter->handle == NULL) - read_active_file = true; - } + mtf->seq_nums[mv_index] = mtf->seq_num; + if (var_is_numeric (v)) + out->f = case_num (record, v); + else + memcpy (out->s, case_str (record, v), var_get_width (v)); + } + } + if (iter->in_var != NULL) + case_data_rw (&mtf->mtf_case, iter->in_var)->f = 1.; + } - /* Store missing values to all the records we're not - using. */ - if (max_tail) - max_tail->next_min = NULL; - for (iter = max_head; iter; iter = iter->next_min) - { - int i; + /* Store missing values to all the records we're not using. */ + if (max_tail) + max_tail->next_min = NULL; + for (iter = max_head; iter; iter = iter->next_min) + { + int i; - for (i = 0; i < dict_get_var_cnt (iter->dict); i++) - { - struct variable *v = dict_get_var (iter->dict, i); - struct variable *mv = get_master (v); - size_t mv_index = mv ? var_get_dict_index (mv) : 0; + for (i = 0; i < dict_get_var_cnt (iter->dict); i++) + { + struct variable *v = dict_get_var (iter->dict, i); + struct variable *mv = get_master (v); + size_t mv_index = mv ? var_get_dict_index (mv) : 0; - if (mv != NULL && mtf->seq_nums[mv_index] != mtf->seq_num) - { - union value *out = case_data_rw (&mtf->mtf_case, mv); - mtf->seq_nums[mv_index] = mtf->seq_num; + if (mv != NULL && mtf->seq_nums[mv_index] != mtf->seq_num) + { + union value *out = case_data_rw (&mtf->mtf_case, mv); + mtf->seq_nums[mv_index] = mtf->seq_num; - if (var_is_numeric (v)) - out->f = SYSMIS; - else - memset (out->s, ' ', var_get_width (v)); - } + if (var_is_numeric (v)) + out->f = SYSMIS; + else + memset (out->s, ' ', var_get_width (v)); } - if (iter->in_var != NULL) - case_data_rw (&mtf->mtf_case, iter->in_var)->f = 0.; - } + } + if (iter->in_var != NULL) + case_data_rw (&mtf->mtf_case, iter->in_var)->f = 0.; + } - /* 5. Write the output record. */ - casefile_append (mtf->output, &mtf->mtf_case); + /* 5. Write the output record. */ + casefile_append (mtf->output, &mtf->mtf_case); - /* 6. Read another record from each input file FILE and TABLE - that we stored values from above. If we come to the end of - one of the input files, remove it from the list of input - files. */ - for (iter = min_head; iter && iter->type == MTF_FILE; iter = next) - { - next = iter->next_min; - if (iter->reader != NULL - && !any_reader_read (iter->reader, &iter->input)) - if (!mtf_delete_file_in_place (mtf, &iter)) - return false; - } + /* 6. Read another record from each input file FILE and TABLE + that we stored values from above. If we come to the end of + one of the input files, remove it from the list of input + files. */ + for (iter = min_head; iter && iter->type == MTF_FILE; iter = next) + { + next = iter->next_min; + if (iter->reader != NULL + ? !any_reader_read (iter->reader, iter->input) + : !proc_read (ds, &iter->input)) + if (!mtf_delete_file_in_place (mtf, &iter)) + return false; } - while (!read_active_file - && mtf->head != NULL && mtf->head->type == MTF_FILE); - return true; } diff --git a/src/language/data-io/inpt-pgm.c b/src/language/data-io/inpt-pgm.c index 306a8bdd..cfd2c746 100644 --- a/src/language/data-io/inpt-pgm.c +++ b/src/language/data-io/inpt-pgm.c @@ -66,10 +66,10 @@ enum value_init_type struct input_program_pgm { struct trns_chain *trns_chain; + enum trns_result restart; + bool inited_case; /* Did one-time case initialization? */ size_t case_nr; /* Incremented by END CASE transformation. */ - write_case_func *write_case;/* Called by END CASE. */ - write_case_data wc_data; /* Aux data used by END CASE. */ enum value_init_type *init; /* How to initialize each `union value'. */ size_t init_cnt; /* Number of elements in inp_init. */ @@ -152,6 +152,10 @@ cmd_input_program (struct lexer *lexer, struct dataset *ds) inp->trns_chain = proc_capture_transformations (ds); trns_chain_finalize (inp->trns_chain); + inp->restart = TRNS_CONTINUE; + inp->inited_case = false; + inp->case_nr = 1; + /* Figure out how to initialize each input case. */ inp->init_cnt = dict_get_next_value_idx (dataset_dict (ds)); inp->init = xnmalloc (inp->init_cnt, sizeof *inp->init); @@ -175,7 +179,7 @@ cmd_input_program (struct lexer *lexer, struct dataset *ds) inp->case_size = dict_get_case_size (dataset_dict (ds)); proc_set_source (ds, - create_case_source (&input_program_source_class, inp)); + create_case_source (&input_program_source_class, inp)); return CMD_SUCCESS; } @@ -237,28 +241,45 @@ clear_case (const struct input_program_pgm *inp, struct ccase *c) } } -/* Executes each transformation in turn on a `blank' case. - Returns true if successful, false if an I/O error occurred. */ +/* Returns true if STATE is valid given the transformations that + are allowed within INPUT PROGRAM. */ +static bool +is_valid_state (enum trns_result state) +{ + return (state == TRNS_CONTINUE + || state == TRNS_ERROR + || state == TRNS_END_FILE + || state >= 0); +} + +/* Reads one case into C. + Returns true if successful, false at end of file or if an + I/O error occurred. */ static bool -input_program_source_read (struct case_source *source, - struct ccase *c, - write_case_func *write_case, - write_case_data wc_data) +input_program_source_read (struct case_source *source, struct ccase *c) { struct input_program_pgm *inp = source->aux; - inp->case_nr = 1; - inp->write_case = write_case; - inp->wc_data = wc_data; - for (init_case (inp, c); ; clear_case (inp, c)) + if (!inp->inited_case) + { + init_case (inp, c); + inp->inited_case = true; + } + + do { - enum trns_result result = trns_chain_execute (inp->trns_chain, c, - &inp->case_nr); - if (result == TRNS_ERROR) + assert (is_valid_state (inp->restart)); + if (inp->restart == TRNS_ERROR || inp->restart == TRNS_END_FILE) return false; - else if (result == TRNS_END_FILE) - return true; + + clear_case (inp, c); + inp->restart = trns_chain_execute (inp->trns_chain, inp->restart, + c, &inp->case_nr); + assert (is_valid_state (inp->restart)); } + while (inp->restart < 0); + + return true; } static void @@ -272,13 +293,16 @@ destroy_input_program (struct input_program_pgm *pgm) } } -/* Destroys an INPUT PROGRAM source. */ -static void +/* Destroys the source. + Returns true if successful read, false if an I/O occurred + during destruction or previously. */ +static bool input_program_source_destroy (struct case_source *source) { struct input_program_pgm *inp = source->aux; - + bool ok = inp->restart != TRNS_ERROR; destroy_input_program (inp); + return ok; } static const struct case_source_class input_program_source_class = @@ -300,16 +324,12 @@ cmd_end_case (struct lexer *lexer, struct dataset *ds UNUSED) /* Sends the current case as the source's output. */ int -end_case_trns_proc (void *inp_, struct ccase *c, casenumber case_nr UNUSED) +end_case_trns_proc (void *inp_, struct ccase *c UNUSED, + casenumber case_nr UNUSED) { struct input_program_pgm *inp = inp_; - - if (!inp->write_case (inp->wc_data)) - return TRNS_ERROR; - inp->case_nr++; - clear_case (inp, c); - return TRNS_CONTINUE; + return TRNS_END_CASE; } /* REREAD transformation. */ diff --git a/src/language/data-io/matrix-data.c b/src/language/data-io/matrix-data.c deleted file mode 100644 index f958584b..00000000 --- a/src/language/data-io/matrix-data.c +++ /dev/null @@ -1,1990 +0,0 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ - -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "minmax.h" -#include "size_max.h" - -#include "gettext.h" -#define _(msgid) gettext (msgid) - -/* FIXME: /N subcommand not implemented. It should be pretty simple, - too. */ - -/* Different types of variables for MATRIX DATA procedure. Order is - important: these are used for sort keys. */ -enum - { - MXD_SPLIT, /* SPLIT FILE variables. */ - MXD_ROWTYPE, /* ROWTYPE_. */ - MXD_FACTOR, /* Factor variables. */ - MXD_VARNAME, /* VARNAME_. */ - MXD_CONTINUOUS, /* Continuous variables. */ - - MXD_COUNT - }; - -/* Format type enums. */ -enum format_type - { - LIST, - FREE - }; - -/* Matrix section enums. */ -enum matrix_section - { - LOWER, - UPPER, - FULL - }; - -/* Diagonal inclusion enums. */ -enum include_diagonal - { - DIAGONAL, - NODIAGONAL - }; - -/* CONTENTS types. */ -enum content_type - { - N_VECTOR, - N_SCALAR, - N_MATRIX, - MEAN, - STDDEV, - COUNT, - MSE, - DFE, - MAT, - COV, - CORR, - PROX, - - LPAREN, - RPAREN, - EOC - }; - -/* 0=vector, 1=matrix, 2=scalar. */ -static const int content_type[PROX + 1] = - { - 0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, - }; - -/* Name of each content type. */ -static const char *const content_names[PROX + 1] = - { - "N", "N", "N_MATRIX", "MEAN", "STDDEV", "COUNT", "MSE", - "DFE", "MAT", "COV", "CORR", "PROX", - }; - -/* A MATRIX DATA input program. */ -struct matrix_data_pgm - { - struct pool *container; /* Arena used for all allocations. */ - struct dfm_reader *reader; /* Data file to read. */ - - /* Format. */ - enum format_type fmt; /* LIST or FREE. */ - enum matrix_section section;/* LOWER or UPPER or FULL. */ - enum include_diagonal diag; /* DIAGONAL or NODIAGONAL. */ - - int explicit_rowtype; /* ROWTYPE_ specified explicitly in data? */ - struct variable *rowtype_, *varname_; /* ROWTYPE_, VARNAME_ variables. */ - - struct variable *single_split; /* Single SPLIT FILE variable. */ - - /* Factor variables. */ - size_t n_factors; /* Number of factor variables. */ - struct variable **factors; /* Factor variables. */ - int is_per_factor[PROX + 1]; /* Is there per-factor data? */ - - int cells; /* Number of cells, or -1 if none. */ - - int pop_n; /* Population N specified by user. */ - - /* CONTENTS subcommand. */ - int contents[EOC * 3 + 1]; /* Contents. */ - int n_contents; /* Number of entries. */ - - /* Continuous variables. */ - int n_continuous; /* Number of continuous variables. */ - int first_continuous; /* Index into dictionary of - first continuous variable. */ - }; - -/* Auxiliary data attached to MATRIX DATA variables. */ -struct mxd_var - { - int var_type; /* Variable type. */ - int sub_type; /* Subtype. */ - }; - -static const struct case_source_class matrix_data_with_rowtype_source_class; -static const struct case_source_class matrix_data_without_rowtype_source_class; - -static int compare_variables_by_mxd_var_type (const void *pa, - const void *pb); -static bool read_matrices_without_rowtype (struct dataset *ds, struct matrix_data_pgm *); -static bool read_matrices_with_rowtype (struct dataset *ds, struct matrix_data_pgm *); -static int string_to_content_type (const char *, int *); -static void attach_mxd_aux (struct variable *, int var_type, int sub_type); - -int -cmd_matrix_data (struct lexer *lexer, struct dataset *ds) -{ - struct pool *pool; - struct matrix_data_pgm *mx; - struct file_handle *fh = fh_inline_file (); - bool ok; - - unsigned seen = 0; - - discard_variables (ds); - - pool = pool_create (); - mx = pool_alloc (pool, sizeof *mx); - mx->container = pool; - mx->reader = NULL; - mx->fmt = LIST; - mx->section = LOWER; - mx->diag = DIAGONAL; - mx->explicit_rowtype = 0; - mx->rowtype_ = NULL; - mx->varname_ = NULL; - mx->single_split = NULL; - mx->n_factors = 0; - mx->factors = NULL; - memset (mx->is_per_factor, 0, sizeof mx->is_per_factor); - mx->cells = -1; - mx->pop_n = -1; - mx->n_contents = 0; - mx->n_continuous = 0; - mx->first_continuous = 0; - while (lex_token (lexer) != '.') - { - lex_match (lexer, '/'); - - if (lex_match_id (lexer, "VARIABLES")) - { - char **v; - size_t nv; - - if (seen & 1) - { - msg (SE, _("VARIABLES subcommand multiply specified.")); - goto lossage; - } - seen |= 1; - - lex_match (lexer, '='); - if (!parse_DATA_LIST_vars (lexer, &v, &nv, PV_NO_DUPLICATE)) - goto lossage; - - { - size_t i; - - for (i = 0; i < nv; i++) - if (!strcasecmp (v[i], "VARNAME_")) - { - msg (SE, _("VARNAME_ cannot be explicitly specified on " - "VARIABLES.")); - for (i = 0; i < nv; i++) - free (v[i]); - free (v); - goto lossage; - } - } - - { - size_t i; - - for (i = 0; i < nv; i++) - { - struct variable *new_var; - - if (strcasecmp (v[i], "ROWTYPE_")) - { - new_var = dict_create_var_assert (dataset_dict (ds), v[i], 0); - attach_mxd_aux (new_var, MXD_CONTINUOUS, i); - } - else - mx->explicit_rowtype = 1; - free (v[i]); - } - free (v); - } - - mx->rowtype_ = dict_create_var_assert (dataset_dict (ds), - "ROWTYPE_", 8); - attach_mxd_aux (mx->rowtype_, MXD_ROWTYPE, 0); - } - else if (lex_match_id (lexer, "FILE")) - { - lex_match (lexer, '='); - fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE); - if (fh == NULL) - goto lossage; - } - else if (lex_match_id (lexer, "FORMAT")) - { - lex_match (lexer, '='); - - while (lex_token (lexer) == T_ID) - { - if (lex_match_id (lexer, "LIST")) - mx->fmt = LIST; - else if (lex_match_id (lexer, "FREE")) - mx->fmt = FREE; - else if (lex_match_id (lexer, "LOWER")) - mx->section = LOWER; - else if (lex_match_id (lexer, "UPPER")) - mx->section = UPPER; - else if (lex_match_id (lexer, "FULL")) - mx->section = FULL; - else if (lex_match_id (lexer, "DIAGONAL")) - mx->diag = DIAGONAL; - else if (lex_match_id (lexer, "NODIAGONAL")) - mx->diag = NODIAGONAL; - else - { - lex_error (lexer, _("in FORMAT subcommand")); - goto lossage; - } - } - } - else if (lex_match_id (lexer, "SPLIT")) - { - lex_match (lexer, '='); - - if (seen & 2) - { - msg (SE, _("SPLIT subcommand multiply specified.")); - goto lossage; - } - seen |= 2; - - if (lex_token (lexer) != T_ID) - { - lex_error (lexer, _("in SPLIT subcommand")); - goto lossage; - } - - if (dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL - && (lex_look_ahead (lexer) == '.' || lex_look_ahead (lexer) == '/')) - { - if (!strcasecmp (lex_tokid (lexer), "ROWTYPE_") - || !strcasecmp (lex_tokid (lexer), "VARNAME_")) - { - msg (SE, _("Split variable may not be named ROWTYPE_ " - "or VARNAME_.")); - goto lossage; - } - - mx->single_split = dict_create_var_assert (dataset_dict (ds), - lex_tokid (lexer), 0); - attach_mxd_aux (mx->single_split, MXD_CONTINUOUS, 0); - lex_get (lexer); - - dict_set_split_vars (dataset_dict (ds), &mx->single_split, 1); - } - else - { - struct variable **split; - size_t n; - - if (!parse_variables (lexer, dataset_dict (ds), - &split, &n, PV_NO_DUPLICATE)) - goto lossage; - - dict_set_split_vars (dataset_dict (ds), split, n); - } - - { - struct variable *const *split = dict_get_split_vars (dataset_dict (ds)); - size_t split_cnt = dict_get_split_cnt (dataset_dict (ds)); - int i; - - for (i = 0; i < split_cnt; i++) - { - struct mxd_var *mv = var_get_aux (split[i]); - if (mv->var_type != MXD_CONTINUOUS) - { - msg (SE, _("Split variable %s is already another type."), - lex_tokid (lexer)); - goto lossage; - } - var_clear_aux (split[i]); - attach_mxd_aux (split[i], MXD_SPLIT, i); - } - } - } - else if (lex_match_id (lexer, "FACTORS")) - { - lex_match (lexer, '='); - - if (seen & 4) - { - msg (SE, _("FACTORS subcommand multiply specified.")); - goto lossage; - } - seen |= 4; - - if (!parse_variables (lexer, dataset_dict (ds), &mx->factors, &mx->n_factors, - PV_NONE)) - goto lossage; - - { - size_t i; - - for (i = 0; i < mx->n_factors; i++) - { - struct variable *v = mx->factors[i]; - struct mxd_var *mv = var_get_aux (v); - if (mv->var_type != MXD_CONTINUOUS) - { - msg (SE, _("Factor variable %s is already another type."), - lex_tokid (lexer)); - goto lossage; - } - var_clear_aux (v); - attach_mxd_aux (v, MXD_FACTOR, i); - } - } - } - else if (lex_match_id (lexer, "CELLS")) - { - lex_match (lexer, '='); - - if (mx->cells != -1) - { - msg (SE, _("CELLS subcommand multiply specified.")); - goto lossage; - } - - if (!lex_is_integer (lexer) || lex_integer (lexer) < 1) - { - lex_error (lexer, _("expecting positive integer")); - goto lossage; - } - - mx->cells = lex_integer (lexer); - lex_get (lexer); - } - else if (lex_match_id (lexer, "N")) - { - lex_match (lexer, '='); - - if (mx->pop_n != -1) - { - msg (SE, _("N subcommand multiply specified.")); - goto lossage; - } - - if (!lex_is_integer (lexer) || lex_integer (lexer) < 1) - { - lex_error (lexer, _("expecting positive integer")); - goto lossage; - } - - mx->pop_n = lex_integer (lexer); - lex_get (lexer); - } - else if (lex_match_id (lexer, "CONTENTS")) - { - int inside_parens = 0; - unsigned collide = 0; - int item; - - if (seen & 8) - { - msg (SE, _("CONTENTS subcommand multiply specified.")); - goto lossage; - } - seen |= 8; - - lex_match (lexer, '='); - - { - int i; - - for (i = 0; i <= PROX; i++) - mx->is_per_factor[i] = 0; - } - - for (;;) - { - if (lex_match (lexer, '(')) - { - if (inside_parens) - { - msg (SE, _("Nested parentheses not allowed.")); - goto lossage; - } - inside_parens = 1; - item = LPAREN; - } - else if (lex_match (lexer, ')')) - { - if (!inside_parens) - { - msg (SE, _("Mismatched right parenthesis (`(').")); - goto lossage; - } - if (mx->contents[mx->n_contents - 1] == LPAREN) - { - msg (SE, _("Empty parentheses not allowed.")); - goto lossage; - } - inside_parens = 0; - item = RPAREN; - } - else - { - int content_type; - int collide_index; - - if (lex_token (lexer) != T_ID) - { - lex_error (lexer, _("in CONTENTS subcommand")); - goto lossage; - } - - content_type = string_to_content_type (lex_tokid (lexer), - &collide_index); - if (content_type == -1) - { - lex_error (lexer, _("in CONTENTS subcommand")); - goto lossage; - } - lex_get (lexer); - - if (collide & (1 << collide_index)) - { - msg (SE, _("Content multiply specified for %s."), - content_names[content_type]); - goto lossage; - } - collide |= (1 << collide_index); - - item = content_type; - mx->is_per_factor[item] = inside_parens; - } - mx->contents[mx->n_contents++] = item; - - if (lex_token (lexer) == '/' || lex_token (lexer) == '.') - break; - } - - if (inside_parens) - { - msg (SE, _("Missing right parenthesis.")); - goto lossage; - } - mx->contents[mx->n_contents] = EOC; - } - else - { - lex_error (lexer, NULL); - goto lossage; - } - } - - if (lex_token (lexer) != '.') - { - lex_error (lexer, _("expecting end of command")); - goto lossage; - } - - if (!(seen & 1)) - { - msg (SE, _("Missing VARIABLES subcommand.")); - goto lossage; - } - - if (!mx->n_contents && !mx->explicit_rowtype) - { - msg (SW, _("CONTENTS subcommand not specified: assuming file " - "contains only CORR matrix.")); - - mx->contents[0] = CORR; - mx->contents[1] = EOC; - mx->n_contents = 0; - } - - if (mx->n_factors && !mx->explicit_rowtype && mx->cells == -1) - { - msg (SE, _("Missing CELLS subcommand. CELLS is required " - "when ROWTYPE_ is not given in the data and " - "factors are present.")); - goto lossage; - } - - if (mx->explicit_rowtype && mx->single_split) - { - msg (SE, _("Split file values must be present in the data when " - "ROWTYPE_ is present.")); - goto lossage; - } - - /* Create VARNAME_. */ - mx->varname_ = dict_create_var_assert (dataset_dict (ds), "VARNAME_", 8); - attach_mxd_aux (mx->varname_, MXD_VARNAME, 0); - - /* Sort the dictionary variables into the desired order for the - system file output. */ - { - struct variable **v; - size_t nv; - - dict_get_vars (dataset_dict (ds), &v, &nv, 0); - qsort (v, nv, sizeof *v, compare_variables_by_mxd_var_type); - dict_reorder_vars (dataset_dict (ds), v, nv); - free (v); - } - - /* Set formats. */ - { - static const struct fmt_spec fmt_tab[MXD_COUNT] = - { - {FMT_F, 4, 0}, - {FMT_A, 8, 0}, - {FMT_F, 4, 0}, - {FMT_A, 8, 0}, - {FMT_F, 10, 4}, - }; - - int i; - - mx->first_continuous = -1; - for (i = 0; i < dict_get_var_cnt (dataset_dict (ds)); i++) - { - struct variable *v = dict_get_var (dataset_dict (ds), i); - struct mxd_var *mv = var_get_aux (v); - int type = mv->var_type; - - assert (type >= 0 && type < MXD_COUNT); - var_set_both_formats (v, &fmt_tab[type]); - - if (type == MXD_CONTINUOUS) - mx->n_continuous++; - if (mx->first_continuous == -1 && type == MXD_CONTINUOUS) - mx->first_continuous = i; - } - } - - if (mx->n_continuous == 0) - { - msg (SE, _("No continuous variables specified.")); - goto lossage; - } - - mx->reader = dfm_open_reader (fh, lexer); - if (mx->reader == NULL) - goto lossage; - - if (mx->explicit_rowtype) - ok = read_matrices_with_rowtype (ds, mx); - else - ok = read_matrices_without_rowtype (ds, mx); - - dfm_close_reader (mx->reader); - - pool_destroy (mx->container); - - return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; - -lossage: - discard_variables (ds); - free (mx->factors); - pool_destroy (mx->container); - return CMD_CASCADING_FAILURE; -} - -/* Look up string S as a content-type name and return the - corresponding enumerated value, or -1 if there is no match. If - COLLIDE is non-NULL then *COLLIDE returns a value (suitable for use - as a bit-index) which can be used for determining whether a related - statistic has already been used. */ -static int -string_to_content_type (const char *s, int *collide) -{ - static const struct - { - int value; - int collide; - const char *string; - } - *tp, - tab[] = - { - {N_VECTOR, 0, "N_VECTOR"}, - {N_VECTOR, 0, "N"}, - {N_SCALAR, 0, "N_SCALAR"}, - {N_MATRIX, 1, "N_MATRIX"}, - {MEAN, 2, "MEAN"}, - {STDDEV, 3, "STDDEV"}, - {STDDEV, 3, "SD"}, - {COUNT, 4, "COUNT"}, - {MSE, 5, "MSE"}, - {DFE, 6, "DFE"}, - {MAT, 7, "MAT"}, - {COV, 8, "COV"}, - {CORR, 9, "CORR"}, - {PROX, 10, "PROX"}, - {-1, -1, NULL}, - }; - - for (tp = tab; tp->value != -1; tp++) - if (!strcasecmp (s, tp->string)) - { - if (collide) - *collide = tp->collide; - - return tp->value; - } - return -1; -} - -/* Compare two variables using p.mxd.var_type and p.mxd.sub_type - fields. */ -static int -compare_variables_by_mxd_var_type (const void *a_, const void *b_) -{ - struct variable *const *pa = a_; - struct variable *const *pb = b_; - const struct mxd_var *a = var_get_aux (*pa); - const struct mxd_var *b = var_get_aux (*pb); - - if (a->var_type != b->var_type) - return a->var_type > b->var_type ? 1 : -1; - else - return a->sub_type < b->sub_type ? -1 : a->sub_type > b->sub_type; -} - -/* Attaches a struct mxd_var with the specific member values to - V. */ -static void -attach_mxd_aux (struct variable *v, int var_type, int sub_type) -{ - struct mxd_var *mv; - - assert (var_get_aux (v) == NULL); - mv = xmalloc (sizeof *mv); - mv->var_type = var_type; - mv->sub_type = sub_type; - var_attach_aux (v, mv, var_dtor_free); -} - -/* Matrix tokenizer. */ - -/* Matrix token types. */ -enum matrix_token_type - { - MNUM, /* Number. */ - MSTR /* String. */ - }; - -/* A MATRIX DATA parsing token. */ -struct matrix_token - { - enum matrix_token_type type; - double number; /* MNUM: token value. */ - char *string; /* MSTR: token string; not null-terminated. */ - int length; /* MSTR: tokstr length. */ - }; - -static int mget_token (struct matrix_token *, struct dfm_reader *); - -#if DEBUGGING -#define mget_token(TOKEN, READER) mget_token_dump(TOKEN, READER) - -static void -mdump_token (const struct matrix_token *token) -{ - switch (token->type) - { - case MNUM: - printf (" #%g", token->number); - break; - case MSTR: - printf (" '%.*s'", token->length, token->string); - break; - default: - NOT_REACHED (); - } - fflush (stdout); -} - -static int -mget_token_dump (struct matrix_token *token, struct dfm_reader *reader) -{ - int result = (mget_token) (token, reader); - mdump_token (token); - return result; -} -#endif - -/* Return the current position in READER. */ -static const char * -context (struct dfm_reader *reader) -{ - static struct string buf = DS_EMPTY_INITIALIZER; - - ds_clear (&buf); - if (dfm_eof (reader)) - ds_assign_cstr (&buf, "at end of file"); - else - { - struct substring p; - - p = dfm_get_record (reader); - ss_ltrim (&p, ss_cstr (CC_SPACES)); - if (ss_is_empty (p)) - ds_assign_cstr (&buf, "at end of line"); - else - ds_put_format (&buf, "before `%.*s'", - (int) ss_cspan (p, ss_cstr (CC_SPACES)), ss_data (p)); - } - - return ds_cstr (&buf); -} - -/* Is there at least one token left in the data file? */ -static bool -another_token (struct dfm_reader *reader) -{ - for (;;) - { - struct substring p; - size_t space_cnt; - - if (dfm_eof (reader)) - return false; - - p = dfm_get_record (reader); - space_cnt = ss_span (p, ss_cstr (CC_SPACES)); - if (space_cnt < ss_length (p)) - { - dfm_forward_columns (reader, space_cnt); - return true; - } - - dfm_forward_record (reader); - } - NOT_REACHED(); -} - -/* Parse a MATRIX DATA token from READER into TOKEN. */ -static int -(mget_token) (struct matrix_token *token, struct dfm_reader *reader) -{ - struct substring line, p; - struct substring s; - int c; - - if (!another_token (reader)) - return 0; - - line = p = dfm_get_record (reader); - - /* Three types of fields: quoted with ', quoted with ", unquoted. */ - c = ss_first (p); - if (c == '\'' || c == '"') - { - ss_get_char (&p); - if (!ss_get_until (&p, c, &s)) - msg (SW, _("Scope of string exceeds line.")); - } - else - { - bool is_num = isdigit (c) || c == '.'; - const char *start = ss_data (p); - - for (;;) - { - c = ss_first (p); - if (strchr (CC_SPACES ",-+", c) != NULL) - break; - - if (isdigit (c)) - is_num = true; - if (strchr ("deDE", c) && strchr ("+-", ss_at (p, 1))) - { - is_num = true; - ss_advance (&p, 2); - } - else - ss_advance (&p, 1); - } - s = ss_buffer (start, ss_data (p) - start); - - if (is_num) - data_in (s, FMT_F, 0, - dfm_get_column (reader, ss_data (s)), - (union value *) &token->number, 0); - else - token->type = MSTR; - } - token->string = ss_data (s); - token->length = ss_length (s); - - dfm_reread_record (reader, dfm_get_column (reader, ss_end (s))); - - return 1; -} - -/* Forcibly skip the end of a line for content type CONTENT in - READER. */ -static int -force_eol (struct dfm_reader *reader, const char *content) -{ - struct substring p; - - if (dfm_eof (reader)) - return 0; - - p = dfm_get_record (reader); - if (ss_span (p, ss_cstr (CC_SPACES)) != ss_length (p)) - { - msg (SE, _("End of line expected %s while reading %s."), - context (reader), content); - return 0; - } - - dfm_forward_record (reader); - return 1; -} - -/* Back end, omitting ROWTYPE_. */ - -struct nr_aux_data - { - const struct dictionary *dict; /* The dictionary */ - struct matrix_data_pgm *mx; /* MATRIX DATA program. */ - double ***data; /* MATRIX DATA data. */ - double *factor_values; /* Factor values. */ - int max_cell_idx; /* Max-numbered cell that we have - read so far, plus one. */ - double *split_values; /* SPLIT FILE variable values. */ - }; - -static bool nr_read_splits (struct nr_aux_data *, int compare); -static bool nr_read_factors (struct nr_aux_data *, int cell); -static bool nr_output_data (struct nr_aux_data *, struct ccase *, - write_case_func *, write_case_data); -static bool matrix_data_read_without_rowtype (struct case_source *source, - struct ccase *, - write_case_func *, - write_case_data); - -/* Read from the data file and write it to the active file. - Returns true if successful, false if an I/O error occurred. */ -static bool -read_matrices_without_rowtype (struct dataset *ds, struct matrix_data_pgm *mx) -{ - struct nr_aux_data nr; - bool ok; - - if (mx->cells == -1) - mx->cells = 1; - - nr.mx = mx; - nr.dict = dataset_dict (ds); - nr.data = NULL; - nr.factor_values = xnmalloc (mx->n_factors * mx->cells, - sizeof *nr.factor_values); - nr.max_cell_idx = 0; - nr.split_values = xnmalloc (dict_get_split_cnt (dataset_dict (ds)), - sizeof *nr.split_values); - - proc_set_source (ds, create_case_source ( - &matrix_data_without_rowtype_source_class, &nr)); - - ok = procedure (ds, NULL, NULL); - - free (nr.split_values); - free (nr.factor_values); - - return ok; -} - -/* Mirror data across the diagonal of matrix CP which contains - CONTENT type data. */ -static void -fill_matrix (struct matrix_data_pgm *mx, int content, double *cp) -{ - int type = content_type[content]; - - if (type == 1 && mx->section != FULL) - { - if (mx->diag == NODIAGONAL) - { - const double fill = content == CORR ? 1.0 : SYSMIS; - int i; - - for (i = 0; i < mx->n_continuous; i++) - cp[i * (1 + mx->n_continuous)] = fill; - } - - { - int c, r; - - if (mx->section == LOWER) - { - int n_lines = mx->n_continuous; - if (mx->section != FULL && mx->diag == NODIAGONAL) - n_lines--; - - for (r = 1; r < n_lines; r++) - for (c = 0; c < r; c++) - cp[r + c * mx->n_continuous] = cp[c + r * mx->n_continuous]; - } - else - { - assert (mx->section == UPPER); - for (r = 1; r < mx->n_continuous; r++) - for (c = 0; c < r; c++) - cp[c + r * mx->n_continuous] = cp[r + c * mx->n_continuous]; - } - } - } - else if (type == 2) - { - int c; - - for (c = 1; c < mx->n_continuous; c++) - cp[c] = cp[0]; - } -} - -/* Read data lines for content type CONTENT from the data file. - If PER_FACTOR is nonzero, then factor information is read from - the data file. Data is for cell number CELL. */ -static int -nr_read_data_lines (struct nr_aux_data *nr, - int per_factor, int cell, int content, int compare) -{ - struct matrix_data_pgm *mx = nr->mx; - const int type = content_type[content]; /* Content type. */ - int n_lines; /* Number of lines to parse from data file for this type. */ - double *cp; /* Current position in vector or matrix. */ - int i; - - if (type != 1) - n_lines = 1; - else - { - n_lines = mx->n_continuous; - if (mx->section != FULL && mx->diag == NODIAGONAL) - n_lines--; - } - - cp = nr->data[content][cell]; - if (type == 1 && mx->section == LOWER && mx->diag == NODIAGONAL) - cp += mx->n_continuous; - - for (i = 0; i < n_lines; i++) - { - int n_cols; - - if (!nr_read_splits (nr, 1)) - return 0; - if (per_factor && !nr_read_factors (nr, cell)) - return 0; - compare = 1; - - switch (type) - { - case 0: - n_cols = mx->n_continuous; - break; - case 1: - switch (mx->section) - { - case LOWER: - n_cols = i + 1; - break; - case UPPER: - cp += i; - n_cols = mx->n_continuous - i; - if (mx->diag == NODIAGONAL) - { - n_cols--; - cp++; - } - break; - case FULL: - n_cols = mx->n_continuous; - break; - default: - NOT_REACHED (); - } - break; - case 2: - n_cols = 1; - break; - default: - NOT_REACHED (); - } - - { - int j; - - for (j = 0; j < n_cols; j++) - { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - return 0; - if (token.type != MNUM) - { - msg (SE, _("expecting value for %s %s"), - var_get_name (dict_get_var (nr->dict, j)), - context (mx->reader)); - return 0; - } - - *cp++ = token.number; - } - if (mx->fmt != FREE - && !force_eol (mx->reader, content_names[content])) - return 0; - } - - if (mx->section == LOWER) - cp += mx->n_continuous - n_cols; - } - - fill_matrix (mx, content, nr->data[content][cell]); - - return 1; -} - -/* When ROWTYPE_ does not appear in the data, reads the matrices and - writes them to the output file. - Returns true if successful, false if an I/O error occurred. */ -static bool -matrix_data_read_without_rowtype (struct case_source *source, - struct ccase *c, - write_case_func *write_case, - write_case_data wc_data) -{ - struct nr_aux_data *nr = source->aux; - struct matrix_data_pgm *mx = nr->mx; - - { - int *cp; - - nr->data = pool_nalloc (mx->container, PROX + 1, sizeof *nr->data); - - { - int i; - - for (i = 0; i <= PROX; i++) - nr->data[i] = NULL; - } - - for (cp = mx->contents; *cp != EOC; cp++) - if (*cp != LPAREN && *cp != RPAREN) - { - int per_factor = mx->is_per_factor[*cp]; - int n_entries; - - n_entries = mx->n_continuous; - if (content_type[*cp] == 1) - n_entries *= mx->n_continuous; - - { - int n_vectors = per_factor ? mx->cells : 1; - int i; - - nr->data[*cp] = pool_nalloc (mx->container, - n_vectors, sizeof **nr->data); - - for (i = 0; i < n_vectors; i++) - nr->data[*cp][i] = pool_nalloc (mx->container, - n_entries, sizeof ***nr->data); - } - } - } - - for (;;) - { - int *bp, *ep, *np; - - if (!nr_read_splits (nr, 0)) - return true; - - for (bp = mx->contents; *bp != EOC; bp = np) - { - int per_factor; - - /* Trap the CONTENTS that we should parse in this pass - between bp and ep. Set np to the starting bp for next - iteration. */ - if (*bp == LPAREN) - { - ep = ++bp; - while (*ep != RPAREN) - ep++; - np = &ep[1]; - per_factor = 1; - } - else - { - ep = &bp[1]; - while (*ep != EOC && *ep != LPAREN) - ep++; - np = ep; - per_factor = 0; - } - - { - int i; - - for (i = 0; i < (per_factor ? mx->cells : 1); i++) - { - int *cp; - - for (cp = bp; cp < ep; cp++) - if (!nr_read_data_lines (nr, per_factor, i, *cp, cp != bp)) - return true; - } - } - } - - if (!nr_output_data (nr, c, write_case, wc_data)) - return false; - - if (dict_get_split_cnt (nr->dict) == 0 - || !another_token (mx->reader)) - return true; - } -} - -/* Read the split file variables. If COMPARE is 1, compares the - values read to the last values read and returns true if they're equal, - false otherwise. */ -static bool -nr_read_splits (struct nr_aux_data *nr, int compare) -{ - struct matrix_data_pgm *mx = nr->mx; - static int just_read = 0; /* FIXME: WTF? */ - size_t split_cnt; - size_t i; - - if (compare && just_read) - { - just_read = 0; - return true; - } - - if (dict_get_split_vars (nr->dict) == NULL) - return true; - - if (mx->single_split) - { - if (!compare) - { - struct mxd_var *mv = var_get_aux (dict_get_split_vars (nr->dict)[0]); - nr->split_values[0] = ++mv->sub_type; - } - return true; - } - - if (!compare) - just_read = 1; - - split_cnt = dict_get_split_cnt (nr->dict); - for (i = 0; i < split_cnt; i++) - { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - return false; - if (token.type != MNUM) - { - msg (SE, _("Syntax error expecting SPLIT FILE value %s."), - context (mx->reader)); - return false; - } - - if (!compare) - nr->split_values[i] = token.number; - else if (nr->split_values[i] != token.number) - { - msg (SE, _("Expecting value %g for %s."), - nr->split_values[i], - var_get_name (dict_get_split_vars (nr->dict)[i])); - return false; - } - } - - return true; -} - -/* Read the factors for cell CELL. If COMPARE is 1, compares the - values read to the last values read and returns true if they're equal, - false otherwise. */ -static bool -nr_read_factors (struct nr_aux_data *nr, int cell) -{ - struct matrix_data_pgm *mx = nr->mx; - bool compare; - - if (mx->n_factors == 0) - return true; - - assert (nr->max_cell_idx >= cell); - if (cell != nr->max_cell_idx) - compare = true; - else - { - compare = false; - nr->max_cell_idx++; - } - - { - size_t i; - - for (i = 0; i < mx->n_factors; i++) - { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - return false; - if (token.type != MNUM) - { - msg (SE, _("Syntax error expecting factor value %s."), - context (mx->reader)); - return false; - } - - if (!compare) - nr->factor_values[i + mx->n_factors * cell] = token.number; - else if (nr->factor_values[i + mx->n_factors * cell] != token.number) - { - msg (SE, _("Syntax error expecting value %g for %s %s."), - nr->factor_values[i + mx->n_factors * cell], - var_get_name (mx->factors[i]), context (mx->reader)); - return false; - } - } - } - - return true; -} - -/* Write the contents of a cell having content type CONTENT and data - CP to the active file. - Returns true if successful, false if an I/O error occurred. */ -static bool -dump_cell_content (const struct dictionary *dict, - struct matrix_data_pgm *mx, int content, double *cp, - struct ccase *c, - write_case_func *write_case, write_case_data wc_data) -{ - int type = content_type[content]; - - { - buf_copy_str_rpad (case_data_rw (c, mx->rowtype_)->s, 8, - content_names[content]); - - if (type != 1) - memset (case_data_rw (c, mx->varname_)->s, ' ', 8); - } - - { - int n_lines = (type == 1) ? mx->n_continuous : 1; - int i; - - for (i = 0; i < n_lines; i++) - { - int j; - - for (j = 0; j < mx->n_continuous; j++) - { - struct variable *v = dict_get_var (dict, mx->first_continuous + j); - case_data_rw (c, v)->f = *cp; - cp++; - } - if (type == 1) - buf_copy_str_rpad (case_data_rw (c, mx->varname_)->s, 8, - var_get_name ( - dict_get_var (dict, mx->first_continuous + i))); - if (!write_case (wc_data)) - return false; - } - } - return true; -} - -/* Finally dump out everything from nr_data[] to the output file. */ -static bool -nr_output_data (struct nr_aux_data *nr, struct ccase *c, - write_case_func *write_case, write_case_data wc_data) -{ - struct matrix_data_pgm *mx = nr->mx; - - { - struct variable *const *split; - size_t split_cnt; - size_t i; - - split_cnt = dict_get_split_cnt (nr->dict); - split = dict_get_split_vars (nr->dict); - for (i = 0; i < split_cnt; i++) - case_data_rw (c, split[i])->f = nr->split_values[i]; - } - - if (mx->n_factors) - { - int cell; - - for (cell = 0; cell < mx->cells; cell++) - { - { - size_t factor; - - for (factor = 0; factor < mx->n_factors; factor++) - case_data_rw (c, mx->factors[factor])->f - = nr->factor_values[factor + cell * mx->n_factors]; - } - - { - int content; - - for (content = 0; content <= PROX; content++) - if (mx->is_per_factor[content]) - { - assert (nr->data[content] != NULL - && nr->data[content][cell] != NULL); - - if (!dump_cell_content (nr->dict, mx, - content, nr->data[content][cell], - c, write_case, wc_data)) - return false; - } - } - } - } - - { - int content; - - { - size_t factor; - - for (factor = 0; factor < mx->n_factors; factor++) - case_data_rw (c, mx->factors[factor])->f = SYSMIS; - } - - for (content = 0; content <= PROX; content++) - if (!mx->is_per_factor[content] && nr->data[content] != NULL) - { - if (!dump_cell_content (nr->dict, mx, content, nr->data[content][0], - c, write_case, wc_data)) - return false; - } - } - - return true; -} - -/* Back end, with ROWTYPE_. */ - -/* All the data for one set of factor values. */ -struct factor_data - { - double *factors; - int n_rows[PROX + 1]; - double *data[PROX + 1]; - struct factor_data *next; - }; - -/* With ROWTYPE_ auxiliary data. */ -struct wr_aux_data - { - const struct dictionary *dict; /* The dictionary */ - struct matrix_data_pgm *mx; /* MATRIX DATA program. */ - int content; /* Type of current row. */ - double *split_values; /* SPLIT FILE variable values. */ - struct factor_data *data; /* All the data. */ - struct factor_data *current; /* Current factor. */ - }; - -static bool wr_read_splits (struct wr_aux_data *, struct ccase *, - write_case_func *, write_case_data); -static bool wr_output_data (struct wr_aux_data *, struct ccase *, - write_case_func *, write_case_data); -static bool wr_read_rowtype (struct wr_aux_data *, - const struct matrix_token *, struct dfm_reader *); -static bool wr_read_factors (struct wr_aux_data *); -static bool wr_read_indeps (struct wr_aux_data *); -static bool matrix_data_read_with_rowtype (struct case_source *, - struct ccase *, - write_case_func *, - write_case_data); - -/* When ROWTYPE_ appears in the data, reads the matrices and writes - them to the output file. - Returns true if successful, false if an I/O error occurred. */ -static bool -read_matrices_with_rowtype (struct dataset *ds, struct matrix_data_pgm *mx) -{ - struct wr_aux_data wr; - bool ok; - - wr.mx = mx; - wr.content = -1; - wr.split_values = NULL; - wr.data = NULL; - wr.current = NULL; - wr.dict = dataset_dict (ds); - mx->cells = 0; - - proc_set_source (ds, - create_case_source (&matrix_data_with_rowtype_source_class, - &wr)); - ok = procedure (ds, NULL, NULL); - - free (wr.split_values); - return ok; -} - -/* Read from the data file and write it to the active file. - Returns true if successful, false if an I/O error occurred. */ -static bool -matrix_data_read_with_rowtype (struct case_source *source, - struct ccase *c, - write_case_func *write_case, - write_case_data wc_data) -{ - struct wr_aux_data *wr = source->aux; - struct matrix_data_pgm *mx = wr->mx; - - do - { - if (!wr_read_splits (wr, c, write_case, wc_data)) - return true; - - if (!wr_read_factors (wr)) - return true; - - if (!wr_read_indeps (wr)) - return true; - } - while (another_token (mx->reader)); - - return wr_output_data (wr, c, write_case, wc_data); -} - -/* Read the split file variables. If they differ from the previous - set of split variables then output the data. Returns success. */ -static bool -wr_read_splits (struct wr_aux_data *wr, - struct ccase *c, - write_case_func *write_case, write_case_data wc_data) -{ - struct matrix_data_pgm *mx = wr->mx; - bool compare; - size_t split_cnt; - - split_cnt = dict_get_split_cnt (wr->dict); - if (split_cnt == 0) - return true; - - if (wr->split_values) - compare = true; - else - { - compare = false; - wr->split_values = xnmalloc (split_cnt, sizeof *wr->split_values); - } - - { - bool different = false; - int i; - - for (i = 0; i < split_cnt; i++) - { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - return false; - if (token.type != MNUM) - { - msg (SE, _("Syntax error %s expecting SPLIT FILE value."), - context (mx->reader)); - return false; - } - - if (compare && wr->split_values[i] != token.number && !different) - { - if (!wr_output_data (wr, c, write_case, wc_data)) - return 0; - different = true; - mx->cells = 0; - } - wr->split_values[i] = token.number; - } - } - - return true; -} - -/* Compares doubles A and B, treating SYSMIS as greatest. */ -static int -compare_doubles (const void *a_, const void *b_, const void *aux UNUSED) -{ - const double *a = a_; - const double *b = b_; - - if (*a == *b) - return 0; - else if (*a == SYSMIS) - return 1; - else if (*b == SYSMIS) - return -1; - else if (*a > *b) - return 1; - else - return -1; -} - -/* Return strcmp()-type comparison of the MX->n_factors factors at _A and - _B. Sort missing values toward the end. */ -static int -compare_factors (const void *a_, const void *b_, const void *mx_) -{ - const struct matrix_data_pgm *mx = mx_; - struct factor_data *const *pa = a_; - struct factor_data *const *pb = b_; - const double *a = (*pa)->factors; - const double *b = (*pb)->factors; - - return lexicographical_compare_3way (a, mx->n_factors, - b, mx->n_factors, - sizeof *a, - compare_doubles, NULL); -} - -/* Write out the data for the current split file to the active - file. - Returns true if successful, false if an I/O error occurred. */ -static bool -wr_output_data (struct wr_aux_data *wr, - struct ccase *c, - write_case_func *write_case, write_case_data wc_data) -{ - struct matrix_data_pgm *mx = wr->mx; - bool ok = true; - - { - struct variable *const *split; - size_t split_cnt; - size_t i; - - split_cnt = dict_get_split_cnt (wr->dict); - split = dict_get_split_vars (wr->dict); - for (i = 0; i < split_cnt; i++) - case_data_rw (c, split[i])->f = wr->split_values[i]; - } - - /* Sort the wr->data list. */ - { - struct factor_data **factors; - struct factor_data *iter; - int i; - - factors = xnmalloc (mx->cells, sizeof *factors); - - for (i = 0, iter = wr->data; iter; iter = iter->next, i++) - factors[i] = iter; - - sort (factors, mx->cells, sizeof *factors, compare_factors, mx); - - wr->data = factors[0]; - for (i = 0; i < mx->cells - 1; i++) - factors[i]->next = factors[i + 1]; - factors[mx->cells - 1]->next = NULL; - - free (factors); - } - - /* Write out records for every set of factor values. */ - { - struct factor_data *iter; - - for (iter = wr->data; iter; iter = iter->next) - { - { - size_t factor; - - for (factor = 0; factor < mx->n_factors; factor++) - case_data_rw (c, mx->factors[factor])->f = iter->factors[factor]; - } - - { - int content; - - for (content = 0; content <= PROX; content++) - { - if (!iter->n_rows[content]) - continue; - - { - int type = content_type[content]; - int n_lines = (type == 1 - ? (mx->n_continuous - - (mx->section != FULL && mx->diag == NODIAGONAL)) - : 1); - - if (n_lines != iter->n_rows[content]) - { - msg (SE, _("Expected %d lines of data for %s content; " - "actually saw %d lines. No data will be " - "output for this content."), - n_lines, content_names[content], - iter->n_rows[content]); - continue; - } - } - - fill_matrix (mx, content, iter->data[content]); - - ok = dump_cell_content (wr->dict, mx, content, - iter->data[content], - c, write_case, wc_data); - if (!ok) - break; - } - } - } - } - - pool_destroy (mx->container); - mx->container = pool_create (); - - wr->data = wr->current = NULL; - - return ok; -} - -/* Sets ROWTYPE_ based on the given TOKEN read from READER. - Return success. */ -static bool -wr_read_rowtype (struct wr_aux_data *wr, - const struct matrix_token *token, - struct dfm_reader *reader) -{ - if (wr->content != -1) - { - msg (SE, _("Multiply specified ROWTYPE_ %s."), context (reader)); - return false; - } - if (token->type != MSTR) - { - msg (SE, _("Syntax error %s expecting ROWTYPE_ string."), - context (reader)); - return false; - } - - { - char s[16]; - char *cp; - - memcpy (s, token->string, MIN (15, token->length)); - s[MIN (15, token->length)] = 0; - - for (cp = s; *cp; cp++) - *cp = toupper ((unsigned char) *cp); - - wr->content = string_to_content_type (s, NULL); - } - - if (wr->content == -1) - { - msg (SE, _("Syntax error %s."), context (reader)); - return 0; - } - - return true; -} - -/* Read the factors for the current row. Select a set of factors and - point wr_current to it. */ -static bool -wr_read_factors (struct wr_aux_data *wr) -{ - struct matrix_data_pgm *mx = wr->mx; - double *factor_values = local_alloc (sizeof *factor_values * mx->n_factors); - - wr->content = -1; - { - size_t i; - - for (i = 0; i < mx->n_factors; i++) - { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - goto lossage; - if (token.type == MSTR) - { - if (!wr_read_rowtype (wr, &token, mx->reader)) - goto lossage; - if (!mget_token (&token, mx->reader)) - goto lossage; - } - if (token.type != MNUM) - { - msg (SE, _("Syntax error expecting factor value %s."), - context (mx->reader)); - goto lossage; - } - - factor_values[i] = token.number; - } - } - if (wr->content == -1) - { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - goto lossage; - if (!wr_read_rowtype (wr, &token, mx->reader)) - goto lossage; - } - - /* Try the most recent factor first as a simple caching - mechanism. */ - if (wr->current) - { - size_t i; - - for (i = 0; i < mx->n_factors; i++) - if (factor_values[i] != wr->current->factors[i]) - goto cache_miss; - goto winnage; - } - - /* Linear search through the list. */ -cache_miss: - { - struct factor_data *iter; - - for (iter = wr->data; iter; iter = iter->next) - { - size_t i; - - for (i = 0; i < mx->n_factors; i++) - if (factor_values[i] != iter->factors[i]) - goto next_item; - - wr->current = iter; - goto winnage; - - next_item: ; - } - } - - /* Not found. Make a new item. */ - { - struct factor_data *new = pool_alloc (mx->container, sizeof *new); - - new->factors = pool_nalloc (mx->container, - mx->n_factors, sizeof *new->factors); - - { - size_t i; - - for (i = 0; i < mx->n_factors; i++) - new->factors[i] = factor_values[i]; - } - - { - int i; - - for (i = 0; i <= PROX; i++) - { - new->n_rows[i] = 0; - new->data[i] = NULL; - } - } - - new->next = wr->data; - wr->data = wr->current = new; - mx->cells++; - } - -winnage: - local_free (factor_values); - return true; - -lossage: - local_free (factor_values); - return false; -} - -/* Read the independent variables into wr->current. */ -static bool -wr_read_indeps (struct wr_aux_data *wr) -{ - struct matrix_data_pgm *mx = wr->mx; - struct factor_data *c = wr->current; - const int type = content_type[wr->content]; - const int n_rows = c->n_rows[wr->content]; - double *cp; - int n_cols; - - /* Allocate room for data if necessary. */ - if (c->data[wr->content] == NULL) - { - int n_items = mx->n_continuous; - if (type == 1) - n_items *= mx->n_continuous; - - c->data[wr->content] = pool_nalloc (mx->container, - n_items, sizeof **c->data); - } - - cp = &c->data[wr->content][n_rows * mx->n_continuous]; - - /* Figure out how much to read from this line. */ - switch (type) - { - case 0: - case 2: - if (n_rows > 0) - { - msg (SE, _("Duplicate specification for %s."), - content_names[wr->content]); - return false; - } - if (type == 0) - n_cols = mx->n_continuous; - else - n_cols = 1; - break; - case 1: - if (n_rows >= mx->n_continuous - (mx->section != FULL && mx->diag == NODIAGONAL)) - { - msg (SE, _("Too many rows of matrix data for %s."), - content_names[wr->content]); - return false; - } - - switch (mx->section) - { - case LOWER: - n_cols = n_rows + 1; - if (mx->diag == NODIAGONAL) - cp += mx->n_continuous; - break; - case UPPER: - cp += n_rows; - n_cols = mx->n_continuous - n_rows; - if (mx->diag == NODIAGONAL) - { - n_cols--; - cp++; - } - break; - case FULL: - n_cols = mx->n_continuous; - break; - default: - NOT_REACHED (); - } - break; - default: - NOT_REACHED (); - } - c->n_rows[wr->content]++; - - /* Read N_COLS items at CP. */ - { - int j; - - for (j = 0; j < n_cols; j++) - { - struct matrix_token token; - if (!mget_token (&token, mx->reader)) - return false; - if (token.type != MNUM) - { - msg (SE, _("Syntax error expecting value for %s %s."), - var_get_name (dict_get_var (wr->dict, - mx->first_continuous + j)), - context (mx->reader)); - return false; - } - - *cp++ = token.number; - } - if (mx->fmt != FREE - && !force_eol (mx->reader, content_names[wr->content])) - return false; - } - - return true; -} - -/* Matrix source. */ - -static const struct case_source_class matrix_data_with_rowtype_source_class = - { - "MATRIX DATA", - NULL, - matrix_data_read_with_rowtype, - NULL, - }; - -static const struct case_source_class -matrix_data_without_rowtype_source_class = - { - "MATRIX DATA", - NULL, - matrix_data_read_without_rowtype, - NULL, - }; - diff --git a/src/language/stats/ChangeLog b/src/language/stats/ChangeLog index c082e42a..bb3e2921 100644 --- a/src/language/stats/ChangeLog +++ b/src/language/stats/ChangeLog @@ -1,3 +1,24 @@ +Sat Dec 16 22:26:44 2006 Ben Pfaff + + Make it possible to pull cases from the active file with a + function call, instead of requiring indirection through a callback + function. + + * aggregate.c (cmd_aggregate): Take advantage of new procedure + interface. + (agr_to_active_file): Removed. + (presorted_agr_to_sysfile): Removed. + + * autorecode.c (cmd_autorecode): Take advantage of new procedure + interface. + (autorecode_proc_func): Removed. + + * flip.c (struct flip_pgm): New members to allow conformance with + new case_source_class interface. + (cmd_flip): Adapt to new case_source_class interface. + (flip_source_read): Ditto. + (flip_source_destroy): Ditto. + Sat Dec 16 12:54:27 2006 Ben Pfaff * rank.q (rank_custom_variables): Allow grouping variables to be diff --git a/src/language/stats/aggregate.c b/src/language/stats/aggregate.c index 4c85592b..79d90fdd 100644 --- a/src/language/stats/aggregate.c +++ b/src/language/stats/aggregate.c @@ -164,12 +164,6 @@ static bool aggregate_single_case (struct agr_proc *agr, const struct ccase *input, struct ccase *output); static void dump_aggregate_info (struct agr_proc *agr, struct ccase *output); - -/* Aggregating to the active file. */ -static bool agr_to_active_file (const struct ccase *, void *aux, const struct dataset *); - -/* Aggregating to a system file. */ -static bool presorted_agr_to_sysfile (const struct ccase *, void *aux, const struct dataset *); /* Parsing. */ @@ -272,6 +266,8 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds) /* Output to active file or external file? */ if (out_file == NULL) { + struct ccase *c; + /* The active file will be replaced by the aggregated data, so TEMPORARY is moot. */ proc_cancel_temporary_transformations (ds); @@ -286,9 +282,16 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds) if (agr.sink->class->open != NULL) agr.sink->class->open (agr.sink); proc_set_sink (ds, - create_case_sink (&null_sink_class, - dict, NULL)); - if (!procedure (ds, agr_to_active_file, &agr)) + create_case_sink (&null_sink_class, dict, NULL)); + proc_open (ds); + while (proc_read (ds, &c)) + if (aggregate_single_case (&agr, c, &agr.agr_case)) + if (!agr.sink->class->write (agr.sink, &agr.agr_case)) + { + proc_close (ds); + goto error; + } + if (!proc_close (ds)) goto error; if (agr.case_cnt > 0) { @@ -300,8 +303,7 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds) dict_destroy (dict); dataset_set_dict (ds, agr.dict); agr.dict = NULL; - proc_set_source (ds, - agr.sink->class->make_source (agr.sink)); + proc_set_source (ds, agr.sink->class->make_source (agr.sink)); free_case_sink (agr.sink); } else @@ -338,7 +340,17 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds) else { /* Active file is already sorted. */ - if (!procedure (ds, presorted_agr_to_sysfile, &agr)) + struct ccase *c; + + proc_open (ds); + while (proc_read (ds, &c)) + if (aggregate_single_case (&agr, c, &agr.agr_case)) + if (!any_writer_write (agr.writer, &agr.agr_case)) + { + proc_close (ds); + goto error; + } + if (!proc_close (ds)) goto error; } @@ -1102,31 +1114,3 @@ initialize_aggregate_info (struct agr_proc *agr, const struct ccase *input) } } } - -/* Aggregate each case as it comes through. Cases which aren't needed - are dropped. - Returns true if successful, false if an I/O error occurred. */ -static bool -agr_to_active_file (const struct ccase *c, void *agr_, const struct dataset *ds UNUSED) -{ - struct agr_proc *agr = agr_; - - if (aggregate_single_case (agr, c, &agr->agr_case)) - return agr->sink->class->write (agr->sink, &agr->agr_case); - - return true; -} - -/* Aggregate the current case and output it if we passed a - breakpoint. */ -static bool -presorted_agr_to_sysfile (const struct ccase *c, void *agr_, - const struct dataset *ds UNUSED) -{ - struct agr_proc *agr = agr_; - - if (aggregate_single_case (agr, c, &agr->agr_case)) - return any_writer_write (agr->writer, &agr->agr_case); - - return true; -} diff --git a/src/language/stats/autorecode.c b/src/language/stats/autorecode.c index 72534c76..a84528af 100644 --- a/src/language/stats/autorecode.c +++ b/src/language/stats/autorecode.c @@ -92,7 +92,6 @@ struct autorecode_pgm static trns_proc_func autorecode_trns_proc; static trns_free_func autorecode_trns_free; -static bool autorecode_proc_func (const struct ccase *, void *, const struct dataset *); static hsh_compare_func compare_alpha_value, compare_numeric_value; static hsh_hash_func hash_alpha_value, hash_numeric_value; @@ -104,6 +103,7 @@ int cmd_autorecode (struct lexer *lexer, struct dataset *ds) { struct autorecode_pgm arc; + struct ccase *c; size_t dst_cnt; size_t i; bool ok; @@ -184,7 +184,30 @@ cmd_autorecode (struct lexer *lexer, struct dataset *ds) arc.src_values[i] = hsh_create (10, compare_numeric_value, hash_numeric_value, NULL, NULL); - ok = procedure (ds, autorecode_proc_func, &arc); + proc_open (ds); + while (proc_read (ds, &c)) + for (i = 0; i < arc.var_cnt; i++) + { + union arc_value v, *vp, **vpp; + + if (var_is_numeric (arc.src_vars[i])) + v.f = case_num (c, arc.src_vars[i]); + else + v.c = (char *) case_str (c, arc.src_vars[i]); + + vpp = (union arc_value **) hsh_probe (arc.src_values[i], &v); + if (*vpp == NULL) + { + vp = pool_alloc (arc.src_values_pool, sizeof *vp); + if (var_is_numeric (arc.src_vars[i])) + vp->f = v.f; + else + vp->c = pool_clone (arc.src_values_pool, + v.c, var_get_width (arc.src_vars[i])); + *vpp = vp; + } + } + ok = proc_close (ds); for (i = 0; i < arc.var_cnt; i++) arc.dst_vars[i] = dict_create_var_assert (dataset_dict (ds), @@ -344,33 +367,3 @@ hash_numeric_value (const void *a_, const void *aux UNUSED) return hsh_hash_double (a->f); } - -static bool -autorecode_proc_func (const struct ccase *c, void *arc_, const struct dataset *ds UNUSED) -{ - struct autorecode_pgm *arc = arc_; - size_t i; - - for (i = 0; i < arc->var_cnt; i++) - { - union arc_value v, *vp, **vpp; - - if (var_is_numeric (arc->src_vars[i])) - v.f = case_num (c, arc->src_vars[i]); - else - v.c = (char *) case_str (c, arc->src_vars[i]); - - vpp = (union arc_value **) hsh_probe (arc->src_values[i], &v); - if (*vpp == NULL) - { - vp = pool_alloc (arc->src_values_pool, sizeof *vp); - if (var_is_numeric (arc->src_vars[i])) - vp->f = v.f; - else - vp->c = pool_clone (arc->src_values_pool, - v.c, var_get_width (arc->src_vars[i])); - *vpp = vp; - } - } - return true; -} diff --git a/src/language/stats/flip.c b/src/language/stats/flip.c index b1e9c3b4..594596c0 100644 --- a/src/language/stats/flip.c +++ b/src/language/stats/flip.c @@ -77,6 +77,9 @@ struct flip_pgm struct varname *new_names_tail; /* Last new variable. */ FILE *file; /* Temporary file containing data. */ + union value *input_buf; /* Input buffer for temporary file. */ + size_t cases_read; /* Number of cases already read. */ + bool error; /* Error reading temporary file? */ }; static void destroy_flip_pgm (struct flip_pgm *); @@ -111,6 +114,9 @@ cmd_flip (struct lexer *lexer, struct dataset *ds) flip->new_names_head = NULL; flip->new_names_tail = NULL; flip->file = NULL; + flip->input_buf = NULL; + flip->cases_read = 0; + flip->error = false; lex_match (lexer, '/'); if (lex_match_id (lexer, "VARIABLES")) @@ -516,54 +522,54 @@ flip_source_create (struct flip_pgm *pgm) return create_case_source (&flip_source_class, pgm); } -/* Reads the FLIP stream. Copies each case into C and calls - WRITE_CASE passing WC_DATA. - Returns true if successful, false if an I/O error occurred. */ +/* Reads one case into C. + Returns true if successful, false at end of file or if an + I/O error occurred. */ static bool -flip_source_read (struct case_source *source, - struct ccase *c, - write_case_func *write_case, write_case_data wc_data) +flip_source_read (struct case_source *source, struct ccase *c) { struct flip_pgm *flip = source->aux; - union value *input_buf; size_t i; - bool ok = true; - input_buf = xnmalloc (flip->case_cnt, sizeof *input_buf); - for (i = 0; ok && i < flip->var_cnt; i++) - { - size_t j; - - if (fread (input_buf, sizeof *input_buf, flip->case_cnt, - flip->file) != flip->case_cnt) - { - if (ferror (flip->file)) - msg (SE, _("Error reading FLIP temporary file: %s."), - strerror (errno)); - else if (feof (flip->file)) - msg (SE, _("Unexpected end of file reading FLIP temporary file.")); - else - NOT_REACHED (); - ok = false; - break; - } + if (flip->error || flip->cases_read >= flip->var_cnt) + return false; + + if (flip->input_buf == NULL) + flip->input_buf = pool_nmalloc (flip->pool, + flip->case_cnt, sizeof *flip->input_buf); - for (j = 0; j < flip->case_cnt; j++) - case_data_rw_idx (c, j)->f = input_buf[j].f; - ok = write_case (wc_data); + if (fread (flip->input_buf, sizeof *flip->input_buf, flip->case_cnt, + flip->file) != flip->case_cnt) + { + if (ferror (flip->file)) + msg (SE, _("Error reading FLIP temporary file: %s."), + strerror (errno)); + else if (feof (flip->file)) + msg (SE, _("Unexpected end of file reading FLIP temporary file.")); + else + NOT_REACHED (); + flip->error = true; + return false; } - free (input_buf); - return ok; + for (i = 0; i < flip->case_cnt; i++) + case_data_rw_idx (c, i)->f = flip->input_buf[i].f; + + flip->cases_read++; + + return true; } -/* Destroy internal data in SOURCE. */ -static void +/* Destroys the source. + Returns true if successful read, false if an I/O occurred + during destruction or previously. */ +static bool flip_source_destroy (struct case_source *source) { struct flip_pgm *flip = source->aux; - + bool ok = !flip->error; destroy_flip_pgm (flip); + return ok; } static const struct case_source_class flip_source_class = -- 2.30.2