X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fprocedure.c;h=a2ca8b23ee85f8b51285a6c8f61c422911f06638;hb=75862bc63003b33702bfd6844b8a4d1c632488b3;hp=13f907cbed46422dea85cff0b725ae819c4e91b3;hpb=1d35744eb866ac9ca0e9da4b212d4e6d1854643d;p=pspp-builds.git diff --git a/src/data/procedure.c b/src/data/procedure.c index 13f907cb..a2ca8b23 100644 --- a/src/data/procedure.c +++ b/src/data/procedure.c @@ -31,55 +31,37 @@ #include #include #include -#include #include #include -#include #include -#include #include -#include #include #include -#include -#include - -#include "gettext.h" -#define _(msgid) gettext (msgid) - -/* - Virtual File Manager (vfm): - - vfm is used to process data files. It uses the model that - data is read from one stream (the data source), processed, - then written to another (the data sink). The data source is - then deleted and the data sink becomes the data source for the - next procedure. */ /* Procedure execution data. */ struct write_case_data { /* Function to call for each case. */ - bool (*proc_func) (struct ccase *, void *); /* Function. */ - void *aux; /* Auxiliary data. */ + bool (*case_func) (const struct ccase *, void *); + void *aux; struct ccase trns_case; /* Case used for transformations. */ struct ccase sink_case; /* Case written to sink, if - compaction is necessary. */ + compacting is necessary. */ size_t cases_written; /* Cases output so far. */ }; -/* Cases are read from vfm_source, +/* Cases are read from proc_source, pass through permanent_trns_chain (which transforms them into the format described by permanent_dict), - are written to vfm_sink, + are written to proc_sink, pass through temporary_trns_chain (which transforms them into the format described by default_dict), and are finally passed to the procedure. */ -static struct case_source *vfm_source; +static struct case_source *proc_source; static struct trns_chain *permanent_trns_chain; static struct dictionary *permanent_dict; -static struct case_sink *vfm_sink; +static struct case_sink *proc_sink; static struct trns_chain *temporary_trns_chain; struct dictionary *default_dict; @@ -91,8 +73,8 @@ static struct trns_chain *cur_trns_chain; otherwise a null pointer. */ static struct dict_compactor *compactor; -/* Time at which vfm was last invoked. */ -static time_t last_vfm_invocation; +/* Time at which proc was last invoked. */ +static time_t last_proc_invocation; /* Lag queue. */ int n_lag; /* Number of cases to lag. */ @@ -102,11 +84,12 @@ static struct ccase *lag_queue; /* Array of n_lag ccase * elements. */ static void add_case_limit_trns (void); static void add_filter_trns (void); -static void add_process_if_trns (void); -static bool internal_procedure (bool (*proc_func) (struct ccase *, void *), +static bool internal_procedure (bool (*case_func) (const struct ccase *, + void *), + bool (*end_func) (void *), void *aux); -static void update_last_vfm_invocation (void); +static void update_last_proc_invocation (void); static void create_trns_case (struct ccase *, struct dictionary *); static void open_active_file (void); static bool write_case (struct write_case_data *wc_data); @@ -120,10 +103,12 @@ static bool close_active_file (void); time_t time_of_last_procedure (void) { - if (last_vfm_invocation == 0) - update_last_vfm_invocation (); - return last_vfm_invocation; + if (last_proc_invocation == 0) + update_last_proc_invocation (); + return last_proc_invocation; } + +/* Regular procedure. */ /* Reads the data from the input program and writes it to a new active file. For each case we read from the input program, we @@ -141,40 +126,36 @@ time_of_last_procedure (void) Returns true if successful, false if an I/O error occurred. */ bool -procedure (bool (*proc_func) (struct ccase *, void *), void *aux) +procedure (bool (*proc_func) (const struct ccase *, void *), void *aux) { - if (proc_func == NULL - && case_source_is_class (vfm_source, &storage_source_class) - && vfm_sink == NULL - && temporary_trns_chain == NULL - && trns_chain_is_empty (permanent_trns_chain)) - { - expr_free (process_if_expr); - process_if_expr = NULL; - dict_set_case_limit (default_dict, 0); - dict_clear_vectors (default_dict); + return internal_procedure (proc_func, NULL, aux); +} + +/* Multipass procedure. */ - update_last_vfm_invocation (); - return true; - } - else - { - bool ok; - - open_active_file (); - ok = internal_procedure (proc_func, aux); - ok = close_active_file () && ok; +struct multipass_aux_data + { + struct casefile *casefile; + + bool (*proc_func) (const struct casefile *, void *aux); + void *aux; + }; - return ok; - } +/* Case processing function for multipass_procedure(). */ +static bool +multipass_case_func (const struct ccase *c, void *aux_data_) +{ + struct multipass_aux_data *aux_data = aux_data_; + return casefile_append (aux_data->casefile, c); } -/* Callback function for multipass_procedure(). */ +/* End-of-file function for multipass_procedure(). */ static bool -multipass_callback (struct ccase *c, void *cf_) +multipass_end_func (void *aux_data_) { - struct casefile *cf = cf_; - return casefile_append (cf, c); + struct multipass_aux_data *aux_data = aux_data_; + return (aux_data->proc_func == NULL + || aux_data->proc_func (aux_data->casefile, aux_data->aux)); } /* Procedure that allows multiple passes over the input data. @@ -184,75 +165,82 @@ bool multipass_procedure (bool (*proc_func) (const struct casefile *, void *aux), void *aux) { - if (case_source_is_class (vfm_source, &storage_source_class) - && vfm_sink == NULL - && temporary_trns_chain == NULL - && trns_chain_is_empty (permanent_trns_chain)) - { - proc_func (storage_source_get_casefile (vfm_source), aux); - - expr_free (process_if_expr); - process_if_expr = NULL; - dict_set_case_limit (default_dict, 0); - dict_clear_vectors (default_dict); - - update_last_vfm_invocation (); - return true; - } - else - { - struct casefile *cf; - bool ok; - - assert (proc_func != NULL); + struct multipass_aux_data aux_data; + bool ok; - cf = casefile_create (dict_get_next_value_idx (default_dict)); + aux_data.casefile = casefile_create (dict_get_next_value_idx (default_dict)); + aux_data.proc_func = proc_func; + aux_data.aux = aux; - open_active_file (); - ok = internal_procedure (multipass_callback, cf); - ok = proc_func (cf, aux) && ok; - ok = close_active_file () && ok; + ok = internal_procedure (multipass_case_func, multipass_end_func, &aux_data); + ok = !casefile_error (aux_data.casefile) && ok; - casefile_destroy (cf); + casefile_destroy (aux_data.casefile); - return ok; - } + return ok; } + +/* Procedure implementation. */ -/* Executes a procedure, as procedure(), except that the caller - is responsible for calling open_active_file() and - close_active_file(). - Returns true if successful, false if an I/O error occurred. */ +/* Executes a procedure. + Passes each case to CASE_FUNC. + Calls END_FUNC after the last case. + Returns true if successful, false if an I/O error occurred (or + if CASE_FUNC or END_FUNC ever returned false). */ static bool -internal_procedure (bool (*proc_func) (struct ccase *, void *), void *aux) +internal_procedure (bool (*case_func) (const struct ccase *, void *), + bool (*end_func) (void *), + void *aux) { struct write_case_data wc_data; - bool ok; + bool ok = true; - wc_data.proc_func = proc_func; + assert (proc_source != NULL); + + update_last_proc_invocation (); + + /* Optimize the trivial case where we're not going to do + anything with the data, by not reading the data at all. */ + if (case_func == NULL && end_func == NULL + && case_source_is_class (proc_source, &storage_source_class) + && proc_sink == NULL + && (temporary_trns_chain == NULL + || trns_chain_is_empty (temporary_trns_chain)) + && trns_chain_is_empty (permanent_trns_chain)) + { + n_lag = 0; + dict_set_case_limit (default_dict, 0); + dict_clear_vectors (default_dict); + return true; + } + + open_active_file (); + + wc_data.case_func = case_func; wc_data.aux = aux; create_trns_case (&wc_data.trns_case, default_dict); case_create (&wc_data.sink_case, dict_get_next_value_idx (default_dict)); wc_data.cases_written = 0; - update_last_vfm_invocation (); - - ok = (vfm_source == NULL - || vfm_source->class->read (vfm_source, - &wc_data.trns_case, - write_case, &wc_data)); + ok = proc_source->class->read (proc_source, + &wc_data.trns_case, + write_case, &wc_data) && ok; + if (end_func != NULL) + ok = end_func (aux) && ok; case_destroy (&wc_data.sink_case); case_destroy (&wc_data.trns_case); + ok = close_active_file () && ok; + return ok; } -/* Updates last_vfm_invocation. */ +/* Updates last_proc_invocation. */ static void -update_last_vfm_invocation (void) +update_last_proc_invocation (void) { - last_vfm_invocation = time (NULL); + last_proc_invocation = time (NULL); } /* Creates and returns a case, initializing it from the vectors @@ -284,7 +272,6 @@ open_active_file (void) { add_case_limit_trns (); add_filter_trns (); - add_process_if_trns (); /* Finalize transformations. */ trns_chain_finalize (cur_trns_chain); @@ -294,16 +281,16 @@ open_active_file (void) if (permanent_dict == NULL) permanent_dict = default_dict; - /* Figure out compaction. */ - compactor = (dict_needs_compaction (permanent_dict) + /* Figure out whether to compact. */ + compactor = (dict_compacting_would_shrink (permanent_dict) ? dict_make_compactor (permanent_dict) : NULL); /* Prepare sink. */ - if (vfm_sink == NULL) - vfm_sink = create_case_sink (&storage_sink_class, permanent_dict, NULL); - if (vfm_sink->class->open != NULL) - vfm_sink->class->open (vfm_sink); + if (proc_sink == NULL) + proc_sink = create_case_sink (&storage_sink_class, permanent_dict, NULL); + if (proc_sink->class->open != NULL) + proc_sink->class->open (proc_sink); /* Allocate memory for lag queue. */ if (n_lag > 0) @@ -341,16 +328,16 @@ write_case (struct write_case_data *wc_data) /* Write case to replacement active file. */ wc_data->cases_written++; - if (vfm_sink->class->write != NULL) + if (proc_sink->class->write != NULL) { if (compactor != NULL) { dict_compactor_compact (compactor, &wc_data->sink_case, &wc_data->trns_case); - vfm_sink->class->write (vfm_sink, &wc_data->sink_case); + proc_sink->class->write (proc_sink, &wc_data->sink_case); } else - vfm_sink->class->write (vfm_sink, &wc_data->trns_case); + proc_sink->class->write (proc_sink, &wc_data->trns_case); } /* Execute temporary transformations. */ @@ -364,8 +351,8 @@ write_case (struct write_case_data *wc_data) } /* Pass case to procedure. */ - if (wc_data->proc_func != NULL) - if (!wc_data->proc_func (&wc_data->trns_case, wc_data->aux)) + if (wc_data->case_func != NULL) + if (!wc_data->case_func (&wc_data->trns_case, wc_data->aux)) retval = TRNS_ERROR; done: @@ -424,25 +411,24 @@ close_active_file (void) /* Dictionary from before TEMPORARY becomes permanent. */ proc_cancel_temporary_transformations (); - /* Finish compaction. */ + /* Finish compacting. */ if (compactor != NULL) { dict_compactor_destroy (compactor); - dict_compact_values (default_dict); + dict_compact_values (default_dict); + compactor = NULL; } /* Free data source. */ - free_case_source (vfm_source); - vfm_source = NULL; + free_case_source (proc_source); + proc_source = NULL; /* Old data sink becomes new data source. */ - if (vfm_sink->class->make_source != NULL) - vfm_source = vfm_sink->class->make_source (vfm_sink); - free_case_sink (vfm_sink); - vfm_sink = NULL; + if (proc_sink->class->make_source != NULL) + proc_source = proc_sink->class->make_source (proc_sink); + free_case_sink (proc_sink); + proc_sink = NULL; - /* Cancel TEMPORARY, PROCESS IF, FILTER, N OF CASES, vectors, - and get rid of all the transformations. */ dict_clear_vectors (default_dict); permanent_dict = NULL; return proc_cancel_all_transformations (); @@ -467,27 +453,30 @@ lagged_case (int n_before) return NULL; } +/* Procedure that separates the data into SPLIT FILE groups. */ + /* Represents auxiliary data for handling SPLIT FILE. */ struct split_aux_data { - size_t case_count; /* Number of cases so far. */ struct ccase prev_case; /* Data in previous case. */ - /* Functions to call... */ - void (*begin_func) (void *); /* ...before data. */ - bool (*proc_func) (struct ccase *, void *); /* ...with data. */ - void (*end_func) (void *); /* ...after data. */ - void *func_aux; /* Auxiliary data. */ + /* Callback functions. */ + void (*begin_func) (const struct ccase *, void *); + bool (*proc_func) (const struct ccase *, void *); + void (*end_func) (void *); + void *func_aux; }; static int equal_splits (const struct ccase *, const struct ccase *); -static bool procedure_with_splits_callback (struct ccase *, void *); -static void dump_splits (struct ccase *); +static bool split_procedure_case_func (const struct ccase *c, void *); +static bool split_procedure_end_func (void *); /* Like procedure(), but it automatically breaks the case stream into SPLIT FILE break groups. Before each group of cases with - identical SPLIT FILE variable values, BEGIN_FUNC is called. - Then PROC_FUNC is called with each case in the group. + identical SPLIT FILE variable values, BEGIN_FUNC is called + with the first case in the group. + Then PROC_FUNC is called for each case in the group (including + the first). END_FUNC is called when the group is finished. FUNC_AUX is passed to each of the functions as auxiliary data. @@ -500,59 +489,61 @@ static void dump_splits (struct ccase *); Returns true if successful, false if an I/O error occurred. */ bool -procedure_with_splits (void (*begin_func) (void *aux), - bool (*proc_func) (struct ccase *, void *aux), +procedure_with_splits (void (*begin_func) (const struct ccase *, void *aux), + bool (*proc_func) (const struct ccase *, void *aux), void (*end_func) (void *aux), void *func_aux) { struct split_aux_data split_aux; bool ok; - split_aux.case_count = 0; case_nullify (&split_aux.prev_case); split_aux.begin_func = begin_func; split_aux.proc_func = proc_func; split_aux.end_func = end_func; split_aux.func_aux = func_aux; - open_active_file (); - ok = internal_procedure (procedure_with_splits_callback, &split_aux); - if (split_aux.case_count > 0 && end_func != NULL) - end_func (func_aux); - if (!close_active_file ()) - ok = false; + ok = internal_procedure (split_procedure_case_func, + split_procedure_end_func, &split_aux); case_destroy (&split_aux.prev_case); return ok; } -/* procedure() callback used by procedure_with_splits(). */ +/* Case callback used by procedure_with_splits(). */ static bool -procedure_with_splits_callback (struct ccase *c, void *split_aux_) +split_procedure_case_func (const struct ccase *c, void *split_aux_) { struct split_aux_data *split_aux = split_aux_; /* Start a new series if needed. */ - if (split_aux->case_count == 0 + if (case_is_null (&split_aux->prev_case) || !equal_splits (c, &split_aux->prev_case)) { - if (split_aux->case_count > 0 && split_aux->end_func != NULL) + if (!case_is_null (&split_aux->prev_case) && split_aux->end_func != NULL) split_aux->end_func (split_aux->func_aux); - dump_splits (c); case_destroy (&split_aux->prev_case); case_clone (&split_aux->prev_case, c); if (split_aux->begin_func != NULL) - split_aux->begin_func (split_aux->func_aux); + split_aux->begin_func (&split_aux->prev_case, split_aux->func_aux); } - split_aux->case_count++; - if (split_aux->proc_func != NULL) - return split_aux->proc_func (c, split_aux->func_aux); - else - return true; + return (split_aux->proc_func == NULL + || split_aux->proc_func (c, split_aux->func_aux)); +} + +/* End-of-file callback used by procedure_with_splits(). */ +static bool +split_procedure_end_func (void *split_aux_) +{ + struct split_aux_data *split_aux = split_aux_; + + if (!case_is_null (&split_aux->prev_case) && split_aux->end_func != NULL) + split_aux->end_func (split_aux->func_aux); + return true; } /* Compares the SPLIT FILE variables in cases A and B and returns @@ -564,50 +555,10 @@ equal_splits (const struct ccase *a, const struct ccase *b) dict_get_split_vars (default_dict), dict_get_split_cnt (default_dict)) == 0; } - -/* Dumps out the values of all the split variables for the case C. */ -static void -dump_splits (struct ccase *c) -{ - struct variable *const *split; - struct tab_table *t; - size_t split_cnt; - int i; - - split_cnt = dict_get_split_cnt (default_dict); - if (split_cnt == 0) - return; - - t = tab_create (3, split_cnt + 1, 0); - tab_dim (t, tab_natural_dimensions); - tab_vline (t, TAL_GAP, 1, 0, split_cnt); - tab_vline (t, TAL_GAP, 2, 0, split_cnt); - tab_text (t, 0, 0, TAB_NONE, _("Variable")); - tab_text (t, 1, 0, TAB_LEFT, _("Value")); - tab_text (t, 2, 0, TAB_LEFT, _("Label")); - split = dict_get_split_vars (default_dict); - for (i = 0; i < split_cnt; i++) - { - struct variable *v = split[i]; - char temp_buf[80]; - const char *val_lab; - - assert (v->type == NUMERIC || v->type == ALPHA); - tab_text (t, 0, i + 1, TAB_LEFT | TAT_PRINTF, "%s", v->name); - - data_out (temp_buf, &v->print, case_data (c, v->fv)); - - temp_buf[v->print.w] = 0; - tab_text (t, 1, i + 1, TAT_PRINTF, "%.*s", v->print.w, temp_buf); - - val_lab = val_labs_find (v->val_labs, *case_data (c, v->fv)); - if (val_lab) - tab_text (t, 2, i + 1, TAB_LEFT, val_lab); - } - tab_flags (t, SOMF_NO_TITLE); - tab_submit (t); -} +/* Multipass procedure that separates the data into SPLIT FILE + groups. */ + /* Represents auxiliary data for handling SPLIT FILE in a multipass procedure. */ struct multipass_split_aux_data @@ -616,45 +567,40 @@ struct multipass_split_aux_data struct casefile *casefile; /* Accumulates data for a split. */ /* Function to call with the accumulated data. */ - bool (*split_func) (const struct casefile *, void *); + bool (*split_func) (const struct ccase *first, const struct casefile *, + void *); void *func_aux; /* Auxiliary data. */ }; -static bool multipass_split_callback (struct ccase *c, void *aux_); +static bool multipass_split_case_func (const struct ccase *c, void *aux_); +static bool multipass_split_end_func (void *aux_); static bool multipass_split_output (struct multipass_split_aux_data *); /* Returns true if successful, false if an I/O error occurred. */ bool -multipass_procedure_with_splits (bool (*split_func) (const struct casefile *, - void *), - void *func_aux) +multipass_procedure_with_splits (bool (*split_func) (const struct ccase *first, + const struct casefile *, + void *aux), + void *func_aux) { struct multipass_split_aux_data aux; bool ok; - assert (split_func != NULL); - - open_active_file (); - case_nullify (&aux.prev_case); aux.casefile = NULL; aux.split_func = split_func; aux.func_aux = func_aux; - ok = internal_procedure (multipass_split_callback, &aux); - if (aux.casefile != NULL) - ok = multipass_split_output (&aux) && ok; + ok = internal_procedure (multipass_split_case_func, + multipass_split_end_func, &aux); case_destroy (&aux.prev_case); - if (!close_active_file ()) - ok = false; - return ok; } -/* procedure() callback used by multipass_procedure_with_splits(). */ +/* Case callback used by multipass_procedure_with_splits(). */ static bool -multipass_split_callback (struct ccase *c, void *aux_) +multipass_split_case_func (const struct ccase *c, void *aux_) { struct multipass_split_aux_data *aux = aux_; bool ok = true; @@ -662,35 +608,42 @@ multipass_split_callback (struct ccase *c, void *aux_) /* Start a new series if needed. */ if (aux->casefile == NULL || !equal_splits (c, &aux->prev_case)) { + /* Record split values. */ + case_destroy (&aux->prev_case); + case_clone (&aux->prev_case, c); + /* Pass any cases to split_func. */ if (aux->casefile != NULL) ok = multipass_split_output (aux); /* Start a new casefile. */ aux->casefile = casefile_create (dict_get_next_value_idx (default_dict)); - - /* Record split values. */ - dump_splits (c); - case_destroy (&aux->prev_case); - case_clone (&aux->prev_case, c); } return casefile_append (aux->casefile, c) && ok; } +/* End-of-file callback used by multipass_procedure_with_splits(). */ +static bool +multipass_split_end_func (void *aux_) +{ + struct multipass_split_aux_data *aux = aux_; + return (aux->casefile == NULL || multipass_split_output (aux)); +} + static bool multipass_split_output (struct multipass_split_aux_data *aux) { bool ok; assert (aux->casefile != NULL); - ok = aux->split_func (aux->casefile, aux->func_aux); + ok = aux->split_func (&aux->prev_case, aux->casefile, aux->func_aux); casefile_destroy (aux->casefile); aux->casefile = NULL; return ok; } - + /* Discards all the current state in preparation for a data-input command like DATA LIST or GET. */ void @@ -701,18 +654,10 @@ discard_variables (void) n_lag = 0; - if (vfm_source != NULL) - { - free_case_source (vfm_source); - vfm_source = NULL; - } + free_case_source (proc_source); + proc_source = NULL; proc_cancel_all_transformations (); - - expr_free (process_if_expr); - process_if_expr = NULL; - - proc_cancel_temporary_transformations (); } /* Returns the current set of permanent transformations, @@ -854,6 +799,7 @@ void proc_done (void) { discard_variables (); + dict_destroy (default_dict); } /* Sets SINK as the destination for procedure output from the @@ -861,8 +807,8 @@ proc_done (void) void proc_set_sink (struct case_sink *sink) { - assert (vfm_sink == NULL); - vfm_sink = sink; + assert (proc_sink == NULL); + proc_sink = sink; } /* Sets SOURCE as the source for procedure input for the next @@ -870,8 +816,8 @@ proc_set_sink (struct case_sink *sink) void proc_set_source (struct case_source *source) { - assert (vfm_source == NULL); - vfm_source = source; + assert (proc_source == NULL); + proc_source = source; } /* Returns true if a source for the next procedure has been @@ -879,7 +825,7 @@ proc_set_source (struct case_source *source) bool proc_has_source (void) { - return vfm_source != NULL; + return proc_source != NULL; } /* Returns the output from the previous procedure. @@ -893,13 +839,13 @@ proc_capture_output (void) /* Try to make sure that this function is called immediately after procedure() or a similar function. */ - assert (vfm_source != NULL); - assert (case_source_is_class (vfm_source, &storage_source_class)); + assert (proc_source != NULL); + assert (case_source_is_class (proc_source, &storage_source_class)); assert (trns_chain_is_empty (permanent_trns_chain)); assert (!proc_in_temporary_transformations ()); - casefile = storage_source_decapsulate (vfm_source); - vfm_source = NULL; + casefile = storage_source_decapsulate (proc_source); + proc_source = NULL; return casefile; } @@ -974,40 +920,4 @@ filter_trns_proc (void *filter_var_, return (f != 0.0 && !mv_is_num_missing (&filter_var->miss, f) ? TRNS_CONTINUE : TRNS_DROP_CASE); } - -static trns_proc_func process_if_trns_proc; -static trns_free_func process_if_trns_free; -/* Adds a temporary transformation to filter data according to - the expression specified on PROCESS IF, if any. */ -static void -add_process_if_trns (void) -{ - if (process_if_expr != NULL) - { - proc_start_temporary_transformations (); - add_transformation (process_if_trns_proc, process_if_trns_free, - process_if_expr); - process_if_expr = NULL; - } -} - -/* PROCESS IF transformation. */ -static int -process_if_trns_proc (void *expression_, - struct ccase *c UNUSED, int case_nr UNUSED) - -{ - struct expression *expression = expression_; - return (expr_evaluate_num (expression, c, case_nr) == 1.0 - ? TRNS_CONTINUE : TRNS_DROP_CASE); -} - -/* Frees a PROCESS IF transformation. */ -static bool -process_if_trns_free (void *expression_) -{ - struct expression *expression = expression_; - expr_free (expression); - return true; -}