X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fdataset.c;h=b94a3675c6668ac4ce4ae6362c7db145d58846f3;hb=7a09f7e0127967c4f04a51f1b9cf91040c515c34;hp=a94fba2c22919c0dfc29ee9746a3a2ab7a25c49a;hpb=2be9bee9da6a2ce27715e58128569594319abfa2;p=pspp diff --git a/src/data/dataset.c b/src/data/dataset.c index a94fba2c22..b94a3675c6 100644 --- a/src/data/dataset.c +++ b/src/data/dataset.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,6 +32,7 @@ #include "data/casewriter.h" #include "data/dictionary.h" #include "data/file-handle-def.h" +#include "data/session.h" #include "data/transformations.h" #include "data/variable.h" #include "libpspp/deque.h" @@ -44,6 +45,15 @@ #include "gl/xalloc.h" struct dataset { + /* A dataset is usually part of a session. Within a session its name must + be unique. The name must either be a valid PSPP identifier or the empty + string. (It must be unique within the session even if it is the empty + string; that is, there may only be a single dataset within a session with + the empty string as its name.) */ + struct session *session; + char *name; + enum dataset_display display; + /* Cases are read from source, their transformation variables are initialized, pass through permanent_trns_chain (which transforms them into @@ -60,11 +70,6 @@ struct dataset { struct trns_chain *temporary_trns_chain; struct dictionary *dict; - /* Callback which occurs whenever the transformation chain(s) have - been modified */ - transformation_change_callback_func *xform_callback; - void *xform_callback_aux; - /* If true, cases are discarded instead of being written to sink. 
*/ bool discard_output; @@ -98,13 +103,16 @@ struct dataset { bool ok; /* Error status. */ struct casereader_shim *shim; /* Shim on proc_open() casereader. */ - void (*callback) (void *); /* Callback for when the dataset changes */ + const struct dataset_callbacks *callbacks; void *cb_data; - /* Default encoding for reading syntax files. */ - char *syntax_encoding; -}; /* struct dataset */ + /* Uniquely distinguishes datasets. */ + unsigned int seqno; +}; +static void dataset_changed__ (struct dataset *); +static void dataset_transformations_changed__ (struct dataset *, + bool non_empty); static void add_case_limit_trns (struct dataset *ds); static void add_filter_trns (struct dataset *ds); @@ -112,34 +120,253 @@ static void add_filter_trns (struct dataset *ds); static void update_last_proc_invocation (struct dataset *ds); static void -dataset_set_unsaved (const struct dataset *ds) +dict_callback (struct dictionary *d UNUSED, void *ds_) { - if (ds->callback) ds->callback (ds->cb_data); + struct dataset *ds = ds_; + dataset_changed__ (ds); } - -/* Public functions. */ +static void +dataset_create_finish__ (struct dataset *ds, struct session *session) +{ + static unsigned int seqno; + + dict_set_change_callback (ds->dict, dict_callback, ds); + proc_cancel_all_transformations (ds); + dataset_set_session (ds, session); + ds->seqno = ++seqno; +} + +/* Creates a new dataset named NAME, adds it to SESSION, and returns it. If + SESSION already contains a dataset named NAME, it is deleted and replaced. + The dataset initially has an empty dictionary and no data source. 
*/ +struct dataset * +dataset_create (struct session *session, const char *name) +{ + struct dataset *ds; + + ds = xzalloc (sizeof *ds); + ds->name = xstrdup (name); + ds->display = DATASET_FRONT; + ds->dict = dict_create (get_default_encoding ()); + + ds->caseinit = caseinit_create (); + + dataset_create_finish__ (ds, session); + + return ds; +} + +/* Creates a new dataset named NAME that has the same data and dictionary as + OLD, adds it to the same session as OLD, and returns the new dataset. If + OLD's session already contains a dataset named NAME, it is deleted and + replaced. + + OLD must not have any active transformations or temporary state and must + not be in the middle of a procedure. + + Callbacks are not cloned. */ +struct dataset * +dataset_clone (struct dataset *old, const char *name) +{ + struct dataset *new; + + assert (old->proc_state == PROC_COMMITTED); + assert (trns_chain_is_empty (old->permanent_trns_chain)); + assert (old->permanent_dict == NULL); + assert (old->sink == NULL); + assert (old->temporary_trns_chain == NULL); + + new = xzalloc (sizeof *new); + new->name = xstrdup (name); + new->display = DATASET_FRONT; + new->source = casereader_clone (old->source); + new->dict = dict_clone (old->dict); + new->caseinit = caseinit_clone (old->caseinit); + new->last_proc_invocation = old->last_proc_invocation; + new->ok = old->ok; + + dataset_create_finish__ (new, old->session); + return new; +} + +/* Destroys DS. */ void -dataset_set_callback (struct dataset *ds, void (*cb) (void *), void *cb_data) +dataset_destroy (struct dataset *ds) { - ds->callback = cb; - ds->cb_data = cb_data; + if (ds != NULL) + { + dataset_set_session (ds, NULL); + dataset_clear (ds); + dict_unref (ds->dict); + caseinit_destroy (ds->caseinit); + trns_chain_destroy (ds->permanent_trns_chain); + dataset_transformations_changed__ (ds, false); + free (ds->name); + free (ds); + } } +/* Discards the active dataset's dictionary, data, and transformations. 
*/ void -dataset_set_default_syntax_encoding (struct dataset *ds, const char *encoding) +dataset_clear (struct dataset *ds) { - free (ds->syntax_encoding); - ds->syntax_encoding = xstrdup (encoding); + assert (ds->proc_state == PROC_COMMITTED); + + dict_clear (ds->dict); + fh_set_default_handle (NULL); + + ds->n_lag = 0; + + casereader_destroy (ds->source); + ds->source = NULL; + + proc_cancel_all_transformations (ds); } const char * -dataset_get_default_syntax_encoding (const struct dataset *ds) +dataset_name (const struct dataset *ds) +{ + return ds->name; +} + +void +dataset_set_name (struct dataset *ds, const char *name) +{ + struct session *session = ds->session; + bool active = false; + + if (session != NULL) + { + active = session_active_dataset (session) == ds; + if (active) + session_set_active_dataset (session, NULL); + dataset_set_session (ds, NULL); + } + + free (ds->name); + ds->name = xstrdup (name); + + if (session != NULL) + { + dataset_set_session (ds, session); + if (active) + session_set_active_dataset (session, ds); + } +} + +struct session * +dataset_session (const struct dataset *ds) +{ + return ds->session; +} + +void +dataset_set_session (struct dataset *ds, struct session *session) +{ + if (session != ds->session) + { + if (ds->session != NULL) + session_remove_dataset (ds->session, ds); + if (session != NULL) + session_add_dataset (session, ds); + } +} + +/* Returns the dictionary within DS. This is always nonnull, although it + might not contain any variables. */ +struct dictionary * +dataset_dict (const struct dataset *ds) +{ + return ds->dict; +} + +/* Replaces DS's dictionary by DICT, discarding any source and + transformations. 
*/ +void +dataset_set_dict (struct dataset *ds, struct dictionary *dict) +{ + assert (ds->proc_state == PROC_COMMITTED); + assert (ds->dict != dict); + + dataset_clear (ds); + + dict_unref (ds->dict); + ds->dict = dict; + dict_set_change_callback (ds->dict, dict_callback, ds); +} + +/* Returns the casereader that will be read when a procedure is executed on + DS. This can be NULL if none has been set up yet. */ +const struct casereader * +dataset_source (const struct dataset *ds) +{ + return ds->source; +} + +/* Returns true if DS has a data source, false otherwise. */ +bool +dataset_has_source (const struct dataset *ds) +{ + return dataset_source (ds) != NULL; +} + +/* Replaces the active dataset's data by READER. READER's cases must have an + appropriate format for DS's dictionary. */ +bool +dataset_set_source (struct dataset *ds, struct casereader *reader) +{ + casereader_destroy (ds->source); + ds->source = reader; + + caseinit_clear (ds->caseinit); + caseinit_mark_as_preinited (ds->caseinit, ds->dict); + + return reader == NULL || !casereader_error (reader); +} + +/* Returns the data source from DS and removes it from DS. Returns a null + pointer if DS has no data source. */ +struct casereader * +dataset_steal_source (struct dataset *ds) +{ + struct casereader *reader = ds->source; + ds->source = NULL; + + return reader; +} + +/* Returns a number unique to DS. It can be used to distinguish one dataset + from any other within a given program run, even datasets that do not exist + at the same time. 
*/ +unsigned int +dataset_seqno (const struct dataset *ds) { - return ds->syntax_encoding; + return ds->seqno; } +void +dataset_set_callbacks (struct dataset *ds, + const struct dataset_callbacks *callbacks, + void *cb_data) +{ + ds->callbacks = callbacks; + ds->cb_data = cb_data; +} + +enum dataset_display +dataset_get_display (const struct dataset *ds) +{ + return ds->display; +} + +void +dataset_set_display (struct dataset *ds, enum dataset_display display) +{ + ds->display = display; +} + /* Returns the last time the data was read. */ time_t time_of_last_procedure (struct dataset *ds) @@ -314,7 +541,7 @@ proc_casereader_read (struct casereader *reader UNUSED, void *ds_) ds->lag_cases[deque_push_front (&ds->lag)] = case_ref (c); } - /* Write case to replacement active file. */ + /* Write case to replacement dataset. */ ds->cases_written++; if (ds->sink != NULL) casewriter_write (ds->sink, @@ -347,20 +574,20 @@ proc_casereader_destroy (struct casereader *reader, void *ds_) /* Make sure transformations happen for every input case, in case they have side effects, and ensure that the replacement - active file gets all the cases it should. */ + active dataset gets all the cases it should. */ while ((c = casereader_read (reader)) != NULL) case_unref (c); ds->proc_state = PROC_CLOSED; ds->ok = casereader_destroy (ds->source) && ds->ok; ds->source = NULL; - proc_set_active_file_data (ds, NULL); + dataset_set_source (ds, NULL); } /* Must return false if the source casereader, a transformation, or the sink casewriter signaled an error. (If a temporary transformation signals an error, then the return value is - false, but the replacement active file may still be + false, but the replacement active dataset may still be untainted.) 
*/ bool proc_commit (struct dataset *ds) @@ -371,7 +598,7 @@ proc_commit (struct dataset *ds) assert (ds->proc_state == PROC_CLOSED); ds->proc_state = PROC_COMMITTED; - dataset_set_unsaved (ds); + dataset_changed__ (ds); /* Free memory for lagged cases. */ while (!deque_is_empty (&ds->lag)) @@ -453,9 +680,7 @@ proc_capture_transformations (struct dataset *ds) assert (ds->temporary_trns_chain == NULL); chain = ds->permanent_trns_chain; ds->cur_trns_chain = ds->permanent_trns_chain = trns_chain_create (); - - if ( ds->xform_callback) - ds->xform_callback (false, ds->xform_callback_aux); + dataset_transformations_changed__ (ds, false); return chain; } @@ -467,8 +692,7 @@ void add_transformation (struct dataset *ds, trns_proc_func *proc, trns_free_func *free, void *aux) { trns_chain_append (ds->cur_trns_chain, NULL, proc, free, aux); - if ( ds->xform_callback) - ds->xform_callback (true, ds->xform_callback_aux); + dataset_transformations_changed__ (ds, true); } /* Adds a transformation that processes a case with PROC and @@ -483,9 +707,7 @@ add_transformation_with_finalizer (struct dataset *ds, trns_free_func *free, void *aux) { trns_chain_append (ds->cur_trns_chain, finalize, proc, free, aux); - - if ( ds->xform_callback) - ds->xform_callback (true, ds->xform_callback_aux); + dataset_transformations_changed__ (ds, true); } /* Returns the index of the next transformation. @@ -520,15 +742,17 @@ proc_start_temporary_transformations (struct dataset *ds) trns_chain_finalize (ds->permanent_trns_chain); ds->temporary_trns_chain = ds->cur_trns_chain = trns_chain_create (); - - if ( ds->xform_callback) - ds->xform_callback (true, ds->xform_callback_aux); + dataset_transformations_changed__ (ds, true); } } -/* Converts all the temporary transformations, if any, to - permanent transformations. Further transformations will be - permanent. +/* Converts all the temporary transformations, if any, to permanent + transformations. Further transformations will be permanent. 
+ + The FILTER command is implemented as a temporary transformation, so a + procedure that uses this function should usually use proc_open_filtering() + with FILTER false, instead of plain proc_open(). + Returns true if anything changed, false otherwise. */ bool proc_make_temporary_transformations_permanent (struct dataset *ds) @@ -539,7 +763,9 @@ proc_make_temporary_transformations_permanent (struct dataset *ds) trns_chain_splice (ds->permanent_trns_chain, ds->temporary_trns_chain); ds->temporary_trns_chain = NULL; - dict_destroy (ds->permanent_dict); + ds->cur_trns_chain = ds->permanent_trns_chain; + + dict_unref (ds->permanent_dict); ds->permanent_dict = NULL; return true; @@ -556,17 +782,14 @@ proc_cancel_temporary_transformations (struct dataset *ds) { if (proc_in_temporary_transformations (ds)) { - dict_destroy (ds->dict); + dict_unref (ds->dict); ds->dict = ds->permanent_dict; ds->permanent_dict = NULL; trns_chain_destroy (ds->temporary_trns_chain); ds->temporary_trns_chain = NULL; - - if ( ds->xform_callback) - ds->xform_callback (!trns_chain_is_empty (ds->permanent_trns_chain), - ds->xform_callback_aux); - + dataset_transformations_changed__ ( + ds, !trns_chain_is_empty (ds->permanent_trns_chain)); return true; } else @@ -584,65 +807,44 @@ proc_cancel_all_transformations (struct dataset *ds) ok = trns_chain_destroy (ds->temporary_trns_chain) && ok; ds->permanent_trns_chain = ds->cur_trns_chain = trns_chain_create (); ds->temporary_trns_chain = NULL; - if ( ds->xform_callback) - ds->xform_callback (false, ds->xform_callback_aux); + dataset_transformations_changed__ (ds, false); return ok; } - -static void -dict_callback (struct dictionary *d UNUSED, void *ds_) +static int +store_case_num (void *var_, struct ccase **cc, casenumber case_num) { - struct dataset *ds = ds_; - dataset_set_unsaved (ds); -} + struct variable *var = var_; -/* Initializes procedure handling. 
*/ -struct dataset * -create_dataset (void) -{ - struct dataset *ds = xzalloc (sizeof(*ds)); - ds->dict = dict_create (); + *cc = case_unshare (*cc); + case_data_rw (*cc, var)->f = case_num; - dict_set_change_callback (ds->dict, dict_callback, ds); - - dict_set_encoding (ds->dict, get_default_encoding ()); - - ds->caseinit = caseinit_create (); - proc_cancel_all_transformations (ds); - - ds->syntax_encoding = xstrdup ("Auto"); - - return ds; + return TRNS_CONTINUE; } - -void -dataset_add_transform_change_callback (struct dataset *ds, - transformation_change_callback_func *cb, - void *aux) +/* Add a variable which we can sort by to get back the original order. */ +struct variable * +add_permanent_ordering_transformation (struct dataset *ds) { - ds->xform_callback = cb; - ds->xform_callback_aux = aux; -} + struct variable *temp_var; -/* Finishes up procedure handling. */ -void -destroy_dataset (struct dataset *ds) -{ - proc_discard_active_file (ds); - dict_destroy (ds->dict); - caseinit_destroy (ds->caseinit); - trns_chain_destroy (ds->permanent_trns_chain); + temp_var = dict_create_var_assert (ds->dict, "$ORDER", 0); + if (proc_in_temporary_transformations (ds)) + { + struct variable *perm_var; - if ( ds->xform_callback) - ds->xform_callback (false, ds->xform_callback_aux); + perm_var = dict_clone_var_in_place_assert (ds->permanent_dict, temp_var); + trns_chain_append (ds->permanent_trns_chain, NULL, store_case_num, + NULL, perm_var); + trns_chain_finalize (ds->permanent_trns_chain); + } + else + add_transformation (ds, store_case_num, NULL, temp_var); - free (ds->syntax_encoding); - free (ds); + return temp_var; } - + /* Causes output from the next procedure to be discarded, instead of being preserved for use as input for the next procedure. */ void @@ -651,77 +853,8 @@ proc_discard_output (struct dataset *ds) ds->discard_output = true; } -/* Discards the active file dictionary, data, and - transformations. 
*/ -void -proc_discard_active_file (struct dataset *ds) -{ - assert (ds->proc_state == PROC_COMMITTED); - - dict_clear (ds->dict); - fh_set_default_handle (NULL); - - ds->n_lag = 0; - - casereader_destroy (ds->source); - ds->source = NULL; - - proc_cancel_all_transformations (ds); -} - -/* Sets SOURCE as the source for procedure input for the next - procedure. */ -void -proc_set_active_file (struct dataset *ds, - struct casereader *source, - struct dictionary *dict) -{ - assert (ds->proc_state == PROC_COMMITTED); - assert (ds->dict != dict); - - proc_discard_active_file (ds); - - dict_destroy (ds->dict); - ds->dict = dict; - dict_set_change_callback (ds->dict, dict_callback, ds); - proc_set_active_file_data (ds, source); -} - -/* Replaces the active file's data by READER without replacing - the associated dictionary. */ -bool -proc_set_active_file_data (struct dataset *ds, struct casereader *reader) -{ - casereader_destroy (ds->source); - ds->source = reader; - - caseinit_clear (ds->caseinit); - caseinit_mark_as_preinited (ds->caseinit, ds->dict); - - return reader == NULL || !casereader_error (reader); -} - -/* Returns true if an active file data source is available, false - otherwise. */ -bool -proc_has_active_file (const struct dataset *ds) -{ - return ds->source != NULL; -} - -/* Returns the active file data source from DS, or a null pointer - if DS has no data source, and removes it from DS. */ -struct casereader * -proc_extract_active_file_data (struct dataset *ds) -{ - struct casereader *reader = ds->source; - ds->source = NULL; - - return reader; -} - -/* Checks whether DS has a corrupted active file. If so, +/* Checks whether DS has a corrupted active dataset. If so, discards it and returns false. If not, returns true without doing anything. 
*/ bool @@ -731,7 +864,7 @@ dataset_end_of_command (struct dataset *ds) { if (casereader_error (ds->source)) { - proc_discard_active_file (ds); + dataset_clear (ds); return false; } else @@ -806,7 +939,7 @@ add_filter_trns (struct dataset *ds) /* FILTER transformation. */ static int filter_trns_proc (void *filter_var_, - struct ccase **c UNUSED, casenumber case_nr UNUSED) + struct ccase **c, casenumber case_nr UNUSED) { struct variable *filter_var = filter_var_; @@ -816,20 +949,30 @@ filter_trns_proc (void *filter_var_, } -struct dictionary * -dataset_dict (const struct dataset *ds) +void +dataset_need_lag (struct dataset *ds, int n_before) { - return ds->dict; + ds->n_lag = MAX (ds->n_lag, n_before); +} + +static void +dataset_changed__ (struct dataset *ds) +{ + if (ds->callbacks != NULL && ds->callbacks->changed != NULL) + ds->callbacks->changed (ds->cb_data); } -const struct casereader * -dataset_source (const struct dataset *ds) +static void +dataset_transformations_changed__ (struct dataset *ds, bool non_empty) { - return ds->source; + if (ds->callbacks != NULL && ds->callbacks->transformations_changed != NULL) + ds->callbacks->transformations_changed (non_empty, ds->cb_data); } + +/* Private interface for use by session code. */ void -dataset_need_lag (struct dataset *ds, int n_before) +dataset_set_session__ (struct dataset *ds, struct session *session) { - ds->n_lag = MAX (ds->n_lag, n_before); + ds->session = session; }