X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fdataset.c;h=cbff74088b62d470c924d27e9e5f3c6f6a9dceff;hb=349a2ee8f7e74c49e178364fa783303666b1141a;hp=5d0598e3a6949444c65acfd9b7d71861a7987941;hpb=b401615e6db40bf74394839b96600afe3a868a95;p=pspp diff --git a/src/data/dataset.c b/src/data/dataset.c index 5d0598e3a6..cbff74088b 100644 --- a/src/data/dataset.c +++ b/src/data/dataset.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,6 +32,7 @@ #include "data/casewriter.h" #include "data/dictionary.h" #include "data/file-handle-def.h" +#include "data/session.h" #include "data/transformations.h" #include "data/variable.h" #include "libpspp/deque.h" @@ -44,6 +45,15 @@ #include "gl/xalloc.h" struct dataset { + /* A dataset is usually part of a session. Within a session its name must + unique. The name must either be a valid PSPP identifier or the empty + string. (It must be unique within the session even if it is the empty + string; that is, there may only be a single dataset within a session with + the empty string as its name.) */ + struct session *session; + char *name; + enum dataset_display display; + /* Cases are read from source, their transformation variables are initialized, pass through permanent_trns_chain (which transforms them into @@ -54,20 +64,22 @@ struct dataset { and are finally passed to the procedure. */ struct casereader *source; struct caseinit *caseinit; - struct trns_chain *permanent_trns_chain; + struct trns_chain permanent_trns_chain; struct dictionary *permanent_dict; struct casewriter *sink; - struct trns_chain *temporary_trns_chain; + struct trns_chain temporary_trns_chain; + bool temporary; struct dictionary *dict; + /* Stack of transformation chains for DO IF and LOOP and INPUT PROGRAM. */ + struct trns_chain *stack; + size_t n_stack; + size_t allocated_stack; + /* If true, cases are discarded instead of being written to sink. */ bool discard_output; - /* The transformation chain that the next transformation will be - added to. */ - struct trns_chain *cur_trns_chain; - /* The case map used to compact a case, if necessary; otherwise a null pointer. */ struct case_map *compactor; @@ -96,8 +108,8 @@ struct dataset { const struct dataset_callbacks *callbacks; void *cb_data; - /* Default encoding for reading syntax files. */ - char *syntax_encoding; + /* Uniquely distinguishes datasets. */ + unsigned int seqno; }; static void dataset_changed__ (struct dataset *); @@ -116,36 +128,87 @@ dict_callback (struct dictionary *d UNUSED, void *ds_) dataset_changed__ (ds); } -/* Creates and returns a new dataset. The dataset initially has an empty - dictionary and no data source. */ -struct dataset * -dataset_create (void) +static void +dataset_create_finish__ (struct dataset *ds, struct session *session) { - struct dataset *ds; + static unsigned int seqno; - ds = xzalloc (sizeof *ds); - ds->dict = dict_create (); dict_set_change_callback (ds->dict, dict_callback, ds); - dict_set_encoding (ds->dict, get_default_encoding ()); - - ds->caseinit = caseinit_create (); proc_cancel_all_transformations (ds); - ds->syntax_encoding = xstrdup ("Auto"); + dataset_set_session (ds, session); + ds->seqno = ++seqno; +} + +/* Creates a new dataset named NAME, adds it to SESSION, and returns it. If + SESSION already contains a dataset named NAME, it is deleted and replaced. + The dataset initially has an empty dictionary and no data source. */ +struct dataset * +dataset_create (struct session *session, const char *name) +{ + struct dataset *ds = XMALLOC (struct dataset); + *ds = (struct dataset) { + .name = xstrdup (name), + .display = DATASET_FRONT, + .dict = dict_create (get_default_encoding ()), + .caseinit = caseinit_create (), + }; + dataset_create_finish__ (ds, session); + return ds; } +/* Creates and returns a new dataset that has the same data and dictionary as + OLD named NAME, adds it to the same session as OLD, and returns the new + dataset. If SESSION already contains a dataset named NAME, it is deleted + and replaced. + + OLD must not have any active transformations or temporary state and must + not be in the middle of a procedure. + + Callbacks are not cloned. */ +struct dataset * +dataset_clone (struct dataset *old, const char *name) +{ + struct dataset *new; + + assert (old->proc_state == PROC_COMMITTED); + assert (!old->permanent_trns_chain.n); + assert (old->permanent_dict == NULL); + assert (old->sink == NULL); + assert (!old->temporary); + assert (!old->temporary_trns_chain.n); + + new = xzalloc (sizeof *new); + new->name = xstrdup (name); + new->display = DATASET_FRONT; + new->source = casereader_clone (old->source); + new->dict = dict_clone (old->dict); + new->caseinit = caseinit_clone (old->caseinit); + new->last_proc_invocation = old->last_proc_invocation; + new->ok = old->ok; + + dataset_create_finish__ (new, old->session); + + return new; +} + /* Destroys DS. */ void dataset_destroy (struct dataset *ds) { if (ds != NULL) { + dataset_set_session (ds, NULL); dataset_clear (ds); - dict_destroy (ds->dict); + dict_unref (ds->dict); + dict_unref (ds->permanent_dict); caseinit_destroy (ds->caseinit); - trns_chain_destroy (ds->permanent_trns_chain); + trns_chain_uninit (&ds->permanent_trns_chain); + for (size_t i = 0; i < ds->n_stack; i++) + trns_chain_uninit (&ds->stack[i]); + free (ds->stack); dataset_transformations_changed__ (ds, false); - free (ds->syntax_encoding); + free (ds->name); free (ds); } } @@ -167,6 +230,55 @@ dataset_clear (struct dataset *ds) proc_cancel_all_transformations (ds); } +const char * +dataset_name (const struct dataset *ds) +{ + return ds->name; +} + +void +dataset_set_name (struct dataset *ds, const char *name) +{ + struct session *session = ds->session; + bool active = false; + + if (session != NULL) + { + active = session_active_dataset (session) == ds; + if (active) + session_set_active_dataset (session, NULL); + dataset_set_session (ds, NULL); + } + + free (ds->name); + ds->name = xstrdup (name); + + if (session != NULL) + { + dataset_set_session (ds, session); + if (active) + session_set_active_dataset (session, ds); + } +} + +struct session * +dataset_session (const struct dataset *ds) +{ + return ds->session; +} + +void +dataset_set_session (struct dataset *ds, struct session *session) +{ + if (session != ds->session) + { + if (ds->session != NULL) + session_remove_dataset (ds->session, ds); + if (session != NULL) + session_add_dataset (session, ds); + } +} + /* Returns the dictionary within DS. This is always nonnull, although it might not contain any variables. */ struct dictionary * @@ -185,7 +297,7 @@ dataset_set_dict (struct dataset *ds, struct dictionary *dict) dataset_clear (ds); - dict_destroy (ds->dict); + dict_unref (ds->dict); ds->dict = dict; dict_set_change_callback (ds->dict, dict_callback, ds); } @@ -230,6 +342,15 @@ dataset_steal_source (struct dataset *ds) return reader; } +/* Returns a number unique to DS. It can be used to distinguish one dataset + from any other within a given program run, even datasets that do not exist + at the same time. */ +unsigned int +dataset_seqno (const struct dataset *ds) +{ + return ds->seqno; +} + void dataset_set_callbacks (struct dataset *ds, const struct dataset_callbacks *callbacks, @@ -239,23 +360,24 @@ dataset_set_callbacks (struct dataset *ds, ds->cb_data = cb_data; } -void -dataset_set_default_syntax_encoding (struct dataset *ds, const char *encoding) +enum dataset_display +dataset_get_display (const struct dataset *ds) { - free (ds->syntax_encoding); - ds->syntax_encoding = xstrdup (encoding); + return ds->display; } -const char * -dataset_get_default_syntax_encoding (const struct dataset *ds) +void +dataset_set_display (struct dataset *ds, enum dataset_display display) { - return ds->syntax_encoding; + ds->display = display; } /* Returns the last time the data was read. */ time_t time_of_last_procedure (struct dataset *ds) { + if (!ds) + return time (NULL); if (ds->last_proc_invocation == 0) update_last_proc_invocation (ds); return ds->last_proc_invocation; @@ -272,9 +394,8 @@ proc_execute (struct dataset *ds) { bool ok; - if ((ds->temporary_trns_chain == NULL - || trns_chain_is_empty (ds->temporary_trns_chain)) - && trns_chain_is_empty (ds->permanent_trns_chain)) + if ((!ds->temporary || !ds->temporary_trns_chain.n) + && !ds->permanent_trns_chain.n) { ds->n_lag = 0; ds->discard_output = false; @@ -311,7 +432,6 @@ proc_open_filtering (struct dataset *ds, bool filter) add_case_limit_trns (ds); if (filter) add_filter_trns (ds); - trns_chain_finalize (ds->cur_trns_chain); /* Make permanent_dict refer to the dictionary right before data reaches the sink. */ @@ -322,8 +442,8 @@ proc_open_filtering (struct dataset *ds, bool filter) if (!ds->discard_output) { struct dictionary *pd = ds->permanent_dict; - size_t compacted_value_cnt = dict_count_values (pd, 1u << DC_SCRATCH); - if (compacted_value_cnt < dict_get_next_value_idx (pd)) + size_t compacted_n_values = dict_count_values (pd, 1u << DC_SCRATCH); + if (compacted_n_values < dict_get_next_value_idx (pd)) { struct caseproto *compacted_proto; compacted_proto = dict_get_compacted_proto (pd, 1u << DC_SCRATCH); @@ -395,8 +515,6 @@ proc_casereader_read (struct casereader *reader UNUSED, void *ds_) assert (ds->proc_state == PROC_OPEN); for (; ; case_unref (c)) { - casenumber case_nr; - assert (retval == TRNS_DROP_CASE || retval == TRNS_ERROR); if (retval == TRNS_ERROR) ds->ok = false; @@ -411,9 +529,8 @@ proc_casereader_read (struct casereader *reader UNUSED, void *ds_) caseinit_init_vars (ds->caseinit, c); /* Execute permanent transformations. */ - case_nr = ds->cases_written + 1; - retval = trns_chain_execute (ds->permanent_trns_chain, TRNS_CONTINUE, - &c, case_nr); + casenumber case_nr = ds->cases_written + 1; + retval = trns_chain_execute (&ds->permanent_trns_chain, case_nr, &c); caseinit_update_left_vars (ds->caseinit, c); if (retval != TRNS_CONTINUE) continue; @@ -433,10 +550,10 @@ proc_casereader_read (struct casereader *reader UNUSED, void *ds_) case_map_execute (ds->compactor, case_ref (c))); /* Execute temporary transformations. */ - if (ds->temporary_trns_chain != NULL) + if (ds->temporary_trns_chain.n) { - retval = trns_chain_execute (ds->temporary_trns_chain, TRNS_CONTINUE, - &c, ds->cases_written); + retval = trns_chain_execute (&ds->temporary_trns_chain, + ds->cases_written, &c); if (retval != TRNS_CONTINUE) continue; } @@ -554,63 +671,26 @@ lagged_case (const struct dataset *ds, int n_before) return NULL; } -/* Returns the current set of permanent transformations, - and clears the permanent transformations. - For use by INPUT PROGRAM. */ -struct trns_chain * -proc_capture_transformations (struct dataset *ds) -{ - struct trns_chain *chain; - - assert (ds->temporary_trns_chain == NULL); - chain = ds->permanent_trns_chain; - ds->cur_trns_chain = ds->permanent_trns_chain = trns_chain_create (); - dataset_transformations_changed__ (ds, false); - - return chain; -} - -/* Adds a transformation that processes a case with PROC and - frees itself with FREE to the current set of transformations. - The functions are passed AUX as auxiliary data. */ +/* Adds TRNS to the current set of transformations. */ void -add_transformation (struct dataset *ds, trns_proc_func *proc, trns_free_func *free, void *aux) -{ - trns_chain_append (ds->cur_trns_chain, NULL, proc, free, aux); +add_transformation (struct dataset *ds, + const struct trns_class *class, void *aux) +{ + struct trns_chain *chain = (ds->n_stack > 0 ? &ds->stack[ds->n_stack - 1] + : ds->temporary ? &ds->temporary_trns_chain + : &ds->permanent_trns_chain); + struct transformation t = { .class = class, .aux = aux }; + trns_chain_append (chain, &t); dataset_transformations_changed__ (ds, true); } -/* Adds a transformation that processes a case with PROC and - frees itself with FREE to the current set of transformations. - When parsing of the block of transformations is complete, - FINALIZE will be called. - The functions are passed AUX as auxiliary data. */ -void -add_transformation_with_finalizer (struct dataset *ds, - trns_finalize_func *finalize, - trns_proc_func *proc, - trns_free_func *free, void *aux) -{ - trns_chain_append (ds->cur_trns_chain, finalize, proc, free, aux); - dataset_transformations_changed__ (ds, true); -} - -/* Returns the index of the next transformation. - This value can be returned by a transformation procedure - function to indicate a "jump" to that transformation. */ -size_t -next_transformation (const struct dataset *ds) -{ - return trns_chain_next (ds->cur_trns_chain); -} - /* Returns true if the next call to add_transformation() will add a temporary transformation, false if it will add a permanent transformation. */ bool proc_in_temporary_transformations (const struct dataset *ds) { - return ds->temporary_trns_chain != NULL; + return ds->temporary; } /* Marks the start of temporary transformations. @@ -625,26 +705,29 @@ proc_start_temporary_transformations (struct dataset *ds) ds->permanent_dict = dict_clone (ds->dict); - trns_chain_finalize (ds->permanent_trns_chain); - ds->temporary_trns_chain = ds->cur_trns_chain = trns_chain_create (); + ds->temporary = true; dataset_transformations_changed__ (ds, true); } } -/* Converts all the temporary transformations, if any, to - permanent transformations. Further transformations will be - permanent. +/* Converts all the temporary transformations, if any, to permanent + transformations. Further transformations will be permanent. + + The FILTER command is implemented as a temporary transformation, so a + procedure that uses this function should usually use proc_open_filtering() + with FILTER false, instead of plain proc_open(). + Returns true if anything changed, false otherwise. */ bool proc_make_temporary_transformations_permanent (struct dataset *ds) { if (proc_in_temporary_transformations (ds)) { - trns_chain_finalize (ds->temporary_trns_chain); - trns_chain_splice (ds->permanent_trns_chain, ds->temporary_trns_chain); - ds->temporary_trns_chain = NULL; + trns_chain_splice (&ds->permanent_trns_chain, &ds->temporary_trns_chain); + + ds->temporary = false; - dict_destroy (ds->permanent_dict); + dict_unref (ds->permanent_dict); ds->permanent_dict = NULL; return true; @@ -661,14 +744,13 @@ proc_cancel_temporary_transformations (struct dataset *ds) { if (proc_in_temporary_transformations (ds)) { - dict_destroy (ds->dict); + dict_unref (ds->dict); ds->dict = ds->permanent_dict; ds->permanent_dict = NULL; - trns_chain_destroy (ds->temporary_trns_chain); - ds->temporary_trns_chain = NULL; - dataset_transformations_changed__ ( - ds, !trns_chain_is_empty (ds->permanent_trns_chain)); + trns_chain_clear (&ds->temporary_trns_chain); + + dataset_transformations_changed__ (ds, ds->permanent_trns_chain.n != 0); return true; } else @@ -682,14 +764,63 @@ proc_cancel_all_transformations (struct dataset *ds) { bool ok; assert (ds->proc_state == PROC_COMMITTED); - ok = trns_chain_destroy (ds->permanent_trns_chain); - ok = trns_chain_destroy (ds->temporary_trns_chain) && ok; - ds->permanent_trns_chain = ds->cur_trns_chain = trns_chain_create (); - ds->temporary_trns_chain = NULL; + ok = trns_chain_clear (&ds->permanent_trns_chain); + ok = trns_chain_clear (&ds->temporary_trns_chain) && ok; + ds->temporary = false; + for (size_t i = 0; i < ds->n_stack; i++) + ok = trns_chain_uninit (&ds->stack[i]) && ok; + ds->n_stack = 0; dataset_transformations_changed__ (ds, false); return ok; } + +void +proc_push_transformations (struct dataset *ds) +{ + if (ds->n_stack >= ds->allocated_stack) + ds->stack = x2nrealloc (ds->stack, &ds->allocated_stack, + sizeof *ds->stack); + trns_chain_init (&ds->stack[ds->n_stack++]); +} + +void +proc_pop_transformations (struct dataset *ds, struct trns_chain *chain) +{ + assert (ds->n_stack > 0); + *chain = ds->stack[--ds->n_stack]; +} + +static enum trns_result +store_case_num (void *var_, struct ccase **cc, casenumber case_num) +{ + struct variable *var = var_; + + *cc = case_unshare (*cc); + *case_num_rw (*cc, var) = case_num; + + return TRNS_CONTINUE; +} + +/* Add a variable which we can sort by to get back the original order. */ +struct variable * +add_permanent_ordering_transformation (struct dataset *ds) +{ + struct variable *temp_var = dict_create_var_assert (ds->dict, "$ORDER", 0); + struct variable *order_var + = (proc_in_temporary_transformations (ds) + ? dict_clone_var_in_place_assert (ds->permanent_dict, temp_var) + : temp_var); + + static const struct trns_class trns_class = { + .name = "ordering", + .execute = store_case_num + }; + const struct transformation t = { .class = &trns_class, .aux = order_var }; + trns_chain_append (&ds->permanent_trns_chain, &t); + + return temp_var; +} /* Causes output from the next procedure to be discarded, instead of being preserved for use as input for the next procedure. */ @@ -723,28 +854,9 @@ dataset_end_of_command (struct dataset *ds) return true; } -static trns_proc_func case_limit_trns_proc; -static trns_free_func case_limit_trns_free; - -/* Adds a transformation that limits the number of cases that may - pass through, if DS->DICT has a case limit. */ -static void -add_case_limit_trns (struct dataset *ds) -{ - casenumber case_limit = dict_get_case_limit (ds->dict); - if (case_limit != 0) - { - casenumber *cases_remaining = xmalloc (sizeof *cases_remaining); - *cases_remaining = case_limit; - add_transformation (ds, case_limit_trns_proc, case_limit_trns_free, - cases_remaining); - dict_set_case_limit (ds->dict, 0); - } -} - /* Limits the maximum number of cases processed to *CASES_REMAINING. */ -static int +static enum trns_result case_limit_trns_proc (void *cases_remaining_, struct ccase **c UNUSED, casenumber case_nr UNUSED) { @@ -766,34 +878,59 @@ case_limit_trns_free (void *cases_remaining_) free (cases_remaining); return true; } - -static trns_proc_func filter_trns_proc; -/* Adds a temporary transformation to filter data according to - the variable specified on FILTER, if any. */ +/* Adds a transformation that limits the number of cases that may + pass through, if DS->DICT has a case limit. */ static void -add_filter_trns (struct dataset *ds) +add_case_limit_trns (struct dataset *ds) { - struct variable *filter_var = dict_get_filter (ds->dict); - if (filter_var != NULL) + casenumber case_limit = dict_get_case_limit (ds->dict); + if (case_limit != 0) { - proc_start_temporary_transformations (ds); - add_transformation (ds, filter_trns_proc, NULL, filter_var); + casenumber *cases_remaining = xmalloc (sizeof *cases_remaining); + *cases_remaining = case_limit; + + static const struct trns_class trns_class = { + .name = "case limit", + .execute = case_limit_trns_proc, + .destroy = case_limit_trns_free, + }; + add_transformation (ds, &trns_class, cases_remaining); + + dict_set_case_limit (ds->dict, 0); } } + /* FILTER transformation. */ -static int +static enum trns_result filter_trns_proc (void *filter_var_, - struct ccase **c UNUSED, casenumber case_nr UNUSED) + struct ccase **c, casenumber case_nr UNUSED) { struct variable *filter_var = filter_var_; double f = case_num (*c, filter_var); - return (f != 0.0 && !var_is_num_missing (filter_var, f, MV_ANY) + return (f != 0.0 && !var_is_num_missing (filter_var, f) ? TRNS_CONTINUE : TRNS_DROP_CASE); } +/* Adds a temporary transformation to filter data according to + the variable specified on FILTER, if any. */ +static void +add_filter_trns (struct dataset *ds) +{ + struct variable *filter_var = dict_get_filter (ds->dict); + if (filter_var != NULL) + { + proc_start_temporary_transformations (ds); + + static const struct trns_class trns_class = { + .name = "FILTER", + .execute = filter_trns_proc, + }; + add_transformation (ds, &trns_class, filter_var); + } +} void dataset_need_lag (struct dataset *ds, int n_before) @@ -814,3 +951,11 @@ dataset_transformations_changed__ (struct dataset *ds, bool non_empty) if (ds->callbacks != NULL && ds->callbacks->transformations_changed != NULL) ds->callbacks->transformations_changed (non_empty, ds->cb_data); } + +/* Private interface for use by session code. */ + +void +dataset_set_session__ (struct dataset *ds, struct session *session) +{ + ds->session = session; +}