From 2bdde1cd21cd58349cf4bd852fddf40524854288 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 19 Sep 2007 04:28:59 +0000 Subject: [PATCH] Fix bug #20910. * helper.c (create_casereader_from_data_store): New function. (execute_syntax): Only replace the active file data by a new casereader if syntax caused the active file to be read, to avoid exponential slowdown as an increasing number of snippets that do not read from the active file are consecutively executed. Bug #20910. Reviewed by and heavily influenced by John Darrington. * psppire-data-store.c (psppire_data_store_get_value_count): New function. * psppire-dict.c (psppire_dict_get_value_cnt): New function. * procedure.c (proc_extract_active_file_data): New function. * lazy-casereader.h: New file. * lazy-casereader.c: New file. * casereader.c (casereader_dynamic_cast): New function. --- src/data/ChangeLog | 10 ++ src/data/automake.mk | 2 + src/data/casereader-provider.h | 2 + src/data/casereader.c | 20 ++++ src/data/lazy-casereader.c | 161 ++++++++++++++++++++++++++++++++ src/data/lazy-casereader.h | 39 ++++++++ src/data/procedure.c | 12 +++ src/data/procedure.h | 1 + src/ui/gui/ChangeLog | 14 +++ src/ui/gui/helper.c | 49 ++++++++-- src/ui/gui/psppire-data-store.c | 6 ++ src/ui/gui/psppire-data-store.h | 1 + src/ui/gui/psppire-dict.c | 11 +++ src/ui/gui/psppire-dict.h | 3 + 14 files changed, 322 insertions(+), 9 deletions(-) create mode 100644 src/data/lazy-casereader.c create mode 100644 src/data/lazy-casereader.h diff --git a/src/data/ChangeLog b/src/data/ChangeLog index 91a670fb..0bec245b 100644 --- a/src/data/ChangeLog +++ b/src/data/ChangeLog @@ -1,3 +1,13 @@ +2007-09-18 Ben Pfaff + + * procedure.c (proc_extract_active_file_data): New function. + + * lazy-casereader.h: New file. + + * lazy-casereader.c: New file. + + * casereader.c (casereader_dynamic_cast): New function. + 2007-09-14 Ben Pfaff * dictionary.c (dict_clone): Copy case indexes from cloned diff --git a/src/data/automake.mk b/src/data/automake.mk index 697cbb33..3c73494c 100644 --- a/src/data/automake.mk +++ b/src/data/automake.mk @@ -50,6 +50,8 @@ src_data_libdata_a_SOURCES = \ src/data/format.def \ src/data/identifier.c \ src/data/identifier.h \ + src/data/lazy-casereader.c \ + src/data/lazy-casereader.h \ src/data/missing-values.c \ src/data/missing-values.h \ src/data/make-file.c \ diff --git a/src/data/casereader-provider.h b/src/data/casereader-provider.h index 0c8b7e53..43c980a8 100644 --- a/src/data/casereader-provider.h +++ b/src/data/casereader-provider.h @@ -110,6 +110,8 @@ struct casereader * casereader_create_sequential (const struct taint *, size_t value_cnt, casenumber case_cnt, const struct casereader_class *, void *); + +void *casereader_dynamic_cast (struct casereader *, struct casereader_class *); /* Casereader class for random-access data sources. */ struct casereader_random_class diff --git a/src/data/casereader.c b/src/data/casereader.c index 3be1042d..27c6148b 100644 --- a/src/data/casereader.c +++ b/src/data/casereader.c @@ -311,6 +311,26 @@ casereader_create_sequential (const struct taint *taint, reader->aux = aux; return reader; } + +/* If READER is a casereader of the given CLASS, returns its + associated auxiliary data; otherwise, returns a null pointer. + + This function is intended for use from casereader + implementations, not by casereader users. Even within + casereader implementations, its usefulness is quite limited, + for at least two reasons. First, every casereader member + function already receives a pointer to the casereader's + auxiliary data. Second, a casereader's class can change + (through a call to casereader_swap) and this is in practice + quite common (e.g. any call to casereader_clone on a + casereader that does not directly support clone will cause the + casereader to be replaced by a shim caseader). */ +void * +casereader_dynamic_cast (struct casereader *reader, + struct casereader_class *class) +{ + return reader->class == class ? reader->aux : NULL; +} /* Random-access casereader implementation. diff --git a/src/data/lazy-casereader.c b/src/data/lazy-casereader.c new file mode 100644 index 00000000..71c1da6a --- /dev/null +++ b/src/data/lazy-casereader.c @@ -0,0 +1,161 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include + +#include + +#include +#include +#include +#include + +#include "xalloc.h" + +/* A lazy casereader's auxiliary data. */ +struct lazy_casereader + { + unsigned long int serial; + struct casereader *(*callback) (void *aux); + void *aux; + }; + +static struct casereader_class lazy_casereader_class; + +/* Creates and returns a new lazy casereader that will + instantiate its underlying casereader, if necessary, by + calling CALLBACK, passing AUX as its argument. *SERIAL is set + to a "serial number" that uniquely identifies the new lazy + casereader, for use with lazy_casereader_destroy. + + VALUE_CNT must be the number of struct values per case read + from the casereader. + + CASE_CNT is an upper limit on the number of cases that + casereader_read will return from the casereader in successive + calls. Ordinarily, this is the actual number of cases in the + data source or CASENUMBER_MAX if the number of cases cannot be + predicted in advance. */ +struct casereader * +lazy_casereader_create (size_t value_cnt, casenumber case_cnt, + struct casereader *(*callback) (void *aux), void *aux, + unsigned long int *serial) +{ + static unsigned long int next_serial = 0; + struct lazy_casereader *lc; + assert (callback != NULL); + lc = xmalloc (sizeof *lc); + *serial = lc->serial = next_serial++; + lc->callback = callback; + lc->aux = aux; + return casereader_create_sequential (NULL, value_cnt, case_cnt, + &lazy_casereader_class, lc); +} + +/* If READER is the lazy casereader that was returned by + lazy_casereader_create along with SERIAL, and READER was never + instantiated by any use of a casereader function, then this + function destroys READER without instantiating it, and returns + true. Returns false in any other case; that is, if READER is + not a lazy casereader, or if READER is a lazy casereader with + a serial number different from SERIAL, or if READER is a lazy + casereader that was instantiated. + + When this function returns false, it necessarily indicates + that the lazy casereader was never cloned and never + destroyed. */ +bool +lazy_casereader_destroy (struct casereader *reader, unsigned long int serial) +{ + struct lazy_casereader *lc; + + if (reader == NULL) + return false; + + lc = casereader_dynamic_cast (reader, &lazy_casereader_class); + if (lc == NULL || lc->serial != serial) + return false; + + lc->callback = NULL; + casereader_destroy (reader); + return true; +} + +/* Instantiates lazy casereader READER, which is associated with + LC. */ +static void +instantiate_lazy_casereader (struct casereader *reader, + struct lazy_casereader *lc) +{ + struct casereader *subreader; + + /* Call the client-provided callback to obtain the real + casereader, then swap READER with that casereader. */ + subreader = lc->callback (lc->aux); + casereader_swap (reader, subreader); + + /* Now destroy the lazy casereader, which is no longer needed + since we already swapped it out. Set the callback to null + to prevent lazy_casereader_do_destroy from trying to + instantiate it again. */ + lc->callback = NULL; + casereader_destroy (subreader); +} + +static bool +lazy_casereader_read (struct casereader *reader, void *lc_, + struct ccase *c) +{ + struct lazy_casereader *lc = lc_; + instantiate_lazy_casereader (reader, lc); + return casereader_read (reader, c); +} + +static void +lazy_casereader_do_destroy (struct casereader *reader UNUSED, void *lc_) +{ + struct lazy_casereader *lc = lc_; + if (lc->callback != NULL) + casereader_destroy (lc->callback (lc->aux)); + free (lc); +} + +static struct casereader * +lazy_casereader_clone (struct casereader *reader, void *lc_) +{ + struct lazy_casereader *lc = lc_; + instantiate_lazy_casereader (reader, lc); + return casereader_clone (reader); +} + +static bool +lazy_casereader_peek (struct casereader *reader, void *lc_, + casenumber idx, struct ccase *c) +{ + struct lazy_casereader *lc = lc_; + instantiate_lazy_casereader (reader, lc); + return casereader_peek (reader, idx, c); +} + +static struct casereader_class lazy_casereader_class = + { + lazy_casereader_read, + lazy_casereader_do_destroy, + lazy_casereader_clone, + lazy_casereader_peek, + }; diff --git a/src/data/lazy-casereader.h b/src/data/lazy-casereader.h new file mode 100644 index 00000000..c83f39db --- /dev/null +++ b/src/data/lazy-casereader.h @@ -0,0 +1,39 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Lazy casereader. + + A "lazy casereader" is a casereader that saves an underlying + casereader from the need to be instantiated in the case where + it is never used. If any casereader operation is ever + performed on a lazy casereader, it invokes a callback function + (provided by the lazy casereader's creator) to instantiate the + underlying reader. */ + +#ifndef DATA_LAZY_CASEREADER_H +#define DATA_LAZY_CASEREADER_H 1 + +#include +#include + +struct casereader *lazy_casereader_create (size_t value_cnt, + casenumber case_cnt, + struct casereader *(*) (void *aux), + void *aux, + unsigned long int *serial); +bool lazy_casereader_destroy (struct casereader *, unsigned long int serial); + +#endif /* data/lazy-casereader.h */ diff --git a/src/data/procedure.c b/src/data/procedure.c index 6a891c9b..b237c68b 100644 --- a/src/data/procedure.c +++ b/src/data/procedure.c @@ -636,6 +636,18 @@ proc_has_active_file (const struct dataset *ds) return ds->source != NULL; } +/* Returns the active file data source from DS, or a null pointer + if DS has no data source, and removes it from DS. */ +struct casereader * +proc_extract_active_file_data (struct dataset *ds) +{ + struct casereader *reader = ds->source; + ds->source = NULL; + if (ds->replace_source) ds->replace_source (reader); + + return reader; +} + /* Checks whether DS has a corrupted active file. If so, discards it and returns false. If not, returns true without doing anything. */ diff --git a/src/data/procedure.h b/src/data/procedure.h index cfb8f218..9ad36429 100644 --- a/src/data/procedure.h +++ b/src/data/procedure.h @@ -64,6 +64,7 @@ void proc_set_active_file (struct dataset *, struct casereader *, struct dictionary *); bool proc_set_active_file_data (struct dataset *, struct casereader *); bool proc_has_active_file (const struct dataset *ds); +struct casereader *proc_extract_active_file_data (struct dataset *); void proc_discard_output (struct dataset *ds); diff --git a/src/ui/gui/ChangeLog b/src/ui/gui/ChangeLog index 9e442432..a58adace 100644 --- a/src/ui/gui/ChangeLog +++ b/src/ui/gui/ChangeLog @@ -1,3 +1,17 @@ +2007-09-18 Ben Pfaff + + * helper.c (create_casereader_from_data_store): New function. + (execute_syntax): Only replace the active file data by a new + casereader if syntax caused the active file to be read, to avoid + exponential slowdown as an increasing number of snippets that do + not read from the active file are consecutively executed. Bug + #20910. Reviewed by and heavily influenced by John Darrington. + + * psppire-data-store.c (psppire_data_store_get_value_count): New + function. + + * psppire-dict.c (psppire_dict_get_value_cnt): New function. + 2007-09-13 John Darrington * find-dialog.c find-dialog.h: New files. diff --git a/src/ui/gui/helper.c b/src/ui/gui/helper.c index 0203999d..c322321d 100644 --- a/src/ui/gui/helper.c +++ b/src/ui/gui/helper.c @@ -28,21 +28,26 @@ #include #include #include +#include #include #include #include #include +#include #include #include +#include #include #include #include "psppire-data-store.h" #include #include "output-viewer.h" +#include "xalloc.h" + #include /* Formats a value according to FORMAT @@ -165,14 +170,43 @@ extern struct dataset *the_dataset; extern struct source_stream *the_source_stream; extern PsppireDataStore *the_data_store; +/* Lazy casereader callback function used by execute_syntax. */ +static struct casereader * +create_casereader_from_data_store (void *data_store_) +{ + PsppireDataStore *data_store = data_store_; + return psppire_data_store_get_reader (data_store); +} + gboolean execute_syntax (struct getl_interface *sss) { struct lexer *lexer; gboolean retval = TRUE; - struct casereader *reader = psppire_data_store_get_reader (the_data_store); - + struct casereader *reader; + size_t value_cnt; + casenumber case_cnt; + unsigned long int lazy_serial; + + /* When the user executes a number of snippets of syntax in a + row, none of which read from the active file, the GUI becomes + progressively less responsive. The reason is that each syntax + execution encapsulates the active file data in another + datasheet layer. The cumulative effect of having a number of + layers of datasheets wastes time and space. + + To solve the problem, we use a "lazy casereader", a wrapper + around the casereader obtained from the data store, that + only actually instantiates that casereader when it is + needed. If the data store casereader is never needed, then + it is reused the next time syntax is run, without wrapping + it in another layer. */ + value_cnt = psppire_data_store_get_value_count (the_data_store); + case_cnt = psppire_data_store_get_case_count (the_data_store); + reader = lazy_casereader_create (value_cnt, case_cnt, + create_casereader_from_data_store, + the_data_store, &lazy_serial); proc_set_active_file_data (the_dataset, reader); g_return_val_if_fail (proc_has_active_file (the_dataset), FALSE); @@ -204,13 +238,10 @@ execute_syntax (struct getl_interface *sss) psppire_dict_replace_dictionary (the_data_store->dict, dataset_dict (the_dataset)); - { - PsppireCaseFile *pcf = psppire_case_file_new (dataset_source (the_dataset)); - - psppire_data_store_set_case_file (the_data_store, pcf); - } - - proc_set_active_file_data (the_dataset, NULL); + reader = proc_extract_active_file_data (the_dataset); + if (!lazy_casereader_destroy (reader, lazy_serial)) + psppire_data_store_set_case_file (the_data_store, + psppire_case_file_new (reader)); som_flush (); diff --git a/src/ui/gui/psppire-data-store.c b/src/ui/gui/psppire-data-store.c index 1e6568b7..fc5cdf92 100644 --- a/src/ui/gui/psppire-data-store.c +++ b/src/ui/gui/psppire-data-store.c @@ -165,6 +165,12 @@ psppire_data_store_get_case_count (const PsppireDataStore *store) return psppire_case_file_get_case_count (store->case_file); } +size_t +psppire_data_store_get_value_count (const PsppireDataStore *store) +{ + return psppire_dict_get_value_cnt (store->dict); +} + inline casenumber psppire_data_store_get_case_count_wrapper (const GSheetModel *model) { diff --git a/src/ui/gui/psppire-data-store.h b/src/ui/gui/psppire-data-store.h index ed4f0fd1..7d1e242e 100644 --- a/src/ui/gui/psppire-data-store.h +++ b/src/ui/gui/psppire-data-store.h @@ -142,6 +142,7 @@ gboolean psppire_data_store_set_string (PsppireDataStore *ds, glong row, glong column); casenumber psppire_data_store_get_case_count (const PsppireDataStore *ds); +size_t psppire_data_store_get_value_count (const PsppireDataStore *ds); #ifdef __cplusplus } diff --git a/src/ui/gui/psppire-dict.c b/src/ui/gui/psppire-dict.c index c9df6052..8560c223 100644 --- a/src/ui/gui/psppire-dict.c +++ b/src/ui/gui/psppire-dict.c @@ -411,6 +411,17 @@ psppire_dict_get_var_cnt (const PsppireDict *d) } +/* Return the number of `union value's in the dictionary */ +size_t +psppire_dict_get_value_cnt (const PsppireDict *d) +{ + g_return_val_if_fail (d, -1); + g_return_val_if_fail (d->dict, -1); + + return dict_get_next_value_idx (d->dict); +} + + /* Return a variable by name. Return NULL if it doesn't exist */ diff --git a/src/ui/gui/psppire-dict.h b/src/ui/gui/psppire-dict.h index 397097bf..1bf8bd35 100644 --- a/src/ui/gui/psppire-dict.h +++ b/src/ui/gui/psppire-dict.h @@ -69,6 +69,9 @@ void psppire_dict_delete_var (PsppireDict *s, gint idx); /* Return the number of variables in the dictionary */ gint psppire_dict_get_var_cnt (const PsppireDict *d); +/* Return the number of `union value's in the dictionary */ +size_t psppire_dict_get_value_cnt (const PsppireDict *d); + /* Return a variable by name. Return NULL if it doesn't exist */ -- 2.30.2