From 9afdb54a2cdc03e14539cd2d046f1aa86f5fdb33 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 7 Jun 2007 05:19:10 +0000 Subject: [PATCH] Add error propagation layer. Patch #5916, slightly revised. --- src/libpspp/ChangeLog | 10 ++ src/libpspp/automake.mk | 2 + src/libpspp/taint.c | 309 ++++++++++++++++++++++++++++++++++++++++ src/libpspp/taint.h | 132 +++++++++++++++++ 4 files changed, 453 insertions(+) create mode 100644 src/libpspp/taint.c create mode 100644 src/libpspp/taint.h diff --git a/src/libpspp/ChangeLog b/src/libpspp/ChangeLog index 9e965630..43da54ee 100644 --- a/src/libpspp/ChangeLog +++ b/src/libpspp/ChangeLog @@ -1,3 +1,13 @@ +2007-06-06 Ben Pfaff + + Add error propagation layer. Patch #5916, slightly revised. + + * automake.mk: Add new files. + + * taint.c: New file. + + * taint.h: New file. + 2007-06-03 Ben Pfaff Add ability for reverse iteration to tower code. diff --git a/src/libpspp/automake.mk b/src/libpspp/automake.mk index c626d61a..ca6b7195 100644 --- a/src/libpspp/automake.mk +++ b/src/libpspp/automake.mk @@ -63,6 +63,8 @@ src_libpspp_libpspp_a_SOURCES = \ src/libpspp/str.h \ src/libpspp/syntax-gen.c \ src/libpspp/syntax-gen.h \ + src/libpspp/taint.c \ + src/libpspp/taint.h \ src/libpspp/tower.c \ src/libpspp/tower.h \ src/libpspp/verbose-msg.c \ diff --git a/src/libpspp/taint.c b/src/libpspp/taint.c new file mode 100644 index 00000000..c6ccc11b --- /dev/null +++ b/src/libpspp/taint.c @@ -0,0 +1,309 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2007 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +#include + +#include + +#include + +#include +#include + +#include "xalloc.h" + +/* This code maintains two invariants: + + 1. If a node is tainted, then all of its successors are + tainted. + + 2. If a node is tainted, then it and all of its predecessors are + successor-tainted. */ + +/* A list of pointers to taint structures. */ +struct taint_list + { + size_t cnt; + struct taint **taints; + }; + +static void taint_list_init (struct taint_list *); +static void taint_list_destroy (struct taint_list *); +static void taint_list_add (struct taint_list *, struct taint *); +static void taint_list_remove (struct taint_list *, const struct taint *); + +/* A taint. */ +struct taint + { + size_t ref_cnt; /* Number of owners. */ + struct taint_list successors; /* Successors in graph. */ + struct taint_list predecessors; /* Predecessors in graph. */ + bool tainted; /* Is this node tainted? */ + bool tainted_successor; /* Is/was any derived taint tainted? */ + }; + +static void recursively_set_taint (struct taint *); +static void recursively_set_tainted_successor (struct taint *); + +/* Creates and returns a new taint object. */ +struct taint * +taint_create (void) +{ + struct taint *taint = xmalloc (sizeof *taint); + taint->ref_cnt = 1; + taint_list_init (&taint->successors); + taint_list_init (&taint->predecessors); + taint->tainted = false; + taint->tainted_successor = false; + return taint; +} + +/* Returns a clone of the given TAINT. + The new and old taint objects are logically indistinguishable, + as if they were the same object. (In this implementation, + they are in fact the same object, but this is not a guarantee + made by the interface.) */ +struct taint * +taint_clone (const struct taint *taint_) +{ + struct taint *taint = (struct taint *) taint_; + + assert (taint->ref_cnt > 0); + taint->ref_cnt++; + return taint; +} + +/* Destroys the given TAINT. + Returns false if TAINT was tainted, true otherwise. + Any propagation relationships through TAINT are preserved. + That is, if A taints B and B taints C, then destroying B will + preserve the transitive relationship, so that tainting A will + still taint C. */ +bool +taint_destroy (struct taint *taint) +{ + bool was_tainted = taint_is_tainted (taint); + if (--taint->ref_cnt == 0) + { + size_t i, j; + + for (i = 0; i < taint->predecessors.cnt; i++) + for (j = 0; j < taint->successors.cnt; j++) + taint_propagate (taint->predecessors.taints[i], + taint->successors.taints[j]); + + for (i = 0; i < taint->predecessors.cnt; i++) + taint_list_remove (&taint->predecessors.taints[i]->successors, taint); + for (i = 0; i < taint->successors.cnt; i++) + taint_list_remove (&taint->successors.taints[i]->predecessors, taint); + + taint_list_destroy (&taint->successors); + taint_list_destroy (&taint->predecessors); + free (taint); + } + return !was_tainted; +} + +/* Adds a propagation relationship from FROM to TO. This means + that, should FROM ever become tainted, then TO will + automatically be marked tainted as well. This takes effect + immediately: if FROM is currently tainted, then TO will be + tainted after the call completes. + + Taint propagation is transitive: if A propagates to B and B + propagates to C, then tainting A taints both B and C. Taint + propagation is not commutative: propagation from A to B does + not imply propagation from B to A. Taint propagation is + robust against loops, so that if A propagates to B and vice + versa, whether directly or indirectly, then tainting either A + or B will cause the other to be tainted, without producing an + infinite loop. */ +void +taint_propagate (const struct taint *from_, const struct taint *to_) +{ + struct taint *from = (struct taint *) from_; + struct taint *to = (struct taint *) to_; + + if (from != to) + { + taint_list_add (&from->successors, to); + taint_list_add (&to->predecessors, from); + if (from->tainted && !to->tainted) + recursively_set_taint (to); + else if (to->tainted_successor && !from->tainted_successor) + recursively_set_tainted_successor (from); + } +} + +/* Returns true if TAINT is tainted, false otherwise. */ +bool +taint_is_tainted (const struct taint *taint) +{ + return taint->tainted; +} + +/* Marks TAINT tainted and propagates the taint to all of its + successors. */ +void +taint_set_taint (const struct taint *taint_) +{ + struct taint *taint = (struct taint *) taint_; + if (!taint->tainted) + recursively_set_taint (taint); +} + +/* Returns true if TAINT is successor-tainted, that is, if it or + any of its successors is or ever has been tainted. (A + "successor" of a taint object X is any taint object that can + be reached by following propagation relationships starting + from X.) */ +bool +taint_has_tainted_successor (const struct taint *taint) +{ + return taint->tainted_successor; +} + +/* Attempts to reset the successor-taint on TAINT. This is + successful only if TAINT currently has no tainted successor. */ +void +taint_reset_successor_taint (const struct taint *taint_) +{ + struct taint *taint = (struct taint *) taint_; + + if (taint->tainted_successor) + { + size_t i; + + for (i = 0; i < taint->successors.cnt; i++) + if (taint->successors.taints[i]->tainted_successor) + return; + + taint->tainted_successor = false; + } +} + +/* Initializes LIST as an empty list of taints. */ +static void +taint_list_init (struct taint_list *list) +{ + list->cnt = 0; + list->taints = NULL; +} + +/* Destroys LIST. */ +static void +taint_list_destroy (struct taint_list *list) +{ + free (list->taints); +} + +/* Returns true if TAINT is in LIST, false otherwise. */ +static bool +taint_list_contains (const struct taint_list *list, const struct taint *taint) +{ + size_t i; + + for (i = 0; i < list->cnt; i++) + if (list->taints[i] == taint) + return true; + + return false; +} + +/* Returns true if X is zero or a power of 2, false otherwise. */ +static bool +is_zero_or_power_of_2 (size_t x) +{ + return (x & (x - 1)) == 0; +} + +/* Adds TAINT to LIST, if it isn't already in the list. */ +static void +taint_list_add (struct taint_list *list, struct taint *taint) +{ + if (!taint_list_contains (list, taint)) + { + /* To save a few bytes of memory per list, we don't store + the list capacity as a separate member. Instead, the + list capacity is always zero or a power of 2. Thus, if + the list count is one of these threshold values, we need + to allocate more memory. */ + if (is_zero_or_power_of_2 (list->cnt)) + list->taints = xnrealloc (list->taints, + list->cnt == 0 ? 1 : 2 * list->cnt, + sizeof *list->taints); + list->taints[list->cnt++] = taint; + } +} + +/* Removes TAINT from LIST (which must contain it). */ +static void +taint_list_remove (struct taint_list *list, const struct taint *taint) +{ + size_t i; + + for (i = 0; i < list->cnt; i++) + if (list->taints[i] == taint) + { + remove_element (list->taints, list->cnt, sizeof *list->taints, i); + list->cnt--; + return; + } + + NOT_REACHED (); +} + +/* Marks TAINT as tainted, as well as all of its successors + recursively. Also marks TAINT's predecessors as + successor-tainted, recursively. */ +static void +recursively_set_taint (struct taint *taint) +{ + size_t i; + + taint->tainted = taint->tainted_successor = true; + for (i = 0; i < taint->successors.cnt; i++) + { + struct taint *s = taint->successors.taints[i]; + if (!s->tainted) + recursively_set_taint (s); + } + for (i = 0; i < taint->predecessors.cnt; i++) + { + struct taint *p = taint->predecessors.taints[i]; + if (!p->tainted_successor) + recursively_set_tainted_successor (p); + } +} + +/* Marks TAINT as successor-tainted, as well as all of its + predecessors recursively. */ +static void +recursively_set_tainted_successor (struct taint *taint) +{ + size_t i; + + taint->tainted_successor = true; + for (i = 0; i < taint->predecessors.cnt; i++) + { + struct taint *p = taint->predecessors.taints[i]; + if (!p->tainted_successor) + recursively_set_tainted_successor (p); + } +} + diff --git a/src/libpspp/taint.h b/src/libpspp/taint.h new file mode 100644 index 00000000..89ac5ab6 --- /dev/null +++ b/src/libpspp/taint.h @@ -0,0 +1,132 @@ +/* PSPP - computes sample statistics. + Copyright (C) 2007 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ + +#ifndef LIBPSPP_TAINT_H +#define LIBPSPP_TAINT_H 1 + +/* Tainting and taint propagation. + + Properly handling I/O errors and other hard errors in data + handling is important. At a minimum, we must notify the user + that an error occurred and refrain from presenting possibly + corrupted output. It is unacceptable, however, to simply + terminate PSPP when an I/O error occurs, because of the + unfriendliness of that approach, especially in a GUI + environment. We should also propagate the error to the top + level of command execution; that is, ensure that the command + procedure returns CMD_CASCADING_FAILURE to its caller. + + Usually in C we propagate errors via return values, or by + maintaining an error state on an object (e.g. the error state + that the ferror function tests on C streams). But neither + approach is ideal for PSPP. Using return values requires the + programmer to pay more attention to error handling than one + would like, especially given how difficult it can be to test + error paths. Maintaining error states on important PSPP + objects (e.g. casereaders, casewriters) is a step up, but it + still requires more attention than one would like, because + quite often there are many such objects in use at any given + time, and an I/O error encountered by any of them indicates + that the final result of any computation that depends on that + object is incorrect. + + The solution implemented here is an attempt to automate as + much as possible of PSPP's error-detection problem. It is + based on use of "taint" objects, created with taint_create or + taint_clone. Each taint object represents a state of + correctness or corruption (taint) in an associated object + whose correctness must be established. The taint_set_taint + function is used to mark a taint object as tainted. The taint + status of a taint object can be queried with taint_is_tainted. + + The benefit of taint objects lies in the ability to connect + them together in propagation relationships, using + taint_propagate. The existence of a propagation relationship + from taint object A to taint object B means that, should + object A ever become tainted, then object B will automatically + be marked tainted as well. This models the situation where + the data represented by B are derived from data obtained from + A. This is a common situation in PSPP; for example, the data + in one casereader or casewriter are often derived from data in + another casereader or casewriter. + + Taint propagation is transitive: if A propagates to B and B + propagates to C, then tainting A taints both B and C. Taint + propagation is not commutative: propagation from A to B does + not imply propagation from B to A. However, taint propagation + is robust against loops, so that if A propagates to B and vice + versa, whether directly or indirectly, then tainting either A + or B will cause the other to be tainted, without producing an + infinite loop. + + The implementation is robust against destruction of taints in + propagation relationships. When this happens, taint + propagation through the destroyed taint object is preserved, + that is, if A taints B and B taints C, then destroying B will + preserve the transitive relationship, so that tainting A will + still taint C. + + Taint objects actually propagate two different types of taints + across the taint graph. The first type of taint is the one + already described, which indicates that an associated object + has corrupted state. The second type of taint, called a + "successor-taint" does not necessarily indicate that the + associated object is corrupted. Rather, it indicates some + successor of the associated object is corrupted, or was + corrupted some time in the past before it was destroyed. (A + "successor" of a taint object X is any taint object that can + be reached by following propagation relationships starting + from X.) Stated another way, when a taint object is marked + tainted, all the taint objects that are reachable by following + propagation relationships *backward* are marked with a + successor-taint. In addition, any object that is marked + tainted is also marked successor-tainted. + + The value of a successor-taint is in summarizing the history + of the taint objects derived from a common parent. For + example, consider a casereader that represents the active + file. A statistical procedure can clone this casereader any + number of times and pass it to analysis functions, which may + themselves in turn clone it themselves, pass it to sort or + merge functions, etc. Conventionally, all of these functions + would have to carefully check for I/O errors and propagate + them upward, which is error-prone and inconvenient. However, + given the successor-taint feature, the statistical procedure + may simply check the successor-taint on the top-level + casereader after calling the analysis functions and, if a + successor-taint is present, skip displaying the procedure's + output. Thus, error checking is centralized, simplified, and + made convenient. This feature is now used in a number of the + PSPP statistical procedures; search the source tree for + "taint_has_tainted_successor" for details. */ + +#include + +struct taint *taint_create (void); +struct taint *taint_clone (const struct taint *); +bool taint_destroy (struct taint *); + +void taint_propagate (const struct taint *from, const struct taint *to); + +bool taint_is_tainted (const struct taint *); +void taint_set_taint (const struct taint *); + +bool taint_has_tainted_successor (const struct taint *); +void taint_reset_successor_taint (const struct taint *); + +#endif /* libpspp/taint.h */ -- 2.30.2