--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+#include <config.h>
+
+#include <libpspp/taint.h>
+
+#include <stddef.h>
+
+#include <libpspp/array.h>
+#include <libpspp/assertion.h>
+
+#include "xalloc.h"
+
+/* This code maintains two invariants:
+
+ 1. If a node is tainted, then all of its successors are
+ tainted.
+
+ 2. If a node is tainted, then it and all of its predecessors are
+ successor-tainted. */
+
+/* A list of pointers to taint structures. */
+struct taint_list
+ {
+ size_t cnt;
+ struct taint **taints;
+ };
+
+static void taint_list_init (struct taint_list *);
+static void taint_list_destroy (struct taint_list *);
+static void taint_list_add (struct taint_list *, struct taint *);
+static void taint_list_remove (struct taint_list *, const struct taint *);
+
+/* A taint. */
+struct taint
+ {
+ size_t ref_cnt; /* Number of owners. */
+ struct taint_list successors; /* Successors in graph. */
+ struct taint_list predecessors; /* Predecessors in graph. */
+ bool tainted; /* Is this node tainted? */
+ bool tainted_successor; /* Is/was any derived taint tainted? */
+ };
+
+static void recursively_set_taint (struct taint *);
+static void recursively_set_tainted_successor (struct taint *);
+
+/* Creates and returns a new taint object. */
+struct taint *
+taint_create (void)
+{
+ struct taint *taint = xmalloc (sizeof *taint);
+ taint->ref_cnt = 1;
+ taint_list_init (&taint->successors);
+ taint_list_init (&taint->predecessors);
+ taint->tainted = false;
+ taint->tainted_successor = false;
+ return taint;
+}
+
+/* Returns a clone of the given TAINT.
+ The new and old taint objects are logically indistinguishable,
+ as if they were the same object. (In this implementation,
+ they are in fact the same object, but this is not a guarantee
+ made by the interface.) */
+struct taint *
+taint_clone (const struct taint *taint_)
+{
+ struct taint *taint = (struct taint *) taint_;
+
+ assert (taint->ref_cnt > 0);
+ taint->ref_cnt++;
+ return taint;
+}
+
+/* Destroys the given TAINT.
+ Returns false if TAINT was tainted, true otherwise.
+ Any propagation relationships through TAINT are preserved.
+ That is, if A taints B and B taints C, then destroying B will
+ preserve the transitive relationship, so that tainting A will
+ still taint C. */
+bool
+taint_destroy (struct taint *taint)
+{
+ bool was_tainted = taint_is_tainted (taint);
+ if (--taint->ref_cnt == 0)
+ {
+ size_t i, j;
+
+ for (i = 0; i < taint->predecessors.cnt; i++)
+ for (j = 0; j < taint->successors.cnt; j++)
+ taint_propagate (taint->predecessors.taints[i],
+ taint->successors.taints[j]);
+
+ for (i = 0; i < taint->predecessors.cnt; i++)
+ taint_list_remove (&taint->predecessors.taints[i]->successors, taint);
+ for (i = 0; i < taint->successors.cnt; i++)
+ taint_list_remove (&taint->successors.taints[i]->predecessors, taint);
+
+ taint_list_destroy (&taint->successors);
+ taint_list_destroy (&taint->predecessors);
+ free (taint);
+ }
+ return !was_tainted;
+}
+
+/* Adds a propagation relationship from FROM to TO. This means
+ that, should FROM ever become tainted, then TO will
+ automatically be marked tainted as well. This takes effect
+ immediately: if FROM is currently tainted, then TO will be
+ tainted after the call completes.
+
+ Taint propagation is transitive: if A propagates to B and B
+ propagates to C, then tainting A taints both B and C. Taint
+ propagation is not commutative: propagation from A to B does
+ not imply propagation from B to A. Taint propagation is
+ robust against loops, so that if A propagates to B and vice
+ versa, whether directly or indirectly, then tainting either A
+ or B will cause the other to be tainted, without producing an
+ infinite loop. */
+void
+taint_propagate (const struct taint *from_, const struct taint *to_)
+{
+ struct taint *from = (struct taint *) from_;
+ struct taint *to = (struct taint *) to_;
+
+ if (from != to)
+ {
+ taint_list_add (&from->successors, to);
+ taint_list_add (&to->predecessors, from);
+ if (from->tainted && !to->tainted)
+ recursively_set_taint (to);
+ else if (to->tainted_successor && !from->tainted_successor)
+ recursively_set_tainted_successor (from);
+ }
+}
+
+/* Returns true if TAINT is tainted, false otherwise. */
+bool
+taint_is_tainted (const struct taint *taint)
+{
+ return taint->tainted;
+}
+
+/* Marks TAINT tainted and propagates the taint to all of its
+ successors. */
+void
+taint_set_taint (const struct taint *taint_)
+{
+ struct taint *taint = (struct taint *) taint_;
+ if (!taint->tainted)
+ recursively_set_taint (taint);
+}
+
+/* Returns true if TAINT is successor-tainted, that is, if it or
+ any of its successors is or ever has been tainted. (A
+ "successor" of a taint object X is any taint object that can
+ be reached by following propagation relationships starting
+ from X.) */
+bool
+taint_has_tainted_successor (const struct taint *taint)
+{
+ return taint->tainted_successor;
+}
+
+/* Attempts to reset the successor-taint on TAINT. This is
+ successful only if TAINT currently has no tainted successor. */
+void
+taint_reset_successor_taint (const struct taint *taint_)
+{
+ struct taint *taint = (struct taint *) taint_;
+
+ if (taint->tainted_successor)
+ {
+ size_t i;
+
+ for (i = 0; i < taint->successors.cnt; i++)
+ if (taint->successors.taints[i]->tainted_successor)
+ return;
+
+ taint->tainted_successor = false;
+ }
+}
+\f
+/* Initializes LIST as an empty list of taints. */
+static void
+taint_list_init (struct taint_list *list)
+{
+ list->cnt = 0;
+ list->taints = NULL;
+}
+
+/* Destroys LIST. */
+static void
+taint_list_destroy (struct taint_list *list)
+{
+ free (list->taints);
+}
+
+/* Returns true if TAINT is in LIST, false otherwise. */
+static bool
+taint_list_contains (const struct taint_list *list, const struct taint *taint)
+{
+ size_t i;
+
+ for (i = 0; i < list->cnt; i++)
+ if (list->taints[i] == taint)
+ return true;
+
+ return false;
+}
+
+/* Returns true if X is zero or a power of 2, false otherwise. */
+static bool
+is_zero_or_power_of_2 (size_t x)
+{
+ return (x & (x - 1)) == 0;
+}
+
+/* Adds TAINT to LIST, if it isn't already in the list. */
+static void
+taint_list_add (struct taint_list *list, struct taint *taint)
+{
+ if (!taint_list_contains (list, taint))
+ {
+ /* To save a few bytes of memory per list, we don't store
+ the list capacity as a separate member. Instead, the
+ list capacity is always zero or a power of 2. Thus, if
+ the list count is one of these threshold values, we need
+ to allocate more memory. */
+ if (is_zero_or_power_of_2 (list->cnt))
+ list->taints = xnrealloc (list->taints,
+ list->cnt == 0 ? 1 : 2 * list->cnt,
+ sizeof *list->taints);
+ list->taints[list->cnt++] = taint;
+ }
+}
+
+/* Removes TAINT from LIST (which must contain it). */
+static void
+taint_list_remove (struct taint_list *list, const struct taint *taint)
+{
+ size_t i;
+
+ for (i = 0; i < list->cnt; i++)
+ if (list->taints[i] == taint)
+ {
+ remove_element (list->taints, list->cnt, sizeof *list->taints, i);
+ list->cnt--;
+ return;
+ }
+
+ NOT_REACHED ();
+}
+\f
+/* Marks TAINT as tainted, as well as all of its successors
+ recursively. Also marks TAINT's predecessors as
+ successor-tainted, recursively. */
+static void
+recursively_set_taint (struct taint *taint)
+{
+ size_t i;
+
+ taint->tainted = taint->tainted_successor = true;
+ for (i = 0; i < taint->successors.cnt; i++)
+ {
+ struct taint *s = taint->successors.taints[i];
+ if (!s->tainted)
+ recursively_set_taint (s);
+ }
+ for (i = 0; i < taint->predecessors.cnt; i++)
+ {
+ struct taint *p = taint->predecessors.taints[i];
+ if (!p->tainted_successor)
+ recursively_set_tainted_successor (p);
+ }
+}
+
+/* Marks TAINT as successor-tainted, as well as all of its
+ predecessors recursively. */
+static void
+recursively_set_tainted_successor (struct taint *taint)
+{
+ size_t i;
+
+ taint->tainted_successor = true;
+ for (i = 0; i < taint->predecessors.cnt; i++)
+ {
+ struct taint *p = taint->predecessors.taints[i];
+ if (!p->tainted_successor)
+ recursively_set_tainted_successor (p);
+ }
+}
+
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+#ifndef LIBPSPP_TAINT_H
+#define LIBPSPP_TAINT_H 1
+
+/* Tainting and taint propagation.
+
+ Properly handling I/O errors and other hard errors in data
+ handling is important. At a minimum, we must notify the user
+ that an error occurred and refrain from presenting possibly
+ corrupted output. It is unacceptable, however, to simply
+ terminate PSPP when an I/O error occurs, because of the
+ unfriendliness of that approach, especially in a GUI
+ environment. We should also propagate the error to the top
+ level of command execution; that is, ensure that the command
+ procedure returns CMD_CASCADING_FAILURE to its caller.
+
+ Usually in C we propagate errors via return values, or by
+ maintaining an error state on an object (e.g. the error state
+ that the ferror function tests on C streams). But neither
+ approach is ideal for PSPP. Using return values requires the
+ programmer to pay more attention to error handling than one
+ would like, especially given how difficult it can be to test
+ error paths. Maintaining error states on important PSPP
+ objects (e.g. casereaders, casewriters) is a step up, but it
+ still requires more attention than one would like, because
+ quite often there are many such objects in use at any given
+ time, and an I/O error encountered by any of them indicates
+ that the final result of any computation that depends on that
+ object is incorrect.
+
+ The solution implemented here is an attempt to automate as
+ much as possible of PSPP's error-detection problem. It is
+ based on use of "taint" objects, created with taint_create or
+ taint_clone. Each taint object represents a state of
+ correctness or corruption (taint) in an associated object
+ whose correctness must be established. The taint_set_taint
+ function is used to mark a taint object as tainted. The taint
+ status of a taint object can be queried with taint_is_tainted.
+
+ The benefit of taint objects lies in the ability to connect
+ them together in propagation relationships, using
+ taint_propagate. The existence of a propagation relationship
+ from taint object A to taint object B means that, should
+ object A ever become tainted, then object B will automatically
+ be marked tainted as well. This models the situation where
+ the data represented by B are derived from data obtained from
+ A. This is a common situation in PSPP; for example, the data
+ in one casereader or casewriter are often derived from data in
+ another casereader or casewriter.
+
+ Taint propagation is transitive: if A propagates to B and B
+ propagates to C, then tainting A taints both B and C. Taint
+ propagation is not commutative: propagation from A to B does
+ not imply propagation from B to A. However, taint propagation
+ is robust against loops, so that if A propagates to B and vice
+ versa, whether directly or indirectly, then tainting either A
+ or B will cause the other to be tainted, without producing an
+ infinite loop.
+
+ The implementation is robust against destruction of taints in
+ propagation relationships. When this happens, taint
+ propagation through the destroyed taint object is preserved,
+ that is, if A taints B and B taints C, then destroying B will
+ preserve the transitive relationship, so that tainting A will
+ still taint C.
+
+ Taint objects actually propagate two different types of taints
+ across the taint graph. The first type of taint is the one
+ already described, which indicates that an associated object
+ has corrupted state. The second type of taint, called a
+ "successor-taint" does not necessarily indicate that the
+ associated object is corrupted. Rather, it indicates some
+ successor of the associated object is corrupted, or was
+ corrupted some time in the past before it was destroyed. (A
+ "successor" of a taint object X is any taint object that can
+ be reached by following propagation relationships starting
+ from X.) Stated another way, when a taint object is marked
+ tainted, all the taint objects that are reachable by following
+ propagation relationships *backward* are marked with a
+ successor-taint. In addition, any object that is marked
+ tainted is also marked successor-tainted.
+
+ The value of a successor-taint is in summarizing the history
+ of the taint objects derived from a common parent. For
+ example, consider a casereader that represents the active
+ file. A statistical procedure can clone this casereader any
+ number of times and pass it to analysis functions, which may
+ themselves in turn clone it themselves, pass it to sort or
+ merge functions, etc. Conventionally, all of these functions
+ would have to carefully check for I/O errors and propagate
+ them upward, which is error-prone and inconvenient. However,
+ given the successor-taint feature, the statistical procedure
+ may simply check the successor-taint on the top-level
+ casereader after calling the analysis functions and, if a
+ successor-taint is present, skip displaying the procedure's
+ output. Thus, error checking is centralized, simplified, and
+ made convenient. This feature is now used in a number of the
+ PSPP statistical procedures; search the source tree for
+ "taint_has_tainted_successor" for details. */
+
+#include <stdbool.h>
+
+struct taint *taint_create (void);
+struct taint *taint_clone (const struct taint *);
+bool taint_destroy (struct taint *);
+
+void taint_propagate (const struct taint *from, const struct taint *to);
+
+bool taint_is_tainted (const struct taint *);
+void taint_set_taint (const struct taint *);
+
+bool taint_has_tainted_successor (const struct taint *);
+void taint_reset_successor_taint (const struct taint *);
+
+#endif /* libpspp/taint.h */