src/libpspp/taint.h

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 2007 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17 #ifndef LIBPSPP_TAINT_H
  18 #define LIBPSPP_TAINT_H 1
  19
  20 /* Tainting and taint propagation.
  21
  22    Properly handling I/O errors and other hard errors in data
  23    handling is important.  At a minimum, we must notify the user
  24    that an error occurred and refrain from presenting possibly
  25    corrupted output.  It is unacceptable, however, to simply
  26    terminate PSPP when an I/O error occurs, because of the
  27    unfriendliness of that approach, especially in a GUI
  28    environment.  We should also propagate the error to the top
  29    level of command execution; that is, ensure that the command
  30    procedure returns CMD_CASCADING_FAILURE to its caller.
  31
  32    Usually in C we propagate errors via return values, or by
  33    maintaining an error state on an object (e.g. the error state
  34    that the ferror function tests on C streams).  But neither
  35    approach is ideal for PSPP.  Using return values requires the
  36    programmer to pay more attention to error handling than one
  37    would like, especially given how difficult it can be to test
  38    error paths.  Maintaining error states on important PSPP
  39    objects (e.g. casereaders, casewriters) is a step up, but it
  40    still requires more attention than one would like, because
  41    quite often there are many such objects in use at any given
  42    time, and an I/O error encountered by any of them indicates
  43    that the final result of any computation that depends on that
  44    object is incorrect.
  45
  46    The solution implemented here is an attempt to automate as
  47    much as possible of PSPP's error-detection problem.  It is
  48    based on use of "taint" objects, created with taint_create or
  49    taint_clone.  Each taint object represents a state of
  50    correctness or corruption (taint) in an associated object
  51    whose correctness must be established.  The taint_set_taint
  52    function is used to mark a taint object as tainted.  The taint
  53    status of a taint object can be queried with taint_is_tainted.
  54
  55    The benefit of taint objects lies in the ability to connect
  56    them together in propagation relationships, using
  57    taint_propagate.  The existence of a propagation relationship
  58    from taint object A to taint object B means that, should
  59    object A ever become tainted, then object B will automatically
  60    be marked tainted as well.  This models the situation where
  61    the data represented by B are derived from data obtained from
  62    A.  This is a common situation in PSPP; for example, the data
  63    in one casereader or casewriter are often derived from data in
  64    another casereader or casewriter.
  65
  66    Taint propagation is transitive: if A propagates to B and B
  67    propagates to C, then tainting A taints both B and C.  Taint
  68    propagation is not symmetric: propagation from A to B does
  69    not imply propagation from B to A.  However, taint propagation
  70    is robust against loops, so that if A propagates to B and vice
  71    versa, whether directly or indirectly, then tainting either A
  72    or B will cause the other to be tainted, without producing an
  73    infinite loop.
  74
  75    The implementation is robust against destruction of taints in
  76    propagation relationships.  When this happens, taint
  77    propagation through the destroyed taint object is preserved,
  78    that is, if A taints B and B taints C, then destroying B will
  79    preserve the transitive relationship, so that tainting A will
  80    still taint C.
  81
  82    Taint objects actually propagate two different types of taints
  83    across the taint graph.  The first type of taint is the one
  84    already described, which indicates that an associated object
  85    has corrupted state.  The second type of taint, called a
  86    "successor-taint", does not necessarily indicate that the
  87    associated object is corrupted.  Rather, it indicates some
  88    successor of the associated object is corrupted, or was
  89    corrupted some time in the past before it was destroyed.  (A
  90    "successor" of a taint object X is any taint object that can
  91    be reached by following propagation relationships starting
  92    from X.)  Stated another way, when a taint object is marked
  93    tainted, all the taint objects that are reachable by following
  94    propagation relationships *backward* are marked with a
  95    successor-taint.  In addition, any object that is marked
  96    tainted is also marked successor-tainted.
  97
  98    The value of a successor-taint is in summarizing the history
  99    of the taint objects derived from a common parent.  For
 100    example, consider a casereader that represents the active
 101    dataset.  A statistical procedure can clone this casereader any
 102    number of times and pass it to analysis functions, which may
 103    in turn clone it themselves, pass it to sort or
 104    merge functions, etc.  Conventionally, all of these functions
 105    would have to carefully check for I/O errors and propagate
 106    them upward, which is error-prone and inconvenient.  However,
 107    given the successor-taint feature, the statistical procedure
 108    may simply check the successor-taint on the top-level
 109    casereader after calling the analysis functions and, if a
 110    successor-taint is present, skip displaying the procedure's
 111    output.  Thus, error checking is centralized, simplified, and
 112    made convenient.  This feature is now used in a number of the
 113    PSPP statistical procedures; search the source tree for
 114    "taint_has_tainted_successor" for details. */
 115
 116 #include <stdbool.h>
 117
 118 struct taint *taint_create (void);   /* Creates a taint object.  */
 119 struct taint *taint_clone (const struct taint *);   /* Creates one from an existing taint.  */
 120 bool taint_destroy (struct taint *);   /* Destroys a taint object.  */
     /* taint_create and taint_clone are the two ways to obtain a taint
        object.  Destroying a taint object that takes part in
        propagation relationships preserves propagation through it: if
        A propagates to B and B to C, tainting A still taints C after B
        has been destroyed.

        NOTE(review): this header does not say how a clone relates to
        the taint it was cloned from, nor what taint_destroy's bool
        result means -- confirm both against the implementation.  */
 121
 122 void taint_propagate (const struct taint *from, const struct taint *to);
     /* Establishes a propagation relationship from FROM to TO: should
        FROM ever become tainted, TO is automatically marked tainted as
        well.  The relationship is one-way (it does not imply
        propagation from TO back to FROM) and transitive, and
        propagation loops, direct or indirect, do not cause an infinite
        loop.  */
 123
 124 bool taint_is_tainted (const struct taint *);   /* Queries the taint status.  */
 125 void taint_set_taint (const struct taint *);    /* Marks the object tainted.  */
     /* Marking a taint object tainted also taints every object it
        propagates to, and marks the object itself and every object
        from which it can be reached through propagation relationships
        with a successor-taint.

        Note that taint_set_taint's parameter is const-qualified even
        though the call changes the object's taint status.  */
 126
 127 bool taint_has_tainted_successor (const struct taint *);
 128 void taint_reset_successor_taint (const struct taint *);
     /* taint_has_tainted_successor is the check for a successor-taint:
        a mark indicating that the object itself, or some object
        reachable from it through propagation relationships, is
        tainted or was tainted before being destroyed.  A procedure can
        test it on a top-level object after running its analysis to
        decide whether to skip displaying output.

        NOTE(review): taint_reset_successor_taint is not described in
        the overview comment; presumably it clears the successor-taint
        mark on the given object -- confirm against the
        implementation.  */
 129
 130 #endif /* libpspp/taint.h */