Abort the fstrcmp computation early when a lower bound has been given.

author Ralf Wildenhues <Ralf.Wildenhues@gmx.de>

Sun, 14 Sep 2008 21:33:23 +0000 (23:33 +0200)

committer Bruno Haible <bruno@clisp.org>

Sun, 14 Sep 2008 21:33:23 +0000 (23:33 +0200)
author Ralf Wildenhues <Ralf.Wildenhues@gmx.de>
Sun, 14 Sep 2008 21:33:23 +0000 (23:33 +0200)
committer Bruno Haible <bruno@clisp.org>
Sun, 14 Sep 2008 21:33:23 +0000 (23:33 +0200)
diff --git a/ChangeLog b/ChangeLog

index 4f7d9e6ab29a37bf44d76a0bc5e4f48ac85269c3..e42bd2002d431fcf5a7e6f1af2cbedb39f272b37 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2008-09-14  Ralf Wildenhues  <Ralf.Wildenhues@gmx.de>
+
+       * lib/fstrcmp.c (EXTRA_CONTEXT_FIELDS): Add field 'edit_count_limit'.
+       (EARLY_ABORT): Return true when the edit_count has grown too beyond the
+       limit.
+       (fstrcmp_bounded): Initialize the edit_count_limit. Return 0 when
+       compareseq was aborted.
+
  2008-09-14  Bruno Haible  <bruno@clisp.org>
  
         * lib/fstrcmp.c (EXTRA_CONTEXT_FIELDS): Combine xvec_edit_count and
@@ -9,7 +17,8 @@
  
         * lib/fstrcmp.h (fstrcmp_bounded): New declaration.
         (fstrcmp): Define in terms of fstrcmp_bounded.
-       * lib/fstrcmp.c (fstrcmp_bounded): Renamed from fstrcmp. Add lower_bound argument.
+       * lib/fstrcmp.c (fstrcmp_bounded): Renamed from fstrcmp. Add
+       lower_bound argument.
         Return quickly if the result is certainly < lower_bound.
         * tests/test-fstrcmp.c (check_fstrcmp): Test also fstrcmp_bounded.
  
diff --git a/lib/fstrcmp.c b/lib/fstrcmp.c

index 32aa0c2f005efcff3b9dea80b142b95e4eb838c4..cd0d7b7204b8e0ca11f9d55fd210776b78e0cfde 100644 (file)
--- a/lib/fstrcmp.c
+++ b/lib/fstrcmp.c
@@ -65,10 +65,14 @@
  #define EQUAL(x,y) ((x) == (y))
  #define OFFSET int
  #define EXTRA_CONTEXT_FIELDS \
-  /* The number of elements inserted, plus the number of elements deleted. */ \
+  /* The number of edits beyond which the computation can be aborted. */ \
+  int edit_count_limit; \
+  /* The number of edits (= number of elements inserted, plus the number of \
+     elements deleted), temporarily minus edit_count_limit. */ \
    int edit_count;
  #define NOTE_DELETE(ctxt, xoff) ctxt->edit_count++
  #define NOTE_INSERT(ctxt, yoff) ctxt->edit_count++
+#define EARLY_ABORT(ctxt) ctxt->edit_count > 0
  /* We don't need USE_HEURISTIC, since it is unlikely in typical uses of
     fstrcmp().  */
  #include "diffseq.h"
@@ -175,10 +179,28 @@ fstrcmp_bounded (const char *string1, const char *string2, double lower_bound)
    ctxt.fdiag = buffer + yvec_length + 1;
    ctxt.bdiag = ctxt.fdiag + fdiag_len;
  
+  /* The edit_count is only ever increased.  The computation can be aborted
+     when
+       (xvec_length + yvec_length - edit_count) / (xvec_length + yvec_length)
+       < lower_bound,
+     or equivalently
+       edit_count > (xvec_length + yvec_length) * (1 - lower_bound)
+     or equivalently
+       edit_count > floor((xvec_length + yvec_length) * (1 - lower_bound)).
+     We need to add an epsilon inside the floor(...) argument, to neutralize
+     rounding errors.  */
+  ctxt.edit_count_limit =
+    (lower_bound < 1.0
+     ? (int) ((xvec_length + yvec_length) * (1.0 - lower_bound + 0.000001))
+     : 0);
+
    /* Now do the main comparison algorithm */
-  ctxt.edit_count = 0;
-  compareseq (0, xvec_length, 0, yvec_length, 0,
-             &ctxt);
+  ctxt.edit_count = - ctxt.edit_count_limit;
+  if (compareseq (0, xvec_length, 0, yvec_length, 0, &ctxt))
+    /* The edit_count passed the limit.  Hence the result would be
+       < lower_bound.  We can return any value < lower_bound instead.  */
+    return 0.0;
+  ctxt.edit_count += ctxt.edit_count_limit;
  
    /* The result is
         ((number of chars in common) / (average length of the strings)).
author	Ralf Wildenhues <Ralf.Wildenhues@gmx.de>
	Sun, 14 Sep 2008 21:33:23 +0000 (23:33 +0200)
committer	Bruno Haible <bruno@clisp.org>
	Sun, 14 Sep 2008 21:33:23 +0000 (23:33 +0200)
ChangeLog		patch \| blob \| history
lib/fstrcmp.c		patch \| blob \| history