Simplify result computation.

author Bruno Haible <bruno@clisp.org>

Sun, 14 Sep 2008 19:26:31 +0000 (21:26 +0200)

committer Bruno Haible <bruno@clisp.org>

Sun, 14 Sep 2008 19:26:31 +0000 (21:26 +0200)
author Bruno Haible <bruno@clisp.org>
Sun, 14 Sep 2008 19:26:31 +0000 (21:26 +0200)
committer Bruno Haible <bruno@clisp.org>
Sun, 14 Sep 2008 19:26:31 +0000 (21:26 +0200)
diff --git a/ChangeLog b/ChangeLog

index 39d568307ff15357f48834df9ccbe619c8f8b312..4f7d9e6ab29a37bf44d76a0bc5e4f48ac85269c3 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2008-09-14  Bruno Haible  <bruno@clisp.org>
+
+       * lib/fstrcmp.c (EXTRA_CONTEXT_FIELDS): Combine xvec_edit_count and
+       yvec_edit_count.
+       (NOTE_DELETE, NOTE_INSERT): Increment the combined edit count.
+       (fstrcmp_bounded): Simplify result computation accordingly.
+
  2008-09-14  Ralf Wildenhues  <Ralf.Wildenhues@gmx.de>
  
         * lib/fstrcmp.h (fstrcmp_bounded): New declaration.
diff --git a/lib/fstrcmp.c b/lib/fstrcmp.c

index 86a351fa20cbfd6bce6e6b1d7b8ba33ced61d9b3..32aa0c2f005efcff3b9dea80b142b95e4eb838c4 100644 (file)
--- a/lib/fstrcmp.c
+++ b/lib/fstrcmp.c
@@ -65,11 +65,10 @@
  #define EQUAL(x,y) ((x) == (y))
  #define OFFSET int
  #define EXTRA_CONTEXT_FIELDS \
-  /* The number of elements inserted or deleted. */ \
-  int xvec_edit_count; \
-  int yvec_edit_count;
-#define NOTE_DELETE(ctxt, xoff) ctxt->xvec_edit_count++
-#define NOTE_INSERT(ctxt, yoff) ctxt->yvec_edit_count++
+  /* The number of elements inserted, plus the number of elements deleted. */ \
+  int edit_count;
+#define NOTE_DELETE(ctxt, xoff) ctxt->edit_count++
+#define NOTE_INSERT(ctxt, yoff) ctxt->edit_count++
  /* We don't need USE_HEURISTIC, since it is unlikely in typical uses of
     fstrcmp().  */
  #include "diffseq.h"
@@ -122,10 +121,10 @@ fstrcmp_bounded (const char *string1, const char *string2, double lower_bound)
          with N edits,  | yvec_length - xvec_length | <= N.  (Proof by
          induction over N.)
          So, at the end, we will have
-          xvec_edit_count + yvec_edit_count >= | xvec_length - yvec_length |.
+          edit_count >= | xvec_length - yvec_length |.
          and hence
            result
-            = (xvec_length + yvec_length - (xvec_edit_count + yvec_edit_count))
+            = (xvec_length + yvec_length - edit_count)
                / (xvec_length + yvec_length)
              <= (xvec_length + yvec_length - | yvec_length - xvec_length |)
                 / (xvec_length + yvec_length)
@@ -177,16 +176,18 @@ fstrcmp_bounded (const char *string1, const char *string2, double lower_bound)
    ctxt.bdiag = ctxt.fdiag + fdiag_len;
  
    /* Now do the main comparison algorithm */
-  ctxt.xvec_edit_count = 0;
-  ctxt.yvec_edit_count = 0;
+  ctxt.edit_count = 0;
    compareseq (0, xvec_length, 0, yvec_length, 0,
               &ctxt);
  
    /* The result is
         ((number of chars in common) / (average length of the strings)).
+     The numerator is
+       = xvec_length - (number of calls to NOTE_DELETE)
+       = yvec_length - (number of calls to NOTE_INSERT)
+       = 1/2 * (xvec_length + yvec_length - (number of edits)).
       This is admittedly biased towards finding that the strings are
       similar, however it does produce meaningful results.  */
-  return ((double) (xvec_length + yvec_length
-                   - ctxt.yvec_edit_count - ctxt.xvec_edit_count)
+  return ((double) (xvec_length + yvec_length - ctxt.edit_count)
           / (xvec_length + yvec_length));
  }
author	Bruno Haible <bruno@clisp.org>
	Sun, 14 Sep 2008 19:26:31 +0000 (21:26 +0200)
committer	Bruno Haible <bruno@clisp.org>
	Sun, 14 Sep 2008 19:26:31 +0000 (21:26 +0200)
ChangeLog		patch | blob | history
lib/fstrcmp.c		patch | blob | history