+2008-09-14 Ralf Wildenhues <Ralf.Wildenhues@gmx.de>
+
+ * lib/fstrcmp.c (EXTRA_CONTEXT_FIELDS): Add field 'edit_count_limit'.
+ (EARLY_ABORT): Return true when the edit_count has grown too beyond the
+ limit.
+ (fstrcmp_bounded): Initialize the edit_count_limit. Return 0 when
+ compareseq was aborted.
+
2008-09-14 Bruno Haible <bruno@clisp.org>
* lib/fstrcmp.c (EXTRA_CONTEXT_FIELDS): Combine xvec_edit_count and
* lib/fstrcmp.h (fstrcmp_bounded): New declaration.
(fstrcmp): Define in terms of fstrcmp_bounded.
- * lib/fstrcmp.c (fstrcmp_bounded): Renamed from fstrcmp. Add lower_bound argument.
+ * lib/fstrcmp.c (fstrcmp_bounded): Renamed from fstrcmp. Add
+ lower_bound argument.
Return quickly if the result is certainly < lower_bound.
* tests/test-fstrcmp.c (check_fstrcmp): Test also fstrcmp_bounded.
#define EQUAL(x,y) ((x) == (y))
#define OFFSET int
#define EXTRA_CONTEXT_FIELDS \
- /* The number of elements inserted, plus the number of elements deleted. */ \
+ /* The number of edits beyond which the computation can be aborted. */ \
+ int edit_count_limit; \
+ /* The number of edits (= number of elements inserted, plus the number of \
+ elements deleted), temporarily minus edit_count_limit. */ \
int edit_count;
#define NOTE_DELETE(ctxt, xoff) ctxt->edit_count++
#define NOTE_INSERT(ctxt, yoff) ctxt->edit_count++
+#define EARLY_ABORT(ctxt) ctxt->edit_count > 0
/* We don't need USE_HEURISTIC, since it is unlikely in typical uses of
fstrcmp(). */
#include "diffseq.h"
ctxt.fdiag = buffer + yvec_length + 1;
ctxt.bdiag = ctxt.fdiag + fdiag_len;
+ /* The edit_count is only ever increased. The computation can be aborted
+ when
+ (xvec_length + yvec_length - edit_count) / (xvec_length + yvec_length)
+ < lower_bound,
+ or equivalently
+ edit_count > (xvec_length + yvec_length) * (1 - lower_bound)
+ or equivalently
+ edit_count > floor((xvec_length + yvec_length) * (1 - lower_bound)).
+ We need to add an epsilon inside the floor(...) argument, to neutralize
+ rounding errors. */
+ ctxt.edit_count_limit =
+ (lower_bound < 1.0
+ ? (int) ((xvec_length + yvec_length) * (1.0 - lower_bound + 0.000001))
+ : 0);
+
/* Now do the main comparison algorithm */
- ctxt.edit_count = 0;
- compareseq (0, xvec_length, 0, yvec_length, 0,
- &ctxt);
+ ctxt.edit_count = - ctxt.edit_count_limit;
+ if (compareseq (0, xvec_length, 0, yvec_length, 0, &ctxt))
+ /* The edit_count passed the limit. Hence the result would be
+ < lower_bound. We can return any value < lower_bound instead. */
+ return 0.0;
+ ctxt.edit_count += ctxt.edit_count_limit;
/* The result is
((number of chars in common) / (average length of the strings)).