+2008-09-14 Ralf Wildenhues <Ralf.Wildenhues@gmx.de>
+
+ * lib/fstrcmp.h (fstrcmp_bounded): New declaration.
+ (fstrcmp): Define in terms of fstrcmp_bounded.
+ * lib/fstrcmp.c (fstrcmp_bounded): Renamed from fstrcmp. Add lower_bound argument.
+ Return quickly if the result is certainly < lower_bound.
+ * tests/test-fstrcmp.c (check_fstrcmp): Test also fstrcmp_bounded.
+
2008-09-14 Ralf Wildenhues <Ralf.Wildenhues@gmx.de>
* lib/diffseq.h (EARLY_ABORT): New macro.
gl_once_define(static, keys_init_once)
-/* NAME
- fstrcmp - fuzzy string compare
-
- SYNOPSIS
- double fstrcmp(const char *, const char *);
-
- DESCRIPTION
- The fstrcmp function may be used to compare two string for
- similarity. It is very useful in reducing "cascade" or
- "secondary" errors in compilers or other situations where
- symbol tables occur.
-
- RETURNS
- double; 0 if the strings are entirly dissimilar, 1 if the
- strings are identical, and a number in between if they are
- similar. */
-
double
-fstrcmp (const char *string1, const char *string2)
+fstrcmp_bounded (const char *string1, const char *string2, double lower_bound)
{
struct context ctxt;
- int xvec_length;
- int yvec_length;
+ int xvec_length = strlen (string1);
+ int yvec_length = strlen (string2);
int i;
size_t fdiag_len;
int *buffer;
size_t bufmax;
+ /* short-circuit obvious comparisons */
+ if (xvec_length == 0 || yvec_length == 0)
+ return (xvec_length == 0 && yvec_length == 0 ? 1.0 : 0.0);
+
+ if (lower_bound > 0)
+ {
+ /* Compute a quick upper bound.
+ Each edit is an insertion or deletion of an element, hence modifies
+ the length of the sequence by at most 1.
+ Therefore, when starting from a sequence X and ending at a sequence Y,
+ with N edits, | yvec_length - xvec_length | <= N. (Proof by
+ induction over N.)
+ So, at the end, we will have
+ xvec_edit_count + yvec_edit_count >= | xvec_length - yvec_length |.
+ and hence
+ result
+ = (xvec_length + yvec_length - (xvec_edit_count + yvec_edit_count))
+ / (xvec_length + yvec_length)
+ <= (xvec_length + yvec_length - | yvec_length - xvec_length |)
+ / (xvec_length + yvec_length)
+ = 2 * min (xvec_length, yvec_length) / (xvec_length + yvec_length).
+ */
+ volatile double upper_bound =
+ (double) (2 * MIN (xvec_length, yvec_length))
+ / (xvec_length + yvec_length);
+
+ if (upper_bound < lower_bound)
+ /* Return an arbitrary value < LOWER_BOUND. */
+ return 0.0;
+ }
+
/* set the info for each string. */
ctxt.xvec = string1;
- xvec_length = strlen (string1);
ctxt.yvec = string2;
- yvec_length = strlen (string2);
-
- /* short-circuit obvious comparisons */
- if (xvec_length == 0 && yvec_length == 0)
- return 1.0;
- if (xvec_length == 0 || yvec_length == 0)
- return 0.0;
/* Set TOO_EXPENSIVE to be approximate square root of input size,
bounded below by 256. */
/* Fuzzy string comparison.
- Copyright (C) 1995, 2000, 2002-2003, 2006 Free Software Foundation, Inc.
+ Copyright (C) 1995, 2000, 2002-2003, 2006, 2008 Free Software
+ Foundation, Inc.
This file was written by Peter Miller <pmiller@agso.gov.au>
#endif
/* Fuzzy compare of S1 and S2. Return a measure for the similarity of S1
- and S1. The higher the result, the more similar the strings are. */
+ and S1. The higher the result, the more similar the strings are.
+ The result is bounded between 0 (meaning very dissimilar strings) and
+ 1 (meaning identical strings). */
extern double fstrcmp (const char *s1, const char *s2);
+/* Like fstrcmp (S1, S2), except that if the result is < LOWER_BOUND, an
+ arbitrary other value < LOWER_BOUND can be returned. */
+extern double fstrcmp_bounded (const char *s1, const char *s2,
+ double lower_bound);
+
+/* A shortcut for fstrcmp. Avoids a function call. */
+#define fstrcmp(s1,s2) fstrcmp_bounded (s1, s2, 0.0)
+
#ifdef __cplusplus
}
#endif
compliant by default, to avoid that msgmerge results become platform and
compiler option dependent. 'volatile' is a portable alternative to gcc's
-ffloat-store option. */
- volatile double result = fstrcmp (string1, string2);
- return (result == expected);
+ {
+ volatile double result = fstrcmp (string1, string2);
+ if (!(result == expected))
+ return false;
+ }
+ {
+ volatile double result = fstrcmp_bounded (string1, string2, expected);
+ if (!(result == expected))
+ return false;
+ }
+ {
+ double bound = expected * 0.5; /* implies bound <= expected */
+ volatile double result = fstrcmp_bounded (string1, string2, bound);
+ if (!(result == expected))
+ return false;
+ }
+ {
+ double bound = (1 + expected) * 0.5;
+ if (expected < bound)
+ {
+ volatile double result = fstrcmp_bounded (string1, string2, bound);
+ if (!(result < bound))
+ return false;
+ }
+ }
+
+ return true;
}
int