strtod: make it more-accurate typically, and don't require libm

author Paul R. Eggert <eggert@cs.ucla.edu>

Mon, 12 Jul 2010 16:14:10 +0000 (09:14 -0700)

committer Paul R. Eggert <eggert@lnxsrv01.seas.ucla.edu>

Mon, 12 Jul 2010 16:15:42 +0000 (09:15 -0700)
author Paul R. Eggert <eggert@cs.ucla.edu>
Mon, 12 Jul 2010 16:14:10 +0000 (09:14 -0700)
committer Paul R. Eggert <eggert@lnxsrv01.seas.ucla.edu>
Mon, 12 Jul 2010 16:15:42 +0000 (09:15 -0700)
diff --git a/ChangeLog b/ChangeLog

index 28b0b0f4840119f927e4274b33c04ed54f67fb42..97a604be279dd19d2f5389c139211fcdbd024184 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,25 @@
+2010-07-12  Paul R. Eggert  <eggert@cs.ucla.edu>
+
+       strtod: make it more-accurate typically, and don't require libm
+       * lib/strtod.c (_GL_ARG_NONNULL): Remove; no longer needed.
+       Include limits.h.  Don't include string.h.
+       (HAVE_LDEXP_IN_LIBC, HAVE_RAW_DECL_STRTOD): Define to 0 if not defined.
+       (locale_isspace): New function, so that no casts are needed to
+       check whether *s is a space.
+       (ldexp): Provide an unused dummy if not available.
+       (scale_radix_exp, parse_number, underlying_strtod): New functions.
+       (strtod): Use them.  This implementation prefers to use the
+       underlying strtod if available, falling back on our own code
+       only to fix known bugs.  This is more likely to produce an
+       accurate result.  Also, it avoids the use of libm functions.
+       * m4/strtod.m4 (gl_FUNC_STRTOD): Don't invoke _AC_LIBOBJ_STRTOD;
+       no longer needed.  Invoke AC_LIBOBJ([strtod]); don't know why this
+       was absent, but it caused a test failure with coreutils.
+       (gl_PREREQ_STRTOD): Check wither ldexp can be used without linking
+       with libm.
+       * modules/strtod (Makefile.am, Link): libm is no longer needed.
+       * modules/strtod-tests (Makefile.am): Likewise.
+
  2010-07-11  Pádraig Brady  <P@draigBrady.com>
              Bruno Haible  <bruno@clisp.org>
  
diff --git a/lib/strtod.c b/lib/strtod.c

index 371476ee4d2b5e99a33291866dc29e1d77159166..61887598cab0543097841b015906ffe526747013 100644 (file)
--- a/lib/strtod.c
+++ b/lib/strtod.c
@@ -16,261 +16,274 @@
  
  #include <config.h>
  
-/* Don't use __attribute__ __nonnull__ in this compilation unit.  Otherwise gcc
-   optimizes away the nptr == NULL test below.  */
-#define _GL_ARG_NONNULL(params)
-
  #include <stdlib.h>
  
  #include <ctype.h>
  #include <errno.h>
  #include <float.h>
+#include <limits.h>
  #include <math.h>
  #include <stdbool.h>
-#include <string.h>
  
  #include "c-ctype.h"
  
-/* Convert NPTR to a double.  If ENDPTR is not NULL, a pointer to the
-   character after the last one used in the number is put in *ENDPTR.  */
-double
-strtod (const char *nptr, char **endptr)
+#ifndef HAVE_LDEXP_IN_LIBC
+#define HAVE_LDEXP_IN_LIBC 0
+#endif
+#ifndef HAVE_RAW_DECL_STRTOD
+#define HAVE_RAW_DECL_STRTOD 0
+#endif
+
+/* Return true if C is a space in the current locale, avoiding
+   problems with signed char and isspace.  */
+static bool
+locale_isspace (char c)
  {
-  const unsigned char *s;
-  bool negative = false;
+  unsigned char uc = c;
+  return isspace (uc) != 0;
+}
  
-  /* The number so far.  */
-  double num;
+#if !HAVE_LDEXP_IN_LIBC
+ #define ldexp dummy_ldexp
+ static double ldexp (double x, int exponent) { return x + exponent; }
+#endif
  
-  bool got_dot;                 /* Found a decimal point.  */
-  bool got_digit;               /* Seen any digits.  */
-  bool hex = false;             /* Look for hex float exponent.  */
+/* Return X * BASE**EXPONENT.  Return an extreme value and set errno
+   to ERANGE if underflow or overflow occurs.  */
+static double
+scale_radix_exp (double x, int radix, long int exponent)
+{
+  /* If RADIX == 10, this code is neither precise nor fast; it is
+     merely a straightforward and relatively portable approximation.
+     If N == 2, this code is precise on a radix-2 implementation,
+     albeit perhaps not fast if ldexp is not in libc.  */
  
-  /* The exponent of the number.  */
-  long int exponent;
+  long int e = exponent;
  
-  if (nptr == NULL)
+  if (HAVE_LDEXP_IN_LIBC && radix == 2)
+    return ldexp (x, e < INT_MIN ? INT_MIN : INT_MAX < e ? INT_MAX : e);
+  else
      {
-      errno = EINVAL;
-      goto noconv;
-    }
-
-  /* Use unsigned char for the ctype routines.  */
-  s = (unsigned char *) nptr;
-
-  /* Eat whitespace.  */
-  while (isspace (*s))
-    ++s;
-
-  /* Get the sign.  */
-  negative = *s == '-';
-  if (*s == '-' || *s == '+')
-    ++s;
-
-  num = 0.0;
-  got_dot = false;
-  got_digit = false;
-  exponent = 0;
+      double r = x;
  
-  /* Check for hex float.  */
-  if (*s == '0' && c_tolower (s[1]) == 'x'
-      && (c_isxdigit (s[2]) || ('.' == s[2] && c_isxdigit (s[3]))))
-    {
-      hex = true;
-      s += 2;
-      for (;; ++s)
+      if (r != 0)
          {
-          if (c_isxdigit (*s))
+          if (e < 0)
              {
-              got_digit = true;
-
-              /* Make sure that multiplication by 16 will not overflow.  */
-              if (num > DBL_MAX / 16)
-                /* The value of the digit doesn't matter, since we have already
-                   gotten as many digits as can be represented in a `double'.
-                   This doesn't necessarily mean the result will overflow.
-                   The exponent may reduce it to within range.
-
-                   We just need to record that there was another
-                   digit so that we can multiply by 16 later.  */
-                ++exponent;
-              else
-                num = ((num * 16.0)
-                       + (c_tolower (*s) - (c_isdigit (*s) ? '0' : 'a' - 10)));
-
-              /* Keep track of the number of digits after the decimal point.
-                 If we just divided by 16 here, we would lose precision.  */
-              if (got_dot)
-                --exponent;
+              while (e++ != 0)
+                {
+                  r /= radix;
+                  if (r == 0 && x != 0)
+                    {
+                      errno = ERANGE;
+                      break;
+                    }
+                }
              }
-          else if (!got_dot && *s == '.')
-            /* Record that we have found the decimal point.  */
-            got_dot = true;
            else
-            /* Any other character terminates the number.  */
-            break;
-        }
-    }
-
-  /* Not a hex float.  */
-  else
-    {
-      for (;; ++s)
-        {
-          if (c_isdigit (*s))
              {
-              got_digit = true;
-
-              /* Make sure that multiplication by 10 will not overflow.  */
-              if (num > DBL_MAX * 0.1)
-                /* The value of the digit doesn't matter, since we have already
-                   gotten as many digits as can be represented in a `double'.
-                   This doesn't necessarily mean the result will overflow.
-                   The exponent may reduce it to within range.
-
-                   We just need to record that there was another
-                   digit so that we can multiply by 10 later.  */
-                ++exponent;
-              else
-                num = (num * 10.0) + (*s - '0');
-
-              /* Keep track of the number of digits after the decimal point.
-                 If we just divided by 10 here, we would lose precision.  */
-              if (got_dot)
-                --exponent;
+              while (e-- != 0)
+                {
+                  if (r < -DBL_MAX / radix)
+                    {
+                      errno = ERANGE;
+                      return -HUGE_VAL;
+                    }
+                  else if (DBL_MAX / radix < r)
+                    {
+                      errno = ERANGE;
+                      return HUGE_VAL;
+                    }
+                  else
+                    r *= radix;
+                }
              }
-          else if (!got_dot && *s == '.')
-            /* Record that we have found the decimal point.  */
-            got_dot = true;
-          else
-            /* Any other character terminates the number.  */
-            break;
          }
+
+      return r;
      }
+}
+
+/* Parse a number at NPTR; this is a bit like strtol (NPTR, ENDPTR)
+   except there are no leading spaces or signs or "0x", and ENDPTR is
+   nonnull.  The number uses a base BASE (either 10 or 16) fraction, a
+   radix RADIX (either 10 or 2) exponent, and exponent character
+   EXPCHAR.  To convert from a number of digits to a radix exponent,
+   multiply by RADIX_MULTIPLIER (either 1 or 4).  */
+static double
+parse_number (const char *nptr,
+              int base, int radix, int radix_multiplier, char expchar,
+              char **endptr)
+{
+  const char *s = nptr;
+  bool got_dot = false;
+  long int exponent = 0;
+  double num = 0;
  
-  if (!got_digit)
+  for (;; ++s)
      {
-      /* Check for infinities and NaNs.  */
-      if (c_tolower (*s) == 'i'
-          && c_tolower (s[1]) == 'n'
-          && c_tolower (s[2]) == 'f')
+      int digit;
+      if (c_isdigit (*s))
+        digit = *s - '0';
+      else if (base == 16 && c_isxdigit (*s))
+        digit = c_tolower (*s) - ('a' - 10);
+      else if (! got_dot && *s == '.')
          {
-          s += 3;
-          num = HUGE_VAL;
-          if (c_tolower (*s) == 'i'
-              && c_tolower (s[1]) == 'n'
-              && c_tolower (s[2]) == 'i'
-              && c_tolower (s[3]) == 't'
-              && c_tolower (s[4]) == 'y')
-            s += 5;
-          goto valid;
+          /* Record that we have found the decimal point.  */
+          got_dot = true;
+          continue;
          }
-#ifdef NAN
-      else if (c_tolower (*s) == 'n'
-               && c_tolower (s[1]) == 'a'
-               && c_tolower (s[2]) == 'n')
+      else
+        /* Any other character terminates the number.  */
+        break;
+
+      /* Make sure that multiplication by base will not overflow.  */
+      if (num <= DBL_MAX / base)
+        num = num * base + digit;
+      else
          {
-          s += 3;
-          num = NAN;
-          /* Since nan(<n-char-sequence>) is implementation-defined,
-             we define it by ignoring <n-char-sequence>.  A nicer
-             implementation would populate the bits of the NaN
-             according to interpreting n-char-sequence as a
-             hexadecimal number, but the result is still a NaN.  */
-          if (*s == '(')
-            {
-              const unsigned char *p = s + 1;
-              while (c_isalnum (*p))
-                p++;
-              if (*p == ')')
-                s = p + 1;
-            }
-          goto valid;
+          /* The value of the digit doesn't matter, since we have already
+             gotten as many digits as can be represented in a `double'.
+             This doesn't necessarily mean the result will overflow.
+             The exponent may reduce it to within range.
+
+             We just need to record that there was another
+             digit so that we can multiply by 10 later.  */
+          exponent += radix_multiplier;
          }
-#endif
-      goto noconv;
+
+      /* Keep track of the number of digits after the decimal point.
+         If we just divided by base here, we might lose precision.  */
+      if (got_dot)
+        exponent -= radix_multiplier;
      }
  
-  if (c_tolower (*s) == (hex ? 'p' : 'e') && !isspace (s[1]))
+  if (c_tolower (*s) == expchar && ! locale_isspace (s[1]))
      {
-      /* Get the exponent specified after the `e' or `E'.  */
+      /* Add any given exponent to the implicit one.  */
        int save = errno;
        char *end;
-      long int value;
+      long int value = strtol (s + 1, &end, 10);
+      errno = save;
  
-      errno = 0;
-      ++s;
-      value = strtol ((char *) s, &end, 10);
-      if (errno == ERANGE && num)
+      if (s + 1 != end)
          {
-          /* The exponent overflowed a `long int'.  It is probably a safe
-             assumption that an exponent that cannot be represented by
-             a `long int' exceeds the limits of a `double'.  */
-          if (endptr != NULL)
-            *endptr = end;
-          if (value < 0)
-            goto underflow;
-          else
-            goto overflow;
+          /* Skip past the exponent, and add in the implicit exponent,
+             resulting in an extreme value on overflow.  */
+          s = end;
+          exponent =
+            (exponent < 0
+             ? (value < LONG_MIN - exponent ? LONG_MIN : exponent + value)
+             : (LONG_MAX - exponent < value ? LONG_MAX : exponent + value));
          }
-      else if (end == (char *) s)
-        /* There was no exponent.  Reset END to point to
-           the 'e' or 'E', so *ENDPTR will be set there.  */
-        end = (char *) s - 1;
-      errno = save;
-      s = (unsigned char *) end;
-      exponent += value;
      }
  
-  if (num == 0.0)
-    goto valid;
+  *endptr = (char *) s;
+  return scale_radix_exp (num, radix, exponent);
+}
  
-  if (hex)
-    {
-      /* ldexp takes care of range errors.  */
-      num = ldexp (num, exponent);
-      goto valid;
-    }
+static double underlying_strtod (const char *, char **);
+
+/* Convert NPTR to a double.  If ENDPTR is not NULL, a pointer to the
+   character after the last one used in the number is put in *ENDPTR.  */
+double
+strtod (const char *nptr, char **endptr)
+{
+  bool negative = false;
+
+  /* The number so far.  */
+  double num;
+
+  const char *s = nptr;
+  char *end;
+
+  /* Eat whitespace.  */
+  while (locale_isspace (*s))
+    ++s;
+
+  /* Get the sign.  */
+  negative = *s == '-';
+  if (*s == '-' || *s == '+')
+    ++s;
  
-  /* Multiply NUM by 10 to the EXPONENT power,
-     checking for overflow and underflow.  */
+  num = underlying_strtod (s, &end);
  
-  if (exponent < 0)
+  if (c_isdigit (s[*s == '.']))
      {
-      if (num < DBL_MIN * pow (10.0, (double) -exponent))
-        goto underflow;
+      /* If a hex float was converted incorrectly, do it ourselves.  */
+      if (*s == '0' && c_tolower (s[1]) == 'x' && end <= s + 2
+          && c_isxdigit (s[2 + (s[2] == '.')]))
+        num = parse_number (s + 2, 16, 2, 4, 'p', &end);
+
+      s = end;
      }
-  else if (exponent > 0)
+
+  /* Check for infinities and NaNs.  */
+  else if (c_tolower (*s) == 'i'
+           && c_tolower (s[1]) == 'n'
+           && c_tolower (s[2]) == 'f')
      {
-      if (num > DBL_MAX * pow (10.0, (double) -exponent))
-        goto overflow;
+      s += 3;
+      if (c_tolower (*s) == 'i'
+          && c_tolower (s[1]) == 'n'
+          && c_tolower (s[2]) == 'i'
+          && c_tolower (s[3]) == 't'
+          && c_tolower (s[4]) == 'y')
+        s += 5;
+      num = HUGE_VAL;
      }
+  else if (c_tolower (*s) == 'n'
+           && c_tolower (s[1]) == 'a'
+           && c_tolower (s[2]) == 'n')
+    {
+      s += 3;
+      if (*s == '(')
+        {
+          const char *p = s + 1;
+          while (c_isalnum (*p))
+            p++;
+          if (*p == ')')
+            s = p + 1;
+        }
  
-  num *= pow (10.0, (double) exponent);
+      /* If the underlying implementation misparsed the NaN, assume
+         its result is incorrect, and return a NaN.  Normally it's
+         better to use the underlying implementation's result, since a
+         nice implementation populates the bits of the NaN according
+         to interpreting n-char-sequence as a hexadecimal number.  */
+      if (s != end)
+        num = NAN;
+    }
+  else
+    {
+      /* No conversion could be performed.  */
+      errno = EINVAL;
+      s = nptr;
+    }
  
- valid:
    if (endptr != NULL)
      *endptr = (char *) s;
    return negative ? -num : num;
+}
  
- overflow:
-  /* Return an overflow error.  */
-  if (endptr != NULL)
-    *endptr = (char *) s;
-  errno = ERANGE;
-  return negative ? -HUGE_VAL : HUGE_VAL;
-
- underflow:
-  /* Return an underflow error.  */
-  if (endptr != NULL)
-    *endptr = (char *) s;
-  errno = ERANGE;
-  return negative ? -0.0 : 0.0;
-
- noconv:
-  /* There was no number.  */
-  if (endptr != NULL)
-    *endptr = (char *) nptr;
-  errno = EINVAL;
-  return 0.0;
+/* The "underlying" strtod implementation.  This must be defined
+   after strtod because it #undefs strtod.  */
+static double
+underlying_strtod (const char *nptr, char **endptr)
+{
+  if (HAVE_RAW_DECL_STRTOD)
+    {
+      /* Prefer the native strtod if available.  Usually it should
+         work and it should give more-accurate results than our
+         approximation.  */
+      #undef strtod
+      return strtod (nptr, endptr);
+    }
+  else
+    {
+      /* Approximate strtod well enough for this module.  There's no
+         need to handle anything but finite unsigned decimal
+         numbers with nonnull ENDPTR.  */
+      return parse_number (nptr, 10, 10, 1, 'e', endptr);
+    }
  }
diff --git a/m4/strtod.m4 b/m4/strtod.m4

index aa6a1c53d50bdee99bcc6d47c625c410b1757c5d..05d36bd4b357511b3ce27fa7979fcc0330f7c7ee 100644 (file)
--- a/m4/strtod.m4
+++ b/m4/strtod.m4
@@ -11,7 +11,7 @@ AC_DEFUN([gl_FUNC_STRTOD],
    dnl Don't call AC_FUNC_STRTOD, because it does not have the right guess
    dnl when cross-compiling.
    dnl Don't call AC_CHECK_FUNCS([strtod]) because it would collide with the
-  dnl ac_cv_func_strtod variable set by the AC_FUNC_STRTOD macro,
+  dnl ac_cv_func_strtod variable set by the AC_FUNC_STRTOD macro.
    AC_CHECK_DECLS_ONCE([strtod])
    if test $ac_cv_have_decl_strtod != yes; then
      HAVE_STRTOD=0
@@ -113,12 +113,26 @@ numeric_equal (double x, double y)
      fi
    fi
    if test $HAVE_STRTOD = 0 || test $REPLACE_STRTOD = 1; then
+    AC_LIBOBJ([strtod])
      gl_PREREQ_STRTOD
-    dnl Use undocumented macro to set POW_LIB correctly.
-    _AC_LIBOBJ_STRTOD
    fi
  ])
  
  # Prerequisites of lib/strtod.c.
-# The need for pow() is already handled by AC_FUNC_STRTOD.
-AC_DEFUN([gl_PREREQ_STRTOD], [:])
+# FIXME: This implementation is a copy of printf-frexp.m4 and should be shared.
+AC_DEFUN([gl_PREREQ_STRTOD], [
+  AC_CACHE_CHECK([whether ldexp can be used without linking with libm],
+    [gl_cv_func_ldexp_no_libm],
+    [
+      AC_TRY_LINK([#include <math.h>
+                   double x;
+                   int y;],
+                  [return ldexp (x, y) < 1;],
+        [gl_cv_func_ldexp_no_libm=yes],
+        [gl_cv_func_ldexp_no_libm=no])
+    ])
+  if test $gl_cv_func_ldexp_no_libm = yes; then
+    AC_DEFINE([HAVE_LDEXP_IN_LIBC], [1],
+      [Define if the ldexp function is available in libc.])
+  fi
+])
diff --git a/modules/strtod b/modules/strtod

index 9a4a0f31d2a54384528f0056d584b7915e09e219..d4f64c13d4eeeec4404ac6ec41d31271b2f3318d 100644 (file)
--- a/modules/strtod
+++ b/modules/strtod
@@ -15,14 +15,10 @@ gl_FUNC_STRTOD
  gl_STDLIB_MODULE_INDICATOR([strtod])
  
  Makefile.am:
-LIBS += $(POW_LIB)
  
  Include:
  <stdlib.h>
  
-Link:
-$(POW_LIB)
-
  License:
  LGPL
  
diff --git a/modules/strtod-tests b/modules/strtod-tests

index 8d5be7f7f7889045f17b38614212337e92610ffe..bea7825e777725a1ca2e182abaf6ec750f64784c 100644 (file)
--- a/modules/strtod-tests
+++ b/modules/strtod-tests
@@ -11,6 +11,5 @@ signbit
  configure.ac:
  
  Makefile.am:
-LIBS += $(POW_LIB)
  TESTS += test-strtod
  check_PROGRAMS += test-strtod
author	Paul R. Eggert <eggert@cs.ucla.edu>
	Mon, 12 Jul 2010 16:14:10 +0000 (09:14 -0700)
committer	Paul R. Eggert <eggert@lnxsrv01.seas.ucla.edu>
	Mon, 12 Jul 2010 16:15:42 +0000 (09:15 -0700)
ChangeLog		patch \| blob \| history
lib/strtod.c		patch \| blob \| history
m4/strtod.m4		patch \| blob \| history
modules/strtod		patch \| blob \| history
modules/strtod-tests		patch \| blob \| history