From 53d725a9a75cf1644a4cbc9a7d08855ac02b65e4 Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@cs.stanford.edu>
Date: Sun, 12 Dec 2010 14:00:28 -0800
Subject: [PATCH] float-format: Eliminate tests' dependence on exact string
 encoding.

Until now, the float-format tests have depended on the PSPP syntax
accepting arbitrary byte values in strings, without treating them as part
of any particular encoding.  The lexer is being rewritten in a way that
makes this assumption false, so this commit removes the assumption from
the float-format tests.  After this commit, the tests use only ASCII
characters in strings: each byte of a binary floating-point representation
is written as two hexadecimal digits.
---
 src/language/tests/float-format.c |  80 ++++++++++++++++------
 tests/libpspp/float-format.at     | 107 +++++++++++++++---------------
 2 files changed, 114 insertions(+), 73 deletions(-)

diff --git a/src/language/tests/float-format.c b/src/language/tests/float-format.c
index 817246bc..b7a428b1 100644
--- a/src/language/tests/float-format.c
+++ b/src/language/tests/float-format.c
@@ -16,18 +16,18 @@
 
 #include <config.h>
 
-#include <libpspp/float-format.h>
+#include "libpspp/float-format.h"
 
-#include "gettext.h"
 #include <inttypes.h>
+#include <limits.h>
+#include <unistr.h>
 
-#include <language/command.h>
-#include <language/lexer/lexer.h>
-#include <libpspp/assertion.h>
-#include <libpspp/message.h>
-#include <libpspp/str.h>
-
-#define _(msgid) gettext (msgid)
+#include "language/command.h"
+#include "language/lexer/lexer.h"
+#include "libpspp/assertion.h"
+#include "libpspp/cast.h"
+#include "libpspp/message.h"
+#include "libpspp/str.h"
 
 /* Maximum supported size of a floating-point number, in bytes. */
 #define FP_MAX_SIZE 32
@@ -93,6 +93,32 @@ get_float_format_name (enum float_format format)
   NOT_REACHED ();
 }
 
+/* Returns the value (0...15) of hex digit C, or INT_MAX if C is not one. */
+static int
+digit_value (int c)
+{
+  switch (c)
+    {
+    case '0': return 0;
+    case '1': return 1;
+    case '2': return 2;
+    case '3': return 3;
+    case '4': return 4;
+    case '5': return 5;
+    case '6': return 6;
+    case '7': return 7;
+    case '8': return 8;
+    case '9': return 9;
+    case 'a': case 'A': return 10;
+    case 'b': case 'B': return 11;
+    case 'c': case 'C': return 12;
+    case 'd': case 'D': return 13;
+    case 'e': case 'E': return 14;
+    case 'f': case 'F': return 15;
+    default: return INT_MAX;    /* Not a hex digit. */
+    }
+}
+
 /* Parses a number in the form FORMAT(STRING), where FORMAT is
    the name of the format and STRING gives the number's
    representation.  Also supports ordinary floating-point numbers
@@ -100,6 +126,7 @@ get_float_format_name (enum float_format format)
 static bool
 parse_fp (struct lexer *lexer, struct fp *fp)
 {
+  memset (fp, 0, sizeof *fp);
   if (lex_is_number (lexer))
     {
       double number = lex_number (lexer);
@@ -109,34 +136,47 @@ parse_fp (struct lexer *lexer, struct fp *fp)
     }
   else if (lex_token (lexer) == T_ID)
     {
-      size_t length;
+      struct substring s;
 
       if (!parse_float_format (lexer, &fp->format)
           || !lex_force_match (lexer, T_LPAREN)
           || !lex_force_string (lexer))
         return false;
 
-      length = ss_length (lex_tokss (lexer));
+      s = lex_tokss (lexer);
       if (fp->format != FLOAT_HEX)
         {
-          if (length != float_get_size (fp->format))
+          size_t i;
+
+          if (s.length != float_get_size (fp->format) * 2)
             {
-              msg (SE, _("%zu-byte string needed but %zu-byte string "
-                         "supplied."),
-                   float_get_size (fp->format), length);
+              msg (SE, "%zu-byte string needed but %zu-byte string "
+                   "supplied.", float_get_size (fp->format), s.length);
               return false;
             }
-          assert (length <= sizeof fp->data);
-          memcpy (fp->data, ss_data (lex_tokss (lexer)), length);
+          assert (s.length / 2 <= sizeof fp->data);
+          for (i = 0; i < s.length / 2; i++)
+            {
+              int hi = digit_value (s.string[i * 2]);
+              int lo = digit_value (s.string[i * 2 + 1]);
+
+              if (hi >= 16 || lo >= 16)
+                {
+                  msg (SE, "Invalid hex digit in string.");
+                  return false;
+                }
+
+              fp->data[i] = hi * 16 + lo;
+            }
         }
       else
         {
-          if (length >= sizeof fp->data)
+          if (s.length >= sizeof fp->data)
             {
-              msg (SE, _("Hexadecimal floating constant too long."));
+              msg (SE, "Hexadecimal floating constant too long.");
               return false;
             }
-          strncpy (CHAR_CAST_BUG (char *,fp->data), lex_tokcstr (lexer), sizeof fp->data);
+          memcpy (fp->data, s.string, s.length);
         }
 
       lex_get (lexer);
diff --git a/tests/libpspp/float-format.at b/tests/libpspp/float-format.at
index a00a3537..e84166c1 100644
--- a/tests/libpspp/float-format.at
+++ b/tests/libpspp/float-format.at
@@ -21,77 +21,78 @@ AT_DATA([float-format.txt], [dnl
 # x: hexadecimal digits
 
 # IEEE special values.
- 0 == isb(x'00000000')
-x('Infinity') == isb(x'7f800000')
-x('-Infinity') == isb(x'ff800000')
-x('NaN:') => isb(x'7f800001')		# NaN requires nonzero fraction.
-x('NaN:e000000000000000') == isb(x'7ff00000') == idb(x'7ffe000000000000')
-x('NaN:5a5a5e0000000000') == isb(x'7fad2d2f') == idb(x'7ff5a5a5e0000000')
-x('NaN:975612abcdef4000') == idb(x'7ff975612abcdef4')
-x('-NaN:e000000000000000') == isb(x'fff00000') == idb(x'fffe000000000000')
-x('-NaN:5a5a5e0000000000') == isb(x'ffad2d2f') == idb(x'fff5a5a5e0000000')
-x('-NaN:975612abcdef4000') == idb(x'fff975612abcdef4')
+ 0 == isb('00000000')
+x('Infinity') == isb('7f800000')
+x('-Infinity') == isb('ff800000')
+x('NaN:') => isb('7f800001')		# NaN requires nonzero fraction.
+x('NaN:e000000000000000') == isb('7ff00000') == idb('7ffe000000000000')
+x('NaN:5a5a5e0000000000') == isb('7fad2d2f') == idb('7ff5a5a5e0000000')
+x('NaN:975612abcdef4000') == idb('7ff975612abcdef4')
+x('-NaN:e000000000000000') == isb('fff00000') == idb('fffe000000000000')
+x('-NaN:5a5a5e0000000000') == isb('ffad2d2f') == idb('fff5a5a5e0000000')
+x('-NaN:975612abcdef4000') == idb('fff975612abcdef4')
 
 # PSPP special values.
-x('Missing') == isb(x'ff7fffff') == idb(x'ffefffffffffffff') == isl(x'ffff7fff') == idl(x'ffffffffffffefff') == vf(x'ffffffff') == vd(x'ffffffffffffffff') == vg(x'ffffffffffffffff') == zs(x'ffffffff') == zl(x'ffffffffffffffff')
-x('Lowest') == isb(x'ff7ffffe') == idb(x'ffeffffffffffffe') == isl(x'feff7fff') == idl(x'feffffffffffefff') == vf(x'fffffeff') == vd(x'fffffeffffffffff') == vg(x'fffffeffffffffff') == zs(x'fffffffe') == zl(x'fffffffffffffffe')
-x('Highest') == isb(x'7f7fffff') == idb(x'7fefffffffffffff') == isl(x'ffff7f7f') == idl(x'ffffffffffffef7f') == vf(x'ff7fffff') == vd(x'ffffffffff7fffff') == vg(x'ffffffffff7fffff') == zs(x'7fffffff') == zl(x'7fffffffffffffff')
+x('Missing') == isb('ff7fffff') == idb('ffefffffffffffff') == isl('ffff7fff') == idl('ffffffffffffefff') == vf('ffffffff') == vd('ffffffffffffffff') == vg('ffffffffffffffff') == zs('ffffffff') == zl('ffffffffffffffff')
+x('Lowest') == isb('ff7ffffe') == idb('ffeffffffffffffe') == isl('feff7fff') == idl('feffffffffffefff') == vf('fffffeff') == vd('fffffeffffffffff') == vg('fffffeffffffffff') == zs('fffffffe') == zl('fffffffffffffffe')
+x('Highest') == isb('7f7fffff') == idb('7fefffffffffffff') == isl('ffff7f7f') == idl('ffffffffffffef7f') == vf('ff7fffff') == vd('ffffffffff7fffff') == vg('ffffffffff7fffff') == zs('7fffffff') == zl('7fffffffffffffff')
 
 # From Wikipedia.
-0.15625 == isb(b'00111110001000000000000000000000')
--118.625 == isb(b'11000010111011010100000000000000')
+0.15625 == isb('3e200000')
+-118.625 == isb('c2ed4000')
 
 # http://www.psc.edu/general/software/packages/ieee/ieee.html
-x('NaN:0400000000000000') == isb(b'01111111100000100000000000000000')
-x('-NaN:2225540000000000') == isb(b'11111111100100010001001010101010')
-2 == isb(b'01000000000000000000000000000000')
-6.5 == isb(b'01000000110100000000000000000000')
--6.5 == isb(b'11000000110100000000000000000000')
-x('.4p-124') == isb(b'00000000100000000000000000000000')
-x('.2p-124') == isb(b'00000000010000000000000000000000')
+x('NaN:0400000000000000') == isb('7f820000')
+x('-NaN:2225540000000000') == isb('ff9112aa')
+2 == isb('40000000')
+6.5 == isb('40d00000')
+-6.5 == isb('c0d00000')
+x('.4p-124') == isb('00800000')
+x('.2p-124') == isb('00400000')
 
 # Using converter at http://babbage.cs.qc.edu/IEEE-754/Decimal.html
 # plus Emacs 'calc' to convert decimal to hexadecimal
-x('.7b74bc6a7ef9db23p8') => isb(x'42f6e979')		# 123.456
-x('.7b74bc6a7ef9db23p8') => idb(x'405edd2f1a9fbe77')
-x('.817427d2d4642004p-12') => isb(x'39017428')		# .0001234567
-x('.817427d2d4642004p-12') => idb(x'3f202e84fa5a8c84')
-x('.446c3b15f9926688p168') => isb(x'7f800000')		# 1e50; overflow
-x('.446c3b15f9926688p168') => idb(x'4a511b0ec57e649a')
+x('.7b74bc6a7ef9db23p8') => isb('42f6e979')		# 123.456
+x('.7b74bc6a7ef9db23p8') => idb('405edd2f1a9fbe77')
+x('.817427d2d4642004p-12') => isb('39017428')		# .0001234567
+x('.817427d2d4642004p-12') => idb('3f202e84fa5a8c84')
+x('.446c3b15f9926688p168') => isb('7f800000')		# 1e50; overflow
+x('.446c3b15f9926688p168') => idb('4a511b0ec57e649a')
 
 # From multiple editions of the z/Architecture Principles of Operation
 # manual.
-	      1.0 == zs(x'41100000') == isb(x'3f800000')
-	      0.5 == zs(x'40800000') == isb(x'3f000000')
-       x('.4p-4') == zs(x'3f400000') == isb(x'3c800000')
-		0 == zs(x'00000000') == isb(x'00000000')
-	             zs(x'80000000') == isb(x'80000000')
-	      -15 == zs(x'c1f00000') == isb(x'c1700000')
-# x('.ffffffp252') == zs(x'7fffffff')
-      x('.3b4p8') == zs(x'423b4000')
-     x('.1p-256') == zs(x'00100000')
-     x('.4p-124') == zs(x'21400000') == isb(x'00800000')
-     x('.8p-148') == zs(x'1b800000') == isb(x'00000001')
-# x('.ffffffp128') == zs(x'60ffffff') == isb(x'7f7fffff')
-     x('.1p-256') == zs(x'00100000')
-     x('.1p-256') => isb(x'00000000')              # Underflow to zero.
- x('.ffffffp248') == zs(x'7effffff')
- x('.ffffffp248') => isb(x'7f800000')              # Overflow to +Infinity.
+	      1.0 == zs('41100000') == isb('3f800000')
+	      0.5 == zs('40800000') == isb('3f000000')
+       x('.4p-4') == zs('3f400000') == isb('3c800000')
+		0 == zs('00000000') == isb('00000000')
+	             zs('80000000') == isb('80000000')
+	      -15 == zs('c1f00000') == isb('c1700000')
+# x('.ffffffp252') == zs('7fffffff')
+      x('.3b4p8') == zs('423b4000')
+     x('.1p-256') == zs('00100000')
+     x('.4p-124') == zs('21400000') == isb('00800000')
+     x('.8p-148') == zs('1b800000') == isb('00000001')
+# x('.ffffffp128') == zs('60ffffff') == isb('7f7fffff')
+     x('.1p-256') == zs('00100000')
+     x('.1p-256') => isb('00000000')              # Underflow to zero.
+ x('.ffffffp248') == zs('7effffff')
+ x('.ffffffp248') => isb('7f800000')              # Overflow to +Infinity.
 
-            x('.4p-1020') => zl(x'0000000000000000')     # Underflow to zero.
-            x('.4p-1020') == idb(x'0010000000000000')
-            x('.4p-1072') => zl(x'0000000000000000')     # Underflow to zero.
-            x('.4p-1072') => idb(x'0000000000000001')
-x('.fffffffffffff8p1024') => zl(x'7fffffffffffffff')     # Overflow to maxval.
-x('.fffffffffffff8p1024') => idb(x'7fefffffffffffff')
-            x('.1p-256') == zl(x'0010000000000000') == idb(x'2fb0000000000000')
- x('.ffffffffffffffp248') == zl(x'7effffffffffffff')
- x('.ffffffffffffffp248') => idb(x'4f70000000000000')	# Loses precision.
+            x('.4p-1020') => zl('0000000000000000')     # Underflow to zero.
+            x('.4p-1020') == idb('0010000000000000')
+            x('.4p-1072') => zl('0000000000000000')     # Underflow to zero.
+            x('.4p-1072') => idb('0000000000000001')
+x('.fffffffffffff8p1024') => zl('7fffffffffffffff')     # Overflow to maxval.
+x('.fffffffffffff8p1024') => idb('7fefffffffffffff')
+            x('.1p-256') == zl('0010000000000000') == idb('2fb0000000000000')
+ x('.ffffffffffffffp248') == zl('7effffffffffffff')
+ x('.ffffffffffffffp248') => idb('4f70000000000000')	# Loses precision.
 ])
 AT_CHECK(
   [sed 's/#.*//
 s/^[ 	]*//
 s/[ 	]*$//
+/^$/d
 s/^\(..*\)$/DEBUG FLOAT FORMAT \1./' < float-format.txt > float-format.sps])
 AT_CHECK([pspp --testing-mode -O format=csv float-format.sps])
 AT_CLEANUP
-- 
2.30.2