Patch #6441. Reviewed by John Darrington.
[pspp-builds.git] / tests / formats / format-guesser.sh
diff --git a/tests/formats/format-guesser.sh b/tests/formats/format-guesser.sh
new file mode 100755 (executable)
index 0000000..37bd2bc
--- /dev/null
@@ -0,0 +1,203 @@
+#! /bin/sh
+
+# Tests guessing of data formats from data.
+
+# Scratch directory for this run; the shell's PID keeps the name per-process.
+TEMPDIR=/tmp/pspp-tst-$$
+
+# Default both tree roots to ".", then make them absolute.  The paths are
+# quoted and a failed cd exits with status 2 instead of resolving to the cwd.
+: "${top_builddir:=.}" "${top_srcdir:=.}"
+top_builddir=`cd "$top_builddir" && pwd` || exit 2
+top_srcdir=`cd "$top_srcdir" && pwd` || exit 2
+PSPP=$top_builddir/src/ui/terminal/pspp
+
+# Exported into the environment of every command started below.
+STAT_CONFIG_PATH=$top_srcdir/config
+export STAT_CONFIG_PATH
+
+
+# Leave the scratch directory and delete it; always returns success.
+cleanup() {
+    cd /
+    rm -rf "$TEMPDIR"    # quoted so the path is never split or globbed
+    :
+}
+
+
+# Print the step that was in progress, then FAILED; clean up; exit status 1.
+fail() {
+    echo "$activity"    # quoted: printed as-is, no word splitting or globbing
+    echo FAILED
+    cleanup
+    exit 1
+}
+
+
+# Print the step that was in progress, then NO RESULT; clean up; exit status 2.
+no_result() {
+    echo "$activity"    # quoted: printed as-is, no word splitting or globbing
+    echo NO RESULT
+    cleanup
+    exit 2
+}
+
+# The test succeeded: remove the scratch directory and exit with status 0.
+pass() {
+    cleanup
+    exit 0
+}
+
+activity="create scratch directory"
+# Everything below writes into $TEMPDIR; stop with NO RESULT if it is unusable.
+mkdir -p "$TEMPDIR" && cd "$TEMPDIR" || no_result
+activity="create test data"
+# Strip '#' comments, drop the lines left blank, and save the rest as test-list.
+sed -e 's/#.*//' -e '/^[   ]*$/d' > $TEMPDIR/test-list <<'EOF' || no_result
+# No data.
+=> F8.2
+"" => F8.2
+"." => F8.2
+
+# Numeric formats.
+"1.2" => F3.1
+"$1.2" => DOLLAR4.1
+"1.2%" => PCT4.1
+"$1.2%" => A5
+"1e5" => E3.0
+"1e+5" => E4.0
+"1+5" => E3.0
+"1-5" => E3.0
+"1.2e5" => E5.1
+"1.3e+5" => E6.1
+"1.4+5" => E5.1
+"1e" => A2
+"1e+" => A3
+"1+" => A2
+"1-" => A2
+"1.5-5" => E5.1
+"1,123" => COMMA5.0         # Is , is grouping or decimal? Assume grouping.
+"1.123" => F5.3                    # Ditto.
+"1,12" => F4.2             # Not a group of 3, so last delim must be decimal.
+"1.12" => F4.2             # Ditto.
+"1,1234" => F6.4           # Not a group of 3, so last delim must be decimal.
+"1.1234" => F6.4           # Ditto.
+"$1.234" => DOLLAR6.3       # Dollar sign means decimal has to be '.'.
+"$1,234" => DOLLAR6.0       # Ditto.
+"1.234%" => PCT6.3          # Percent sign means decimal has to be '.'.
+"1,234%" => PCT6.0          # Ditto.
+"1,123.456" => COMMA9.3            # Both '.' and ',', so last delim must be decimal.
+"1.123,456" => DOT9.3      # Ditto.
+"1,123,456.45" => COMMA12.2 # Ditto.
+"1.123.456,45" => DOT12.2   # Ditto.
+"1,123,456" => COMMA9.0            # Ditto.
+"1.123.456" => DOT9.0      # Ditto.
+
+# Date and time formats.
+"01-OCT-1978" => DATE11
+"01-13-99" => ADATE8
+"1-13-99" => ADATE7 (ADATE8)
+"13-01-99" => EDATE8
+"13-1-99" => EDATE7 (EDATE8)
+"32-1-1" => SDATE6 (SDATE8)
+"1q01" => QYR4
+"1Q01" => QYR4
+"1 q 01" => QYR6
+"1 Q 01" => QYR6
+"1q2001" => QYR6
+"1Q2001" => QYR6
+"1 q 2001" => QYR8
+"1 Q 2001" => QYR8
+"oct 05" => MOYR6
+"oct 2005" => MOYR8
+"1-1-01 1:2" => A10            # Minute needs at least two digits.
+"1-1-01 1:02" => DATETIME11.0 (DATETIME17.0)
+"1-1-01 1:02:3" => A13         # Second needs at least two digits.
+"1-1-01 1:02:03" => DATETIME20.0
+"1-1-01 1:02:03.1" => DATETIME20.1 (DATETIME22.1)
+"1-1-01 +1:02:03.1" => DATETIME20.1 (DATETIME22.1)
+"1-1-01 -1:02:03.1" => DATETIME20.1 (DATETIME22.1)
+"1:30" => TIME4.0 (TIME5.0)
+"1:30:05" => TIME8.0
+"-1:30" => TIME5.0
+"+1:30" => TIME5.0
+"-1:30:15" => TIME8.0
+"+1:30:15" => TIME8.0
+"-1:30:15.5" => TIME10.1
+"+1:30:15.75" => TIME11.2
+"1 1:30" => DTIME6.0 (DTIME8.0)
+"+1 1:30" => DTIME7.0 (DTIME8.0)
+"-1 1:30" => DTIME7.0 (DTIME8.0)
+"-1-13-99" => A8
+"+1-13-99" => A8
+"1+13+99" => A7
+"1:00:01.03" => TIME10.2 (TIME11.2)
+"12 1:00:01.3" => DTIME12.1 (DTIME13.1)
+"jan" => MONTH3
+"Feb" => MONTH3
+"MAR" => MONTH3
+"i" => MONTH1 (MONTH3)
+"ii" => MONTH2 (MONTH3)
+"iii" => MONTH3
+"iiii" => A4
+"iv" => MONTH2 (MONTH3)
+"v" => MONTH1 (MONTH3)
+"vi" => MONTH2 (MONTH3)
+"vii" => MONTH3
+"viii" => MONTH4
+"ix" => MONTH2 (MONTH3)
+"viiii" => A5
+"x" => MONTH1 (MONTH3)
+"xi" => MONTH2 (MONTH3)
+"xii" => MONTH3
+"january" => MONTH7
+"janaury" => MONTH7
+"february" => MONTH8
+"febraury" => MONTH8
+"march" => MONTH5
+"marhc" => MONTH5
+"april" => MONTH5
+"may" => MONTH3
+"june" => MONTH4
+"july" => MONTH4
+"august" => MONTH6
+"september" => MONTH9
+"october" => MONTH7
+"november" => MONTH8
+"decmeber" => MONTH8
+"december" => MONTH8
+"monady" => WKDAY6
+"tuseday" => WKDAY7
+"wedensday" => WKDAY9
+"thurdsay" => WKDAY8
+"fridya" => WKDAY6
+"saturady" => WKDAY8
+"sudnay" => WKDAY6
+
+# Ambiguous; bias in favor of more sensible DD/MM/YY format:
+"1/1/1978" => EDATE8
+"01/01/01" => EDATE8
+
+# Several ambiguous dates can be clarified by one unambiguous example:
+"1/1/1978" "1/2/1978" "1/3/1978" "1/13/1978" => ADATE9 # MM/DD/YY
+"01/01/01" "02/01/01" "03/01/01" "13/01/01" => EDATE8  # DD/MM/YY
+"01/01/01" "02/01/01" "03/01/01" "2013/01/01" => SDATE10 # YY/MM/DD
+EOF
+
+activity="create syntax file"
+# First SET DECIMAL=DOT., then one `DEBUG FORMAT GUESSER <data>.' command per
+# test case: the sed keeps the text before the last "=> " and appends a period.
+{ echo "SET DECIMAL=DOT." &&
+  sed -e 's#^\(.*\)=> \(.*\)$#DEBUG FORMAT GUESSER \1.#' < $TEMPDIR/test-list
+} > $TEMPDIR/test.stat || no_result
+
+activity="run program"
+# stderr -> test.out (the file compared below); stdout -> test.err (not read here).
+$SUPERVISOR $PSPP --testing-mode $TEMPDIR/test.stat 2>$TEMPDIR/test.out >$TEMPDIR/test.err
+
+activity="compare output"
+# Delete whitespace-only lines from both files in place, then require them to
+# match; diff -b ignores differences in the amount of white space.
+perl -pi -e 's/^\s*$//g' $TEMPDIR/test-list $TEMPDIR/test.out
+diff -b $TEMPDIR/test-list $TEMPDIR/test.out || fail
+pass