From ea411b6c81ab5895051535a4730210a66d4be388 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 15 Dec 2021 21:00:32 -0800 Subject: [PATCH 1/1] Convert all Perl build tools to Python and remove Perl build dependency. Python is much more commonly used these days and easier for beginners to pick up. --- INSTALL | 18 +- NEWS | 6 + acinclude.m4 | 7 +- doc/automake.mk | 6 +- doc/get-commands.pl | 25 -- doc/get-commands.py | 22 ++ src/data/automake.mk | 6 +- src/data/sys-file-encoding.pl | 180 ----------- src/data/sys-file-encoding.py | 174 +++++++++++ tests/data/data-in.at | 248 +++++++-------- tests/data/data-out.at | 398 +++++++++++++------------ tests/language/data-io/data-reader.at | 245 ++++++++------- tests/language/data-io/get-data-txt.at | 10 +- tests/language/stats/examine.at | 14 +- tests/language/stats/sort-cases.at | 53 ++-- tests/libpspp/line-reader.at | 4 +- tests/libpspp/u8-istream.at | 17 +- tests/math/randist.at | 106 ++++--- 18 files changed, 776 insertions(+), 763 deletions(-) delete mode 100755 doc/get-commands.pl create mode 100644 doc/get-commands.py delete mode 100755 src/data/sys-file-encoding.pl create mode 100644 src/data/sys-file-encoding.py diff --git a/INSTALL b/INSTALL index 35fd4715ea..252dc6f5ca 100644 --- a/INSTALL +++ b/INSTALL @@ -57,9 +57,6 @@ The following packages are required to install PSPP: * The GNU Scientific Library (http://www.gnu.org/software/gsl/), version 1.13 or later, including libgslcblas included with GSL. - * Perl (http://www.perl.org/), version 5.005_03 or later. Perl is - required during build but not after installation. - * Python (https://python.org/), version 3.4 or later. Python is required during build but not after installation. @@ -93,7 +90,16 @@ use the GUI, you must run `configure' with --without-gui. * GNU Spread Sheet Widget (http://www.gnu.org/software/ssw) version 0.7 or later. -The following packages are optional: +The following packages are only needed to build and test the Perl +module: + + * Perl (https://www.perl.org/), version 5.005_03 or later. + + * The Config::Perl::V module for Perl (https://cpan.org). + + * Optionally, the Text::Diff and Memory::Usage modules for Perl + (https://cpan.org). These modules enable PSPP to test its Perl + module more thoroughly. Other optional packages: @@ -111,10 +117,6 @@ Other optional packages: interface, but not the Postgresql interface itself, requires the Postgresql server to be installed. - * The Text::Diff module for Perl (http://cpan.org). This enables - PSPP to test the Perl module more thoroughly. It is not needed - to build or use the Perl module. - Basic Installation ================== diff --git a/NEWS b/NEWS index aa3b8620ed..791b9c1ac3 100644 --- a/NEWS +++ b/NEWS @@ -26,6 +26,12 @@ Changes from 1.4.1 to 1.5.3: * Build changes: + - Perl is no longer required to build. + + - Build now requires Python 3.4 or later. (Building PSPP 1.4.0 + also required Python, but it wasn't properly documented at the + time.) + - The Cairo and Pango libraries are now required. - gettext 0.20 or later is now required. diff --git a/acinclude.m4 b/acinclude.m4 index 3c451971bb..98864ea5bc 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -32,7 +32,7 @@ AC_DEFUN([PSPP_PERL], AC_PATH_PROG([PERL], perl, no) AC_SUBST([PERL])dnl if test "$PERL" != no && $PERL -e 'require 5.005_03;'; then :; else - PSPP_REQUIRED_PREREQ([Perl 5.005_03 (or later)]) + PSPP_OPTIONAL_PREREQ([Perl 5.005_03 (or later)]) fi # The PSPP autobuilder appends a build number to the PSPP version number, @@ -59,7 +59,7 @@ AC_DEFUN([PSPP_PERL_MODULE], [yes|no], [], [AC_MSG_FAILURE([--with-perl-module argument must be 'yes' or 'no'])]) WITH_PERL_MODULE=$with_perl_module], - [if test x"$cross_compiling" != x"yes"; then + [if test "$PERL" != no && test x"$cross_compiling" != x"yes"; then WITH_PERL_MODULE=yes else WITH_PERL_MODULE=no @@ -68,6 +68,9 @@ AC_DEFUN([PSPP_PERL_MODULE], AM_CONDITIONAL(WITH_PERL_MODULE, test $WITH_PERL_MODULE = yes) if test $WITH_PERL_MODULE = yes; then + if test "$PERL" = no; then + PSPP_REQUIRED_PREREQ([Perl 5.005_03 or later (or use --without-perl-module)]) + fi CHECK_PERL_MODULE([Config::Perl::V], [], [PSPP_REQUIRED_PREREQ([Config::Perl::V Perl module (or use --without-perl-module)])]) CHECK_PERL_MODULE([Text::Diff], [], diff --git a/doc/automake.mk b/doc/automake.mk index 6d2e6ebcc6..63972bb3e4 100644 --- a/doc/automake.mk +++ b/doc/automake.mk @@ -67,12 +67,12 @@ doc_pspp_dev_TEXINFOS = doc/version-dev.texi \ dist_man_MANS += doc/pspp.1 \ doc/psppire.1 -EXTRA_DIST += doc/get-commands.pl \ +EXTRA_DIST += doc/get-commands.py \ doc/help-pages-list \ doc/prepdoc.sh -$(srcdir)/doc/ni.texi: $(top_srcdir)/src/language/command.def doc/get-commands.pl - $(AM_V_GEN)$(PERL) $(top_srcdir)/doc/get-commands.pl $(top_srcdir)/src/language/command.def > $@ +$(srcdir)/doc/ni.texi: $(top_srcdir)/src/language/command.def doc/get-commands.py + $(AM_V_GEN)$(PYTHON3) $(top_srcdir)/doc/get-commands.py $(top_srcdir)/src/language/command.def > $@ $(srcdir)/doc/tut.texi: $(AM_V_GEN)echo "@set example-dir $(examplesdir)" > $@ diff --git a/doc/get-commands.pl b/doc/get-commands.pl deleted file mode 100755 index 7d5ac4534b..0000000000 --- a/doc/get-commands.pl +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/perl -# Creates Texinfo documentation from the source - -use strict; -use warnings 'all'; - -my ($file) = $ARGV[0]; -open(INFO, $file) || die "Cannot open \"$file\"\n" ; -print "\@c Generated from $file by get-commands.pl\n"; -print "\@c Do not modify!\n\n"; - -print "\@table \@asis\n"; -while () -{ - my ($command, $description) - = /^\s*UNIMPL_CMD\s*\(\s*"([^"]*)"\s*,\s*"([^"]*)"\)\s*$/ - or next; - print "\@item \@cmd{$command}\n$description\n\n"; -} -print "\@end table\n"; - -print "\@c Local Variables:\n"; -print "\@c buffer-read-only: t\n"; -print "\@c End:\n"; -close(INFO); # Close the file diff --git a/doc/get-commands.py b/doc/get-commands.py new file mode 100644 index 0000000000..fa6b98e8e4 --- /dev/null +++ b/doc/get-commands.py @@ -0,0 +1,22 @@ +#! /usr/bin/python3 +# Creates Texinfo documentation from the source + +import re +import sys + +print("""\ +@c Generated from %s by get-commands.py +@c Do not modify! + +@table @asis""" % sys.argv[1]) +for line in open(sys.argv[1], 'r'): + m = re.match(r'^\s*UNIMPL_CMD\s*\(\s*"([^"]*)"\s*,\s*"([^"]*)"\)\s*$', line) + if m: + command, description = m.groups() + print("@item @cmd{%s}\n%s\n" % (command, description)) +print("""\ +@end table +@c Local Variables: +@c buffer-read-only: t +@c End:""") + diff --git a/src/data/automake.mk b/src/data/automake.mk index e5d40bf7fb..d721666784 100644 --- a/src/data/automake.mk +++ b/src/data/automake.mk @@ -138,7 +138,7 @@ src_data_libdata_la_SOURCES = \ nodist_src_data_libdata_la_SOURCES = src/data/sys-file-encoding.c src/data/sys-file-encoding.c: \ - src/data/sys-file-encoding.pl \ + src/data/sys-file-encoding.py \ src/data/convrtrs.txt - $(AM_V_GEN)$(PERL) $^ > $@.tmp && mv $@.tmp $@ -EXTRA_DIST += src/data/sys-file-encoding.pl src/data/convrtrs.txt + $(AM_V_GEN)$(PYTHON3) $^ > $@.tmp && mv $@.tmp $@ +EXTRA_DIST += src/data/sys-file-encoding.py src/data/convrtrs.txt diff --git a/src/data/sys-file-encoding.pl b/src/data/sys-file-encoding.pl deleted file mode 100755 index 95860d0e5b..0000000000 --- a/src/data/sys-file-encoding.pl +++ /dev/null @@ -1,180 +0,0 @@ -#! /usr/bin/perl -# Copyright (C) 2020, 2021Free Software Foundation - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -use strict; -use warnings; - -if (-t 1 || @ARGV != 1 || $ARGV[0] eq '--help') { - print STDERR < sys-file-encoding.c - -To update the encoding data, get the latest ICU encoding data from: -https://raw.githubusercontent.com/unicode-org/icu/main/icu4c/source/data/mappings/convrtrs.txt -EOF - exit (@ARGV && $ARGV[0] eq '--help' ? 0 : 1); -} - -open (CONVERTERS, '<', $ARGV[0]) - or die "$ARGV[0]: open failed ($!)\n"; - -our $WINDOWS = 3; # Windows code pages. -our $IBM = 2; # IBM code pages. -our $CP = 1; # Java (?) code pages. -our %sources = ($WINDOWS => "windows", $IBM => "ibm", $CP => "cp"); - -my $converter = ""; -while () { - chomp; - s/#.*//; - if (s/^\s+//) { - $converter .= " $_"; - } else { - process_converter ($converter); - $converter = $_; - } -} -process_converter ($converter); -close (CONVERTERS); - -our %codepages; - -print <<'EOF'; -/* -*- mode: c; buffer-read-only: t -*- - - Generated by sys-file-encoding.pl. Do not modify! -*/ - -/* -PSPP - a program for statistical analysis. -Copyright (C) 2017 Free Software Foundation, Inc. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ - -#include - -#include "data/sys-file-private.h" - -struct sys_encoding sys_codepage_number_to_name[] = { -EOF -for my $cpnumber (sort { $a <=> $b } (keys (%codepages))) { - my $source = max (keys (%{$codepages{$cpnumber}})); - my $name = ${$codepages{$cpnumber}{$source}}[0]; - print " { $cpnumber, \"$name\" },\n"; -} -print " { 0, NULL }\n"; -print "};\n\n"; - -my %names; -for my $cpnumber (sort { $a <=> $b } (keys (%codepages))) { - for my $source (keys (%{$codepages{$cpnumber}})) { - for my $name (@{$codepages{$cpnumber}{$source}}) { - push(@{$names{$name}{$source}}, $cpnumber); - } - } -} -print "struct sys_encoding sys_codepage_name_to_number[] = {\n"; -for my $name (sort (keys (%names))) { - for my $source (reverse (sort (keys (%sources)))) { - next if !exists ($names{$name}{$source}); - my (@numbers) = @{$names{$name}{$source}}; - - # The only two encodings that currently print this are KSC_5601 - # and KS_C_5601-1987, for code pages 949 and 51949. It looks to - # me like the correct code page number is 949, which is the one - # chosen (because the numbers are in sorted order). - print " /* $name has multiple numbers for $sources{$source}: @numbers */\n" - if @numbers > 1; - - print " { $numbers[0], \"$name\" },\n"; - last; - } -} -print " { 0, NULL }\n"; -print "};\n"; - -sub process_converter { - my ($converter) = @_; - return if $converter =~ /^\s*$/; - return if $converter =~ /^\s*\{/; - - my %cps; - my @iana; - my @other; - - my @fields = split (' ', $converter); - while (@fields) { - my $name = shift (@fields); - if (@fields && $fields[0] eq '{') { - shift (@fields); - - my (%standards); - for (;;) { - my $standard = shift (@fields); - last if $standard eq '}'; - $standards{$standard} = 1; - } - if (exists $standards{'IANA*'}) { - unshift (@iana, $name); - } elsif (exists $standards{'IANA'}) { - push (@iana, $name); - } elsif (grep (/\*$/, keys %standards)) { - unshift (@other, $name); - } else { - push (@other, $name); - } - } else { - # Untagged names are completely nonstandard. - next; - } - - my $number; - if (($number) = $name =~ /^cp([0-9]+)$/) { - $cps{$CP} = int ($number); - } elsif (($number) = $name =~ /^windows-([0-9]+)$/) { - $cps{$WINDOWS} = int ($number); - } elsif (($number) = $name =~ /^ibm-([0-9]+)$/) { - $cps{$IBM} = int ($number); - } else { - next; - } - } - - # If there are no tagged names then this is completely nonstandard. - return if !@iana && !@other; - - $codepages{$cps{$_}}{$_} = [@iana, @other] for keys (%cps); -} - -sub max { - my ($best); - for my $x (@_) { - $best = $x if !defined ($best) || $x > $best; - } - return $best; -} diff --git a/src/data/sys-file-encoding.py b/src/data/sys-file-encoding.py new file mode 100644 index 0000000000..7d14545490 --- /dev/null +++ b/src/data/sys-file-encoding.py @@ -0,0 +1,174 @@ +#! /usr/bin/python3 + +# Copyright (C) 2020, 2021 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import os +import re +import sys + +def process_converter(fields): + if not fields or fields[0] == '{': + return + + global codepages + cps = {} + iana = [] + other = [] + + i = 0 + while i < len(fields): + name = fields[i] + i += 1 + + if i < len(fields) and fields[i] == '{': + i += 1 + + standards = set() + while True: + standard = fields[i] + i += 1 + if standard == '}': + break + standards.add(standard) + if 'IANA*' in standards: + iana = [name] + iana + elif 'IANA' in standards: + iana += [name] + elif any(map(lambda s: s.endswith('*'), standards)): + other = [name] + other + else: + other += [name] + else: + # Untagged names are completely nonstandard. + continue + + m = re.match(r'cp([0-9]+)$', name) + if m: + cps[CP] = int(m.group(1)) + continue + + m = re.match(r'windows-([0-9]+)$', name) + if m: + cps[WINDOWS] = int(m.group(1)) + continue + + m = re.match(r'ibm-([0-9]+)$', name) + if m: + cps[IBM] = int(m.group(1)) + continue + + # If there are no tagged names then this is completely nonstandard. + if not iana and not other: + return + + for cp in cps.keys(): + codepages.setdefault(cps[cp], {})[cp] = iana + other + +if len(sys.argv) != 2 or sys.argv[1] == '--help': + sys.stderr.write("""\ +%s: generate code page tables from ICU encoding list +usage: %s CONVRTRS-TXT > sys-file-encoding.c + +To update the encoding data, get the latest ICU encoding data from: +https://raw.githubusercontent.com/unicode-org/icu/\ +main/icu4c/source/data/mappings/convrtrs.txt +""" % (sys.argv[0], sys.argv[0])) + sys.exit(0 if len(sys.argv) == 2 and sys.argv[1] == '--help' else 1) + +WINDOWS = 3 # Windows code pages. +IBM = 2 # IBM code pages. +CP = 1 # Java (?) code pages. +sources = { WINDOWS: "windows", IBM: "ibm", CP: "cp" } + +codepages = {} + +converter = [] +for line in open(sys.argv[1], 'r'): + line = line.rstrip() + comment_ofs = line.find('#') + if comment_ofs >= 0: + line = line[:comment_ofs] + if line.lstrip() == line: + process_converter(converter) + converter = [] + converter += line.split() +process_converter(converter) + +print("""\ +/* -*- mode: c; buffer-read-only: t -*- + + Generated by sys-file-encoding.py. Do not modify! +*/ + +/* +PSPP - a program for statistical analysis. +Copyright (C) 2017 Free Software Foundation, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +#include + +#include "data/sys-file-private.h" + +struct sys_encoding sys_codepage_number_to_name[] = {""") + +for cpnumber, value in sorted(codepages.items()): + source = max(value.keys()) + name = value[source][0] + print(' { %s, "%s" },' % (cpnumber, name)) +print(""" { 0, NULL } +}; +""") + +names = {} +for cpnumber, value in sorted(codepages.items()): + for source, value2 in value.items(): + for name in value2: + names.setdefault(name, {}).setdefault(source, []).append(cpnumber) + +print('struct sys_encoding sys_codepage_name_to_number[] = {') +for name in sorted(names.keys()): + for source in sorted(sources.keys(), reverse=True): + if source not in names[name]: + continue + + numbers = names[name][source] + + # The only two encodings that currently print this are KSC_5601 + # and KS_C_5601-1987, for code pages 949 and 51949. It looks to + # me like the correct code page number is 949, which is the one + # chosen (because the numbers are in sorted order). + if len(numbers) > 1: + print(' /* %s has multiple numbers for %s: %s */' + % (name, sources[source], ' '.join(map(str, numbers)))) + print(' { %s, "%s" },' % (numbers[0], name)) + break +print("""\ + { 0, NULL } +};""") + diff --git a/tests/data/data-in.at b/tests/data/data-in.at index 77a39db010..72f1515ea2 100644 --- a/tests/data/data-in.at +++ b/tests/data/data-in.at @@ -16,9 +16,15 @@ dnl along with this program. If not, see . dnl AT_BANNER([data input (data-in)]) -m4_divert_push([PREPARE_TESTS]) -[data_in_prng () { - cat > my-rand.pl <<'EOF' +AT_SETUP([numeric input formats]) +AT_KEYWORDS([data-in slow]) +data_in_prng +AT_DATA([num-in.py], +[[#! /usr/bin/python3 + +import math +import re + # This random number generator and the test for it below are drawn # from Park and Miller, "Random Number Generators: Good Ones are Hard # to Come By", Communications of the ACM 31:10 (October 1988). It is @@ -26,132 +32,85 @@ m4_divert_push([PREPARE_TESTS]) # real significand, which includes systems that have 64-bit IEEE reals # (with 53-bit significand). The test should catch any systems for # which this is not true, in any case. +def my_rand(modulus): + global seed + a = 16807 + m = 2147483647 + tmp = a * seed + seed = tmp - m * (tmp // m) + return seed % modulus + +# Test the random number generator for reproducibility, +# then reset the seed +seed = 1 +for i in range(10000): + my_rand(1) +assert seed == 1043618065 +seed = 1 + +def permute_zeros(fraction, exponent): + frac_rep = "%f" % fraction + leading_zeros = len(frac_rep) - len(frac_rep.lstrip('0')) + trailing_zeros = len(re.search(r'(\.?0*)$', frac_rep).group(1)) + for i in range(leading_zeros + 1): + for j in range(trailing_zeros + 1): + trimmed = frac_rep[i:len(frac_rep) - j] + if trimmed == '.' or not trimmed: + continue + + permute_commas(trimmed, exponent) + +def permute_commas(frac_rep, exponent): + permute_dot_comma(frac_rep, exponent) + pos = my_rand(len(frac_rep) + 1) + frac_rep = "%s,%s" % (frac_rep[:pos], frac_rep[pos:]) + permute_dot_comma(frac_rep, exponent) + +def permute_dot_comma(frac_rep, exponent): + permute_exponent_syntax(frac_rep, exponent) + if ',' in frac_rep or '.' in frac_rep: + frac_rep = frac_rep.translate(str.maketrans('.,', ',.')) + permute_exponent_syntax(frac_rep, exponent) + +def permute_exponent_syntax(frac_rep, exponent): + if exponent == 0: + e = pick(('', 'e0', 'e-0', 'e+0', '-0', '+0')) + elif exponent > 0: + e = pick(("e%s" % exponent, "e+%s" % exponent, "+%s" % exponent)) + else: + abs_exp = -exponent + e = pick(("e-%s" % abs_exp, "e-%s" % abs_exp, "-%s" % abs_exp)) + permute_sign_and_affix(frac_rep, e) + +def permute_sign_and_affix(frac_rep, exp_rep): + for prefix in (pick(('', '$')), + pick(('-', '-$', '$-', '$-$')), + pick(('+', '+$', '$+', '$+$'))): + for suffix in ('', '%'): + permute_spaces(prefix + frac_rep + exp_rep + suffix) + +def permute_spaces(s): + fields = re.sub(r'([-+\$e%])', r' \1 ', s).split() + print(''.join(fields)) + + if len(fields) > 1: + pos = my_rand(len(fields) - 1) + 1 + print("%s %s" % (''.join(fields[:pos]), + ''.join(fields[pos:]))) + +def pick(choices): + return choices[my_rand(len(choices))] + +for number in (0, 1, .5, .015625, 123): + base_exp = math.floor(math.log10(number)) if number else 0 + for offset in range(-3, 4): + exponent = base_exp + offset + fraction = number / 10**offset + + permute_zeros(fraction, exponent) -our ($seed) = 1; -sub my_rand { - my ($modulo) = @_; - my ($a) = 16807; - my ($m) = 2147483647; - my ($tmp) = $a * $seed; - $seed = $tmp - $m * int ($tmp / $m); - return $seed % $modulo; -} -EOF - cat > test-my-rand.pl <<'EOF' -#! /usr/bin/perl -use strict; -use warnings; -do './my-rand.pl'; -my_rand (1) foreach 1...10000; -our $seed; -die $seed if $seed != 1043618065; -EOF -} -] -m4_divert_pop([PREPARE_TESTS]) - -AT_SETUP([numeric input formats]) -AT_KEYWORDS([data-in slow]) -data_in_prng -AT_CHECK([$PERL test-my-rand.pl]) -AT_DATA([num-in.pl], -[[#! /usr/bin/perl - -use POSIX; -use strict; -use warnings; - -do './my-rand.pl'; - -for my $number (0, 1, .5, .015625, 123) { - my ($base_exp) = floor ($number ? log10 ($number) : 0); - for my $offset (-3...3) { - my ($exponent) = $base_exp + $offset; - my ($fraction) = $number / 10**$offset; - - permute_zeros ($fraction, $exponent); - } -} - -sub permute_zeros { - my ($fraction, $exponent) = @_; - - my ($frac_rep) = sprintf ("%f", $fraction); - my ($leading_zeros) = length (($frac_rep =~ /^(0*)/)[0]); - my ($trailing_zeros) = length (($frac_rep =~ /(\.?0*)$/)[0]); - for my $i (0...$leading_zeros) { - for my $j (0...$trailing_zeros) { - my ($trimmed) = substr ($frac_rep, $i, - length ($frac_rep) - $i - $j); - next if $trimmed eq '.' || $trimmed eq ''; - - permute_commas ($trimmed, $exponent); - } - } -} - -sub permute_commas { - my ($frac_rep, $exponent) = @_; - permute_dot_comma ($frac_rep, $exponent); - my ($pos) = int (my_rand (length ($frac_rep) + 1)); - $frac_rep = substr ($frac_rep, 0, $pos) . "," . substr ($frac_rep, $pos); - permute_dot_comma ($frac_rep, $exponent); -} - -sub permute_dot_comma { - my ($frac_rep, $exponent) = @_; - permute_exponent_syntax ($frac_rep, $exponent); - if ($frac_rep =~ /[,.]/) { - $frac_rep =~ tr/.,/,./; - permute_exponent_syntax ($frac_rep, $exponent); - } -} - -sub permute_exponent_syntax { - my ($frac_rep, $exponent) = @_; - my (@exp_reps); - if ($exponent == 0) { - @exp_reps = pick ('', 'e0', 'e-0', 'e+0', '-0', '+0'); - } elsif ($exponent > 0) { - @exp_reps = pick ("e$exponent", "e+$exponent", "+$exponent"); - } else { - my ($abs_exp) = -$exponent; - @exp_reps = pick ("e-$abs_exp", , "e-$abs_exp", "-$abs_exp"); - } - permute_sign_and_affix ($frac_rep, $_) foreach @exp_reps; -} - -sub permute_sign_and_affix { - my ($frac_rep, $exp_rep) = @_; - for my $prefix (pick ('', '$'), - pick ('-', '-$', '$-', '$-$'), - pick ('+', '+$', '$+', '$+$')) { - for my $suffix ('', '%') { - permute_spaces ("$prefix$frac_rep$exp_rep$suffix"); - } - } -} - -sub permute_spaces { - my ($s) = @_; - $s =~ s/([-+\$e%])/ $1 /g; - my (@fields) = split (' ', $s); - print join ('', @fields), "\n"; - - if ($#fields > 0) { - my ($pos) = int (my_rand ($#fields)) + 1; - print join ('', @fields[0...$pos - 1]); - print " "; - print join ('', @fields[$pos...$#fields]); - print "\n"; - } -} - -sub pick { - return $_[int (my_rand ($#_ + 1))]; -} ]]) -AT_CHECK([$PERL num-in.pl > num-in.data]) +AT_CHECK([$PYTHON3 num-in.py > num-in.data]) AT_DATA([num-in.sps], [dnl SET ERRORS=NONE. SET MXERRS=10000000. @@ -359,9 +318,24 @@ AT_CHECK([pspp -O format=csv dtime.sps]) AT_CHECK([cat dtime.output], [0], [expout]) AT_CLEANUP +m4_divert_push([PREPARE_TESTS]) +[number_lines_in_hex () { + $PYTHON3 -c ' +import sys +for i, line in enumerate(sys.stdin): + sys.stdout.write(" %04X %s" % (i, line)) +' +}] +m4_divert_pop([PREPARE_TESTS]) + + AT_SETUP([binary and hexadecimal input (IB, PIB, and PIBHEX formats)]) AT_KEYWORDS([slow]) -AT_CHECK([$PERL -e 'print pack "n", $_ foreach 0...65535' > binhex-in.data]) +AT_CHECK([$PYTHON3 -c ' +import struct +import sys +for i in range(65536): + sys.stdout.buffer.write(struct.pack(">H", i))' > binhex-in.data]) AT_CHECK([[wc -c < binhex-in.data | sed 's/[ ]//g']], [0], [131072 ]) AT_DATA([binhex-in.sps], [dnl @@ -376,14 +350,18 @@ PRINT OUTFILE='binhex-in.out'/x (PIBHEX4) ' ' ib pib pibhex. EXECUTE. ]) AT_CHECK([gzip -cd < $top_srcdir/tests/data/binhex-in.expected.cmp.gz | \ - $PERL -pe "printf ' %04X ', $.-1" > expout]) + number_lines_in_hex > expout]) AT_CHECK([pspp -O format=csv binhex-in.sps], [0]) AT_CHECK([cat binhex-in.out], [0], [expout]) AT_CLEANUP AT_SETUP([BCD input (P and PK formats)]) AT_KEYWORDS([slow]) -AT_CHECK([$PERL -e 'print pack "n", $_ foreach 0...65535' > bcd-in.data]) +AT_CHECK([$PYTHON3 -c ' +import struct +import sys +for i in range(65536): + sys.stdout.buffer.write(struct.pack(">H", i))' > bcd-in.data]) AT_CHECK([[wc -c < bcd-in.data | sed 's/[ ]//g']], [0], [131072 ]) AT_DATA([bcd-in.sps], [dnl @@ -397,14 +375,18 @@ PRINT OUTFILE='bcd-in.out'/x (PIBHEX4) ' ' P PK. EXECUTE. ]) AT_CHECK([gzip -cd < $top_srcdir/tests/data/bcd-in.expected.cmp.gz | \ - $PERL -pe "printf ' %04X ', $.-1" > expout]) + number_lines_in_hex > expout]) AT_CHECK([pspp -O format=csv bcd-in.sps]) AT_CHECK([cat bcd-in.out], [0], [expout]) AT_CLEANUP AT_SETUP([legacy input (N and Z formats)]) AT_KEYWORDS([slow]) -AT_CHECK([$PERL -e 'print pack "n", $_ foreach 0...65535' > legacy-in.data]) +AT_CHECK([$PYTHON3 -c ' +import struct +import sys +for i in range(65536): + sys.stdout.buffer.write(struct.pack(">H", i))' > legacy-in.data]) AT_CHECK([[wc -c < legacy-in.data | sed 's/[ ]//g']], [0], [131072 ]) AT_DATA([legacy-in.sps], [dnl @@ -418,7 +400,7 @@ PRINT OUTFILE='legacy-in.out'/x (PIBHEX4) ' ' N Z. EXECUTE. ]) AT_CHECK([gzip -cd < $top_srcdir/tests/data/legacy-in.expected.cmp.gz | \ - $PERL -pe "printf ' %04X ', $.-1" > expout]) + number_lines_in_hex > expout]) AT_CHECK([pspp -O format=csv legacy-in.sps]) AT_CHECK([cat legacy-in.out], [0], [expout]) AT_CLEANUP diff --git a/tests/data/data-out.at b/tests/data/data-out.at index 67b8fb9d94..9ed4a980d0 100644 --- a/tests/data/data-out.at +++ b/tests/data/data-out.at @@ -18,221 +18,233 @@ AT_BANNER([data output (data-out)]) AT_SETUP([numeric format output]) AT_KEYWORDS([data-out slow]) -AT_DATA([num-out.pl], -[[use strict; -use warnings 'all'; +AT_DATA([num-out.py], +[[print("""\ +SET CCA=',,,'. +SET CCB='-,[[[,]]],-'. +SET CCC='((,[,],))'. +SET CCD=',XXX,,-'. +SET CCE=',,YYY,-'. +INPUT PROGRAM. +STRING EXPR(A16).""") -my @values = qw(0 2 9.5 27 271 999.95 2718 9999.995 27182 271828 +values = """0 2 9.5 27 271 999.95 2718 9999.995 27182 271828 2718281 2**39 2**333 2**-21 -2 -9.5 -27 -271 -999.95 -2718 -9999.995 -27182 -271828 -2718281 -2**39 -2**333 -2**-21 -0 3.125 31.25 314.125 3141.5 31415.875 314159.25 3141592.625 31415926.5 271828182.25 3214567890.5 31415926535.875 -3.125 -31.375 -314.125 -3141.5 -31415.875 -314159.25 -3141592.625 -31415926.5 -271828182.25 --3214567890.5 -31415926535.875); +-3214567890.5 -31415926535.875""".split() +for value in values: + print("""COMPUTE NUM=%s. +COMPUTE EXPR='%s'. +END CASE.""" % (value, value)) -print "SET CCA=',,,'.\n"; -print "SET CCB='-,[[[,]]],-'.\n"; -print "SET CCC='((,[,],))'.\n"; -print "SET CCD=',XXX,,-'.\n"; -print "SET CCE=',,YYY,-'.\n"; -print "INPUT PROGRAM.\n"; -print "STRING EXPR(A16).\n"; -print map ("COMPUTE NUM=$_.\nCOMPUTE EXPR='$_'.\nEND CASE.\n", @values); -print "END FILE.\n"; -print "END INPUT PROGRAM.\n"; +print("""\ +END FILE. +END INPUT PROGRAM.""") -print "PRINT OUTFILE='output.txt'/EXPR.\n"; -for my $format (qw (F COMMA DOT DOLLAR PCT E CCA CCB CCC CCD CCE N Z)) { - for my $d (0...16) { - my ($min_w); - if ($format ne 'E') { - $min_w = $d + 1; - $min_w++ if $format eq 'DOLLAR' || $format eq 'PCT'; - $min_w = 2 if $min_w == 1 && ($format =~ /^CC/); - } else { - $min_w = $d + 7; - } - for my $w ($min_w...40) { - my ($f) = "$format$w.$d"; - print "PRINT OUTFILE='output.txt'/'$f: \"' NUM($f) '\"'.\n"; - } - } - print "PRINT SPACE OUTFILE='output.txt'.\n"; -} -print "EXECUTE.\n"; +print("PRINT OUTFILE='output.txt'/EXPR.") +for format in 'F COMMA DOT DOLLAR PCT E CCA CCB CCC CCD CCE N Z'.split(): + for d in range(17): + if format != 'E': + min_w = d + 1 + if format in ('DOLLAR', 'PCT'): + min_w += 1 + if min_w == 1 and format.startswith('CC'): + min_w = 2 + else: + min_w = d + 7 + + for w in range(min_w, 41): + f = "%s%s.%s" % (format, w, d) + print("PRINT OUTFILE='output.txt'/'%s: \"' NUM(%s) '\"'." % (f, f)) + print("PRINT SPACE OUTFILE='output.txt'.") +print("EXECUTE.") ]]) -AT_CHECK([$PERL num-out.pl > num-out.sps]) +AT_CHECK([$PYTHON3 num-out.py > num-out.sps]) AT_CHECK([pspp -O format=csv num-out.sps]) AT_CHECK([inexactify < output.txt > output.inexact]) AT_CHECK([gzip -cd < $top_srcdir/tests/data/num-out.expected.cmp.gz > expout.cmp]) -AT_DATA([num-out-decmp.pl], -[[use strict; -use warnings 'all'; +AT_DATA([num-out-decmp.py], +[[#! /usr/bin/python3 + +import re +import sys -my (@line); -while (<>) { - if (my ($n) = /^\*(\d+)$/) { - for (1...$n) { - $line[1]++; - $line[3] = " $line[3]"; - print ' ', join ('', @line), "\n"; - } - } elsif (my ($suffix) = /^\$(.*)$/) { - for my $c (split ('', $suffix)) { - $line[1]++; - $line[4] .= $c; - print ' ', join ('', @line), "\n"; - } - } elsif (my ($prefix) = /^\^(.*)$/) { - for my $c (split ('', $prefix)) { - $line[1]++; - $line[4] = "$c$line[4]"; - print ' ', join ('', @line), "\n"; - } - } else { - @line = /^([A-Z]+)(\d+)([^"]+")( *)([^%"]*)(%?")$/; - print " $_"; - } -} +state = [] +for line in sys.stdin: + line = line.rstrip('\r\n') + if line.startswith('*'): + n = int(line[1:]) + for i in range(n): + state[1] = "%s" % (int(state[1]) + 1) + state[3] = ' ' + state[3] + print(' ' + ''.join(state)) + elif line.startswith('$'): + for c in line[1:]: + state[1] = "%s" % (int(state[1]) + 1) + state[4] += c + print(' ' + ''.join(state)) + elif line.startswith('^'): + for c in line[1:]: + state[1] = "%s" % (int(state[1]) + 1) + state[4] = c + state[4] + print(' ' + ''.join(state)) + else: + m = re.match(r'^([A-Z]+)(\d+)([^"]+")( *)([^%"]*)(%?")$', line) + if m: + state = list(m.groups()) + print(' ' + line) ]]) -AT_CHECK([$PERL num-out-decmp.pl < expout.cmp > expout.exact]) +AT_CHECK([$PYTHON3 num-out-decmp.py < expout.cmp > expout.exact]) AT_CHECK([[inexactify < expout.exact > expout.inexact]]) -AT_DATA([num-out-compare.pl], -[[#! /usr/bin/perl -w +AT_DATA([num-out-compare.py], +[[#! /usr/bin/python3 + +import getopt +import itertools +import re +import sys + +def usage(): + print("""\ +%s: compare expected and actual numeric formatting output +usage: %s [OPTION...] EXPECTED ACTUAL +where EXPECTED is the file containing expected output +and ACTUAL is the file containing actual output. +Options: + -e, --exact: Require numbers to be exactly equal. + (By default, small differences are permitted.) + -s, --spss: Ignore most SPSS formatting bugs in EXPECTED. + (A few differences are not compensated) + -v, --verbose: Use once to summarize errors and differences. + Use twice for details of differences.""" + % (sys.argv[0], sys.argv[0])) + sys.exit(0) + +exact = 0 +spss = 0 +verbose = 0 + +options, args = getopt.gnu_getopt(sys.argv[1:], 'esvh', + ['exact', 'spss', 'verbose', 'help']) +for key, value in options: + if key in ['-e', '--exact']: + exact = True + elif key in ['-s', '--spss']: + spss = True + elif key in ['-v', '--verbose']: + verbose += 1 + elif key in ['-h', '--help']: + usage() + else: + assert False +if len(args) != 2: + sys.stderr.write("%s\n" % len(args)) + sys.stderr.write("exactly two nonoption arguments are required " + "(use --help for help)\n") + sys.exit(1) + +def increment(n): + """Returns 'n' incremented by one unit in its final decimal place. + """ -use strict; -use warnings 'all'; -use Getopt::Long; + last_digit = None + for i, c in enumerate(n): + if c == 'E': + break + if c.isdigit(): + last_digit = i + if last_digit is None: + return n -my $exact = 0; -my $spss = 0; -my $verbose = 0; -Getopt::Long::Configure ("bundling"); -GetOptions ("e|exact!" => \$exact, - "s|spss!" => \$spss, - "v|verbose+" => \$verbose, - "h|help" => sub { usage (0) }) - or usage (1); + for i in range(last_digit, -1, -1): + c = n[i] + if c == '9': + n[i] = '0' + elif c in '012345678': + n[i] = chr(ord(c) + 1) + break + if i < 0: + n = '1' + n + return n -sub usage { - print "$0: compare expected and actual numeric formatting output\n"; - print "usage: $0 [OPTION...] EXPECTED ACTUAL\n"; - print "where EXPECTED is the file containing expected output\n"; - print "and ACTUAL is the file containing actual output.\n"; - print "Options:\n"; - print " -e, --exact: Require numbers to be exactly equal.\n"; - print " (By default, small differences are permitted.)\n"; - print " -s, --spss: Ignore most SPSS formatting bugs in EXPECTED.\n"; - print " (A few differences are not compensated)\n"; - print " -v, --verbose: Use once to summarize errors and differences.\n"; - print " Use twice for details of differences.\n"; - exit (@_); -} +n_exact = 0 +bad_round = 0 +approximate = 0 +spss_wtf1 = 0 +spss_wtf2 = 0 +lost_sign = 0 +errors = 0 +line_number = 0 +for a, b in itertools.zip_longest(open(args[0], 'r'), + open(args[1], 'r')): + line_number += 1 + if not b: + print("Extra lines in %s" % args[0]) + errors += 1 + break + if not a: + print("Extra lines in %s" % args[1]) + errors += 1 + break -open (EXPECTED, '<', $ARGV[0]) or die "$ARGV[0]: open: $!\n"; -open (ACTUAL, '<', $ARGV[1]) or die "$ARGV[1]: open: $!\n"; -my ($expr); -my ($bad_round) = 0; -my ($approximate) = 0; -my ($spss_wtf1) = 0; -my ($spss_wtf2) = 0; -my ($lost_sign) = 0; -my ($errors) = 0; -while (defined (my $a = ) && defined (my $b = )) { - chomp $a; - chomp $b; - if ($a eq $b) { - if ($a !~ /^\s*$/ && $a !~ /:/) { - $expr = $a; - $expr =~ s/\s*$//; - $expr =~ s/^\s*//; - } - } else { - my ($fmt, $a_out) = $a =~ /^ (.*): "(.*)"$/ or die; - my ($b_fmt, $b_out) = $b =~ /^ (.*): "(.*)"$/ or die; - die if $fmt ne $b_fmt; - die if $a_out eq $b_out; + a = a.rstrip('\r\n') + b = b.rstrip('\r\n') + if a == b: + n_exact += 1 + if not a.isspace() and ':' not in a: + expr = a.strip() + else: + fmt, a_out = re.match(r'^ (.*): "(.*)"$', a).groups() + b_fmt, b_out = re.match(r'^ (.*): "(.*)"$', b).groups() + assert fmt == b_fmt + assert a_out != b_out - if (!$exact) { - if (increment ($a_out) eq $b_out || increment ($b_out) eq $a_out) { - $approximate++; - next; - } - } - if ($spss) { - if ($a_out =~ /0.*0/ && $a_out !~ /[1-9]/) { - $bad_round++; - next; - } elsif ($a_out =~ /\*/ && $a_out !~ /^\*+$/) { - $spss_wtf1++; - next; - } elsif ($expr =~ /^-/ - && $a_out =~ /^\*+$/ - && $b_out =~ /-\d(\.\d*#*)?E[-+]\d\d\d/ - && $fmt =~ /^E/) { - $spss_wtf2++; - next; - } elsif ($expr =~ /^-/ - && (($a_out !~ /-/ && $a_out =~ /[1-9]/ && $b_out =~ /-/) - || ($a_out =~ /^[0-9]+$/ && $b_out =~ /^\*+$/))) { - $lost_sign++; - next; - } - } - print "$.: $expr in $fmt: expected \"$a_out\", got \"$b_out\"\n" - if $verbose > 1; - $errors++; - } -} -while () { - print "Extra lines in $ARGV[0]\n"; - $errors++; - last; -} -while () { - print "Extra lines in $ARGV[1]\n"; - $errors++; - last; -} -if ($verbose) { - print "$errors errors\n"; - if (!$exact) { - print "$approximate approximate matches\n"; - } - if ($spss) { - print "$bad_round bad rounds\n"; - print "$spss_wtf1 SPSS WTF 1\n"; - print "$spss_wtf2 SPSS WTF 2\n"; - print "$lost_sign lost signs\n"; - } -} -exit ($errors > 0); + if not exact: + if increment(a_out) == b_out or increment(b_out) == a_out: + approximate += 1 + continue + if spss: + if re.search(r'0.*0', a_out) and not re.search(r'[1-9]', a_out): + bad_round += 1 + continue + elif '*' in a_out and len(a_out.strip('*')): + spss_wtf1 += 1 + continue + elif (expr.startswith('-') + and re.fullmatch(r'\*+', a_out) + and re.match(r'-\d(\.\d*#*)?E[-+]\d\d\d', b_out) + and fmt.startswith('E')): + spss_wtf2 += 1 + continue + elif (expr.startswith('-') + and (('-' not in a_out + and re.search(r'[1-9]', a_out) + and '-' in b_out) + or (a_out.isdigit() + and re.fullmatch(r'\*+', b_out)))): + lost_sign += 1 + continue -# Returns the argument value incremented by one unit in its final -# decimal place. -sub increment { - local ($_) = @_; - my ($last_digit, $i); - for ($i = 0; $i < length $_; $i++) { - my ($c) = substr ($_, $i, 1); - last if ($c eq 'E'); - $last_digit = $i if $c =~ /[0-9]/; - } - return $_ if !defined $last_digit; - for ($i = $last_digit; $i >= 0; $i--) { - my ($c) = substr ($_, $i, 1); - if ($c eq '9') { - substr ($_, $i, 1) = '0'; - } elsif ($c =~ /[0-8]/) { - substr ($_, $i, 1) = chr (ord ($c) + 1); - last; - } - } - $_ = "1$_" if $i < 0; - return $_; -} + if verbose > 1: + print('%s: %s in %s: expected "%s", got "%s' + % (line_number, expr, fmt, a_out, b_out)) + errors += 1 +if verbose: + print("%s exact matches" % n_exact) + print("%s errors" % errors) + if not exact: + print('%s approximate matches' %approximate) + if spss: + print("%s bad rounds" % bad_round) + print("%s SPSS WTF 1" % spss_wtf1) + print("%s SPSS WTF 2" % spss_wtf2) + print("%s lost signs" % lost_sign) +sys.exit(1 if errors else 0) ]]) -AT_CHECK([$PERL num-out-compare.pl $PSPP_NUM_OUT_COMPARE_FLAGS expout.inexact output.inexact]) +AT_CHECK([$PYTHON3 num-out-compare.py $PSPP_NUM_OUT_COMPARE_FLAGS expout.inexact output.inexact]) AT_CLEANUP AT_SETUP([non-ASCII custom currency formats]) diff --git a/tests/language/data-io/data-reader.at b/tests/language/data-io/data-reader.at index 4156abf869..47c145b27d 100644 --- a/tests/language/data-io/data-reader.at +++ b/tests/language/data-io/data-reader.at @@ -91,83 +91,137 @@ $3 12-31-1999 682 ]) - AT_DATA([make-binary.pl], [dnl -use strict; -use warnings; + AT_DATA([make-binary.py], [[ +#! /usr/bin/python3 + +import struct +import sys + +# This random number generator and the test for it below are drawn +# from Park and Miller, "Random Number Generators: Good Ones are Hard +# to Come By", Communications of the ACM 31:10 (October 1988). It is +# documented to function properly on systems with a 46-bit or longer +# real significand, which includes systems that have 64-bit IEEE reals +# (with 53-bit significand). The test should catch any systems for +# which this is not true, in any case. +def my_rand(modulus): + global seed + a = 16807 + m = 2147483647 + tmp = a * seed + seed = tmp - m * (tmp // m) + return seed % modulus + +# Test the random number generator for reproducibility, +# then reset the seed +seed = 1 +for i in range(10000): + my_rand(1) +assert seed == 1043618065 +seed = 1 # ASCII to EBCDIC translation table -our ($ascii2ebcdic) = "" -. "\x00\x01\x02\x03\x37\x2d\x2e\x2f" -. "\x16\x05\x25\x0b\x0c\x0d\x0e\x0f" -. "\x10\x11\x12\x13\x3c\x3d\x32\x26" -. "\x18\x19\x3f\x27\x1c\x1d\x1e\x1f" -. "\x40\x5a\x7f\x7b\x5b\x6c\x50\x7d" -. "\x4d\x5d\x5c\x4e\x6b\x60\x4b\x61" -. "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" -. "\xf8\xf9\x7a\x5e\x4c\x7e\x6e\x6f" -. "\x7c\xc1\xc2\xc3\xc4\xc5\xc6\xc7" -. "\xc8\xc9\xd1\xd2\xd3\xd4\xd5\xd6" -. "\xd7\xd8\xd9\xe2\xe3\xe4\xe5\xe6" -. "\xe7\xe8\xe9\xad\xe0\xbd\x9a\x6d" -. "\x79\x81\x82\x83\x84\x85\x86\x87" -. "\x88\x89\x91\x92\x93\x94\x95\x96" -. "\x97\x98\x99\xa2\xa3\xa4\xa5\xa6" -. "\xa7\xa8\xa9\xc0\x4f\xd0\x5f\x07" -. "\x20\x21\x22\x23\x24\x15\x06\x17" -. "\x28\x29\x2a\x2b\x2c\x09\x0a\x1b" -. "\x30\x31\x1a\x33\x34\x35\x36\x08" -. "\x38\x39\x3a\x3b\x04\x14\x3e\xe1" -. "\x41\x42\x43\x44\x45\x46\x47\x48" -. "\x49\x51\x52\x53\x54\x55\x56\x57" -. "\x58\x59\x62\x63\x64\x65\x66\x67" -. "\x68\x69\x70\x71\x72\x73\x74\x75" -. "\x76\x77\x78\x80\x8a\x8b\x8c\x8d" -. "\x8e\x8f\x90\x6a\x9b\x9c\x9d\x9e" -. "\x9f\xa0\xaa\xab\xac\x4a\xae\xaf" -. "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" -. "\xb8\xb9\xba\xbb\xbc\xa1\xbe\xbf" -. "\xca\xcb\xcc\xcd\xce\xcf\xda\xdb" -. "\xdc\xdd\xde\xdf\xea\xeb\xec\xed" -. "\xee\xef\xfa\xfb\xfc\xfd\xfe\xff"; -length ($ascii2ebcdic) == 256 || die; - -open (INPUT, '<', 'input.txt') or die "input.txt: open: $!\n"; -my (@data) = or die; -close (INPUT) or die; -chomp $_ foreach @data; - -our @records; - -$2 - -sub a2e { - local ($_) = @_; - my ($s) = ""; - foreach (split (//)) { - $s .= substr ($ascii2ebcdic, ord, 1); - } - return $s; -} - -sub min { - my ($a, $b) = @_; - return $a < $b ? $a : $b -} - -sub dump_records { - while (@records) { - my ($n) = min (int (rand (5)) + 1, scalar (@records)); - my (@r) = splice (@records, 0, $n); - my ($len) = 0; - $len += length foreach @r; - print pack ("n xx", $len + 4); - print foreach @r; - } -} -]) - AT_CHECK([$PERL make-binary.pl < input.txt > input.bin]) +ascii2ebcdic = ( + 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, + 0x16, 0x05, 0x25, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, + 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f, + 0x40, 0x5a, 0x7f, 0x7b, 0x5b, 0x6c, 0x50, 0x7d, + 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f, + 0x7c, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, + 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, + 0xe7, 0xe8, 0xe9, 0xad, 0xe0, 0xbd, 0x9a, 0x6d, + 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, + 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, + 0xa7, 0xa8, 0xa9, 0xc0, 0x4f, 0xd0, 0x5f, 0x07, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x09, 0x0a, 0x1b, + 0x30, 0x31, 0x1a, 0x33, 0x34, 0x35, 0x36, 0x08, + 0x38, 0x39, 0x3a, 0x3b, 0x04, 0x14, 0x3e, 0xe1, + 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, + 0x49, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, + 0x76, 0x77, 0x78, 0x80, 0x8a, 0x8b, 0x8c, 0x8d, + 0x8e, 0x8f, 0x90, 0x6a, 0x9b, 0x9c, 0x9d, 0x9e, + 0x9f, 0xa0, 0xaa, 0xab, 0xac, 0x4a, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xa1, 0xbe, 0xbf, + 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xda, 0xdb, + 0xdc, 0xdd, 0xde, 0xdf, 0xea, 0xeb, 0xec, 0xed, + 0xee, 0xef, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff) +assert len(ascii2ebcdic) == 256 + +def a2e(s): + return bytearray((ascii2ebcdic[ord(c)] for c in s)) + +def dump_records(out, records): + while records: + n = min(my_rand(5) + 1, len(records)) + r = records[:n] + records[:n] = [] + + count = sum((len(rec) for rec in r)) + out.buffer.write(struct.pack(">H xx", count + 4)) + for rec in r: + out.buffer.write(rec) + +data = [] +for line in open('input.txt', 'r'): + data += [line.rstrip('\r\n')] + +# MODE=BINARY +out = open('binary.bin', 'w') +for item in data: + reclen = struct.pack("H xx', len(item) + 4) + a2e(item)] +dump_records(out, records) +out.close() + +# MODE=360 /RECFORM=SPANNED +out = open('spanned.bin', 'w') +records = [] +for line in data: + r = [] + while line: + n = min(my_rand(5), len(line)) + r += [line[:n]] + line = line[n:] + for i, s in enumerate(r): + scc = (0 if len(r) == 1 + else 1 if i == 0 + else 2 if i == len(r) - 1 + else 3) + records += [struct.pack('>H B x', len(s) + 4, scc) + a2e(s)] +dump_records(out, records) +out.close() +]]) + AT_CHECK([$PYTHON3 make-binary.py]) AT_DATA([data-reader.sps], [dnl -FILE HANDLE input/NAME='input.bin'/$1. +FILE HANDLE input/NAME='$2'/$1. DATA LIST FIXED FILE=input NOTABLE /1 start 1-10 (ADATE) /2 end 1-10 (ADATE) @@ -206,45 +260,10 @@ start,end,count ]) AT_CLEANUP]) -DATA_READER_BINARY([MODE=BINARY], - [for $_ (@data) { - my ($reclen) = pack ("V", length); - print $reclen, $_, $reclen; - }]) - -DATA_READER_BINARY([MODE=360 /RECFORM=FIXED /LRECL=32], - [my ($lrecl) = 32; - for $_ (@data) { - my ($out) = substr ($_, 0, $lrecl); - $out .= ' ' x ($lrecl - length ($out)); - length ($out) == 32 or die; - print +a2e ($out); - }], +DATA_READER_BINARY([MODE=BINARY], [binary.bin]) +DATA_READER_BINARY([MODE=360 /RECFORM=FIXED /LRECL=32], [fixed.bin], [AT_CHECK([i18n-test supports_encodings EBCDIC-US])]) - -DATA_READER_BINARY([MODE=360 /RECFORM=VARIABLE], - [for $_ (@data) { - push (@records, pack ("n xx", length ($_) + 4) . a2e ($_)); - } - dump_records ();], +DATA_READER_BINARY([MODE=360 /RECFORM=VARIABLE], [variable.bin], [AT_CHECK([i18n-test supports_encodings EBCDIC-US])]) - -DATA_READER_BINARY([MODE=360 /RECFORM=SPANNED], - [[for my $line (@data) { - local ($_) = $line; - my (@r); - while (length) { - my ($n) = min (int (rand (5)), length); - push (@r, substr ($_, 0, $n, '')); - } - foreach my $i (0...$#r) { - my $scc = ($#r == 0 ? 0 - : $i == 0 ? 1 - : $i == $#r ? 2 - : 3); - push (@records, - pack ("nCx", length ($r[$i]) + 4, $scc) . a2e ($r[$i])); - } - } - dump_records ();]], +DATA_READER_BINARY([MODE=360 /RECFORM=SPANNED], [spanned.bin], [AT_CHECK([i18n-test supports_encodings EBCDIC-US])]) diff --git a/tests/language/data-io/get-data-txt.at b/tests/language/data-io/get-data-txt.at index 936bf7d7ab..950c2ad7a9 100644 --- a/tests/language/data-io/get-data-txt.at +++ b/tests/language/data-io/get-data-txt.at @@ -303,12 +303,10 @@ AT_CLEANUP dnl " (fixes Emacs highlighting) AT_SETUP([GET DATA /TYPE=TXT with IMPORTCASE]) -AT_CHECK([$PERL > test.data <<'EOF' -for ($i = 1; $i <= 100; $i++) { - printf "%02d\n", $i; -} -EOF -]) +AT_CHECK([$PYTHON3 > test.data -c ' +for i in range(1, 101): + print("%02d" % i) +']) AT_DATA([get-data.sps], [dnl get data /type=txt /file='test.data' /importcase=first 10 /variables x f8.0. get data /type=txt /file='test.data' /importcase=percent 1 /variables x f8.0. diff --git a/tests/language/stats/examine.at b/tests/language/stats/examine.at index 4e7d6f4d8f..bea69dd354 100644 --- a/tests/language/stats/examine.at +++ b/tests/language/stats/examine.at @@ -678,11 +678,10 @@ dnl Another test that big input doesn't crash. dnl The actual bug that this checks for has been lost. AT_SETUP([EXAMINE -- big input doesn't crash 2]) AT_KEYWORDS([categorical categoricals slow]) -AT_DATA([make-big-input.pl], - [for ($i=0; $i<100000; $i++) { print "AB12\n" }; - for ($i=0; $i<100000; $i++) { print "AB04\n" }; -]) -AT_CHECK([$PERL make-big-input.pl > large.txt]) +AT_CHECK([$PYTHON3 -c ' +for i in range(100000): print("AB12") +for i in range(100000): print("AB04") +' > large.txt]) AT_DATA([examine.sps], [dnl DATA LIST FILE='large.txt' /S 1-2 (A) X 3 . @@ -694,10 +693,7 @@ EXAMINE /A BY X. ]) AT_CHECK([pspp -o pspp.csv examine.sps]) dnl Ignore output -- this is just a no-crash check. -AT_DATA([more-big-input.pl], - [for ($i=0; $i<25000; $i++) { print "AB04\nAB12\n" }; -]) -AT_CHECK([$PERL more-big-input.pl >> large.txt]) +AT_CHECK([$PYTHON3 -c 'for i in range(25000): print("AB04\nAB12")' >> large.txt]) AT_CHECK([pspp -o pspp.csv examine.sps]) dnl Ignore output -- this is just a no-crash check. AT_CLEANUP diff --git a/tests/language/stats/sort-cases.at b/tests/language/stats/sort-cases.at index 5217e64d3f..8d839f9dab 100644 --- a/tests/language/stats/sort-cases.at +++ b/tests/language/stats/sort-cases.at @@ -18,38 +18,29 @@ AT_BANNER([SORT CASES]) m4_divert_push([PREPARE_TESTS]) [sort_cases_gen_data () { - cat > gen-data.pl <<'EOF' -use strict; -use warnings; - -# Generate shuffled data. -my (@data); -for my $i (0...$ARGV[0] - 1) { - push (@data, ($i) x $ARGV[1]); -} -fisher_yates_shuffle (\@data); - -# Output shuffled data. -my (@shuffled) = map ([$data[$_], $_], 0...$#data); -open (SHUFFLED, ">data.txt"); -print SHUFFLED "$data[$_] $_\n" foreach 0...$#data; - -# Output sorted data. -my (@sorted) = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @shuffled; -open (SORTED, ">expout"); -printf SORTED " %8d %8d \n", $_->[0], $_->[1] foreach @sorted; - -# From perlfaq4. -sub fisher_yates_shuffle { - my $deck = shift; # $deck is a reference to an array - my $i = @$deck; - while ($i--) { - my $j = int rand ($i+1); - @$deck[$i,$j] = @$deck[$j,$i]; - } -} + cat > gen-data.py <<'EOF' +#! /usr/bin/python3 + +import random +import sys + +data = [] +for i in range(int(sys.argv[1])): + data += [i] * int(sys.argv[2]) +random.shuffle(data) + +data_txt = open('data.txt', 'w') +for i, item in enumerate(data): + data_txt.write('%s %s\n' % (item, i)) +data_txt.close() + +shuffled = ((item, i) for i, item in enumerate(data)) +expout = open('expout', 'w') +for item, i in sorted(shuffled): + expout.write(' %8d %8d \n' % (item, i)) +expout.close() EOF - $PERL gen-data.pl "$@"] + $PYTHON3 gen-data.py "$@"] } m4_divert_pop([PREPARE_TESTS]) diff --git a/tests/libpspp/line-reader.at b/tests/libpspp/line-reader.at index 0419670db1..1f1dd0c6fc 100644 --- a/tests/libpspp/line-reader.at +++ b/tests/libpspp/line-reader.at @@ -63,10 +63,10 @@ AT_SETUP([read ISO-8859-1 as Auto,ISO-8859-1]) AT_KEYWORDS([line_reader]) AT_CHECK([i18n-test supports_encodings ISO-8859-1]) buffer_size=`line-reader-test buffer-size` -($PERL -e "print 'x' x ($buffer_size - 2)" +($PYTHON3 -c "import sys; sys.stdout.write('x' * ($buffer_size - 2))"; printf '\none line\ntwo lines\nentr\351e\nfour lines\n') > input (printf 'encoded in ASCII (auto)\n\"' - $PERL -e "print 'x' x ($buffer_size - 2)" + $PYTHON3 -c "import sys; sys.stdout.write('x' * ($buffer_size - 2))"; printf '\"\n"one line"\n"two lines"\nencoded in ISO-8859-1\n"entr\303\251e"\n"four lines"\n') > expout AT_CHECK([line-reader-test read input Auto,ISO-8859-1], [0], [expout]) AT_CLEANUP diff --git a/tests/libpspp/u8-istream.at b/tests/libpspp/u8-istream.at index 84ee4514a0..9cedc0eedf 100644 --- a/tests/libpspp/u8-istream.at +++ b/tests/libpspp/u8-istream.at @@ -51,7 +51,7 @@ AT_CLEANUP AT_SETUP([read UTF-8 with character split across input buffers]) AT_KEYWORDS([u8_istream]) buffer_size=`u8-istream-test buffer-size` -($PERL -e "print 'x' x ($buffer_size - 16)" +($PYTHON3 -c "import sys; sys.stdout.write('x' * ($buffer_size - 16))"; printf '\343\201\201\343\201\202\343\201\203\343\201\204\343\201\205\343\201\206\343\201\207\343\201\210\343\201\211\343\201\212\n') > input (echo "UTF-8 mode" cat input @@ -71,7 +71,7 @@ AT_CLEANUP AT_SETUP([read UTF-8 with character split across input and output buffers]) AT_KEYWORDS([u8_istream]) buffer_size=`u8-istream-test buffer-size` -($PERL -e "print 'x' x ($buffer_size - 16)" +($PYTHON3 -c "import sys; sys.stdout.write('x' * ($buffer_size - 16))"; printf '\343\201\201\343\201\202\343\201\203\343\201\204\343\201\205\343\201\206\343\201\207\343\201\210\343\201\211\343\201\212\n') > input (echo "UTF-8 mode" cat input @@ -83,10 +83,10 @@ AT_SETUP([read EUC-JP with character split across input buffers]) AT_KEYWORDS([u8_istream]) AT_CHECK([i18n-test supports_encodings EUC-JP]) buffer_size=`u8-istream-test buffer-size` -($PERL -e "print 'x' x ($buffer_size - 16)" +($PYTHON3 -c "import sys; sys.stdout.write('x' * ($buffer_size - 16))"; printf '\244\241 \244\242 \244\243 \244\244 \244\245 \244\246 \244\247 ' printf '\244\250 \244\251 \244\252\n') > input -($PERL -e "print 'x' x ($buffer_size - 16)" +($PYTHON3 -c "import sys; sys.stdout.write('x' * ($buffer_size - 16))"; printf '\343\201\201\040\343\201\202\040\343\201\203\040\343\201\204\040' printf '\343\201\205\040\343\201\206\040\343\201\207\040\343\201\210\040' printf '\343\201\211\040\343\201\212\n') > expout @@ -106,10 +106,10 @@ AT_SETUP([read EUC-JP with character split across input and output buffers]) AT_KEYWORDS([u8_istream]) AT_CHECK([i18n-test supports_encodings EUC-JP]) buffer_size=`u8-istream-test buffer-size` -($PERL -e "print 'x' x ($buffer_size - 16)" +($PYTHON3 -c "import sys; sys.stdout.write('x' * ($buffer_size - 16))"; printf 'xyz\244\241\244\242\244\243\244\244\244\245\244\246\244\247\244\250' printf '\244\251\244\252\n') > input -($PERL -e "print 'x' x ($buffer_size - 16)" +($PYTHON3 -c "import sys; sys.stdout.write('x' * ($buffer_size - 16))"; printf '\170\171\172\343\201\201\343\201\202\343\201\203\343\201\204\343' printf '\201\205\343\201\206\343\201\207\343\201\210\343\201\211\343\201' printf '\212\n') > expout @@ -145,9 +145,10 @@ AT_SETUP([read ISO-8859-1 as Auto,ISO-8859-1]) AT_KEYWORDS([u8_istream]) AT_CHECK([i18n-test supports_encodings ISO-8859-1]) buffer_size=`u8-istream-test buffer-size` -($PERL -e 'print "xyzzy\n" x int('$buffer_size' * 2.5 / 7)'; printf 'entr\351e\n') > input +($PYTHON3 -c "import sys; sys.stdout.write('xyzzy\n' * ($buffer_size * 5 // 14))" + printf 'entr\351e\n') > input (echo "Auto mode" - $PERL -e 'print "xyzzy\n" x int('$buffer_size' * 2.5 / 7)' + $PYTHON3 -c "import sys; sys.stdout.write('xyzzy\n' * ($buffer_size * 5 // 14))" printf 'entr\303\251e\n') > expout AT_CHECK([u8-istream-test read input Auto,ISO-8859-1], [0], [expout]) AT_CLEANUP diff --git a/tests/math/randist.at b/tests/math/randist.at index 958d03116c..f8af168461 100644 --- a/tests/math/randist.at +++ b/tests/math/randist.at @@ -18,66 +18,78 @@ AT_BANNER([random distributions]) m4_divert_push([PREPARE_TESTS]) [randist_compare () { - cat > compare.pl <<'EOF' -use strict; -use warnings 'all'; + cat > compare.py <<'EOF' +#! /usr/bin/python3 -my ($epsilon) = 1; +import itertools +import sys -open (EXPECTED, '<', $ARGV[0]) or die "$ARGV[0]: open: $!\n"; -open (ACTUAL, '<', $ARGV[1]) or die "$ARGV[1]: open: $!\n"; +def count_decimals(s): + if '.' not in s: + return 0 + else: + return len(s) - s.index('.') -my ($errors) = 0; -LINE: for (;;) { - my $a = ; - my $b = ; +def to_int(s): + return int(s.replace('.', '')) - last if !defined $a && !defined $b; - die "$ARGV[0]:$.: unexpected end of file\n" if !defined $a; - die "$ARGV[1]:$.: unexpected end of file\n" if !defined $b; +EPSILON = 1 - my (@a) = split (' ', $a); - my (@b) = split (' ', $b); - die "$ARGV[1]:$.: contains ". scalar (@b) . " fields but should " - . "contain " . scalar (@a) . "\n" - if $#a != $#b; - foreach my $i (0...$#a) { - die "$ARGV[1]:$.: unexpected number of decimals\n" - if count_decimals ($a[$i]) != count_decimals ($b[$i]); +errors = 0 +line_number = 0 +for a, b in itertools.zip_longest(open(sys.argv[1], 'r'), + open(sys.argv[2], 'r')): + line_number += 1 + if not a: + sys.stderr.write("%s:%s: unexpected end of file\n" + % (sys.argv[2], line_number)) + sys.exit(1) + if not b: + sys.stderr.write("%s:%s: unexpected end of file\n" + % (sys.argv[1], line_number)) + sys.exit(1) - my ($an) = to_int ($a[$i]); - my ($bn) = to_int ($b[$i]); - if ($an ne $bn && ($bn < $an - $epsilon || $bn > $an + $epsilon)) { - $errors++; - if ($errors > 50) { - print "$ARGV[1]: Additional differences suppressed.\n"; - last LINE; - } - print "$ARGV[1]:$.: Values differ from $ARGV[0]:$.\n"; - print "Expected:\n", $a; - print "Calculated:\n", $b; - } - } -} -exit ($errors > 0); + af = a.split() + bf = b.split() + if len(af) != len(bf): + sys.stderr.write("%s:%s: contains %s fields but should contain %s\n" + % (sys.argv[2], line_number, len(b), len(a))) + sys.exit(1) -sub count_decimals { - my ($s) = @_; - return length (substr ($s, index ($s, '.'))); -} + for i in range(len(af)): + if count_decimals(af[i]) != count_decimals(bf[i]): + sys.stderr.write("%s:%s: unexpected number of decimals\n" + % (sys.argv[2], line_number)) + sys.exit(1) + + if af[i] == bf[i]: + continue + + an = to_int(af[i]) + bn = to_int(bf[i]) + if an != bn and (bn < an - EPSILON or bn > an + EPSILON): + errors += 1 + if errors > 50: + print("%s: Additional differences suppressed." + % sys.argv[2]) + break + print("%s:%s: Values differ from %s:%s" + % (sys.argv[2], line_number, + sys.argv[1], line_number)) + print("Expected:\n%s" % a) + print("Calculated:\n%s" % b) + + if errors > 50: + break +sys.exit(1 if errors > 0 else 0) -sub to_int { - local ($_) = @_; - s/\.//; - return $_; -} EOF + $PYTHON3 compare.py "$@" }] m4_divert_pop([PREPARE_TESTS]) m4_define([RANDIST_CHECK], [AT_SETUP([random distributions -- $1]) - randist_compare AT_DATA([$1.in], [$4]) AT_DATA([$1.sps], [dnl DATA LIST LIST FILE='$1.in' NOTABLE SKIP=1 @@ -96,7 +108,7 @@ PRINT OUTFILE='$1.out'/P $2 x $3. EXECUTE. ]) AT_CHECK([pspp -O format=csv $1.sps]) - AT_CHECK([$PERL compare.pl $1.in $1.out]) + AT_CHECK([randist_compare $1.in $1.out]) AT_CLEANUP]) RANDIST_CHECK([beta], [a b], [cdf pdf], [dnl -- 2.30.2