* The GNU Scientific Library (http://www.gnu.org/software/gsl/),
version 1.13 or later, including libgslcblas included with GSL.
- * Perl (http://www.perl.org/), version 5.005_03 or later. Perl is
- required during build but not after installation.
-
* Python (https://python.org/), version 3.4 or later. Python is
required during build but not after installation.
* GNU Spread Sheet Widget (http://www.gnu.org/software/ssw)
version 0.7 or later.
-The following packages are optional:
+The following packages are only needed to build and test the Perl
+module:
+
+ * Perl (https://www.perl.org/), version 5.005_03 or later.
+
+ * The Config::Perl::V module for Perl (https://cpan.org).
+
+ * Optionally, the Text::Diff and Memory::Usage modules for Perl
+ (https://cpan.org). These modules enable PSPP to test its Perl
+ module more thoroughly.
Other optional packages:
interface, but not the Postgresql interface itself, requires the
Postgresql server to be installed.
- * The Text::Diff module for Perl (http://cpan.org). This enables
- PSPP to test the Perl module more thoroughly. It is not needed
- to build or use the Perl module.
-
Basic Installation
==================
* Build changes:
+ - Perl is no longer required to build.
+
+ - Build now requires Python 3.4 or later. (Building PSPP 1.4.0
+ also required Python, but it wasn't properly documented at the
+ time.)
+
- The Cairo and Pango libraries are now required.
- gettext 0.20 or later is now required.
AC_PATH_PROG([PERL], perl, no)
AC_SUBST([PERL])dnl
if test "$PERL" != no && $PERL -e 'require 5.005_03;'; then :; else
- PSPP_REQUIRED_PREREQ([Perl 5.005_03 (or later)])
+ PSPP_OPTIONAL_PREREQ([Perl 5.005_03 (or later)])
fi
# The PSPP autobuilder appends a build number to the PSPP version number,
[yes|no], [],
[AC_MSG_FAILURE([--with-perl-module argument must be 'yes' or 'no'])])
WITH_PERL_MODULE=$with_perl_module],
- [if test x"$cross_compiling" != x"yes"; then
+ [if test "$PERL" != no && test x"$cross_compiling" != x"yes"; then
WITH_PERL_MODULE=yes
else
WITH_PERL_MODULE=no
AM_CONDITIONAL(WITH_PERL_MODULE, test $WITH_PERL_MODULE = yes)
if test $WITH_PERL_MODULE = yes; then
+ if test "$PERL" = no; then
+ PSPP_REQUIRED_PREREQ([Perl 5.005_03 or later (or use --without-perl-module)])
+ fi
CHECK_PERL_MODULE([Config::Perl::V], [],
[PSPP_REQUIRED_PREREQ([Config::Perl::V Perl module (or use --without-perl-module)])])
CHECK_PERL_MODULE([Text::Diff], [],
dist_man_MANS += doc/pspp.1 \
doc/psppire.1
-EXTRA_DIST += doc/get-commands.pl \
+EXTRA_DIST += doc/get-commands.py \
doc/help-pages-list \
doc/prepdoc.sh
-$(srcdir)/doc/ni.texi: $(top_srcdir)/src/language/command.def doc/get-commands.pl
- $(AM_V_GEN)$(PERL) $(top_srcdir)/doc/get-commands.pl $(top_srcdir)/src/language/command.def > $@
+$(srcdir)/doc/ni.texi: $(top_srcdir)/src/language/command.def doc/get-commands.py
+ $(AM_V_GEN)$(PYTHON3) $(top_srcdir)/doc/get-commands.py $(top_srcdir)/src/language/command.def > $@
$(srcdir)/doc/tut.texi:
$(AM_V_GEN)echo "@set example-dir $(examplesdir)" > $@
+++ /dev/null
-#!/usr/bin/perl
-# Creates Texinfo documentation from the source
-
-use strict;
-use warnings 'all';
-
-my ($file) = $ARGV[0];
-open(INFO, $file) || die "Cannot open \"$file\"\n" ;
-print "\@c Generated from $file by get-commands.pl\n";
-print "\@c Do not modify!\n\n";
-
-print "\@table \@asis\n";
-while (<INFO>)
-{
- my ($command, $description)
- = /^\s*UNIMPL_CMD\s*\(\s*"([^"]*)"\s*,\s*"([^"]*)"\)\s*$/
- or next;
- print "\@item \@cmd{$command}\n$description\n\n";
-}
-print "\@end table\n";
-
-print "\@c Local Variables:\n";
-print "\@c buffer-read-only: t\n";
-print "\@c End:\n";
-close(INFO); # Close the file
--- /dev/null
+#! /usr/bin/python3
+# Creates Texinfo documentation from the source
+
+import re
+import sys
+
+print("""\
+@c Generated from %s by get-commands.py
+@c Do not modify!
+
+@table @asis""" % sys.argv[1])
+for line in open(sys.argv[1], 'r'):
+ m = re.match(r'^\s*UNIMPL_CMD\s*\(\s*"([^"]*)"\s*,\s*"([^"]*)"\)\s*$', line)
+ if m:
+ command, description = m.groups()
+ print("@item @cmd{%s}\n%s\n" % (command, description))
+print("""\
+@end table
+@c Local Variables:
+@c buffer-read-only: t
+@c End:""")
+
nodist_src_data_libdata_la_SOURCES = src/data/sys-file-encoding.c
src/data/sys-file-encoding.c: \
- src/data/sys-file-encoding.pl \
+ src/data/sys-file-encoding.py \
src/data/convrtrs.txt
- $(AM_V_GEN)$(PERL) $^ > $@.tmp && mv $@.tmp $@
-EXTRA_DIST += src/data/sys-file-encoding.pl src/data/convrtrs.txt
+ $(AM_V_GEN)$(PYTHON3) $^ > $@.tmp && mv $@.tmp $@
+EXTRA_DIST += src/data/sys-file-encoding.py src/data/convrtrs.txt
+++ /dev/null
-#! /usr/bin/perl
-# Copyright (C) 2020, 2021Free Software Foundation
-
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-use strict;
-use warnings;
-
-if (-t 1 || @ARGV != 1 || $ARGV[0] eq '--help') {
- print STDERR <<EOF;
-$0: generate code page tables from ICU encoding list
-usage: $0 CONVRTRS-TXT > sys-file-encoding.c
-
-To update the encoding data, get the latest ICU encoding data from:
-https://raw.githubusercontent.com/unicode-org/icu/main/icu4c/source/data/mappings/convrtrs.txt
-EOF
- exit (@ARGV && $ARGV[0] eq '--help' ? 0 : 1);
-}
-
-open (CONVERTERS, '<', $ARGV[0])
- or die "$ARGV[0]: open failed ($!)\n";
-
-our $WINDOWS = 3; # Windows code pages.
-our $IBM = 2; # IBM code pages.
-our $CP = 1; # Java (?) code pages.
-our %sources = ($WINDOWS => "windows", $IBM => "ibm", $CP => "cp");
-
-my $converter = "";
-while (<CONVERTERS>) {
- chomp;
- s/#.*//;
- if (s/^\s+//) {
- $converter .= " $_";
- } else {
- process_converter ($converter);
- $converter = $_;
- }
-}
-process_converter ($converter);
-close (CONVERTERS);
-
-our %codepages;
-
-print <<'EOF';
-/* -*- mode: c; buffer-read-only: t -*-
-
- Generated by sys-file-encoding.pl. Do not modify!
-*/
-
-/*
-PSPP - a program for statistical analysis.
-Copyright (C) 2017 Free Software Foundation, Inc.
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <config.h>
-
-#include "data/sys-file-private.h"
-
-struct sys_encoding sys_codepage_number_to_name[] = {
-EOF
-for my $cpnumber (sort { $a <=> $b } (keys (%codepages))) {
- my $source = max (keys (%{$codepages{$cpnumber}}));
- my $name = ${$codepages{$cpnumber}{$source}}[0];
- print " { $cpnumber, \"$name\" },\n";
-}
-print " { 0, NULL }\n";
-print "};\n\n";
-
-my %names;
-for my $cpnumber (sort { $a <=> $b } (keys (%codepages))) {
- for my $source (keys (%{$codepages{$cpnumber}})) {
- for my $name (@{$codepages{$cpnumber}{$source}}) {
- push(@{$names{$name}{$source}}, $cpnumber);
- }
- }
-}
-print "struct sys_encoding sys_codepage_name_to_number[] = {\n";
-for my $name (sort (keys (%names))) {
- for my $source (reverse (sort (keys (%sources)))) {
- next if !exists ($names{$name}{$source});
- my (@numbers) = @{$names{$name}{$source}};
-
- # The only two encodings that currently print this are KSC_5601
- # and KS_C_5601-1987, for code pages 949 and 51949. It looks to
- # me like the correct code page number is 949, which is the one
- # chosen (because the numbers are in sorted order).
- print " /* $name has multiple numbers for $sources{$source}: @numbers */\n"
- if @numbers > 1;
-
- print " { $numbers[0], \"$name\" },\n";
- last;
- }
-}
-print " { 0, NULL }\n";
-print "};\n";
-
-sub process_converter {
- my ($converter) = @_;
- return if $converter =~ /^\s*$/;
- return if $converter =~ /^\s*\{/;
-
- my %cps;
- my @iana;
- my @other;
-
- my @fields = split (' ', $converter);
- while (@fields) {
- my $name = shift (@fields);
- if (@fields && $fields[0] eq '{') {
- shift (@fields);
-
- my (%standards);
- for (;;) {
- my $standard = shift (@fields);
- last if $standard eq '}';
- $standards{$standard} = 1;
- }
- if (exists $standards{'IANA*'}) {
- unshift (@iana, $name);
- } elsif (exists $standards{'IANA'}) {
- push (@iana, $name);
- } elsif (grep (/\*$/, keys %standards)) {
- unshift (@other, $name);
- } else {
- push (@other, $name);
- }
- } else {
- # Untagged names are completely nonstandard.
- next;
- }
-
- my $number;
- if (($number) = $name =~ /^cp([0-9]+)$/) {
- $cps{$CP} = int ($number);
- } elsif (($number) = $name =~ /^windows-([0-9]+)$/) {
- $cps{$WINDOWS} = int ($number);
- } elsif (($number) = $name =~ /^ibm-([0-9]+)$/) {
- $cps{$IBM} = int ($number);
- } else {
- next;
- }
- }
-
- # If there are no tagged names then this is completely nonstandard.
- return if !@iana && !@other;
-
- $codepages{$cps{$_}}{$_} = [@iana, @other] for keys (%cps);
-}
-
-sub max {
- my ($best);
- for my $x (@_) {
- $best = $x if !defined ($best) || $x > $best;
- }
- return $best;
-}
--- /dev/null
+#! /usr/bin/python3
+
+# Copyright (C) 2020, 2021 Free Software Foundation
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import os
+import re
+import sys
+
+def process_converter(fields):
+ if not fields or fields[0] == '{':
+ return
+
+ global codepages
+ cps = {}
+ iana = []
+ other = []
+
+ i = 0
+ while i < len(fields):
+ name = fields[i]
+ i += 1
+
+ if i < len(fields) and fields[i] == '{':
+ i += 1
+
+ standards = set()
+ while True:
+ standard = fields[i]
+ i += 1
+ if standard == '}':
+ break
+ standards.add(standard)
+ if 'IANA*' in standards:
+ iana = [name] + iana
+ elif 'IANA' in standards:
+ iana += [name]
+ elif any(map(lambda s: s.endswith('*'), standards)):
+ other = [name] + other
+ else:
+ other += [name]
+ else:
+ # Untagged names are completely nonstandard.
+ continue
+
+ m = re.match(r'cp([0-9]+)$', name)
+ if m:
+ cps[CP] = int(m.group(1))
+ continue
+
+ m = re.match(r'windows-([0-9]+)$', name)
+ if m:
+ cps[WINDOWS] = int(m.group(1))
+ continue
+
+ m = re.match(r'ibm-([0-9]+)$', name)
+ if m:
+ cps[IBM] = int(m.group(1))
+ continue
+
+ # If there are no tagged names then this is completely nonstandard.
+ if not iana and not other:
+ return
+
+ for cp in cps.keys():
+ codepages.setdefault(cps[cp], {})[cp] = iana + other
+
+if len(sys.argv) != 2 or sys.argv[1] == '--help':
+ sys.stderr.write("""\
+%s: generate code page tables from ICU encoding list
+usage: %s CONVRTRS-TXT > sys-file-encoding.c
+
+To update the encoding data, get the latest ICU encoding data from:
+https://raw.githubusercontent.com/unicode-org/icu/\
+main/icu4c/source/data/mappings/convrtrs.txt
+""" % (sys.argv[0], sys.argv[0]))
+ sys.exit(0 if len(sys.argv) == 2 and sys.argv[1] == '--help' else 1)
+
+WINDOWS = 3 # Windows code pages.
+IBM = 2 # IBM code pages.
+CP = 1 # Java (?) code pages.
+sources = { WINDOWS: "windows", IBM: "ibm", CP: "cp" }
+
+codepages = {}
+
+converter = []
+for line in open(sys.argv[1], 'r'):
+ line = line.rstrip()
+ comment_ofs = line.find('#')
+ if comment_ofs >= 0:
+ line = line[:comment_ofs]
+ if line.lstrip() == line:
+ process_converter(converter)
+ converter = []
+ converter += line.split()
+process_converter(converter)
+
+print("""\
+/* -*- mode: c; buffer-read-only: t -*-
+
+ Generated by sys-file-encoding.py. Do not modify!
+*/
+
+/*
+PSPP - a program for statistical analysis.
+Copyright (C) 2017 Free Software Foundation, Inc.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <config.h>
+
+#include "data/sys-file-private.h"
+
+struct sys_encoding sys_codepage_number_to_name[] = {""")
+
+for cpnumber, value in sorted(codepages.items()):
+ source = max(value.keys())
+ name = value[source][0]
+ print(' { %s, "%s" },' % (cpnumber, name))
+print(""" { 0, NULL }
+};
+""")
+
+names = {}
+for cpnumber, value in sorted(codepages.items()):
+ for source, value2 in value.items():
+ for name in value2:
+ names.setdefault(name, {}).setdefault(source, []).append(cpnumber)
+
+print('struct sys_encoding sys_codepage_name_to_number[] = {')
+for name in sorted(names.keys()):
+ for source in sorted(sources.keys(), reverse=True):
+ if source not in names[name]:
+ continue
+
+ numbers = names[name][source]
+
+ # The only two encodings that currently print this are KSC_5601
+ # and KS_C_5601-1987, for code pages 949 and 51949. It looks to
+ # me like the correct code page number is 949, which is the one
+ # chosen (because the numbers are in sorted order).
+ if len(numbers) > 1:
+ print(' /* %s has multiple numbers for %s: %s */'
+ % (name, sources[source], ' '.join(map(str, numbers))))
+ print(' { %s, "%s" },' % (numbers[0], name))
+ break
+print("""\
+ { 0, NULL }
+};""")
+
dnl
AT_BANNER([data input (data-in)])
-m4_divert_push([PREPARE_TESTS])
-[data_in_prng () {
- cat > my-rand.pl <<'EOF'
+AT_SETUP([numeric input formats])
+AT_KEYWORDS([data-in slow])
+data_in_prng
+AT_DATA([num-in.py],
+[[#! /usr/bin/python3
+
+import math
+import re
+
# This random number generator and the test for it below are drawn
# from Park and Miller, "Random Number Generators: Good Ones are Hard
# to Come By", Communications of the ACM 31:10 (October 1988). It is
# real significand, which includes systems that have 64-bit IEEE reals
# (with 53-bit significand). The test should catch any systems for
# which this is not true, in any case.
+def my_rand(modulus):
+ global seed
+ a = 16807
+ m = 2147483647
+ tmp = a * seed
+ seed = tmp - m * (tmp // m)
+ return seed % modulus
+
+# Test the random number generator for reproducibility,
+# then reset the seed
+seed = 1
+for i in range(10000):
+ my_rand(1)
+assert seed == 1043618065
+seed = 1
+
+def permute_zeros(fraction, exponent):
+ frac_rep = "%f" % fraction
+ leading_zeros = len(frac_rep) - len(frac_rep.lstrip('0'))
+ trailing_zeros = len(re.search(r'(\.?0*)$', frac_rep).group(1))
+ for i in range(leading_zeros + 1):
+ for j in range(trailing_zeros + 1):
+ trimmed = frac_rep[i:len(frac_rep) - j]
+ if trimmed == '.' or not trimmed:
+ continue
+
+ permute_commas(trimmed, exponent)
+
+def permute_commas(frac_rep, exponent):
+ permute_dot_comma(frac_rep, exponent)
+ pos = my_rand(len(frac_rep) + 1)
+ frac_rep = "%s,%s" % (frac_rep[:pos], frac_rep[pos:])
+ permute_dot_comma(frac_rep, exponent)
+
+def permute_dot_comma(frac_rep, exponent):
+ permute_exponent_syntax(frac_rep, exponent)
+ if ',' in frac_rep or '.' in frac_rep:
+ frac_rep = frac_rep.translate(str.maketrans('.,', ',.'))
+ permute_exponent_syntax(frac_rep, exponent)
+
+def permute_exponent_syntax(frac_rep, exponent):
+ if exponent == 0:
+ e = pick(('', 'e0', 'e-0', 'e+0', '-0', '+0'))
+ elif exponent > 0:
+ e = pick(("e%s" % exponent, "e+%s" % exponent, "+%s" % exponent))
+ else:
+ abs_exp = -exponent
+ e = pick(("e-%s" % abs_exp, "e-%s" % abs_exp, "-%s" % abs_exp))
+ permute_sign_and_affix(frac_rep, e)
+
+def permute_sign_and_affix(frac_rep, exp_rep):
+ for prefix in (pick(('', '$')),
+ pick(('-', '-$', '$-', '$-$')),
+ pick(('+', '+$', '$+', '$+$'))):
+ for suffix in ('', '%'):
+ permute_spaces(prefix + frac_rep + exp_rep + suffix)
+
+def permute_spaces(s):
+ fields = re.sub(r'([-+\$e%])', r' \1 ', s).split()
+ print(''.join(fields))
+
+ if len(fields) > 1:
+ pos = my_rand(len(fields) - 1) + 1
+ print("%s %s" % (''.join(fields[:pos]),
+ ''.join(fields[pos:])))
+
+def pick(choices):
+ return choices[my_rand(len(choices))]
+
+for number in (0, 1, .5, .015625, 123):
+ base_exp = math.floor(math.log10(number)) if number else 0
+ for offset in range(-3, 4):
+ exponent = base_exp + offset
+ fraction = number / 10**offset
+
+ permute_zeros(fraction, exponent)
-our ($seed) = 1;
-sub my_rand {
- my ($modulo) = @_;
- my ($a) = 16807;
- my ($m) = 2147483647;
- my ($tmp) = $a * $seed;
- $seed = $tmp - $m * int ($tmp / $m);
- return $seed % $modulo;
-}
-EOF
- cat > test-my-rand.pl <<'EOF'
-#! /usr/bin/perl
-use strict;
-use warnings;
-do './my-rand.pl';
-my_rand (1) foreach 1...10000;
-our $seed;
-die $seed if $seed != 1043618065;
-EOF
-}
-]
-m4_divert_pop([PREPARE_TESTS])
-
-AT_SETUP([numeric input formats])
-AT_KEYWORDS([data-in slow])
-data_in_prng
-AT_CHECK([$PERL test-my-rand.pl])
-AT_DATA([num-in.pl],
-[[#! /usr/bin/perl
-
-use POSIX;
-use strict;
-use warnings;
-
-do './my-rand.pl';
-
-for my $number (0, 1, .5, .015625, 123) {
- my ($base_exp) = floor ($number ? log10 ($number) : 0);
- for my $offset (-3...3) {
- my ($exponent) = $base_exp + $offset;
- my ($fraction) = $number / 10**$offset;
-
- permute_zeros ($fraction, $exponent);
- }
-}
-
-sub permute_zeros {
- my ($fraction, $exponent) = @_;
-
- my ($frac_rep) = sprintf ("%f", $fraction);
- my ($leading_zeros) = length (($frac_rep =~ /^(0*)/)[0]);
- my ($trailing_zeros) = length (($frac_rep =~ /(\.?0*)$/)[0]);
- for my $i (0...$leading_zeros) {
- for my $j (0...$trailing_zeros) {
- my ($trimmed) = substr ($frac_rep, $i,
- length ($frac_rep) - $i - $j);
- next if $trimmed eq '.' || $trimmed eq '';
-
- permute_commas ($trimmed, $exponent);
- }
- }
-}
-
-sub permute_commas {
- my ($frac_rep, $exponent) = @_;
- permute_dot_comma ($frac_rep, $exponent);
- my ($pos) = int (my_rand (length ($frac_rep) + 1));
- $frac_rep = substr ($frac_rep, 0, $pos) . "," . substr ($frac_rep, $pos);
- permute_dot_comma ($frac_rep, $exponent);
-}
-
-sub permute_dot_comma {
- my ($frac_rep, $exponent) = @_;
- permute_exponent_syntax ($frac_rep, $exponent);
- if ($frac_rep =~ /[,.]/) {
- $frac_rep =~ tr/.,/,./;
- permute_exponent_syntax ($frac_rep, $exponent);
- }
-}
-
-sub permute_exponent_syntax {
- my ($frac_rep, $exponent) = @_;
- my (@exp_reps);
- if ($exponent == 0) {
- @exp_reps = pick ('', 'e0', 'e-0', 'e+0', '-0', '+0');
- } elsif ($exponent > 0) {
- @exp_reps = pick ("e$exponent", "e+$exponent", "+$exponent");
- } else {
- my ($abs_exp) = -$exponent;
- @exp_reps = pick ("e-$abs_exp", , "e-$abs_exp", "-$abs_exp");
- }
- permute_sign_and_affix ($frac_rep, $_) foreach @exp_reps;
-}
-
-sub permute_sign_and_affix {
- my ($frac_rep, $exp_rep) = @_;
- for my $prefix (pick ('', '$'),
- pick ('-', '-$', '$-', '$-$'),
- pick ('+', '+$', '$+', '$+$')) {
- for my $suffix ('', '%') {
- permute_spaces ("$prefix$frac_rep$exp_rep$suffix");
- }
- }
-}
-
-sub permute_spaces {
- my ($s) = @_;
- $s =~ s/([-+\$e%])/ $1 /g;
- my (@fields) = split (' ', $s);
- print join ('', @fields), "\n";
-
- if ($#fields > 0) {
- my ($pos) = int (my_rand ($#fields)) + 1;
- print join ('', @fields[0...$pos - 1]);
- print " ";
- print join ('', @fields[$pos...$#fields]);
- print "\n";
- }
-}
-
-sub pick {
- return $_[int (my_rand ($#_ + 1))];
-}
]])
-AT_CHECK([$PERL num-in.pl > num-in.data])
+AT_CHECK([$PYTHON3 num-in.py > num-in.data])
AT_DATA([num-in.sps], [dnl
SET ERRORS=NONE.
SET MXERRS=10000000.
AT_CHECK([cat dtime.output], [0], [expout])
AT_CLEANUP
+m4_divert_push([PREPARE_TESTS])
+[number_lines_in_hex () {
+ $PYTHON3 -c '
+import sys
+for i, line in enumerate(sys.stdin):
+ sys.stdout.write(" %04X %s" % (i, line))
+'
+}]
+m4_divert_pop([PREPARE_TESTS])
+
+
AT_SETUP([binary and hexadecimal input (IB, PIB, and PIBHEX formats)])
AT_KEYWORDS([slow])
-AT_CHECK([$PERL -e 'print pack "n", $_ foreach 0...65535' > binhex-in.data])
+AT_CHECK([$PYTHON3 -c '
+import struct
+import sys
+for i in range(65536):
+ sys.stdout.buffer.write(struct.pack(">H", i))' > binhex-in.data])
AT_CHECK([[wc -c < binhex-in.data | sed 's/[ ]//g']], [0], [131072
])
AT_DATA([binhex-in.sps], [dnl
EXECUTE.
])
AT_CHECK([gzip -cd < $top_srcdir/tests/data/binhex-in.expected.cmp.gz | \
- $PERL -pe "printf ' %04X ', $.-1" > expout])
+ number_lines_in_hex > expout])
AT_CHECK([pspp -O format=csv binhex-in.sps], [0])
AT_CHECK([cat binhex-in.out], [0], [expout])
AT_CLEANUP
AT_SETUP([BCD input (P and PK formats)])
AT_KEYWORDS([slow])
-AT_CHECK([$PERL -e 'print pack "n", $_ foreach 0...65535' > bcd-in.data])
+AT_CHECK([$PYTHON3 -c '
+import struct
+import sys
+for i in range(65536):
+ sys.stdout.buffer.write(struct.pack(">H", i))' > bcd-in.data])
AT_CHECK([[wc -c < bcd-in.data | sed 's/[ ]//g']], [0], [131072
])
AT_DATA([bcd-in.sps], [dnl
EXECUTE.
])
AT_CHECK([gzip -cd < $top_srcdir/tests/data/bcd-in.expected.cmp.gz | \
- $PERL -pe "printf ' %04X ', $.-1" > expout])
+ number_lines_in_hex > expout])
AT_CHECK([pspp -O format=csv bcd-in.sps])
AT_CHECK([cat bcd-in.out], [0], [expout])
AT_CLEANUP
AT_SETUP([legacy input (N and Z formats)])
AT_KEYWORDS([slow])
-AT_CHECK([$PERL -e 'print pack "n", $_ foreach 0...65535' > legacy-in.data])
+AT_CHECK([$PYTHON3 -c '
+import struct
+import sys
+for i in range(65536):
+ sys.stdout.buffer.write(struct.pack(">H", i))' > legacy-in.data])
AT_CHECK([[wc -c < legacy-in.data | sed 's/[ ]//g']], [0], [131072
])
AT_DATA([legacy-in.sps], [dnl
EXECUTE.
])
AT_CHECK([gzip -cd < $top_srcdir/tests/data/legacy-in.expected.cmp.gz | \
- $PERL -pe "printf ' %04X ', $.-1" > expout])
+ number_lines_in_hex > expout])
AT_CHECK([pspp -O format=csv legacy-in.sps])
AT_CHECK([cat legacy-in.out], [0], [expout])
AT_CLEANUP
AT_SETUP([numeric format output])
AT_KEYWORDS([data-out slow])
-AT_DATA([num-out.pl],
-[[use strict;
-use warnings 'all';
+AT_DATA([num-out.py],
+[[print("""\
+SET CCA=',,,'.
+SET CCB='-,[[[,]]],-'.
+SET CCC='((,[,],))'.
+SET CCD=',XXX,,-'.
+SET CCE=',,YYY,-'.
+INPUT PROGRAM.
+STRING EXPR(A16).""")
-my @values = qw(0 2 9.5 27 271 999.95 2718 9999.995 27182 271828
+values = """0 2 9.5 27 271 999.95 2718 9999.995 27182 271828
2718281 2**39 2**333 2**-21 -2 -9.5 -27 -271 -999.95 -2718 -9999.995
-27182 -271828 -2718281 -2**39 -2**333 -2**-21 -0 3.125 31.25 314.125
3141.5 31415.875 314159.25 3141592.625 31415926.5 271828182.25
3214567890.5 31415926535.875 -3.125 -31.375 -314.125 -3141.5
-31415.875 -314159.25 -3141592.625 -31415926.5 -271828182.25
--3214567890.5 -31415926535.875);
+-3214567890.5 -31415926535.875""".split()
+for value in values:
+ print("""COMPUTE NUM=%s.
+COMPUTE EXPR='%s'.
+END CASE.""" % (value, value))
-print "SET CCA=',,,'.\n";
-print "SET CCB='-,[[[,]]],-'.\n";
-print "SET CCC='((,[,],))'.\n";
-print "SET CCD=',XXX,,-'.\n";
-print "SET CCE=',,YYY,-'.\n";
-print "INPUT PROGRAM.\n";
-print "STRING EXPR(A16).\n";
-print map ("COMPUTE NUM=$_.\nCOMPUTE EXPR='$_'.\nEND CASE.\n", @values);
-print "END FILE.\n";
-print "END INPUT PROGRAM.\n";
+print("""\
+END FILE.
+END INPUT PROGRAM.""")
-print "PRINT OUTFILE='output.txt'/EXPR.\n";
-for my $format (qw (F COMMA DOT DOLLAR PCT E CCA CCB CCC CCD CCE N Z)) {
- for my $d (0...16) {
- my ($min_w);
- if ($format ne 'E') {
- $min_w = $d + 1;
- $min_w++ if $format eq 'DOLLAR' || $format eq 'PCT';
- $min_w = 2 if $min_w == 1 && ($format =~ /^CC/);
- } else {
- $min_w = $d + 7;
- }
- for my $w ($min_w...40) {
- my ($f) = "$format$w.$d";
- print "PRINT OUTFILE='output.txt'/'$f: \"' NUM($f) '\"'.\n";
- }
- }
- print "PRINT SPACE OUTFILE='output.txt'.\n";
-}
-print "EXECUTE.\n";
+print("PRINT OUTFILE='output.txt'/EXPR.")
+for format in 'F COMMA DOT DOLLAR PCT E CCA CCB CCC CCD CCE N Z'.split():
+ for d in range(17):
+ if format != 'E':
+ min_w = d + 1
+ if format in ('DOLLAR', 'PCT'):
+ min_w += 1
+ if min_w == 1 and format.startswith('CC'):
+ min_w = 2
+ else:
+ min_w = d + 7
+
+ for w in range(min_w, 41):
+ f = "%s%s.%s" % (format, w, d)
+ print("PRINT OUTFILE='output.txt'/'%s: \"' NUM(%s) '\"'." % (f, f))
+ print("PRINT SPACE OUTFILE='output.txt'.")
+print("EXECUTE.")
]])
-AT_CHECK([$PERL num-out.pl > num-out.sps])
+AT_CHECK([$PYTHON3 num-out.py > num-out.sps])
AT_CHECK([pspp -O format=csv num-out.sps])
AT_CHECK([inexactify < output.txt > output.inexact])
AT_CHECK([gzip -cd < $top_srcdir/tests/data/num-out.expected.cmp.gz > expout.cmp])
-AT_DATA([num-out-decmp.pl],
-[[use strict;
-use warnings 'all';
+AT_DATA([num-out-decmp.py],
+[[#! /usr/bin/python3
+
+import re
+import sys
-my (@line);
-while (<>) {
- if (my ($n) = /^\*(\d+)$/) {
- for (1...$n) {
- $line[1]++;
- $line[3] = " $line[3]";
- print ' ', join ('', @line), "\n";
- }
- } elsif (my ($suffix) = /^\$(.*)$/) {
- for my $c (split ('', $suffix)) {
- $line[1]++;
- $line[4] .= $c;
- print ' ', join ('', @line), "\n";
- }
- } elsif (my ($prefix) = /^\^(.*)$/) {
- for my $c (split ('', $prefix)) {
- $line[1]++;
- $line[4] = "$c$line[4]";
- print ' ', join ('', @line), "\n";
- }
- } else {
- @line = /^([A-Z]+)(\d+)([^"]+")( *)([^%"]*)(%?")$/;
- print " $_";
- }
-}
+state = []
+for line in sys.stdin:
+ line = line.rstrip('\r\n')
+ if line.startswith('*'):
+ n = int(line[1:])
+ for i in range(n):
+ state[1] = "%s" % (int(state[1]) + 1)
+ state[3] = ' ' + state[3]
+ print(' ' + ''.join(state))
+ elif line.startswith('$'):
+ for c in line[1:]:
+ state[1] = "%s" % (int(state[1]) + 1)
+ state[4] += c
+ print(' ' + ''.join(state))
+ elif line.startswith('^'):
+ for c in line[1:]:
+ state[1] = "%s" % (int(state[1]) + 1)
+ state[4] = c + state[4]
+ print(' ' + ''.join(state))
+ else:
+ m = re.match(r'^([A-Z]+)(\d+)([^"]+")( *)([^%"]*)(%?")$', line)
+ if m:
+ state = list(m.groups())
+ print(' ' + line)
]])
-AT_CHECK([$PERL num-out-decmp.pl < expout.cmp > expout.exact])
+AT_CHECK([$PYTHON3 num-out-decmp.py < expout.cmp > expout.exact])
AT_CHECK([[inexactify < expout.exact > expout.inexact]])
-AT_DATA([num-out-compare.pl],
-[[#! /usr/bin/perl -w
+AT_DATA([num-out-compare.py],
+[[#! /usr/bin/python3
+
+import getopt
+import itertools
+import re
+import sys
+
+def usage():
+ print("""\
+%s: compare expected and actual numeric formatting output
+usage: %s [OPTION...] EXPECTED ACTUAL
+where EXPECTED is the file containing expected output
+and ACTUAL is the file containing actual output.
+Options:
+ -e, --exact: Require numbers to be exactly equal.
+ (By default, small differences are permitted.)
+ -s, --spss: Ignore most SPSS formatting bugs in EXPECTED.
+ (A few differences are not compensated)
+ -v, --verbose: Use once to summarize errors and differences.
+ Use twice for details of differences."""
+ % (sys.argv[0], sys.argv[0]))
+ sys.exit(0)
+
+exact = 0
+spss = 0
+verbose = 0
+
+options, args = getopt.gnu_getopt(sys.argv[1:], 'esvh',
+ ['exact', 'spss', 'verbose', 'help'])
+for key, value in options:
+ if key in ['-e', '--exact']:
+ exact = True
+ elif key in ['-s', '--spss']:
+ spss = True
+ elif key in ['-v', '--verbose']:
+ verbose += 1
+ elif key in ['-h', '--help']:
+ usage()
+ else:
+ assert False
+if len(args) != 2:
+ sys.stderr.write("%s\n" % len(args))
+ sys.stderr.write("exactly two nonoption arguments are required "
+ "(use --help for help)\n")
+ sys.exit(1)
+
+def increment(n):
+ """Returns 'n' incremented by one unit in its final decimal place.
+ """
-use strict;
-use warnings 'all';
-use Getopt::Long;
+ last_digit = None
+ for i, c in enumerate(n):
+ if c == 'E':
+ break
+ if c.isdigit():
+ last_digit = i
+ if last_digit is None:
+ return n
-my $exact = 0;
-my $spss = 0;
-my $verbose = 0;
-Getopt::Long::Configure ("bundling");
-GetOptions ("e|exact!" => \$exact,
- "s|spss!" => \$spss,
- "v|verbose+" => \$verbose,
- "h|help" => sub { usage (0) })
- or usage (1);
+ for i in range(last_digit, -1, -1):
+ c = n[i]
+ if c == '9':
+ n[i] = '0'
+ elif c in '012345678':
+ n[i] = chr(ord(c) + 1)
+ break
+ if i < 0:
+ n = '1' + n
+ return n
-sub usage {
- print "$0: compare expected and actual numeric formatting output\n";
- print "usage: $0 [OPTION...] EXPECTED ACTUAL\n";
- print "where EXPECTED is the file containing expected output\n";
- print "and ACTUAL is the file containing actual output.\n";
- print "Options:\n";
- print " -e, --exact: Require numbers to be exactly equal.\n";
- print " (By default, small differences are permitted.)\n";
- print " -s, --spss: Ignore most SPSS formatting bugs in EXPECTED.\n";
- print " (A few differences are not compensated)\n";
- print " -v, --verbose: Use once to summarize errors and differences.\n";
- print " Use twice for details of differences.\n";
- exit (@_);
-}
+n_exact = 0
+bad_round = 0
+approximate = 0
+spss_wtf1 = 0
+spss_wtf2 = 0
+lost_sign = 0
+errors = 0
+line_number = 0
+for a, b in itertools.zip_longest(open(args[0], 'r'),
+ open(args[1], 'r')):
+ line_number += 1
+ if not b:
+ print("Extra lines in %s" % args[0])
+ errors += 1
+ break
+ if not a:
+ print("Extra lines in %s" % args[1])
+ errors += 1
+ break
-open (EXPECTED, '<', $ARGV[0]) or die "$ARGV[0]: open: $!\n";
-open (ACTUAL, '<', $ARGV[1]) or die "$ARGV[1]: open: $!\n";
-my ($expr);
-my ($bad_round) = 0;
-my ($approximate) = 0;
-my ($spss_wtf1) = 0;
-my ($spss_wtf2) = 0;
-my ($lost_sign) = 0;
-my ($errors) = 0;
-while (defined (my $a = <EXPECTED>) && defined (my $b = <ACTUAL>)) {
- chomp $a;
- chomp $b;
- if ($a eq $b) {
- if ($a !~ /^\s*$/ && $a !~ /:/) {
- $expr = $a;
- $expr =~ s/\s*$//;
- $expr =~ s/^\s*//;
- }
- } else {
- my ($fmt, $a_out) = $a =~ /^ (.*): "(.*)"$/ or die;
- my ($b_fmt, $b_out) = $b =~ /^ (.*): "(.*)"$/ or die;
- die if $fmt ne $b_fmt;
- die if $a_out eq $b_out;
+ a = a.rstrip('\r\n')
+ b = b.rstrip('\r\n')
+ if a == b:
+ n_exact += 1
+ if not a.isspace() and ':' not in a:
+ expr = a.strip()
+ else:
+ fmt, a_out = re.match(r'^ (.*): "(.*)"$', a).groups()
+ b_fmt, b_out = re.match(r'^ (.*): "(.*)"$', b).groups()
+ assert fmt == b_fmt
+ assert a_out != b_out
- if (!$exact) {
- if (increment ($a_out) eq $b_out || increment ($b_out) eq $a_out) {
- $approximate++;
- next;
- }
- }
- if ($spss) {
- if ($a_out =~ /0.*0/ && $a_out !~ /[1-9]/) {
- $bad_round++;
- next;
- } elsif ($a_out =~ /\*/ && $a_out !~ /^\*+$/) {
- $spss_wtf1++;
- next;
- } elsif ($expr =~ /^-/
- && $a_out =~ /^\*+$/
- && $b_out =~ /-\d(\.\d*#*)?E[-+]\d\d\d/
- && $fmt =~ /^E/) {
- $spss_wtf2++;
- next;
- } elsif ($expr =~ /^-/
- && (($a_out !~ /-/ && $a_out =~ /[1-9]/ && $b_out =~ /-/)
- || ($a_out =~ /^[0-9]+$/ && $b_out =~ /^\*+$/))) {
- $lost_sign++;
- next;
- }
- }
- print "$.: $expr in $fmt: expected \"$a_out\", got \"$b_out\"\n"
- if $verbose > 1;
- $errors++;
- }
-}
-while (<EXPECTED>) {
- print "Extra lines in $ARGV[0]\n";
- $errors++;
- last;
-}
-while (<ACTUAL>) {
- print "Extra lines in $ARGV[1]\n";
- $errors++;
- last;
-}
-if ($verbose) {
- print "$errors errors\n";
- if (!$exact) {
- print "$approximate approximate matches\n";
- }
- if ($spss) {
- print "$bad_round bad rounds\n";
- print "$spss_wtf1 SPSS WTF 1\n";
- print "$spss_wtf2 SPSS WTF 2\n";
- print "$lost_sign lost signs\n";
- }
-}
-exit ($errors > 0);
+ if not exact:
+ if increment(a_out) == b_out or increment(b_out) == a_out:
+ approximate += 1
+ continue
+ if spss:
+ if re.search(r'0.*0', a_out) and not re.search(r'[1-9]', a_out):
+ bad_round += 1
+ continue
+ elif '*' in a_out and len(a_out.strip('*')):
+ spss_wtf1 += 1
+ continue
+ elif (expr.startswith('-')
+ and re.fullmatch(r'\*+', a_out)
+ and re.match(r'-\d(\.\d*#*)?E[-+]\d\d\d', b_out)
+ and fmt.startswith('E')):
+ spss_wtf2 += 1
+ continue
+ elif (expr.startswith('-')
+ and (('-' not in a_out
+ and re.search(r'[1-9]', a_out)
+ and '-' in b_out)
+ or (a_out.isdigit()
+ and re.fullmatch(r'\*+', b_out)))):
+ lost_sign += 1
+ continue
-# Returns the argument value incremented by one unit in its final
-# decimal place.
-sub increment {
- local ($_) = @_;
- my ($last_digit, $i);
- for ($i = 0; $i < length $_; $i++) {
- my ($c) = substr ($_, $i, 1);
- last if ($c eq 'E');
- $last_digit = $i if $c =~ /[0-9]/;
- }
- return $_ if !defined $last_digit;
- for ($i = $last_digit; $i >= 0; $i--) {
- my ($c) = substr ($_, $i, 1);
- if ($c eq '9') {
- substr ($_, $i, 1) = '0';
- } elsif ($c =~ /[0-8]/) {
- substr ($_, $i, 1) = chr (ord ($c) + 1);
- last;
- }
- }
- $_ = "1$_" if $i < 0;
- return $_;
-}
+ if verbose > 1:
+ print('%s: %s in %s: expected "%s", got "%s'
+ % (line_number, expr, fmt, a_out, b_out))
+ errors += 1
+if verbose:
+ print("%s exact matches" % n_exact)
+ print("%s errors" % errors)
+ if not exact:
+ print('%s approximate matches' %approximate)
+ if spss:
+ print("%s bad rounds" % bad_round)
+ print("%s SPSS WTF 1" % spss_wtf1)
+ print("%s SPSS WTF 2" % spss_wtf2)
+ print("%s lost signs" % lost_sign)
+sys.exit(1 if errors else 0)
]])
-AT_CHECK([$PERL num-out-compare.pl $PSPP_NUM_OUT_COMPARE_FLAGS expout.inexact output.inexact])
+AT_CHECK([$PYTHON3 num-out-compare.py $PSPP_NUM_OUT_COMPARE_FLAGS expout.inexact output.inexact])
AT_CLEANUP
AT_SETUP([non-ASCII custom currency formats])
12-31-1999
682
])
- AT_DATA([make-binary.pl], [dnl
-use strict;
-use warnings;
+ AT_DATA([make-binary.py], [[
+#! /usr/bin/python3
+
+import struct
+import sys
+
+# This random number generator and the test for it below are drawn
+# from Park and Miller, "Random Number Generators: Good Ones are Hard
+# to Come By", Communications of the ACM 31:10 (October 1988). It is
+# documented to function properly on systems with a 46-bit or longer
+# real significand, which includes systems that have 64-bit IEEE reals
+# (with 53-bit significand). The test should catch any systems for
+# which this is not true, in any case.
+def my_rand(modulus):
+ global seed
+ a = 16807
+ m = 2147483647
+ tmp = a * seed
+ seed = tmp - m * (tmp // m)
+ return seed % modulus
+
+# Test the random number generator for reproducibility,
+# then reset the seed
+seed = 1
+for i in range(10000):
+ my_rand(1)
+assert seed == 1043618065
+seed = 1
# ASCII to EBCDIC translation table
-our ($ascii2ebcdic) = ""
-. "\x00\x01\x02\x03\x37\x2d\x2e\x2f"
-. "\x16\x05\x25\x0b\x0c\x0d\x0e\x0f"
-. "\x10\x11\x12\x13\x3c\x3d\x32\x26"
-. "\x18\x19\x3f\x27\x1c\x1d\x1e\x1f"
-. "\x40\x5a\x7f\x7b\x5b\x6c\x50\x7d"
-. "\x4d\x5d\x5c\x4e\x6b\x60\x4b\x61"
-. "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-. "\xf8\xf9\x7a\x5e\x4c\x7e\x6e\x6f"
-. "\x7c\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-. "\xc8\xc9\xd1\xd2\xd3\xd4\xd5\xd6"
-. "\xd7\xd8\xd9\xe2\xe3\xe4\xe5\xe6"
-. "\xe7\xe8\xe9\xad\xe0\xbd\x9a\x6d"
-. "\x79\x81\x82\x83\x84\x85\x86\x87"
-. "\x88\x89\x91\x92\x93\x94\x95\x96"
-. "\x97\x98\x99\xa2\xa3\xa4\xa5\xa6"
-. "\xa7\xa8\xa9\xc0\x4f\xd0\x5f\x07"
-. "\x20\x21\x22\x23\x24\x15\x06\x17"
-. "\x28\x29\x2a\x2b\x2c\x09\x0a\x1b"
-. "\x30\x31\x1a\x33\x34\x35\x36\x08"
-. "\x38\x39\x3a\x3b\x04\x14\x3e\xe1"
-. "\x41\x42\x43\x44\x45\x46\x47\x48"
-. "\x49\x51\x52\x53\x54\x55\x56\x57"
-. "\x58\x59\x62\x63\x64\x65\x66\x67"
-. "\x68\x69\x70\x71\x72\x73\x74\x75"
-. "\x76\x77\x78\x80\x8a\x8b\x8c\x8d"
-. "\x8e\x8f\x90\x6a\x9b\x9c\x9d\x9e"
-. "\x9f\xa0\xaa\xab\xac\x4a\xae\xaf"
-. "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-. "\xb8\xb9\xba\xbb\xbc\xa1\xbe\xbf"
-. "\xca\xcb\xcc\xcd\xce\xcf\xda\xdb"
-. "\xdc\xdd\xde\xdf\xea\xeb\xec\xed"
-. "\xee\xef\xfa\xfb\xfc\xfd\xfe\xff";
-length ($ascii2ebcdic) == 256 || die;
-
-open (INPUT, '<', 'input.txt') or die "input.txt: open: $!\n";
-my (@data) = <INPUT> or die;
-close (INPUT) or die;
-chomp $_ foreach @data;
-
-our @records;
-
-$2
-
-sub a2e {
- local ($_) = @_;
- my ($s) = "";
- foreach (split (//)) {
- $s .= substr ($ascii2ebcdic, ord, 1);
- }
- return $s;
-}
-
-sub min {
- my ($a, $b) = @_;
- return $a < $b ? $a : $b
-}
-
-sub dump_records {
- while (@records) {
- my ($n) = min (int (rand (5)) + 1, scalar (@records));
- my (@r) = splice (@records, 0, $n);
- my ($len) = 0;
- $len += length foreach @r;
- print pack ("n xx", $len + 4);
- print foreach @r;
- }
-}
-])
- AT_CHECK([$PERL make-binary.pl < input.txt > input.bin])
+ascii2ebcdic = (
+ 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f,
+ 0x16, 0x05, 0x25, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26,
+ 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x40, 0x5a, 0x7f, 0x7b, 0x5b, 0x6c, 0x50, 0x7d,
+ 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+ 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,
+ 0x7c, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+ 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
+ 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
+ 0xe7, 0xe8, 0xe9, 0xad, 0xe0, 0xbd, 0x9a, 0x6d,
+ 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
+ 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6,
+ 0xa7, 0xa8, 0xa9, 0xc0, 0x4f, 0xd0, 0x5f, 0x07,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17,
+ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x09, 0x0a, 0x1b,
+ 0x30, 0x31, 0x1a, 0x33, 0x34, 0x35, 0x36, 0x08,
+ 0x38, 0x39, 0x3a, 0x3b, 0x04, 0x14, 0x3e, 0xe1,
+ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+ 0x49, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+ 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75,
+ 0x76, 0x77, 0x78, 0x80, 0x8a, 0x8b, 0x8c, 0x8d,
+ 0x8e, 0x8f, 0x90, 0x6a, 0x9b, 0x9c, 0x9d, 0x9e,
+ 0x9f, 0xa0, 0xaa, 0xab, 0xac, 0x4a, 0xae, 0xaf,
+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xa1, 0xbe, 0xbf,
+ 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xda, 0xdb,
+ 0xdc, 0xdd, 0xde, 0xdf, 0xea, 0xeb, 0xec, 0xed,
+ 0xee, 0xef, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff)
+assert len(ascii2ebcdic) == 256
+
+def a2e(s):
+ return bytearray((ascii2ebcdic[ord(c)] for c in s))
+
+def dump_records(out, records):
+ while records:
+ n = min(my_rand(5) + 1, len(records))
+ r = records[:n]
+ records[:n] = []
+
+ count = sum((len(rec) for rec in r))
+ out.buffer.write(struct.pack(">H xx", count + 4))
+ for rec in r:
+ out.buffer.write(rec)
+
+data = []
+for line in open('input.txt', 'r'):
+ data += [line.rstrip('\r\n')]
+
+# MODE=BINARY
+out = open('binary.bin', 'w')
+for item in data:
+ reclen = struct.pack("<I", len(item))
+ out.buffer.write(reclen)
+ out.buffer.write(bytearray([ord(c) for c in item]))
+ out.buffer.write(reclen)
+out.close()
+
+# MODE=360 /RECFORM=FIXED /LRECL=32
+out = open('fixed.bin', 'w')
+lrecl = 32
+for item in data:
+ s = item[:lrecl]
+ s += ' ' * (lrecl - len(s))
+ assert len(s) == 32
+ out.buffer.write(a2e(s))
+out.close()
+
+# MODE=360 /RECFORM=VARIABLE
+out = open('variable.bin', 'w')
+records = []
+for item in data:
+ records += [struct.pack('>H xx', len(item) + 4) + a2e(item)]
+dump_records(out, records)
+out.close()
+
+# MODE=360 /RECFORM=SPANNED
+out = open('spanned.bin', 'w')
+records = []
+for line in data:
+ r = []
+ while line:
+ n = min(my_rand(5), len(line))
+ r += [line[:n]]
+ line = line[n:]
+ for i, s in enumerate(r):
+ scc = (0 if len(r) == 1
+ else 1 if i == 0
+ else 2 if i == len(r) - 1
+ else 3)
+ records += [struct.pack('>H B x', len(s) + 4, scc) + a2e(s)]
+dump_records(out, records)
+out.close()
+]])
+ AT_CHECK([$PYTHON3 make-binary.py])
AT_DATA([data-reader.sps], [dnl
-FILE HANDLE input/NAME='input.bin'/$1.
+FILE HANDLE input/NAME='$2'/$1.
DATA LIST FIXED FILE=input NOTABLE
/1 start 1-10 (ADATE)
/2 end 1-10 (ADATE)
])
AT_CLEANUP])
-DATA_READER_BINARY([MODE=BINARY],
- [for $_ (@data) {
- my ($reclen) = pack ("V", length);
- print $reclen, $_, $reclen;
- }])
-
-DATA_READER_BINARY([MODE=360 /RECFORM=FIXED /LRECL=32],
- [my ($lrecl) = 32;
- for $_ (@data) {
- my ($out) = substr ($_, 0, $lrecl);
- $out .= ' ' x ($lrecl - length ($out));
- length ($out) == 32 or die;
- print +a2e ($out);
- }],
+DATA_READER_BINARY([MODE=BINARY], [binary.bin])
+DATA_READER_BINARY([MODE=360 /RECFORM=FIXED /LRECL=32], [fixed.bin],
[AT_CHECK([i18n-test supports_encodings EBCDIC-US])])
-
-DATA_READER_BINARY([MODE=360 /RECFORM=VARIABLE],
- [for $_ (@data) {
- push (@records, pack ("n xx", length ($_) + 4) . a2e ($_));
- }
- dump_records ();],
+DATA_READER_BINARY([MODE=360 /RECFORM=VARIABLE], [variable.bin],
[AT_CHECK([i18n-test supports_encodings EBCDIC-US])])
-
-DATA_READER_BINARY([MODE=360 /RECFORM=SPANNED],
- [[for my $line (@data) {
- local ($_) = $line;
- my (@r);
- while (length) {
- my ($n) = min (int (rand (5)), length);
- push (@r, substr ($_, 0, $n, ''));
- }
- foreach my $i (0...$#r) {
- my $scc = ($#r == 0 ? 0
- : $i == 0 ? 1
- : $i == $#r ? 2
- : 3);
- push (@records,
- pack ("nCx", length ($r[$i]) + 4, $scc) . a2e ($r[$i]));
- }
- }
- dump_records ();]],
+DATA_READER_BINARY([MODE=360 /RECFORM=SPANNED], [spanned.bin],
[AT_CHECK([i18n-test supports_encodings EBCDIC-US])])
dnl " (fixes Emacs highlighting)
AT_SETUP([GET DATA /TYPE=TXT with IMPORTCASE])
-AT_CHECK([$PERL > test.data <<'EOF'
-for ($i = 1; $i <= 100; $i++) {
- printf "%02d\n", $i;
-}
-EOF
-])
+AT_CHECK([$PYTHON3 > test.data -c '
+for i in range(1, 101):
+ print("%02d" % i)
+'])
AT_DATA([get-data.sps], [dnl
get data /type=txt /file='test.data' /importcase=first 10 /variables x f8.0.
get data /type=txt /file='test.data' /importcase=percent 1 /variables x f8.0.
dnl The actual bug that this checks for has been lost.
AT_SETUP([EXAMINE -- big input doesn't crash 2])
AT_KEYWORDS([categorical categoricals slow])
-AT_DATA([make-big-input.pl],
- [for ($i=0; $i<100000; $i++) { print "AB12\n" };
- for ($i=0; $i<100000; $i++) { print "AB04\n" };
-])
-AT_CHECK([$PERL make-big-input.pl > large.txt])
+AT_CHECK([$PYTHON3 -c '
+for i in range(100000): print("AB12")
+for i in range(100000): print("AB04")
+' > large.txt])
AT_DATA([examine.sps], [dnl
DATA LIST FILE='large.txt' /S 1-2 (A) X 3 .
])
AT_CHECK([pspp -o pspp.csv examine.sps])
dnl Ignore output -- this is just a no-crash check.
-AT_DATA([more-big-input.pl],
- [for ($i=0; $i<25000; $i++) { print "AB04\nAB12\n" };
-])
-AT_CHECK([$PERL more-big-input.pl >> large.txt])
+AT_CHECK([$PYTHON3 -c 'for i in range(25000): print("AB04\nAB12")' >> large.txt])
AT_CHECK([pspp -o pspp.csv examine.sps])
dnl Ignore output -- this is just a no-crash check.
AT_CLEANUP
m4_divert_push([PREPARE_TESTS])
[sort_cases_gen_data () {
- cat > gen-data.pl <<'EOF'
-use strict;
-use warnings;
-
-# Generate shuffled data.
-my (@data);
-for my $i (0...$ARGV[0] - 1) {
- push (@data, ($i) x $ARGV[1]);
-}
-fisher_yates_shuffle (\@data);
-
-# Output shuffled data.
-my (@shuffled) = map ([$data[$_], $_], 0...$#data);
-open (SHUFFLED, ">data.txt");
-print SHUFFLED "$data[$_] $_\n" foreach 0...$#data;
-
-# Output sorted data.
-my (@sorted) = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @shuffled;
-open (SORTED, ">expout");
-printf SORTED " %8d %8d \n", $_->[0], $_->[1] foreach @sorted;
-
-# From perlfaq4.
-sub fisher_yates_shuffle {
- my $deck = shift; # $deck is a reference to an array
- my $i = @$deck;
- while ($i--) {
- my $j = int rand ($i+1);
- @$deck[$i,$j] = @$deck[$j,$i];
- }
-}
+ cat > gen-data.py <<'EOF'
+#! /usr/bin/python3
+
+import random
+import sys
+
+data = []
+for i in range(int(sys.argv[1])):
+ data += [i] * int(sys.argv[2])
+random.shuffle(data)
+
+data_txt = open('data.txt', 'w')
+for i, item in enumerate(data):
+ data_txt.write('%s %s\n' % (item, i))
+data_txt.close()
+
+shuffled = ((item, i) for i, item in enumerate(data))
+expout = open('expout', 'w')
+for item, i in sorted(shuffled):
+ expout.write(' %8d %8d \n' % (item, i))
+expout.close()
EOF
- $PERL gen-data.pl "$@"]
+ $PYTHON3 gen-data.py "$@"]
}
m4_divert_pop([PREPARE_TESTS])
AT_KEYWORDS([line_reader])
AT_CHECK([i18n-test supports_encodings ISO-8859-1])
buffer_size=`line-reader-test buffer-size`
-($PERL -e "print 'x' x ($buffer_size - 2)"
+($PYTHON3 -c "import sys; sys.stdout.write('x' * ($buffer_size - 2))";
printf '\none line\ntwo lines\nentr\351e\nfour lines\n') > input
(printf 'encoded in ASCII (auto)\n\"'
- $PERL -e "print 'x' x ($buffer_size - 2)"
+ $PYTHON3 -c "import sys; sys.stdout.write('x' * ($buffer_size - 2))";
printf '\"\n"one line"\n"two lines"\nencoded in ISO-8859-1\n"entr\303\251e"\n"four lines"\n') > expout
AT_CHECK([line-reader-test read input Auto,ISO-8859-1], [0], [expout])
AT_CLEANUP
AT_SETUP([read UTF-8 with character split across input buffers])
AT_KEYWORDS([u8_istream])
buffer_size=`u8-istream-test buffer-size`
-($PERL -e "print 'x' x ($buffer_size - 16)"
+($PYTHON3 -c "import sys; sys.stdout.write('x' * ($buffer_size - 16))";
printf '\343\201\201\343\201\202\343\201\203\343\201\204\343\201\205\343\201\206\343\201\207\343\201\210\343\201\211\343\201\212\n') > input
(echo "UTF-8 mode"
cat input
AT_SETUP([read UTF-8 with character split across input and output buffers])
AT_KEYWORDS([u8_istream])
buffer_size=`u8-istream-test buffer-size`
-($PERL -e "print 'x' x ($buffer_size - 16)"
+($PYTHON3 -c "import sys; sys.stdout.write('x' * ($buffer_size - 16))";
printf '\343\201\201\343\201\202\343\201\203\343\201\204\343\201\205\343\201\206\343\201\207\343\201\210\343\201\211\343\201\212\n') > input
(echo "UTF-8 mode"
cat input
AT_KEYWORDS([u8_istream])
AT_CHECK([i18n-test supports_encodings EUC-JP])
buffer_size=`u8-istream-test buffer-size`
-($PERL -e "print 'x' x ($buffer_size - 16)"
+($PYTHON3 -c "import sys; sys.stdout.write('x' * ($buffer_size - 16))";
printf '\244\241 \244\242 \244\243 \244\244 \244\245 \244\246 \244\247 '
printf '\244\250 \244\251 \244\252\n') > input
-($PERL -e "print 'x' x ($buffer_size - 16)"
+($PYTHON3 -c "import sys; sys.stdout.write('x' * ($buffer_size - 16))";
printf '\343\201\201\040\343\201\202\040\343\201\203\040\343\201\204\040'
printf '\343\201\205\040\343\201\206\040\343\201\207\040\343\201\210\040'
printf '\343\201\211\040\343\201\212\n') > expout
AT_KEYWORDS([u8_istream])
AT_CHECK([i18n-test supports_encodings EUC-JP])
buffer_size=`u8-istream-test buffer-size`
-($PERL -e "print 'x' x ($buffer_size - 16)"
+($PYTHON3 -c "import sys; sys.stdout.write('x' * ($buffer_size - 16))";
printf 'xyz\244\241\244\242\244\243\244\244\244\245\244\246\244\247\244\250'
printf '\244\251\244\252\n') > input
-($PERL -e "print 'x' x ($buffer_size - 16)"
+($PYTHON3 -c "import sys; sys.stdout.write('x' * ($buffer_size - 16))";
printf '\170\171\172\343\201\201\343\201\202\343\201\203\343\201\204\343'
printf '\201\205\343\201\206\343\201\207\343\201\210\343\201\211\343\201'
printf '\212\n') > expout
AT_KEYWORDS([u8_istream])
AT_CHECK([i18n-test supports_encodings ISO-8859-1])
buffer_size=`u8-istream-test buffer-size`
-($PERL -e 'print "xyzzy\n" x int('$buffer_size' * 2.5 / 7)'; printf 'entr\351e\n') > input
+($PYTHON3 -c "import sys; sys.stdout.write('xyzzy\n' * ($buffer_size * 5 // 14))"
+ printf 'entr\351e\n') > input
(echo "Auto mode"
- $PERL -e 'print "xyzzy\n" x int('$buffer_size' * 2.5 / 7)'
+ $PYTHON3 -c "import sys; sys.stdout.write('xyzzy\n' * ($buffer_size * 5 // 14))"
printf 'entr\303\251e\n') > expout
AT_CHECK([u8-istream-test read input Auto,ISO-8859-1], [0], [expout])
AT_CLEANUP
m4_divert_push([PREPARE_TESTS])
[randist_compare () {
- cat > compare.pl <<'EOF'
-use strict;
-use warnings 'all';
+ cat > compare.py <<'EOF'
+#! /usr/bin/python3
-my ($epsilon) = 1;
+import itertools
+import sys
-open (EXPECTED, '<', $ARGV[0]) or die "$ARGV[0]: open: $!\n";
-open (ACTUAL, '<', $ARGV[1]) or die "$ARGV[1]: open: $!\n";
+def count_decimals(s):
+ if '.' not in s:
+ return 0
+ else:
+ return len(s) - s.index('.')
-my ($errors) = 0;
-LINE: for (;;) {
- my $a = <EXPECTED>;
- my $b = <ACTUAL>;
+def to_int(s):
+ return int(s.replace('.', ''))
- last if !defined $a && !defined $b;
- die "$ARGV[0]:$.: unexpected end of file\n" if !defined $a;
- die "$ARGV[1]:$.: unexpected end of file\n" if !defined $b;
+EPSILON = 1
- my (@a) = split (' ', $a);
- my (@b) = split (' ', $b);
- die "$ARGV[1]:$.: contains ". scalar (@b) . " fields but should "
- . "contain " . scalar (@a) . "\n"
- if $#a != $#b;
- foreach my $i (0...$#a) {
- die "$ARGV[1]:$.: unexpected number of decimals\n"
- if count_decimals ($a[$i]) != count_decimals ($b[$i]);
+errors = 0
+line_number = 0
+for a, b in itertools.zip_longest(open(sys.argv[1], 'r'),
+ open(sys.argv[2], 'r')):
+ line_number += 1
+ if not a:
+ sys.stderr.write("%s:%s: unexpected end of file\n"
+ % (sys.argv[2], line_number))
+ sys.exit(1)
+ if not b:
+ sys.stderr.write("%s:%s: unexpected end of file\n"
+ % (sys.argv[1], line_number))
+ sys.exit(1)
- my ($an) = to_int ($a[$i]);
- my ($bn) = to_int ($b[$i]);
- if ($an ne $bn && ($bn < $an - $epsilon || $bn > $an + $epsilon)) {
- $errors++;
- if ($errors > 50) {
- print "$ARGV[1]: Additional differences suppressed.\n";
- last LINE;
- }
- print "$ARGV[1]:$.: Values differ from $ARGV[0]:$.\n";
- print "Expected:\n", $a;
- print "Calculated:\n", $b;
- }
- }
-}
-exit ($errors > 0);
+ af = a.split()
+ bf = b.split()
+ if len(af) != len(bf):
+ sys.stderr.write("%s:%s: contains %s fields but should contain %s\n"
+ % (sys.argv[2], line_number, len(b), len(a)))
+ sys.exit(1)
-sub count_decimals {
- my ($s) = @_;
- return length (substr ($s, index ($s, '.')));
-}
+ for i in range(len(af)):
+ if count_decimals(af[i]) != count_decimals(bf[i]):
+ sys.stderr.write("%s:%s: unexpected number of decimals\n"
+ % (sys.argv[2], line_number))
+ sys.exit(1)
+
+ if af[i] == bf[i]:
+ continue
+
+ an = to_int(af[i])
+ bn = to_int(bf[i])
+ if an != bn and (bn < an - EPSILON or bn > an + EPSILON):
+ errors += 1
+ if errors > 50:
+ print("%s: Additional differences suppressed."
+ % sys.argv[2])
+ break
+ print("%s:%s: Values differ from %s:%s"
+ % (sys.argv[2], line_number,
+ sys.argv[1], line_number))
+ print("Expected:\n%s" % a)
+ print("Calculated:\n%s" % b)
+
+ if errors > 50:
+ break
+sys.exit(1 if errors > 0 else 0)
-sub to_int {
- local ($_) = @_;
- s/\.//;
- return $_;
-}
EOF
+ $PYTHON3 compare.py "$@"
}]
m4_divert_pop([PREPARE_TESTS])
m4_define([RANDIST_CHECK],
[AT_SETUP([random distributions -- $1])
- randist_compare
AT_DATA([$1.in], [$4])
AT_DATA([$1.sps], [dnl
DATA LIST LIST FILE='$1.in' NOTABLE SKIP=1
EXECUTE.
])
AT_CHECK([pspp -O format=csv $1.sps])
- AT_CHECK([$PERL compare.pl $1.in $1.out])
+ AT_CHECK([randist_compare $1.in $1.out])
AT_CLEANUP])
RANDIST_CHECK([beta], [a b], [cdf pdf], [dnl