+++ /dev/null
-#! /usr/bin/perl
-# Copyright (C) 2020, 2021Free Software Foundation
-
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-use strict;
-use warnings;
-
-if (-t 1 || @ARGV != 1 || $ARGV[0] eq '--help') {
- print STDERR <<EOF;
-$0: generate code page tables from ICU encoding list
-usage: $0 CONVRTRS-TXT > sys-file-encoding.c
-
-To update the encoding data, get the latest ICU encoding data from:
-https://raw.githubusercontent.com/unicode-org/icu/main/icu4c/source/data/mappings/convrtrs.txt
-EOF
- exit (@ARGV && $ARGV[0] eq '--help' ? 0 : 1);
-}
-
-open (CONVERTERS, '<', $ARGV[0])
- or die "$ARGV[0]: open failed ($!)\n";
-
-our $WINDOWS = 3; # Windows code pages.
-our $IBM = 2; # IBM code pages.
-our $CP = 1; # Java (?) code pages.
-our %sources = ($WINDOWS => "windows", $IBM => "ibm", $CP => "cp");
-
-my $converter = "";
-while (<CONVERTERS>) {
- chomp;
- s/#.*//;
- if (s/^\s+//) {
- $converter .= " $_";
- } else {
- process_converter ($converter);
- $converter = $_;
- }
-}
-process_converter ($converter);
-close (CONVERTERS);
-
-our %codepages;
-
-print <<'EOF';
-/* -*- mode: c; buffer-read-only: t -*-
-
- Generated by sys-file-encoding.pl. Do not modify!
-*/
-
-/*
-PSPP - a program for statistical analysis.
-Copyright (C) 2017 Free Software Foundation, Inc.
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <config.h>
-
-#include "data/sys-file-private.h"
-
-struct sys_encoding sys_codepage_number_to_name[] = {
-EOF
-for my $cpnumber (sort { $a <=> $b } (keys (%codepages))) {
- my $source = max (keys (%{$codepages{$cpnumber}}));
- my $name = ${$codepages{$cpnumber}{$source}}[0];
- print " { $cpnumber, \"$name\" },\n";
-}
-print " { 0, NULL }\n";
-print "};\n\n";
-
-my %names;
-for my $cpnumber (sort { $a <=> $b } (keys (%codepages))) {
- for my $source (keys (%{$codepages{$cpnumber}})) {
- for my $name (@{$codepages{$cpnumber}{$source}}) {
- push(@{$names{$name}{$source}}, $cpnumber);
- }
- }
-}
-print "struct sys_encoding sys_codepage_name_to_number[] = {\n";
-for my $name (sort (keys (%names))) {
- for my $source (reverse (sort (keys (%sources)))) {
- next if !exists ($names{$name}{$source});
- my (@numbers) = @{$names{$name}{$source}};
-
- # The only two encodings that currently print this are KSC_5601
- # and KS_C_5601-1987, for code pages 949 and 51949. It looks to
- # me like the correct code page number is 949, which is the one
- # chosen (because the numbers are in sorted order).
- print " /* $name has multiple numbers for $sources{$source}: @numbers */\n"
- if @numbers > 1;
-
- print " { $numbers[0], \"$name\" },\n";
- last;
- }
-}
-print " { 0, NULL }\n";
-print "};\n";
-
-sub process_converter {
- my ($converter) = @_;
- return if $converter =~ /^\s*$/;
- return if $converter =~ /^\s*\{/;
-
- my %cps;
- my @iana;
- my @other;
-
- my @fields = split (' ', $converter);
- while (@fields) {
- my $name = shift (@fields);
- if (@fields && $fields[0] eq '{') {
- shift (@fields);
-
- my (%standards);
- for (;;) {
- my $standard = shift (@fields);
- last if $standard eq '}';
- $standards{$standard} = 1;
- }
- if (exists $standards{'IANA*'}) {
- unshift (@iana, $name);
- } elsif (exists $standards{'IANA'}) {
- push (@iana, $name);
- } elsif (grep (/\*$/, keys %standards)) {
- unshift (@other, $name);
- } else {
- push (@other, $name);
- }
- } else {
- # Untagged names are completely nonstandard.
- next;
- }
-
- my $number;
- if (($number) = $name =~ /^cp([0-9]+)$/) {
- $cps{$CP} = int ($number);
- } elsif (($number) = $name =~ /^windows-([0-9]+)$/) {
- $cps{$WINDOWS} = int ($number);
- } elsif (($number) = $name =~ /^ibm-([0-9]+)$/) {
- $cps{$IBM} = int ($number);
- } else {
- next;
- }
- }
-
- # If there are no tagged names then this is completely nonstandard.
- return if !@iana && !@other;
-
- $codepages{$cps{$_}}{$_} = [@iana, @other] for keys (%cps);
-}
-
-sub max {
- my ($best);
- for my $x (@_) {
- $best = $x if !defined ($best) || $x > $best;
- }
- return $best;
-}