2 # Copyright (C) 2020, 2021Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 if (-t 1 || @ARGV != 1 || $ARGV[0] eq '--help') {
22 $0: generate code page tables from ICU encoding list
23 usage: $0 CONVRTRS-TXT > sys-file-encoding.c
25 To update the encoding data, get the latest ICU encoding data from:
26 https://raw.githubusercontent.com/unicode-org/icu/main/icu4c/source/data/mappings/convrtrs.txt
28 exit (@ARGV && $ARGV[0] eq '--help' ? 0 : 1);
31 open (CONVERTERS, '<', $ARGV[0])
32 or die "$ARGV[0]: open failed ($!)\n";
34 our $WINDOWS = 3; # Windows code pages.
35 our $IBM = 2; # IBM code pages.
36 our $CP = 1; # Java (?) code pages.
37 our %sources = ($WINDOWS => "windows", $IBM => "ibm", $CP => "cp");
40 while (<CONVERTERS>) {
46 process_converter ($converter);
50 process_converter ($converter);
56 /* -*- mode: c; buffer-read-only: t -*-
58 Generated by sys-file-encoding.pl. Do not modify!
62 PSPP - a program for statistical analysis.
63 Copyright (C) 2017 Free Software Foundation, Inc.
65 This program is free software: you can redistribute it and/or modify
66 it under the terms of the GNU General Public License as published by
67 the Free Software Foundation, either version 3 of the License, or
68 (at your option) any later version.
70 This program is distributed in the hope that it will be useful,
71 but WITHOUT ANY WARRANTY; without even the implied warranty of
72 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
73 GNU General Public License for more details.
75 You should have received a copy of the GNU General Public License
76 along with this program. If not, see <http://www.gnu.org/licenses/>.
81 #include "data/sys-file-private.h"
83 struct sys_encoding sys_codepage_number_to_name[] = {
85 for my $cpnumber (sort { $a <=> $b } (keys (%codepages))) {
86 my $source = max (keys (%{$codepages{$cpnumber}}));
87 my $name = ${$codepages{$cpnumber}{$source}}[0];
88 print " { $cpnumber, \"$name\" },\n";
90 print " { 0, NULL }\n";
94 for my $cpnumber (sort { $a <=> $b } (keys (%codepages))) {
95 for my $source (keys (%{$codepages{$cpnumber}})) {
96 for my $name (@{$codepages{$cpnumber}{$source}}) {
97 push(@{$names{$name}{$source}}, $cpnumber);
101 print "struct sys_encoding sys_codepage_name_to_number[] = {\n";
102 for my $name (sort (keys (%names))) {
103 for my $source (reverse (sort (keys (%sources)))) {
104 next if !exists ($names{$name}{$source});
105 my (@numbers) = @{$names{$name}{$source}};
107 # The only two encodings that currently print this are KSC_5601
108 # and KS_C_5601-1987, for code pages 949 and 51949. It looks to
109 # me like the correct code page number is 949, which is the one
110 # chosen (because the numbers are in sorted order).
111 print " /* $name has multiple numbers for $sources{$source}: @numbers */\n"
114 print " { $numbers[0], \"$name\" },\n";
118 print " { 0, NULL }\n";
121 sub process_converter {
122 my ($converter) = @_;
123 return if $converter =~ /^\s*$/;
124 return if $converter =~ /^\s*\{/;
130 my @fields = split (' ', $converter);
132 my $name = shift (@fields);
133 if (@fields && $fields[0] eq '{') {
138 my $standard = shift (@fields);
139 last if $standard eq '}';
140 $standards{$standard} = 1;
142 if (exists $standards{'IANA*'}) {
143 unshift (@iana, $name);
144 } elsif (exists $standards{'IANA'}) {
146 } elsif (grep (/\*$/, keys %standards)) {
147 unshift (@other, $name);
149 push (@other, $name);
152 # Untagged names are completely nonstandard.
157 if (($number) = $name =~ /^cp([0-9]+)$/) {
158 $cps{$CP} = int ($number);
159 } elsif (($number) = $name =~ /^windows-([0-9]+)$/) {
160 $cps{$WINDOWS} = int ($number);
161 } elsif (($number) = $name =~ /^ibm-([0-9]+)$/) {
162 $cps{$IBM} = int ($number);
168 # If there are no tagged names then this is completely nonstandard.
169 return if !@iana && !@other;
171 $codepages{$cps{$_}}{$_} = [@iana, @other] for keys (%cps);
177 $best = $x if !defined ($best) || $x > $best;