8 $0: generate code page tables from ICU encoding list
9 usage: $0 < convrtrs.txt > sys-file-encoding.c
11 To regenerate the encoding data, get the latest ICU encoding data from:
12 http://source.icu-project.org/repos/icu/icu/trunk/source/data/mappings/convrtrs.txt
13 then convert it with this script using the command above.
15 exit (@ARGV && $ARGV[0] eq '--help' ? 0 : 1);
18 open (CONVERTERS, '<', 'convrtrs.txt')
19 or die "convrtrs.txt: open failed ($!)\n";
21 our $WINDOWS = 3; # Windows code pages.
22 our $IBM = 2; # IBM code pages.
23 our $CP = 1; # Java (?) code pages.
24 our %sources = ($WINDOWS => "windows", $IBM => "ibm", $CP => "cp");
27 while (<CONVERTERS>) {
33 process_converter ($converter);
37 process_converter ($converter);
43 /* -*- mode: c; buffer-read-only: t -*-
45 Generated by sys-file-encoding.pl. Do not modify!
50 #include "data/sys-file-private.h"
52 struct sys_encoding sys_codepage_number_to_name[] = {
54 for my $cpnumber (sort { $a <=> $b } (keys (%codepages))) {
55 my $source = max (keys (%{$codepages{$cpnumber}}));
56 my $name = ${$codepages{$cpnumber}{$source}}[0];
57 print " { $cpnumber, \"$name\" },\n";
59 print " { 0, NULL }\n";
63 for my $cpnumber (sort { $a <=> $b } (keys (%codepages))) {
64 for my $source (keys (%{$codepages{$cpnumber}})) {
65 for my $name (@{$codepages{$cpnumber}{$source}}) {
66 push(@{$names{$name}{$source}}, $cpnumber);
70 print "struct sys_encoding sys_codepage_name_to_number[] = {\n";
71 for my $name (sort (keys (%names))) {
72 for my $source (reverse (sort (keys (%sources)))) {
73 next if !exists ($names{$name}{$source});
74 my (@numbers) = @{$names{$name}{$source}};
76 # The only two encodings that currently print this are KSC_5601
77 # and KS_C_5601-1987, for code pages 949 and 51949. It looks to
78 # me like the correct code page number is 949, which is the one
79 # chosen (because the numbers are in sorted order).
80 print " /* $name has multiple numbers for $sources{$source}: @numbers */\n"
83 print " { $numbers[0], \"$name\" },\n";
87 print " { 0, NULL }\n";
90 sub process_converter {
92 return if $converter =~ /^\s*$/;
93 return if $converter =~ /^\s*\{/;
99 my @fields = split (' ', $converter);
101 my $name = shift (@fields);
102 if (@fields && $fields[0] eq '{') {
107 my $standard = shift (@fields);
108 last if $standard eq '}';
109 $standards{$standard} = 1;
111 if (exists $standards{'IANA*'}) {
112 unshift (@iana, $name);
113 } elsif (exists $standards{'IANA'}) {
115 } elsif (grep (/\*$/, keys %standards)) {
116 unshift (@other, $name);
118 push (@other, $name);
121 # Untagged names are completely nonstandard.
126 if (($number) = $name =~ /^cp([0-9]+)$/) {
127 $cps{$CP} = int ($number);
128 } elsif (($number) = $name =~ /^windows-([0-9]+)$/) {
129 $cps{$WINDOWS} = int ($number);
130 } elsif (($number) = $name =~ /^ibm-([0-9]+)$/) {
131 $cps{$IBM} = int ($number);
137 # If there are no tagged names then this is completely nonstandard.
138 return if !@iana && !@other;
140 $codepages{$cps{$_}}{$_} = [@iana, @other] for keys (%cps);
146 $best = $x if !defined ($best) || $x > $best;