3 # Copyright (C) 2020, 2021 Free Software Foundation
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
# process_converter(fields): consume the token list accumulated for one
# converter entry and record any code page numbers found among its names in
# the module-level `codepages` mapping.
# NOTE(review): this listing omits several lines of the body (the embedded
# line numbers skip), so the comments below cover only the visible statements.
22 def process_converter(fields):
# Guard for an empty entry or one whose first token is '{'; the guarded
# statement is not visible here -- presumably an early return, confirm.
23 if not fields or fields[0] == '{':
# Index-based walk over the tokens (`i` is initialised in a line not shown).
32 while i < len(fields):
# A '{' token after a name; the lines that follow collect tags into
# `standards` -- presumably up to a closing '}', TODO confirm.
36 if i < len(fields) and fields[i] == '{':
45 standards.add(standard)
# Classify the name by its tag set.  Only the last branch's action is
# visible: a name with any starred tag is put at the front of `other`.
46 if 'IANA*' in standards:
48 elif 'IANA' in standards:
50 elif any(map(lambda s: s.endswith('*'), standards)):
51 other = [name] + other
55 # Untagged names are completely nonstandard.
# Names of the form cp<digits>, windows-<digits> and ibm-<digits> carry a
# code page number; the digits are stored as an int in `cps`, keyed by the
# CP / WINDOWS / IBM source constant.
58 m = re.match(r'cp([0-9]+)$', name)
60 cps[CP] = int(m.group(1))
63 m = re.match(r'windows-([0-9]+)$', name)
65 cps[WINDOWS] = int(m.group(1))
68 m = re.match(r'ibm-([0-9]+)$', name)
70 cps[IBM] = int(m.group(1))
73 # If there are no tagged names then this is completely nonstandard.
74 if not iana and not other:
# Record, for code page number cps[cp] and source cp, the name list with the
# `iana` names first and the `other` names after them.
78 codepages.setdefault(cps[cp], {})[cp] = iana + other
# Command-line check: anything other than exactly one argument, or `--help`
# as that argument, prints the usage text below and exits.
80 if len(sys.argv) != 2 or sys.argv[1] == '--help':
82 %s: generate code page tables from ICU encoding list
83 usage: %s CONVRTRS-TXT > sys-file-encoding.c
85 To update the encoding data, get the latest ICU encoding data from:
86 https://raw.githubusercontent.com/unicode-org/icu/\
87 main/icu4c/source/data/mappings/convrtrs.txt
88 """ % (sys.argv[0], sys.argv[0]))
# Exit status 0 only when usage was requested explicitly with `--help`;
# any other argument error exits with status 1.
89 sys.exit(0 if len(sys.argv) == 2 and sys.argv[1] == '--help' else 1)
# Identifiers for the naming scheme a code page number was taken from.
# The numeric values also act as priorities: the number-to-name table picks
# max() of the sources available for a code page, and the name-to-number
# table visits sources in descending order, so WINDOWS outranks IBM, which
# outranks CP.
91 WINDOWS = 3 # Windows code pages.
92 IBM = 2 # IBM code pages.
93 CP = 1 # Java (?) code pages.
# Name prefix for each source, used in the generated "multiple numbers"
# comment.
94 sources = { WINDOWS: "windows", IBM: "ibm", CP: "cp" }
# Read the input file named on the command line one line at a time,
# gathering the tokens of each converter entry into `converter`.
# NOTE(review): the file object returned by open() is never explicitly
# closed; a `with` block would release it deterministically.
99 for line in open(sys.argv[1], 'r'):
# Cut the line at the first '#'.  The guard for lines that contain no '#'
# is on a line not shown in this listing -- TODO confirm it exists.
101 comment_ofs = line.find('#')
103 line = line[:comment_ofs]
# A line with no leading whitespace starts a new entry, so the entry
# accumulated so far is processed first (the reset of `converter` is
# presumably on the omitted following line).
104 if line.lstrip() == line:
105 process_converter(converter)
# Append this line's whitespace-separated tokens to the current entry.
107 converter += line.split()
# Process the final entry, which has no following line to trigger it.
108 process_converter(converter)
111 /* -*- mode: c; buffer-read-only: t -*-
113 Generated by sys-file-encoding.py. Do not modify!
117 PSPP - a program for statistical analysis.
118 Copyright (C) 2017 Free Software Foundation, Inc.
120 This program is free software: you can redistribute it and/or modify
121 it under the terms of the GNU General Public License as published by
122 the Free Software Foundation, either version 3 of the License, or
123 (at your option) any later version.
125 This program is distributed in the hope that it will be useful,
126 but WITHOUT ANY WARRANTY; without even the implied warranty of
127 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
128 GNU General Public License for more details.
130 You should have received a copy of the GNU General Public License
131 along with this program. If not, see <http://www.gnu.org/licenses/>.
136 #include "data/sys-file-private.h"
138 struct sys_encoding sys_codepage_number_to_name[] = {""")
# Emit one number-to-name entry per code page number, in ascending numeric
# order.  The name used is the first one recorded under the highest-valued
# source available for that number.
140 for cpnumber, value in sorted(codepages.items()):
141 source = max(value.keys())
142 name = value[source][0]
143 print(' { %s, "%s" },' % (cpnumber, name))
144 print(""" { 0, NULL }
# Invert `codepages` into names[name][source] -> list of code page numbers.
# Code pages are visited in ascending order, so each list is ascending too.
# (The loop over the individual names in value2 is on a line not shown.)
149 for cpnumber, value in sorted(codepages.items()):
150 for source, value2 in value.items():
152 names.setdefault(name, {}).setdefault(source, []).append(cpnumber)
# Emit the name-to-number table, with names in sorted order.
154 print('struct sys_encoding sys_codepage_name_to_number[] = {')
155 for name in sorted(names.keys()):
# Visit sources from highest value to lowest: WINDOWS, then IBM, then CP.
156 for source in sorted(sources.keys(), reverse=True):
# Sources that never recorded this name are skipped (the guarded statement
# is not visible here -- presumably `continue`, confirm).
157 if source not in names[name]:
160 numbers = names[name][source]
162 # The only two encodings that currently print this are KSC_5601
163 # and KS_C_5601-1987, for code pages 949 and 51949. It looks to
164 # me like the correct code page number is 949, which is the one
165 # chosen (because the numbers are in sorted order).
167 print(' /* %s has multiple numbers for %s: %s */'
168 % (name, sources[source], ' '.join(map(str, numbers))))
# The first (lowest) recorded number is the one emitted for the name.
169 print(' { %s, "%s" },' % (numbers[0], name))