95 lines
3 KiB
Python
Executable file
95 lines
3 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
|
|
# Generate Unicode case-folding table for Ada.
|
|
|
|
# Copyright (C) 2022 Free Software Foundation, Inc.
|
|
|
|
# This file is part of GDB.
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
# This generates the ada-casefold.h header.
|
|
# Usage:
|
|
# python ada-unicode.py
|
|
|
|
import gdbcopyright
|
|
|
|
# State describing the run of case-conversions currently being
# accumulated.  RANGE_START is None whenever no run is open.

# First and last code point of the open run.
range_start, range_end = None, None

# Offsets from a code point in the open run to its upper-case and
# lower-case variants, respectively.
upper_delta, lower_delta = None, None

# Every run completed so far, in the order it was closed.  Each entry
# is a tuple of the form (START, END, UPPER_DELTA, LOWER_DELTA).
all_ranges = []
|
|
|
|
|
|
def finish_range():
    """Close the run currently being accumulated, if there is one.

    The open run is appended to ALL_RANGES as a
    (START, END, UPPER_DELTA, LOWER_DELTA) tuple and the four
    run-state globals are reset to None.  Does nothing when no run
    is open.
    """
    global range_start, range_end, upper_delta, lower_delta

    # Nothing to record when we are outside of a run.
    if range_start is None:
        return

    all_ranges.append((range_start, range_end, upper_delta, lower_delta))
    range_start = range_end = upper_delta = lower_delta = None
|
|
|
|
|
|
def process_codepoint(val):
    """Fold code point VAL into the run currently being accumulated.

    VAL is an integer code point.  If the character has no case
    distinction, or either of its case conversions is not exactly one
    character long, any open run is closed and VAL is skipped.
    Otherwise VAL extends the open run when its upper/lower deltas
    match that run's; if they differ, the open run is closed and a
    new one is started at VAL.
    """
    global range_start, range_end, upper_delta, lower_delta

    char = chr(val)
    lowered = char.lower()
    uppered = char.upper()

    # U+00DF ("LATIN SMALL LETTER SHARP S", aka eszett) traditionally
    # upper-cases to the two-character string "SS" (the capital form
    # is a relatively recent addition -- 2017).  Our simple scheme
    # can't handle this, so we skip it.  Also, because our approach
    # just represents runs of characters with identical folding
    # deltas, this change must terminate the current run.
    caseless = lowered == char == uppered
    if caseless or len(lowered) != 1 or len(uppered) != 1:
        finish_range()
        return

    deltas = (ord(uppered) - val, ord(lowered) - val)

    # A change in either delta ends the open run.
    if range_start is not None and deltas != (upper_delta, lower_delta):
        finish_range()

    # Open a fresh run at VAL when none is in progress.
    if range_start is None:
        range_start = val
        upper_delta, lower_delta = deltas

    range_end = val
|
|
|
|
|
|
# Scan every Unicode code point, U+0000 through U+10FFFF inclusive,
# accumulating runs of characters that share the same folding deltas.
# range() excludes its stop value, hence the "+ 1".
for c in range(0x10FFFF + 1):
    process_codepoint(c)

# Close a run that is still open when the scan ends; otherwise it
# would never be recorded in ALL_RANGES.
finish_range()

# Write the generated header: the copyright notice first, then one
# brace-enclosed {START, END, UPPER_DELTA, LOWER_DELTA} entry per run.
with open("ada-casefold.h", "w") as f:
    print(
        gdbcopyright.copyright("ada-unicode.py", "UTF-32 case-folding for GDB"),
        file=f,
    )
    for start, end, up, low in all_ranges:
        print(f" {{{start}, {end}, {up}, {low}}},", file=f)
|