581 lines
22 KiB
Python
Executable File
581 lines
22 KiB
Python
Executable File
#!/usr/bin/env python2
|
|
#############################################################################
|
|
##
|
|
## Copyright (C) 2020 The Qt Company Ltd.
|
|
## Contact: https://www.qt.io/licensing/
|
|
##
|
|
## This file is part of the test suite of the Qt Toolkit.
|
|
##
|
|
## $QT_BEGIN_LICENSE:GPL-EXCEPT$
|
|
## Commercial License Usage
|
|
## Licensees holding valid commercial Qt licenses may use this file in
|
|
## accordance with the commercial license agreement provided with the
|
|
## Software or, alternatively, in accordance with the terms contained in
|
|
## a written agreement between you and The Qt Company. For licensing terms
|
|
## and conditions see https://www.qt.io/terms-conditions. For further
|
|
## information use the contact form at https://www.qt.io/contact-us.
|
|
##
|
|
## GNU General Public License Usage
|
|
## Alternatively, this file may be used under the terms of the GNU
|
|
## General Public License version 3 as published by the Free Software
|
|
## Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
|
|
## included in the packaging of this file. Please review the following
|
|
## information to ensure the GNU General Public License requirements will
|
|
## be met: https://www.gnu.org/licenses/gpl-3.0.html.
|
|
##
|
|
## $QT_END_LICENSE$
|
|
##
|
|
#############################################################################
|
|
"""Script to generate C++ code from CLDR data in qLocaleXML form
|
|
|
|
See ``cldr2qlocalexml.py`` for how to generate the qLocaleXML data itself.
|
|
Pass the output file from that as first parameter to this script; pass
|
|
the root of the qtbase check-out as second parameter.
|
|
"""
|
|
|
|
import os
|
|
import datetime
|
|
|
|
from qlocalexml import QLocaleXmlReader
|
|
from xml.dom import minidom
|
|
from localetools import unicode2hex, wrap_list, Error, Transcriber, SourceFileEditor
|
|
|
|
def compareLocaleKeys(key1, key2):
    """Three-way comparison of two locale keys, usable as a sort cmp function.

    Each key is indexed as (language, script, country); the entries are
    subtracted from one another, so they must be numeric.  The attribute
    compareLocaleKeys.default_map has to be assigned before the first
    call that gets past the language comparison: it maps a
    (language, script) pair to the country treated as that pair's default.

    Returns 0 for equal keys, a negative value when key1 sorts first and
    a positive value when key2 sorts first.
    """
    if key1 == key2:
        return 0

    # Language is the primary criterion.
    if key1[0] != key2[0]:
        return key1[0] - key2[0]

    default_country = compareLocaleKeys.default_map

    # Whichever key names the default country for key1's (language,
    # script) pair is placed first.
    pair = key1[:2]
    if pair in default_country:
        preferred = default_country[pair]
        if key1[2] == preferred:
            return -1
        if key2[2] == preferred:
            return 1

    # Same script, and the default did not decide: order by country.
    if key1[1] == key2[1]:
        return key1[2] - key2[2]

    # Scripts differ: give the default for key2's pair the same chance.
    pair = key2[:2]
    if pair in default_country:
        preferred = default_country[pair]
        if key2[2] == preferred:
            return 1
        if key1[2] == preferred:
            return -1

    # Otherwise fall back to ordering by script.
    return key1[1] - key2[1]
|
|
|
|
|
|
class StringDataToken:
    """An (index, length) pair, as handed out by StringData.append()."""

    def __init__(self, index, length):
        """Store the pair; raise Error if either value exceeds 0xFFFF."""
        for part in (index, length):
            if part > 0xFFFF:
                raise Error("Position exceeds ushort range: {},{}".format(index, length))
        self.index, self.length = index, length

    def __str__(self):
        # Rendered as "index,length" with one blank either side.
        return " {0.index},{0.length} ".format(self)
|
|
|
|
class StringData:
    """Collects strings into one flat table, storing each distinct string once.

    self.data holds the concatenated output of unicode2hex() for every
    string added so far; self.hash maps each added string to the token
    that was returned for it; self.name is the C++ array name used by
    write().
    """

    def __init__(self, name):
        self.name = name
        self.data = []
        self.hash = {}

    def append(self, s):
        """Add s to the table if it is new; return the token locating it.

        The token records the offset into self.data at which the entry
        starts and the number of items unicode2hex(s) produced.  Raises
        Error when the offset or that count does not fit the 16-bit
        limits checked below.
        """
        try:
            return self.hash[s]
        except KeyError:
            pass  # Not seen before: encode and store it below.

        units = unicode2hex(s)
        start = len(self.data)
        if start > 0xffff:
            raise Error('Data index {} is too big for uint16!'.format(start))
        count = len(units)
        if count >= 0xffff:
            raise Error('Data is too big ({}) for uint16 size!'.format(count))
        try:
            token = StringDataToken(start, count)
        except Error as e:
            # Say which string was at fault before passing the error on.
            e.message += '(on data "{}")'.format(s)
            raise
        self.hash[s] = token
        self.data.extend(units)
        return token

    def write(self, fd):
        """Write the table to fd as a static const ushort array."""
        emit = fd.write
        emit("\nstatic const ushort {}[] = {{\n".format(self.name))
        emit(wrap_list(self.data))
        emit("\n};\n")
|
|
|
|
def currencyIsoCodeData(s):
    """Return a brace-enclosed list of the character codes of s.

    Each character of s contributes its ord() value, comma-separated.
    Any false value (empty string, None, 0) yields "{0,0,0}".
    """
    if not s:
        return "{0,0,0}"
    codes = [str(ord(letter)) for letter in s]
    return '{{{}}}'.format(",".join(codes))
|
|
|
|
class LocaleSourceEditor (SourceFileEditor):
    """A SourceFileEditor whose output begins with a generated-code notice."""

    __upinit = SourceFileEditor.__init__

    def __init__(self, path, temp, version):
        """Set up the base editor, then write the notice.

        path and temp are passed to SourceFileEditor unchanged; version is
        the CLDR version named in the notice, next to today's date.
        """
        self.__upinit(path, temp)
        # NOTE(review): the lines of this literal are reproduced flush-left,
        # as found; confirm the intended indentation of the comment text.
        notice = """
/*
This part of the file was generated on {} from the
Common Locale Data Repository v{}

http://www.unicode.org/cldr/

Do not edit this section: instead regenerate it using
cldr2qlocalexml.py and qlocalexml2cpp.py on updated (or
edited) CLDR data; see qtbase/util/locale_database/.
*/

"""
        today = datetime.date.today()
        self.writer.write(notice.format(today, version))
|
|
|
|
class LocaleDataWriter (LocaleSourceEditor):
    """Writes the generated C++ locale tables through self.writer.

    Each public method emits one table (or a group of related tables);
    the caller decides the order in which they appear in the output.
    """

    def likelySubtags(self, likely):
        """Write the likely_subtags[] array.

        likely yields (had, have, got, give, last) entries.  have and give
        each supply three integers for the two brace-enclosed triples of a
        row; had and got only appear in the row's trailing comment; a true
        last replaces the separating comma with a blank.
        """
        emit = self.writer.write
        emit('static const QLocaleId likely_subtags[] = {\n')
        for had, have, got, give, last in likely:
            emit(' {{ {:3d}, {:3d}, {:3d} }}'.format(*have))
            emit(', {{ {:3d}, {:3d}, {:3d} }}'.format(*give))
            if last:
                emit(' ')
            else:
                emit(',')
            emit(' // {} -> {}\n'.format(had, got))
        emit('};\n\n')

    def localeIndex(self, indices):
        """Write the locale_index[] array.

        Each entry of indices supplies an integer and a value shown in the
        comment after it; a final 0 entry closes the table.
        """
        emit = self.writer.write
        emit('static const quint16 locale_index[] = {\n')
        for entry in indices:
            emit('{:6d}, // {}\n'.format(*entry))
        emit(' 0 // trailing 0\n')
        emit('};\n\n')

    def localeData(self, locales, names):
        """Write locale_data[] followed by the string tables its rows index.

        names gives the keys in output order and locales maps each key to
        the object whose attributes fill the row.  Strings go into shared
        StringData tables; the row stores the token each append() returns,
        so the order of the append() calls below fixes the table layout.
        """
        emit = self.writer.write

        list_patterns = StringData('list_pattern_part_data')
        date_formats = StringData('date_format_data')
        time_formats = StringData('time_format_data')
        days = StringData('days_data')
        am = StringData('am_data')
        pm = StringData('pm_data')
        byte_units = StringData('byte_unit_data')
        currency_symbols = StringData('currency_symbol_data')
        currency_names = StringData('currency_display_name_data')
        currency_formats = StringData('currency_format_data')
        endonyms = StringData('endonyms_data')

        # Locale data
        emit('static const QLocaleData locale_data[] = {\n')
        # Table headings: keep each label centred in its field, matching
        # the row template below.
        # NOTE(review): the blanks inside some of these labels (and in other
        # output literals of this class) are narrower than the stated field
        # widths; they are reproduced as found - confirm the alignment
        # against the generated header.
        emit(''.join((
            ' // ',
            # Width 6 + comma
            ' lang ', # IDs
            'script ',
            ' terr ',
            ' dec ', # Numeric punctuation
            ' group ',
            ' list ', # Delimiter for *numeric* lists
            ' prcnt ', # Arithmetic symbols
            ' zero ',
            ' minus ',
            ' plus ',
            ' exp ',
            # Width 8 + comma - to make space for these wide labels !
            ' quotOpn ', # Quotation marks
            ' quotEnd ',
            'altQtOpn ',
            'altQtEnd ',
            # Width 11 + comma
            ' lpStart ', # List pattern
            ' lpMid ',
            ' lpEnd ',
            ' lpTwo ',
            ' sDtFmt ', # Date format
            ' lDtFmt ',
            ' sTmFmt ', # Time format
            ' lTmFmt ',
            ' ssDays ', # Days
            ' slDays ',
            ' snDays ',
            ' sDays ',
            ' lDays ',
            ' nDays ',
            ' am ', # am/pm indicators
            ' pm ',
            # Width 8 + comma
            ' byte ',
            ' siQuant ',
            'iecQuant ',
            # Width 8+4 + comma
            ' currISO ',
            # Width 11 + comma
            ' currSym ', # Currency formatting
            ' currDsply ',
            ' currFmt ',
            ' currFmtNeg ',
            ' endoLang ', # Name of language in itself, and of country
            ' endoCntry ',
            # Width 6 + comma
            'curDgt ', # Currency number representation
            'curRnd ',
            'dow1st ', # First day of week
            ' wknd+ ', # Week-end start/end days
            ' wknd-',
            # No trailing space on last entry (be sure to
            # pad before adding anything after it).
            '\n')))

        row_template = ''.join([
            ' {{ ',
            # Locale-identifier
            '{:6d},' * 3,
            # Numeric formats, list delimiter
            '{:6d},' * 8,
            # Quotation marks
            '{:8d},' * 4,
            # List patterns, date/time formats, month/day names, am/pm
            '{:>11s},' * 16,
            # SI/IEC byte-unit abbreviations
            '{:>8s},' * 3,
            # Currency ISO code
            ' {:>10s}, ',
            # Currency and endonyms
            '{:>11s},' * 6,
            # Currency formatting
            '{:6d},{:6d}',
            # Day of week and week-end
            ',{:6d}' * 3,
            ' }}'])
        format_row = row_template.format

        for key in names:
            locale = locales[key]
            row = format_row(
                key[0], key[1], key[2],
                locale.decimal,
                locale.group,
                locale.listDelim,
                locale.percent,
                locale.zero,
                locale.minus,
                locale.plus,
                locale.exp,
                locale.quotationStart,
                locale.quotationEnd,
                locale.alternateQuotationStart,
                locale.alternateQuotationEnd,
                list_patterns.append(locale.listPatternPartStart),
                list_patterns.append(locale.listPatternPartMiddle),
                list_patterns.append(locale.listPatternPartEnd),
                list_patterns.append(locale.listPatternPartTwo),
                date_formats.append(locale.shortDateFormat),
                date_formats.append(locale.longDateFormat),
                time_formats.append(locale.shortTimeFormat),
                time_formats.append(locale.longTimeFormat),
                days.append(locale.standaloneShortDays),
                days.append(locale.standaloneLongDays),
                days.append(locale.standaloneNarrowDays),
                days.append(locale.shortDays),
                days.append(locale.longDays),
                days.append(locale.narrowDays),
                am.append(locale.am),
                pm.append(locale.pm),
                byte_units.append(locale.byte_unit),
                byte_units.append(locale.byte_si_quantified),
                byte_units.append(locale.byte_iec_quantified),
                currencyIsoCodeData(locale.currencyIsoCode),
                currency_symbols.append(locale.currencySymbol),
                currency_names.append(locale.currencyDisplayName),
                currency_formats.append(locale.currencyFormat),
                currency_formats.append(locale.currencyNegativeFormat),
                endonyms.append(locale.languageEndonym),
                endonyms.append(locale.countryEndonym),
                locale.currencyDigits,
                locale.currencyRounding, # unused (QTBUG-81343)
                locale.firstDayOfWeek,
                locale.weekendStart,
                locale.weekendEnd)
            label = ', // {}/{}/{}\n'.format(
                locale.language, locale.script, locale.country)
            emit(row + label)

        # Terminating row: all zeros, matching the row template field by field.
        zeros = ((0,) * (3 + 8 + 4) + ('0,0',) * (16 + 3)
                 + (currencyIsoCodeData(0),)
                 + ('0,0',) * 6 + (0,) * (2 + 3))
        emit(format_row(*zeros) + ' // trailing zeros\n')
        emit('};\n')

        # StringData tables:
        for table in (list_patterns, date_formats,
                      time_formats, days,
                      byte_units, am, pm, currency_symbols,
                      currency_names, currency_formats,
                      endonyms):
            table.write(self.writer)

    @staticmethod
    def __writeNameData(out, book, form):
        """Write the <form>_name_list[] string and <form>_name_index[] array.

        book maps a key to a value whose first item is the name; the entry
        with key 0 is skipped in both tables, where "Default" and the
        Any<Form> row stand in its place.  out is called with each piece
        of text to emit.
        """
        out('static const char {}_name_list[] =\n'.format(form))
        out('"Default\\0"\n')
        for key, value in book.items():
            if key != 0:
                out('"' + value[0] + '\\0"\n')
        out(';\n\n')

        out('static const quint16 {}_name_index[] = {{\n'.format(form))
        out(' 0, // Any{}\n'.format(form.capitalize()))
        # The first real name starts after "Default" and its terminator.
        offset = 8
        for key, value in book.items():
            if key != 0:
                label = value[0]
                out('{:6d}, // {}\n'.format(offset, label))
                offset += 1 + len(label)
        out('};\n\n')

    @staticmethod
    def __writeCodeList(out, book, form, width):
        r"""Write the <form>_code_list[] string.

        Each value of book supplies its code as item 1 and its name (for
        the comment) as item 0.  Codes shorter than width are padded with
        a literal \0 for each missing character.
        """
        out('static const unsigned char {}_code_list[] =\n'.format(form))
        for _, entry in book.items():
            code = entry[1]
            shortfall = max(width - len(code), 0)
            out('"{}" // {}\n'.format(code + r'\0' * shortfall, entry[0]))
        out(';\n\n')

    def languageNames(self, languages):
        """Write the language name list and its index."""
        self.__writeNameData(self.writer.write, languages, 'language')

    def scriptNames(self, scripts):
        """Write the script name list and its index."""
        self.__writeNameData(self.writer.write, scripts, 'script')

    def countryNames(self, countries):
        """Write the country name list and its index."""
        self.__writeNameData(self.writer.write, countries, 'country')

    # TODO: unify these next three into the previous three; kept
    # separate for now to verify we're not changing data.

    def languageCodes(self, languages):
        """Write the language code list, three characters per entry."""
        self.__writeCodeList(self.writer.write, languages, 'language', 3)

    def scriptCodes(self, scripts):
        """Write the script code list, four characters per entry."""
        self.__writeCodeList(self.writer.write, scripts, 'script', 4)

    def countryCodes(self, countries): # TODO: unify with countryNames()
        """Write the country code list, three characters per entry."""
        self.__writeCodeList(self.writer.write, countries, 'country', 3)
|
|
|
|
class CalendarDataWriter (LocaleSourceEditor):
    """Writes one calendar's per-locale month-name table through self.writer."""

    # Row template: three integer IDs, then six brace-enclosed tokens.
    formatCalendar = (' {{'
                      + '{:6d}'
                      + ',{:6d}' * 2
                      + ',{{{:>5s}}}' * 6
                      + '}}, ').format

    def write(self, calendar, locales, names):
        """Write locale_data[] for calendar, then the months_data table.

        names gives the keys in output order; locales maps each key to an
        object whose month-name attributes are indexed by calendar.  Each
        name set is stored in a shared StringData table and the row keeps
        the token returned for it.
        """
        emit = self.writer.write
        months = StringData('months_data')
        fields = ('standaloneShortMonths', 'standaloneLongMonths',
                  'standaloneNarrowMonths',
                  'shortMonths', 'longMonths', 'narrowMonths')

        emit('static const QCalendarLocale locale_data[] = {\n')
        emit(''.join((
            ' // ',
            # IDs, width 7 (6 + comma)
            ' lang ',
            ' script',
            ' terr ',
            # Month-name start-end pairs, width 8 (5 plus '{},'):
            ' sShort ',
            ' sLong ',
            ' sNarrow',
            ' short ',
            ' long ',
            ' narrow',
            # No trailing space on last; be sure
            # to pad before adding later entries.
            '\n')))

        for key in names:
            locale = locales[key]
            ids = (key[0], key[1], key[2])
            # Appended in the order of the columns, which fixes the table layout.
            spans = tuple(months.append(getattr(locale, field)[calendar])
                          for field in fields)
            row = self.formatCalendar(*(ids + spans))
            emit(row + '// {}/{}/{}\n'.format(
                locale.language, locale.script, locale.country))

        blank = (0,) * 3 + ('0,0',) * 6
        emit(self.formatCalendar(*blank) + '// trailing zeros\n')
        emit('};\n')
        months.write(self.writer)
|
|
|
|
class LocaleHeaderWriter (SourceFileEditor):
    """Writes the Language, Script and Country enums through self.writer."""

    __upinit = SourceFileEditor.__init__

    def __init__(self, path, temp, dupes):
        """Set up the base editor; dupes holds the names needing a suffix."""
        self.__upinit(path, temp)
        self.__dupes = dupes

    def languages(self, languages):
        """Write the Language enum, followed by a blank line."""
        self.__enum('Language', languages, self.__language)
        self.writer.write('\n')

    def countries(self, countries):
        """Write the Country enum."""
        self.__enum('Country', countries, self.__country)

    def scripts(self, scripts):
        """Write the Script enum, followed by a blank line."""
        self.__enum('Script', scripts, self.__script)
        self.writer.write('\n')

    # Implementation details
    from enumdata import (language_aliases as __language,
                          country_aliases as __country,
                          script_aliases as __script)

    def __enum(self, name, book, alias):
        """Write enum name: one member per book entry, then the aliases.

        book maps each numeric value to an entry whose first item is the
        member's name; alias maps extra member names to what they equal.
        The closing Last<name> member is set to the last member written,
        so book must not be empty.  Raises Error for a script whose
        derived member name is listed in the dupes given at construction.
        """
        assert book
        out, dupes = self.writer.write, self.__dupes
        is_script = name == 'Script'
        out(' enum {} {{\n'.format(name))
        for key, value in book.items():
            member = value[0]
            if is_script:
                # Don't .capitalize() as some names are already camel-case (see enumdata.py):
                words = member.split()
                member = ''.join(word[0].upper() + word[1:] for word in words)
                if not member.endswith('Script'):
                    member += 'Script'
                if member in dupes:
                    raise Error('The script name "{}" is messy'.format(member))
            else:
                member = ''.join(member.split())
                # A name shared with another enum gets this enum's name appended.
                if member in dupes:
                    member = member + name
            out(' {} = {},\n'.format(member, key))

        aliases = ',\n '.join('{} = {}'.format(*pair)
                              for pair in sorted(alias.items()))
        closing = ',\n\n Last{} = {}\n }};\n'.format(name, member)
        out('\n ' + aliases + closing)
|
|
|
|
def usage(name, err, message = ''):
    """Write a one-line usage summary for program name to err.

    When message is non-empty it follows the summary, set off by a blank
    line and ending with a newline.
    """
    err.write('Usage: {} path/to/qlocale.xml root/of/qtbase\n'.format(name)) # TODO: elaborate
    if not message:
        return
    err.write(''.join(('\n', message, '\n')))
|
|
|
|
def main(args, out, err):
    """Regenerate the locale sources of a qtbase check-out.

    args is the command line: program name, path to the qLocaleXML file
    and root of the qtbase check-out; the list is consumed (popped) in
    place.  out is accepted but not used here; err receives usage and
    error messages.  Returns 0 on success, 1 after any reported failure.
    """
    # TODO: Make calendars a command-line parameter
    # map { CLDR name: Qt file name }
    calendars = {'gregorian': 'roman', 'persian': 'jalali', 'islamic': 'hijri',} # 'hebrew': 'hebrew',

    def why(problem):
        # Text to report for an IOError caught while opening files.
        return problem.message or problem.args[1]

    name = args.pop(0)
    if len(args) != 2:
        usage(name, err, 'I expect two arguments')
        return 1
    qlocalexml, qtsrcdir = args.pop(0), args.pop(0)

    text_dir = os.path.join(qtsrcdir, 'src', 'corelib', 'text')
    time_dir = os.path.join(qtsrcdir, 'src', 'corelib', 'time')

    # All three files rewritten under text_dir must already exist.
    found = os.path.isdir(qtsrcdir) and all(
        os.path.isfile(os.path.join(text_dir, leaf))
        for leaf in ('qlocale_data_p.h', 'qlocale.h', 'qlocale.qdoc'))
    if not found:
        usage(name, err, 'Missing expected files under qtbase source root ' + qtsrcdir)
        return 1

    reader = QLocaleXmlReader(qlocalexml)
    locale_map = dict(reader.loadLocaleMap(calendars, err.write))

    locale_keys = locale_map.keys()
    # The comparison reads its defaults from this function attribute.
    compareLocaleKeys.default_map = dict(reader.defaultMap())
    locale_keys.sort(compareLocaleKeys)

    # qlocale_data_p.h
    data_path = os.path.join(text_dir, 'qlocale_data_p.h')
    try:
        data_writer = LocaleDataWriter(data_path, qtsrcdir, reader.cldrVersion)
    except IOError as e:
        err.write('Failed to open files to transcribe locale data: ' + why(e))
        return 1

    try:
        data_writer.likelySubtags(reader.likelyMap())
        data_writer.localeIndex(reader.languageIndices(tuple(k[0] for k in locale_map)))
        data_writer.localeData(locale_map, locale_keys)
        data_writer.writer.write('\n')
        data_writer.languageNames(reader.languages)
        data_writer.scriptNames(reader.scripts)
        data_writer.countryNames(reader.countries)
        # TODO: merge the next three into the previous three
        data_writer.languageCodes(reader.languages)
        data_writer.scriptCodes(reader.scripts)
        data_writer.countryCodes(reader.countries)
    except Error as e:
        data_writer.cleanup()
        err.write('\nError updating locale data: ' + e.message + '\n')
        return 1

    data_writer.close()

    # Generate calendar data
    for calendar, stem in calendars.items():
        calendar_path = os.path.join(time_dir, 'q{}calendar_data_p.h'.format(stem))
        try:
            calendar_writer = CalendarDataWriter(calendar_path,
                                                 qtsrcdir, reader.cldrVersion)
        except IOError as e:
            err.write('Failed to open files to transcribe ' + calendar
                      + ' data ' + why(e))
            return 1

        try:
            calendar_writer.write(calendar, locale_map, locale_keys)
        except Error as e:
            calendar_writer.cleanup()
            err.write('\nError updating ' + calendar + ' locale data: ' + e.message + '\n')
            return 1

        calendar_writer.close()

    # qlocale.h
    try:
        header_writer = LocaleHeaderWriter(os.path.join(text_dir, 'qlocale.h'),
                                           qtsrcdir, reader.dupes)
    except IOError as e:
        err.write('Failed to open files to transcribe qlocale.h: ' + why(e))
        return 1

    try:
        header_writer.languages(reader.languages)
        header_writer.scripts(reader.scripts)
        header_writer.countries(reader.countries)
    except Error as e:
        header_writer.cleanup()
        err.write('\nError updating qlocale.h: ' + e.message + '\n')
        return 1

    header_writer.close()

    # qlocale.qdoc
    try:
        doc_writer = Transcriber(os.path.join(text_dir, 'qlocale.qdoc'),
                                 qtsrcdir)
    except IOError as e:
        err.write('Failed to open files to transcribe qlocale.qdoc: ' + why(e))
        return 1

    # Any line containing this text is replaced by one naming the CLDR
    # version; every other line is copied through unchanged.
    DOCSTRING = " QLocale's data is based on Common Locale Data Repository "
    try:
        for line in doc_writer.reader:
            if DOCSTRING in line:
                line = DOCSTRING + 'v' + reader.cldrVersion + '.\n'
            doc_writer.writer.write(line)
    except Error as e:
        doc_writer.cleanup()
        err.write('\nError updating qlocale.qdoc: ' + e.message + '\n')
        return 1

    doc_writer.close()
    return 0
|
|
|
|
if __name__ == "__main__":
    # Run as a script: hand main() the command line and the standard
    # streams, and exit with the status it returns.
    import sys
    status = main(sys.argv, sys.stdout, sys.stderr)
    sys.exit(status)
|