Skip to content

Commit f9da05d

Browse files
committed
Generate messages during CLDR data collection and add timezone CLDR support
1 parent 04257f6 commit f9da05d

File tree

14 files changed

+14110
-1655
lines changed

14 files changed

+14110
-1655
lines changed

Diff for: README.rst

+9-5
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,16 @@ There are no tests yet. Sorry!
2525

2626
CLDR
2727
----
28-
The folder ``_scripts`` contains some useful scripts to update the repository data according to the CLDR. You probably
29-
want to execute the scripts in the following order::
28+
The folder ``_scripts`` contains some useful scripts to update the repository data according to the CLDR. To update
29+
all data, use::
3030

3131
_scripts/datafromcldr.py cldr.zip
32-
django-admin makemessages
33-
_scripts/messagesfromcldr.py cldr.zip
32+
33+
This will generate (or update) two types of files: all ``_cldr_data.py`` files, with dicts containing translatable
34+
strings, and ``cldr.po`` files, that contain the translations of all CLDR strings. You can merge these into the
35+
Django translation files using::
36+
37+
_scripts/mergemessages.py
3438

3539
Translators should note that all CLDR data will be automatically overwritten with translations. On the off chance that
36-
a manual override is required, add a comment with ``manual`` in it.
40+
a manual override is required, add a comment with the string ``manual`` in it.

Diff for: _scripts/_common.py

+65-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,65 @@
1-
COUNTRY_ALTERNATIVE_KEYS = {'HK': 'HK-alt-short', 'MO': 'MO-alt-short', 'PS': 'PS-alt-short'}
1+
import collections
2+
3+
# Territories whose display name is taken from an alternative CLDR entry: maps the
# territory code to the key (in the CLDR territories data) that holds that name.
COUNTRY_ALTERNATIVE_KEYS = {'HK': 'HK-alt-short', 'MO': 'MO-alt-short', 'PS': 'PS-alt-short'}
# Maps the leading component of a timezone name (e.g. 'Europe' in 'Europe/Amsterdam')
# to the key in the CLDR territories data whose display name is used as region name.
# Regions without a territory of their own fall back to '001' (world).
TIMEZONE_TERRITORY_KEYS = {
    'Africa': '002',
    'America': '019',
    'Antarctica': 'AQ',
    'Arctic': '001',  # world
    'Asia': '142',
    'Atlantic': '001',  # world
    'Australia': '009',  # oceania
    'Etc': '001',  # world
    'Europe': '150',
    'Indian': '001',  # world
    'Pacific': '001'  # world
}
17+
18+
19+
def update(d, u):
    """Recursively merge the mapping ``u`` into the dict ``d`` and return ``d``.

    Nested mappings are merged key by key instead of being replaced wholesale;
    any other value in ``u`` overwrites the value stored in ``d``. Adapted from
    https://stackoverflow.com/questions/3232943/

    :param d: the dict to update; it is modified in place.
    :param u: the mapping whose contents are merged into ``d``.
    :return: ``d`` itself.
    """
    # ``collections.Mapping`` was removed in Python 3.10; keep a Python 2 fallback.
    try:
        from collections.abc import Mapping
    except ImportError:
        from collections import Mapping

    for k, v in u.items():
        if isinstance(v, Mapping):
            current = d.get(k)
            # A missing or non-mapping value cannot be merged into, so start afresh.
            if not isinstance(current, Mapping):
                current = {}
            d[k] = update(current, v)
        else:
            d[k] = v
    return d
28+
29+
30+
def get_from_path(d, path):
    """Return the value found by following ``path`` through nested containers.

    :param d: the outermost container (typically nested dicts).
    :param path: an iterable of keys, applied from the outermost level inward.
        An empty (or ``None``) path returns ``d`` itself.
    :return: the value stored at the end of the path.
    :raises KeyError: if a key on the path is missing from a dict.
    """
    node = d
    # Walking iteratively avoids re-slicing the path on every step and is not
    # bounded by the interpreter's recursion limit.
    for key in path or ():
        node = node[key]
    return node
35+
36+
37+
def _get_tz_info(region_path, rest):
38+
"""Method that recursively diggs through the timezones variable looking for examplarCity."""
39+
40+
if 'exemplarCity' in rest:
41+
city = rest['exemplarCity']
42+
return [(region_path, city)]
43+
else:
44+
result = []
45+
for path, region in sorted(rest.items()):
46+
result.extend(_get_tz_info(region_path + [path], region))
47+
return result
48+
49+
50+
def get_tz_info(timezones):
    """Flatten the nested timezone data into a dict keyed by timezone name.

    :param timezones: mapping of top-level region name to its nested zone data.
    :return: dict mapping the '/'-joined key path of every zone found by
        ``_get_tz_info`` to a ``(top-level region name, exemplar city)`` tuple.
    """
    return {
        '/'.join(zone_path): (top_level, exemplar_city)
        for top_level, subtree in timezones.items()
        for zone_path, exemplar_city in _get_tz_info([top_level], subtree)
    }
56+
57+
58+
def get_language(lc):
    """Convert a lower-case language code such as ``pt-br`` to CLDR casing.

    ``zh-cn`` and ``zh-tw`` are mapped to ``zh-Hans-CN`` and ``zh-Hant-TW``.
    For any other code the first three characters are kept as they are and the
    remaining '-'-separated subtags are re-cased: four-letter subtags (scripts
    such as ``latn`` or ``hans``) are title-cased, everything else is
    upper-cased.

    :param lc: the language code to convert.
    :return: the re-cased language code.
    """
    if lc == 'zh-cn':
        return 'zh-Hans-CN'
    if lc == 'zh-tw':
        return 'zh-Hant-TW'

    prefix, rest = lc[0:3], lc[3:]
    subtags = [
        # Script subtags are four letters and written in title case (Latn, Hans).
        subtag.title() if len(subtag) == 4 and subtag.isalpha() else subtag.upper()
        for subtag in rest.split('-')
    ]
    return prefix + '-'.join(subtags)

Diff for: _scripts/datafromcldr.py

+147-46
Original file line numberDiff line numberDiff line change
@@ -1,97 +1,198 @@
1-
from __future__ import absolute_import
21
import argparse
32
import json
43
import os
54
import zipfile
65
import django
76
from django.conf import settings
87
from django.core.management.base import BaseCommand, CommandError
9-
from _common import COUNTRY_ALTERNATIVE_KEYS
8+
from django.utils import translation
9+
import polib
1010

11+
from _common import COUNTRY_ALTERNATIVE_KEYS, get_tz_info, update, get_from_path, TIMEZONE_TERRITORY_KEYS, get_language
1112

1213
# This is almost a management command, but we do not want it to be added to the django-admin namespace for the simple
1314
# reason that it is not expected to be executed by package users, only by the package maintainers.
1415
# We use a thin __main__ wrapper to make it work (ish) like a management command.
1516

17+
# Directory of the ``internationalflavor`` package, located next to this script's directory.
MODULE_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'internationalflavor')
# Directory holding one sub-directory per language code of the package.
LOCALE_PATH = os.path.join(MODULE_PATH, 'locale')
# Maps a language code to the polib.POFile that collects its CLDR translations.
# Filled by Command.handle and appended to by translate().
LANGUAGES = {}
20+
21+
22+
def translate(language, original, translated):
    """Queue one translation in the PO file collected for ``language``.

    :param language: key into ``LANGUAGES`` selecting the PO file to extend.
    :param original: the source string, stored as the entry's msgid.
    :param translated: the translated string, stored as the entry's msgstr.
    """
    LANGUAGES[language].append(polib.POEntry(
        msgid=original,
        msgstr=translated,
        comment="auto-generated from CLDR -- see docs before updating",
    ))
28+
1629

1730
class Command(BaseCommand):
    """Regenerate the package's CLDR-derived data files from a CLDR JSON zip.

    Writes ``countries/_cldr_data.py`` and ``timezone/_cldr_data.py`` below
    ``MODULE_PATH`` and one ``cldr.po`` file per language collected in
    ``LANGUAGES``.

    NOTE(review): the generated modules are written as byte strings to files
    opened in text mode, which looks like it assumes Python 2 -- confirm before
    running this under Python 3.
    """
    help = ('Updates localized data of the internationalflavor module using data from the Unicode '
            'Common Locale Data Repository (CLDR)')

    @staticmethod
    def _read_json(cldr_zip, *parts):
        """Return the parsed JSON document stored at ``parts`` inside ``cldr_zip``."""
        # Zip member names always use forward slashes, whatever os.sep is.
        return json.loads(cldr_zip.read("/".join(parts)).decode("utf8"))

    def handle(self, *args, **options):
        """Read the zip at ``options['path_to_cldr']`` and write all derived files.

        :raises CommandError: if the zip file cannot be read or a file cannot
            be written.
        """
        translation.deactivate_all()

        try:
            # Unzip the files
            self.stdout.write("Reading CLDR from %s" % os.path.abspath(options['path_to_cldr']))

            with zipfile.ZipFile(options['path_to_cldr']) as cldr_zip:
                # Load base data
                data = self._read_json(cldr_zip, "main", "en", "territories.json")
                update(data, self._read_json(cldr_zip, "main", "en", "timeZoneNames.json"))
                update(data, self._read_json(cldr_zip, "supplemental", "metaZones.json"))

                # Load localized data
                for lc, language in settings.LANGUAGES:
                    # Do not load data for languages that are not in my locale directory
                    if not os.path.exists(os.path.join(LOCALE_PATH, lc)):
                        continue

                    # Always create a PO file for languages that are in my locale directory
                    LANGUAGES[lc] = polib.POFile()
                    try:
                        cldr_lc = get_language(lc)
                        update(data, self._read_json(cldr_zip, "main", cldr_lc, "territories.json"))
                        update(data, self._read_json(cldr_zip, "main", cldr_lc, "timeZoneNames.json"))
                    except Exception as e:
                        # Best effort: a language without CLDR data keeps an untranslated PO file.
                        self.stderr.write("Language %s will not be translated: %s" % (language, e))

            # Handle territories
            # ------------------
            self.stdout.write("Parsing territory information")
            territories = data['main']['en']['localeDisplayNames']['territories']

            # Write territory info to a file
            with open(os.path.join(MODULE_PATH, "countries", "_cldr_data.py"), 'w') as f:
                f.write(b"# coding=utf-8\n")
                f.write(b"# This file is automatically generated based on the English CLDR file.\n")
                f.write(b"# Do not edit manually.\n\n")
                f.write(b"from __future__ import unicode_literals\n")
                f.write(b"from django.utils.translation import ugettext_lazy as _ # \n\n")

                f.write(b"COUNTRY_NAMES = {\n")
                # Loop over each territory
                for territory, name in sorted(territories.items()):
                    # Skip territories that are alternative names: only two-character
                    # codes and all-digit codes are kept.
                    if len(territory) > 2 and not territory.isdigit():
                        continue
                    # If we want the alternative name, we get one
                    alt_key = COUNTRY_ALTERNATIVE_KEYS.get(territory)
                    if alt_key is not None:
                        name = territories[alt_key]
                    f.write(b' "%s": _("%s"),\n' % (territory.encode('utf8'),
                                                    name.encode('utf8')))

                    # Handle translations
                    for lc in LANGUAGES:
                        cldr_lc = get_language(lc)
                        if cldr_lc in data['main']:
                            ldata = data['main'][cldr_lc]['localeDisplayNames']['territories']
                            # We check if the alternative name has some useful translation
                            if alt_key is not None and alt_key in ldata and ldata[alt_key] != territory:
                                translate(lc, name, ldata[alt_key])
                            # If no alternative name, we get a useful translation from the actual name
                            elif territory in ldata and ldata[territory] != territory:
                                translate(lc, name, ldata[territory])
                            # Else just do not translate
                            else:
                                translate(lc, name, '')
                        else:
                            translate(lc, name, '')

                f.write(b"}\n")

            # Handle timezones and metazones
            # ------------------------------
            self.stdout.write("Parsing timezone information")

            timezones = get_tz_info(data['main']['en']['dates']['timeZoneNames']['zone'])
            metazones = data['main']['en']['dates']['timeZoneNames']['metazone']
            metazone_info = data['supplemental']['metaZones']['metazoneInfo']['timezone']

            with open(os.path.join(MODULE_PATH, "timezone", "_cldr_data.py"), 'w') as f:
                f.write(b"# coding=utf-8\n")
                f.write(b"# This file is automatically generated based on the English CLDR file.\n")
                f.write(b"# Do not edit manually.\n\n")
                f.write(b"from __future__ import unicode_literals\n")
                f.write(b"from django.utils.translation import ugettext_lazy as _\n\n")

                f.write(b"METAZONE_MAPPING = {\n")
                # We build a map of timezones to metazones; sorted so that the
                # generated file is stable between runs.
                for region_path in sorted(timezones):
                    # Get the metazone info
                    mzone = None
                    try:
                        # The keys of ``timezones`` are '/'-joined paths, while the
                        # metazone data is nested, so the path is split up again.
                        mzoneinfo = get_from_path(metazone_info, region_path.split('/'))
                        # The mzone that has no end date is valid
                        for usage in mzoneinfo:
                            if '_to' not in usage['usesMetazone']:
                                mzone = usage['usesMetazone']['_mzone']
                    except (KeyError, TypeError):
                        # No (usable) metazone data for this timezone; it is written as None.
                        pass

                    mzone_repr = '"%s"' % mzone.encode('utf8') if mzone is not None else None
                    f.write(b' "%s": %s,\n' % (region_path.encode('utf8'), mzone_repr))
                f.write(b"}\n")

                f.write(b"TIMEZONE_NAMES = {\n")
                # We now loop over all timezone names
                for region_path, (region_name, city) in sorted(timezones.items()):
                    # Get the translated region name
                    if region_name in TIMEZONE_TERRITORY_KEYS:
                        region_name = territories[TIMEZONE_TERRITORY_KEYS[region_name]]

                    f.write(b' "%s": (_("%s"), _("%s")),\n' % (region_path.encode('utf8'),
                                                               region_name.encode('utf8'),
                                                               city.encode('utf8')))

                    # Handle translations, quite simply: we either get the translation, or we don't.
                    # Any translation will suffice here.
                    for lc in LANGUAGES:
                        cldr_lc = get_language(lc)
                        try:
                            translate(lc, city, get_from_path(data['main'][cldr_lc]['dates']['timeZoneNames']['zone'],
                                                              region_path.split('/'))['exemplarCity'])
                        except KeyError:
                            translate(lc, city, '')
                f.write(b"}\n")

                f.write(b"METAZONE_NAMES = {\n")
                # The metazone names are also easy
                for metazone, info in sorted(metazones.items()):
                    # We get the generic name, or if there's none, the standard name
                    if 'generic' in info['long']:
                        name = info['long']['generic']
                    else:
                        name = info['long']['standard']

                    f.write(b' "%s": _("%s"),\n' % (metazone.encode('utf8'),
                                                    name.encode('utf8')))

                    # Handle translations, any name will suffice.
                    for lc in LANGUAGES:
                        cldr_lc = get_language(lc)
                        try:
                            ldata = data['main'][cldr_lc]['dates']['timeZoneNames']['metazone']
                            if 'generic' in ldata[metazone]['long']:
                                translate(lc, name, ldata[metazone]['long']['generic'])
                            else:
                                translate(lc, name, ldata[metazone]['long']['standard'])
                        except KeyError:
                            translate(lc, name, '')
                f.write(b"}\n")

        except (IOError, OSError, zipfile.BadZipfile) as e:
            # IOError covers failures of open() on Python 2, where it is not an OSError.
            raise CommandError("Error while reading zip file: %s" % e)

        self.stdout.write("Writing CLDR language file")
        for lc, pofile in LANGUAGES.items():
            pofile.save(os.path.join(LOCALE_PATH, lc, 'LC_MESSAGES', 'cldr.po'))
195+
95196

96197
if __name__ == '__main__':
97198
settings.configure()

0 commit comments

Comments
 (0)