summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Carl Hetherington <cth@carlh.net> 2020-08-23 22:06:29 +0200
committer Carl Hetherington <cth@carlh.net> 2020-09-20 19:30:28 +0200
commit e18addc9029f56c67aa40254bcfa40f8b072866f (patch)
tree 09b66ee63e20166607c316e13bd2e75635bef02b /scripts
parent 1ca864c987c704e66486027570cf689f9508c303 (diff)
Add new LanguageTag class.
The scripts/update-language-subtags script will download the RFC 5646 registry and write it to src/language_tag_lists.cc. This may need to be re-run if the subtags change.
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/update-language-subtags 38
1 files changed, 38 insertions, 0 deletions
diff --git a/scripts/update-language-subtags b/scripts/update-language-subtags
new file mode 100755
index 00000000..e533466d
--- /dev/null
+++ b/scripts/update-language-subtags
@@ -0,0 +1,38 @@
#!/usr/bin/python3
"""Regenerate src/language_tag_lists.cc from the IANA language subtag registry.

The registry is downloaded, its language, variant, region, script and
extlang records are grouped by type, and one C++
``LanguageTag::SubtagData`` array is written per type.  The output path is
relative to the current working directory.
"""

import urllib.request

REGISTRY_URL = (
    'https://www.iana.org/assignments/language-subtag-registry/'
    'language-subtag-registry'
)
OUTPUT_PATH = 'src/language_tag_lists.cc'

# Record types that become a <type>_list[] array in the generated file.
LISTED_TYPES = ('language', 'variant', 'region', 'script', 'extlang')
# Record types that are recognised but deliberately left out (for now).
IGNORED_TYPES = ('redundant', 'grandfathered')


def escape(s):
    """Return ``s`` made safe for use inside a double-quoted C++ literal."""
    # Backslashes go first; otherwise the ones added for quotes get doubled.
    return s.replace('\\', '\\\\').replace('"', '\\"')


def _file_record(record, lists):
    """Append ``record`` to ``lists`` if its Type is one we keep.

    Records without a Type field are skipped silently; unrecognised types
    are reported on stdout.  Raises KeyError if a kept record has no
    Subtag or Description field.
    """
    if 'Type' not in record:
        return
    kind = record['Type']
    if kind in LISTED_TYPES:
        entry = (record['Subtag'], record['Description'])
        lists.setdefault(kind, []).append(entry)
    elif kind not in IGNORED_TYPES:
        print("Unknown type `%s'" % kind)


def parse_registry(lines):
    """Group the records of a subtag registry by their Type field.

    ``lines`` is any iterable of registry lines, as ``bytes`` (decoded as
    UTF-8) or ``str``, so an HTTP response or an open local copy both work.

    Returns a dict mapping each kept type to a list of
    ``(subtag, description)`` tuples in file order.  When a record repeats
    a field (e.g. several Description lines) the last occurrence wins.
    """
    lists = {}
    record = {}
    last_key = None
    for raw in lines:
        text = raw.decode('utf-8') if isinstance(raw, bytes) else raw
        stripped = text.strip()
        if stripped == '%%':
            # "%%" separates records: file the finished one, start afresh.
            _file_record(record, lists)
            record = {}
            last_key = None
        elif text[:1] in (' ', '\t'):
            # A line starting with whitespace continues the previous
            # field's value (folded long line).
            if last_key is not None and stripped:
                record[last_key] = (record[last_key] + ' ' + stripped).strip()
        else:
            # Split on the first colon only, so values may contain colons.
            key, sep, value = stripped.partition(':')
            if sep:
                record[key] = value.strip()
                last_key = key
    # Do not lose the final record when no "%%" follows it.
    _file_record(record, lists)
    return lists


def write_lists(lists, f):
    """Write one C++ ``SubtagData`` array per type in ``lists`` to ``f``.

    ``f`` is a writable text file object.
    """
    for kind, entries in lists.items():
        print("static LanguageTag::SubtagData %s_list[] = {" % kind, file=f)
        for subtag, description in entries:
            row = (escape(subtag), escape(description))
            print('\t{ "%s", "%s" },' % row, file=f)
        print("};", file=f)
        print("", file=f)


def main():
    """Download the registry and rewrite OUTPUT_PATH from it."""
    # For offline work, pass an open local copy of the registry to
    # parse_registry() instead of the HTTP response.
    with urllib.request.urlopen(REGISTRY_URL) as f:
        lists = parse_registry(f)

    # Descriptions contain non-ASCII characters, so do not rely on the
    # locale's default encoding.
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        write_lists(lists, f)


if __name__ == '__main__':
    main()