"""Regenerate the language tag files under ``tags/``.

Downloads the IANA language subtag registry and the ISDCF language list,
then writes plain-text files in which each entry occupies two consecutive
lines.
"""
import json
import os
import urllib.request

IANA_REGISTRY_URL = 'https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry'
ISDCF_LANGUAGES_URL = 'https://registry.isdcf.com/languages'
KNOWN_TYPES = ('language', 'variant', 'region', 'script', 'extlang')
TAGS_DIR = 'tags'


def iter_registry_records(lines):
    """Yield one dict per ``%%``-delimited record found in *lines*.

    *lines* is an iterable of already-decoded text lines.  Each
    ``Key: value`` line becomes a dict entry; lines without a colon are
    ignored.  Repeated ``Description`` fields are joined with ``/``; any
    other repeated key keeps its last value.  If the input ends without a
    closing ``%%`` the pending record is still yielded.
    """
    record = {}
    for line in lines:
        text = line.strip()
        if text == '%%':
            if record:
                yield record
            record = {}
            continue
        # partition() splits on the first colon only, so a value that itself
        # contains a colon is kept whole.
        key, colon, rest = text.partition(':')
        if not colon:
            continue
        value = rest.strip()
        if key == 'Description' and key in record:
            record[key] = '/'.join([record[key], value])
        else:
            record[key] = value
    if record:
        yield record


def dcnc_pairs(payload):
    """Return ``(rfc5646Tag, dcncTag)`` for each ``payload['data']`` item with a ``dcncTag``."""
    return [
        (item['rfc5646Tag'], item['dcncTag'])
        for item in payload['data']
        if 'dcncTag' in item
    ]


def main():
    """Fetch both registries and rewrite the files under ``tags/``."""
    lists = {}
    # For an offline run, a local copy of the registry opened in text mode
    # can be passed to iter_registry_records() instead of the HTTP response.
    with urllib.request.urlopen(IANA_REGISTRY_URL) as response:
        # The HTTP response yields bytes; decode once here so the parser
        # only ever deals with str.
        decoded = (raw.decode('utf-8') for raw in response)
        for record in iter_registry_records(decoded):
            if 'Type' not in record:
                continue
            if record['Type'] in KNOWN_TYPES:
                # NOTE(review): nothing visible in this file adds entries to
                # `lists`; the step that fills it appears to be missing from
                # the reviewed excerpt -- confirm against the full script.
                if record['Type'] not in lists:
                    print("Unknown type `%s'" % record['Type'])

    for name, entries in lists.items():
        # Explicit UTF-8 keeps the output independent of the locale.
        with open(os.path.join(TAGS_DIR, name), 'w', encoding='utf-8') as out:
            for entry in entries:
                print(entry[0], file=out)
                print(entry[1], file=out)

    # Download and parse before opening the output file, so a failed fetch
    # does not leave a truncated tags/dcnc behind.
    with urllib.request.urlopen(ISDCF_LANGUAGES_URL) as response:
        payload = json.loads(response.read())
    pairs = dcnc_pairs(payload)
    with open(os.path.join(TAGS_DIR, 'dcnc'), 'w', encoding='utf-8') as out:
        for rfc5646_tag, dcnc_tag in pairs:
            print(rfc5646_tag, file=out)
            print(dcnc_tag, file=out)


if __name__ == '__main__':
    main()