scripts/update-language-subtags


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47

#!/usr/bin/python3

import os
import urllib.request
import json

# Record types whose (subtag, description) pairs are collected for output.
LISTED_TYPES = ('language', 'variant', 'region', 'script', 'extlang')
# Record types that are recognised but deliberately ignored (for now).
IGNORED_TYPES = ('redundant', 'grandfathered')


def parse_registry(lines):
    """Parse the text lines of a language subtag registry.

    The input is a sequence of records separated by lines containing only
    ``%%``.  Each record is a series of ``Field: value`` lines; a line that
    starts with whitespace continues the value of the previous field.

    Args:
        lines: iterable of ``str`` lines (line endings optional).

    Returns:
        A dict mapping each record type in LISTED_TYPES that was seen to a
        list of ``(subtag, description)`` tuples in file order.  When a record
        has several ``Description`` fields they are joined with ``/``.

    Raises:
        KeyError: if a listed record lacks a ``Subtag`` or ``Description``.
    """
    result = {}
    record = {}
    last_key = None

    def store(rec):
        kind = rec.get('Type')
        if kind is None:
            # Records without a Type (or an empty trailing record) carry no subtag.
            return
        if kind in LISTED_TYPES:
            result.setdefault(kind, []).append((rec['Subtag'], rec['Description']))
        elif kind not in IGNORED_TYPES:
            print("Unknown type `%s'" % kind)

    for raw in lines:
        text = raw.strip()
        if text == '%%':
            store(record)
            record = {}
            last_key = None
        elif not text:
            continue
        elif raw[:1] in (' ', '\t') and last_key is not None:
            # Folded line: append to the most recent field.  For a joined
            # Description this extends the last description, which is at the end.
            record[last_key] = record[last_key] + ' ' + text
        else:
            # Split on the first colon only so values containing ':' survive intact.
            key, sep, value = text.partition(':')
            if not sep:
                continue
            key = key.strip()
            value = value.strip()
            if key == 'Description' and key in record:
                record[key] = '/'.join([record[key], value])
            else:
                record[key] = value
            last_key = key

    # The final record is not followed by a '%%' separator, so store it here.
    store(record)
    return result


lists = {}

# Fetch only when executed as a script so that parse_registry() can be
# imported and exercised without network access.
if __name__ == '__main__':
    # For offline use, read a local copy instead:
    #   with open('language-subtag-registry', 'r', encoding='utf-8') as f:
    #       lists = parse_registry(f)
    with urllib.request.urlopen('https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry') as f:
        # urlopen() yields bytes; decode before doing any str comparisons.
        lists = parse_registry(f.read().decode('utf-8').split('\n'))

# Write one file per subtag type into the tags/ directory.  Each entry takes
# two lines: the subtag, then its description.
for subtag_type, entries in lists.items():
    # Pin the encoding: descriptions can contain non-ASCII characters, and the
    # default encoding of open() depends on the locale of whoever runs this.
    with open(os.path.join('tags', subtag_type), 'w', encoding='utf-8') as out:
        for subtag, description in entries:
            print(subtag, file=out)
            print(description, file=out)


# Fetch the ISDCF language list and write tags/dcnc.  Each entry that has a
# 'dcncTag' takes two lines: its 'rfc5646Tag', then its 'dcncTag'.
with urllib.request.urlopen('https://registry.isdcf.com/languages') as f:
    js = json.loads(f.read())

# Collect everything before opening the output, so that a malformed or
# unexpected response raises here and leaves any existing tags/dcnc intact
# instead of truncating it first.
dcnc_pairs = [(d['rfc5646Tag'], d['dcncTag']) for d in js['data'] if 'dcncTag' in d]

with open(os.path.join('tags', 'dcnc'), 'w', encoding='utf-8') as g:
    for rfc5646_tag, dcnc_tag in dcnc_pairs:
        print(rfc5646_tag, file=g)
        print(dcnc_tag, file=g)