Cleanup: include sorting.
[libdcp.git] / scripts / update-language-subtags
#!/usr/bin/python3
"""Regenerate the language-tag lists stored in the ``tags`` directory.

Two sources are downloaded:

* the IANA language subtag registry, which is split into one file per
  record type (``tags/language``, ``tags/variant``, ``tags/region``,
  ``tags/script`` and ``tags/extlang``);
* the ISDCF language list (JSON), from which ``tags/dcnc`` is written.

Every output file consists of pairs of lines: a tag, then its description
(or, for ``dcnc``, the RFC 5646 tag followed by the DCNC tag).

The script writes into ``tags`` relative to the current working directory;
that directory must already exist.
"""

import os
import urllib.request
import json

IANA_REGISTRY_URL = 'https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry'
ISDCF_LANGUAGES_URL = 'https://registry.isdcf.com/languages'
TAGS_DIRECTORY = 'tags'

# Record types that each get their own output file.
SUBTAG_TYPES = ('language', 'variant', 'region', 'script', 'extlang')
# Record types that are recognised but deliberately skipped (for now).
IGNORED_TYPES = ('redundant', 'grandfathered')


def _file_record(block, lists):
    """Append one finished registry record to ``lists`` under its Type.

    Records without a ``Type`` field are skipped silently, ignored types
    are dropped, and any other unexpected type is reported on stdout.
    """
    if 'Type' not in block:
        return
    kind = block['Type']
    if kind in SUBTAG_TYPES:
        lists.setdefault(kind, []).append((block['Subtag'], block['Description']))
    elif kind in IGNORED_TYPES:
        # We'll ignore these (for now)
        pass
    else:
        print(f"Unknown type `{kind}'")


def parse_registry(lines):
    """Parse the IANA language subtag registry.

    ``lines`` is any iterable of lines, either ``bytes`` (as delivered by
    ``urllib``; decoded here as UTF-8) or ``str`` (e.g. a local copy of the
    registry opened in text mode).

    Returns a dict mapping each record type in ``SUBTAG_TYPES`` that occurs
    in the input to a list of ``(subtag, description)`` tuples in registry
    order.  When a record has several ``Description`` fields they are
    joined with ``/``.
    """
    lists = {}
    block = {}
    last_key = None
    for raw in lines:
        if isinstance(raw, bytes):
            raw = raw.decode('utf-8')
        text = raw.strip()
        if text == '%%':
            # Record separator: file what we have and start afresh.
            _file_record(block, lists)
            block = {}
            last_key = None
        elif raw[:1].isspace() and last_key is not None:
            # A line starting with whitespace continues the previous field
            # (folded field bodies, RFC 5646 section 3.1.1); unfold it with
            # a single space rather than losing the text.
            if text:
                block[last_key] = f'{block[last_key]} {text}'
        else:
            # Split on the first colon only so that values which themselves
            # contain a colon are kept whole.
            key, separator, value = text.partition(':')
            if not separator:
                continue
            value = value.strip()
            if key == 'Description' and key in block:
                block[key] = '/'.join([block[key], value])
            else:
                block[key] = value
            last_key = key
    # The final record is not necessarily followed by a "%%" separator.
    _file_record(block, lists)
    return lists


def write_tag_lists(lists, directory=TAGS_DIRECTORY):
    """Write each list from ``parse_registry`` to ``directory/<type>``.

    Files are written as UTF-8 regardless of the locale, because the
    descriptions may contain non-ASCII characters.
    """
    for kind, entries in lists.items():
        with open(os.path.join(directory, kind), 'w', encoding='utf-8') as out:
            for subtag, description in entries:
                print(subtag, file=out)
                print(description, file=out)


def dcnc_pairs(languages):
    """Return ``(rfc5646Tag, dcncTag)`` for every entry that has a DCNC tag.

    ``languages`` is the decoded ISDCF JSON document; its ``data`` member
    is the list of language entries.
    """
    return [
        (entry['rfc5646Tag'], entry['dcncTag'])
        for entry in languages['data']
        if 'dcncTag' in entry
    ]


def write_dcnc_tags(languages, directory=TAGS_DIRECTORY):
    """Write the RFC 5646 / DCNC tag pairs to ``directory/dcnc``."""
    pairs = dcnc_pairs(languages)
    with open(os.path.join(directory, 'dcnc'), 'w', encoding='utf-8') as out:
        for rfc5646_tag, dcnc_tag in pairs:
            print(rfc5646_tag, file=out)
            print(dcnc_tag, file=out)


def main():
    """Download both registries and rewrite the files in ``tags``."""
    with urllib.request.urlopen(IANA_REGISTRY_URL) as response:
        lists = parse_registry(response)
    write_tag_lists(lists)

    # Download and parse before opening the output file, so that a failed
    # request or malformed JSON does not leave a truncated tags/dcnc behind.
    with urllib.request.urlopen(ISDCF_LANGUAGES_URL) as response:
        languages = json.load(response)
    write_dcnc_tags(languages)


if __name__ == '__main__':
    main()