Add Asset::unset_hash().
[libdcp.git] / scripts / update-language-subtags
index 979d816850eba23b43a95ebeac08ced78e9808d6..eb5d01e84a2edbd6da9173de7f57e50f68517ec4 100755 (executable)
@@ -2,6 +2,7 @@
 
 import os
 import urllib.request
+import json
 
 block = {}
 lists = {}
@@ -9,7 +10,7 @@ lists = {}
 with urllib.request.urlopen('https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry') as f:
 #with open('language-subtag-registry', 'r') as f:
     for l in f.readlines():
-        if l.strip().decode('utf-8') == "%%":
+        if l.strip() == "%%":
             if 'Type' in block:
                 if block['Type'] in ['language', 'variant', 'region', 'script', 'extlang']:
                     if not block['Type'] in lists:
@@ -22,14 +23,25 @@ with urllib.request.urlopen('https://www.iana.org/assignments/language-subtag-re
                     print("Unknown type `%s'" % block['Type'])
             block = {}
         else:
-            p = l.strip().decode('utf-8').split(':')
+            p = l.strip().split(':')
             if len(p) > 1:
-                block[p[0]] = p[1][1:]
+                key = p[0]
+                value = p[1][1:]
+                if key == 'Description' and key in block:
+                    block[key] = '/'.join([block[key], value])
+                else:
+                    block[key] = value
 
 for k, v in lists.items():
     with open(os.path.join('tags', k), 'w') as f:
-        print(len(v), file=f)
         for e in v:
             print(e[0], file=f)
             print(e[1], file=f)
 
+
+with urllib.request.urlopen('https://registry.isdcf.com/languages') as f, open(os.path.join('tags', 'dcnc'), 'w') as g:
+    js = json.loads(f.read())
+    for d in js['data']:
+        if 'dcncTag' in d:
+            print(d['rfc5646Tag'], file=g)
+            print(d['dcncTag'], file=g)