summary refs log tree commit diff
path: root/src/iso6937.py
diff options
context:
space:
mode:
author Carl Hetherington <cth@carlh.net> 2014-05-28 11:50:15 +0100
committer Carl Hetherington <cth@carlh.net> 2014-05-28 11:50:15 +0100
commit cd0958260b54af3514ac12b936bb76ca6de6978b (patch)
tree a06591bfc901d950290e6c3ba2aad1dcf123921e /src/iso6937.py
parent 14fa24a1aabda7925f4842ab7f7e13c1798182a4 (diff)
More STL binary reading stuff.
Diffstat (limited to 'src/iso6937.py')
-rw-r--r-- src/iso6937.py 227
1 files changed, 227 insertions, 0 deletions
diff --git a/src/iso6937.py b/src/iso6937.py
new file mode 100644
index 0000000..4719b07
--- /dev/null
+++ b/src/iso6937.py
@@ -0,0 +1,227 @@
+import urllib2
+import sys
+
+DATA = 'http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt'  # URL of the character database searched by find_unicode()
+OUTPUT = 'src/iso6937_tables'  # path stem of the generated files; '.cc' and '.h' are appended below
+
+data = urllib2.urlopen(DATA).read()  # the whole database is fetched over HTTP each time the script runs
+# data = open('UnicodeData.txt').read()  # alternative: read a local copy instead of fetching
+output_c = open(OUTPUT + '.cc', 'w')  # generated C++ source, written via print>>output_c
+output_h = open(OUTPUT + '.h', 'w')  # generated C++ header, written via print>>output_h
+
+def find_unicode(n):
+ for line in iter(data.splitlines()):
+ s = line.split(';')
+ if s[1] == n:
+ return s[0]
+
+ print 'Could not find %s' % n
+ sys.exit(1)
+
+def setup(output_diacritical_name):
+ print>>output_c,'map<char, wchar_t> sub::iso6937::%s;' % output_diacritical_name
+
+def fill(unicode_diacritical_name, output_diacritical_name, letters):
+
+ print>>output_h,'extern std::map<char, wchar_t> %s;' % output_diacritical_name
+
+ for letter in letters:
+ if letter.isupper():
+ case = 'CAPITAL'
+ else:
+ case = 'SMALL'
+
+ unicode_name = 'LATIN %s LETTER %s WITH %s' % (case, letter.upper(), unicode_diacritical_name)
+ print>>output_c,"\t%s['%s'] = 0x%s;" % (output_diacritical_name, letter, find_unicode(unicode_name))
+
+ print>>output_c,""
+
+print>>output_c,"""/*
+ Copyright (C) 2014 Carl Hetherington <cth@carlh.net>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+/* THIS FILE WAS AUTO-GENERATED BY iso6937.py */
+
+#include <map>
+#include "iso6937_tables.h"
+
+using std::map;
+"""  # preamble of the generated .cc file: licence notice, includes and using-declaration
+
+print>>output_h,"""/*
+ Copyright (C) 2014 Carl Hetherington <cth@carlh.net>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+/* THIS FILE WAS AUTO-GENERATED BY iso6937.py */
+
+#include <map>
+
+namespace sub {
+
+extern void make_iso6937_tables ();
+
+namespace iso6937 {
+"""  # preamble of the generated header; leaves namespaces sub and iso6937 open (closed at the end of the script)
+
+groups = [  # one entry per diacritic: (name used in Unicode character names, C++ table name, base letters)
+ ('GRAVE', 'grave', 'AEIOUaeiou'),
+ ('ACUTE', 'acute', 'ACEILNORSUYZacegilnorsuyz'),
+ ('CIRCUMFLEX', 'circumflex', 'ACEGHIJOSUWYaceghijosuwy'),
+ ('TILDE', 'tilde', 'AINOUainou'),
+ ('MACRON', 'macron', 'AEIOUaeiou'),
+ ('BREVE', 'breve', 'AGUagu'),
+ ('DOT ABOVE', 'dot', 'CEGIZcegz'),  # 'I' is listed but 'i' is not
+ ('DIAERESIS', 'diaeresis', 'AEIOUYaeiouy'),
+ ('RING ABOVE', 'ring', 'AUau'),
+ ('CEDILLA', 'cedilla', 'CGKLNRSTcklnrst'),  # 'G' is listed but 'g' is not; 'g' appears under ACUTE instead
+ ('DOUBLE ACUTE', 'double_acute', 'OUou'),
+ ('OGONEK', 'ogonek', 'AEIUaeui'),
+ ('CARON', 'caron', 'CDELNRSTZcdelnrstz')
+]  # each letter is looked up as 'LATIN <CAPITAL|SMALL> LETTER <X> WITH <diacritic>' by fill()
+
+for g in groups:
+ setup(g[1])
+
+print>>output_c,"map<char, wchar_t> sub::iso6937::main;"
+print>>output_h,"extern std::map<char, wchar_t> main;"
+
+print>>output_c,"""
+void
+sub::make_iso6937_tables ()
+{
+\tusing namespace sub::iso6937;
+"""  # opens sub::make_iso6937_tables(); the assignments emitted after this form its body
+
+for g in groups:
+ fill(g[0], g[1], g[2])
+
+print>>output_c,"\tmain[10] = 0x000A;"
+
+for i in range(32, 127):
+ if chr(i) == "'" or chr(i) == "\\":
+ print>>output_c,"\tmain['\\%s'] = 0x00%x;" % (chr(i), i)
+ else:
+ print>>output_c,"\tmain['%s'] = 0x00%x;" % (chr(i), i)
+
+# From Wikipedia
+# http://en.wikipedia.org/wiki/ISO/IEC_6937
+print>>output_c,"\tmain[161] = 0x00A1;"
+print>>output_c,"\tmain[162] = 0x00A2;"
+print>>output_c,"\tmain[163] = 0x00A3;"
+print>>output_c,"\tmain[165] = 0x00A5;"
+print>>output_c,"\tmain[167] = 0x00A7;"
+print>>output_c,"\tmain[168] = 0x00A4;"
+print>>output_c,"\tmain[169] = 0x2018;"
+print>>output_c,"\tmain[170] = 0x201C;"
+print>>output_c,"\tmain[171] = 0x00AB;"
+print>>output_c,"\tmain[172] = 0x2190;"
+print>>output_c,"\tmain[173] = 0x2191;"
+print>>output_c,"\tmain[174] = 0x2192;"
+print>>output_c,"\tmain[175] = 0x2193;"
+print>>output_c,"\tmain[176] = 0x00B0;"
+print>>output_c,"\tmain[177] = 0x00B1;"
+print>>output_c,"\tmain[178] = 0x00B2;"
+print>>output_c,"\tmain[179] = 0x00B3;"
+print>>output_c,"\tmain[180] = 0x00D7;"
+print>>output_c,"\tmain[181] = 0x00B5;"
+print>>output_c,"\tmain[182] = 0x00B6;"
+print>>output_c,"\tmain[183] = 0x00B7;"
+print>>output_c,"\tmain[184] = 0x00F7;"
+print>>output_c,"\tmain[185] = 0x2019;"
+print>>output_c,"\tmain[186] = 0x201D;"
+print>>output_c,"\tmain[187] = 0x00BB;"
+print>>output_c,"\tmain[188] = 0x00BC;"
+print>>output_c,"\tmain[189] = 0x00BD;"
+print>>output_c,"\tmain[190] = 0x00BE;"
+print>>output_c,"\tmain[191] = 0x00BF;"
+print>>output_c,"\tmain[193] = 0x0300;"
+print>>output_c,"\tmain[194] = 0x0301;"
+print>>output_c,"\tmain[195] = 0x0302;"
+print>>output_c,"\tmain[196] = 0x0303;"
+print>>output_c,"\tmain[197] = 0x0304;"
+print>>output_c,"\tmain[198] = 0x0306;"
+print>>output_c,"\tmain[199] = 0x0307;"
+print>>output_c,"\tmain[200] = 0x0308;"
+print>>output_c,"\tmain[202] = 0x030A;"
+print>>output_c,"\tmain[203] = 0x0327;"
+print>>output_c,"\tmain[205] = 0x030B;"
+print>>output_c,"\tmain[206] = 0x032B;"
+print>>output_c,"\tmain[207] = 0x030C;"
+print>>output_c,"\tmain[208] = 0x2015;"
+print>>output_c,"\tmain[209] = 0x00B9;"
+print>>output_c,"\tmain[210] = 0x00AE;"
+print>>output_c,"\tmain[211] = 0x00A9;"
+print>>output_c,"\tmain[212] = 0x2122;"
+print>>output_c,"\tmain[213] = 0x266A;"
+print>>output_c,"\tmain[214] = 0x00AC;"
+print>>output_c,"\tmain[215] = 0x00A6;"
+print>>output_c,"\tmain[220] = 0x215B;"
+print>>output_c,"\tmain[221] = 0x215C;"
+print>>output_c,"\tmain[222] = 0x215D;"
+print>>output_c,"\tmain[223] = 0x215E;"
+print>>output_c,"\tmain[224] = 0x2126;"
+print>>output_c,"\tmain[225] = 0x00C6;"
+print>>output_c,"\tmain[226] = 0x0110;"
+print>>output_c,"\tmain[227] = 0x00AA;"
+print>>output_c,"\tmain[228] = 0x0126;"
+print>>output_c,"\tmain[230] = 0x0132;"
+print>>output_c,"\tmain[231] = 0x013F;"
+print>>output_c,"\tmain[232] = 0x0141;"
+print>>output_c,"\tmain[233] = 0x00D8;"
+print>>output_c,"\tmain[234] = 0x0152;"
+print>>output_c,"\tmain[235] = 0x00BA;"
+print>>output_c,"\tmain[236] = 0x00DE;"
+print>>output_c,"\tmain[237] = 0x0166;"
+print>>output_c,"\tmain[238] = 0x014A;"
+print>>output_c,"\tmain[239] = 0x0149;"
+print>>output_c,"\tmain[240] = 0x0138;"
+print>>output_c,"\tmain[241] = 0x00E6;"
+print>>output_c,"\tmain[242] = 0x0111;"
+print>>output_c,"\tmain[243] = 0x00F0;"
+print>>output_c,"\tmain[244] = 0x0127;"
+print>>output_c,"\tmain[245] = 0x0131;"
+print>>output_c,"\tmain[246] = 0x0133;"
+print>>output_c,"\tmain[247] = 0x0140;"
+print>>output_c,"\tmain[248] = 0x0142;"
+print>>output_c,"\tmain[249] = 0x00F8;"
+print>>output_c,"\tmain[250] = 0x0153;"
+print>>output_c,"\tmain[251] = 0x00DF;"
+print>>output_c,"\tmain[252] = 0x00FE;"
+print>>output_c,"\tmain[253] = 0x0167;"
+print>>output_c,"\tmain[254] = 0x014B;"
+print>>output_c,"\tmain[255] = 0x00AD;"
+
+print>>output_c,"}"
+print>>output_h,""
+print>>output_h,"}"
+print>>output_h,"}"