diff options
| author | Carl Hetherington <cth@carlh.net> | 2014-05-28 11:50:15 +0100 |
|---|---|---|
| committer | Carl Hetherington <cth@carlh.net> | 2014-05-28 11:50:15 +0100 |
| commit | cd0958260b54af3514ac12b936bb76ca6de6978b (patch) | |
| tree | a06591bfc901d950290e6c3ba2aad1dcf123921e /src/iso6937.py | |
| parent | 14fa24a1aabda7925f4842ab7f7e13c1798182a4 (diff) | |
More STL binary reading stuff.
Diffstat (limited to 'src/iso6937.py')
| -rw-r--r-- | src/iso6937.py | 227 |
1 files changed, 227 insertions, 0 deletions
"""Generate C++ tables that map ISO 6937 characters to Unicode code points.

Run as a script.  It fetches UnicodeData.txt from DATA and writes
OUTPUT + '.cc' and OUTPUT + '.h'.  The generated code declares one
std::map<char, wchar_t> per diacritical mark (keyed by the base letter),
a `main` map for single-byte characters, and sub::make_iso6937_tables(),
which fills all of the maps in.
"""

import contextlib
import sys

try:
    from urllib2 import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

DATA = 'http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt'
OUTPUT = 'src/iso6937_tables'

# Licence comment and the lines that open both generated files.
BANNER = """/*
    Copyright (C) 2014 Carl Hetherington <cth@carlh.net>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

*/

/* THIS FILE WAS AUTO-GENERATED BY iso6937.py */

#include <map>
"""

C_HEADER = BANNER + """#include "iso6937_tables.h"

using std::map;
"""

H_HEADER = BANNER + """
namespace sub {

extern void make_iso6937_tables ();

namespace iso6937 {
"""

# Opening of the generated function that populates every table.
C_FUNCTION_OPEN = """
void
sub::make_iso6937_tables ()
{
\tusing namespace sub::iso6937;
"""

# (Unicode name of the diacritical, name of the generated C++ map,
#  base letters for which an accented form is looked up).
groups = [
    ('GRAVE', 'grave', 'AEIOUaeiou'),
    ('ACUTE', 'acute', 'ACEILNORSUYZacegilnorsuyz'),
    ('CIRCUMFLEX', 'circumflex', 'ACEGHIJOSUWYaceghijosuwy'),
    ('TILDE', 'tilde', 'AINOUainou'),
    ('MACRON', 'macron', 'AEIOUaeiou'),
    ('BREVE', 'breve', 'AGUagu'),
    ('DOT ABOVE', 'dot', 'CEGIZcegz'),
    ('DIAERESIS', 'diaeresis', 'AEIOUYaeiouy'),
    ('RING ABOVE', 'ring', 'AUau'),
    ('CEDILLA', 'cedilla', 'CGKLNRSTcklnrst'),
    ('DOUBLE ACUTE', 'double_acute', 'OUou'),
    ('OGONEK', 'ogonek', 'AEIUaeui'),
    ('CARON', 'caron', 'CDELNRSTZcdelnrstz'),
]

# Upper half of the `main` table: (ISO 6937 byte, Unicode code point).
# From Wikipedia
# http://en.wikipedia.org/wiki/ISO/IEC_6937
MAIN_HIGH = [
    (161, 0x00A1), (162, 0x00A2), (163, 0x00A3), (165, 0x00A5),
    (167, 0x00A7), (168, 0x00A4), (169, 0x2018), (170, 0x201C),
    (171, 0x00AB), (172, 0x2190), (173, 0x2191), (174, 0x2192),
    (175, 0x2193), (176, 0x00B0), (177, 0x00B1), (178, 0x00B2),
    (179, 0x00B3), (180, 0x00D7), (181, 0x00B5), (182, 0x00B6),
    (183, 0x00B7), (184, 0x00F7), (185, 0x2019), (186, 0x201D),
    (187, 0x00BB), (188, 0x00BC), (189, 0x00BD), (190, 0x00BE),
    (191, 0x00BF),
    # 193-207: combining forms of the diacriticals, in `groups` order.
    (193, 0x0300), (194, 0x0301), (195, 0x0302), (196, 0x0303),
    (197, 0x0304), (198, 0x0306), (199, 0x0307), (200, 0x0308),
    (202, 0x030A), (203, 0x0327), (205, 0x030B),
    # Ogonek slot.  This used to be emitted as 0x032B, which is not an
    # ogonek; COMBINING OGONEK is U+0328.
    (206, 0x0328),
    (207, 0x030C),
    (208, 0x2015), (209, 0x00B9), (210, 0x00AE), (211, 0x00A9),
    (212, 0x2122), (213, 0x266A), (214, 0x00AC), (215, 0x00A6),
    (220, 0x215B), (221, 0x215C), (222, 0x215D), (223, 0x215E),
    (224, 0x2126), (225, 0x00C6), (226, 0x0110), (227, 0x00AA),
    (228, 0x0126), (230, 0x0132), (231, 0x013F), (232, 0x0141),
    (233, 0x00D8), (234, 0x0152), (235, 0x00BA), (236, 0x00DE),
    (237, 0x0166), (238, 0x014A), (239, 0x0149), (240, 0x0138),
    (241, 0x00E6), (242, 0x0111), (243, 0x00F0), (244, 0x0127),
    (245, 0x0131), (246, 0x0133), (247, 0x0140), (248, 0x0142),
    (249, 0x00F8), (250, 0x0153), (251, 0x00DF), (252, 0x00FE),
    (253, 0x0167), (254, 0x014B), (255, 0x00AD),
]


def _fetch(url):
    """Download url and return its contents as text."""
    # closing() is used because the Python 2 response object is not itself
    # a context manager.
    with contextlib.closing(urlopen(url)) as response:
        raw = response.read()
    # Python 3 returns bytes here; Python 2 already returns a str.
    return raw if isinstance(raw, str) else raw.decode('utf-8')


def _index_names(text):
    """Return a {character name: code point} dict for UnicodeData.txt text.

    Each line is split on ';'; field 0 is the code point (kept as the hex
    string found in the file) and field 1 is the character name.
    """
    names = {}
    for line in text.splitlines():
        fields = line.split(';')
        if len(fields) > 1:
            # The first occurrence of a name wins, exactly as a
            # top-to-bottom scan of the file would behave.
            names.setdefault(fields[1], fields[0])
    return names


def _emit(stream, text):
    """Write text to stream, followed by a newline."""
    stream.write(text + '\n')


# A local copy can be substituted with: data = open('UnicodeData.txt').read()
data = _fetch(DATA)

# Built once so that each lookup does not re-scan the whole of `data`.
_names = _index_names(data)


def find_unicode(n):
    """Return the code point of the character whose Unicode name is n.

    The value is the hex string from the first field of UnicodeData.txt.
    If no character has that name, a message is printed and the script
    exits with status 1.
    """
    try:
        return _names[n]
    except KeyError:
        sys.stdout.write('Could not find %s\n' % n)
        sys.exit(1)


def setup(output_diacritical_name):
    """Write the definition of one diacritical map to the .cc file."""
    _emit(output_c, 'map<char, wchar_t> sub::iso6937::%s;' % output_diacritical_name)


def fill(unicode_diacritical_name, output_diacritical_name, letters):
    """Write the declaration and contents of one diacritical map.

    unicode_diacritical_name -- the diacritical as it appears in Unicode
        character names, e.g. 'DOT ABOVE'.
    output_diacritical_name -- name of the generated C++ map.
    letters -- base letters; each one's case selects CAPITAL or SMALL in
        the Unicode name that is looked up.

    The extern declaration goes to the .h file; one assignment per letter,
    followed by a blank line, goes to the .cc file.
    """
    _emit(output_h, 'extern std::map<char, wchar_t> %s;' % output_diacritical_name)

    for letter in letters:
        case = 'CAPITAL' if letter.isupper() else 'SMALL'
        unicode_name = 'LATIN %s LETTER %s WITH %s' % (
            case, letter.upper(), unicode_diacritical_name)
        _emit(output_c, "\t%s['%s'] = 0x%s;" % (
            output_diacritical_name, letter, find_unicode(unicode_name)))

    _emit(output_c, "")


def _fill_main():
    """Write the assignments that populate the `main` map to the .cc file."""
    _emit(output_c, "\tmain[10] = 0x000A;")

    # Printable ASCII maps to itself.
    for i in range(32, 127):
        character = chr(i)
        if character in ("'", "\\"):
            # These two need escaping inside a C++ character literal.
            _emit(output_c, "\tmain['\\%s'] = 0x00%x;" % (character, i))
        else:
            _emit(output_c, "\tmain['%s'] = 0x00%x;" % (character, i))

    for code, code_point in MAIN_HIGH:
        _emit(output_c, "\tmain[%d] = 0x%04X;" % (code, code_point))


# The with-blocks guarantee that both files are flushed and closed, even
# when find_unicode() bails out through sys.exit().
with open(OUTPUT + '.cc', 'w') as output_c:
    with open(OUTPUT + '.h', 'w') as output_h:
        _emit(output_c, C_HEADER)
        _emit(output_h, H_HEADER)

        # Definitions of the maps come first...
        for _, table_name, _ in groups:
            setup(table_name)

        _emit(output_c, "map<char, wchar_t> sub::iso6937::main;")
        _emit(output_h, "extern std::map<char, wchar_t> main;")

        # ...followed by the function that fills them in.
        _emit(output_c, C_FUNCTION_OPEN)

        for unicode_name, table_name, letters in groups:
            fill(unicode_name, table_name, letters)

        _fill_main()

        _emit(output_c, "}")
        # Close namespace iso6937 and namespace sub.
        _emit(output_h, "")
        _emit(output_h, "}")
        _emit(output_h, "}")
