import urllib2 import sys DATA = 'http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt' OUTPUT = 'src/iso6937_tables' data = urllib2.urlopen(DATA).read() # data = open('UnicodeData.txt').read() output_c = open(OUTPUT + '.cc', 'w') output_h = open(OUTPUT + '.h', 'w') def find_unicode(n): for line in iter(data.splitlines()): s = line.split(';') if s[1] == n: return s[0] print 'Could not find %s' % n sys.exit(1) def setup(output_diacritical_name): print>>output_c,'map sub::iso6937::%s;' % output_diacritical_name def fill(unicode_diacritical_name, output_diacritical_name, letters): print>>output_h,'extern std::map %s;' % output_diacritical_name for letter in letters: if letter.isupper(): case = 'CAPITAL' else: case = 'SMALL' unicode_name = 'LATIN %s LETTER %s WITH %s' % (case, letter.upper(), unicode_diacritical_name) print>>output_c,"\t%s['%s'] = 0x%s;" % (output_diacritical_name, letter, find_unicode(unicode_name)) print>>output_c,"" print>>output_c,"""/* Copyright (C) 2014 Carl Hetherington This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* THIS FILE WAS AUTO-GENERATED BY iso6937.py */ #include #include "iso6937_tables.h" using std::map; """ print>>output_h,"""/* Copyright (C) 2014 Carl Hetherington This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* THIS FILE WAS AUTO-GENERATED BY iso6937.py */ #include namespace sub { extern void make_iso6937_tables (); namespace iso6937 { """ groups = [ (0xC1, 'GRAVE', 'grave', 'AEIOUaeiou'), (0xC2, 'ACUTE', 'acute', 'ACEILNORSUYZacegilnorsuyz'), (0xC3, 'CIRCUMFLEX', 'circumflex', 'ACEGHIJOSUWYaceghijosuwy'), (0xC4, 'TILDE', 'tilde', 'AINOUainou'), (0xC5, 'MACRON', 'macron', 'AEIOUaeiou'), (0xC6, 'BREVE', 'breve', 'AGUagu'), (0xC7, 'DOT ABOVE', 'dot', 'CEGIZcegz'), (0xC8, 'DIAERESIS', 'diaeresis', 'AEIOUYaeiouy'), (0xCA, 'RING ABOVE', 'ring', 'AUau'), (0xCB, 'CEDILLA', 'cedilla', 'CGKLNRSTcklnrst'), (0xCD, 'DOUBLE ACUTE', 'double_acute', 'OUou'), (0xCE, 'OGONEK', 'ogonek', 'AEIUaeui'), (0xCF, 'CARON', 'caron', 'CDELNRSTZcdelnrstz') ] for g in groups: setup(g[2]) print>>output_c,"map sub::iso6937::main;" print>>output_c,"map *> sub::iso6937::diacriticals;" print>>output_h,"extern std::map main;" print>>output_h,"extern std::map *> diacriticals;" print>>output_c,""" void sub::make_iso6937_tables () { \tusing namespace sub::iso6937; """ for g in groups: fill(g[1], g[2], g[3]) print>>output_c,"\tmain[10] = 0x000A;" for i in range(32, 127): if chr(i) == "'" or chr(i) == "\\": print>>output_c,"\tmain['\\%s'] = 0x00%x;" % (chr(i), i) else: print>>output_c,"\tmain['%s'] = 0x00%x;" % (chr(i), i) # From Wikipedia # http://en.wikipedia.org/wiki/ISO/IEC_6937 print>>output_c,"\tmain[161] = 0x00A1;" print>>output_c,"\tmain[162] = 0x00A2;" print>>output_c,"\tmain[163] = 0x00A3;" print>>output_c,"\tmain[165] = 0x00A5;" print>>output_c,"\tmain[167] = 0x00A7;" print>>output_c,"\tmain[168] = 0x00A4;" print>>output_c,"\tmain[169] = 0x2018;" print>>output_c,"\tmain[170] = 0x201C;" print>>output_c,"\tmain[171] = 0x00AB;" print>>output_c,"\tmain[172] = 0x2190;" print>>output_c,"\tmain[173] = 0x2191;" print>>output_c,"\tmain[174] = 0x2192;" print>>output_c,"\tmain[175] = 0x2193;" print>>output_c,"\tmain[176] = 0x00B0;" print>>output_c,"\tmain[177] = 0x00B1;" print>>output_c,"\tmain[178] = 0x00B2;" print>>output_c,"\tmain[179] = 0x00B3;" print>>output_c,"\tmain[180] = 0x00D7;" print>>output_c,"\tmain[181] = 0x00B5;" print>>output_c,"\tmain[182] = 0x00B6;" print>>output_c,"\tmain[183] = 0x00B7;" print>>output_c,"\tmain[184] = 0x00F7;" print>>output_c,"\tmain[185] = 0x2019;" print>>output_c,"\tmain[186] = 0x201D;" print>>output_c,"\tmain[187] = 0x00BB;" print>>output_c,"\tmain[188] = 0x00BC;" print>>output_c,"\tmain[189] = 0x00BD;" print>>output_c,"\tmain[190] = 0x00BE;" print>>output_c,"\tmain[191] = 0x00BF;" print>>output_c,"\tmain[193] = 0x0300;" print>>output_c,"\tmain[194] = 0x0301;" print>>output_c,"\tmain[195] = 0x0302;" print>>output_c,"\tmain[196] = 0x0303;" print>>output_c,"\tmain[197] = 0x0304;" print>>output_c,"\tmain[198] = 0x0306;" print>>output_c,"\tmain[199] = 0x0307;" print>>output_c,"\tmain[200] = 0x0308;" print>>output_c,"\tmain[202] = 0x030A;" print>>output_c,"\tmain[203] = 0x0327;" print>>output_c,"\tmain[205] = 0x030B;" print>>output_c,"\tmain[206] = 0x032B;" print>>output_c,"\tmain[207] = 0x030C;" print>>output_c,"\tmain[208] = 0x2015;" print>>output_c,"\tmain[209] = 0x00B9;" print>>output_c,"\tmain[210] = 0x00AE;" print>>output_c,"\tmain[211] = 0x00A9;" print>>output_c,"\tmain[212] = 0x2122;" print>>output_c,"\tmain[213] = 0x266A;" print>>output_c,"\tmain[214] = 0x00AC;" print>>output_c,"\tmain[215] = 0x00A6;" print>>output_c,"\tmain[220] = 0x215B;" print>>output_c,"\tmain[221] = 0x215C;" print>>output_c,"\tmain[222] = 0x215D;" print>>output_c,"\tmain[223] = 0x215E;" print>>output_c,"\tmain[224] = 0x2126;" print>>output_c,"\tmain[225] = 0x00C6;" print>>output_c,"\tmain[226] = 0x0110;" print>>output_c,"\tmain[227] = 0x00AA;" print>>output_c,"\tmain[228] = 0x0126;" print>>output_c,"\tmain[230] = 0x0132;" print>>output_c,"\tmain[231] = 0x013F;" print>>output_c,"\tmain[232] = 0x0141;" print>>output_c,"\tmain[233] = 0x00D8;" print>>output_c,"\tmain[234] = 0x0152;" print>>output_c,"\tmain[235] = 0x00BA;" print>>output_c,"\tmain[236] = 0x00DE;" print>>output_c,"\tmain[237] = 0x0166;" print>>output_c,"\tmain[238] = 0x014A;" print>>output_c,"\tmain[239] = 0x0149;" print>>output_c,"\tmain[240] = 0x0138;" print>>output_c,"\tmain[241] = 0x00E6;" print>>output_c,"\tmain[242] = 0x0111;" print>>output_c,"\tmain[243] = 0x00F0;" print>>output_c,"\tmain[244] = 0x0127;" print>>output_c,"\tmain[245] = 0x0131;" print>>output_c,"\tmain[246] = 0x0133;" print>>output_c,"\tmain[247] = 0x0140;" print>>output_c,"\tmain[248] = 0x0142;" print>>output_c,"\tmain[249] = 0x00F8;" print>>output_c,"\tmain[250] = 0x0153;" print>>output_c,"\tmain[251] = 0x00DF;" print>>output_c,"\tmain[252] = 0x00FE;" print>>output_c,"\tmain[253] = 0x0167;" print>>output_c,"\tmain[254] = 0x014B;" print>>output_c,"\tmain[255] = 0x00AD;" print>>output_c,"" for g in groups: print>>output_c,"\tdiacriticals[%s] = &%s;" % (hex(g[0]), g[2]) print>>output_c,"}" print>>output_h,"" print>>output_h,"}" print>>output_h,"}"