"""Generate the ISO 6937 -> Unicode lookup tables as C++ source.

Running this script downloads UnicodeData.txt (see ``DATA``) and writes
``<OUTPUT>.cc`` and ``<OUTPUT>.h``.  The generated code declares one map per
diacritical mark, a ``main`` map for the non-combining characters, and a
``diacriticals`` map from the ISO 6937 diacritical byte to the matching
per-diacritical map; ``sub::make_iso6937_tables()`` fills them all in.

NOTE(review): in the copy of this script that was reviewed, several emitted
C++ fragments look as if text between angle brackets has been stripped
(``#include`` with no header name, ``map`` / ``std::map`` with no template
arguments, a dangling ``*>``).  Those fragments are reproduced token for token
here because the intended key/value types cannot be determined from this file
alone -- confirm them against the code that consumes the tables.
"""

from urllib.request import urlopen
import sys

DATA = 'http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt'
OUTPUT = 'src/iso6937_tables'

# Fetch the database as bytes and release the connection straight away.
# For offline use, read a local copy in *binary* mode instead, e.g.
# open('UnicodeData.txt', 'rb').read(), so that the decoding below still works.
with urlopen(DATA) as response:
    data = response.read()


def _index_names(raw):
    """Return a dict mapping each character name in *raw* to its code point.

    *raw* is the bytes content of UnicodeData.txt: one ';'-separated record
    per line, with the code point (hex, no prefix) in field 0 and the name in
    field 1.  When a name occurs more than once the first record wins, which
    matches a top-to-bottom linear search.
    """
    index = {}
    for line in raw.splitlines():
        fields = line.decode('UTF-8').split(';')
        if len(fields) > 1:  # ignore blank or malformed lines
            index.setdefault(fields[1], fields[0])
    return index


# Built once so that every lookup is a dict access instead of a re-scan
# (and re-decode) of the whole database.
_CODE_POINTS = _index_names(data)


def find_unicode(n):
    """Return the code point (hex string) of the character named *n*.

    Prints a message and exits the script with status 1 if the name is not
    present in the downloaded database.
    """
    try:
        return _CODE_POINTS[n]
    except KeyError:
        print('Could not find %s' % n)
        sys.exit(1)


def setup(output_diacritical_name):
    """Write the definition of one per-diacritical map to the .cc file."""
    print('map sub::iso6937::%s;' % output_diacritical_name, file=output_c)


def fill(unicode_diacritical_name, output_diacritical_name, letters):
    """Write the declaration and the contents of one per-diacritical map.

    unicode_diacritical_name -- the diacritic as spelled in Unicode character
        names, e.g. 'RING ABOVE'.
    output_diacritical_name -- name of the generated C++ map, e.g. 'ring'.
    letters -- the base letters that can carry this diacritic; upper-case
        letters select the CAPITAL form, everything else the SMALL form.

    The extern declaration goes to the header; one assignment per letter,
    followed by a blank line, goes to the .cc file.
    """
    print('extern std::map %s;' % output_diacritical_name, file=output_h)
    for letter in letters:
        case = 'CAPITAL' if letter.isupper() else 'SMALL'
        unicode_name = 'LATIN %s LETTER %s WITH %s' % (
            case, letter.upper(), unicode_diacritical_name)
        print("\t%s['%s'] = 0x%s;" % (output_diacritical_name, letter, find_unicode(unicode_name)), file=output_c)
    print("", file=output_c)


# Licence header shared by both generated files.
_LICENSE = """/*
    Copyright (C) 2014 Carl Hetherington

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

*/
"""

# Each entry: (ISO 6937 diacritical byte, name of the diacritic in Unicode
# character names, name of the generated C++ map, letters that can carry it).
groups = [
    (0xC1, 'GRAVE', 'grave', 'AEIOUaeiou'),
    (0xC2, 'ACUTE', 'acute', 'ACEILNORSUYZacegilnorsuyz'),
    (0xC3, 'CIRCUMFLEX', 'circumflex', 'ACEGHIJOSUWYaceghijosuwy'),
    (0xC4, 'TILDE', 'tilde', 'AINOUainou'),
    (0xC5, 'MACRON', 'macron', 'AEIOUaeiou'),
    (0xC6, 'BREVE', 'breve', 'AGUagu'),
    (0xC7, 'DOT ABOVE', 'dot', 'CEGIZcegz'),
    (0xC8, 'DIAERESIS', 'diaeresis', 'AEIOUYaeiouy'),
    (0xCA, 'RING ABOVE', 'ring', 'AUau'),
    (0xCB, 'CEDILLA', 'cedilla', 'CGKLNRSTcklnrst'),
    (0xCD, 'DOUBLE ACUTE', 'double_acute', 'OUou'),
    (0xCE, 'OGONEK', 'ogonek', 'AEIUaeui'),
    (0xCF, 'CARON', 'caron', 'CDELNRSTZcdelnrstz')
]

# Entries of `main` above the ASCII range: (ISO 6937 byte, Unicode code point).
# From Wikipedia
# http://en.wikipedia.org/wiki/ISO/IEC_6937
_MAIN_HIGH = (
    (161, 0x00A1),
    (162, 0x00A2),
    (163, 0x00A3),
    # Wikipedia says the dollar is 0x24, but Annotation
    # Edit (and some other sources) disagree.
    (164, 0x0024),
    (165, 0x00A5),
    (167, 0x00A7),
    (168, 0x00A4),
    (169, 0x2018),
    (170, 0x201C),
    (171, 0x00AB),
    (172, 0x2190),
    (173, 0x2191),
    (174, 0x2192),
    (175, 0x2193),
    (176, 0x00B0),
    (177, 0x00B1),
    (178, 0x00B2),
    (179, 0x00B3),
    (180, 0x00D7),
    (181, 0x00B5),
    (182, 0x00B6),
    (183, 0x00B7),
    (184, 0x00F7),
    (185, 0x2019),
    (186, 0x201D),
    (187, 0x00BB),
    (188, 0x00BC),
    (189, 0x00BD),
    (190, 0x00BE),
    (191, 0x00BF),
    # 193-207: the diacritical bytes themselves, mapped to the corresponding
    # combining marks.
    (193, 0x0300),
    (194, 0x0301),
    (195, 0x0302),
    (196, 0x0303),
    (197, 0x0304),
    (198, 0x0306),
    (199, 0x0307),
    (200, 0x0308),
    (202, 0x030A),
    (203, 0x0327),
    (205, 0x030B),
    # U+0328 COMBINING OGONEK.  This used to be 0x032B, which is COMBINING
    # INVERTED DOUBLE ARCH BELOW and does not match the ogonek group (0xCE).
    (206, 0x0328),
    (207, 0x030C),
    (208, 0x2015),
    (209, 0x00B9),
    (210, 0x00AE),
    (211, 0x00A9),
    (212, 0x2122),
    (213, 0x266A),
    (214, 0x00AC),
    (215, 0x00A6),
    (220, 0x215B),
    (221, 0x215C),
    (222, 0x215D),
    (223, 0x215E),
    (224, 0x2126),
    (225, 0x00C6),
    (226, 0x0110),
    (227, 0x00AA),
    (228, 0x0126),
    (230, 0x0132),
    (231, 0x013F),
    (232, 0x0141),
    (233, 0x00D8),
    (234, 0x0152),
    (235, 0x00BA),
    (236, 0x00DE),
    (237, 0x0166),
    (238, 0x014A),
    (239, 0x0149),
    (240, 0x0138),
    (241, 0x00E6),
    (242, 0x0111),
    (243, 0x00F0),
    (244, 0x0127),
    (245, 0x0131),
    (246, 0x0133),
    (247, 0x0140),
    (248, 0x0142),
    (249, 0x00F8),
    (250, 0x0153),
    (251, 0x00DF),
    (252, 0x00FE),
    (253, 0x0167),
    (254, 0x014B),
    (255, 0x00AD),
)

# Both outputs are opened only after the download has succeeded, and the
# `with` block guarantees they are flushed and closed even when find_unicode()
# aborts the script part-way through.
with open(OUTPUT + '.cc', 'w') as output_c, open(OUTPUT + '.h', 'w') as output_h:
    # --- file preambles ---------------------------------------------------
    print(_LICENSE + """
/* THIS FILE WAS AUTO-GENERATED BY iso6937.py */

#include
#include "iso6937_tables.h"

using std::map;
""", file=output_c)

    print(_LICENSE + """
/* THIS FILE WAS AUTO-GENERATED BY iso6937.py */

#include

namespace sub {

extern void make_iso6937_tables ();

namespace iso6937 {
""", file=output_h)

    # --- map definitions (.cc) and the matching declarations (.h) ---------
    for g in groups:
        setup(g[2])

    print("map sub::iso6937::main;", file=output_c)
    print("map *> sub::iso6937::diacriticals;", file=output_c)

    print("extern std::map main;", file=output_h)
    print("extern std::map *> diacriticals;", file=output_h)

    # --- body of make_iso6937_tables() ------------------------------------
    print("""
void
sub::make_iso6937_tables ()
{
\tusing namespace sub::iso6937;
""", file=output_c)

    for g in groups:
        fill(g[1], g[2], g[3])

    print("\tmain[10] = 0x000A;", file=output_c)

    # Printable ASCII maps to itself; the quote and the backslash need
    # escaping inside the generated C++ character literal.
    for i in range(32, 127):
        character = chr(i)
        if character in ("'", "\\"):
            print("\tmain['\\%s'] = 0x00%x;" % (character, i), file=output_c)
        else:
            print("\tmain['%s'] = 0x00%x;" % (character, i), file=output_c)

    for code, code_point in _MAIN_HIGH:
        print("\tmain[%dU] = 0x%04X;" % (code, code_point), file=output_c)

    print("", file=output_c)

    for g in groups:
        print("\tdiacriticals[%s] = &%s;" % (hex(g[0]), g[2]), file=output_c)

    # --- closing braces ---------------------------------------------------
    print("}", file=output_c)
    print("", file=output_h)
    print("}", file=output_h)
    print("}", file=output_h)