diff options
| author | Carl Hetherington <cth@carlh.net> | 2014-05-28 11:50:15 +0100 |
|---|---|---|
| committer | Carl Hetherington <cth@carlh.net> | 2014-05-28 11:50:15 +0100 |
| commit | cd0958260b54af3514ac12b936bb76ca6de6978b (patch) | |
| tree | a06591bfc901d950290e6c3ba2aad1dcf123921e /src | |
| parent | 14fa24a1aabda7925f4842ab7f7e13c1798182a4 (diff) | |
More STL binary reading stuff.
Diffstat (limited to 'src')
| -rw-r--r-- | src/iso6937.cc | 97 | ||||
| -rw-r--r-- | src/iso6937.h | 24 | ||||
| -rw-r--r-- | src/iso6937.py | 227 | ||||
| -rw-r--r-- | src/iso6937_tables.cc | 396 | ||||
| -rw-r--r-- | src/iso6937_tables.h | 46 | ||||
| -rw-r--r-- | src/stl_binary_reader.cc | 393 | ||||
| -rw-r--r-- | src/stl_binary_reader.h | 29 | ||||
| -rw-r--r-- | src/subtitle.h | 3 | ||||
| -rw-r--r-- | src/wscript | 4 |
9 files changed, 1053 insertions, 166 deletions
diff --git a/src/iso6937.cc b/src/iso6937.cc
new file mode 100644
index 0000000..d6c1970
--- /dev/null
+++ b/src/iso6937.cc
@@ -0,0 +1,117 @@
+/*
+    Copyright (C) 2014 Carl Hetherington <cth@carlh.net>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+#include <map>
+#include <string>
+#include <boost/optional.hpp>
+#include "iso6937_tables.h"
+#include "iso6937.h"
+
+using std::map;
+using std::string;
+using std::wstring;
+using namespace sub;
+
+typedef map<char, wchar_t> ISO6937Table;
+
+/** Append to o the value that table holds for the ISO 6937 byte c.
+ *  find() is used rather than operator[] so that an unmapped byte neither
+ *  inserts an entry into the shared table nor appends U+0000 to the output.
+ *  @return true if c was found in table.
+ */
+static bool
+append_mapped (wstring& o, ISO6937Table const & table, unsigned char c)
+{
+	ISO6937Table::const_iterator i = table.find (static_cast<char> (c));
+	if (i == table.end ()) {
+		return false;
+	}
+
+	o += i->second;
+	return true;
+}
+
+/** @return table of precomposed characters for the diacritical byte d, or 0
+ *  if there is no table for d (which is the case for 0xC9 and 0xCC).
+ */
+static ISO6937Table const *
+table_for_diacritical (unsigned char d)
+{
+	switch (d) {
+	case 0xC1: return &iso6937::grave;
+	case 0xC2: return &iso6937::acute;
+	case 0xC3: return &iso6937::circumflex;
+	case 0xC4: return &iso6937::tilde;
+	case 0xC5: return &iso6937::macron;
+	case 0xC6: return &iso6937::breve;
+	case 0xC7: return &iso6937::dot;
+	case 0xC8: return &iso6937::diaeresis;
+	case 0xCA: return &iso6937::ring;
+	case 0xCB: return &iso6937::cedilla;
+	case 0xCD: return &iso6937::double_acute;
+	case 0xCE: return &iso6937::ogonek;
+	case 0xCF: return &iso6937::caron;
+	}
+
+	return 0;
+}
+
+/** Convert a NUL-terminated ISO 6937 string to a wide string of Unicode values.
+ *
+ *  Bytes 0xC1-0xCF are treated as diacriticals which modify the byte that
+ *  follows them; the pair is looked up in the table for that diacritical.
+ *  If the pair has no precomposed form the diacritical is dropped and the
+ *  following byte is translated on its own.  Bytes with no translation at
+ *  all are skipped.
+ *
+ *  @param s NUL-terminated ISO 6937 string; must not be null.
+ *  @return Converted string.
+ */
+wstring
+sub::iso6937_to_utf16 (char const * s)
+{
+	/* The tables are filled on first use */
+	if (iso6937::grave.empty ()) {
+		make_iso6937_tables ();
+	}
+
+	wstring o;
+	boost::optional<unsigned char> diacritical;
+
+	for (; *s != '\0'; ++s) {
+		unsigned char const u = static_cast<unsigned char> (*s);
+		if (u >= 0xc1 && u <= 0xcf) {
+			diacritical = u;
+			continue;
+		}
+
+		bool translated = false;
+		if (diacritical) {
+			ISO6937Table const * table = table_for_diacritical (diacritical.get ());
+			translated = table && append_mapped (o, *table, u);
+			diacritical.reset ();
+		}
+
+		if (!translated) {
+			append_mapped (o, iso6937::main, u);
+		}
+	}
+
+	return o;
+}
diff --git a/src/iso6937.h b/src/iso6937.h
new file mode 100644
index 0000000..700602d
--- /dev/null
+++ b/src/iso6937.h
@@ -0,0 +1,24 @@
+/*
+    Copyright (C) 2014 Carl Hetherington <cth@carlh.net>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+#include <string>
+
+namespace sub {
+	/** Convert NUL-terminated ISO 6937 text to a wide string of Unicode values */
+	extern std::wstring iso6937_to_utf16 (char const * s);
+}
diff --git a/src/iso6937.py b/src/iso6937.py
new file mode 100644
index 0000000..4719b07
--- /dev/null
+++ b/src/iso6937.py
@@ -0,0 +1,250 @@
+"""Generate the ISO 6937 to Unicode tables, src/iso6937_tables.{cc,h}.
+
+This is a Python 2 script.  UnicodeData.txt is fetched from DATA and the
+output files are written relative to the current directory.
+"""
+
+import urllib2
+import sys
+
+DATA = 'http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt'
+OUTPUT = 'src/iso6937_tables'
+
+data = urllib2.urlopen(DATA).read()
+# data = open('UnicodeData.txt').read()
+output_c = open(OUTPUT + '.cc', 'w')
+output_h = open(OUTPUT + '.h', 'w')
+
+# Character name -> code point (the hex string from UnicodeData.txt), built
+# once so that find_unicode need not re-scan the whole file for every letter.
+# setdefault keeps the first entry for a name, as a linear search would.
+unicode_by_name = {}
+for line in data.splitlines():
+    fields = line.split(';')
+    unicode_by_name.setdefault(fields[1], fields[0])
+
+def find_unicode(n):
+    """Return the code point (hex string) of the character named n, or exit if there is none."""
+    if n in unicode_by_name:
+        return unicode_by_name[n]
+
+    print 'Could not find %s' % n
+    sys.exit(1)
+
+def setup(output_diacritical_name):
+    """Write the definition of one diacritical's table to the .cc file."""
+    print>>output_c,'map<char, wchar_t> sub::iso6937::%s;' % output_diacritical_name
+
+def fill(unicode_diacritical_name, output_diacritical_name, letters):
+    """Declare one diacritical's table in the .h file and write code to fill it to the .cc file.
+
+    unicode_diacritical_name -- diacritical as it appears in Unicode character names, e.g. 'GRAVE'.
+    output_diacritical_name -- name of the C++ table.
+    letters -- the base letters which can take this diacritical.
+    """
+
+    print>>output_h,'extern std::map<char, wchar_t> %s;' % output_diacritical_name
+
+    for letter in letters:
+        if letter.isupper():
+            case = 'CAPITAL'
+        else:
+            case = 'SMALL'
+
+        unicode_name = 'LATIN %s LETTER %s WITH %s' % (case, letter.upper(), unicode_diacritical_name)
+        print>>output_c,"\t%s['%s'] = 0x%s;" % (output_diacritical_name, letter, find_unicode(unicode_name))
+
+    print>>output_c,""
+
+print>>output_c,"""/*
+    Copyright (C) 2014 Carl Hetherington <cth@carlh.net>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+/* THIS FILE WAS AUTO-GENERATED BY iso6937.py */
+
+#include <map>
+#include "iso6937_tables.h"
+
+using std::map;
+"""
+
+print>>output_h,"""/*
+    Copyright (C) 2014 Carl Hetherington <cth@carlh.net>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+/* THIS FILE WAS AUTO-GENERATED BY iso6937.py */
+
+#include <map>
+
+namespace sub {
+
+extern void make_iso6937_tables ();
+
+namespace iso6937 {
+"""
+
+groups = [
+    ('GRAVE', 'grave', 'AEIOUaeiou'),
+    ('ACUTE', 'acute', 'ACEILNORSUYZacegilnorsuyz'),
+    ('CIRCUMFLEX', 'circumflex', 'ACEGHIJOSUWYaceghijosuwy'),
+    ('TILDE', 'tilde', 'AINOUainou'),
+    ('MACRON', 'macron', 'AEIOUaeiou'),
+    ('BREVE', 'breve', 'AGUagu'),
+    ('DOT ABOVE', 'dot', 'CEGIZcegz'),
+    ('DIAERESIS', 'diaeresis', 'AEIOUYaeiouy'),
+    ('RING ABOVE', 'ring', 'AUau'),
+    ('CEDILLA', 'cedilla', 'CGKLNRSTcklnrst'),
+    ('DOUBLE ACUTE', 'double_acute', 'OUou'),
+    ('OGONEK', 'ogonek', 'AEIUaeui'),
+    ('CARON', 'caron', 'CDELNRSTZcdelnrstz')
+]
+
+for g in groups:
+    setup(g[1])
+
+print>>output_c,"map<char, wchar_t> sub::iso6937::main;"
+print>>output_h,"extern std::map<char, wchar_t> main;"
+
+print>>output_c,"""
+void
+sub::make_iso6937_tables ()
+{
+\tusing namespace sub::iso6937;
+"""
+
+for g in groups:
+    fill(g[0], g[1], g[2])
+
+print>>output_c,"\tmain[10] = 0x000A;"
+
+for i in range(32, 127):
+    if chr(i) == "'" or chr(i) == "\\":
+        print>>output_c,"\tmain['\\%s'] = 0x00%x;" % (chr(i), i)
+    else:
+        print>>output_c,"\tmain['%s'] = 0x00%x;" % (chr(i), i)
+
+# From Wikipedia
+# http://en.wikipedia.org/wiki/ISO/IEC_6937
+print>>output_c,"\tmain[161] = 0x00A1;"
+print>>output_c,"\tmain[162] = 0x00A2;"
+print>>output_c,"\tmain[163] = 0x00A3;"
+print>>output_c,"\tmain[165] = 0x00A5;"
+print>>output_c,"\tmain[167] = 0x00A7;"
+print>>output_c,"\tmain[168] = 0x00A4;"
+print>>output_c,"\tmain[169] = 0x2018;"
+print>>output_c,"\tmain[170] = 0x201C;"
+print>>output_c,"\tmain[171] = 0x00AB;"
+print>>output_c,"\tmain[172] = 0x2190;"
+print>>output_c,"\tmain[173] = 0x2191;"
+print>>output_c,"\tmain[174] = 0x2192;"
+print>>output_c,"\tmain[175] = 0x2193;"
+print>>output_c,"\tmain[176] = 0x00B0;"
+print>>output_c,"\tmain[177] = 0x00B1;"
+print>>output_c,"\tmain[178] = 0x00B2;"
+print>>output_c,"\tmain[179] = 0x00B3;"
+print>>output_c,"\tmain[180] = 0x00D7;"
+print>>output_c,"\tmain[181] = 0x00B5;"
+print>>output_c,"\tmain[182] = 0x00B6;"
+print>>output_c,"\tmain[183] = 0x00B7;"
+print>>output_c,"\tmain[184] = 0x00F7;"
+print>>output_c,"\tmain[185] = 0x2019;"
+print>>output_c,"\tmain[186] = 0x201D;"
+print>>output_c,"\tmain[187] = 0x00BB;"
+print>>output_c,"\tmain[188] = 0x00BC;"
+print>>output_c,"\tmain[189] = 0x00BD;"
+print>>output_c,"\tmain[190] = 0x00BE;"
+print>>output_c,"\tmain[191] = 0x00BF;"
+print>>output_c,"\tmain[193] = 0x0300;"
+print>>output_c,"\tmain[194] = 0x0301;"
+print>>output_c,"\tmain[195] = 0x0302;"
+print>>output_c,"\tmain[196] = 0x0303;"
+print>>output_c,"\tmain[197] = 0x0304;"
+print>>output_c,"\tmain[198] = 0x0306;"
+print>>output_c,"\tmain[199] = 0x0307;"
+print>>output_c,"\tmain[200] = 0x0308;"
+print>>output_c,"\tmain[202] = 0x030A;"
+print>>output_c,"\tmain[203] = 0x0327;"
+print>>output_c,"\tmain[205] = 0x030B;"
+print>>output_c,"\tmain[206] = 0x0328;"  # U+0328 COMBINING OGONEK (was 0x032B, an unrelated mark)
+print>>output_c,"\tmain[207] = 0x030C;"
+print>>output_c,"\tmain[208] = 0x2015;"
+print>>output_c,"\tmain[209] = 0x00B9;"
+print>>output_c,"\tmain[210] = 0x00AE;"
+print>>output_c,"\tmain[211] = 0x00A9;"
+print>>output_c,"\tmain[212] = 0x2122;"
+print>>output_c,"\tmain[213] = 0x266A;"
+print>>output_c,"\tmain[214] = 0x00AC;"
+print>>output_c,"\tmain[215] = 0x00A6;"
+print>>output_c,"\tmain[220] = 0x215B;"
+print>>output_c,"\tmain[221] = 0x215C;"
+print>>output_c,"\tmain[222] = 0x215D;"
+print>>output_c,"\tmain[223] = 0x215E;"
+print>>output_c,"\tmain[224] = 0x2126;"
+print>>output_c,"\tmain[225] = 0x00C6;"
+print>>output_c,"\tmain[226] = 0x0110;"
+print>>output_c,"\tmain[227] = 0x00AA;"
+print>>output_c,"\tmain[228] = 0x0126;"
+print>>output_c,"\tmain[230] = 0x0132;"
+print>>output_c,"\tmain[231] = 0x013F;"
+print>>output_c,"\tmain[232] = 0x0141;"
+print>>output_c,"\tmain[233] = 0x00D8;"
+print>>output_c,"\tmain[234] = 0x0152;"
+print>>output_c,"\tmain[235] = 0x00BA;"
+print>>output_c,"\tmain[236] = 0x00DE;"
+print>>output_c,"\tmain[237] = 0x0166;"
+print>>output_c,"\tmain[238] = 0x014A;"
+print>>output_c,"\tmain[239] = 0x0149;"
+print>>output_c,"\tmain[240] = 0x0138;"
+print>>output_c,"\tmain[241] = 0x00E6;"
+print>>output_c,"\tmain[242] = 0x0111;"
+print>>output_c,"\tmain[243] = 0x00F0;"
+print>>output_c,"\tmain[244] = 0x0127;"
+print>>output_c,"\tmain[245] = 0x0131;"
+print>>output_c,"\tmain[246] = 0x0133;"
+print>>output_c,"\tmain[247] = 0x0140;"
+print>>output_c,"\tmain[248] = 0x0142;"
+print>>output_c,"\tmain[249] = 0x00F8;"
+print>>output_c,"\tmain[250] = 0x0153;"
+print>>output_c,"\tmain[251] = 0x00DF;"
+print>>output_c,"\tmain[252] = 0x00FE;"
+print>>output_c,"\tmain[253] = 0x0167;"
+print>>output_c,"\tmain[254] = 0x014B;"
+print>>output_c,"\tmain[255] = 0x00AD;"
+
+print>>output_c,"}"
+print>>output_h,""
+print>>output_h,"}"
+print>>output_h,"}"
+
+output_c.close()
+output_h.close()
diff --git a/src/iso6937_tables.cc b/src/iso6937_tables.cc
new file mode 100644
index 0000000..07174c4
--- /dev/null
+++ b/src/iso6937_tables.cc
@@ -0,0 +1,396 @@
+/*
+    Copyright (C) 2014 Carl Hetherington <cth@carlh.net>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+/* THIS FILE WAS AUTO-GENERATED BY iso6937.py */
+
+#include <map>
+#include "iso6937_tables.h"
+
+using std::map;
+
+map<char, wchar_t> sub::iso6937::grave;
+map<char, wchar_t> sub::iso6937::acute;
+map<char, wchar_t> sub::iso6937::circumflex;
+map<char, wchar_t> sub::iso6937::tilde;
+map<char, wchar_t> sub::iso6937::macron;
+map<char, wchar_t> sub::iso6937::breve;
+map<char, wchar_t> sub::iso6937::dot;
+map<char, wchar_t> sub::iso6937::diaeresis;
+map<char, wchar_t> sub::iso6937::ring;
+map<char, wchar_t> sub::iso6937::cedilla;
+map<char, wchar_t> sub::iso6937::double_acute;
+map<char, wchar_t> sub::iso6937::ogonek;
+map<char, wchar_t> sub::iso6937::caron;
+map<char, wchar_t> sub::iso6937::main;
+
+void
+sub::make_iso6937_tables ()
+{
+	using namespace sub::iso6937;
+
+	grave['A'] = 0x00C0;
+	grave['E'] = 0x00C8;
+	grave['I'] = 0x00CC;
+	grave['O'] = 0x00D2;
+	grave['U'] = 0x00D9;
+	grave['a'] = 0x00E0;
+	grave['e'] = 0x00E8;
+	grave['i'] = 0x00EC;
+	grave['o'] = 0x00F2;
+	grave['u'] = 0x00F9;
+
+	acute['A'] = 0x00C1;
+	acute['C'] = 0x0106;
+	acute['E'] = 0x00C9;
+	acute['I'] = 0x00CD;
+	acute['L'] = 0x0139;
+	acute['N'] = 0x0143;
+	acute['O'] = 0x00D3;
+	acute['R'] = 0x0154;
+	acute['S'] = 0x015A;
+	acute['U'] = 0x00DA;
+	acute['Y'] = 0x00DD;
+	acute['Z'] = 0x0179;
+	acute['a'] = 0x00E1;
+	acute['c'] = 0x0107;
+	acute['e'] = 0x00E9;
+	acute['g'] = 0x01F5;
+	acute['i'] = 0x00ED;
+	acute['l'] = 0x013A;
+	acute['n'] = 0x0144;
+	acute['o'] = 0x00F3;
+	acute['r'] = 0x0155;
+	acute['s'] = 0x015B;
+	acute['u'] = 0x00FA;
+	acute['y'] = 0x00FD;
+	acute['z'] = 0x017A;
+
+	circumflex['A'] = 0x00C2;
+	circumflex['C'] = 0x0108;
+	circumflex['E'] = 0x00CA;
+	circumflex['G'] = 0x011C;
+	circumflex['H'] = 0x0124;
+	circumflex['I'] = 0x00CE;
+	circumflex['J'] = 0x0134;
+	circumflex['O'] = 0x00D4;
+	circumflex['S'] = 0x015C;
+	circumflex['U'] = 0x00DB;
+	circumflex['W'] = 0x0174;
+	circumflex['Y'] = 0x0176;
+	circumflex['a'] = 0x00E2;
+	circumflex['c'] = 0x0109;
+	circumflex['e'] = 0x00EA;
+	circumflex['g'] = 0x011D;
+	circumflex['h'] = 0x0125;
+	circumflex['i'] = 0x00EE;
+	circumflex['j'] = 0x0135;
+	circumflex['o'] = 0x00F4;
+	circumflex['s'] = 0x015D;
+	circumflex['u'] = 0x00FB;
+	circumflex['w'] = 0x0175;
+	circumflex['y'] = 0x0177;
+
+	tilde['A'] = 0x00C3;
+	tilde['I'] = 0x0128;
+	tilde['N'] = 0x00D1;
+	tilde['O'] = 0x00D5;
+	tilde['U'] = 0x0168;
+	tilde['a'] = 0x00E3;
+	tilde['i'] = 0x0129;
+	tilde['n'] = 0x00F1;
+	tilde['o'] = 0x00F5;
+	tilde['u'] = 0x0169;
+
+	macron['A'] = 0x0100;
+	macron['E'] = 0x0112;
+	macron['I'] = 0x012A;
+	macron['O'] = 0x014C;
+	macron['U'] = 0x016A;
+	macron['a'] = 0x0101;
+	macron['e'] = 0x0113;
+	macron['i'] = 0x012B;
+	macron['o'] = 0x014D;
+	macron['u'] = 0x016B;
+
+	breve['A'] = 0x0102;
+	breve['G'] = 0x011E;
+	breve['U'] = 0x016C;
+	breve['a'] = 0x0103;
+	breve['g'] = 0x011F;
+	breve['u'] = 0x016D;
+
+	dot['C'] = 0x010A;
+	dot['E'] = 0x0116;
+	dot['G'] = 0x0120;
+	dot['I'] = 0x0130;
+	dot['Z'] = 0x017B;
+	dot['c'] = 0x010B;
+	dot['e'] = 0x0117;
+	dot['g'] = 0x0121;
+	dot['z'] = 0x017C;
+
+	diaeresis['A'] = 0x00C4;
+	diaeresis['E'] = 0x00CB;
+	diaeresis['I'] = 0x00CF;
+	diaeresis['O'] = 0x00D6;
+	diaeresis['U'] = 0x00DC;
+	diaeresis['Y'] = 0x0178;
+	diaeresis['a'] = 0x00E4;
+	diaeresis['e'] = 0x00EB;
+	diaeresis['i'] = 0x00EF;
+	diaeresis['o'] = 0x00F6;
+	diaeresis['u'] = 0x00FC;
+	diaeresis['y'] = 0x00FF;
+
+	ring['A'] = 0x00C5;
+	ring['U'] = 0x016E;
+	ring['a'] = 0x00E5;
+	ring['u'] = 0x016F;
+
+	cedilla['C'] = 0x00C7;
+	cedilla['G'] = 0x0122;
+	cedilla['K'] = 0x0136;
+	cedilla['L'] = 0x013B;
+	cedilla['N'] = 0x0145;
+	cedilla['R'] = 0x0156;
+	cedilla['S'] = 0x015E;
+	cedilla['T'] = 0x0162;
+	cedilla['c'] = 0x00E7;
+	cedilla['k'] = 0x0137;
+	cedilla['l'] = 0x013C;
+	cedilla['n'] = 0x0146;
+	cedilla['r'] = 0x0157;
+	cedilla['s'] = 0x015F;
+	cedilla['t'] = 0x0163;
+
+	double_acute['O'] = 0x0150;
+	double_acute['U'] = 0x0170;
+	double_acute['o'] = 0x0151;
+	double_acute['u'] = 0x0171;
+
+	ogonek['A'] = 0x0104;
+	ogonek['E'] = 0x0118;
+	ogonek['I'] = 0x012E;
+	ogonek['U'] = 0x0172;
+	ogonek['a'] = 0x0105;
+	ogonek['e'] = 0x0119;
+	ogonek['u'] = 0x0173;
+	ogonek['i'] = 0x012F;
+
+	caron['C'] = 0x010C;
+	caron['D'] = 0x010E;
+	caron['E'] = 0x011A;
+	caron['L'] = 0x013D;
+	caron['N'] = 0x0147;
+	caron['R'] = 0x0158;
+	caron['S'] = 0x0160;
+	caron['T'] = 0x0164;
+	caron['Z'] = 0x017D;
+	caron['c'] = 0x010D;
+	caron['d'] = 0x010F;
+	caron['e'] = 0x011B;
+	caron['l'] = 0x013E;
+	caron['n'] = 0x0148;
+	caron['r'] = 0x0159;
+	caron['s'] = 0x0161;
+	caron['t'] = 0x0165;
+	caron['z'] = 0x017E;
+
+	main[10] = 0x000A;
+	main[' '] = 0x0020;
+	main['!'] = 0x0021;
+	main['"'] = 0x0022;
+	main['#'] = 0x0023;
+	main['$'] = 0x0024;
+	main['%'] = 0x0025;
+	main['&'] = 0x0026;
+	main['\''] = 0x0027;
+	main['('] = 0x0028;
+	main[')'] = 0x0029;
+	main['*'] = 0x002a;
+	main['+'] = 0x002b;
+	main[','] = 0x002c;
+	main['-'] = 0x002d;
+	main['.'] = 0x002e;
+	main['/'] = 0x002f;
+	main['0'] = 0x0030;
+	main['1'] = 0x0031;
+	main['2'] = 0x0032;
+	main['3'] = 0x0033;
+	main['4'] = 0x0034;
+	main['5'] = 0x0035;
+	main['6'] = 0x0036;
+	main['7'] = 0x0037;
+	main['8'] = 0x0038;
+	main['9'] = 0x0039;
+	main[':'] = 0x003a;
+	main[';'] = 0x003b;
+	main['<'] = 0x003c;
+	main['='] = 0x003d;
+	main['>'] = 0x003e;
+	main['?'] = 0x003f;
+	main['@'] = 0x0040;
+	main['A'] = 0x0041;
+	main['B'] = 0x0042;
+	main['C'] = 0x0043;
+	main['D'] = 0x0044;
+	main['E'] = 0x0045;
+	main['F'] = 0x0046;
+	main['G'] = 0x0047;
+	main['H'] = 0x0048;
+	main['I'] = 0x0049;
+	main['J'] = 0x004a;
+	main['K'] = 0x004b;
+	main['L'] = 0x004c;
+	main['M'] = 0x004d;
+	main['N'] = 0x004e;
+	main['O'] = 0x004f;
+	main['P'] = 0x0050;
+	main['Q'] = 0x0051;
+	main['R'] = 0x0052;
+	main['S'] = 0x0053;
+	main['T'] = 0x0054;
+	main['U'] = 0x0055;
+	main['V'] = 0x0056;
+	main['W'] = 0x0057;
+	main['X'] = 0x0058;
+	main['Y'] = 0x0059;
+	main['Z'] = 0x005a;
+	main['['] = 0x005b;
+	main['\\'] = 0x005c;
+	main[']'] = 0x005d;
+	main['^'] = 0x005e;
+	main['_'] = 0x005f;
+	main['`'] = 0x0060;
+	main['a'] = 0x0061;
+	main['b'] = 0x0062;
+	main['c'] = 0x0063;
+	main['d'] = 0x0064;
+	main['e'] = 0x0065;
+	main['f'] = 0x0066;
+	main['g'] = 0x0067;
+	main['h'] = 0x0068;
+	main['i'] = 0x0069;
+	main['j'] = 0x006a;
+	main['k'] = 0x006b;
+	main['l'] = 0x006c;
+	main['m'] = 0x006d;
+	main['n'] = 0x006e;
+	main['o'] = 0x006f;
+	main['p'] = 0x0070;
+	main['q'] = 0x0071;
+	main['r'] = 0x0072;
+	main['s'] = 0x0073;
+	main['t'] = 0x0074;
+	main['u'] = 0x0075;
+	main['v'] = 0x0076;
+	main['w'] = 0x0077;
+	main['x'] = 0x0078;
+	main['y'] = 0x0079;
+	main['z'] = 0x007a;
+	main['{'] = 0x007b;
+	main['|'] = 0x007c;
+	main['}'] = 0x007d;
+	main['~'] = 0x007e;
+	main[161] = 0x00A1;
+	main[162] = 0x00A2;
+	main[163] = 0x00A3;
+	main[165] = 0x00A5;
+	main[167] = 0x00A7;
+	main[168] = 0x00A4;
+	main[169] = 0x2018;
+	main[170] = 0x201C;
+	main[171] = 0x00AB;
+	main[172] = 0x2190;
+	main[173] = 0x2191;
+	main[174] = 0x2192;
+	main[175] = 0x2193;
+	main[176] = 0x00B0;
+	main[177] = 0x00B1;
+	main[178] = 0x00B2;
+	main[179] = 0x00B3;
+	main[180] = 0x00D7;
+	main[181] = 0x00B5;
+	main[182] = 0x00B6;
+	main[183] = 0x00B7;
+	main[184] = 0x00F7;
+	main[185] = 0x2019;
+	main[186] = 0x201D;
+	main[187] = 0x00BB;
+	main[188] = 0x00BC;
+	main[189] = 0x00BD;
+	main[190] = 0x00BE;
+	main[191] = 0x00BF;
+	main[193] = 0x0300;
+	main[194] = 0x0301;
+	main[195] = 0x0302;
+	main[196] = 0x0303;
+	main[197] = 0x0304;
+	main[198] = 0x0306;
+	main[199] = 0x0307;
+	main[200] = 0x0308;
+	main[202] = 0x030A;
+	main[203] = 0x0327;
+	main[205] = 0x030B;
+	main[206] = 0x0328; /* U+0328 COMBINING OGONEK; was 0x032B, an unrelated mark */
+	main[207] = 0x030C;
+	main[208] = 0x2015;
+	main[209] = 0x00B9;
+	main[210] = 0x00AE;
+	main[211] = 0x00A9;
+	main[212] = 0x2122;
+	main[213] = 0x266A;
+	main[214] = 0x00AC;
+	main[215] = 0x00A6;
+	main[220] = 0x215B;
+	main[221] = 0x215C;
+	main[222] = 0x215D;
+	main[223] = 0x215E;
+	main[224] = 0x2126;
+	main[225] = 0x00C6;
+	main[226] = 0x0110;
+	main[227] = 0x00AA;
+	main[228] = 0x0126;
+	main[230] = 0x0132;
+	main[231] = 0x013F;
+	main[232] = 0x0141;
+	main[233] = 0x00D8;
+	main[234] = 0x0152;
+	main[235] = 0x00BA;
+	main[236] = 0x00DE;
+	main[237] = 0x0166;
+	main[238] = 0x014A;
+	main[239] = 0x0149;
+	main[240] = 0x0138;
+	main[241] = 0x00E6;
+	main[242] = 0x0111;
+	main[243] = 0x00F0;
+	main[244] = 0x0127;
+	main[245] = 0x0131;
+	main[246] = 0x0133;
+	main[247] = 0x0140;
+	main[248] = 0x0142;
+	main[249] = 0x00F8;
+	main[250] = 0x0153;
+	main[251] = 0x00DF;
+	main[252] = 0x00FE;
+	main[253] = 0x0167;
+	main[254] = 0x014B;
+	main[255] = 0x00AD;
+}
diff --git a/src/iso6937_tables.h b/src/iso6937_tables.h
new file mode 100644
index 0000000..58c8c4c
--- /dev/null
+++ b/src/iso6937_tables.h
@@ -0,0 +1,46 @@
+/*
+    Copyright (C) 2014 Carl Hetherington <cth@carlh.net>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ +*/ + +/* THIS FILE WAS AUTO-GENERATED BY iso6937.py */ + +#include <map> + +namespace sub { + +extern void make_iso6937_tables (); + +namespace iso6937 { + +extern std::map<char, wchar_t> main; +extern std::map<char, wchar_t> grave; +extern std::map<char, wchar_t> acute; +extern std::map<char, wchar_t> circumflex; +extern std::map<char, wchar_t> tilde; +extern std::map<char, wchar_t> macron; +extern std::map<char, wchar_t> breve; +extern std::map<char, wchar_t> dot; +extern std::map<char, wchar_t> diaeresis; +extern std::map<char, wchar_t> ring; +extern std::map<char, wchar_t> cedilla; +extern std::map<char, wchar_t> double_acute; +extern std::map<char, wchar_t> ogonek; +extern std::map<char, wchar_t> caron; + +} +} diff --git a/src/stl_binary_reader.cc b/src/stl_binary_reader.cc index 3042a93..0d97e2e 100644 --- a/src/stl_binary_reader.cc +++ b/src/stl_binary_reader.cc @@ -18,8 +18,11 @@ */ #include <boost/lexical_cast.hpp> +#include <boost/algorithm/string.hpp> +#include <boost/locale.hpp> #include "stl_binary_reader.h" #include "exceptions.h" +#include "iso6937.h" #include "compose.hpp" using std::map; @@ -27,13 +30,15 @@ using std::cout; using std::string; using std::istream; using boost::lexical_cast; +using boost::algorithm::replace_all; +using boost::locale::conv::utf_to_utf; using namespace sub; -template <class T> -T -string_to_value (string k, map<string, STLCode<T> > m, string name) +template <class E, class F> +E +file_to_enum (F k, map<F, STLCode<E> > m, string name) { - typename map<string, STLCode<T> >::const_iterator i = m.find (k); + typename map<F, STLCode<E> >::const_iterator i = m.find (k); if (i == m.end ()) { throw STLError (String::compose ("Unknown %1 %2 in binary STL file", name, k)); } @@ -41,11 +46,11 @@ string_to_value (string k, map<string, STLCode<T> > m, string name) return i->second.value; } -template <class T> +template <class E, class F> string -value_to_description (T v, map<string, STLCode<T> > const & m) +enum_to_description 
(E v, map<F, STLCode<E> > const & m) { - for (typename map<string, STLCode<T> >::const_iterator i = m.begin(); i != m.end(); ++i) { + for (typename map<F, STLCode<E> >::const_iterator i = m.begin(); i != m.end(); ++i) { if (i->second.value == v) { return i->second.description; } @@ -54,27 +59,27 @@ value_to_description (T v, map<string, STLCode<T> > const & m) return ""; } -template <class T> +template <class E, class F> void -code (map<string, STLCode<T> >& m, string k, T v, string d) +code (map<F, STLCode<E> >& m, F k, E v, string d) { - m[k] = STLCode<T> (v, d); + m[k] = STLCode<E> (v, d); } STLBinaryReader::STLBinaryReader (istream& in) : _buffer (new unsigned char[1024]) { - create_maps (); + create_tables (); in.read ((char *) _buffer, 1024); if (in.gcount() != 1024) { throw STLError ("Could not read GSI block from binary STL file"); } - code_page_number = atoi (fixed_string (0, 3).c_str ()); + code_page_number = atoi (get_string (0, 3).c_str ()); - string const dfc = fixed_string (3, 8); + string const dfc = get_string (3, 8); if (dfc == "STL24.01") { frame_rate = 24; } else if (dfc == "STL25.01") { @@ -85,34 +90,68 @@ STLBinaryReader::STLBinaryReader (istream& in) throw STLError (String::compose ("Unknown disk format code %1 in binary STL file", dfc)); } - display_standard = string_to_value (fixed_string (11, 1), _display_standard_map, "display standard code"); - language_group = string_to_value (fixed_string (12, 2), _language_group_map, "character code"); - language = string_to_value (fixed_string (14, 2), _language_map, "language code"); - original_programme_title = fixed_string (16, 32); - original_episode_title = fixed_string (48, 32); - translated_programme_title = fixed_string (80, 32); - translated_episode_title = fixed_string (112, 32); - translator_name = fixed_string (144, 32); - translator_contact_details = fixed_string (176, 32); - subtitle_list_reference_code = fixed_string (208, 32); - creation_date = fixed_string (224, 6); - revision_date = 
fixed_string (230, 6);
-	revision_number = fixed_string (236, 2);
-
-	tti_blocks = atoi (fixed_string (238, 6).c_str ());
-	number_of_subtitles = atoi (fixed_string (243, 5).c_str ());
-	subtitle_groups = atoi (fixed_string (248, 3).c_str ());
-	maximum_characters = atoi (fixed_string (251, 2).c_str ());
-	maximum_rows = atoi (fixed_string (253, 2).c_str ());
-	timecode_status = string_to_value (fixed_string (255, 1), _timecode_status_map, "timecode status code");
-	start_of_programme = fixed_string (256, 8);
-	first_in_cue = fixed_string (256, 8);
-	disks = atoi (fixed_string (272, 1).c_str ());
-	disk_sequence_number = atoi (fixed_string (273, 1).c_str ());
-	country_of_origin = fixed_string (274, 3);
-	publisher = fixed_string (277, 32);
-	editor_name = fixed_string (309, 32);
-	editor_contact_details = fixed_string (341, 32);
+	display_standard = file_to_enum (get_string (11, 1), _display_standard_map, "display standard code");
+	language_group = file_to_enum (get_string (12, 2), _language_group_map, "character code");
+	language = file_to_enum (get_string (14, 2), _language_map, "language code");
+	original_programme_title = get_string (16, 32);
+	original_episode_title = get_string (48, 32);
+	translated_programme_title = get_string (80, 32);
+	translated_episode_title = get_string (112, 32);
+	translator_name = get_string (144, 32);
+	translator_contact_details = get_string (176, 32);
+	subtitle_list_reference_code = get_string (208, 16); /* 16 bytes, 208-223: creation date starts at 224 */
+	creation_date = get_string (224, 6);
+	revision_date = get_string (230, 6);
+	revision_number = get_string (236, 2);
+
+	tti_blocks = atoi (get_string (238, 5).c_str ());
+	number_of_subtitles = atoi (get_string (243, 5).c_str ());
+	subtitle_groups = atoi (get_string (248, 3).c_str ());
+	maximum_characters = atoi (get_string (251, 2).c_str ());
+	maximum_rows = atoi (get_string (253, 2).c_str ());
+	timecode_status = file_to_enum (get_string (255, 1), _timecode_status_map, "timecode status code");
+	start_of_programme = get_string (256, 8);
+	first_in_cue = get_string (264, 8); /* bytes 264-271; 256-263 is start of programme */
+	disks = atoi (get_string (272, 1).c_str ());
+	disk_sequence_number = atoi (get_string (273, 1).c_str ());
+	country_of_origin = get_string (274, 3);
+	publisher = get_string (277, 32);
+	editor_name = get_string (309, 32);
+	editor_contact_details = get_string (341, 32);
+
+	for (int i = 0; i < tti_blocks; ++i) {
+		Subtitle sub;
+
+		in.read ((char *) _buffer, 128);
+		if (in.gcount() != 128) {
+			throw STLError ("Could not read TTI block from binary STL file");
+		}
+
+		if (file_to_enum (get_int (15, 1), _comment_map, "comment flag") == COMMENT_YES) {
+			continue;
+		}
+
+		sub.from.frame = get_timecode (5);
+		sub.to.frame = get_timecode (9);
+		sub.line = get_int (13, 1);
+
+		/* XXX: justification, effects */
+
+		string s = get_string (16, 112);
+
+		/* 8Ah is a new line */
+		replace_all (s, "\x8a", "\n");
+
+		/* 8Fh is unused space, so trim the string to the first instance of that */
+		size_t unused = s.find_first_of ('\x8f');
+		if (unused != string::npos) {
+			s = s.substr (0, unused);
+		}
+
+		sub.text = utf_to_utf<char> (iso6937_to_utf16 (s.c_str()));
+
+		_subs.push_back (sub);
+	}
 }
 
 STLBinaryReader::~STLBinaryReader ()
@@ -121,7 +160,7 @@ STLBinaryReader::~STLBinaryReader ()
 }
 
 string
-STLBinaryReader::fixed_string (int offset, int length) const
+STLBinaryReader::get_string (int offset, int length) const
 {
 	string s;
 	for (int i = 0; i < length; ++i) {
@@ -131,6 +170,23 @@ STLBinaryReader::fixed_string (int offset, int length) const
 	return s;
 }
 
+int
+STLBinaryReader::get_int (int offset, int length) const
+{
+	int v = 0;
+	for (int i = 0; i < length; ++i) {
+		v |= _buffer[offset + i] << (8 * i); /* least-significant byte first */
+	}
+
+	return v;
+}
+
+FrameTime
+STLBinaryReader::get_timecode (int offset) const
+{
+	return FrameTime (_buffer[offset], _buffer[offset + 1], _buffer[offset + 2], _buffer[offset + 3]);
+}
+
 map<string, string>
 STLBinaryReader::metadata () const
 {
@@ -138,9 +194,9 @@ STLBinaryReader::metadata () const
 	m["Code page 
number"] = lexical_cast<string> (code_page_number); m["Frame rate"] = lexical_cast<string> (frame_rate); - m["Display standard"] = value_to_description (display_standard, _display_standard_map); - m["Language group"] = value_to_description (language_group, _language_group_map); - m["Language"] = value_to_description (language, _language_map); + m["Display standard"] = enum_to_description (display_standard, _display_standard_map); + m["Language group"] = enum_to_description (language_group, _language_group_map); + m["Language"] = enum_to_description (language, _language_map); m["Original programme title"] = original_programme_title; m["Original episode title"] = original_episode_title; m["Translated programme title"] = translated_programme_title; @@ -156,7 +212,7 @@ STLBinaryReader::metadata () const m["Subtitle groups"] = lexical_cast<string> (subtitle_groups); m["Maximum characters"] = lexical_cast<string> (maximum_characters); m["Maximum rows"] = lexical_cast<string> (maximum_rows); - m["Timecode status"] = value_to_description (timecode_status, _timecode_status_map); + m["Timecode status"] = enum_to_description (timecode_status, _timecode_status_map); m["Start of programme"] = start_of_programme; m["First in cue"] = first_in_cue; m["Disks"] = lexical_cast<string> (disks); @@ -170,123 +226,136 @@ STLBinaryReader::metadata () const } void -STLBinaryReader::create_maps () +STLBinaryReader::create_tables () { - code (_display_standard_map, " ", DISPLAY_STANDARD_UNDEFINED, "Undefined"); - code (_display_standard_map, "0", DISPLAY_STANDARD_OPEN_SUBTITLING, "Open subtitling"); - code (_display_standard_map, "1", DISPLAY_STANDARD_LEVEL_1_TELETEXT, "Level 1 teletext"); - code (_display_standard_map, "2", DISPLAY_STANDARD_LEVEL_2_TELETEXT, "Level 2 teletext"); + code<DisplayStandard, string> (_display_standard_map, " ", DISPLAY_STANDARD_UNDEFINED, "Undefined"); + code<DisplayStandard, string> (_display_standard_map, "0", DISPLAY_STANDARD_OPEN_SUBTITLING, "Open 
subtitling"); + code<DisplayStandard, string> (_display_standard_map, "1", DISPLAY_STANDARD_LEVEL_1_TELETEXT, "Level 1 teletext"); + code<DisplayStandard, string> (_display_standard_map, "2", DISPLAY_STANDARD_LEVEL_2_TELETEXT, "Level 2 teletext"); - code (_language_group_map, "00", LANGUAGE_GROUP_LATIN, "Latin"); - code (_language_group_map, "01", LANGUAGE_GROUP_LATIN_CYRILLIC, "Latin/Cyrillic"); - code (_language_group_map, "02", LANGUAGE_GROUP_LATIN_ARABIC, "Latin/Arabic"); - code (_language_group_map, "03", LANGUAGE_GROUP_LATIN_GREEK, "Latin/Greek"); - code (_language_group_map, "04", LANGUAGE_GROUP_LATIN_HEBREW, "Latin/Hebrew"); + code<LanguageGroup, string> (_language_group_map, "00", LANGUAGE_GROUP_LATIN, "Latin"); + code<LanguageGroup, string> (_language_group_map, "01", LANGUAGE_GROUP_LATIN_CYRILLIC, "Latin/Cyrillic"); + code<LanguageGroup, string> (_language_group_map, "02", LANGUAGE_GROUP_LATIN_ARABIC, "Latin/Arabic"); + code<LanguageGroup, string> (_language_group_map, "03", LANGUAGE_GROUP_LATIN_GREEK, "Latin/Greek"); + code<LanguageGroup, string> (_language_group_map, "04", LANGUAGE_GROUP_LATIN_HEBREW, "Latin/Hebrew"); - code (_language_map, "00", LANGUAGE_UNKNOWN, "Unknown"); - code (_language_map, "01", LANGUAGE_ALBANIAN, "Albanian"); - code (_language_map, "02", LANGUAGE_BRETON, "Breton"); - code (_language_map, "03", LANGUAGE_CATALAN, "Catalan"); - code (_language_map, "04", LANGUAGE_CROATIAN, "Croatian"); - code (_language_map, "05", LANGUAGE_WELSH, "Welsh"); - code (_language_map, "06", LANGUAGE_CZECH, "Czech"); - code (_language_map, "07", LANGUAGE_DANISH, "Danish"); - code (_language_map, "08", LANGUAGE_GERMAN, "German"); - code (_language_map, "09", LANGUAGE_ENGLISH, "English"); - code (_language_map, "0A", LANGUAGE_SPANISH, "Spanish"); - code (_language_map, "0B", LANGUAGE_ESPERANTO, "Esperanto"); - code (_language_map, "0C", LANGUAGE_ESTONIAN, "Estonian"); - code (_language_map, "0D", LANGUAGE_BASQUE, "Basque"); - code (_language_map, "0E", 
LANGUAGE_FAROESE, "Faroese"); - code (_language_map, "0F", LANGUAGE_FRENCH, "French"); - code (_language_map, "10", LANGUAGE_FRISIAN, "Frisian"); - code (_language_map, "11", LANGUAGE_IRISH, "Irish"); - code (_language_map, "12", LANGUAGE_GAELIC, "Gaelic"); - code (_language_map, "13", LANGUAGE_GALACIAN, "Galacian"); - code (_language_map, "14", LANGUAGE_ICELANDIC, "Icelandic"); - code (_language_map, "15", LANGUAGE_ITALIAN, "Italian"); - code (_language_map, "16", LANGUAGE_LAPPISH, "Lappish"); - code (_language_map, "17", LANGUAGE_LATIN, "Latin"); - code (_language_map, "18", LANGUAGE_LATVIAN, "Latvian"); - code (_language_map, "19", LANGUAGE_LUXEMBORGIAN, "Luxemborgian"); - code (_language_map, "1A", LANGUAGE_LITHUANIAN, "Lithuanian"); - code (_language_map, "1B", LANGUAGE_HUNGARIAN, "Hungarian"); - code (_language_map, "1C", LANGUAGE_MALTESE, "Maltese"); - code (_language_map, "1D", LANGUAGE_DUTCH, "Dutch"); - code (_language_map, "1E", LANGUAGE_NORWEGIAN, "Norwegian"); - code (_language_map, "1F", LANGUAGE_OCCITAN, "Occitan"); - code (_language_map, "20", LANGUAGE_POLISH, "Polish"); - code (_language_map, "21", LANGUAGE_PORTUGESE, "Portugese"); - code (_language_map, "22", LANGUAGE_ROMANIAN, "Romanian"); - code (_language_map, "23", LANGUAGE_ROMANSH, "Romansh"); - code (_language_map, "24", LANGUAGE_SERBIAN, "Serbian"); - code (_language_map, "25", LANGUAGE_SLOVAK, "Slovak"); - code (_language_map, "26", LANGUAGE_SLOVENIAN, "Slovenian"); - code (_language_map, "27", LANGUAGE_FINNISH, "Finnish"); - code (_language_map, "28", LANGUAGE_SWEDISH, "Swedish"); - code (_language_map, "29", LANGUAGE_TURKISH, "Turkish"); - code (_language_map, "2A", LANGUAGE_FLEMISH, "Flemish"); - code (_language_map, "2B", LANGUAGE_WALLON, "Wallon"); - code (_language_map, "7F", LANGUAGE_AMHARIC, "Amharic"); - code (_language_map, "7E", LANGUAGE_ARABIC, "Arabic"); - code (_language_map, "7D", LANGUAGE_ARMENIAN, "Armenian"); - code (_language_map, "7C", LANGUAGE_ASSAMESE, "Assamese"); - 
code (_language_map, "7B", LANGUAGE_AZERBAIJANI, "Azerbaijani"); - code (_language_map, "7A", LANGUAGE_BAMBORA, "Bambora"); - code (_language_map, "79", LANGUAGE_BIELORUSSIAN, "Bielorussian"); - code (_language_map, "78", LANGUAGE_BENGALI, "Bengali"); - code (_language_map, "77", LANGUAGE_BULGARIAN, "Bulgarian"); - code (_language_map, "76", LANGUAGE_BURMESE, "Burmese"); - code (_language_map, "75", LANGUAGE_CHINESE, "Chinese"); - code (_language_map, "74", LANGUAGE_CHURASH, "Churash"); - code (_language_map, "73", LANGUAGE_DARI, "Dari"); - code (_language_map, "72", LANGUAGE_FULANI, "Fulani"); - code (_language_map, "71", LANGUAGE_GEORGIAN, "Georgian"); - code (_language_map, "70", LANGUAGE_GREEK, "Greek"); - code (_language_map, "6F", LANGUAGE_GUJURATI, "Gujarati"); - code (_language_map, "6E", LANGUAGE_GURANI, "Gurani"); - code (_language_map, "6D", LANGUAGE_HAUSA, "Hausa"); - code (_language_map, "6C", LANGUAGE_HEBREW, "Hebrew"); - code (_language_map, "6B", LANGUAGE_HINDI, "Hindi"); - code (_language_map, "6A", LANGUAGE_INDONESIAN, "Indonesian"); - code (_language_map, "69", LANGUAGE_JAPANESE, "Japanese"); - code (_language_map, "68", LANGUAGE_KANNADA, "Kannada"); - code (_language_map, "67", LANGUAGE_KAZAKH, "Kazakh"); - code (_language_map, "66", LANGUAGE_KHMER, "Khmer"); - code (_language_map, "65", LANGUAGE_KOREAN, "Korean"); - code (_language_map, "64", LANGUAGE_LAOTIAN, "Laotian"); - code (_language_map, "63", LANGUAGE_MACEDONIAN, "Macedonian"); - code (_language_map, "62", LANGUAGE_MALAGASAY, "Malagasay"); - code (_language_map, "61", LANGUAGE_MALAYSIAN, "Malaysian"); - code (_language_map, "60", LANGUAGE_MOLDAVIAN, "Moldavian"); - code (_language_map, "5F", LANGUAGE_MARATHI, "Marathi"); - code (_language_map, "5E", LANGUAGE_NDEBELE, "Ndebele"); - code (_language_map, "5D", LANGUAGE_NEPALI, "Nepali"); - code (_language_map, "5C", LANGUAGE_ORIYA, "Oriya"); - code (_language_map, "5B", LANGUAGE_PAPAMIENTO, "Papamiento"); - code (_language_map, "5A", 
LANGUAGE_PERSIAN, "Persian"); - code (_language_map, "59", LANGUAGE_PUNJABI, "Punjabi"); - code (_language_map, "58", LANGUAGE_PUSHTU, "Pushtu"); - code (_language_map, "57", LANGUAGE_QUECHUA, "Quechua"); - code (_language_map, "56", LANGUAGE_RUSSIAN, "Russian"); - code (_language_map, "55", LANGUAGE_RUTHENIAN, "Ruthenian"); - code (_language_map, "54", LANGUAGE_SERBO_CROAT, "Serbo Croat"); - code (_language_map, "53", LANGUAGE_SHONA, "Shona"); - code (_language_map, "52", LANGUAGE_SINHALESE, "Sinhalese"); - code (_language_map, "51", LANGUAGE_SOMALI, "Somali"); - code (_language_map, "50", LANGUAGE_SRANAN_TONGO, "Sranan Tongo"); - code (_language_map, "4F", LANGUAGE_SWAHILI, "Swahili"); - code (_language_map, "4E", LANGUAGE_TADZHIK, "Tadzhik"); - code (_language_map, "4D", LANGUAGE_TAMIL, "Tamil"); - code (_language_map, "4C", LANGUAGE_TATAR, "Tatar"); - code (_language_map, "4B", LANGUAGE_TELUGU, "Telugu"); - code (_language_map, "4A", LANGUAGE_THAI, "Thai"); - code (_language_map, "49", LANGUAGE_UKRANIAN, "Ukranian"); - code (_language_map, "48", LANGUAGE_URDU, "Urdu"); - code (_language_map, "47", LANGUAGE_UZBEK, "Uzbek"); - code (_language_map, "46", LANGUAGE_VIETNAMESE, "Vietnamese"); - code (_language_map, "45", LANGUAGE_ZULU, "Zulu"); - - code (_timecode_status_map, "0", TIMECODE_STATUS_NOT_INTENDED_FOR_USE, "Not intended for use"); - code (_timecode_status_map, "1", TIMECODE_STATUS_INTENDED_FOR_USE, "Intended for use"); + code<Language, string> (_language_map, "00", LANGUAGE_UNKNOWN, "Unknown"); + code<Language, string> (_language_map, "01", LANGUAGE_ALBANIAN, "Albanian"); + code<Language, string> (_language_map, "02", LANGUAGE_BRETON, "Breton"); + code<Language, string> (_language_map, "03", LANGUAGE_CATALAN, "Catalan"); + code<Language, string> (_language_map, "04", LANGUAGE_CROATIAN, "Croatian"); + code<Language, string> (_language_map, "05", LANGUAGE_WELSH, "Welsh"); + code<Language, string> (_language_map, "06", LANGUAGE_CZECH, "Czech"); + 
code<Language, string> (_language_map, "07", LANGUAGE_DANISH, "Danish"); + code<Language, string> (_language_map, "08", LANGUAGE_GERMAN, "German"); + code<Language, string> (_language_map, "09", LANGUAGE_ENGLISH, "English"); + code<Language, string> (_language_map, "0A", LANGUAGE_SPANISH, "Spanish"); + code<Language, string> (_language_map, "0B", LANGUAGE_ESPERANTO, "Esperanto"); + code<Language, string> (_language_map, "0C", LANGUAGE_ESTONIAN, "Estonian"); + code<Language, string> (_language_map, "0D", LANGUAGE_BASQUE, "Basque"); + code<Language, string> (_language_map, "0E", LANGUAGE_FAROESE, "Faroese"); + code<Language, string> (_language_map, "0F", LANGUAGE_FRENCH, "French"); + code<Language, string> (_language_map, "10", LANGUAGE_FRISIAN, "Frisian"); + code<Language, string> (_language_map, "11", LANGUAGE_IRISH, "Irish"); + code<Language, string> (_language_map, "12", LANGUAGE_GAELIC, "Gaelic"); + code<Language, string> (_language_map, "13", LANGUAGE_GALACIAN, "Galacian"); + code<Language, string> (_language_map, "14", LANGUAGE_ICELANDIC, "Icelandic"); + code<Language, string> (_language_map, "15", LANGUAGE_ITALIAN, "Italian"); + code<Language, string> (_language_map, "16", LANGUAGE_LAPPISH, "Lappish"); + code<Language, string> (_language_map, "17", LANGUAGE_LATIN, "Latin"); + code<Language, string> (_language_map, "18", LANGUAGE_LATVIAN, "Latvian"); + code<Language, string> (_language_map, "19", LANGUAGE_LUXEMBORGIAN, "Luxemborgian"); + code<Language, string> (_language_map, "1A", LANGUAGE_LITHUANIAN, "Lithuanian"); + code<Language, string> (_language_map, "1B", LANGUAGE_HUNGARIAN, "Hungarian"); + code<Language, string> (_language_map, "1C", LANGUAGE_MALTESE, "Maltese"); + code<Language, string> (_language_map, "1D", LANGUAGE_DUTCH, "Dutch"); + code<Language, string> (_language_map, "1E", LANGUAGE_NORWEGIAN, "Norwegian"); + code<Language, string> (_language_map, "1F", LANGUAGE_OCCITAN, "Occitan"); + code<Language, string> (_language_map, "20", 
LANGUAGE_POLISH, "Polish"); + code<Language, string> (_language_map, "21", LANGUAGE_PORTUGESE, "Portugese"); + code<Language, string> (_language_map, "22", LANGUAGE_ROMANIAN, "Romanian"); + code<Language, string> (_language_map, "23", LANGUAGE_ROMANSH, "Romansh"); + code<Language, string> (_language_map, "24", LANGUAGE_SERBIAN, "Serbian"); + code<Language, string> (_language_map, "25", LANGUAGE_SLOVAK, "Slovak"); + code<Language, string> (_language_map, "26", LANGUAGE_SLOVENIAN, "Slovenian"); + code<Language, string> (_language_map, "27", LANGUAGE_FINNISH, "Finnish"); + code<Language, string> (_language_map, "28", LANGUAGE_SWEDISH, "Swedish"); + code<Language, string> (_language_map, "29", LANGUAGE_TURKISH, "Turkish"); + code<Language, string> (_language_map, "2A", LANGUAGE_FLEMISH, "Flemish"); + code<Language, string> (_language_map, "2B", LANGUAGE_WALLON, "Wallon"); + code<Language, string> (_language_map, "7F", LANGUAGE_AMHARIC, "Amharic"); + code<Language, string> (_language_map, "7E", LANGUAGE_ARABIC, "Arabic"); + code<Language, string> (_language_map, "7D", LANGUAGE_ARMENIAN, "Armenian"); + code<Language, string> (_language_map, "7C", LANGUAGE_ASSAMESE, "Assamese"); + code<Language, string> (_language_map, "7B", LANGUAGE_AZERBAIJANI, "Azerbaijani"); + code<Language, string> (_language_map, "7A", LANGUAGE_BAMBORA, "Bambora"); + code<Language, string> (_language_map, "79", LANGUAGE_BIELORUSSIAN, "Bielorussian"); + code<Language, string> (_language_map, "78", LANGUAGE_BENGALI, "Bengali"); + code<Language, string> (_language_map, "77", LANGUAGE_BULGARIAN, "Bulgarian"); + code<Language, string> (_language_map, "76", LANGUAGE_BURMESE, "Burmese"); + code<Language, string> (_language_map, "75", LANGUAGE_CHINESE, "Chinese"); + code<Language, string> (_language_map, "74", LANGUAGE_CHURASH, "Churash"); + code<Language, string> (_language_map, "73", LANGUAGE_DARI, "Dari"); + code<Language, string> (_language_map, "72", LANGUAGE_FULANI, "Fulani"); + code<Language, string> 
(_language_map, "71", LANGUAGE_GEORGIAN, "Georgian"); + code<Language, string> (_language_map, "70", LANGUAGE_GREEK, "Greek"); + code<Language, string> (_language_map, "6F", LANGUAGE_GUJURATI, "Gujarati"); + code<Language, string> (_language_map, "6E", LANGUAGE_GURANI, "Gurani"); + code<Language, string> (_language_map, "6D", LANGUAGE_HAUSA, "Hausa"); + code<Language, string> (_language_map, "6C", LANGUAGE_HEBREW, "Hebrew"); + code<Language, string> (_language_map, "6B", LANGUAGE_HINDI, "Hindi"); + code<Language, string> (_language_map, "6A", LANGUAGE_INDONESIAN, "Indonesian"); + code<Language, string> (_language_map, "69", LANGUAGE_JAPANESE, "Japanese"); + code<Language, string> (_language_map, "68", LANGUAGE_KANNADA, "Kannada"); + code<Language, string> (_language_map, "67", LANGUAGE_KAZAKH, "Kazakh"); + code<Language, string> (_language_map, "66", LANGUAGE_KHMER, "Khmer"); + code<Language, string> (_language_map, "65", LANGUAGE_KOREAN, "Korean"); + code<Language, string> (_language_map, "64", LANGUAGE_LAOTIAN, "Laotian"); + code<Language, string> (_language_map, "63", LANGUAGE_MACEDONIAN, "Macedonian"); + code<Language, string> (_language_map, "62", LANGUAGE_MALAGASAY, "Malagasay"); + code<Language, string> (_language_map, "61", LANGUAGE_MALAYSIAN, "Malaysian"); + code<Language, string> (_language_map, "60", LANGUAGE_MOLDAVIAN, "Moldavian"); + code<Language, string> (_language_map, "5F", LANGUAGE_MARATHI, "Marathi"); + code<Language, string> (_language_map, "5E", LANGUAGE_NDEBELE, "Ndebele"); + code<Language, string> (_language_map, "5D", LANGUAGE_NEPALI, "Nepali"); + code<Language, string> (_language_map, "5C", LANGUAGE_ORIYA, "Oriya"); + code<Language, string> (_language_map, "5B", LANGUAGE_PAPAMIENTO, "Papamiento"); + code<Language, string> (_language_map, "5A", LANGUAGE_PERSIAN, "Persian"); + code<Language, string> (_language_map, "59", LANGUAGE_PUNJABI, "Punjabi"); + code<Language, string> (_language_map, "58", LANGUAGE_PUSHTU, "Pushtu"); + code<Language, 
string> (_language_map, "57", LANGUAGE_QUECHUA, "Quechua"); + code<Language, string> (_language_map, "56", LANGUAGE_RUSSIAN, "Russian"); + code<Language, string> (_language_map, "55", LANGUAGE_RUTHENIAN, "Ruthenian"); + code<Language, string> (_language_map, "54", LANGUAGE_SERBO_CROAT, "Serbo Croat"); + code<Language, string> (_language_map, "53", LANGUAGE_SHONA, "Shona"); + code<Language, string> (_language_map, "52", LANGUAGE_SINHALESE, "Sinhalese"); + code<Language, string> (_language_map, "51", LANGUAGE_SOMALI, "Somali"); + code<Language, string> (_language_map, "50", LANGUAGE_SRANAN_TONGO, "Sranan Tongo"); + code<Language, string> (_language_map, "4F", LANGUAGE_SWAHILI, "Swahili"); + code<Language, string> (_language_map, "4E", LANGUAGE_TADZHIK, "Tadzhik"); + code<Language, string> (_language_map, "4D", LANGUAGE_TAMIL, "Tamil"); + code<Language, string> (_language_map, "4C", LANGUAGE_TATAR, "Tatar"); + code<Language, string> (_language_map, "4B", LANGUAGE_TELUGU, "Telugu"); + code<Language, string> (_language_map, "4A", LANGUAGE_THAI, "Thai"); + code<Language, string> (_language_map, "49", LANGUAGE_UKRANIAN, "Ukranian"); + code<Language, string> (_language_map, "48", LANGUAGE_URDU, "Urdu"); + code<Language, string> (_language_map, "47", LANGUAGE_UZBEK, "Uzbek"); + code<Language, string> (_language_map, "46", LANGUAGE_VIETNAMESE, "Vietnamese"); + code<Language, string> (_language_map, "45", LANGUAGE_ZULU, "Zulu"); + + code<TimecodeStatus, string> (_timecode_status_map, "0", TIMECODE_STATUS_NOT_INTENDED_FOR_USE, "Not intended for use"); + code<TimecodeStatus, string> (_timecode_status_map, "1", TIMECODE_STATUS_INTENDED_FOR_USE, "Intended for use"); + + code<CumulativeStatus, int> (_cumulative_status_map, 0, CUMULATIVE_STATUS_NOT_CUMULATIVE, "Not part of a cumulative set"); + code<CumulativeStatus, int> (_cumulative_status_map, 1, CUMULATIVE_STATUS_FIRST, "First subtitle of a cumulative set"); + code<CumulativeStatus, int> (_cumulative_status_map, 2, 
CUMULATIVE_STATUS_INTERMEDIATE, "Intermediate subtitle of a cumulative set"); + code<CumulativeStatus, int> (_cumulative_status_map, 3, CUMULATIVE_STATUS_LAST, "Last subtitle of a cumulative set"); + + code<Justification, int> (_justification_map, 0, JUSTIFICATION_NONE, "No justification"); + code<Justification, int> (_justification_map, 1, JUSTIFICATION_LEFT, "Left justification"); + code<Justification, int> (_justification_map, 2, JUSTIFICATION_CENTRE, "Centre justification"); + code<Justification, int> (_justification_map, 3, JUSTIFICATION_RIGHT, "Right justification"); + + code<Comment, int> (_comment_map, 0, COMMENT_NO, "Not a comment"); + code<Comment, int> (_comment_map, 1, COMMENT_YES, "Is a comment"); } diff --git a/src/stl_binary_reader.h b/src/stl_binary_reader.h index 0491cb8..b049b93 100644 --- a/src/stl_binary_reader.h +++ b/src/stl_binary_reader.h @@ -173,6 +173,25 @@ public: TIMECODE_STATUS_INTENDED_FOR_USE }; + enum CumulativeStatus { + CUMULATIVE_STATUS_NOT_CUMULATIVE, + CUMULATIVE_STATUS_FIRST, + CUMULATIVE_STATUS_INTERMEDIATE, + CUMULATIVE_STATUS_LAST + }; + + enum Justification { + JUSTIFICATION_NONE, + JUSTIFICATION_LEFT, + JUSTIFICATION_CENTRE, + JUSTIFICATION_RIGHT + }; + + enum Comment { + COMMENT_NO, + COMMENT_YES + }; + int code_page_number; int frame_rate; DisplayStandard display_standard; @@ -204,13 +223,19 @@ public: std::string editor_contact_details; private: - std::string fixed_string (int, int) const; - void create_maps (); + std::string get_string (int, int) const; + int get_int (int, int) const; + FrameTime get_timecode (int) const; + + void create_tables (); std::map<std::string, STLCode<DisplayStandard> > _display_standard_map; std::map<std::string, STLCode<LanguageGroup> > _language_group_map; std::map<std::string, STLCode<Language> > _language_map; std::map<std::string, STLCode<TimecodeStatus> > _timecode_status_map; + std::map<int, STLCode<CumulativeStatus> > _cumulative_status_map; + std::map<int, STLCode<Justification> > 
_justification_map; + std::map<int, STLCode<Comment> > _comment_map; unsigned char* _buffer; }; diff --git a/src/subtitle.h b/src/subtitle.h index 70a8eb6..fc08ac7 100644 --- a/src/subtitle.h +++ b/src/subtitle.h @@ -42,6 +42,7 @@ public: , line (0) {} + /** Subtitle text in UTF-8 */ std::string text; std::string font; @@ -71,7 +72,7 @@ public: bool bold; ///< true to use a bold version of font bool italic; ///< true to use an italic version of font bool underline; ///< true to underline - int line; + int line; ///< line number, starting from 0 /** from time */ struct { diff --git a/src/wscript b/src/wscript index d834c43..9cb874a 100644 --- a/src/wscript +++ b/src/wscript @@ -8,7 +8,7 @@ def build(bld): obj.name = 'libsub' obj.target = 'sub' - obj.uselib = 'CXML BOOST_FILESYSTEM' + obj.uselib = 'CXML BOOST_FILESYSTEM BOOST_LOCALE' obj.export_includes = ['.'] obj.source = """ colour.cc @@ -16,6 +16,8 @@ def build(bld): dcp_reader.cc effect.cc frame_time.cc + iso6937.cc + iso6937_tables.cc metric_time.cc reader.cc reader_factory.cc |
