2 Copyright (C) 2014-2020 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 #include "stl_binary_reader.h"
21 #include "exceptions.h"
24 #include "compose.hpp"
25 #include <boost/lexical_cast.hpp>
26 #include <boost/algorithm/string.hpp>
27 #include <boost/locale.hpp>
35 using boost::lexical_cast;
36 using boost::algorithm::replace_all;
37 using boost::is_any_of;
38 using boost::locale::conv::utf_to_utf;
39 using std::shared_ptr;
/* Base class for objects which fetch fixed-size blocks of a binary STL file
   into a byte buffer and decode fields out of that buffer.

   The constructor's initialiser list allocates a 1024-byte buffer.  read()
   is pure virtual; the get_*() helpers below only look at _buffer.

   NOTE(review): none of the visible get_*() lines check offset / length
   against the 1024-byte allocation; confirm that every caller stays inside it.
*/
44 class InputReader : public boost::noncopyable
48 : _buffer (new unsigned char[1024])
/* NOTE(review): presumably this releases _buffer; the destructor body is not
   visible here, so confirm.
*/
53 virtual ~InputReader ()
/* Fetch `size' bytes into _buffer.  `what' names the block being read; the
   two subclasses in this file put it into their error message.
*/
58 virtual void read (int size, string what) = 0;
/* Build a string from `length' bytes of _buffer starting at `offset',
   appending one byte at a time.
*/
60 string get_string (int offset, int length) const
63 for (int i = 0; i < length; ++i) {
64 s += _buffer[offset + i];
/* Combine `length' bytes of _buffer starting at `offset' into one integer,
   least-significant byte first (byte i is shifted left by 8 * i bits).

   NOTE(review): each byte is promoted to int before the shift, so with a
   32-bit int a length above 4 (or a length of 4 with the top bit of the last
   byte set) would shift outside the range of int.  The calls visible in this
   file all use a length of 1.
*/
70 int get_int (int offset, int length) const
73 for (int i = 0; i < length; ++i) {
74 v |= _buffer[offset + i] << (8 * i);
/* Make a Time from four consecutive bytes of _buffer starting at `offset',
   passed to Time::from_hmsf in buffer order together with a rate of
   frame_rate / 1.
*/
80 Time get_timecode (int offset, int frame_rate) const
82 return Time::from_hmsf (_buffer[offset], _buffer[offset + 1], _buffer[offset + 2], _buffer[offset + 3], Rational (frame_rate, 1));
/* Storage for the most recently read block (allocated as 1024 bytes in the
   initialiser list above).
*/
86 unsigned char* _buffer;
/* InputReader which takes its data from a std::istream. */
90 class StreamInputReader : public InputReader
93 StreamInputReader (istream& in)
/* Read `size' bytes from the stream into _buffer.  If the stream delivers a
   different number of bytes (as reported by gcount()) an STLError is thrown
   whose message includes `what'.
*/
99 void read (int size, string what)
101 _in.read (reinterpret_cast<char *>(_buffer), size);
102 if (_in.gcount() != size) {
103 throw STLError (String::compose("Could not read %1 block from binary STL file", what));
/* InputReader which takes its data from a C stdio FILE*. */
111 class FILEInputReader : public InputReader
114 FILEInputReader (FILE* in)
/* Read `size' bytes from the FILE into _buffer with a single fread() call.
   If fread() returns a different count an STLError is thrown whose message
   includes `what'.
*/
120 void read (int size, string what)
/* N is the number of 1-byte items which fread() actually delivered */
122 size_t const N = fread (_buffer, 1, size, _in);
123 if (static_cast<int>(N) != size) {
124 throw STLError (String::compose("Could not read %1 block from binary STL file", what));
/* Construct from a std::istream: wrap the stream in a StreamInputReader and
   pass it straight to read(), so parsing happens during construction.
*/
134 STLBinaryReader::STLBinaryReader (istream& in)
136 read (shared_ptr<InputReader>(new StreamInputReader(in)));
/* Construct from a C stdio FILE*: wrap it in a FILEInputReader and pass it
   straight to read(), so parsing happens during construction.
*/
139 STLBinaryReader::STLBinaryReader (FILE* in)
141 read (shared_ptr<InputReader>(new FILEInputReader(in)));
/* Parse a whole binary STL file from `reader'.

   First a 1024-byte "GSI" block is read and its fixed-offset fields are
   copied into the members of this object.  Then tti_blocks blocks of 128
   bytes ("TTI") are read one after another.  For each TTI block:
     - byte 15 is looked up with comment_file_to_enum and compared against
       COMMENT_YES;
     - bytes 16 to 127 (112 bytes) are taken as the text and split into
       lines at every 0x8a byte;
     - bytes 5-8 and 9-12 give the `from' and `to' timecodes, byte 13 the
       vertical position and byte 14 the justification code;
     - the characters of each line are scanned and the gathered text is
       converted via iso6937_to_utf16 / utf_to_utf<char> and appended to
       _subs.

   reader->read() throws STLError when a block cannot be read in full (see
   the two InputReader subclasses in this file).
*/
144 void STLBinaryReader::read (shared_ptr<InputReader> reader)
146 reader->read (1024, "GSI");
/* Header fields, each taken from a fixed offset / length in the GSI block.
   Numeric fields are stored as text and converted with atoi, which gives 0
   when the text does not start with a number.
*/
148 code_page_number = atoi (reader->get_string(0, 3).c_str());
149 frame_rate = stl_dfc_to_frame_rate (reader->get_string(3, 8));
150 display_standard = _tables.display_standard_file_to_enum (reader->get_string(11, 1));
151 language_group = _tables.language_group_file_to_enum (reader->get_string(12, 2));
152 language = _tables.language_file_to_enum (reader->get_string(14, 2));
153 original_programme_title = reader->get_string(16, 32);
154 original_episode_title = reader->get_string(48, 32);
155 translated_programme_title = reader->get_string(80, 32);
156 translated_episode_title = reader->get_string(112, 32);
157 translator_name = reader->get_string(144, 32);
158 translator_contact_details = reader->get_string(176, 32);
159 subtitle_list_reference_code = reader->get_string(208, 16);
160 creation_date = reader->get_string(224, 6);
161 revision_date = reader->get_string(230, 6);
162 revision_number = reader->get_string(236, 2);
/* tti_blocks is the count which drives the block loop further down */
164 tti_blocks = atoi (reader->get_string(238, 5).c_str());
165 number_of_subtitles = atoi (reader->get_string(243, 5).c_str());
166 subtitle_groups = atoi (reader->get_string(248, 3).c_str());
167 maximum_characters = atoi (reader->get_string(251, 2).c_str());
168 maximum_rows = atoi (reader->get_string(253, 2).c_str());
170 if (maximum_rows == 99) {
171 /* https://tech.ebu.ch/docs/tech/tech3360.pdf says
172 "It is recommended that for files with a large MNR value (e.g. '99') the
173 font size (height) should be defined as ~ 1/15 of the 'Subtitle Safe Area'
174 and a lineHeight of 120% is used to achieve a row height of ~ 1/12 of the height
175 of the 'Subtitle Safe Area'.
180 timecode_status = _tables.timecode_status_file_to_enum (reader->get_string(255, 1));
181 start_of_programme = reader->get_string(256, 8);
182 first_in_cue = reader->get_string(264, 8);
183 disks = atoi (reader->get_string(272, 1).c_str());
184 disk_sequence_number = atoi (reader->get_string(273, 1).c_str());
185 country_of_origin = reader->get_string(274, 3);
186 publisher = reader->get_string(277, 32);
187 editor_name = reader->get_string(309, 32);
188 editor_contact_details = reader->get_string(341, 32);
190 for (int i = 0; i < tti_blocks; ++i) {
192 reader->read (128, "TTI");
194 if (_tables.comment_file_to_enum (reader->get_int(15, 1)) == COMMENT_YES) {
198 string const whole = reader->get_string(16, 112);
200 /* Split the text up into lines (8Ah is a new line) */
201 vector<string> lines;
202 split (lines, whole, is_any_of ("\x8a"));
204 /* Italic / underline specifications can span lines, so we need to track them
205 outside the lines loop.
208 bool underline = false;
210 for (size_t j = 0; j < lines.size(); ++j) {
212 sub.from = reader->get_timecode(5, frame_rate);
213 sub.to = reader->get_timecode(9, frame_rate);
214 /* XXX: only the vertical position of the first TTI block should be used (says the spec),
215 so using reader->get_int(13, 1) here is wrong if i > 0
217 sub.vertical_position.line = reader->get_int(13, 1) + j;
218 sub.vertical_position.lines = maximum_rows;
219 sub.vertical_position.reference = TOP_OF_SCREEN;
221 sub.underline = underline;
223 /* XXX: not sure what to do with JC = 0, "unchanged presentation" */
/* h is the justification byte (offset 14 of the TTI block); presumably it
   selects which of the three horizontal references below gets assigned.
*/
224 int const h = reader->get_int(14, 1);
228 sub.horizontal_position.reference = HORIZONTAL_CENTRE_OF_SCREEN;
231 sub.horizontal_position.reference = LEFT_OF_SCREEN;
234 sub.horizontal_position.reference = RIGHT_OF_SCREEN;
238 /* Loop over characters */
240 for (size_t k = 0; k < lines[j].size(); ++k) {
/* Work with the byte as unsigned so that the comparisons against 0x80 and
   above behave as intended whatever the signedness of plain char.
*/
242 unsigned char const c = static_cast<unsigned char> (lines[j][k]);
245 /* Unused space i.e. end of line */
249 if (c >= 0x80 && c <= 0x83) {
250 /* Italic or underline control code */
/* Emit the text gathered so far as its own sub, converted from ISO 6937 to
   UTF-8 by way of UTF-16.
*/
251 sub.text = utf_to_utf<char> (iso6937_to_utf16 (text.c_str()));
252 _subs.push_back (sub);
275 sub.underline = underline;
/* Flush any text which is still pending for this line */
278 if (!text.empty ()) {
279 sub.text = utf_to_utf<char> (iso6937_to_utf16 (text.c_str()));
280 _subs.push_back (sub);
283 /* XXX: justification */
/* Fill a map from human-readable field names to string values for the header
   fields which read() stores in this object.  Integer fields are converted
   with lexical_cast<string>, enumerated fields go through the matching
   _tables.*_enum_to_description call, and string fields are copied as they
   are.

   NOTE(review): m is presumably the return value; the return type and the
   return statement are not visible here, so confirm.
*/
289 STLBinaryReader::metadata () const
291 map<string, string> m;
293 m["Code page number"] = lexical_cast<string> (code_page_number);
294 m["Frame rate"] = lexical_cast<string> (frame_rate);
295 m["Display standard"] = _tables.display_standard_enum_to_description (display_standard);
296 m["Language group"] = _tables.language_group_enum_to_description (language_group);
297 m["Language"] = _tables.language_enum_to_description (language);
298 m["Original programme title"] = original_programme_title;
299 m["Original episode title"] = original_episode_title;
300 m["Translated programme title"] = translated_programme_title;
301 m["Translated episode title"] = translated_episode_title;
302 m["Translator name"] = translator_name;
303 m["Translator contact details"] = translator_contact_details;
304 m["Subtitle list reference code"] = subtitle_list_reference_code;
305 m["Creation date"] = creation_date;
306 m["Revision date"] = revision_date;
307 m["Revision number"] = revision_number;
308 m["TTI blocks"] = lexical_cast<string> (tti_blocks);
309 m["Number of subtitles"] = lexical_cast<string> (number_of_subtitles);
310 m["Subtitle groups"] = lexical_cast<string> (subtitle_groups);
311 m["Maximum characters"] = lexical_cast<string> (maximum_characters);
312 m["Maximum rows"] = lexical_cast<string> (maximum_rows);
313 m["Timecode status"] = _tables.timecode_status_enum_to_description (timecode_status);
314 m["Start of programme"] = start_of_programme;
315 m["First in cue"] = first_in_cue;
316 m["Disks"] = lexical_cast<string> (disks);
317 m["Disk sequence number"] = lexical_cast<string> (disk_sequence_number);
318 m["Country of origin"] = country_of_origin;
319 m["Publisher"] = publisher;
320 m["Editor name"] = editor_name;
321 m["Editor contact details"] = editor_contact_details;