/*
    Copyright (C) 2014-2015 Carl Hetherington

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

*/

/** @file  src/subrip_reader.cc
 *  @brief SubripReader class.
 */

#include "subrip_reader.h"
#include "exceptions.h"
#include "util.h"
/* NOTE(review): text inside angle brackets (the targets of the system includes
   and the template arguments further down) was not legible in the copy this
   revision was prepared from.  Both have been rebuilt from the names the code
   uses; please confirm them against version control before merging.
*/
#include <boost/algorithm/string.hpp>
#include <boost/bind.hpp>
#include <boost/function.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/optional.hpp>
#include <boost/regex.hpp>
#include <cstdio>
#include <iostream>
#include <list>
#include <sstream>
#include <string>
#include <vector>

using std::string;
using std::vector;
using std::list;
using std::cout;
using std::hex;
using std::stringstream;
using boost::lexical_cast;
using boost::to_upper;
using boost::optional;
using boost::function;
using namespace sub;

/** Read subtitles from an in-memory string.
 *  @param s Subtitle string encoded in UTF-8
 */
SubripReader::SubripReader (string const & s)
{
	stringstream str (s);
	this->read (boost::bind (&get_line_stringstream, &str));
}

/** Read subtitles from an already-open file.
 *  @param f Subtitle file encoded in UTF-8
 */
SubripReader::SubripReader (FILE* f)
{
	this->read (boost::bind (&get_line_file, f));
}

/** Parse a whole SubRip stream, adding what is found to _subs.
 *
 *  The input is handled with a three-state machine: a counter line, then a
 *  time line, then content lines up to the next blank line.
 *
 *  @param get_line Called repeatedly to fetch the next line; reading stops when
 *  it returns an empty optional.
 *
 *  Throws SubripError if a time line is malformed.  Exceptions thrown by
 *  lexical_cast on non-numeric time fields are not caught here.
 */
void
SubripReader::read (function<optional<string> ()> get_line)
{
	enum {
		COUNTER,
		METADATA,
		CONTENT
	} state = COUNTER;

	/* Template for every piece of text we emit; convert_line() copies it into _subs */
	RawSubtitle rs;
	rs.font = "Arial";
	rs.font_size.set_points (48);
	rs.vertical_position.line = 0;
	/* XXX: arbitrary */
	rs.vertical_position.lines = 32;
	rs.vertical_position.reference = TOP_OF_SUBTITLE;

	while (true) {
		optional<string> line = get_line ();
		if (!line) {
			break;
		}

		boost::trim_right_if (*line, boost::is_any_of ("\n\r"));

		if (
			line->length() >= 3 &&
			static_cast<unsigned char> (line.get()[0]) == 0xef &&
			static_cast<unsigned char> (line.get()[1]) == 0xbb &&
			static_cast<unsigned char> (line.get()[2]) == 0xbf
			) {

			/* Skip Unicode byte order mark */
			line = line->substr (3);
		}

		switch (state) {
		case COUNTER:
		{
			if (line->empty ()) {
				/* a blank line at the start is ok */
				break;
			}

			/* The counter's value is not used; any non-blank line moves us on */
			state = METADATA;

			/* Reset stuff that should not persist across separate subtitles */
			rs.bold = false;
			rs.italic = false;
			rs.underline = false;
			rs.vertical_position.line = 0;
		}
		break;
		case METADATA:
		{
			vector<string> p;
			boost::algorithm::split (p, *line, boost::algorithm::is_any_of (" "));
			/* Either 3 space-separated fields (start, separator, end) or 7
			   (the same followed by four more, which are currently ignored).
			*/
			if (p.size() != 3 && p.size() != 7) {
				throw SubripError (*line, "a time/position line");
			}

			rs.from = convert_time (p[0]);
			rs.to = convert_time (p[2]);

			/* XXX: should not ignore coordinate specifications */

			state = CONTENT;
			break;
		}
		case CONTENT:
			if (line->empty ()) {
				state = COUNTER;
			} else {
				convert_line (*line, rs);
				/* Each content line of a subtitle goes one line further down */
				rs.vertical_position.line = rs.vertical_position.line.get() + 1;
			}
			break;
		}
	}
}

/** Convert a SubRip timestamp to a Time.
 *  @param t Time in the format h:m:s,ms
 *  @return Time built from the four numeric fields.
 *
 *  Throws SubripError if t does not have three colon-separated fields, or if
 *  the last of them is not of the form s,ms.  Exceptions thrown by lexical_cast
 *  on non-numeric fields are not caught here.
 */
Time
SubripReader::convert_time (string t)
{
	vector<string> a;
	boost::algorithm::split (a, t, boost::is_any_of (":"));
	if (a.size() != 3) {
		throw SubripError (t, "time in the format h:m:s,ms");
	}

	vector<string> b;
	boost::algorithm::split (b, a[2], boost::is_any_of (","));
	if (b.size() != 2) {
		/* Without this check a time with no ",ms" part would index past the end of b */
		throw SubripError (t, "time in the format h:m:s,ms");
	}

	return Time::from_hms (
		lexical_cast<int> (a[0]),
		lexical_cast<int> (a[1]),
		lexical_cast<int> (b[0]),
		lexical_cast<int> (b[1])
		);
}

/** Convert one line of subtitle content, splitting it at each markup tag and
 *  adding each piece of text to _subs with the formatting in force at that point.
 *
 *  Tags are delimited by <...> or {...}.  The tags b, i, u and font (with their
 *  closing forms) are acted on; any other tag is dropped.  A tag which is still
 *  open at the end of the line is discarded.
 *
 *  @param t Line of content.
 *  @param p Subtitle state; text is accumulated in it and its bold, italic,
 *  underline and colour members are updated, so formatting left open by this
 *  line is still set when the caller passes p in again.
 */
void
SubripReader::convert_line (string t, RawSubtitle& p)
{
	enum {
		TEXT,
		TAG
	} state = TEXT;

	string tag;

	/* Stack of colours, one entry per open font tag on this line, on top of a
	   base entry which is never removed.
	*/
	list<Colour> colours;
	colours.push_back (Colour (1, 1, 1));

	/* XXX: only the colour attribute of font tags is supported */
	/* XXX: nesting of tags of the same kind might not work, I think. */

	for (size_t i = 0; i < t.size(); ++i) {
		switch (state) {
		case TEXT:
			if (t[i] == '<' || t[i] == '{') {
				state = TAG;
			} else {
				p.text += t[i];
			}
			break;
		case TAG:
			if (t[i] == '>' || t[i] == '}') {
				/* End of the tag: flush the text seen so far with the old
				   formatting, then apply the change that the tag asks for.
				*/
				if (tag == "b") {
					maybe_content (p);
					p.bold = true;
				} else if (tag == "/b") {
					maybe_content (p);
					p.bold = false;
				} else if (tag == "i") {
					maybe_content (p);
					p.italic = true;
				} else if (tag == "/i") {
					maybe_content (p);
					p.italic = false;
				} else if (tag == "u") {
					maybe_content (p);
					p.underline = true;
				} else if (tag == "/u") {
					maybe_content (p);
					p.underline = false;
				} else if (boost::starts_with (tag, "font")) {
					maybe_content (p);
					/* Accept hex digits in either case, but hand them on in
					   lower case as was the case before upper case was accepted.
					*/
					boost::regex re (".*color=\"#([0-9a-fA-F]+)\"");
					boost::smatch match;
					if (boost::regex_search (tag, match, re) && string (match[1]).size() == 6) {
						p.colour = Colour::from_rgb_hex (boost::to_lower_copy (string (match[1])));
					}
					/* Push even if the tag gave no usable colour, so that the
					   matching /font pops this entry and not an outer one.
					*/
					colours.push_back (p.colour);
				} else if (tag == "/font") {
					maybe_content (p);
					/* A /font with nothing open on this line (e.g. the font tag
					   was on an earlier line) must not empty the stack, as
					   back() on an empty list is undefined; fall back to the
					   base colour instead.
					*/
					if (colours.size() > 1) {
						colours.pop_back ();
					}
					p.colour = colours.back ();
				}
				tag.clear ();
				state = TEXT;
			} else {
				tag += t[i];
			}
			break;
		}
	}

	maybe_content (p);
}

/** Push p into _subs if it has some text, and clear the text out of p.
 *  @param p Subtitle to add; everything except its text is left as it is.
 */
void
SubripReader::maybe_content (RawSubtitle& p)
{
	if (!p.text.empty ()) {
		_subs.push_back (p);
		p.text.clear ();
	}
}