/*
    Copyright (C) 2014-2020 Carl Hetherington

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/** @file  src/subrip_reader.cc
 *  @brief SubripReader class.
 */

/* NOTE(review): this copy of the file appears to have lost every piece of
 * text that sat between angle brackets.  The seven bare include directives
 * below carry no header name, several templates have no arguments (vector,
 * optional, function, lexical_cast, raw_convert) and a number of string
 * literals passed to has_next are empty.  Restore the file from version
 * control before building; the comments below describe only what is still
 * legible here.
 */

#include "subrip_reader.h"
#include "exceptions.h"
#include "util.h"
#include "sub_assert.h"
#include "raw_convert.h"
#include "ssa_reader.h"
#include
#include
#include
#include
#include
#include
#include

using std::string;
using std::vector;
using std::cout;
using std::hex;
using boost::lexical_cast;
using boost::to_upper;
using boost::optional;
using boost::function;
using boost::algorithm::replace_all;
using namespace sub;

/** Build a reader from subtitle data held in a string.
 *
 *  The string is handed to read() through get_line_string, bound to the
 *  address of the by-value parameter, so the bound pointer stays valid for
 *  the whole of the read() call.
 *
 *  @param s Subtitle string encoded in UTF-8
 */
SubripReader::SubripReader (string s)
{
	this->read (boost::bind(&get_line_string, &s));
}

/** Build a reader from subtitle data in an already-open C stream.
 *
 *  Lines are pulled through get_line_file bound to f.  Nothing in this
 *  function closes f.
 *
 *  @param f Subtitle file encoded in UTF-8
 */
SubripReader::SubripReader (FILE* f)
{
	this->read (boost::bind (&get_line_file, f));
}

/** Parse a whole SubRip document, one line at a time.
 *
 *  The parser is a three-state machine:
 *   - COUNTER:  waiting for the line that starts a subtitle.  Blank lines are
 *               skipped; any non-empty line moves on to METADATA.  The
 *               content of that line is not examined.
 *   - METADATA: the time/position line.  It is split on spaces and must give
 *               3 or 7 fields; fields 0 and 2 are the start and end times.
 *   - CONTENT:  text lines, each passed to convert_line(), until a blank line
 *               sends the machine back to COUNTER.
 *
 *  @param get_line Callable returning the next line, or a value that tests
 *  false once the input is exhausted.
 *
 *  Throws SubripError if a METADATA line does not have 3 or 7 fields, and
 *  lets through whatever convert_time() and convert_line() throw.
 */
void SubripReader::read (function ()> get_line)
{
	enum {
		COUNTER,
		METADATA,
		CONTENT
	} state = COUNTER;

	/* Working subtitle; it is reused for every subtitle in the input and
	   copied into _subs piece by piece through convert_line().
	*/
	RawSubtitle rs;

	rs.vertical_position.line = 0;
	rs.vertical_position.reference = TOP_OF_SUBTITLE;

	while (true) {
		auto line = get_line ();
		if (!line) {
			/* End of input */
			break;
		}

		/* Drop trailing newline and carriage-return characters */
		trim_right_if (*line, boost::is_any_of ("\n\r"));
		/* Called for every line, not only the first.  Helper is defined
		   elsewhere; presumably strips a leading byte-order mark - confirm.
		*/
		remove_unicode_bom (line);

		/* Keep some history in case there is an error to report */
		_context.push_back (*line);
		if (_context.size() > 5) {
			_context.pop_front ();
		}

		switch (state) {
		case COUNTER:
		{
			if (line->empty ()) {
				/* a blank line at the start is ok */
				break;
			}

			state = METADATA;

			/* Reset stuff that should not persist across separate subtitles */
			rs.bold = false;
			rs.italic = false;
			rs.underline = false;
			rs.vertical_position.line = 0;
			rs.vertical_position.reference = TOP_OF_SUBTITLE;
		}
		break;
		case METADATA:
		{
			vector p;

			/* Further trim this line, removing spaces from the end */
			trim_right_if (*line, boost::is_any_of (" "));
			/* token_compress_on makes a run of spaces count as one separator */
			boost::algorithm::split (p, *line, boost::algorithm::is_any_of (" "), boost::token_compress_on);
			if (p.size() != 3 && p.size() != 7) {
				/* Pull up to two further lines so the error report shows what
				   follows the bad line.  These are appended without trimming
				   _context back to five entries.
				*/
				for (int i = 0; i < 2; ++i) {
					optional ex = get_line ();
					if (ex) {
						_context.push_back (*ex);
					}
				}
				throw SubripError (*line, "a time/position line", _context);
			}

			/* Field 1 (the separator between the two times) is not checked */
			rs.from = convert_time (p[0]);
			rs.to = convert_time (p[2]);

			/* XXX: should not ignore coordinate specifications */

			state = CONTENT;
			break;
		}
		case CONTENT:
			if (line->empty ()) {
				/* A blank line ends the current subtitle */
				state = COUNTER;
			} else {
				convert_line (*line, rs);
				/* The next text line of this subtitle goes one line further down */
				rs.vertical_position.line = rs.vertical_position.line.get() + 1;
			}
			break;
		}
	}
}

/** Parse a time written as h:m:s,ms.
 *
 *  @param t Time string to parse.
 *  @return Time built from the four components by Time::from_hms.
 *
 *  Throws SubripError (carrying _context) if t does not split into exactly
 *  three parts on colons, if the third part does not split into exactly two
 *  parts on a comma, or if any component fails lexical_cast.
 */
Time SubripReader::convert_time (string t)
{
	/* Split into hours, minutes and the seconds,milliseconds pair */
	vector a;
	boost::algorithm::split (a, t, boost::is_any_of (":"));
	if (a.size() != 3) {
		throw SubripError (t, "time in the format h:m:s,ms", _context);
	}

	/* Split the last part into seconds and milliseconds */
	vector b;
	boost::algorithm::split (b, a[2], boost::is_any_of (","));
	if (b.size() != 2) {
		throw SubripError (t, "time in the format h:m:s,ms", _context);
	}

	int h, m, s, ms;

	/* Each component is converted separately so that the error can name
	   the one that failed.  The lexical_cast target type is missing in this
	   copy; the results are stored in int variables.
	*/
	try {
		h = lexical_cast(a[0]);
	} catch (boost::bad_lexical_cast &) {
		throw SubripError (t, "integer hour value", _context);
	}

	try {
		m = lexical_cast(a[1]);
	} catch (boost::bad_lexical_cast &) {
		throw SubripError (t, "integer minute value", _context);
	}

	try {
		s = lexical_cast(b[0]);
	} catch (boost::bad_lexical_cast &) {
		throw SubripError (t, "integer second value", _context);
	}

	try {
		ms = lexical_cast(b[1]);
	} catch (boost::bad_lexical_cast &) {
		throw SubripError (t, "integer millisecond value", _context);
	}

	return Time::from_hms (h, m, s, ms);
}

/** Convert one line of subtitle text into pieces of styled text.
 *
 *  The line is scanned left to right.  Markup switches style flags on p;
 *  before each switch the text gathered so far is flushed to _subs through
 *  maybe_content(), so every stored piece has a single style.  Any character
 *  that is not part of recognised markup is appended to p.text.
 *
 *  @param t Line of subtitle text.
 *  @param p Working subtitle; its style members are updated in place and its
 *  text is left empty on return (the final maybe_content() clears it).
 */
void SubripReader::convert_line (string t, RawSubtitle& p)
{
	/* Stack of colours; the bottom entry is Colour (1, 1, 1) */
	vector colours;
	colours.push_back (Colour (1, 1, 1));

	/* Case-insensitive test for s at position index of line.  On a match
	   index is moved past s and true is returned; otherwise index is left
	   alone.  NOTE(review): several calls below pass an empty literal in
	   this copy.  With an empty s the substring is empty too, so the test
	   succeeds and index does not move - another sign that the literals
	   (presumably HTML-style tags) have been lost.
	*/
	auto has_next = [](string line, size_t& index, string s) {
		boost::to_lower(s);
		auto next = line.substr(index, s.size());
		boost::to_lower(next);
		if (next != s) {
			return false;
		}

		index += s.size();
		return true;
	};

	size_t i = 0;
	while (i < t.size()) {
		/* Bold on / off */
		if (has_next(t, i, "") || has_next(t, i, "{b}")) {
			maybe_content (p);
			p.bold = true;
		} else if (has_next(t, i, "") || has_next(t, i, "{/b}")) {
			maybe_content (p);
			p.bold = false;
		/* Italic on / off */
		} else if (has_next(t, i, "") || has_next(t, i, "{i}")) {
			maybe_content (p);
			p.italic = true;
		} else if (has_next(t, i, "") || has_next(t, i, "{/i}")) {
			maybe_content (p);
			p.italic = false;
		/* Underline on / off */
		} else if (has_next(t, i, "") || has_next(t, i, "{u}")) {
			maybe_content (p);
			p.underline = true;
		} else if (has_next(t, i, "") || has_next(t, i, "{/u}")) {
			maybe_content (p);
			p.underline = false;
		}
		/* NOTE(review): from here to the end of the file the text is damaged.
		   The first literal on the next line is never closed and the code that
		   stood between it and the colour assignments is missing, so the rest
		   is kept verbatim on a single line to avoid losing anything more.
		   What can still be read there:
		    - a colour branch that sets p.colour from three captured values,
		      each divided by 255.0, pushes it on the colours stack, and
		      otherwise throws SubripError;
		    - a closing-colour branch that pops the stack and then reads
		      colours.back().  With only the initial entry on the stack the pop
		      empties it before back() is called - TODO confirm and guard;
		    - a branch for brace-backslash overrides, which gathers text up to
		      the closing brace and passes it to SSAReader::parse_style;
		    - the fallback that appends the current character to p.text;
		    - maybe_content, which pushes p onto _subs when p.text is not
		      empty and then clears p.text.
		*/
		else if (has_next(t, i, "(string(match[1])) / 255.0; p.colour.g = raw_convert(string(match[2])) / 255.0; p.colour.b = raw_convert(string(match[3])) / 255.0; colours.push_back (p.colour); } else { throw SubripError (tag, "a colour in the format #rrggbb or rgba(rr,gg,bb,aa)", _context); } } } else if (has_next(t, i, "")) { maybe_content (p); SUB_ASSERT (!colours.empty()); colours.pop_back (); p.colour = colours.back (); } else if (has_next(t, i, "{\\")) { string ssa = "\\"; while (i < t.size() && t[i] != '}') { ssa += t[i]; ++i; } ++i; SSAReader::parse_style (p, ssa, 288, 288, Colour(1, 1, 1)); } else { p.text += t[i]; ++i; } } /* Strip Unicode U+202B (right-to-left embedding) as sometimes it is rendered as a missing character. This may be a hack. */ replace_all (p.text, "\xe2\x80\xab", ""); maybe_content (p); } /* Push p into _subs if it has some text, and clear the text out of p */ void SubripReader::maybe_content (RawSubtitle& p) { if (!p.text.empty ()) { _subs.push_back (p); p.text.clear (); } }