/*
    Copyright (C) 2014-2020 Carl Hetherington

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/** @file  src/subrip_reader.cc
 *  @brief SubripReader class.
 */

#include "compose.hpp"
#include "exceptions.h"
#include "raw_convert.h"
#include "ssa_reader.h"
#include "sub_assert.h"
#include "subrip_reader.h"
#include "util.h"
/* NOTE(review): the eight directives below carry no header name in this copy
 * of the file (the targets appear to have been lost); restore them before
 * building.
 */
#include
#include
#include
#include
#include
#include
#include
#include

using std::string;
using std::vector;
using std::cout;
using std::hex;
using boost::lexical_cast;
using boost::to_upper;
using boost::optional;
using boost::function;
using boost::algorithm::replace_all;
using namespace sub;

/** Construct a reader and immediately parse the given string.
 *
 *  The string is taken by value; a pointer to that local copy is bound into
 *  the line source handed to read(), so the copy only needs to live for the
 *  duration of this constructor.
 *
 *  @param s Subtitle string encoded in UTF-8
 */
SubripReader::SubripReader (string s)
{
	/* get_line_string is defined elsewhere; presumably it yields successive lines of *s */
	this->read (boost::bind(&get_line_string, &s));
}

/** Construct a reader and immediately parse the given file.
 *
 *  The FILE* is only bound into the line source handed to read(); nothing in
 *  this constructor opens or closes it.
 *
 *  @param f Subtitle file encoded in UTF-8
 */
SubripReader::SubripReader (FILE* f)
{
	/* get_line_file is defined elsewhere; presumably it yields successive lines read from f */
	this->read (boost::bind (&get_line_file, f));
}

/** Parse SubRip input one line at a time using a three-state machine:
 *
 *    COUNTER  -> waiting for the line that introduces a subtitle;
 *    METADATA -> expecting the "from --> to" timing line;
 *    CONTENT  -> text lines, up to the next empty line.
 *
 *  Each text line (or each piece of it, when it contains line separators) is
 *  passed to convert_line() together with the RawSubtitle being built up.
 *
 *  NOTE(review): the template argument list of `function` in the signature
 *  appears truncated in this copy of the file.  The body tests the result of
 *  the call with `!line` and dereferences it with `*line`, so get_line
 *  presumably returns an optional string; confirm against the header.
 *
 *  @param get_line Callable returning the next input line, or an empty
 *  optional when there is no more input.
 *
 *  @throws SubripError if the timing line does not split into 3 or 7
 *  space-separated fields, or if either of its times cannot be converted.
 */
void
SubripReader::read (function ()> get_line)
{
	enum {
		COUNTER,
		METADATA,
		CONTENT
	} state = COUNTER;

	/* Working subtitle; its timing, style flags and position are updated as
	 * lines are consumed and it is handed to convert_line() for each text line.
	 */
	RawSubtitle rs;

	rs.vertical_position.line = 0;
	rs.vertical_position.reference = TOP_OF_SUBTITLE;

	while (true) {
		auto line = get_line ();
		if (!line) {
			/* No more input */
			break;
		}

		/* Drop trailing CR / LF so that "empty line" tests below work for
		 * either style of line ending.
		 */
		trim_right_if (*line, boost::is_any_of ("\n\r"));
		/* Called for every line, not just the first.  Defined elsewhere;
		 * presumably strips a leading byte-order mark if there is one.
		 */
		remove_unicode_bom (line);

		/* Keep some history in case there is an error to report */
		_context.push_back (*line);
		if (_context.size() > 5) {
			/* Cap the history at the five most recent lines */
			_context.pop_front ();
		}

		switch (state) {
		case COUNTER:
		{
			if (line->empty ()) {
				/* a blank line at the start is ok */
				break;
			}

			/* Any non-empty line is accepted here; its content is not examined */
			state = METADATA;

			/* Reset stuff that should not persist across separate subtitles */
			rs.bold = false;
			rs.italic = false;
			rs.underline = false;
			rs.vertical_position.line = 0;
			rs.vertical_position.reference = TOP_OF_SUBTITLE;
		}
		break;
		case METADATA:
		{
			/* NOTE(review): the element type of this vector appears to have been
			 * lost in this copy; it receives the pieces of *line from split() below.
			 */
			vector p;

			/* Further trim this line, removing spaces from the end */
			trim_right_if (*line, boost::is_any_of (" "));
			/* Split on spaces, treating runs of spaces as a single separator */
			boost::algorithm::split (p, *line, boost::algorithm::is_any_of (" "), boost::token_compress_on);

			/* 3 fields: from, separator, to.  7 fields: the same followed by four
			 * more, presumably the coordinate specifications mentioned below.
			 */
			if (p.size() != 3 && p.size() != 7) {
				/* Pull in up to two following lines so that the error report also
				 * shows what came after the bad line.  These are added to the
				 * history as-is: not trimmed, and without the five-line cap.
				 */
				for (int i = 0; i < 2; ++i) {
					/* NOTE(review): template argument appears lost here too */
					optional ex = get_line ();
					if (ex) {
						_context.push_back (*ex);
					}
				}
				throw SubripError (*line, "a time/position line", _context);
			}

			/* Passed by address to convert_time() and used as the "expected"
			 * argument of SubripError when conversion fails.
			 */
			string expected;
			auto from = convert_time(p[0], ",", &expected);
			if (!from) {
				throw SubripError(p[0], expected, _context);
			}
			rs.from = *from;

			/* p[1] (the separator between the two times) is not checked */
			auto to = convert_time(p[2], ",", &expected);
			if (!to) {
				throw SubripError(p[2], expected, _context);
			}
			rs.to = *to;

			/* XXX: should not ignore coordinate specifications */

			state = CONTENT;
			break;
		}
		case CONTENT:
			if (line->empty ()) {
				/* An empty line ends this subtitle; look for the next one */
				state = COUNTER;
			} else {
				/* NOTE(review): element type appears lost in this copy; filled by split_regex() below */
				vector sub_lines;
				/* Split up this line on unicode "LINE SEPARATOR".  This feels hacky but also
				 * the least unpleasant place to do it.
				 * (The pattern bytes e2 80 a8 are the UTF-8 encoding of U+2028.)
				 */
				boost::algorithm::split_regex(sub_lines, *line, boost::regex("\xe2\x80\xa8"));
				for (auto sub_line: sub_lines) {
					convert_line(sub_line, rs);
					/* Each piece handled moves the vertical position on by one line,
					 * and the text in rs is cleared before the next piece.
					 */
					rs.vertical_position.line = rs.vertical_position.line.get() + 1;
					rs.text.clear();
				}
			}
			break;
		}
	}
}

optional