2 Copyright (C) 2022 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 #include "exceptions.h"
22 #include "subrip_reader.h"
24 #include "web_vtt_reader.h"
25 #include <boost/algorithm/string.hpp>
26 #include <boost/algorithm/string_regex.hpp>
27 #include <boost/bind.hpp>
28 #include <boost/regex.hpp>
36 using boost::optional;
/** Construct a reader that takes its input from a C stdio stream.
 *
 *  A line getter is made by binding get_line_file to @p file, and that getter
 *  is handed to read().
 *
 *  @param file Stream to fetch lines from (presumably already open for
 *  reading -- TODO confirm against callers).
 */
40 WebVTTReader::WebVTTReader(FILE* file)
42 this->read(boost::bind(&get_line_file, file));
/** Construct a reader that takes its input from a string held in memory.
 *
 *  A line getter is made by binding get_line_string to the address of the
 *  by-value parameter @p subs, and that getter is handed to read().
 *
 *  @param subs Text to fetch lines from; taken by value, so the caller's
 *  string is not the one whose address is bound.
 */
46 WebVTTReader::WebVTTReader(string subs)
/* The bound pointer refers to the local copy `subs`; it is only passed to
 * read() within this constructor.
 */
48 this->read(boost::bind(&get_line_string, &subs));
53 WebVTTReader::read(std::function<optional<string> ()> get_line)
56 /* expecting WEBVTT */
58 /* awaiting a NOTE, some other metadata, or a subtitle timing line */
60 /* reading the text of a subtitle */
64 } state = State::HEADER;
68 rs.vertical_position.line = 0;
69 rs.vertical_position.reference = TOP_OF_SUBTITLE;
72 auto line = get_line();
77 trim_right_if(*line, boost::is_any_of("\n\r"));
78 remove_unicode_bom(line);
80 /* Keep some history in case there is an error to report */
81 _context.push_back(*line);
82 if (_context.size() > 5) {
88 if (!boost::starts_with(*line, "WEBVTT")) {
89 throw WebVTTError("No WEBVTT header found");
94 if (boost::starts_with(*line, "NOTE")) {
96 } else if (line->find("-->") != string::npos) {
97 /* Further trim this line, removing spaces from the end */
98 trim_right_if(*line, boost::is_any_of(" "));
100 vector<string> parts;
101 boost::algorithm::split(parts, *line, boost::algorithm::is_any_of(" "), boost::token_compress_on);
103 if (parts.size() != 3 && parts.size() != 7) {
104 for (int i = 0; i < 2; ++i) {
105 auto ex = get_line();
107 _context.push_back(*ex);
110 throw WebVTTError(*line, "a time line", _context);
114 auto from = SubripReader::convert_time(parts[0], ".", &expected);
116 throw WebVTTError(parts[0], expected, _context);
120 auto to = SubripReader::convert_time(parts[2], ".", &expected);
122 throw WebVTTError(parts[2], expected, _context);
126 rs.vertical_position.line = 0;
127 state = State::SUBTITLE;
130 case State::SUBTITLE:
134 /* Split up this line on unicode "LINE SEPARATOR". This feels hacky but also
135 * the least unpleasant place to do it.
137 vector<string> sub_lines;
138 boost::algorithm::split_regex(sub_lines, *line, boost::regex("\xe2\x80\xa8"));
139 for (auto sub_line: sub_lines) {
142 rs.vertical_position.line = rs.vertical_position.line.get() + 1;