summary | refs | log | tree | commit | diff
path: root/src/subrip_reader.cc
diff options
context:
space:
mode:
author: Carl Hetherington <cth@carlh.net> 2022-11-10 11:25:57 +0100
committer: Carl Hetherington <cth@carlh.net> 2022-11-10 11:25:57 +0100
commit: dc36525dc7d430aa00d4de4bd037ae5dbb004b32 (patch)
tree: 47dd23cadfe380e897d3d63b14ec0d7193553d51 /src/subrip_reader.cc
parent: 7996d08033ad274770639f4951b40263ded40e86 (diff)
Handle unicode LINE SEPARATOR properly in subrip files. (v1.6.37)
Diffstat (limited to 'src/subrip_reader.cc')
-rw-r--r-- src/subrip_reader.cc 13
1 files changed, 11 insertions, 2 deletions
diff --git a/src/subrip_reader.cc b/src/subrip_reader.cc
index 7c7b5c2..f0fe07f 100644
--- a/src/subrip_reader.cc
+++ b/src/subrip_reader.cc
@@ -28,6 +28,7 @@
#include "raw_convert.h"
#include "ssa_reader.h"
#include <boost/algorithm/string.hpp>
+#include <boost/algorithm/string_regex.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/regex.hpp>
#include <boost/bind.hpp>
@@ -135,8 +136,16 @@ SubripReader::read (function<optional<string> ()> get_line)
if (line->empty ()) {
state = COUNTER;
} else {
- convert_line (*line, rs);
- rs.vertical_position.line = rs.vertical_position.line.get() + 1;
+ vector<string> sub_lines;
+ /* Split up this line on unicode "LINE SEPARATOR". This feels hacky but also
+ * the least unpleasant place to do it.
+ */
+ boost::algorithm::split_regex(sub_lines, *line, boost::regex("\xe2\x80\xa8"));
+ for (auto sub_line: sub_lines) {
+ convert_line(sub_line, rs);
+ rs.vertical_position.line = rs.vertical_position.line.get() + 1;
+ rs.text.clear();
+ }
}
break;
}