From dc36525dc7d430aa00d4de4bd037ae5dbb004b32 Mon Sep 17 00:00:00 2001
From: Carl Hetherington
Date: Thu, 10 Nov 2022 11:25:57 +0100
Subject: Handle unicode LINE SEPARATOR properly in subrip files.

---
 src/subrip_reader.cc | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'src/subrip_reader.cc')

diff --git a/src/subrip_reader.cc b/src/subrip_reader.cc
index 7c7b5c2..f0fe07f 100644
--- a/src/subrip_reader.cc
+++ b/src/subrip_reader.cc
@@ -28,6 +28,7 @@
 #include "raw_convert.h"
 #include "ssa_reader.h"
 #include
+#include
 #include
 #include
 #include
@@ -135,8 +136,16 @@ SubripReader::read (function<optional<string> ()> get_line)
 			if (line->empty ()) {
 				state = COUNTER;
 			} else {
-				convert_line (*line, rs);
-				rs.vertical_position.line = rs.vertical_position.line.get() + 1;
+				vector<string> sub_lines;
+				/* Split up this line on unicode "LINE SEPARATOR". This feels hacky but also
+				 * the least unpleasant place to do it.
+				 */
+				boost::algorithm::split_regex(sub_lines, *line, boost::regex("\xe2\x80\xa8"));
+				for (auto sub_line: sub_lines) {
+					convert_line(sub_line, rs);
+					rs.vertical_position.line = rs.vertical_position.line.get() + 1;
+					rs.text.clear();
+				}
 			}
 			break;
 		}
-- 
cgit v1.2.3