From dc36525dc7d430aa00d4de4bd037ae5dbb004b32 Mon Sep 17 00:00:00 2001
From: Carl Hetherington <cth@carlh.net>
Date: Thu, 10 Nov 2022 11:25:57 +0100
Subject: Handle unicode LINE SEPARATOR properly in subrip files.

---
 src/subrip_reader.cc | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'src/subrip_reader.cc')
diff --git a/src/subrip_reader.cc b/src/subrip_reader.cc
index 7c7b5c2..f0fe07f 100644
--- a/src/subrip_reader.cc
+++ b/src/subrip_reader.cc
@@ -28,6 +28,7 @@
 #include "raw_convert.h"
 #include "ssa_reader.h"
 #include <boost/algorithm/string.hpp>
+#include <boost/algorithm/string_regex.hpp>
 #include <boost/lexical_cast.hpp>
 #include <boost/regex.hpp>
 #include <boost/bind.hpp>
@@ -135,8 +136,16 @@ SubripReader::read (function<optional<string> ()> get_line)
 			if (line->empty ()) {
 				state = COUNTER;
 			} else {
-				convert_line (*line, rs);
-				rs.vertical_position.line = rs.vertical_position.line.get() + 1;
+				vector<string> sub_lines;
+				/* Split up this line on Unicode LINE SEPARATOR (U+2028).  This feels hacky,
+				 * but it is also the least unpleasant place to do it.
+				 */
+				boost::algorithm::split_regex(sub_lines, *line, boost::regex("\xe2\x80\xa8"));
+				for (auto sub_line: sub_lines) {
+					convert_line(sub_line, rs);
+					rs.vertical_position.line = rs.vertical_position.line.get() + 1;
+					rs.text.clear();
+				}
 			}
 			break;
 		}
-- 
cgit v1.2.3