summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Carl Hetherington <cth@carlh.net> 2022-11-10 11:25:57 +0100
committer Carl Hetherington <cth@carlh.net> 2022-11-10 11:25:57 +0100
commit dc36525dc7d430aa00d4de4bd037ae5dbb004b32 (patch)
tree 47dd23cadfe380e897d3d63b14ec0d7193553d51
parent 7996d08033ad274770639f4951b40263ded40e86 (diff)
Handle unicode LINE SEPARATOR properly in subrip files. [v1.6.37]
-rw-r--r-- src/subrip_reader.cc 13
-rw-r--r-- test/data/newline.srt 8
-rw-r--r-- test/subrip_reader_test.cc 24
3 files changed, 43 insertions, 2 deletions
diff --git a/src/subrip_reader.cc b/src/subrip_reader.cc
index 7c7b5c2..f0fe07f 100644
--- a/src/subrip_reader.cc
+++ b/src/subrip_reader.cc
@@ -28,6 +28,7 @@
#include "raw_convert.h"
#include "ssa_reader.h"
#include <boost/algorithm/string.hpp>
+#include <boost/algorithm/string_regex.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/regex.hpp>
#include <boost/bind.hpp>
@@ -135,8 +136,16 @@ SubripReader::read (function<optional<string> ()> get_line)
if (line->empty ()) {
state = COUNTER;
} else {
- convert_line (*line, rs);
- rs.vertical_position.line = rs.vertical_position.line.get() + 1;
+ vector<string> sub_lines;
+ /* Split up this line on unicode "LINE SEPARATOR". This feels hacky but also
+ * the least unpleasant place to do it.
+ */
+ boost::algorithm::split_regex(sub_lines, *line, boost::regex("\xe2\x80\xa8"));
+ for (auto sub_line: sub_lines) {
+ convert_line(sub_line, rs);
+ rs.vertical_position.line = rs.vertical_position.line.get() + 1;
+ rs.text.clear();
+ }
}
break;
}
diff --git a/test/data/newline.srt b/test/data/newline.srt
new file mode 100644
index 0000000..a7b1660
--- /dev/null
+++ b/test/data/newline.srt
@@ -0,0 +1,8 @@
+1
+00:01:08,234 --> 00:01:10,570
+Du fühlst dich danach besser.
Okay, Kleiner?
+
+2
+00:01:40,642 --> 00:01:42,769
+Sie kann es nicht machen
+wenn du dich bewegst.
diff --git a/test/subrip_reader_test.cc b/test/subrip_reader_test.cc
index 2daf84a..b364d57 100644
--- a/test/subrip_reader_test.cc
+++ b/test/subrip_reader_test.cc
@@ -656,3 +656,27 @@ BOOST_AUTO_TEST_CASE (subrip_reader_test6)
r._subs.clear ();
}
+
+BOOST_AUTO_TEST_CASE(subrip_with_unicode_line_separator_test) // Reads test/data/newline.srt and checks that each of its two subtitles yields two lines of one block each.
+{
+ auto f = fopen ("test/data/newline.srt", "r"); // relative path — presumably resolved from the directory the tests are run in; confirm
+ BOOST_REQUIRE(f); // stop here if the data file could not be opened
+ sub::SubripReader reader(f);
+ fclose(f); // closed before subtitles() is called below — presumably the constructor has already read the whole file; confirm
+ auto subs = sub::collect<std::vector<sub::Subtitle>>(reader.subtitles());
+
+ BOOST_REQUIRE_EQUAL(subs.size(), 2U); // REQUIRE rather than CHECK: the indexing below depends on this size
+
+ BOOST_REQUIRE_EQUAL(subs[0].lines.size(), 2U); // first subtitle: going by the test name, the one whose text is split on the unicode LINE SEPARATOR
+ BOOST_REQUIRE_EQUAL(subs[0].lines[0].blocks.size(), 1U);
+ BOOST_CHECK_EQUAL(subs[0].lines[0].blocks[0].text, "Du fühlst dich danach besser.");
+ BOOST_REQUIRE_EQUAL(subs[0].lines[1].blocks.size(), 1U);
+ BOOST_CHECK_EQUAL(subs[0].lines[1].blocks[0].text, "Okay, Kleiner?");
+
+ BOOST_REQUIRE_EQUAL(subs[1].lines.size(), 2U); // second subtitle: its two text lines are separate lines in the .srt
+ BOOST_REQUIRE_EQUAL(subs[1].lines[0].blocks.size(), 1U);
+ BOOST_CHECK_EQUAL(subs[1].lines[0].blocks[0].text, "Sie kann es nicht machen");
+ BOOST_REQUIRE_EQUAL(subs[1].lines[1].blocks.size(), 1U);
+ BOOST_CHECK_EQUAL(subs[1].lines[1].blocks[0].text, "wenn du dich bewegst.");
+}
+