From dc36525dc7d430aa00d4de4bd037ae5dbb004b32 Mon Sep 17 00:00:00 2001 From: Carl Hetherington Date: Thu, 10 Nov 2022 11:25:57 +0100 Subject: [PATCH] Handle unicode LINE SEPARATOR properly in subrip files. --- src/subrip_reader.cc | 13 +++++++++++-- test/data/newline.srt | 8 ++++++++ test/subrip_reader_test.cc | 24 ++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 test/data/newline.srt diff --git a/src/subrip_reader.cc b/src/subrip_reader.cc index 7c7b5c2..f0fe07f 100644 --- a/src/subrip_reader.cc +++ b/src/subrip_reader.cc @@ -28,6 +28,7 @@ #include "raw_convert.h" #include "ssa_reader.h" #include +#include #include #include #include @@ -135,8 +136,16 @@ SubripReader::read (function ()> get_line) if (line->empty ()) { state = COUNTER; } else { - convert_line (*line, rs); - rs.vertical_position.line = rs.vertical_position.line.get() + 1; + vector sub_lines; + /* Split up this line on unicode "LINE SEPARATOR". This feels hacky but also + * the least unpleasant place to do it. + */ + boost::algorithm::split_regex(sub_lines, *line, boost::regex("\xe2\x80\xa8")); + for (auto sub_line: sub_lines) { + convert_line(sub_line, rs); + rs.vertical_position.line = rs.vertical_position.line.get() + 1; + rs.text.clear(); + } } break; } diff --git a/test/data/newline.srt b/test/data/newline.srt new file mode 100644 index 0000000..a7b1660 --- /dev/null +++ b/test/data/newline.srt @@ -0,0 +1,8 @@ +1 +00:01:08,234 --> 00:01:10,570 +Du fühlst dich danach besser.
Okay, Kleiner? + +2 +00:01:40,642 --> 00:01:42,769 +Sie kann es nicht machen +wenn du dich bewegst. diff --git a/test/subrip_reader_test.cc b/test/subrip_reader_test.cc index 2daf84a..b364d57 100644 --- a/test/subrip_reader_test.cc +++ b/test/subrip_reader_test.cc @@ -656,3 +656,27 @@ BOOST_AUTO_TEST_CASE (subrip_reader_test6) r._subs.clear (); } + +BOOST_AUTO_TEST_CASE(subrip_with_unicode_line_separator_test) +{ /* Regression test: reads test/data/newline.srt and checks that each of its two cues is reported as two lines of one text block each. */ + auto f = fopen ("test/data/newline.srt", "r"); + BOOST_REQUIRE(f); /* stop here if the fixture could not be opened */ + sub::SubripReader reader(f); + fclose(f); /* the stream is closed before subtitles() is queried; presumably the constructor has already read the whole file -- confirm */ + auto subs = sub::collect>(reader.subtitles()); + + BOOST_REQUIRE_EQUAL(subs.size(), 2U); /* REQUIRE guards the subs[0] / subs[1] indexing below */ + + BOOST_REQUIRE_EQUAL(subs[0].lines.size(), 2U); /* cue 1: its two parts apparently sit on one physical line of the fixture, joined by U+2028 rather than a newline */ + BOOST_REQUIRE_EQUAL(subs[0].lines[0].blocks.size(), 1U); + BOOST_CHECK_EQUAL(subs[0].lines[0].blocks[0].text, "Du fühlst dich danach besser."); + BOOST_REQUIRE_EQUAL(subs[0].lines[1].blocks.size(), 1U); + BOOST_CHECK_EQUAL(subs[0].lines[1].blocks[0].text, "Okay, Kleiner?"); + + BOOST_REQUIRE_EQUAL(subs[1].lines.size(), 2U); /* cue 2: its two parts are separate lines in the fixture; same two-line result expected */ + BOOST_REQUIRE_EQUAL(subs[1].lines[0].blocks.size(), 1U); + BOOST_CHECK_EQUAL(subs[1].lines[0].blocks[0].text, "Sie kann es nicht machen"); + BOOST_REQUIRE_EQUAL(subs[1].lines[1].blocks.size(), 1U); + BOOST_CHECK_EQUAL(subs[1].lines[1].blocks[0].text, "wenn du dich bewegst."); +} + -- 2.30.2