diff options
| author | Carl Hetherington <cth@carlh.net> | 2022-11-10 11:25:57 +0100 |
|---|---|---|
| committer | Carl Hetherington <cth@carlh.net> | 2022-11-10 11:25:57 +0100 |
| commit | dc36525dc7d430aa00d4de4bd037ae5dbb004b32 (patch) | |
| tree | 47dd23cadfe380e897d3d63b14ec0d7193553d51 /src/subrip_reader.cc | |
| parent | 7996d08033ad274770639f4951b40263ded40e86 (diff) | |
Handle unicode LINE SEPARATOR properly in subrip files. (v1.6.37)
Diffstat (limited to 'src/subrip_reader.cc')
| -rw-r--r-- | src/subrip_reader.cc | 13 |
1 file changed, 11 insertions, 2 deletions
```diff
diff --git a/src/subrip_reader.cc b/src/subrip_reader.cc
index 7c7b5c2..f0fe07f 100644
--- a/src/subrip_reader.cc
+++ b/src/subrip_reader.cc
@@ -28,6 +28,7 @@
 #include "raw_convert.h"
 #include "ssa_reader.h"
 #include <boost/algorithm/string.hpp>
+#include <boost/algorithm/string_regex.hpp>
 #include <boost/lexical_cast.hpp>
 #include <boost/regex.hpp>
 #include <boost/bind.hpp>
@@ -135,8 +136,16 @@ SubripReader::read (function<optional<string> ()> get_line)
 			if (line->empty ()) {
 				state = COUNTER;
 			} else {
-				convert_line (*line, rs);
-				rs.vertical_position.line = rs.vertical_position.line.get() + 1;
+				vector<string> sub_lines;
+				/* Split up this line on unicode "LINE SEPARATOR". This feels hacky but also
+				 * the least unpleasant place to do it.
+				 */
+				boost::algorithm::split_regex(sub_lines, *line, boost::regex("\xe2\x80\xa8"));
+				for (auto sub_line: sub_lines) {
+					convert_line(sub_line, rs);
+					rs.vertical_position.line = rs.vertical_position.line.get() + 1;
+					rs.text.clear();
+				}
 			}
 			break;
 		}
```
