From 014bb19876f5b26b9802fa42b573c333ebc09139 Mon Sep 17 00:00:00 2001
From: Carl Hetherington
Date: Wed, 23 Jan 2019 21:44:20 +0000
Subject: Strip Unicode U+202B (right-to-left-embedding) code; it looks like DoM does RTL (at least) partially correctly without this.

---
 src/subrip_reader.cc | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src')

diff --git a/src/subrip_reader.cc b/src/subrip_reader.cc
index 02ee20a..b5d0446 100644
--- a/src/subrip_reader.cc
+++ b/src/subrip_reader.cc
@@ -42,6 +42,7 @@ using boost::lexical_cast;
 using boost::to_upper;
 using boost::optional;
 using boost::function;
+using boost::algorithm::replace_all;
 using namespace sub;
 
 /** @param s Subtitle string encoded in UTF-8 */
@@ -233,6 +234,11 @@ SubripReader::convert_line (string t, RawSubtitle& p)
 		}
 	}
 
+	/* Strip Unicode U+202B (right-to-left embedding) as sometimes it is rendered
+	   as a missing character. This may be a hack.
+	*/
+	replace_all (p.text, "\xe2\x80\xab", "");
+
 	maybe_content (p);
 }
 
-- 
cgit v1.2.3