/** Count the characters (code points) in a UTF-8 encoded string, as opposed to
 *  the number of bytes that std::string::length() reports.  Used when checking
 *  closed caption lines against CLOSED_CAPTION_LENGTH.
 *
 *  Malformed input is handled conservatively so that the result is never an
 *  under-count: a lead byte only absorbs following bytes which really are
 *  continuation bytes (10xxxxxx), and any other byte (including a stray
 *  continuation byte) counts as one character on its own.
 *
 *  @param s String to measure; expected to be UTF-8.
 *  @return Number of characters in s.
 */
size_t
utf8_strlen (std::string s)
{
	size_t const len = s.length ();
	size_t count = 0;
	size_t i = 0;
	while (i < len) {
		unsigned char const lead = static_cast<unsigned char> (s[i]);
		++i;

		/* Number of continuation bytes announced by the lead byte:
		   110xxxxx -> 1, 1110xxxx -> 2, 11110xxx -> 3, anything else -> 0.
		*/
		int following = 0;
		if ((lead & 0xe0) == 0xc0) {
			following = 1;
		} else if ((lead & 0xf0) == 0xe0) {
			following = 2;
		} else if ((lead & 0xf8) == 0xf0) {
			following = 3;
		}

		/* Consume only genuine continuation bytes, so that a bad or truncated
		   sequence cannot hide the characters that come after it.
		*/
		while (following > 0 && i < len && (static_cast<unsigned char> (s[i]) & 0xc0) == 0x80) {
			++i;
			--following;
		}

		++count;
	}
	return count;
}
/** Check that utf8_strlen() counts characters rather than bytes for every
 *  UTF-8 sequence length that it distinguishes (1, 2, 3 and 4 bytes), and for
 *  the empty string.  Expected values are unsigned to match the size_t return
 *  type.
 */
BOOST_AUTO_TEST_CASE (utf8_strlen_test)
{
	/* Plain ASCII: one byte per character */
	BOOST_CHECK_EQUAL (utf8_strlen("hello world"), 11U);
	/* Two-byte sequences mixed with ASCII */
	BOOST_CHECK_EQUAL (utf8_strlen("hëllo world"), 11U);
	BOOST_CHECK_EQUAL (utf8_strlen("hëłlo wørld"), 11U);
	/* Empty string */
	BOOST_CHECK_EQUAL (utf8_strlen(""), 0U);
	/* Three-byte sequence (U+20AC), written as escapes so that it does not depend on source encoding */
	BOOST_CHECK_EQUAL (utf8_strlen("\xe2\x82\xac"), 1U);
	/* Four-byte sequence (U+1F600) */
	BOOST_CHECK_EQUAL (utf8_strlen("\xf0\x9f\x98\x80"), 1U);
}