/*
 * Code carried by the patch "Move utf8_strlen() in from DoM."
 * (commit 61c6b8a23ef6381c2d1b1aaa5bfc1e31e35a7ec0, Carl Hetherington,
 * Wed, 15 Apr 2026 22:55:53 +0200), which adds to src/util.h, src/util.cc
 * and test/util_test.cc.  The three fragments are shown together below.
 */


/* ---- src/util.h ---- */

namespace dcp {

/** Count the characters in a UTF-8 encoded string.
 *
 *  The count is made by looking only at the first byte of each sequence:
 *  a byte of the form 110xxxxx, 1110xxxx or 11110xxx is taken to start a
 *  sequence of 2, 3 or 4 bytes respectively, and any other byte is taken
 *  to be a character on its own.  No validation is done, so a stray
 *  continuation byte counts as one character, and a multi-byte sequence
 *  that is cut short by the end of the string also counts as one.
 *
 *  @param s String to measure; its bytes are interpreted as UTF-8.
 *  @return Number of characters found in @p s (0 for an empty string).
 */
extern size_t utf8_strlen(std::string s);

}


/* ---- src/util.cc ---- */

/** See the declaration for the full contract.  The running total is kept in a
 *  size_t so that it has the same type as the return value, rather than
 *  passing through a (narrower, signed) int.
 */
size_t
dcp::utf8_strlen(std::string s)
{
	size_t const len = s.length();
	size_t count = 0;
	size_t i = 0;

	while (i < len) {
		/* Inspect the byte as unsigned so the masks below behave the same
		   whether or not plain char is signed.
		*/
		unsigned char const lead = static_cast<unsigned char>(s[i]);

		if ((lead & 0xe0) == 0xc0) {
			/* 110xxxxx: start of a 2-byte sequence */
			i += 2;
		} else if ((lead & 0xf0) == 0xe0) {
			/* 1110xxxx: start of a 3-byte sequence */
			i += 3;
		} else if ((lead & 0xf8) == 0xf0) {
			/* 11110xxx: start of a 4-byte sequence */
			i += 4;
		} else {
			/* Anything else is counted as a single character */
			i += 1;
		}

		++count;
	}

	return count;
}


/* ---- test/util_test.cc ---- */

/* Only built when Boost.Test has been included, so that the fragments above
   can still be compiled on their own.
*/
#ifdef BOOST_AUTO_TEST_CASE
BOOST_AUTO_TEST_CASE(utf8_strlen_test)
{
	BOOST_CHECK_EQUAL(dcp::utf8_strlen("hello world"), 11U);
	BOOST_CHECK_EQUAL(dcp::utf8_strlen("hëllo world"), 11U);
	BOOST_CHECK_EQUAL(dcp::utf8_strlen("hëłlo wørld"), 11U);
}
#endif