2 Copyright (C) 2022 Carl Hetherington <cth@carlh.net>
4 This file is part of DCP-o-matic.
6 DCP-o-matic is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 DCP-o-matic is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with DCP-o-matic. If not, see <http://www.gnu.org/licenses/>.
23 #include "dcpomatic_assert.h"
24 #include <unicode/putil.h>
25 #include <unicode/ucol.h>
26 #include <unicode/uiter.h>
27 #include <unicode/utypes.h>
28 #include <unicode/usearch.h>
29 #include <unicode/ustring.h>
30 #include <boost/scoped_array.hpp>
39 Collator::Collator(char const* locale)
41 UErrorCode status = U_ZERO_ERROR;
42 _collator = ucol_open(locale, &status);
44 ucol_setAttribute(_collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
45 /* Ignore case and character encoding (and probably some other things) */
46 ucol_setAttribute(_collator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
47 ucol_setAttribute(_collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
/* Close the ICU collator handle stored in _collator.
 * NOTE(review): the signature of the enclosing function is not visible in this
 * view; presumably this is the body of Collator's destructor -- confirm.
 */
55 ucol_close (_collator);
61 utf8_to_utf16(string const& utf8)
63 vector<UChar> utf16(utf8.size() + 1);
64 UErrorCode error = U_ZERO_ERROR;
65 u_strFromUTF8(utf16.data(), utf8.size() + 1, nullptr, utf8.c_str(), -1, &error);
66 DCPOMATIC_ASSERT(error == U_ZERO_ERROR);
72 Collator::compare (string const& utf8_a, string const& utf8_b) const
75 auto utf16_a = utf8_to_utf16(utf8_a);
76 auto utf16_b = utf8_to_utf16(utf8_b);
77 return ucol_strcoll(_collator, utf16_a.data(), -1, utf16_b.data(), -1);
79 return strcoll(utf8_a.c_str(), utf8_b.c_str());
85 Collator::find(string pattern, string text) const
88 auto utf16_pattern = utf8_to_utf16(pattern);
89 auto utf16_text = utf8_to_utf16(text);
90 UErrorCode status = U_ZERO_ERROR;
91 auto search = usearch_openFromCollator(utf16_pattern.data(), -1, utf16_text.data(), -1, _collator, nullptr, &status);
92 DCPOMATIC_ASSERT(search);
93 auto const index = usearch_first(search, &status);
94 usearch_close(search);
97 transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower);
98 transform(text.begin(), text.end(), text.begin(), ::tolower);
99 return pattern.find(text) != string::npos;