#include "collator.h"
+#include "dcpomatic_assert.h"
#include <unicode/putil.h>
#include <unicode/ucol.h>
#include <unicode/uiter.h>
#include <unicode/utypes.h>
+#include <unicode/usearch.h>
#include <unicode/ustring.h>
#include <boost/scoped_array.hpp>
+#include <algorithm>
+#include <cctype>
#include <cstring>
+#include <vector>
using std::string;
+using std::vector;
+/** Open an ICU collator for a locale and set the attributes used for matching.
+ *
+ *  @param locale Locale identifier, passed unchanged to ucol_open().
+ *
+ *  If ucol_open() returns a collator, normalization is switched on, the strength is
+ *  set to UCOL_PRIMARY and alternate handling is set to UCOL_SHIFTED.  If it returns
+ *  a null pointer no attributes are set; compare() and find() test _collator and
+ *  take their non-ICU paths in that case.
+ *
+ *  The UErrorCode written by ucol_open() and ucol_setAttribute() is not inspected here.
+ *
+ *  NOTE(review): per the ICU documentation UCOL_SHIFTED presumably makes whitespace
+ *  and punctuation ignorable at this strength -- confirm.
+ */
-Collator::Collator()
+Collator::Collator(char const* locale)
{
UErrorCode status = U_ZERO_ERROR;
- _collator = ucol_open(nullptr, &status);
+ _collator = ucol_open(locale, &status);
if (_collator) {
ucol_setAttribute(_collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
+ /* Ignore case and character encoding (and probably some other things) */
ucol_setAttribute(_collator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
ucol_setAttribute(_collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
}
}
+/** Convert a UTF-8 string into a null-terminated UTF-16 buffer for ICU's C API.
+ *
+ *  @param utf8 String to convert.  It is handed to ICU as a null-terminated C string
+ *  (source length -1), so conversion stops at the first null byte.
+ *  @return Vector of utf8.size() + 1 code units: the converted string followed by
+ *  zero-valued padding, so data() can be passed to ICU with a length of -1.
+ *
+ *  Asserts (DCPOMATIC_ASSERT) if ICU reports a conversion failure.
+ */
+vector<UChar>
+utf8_to_utf16(string const& utf8)
+{
+	/* UTF-16 never needs more code units than UTF-8 needs bytes, so size() + 1
+	 * always leaves room for the terminator; the vector starts zero-filled.
+	 */
+	vector<UChar> utf16(utf8.size() + 1);
+	UErrorCode error = U_ZERO_ERROR;
+	u_strFromUTF8(utf16.data(), static_cast<int32_t>(utf16.size()), nullptr, utf8.c_str(), -1, &error);
+	/* ICU warnings are non-zero but are not failures, so test with U_SUCCESS
+	 * rather than comparing against U_ZERO_ERROR.
+	 */
+	DCPOMATIC_ASSERT(U_SUCCESS(error));
+	return utf16;
+}
+
+
+/** Compare two UTF-8 strings for ordering.
+ *
+ *  If an ICU collator is available both strings are converted to UTF-16 and the
+ *  result of ucol_strcoll() is returned; otherwise the C library's strcoll() is
+ *  applied to the UTF-8 bytes.
+ *
+ *  @param utf8_a First string, UTF-8 encoded.
+ *  @param utf8_b Second string, UTF-8 encoded.
+ *  @return The value returned by ucol_strcoll() or strcoll() -- presumably negative,
+ *  zero or positive as utf8_a sorts before, equal to or after utf8_b (confirm the
+ *  ucol_strcoll() values against the ICU documentation).
+ */
int
-Collator::compare (string const& utf8_a, string const& utf8_b)
+Collator::compare (string const& utf8_a, string const& utf8_b) const
{
if (_collator) {
- UErrorCode error = U_ZERO_ERROR;
- boost::scoped_array<uint16_t> utf16_a(new uint16_t[utf8_a.size() + 1]);
- u_strFromUTF8(reinterpret_cast<UChar*>(utf16_a.get()), utf8_a.size() + 1, nullptr, utf8_a.c_str(), -1, &error);
- boost::scoped_array<uint16_t> utf16_b(new uint16_t[utf8_b.size() + 1]);
- u_strFromUTF8(reinterpret_cast<UChar*>(utf16_b.get()), utf8_b.size() + 1, nullptr, utf8_b.c_str(), -1, &error);
- return ucol_strcoll(_collator, reinterpret_cast<UChar*>(utf16_a.get()), -1, reinterpret_cast<UChar*>(utf16_b.get()), -1);
+ /* Lengths of -1 tell ICU that the buffers are null-terminated */
+ auto utf16_a = utf8_to_utf16(utf8_a);
+ auto utf16_b = utf8_to_utf16(utf8_b);
+ return ucol_strcoll(_collator, utf16_a.data(), -1, utf16_b.data(), -1);
} else {
+ /* No ICU collator was opened: use the C library comparison instead */
return strcoll(utf8_a.c_str(), utf8_b.c_str());
}
}
+
+/** Test whether one UTF-8 string occurs inside another.
+ *
+ *  @param pattern UTF-8 string to look for.
+ *  @param text UTF-8 string to search in.
+ *  @return true if pattern is found somewhere in text.  With an ICU collator the
+ *  match is made by ICU string search using that collator; without one it is a
+ *  byte-wise search after lower-casing both strings with tolower().
+ *
+ *  An empty pattern is found in any text; a non-empty pattern is never found in an
+ *  empty text.
+ */
+bool
+Collator::find(string pattern, string text) const
+{
+	/* usearch_openFromCollator() rejects zero-length input with
+	 * U_ILLEGAL_ARGUMENT_ERROR, which would trip the assertion below, so answer
+	 * those cases here.  The results match what string::find() gives.
+	 */
+	if (pattern.empty()) {
+		return true;
+	}
+	if (text.empty()) {
+		return false;
+	}
+
+	if (_collator) {
+		auto const utf16_pattern = utf8_to_utf16(pattern);
+		auto const utf16_text = utf8_to_utf16(text);
+		UErrorCode status = U_ZERO_ERROR;
+		auto search = usearch_openFromCollator(utf16_pattern.data(), -1, utf16_text.data(), -1, _collator, nullptr, &status);
+		DCPOMATIC_ASSERT(search);
+		auto const index = usearch_first(search, &status);
+		usearch_close(search);
+		return index != USEARCH_DONE;
+	}
+
+	/* No ICU collator: fall back to a simple lower-cased search.  tolower() is
+	 * only defined for values representable as unsigned char, hence the lambda's
+	 * parameter type.
+	 */
+	auto const lower = [](unsigned char c) { return static_cast<char>(std::tolower(c)); };
+	std::transform(pattern.begin(), pattern.end(), pattern.begin(), lower);
+	std::transform(text.begin(), text.end(), text.begin(), lower);
+	/* Look for the pattern inside the text, not the other way round */
+	return text.find(pattern) != string::npos;
+}
+