Allow Collators to have their language specified, and specify it for tests.
[dcpomatic.git] / src / lib / collator.cc
index 17afbe852c2208f7d1d70cf1c3631c67c570fb70..8de1857abeb6c24c988f9e5b2536be94b67b2c6a 100644 (file)
 
 
 #include "collator.h"
+#include "dcpomatic_assert.h"
 #include <unicode/putil.h>
 #include <unicode/ucol.h>
 #include <unicode/uiter.h>
 #include <unicode/utypes.h>
+#include <unicode/usearch.h>
 #include <unicode/ustring.h>
 #include <boost/scoped_array.hpp>
 #include <cstring>
+#include <vector>
 
 
 using std::string;
+using std::vector;
 
 
-Collator::Collator()
+Collator::Collator(char const* locale)
 {
        UErrorCode status = U_ZERO_ERROR;
-       _collator = ucol_open(nullptr, &status);
+       _collator = ucol_open(locale, &status);
        if (_collator) {
                ucol_setAttribute(_collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
+               /* Ignore case and character encoding (and probably some other things) */
                ucol_setAttribute(_collator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
                ucol_setAttribute(_collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
        }
@@ -52,18 +57,46 @@ Collator::~Collator()
 }
 
 
+/* Convert a UTF-8 string into a NUL-terminated UTF-16 buffer using ICU's u_strFromUTF8.
+ * The input is read up to its first NUL; any ICU status other than U_ZERO_ERROR
+ * trips DCPOMATIC_ASSERT.
+ */
+vector<UChar>
+utf8_to_utf16(string const& utf8)
+{
+       /* One UTF-16 unit per UTF-8 byte is always enough; the extra one holds the terminator */
+       auto const capacity = utf8.size() + 1;
+       vector<UChar> converted(capacity);
+       UErrorCode status = U_ZERO_ERROR;
+       u_strFromUTF8(converted.data(), capacity, nullptr, utf8.c_str(), -1, &status);
+       DCPOMATIC_ASSERT(status == U_ZERO_ERROR);
+       return converted;
+}
+
+/* Compare two UTF-8 strings for ordering.  When an ICU collator was opened, both are
+ * converted to UTF-16 and compared with ucol_strcoll; otherwise the C library's strcoll
+ * is applied to the UTF-8 bytes directly.
+ * @return the result of whichever comparison was used: negative, zero or positive as
+ * utf8_a sorts before, equal to or after utf8_b.
+ */
 int
 Collator::compare (string const& utf8_a, string const& utf8_b) const
 {
        if (_collator) {
-               UErrorCode error = U_ZERO_ERROR;
-               boost::scoped_array<uint16_t> utf16_a(new uint16_t[utf8_a.size() + 1]);
-               u_strFromUTF8(reinterpret_cast<UChar*>(utf16_a.get()), utf8_a.size() + 1, nullptr, utf8_a.c_str(), -1, &error);
-               boost::scoped_array<uint16_t> utf16_b(new uint16_t[utf8_b.size() + 1]);
-               u_strFromUTF8(reinterpret_cast<UChar*>(utf16_b.get()), utf8_b.size() + 1, nullptr, utf8_b.c_str(), -1, &error);
-               return ucol_strcoll(_collator, reinterpret_cast<UChar*>(utf16_a.get()), -1, reinterpret_cast<UChar*>(utf16_b.get()), -1);
+               auto utf16_a = utf8_to_utf16(utf8_a);
+               auto utf16_b = utf8_to_utf16(utf8_b);
+               return ucol_strcoll(_collator, utf16_a.data(), -1, utf16_b.data(), -1);
        } else {
                return strcoll(utf8_a.c_str(), utf8_b.c_str());
        }
 }
 
+
+/* Search for pattern within text.  With an ICU collator the match follows that collator's
+ * rules; without one, fall back to a lower-cased byte-wise substring search.
+ * @param pattern UTF-8 string to look for.
+ * @param text UTF-8 string to look in.
+ * @return true if pattern occurs somewhere in text; an empty pattern always matches.
+ */
+bool
+Collator::find(string pattern, string text) const
+{
+       /* ICU refuses to open a search on a zero-length pattern or text (U_ILLEGAL_ARGUMENT_ERROR),
+        * so settle those cases here rather than tripping the assertion below.
+        */
+       if (pattern.empty()) {
+               return true;
+       }
+       if (text.empty()) {
+               return false;
+       }
+
+       if (_collator) {
+               auto utf16_pattern = utf8_to_utf16(pattern);
+               auto utf16_text = utf8_to_utf16(text);
+               UErrorCode status = U_ZERO_ERROR;
+               auto search = usearch_openFromCollator(utf16_pattern.data(), -1, utf16_text.data(), -1, _collator, nullptr, &status);
+               DCPOMATIC_ASSERT(search);
+               auto const index = usearch_first(search, &status);
+               usearch_close(search);
+               return index != USEARCH_DONE;
+       } else {
+               /* Go via unsigned char: passing a negative char to tolower is undefined */
+               auto const lower = [](unsigned char c) { return static_cast<char>(::tolower(c)); };
+               std::transform(pattern.begin(), pattern.end(), pattern.begin(), lower);
+               std::transform(text.begin(), text.end(), text.begin(), lower);
+               /* Look for the pattern inside the text, not the other way round */
+               return text.find(pattern) != string::npos;
+       }
+}
+