summaryrefslogtreecommitdiff
path: root/src/lib/collator.cc
blob: 8de1857abeb6c24c988f9e5b2536be94b67b2c6a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/*
    Copyright (C) 2022 Carl Hetherington <cth@carlh.net>

    This file is part of DCP-o-matic.

    DCP-o-matic is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    DCP-o-matic is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.

*/


#include "collator.h"
#include "dcpomatic_assert.h"
#include <unicode/putil.h>
#include <unicode/ucol.h>
#include <unicode/uiter.h>
#include <unicode/utypes.h>
#include <unicode/usearch.h>
#include <unicode/ustring.h>
#include <boost/scoped_array.hpp>
#include <algorithm>
#include <cctype>
#include <cstring>
#include <vector>


using std::string;
using std::vector;


/** Try to open an ICU collator for a locale and configure it for loose matching.
 *
 *  If ucol_open() does not give us a collator, _collator is left null and the
 *  other methods use their non-ICU fallbacks.
 *
 *  @param locale Locale name, passed straight to ucol_open().
 */
Collator::Collator(char const* locale)
{
	UErrorCode status = U_ZERO_ERROR;
	_collator = ucol_open(locale, &status);
	if (!_collator) {
		return;
	}

	struct Setting
	{
		UColAttribute attribute;
		UColAttributeValue value;
	};

	/* Applied in this order, all sharing the same status variable */
	Setting const settings[] = {
		{ UCOL_NORMALIZATION_MODE, UCOL_ON },
		/* Primary strength: ignore case and character encoding (and probably some other things) */
		{ UCOL_STRENGTH, UCOL_PRIMARY },
		/* NOTE(review): per the ICU documentation, "shifted" makes variable characters
		   such as spaces and punctuation ignorable at this strength -- confirm this
		   is the intent.
		*/
		{ UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED },
	};

	for (auto const& setting: settings) {
		ucol_setAttribute(_collator, setting.attribute, setting.value, &status);
	}
}


/** Close the ICU collator, if the constructor managed to open one */
Collator::~Collator()
{
	if (!_collator) {
		return;
	}

	ucol_close(_collator);
}


/** Convert a UTF-8 string to UTF-16 using ICU.
 *
 *  The input is handed to ICU as a NUL-terminated C string (length -1), so
 *  conversion stops at the first NUL byte.  Any ICU status other than
 *  U_ZERO_ERROR trips a DCPOMATIC_ASSERT.
 *
 *  @param utf8 String to convert.
 *  @return Buffer of utf8.size() + 1 UTF-16 code units holding the converted text;
 *  unused trailing units are zero.
 */
vector<UChar>
utf8_to_utf16(string const& utf8)
{
	/* One UTF-16 unit per input byte is always enough, plus one for the terminator */
	auto const capacity = utf8.size() + 1;
	vector<UChar> converted(capacity);

	UErrorCode error = U_ZERO_ERROR;
	u_strFromUTF8(converted.data(), capacity, nullptr, utf8.c_str(), -1, &error);
	DCPOMATIC_ASSERT(error == U_ZERO_ERROR);

	return converted;
}


/** Compare two UTF-8 strings for sorting.
 *
 *  Uses the ICU collator if there is one, otherwise the C library's strcoll().
 *
 *  @param utf8_a First string.
 *  @param utf8_b Second string.
 *  @return Negative, zero or positive if utf8_a sorts before, the same as, or after utf8_b.
 */
int
Collator::compare (string const& utf8_a, string const& utf8_b) const
{
	if (!_collator) {
		/* No ICU collator was opened; fall back to the C locale's collation */
		return strcoll(utf8_a.c_str(), utf8_b.c_str());
	}

	auto const first = utf8_to_utf16(utf8_a);
	auto const second = utf8_to_utf16(utf8_b);
	/* Both buffers are NUL-terminated, hence the -1 lengths */
	return ucol_strcoll(_collator, first.data(), -1, second.data(), -1);
}


/** Look for one UTF-8 string inside another.
 *
 *  With an ICU collator the search follows the collator's rules; without one,
 *  both strings are lower-cased byte by byte and a plain substring search is
 *  done.
 *
 *  @param pattern String to look for.
 *  @param text String to search in.
 *  @return true if pattern occurs somewhere in text.  An empty pattern matches
 *  any text; a non-empty pattern never matches an empty text.
 */
bool
Collator::find(string pattern, string text) const
{
	/* usearch_openFromCollator() refuses zero-length patterns and texts (it returns
	   null with U_ILLEGAL_ARGUMENT_ERROR), so answer those cases here with the
	   same results that std::string::find() gives.
	*/
	if (pattern.empty()) {
		return true;
	}
	if (text.empty()) {
		return false;
	}

	if (_collator) {
		auto utf16_pattern = utf8_to_utf16(pattern);
		auto utf16_text = utf8_to_utf16(text);
		UErrorCode status = U_ZERO_ERROR;
		auto search = usearch_openFromCollator(utf16_pattern.data(), -1, utf16_text.data(), -1, _collator, nullptr, &status);
		DCPOMATIC_ASSERT(search);
		auto const index = usearch_first(search, &status);
		usearch_close(search);
		return index != USEARCH_DONE;
	}

	/* Go via unsigned char: passing a negative char (any byte >= 0x80 where char
	   is signed) to tolower() is undefined behaviour.
	*/
	auto to_lower = [](unsigned char c) {
		return static_cast<char>(std::tolower(c));
	};
	std::transform(pattern.begin(), pattern.end(), pattern.begin(), to_lower);
	std::transform(text.begin(), text.end(), text.begin(), to_lower);

	/* Search for the pattern within the text, as the ICU branch does */
	return text.find(pattern) != string::npos;
}