+ /* Filter out `bad' characters which `may' cause problems with some systems (either for DCP name or filename).
+    There's no apparent list of what really is allowed, so this is a guess.
+    Safety first and all that.
+
+    The input `s' is decoded as UTF-8; the returned string only contains characters from the
+    small ASCII whitelist `allowed' below.
+ */
+
+ /* First transliterate using libicu to try to remove accents in a "nice" way:
+    decompose (NFD), remove the combining marks ([:M:]) and then recompose (NFC).
+ */
+ auto transliterated = icu::UnicodeString::fromUTF8(icu::StringPiece(s));
+ auto status = U_ZERO_ERROR;
+ auto transliterator = icu::Transliterator::createInstance("NFD; [:M:] Remove; NFC", UTRANS_FORWARD, status);
+ /* Failure is reported through status, so check it (and the pointer) before use.  If ICU could not
+    give us a transliterator we just skip this step; the whitelist below is still applied.
+ */
+ if (U_SUCCESS(status) && transliterator) {
+     transliterator->transliterate(transliterated);
+ }
+ /* We own the object returned by createInstance(), so it must be freed here */
+ delete transliterator;
+
+ /* Some things are missed by ICU's transliterator */
+ static std::map<wchar_t, wchar_t> const replacements = {
+     { L'ł', L'l' },
+     { L'Ł', L'L' }
+ };
+
+ /* Then apply those extra replacements and remove anything that's not in a very limited character set.
+    This walks the UTF-16 code units of the string one at a time, so the two halves of a surrogate
+    pair are looked at (and dropped) individually.
+ */
+ wstring out;
+ wstring const allowed = L"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_%.+";
+ for (int i = 0; i < transliterated.length(); ++i) {
+     wchar_t c = transliterated[i];
+     auto replacement = replacements.find(c);
+     if (replacement != replacements.end()) {
+         c = replacement->second;
+     }
+     if (allowed.find(c) != wstring::npos) {
+         out += c;
+     }
+ }
+
+ /* out is pure ASCII by now, so this conversion just narrows it to a std::string */
+ return boost::locale::conv::utf_to_utf<char>(out);