}
+/** Remove from some UTF-8 text the bytes which may not appear in an XML 1.0 document.
+ *
+ *  Complete multi-byte UTF-8 sequences (2, 3 or 4 bytes, identified by their lead byte)
+ *  are copied through unchanged.  Single bytes are kept only if they are ASCII and are
+ *  either >= 0x20 or one of tab (0x9), line feed (0xa) or carriage return (0xd).
+ *  A multi-byte sequence which is cut short by the end of the string is dropped, as are
+ *  bytes >= 0x80 which do not start a sequence; neither can be part of valid UTF-8.
+ *
+ *  @param text UTF-8 text to filter.
+ *  @return Copy of @p text with the disallowed bytes removed.
+ */
+string
+TextDecoder::remove_invalid_characters_for_xml(string text)
+{
+	string output;
+	output.reserve(text.length());
+
+	/* https://www.w3.org/TR/REC-xml/#charsets says that XML may only contain 0x9, 0xa, 0xd below 0x20.
+	 * Not sure if we should be doing direct UTF-8 manipulation here.
+	 */
+	auto const length = text.length();
+	size_t i = 0;
+	while (i < length) {
+		/* Use an unsigned value so that the comparisons below give the same answer
+		 * whether or not plain char is signed on this platform.
+		 */
+		auto const c = static_cast<unsigned char>(text[i]);
+
+		size_t sequence_length = 1;
+		if ((c & 0xe0) == 0xc0) {
+			// start of 2-byte code point
+			sequence_length = 2;
+		} else if ((c & 0xf0) == 0xe0) {
+			// start of 3-byte code point
+			sequence_length = 3;
+		} else if ((c & 0xf8) == 0xf0) {
+			// start of 4-byte code point
+			sequence_length = 4;
+		}
+
+		if (sequence_length > 1) {
+			if (length - i < sequence_length) {
+				/* The sequence runs off the end of the string; reading its missing
+				 * bytes would be out of bounds, so discard what is left.
+				 */
+				break;
+			}
+			output.append(text, i, sequence_length);
+			i += sequence_length;
+			continue;
+		}
+
+		/* Single byte: only permitted ASCII gets through */
+		if (c < 0x80 && (c >= 0x20 || c == 0x9 || c == 0xa || c == 0xd)) {
+			output += text[i];
+		}
+		++i;
+	}
+
+	return output;
+}
+
+
void
TextDecoder::emit_plain_start(ContentTime from, vector<dcp::SubtitleString> subtitles, dcp::SubtitleStandard valign_standard)
{
content()->get_font(subtitle.font().get_value_or("")),
valign_standard
);
- string_text.set_text(string_text.text());
+ string_text.set_text(remove_invalid_characters_for_xml(string_text.text()));
set_forced_appearance(content(), string_text);
string_texts.push_back(string_text);
}
v_align,
0,
dcp::Direction::LTR,
- block.text,
+ remove_invalid_characters_for_xml(block.text),
dcp::Effect::NONE,
dcp_colour(block.effect_colour.get_value_or(sub::Colour(0, 0, 0))),
/* Hack: we should use subtitle.fade_up and subtitle.fade_down here
return _content;
}
+ /** @return copy of text without the control characters that XML does not allow (anything below 0x20 except tab, line feed and carriage return) */ static std::string remove_invalid_characters_for_xml(std::string text);
+
boost::signals2::signal<void (ContentBitmapText)> BitmapStart;
boost::signals2::signal<void (ContentStringText)> PlainStart;
boost::signals2::signal<void (dcpomatic::ContentTime)> Stop;
}
+/** Make a DCP from a .srt file whose text contains a control code (form feed, 0x0c)
+ *  and verify it: the control code should not make it into the XML.
+ */
+BOOST_AUTO_TEST_CASE(srt_subtitle_control_code)
+{
+	auto const srt_path = "build/test/srt_subtitle_control_code.srt";
+
+	{
+		/* Write a single subtitle; the stream is closed when it goes out of scope */
+		std::ofstream writer(srt_path);
+		writer << "1\n"
+		       << "00:00:01,000 -> 00:00:10,000\n"
+		       << "Hello \x0c world\n";
+	}
+
+	auto subtitle = make_shared<StringTextFileContent>(srt_path);
+	auto film = new_test_film2("srt_subtitle_control_code", { subtitle });
+	film->set_interop(false);
+
+	/* Use the text, but as a separate subtitle asset rather than burnt into the picture */
+	auto text = subtitle->only_text();
+	text->set_use(true);
+	text->set_burn(false);
+
+	/* NOTE(review): the listed codes are presumably the verification notes that are tolerated here; confirm against make_and_verify_dcp */
+	make_and_verify_dcp (
+		film,
+		{
+			dcp::VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE,
+			dcp::VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME,
+			dcp::VerificationNote::Code::MISSING_CPL_METADATA,
+		});
+}
+
+
#if 0
/* XXX: this is disabled; there is some difference in font rendering
between the test machine and others.
--- /dev/null
+/*
+ Copyright (C) 2023 Carl Hetherington <cth@carlh.net>
+
+ This file is part of DCP-o-matic.
+
+ DCP-o-matic is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ DCP-o-matic is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with DCP-o-matic. If not, see <http://www.gnu.org/licenses/>.
+
+*/
+
+
+#include "lib/text_decoder.h"
+#include <boost/test/unit_test.hpp>
+
+
+/** Check TextDecoder::remove_invalid_characters_for_xml against plain ASCII, ASCII with
+ *  control codes, and text mixing control codes with 2-, 3- and 4-byte UTF-8 sequences.
+ */
+BOOST_AUTO_TEST_CASE(strip_invalid_characters_for_xml_test)
+{
+	auto strip = [](std::string const& input) {
+		return TextDecoder::remove_invalid_characters_for_xml(input);
+	};
+
+	/* Nothing to remove */
+	BOOST_CHECK_EQUAL(strip("hello world"), "hello world");
+	/* Form feed is removed */
+	BOOST_CHECK_EQUAL(strip("hello\x0cworld"), "helloworld");
+	/* Control code removed while the surrounding 4-byte code points are untouched */
+	BOOST_CHECK_EQUAL(strip("𒀖hello\x02worl𒁝d"), "𒀖helloworl𒁝d");
+	/* Tab (0x09) is kept; 0x1a and 0x02 are removed from amongst multi-byte code points */
+	BOOST_CHECK_EQUAL(strip("😀œ´®†¥¨ˆø\x09π¬˚∆\x1a˙©ƒ∂ßåΩ≈ç√∫\x02˜µ≤ユーザーコードa"), "😀œ´®†¥¨ˆø\x09π¬˚∆˙©ƒ∂ßåΩ≈ç√∫˜µ≤ユーザーコードa");
+}
subtitle_timing_test.cc
subtitle_trim_test.cc
test.cc
+ text_decoder_test.cc
threed_test.cc
time_calculation_test.cc
torture_test.cc