- BOOST_FOREACH (dcp::SubtitleString& i, s) {
- /* We must escape < and > in strings, otherwise they might confuse our subtitle
- renderer (which uses some HTML-esque markup to do bold/italic etc.)
- */
- string t = i.text ();
- boost::algorithm::replace_all (t, "<", "<");
- boost::algorithm::replace_all (t, ">", ">");
- i.set_text (t);
-
- /* Set any forced appearance */
- if (content()->colour()) {
- i.set_colour (*content()->colour());
- }
- if (content()->effect_colour()) {
- i.set_effect_colour (*content()->effect_colour());
- }
- if (content()->effect()) {
- i.set_effect (*content()->effect());
- }
- if (content()->fade_in()) {
- i.set_fade_up_time (dcp::Time(content()->fade_in()->seconds(), 1000));
- }
- if (content()->fade_out()) {
- i.set_fade_down_time (dcp::Time(content()->fade_out()->seconds(), 1000));
+ if (content->colour()) {
+ subtitle.set_colour(*content->colour());
+ }
+ if (content->effect_colour()) {
+ subtitle.set_effect_colour(*content->effect_colour());
+ }
+ if (content->effect()) {
+ subtitle.set_effect(*content->effect());
+ }
+ if (content->fade_in()) {
+ subtitle.set_fade_up_time(dcp::Time(content->fade_in()->seconds(), 1000));
+ }
+ if (content->fade_out()) {
+ subtitle.set_fade_down_time (dcp::Time(content->fade_out()->seconds(), 1000));
+ }
+}
+
+
+string
+TextDecoder::remove_invalid_characters_for_xml(string text)
+{
+ string output;
+
+ /* https://www.w3.org/TR/REC-xml/#charsets says that the only characters below 0x20 that XML may contain are 0x9, 0xa and 0xd.
+ * Not sure if we should be doing direct UTF-8 manipulation here.
+ */
+ for (size_t i = 0; i < text.length(); ++i) {
+ auto const c = text[i];
+ if ((c & 0xe0) == 0xc0) {
+ // start of 2-byte code point
+ output += c;
+ output += text[i + 1];
+ ++i;
+ } else if ((c & 0xf0) == 0xe0) {
+ // start of 3-byte code point
+ output += c;
+ output += text[i + 1];
+ output += text[i + 2];
+ i += 2;
+ } else if ((c & 0xf8) == 0xf0) {
+ // start of 4-byte code point
+ output += c;
+ output += text[i + 1];
+ output += text[i + 2];
+ output += text[i + 3];
+ i += 3;
+ } else {
+ if (c >= 0x20 || c == 0x9 || c == 0xa || c == 0xd) {
+ output += c;
+ }