Fix crashes when using templates in some cases (#2491).
[dcpomatic.git] / src / lib / text_decoder.cc
index 750deb9b30602b952e88876ca27c20f1683d1178..1ecdcd34783ae8050fbb6c282d27a511aabcef44 100644 (file)
@@ -87,8 +87,47 @@ set_forced_appearance(shared_ptr<const TextContent> content, StringText& subtitl
 }
 
 
+string
+TextDecoder::remove_invalid_characters_for_xml(string text)
+{
+       string output;
+
+       /* https://www.w3.org/TR/REC-xml/#charsets says that XML may only contain 0x9, 0xa, 0xd below 0x32.
+        * Not sure if we should be doing direct UTF-8 manipulation here.
+        */
+       for (size_t i = 0; i < text.length(); ++i) {
+               auto const c = text[i];
+               if ((c & 0xe0) == 0xc0) {
+                       // start of 2-byte code point
+                       output += c;
+                       output += text[i + 1];
+                       ++i;
+               } else if ((c & 0xf0) == 0xe0) {
+                       // start of 3-byte code point
+                       output += c;
+                       output += text[i + 1];
+                       output += text[i + 2];
+                       i += 2;
+               } else if ((c & 0xf8) == 0xf0) {
+                       // start of 4-byte code point
+                       output += c;
+                       output += text[i + 1];
+                       output += text[i + 2];
+                       output += text[i + 3];
+                       i += 3;
+               } else {
+                       if (c >= 0x20 || c == 0x9 || c == 0xa || c == 0xd) {
+                               output += c;
+                       }
+               }
+       }
+
+       return output;
+}
+
+
 void
-TextDecoder::emit_plain_start (ContentTime from, vector<dcp::SubtitleString> subtitles, dcp::Standard valign_standard)
+TextDecoder::emit_plain_start(ContentTime from, vector<dcp::SubtitleString> subtitles, dcp::SubtitleStandard valign_standard)
 {
        vector<StringText> string_texts;
 
@@ -99,7 +138,7 @@ TextDecoder::emit_plain_start (ContentTime from, vector<dcp::SubtitleString> sub
                        content()->get_font(subtitle.font().get_value_or("")),
                        valign_standard
                        );
-               string_text.set_text(string_text.text());
+               string_text.set_text(remove_invalid_characters_for_xml(string_text.text()));
                set_forced_appearance(content(), string_text);
                string_texts.push_back(string_text);
        }
@@ -153,13 +192,20 @@ TextDecoder::emit_plain_start (ContentTime from, sub::Subtitle const & sub_subti
                                switch (line.vertical_position.reference.get_value_or(sub::BOTTOM_OF_SCREEN)) {
                                case sub::BOTTOM_OF_SCREEN:
                                case sub::TOP_OF_SUBTITLE:
-                                       /* This 0.9 is an arbitrary value to lift the bottom sub off the bottom
+                                       /* This 0.1 is an arbitrary value to lift the bottom sub off the bottom
                                           of the screen a bit to a pleasing degree.
                                           */
-                                       v_position = 0.9 -
+                                       v_position = 0.1 +
                                                (1 + bottom_line.get() - line.vertical_position.line.get()) * multiplier;
 
-                                       v_align = dcp::VAlign::TOP;
+                                       /* Align our subtitles to the bottom of the screen, because if we are making a SMPTE
+                                        * DCP and the projection system uses the wrong standard to interpret vertical position,
+                                        * a bottom-aligned subtitle will be less wrong than a top-aligned one.  This is because
+                                        * in the top-aligned case the difference will be the distance between bbox top and
+                                        * baseline, but in the bottom-aligned case the difference will be between bbox bottom
+                                        * and baseline (which is shorter).
+                                        */
+                                       v_align = dcp::VAlign::BOTTOM;
                                        break;
                                case sub::TOP_OF_SCREEN:
                                        /* This 0.1 is another fudge factor to bring the top line away from the top of the screen a little */
@@ -226,12 +272,16 @@ TextDecoder::emit_plain_start (ContentTime from, sub::Subtitle const & sub_subti
                           content by the other emit_plain_start() above.
                        */
 
+                       auto dcp_colour = [](sub::Colour const& c) {
+                               return dcp::Colour(lrintf(c.r * 255), lrintf(c.g * 255), lrintf(c.b * 255));
+                               };
+
                        auto dcp_subtitle = dcp::SubtitleString(
                                optional<string>(),
                                block.italic,
                                block.bold,
                                block.underline,
-                               block.colour.dcp(),
+                               dcp_colour(block.colour),
                                block.font_size.points (72 * 11),
                                1.0,
                                dcp::Time (from.seconds(), 1000),
@@ -243,9 +293,9 @@ TextDecoder::emit_plain_start (ContentTime from, sub::Subtitle const & sub_subti
                                v_align,
                                0,
                                dcp::Direction::LTR,
-                               block.text,
+                               remove_invalid_characters_for_xml(block.text),
                                dcp::Effect::NONE,
-                               block.effect_colour.get_value_or(sub::Colour(0, 0, 0)).dcp(),
+                               dcp_colour(block.effect_colour.get_value_or(sub::Colour(0, 0, 0))),
                                /* Hack: we should use subtitle.fade_up and subtitle.fade_down here
                                   but the times of these often don't have a frame rate associated
                                   with them so the sub::Time won't convert them to milliseconds without
@@ -261,7 +311,7 @@ TextDecoder::emit_plain_start (ContentTime from, sub::Subtitle const & sub_subti
                                dcp_subtitle,
                                content()->outline_width(),
                                content()->get_font(block.font.get_value_or("")),
-                               dcp::Standard::SMPTE
+                               dcp::SubtitleStandard::SMPTE_2014
                                );
                        set_forced_appearance(content(), string_text);
                        string_texts.push_back(string_text);
@@ -281,7 +331,7 @@ TextDecoder::emit_stop (ContentTime to)
 
 
 void
-TextDecoder::emit_plain (ContentTimePeriod period, vector<dcp::SubtitleString> subtitles, dcp::Standard valign_standard)
+TextDecoder::emit_plain(ContentTimePeriod period, vector<dcp::SubtitleString> subtitles, dcp::SubtitleStandard valign_standard)
 {
        emit_plain_start (period.from, subtitles, valign_standard);
        emit_stop (period.to);