}
+/** Strip from some UTF-8 text everything that may not appear in an XML 1.0 document.
+ *
+ *  https://www.w3.org/TR/REC-xml/#charsets allows only
+ *  #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
+ *  so below 0x20 only tab, line feed and carriage return survive.
+ *
+ *  Bytes which do not form a well-formed UTF-8 sequence (stray continuation bytes,
+ *  invalid lead bytes, truncated or overlong sequences) are dropped as well, since
+ *  they cannot be part of a valid XML document either.
+ *
+ *  @param text UTF-8 encoded text to clean.
+ *  @return @p text with all disallowed characters and malformed bytes removed.
+ */
+string
+TextDecoder::remove_invalid_characters_for_xml(string text)
+{
+	string output;
+	output.reserve(text.length());
+
+	auto const length = text.length();
+	size_t i = 0;
+	while (i < length) {
+		/* Work with unsigned values so that the result does not depend on whether
+		 * plain char is signed on this platform.
+		 */
+		auto const lead = static_cast<unsigned char>(text[i]);
+
+		if (lead < 0x80) {
+			/* Single-byte (ASCII) code point */
+			if (lead >= 0x20 || lead == 0x9 || lead == 0xa || lead == 0xd) {
+				output += text[i];
+			}
+			++i;
+			continue;
+		}
+
+		/* Work out how long the sequence claims to be, the payload bits carried by
+		 * the lead byte, and the smallest code point that legitimately needs this
+		 * many bytes (anything smaller is an overlong encoding).
+		 */
+		size_t sequence_length = 0;
+		char32_t code_point = 0;
+		char32_t minimum = 0;
+		if ((lead & 0xe0) == 0xc0) {
+			sequence_length = 2;
+			code_point = lead & 0x1f;
+			minimum = 0x80;
+		} else if ((lead & 0xf0) == 0xe0) {
+			sequence_length = 3;
+			code_point = lead & 0x0f;
+			minimum = 0x800;
+		} else if ((lead & 0xf8) == 0xf0) {
+			sequence_length = 4;
+			code_point = lead & 0x07;
+			minimum = 0x10000;
+		}
+
+		/* The whole sequence must be present; never read past the end of the string */
+		bool valid = sequence_length != 0 && sequence_length <= length - i;
+		for (size_t j = 1; valid && j < sequence_length; ++j) {
+			auto const continuation = static_cast<unsigned char>(text[i + j]);
+			valid = (continuation & 0xc0) == 0x80;
+			code_point = (code_point << 6) | (continuation & 0x3f);
+		}
+
+		if (valid) {
+			/* code_point >= minimum rejects overlong forms; the ranges are the
+			 * multi-byte part of the XML Char production (no surrogates, no
+			 * U+FFFE / U+FFFF, nothing above U+10FFFF).
+			 */
+			valid = code_point >= minimum && (
+				code_point <= 0xd7ff ||
+				(code_point >= 0xe000 && code_point <= 0xfffd) ||
+				(code_point >= 0x10000 && code_point <= 0x10ffff)
+				);
+		}
+
+		if (!valid) {
+			/* Drop only this byte and resynchronise on the next one, so that a
+			 * broken lead byte cannot swallow good characters that follow it.
+			 */
+			++i;
+			continue;
+		}
+
+		output.append(text, i, sequence_length);
+		i += sequence_length;
+	}
+
+	return output;
+}
+
+
void
-TextDecoder::emit_plain_start (ContentTime from, vector<dcp::SubtitleString> subtitles, dcp::Standard valign_standard)
+TextDecoder::emit_plain_start(ContentTime from, vector<dcp::SubtitleString> subtitles, dcp::SubtitleStandard valign_standard)
{
vector<StringText> string_texts;
content()->get_font(subtitle.font().get_value_or("")),
valign_standard
);
- string_text.set_text(string_text.text());
+ string_text.set_text(remove_invalid_characters_for_xml(string_text.text()));
set_forced_appearance(content(), string_text);
string_texts.push_back(string_text);
}
switch (line.vertical_position.reference.get_value_or(sub::BOTTOM_OF_SCREEN)) {
case sub::BOTTOM_OF_SCREEN:
case sub::TOP_OF_SUBTITLE:
- /* This 0.9 is an arbitrary value to lift the bottom sub off the bottom
+ /* This 0.1 is an arbitrary value to lift the bottom sub off the bottom
of the screen a bit to a pleasing degree.
*/
- v_position = 0.9 -
+ v_position = 0.1 +
(1 + bottom_line.get() - line.vertical_position.line.get()) * multiplier;
- v_align = dcp::VAlign::TOP;
+ /* Align our subtitles to the bottom of the screen, because if we are making a SMPTE
+ * DCP and the projection system uses the wrong standard to interpret vertical position,
+ * a bottom-aligned subtitle will be less wrong than a top-aligned one. This is because
+ * in the top-aligned case the difference will be the distance between bbox top and
+ * baseline, but in the bottom-aligned case the difference will be between bbox bottom
+ * and baseline (which is shorter).
+ */
+ v_align = dcp::VAlign::BOTTOM;
break;
case sub::TOP_OF_SCREEN:
/* This 0.1 is another fudge factor to bring the top line away from the top of the screen a little */
v_align,
0,
dcp::Direction::LTR,
- block.text,
+ remove_invalid_characters_for_xml(block.text),
dcp::Effect::NONE,
dcp_colour(block.effect_colour.get_value_or(sub::Colour(0, 0, 0))),
/* Hack: we should use subtitle.fade_up and subtitle.fade_down here
dcp_subtitle,
content()->outline_width(),
content()->get_font(block.font.get_value_or("")),
- dcp::Standard::SMPTE
+ dcp::SubtitleStandard::SMPTE_2014
);
set_forced_appearance(content(), string_text);
string_texts.push_back(string_text);
void
-TextDecoder::emit_plain (ContentTimePeriod period, vector<dcp::SubtitleString> subtitles, dcp::Standard valign_standard)
+TextDecoder::emit_plain(ContentTimePeriod period, vector<dcp::SubtitleString> subtitles, dcp::SubtitleStandard valign_standard)
{
emit_plain_start (period.from, subtitles, valign_standard);
emit_stop (period.to);