/** Remove characters from a UTF-8 string which are not allowed in XML 1.0.
 *
 *  https://www.w3.org/TR/REC-xml/#charsets says that, below 0x20, XML may only
 *  contain 0x9, 0xa and 0xd.  Multi-byte UTF-8 sequences are copied through
 *  unchanged; single bytes are kept only if they are ASCII and allowed by the
 *  rule above.
 *
 *  Malformed input is handled conservatively: a multi-byte sequence that is cut
 *  short by the end of the string is dropped (rather than reading past the end),
 *  and bytes >= 0x80 that do not start a sequence are dropped, since neither can
 *  be part of well-formed UTF-8 (and hence of well-formed XML).
 *
 *  @param text UTF-8 encoded text.
 *  @return @p text with the disallowed bytes removed.
 */
std::string
TextDecoder::remove_invalid_characters_for_xml(std::string text)
{
	std::string output;
	output.reserve(text.length());

	auto const length = text.length();
	size_t i = 0;
	while (i < length) {
		/* Work on the unsigned value so that the comparisons below do not
		 * depend on whether plain char is signed on this platform.
		 */
		auto const c = static_cast<unsigned char>(text[i]);

		size_t sequence_length = 1;
		if ((c & 0xe0) == 0xc0) {
			// start of 2-byte code point
			sequence_length = 2;
		} else if ((c & 0xf0) == 0xe0) {
			// start of 3-byte code point
			sequence_length = 3;
		} else if ((c & 0xf8) == 0xf0) {
			// start of 4-byte code point
			sequence_length = 4;
		}

		if (sequence_length > 1) {
			if (i + sequence_length > length) {
				/* The sequence is truncated by the end of the string; there is
				 * nothing valid left to copy.
				 */
				break;
			}
			output.append(text, i, sequence_length);
		} else if (c < 0x80 && (c >= 0x20 || c == 0x9 || c == 0xa || c == 0xd)) {
			output += text[i];
		}

		i += sequence_length;
	}

	return output;
}
*/ - v_position = 0.9 - + v_position = 0.1 + (1 + bottom_line.get() - line.vertical_position.line.get()) * multiplier; - v_align = dcp::VAlign::TOP; + /* Align our subtitles to the bottom of the screen, because if we are making a SMPTE + * DCP and the projection system uses the wrong standard to interpret vertical position, + * a bottom-aligned subtitle will be less wrong than a top-aligned one. This is because + * in the top-aligned case the difference will be the distance between bbox top an + * baseline, but in the bottom-aligned case the difference will be between bbox bottom + * and baseline (which is shorter). + */ + v_align = dcp::VAlign::BOTTOM; break; case sub::TOP_OF_SCREEN: /* This 0.1 is another fudge factor to bring the top line away from the top of the screen a little */ @@ -240,12 +272,16 @@ TextDecoder::emit_plain_start (ContentTime from, sub::Subtitle const & sub_subti content by the other emit_plain_start() above. */ + auto dcp_colour = [](sub::Colour const& c) { + return dcp::Colour(lrintf(c.r * 255), lrintf(c.g * 255), lrintf(c.b * 255)); + }; + auto dcp_subtitle = dcp::SubtitleString( optional(), block.italic, block.bold, block.underline, - block.colour.dcp(), + dcp_colour(block.colour), block.font_size.points (72 * 11), 1.0, dcp::Time (from.seconds(), 1000), @@ -255,10 +291,11 @@ TextDecoder::emit_plain_start (ContentTime from, sub::Subtitle const & sub_subti h_align, v_position, v_align, + 0, dcp::Direction::LTR, - escape_text(block.text), + remove_invalid_characters_for_xml(block.text), dcp::Effect::NONE, - block.effect_colour.get_value_or(sub::Colour(0, 0, 0)).dcp(), + dcp_colour(block.effect_colour.get_value_or(sub::Colour(0, 0, 0))), /* Hack: we should use subtitle.fade_up and subtitle.fade_down here but the times of these often don't have a frame rate associated with them so the sub::Time won't convert them to milliseconds without @@ -274,7 +311,7 @@ TextDecoder::emit_plain_start (ContentTime from, sub::Subtitle const & sub_subti 
dcp_subtitle, content()->outline_width(), content()->get_font(block.font.get_value_or("")), - dcp::Standard::SMPTE + dcp::SubtitleStandard::SMPTE_2014 ); set_forced_appearance(content(), string_text); string_texts.push_back(string_text); @@ -294,7 +331,7 @@ TextDecoder::emit_stop (ContentTime to) void -TextDecoder::emit_plain (ContentTimePeriod period, vector subtitles, dcp::Standard valign_standard) +TextDecoder::emit_plain(ContentTimePeriod period, vector subtitles, dcp::SubtitleStandard valign_standard) { emit_plain_start (period.from, subtitles, valign_standard); emit_stop (period.to);