Add verify check for empty <Text> nodes in timed text.
authorCarl Hetherington <cth@carlh.net>
Tue, 10 Aug 2021 00:10:49 +0000 (02:10 +0200)
committerCarl Hetherington <cth@carlh.net>
Wed, 11 Aug 2021 21:35:46 +0000 (23:35 +0200)
src/verify.cc
src/verify.h
test/data/empty_but_with_children.xml [new file with mode: 0644]
test/data/empty_text.mxf [new file with mode: 0644]
test/data/empty_with_empty_children.xml [new file with mode: 0644]
test/verify_test.cc

index c8307aa5c85b711c0aa3861325618646927bd5bb..e7a55d4c833ff478d60a6bd5aac4c6b4d1471048 100644 (file)
@@ -778,6 +778,7 @@ verify_closed_caption_asset (
 }
 
 
+/** Check the timing of the individual subtitles and make sure there are no empty <Text> nodes */
 static
 void
 verify_text_details (
@@ -795,11 +796,12 @@ verify_text_details (
        auto too_close = false;
        auto too_early = false;
        auto reel_overlap = false;
+       auto empty_text = false;
        /* current reel start time (in editable units) */
        int64_t reel_offset = 0;
 
        std::function<void (cxml::ConstNodePtr, optional<int>, optional<Time>, int, bool)> parse;
-       parse = [&parse, &last_out, &too_short, &too_close, &too_early, &reel_offset](cxml::ConstNodePtr node, optional<int> tcr, optional<Time> start_time, int er, bool first_reel) {
+       parse = [&parse, &last_out, &too_short, &too_close, &too_early, &empty_text, &reel_offset](cxml::ConstNodePtr node, optional<int> tcr, optional<Time> start_time, int er, bool first_reel) {
                if (node->name() == "Subtitle") {
                        Time in (node->string_attribute("TimeIn"), tcr);
                        if (start_time) {
@@ -824,11 +826,26 @@ verify_text_details (
                                }
                        }
                        last_out = reel_offset + out.as_editable_units_floor(er);
-               } else {
-                       for (auto i: node->node_children()) {
-                               parse(i, tcr, start_time, er, first_reel);
+               } else if (node->name() == "Text") {
+                       std::function<bool (cxml::ConstNodePtr)> node_has_content = [&](cxml::ConstNodePtr node) {
+                               if (!node->content().empty()) {
+                                       return true;
+                               }
+                               for (auto i: node->node_children()) {
+                                       if (node_has_content(i)) {
+                                               return true;
+                                       }
+                               }
+                               return false;
+                       };
+                       if (!node_has_content(node)) {
+                               empty_text = true;
                        }
                }
+
+               for (auto i: node->node_children()) {
+                       parse(i, tcr, start_time, er, first_reel);
+               }
        };
 
        for (auto i = 0U; i < reels.size(); ++i) {
@@ -896,6 +913,12 @@ verify_text_details (
                        VerificationNote::Type::ERROR, VerificationNote::Code::SUBTITLE_OVERLAPS_REEL_BOUNDARY
                });
        }
+
+       if (empty_text) {
+               notes.push_back ({
+                       VerificationNote::Type::WARNING, VerificationNote::Code::EMPTY_TEXT
+               });
+       }
 }
 
 
@@ -1626,6 +1649,8 @@ dcp::note_to_string (VerificationNote note)
        }
        case VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED:
                return "Some aspect of this DCP could not be checked because it is encrypted.";
+       case VerificationNote::Code::EMPTY_TEXT:
+               return "There is an empty <Text> node in a subtitle or closed caption.";
        }
 
        return "";
index c0491422301c33c3142b0186f4142c2941907b39..045c12c2de0c8a869622888a84b51c7d72384bff 100644 (file)
@@ -381,6 +381,8 @@ public:
                MISMATCHED_TIMED_TEXT_DURATION,
                /** Something could not be verified because content is encrypted and no key is available */
                MISSED_CHECK_OF_ENCRYPTED,
+               /** Some timed-text XML has an empty <Text> node */
+               EMPTY_TEXT
        };
 
        VerificationNote (Type type, Code code)
diff --git a/test/data/empty_but_with_children.xml b/test/data/empty_but_with_children.xml
new file mode 100644 (file)
index 0000000..74f3ddb
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<DCSubtitle Version="1.0"><SubtitleID>04a77cb2-3d44-4fea-8817-ce9feb5ad79b</SubtitleID><MovieTitle>DCP</MovieTitle><ReelNumber>1</ReelNumber><Language>Unknown</Language><LoadFont Id="font0" URI="font_0.ttf"/><Font AspectAdjust="1.0" Color="FFFFFFFF" Effect="none" EffectColor="FF000000" Id="font0" Script="normal" Size="42" Underlined="no" Weight="normal"><Subtitle SpotNumber="1" TimeIn="00:00:00:208" TimeOut="00:00:06:031" FadeUpTime="0" FadeDownTime="0"><Text VAlign="bottom" VPosition="10"><Font Italic="no">This</Font><Font Italic="yes"> is </Font><Font Italic="no">wrong.</Font></Text></Subtitle></Font></DCSubtitle>
diff --git a/test/data/empty_text.mxf b/test/data/empty_text.mxf
new file mode 100644 (file)
index 0000000..b60c04a
Binary files /dev/null and b/test/data/empty_text.mxf differ
diff --git a/test/data/empty_with_empty_children.xml b/test/data/empty_with_empty_children.xml
new file mode 100644 (file)
index 0000000..1e58f20
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<DCSubtitle Version="1.0"><SubtitleID>04a77cb2-3d44-4fea-8817-ce9feb5ad79b</SubtitleID><MovieTitle>DCP</MovieTitle><ReelNumber>1</ReelNumber><Language>Unknown</Language><LoadFont Id="font0" URI="font_0.ttf"/><Font AspectAdjust="1.0" Color="FFFFFFFF" Effect="none" EffectColor="FF000000" Id="font0" Script="normal" Size="42" Underlined="no" Weight="normal"><Subtitle SpotNumber="1" TimeIn="00:00:00:208" TimeOut="00:00:06:031" FadeUpTime="0" FadeDownTime="0"><Text VAlign="bottom" VPosition="10"><Font Italic="no"></Font><Font Italic="yes"></Font><Font Italic="no"></Font></Text></Subtitle></Font></DCSubtitle>
index 59777fe59d025323d1c19b0702af87ae43807cc9..ec4dea9dba13a42a3ea907b39de594d03bcd9efd 100644 (file)
@@ -760,6 +760,63 @@ BOOST_AUTO_TEST_CASE (verify_invalid_smpte_subtitles)
 }
 
 
+BOOST_AUTO_TEST_CASE (verify_empty_text_node_in_subtitles)
+{
+       path const dir("build/test/verify_empty_text_node_in_subtitles");
+       prepare_directory (dir);
+       copy_file ("test/data/empty_text.mxf", dir / "subs.mxf");
+       auto asset = make_shared<dcp::SMPTESubtitleAsset>(dir / "subs.mxf");
+       auto reel_asset = make_shared<dcp::ReelSMPTESubtitleAsset>(asset, dcp::Fraction(24, 1), 192, 0);
+       auto cpl = write_dcp_with_single_asset (dir, reel_asset);
+
+       check_verify_result (
+               { dir },
+               {
+                       { dcp::VerificationNote::Type::WARNING, dcp::VerificationNote::Code::EMPTY_TEXT },
+                       { dcp::VerificationNote::Type::WARNING, dcp::VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME },
+                       { dcp::VerificationNote::Type::BV21_ERROR, dcp::VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE, canonical(dir / "subs.mxf") },
+                       { dcp::VerificationNote::Type::BV21_ERROR, dcp::VerificationNote::Code::MISSING_CPL_METADATA, cpl->id(), cpl->file().get() },
+               });
+}
+
+
+/** A <Text> node with no content except some <Font> nodes, which themselves do have content */
+BOOST_AUTO_TEST_CASE (verify_empty_text_node_in_subtitles_with_child_nodes)
+{
+       path const dir("build/test/verify_empty_text_node_in_subtitles_with_child_nodes");
+       prepare_directory (dir);
+       copy_file ("test/data/empty_but_with_children.xml", dir / "subs.xml");
+       auto asset = make_shared<dcp::InteropSubtitleAsset>(dir / "subs.xml");
+       auto reel_asset = make_shared<dcp::ReelInteropSubtitleAsset>(asset, dcp::Fraction(24, 1), 192, 0);
+       auto cpl = write_dcp_with_single_asset (dir, reel_asset, dcp::Standard::INTEROP);
+
+       check_verify_result (
+               { dir },
+               {
+                       { dcp::VerificationNote::Type::BV21_ERROR, dcp::VerificationNote::Code::INVALID_STANDARD },
+               });
+}
+
+
+/** A <Text> node with no content except some <Font> nodes, which themselves also have no content */
+BOOST_AUTO_TEST_CASE (verify_empty_text_node_in_subtitles_with_empty_child_nodes)
+{
+       path const dir("build/test/verify_empty_text_node_in_subtitles_with_empty_child_nodes");
+       prepare_directory (dir);
+       copy_file ("test/data/empty_with_empty_children.xml", dir / "subs.xml");
+       auto asset = make_shared<dcp::InteropSubtitleAsset>(dir / "subs.xml");
+       auto reel_asset = make_shared<dcp::ReelInteropSubtitleAsset>(asset, dcp::Fraction(24, 1), 192, 0);
+       auto cpl = write_dcp_with_single_asset (dir, reel_asset, dcp::Standard::INTEROP);
+
+       check_verify_result (
+               { dir },
+               {
+                       { dcp::VerificationNote::Type::BV21_ERROR, dcp::VerificationNote::Code::INVALID_STANDARD },
+                       { dcp::VerificationNote::Type::WARNING, dcp::VerificationNote::Code::EMPTY_TEXT },
+               });
+}
+
+
 BOOST_AUTO_TEST_CASE (verify_external_asset)
 {
        path const ov_dir("build/test/verify_external_asset");