diff options
| author | Carl Hetherington <cth@carlh.net> | 2022-03-15 15:32:49 +0100 |
|---|---|---|
| committer | Carl Hetherington <cth@carlh.net> | 2022-03-19 23:04:35 +0100 |
| commit | ec87fa55c5909ae85429fc5344469cf284e5f6b3 (patch) | |
| tree | 2ea1d5fb73c254dd43844a00796c41bbf7cbd22e | |
| parent | 2516e657c80da672d1ea1ef5b5a189ef8799c070 (diff) | |
Nearly pretty-print subtitle XML (though not in <Text> nodes).
This is an attempt to fix DoM bug #2205.
| -rw-r--r-- | src/interop_subtitle_asset.cc | 2 | ||||
| -rw-r--r-- | src/smpte_subtitle_asset.cc | 4 | ||||
| -rw-r--r-- | src/subtitle_asset.cc | 116 | ||||
| -rw-r--r-- | src/subtitle_asset.h | 6 | ||||
| -rw-r--r-- | test/shared_subtitle_test.cc | 38 |
5 files changed, 157 insertions, 9 deletions
diff --git a/src/interop_subtitle_asset.cc b/src/interop_subtitle_asset.cc index d5147284..bb0cad70 100644 --- a/src/interop_subtitle_asset.cc +++ b/src/interop_subtitle_asset.cc @@ -125,7 +125,7 @@ InteropSubtitleAsset::xml_as_string () const subtitles_as_xml (root, 250, Standard::INTEROP); - return doc.write_to_string ("UTF-8"); + return format_xml(doc, {}); } diff --git a/src/smpte_subtitle_asset.cc b/src/smpte_subtitle_asset.cc index f09d99f3..0693323c 100644 --- a/src/smpte_subtitle_asset.cc +++ b/src/smpte_subtitle_asset.cc @@ -344,8 +344,6 @@ SMPTESubtitleAsset::xml_as_string () const { xmlpp::Document doc; auto root = doc.create_root_node ("SubtitleReel"); - root->set_namespace_declaration (subtitle_smpte_ns); - root->set_namespace_declaration ("http://www.w3.org/2001/XMLSchema", "xs"); DCP_ASSERT (_xml_id); root->add_child("Id")->add_child_text("urn:uuid:" + *_xml_id); @@ -374,7 +372,7 @@ SMPTESubtitleAsset::xml_as_string () const subtitles_as_xml (root->add_child("SubtitleList"), _time_code_rate, Standard::SMPTE); - return doc.write_to_string ("UTF-8"); + return format_xml(doc, { {"", subtitle_smpte_ns}, {"xs", "http://www.w3.org/2001/XMLSchema"} }); } diff --git a/src/subtitle_asset.cc b/src/subtitle_asset.cc index 22781196..de6332e6 100644 --- a/src/subtitle_asset.cc +++ b/src/subtitle_asset.cc @@ -54,18 +54,20 @@ #include <boost/algorithm/string.hpp> #include <boost/lexical_cast.hpp> #include <boost/shared_array.hpp> +#include <algorithm> -using std::dynamic_pointer_cast; -using std::string; -using std::cout; using std::cerr; +using std::cout; +using std::dynamic_pointer_cast; +using std::make_shared; using std::map; +using std::pair; using std::shared_ptr; +using std::string; using std::vector; -using std::make_shared; -using boost::optional; using boost::lexical_cast; +using boost::optional; using namespace dcp; @@ -799,3 +801,107 @@ SubtitleAsset::fix_empty_font_ids () } } } + + +struct State +{ + int indent; + string xml; + int disable_formatting; +}; + + +static +void +format_xml_node (xmlpp::Node const* node, State& state) +{ + if (auto text_node = dynamic_cast<const xmlpp::TextNode*>(node)) { + state.xml += text_node->get_content(); + } else if (auto element = dynamic_cast<const xmlpp::Element*>(node)) { + ++state.indent; + + auto children = element->get_children(); + auto const should_disable_formatting = + std::any_of( + children.begin(), children.end(), + [](xmlpp::Node const* node) { return static_cast<bool>(dynamic_cast<const xmlpp::ContentNode*>(node)); } + ) || element->get_name() == "Text"; + + if (!state.disable_formatting) { + state.xml += "\n" + string(state.indent * 2, ' '); + } + + state.xml += "<" + element->get_name(); + + for (auto attribute: element->get_attributes()) { + state.xml += String::compose(" %1=\"%2\"", attribute->get_name().raw(), attribute->get_value().raw()); + } + + if (children.empty()) { + state.xml += "/>"; + } else { + state.xml += ">"; + + if (should_disable_formatting) { + ++state.disable_formatting; + } + + for (auto child: children) { + format_xml_node(child, state); + } + + if (!state.disable_formatting) { + state.xml += "\n" + string(state.indent * 2, ' '); + } + + state.xml += String::compose("</%1>", element->get_name().raw()); + + if (should_disable_formatting) { + --state.disable_formatting; + } + } + + --state.indent; + } +} + + +/** Format XML much as write_to_string_formatted() would do, except without adding any white space + * to <Text> nodes. This is an attempt to avoid changing what is actually displayed as subtitles + * while also formatting the XML in such a way as to avoid DoM bug 2205. + * + * namespace is a list of namespaces for the root node; it would be nicer to set these up with + * set_namespace_declaration in the caller and then to extract them here but I couldn't find a way + * to get all namespaces with the libxml++ API. + */ +string +SubtitleAsset::format_xml (xmlpp::Document const& document, vector<pair<string, string>> const& namespaces) +{ + auto root = document.get_root_node(); + + State state = {}; + state.xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<" + root->get_name(); + + for (auto const& ns: namespaces) { + if (ns.first.empty()) { + state.xml += String::compose(" xmlns=\"%1\"", ns.second); + } else { + state.xml += String::compose(" xmlns:%1=\"%2\"", ns.first, ns.second); + } + } + + for (auto attribute: root->get_attributes()) { + state.xml += String::compose(" %1=\"%2\"", attribute->get_name().raw(), attribute->get_value().raw()); + } + + state.xml += ">"; + + for (auto child: document.get_root_node()->get_children()) { + format_xml_node(child, state); + } + + state.xml += String::compose("\n</%1>\n", root->get_name().raw()); + + return state.xml; +} + diff --git a/src/subtitle_asset.h b/src/subtitle_asset.h index f51906e2..88b5378c 100644 --- a/src/subtitle_asset.h +++ b/src/subtitle_asset.h @@ -48,9 +48,13 @@ #include <libcxml/cxml.h> #include <boost/shared_array.hpp> #include <map> +#include <string> +#include <utility> +#include <vector> namespace xmlpp { + class Document; class Element; } @@ -128,6 +132,8 @@ public: return _raw_xml; } + static std::string format_xml (xmlpp::Document const& document, std::vector<std::pair<std::string, std::string>> const& namespaces); + protected: friend struct ::interop_dcp_font_test; friend struct ::smpte_dcp_font_test; diff --git a/test/shared_subtitle_test.cc b/test/shared_subtitle_test.cc index f18b03db..2231d631 100644 --- a/test/shared_subtitle_test.cc +++ b/test/shared_subtitle_test.cc @@ -167,3 +167,41 @@ BOOST_AUTO_TEST_CASE (pull_fonts_test3) BOOST_CHECK_EQUAL (sub1->font._values["size"], "42"); } + +/* Check that subtitle XML is prettily formatted without inserting any white space into + * <Text> node, which I think has the potential to alter appearance. + */ +BOOST_AUTO_TEST_CASE (format_xml_test1) +{ + xmlpp::Document doc; + auto root = doc.create_root_node("Foo"); + root->add_child("Empty"); + root->add_child("Text")->add_child_text("Hello world"); + root->add_child("Font")->add_child("Text")->add_child_text("Say what"); + auto fred = root->add_child("Text")->add_child("Font"); + fred->set_attribute("bob", "job"); + fred->add_child_text("Fred"); + fred->add_child("Text")->add_child_text("Jim"); + fred->add_child_text("Sheila"); + BOOST_REQUIRE_EQUAL (dcp::SubtitleAsset::format_xml(doc, { {"", "fred"}, {"jim", "sheila"} }), +"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +"<Foo xmlns=\"fred\" xmlns:jim=\"sheila\">\n" +" <Empty/>\n" +" <Text>Hello world</Text>\n" +" <Font>\n" +" <Text>Say what</Text>\n" +" </Font>\n" +" <Text><Font bob=\"job\">Fred<Text>Jim</Text>Sheila</Font></Text>\n" +"</Foo>\n"); +} + + +BOOST_AUTO_TEST_CASE (format_xml_test2) +{ + xmlpp::DomParser parser; + auto path = private_test / "DKH_UT_EN20160601def.xml"; + parser.parse_file(path.string().c_str()); + auto document = parser.get_document(); + check_xml (dcp::file_to_string(private_test / "DKH_UT_EN20160601def.reformatted.xml"), dcp::SubtitleAsset::format_xml(*document, {}), {}); +} + |
