/*
    Copyright (C) 2018-2021 Carl Hetherington

    This file is part of libdcp.

    libdcp is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    libdcp is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with libdcp.  If not, see <http://www.gnu.org/licenses/>.

    In addition, as a special exception, the copyright holders give
    permission to link the code of portions of this program with the
    OpenSSL library under certain conditions as described in each
    individual source file, and distribute linked combinations
    including the two.

    You must obey the GNU General Public License in all respects
    for all of the code used other than OpenSSL.  If you modify
    file(s) with this exception, you may extend this exception to your
    version of the file(s), but you are not obligated to do so.  If you
    do not wish to do so, delete this exception statement from your
    version.  If you delete this exception statement from all source
    files in the program, then also delete it here.
*/


/** @file  src/verify.cc
 *  @brief dcp::verify() method and associated code
 */


#include "compose.hpp"
#include "cpl.h"
#include "dcp.h"
#include "exceptions.h"
#include "interop_subtitle_asset.h"
#include "mono_picture_asset.h"
#include "mono_picture_frame.h"
#include "raw_convert.h"
#include "reel.h"
#include "reel_closed_caption_asset.h"
#include "reel_interop_subtitle_asset.h"
#include "reel_markers_asset.h"
#include "reel_picture_asset.h"
#include "reel_sound_asset.h"
#include "reel_smpte_subtitle_asset.h"
#include "reel_subtitle_asset.h"
#include "smpte_subtitle_asset.h"
#include "stereo_picture_asset.h"
#include "stereo_picture_frame.h"
#include "verify.h"
#include "verify_j2k.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include


using std::list;
using std::vector;
using std::string;
using std::cout;
using std::map;
using std::max;
using std::shared_ptr;
using std::make_shared;
using boost::optional;
using boost::function;
using std::dynamic_pointer_cast;


using namespace dcp;
using namespace xercesc;


/** Convert a xerces XMLCh string to a std::string.
 *  @param a String to transcode.
 *  @return Transcoded copy of @p a, or an empty string if XMLString::transcode() gave us nothing.
 */
static
string
xml_ch_to_string (XMLCh const * a)
{
    char* x = XMLString::transcode(a);
    if (!x) {
        /* Constructing a std::string from a null pointer is undefined behaviour */
        return "";
    }
    string const o(x);
    XMLString::release(&x);
    return o;
}


/** A copy of the interesting details of a SAXParseException, held in std types */
class XMLValidationError
{
public:
    /** @param e Exception to copy the message, position and public/system IDs from;
     *  an ID that xerces reports as null is stored as an empty string.
     */
    XMLValidationError (SAXParseException const & e)
        : _message (xml_ch_to_string(e.getMessage()))
        , _line (e.getLineNumber())
        , _column (e.getColumnNumber())
        , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
        , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
    {

    }

    string message () const {
        return _message;
    }

    uint64_t line () const {
        return _line;
    }

    uint64_t column () const {
        return _column;
    }

    string public_id () const {
        return _public_id;
    }

    string system_id () const {
        return _system_id;
    }

private:
    string _message;
    uint64_t _line;
    uint64_t _column;
    string _public_id;
    string _system_id;
};


/** xerces ErrorHandler which collects warnings, errors and fatal errors into one list */
class DCPErrorHandler : public ErrorHandler
{
public:
    void warning(const SAXParseException& e) override
    {
        maybe_add (XMLValidationError(e));
    }

    void error(const SAXParseException& e) override
    {
        maybe_add (XMLValidationError(e));
    }

    void fatalError(const SAXParseException& e) override
    {
        maybe_add (XMLValidationError(e));
    }

    void resetErrors() override {
        _errors.clear ();
    }

    /** @return a copy of everything collected so far */
    list errors () const {
        return _errors;
    }

private:
    /** Store @p e unless it is the one kind of message that we deliberately ignore */
    void maybe_add (XMLValidationError e)
    {
        /* XXX: nasty hack */
        if (
            e.message().find("schema document") != string::npos &&
            e.message().find("has different target namespace from the one specified in instance document") != string::npos
            ) {
            return;
        }

        _errors.push_back (e);
    }

    list _errors;
};


/** Holder for a std::string transcoded to XMLCh; the buffer is released on destruction */
class StringToXMLCh
{
public:
    StringToXMLCh (string a)
    {
        _buffer = XMLString::transcode(a.c_str());
    }

    StringToXMLCh (StringToXMLCh const&) = delete;
    StringToXMLCh& operator= (StringToXMLCh const&) = delete;

    ~StringToXMLCh ()
    {
        XMLString::release (&_buffer);
    }

    /** @return the transcoded buffer, which is only valid while this object lives */
    XMLCh const * get () const {
        return _buffer;
    }

private:
    XMLCh* _buffer;
};


/** xerces EntityResolver which maps system IDs to files inside a local directory */
class LocalFileResolver : public EntityResolver
{
public:
    /** @param xsd_dtd_directory Directory that every resolved file name is appended to */
    LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
        : _xsd_dtd_directory (xsd_dtd_directory)
    {
        /* XXX: I'm not clear on what things need to be in this list; some XSDs are apparently, magically
         * found without being here.
         */
        add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
        add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
        add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
        add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
        add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
        add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
        add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
        add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
        add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
        add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "SMPTE-428-7-2010-DCST.xsd");
        add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
        add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
        add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
    }

    /** @param system_id System ID to resolve.
     *  @return A new LocalFileInputSource for the mapped file name (or for @p system_id itself
     *  if it has no mapping) inside the XSD/DTD directory, or nullptr if @p system_id is null.
     */
    InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id) override
    {
        if (!system_id) {
            return nullptr;
        }

        auto const system_id_str = xml_ch_to_string (system_id);
        auto p = _xsd_dtd_directory;
        /* One lookup: use the mapped name if there is one, otherwise the system ID as given */
        auto const known = _files.find(system_id_str);
        if (known == _files.end()) {
            p /= system_id_str;
        } else {
            p /= known->second;
        }
        StringToXMLCh ch (p.string());
        return new LocalFileInputSource(ch.get());
    }

private:
    void add (string uri, string file)
    {
        _files[uri] = file;
    }

    std::map _files;
    boost::filesystem::path _xsd_dtd_directory;
};


/** Parse the XML file at @p xml with @p parser */
static void
parse (XercesDOMParser& parser, boost::filesystem::path xml)
{
    parser.parse(xml.string().c_str());
}


/** Parse the XML document held in the string @p xml with @p parser */
static void
parse (XercesDOMParser& parser, string xml)
{
    xercesc::MemBufInputSource buf(reinterpret_cast(xml.c_str()), xml.size(), "");
    parser.parse(buf);
}


/** Calls XMLPlatformUtils::Terminate() when it goes out of scope, so that a successful
 *  XMLPlatformUtils::Initialize() is balanced even if an exception is thrown.
 */
class XercesTerminateGuard
{
public:
    XercesTerminateGuard () = default;
    XercesTerminateGuard (XercesTerminateGuard const&) = delete;
    XercesTerminateGuard& operator= (XercesTerminateGuard const&) = delete;

    ~XercesTerminateGuard ()
    {
        XMLPlatformUtils::Terminate ();
    }
};


/** Validate some XML with xerces and add a VerificationNote for every problem reported.
 *  @param xml Whatever one of the parse() overloads accepts (a file path or a string of XML).
 *  @param xsd_dtd_directory Directory given to LocalFileResolver for finding schemas.
 *  @param notes Filled with one ERROR / INVALID_XML note per collected validation error.
 *  @throw MiscError if xerces cannot be initialised or throws while parsing.
 */
template
void
validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, vector& notes)
{
    try {
        XMLPlatformUtils::Initialize ();
    } catch (XMLException& e) {
        throw MiscError ("Failed to initialise xerces library");
    }

    DCPErrorHandler error_handler;

    /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
    {
        /* Declared first so that it is destroyed last: Terminate() then runs after the parser
         * has gone, whether we leave this scope normally or because of an exception.
         */
        XercesTerminateGuard terminate_guard;

        XercesDOMParser parser;
        parser.setValidationScheme(XercesDOMParser::Val_Always);
        parser.setDoNamespaces(true);
        parser.setDoSchema(true);

        vector schema;
        schema.push_back("xml.xsd");
        schema.push_back("xmldsig-core-schema.xsd");
        schema.push_back("SMPTE-429-7-2006-CPL.xsd");
        schema.push_back("SMPTE-429-8-2006-PKL.xsd");
        schema.push_back("SMPTE-429-9-2007-AM.xsd");
        schema.push_back("Main-Stereo-Picture-CPL.xsd");
        schema.push_back("PROTO-ASDCP-CPL-20040511.xsd");
        schema.push_back("PROTO-ASDCP-PKL-20040311.xsd");
        schema.push_back("PROTO-ASDCP-AM-20040311.xsd");
        schema.push_back("DCSubtitle.v1.mattsson.xsd");
        schema.push_back("DCDMSubtitle-2010.xsd");
        schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
        schema.push_back("SMPTE-429-16.xsd");
        schema.push_back("Dolby-2012-AD.xsd");
        schema.push_back("SMPTE-429-10-2008.xsd");
        schema.push_back("xlink.xsd");
        schema.push_back("SMPTE-335-2012.xsd");
        schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
        schema.push_back("isdcf-mca.xsd");
        schema.push_back("SMPTE-429-12-2008.xsd");

        /* XXX: I'm not especially clear what this is for, but it seems to be necessary.
         * Schemas that are not mentioned in this list are not read, and the things
         * they describe are not checked.
         */
        string locations;
        for (auto i: schema) {
            locations += String::compose("%1 %1 ", i, i);
        }

        parser.setExternalSchemaLocation(locations.c_str());
        parser.setValidationSchemaFullChecking(true);
        parser.setErrorHandler(&error_handler);

        LocalFileResolver resolver (xsd_dtd_directory);
        parser.setEntityResolver(&resolver);

        try {
            parser.resetDocumentPool();
            parse(parser, xml);
        } catch (XMLException& e) {
            throw MiscError(xml_ch_to_string(e.getMessage()));
        } catch (DOMException& e) {
            throw MiscError(xml_ch_to_string(e.getMessage()));
        } catch (...) {
            throw MiscError("Unknown exception from xerces");
        }
    }

    for (auto i: error_handler.errors()) {
        notes.push_back ({
            VerificationNote::Type::ERROR,
            VerificationNote::Code::INVALID_XML,
            i.message(),
            boost::trim_copy(i.public_id() + " " + i.system_id()),
            i.line()
        });
    }
}


/** Outcome of verify_asset() */
enum class VerifyAssetResult {
    GOOD,
    CPL_PKL_DIFFER,
    BAD
};


/** Compare an asset's computed hash with the hashes recorded for it.
 *  @param dcp DCP whose PKLs are searched for the asset's hash.
 *  @param reel_file_asset Reel asset to check.
 *  @param progress Passed on to the asset's hash() call.
 *  @return CPL_PKL_DIFFER if the reel asset carries a hash which differs from the PKL's,
 *  otherwise BAD if the computed hash differs from the PKL's, otherwise GOOD.
 */
static VerifyAssetResult
verify_asset (shared_ptr dcp, shared_ptr reel_file_asset, function progress)
{
    auto const actual_hash = reel_file_asset->asset_ref()->hash(progress);

    auto pkls = dcp->pkls();
    /* We've read this DCP in so it must have at least one PKL */
    DCP_ASSERT (!pkls.empty());

    auto asset = reel_file_asset->asset_ref().asset();

    /* Take the hash from the first PKL that has one for this asset */
    optional pkl_hash;
    for (auto i: pkls) {
        pkl_hash = i->hash (reel_file_asset->asset_ref()->id());
        if (pkl_hash) {
            break;
        }
    }

    DCP_ASSERT (pkl_hash);

    auto cpl_hash = reel_file_asset->hash();
    if (cpl_hash && *cpl_hash != *pkl_hash) {
        return VerifyAssetResult::CPL_PKL_DIFFER;
    }

    if (actual_hash != *pkl_hash) {
        return VerifyAssetResult::BAD;
    }

    return VerifyAssetResult::GOOD;
}


/** Add a BV21_ERROR / INVALID_LANGUAGE note to @p notes if constructing a LanguageTag
 *  from @p tag throws LanguageTagError.
 */
void
verify_language_tag (string tag, vector& notes)
{
    try {
        LanguageTag test (tag);
    } catch (LanguageTagError &) {
        notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, tag});
    }
}


/** Read every frame of a picture asset, run verify_j2k() on the frames when the asset is
 *  unencrypted or has a key, and check the size of the largest frame.
 *  @param reel_file_asset Reel asset whose picture asset is read.
 *  @param file Stored in any frame-size note that is added.
 *  @param notes Added to; a note from verify_j2k() is only added if an equal one is not already there.
 *  @param progress Called after each frame with (frame index / intrinsic duration).
 */
static void
verify_picture_asset (shared_ptr reel_file_asset, boost::filesystem::path file, vector& notes, function progress)
{
    int biggest_frame = 0;
    auto asset = dynamic_pointer_cast(reel_file_asset->asset_ref().asset());
    auto const duration = asset->intrinsic_duration ();

    auto check_and_add = [&notes](vector const& j2k_notes) {
        for (auto i: j2k_notes) {
            if (find(notes.begin(), notes.end(), i) == notes.end()) {
                notes.push_back (i);
            }
        }
    };

    if (auto mono_asset = dynamic_pointer_cast(reel_file_asset->asset_ref().asset())) {
        auto reader = mono_asset->start_read ();
        for (int64_t i = 0; i < duration; ++i) {
            auto frame = reader->get_frame (i);
            biggest_frame = max(biggest_frame, frame->size());
            if (!mono_asset->encrypted() || mono_asset->key()) {
                vector j2k_notes;
                verify_j2k (frame, j2k_notes);
                check_and_add (j2k_notes);
            }
            progress (float(i) / duration);
        }
    } else if (auto stereo_asset = dynamic_pointer_cast(asset)) {
        auto reader = stereo_asset->start_read ();
        for (int64_t i = 0; i < duration; ++i) {
            auto frame = reader->get_frame (i);
            biggest_frame = max(biggest_frame, max(frame->left()->size(), frame->right()->size()));
            /* mono_asset is null in this branch, so the key must be asked of stereo_asset */
            if (!stereo_asset->encrypted() || stereo_asset->key()) {
                vector j2k_notes;
                verify_j2k (frame->left(), j2k_notes);
                verify_j2k (frame->right(), j2k_notes);
                check_and_add (j2k_notes);
            }
            progress (float(i) / duration);
        }
    }

    /* Frame sizes in bytes which correspond to 250,000,000 and 230,000,000 bits per second at
     * this asset's edit rate.  These are worked out on every call (not held in statics) so
     * that an asset with a different edit rate gets its own limits.
     */
    int const max_frame = rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
    int const risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
    if (biggest_frame > max_frame) {
        notes.push_back ({
            VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
        });
    } else if (biggest_frame > risky_frame) {
        notes.push_back ({
            VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
        });
    }
}


/** Check the hash, frame sizes, pixel size and frame rate of a reel's main picture asset.
 *  @param dcp DCP that the asset belongs to (passed to verify_asset()).
 *  @param reel_asset Reel picture asset to check.
 *  @param stage Called with a description and the asset's file before each stage of checking.
 *  @param progress Passed on to verify_asset() and verify_picture_asset().
 *  @param notes Added to when problems are found.
 */
static void
verify_main_picture_asset (
    shared_ptr dcp,
    shared_ptr reel_asset,
    function)> stage,
    function progress,
    vector& notes
    )
{
    auto asset = reel_asset->asset();
    auto const file = *asset->file();
    stage ("Checking picture asset hash", file);
    auto const r = verify_asset (dcp, reel_asset, progress);
    switch (r) {
        case VerifyAssetResult::BAD:
            notes.push_back ({
                VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_PICTURE_HASH, file
            });
            break;
        case VerifyAssetResult::CPL_PKL_DIFFER:
            notes.push_back ({
                VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_PICTURE_HASHES, file
            });
            break;
        default:
            break;
    }
    stage ("Checking picture frame sizes", asset->file());
    verify_picture_asset (reel_asset, file, notes, progress);

    /* Only flat/scope allowed by Bv2.1 */
    if (
        asset->size() != Size(2048, 858) &&
        asset->size() != Size(1998, 1080) &&
        asset->size() != Size(4096, 1716) &&
        asset->size() != Size(3996, 2160)) {
        notes.push_back({
            VerificationNote::Type::BV21_ERROR,
            VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS,
            String::compose("%1x%2", asset->size().width, asset->size().height),
            file
        });
    }

    /* Only 24, 25, 48fps allowed for 2K */
    if (
        (asset->size() == Size(2048, 858) || asset->size() == Size(1998, 1080)) &&
        (asset->edit_rate() != Fraction(24, 1) && asset->edit_rate() != Fraction(25, 1) && asset->edit_rate() != Fraction(48, 1))
       ) {
        notes.push_back({
            VerificationNote::Type::BV21_ERROR,
            VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K,
            String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
            file
        });
    }

    if (asset->size() == Size(4096, 1716) || asset->size() == Size(3996, 2160)) {
        /* Only 24fps allowed for 4K */
        if (asset->edit_rate() != Fraction(24, 1)) {
            notes.push_back({
                VerificationNote::Type::BV21_ERROR,
                VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K,
                String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
                file
            });
        }

        /* Only 2D allowed for 4K */
        if (dynamic_pointer_cast(asset)) {
            notes.push_back({
                VerificationNote::Type::BV21_ERROR,
                VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D,
                String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
                file
            });
        }
    }
}


/** Check the hash, language tag and sampling rate of a reel's main sound asset.
 *  @param dcp DCP that the asset belongs to (passed to verify_asset()).
 *  @param reel_asset Reel sound asset to check.
 *  @param stage Called with a description and the asset's file before each stage of checking.
 *  @param progress Passed on to verify_asset().
 *  @param notes Added to when problems are found.
 */
static void
verify_main_sound_asset (
    shared_ptr dcp,
    shared_ptr reel_asset,
    function)> stage,
    function progress,
    vector& notes
    )
{
    auto asset = reel_asset->asset();
    stage ("Checking sound asset hash", asset->file());
    auto const r = verify_asset (dcp, reel_asset, progress);
    switch (r) {
        case VerifyAssetResult::BAD:
            notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_SOUND_HASH, *asset->file()});
            break;
        case VerifyAssetResult::CPL_PKL_DIFFER:
            notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_SOUND_HASHES, *asset->file()});
            break;
        default:
            break;
    }

    stage ("Checking sound asset metadata", asset->file());

    if (auto lang = asset->language()) {
        verify_language_tag (*lang, notes);
    }
    if (asset->sampling_rate() != 48000) {
        notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SOUND_FRAME_RATE, fmt::to_string(asset->sampling_rate()), *asset->file()});
    }
}


/** Check the language tag (if there is one) and the entry point of a reel's main subtitle:
 *  a note is added if the entry point is missing, or present but non-zero.
 */
static void
verify_main_subtitle_reel (shared_ptr reel_asset, vector& notes)
{
    /* XXX: is Language compulsory? */
    if (reel_asset->language()) {
        verify_language_tag (*reel_asset->language(), notes);
    }

    if (!reel_asset->entry_point()) {
        notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT, reel_asset->id() });
    } else if (reel_asset->entry_point().get()) {
        notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT, reel_asset->id() });
    }
}


/** Check the language tag (if there is one) and the entry point of a reel's closed caption:
 *  a note is added if the entry point is missing, or present but non-zero.
 */
static void
verify_closed_caption_reel (shared_ptr reel_asset, vector& notes)
{
    /* XXX: is Language compulsory? */
    if (reel_asset->language()) {
        verify_language_tag (*reel_asset->language(), notes);
    }

    if (!reel_asset->entry_point()) {
        notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
    } else if (reel_asset->entry_point().get()) {
        notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
    }
}


/** Things remembered between calls to verify_smpte_subtitle_asset() */
struct State
{
    /** Language of the first subtitle asset seen which had one */
    boost::optional subtitle_language;
};


/** Verify stuff that is common to both subtitles and closed captions */
void
verify_smpte_timed_text_asset (
    shared_ptr asset,
    optional reel_asset_duration,
    vector& notes
    )
{
    if (asset->language()) {
        verify_language_tag (*asset->language(), notes);
    } else {
        notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE, *asset->file() });
    }

    auto const size = boost::filesystem::file_size(asset->file().get());
    if (size > 115 * 1024 * 1024) {
        notes.push_back (
            { VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES, fmt::to_string(size), *asset->file() }
            );
    }

    /* XXX: I'm not sure what Bv2.1_7.2.1 means when it says "the font resource shall not be larger than 10MB"
     * but I'm hoping that checking for the total size of all fonts being <= 10MB will do.
     */
    auto fonts = asset->font_data ();
    int total_size = 0;
    for (auto i: fonts) {
        total_size += i.second.size();
    }
    if (total_size > 10 * 1024 * 1024) {
        notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES, fmt::to_string(total_size), asset->file().get() });
    }

    if (!asset->start_time()) {
        notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_START_TIME, asset->file().get() });
    } else if (asset->start_time() != Time()) {
        notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SUBTITLE_START_TIME, asset->file().get() });
    }

    if (reel_asset_duration && *reel_asset_duration != asset->intrinsic_duration()) {
        notes.push_back (
            {
                VerificationNote::Type::BV21_ERROR,
                VerificationNote::Code::MISMATCHED_TIMED_TEXT_DURATION,
                String::compose("%1 %2", *reel_asset_duration, asset->intrinsic_duration()),
                asset->file().get()
            });
    }
}


/** Verify SMPTE subtitle-only stuff */
void
verify_smpte_subtitle_asset (
    shared_ptr asset,
    vector& notes,
    State& state
    )
{
    if (asset->language()) {
        /* Remember the first language that we see, and complain about any later one which differs */
        if (!state.subtitle_language) {
            state.subtitle_language = *asset->language();
        } else if (state.subtitle_language != *asset->language()) {
            notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES });
        }
    }

    DCP_ASSERT (asset->resource_id());
    auto xml_id = asset->xml_id();
    if (xml_id) {
        if (asset->resource_id().get() != xml_id) {
            notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_TIMED_TEXT_RESOURCE_ID });
        }

        if (asset->id() == asset->resource_id().get() || asset->id() == xml_id) {
            notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_TIMED_TEXT_ASSET_ID });
        }
    } else {
        notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED});
    }
}


/** Verify all subtitle stuff */
static void
verify_subtitle_asset (
    shared_ptr asset,
    optional reel_asset_duration,
    function)> stage,
    boost::filesystem::path xsd_dtd_directory,
    vector& notes,
    State& state
    )
{
    stage ("Checking subtitle XML", asset->file());
    /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
     * gets passed through libdcp which may clean up and therefore hide errors.
     */
    if (asset->raw_xml()) {
        validate_xml (asset->raw_xml().get(), xsd_dtd_directory, notes);
    } else {
        notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED});
    }

    auto smpte = dynamic_pointer_cast(asset);
    if (smpte) {
        verify_smpte_timed_text_asset (smpte, reel_asset_duration, notes);
        verify_smpte_subtitle_asset (smpte, notes, state);
    }
}


/** Verify all closed caption stuff */
static void
verify_closed_caption_asset (
    shared_ptr asset,
    optional reel_asset_duration,
    function)> stage,
    boost::filesystem::path xsd_dtd_directory,
    vector& notes
    )
{
    stage ("Checking closed caption XML", asset->file());
    /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
     * gets passed through libdcp which may clean up and therefore hide errors.
     */
    auto raw_xml = asset->raw_xml();
    if (raw_xml) {
        validate_xml (*raw_xml, xsd_dtd_directory, notes);
        if (raw_xml->size() > 256 * 1024) {
            notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES, fmt::to_string(raw_xml->size()), *asset->file()});
        }
    } else {
        notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED});
    }

    auto smpte = dynamic_pointer_cast(asset);
    if (smpte) {
        verify_smpte_timed_text_asset (smpte, reel_asset_duration, notes);
    }
}


/** Check the timing of the individual subtitles and make sure there are no empty nodes */
static
void
verify_text_details (
    vector> reels,
    int edit_rate,
    vector& notes,
    std::function)> check,
    std::function (shared_ptr)> xml,
    std::function)> duration
    )
{
    /* end of last subtitle (in editable units) */
    optional last_out;
    auto too_short = false;
    auto too_close = false;
    auto too_early = false;
    auto reel_overlap = false;
    auto empty_text = false;
    /* current reel start time (in editable units) */
    int64_t reel_offset = 0;

    std::function, optional