Add test for too many subtitle namespaces.
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2021 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34
35 /** @file  src/verify.cc
36  *  @brief dcp::verify() method and associated code
37  */
38
39
40 #include "compose.hpp"
41 #include "cpl.h"
42 #include "dcp.h"
43 #include "exceptions.h"
44 #include "interop_subtitle_asset.h"
45 #include "mono_picture_asset.h"
46 #include "mono_picture_frame.h"
47 #include "raw_convert.h"
48 #include "reel.h"
49 #include "reel_closed_caption_asset.h"
50 #include "reel_interop_subtitle_asset.h"
51 #include "reel_markers_asset.h"
52 #include "reel_picture_asset.h"
53 #include "reel_sound_asset.h"
54 #include "reel_smpte_subtitle_asset.h"
55 #include "reel_subtitle_asset.h"
56 #include "smpte_subtitle_asset.h"
57 #include "stereo_picture_asset.h"
58 #include "stereo_picture_frame.h"
59 #include "verify.h"
60 #include "verify_j2k.h"
61 #include <libxml/parserInternals.h>
62 #include <xercesc/dom/DOMAttr.hpp>
63 #include <xercesc/dom/DOMDocument.hpp>
64 #include <xercesc/dom/DOMError.hpp>
65 #include <xercesc/dom/DOMErrorHandler.hpp>
66 #include <xercesc/dom/DOMException.hpp>
67 #include <xercesc/dom/DOMImplementation.hpp>
68 #include <xercesc/dom/DOMImplementationLS.hpp>
69 #include <xercesc/dom/DOMImplementationRegistry.hpp>
70 #include <xercesc/dom/DOMLSParser.hpp>
71 #include <xercesc/dom/DOMLocator.hpp>
72 #include <xercesc/dom/DOMNamedNodeMap.hpp>
73 #include <xercesc/dom/DOMNodeList.hpp>
74 #include <xercesc/framework/LocalFileInputSource.hpp>
75 #include <xercesc/framework/MemBufInputSource.hpp>
76 #include <xercesc/parsers/AbstractDOMParser.hpp>
77 #include <xercesc/parsers/XercesDOMParser.hpp>
78 #include <xercesc/sax/HandlerBase.hpp>
79 #include <xercesc/util/PlatformUtils.hpp>
80 #include <boost/algorithm/string.hpp>
81 #include <iostream>
82 #include <map>
83 #include <regex>
84 #include <set>
85 #include <vector>
86
87
88 using std::cout;
89 using std::dynamic_pointer_cast;
90 using std::list;
91 using std::make_shared;
92 using std::map;
93 using std::max;
94 using std::set;
95 using std::shared_ptr;
96 using std::string;
97 using std::vector;
98 using boost::optional;
99 using boost::function;
100
101
102 using namespace dcp;
103 using namespace xercesc;
104
105
106 static
107 string
108 xml_ch_to_string (XMLCh const * a)
109 {
110         char* x = XMLString::transcode(a);
111         string const o(x);
112         XMLString::release(&x);
113         return o;
114 }
115
116
117 class XMLValidationError
118 {
119 public:
120         XMLValidationError (SAXParseException const & e)
121                 : _message (xml_ch_to_string(e.getMessage()))
122                 , _line (e.getLineNumber())
123                 , _column (e.getColumnNumber())
124                 , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
125                 , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
126         {
127
128         }
129
130         string message () const {
131                 return _message;
132         }
133
134         uint64_t line () const {
135                 return _line;
136         }
137
138         uint64_t column () const {
139                 return _column;
140         }
141
142         string public_id () const {
143                 return _public_id;
144         }
145
146         string system_id () const {
147                 return _system_id;
148         }
149
150 private:
151         string _message;
152         uint64_t _line;
153         uint64_t _column;
154         string _public_id;
155         string _system_id;
156 };
157
158
159 class DCPErrorHandler : public ErrorHandler
160 {
161 public:
162         void warning(const SAXParseException& e) override
163         {
164                 maybe_add (XMLValidationError(e));
165         }
166
167         void error(const SAXParseException& e) override
168         {
169                 maybe_add (XMLValidationError(e));
170         }
171
172         void fatalError(const SAXParseException& e) override
173         {
174                 maybe_add (XMLValidationError(e));
175         }
176
177         void resetErrors() override {
178                 _errors.clear ();
179         }
180
181         list<XMLValidationError> errors () const {
182                 return _errors;
183         }
184
185 private:
186         void maybe_add (XMLValidationError e)
187         {
188                 /* XXX: nasty hack */
189                 if (
190                         e.message().find("schema document") != string::npos &&
191                         e.message().find("has different target namespace from the one specified in instance document") != string::npos
192                         ) {
193                         return;
194                 }
195
196                 _errors.push_back (e);
197         }
198
199         list<XMLValidationError> _errors;
200 };
201
202
203 class StringToXMLCh
204 {
205 public:
206         StringToXMLCh (string a)
207         {
208                 _buffer = XMLString::transcode(a.c_str());
209         }
210
211         StringToXMLCh (StringToXMLCh const&) = delete;
212         StringToXMLCh& operator= (StringToXMLCh const&) = delete;
213
214         ~StringToXMLCh ()
215         {
216                 XMLString::release (&_buffer);
217         }
218
219         XMLCh const * get () const {
220                 return _buffer;
221         }
222
223 private:
224         XMLCh* _buffer;
225 };
226
227
228 class LocalFileResolver : public EntityResolver
229 {
230 public:
231         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
232                 : _xsd_dtd_directory (xsd_dtd_directory)
233         {
234                 /* XXX: I'm not clear on what things need to be in this list; some XSDs are apparently, magically
235                  * found without being here.
236                  */
237                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
238                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
239                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
240                 add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
241                 add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
242                 add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
243                 add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
244                 add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
245                 add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
246                 add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "DCDMSubtitle-2010.xsd");
247                 add("http://www.smpte-ra.org/schemas/428-7/2014/DCST.xsd", "DCDMSubtitle-2014.xsd");
248                 add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
249                 add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
250                 add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
251         }
252
253         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id) override
254         {
255                 if (!system_id) {
256                         return 0;
257                 }
258                 auto system_id_str = xml_ch_to_string (system_id);
259                 auto p = _xsd_dtd_directory;
260                 if (_files.find(system_id_str) == _files.end()) {
261                         p /= system_id_str;
262                 } else {
263                         p /= _files[system_id_str];
264                 }
265                 StringToXMLCh ch (p.string());
266                 return new LocalFileInputSource(ch.get());
267         }
268
269 private:
270         void add (string uri, string file)
271         {
272                 _files[uri] = file;
273         }
274
275         std::map<string, string> _files;
276         boost::filesystem::path _xsd_dtd_directory;
277 };
278
279
280 static void
281 parse (XercesDOMParser& parser, boost::filesystem::path xml)
282 {
283         parser.parse(xml.c_str());
284 }
285
286
287 static void
288 parse (XercesDOMParser& parser, string xml)
289 {
290         xercesc::MemBufInputSource buf(reinterpret_cast<unsigned char const*>(xml.c_str()), xml.size(), "");
291         parser.parse(buf);
292 }
293
294
295 template <class T>
296 void
297 validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, vector<VerificationNote>& notes)
298 {
299         try {
300                 XMLPlatformUtils::Initialize ();
301         } catch (XMLException& e) {
302                 throw MiscError ("Failed to initialise xerces library");
303         }
304
305         DCPErrorHandler error_handler;
306
307         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
308         {
309                 XercesDOMParser parser;
310                 parser.setValidationScheme(XercesDOMParser::Val_Always);
311                 parser.setDoNamespaces(true);
312                 parser.setDoSchema(true);
313
314                 vector<string> schema;
315                 schema.push_back("xml.xsd");
316                 schema.push_back("xmldsig-core-schema.xsd");
317                 schema.push_back("SMPTE-429-7-2006-CPL.xsd");
318                 schema.push_back("SMPTE-429-8-2006-PKL.xsd");
319                 schema.push_back("SMPTE-429-9-2007-AM.xsd");
320                 schema.push_back("Main-Stereo-Picture-CPL.xsd");
321                 schema.push_back("PROTO-ASDCP-CPL-20040511.xsd");
322                 schema.push_back("PROTO-ASDCP-PKL-20040311.xsd");
323                 schema.push_back("PROTO-ASDCP-AM-20040311.xsd");
324                 schema.push_back("DCSubtitle.v1.mattsson.xsd");
325                 schema.push_back("DCDMSubtitle-2010.xsd");
326                 schema.push_back("DCDMSubtitle-2014.xsd");
327                 schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
328                 schema.push_back("SMPTE-429-16.xsd");
329                 schema.push_back("Dolby-2012-AD.xsd");
330                 schema.push_back("SMPTE-429-10-2008.xsd");
331                 schema.push_back("xlink.xsd");
332                 schema.push_back("SMPTE-335-2012.xsd");
333                 schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
334                 schema.push_back("isdcf-mca.xsd");
335                 schema.push_back("SMPTE-429-12-2008.xsd");
336
337                 /* XXX: I'm not especially clear what this is for, but it seems to be necessary.
338                  * Schemas that are not mentioned in this list are not read, and the things
339                  * they describe are not checked.
340                  */
341                 string locations;
342                 for (auto i: schema) {
343                         locations += String::compose("%1 %1 ", i, i);
344                 }
345
346                 parser.setExternalSchemaLocation(locations.c_str());
347                 parser.setValidationSchemaFullChecking(true);
348                 parser.setErrorHandler(&error_handler);
349
350                 LocalFileResolver resolver (xsd_dtd_directory);
351                 parser.setEntityResolver(&resolver);
352
353                 try {
354                         parser.resetDocumentPool();
355                         parse(parser, xml);
356                 } catch (XMLException& e) {
357                         throw MiscError(xml_ch_to_string(e.getMessage()));
358                 } catch (DOMException& e) {
359                         throw MiscError(xml_ch_to_string(e.getMessage()));
360                 } catch (...) {
361                         throw MiscError("Unknown exception from xerces");
362                 }
363         }
364
365         XMLPlatformUtils::Terminate ();
366
367         for (auto i: error_handler.errors()) {
368                 notes.push_back ({
369                         VerificationNote::Type::ERROR,
370                         VerificationNote::Code::INVALID_XML,
371                         i.message(),
372                         boost::trim_copy(i.public_id() + " " + i.system_id()),
373                         i.line()
374                 });
375         }
376 }
377
378
/** Result of verify_asset() */
enum class VerifyAssetResult
{
	/** No problem was found with the hashes */
	GOOD,
	/** The CPL gives a hash for the asset and it differs from the one in the PKL */
	CPL_PKL_DIFFER,
	/** The hash calculated from the asset differs from the one in the PKL */
	BAD
};
384
385
386 static VerifyAssetResult
387 verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
388 {
389         auto const actual_hash = reel_file_asset->asset_ref()->hash(progress);
390
391         auto pkls = dcp->pkls();
392         /* We've read this DCP in so it must have at least one PKL */
393         DCP_ASSERT (!pkls.empty());
394
395         auto asset = reel_file_asset->asset_ref().asset();
396
397         optional<string> pkl_hash;
398         for (auto i: pkls) {
399                 pkl_hash = i->hash (reel_file_asset->asset_ref()->id());
400                 if (pkl_hash) {
401                         break;
402                 }
403         }
404
405         DCP_ASSERT (pkl_hash);
406
407         auto cpl_hash = reel_file_asset->hash();
408         if (cpl_hash && *cpl_hash != *pkl_hash) {
409                 return VerifyAssetResult::CPL_PKL_DIFFER;
410         }
411
412         if (actual_hash != *pkl_hash) {
413                 return VerifyAssetResult::BAD;
414         }
415
416         return VerifyAssetResult::GOOD;
417 }
418
419
420 void
421 verify_language_tag (string tag, vector<VerificationNote>& notes)
422 {
423         try {
424                 LanguageTag test (tag);
425         } catch (LanguageTagError &) {
426                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, tag});
427         }
428 }
429
430
431 static void
432 verify_picture_asset (shared_ptr<const ReelFileAsset> reel_file_asset, boost::filesystem::path file, vector<VerificationNote>& notes, function<void (float)> progress)
433 {
434         int biggest_frame = 0;
435         auto asset = dynamic_pointer_cast<PictureAsset>(reel_file_asset->asset_ref().asset());
436         auto const duration = asset->intrinsic_duration ();
437
438         auto check_and_add = [&notes](vector<VerificationNote> const& j2k_notes) {
439                 for (auto i: j2k_notes) {
440                         if (find(notes.begin(), notes.end(), i) == notes.end()) {
441                                 notes.push_back (i);
442                         }
443                 }
444         };
445
446         if (auto mono_asset = dynamic_pointer_cast<MonoPictureAsset>(reel_file_asset->asset_ref().asset())) {
447                 auto reader = mono_asset->start_read ();
448                 for (int64_t i = 0; i < duration; ++i) {
449                         auto frame = reader->get_frame (i);
450                         biggest_frame = max(biggest_frame, frame->size());
451                         if (!mono_asset->encrypted() || mono_asset->key()) {
452                                 vector<VerificationNote> j2k_notes;
453                                 verify_j2k(frame, i, mono_asset->frame_rate().numerator, j2k_notes);
454                                 check_and_add (j2k_notes);
455                         }
456                         progress (float(i) / duration);
457                 }
458         } else if (auto stereo_asset = dynamic_pointer_cast<StereoPictureAsset>(asset)) {
459                 auto reader = stereo_asset->start_read ();
460                 for (int64_t i = 0; i < duration; ++i) {
461                         auto frame = reader->get_frame (i);
462                         biggest_frame = max(biggest_frame, max(frame->left()->size(), frame->right()->size()));
463                         if (!stereo_asset->encrypted() || stereo_asset->key()) {
464                                 vector<VerificationNote> j2k_notes;
465                                 verify_j2k(frame->left(), i, stereo_asset->frame_rate().numerator, j2k_notes);
466                                 verify_j2k(frame->right(), i, stereo_asset->frame_rate().numerator, j2k_notes);
467                                 check_and_add (j2k_notes);
468                         }
469                         progress (float(i) / duration);
470                 }
471
472         }
473
474         static const int max_frame =   rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
475         static const int risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
476         if (biggest_frame > max_frame) {
477                 notes.push_back ({
478                         VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
479                 });
480         } else if (biggest_frame > risky_frame) {
481                 notes.push_back ({
482                         VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
483                 });
484         }
485 }
486
487
488 static void
489 verify_main_picture_asset (
490         shared_ptr<const DCP> dcp,
491         shared_ptr<const ReelPictureAsset> reel_asset,
492         function<void (string, optional<boost::filesystem::path>)> stage,
493         function<void (float)> progress,
494         VerificationOptions options,
495         vector<VerificationNote>& notes
496         )
497 {
498         auto asset = reel_asset->asset();
499         auto const file = *asset->file();
500
501         if (options.check_asset_hashes && (!options.maximum_asset_size_for_hash_check || boost::filesystem::file_size(file) < *options.maximum_asset_size_for_hash_check)) {
502                 stage ("Checking picture asset hash", file);
503                 auto const r = verify_asset (dcp, reel_asset, progress);
504                 switch (r) {
505                         case VerifyAssetResult::BAD:
506                                 notes.push_back ({
507                                         VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_PICTURE_HASH, file
508                                 });
509                                 break;
510                         case VerifyAssetResult::CPL_PKL_DIFFER:
511                                 notes.push_back ({
512                                         VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_PICTURE_HASHES, file
513                                 });
514                                 break;
515                         default:
516                                 break;
517                 }
518         }
519
520         stage ("Checking picture frame sizes", asset->file());
521         verify_picture_asset (reel_asset, file, notes, progress);
522
523         /* Only flat/scope allowed by Bv2.1 */
524         if (
525                 asset->size() != Size(2048, 858) &&
526                 asset->size() != Size(1998, 1080) &&
527                 asset->size() != Size(4096, 1716) &&
528                 asset->size() != Size(3996, 2160)) {
529                 notes.push_back({
530                         VerificationNote::Type::BV21_ERROR,
531                         VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS,
532                         String::compose("%1x%2", asset->size().width, asset->size().height),
533                         file
534                 });
535         }
536
537         /* Only 24, 25, 48fps allowed for 2K */
538         if (
539                 (asset->size() == Size(2048, 858) || asset->size() == Size(1998, 1080)) &&
540                 (asset->edit_rate() != Fraction(24, 1) && asset->edit_rate() != Fraction(25, 1) && asset->edit_rate() != Fraction(48, 1))
541            ) {
542                 notes.push_back({
543                         VerificationNote::Type::BV21_ERROR,
544                         VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K,
545                         String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
546                         file
547                 });
548         }
549
550         if (asset->size() == Size(4096, 1716) || asset->size() == Size(3996, 2160)) {
551                 /* Only 24fps allowed for 4K */
552                 if (asset->edit_rate() != Fraction(24, 1)) {
553                         notes.push_back({
554                                 VerificationNote::Type::BV21_ERROR,
555                                 VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K,
556                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
557                                 file
558                         });
559                 }
560
561                 /* Only 2D allowed for 4K */
562                 if (dynamic_pointer_cast<const StereoPictureAsset>(asset)) {
563                         notes.push_back({
564                                 VerificationNote::Type::BV21_ERROR,
565                                 VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D,
566                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
567                                 file
568                         });
569
570                 }
571         }
572
573 }
574
575
576 struct State
577 {
578         boost::optional<string> subtitle_language;
579         boost::optional<int> audio_channels;
580 };
581
582
583 static void
584 verify_main_sound_asset (
585         shared_ptr<const DCP> dcp,
586         shared_ptr<const ReelSoundAsset> reel_asset,
587         function<void (string, optional<boost::filesystem::path>)> stage,
588         function<void (float)> progress,
589         VerificationOptions options,
590         vector<VerificationNote>& notes,
591         State& state
592         )
593 {
594         auto asset = reel_asset->asset();
595         auto const file = *asset->file();
596
597         if (options.check_asset_hashes && (!options.maximum_asset_size_for_hash_check || boost::filesystem::file_size(file) < *options.maximum_asset_size_for_hash_check)) {
598                 stage("Checking sound asset hash", file);
599                 auto const r = verify_asset (dcp, reel_asset, progress);
600                 switch (r) {
601                         case VerifyAssetResult::BAD:
602                                 notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_SOUND_HASH, file});
603                                 break;
604                         case VerifyAssetResult::CPL_PKL_DIFFER:
605                                 notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_SOUND_HASHES, file});
606                                 break;
607                         default:
608                                 break;
609                 }
610         }
611
612         if (!state.audio_channels) {
613                 state.audio_channels = asset->channels();
614         } else if (*state.audio_channels != asset->channels()) {
615                 notes.push_back({ VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_SOUND_CHANNEL_COUNTS, file });
616         }
617
618         stage ("Checking sound asset metadata", file);
619
620         if (auto lang = asset->language()) {
621                 verify_language_tag (*lang, notes);
622         }
623         if (asset->sampling_rate() != 48000) {
624                 notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SOUND_FRAME_RATE, raw_convert<string>(asset->sampling_rate()), file});
625         }
626 }
627
628
629 static void
630 verify_main_subtitle_reel (shared_ptr<const ReelSubtitleAsset> reel_asset, vector<VerificationNote>& notes)
631 {
632         /* XXX: is Language compulsory? */
633         if (reel_asset->language()) {
634                 verify_language_tag (*reel_asset->language(), notes);
635         }
636
637         if (!reel_asset->entry_point()) {
638                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT, reel_asset->id() });
639         } else if (reel_asset->entry_point().get()) {
640                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT, reel_asset->id() });
641         }
642 }
643
644
645 static void
646 verify_closed_caption_reel (shared_ptr<const ReelClosedCaptionAsset> reel_asset, vector<VerificationNote>& notes)
647 {
648         /* XXX: is Language compulsory? */
649         if (reel_asset->language()) {
650                 verify_language_tag (*reel_asset->language(), notes);
651         }
652
653         if (!reel_asset->entry_point()) {
654                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
655         } else if (reel_asset->entry_point().get()) {
656                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
657         }
658 }
659
660
661 /** Verify stuff that is common to both subtitles and closed captions */
662 void
663 verify_smpte_timed_text_asset (
664         shared_ptr<const SMPTESubtitleAsset> asset,
665         optional<int64_t> reel_asset_duration,
666         vector<VerificationNote>& notes
667         )
668 {
669         if (asset->language()) {
670                 verify_language_tag (*asset->language(), notes);
671         } else {
672                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE, *asset->file() });
673         }
674
675         auto const size = boost::filesystem::file_size(asset->file().get());
676         if (size > 115 * 1024 * 1024) {
677                 notes.push_back (
678                         { VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES, raw_convert<string>(size), *asset->file() }
679                         );
680         }
681
682         /* XXX: I'm not sure what Bv2.1_7.2.1 means when it says "the font resource shall not be larger than 10MB"
683          * but I'm hoping that checking for the total size of all fonts being <= 10MB will do.
684          */
685         auto fonts = asset->font_data ();
686         int total_size = 0;
687         for (auto i: fonts) {
688                 total_size += i.second.size();
689         }
690         if (total_size > 10 * 1024 * 1024) {
691                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES, raw_convert<string>(total_size), asset->file().get() });
692         }
693
694         if (!asset->start_time()) {
695                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_START_TIME, asset->file().get() });
696         } else if (asset->start_time() != Time()) {
697                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SUBTITLE_START_TIME, asset->file().get() });
698         }
699
700         if (reel_asset_duration && *reel_asset_duration != asset->intrinsic_duration()) {
701                 notes.push_back (
702                         {
703                                 VerificationNote::Type::BV21_ERROR,
704                                 VerificationNote::Code::MISMATCHED_TIMED_TEXT_DURATION,
705                                 String::compose("%1 %2", *reel_asset_duration, asset->intrinsic_duration()),
706                                 asset->file().get()
707                         });
708         }
709 }
710
711
712 /** Verify Interop subtitle-only stuff */
713 void
714 verify_interop_subtitle_asset(shared_ptr<const InteropSubtitleAsset> asset, vector<VerificationNote>& notes)
715 {
716         if (asset->subtitles().empty()) {
717                 notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_SUBTITLE, asset->id(), asset->file().get() });
718         }
719         auto const unresolved = asset->unresolved_fonts();
720         if (!unresolved.empty()) {
721                 notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_FONT, unresolved.front() });
722         }
723 }
724
725
726 /** Verify SMPTE subtitle-only stuff */
727 void
728 verify_smpte_subtitle_asset (
729         shared_ptr<const SMPTESubtitleAsset> asset,
730         vector<VerificationNote>& notes,
731         State& state
732         )
733 {
734         if (asset->language()) {
735                 if (!state.subtitle_language) {
736                         state.subtitle_language = *asset->language();
737                 } else if (state.subtitle_language != *asset->language()) {
738                         notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES });
739                 }
740         }
741
742         DCP_ASSERT (asset->resource_id());
743         auto xml_id = asset->xml_id();
744         if (xml_id) {
745                 if (asset->resource_id().get() != xml_id) {
746                         notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_TIMED_TEXT_RESOURCE_ID });
747                 }
748
749                 if (asset->id() == asset->resource_id().get() || asset->id() == xml_id) {
750                         notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_TIMED_TEXT_ASSET_ID });
751                 }
752         } else {
753                 notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED});
754         }
755
756         if (asset->raw_xml()) {
757                 /* Deluxe require this in their QC even if it seems never to be mentioned in any standard */
758                 cxml::Document doc("SubtitleReel");
759                 doc.read_string(*asset->raw_xml());
760                 auto issue_date = doc.string_child("IssueDate");
761                 std::regex reg("^\\d\\d\\d\\d-\\d\\d-\\d\\dT\\d\\d:\\d\\d:\\d\\d$");
762                 if (!std::regex_match(issue_date, reg)) {
763                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_ISSUE_DATE, issue_date});
764                 }
765         }
766 }
767
768
769 /** Verify all subtitle stuff */
770 static void
771 verify_subtitle_asset (
772         shared_ptr<const SubtitleAsset> asset,
773         optional<int64_t> reel_asset_duration,
774         function<void (string, optional<boost::filesystem::path>)> stage,
775         boost::filesystem::path xsd_dtd_directory,
776         vector<VerificationNote>& notes,
777         State& state
778         )
779 {
780         stage ("Checking subtitle XML", asset->file());
781         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
782          * gets passed through libdcp which may clean up and therefore hide errors.
783          */
784         if (asset->raw_xml()) {
785                 validate_xml (asset->raw_xml().get(), xsd_dtd_directory, notes);
786         } else {
787                 notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED});
788         }
789
790         auto namespace_count = [](shared_ptr<const SubtitleAsset> asset, string root_node) {
791                 cxml::Document doc(root_node);
792                 doc.read_string(asset->raw_xml().get());
793                 auto root = dynamic_cast<xmlpp::Element*>(doc.node())->cobj();
794                 int count = 0;
795                 for (auto ns = root->nsDef; ns != nullptr; ns = ns->next) {
796                         ++count;
797                 }
798                 return count;
799         };
800
801         auto interop = dynamic_pointer_cast<const InteropSubtitleAsset>(asset);
802         if (interop) {
803                 verify_interop_subtitle_asset(interop, notes);
804                 if (namespace_count(asset, "DCSubtitle") > 1) {
805                         notes.push_back({ VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_SUBTITLE_NAMESPACE_COUNT, asset->id() });
806                 }
807         }
808
809         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
810         if (smpte) {
811                 verify_smpte_timed_text_asset (smpte, reel_asset_duration, notes);
812                 verify_smpte_subtitle_asset (smpte, notes, state);
813                 if (namespace_count(asset, "SubtitleReel") > 1) {
814                         notes.push_back({ VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_SUBTITLE_NAMESPACE_COUNT, asset->id()});
815                 }
816         }
817 }
818
819
820 /** Verify all closed caption stuff */
821 static void
822 verify_closed_caption_asset (
823         shared_ptr<const SubtitleAsset> asset,
824         optional<int64_t> reel_asset_duration,
825         function<void (string, optional<boost::filesystem::path>)> stage,
826         boost::filesystem::path xsd_dtd_directory,
827         vector<VerificationNote>& notes
828         )
829 {
830         stage ("Checking closed caption XML", asset->file());
831         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
832          * gets passed through libdcp which may clean up and therefore hide errors.
833          */
834         auto raw_xml = asset->raw_xml();
835         if (raw_xml) {
836                 validate_xml (*raw_xml, xsd_dtd_directory, notes);
837                 if (raw_xml->size() > 256 * 1024) {
838                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES, raw_convert<string>(raw_xml->size()), *asset->file()});
839                 }
840         } else {
841                 notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED});
842         }
843
844         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
845         if (smpte) {
846                 verify_smpte_timed_text_asset (smpte, reel_asset_duration, notes);
847         }
848 }
849
850
851 /** Check the timing of the individual subtitles and make sure there are no empty <Text> nodes */
852 static
853 void
854 verify_text_details (
855         vector<shared_ptr<Reel>> reels,
856         int edit_rate,
857         vector<VerificationNote>& notes,
858         std::function<bool (shared_ptr<Reel>)> check,
859         std::function<optional<string> (shared_ptr<Reel>)> xml,
860         std::function<int64_t (shared_ptr<Reel>)> duration
861         )
862 {
863         /* end of last subtitle (in editable units) */
864         optional<int64_t> last_out;
865         auto too_short = false;
866         auto too_close = false;
867         auto too_early = false;
868         auto reel_overlap = false;
869         auto empty_text = false;
870         /* current reel start time (in editable units) */
871         int64_t reel_offset = 0;
872
873         std::function<void (cxml::ConstNodePtr, optional<int>, optional<Time>, int, bool)> parse;
874         parse = [&parse, &last_out, &too_short, &too_close, &too_early, &empty_text, &reel_offset](cxml::ConstNodePtr node, optional<int> tcr, optional<Time> start_time, int er, bool first_reel) {
875                 if (node->name() == "Subtitle") {
876                         Time in (node->string_attribute("TimeIn"), tcr);
877                         if (start_time) {
878                                 in -= *start_time;
879                         }
880                         Time out (node->string_attribute("TimeOut"), tcr);
881                         if (start_time) {
882                                 out -= *start_time;
883                         }
884                         if (first_reel && tcr && in < Time(0, 0, 4, 0, *tcr)) {
885                                 too_early = true;
886                         }
887                         auto length = out - in;
888                         if (length.as_editable_units_ceil(er) < 15) {
889                                 too_short = true;
890                         }
891                         if (last_out) {
892                                 /* XXX: this feels dubious - is it really what Bv2.1 means? */
893                                 auto distance = reel_offset + in.as_editable_units_ceil(er) - *last_out;
894                                 if (distance >= 0 && distance < 2) {
895                                         too_close = true;
896                                 }
897                         }
898                         last_out = reel_offset + out.as_editable_units_floor(er);
899                 } else if (node->name() == "Text") {
900                         std::function<bool (cxml::ConstNodePtr)> node_has_content = [&](cxml::ConstNodePtr node) {
901                                 if (!node->content().empty()) {
902                                         return true;
903                                 }
904                                 for (auto i: node->node_children()) {
905                                         if (node_has_content(i)) {
906                                                 return true;
907                                         }
908                                 }
909                                 return false;
910                         };
911                         if (!node_has_content(node)) {
912                                 empty_text = true;
913                         }
914                 }
915
916                 for (auto i: node->node_children()) {
917                         parse(i, tcr, start_time, er, first_reel);
918                 }
919         };
920
921         for (auto i = 0U; i < reels.size(); ++i) {
922                 if (!check(reels[i])) {
923                         continue;
924                 }
925
926                 auto reel_xml = xml(reels[i]);
927                 if (!reel_xml) {
928                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED});
929                         continue;
930                 }
931
932                 /* We need to look at <Subtitle> instances in the XML being checked, so we can't use the subtitles
933                  * read in by libdcp's parser.
934                  */
935
936                 shared_ptr<cxml::Document> doc;
937                 optional<int> tcr;
938                 optional<Time> start_time;
939                 try {
940                         doc = make_shared<cxml::Document>("SubtitleReel");
941                         doc->read_string (*reel_xml);
942                         tcr = doc->number_child<int>("TimeCodeRate");
943                         auto start_time_string = doc->optional_string_child("StartTime");
944                         if (start_time_string) {
945                                 start_time = Time(*start_time_string, tcr);
946                         }
947                 } catch (...) {
948                         doc = make_shared<cxml::Document>("DCSubtitle");
949                         doc->read_string (*reel_xml);
950                 }
951                 parse (doc, tcr, start_time, edit_rate, i == 0);
952                 auto end = reel_offset + duration(reels[i]);
953                 if (last_out && *last_out > end) {
954                         reel_overlap = true;
955                 }
956                 reel_offset = end;
957         }
958
959         if (last_out && *last_out > reel_offset) {
960                 reel_overlap = true;
961         }
962
963         if (too_early) {
964                 notes.push_back({
965                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME
966                 });
967         }
968
969         if (too_short) {
970                 notes.push_back ({
971                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_DURATION
972                 });
973         }
974
975         if (too_close) {
976                 notes.push_back ({
977                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_SPACING
978                 });
979         }
980
981         if (reel_overlap) {
982                 notes.push_back ({
983                         VerificationNote::Type::ERROR, VerificationNote::Code::SUBTITLE_OVERLAPS_REEL_BOUNDARY
984                 });
985         }
986
987         if (empty_text) {
988                 notes.push_back ({
989                         VerificationNote::Type::WARNING, VerificationNote::Code::EMPTY_TEXT
990                 });
991         }
992 }
993
994
995 static
996 void
997 verify_closed_caption_details (
998         vector<shared_ptr<Reel>> reels,
999         vector<VerificationNote>& notes
1000         )
1001 {
1002         std::function<void (cxml::ConstNodePtr node, std::vector<cxml::ConstNodePtr>& text_or_image)> find_text_or_image;
1003         find_text_or_image = [&find_text_or_image](cxml::ConstNodePtr node, std::vector<cxml::ConstNodePtr>& text_or_image) {
1004                 for (auto i: node->node_children()) {
1005                         if (i->name() == "Text") {
1006                                 text_or_image.push_back (i);
1007                         } else {
1008                                 find_text_or_image (i, text_or_image);
1009                         }
1010                 }
1011         };
1012
1013         auto mismatched_valign = false;
1014         auto incorrect_order = false;
1015
1016         std::function<void (cxml::ConstNodePtr)> parse;
1017         parse = [&parse, &find_text_or_image, &mismatched_valign, &incorrect_order](cxml::ConstNodePtr node) {
1018                 if (node->name() == "Subtitle") {
1019                         vector<cxml::ConstNodePtr> text_or_image;
1020                         find_text_or_image (node, text_or_image);
1021                         optional<string> last_valign;
1022                         optional<float> last_vpos;
1023                         for (auto i: text_or_image) {
1024                                 auto valign = i->optional_string_attribute("VAlign");
1025                                 if (!valign) {
1026                                         valign = i->optional_string_attribute("Valign").get_value_or("center");
1027                                 }
1028                                 auto vpos = i->optional_number_attribute<float>("VPosition");
1029                                 if (!vpos) {
1030                                         vpos = i->optional_number_attribute<float>("Vposition").get_value_or(50);
1031                                 }
1032
1033                                 if (last_valign) {
1034                                         if (*last_valign != valign) {
1035                                                 mismatched_valign = true;
1036                                         }
1037                                 }
1038                                 last_valign = valign;
1039
1040                                 if (!mismatched_valign) {
1041                                         if (last_vpos) {
1042                                                 if (*last_valign == "top" || *last_valign == "center") {
1043                                                         if (*vpos < *last_vpos) {
1044                                                                 incorrect_order = true;
1045                                                         }
1046                                                 } else {
1047                                                         if (*vpos > *last_vpos) {
1048                                                                 incorrect_order = true;
1049                                                         }
1050                                                 }
1051                                         }
1052                                         last_vpos = vpos;
1053                                 }
1054                         }
1055                 }
1056
1057                 for (auto i: node->node_children()) {
1058                         parse(i);
1059                 }
1060         };
1061
1062         for (auto reel: reels) {
1063                 for (auto ccap: reel->closed_captions()) {
1064                         auto reel_xml = ccap->asset()->raw_xml();
1065                         if (!reel_xml) {
1066                                 notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED});
1067                                 continue;
1068                         }
1069
1070                         /* We need to look at <Subtitle> instances in the XML being checked, so we can't use the subtitles
1071                          * read in by libdcp's parser.
1072                          */
1073
1074                         shared_ptr<cxml::Document> doc;
1075                         optional<int> tcr;
1076                         optional<Time> start_time;
1077                         try {
1078                                 doc = make_shared<cxml::Document>("SubtitleReel");
1079                                 doc->read_string (*reel_xml);
1080                         } catch (...) {
1081                                 doc = make_shared<cxml::Document>("DCSubtitle");
1082                                 doc->read_string (*reel_xml);
1083                         }
1084                         parse (doc);
1085                 }
1086         }
1087
1088         if (mismatched_valign) {
1089                 notes.push_back ({
1090                         VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_VALIGN,
1091                 });
1092         }
1093
1094         if (incorrect_order) {
1095                 notes.push_back ({
1096                         VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ORDERING,
1097                 });
1098         }
1099 }
1100
1101
/** Flags set by verify_text_lines_and_characters(); it only ever sets them to true,
 *  so one instance can accumulate results over several calls.
 */
struct LinesCharactersResult
{
        /** true if some line had more characters than the warning length */
        bool warning_length_exceeded { false };
        /** true if some line had more characters than the error length */
        bool error_length_exceeded { false };
        /** true if more than 3 lines were in use at once */
        bool line_count_exceeded { false };
};
1108
1109
1110 static
1111 void
1112 verify_text_lines_and_characters (
1113         shared_ptr<SubtitleAsset> asset,
1114         int warning_length,
1115         int error_length,
1116         LinesCharactersResult* result
1117         )
1118 {
1119         class Event
1120         {
1121         public:
1122                 Event (Time time_, float position_, int characters_)
1123                         : time (time_)
1124                         , position (position_)
1125                         , characters (characters_)
1126                 {}
1127
1128                 Event (Time time_, shared_ptr<Event> start_)
1129                         : time (time_)
1130                         , start (start_)
1131                 {}
1132
1133                 Time time;
1134                 int position; //< position from 0 at top of screen to 100 at bottom
1135                 int characters;
1136                 shared_ptr<Event> start;
1137         };
1138
1139         vector<shared_ptr<Event>> events;
1140
1141         auto position = [](shared_ptr<const SubtitleString> sub) {
1142                 switch (sub->v_align()) {
1143                 case VAlign::TOP:
1144                         return lrintf(sub->v_position() * 100);
1145                 case VAlign::CENTER:
1146                         return lrintf((0.5f + sub->v_position()) * 100);
1147                 case VAlign::BOTTOM:
1148                         return lrintf((1.0f - sub->v_position()) * 100);
1149                 }
1150
1151                 return 0L;
1152         };
1153
1154         for (auto j: asset->subtitles()) {
1155                 auto text = dynamic_pointer_cast<const SubtitleString>(j);
1156                 if (text) {
1157                         auto in = make_shared<Event>(text->in(), position(text), text->text().length());
1158                         events.push_back(in);
1159                         events.push_back(make_shared<Event>(text->out(), in));
1160                 }
1161         }
1162
1163         std::sort(events.begin(), events.end(), [](shared_ptr<Event> const& a, shared_ptr<Event>const& b) {
1164                 return a->time < b->time;
1165         });
1166
1167         map<int, int> current;
1168         for (auto i: events) {
1169                 if (current.size() > 3) {
1170                         result->line_count_exceeded = true;
1171                 }
1172                 for (auto j: current) {
1173                         if (j.second > warning_length) {
1174                                 result->warning_length_exceeded = true;
1175                         }
1176                         if (j.second > error_length) {
1177                                 result->error_length_exceeded = true;
1178                         }
1179                 }
1180
1181                 if (i->start) {
1182                         /* end of a subtitle */
1183                         DCP_ASSERT (current.find(i->start->position) != current.end());
1184                         if (current[i->start->position] == i->start->characters) {
1185                                 current.erase(i->start->position);
1186                         } else {
1187                                 current[i->start->position] -= i->start->characters;
1188                         }
1189                 } else {
1190                         /* start of a subtitle */
1191                         if (current.find(i->position) == current.end()) {
1192                                 current[i->position] = i->characters;
1193                         } else {
1194                                 current[i->position] += i->characters;
1195                         }
1196                 }
1197         }
1198 }
1199
1200
1201 static
1202 void
1203 verify_text_details (vector<shared_ptr<Reel>> reels, vector<VerificationNote>& notes)
1204 {
1205         if (reels.empty()) {
1206                 return;
1207         }
1208
1209         if (reels[0]->main_subtitle()) {
1210                 verify_text_details (reels, reels[0]->main_subtitle()->edit_rate().numerator, notes,
1211                         [](shared_ptr<Reel> reel) {
1212                                 return static_cast<bool>(reel->main_subtitle());
1213                         },
1214                         [](shared_ptr<Reel> reel) {
1215                                 auto interop = dynamic_pointer_cast<ReelInteropSubtitleAsset>(reel->main_subtitle());
1216                                 if (interop) {
1217                                         return interop->asset()->raw_xml();
1218                                 }
1219                                 auto smpte = dynamic_pointer_cast<ReelSMPTESubtitleAsset>(reel->main_subtitle());
1220                                 DCP_ASSERT (smpte);
1221                                 return smpte->asset()->raw_xml();
1222                         },
1223                         [](shared_ptr<Reel> reel) {
1224                                 return reel->main_subtitle()->actual_duration();
1225                         }
1226                 );
1227         }
1228
1229         for (auto i = 0U; i < reels[0]->closed_captions().size(); ++i) {
1230                 verify_text_details (reels, reels[0]->closed_captions()[i]->edit_rate().numerator, notes,
1231                         [i](shared_ptr<Reel> reel) {
1232                                 return i < reel->closed_captions().size();
1233                         },
1234                         [i](shared_ptr<Reel> reel) {
1235                                 return reel->closed_captions()[i]->asset()->raw_xml();
1236                         },
1237                         [i](shared_ptr<Reel> reel) {
1238                                 return reel->closed_captions()[i]->actual_duration();
1239                         }
1240                 );
1241         }
1242
1243         verify_closed_caption_details (reels, notes);
1244 }
1245
1246
1247 void
1248 verify_extension_metadata(shared_ptr<const CPL> cpl, vector<VerificationNote>& notes)
1249 {
1250         DCP_ASSERT (cpl->file());
1251         cxml::Document doc ("CompositionPlaylist");
1252         doc.read_file (cpl->file().get());
1253
1254         auto missing = false;
1255         string malformed;
1256
1257         if (auto reel_list = doc.node_child("ReelList")) {
1258                 auto reels = reel_list->node_children("Reel");
1259                 if (!reels.empty()) {
1260                         if (auto asset_list = reels[0]->optional_node_child("AssetList")) {
1261                                 if (auto metadata = asset_list->optional_node_child("CompositionMetadataAsset")) {
1262                                         if (auto extension_list = metadata->optional_node_child("ExtensionMetadataList")) {
1263                                                 missing = true;
1264                                                 for (auto extension: extension_list->node_children("ExtensionMetadata")) {
1265                                                         if (extension->optional_string_attribute("scope").get_value_or("") != "http://isdcf.com/ns/cplmd/app") {
1266                                                                 continue;
1267                                                         }
1268                                                         missing = false;
1269                                                         if (auto name = extension->optional_node_child("Name")) {
1270                                                                 if (name->content() != "Application") {
1271                                                                         malformed = "<Name> should be 'Application'";
1272                                                                 }
1273                                                         }
1274                                                         if (auto property_list = extension->optional_node_child("PropertyList")) {
1275                                                                 if (auto property = property_list->optional_node_child("Property")) {
1276                                                                         if (auto name = property->optional_node_child("Name")) {
1277                                                                                 if (name->content() != "DCP Constraints Profile") {
1278                                                                                         malformed = "<Name> property should be 'DCP Constraints Profile'";
1279                                                                                 }
1280                                                                         }
1281                                                                         if (auto value = property->optional_node_child("Value")) {
1282                                                                                 if (value->content() != "SMPTE-RDD-52:2020-Bv2.1") {
1283                                                                                         malformed = "<Value> property should be 'SMPTE-RDD-52:2020-Bv2.1'";
1284                                                                                 }
1285                                                                         }
1286                                                                 }
1287                                                         }
1288                                                 }
1289                                         } else {
1290                                                 missing = true;
1291                                         }
1292                                 }
1293                         }
1294                 }
1295         }
1296
1297         if (missing) {
1298                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_EXTENSION_METADATA, cpl->id(), cpl->file().get()});
1299         } else if (!malformed.empty()) {
1300                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_EXTENSION_METADATA, malformed, cpl->file().get()});
1301         }
1302 }
1303
1304
1305 bool
1306 pkl_has_encrypted_assets(shared_ptr<const DCP> dcp, shared_ptr<const PKL> pkl)
1307 {
1308         vector<string> encrypted;
1309         for (auto i: dcp->cpls()) {
1310                 for (auto j: i->reel_file_assets()) {
1311                         if (j->asset_ref().resolved()) {
1312                                 auto mxf = dynamic_pointer_cast<MXF>(j->asset_ref().asset());
1313                                 if (mxf && mxf->encrypted()) {
1314                                         encrypted.push_back(j->asset_ref().id());
1315                                 }
1316                         }
1317                 }
1318         }
1319
1320         for (auto i: pkl->assets()) {
1321                 if (find(encrypted.begin(), encrypted.end(), i->id()) != encrypted.end()) {
1322                         return true;
1323                 }
1324         }
1325
1326         return false;
1327 }
1328
1329
1330 static
1331 void
1332 verify_reel(
1333         shared_ptr<const DCP> dcp,
1334         shared_ptr<const CPL> cpl,
1335         shared_ptr<const Reel> reel,
1336         optional<dcp::Size> main_picture_active_area,
1337         function<void (string, optional<boost::filesystem::path>)> stage,
1338         boost::filesystem::path xsd_dtd_directory,
1339         function<void (float)> progress,
1340         VerificationOptions options,
1341         vector<VerificationNote>& notes,
1342         State& state,
1343         bool* have_main_subtitle,
1344         bool* have_no_main_subtitle,
1345         size_t* most_closed_captions,
1346         size_t* fewest_closed_captions,
1347         map<Marker, Time>* markers_seen
1348         )
1349 {
1350         for (auto i: reel->assets()) {
1351                 if (i->duration() && (i->duration().get() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1352                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_DURATION, i->id()});
1353                 }
1354                 if ((i->intrinsic_duration() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1355                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_INTRINSIC_DURATION, i->id()});
1356                 }
1357                 auto file_asset = dynamic_pointer_cast<ReelFileAsset>(i);
1358                 if (i->encryptable() && !file_asset->hash()) {
1359                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_HASH, i->id()});
1360                 }
1361         }
1362
1363         if (dcp->standard() == Standard::SMPTE) {
1364                 boost::optional<int64_t> duration;
1365                 for (auto i: reel->assets()) {
1366                         if (!duration) {
1367                                 duration = i->actual_duration();
1368                         } else if (*duration != i->actual_duration()) {
1369                                 notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_ASSET_DURATION});
1370                                 break;
1371                         }
1372                 }
1373         }
1374
1375         if (reel->main_picture()) {
1376                 /* Check reel stuff */
1377                 auto const frame_rate = reel->main_picture()->frame_rate();
1378                 if (frame_rate.denominator != 1 ||
1379                     (frame_rate.numerator != 24 &&
1380                      frame_rate.numerator != 25 &&
1381                      frame_rate.numerator != 30 &&
1382                      frame_rate.numerator != 48 &&
1383                      frame_rate.numerator != 50 &&
1384                      frame_rate.numerator != 60 &&
1385                      frame_rate.numerator != 96)) {
1386                         notes.push_back({
1387                                 VerificationNote::Type::ERROR,
1388                                 VerificationNote::Code::INVALID_PICTURE_FRAME_RATE,
1389                                 String::compose("%1/%2", frame_rate.numerator, frame_rate.denominator)
1390                         });
1391                 }
1392                 /* Check asset */
1393                 if (reel->main_picture()->asset_ref().resolved()) {
1394                         verify_main_picture_asset(dcp, reel->main_picture(), stage, progress, options, notes);
1395                         auto const asset_size = reel->main_picture()->asset()->size();
1396                         if (main_picture_active_area) {
1397                                 if (main_picture_active_area->width > asset_size.width) {
1398                                         notes.push_back({
1399                                                         VerificationNote::Type::ERROR,
1400                                                         VerificationNote::Code::INVALID_MAIN_PICTURE_ACTIVE_AREA,
1401                                                         String::compose("width %1 is bigger than the asset width %2", main_picture_active_area->width, asset_size.width),
1402                                                         cpl->file().get()
1403                                                         });
1404                                 }
1405                                 if (main_picture_active_area->height > asset_size.height) {
1406                                         notes.push_back({
1407                                                         VerificationNote::Type::ERROR,
1408                                                         VerificationNote::Code::INVALID_MAIN_PICTURE_ACTIVE_AREA,
1409                                                         String::compose("height %1 is bigger than the asset height %2", main_picture_active_area->height, asset_size.height),
1410                                                         cpl->file().get()
1411                                                         });
1412                                 }
1413                         }
1414                 }
1415         }
1416
1417         if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
1418                 verify_main_sound_asset(dcp, reel->main_sound(), stage, progress, options, notes, state);
1419         }
1420
1421         if (reel->main_subtitle()) {
1422                 verify_main_subtitle_reel(reel->main_subtitle(), notes);
1423                 if (reel->main_subtitle()->asset_ref().resolved()) {
1424                         verify_subtitle_asset(reel->main_subtitle()->asset(), reel->main_subtitle()->duration(), stage, xsd_dtd_directory, notes, state);
1425                 }
1426                 *have_main_subtitle = true;
1427         } else {
1428                 *have_no_main_subtitle = true;
1429         }
1430
1431         for (auto i: reel->closed_captions()) {
1432                 verify_closed_caption_reel(i, notes);
1433                 if (i->asset_ref().resolved()) {
1434                         verify_closed_caption_asset(i->asset(), i->duration(), stage, xsd_dtd_directory, notes);
1435                 }
1436         }
1437
1438         if (reel->main_markers()) {
1439                 for (auto const& i: reel->main_markers()->get()) {
1440                         markers_seen->insert(i);
1441                 }
1442                 if (reel->main_markers()->entry_point()) {
1443                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::UNEXPECTED_ENTRY_POINT});
1444                 }
1445                 if (reel->main_markers()->duration()) {
1446                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::UNEXPECTED_DURATION});
1447                 }
1448         }
1449
1450         *fewest_closed_captions = std::min(*fewest_closed_captions, reel->closed_captions().size());
1451         *most_closed_captions = std::max(*most_closed_captions, reel->closed_captions().size());
1452
1453 }
1454
1455
1456 static
1457 void
1458 verify_cpl(
1459         shared_ptr<const DCP> dcp,
1460         shared_ptr<const CPL> cpl,
1461         function<void (string, optional<boost::filesystem::path>)> stage,
1462         boost::filesystem::path xsd_dtd_directory,
1463         function<void (float)> progress,
1464         VerificationOptions options,
1465         vector<VerificationNote>& notes,
1466         State& state
1467         )
1468 {
1469         stage("Checking CPL", cpl->file());
1470         validate_xml(cpl->file().get(), xsd_dtd_directory, notes);
1471
1472         if (cpl->any_encrypted() && !cpl->all_encrypted()) {
1473                 notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::PARTIALLY_ENCRYPTED});
1474         }
1475
1476         for (auto const& i: cpl->additional_subtitle_languages()) {
1477                 verify_language_tag(i, notes);
1478         }
1479
1480         if (!cpl->content_kind().scope() || *cpl->content_kind().scope() == "http://www.smpte-ra.org/schemas/429-7/2006/CPL#standard-content") {
1481                 /* This is a content kind from http://www.smpte-ra.org/schemas/429-7/2006/CPL#standard-content; make sure it's one
1482                  * of the approved ones.
1483                  */
1484                 auto all = ContentKind::all();
1485                 auto name = cpl->content_kind().name();
1486                 transform(name.begin(), name.end(), name.begin(), ::tolower);
1487                 auto iter = std::find_if(all.begin(), all.end(), [name](ContentKind const& k) { return !k.scope() && k.name() == name; });
1488                 if (iter == all.end()) {
1489                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_CONTENT_KIND, cpl->content_kind().name()});
1490                 }
1491         }
1492
1493         if (cpl->release_territory()) {
1494                 if (!cpl->release_territory_scope() || cpl->release_territory_scope().get() != "http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata#scope/release-territory/UNM49") {
1495                         auto terr = cpl->release_territory().get();
1496                         /* Must be a valid region tag, or "001" */
1497                         try {
1498                                 LanguageTag::RegionSubtag test(terr);
1499                         } catch (...) {
1500                                 if (terr != "001") {
1501                                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, terr});
1502                                 }
1503                         }
1504                 }
1505         }
1506
1507         if (dcp->standard() == Standard::SMPTE) {
1508                 if (!cpl->annotation_text()) {
1509                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1510                 } else if (cpl->annotation_text().get() != cpl->content_title_text()) {
1511                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1512                 }
1513         }
1514
1515         for (auto i: dcp->pkls()) {
1516                 /* Check that the CPL's hash corresponds to the PKL */
1517                 optional<string> h = i->hash(cpl->id());
1518                 if (h && make_digest(ArrayData(*cpl->file())) != *h) {
1519                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_CPL_HASHES, cpl->id(), cpl->file().get()});
1520                 }
1521
1522                 /* Check that any PKL with a single CPL has its AnnotationText the same as the CPL's ContentTitleText */
1523                 optional<string> required_annotation_text;
1524                 for (auto j: i->assets()) {
1525                         /* See if this is a CPL */
1526                         for (auto k: dcp->cpls()) {
1527                                 if (j->id() == k->id()) {
1528                                         if (!required_annotation_text) {
1529                                                 /* First CPL we have found; this is the required AnnotationText unless we find another */
1530                                                 required_annotation_text = cpl->content_title_text();
1531                                         } else {
1532                                                 /* There's more than one CPL so we don't care what the PKL's AnnotationText is */
1533                                                 required_annotation_text = boost::none;
1534                                         }
1535                                 }
1536                         }
1537                 }
1538
1539                 if (required_annotation_text && i->annotation_text() != required_annotation_text) {
1540                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL, i->id(), i->file().get()});
1541                 }
1542         }
1543
1544         /* set to true if any reel has a MainSubtitle */
1545         auto have_main_subtitle = false;
1546         /* set to true if any reel has no MainSubtitle */
1547         auto have_no_main_subtitle = false;
1548         /* fewest number of closed caption assets seen in a reel */
1549         size_t fewest_closed_captions = SIZE_MAX;
1550         /* most number of closed caption assets seen in a reel */
1551         size_t most_closed_captions = 0;
1552         map<Marker, Time> markers_seen;
1553
1554         auto const main_picture_active_area = cpl->main_picture_active_area();
1555         if (main_picture_active_area && (main_picture_active_area->width % 2)) {
1556                 notes.push_back({
1557                                 VerificationNote::Type::ERROR,
1558                                 VerificationNote::Code::INVALID_MAIN_PICTURE_ACTIVE_AREA,
1559                                 String::compose("width %1 is not a multiple of 2", main_picture_active_area->width),
1560                                 cpl->file().get()
1561                         });
1562         }
1563         if (main_picture_active_area && (main_picture_active_area->height % 2)) {
1564                 notes.push_back({
1565                                 VerificationNote::Type::ERROR,
1566                                 VerificationNote::Code::INVALID_MAIN_PICTURE_ACTIVE_AREA,
1567                                 String::compose("height %1 is not a multiple of 2", main_picture_active_area->height),
1568                                 cpl->file().get()
1569                         });
1570         }
1571
1572         for (auto reel: cpl->reels()) {
1573                 stage("Checking reel", optional<boost::filesystem::path>());
1574                 verify_reel(
1575                         dcp,
1576                         cpl,
1577                         reel,
1578                         main_picture_active_area,
1579                         stage,
1580                         xsd_dtd_directory,
1581                         progress,
1582                         options,
1583                         notes,
1584                         state,
1585                         &have_main_subtitle,
1586                         &have_no_main_subtitle,
1587                         &most_closed_captions,
1588                         &fewest_closed_captions,
1589                         &markers_seen
1590                         );
1591         }
1592
1593         verify_text_details(cpl->reels(), notes);
1594
1595         if (dcp->standard() == Standard::SMPTE) {
1596                 if (auto msc = cpl->main_sound_configuration()) {
1597                         if (state.audio_channels && msc->channels() != *state.audio_channels) {
1598                                 notes.push_back({
1599                                                 VerificationNote::Type::ERROR,
1600                                                 VerificationNote::Code::INVALID_MAIN_SOUND_CONFIGURATION,
1601                                                 String::compose("MainSoundConfiguration has %1 channels but sound assets have %2", msc->channels(), *state.audio_channels),
1602                                                 cpl->file().get()
1603                                         });
1604                         }
1605                 }
1606
1607                 if (have_main_subtitle && have_no_main_subtitle) {
1608                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS});
1609                 }
1610
1611                 if (fewest_closed_captions != most_closed_captions) {
1612                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS});
1613                 }
1614
1615                 if (cpl->content_kind() == ContentKind::FEATURE) {
1616                         if (markers_seen.find(Marker::FFEC) == markers_seen.end()) {
1617                                 notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFEC_IN_FEATURE});
1618                         }
1619                         if (markers_seen.find(Marker::FFMC) == markers_seen.end()) {
1620                                 notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFMC_IN_FEATURE});
1621                         }
1622                 }
1623
1624                 auto ffoc = markers_seen.find(Marker::FFOC);
1625                 if (ffoc == markers_seen.end()) {
1626                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_FFOC});
1627                 } else if (ffoc->second.e != 1) {
1628                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_FFOC, raw_convert<string>(ffoc->second.e)});
1629                 }
1630
1631                 auto lfoc = markers_seen.find(Marker::LFOC);
1632                 if (lfoc == markers_seen.end()) {
1633                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_LFOC});
1634                 } else {
1635                         auto lfoc_time = lfoc->second.as_editable_units_ceil(lfoc->second.tcr);
1636                         if (lfoc_time != (cpl->reels().back()->duration() - 1)) {
1637                                 notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_LFOC, raw_convert<string>(lfoc_time)});
1638                         }
1639                 }
1640
1641                 LinesCharactersResult result;
1642                 for (auto reel: cpl->reels()) {
1643                         if (reel->main_subtitle() && reel->main_subtitle()->asset()) {
1644                                 verify_text_lines_and_characters(reel->main_subtitle()->asset(), 52, 79, &result);
1645                         }
1646                 }
1647
1648                 if (result.line_count_exceeded) {
1649                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT});
1650                 }
1651                 if (result.error_length_exceeded) {
1652                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH});
1653                 } else if (result.warning_length_exceeded) {
1654                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH});
1655                 }
1656
1657                 result = LinesCharactersResult();
1658                 for (auto reel: cpl->reels()) {
1659                         for (auto i: reel->closed_captions()) {
1660                                 if (i->asset()) {
1661                                         verify_text_lines_and_characters(i->asset(), 32, 32, &result);
1662                                 }
1663                         }
1664                 }
1665
1666                 if (result.line_count_exceeded) {
1667                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT});
1668                 }
1669                 if (result.error_length_exceeded) {
1670                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH});
1671                 }
1672
1673                 if (!cpl->read_composition_metadata()) {
1674                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA, cpl->id(), cpl->file().get()});
1675                 } else if (!cpl->version_number()) {
1676                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER, cpl->id(), cpl->file().get()});
1677                 }
1678
1679                 verify_extension_metadata(cpl, notes);
1680
1681                 if (cpl->any_encrypted()) {
1682                         cxml::Document doc("CompositionPlaylist");
1683                         DCP_ASSERT(cpl->file());
1684                         doc.read_file(cpl->file().get());
1685                         if (!doc.optional_node_child("Signature")) {
1686                                 notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT, cpl->id(), cpl->file().get()});
1687                         }
1688                 }
1689         }
1690 }
1691
1692
1693 static
1694 void
1695 verify_pkl(
1696         shared_ptr<const DCP> dcp,
1697         shared_ptr<const PKL> pkl,
1698         boost::filesystem::path xsd_dtd_directory,
1699         vector<VerificationNote>& notes
1700         )
1701 {
1702         validate_xml(pkl->file().get(), xsd_dtd_directory, notes);
1703
1704         if (pkl_has_encrypted_assets(dcp, pkl)) {
1705                 cxml::Document doc("PackingList");
1706                 doc.read_file(pkl->file().get());
1707                 if (!doc.optional_node_child("Signature")) {
1708                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT, pkl->id(), pkl->file().get()});
1709                 }
1710         }
1711
1712         set<string> uuid_set;
1713         for (auto asset: pkl->assets()) {
1714                 if (!uuid_set.insert(asset->id()).second) {
1715                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::DUPLICATE_ASSET_ID_IN_PKL, pkl->id(), pkl->file().get()});
1716                         break;
1717                 }
1718         }
1719 }
1720
1721
1722
1723 static
1724 void
1725 verify_assetmap(
1726         shared_ptr<const DCP> dcp,
1727         boost::filesystem::path xsd_dtd_directory,
1728         vector<VerificationNote>& notes
1729         )
1730 {
1731         auto asset_map = dcp->asset_map();
1732         DCP_ASSERT(asset_map);
1733
1734         validate_xml(asset_map->file().get(), xsd_dtd_directory, notes);
1735
1736         set<string> uuid_set;
1737         for (auto const& asset: asset_map->assets()) {
1738                 if (!uuid_set.insert(asset.id()).second) {
1739                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::DUPLICATE_ASSET_ID_IN_ASSETMAP, asset_map->id(), asset_map->file().get()});
1740                         break;
1741                 }
1742         }
1743 }
1744
1745
1746 vector<VerificationNote>
1747 dcp::verify (
1748         vector<boost::filesystem::path> directories,
1749         function<void (string, optional<boost::filesystem::path>)> stage,
1750         function<void (float)> progress,
1751         VerificationOptions options,
1752         optional<boost::filesystem::path> xsd_dtd_directory
1753         )
1754 {
1755         if (!xsd_dtd_directory) {
1756                 xsd_dtd_directory = resources_directory() / "xsd";
1757         }
1758         *xsd_dtd_directory = boost::filesystem::canonical (*xsd_dtd_directory);
1759
1760         vector<VerificationNote> notes;
1761         State state{};
1762
1763         vector<shared_ptr<DCP>> dcps;
1764         for (auto i: directories) {
1765                 dcps.push_back (make_shared<DCP>(i));
1766         }
1767
1768         for (auto dcp: dcps) {
1769                 stage ("Checking DCP", dcp->directory());
1770                 bool carry_on = true;
1771                 try {
1772                         dcp->read (&notes, true);
1773                 } catch (MissingAssetmapError& e) {
1774                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1775                         carry_on = false;
1776                 } catch (ReadError& e) {
1777                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1778                 } catch (XMLError& e) {
1779                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1780                 } catch (MXFFileError& e) {
1781                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1782                 } catch (cxml::Error& e) {
1783                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1784                 }
1785
1786                 if (!carry_on) {
1787                         continue;
1788                 }
1789
1790                 if (dcp->standard() != Standard::SMPTE) {
1791                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_STANDARD});
1792                 }
1793
1794                 for (auto cpl: dcp->cpls()) {
1795                         verify_cpl(
1796                                 dcp,
1797                                 cpl,
1798                                 stage,
1799                                 *xsd_dtd_directory,
1800                                 progress,
1801                                 options,
1802                                 notes,
1803                                 state
1804                                 );
1805                 }
1806
1807                 for (auto pkl: dcp->pkls()) {
1808                         stage("Checking PKL", pkl->file());
1809                         verify_pkl(dcp, pkl, *xsd_dtd_directory, notes);
1810                 }
1811
1812                 if (dcp->asset_map_file()) {
1813                         stage("Checking ASSETMAP", dcp->asset_map_file().get());
1814                         verify_assetmap(dcp, *xsd_dtd_directory, notes);
1815                 } else {
1816                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_ASSETMAP});
1817                 }
1818         }
1819
1820         return notes;
1821 }
1822
1823
1824 string
1825 dcp::note_to_string (VerificationNote note)
1826 {
1827         /** These strings should say what is wrong, incorporating any extra details (ID, filenames etc.).
1828          *
1829          *  e.g. "ClosedCaption asset has no <EntryPoint> tag.",
1830          *  not "ClosedCaption assets must have an <EntryPoint> tag."
1831          *
1832          *  It's OK to use XML tag names where they are clear.
1833          *  If both ID and filename are available, use only the ID.
1834          *  End messages with a full stop.
1835          *  Messages should not mention whether or not their errors are a part of Bv2.1.
1836          */
1837         switch (note.code()) {
1838         case VerificationNote::Code::FAILED_READ:
1839                 return *note.note();
1840         case VerificationNote::Code::MISMATCHED_CPL_HASHES:
1841                 return String::compose("The hash of the CPL %1 in the PKL does not agree with the CPL file.", note.note().get());
1842         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE:
1843                 return String::compose("The picture in a reel has an invalid frame rate %1.", note.note().get());
1844         case VerificationNote::Code::INCORRECT_PICTURE_HASH:
1845                 return String::compose("The hash of the picture asset %1 does not agree with the PKL file.", note.file()->filename());
1846         case VerificationNote::Code::MISMATCHED_PICTURE_HASHES:
1847                 return String::compose("The PKL and CPL hashes differ for the picture asset %1.", note.file()->filename());
1848         case VerificationNote::Code::INCORRECT_SOUND_HASH:
1849                 return String::compose("The hash of the sound asset %1 does not agree with the PKL file.", note.file()->filename());
1850         case VerificationNote::Code::MISMATCHED_SOUND_HASHES:
1851                 return String::compose("The PKL and CPL hashes differ for the sound asset %1.", note.file()->filename());
1852         case VerificationNote::Code::EMPTY_ASSET_PATH:
1853                 return "The asset map contains an empty asset path.";
1854         case VerificationNote::Code::MISSING_ASSET:
1855                 return String::compose("The file %1 for an asset in the asset map cannot be found.", note.file()->filename());
1856         case VerificationNote::Code::MISMATCHED_STANDARD:
1857                 return "The DCP contains both SMPTE and Interop parts.";
1858         case VerificationNote::Code::INVALID_XML:
1859                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
1860         case VerificationNote::Code::MISSING_ASSETMAP:
1861                 return "No ASSETMAP or ASSETMAP.xml was found.";
1862         case VerificationNote::Code::INVALID_INTRINSIC_DURATION:
1863                 return String::compose("The intrinsic duration of the asset %1 is less than 1 second.", note.note().get());
1864         case VerificationNote::Code::INVALID_DURATION:
1865                 return String::compose("The duration of the asset %1 is less than 1 second.", note.note().get());
1866         case VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1867                 return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place.", note.file()->filename());
1868         case VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1869                 return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
1870         case VerificationNote::Code::EXTERNAL_ASSET:
1871                 return String::compose("The asset %1 that this DCP refers to is not included in the DCP.  It may be a VF.", note.note().get());
1872         case VerificationNote::Code::THREED_ASSET_MARKED_AS_TWOD:
1873                 return String::compose("The asset %1 is 3D but its MXF is marked as 2D.", note.file()->filename());
1874         case VerificationNote::Code::INVALID_STANDARD:
1875                 return "This DCP does not use the SMPTE standard.";
1876         case VerificationNote::Code::INVALID_LANGUAGE:
1877                 return String::compose("The DCP specifies a language '%1' which does not conform to the RFC 5646 standard.", note.note().get());
1878         case VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS:
1879                 return String::compose("The size %1 of picture asset %2 is not allowed.", note.note().get(), note.file()->filename());
1880         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K:
1881                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 2K DCPs.", note.note().get(), note.file()->filename());
1882         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K:
1883                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 4K DCPs.", note.note().get(), note.file()->filename());
1884         case VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D:
1885                 return "3D 4K DCPs are not allowed.";
1886         case VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES:
1887                 return String::compose("The size %1 of the closed caption asset %2 is larger than the 256KB maximum.", note.note().get(), note.file()->filename());
1888         case VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES:
1889                 return String::compose("The size %1 of the timed text asset %2 is larger than the 115MB maximum.", note.note().get(), note.file()->filename());
1890         case VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES:
1891                 return String::compose("The size %1 of the fonts in timed text asset %2 is larger than the 10MB maximum.", note.note().get(), note.file()->filename());
1892         case VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE:
1893                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <Language> tag.", note.file()->filename());
1894         case VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES:
1895                 return "Some subtitle assets have different <Language> tags than others";
1896         case VerificationNote::Code::MISSING_SUBTITLE_START_TIME:
1897                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <StartTime> tag.", note.file()->filename());
1898         case VerificationNote::Code::INVALID_SUBTITLE_START_TIME:
1899                 return String::compose("The XML for a SMPTE subtitle asset %1 has a non-zero <StartTime> tag.", note.file()->filename());
1900         case VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME:
1901                 return "The first subtitle or closed caption is less than 4 seconds from the start of the DCP.";
1902         case VerificationNote::Code::INVALID_SUBTITLE_DURATION:
1903                 return "At least one subtitle lasts less than 15 frames.";
1904         case VerificationNote::Code::INVALID_SUBTITLE_SPACING:
1905                 return "At least one pair of subtitles is separated by less than 2 frames.";
1906         case VerificationNote::Code::SUBTITLE_OVERLAPS_REEL_BOUNDARY:
1907                 return "At least one subtitle extends outside of its reel.";
1908         case VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT:
1909                 return "There are more than 3 subtitle lines in at least one place in the DCP.";
1910         case VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH:
1911                 return "There are more than 52 characters in at least one subtitle line.";
1912         case VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH:
1913                 return "There are more than 79 characters in at least one subtitle line.";
1914         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT:
1915                 return "There are more than 3 closed caption lines in at least one place.";
1916         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH:
1917                 return "There are more than 32 characters in at least one closed caption line.";
1918         case VerificationNote::Code::INVALID_SOUND_FRAME_RATE:
1919                 return String::compose("The sound asset %1 has a sampling rate of %2", note.file()->filename(), note.note().get());
1920         case VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT:
1921                 return String::compose("The CPL %1 has no <AnnotationText> tag.", note.note().get());
1922         case VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT:
1923                 return String::compose("The CPL %1 has an <AnnotationText> which differs from its <ContentTitleText>.", note.note().get());
1924         case VerificationNote::Code::MISMATCHED_ASSET_DURATION:
1925                 return "All assets in a reel do not have the same duration.";
1926         case VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS:
1927                 return "At least one reel contains a subtitle asset, but some reel(s) do not.";
1928         case VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS:
1929                 return "At least one reel has closed captions, but reels have different numbers of closed caption assets.";
1930         case VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT:
1931                 return String::compose("The subtitle asset %1 has no <EntryPoint> tag.", note.note().get());
1932         case VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT:
1933                 return String::compose("The subtitle asset %1 has an <EntryPoint> other than 0.", note.note().get());
1934         case VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT:
1935                 return String::compose("The closed caption asset %1 has no <EntryPoint> tag.", note.note().get());
1936         case VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT:
1937                 return String::compose("The closed caption asset %1 has an <EntryPoint> other than 0.", note.note().get());
1938         case VerificationNote::Code::MISSING_HASH:
1939                 return String::compose("The asset %1 has no <Hash> tag in the CPL.", note.note().get());
1940         case VerificationNote::Code::MISSING_FFEC_IN_FEATURE:
1941                 return "The DCP is marked as a Feature but there is no FFEC (first frame of end credits) marker.";
1942         case VerificationNote::Code::MISSING_FFMC_IN_FEATURE:
1943                 return "The DCP is marked as a Feature but there is no FFMC (first frame of moving credits) marker.";
1944         case VerificationNote::Code::MISSING_FFOC:
1945                 return "There should be a FFOC (first frame of content) marker.";
1946         case VerificationNote::Code::MISSING_LFOC:
1947                 return "There should be a LFOC (last frame of content) marker.";
1948         case VerificationNote::Code::INCORRECT_FFOC:
1949                 return String::compose("The FFOC marker is %1 instead of 1", note.note().get());
1950         case VerificationNote::Code::INCORRECT_LFOC:
1951                 return String::compose("The LFOC marker is %1 instead of 1 less than the duration of the last reel.", note.note().get());
1952         case VerificationNote::Code::MISSING_CPL_METADATA:
1953                 return String::compose("The CPL %1 has no <CompositionMetadataAsset> tag.", note.note().get());
1954         case VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER:
1955                 return String::compose("The CPL %1 has no <VersionNumber> in its <CompositionMetadataAsset>.", note.note().get());
1956         case VerificationNote::Code::MISSING_EXTENSION_METADATA:
1957                 return String::compose("The CPL %1 has no <ExtensionMetadata> in its <CompositionMetadataAsset>.", note.note().get());
1958         case VerificationNote::Code::INVALID_EXTENSION_METADATA:
1959                 return String::compose("The CPL %1 has a malformed <ExtensionMetadata> (%2).", note.file()->filename(), note.note().get());
1960         case VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT:
1961                 return String::compose("The CPL %1, which has encrypted content, is not signed.", note.note().get());
1962         case VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT:
1963                 return String::compose("The PKL %1, which has encrypted content, is not signed.", note.note().get());
1964         case VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL:
1965                 return String::compose("The PKL %1 has only one CPL but its <AnnotationText> does not match the CPL's <ContentTitleText>.", note.note().get());
1966         case VerificationNote::Code::PARTIALLY_ENCRYPTED:
1967                 return "Some assets are encrypted but some are not.";
1968         case VerificationNote::Code::INVALID_JPEG2000_CODESTREAM:
1969                 return String::compose("The JPEG2000 codestream for at least one frame is invalid (%1).", note.note().get());
1970         case VerificationNote::Code::INVALID_JPEG2000_GUARD_BITS_FOR_2K:
1971                 return String::compose("The JPEG2000 codestream uses %1 guard bits in a 2K image instead of 1.", note.note().get());
1972         case VerificationNote::Code::INVALID_JPEG2000_GUARD_BITS_FOR_4K:
1973                 return String::compose("The JPEG2000 codestream uses %1 guard bits in a 4K image instead of 2.", note.note().get());
1974         case VerificationNote::Code::INVALID_JPEG2000_TILE_SIZE:
1975                 return "The JPEG2000 tile size is not the same as the image size.";
1976         case VerificationNote::Code::INVALID_JPEG2000_CODE_BLOCK_WIDTH:
1977                 return String::compose("The JPEG2000 codestream uses a code block width of %1 instead of 32.", note.note().get());
1978         case VerificationNote::Code::INVALID_JPEG2000_CODE_BLOCK_HEIGHT:
1979                 return String::compose("The JPEG2000 codestream uses a code block height of %1 instead of 32.", note.note().get());
1980         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER_COUNT_FOR_2K:
1981                 return String::compose("%1 POC markers found in 2K JPEG2000 codestream instead of 0.", note.note().get());
1982         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER_COUNT_FOR_4K:
1983                 return String::compose("%1 POC markers found in 4K JPEG2000 codestream instead of 1.", note.note().get());
1984         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER:
1985                 return String::compose("Incorrect POC marker content found (%1).", note.note().get());
1986         case VerificationNote::Code::INVALID_JPEG2000_POC_MARKER_LOCATION:
1987                 return "POC marker found outside main header.";
1988         case VerificationNote::Code::INVALID_JPEG2000_TILE_PARTS_FOR_2K:
1989                 return String::compose("The JPEG2000 codestream has %1 tile parts in a 2K image instead of 3.", note.note().get());
1990         case VerificationNote::Code::INVALID_JPEG2000_TILE_PARTS_FOR_4K:
1991                 return String::compose("The JPEG2000 codestream has %1 tile parts in a 4K image instead of 6.", note.note().get());
1992         case VerificationNote::Code::MISSING_JPEG200_TLM_MARKER:
1993                 return "No TLM marker was found in a JPEG2000 codestream.";
1994         case VerificationNote::Code::MISMATCHED_TIMED_TEXT_RESOURCE_ID:
1995                 return "The Resource ID in a timed text MXF did not match the ID of the contained XML.";
1996         case VerificationNote::Code::INCORRECT_TIMED_TEXT_ASSET_ID:
1997                 return "The Asset ID in a timed text MXF is the same as the Resource ID or that of the contained XML.";
1998         case VerificationNote::Code::MISMATCHED_TIMED_TEXT_DURATION:
1999         {
2000                 vector<string> parts;
2001                 boost::split (parts, note.note().get(), boost::is_any_of(" "));
2002                 DCP_ASSERT (parts.size() == 2);
2003                 return String::compose("The reel duration of some timed text (%1) is not the same as the ContainerDuration of its MXF (%2).", parts[0], parts[1]);
2004         }
2005         case VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED:
2006                 return "Some aspect of this DCP could not be checked because it is encrypted.";
2007         case VerificationNote::Code::EMPTY_TEXT:
2008                 return "There is an empty <Text> node in a subtitle or closed caption.";
2009         case VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_VALIGN:
2010                 return "Some closed <Text> or <Image> nodes have different vertical alignments within a <Subtitle>.";
2011         case VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ORDERING:
2012                 return "Some closed captions are not listed in the order of their vertical position.";
2013         case VerificationNote::Code::UNEXPECTED_ENTRY_POINT:
2014                 return "There is an <EntryPoint> node inside a <MainMarkers>.";
2015         case VerificationNote::Code::UNEXPECTED_DURATION:
2016                 return "There is an <Duration> node inside a <MainMarkers>.";
2017         case VerificationNote::Code::INVALID_CONTENT_KIND:
2018                 return String::compose("<ContentKind> has an invalid value %1.", note.note().get());
2019         case VerificationNote::Code::INVALID_MAIN_PICTURE_ACTIVE_AREA:
2020                 return String::compose("<MainPictureActiveaArea> has an invalid value: %1", note.note().get());
2021         case VerificationNote::Code::DUPLICATE_ASSET_ID_IN_PKL:
2022                 return String::compose("The PKL %1 has more than one asset with the same ID.", note.note().get());
2023         case VerificationNote::Code::DUPLICATE_ASSET_ID_IN_ASSETMAP:
2024                 return String::compose("The ASSETMAP %1 has more than one asset with the same ID.", note.note().get());
2025         case VerificationNote::Code::MISSING_SUBTITLE:
2026                 return String::compose("The subtitle asset %1 has no subtitles.", note.note().get());
2027         case VerificationNote::Code::INVALID_SUBTITLE_ISSUE_DATE:
2028                 return String::compose("<IssueDate> has an invalid value: %1", note.note().get());
2029         case VerificationNote::Code::MISMATCHED_SOUND_CHANNEL_COUNTS:
2030                 return String::compose("The sound assets do not all have the same channel count; the first to differ is %1", note.file()->filename());
2031         case VerificationNote::Code::INVALID_MAIN_SOUND_CONFIGURATION:
2032                 return String::compose("<MainSoundConfiguration> has an invalid value: %1", note.note().get());
2033         case VerificationNote::Code::MISSING_FONT:
2034                 return String::compose("The font file for font ID \"%1\" was not found, or was not referred to in the ASSETMAP.", note.note().get());
2035         case VerificationNote::Code::INVALID_JPEG2000_TILE_PART_SIZE:
2036                 return String::compose(
2037                         "Frame %1 has an image component that is too large (component %2 is %3 bytes in size).",
2038                         note.frame().get(), note.component().get(), note.size().get()
2039                         );
2040         case VerificationNote::Code::INCORRECT_SUBTITLE_NAMESPACE_COUNT:
2041                 return String::compose("The XML in the subtitle asset %1 has more than one namespace declaration.", note.note().get());
2042         }
2043
2044         return "";
2045 }
2046
2047
2048 bool
2049 dcp::operator== (dcp::VerificationNote const& a, dcp::VerificationNote const& b)
2050 {
2051         return a.type() == b.type() && a.code() == b.code() && a.note() == b.note() && a.file() == b.file() && a.line() == b.line();
2052 }
2053
2054
2055 bool
2056 dcp::operator< (dcp::VerificationNote const& a, dcp::VerificationNote const& b)
2057 {
2058         if (a.type() != b.type()) {
2059                 return a.type() < b.type();
2060         }
2061
2062         if (a.code() != b.code()) {
2063                 return a.code() < b.code();
2064         }
2065
2066         if (a.note() != b.note()) {
2067                 return a.note().get_value_or("") < b.note().get_value_or("");
2068         }
2069
2070         if (a.file() != b.file()) {
2071                 return a.file().get_value_or("") < b.file().get_value_or("");
2072         }
2073
2074         return a.line().get_value_or(0) < b.line().get_value_or(0);
2075 }
2076
2077
2078 std::ostream&
2079 dcp::operator<< (std::ostream& s, dcp::VerificationNote const& note)
2080 {
2081         s << note_to_string (note);
2082         if (note.note()) {
2083                 s << " [" << note.note().get() << "]";
2084         }
2085         if (note.file()) {
2086                 s << " [" << note.file().get() << "]";
2087         }
2088         if (note.line()) {
2089                 s << " [" << note.line().get() << "]";
2090         }
2091         return s;
2092 }
2093