Cleanup: replace a nasty try/catch.
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2021 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34
35 /** @file  src/verify.cc
36  *  @brief dcp::verify() method and associated code
37  */
38
39
40 #include "compose.hpp"
41 #include "cpl.h"
42 #include "dcp.h"
43 #include "exceptions.h"
44 #include "interop_subtitle_asset.h"
45 #include "mono_picture_asset.h"
46 #include "mono_picture_frame.h"
47 #include "raw_convert.h"
48 #include "reel.h"
49 #include "reel_closed_caption_asset.h"
50 #include "reel_interop_subtitle_asset.h"
51 #include "reel_markers_asset.h"
52 #include "reel_picture_asset.h"
53 #include "reel_sound_asset.h"
54 #include "reel_smpte_subtitle_asset.h"
55 #include "reel_subtitle_asset.h"
56 #include "smpte_subtitle_asset.h"
57 #include "stereo_picture_asset.h"
58 #include "stereo_picture_frame.h"
59 #include "verify.h"
60 #include "verify_j2k.h"
61 #include <libxml/parserInternals.h>
62 #include <xercesc/dom/DOMAttr.hpp>
63 #include <xercesc/dom/DOMDocument.hpp>
64 #include <xercesc/dom/DOMError.hpp>
65 #include <xercesc/dom/DOMErrorHandler.hpp>
66 #include <xercesc/dom/DOMException.hpp>
67 #include <xercesc/dom/DOMImplementation.hpp>
68 #include <xercesc/dom/DOMImplementationLS.hpp>
69 #include <xercesc/dom/DOMImplementationRegistry.hpp>
70 #include <xercesc/dom/DOMLSParser.hpp>
71 #include <xercesc/dom/DOMLocator.hpp>
72 #include <xercesc/dom/DOMNamedNodeMap.hpp>
73 #include <xercesc/dom/DOMNodeList.hpp>
74 #include <xercesc/framework/LocalFileInputSource.hpp>
75 #include <xercesc/framework/MemBufInputSource.hpp>
76 #include <xercesc/parsers/AbstractDOMParser.hpp>
77 #include <xercesc/parsers/XercesDOMParser.hpp>
78 #include <xercesc/sax/HandlerBase.hpp>
79 #include <xercesc/util/PlatformUtils.hpp>
80 #include <boost/algorithm/string.hpp>
81 #include <iostream>
82 #include <map>
83 #include <regex>
84 #include <set>
85 #include <vector>
86
87
88 using std::cout;
89 using std::dynamic_pointer_cast;
90 using std::list;
91 using std::make_shared;
92 using std::map;
93 using std::max;
94 using std::set;
95 using std::shared_ptr;
96 using std::string;
97 using std::vector;
98 using boost::optional;
99 using boost::function;
100
101
102 using namespace dcp;
103 using namespace xercesc;
104
105
106 static
107 string
108 xml_ch_to_string (XMLCh const * a)
109 {
110         char* x = XMLString::transcode(a);
111         string const o(x);
112         XMLString::release(&x);
113         return o;
114 }
115
116
117 class XMLValidationError
118 {
119 public:
120         XMLValidationError (SAXParseException const & e)
121                 : _message (xml_ch_to_string(e.getMessage()))
122                 , _line (e.getLineNumber())
123                 , _column (e.getColumnNumber())
124                 , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
125                 , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
126         {
127
128         }
129
130         string message () const {
131                 return _message;
132         }
133
134         uint64_t line () const {
135                 return _line;
136         }
137
138         uint64_t column () const {
139                 return _column;
140         }
141
142         string public_id () const {
143                 return _public_id;
144         }
145
146         string system_id () const {
147                 return _system_id;
148         }
149
150 private:
151         string _message;
152         uint64_t _line;
153         uint64_t _column;
154         string _public_id;
155         string _system_id;
156 };
157
158
159 class DCPErrorHandler : public ErrorHandler
160 {
161 public:
162         void warning(const SAXParseException& e) override
163         {
164                 maybe_add (XMLValidationError(e));
165         }
166
167         void error(const SAXParseException& e) override
168         {
169                 maybe_add (XMLValidationError(e));
170         }
171
172         void fatalError(const SAXParseException& e) override
173         {
174                 maybe_add (XMLValidationError(e));
175         }
176
177         void resetErrors() override {
178                 _errors.clear ();
179         }
180
181         list<XMLValidationError> errors () const {
182                 return _errors;
183         }
184
185 private:
186         void maybe_add (XMLValidationError e)
187         {
188                 /* XXX: nasty hack */
189                 if (
190                         e.message().find("schema document") != string::npos &&
191                         e.message().find("has different target namespace from the one specified in instance document") != string::npos
192                         ) {
193                         return;
194                 }
195
196                 _errors.push_back (e);
197         }
198
199         list<XMLValidationError> _errors;
200 };
201
202
203 class StringToXMLCh
204 {
205 public:
206         StringToXMLCh (string a)
207         {
208                 _buffer = XMLString::transcode(a.c_str());
209         }
210
211         StringToXMLCh (StringToXMLCh const&) = delete;
212         StringToXMLCh& operator= (StringToXMLCh const&) = delete;
213
214         ~StringToXMLCh ()
215         {
216                 XMLString::release (&_buffer);
217         }
218
219         XMLCh const * get () const {
220                 return _buffer;
221         }
222
223 private:
224         XMLCh* _buffer;
225 };
226
227
228 class LocalFileResolver : public EntityResolver
229 {
230 public:
231         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
232                 : _xsd_dtd_directory (xsd_dtd_directory)
233         {
234                 /* XXX: I'm not clear on what things need to be in this list; some XSDs are apparently, magically
235                  * found without being here.
236                  */
237                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
238                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
239                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
240                 add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
241                 add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
242                 add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
243                 add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
244                 add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
245                 add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
246                 add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "DCDMSubtitle-2010.xsd");
247                 add("http://www.smpte-ra.org/schemas/428-7/2014/DCST.xsd", "DCDMSubtitle-2014.xsd");
248                 add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
249                 add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
250                 add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
251         }
252
253         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id) override
254         {
255                 if (!system_id) {
256                         return 0;
257                 }
258                 auto system_id_str = xml_ch_to_string (system_id);
259                 auto p = _xsd_dtd_directory;
260                 if (_files.find(system_id_str) == _files.end()) {
261                         p /= system_id_str;
262                 } else {
263                         p /= _files[system_id_str];
264                 }
265                 StringToXMLCh ch (p.string());
266                 return new LocalFileInputSource(ch.get());
267         }
268
269 private:
270         void add (string uri, string file)
271         {
272                 _files[uri] = file;
273         }
274
275         std::map<string, string> _files;
276         boost::filesystem::path _xsd_dtd_directory;
277 };
278
279
280 static void
281 parse (XercesDOMParser& parser, boost::filesystem::path xml)
282 {
283         parser.parse(xml.c_str());
284 }
285
286
287 static void
288 parse (XercesDOMParser& parser, string xml)
289 {
290         xercesc::MemBufInputSource buf(reinterpret_cast<unsigned char const*>(xml.c_str()), xml.size(), "");
291         parser.parse(buf);
292 }
293
294
295 template <class T>
296 void
297 validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, vector<VerificationNote>& notes)
298 {
299         try {
300                 XMLPlatformUtils::Initialize ();
301         } catch (XMLException& e) {
302                 throw MiscError ("Failed to initialise xerces library");
303         }
304
305         DCPErrorHandler error_handler;
306
307         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
308         {
309                 XercesDOMParser parser;
310                 parser.setValidationScheme(XercesDOMParser::Val_Always);
311                 parser.setDoNamespaces(true);
312                 parser.setDoSchema(true);
313
314                 vector<string> schema;
315                 schema.push_back("xml.xsd");
316                 schema.push_back("xmldsig-core-schema.xsd");
317                 schema.push_back("SMPTE-429-7-2006-CPL.xsd");
318                 schema.push_back("SMPTE-429-8-2006-PKL.xsd");
319                 schema.push_back("SMPTE-429-9-2007-AM.xsd");
320                 schema.push_back("Main-Stereo-Picture-CPL.xsd");
321                 schema.push_back("PROTO-ASDCP-CPL-20040511.xsd");
322                 schema.push_back("PROTO-ASDCP-PKL-20040311.xsd");
323                 schema.push_back("PROTO-ASDCP-AM-20040311.xsd");
324                 schema.push_back("DCSubtitle.v1.mattsson.xsd");
325                 schema.push_back("DCDMSubtitle-2010.xsd");
326                 schema.push_back("DCDMSubtitle-2014.xsd");
327                 schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
328                 schema.push_back("SMPTE-429-16.xsd");
329                 schema.push_back("Dolby-2012-AD.xsd");
330                 schema.push_back("SMPTE-429-10-2008.xsd");
331                 schema.push_back("xlink.xsd");
332                 schema.push_back("SMPTE-335-2012.xsd");
333                 schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
334                 schema.push_back("isdcf-mca.xsd");
335                 schema.push_back("SMPTE-429-12-2008.xsd");
336
337                 /* XXX: I'm not especially clear what this is for, but it seems to be necessary.
338                  * Schemas that are not mentioned in this list are not read, and the things
339                  * they describe are not checked.
340                  */
341                 string locations;
342                 for (auto i: schema) {
343                         locations += String::compose("%1 %1 ", i, i);
344                 }
345
346                 parser.setExternalSchemaLocation(locations.c_str());
347                 parser.setValidationSchemaFullChecking(true);
348                 parser.setErrorHandler(&error_handler);
349
350                 LocalFileResolver resolver (xsd_dtd_directory);
351                 parser.setEntityResolver(&resolver);
352
353                 try {
354                         parser.resetDocumentPool();
355                         parse(parser, xml);
356                 } catch (XMLException& e) {
357                         throw MiscError(xml_ch_to_string(e.getMessage()));
358                 } catch (DOMException& e) {
359                         throw MiscError(xml_ch_to_string(e.getMessage()));
360                 } catch (...) {
361                         throw MiscError("Unknown exception from xerces");
362                 }
363         }
364
365         XMLPlatformUtils::Terminate ();
366
367         for (auto i: error_handler.errors()) {
368                 notes.push_back ({
369                         VerificationNote::Type::ERROR,
370                         VerificationNote::Code::INVALID_XML,
371                         i.message(),
372                         boost::trim_copy(i.public_id() + " " + i.system_id()),
373                         i.line()
374                 });
375         }
376 }
377
378
/** Possible results of verify_asset() */
enum class VerifyAssetResult {
        /** no problem was found with the hashes that were compared */
        GOOD,
        /** the asset's hash in the CPL is different to the one in the PKL */
        CPL_PKL_DIFFER,
        /** the hash calculated from the asset is different to the one in the PKL */
        BAD
};
384
385
386 static VerifyAssetResult
387 verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
388 {
389         auto const actual_hash = reel_file_asset->asset_ref()->hash(progress);
390
391         auto pkls = dcp->pkls();
392         /* We've read this DCP in so it must have at least one PKL */
393         DCP_ASSERT (!pkls.empty());
394
395         auto asset = reel_file_asset->asset_ref().asset();
396
397         optional<string> pkl_hash;
398         for (auto i: pkls) {
399                 pkl_hash = i->hash (reel_file_asset->asset_ref()->id());
400                 if (pkl_hash) {
401                         break;
402                 }
403         }
404
405         DCP_ASSERT (pkl_hash);
406
407         auto cpl_hash = reel_file_asset->hash();
408         if (cpl_hash && *cpl_hash != *pkl_hash) {
409                 return VerifyAssetResult::CPL_PKL_DIFFER;
410         }
411
412         if (actual_hash != *pkl_hash) {
413                 return VerifyAssetResult::BAD;
414         }
415
416         return VerifyAssetResult::GOOD;
417 }
418
419
420 void
421 verify_language_tag (string tag, vector<VerificationNote>& notes)
422 {
423         try {
424                 LanguageTag test (tag);
425         } catch (LanguageTagError &) {
426                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, tag});
427         }
428 }
429
430
431 static void
432 verify_picture_asset (shared_ptr<const ReelFileAsset> reel_file_asset, boost::filesystem::path file, vector<VerificationNote>& notes, function<void (float)> progress)
433 {
434         int biggest_frame = 0;
435         auto asset = dynamic_pointer_cast<PictureAsset>(reel_file_asset->asset_ref().asset());
436         auto const duration = asset->intrinsic_duration ();
437
438         auto check_and_add = [&notes](vector<VerificationNote> const& j2k_notes) {
439                 for (auto i: j2k_notes) {
440                         if (find(notes.begin(), notes.end(), i) == notes.end()) {
441                                 notes.push_back (i);
442                         }
443                 }
444         };
445
446         if (auto mono_asset = dynamic_pointer_cast<MonoPictureAsset>(reel_file_asset->asset_ref().asset())) {
447                 auto reader = mono_asset->start_read ();
448                 for (int64_t i = 0; i < duration; ++i) {
449                         auto frame = reader->get_frame (i);
450                         biggest_frame = max(biggest_frame, frame->size());
451                         if (!mono_asset->encrypted() || mono_asset->key()) {
452                                 vector<VerificationNote> j2k_notes;
453                                 verify_j2k(frame, i, mono_asset->frame_rate().numerator, j2k_notes);
454                                 check_and_add (j2k_notes);
455                         }
456                         progress (float(i) / duration);
457                 }
458         } else if (auto stereo_asset = dynamic_pointer_cast<StereoPictureAsset>(asset)) {
459                 auto reader = stereo_asset->start_read ();
460                 for (int64_t i = 0; i < duration; ++i) {
461                         auto frame = reader->get_frame (i);
462                         biggest_frame = max(biggest_frame, max(frame->left()->size(), frame->right()->size()));
463                         if (!stereo_asset->encrypted() || stereo_asset->key()) {
464                                 vector<VerificationNote> j2k_notes;
465                                 verify_j2k(frame->left(), i, stereo_asset->frame_rate().numerator, j2k_notes);
466                                 verify_j2k(frame->right(), i, stereo_asset->frame_rate().numerator, j2k_notes);
467                                 check_and_add (j2k_notes);
468                         }
469                         progress (float(i) / duration);
470                 }
471
472         }
473
474         static const int max_frame =   rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
475         static const int risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
476         if (biggest_frame > max_frame) {
477                 notes.push_back ({
478                         VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
479                 });
480         } else if (biggest_frame > risky_frame) {
481                 notes.push_back ({
482                         VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
483                 });
484         }
485 }
486
487
488 static void
489 verify_main_picture_asset (
490         shared_ptr<const DCP> dcp,
491         shared_ptr<const ReelPictureAsset> reel_asset,
492         function<void (string, optional<boost::filesystem::path>)> stage,
493         function<void (float)> progress,
494         VerificationOptions options,
495         vector<VerificationNote>& notes
496         )
497 {
498         auto asset = reel_asset->asset();
499         auto const file = *asset->file();
500
501         if (options.check_asset_hashes && (!options.maximum_asset_size_for_hash_check || boost::filesystem::file_size(file) < *options.maximum_asset_size_for_hash_check)) {
502                 stage ("Checking picture asset hash", file);
503                 auto const r = verify_asset (dcp, reel_asset, progress);
504                 switch (r) {
505                         case VerifyAssetResult::BAD:
506                                 notes.push_back ({
507                                         VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_PICTURE_HASH, file
508                                 });
509                                 break;
510                         case VerifyAssetResult::CPL_PKL_DIFFER:
511                                 notes.push_back ({
512                                         VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_PICTURE_HASHES, file
513                                 });
514                                 break;
515                         default:
516                                 break;
517                 }
518         }
519
520         stage ("Checking picture frame sizes", asset->file());
521         verify_picture_asset (reel_asset, file, notes, progress);
522
523         /* Only flat/scope allowed by Bv2.1 */
524         if (
525                 asset->size() != Size(2048, 858) &&
526                 asset->size() != Size(1998, 1080) &&
527                 asset->size() != Size(4096, 1716) &&
528                 asset->size() != Size(3996, 2160)) {
529                 notes.push_back({
530                         VerificationNote::Type::BV21_ERROR,
531                         VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS,
532                         String::compose("%1x%2", asset->size().width, asset->size().height),
533                         file
534                 });
535         }
536
537         /* Only 24, 25, 48fps allowed for 2K */
538         if (
539                 (asset->size() == Size(2048, 858) || asset->size() == Size(1998, 1080)) &&
540                 (asset->edit_rate() != Fraction(24, 1) && asset->edit_rate() != Fraction(25, 1) && asset->edit_rate() != Fraction(48, 1))
541            ) {
542                 notes.push_back({
543                         VerificationNote::Type::BV21_ERROR,
544                         VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K,
545                         String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
546                         file
547                 });
548         }
549
550         if (asset->size() == Size(4096, 1716) || asset->size() == Size(3996, 2160)) {
551                 /* Only 24fps allowed for 4K */
552                 if (asset->edit_rate() != Fraction(24, 1)) {
553                         notes.push_back({
554                                 VerificationNote::Type::BV21_ERROR,
555                                 VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K,
556                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
557                                 file
558                         });
559                 }
560
561                 /* Only 2D allowed for 4K */
562                 if (dynamic_pointer_cast<const StereoPictureAsset>(asset)) {
563                         notes.push_back({
564                                 VerificationNote::Type::BV21_ERROR,
565                                 VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D,
566                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
567                                 file
568                         });
569
570                 }
571         }
572
573 }
574
575
576 struct State
577 {
578         boost::optional<string> subtitle_language;
579         boost::optional<int> audio_channels;
580 };
581
582
583 static void
584 verify_main_sound_asset (
585         shared_ptr<const DCP> dcp,
586         shared_ptr<const ReelSoundAsset> reel_asset,
587         function<void (string, optional<boost::filesystem::path>)> stage,
588         function<void (float)> progress,
589         VerificationOptions options,
590         vector<VerificationNote>& notes,
591         State& state
592         )
593 {
594         auto asset = reel_asset->asset();
595         auto const file = *asset->file();
596
597         if (options.check_asset_hashes && (!options.maximum_asset_size_for_hash_check || boost::filesystem::file_size(file) < *options.maximum_asset_size_for_hash_check)) {
598                 stage("Checking sound asset hash", file);
599                 auto const r = verify_asset (dcp, reel_asset, progress);
600                 switch (r) {
601                         case VerifyAssetResult::BAD:
602                                 notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_SOUND_HASH, file});
603                                 break;
604                         case VerifyAssetResult::CPL_PKL_DIFFER:
605                                 notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_SOUND_HASHES, file});
606                                 break;
607                         default:
608                                 break;
609                 }
610         }
611
612         if (!state.audio_channels) {
613                 state.audio_channels = asset->channels();
614         } else if (*state.audio_channels != asset->channels()) {
615                 notes.push_back({ VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_SOUND_CHANNEL_COUNTS, file });
616         }
617
618         stage ("Checking sound asset metadata", file);
619
620         if (auto lang = asset->language()) {
621                 verify_language_tag (*lang, notes);
622         }
623         if (asset->sampling_rate() != 48000) {
624                 notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SOUND_FRAME_RATE, raw_convert<string>(asset->sampling_rate()), file});
625         }
626 }
627
628
629 static void
630 verify_main_subtitle_reel (shared_ptr<const ReelSubtitleAsset> reel_asset, vector<VerificationNote>& notes)
631 {
632         /* XXX: is Language compulsory? */
633         if (reel_asset->language()) {
634                 verify_language_tag (*reel_asset->language(), notes);
635         }
636
637         if (!reel_asset->entry_point()) {
638                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT, reel_asset->id() });
639         } else if (reel_asset->entry_point().get()) {
640                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT, reel_asset->id() });
641         }
642 }
643
644
645 static void
646 verify_closed_caption_reel (shared_ptr<const ReelClosedCaptionAsset> reel_asset, vector<VerificationNote>& notes)
647 {
648         /* XXX: is Language compulsory? */
649         if (reel_asset->language()) {
650                 verify_language_tag (*reel_asset->language(), notes);
651         }
652
653         if (!reel_asset->entry_point()) {
654                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
655         } else if (reel_asset->entry_point().get()) {
656                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
657         }
658 }
659
660
661 /** Verify stuff that is common to both subtitles and closed captions */
662 void
663 verify_smpte_timed_text_asset (
664         shared_ptr<const SMPTESubtitleAsset> asset,
665         optional<int64_t> reel_asset_duration,
666         vector<VerificationNote>& notes
667         )
668 {
669         if (asset->language()) {
670                 verify_language_tag (*asset->language(), notes);
671         } else {
672                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE, *asset->file() });
673         }
674
675         auto const size = boost::filesystem::file_size(asset->file().get());
676         if (size > 115 * 1024 * 1024) {
677                 notes.push_back (
678                         { VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES, raw_convert<string>(size), *asset->file() }
679                         );
680         }
681
682         /* XXX: I'm not sure what Bv2.1_7.2.1 means when it says "the font resource shall not be larger than 10MB"
683          * but I'm hoping that checking for the total size of all fonts being <= 10MB will do.
684          */
685         auto fonts = asset->font_data ();
686         int total_size = 0;
687         for (auto i: fonts) {
688                 total_size += i.second.size();
689         }
690         if (total_size > 10 * 1024 * 1024) {
691                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES, raw_convert<string>(total_size), asset->file().get() });
692         }
693
694         if (!asset->start_time()) {
695                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_START_TIME, asset->file().get() });
696         } else if (asset->start_time() != Time()) {
697                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SUBTITLE_START_TIME, asset->file().get() });
698         }
699
700         if (reel_asset_duration && *reel_asset_duration != asset->intrinsic_duration()) {
701                 notes.push_back (
702                         {
703                                 VerificationNote::Type::BV21_ERROR,
704                                 VerificationNote::Code::MISMATCHED_TIMED_TEXT_DURATION,
705                                 String::compose("%1 %2", *reel_asset_duration, asset->intrinsic_duration()),
706                                 asset->file().get()
707                         });
708         }
709 }
710
711
712 /** Verify Interop subtitle-only stuff */
713 void
714 verify_interop_subtitle_asset(shared_ptr<const InteropSubtitleAsset> asset, vector<VerificationNote>& notes)
715 {
716         if (asset->subtitles().empty()) {
717                 notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_SUBTITLE, asset->id(), asset->file().get() });
718         }
719         auto const unresolved = asset->unresolved_fonts();
720         if (!unresolved.empty()) {
721                 notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_FONT, unresolved.front() });
722         }
723 }
724
725
726 /** Verify SMPTE subtitle-only stuff */
727 void
728 verify_smpte_subtitle_asset (
729         shared_ptr<const SMPTESubtitleAsset> asset,
730         vector<VerificationNote>& notes,
731         State& state
732         )
733 {
734         if (asset->language()) {
735                 if (!state.subtitle_language) {
736                         state.subtitle_language = *asset->language();
737                 } else if (state.subtitle_language != *asset->language()) {
738                         notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES });
739                 }
740         }
741
742         DCP_ASSERT (asset->resource_id());
743         auto xml_id = asset->xml_id();
744         if (xml_id) {
745                 if (asset->resource_id().get() != xml_id) {
746                         notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_TIMED_TEXT_RESOURCE_ID });
747                 }
748
749                 if (asset->id() == asset->resource_id().get() || asset->id() == xml_id) {
750                         notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_TIMED_TEXT_ASSET_ID });
751                 }
752         } else {
753                 notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED});
754         }
755
756         if (asset->raw_xml()) {
757                 /* Deluxe require this in their QC even if it seems never to be mentioned in any standard */
758                 cxml::Document doc("SubtitleReel");
759                 doc.read_string(*asset->raw_xml());
760                 auto issue_date = doc.string_child("IssueDate");
761                 std::regex reg("^\\d\\d\\d\\d-\\d\\d-\\d\\dT\\d\\d:\\d\\d:\\d\\d$");
762                 if (!std::regex_match(issue_date, reg)) {
763                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_ISSUE_DATE, issue_date});
764                 }
765         }
766 }
767
768
769 /** Verify all subtitle stuff */
770 static void
771 verify_subtitle_asset (
772         shared_ptr<const SubtitleAsset> asset,
773         optional<int64_t> reel_asset_duration,
774         function<void (string, optional<boost::filesystem::path>)> stage,
775         boost::filesystem::path xsd_dtd_directory,
776         vector<VerificationNote>& notes,
777         State& state
778         )
779 {
780         stage ("Checking subtitle XML", asset->file());
781         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
782          * gets passed through libdcp which may clean up and therefore hide errors.
783          */
784         if (asset->raw_xml()) {
785                 validate_xml (asset->raw_xml().get(), xsd_dtd_directory, notes);
786         } else {
787                 notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED});
788         }
789
790         auto namespace_count = [](shared_ptr<const SubtitleAsset> asset, string root_node) {
791                 cxml::Document doc(root_node);
792                 doc.read_string(asset->raw_xml().get());
793                 auto root = dynamic_cast<xmlpp::Element*>(doc.node())->cobj();
794                 int count = 0;
795                 for (auto ns = root->nsDef; ns != nullptr; ns = ns->next) {
796                         ++count;
797                 }
798                 return count;
799         };
800
801         auto interop = dynamic_pointer_cast<const InteropSubtitleAsset>(asset);
802         if (interop) {
803                 verify_interop_subtitle_asset(interop, notes);
804                 if (namespace_count(asset, "DCSubtitle") > 1) {
805                         notes.push_back({ VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_SUBTITLE_NAMESPACE_COUNT, asset->id() });
806                 }
807         }
808
809         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
810         if (smpte) {
811                 verify_smpte_timed_text_asset (smpte, reel_asset_duration, notes);
812                 verify_smpte_subtitle_asset (smpte, notes, state);
813                 /* This asset may be encrypted and in that case we'll have no raw_xml() */
814                 if (asset->raw_xml() && namespace_count(asset, "SubtitleReel") > 1) {
815                         notes.push_back({ VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_SUBTITLE_NAMESPACE_COUNT, asset->id()});
816                 }
817         }
818 }
819
820
821 /** Verify all closed caption stuff */
822 static void
823 verify_closed_caption_asset (
824         shared_ptr<const SubtitleAsset> asset,
825         optional<int64_t> reel_asset_duration,
826         function<void (string, optional<boost::filesystem::path>)> stage,
827         boost::filesystem::path xsd_dtd_directory,
828         vector<VerificationNote>& notes
829         )
830 {
831         stage ("Checking closed caption XML", asset->file());
832         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
833          * gets passed through libdcp which may clean up and therefore hide errors.
834          */
835         auto raw_xml = asset->raw_xml();
836         if (raw_xml) {
837                 validate_xml (*raw_xml, xsd_dtd_directory, notes);
838                 if (raw_xml->size() > 256 * 1024) {
839                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES, raw_convert<string>(raw_xml->size()), *asset->file()});
840                 }
841         } else {
842                 notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED});
843         }
844
845         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
846         if (smpte) {
847                 verify_smpte_timed_text_asset (smpte, reel_asset_duration, notes);
848         }
849 }
850
851
852 /** Check the timing of the individual subtitles and make sure there are no empty <Text> nodes etc. */
853 static
854 void
855 verify_text_details (
856         dcp::Standard standard,
857         vector<shared_ptr<Reel>> reels,
858         int edit_rate,
859         vector<VerificationNote>& notes,
860         std::function<bool (shared_ptr<Reel>)> check,
861         std::function<optional<string> (shared_ptr<Reel>)> xml,
862         std::function<int64_t (shared_ptr<Reel>)> duration
863         )
864 {
865         /* end of last subtitle (in editable units) */
866         optional<int64_t> last_out;
867         auto too_short = false;
868         auto too_close = false;
869         auto too_early = false;
870         auto reel_overlap = false;
871         auto empty_text = false;
872         /* current reel start time (in editable units) */
873         int64_t reel_offset = 0;
874         vector<string> font_ids;
875         optional<string> missing_load_font_id;
876
877         std::function<void (cxml::ConstNodePtr, optional<int>, optional<Time>, int, bool)> parse;
878         parse = [&parse, &last_out, &too_short, &too_close, &too_early, &empty_text, &reel_offset, &font_ids, &missing_load_font_id](
879                 cxml::ConstNodePtr node,
880                 optional<int> tcr,
881                 optional<Time> start_time,
882                 int er,
883                 bool first_reel
884                 ) {
885                 if (node->name() == "Subtitle") {
886                         Time in (node->string_attribute("TimeIn"), tcr);
887                         if (start_time) {
888                                 in -= *start_time;
889                         }
890                         Time out (node->string_attribute("TimeOut"), tcr);
891                         if (start_time) {
892                                 out -= *start_time;
893                         }
894                         if (first_reel && tcr && in < Time(0, 0, 4, 0, *tcr)) {
895                                 too_early = true;
896                         }
897                         auto length = out - in;
898                         if (length.as_editable_units_ceil(er) < 15) {
899                                 too_short = true;
900                         }
901                         if (last_out) {
902                                 /* XXX: this feels dubious - is it really what Bv2.1 means? */
903                                 auto distance = reel_offset + in.as_editable_units_ceil(er) - *last_out;
904                                 if (distance >= 0 && distance < 2) {
905                                         too_close = true;
906                                 }
907                         }
908                         last_out = reel_offset + out.as_editable_units_floor(er);
909                 } else if (node->name() == "Text") {
910                         std::function<bool (cxml::ConstNodePtr)> node_has_content = [&](cxml::ConstNodePtr node) {
911                                 if (!node->content().empty()) {
912                                         return true;
913                                 }
914                                 for (auto i: node->node_children()) {
915                                         if (node_has_content(i)) {
916                                                 return true;
917                                         }
918                                 }
919                                 return false;
920                         };
921                         if (!node_has_content(node)) {
922                                 empty_text = true;
923                         }
924                 } else if (node->name() == "LoadFont") {
925                         if (auto const id = node->optional_string_attribute("Id")) {
926                                 font_ids.push_back(*id);
927                         }
928                 } else if (node->name() == "Font") {
929                         if (auto const font_id = node->optional_string_attribute("Id")) {
930                                 if (std::find_if(font_ids.begin(), font_ids.end(), [font_id](string const& id) { return id == font_id; }) == font_ids.end()) {
931                                         missing_load_font_id = font_id;
932                                 }
933                         }
934                 }
935                 for (auto i: node->node_children()) {
936                         parse(i, tcr, start_time, er, first_reel);
937                 }
938         };
939
940         for (auto i = 0U; i < reels.size(); ++i) {
941                 if (!check(reels[i])) {
942                         continue;
943                 }
944
945                 auto reel_xml = xml(reels[i]);
946                 if (!reel_xml) {
947                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED});
948                         continue;
949                 }
950
951                 /* We need to look at <Subtitle> instances in the XML being checked, so we can't use the subtitles
952                  * read in by libdcp's parser.
953                  */
954
955                 shared_ptr<cxml::Document> doc;
956                 optional<int> tcr;
957                 optional<Time> start_time;
958                 switch (standard) {
959                 case dcp::Standard::INTEROP:
960                         doc = make_shared<cxml::Document>("DCSubtitle");
961                         doc->read_string (*reel_xml);
962                         break;
963                 case dcp::Standard::SMPTE:
964                         doc = make_shared<cxml::Document>("SubtitleReel");
965                         doc->read_string (*reel_xml);
966                         tcr = doc->number_child<int>("TimeCodeRate");
967                         if (auto start_time_string = doc->optional_string_child("StartTime")) {
968                                 start_time = Time(*start_time_string, tcr);
969                         }
970                         break;
971                 }
972                 parse (doc, tcr, start_time, edit_rate, i == 0);
973                 auto end = reel_offset + duration(reels[i]);
974                 if (last_out && *last_out > end) {
975                         reel_overlap = true;
976                 }
977                 reel_offset = end;
978         }
979
980         if (last_out && *last_out > reel_offset) {
981                 reel_overlap = true;
982         }
983
984         if (too_early) {
985                 notes.push_back({
986                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME
987                 });
988         }
989
990         if (too_short) {
991                 notes.push_back ({
992                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_DURATION
993                 });
994         }
995
996         if (too_close) {
997                 notes.push_back ({
998                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_SPACING
999                 });
1000         }
1001
1002         if (reel_overlap) {
1003                 notes.push_back ({
1004                         VerificationNote::Type::ERROR, VerificationNote::Code::SUBTITLE_OVERLAPS_REEL_BOUNDARY
1005                 });
1006         }
1007
1008         if (empty_text) {
1009                 notes.push_back ({
1010                         VerificationNote::Type::WARNING, VerificationNote::Code::EMPTY_TEXT
1011                 });
1012         }
1013
1014         if (missing_load_font_id) {
1015                 notes.push_back(dcp::VerificationNote(VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_LOAD_FONT_FOR_FONT).set_id(*missing_load_font_id));
1016         }
1017 }
1018
1019
1020 static
1021 void
1022 verify_closed_caption_details (
1023         vector<shared_ptr<Reel>> reels,
1024         vector<VerificationNote>& notes
1025         )
1026 {
1027         std::function<void (cxml::ConstNodePtr node, std::vector<cxml::ConstNodePtr>& text_or_image)> find_text_or_image;
1028         find_text_or_image = [&find_text_or_image](cxml::ConstNodePtr node, std::vector<cxml::ConstNodePtr>& text_or_image) {
1029                 for (auto i: node->node_children()) {
1030                         if (i->name() == "Text") {
1031                                 text_or_image.push_back (i);
1032                         } else {
1033                                 find_text_or_image (i, text_or_image);
1034                         }
1035                 }
1036         };
1037
1038         auto mismatched_valign = false;
1039         auto incorrect_order = false;
1040
1041         std::function<void (cxml::ConstNodePtr)> parse;
1042         parse = [&parse, &find_text_or_image, &mismatched_valign, &incorrect_order](cxml::ConstNodePtr node) {
1043                 if (node->name() == "Subtitle") {
1044                         vector<cxml::ConstNodePtr> text_or_image;
1045                         find_text_or_image (node, text_or_image);
1046                         optional<string> last_valign;
1047                         optional<float> last_vpos;
1048                         for (auto i: text_or_image) {
1049                                 auto valign = i->optional_string_attribute("VAlign");
1050                                 if (!valign) {
1051                                         valign = i->optional_string_attribute("Valign").get_value_or("center");
1052                                 }
1053                                 auto vpos = i->optional_number_attribute<float>("VPosition");
1054                                 if (!vpos) {
1055                                         vpos = i->optional_number_attribute<float>("Vposition").get_value_or(50);
1056                                 }
1057
1058                                 if (last_valign) {
1059                                         if (*last_valign != valign) {
1060                                                 mismatched_valign = true;
1061                                         }
1062                                 }
1063                                 last_valign = valign;
1064
1065                                 if (!mismatched_valign) {
1066                                         if (last_vpos) {
1067                                                 if (*last_valign == "top" || *last_valign == "center") {
1068                                                         if (*vpos < *last_vpos) {
1069                                                                 incorrect_order = true;
1070                                                         }
1071                                                 } else {
1072                                                         if (*vpos > *last_vpos) {
1073                                                                 incorrect_order = true;
1074                                                         }
1075                                                 }
1076                                         }
1077                                         last_vpos = vpos;
1078                                 }
1079                         }
1080                 }
1081
1082                 for (auto i: node->node_children()) {
1083                         parse(i);
1084                 }
1085         };
1086
1087         for (auto reel: reels) {
1088                 for (auto ccap: reel->closed_captions()) {
1089                         auto reel_xml = ccap->asset()->raw_xml();
1090                         if (!reel_xml) {
1091                                 notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED});
1092                                 continue;
1093                         }
1094
1095                         /* We need to look at <Subtitle> instances in the XML being checked, so we can't use the subtitles
1096                          * read in by libdcp's parser.
1097                          */
1098
1099                         shared_ptr<cxml::Document> doc;
1100                         optional<int> tcr;
1101                         optional<Time> start_time;
1102                         try {
1103                                 doc = make_shared<cxml::Document>("SubtitleReel");
1104                                 doc->read_string (*reel_xml);
1105                         } catch (...) {
1106                                 doc = make_shared<cxml::Document>("DCSubtitle");
1107                                 doc->read_string (*reel_xml);
1108                         }
1109                         parse (doc);
1110                 }
1111         }
1112
1113         if (mismatched_valign) {
1114                 notes.push_back ({
1115                         VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_VALIGN,
1116                 });
1117         }
1118
1119         if (incorrect_order) {
1120                 notes.push_back ({
1121                         VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ORDERING,
1122                 });
1123         }
1124 }
1125
1126
/** Flags filled in by verify_text_lines_and_characters(); all start out false */
struct LinesCharactersResult
{
	/** true if some line was longer than the warning length */
	bool warning_length_exceeded{false};
	/** true if some line was longer than the error length */
	bool error_length_exceeded{false};
	/** true if too many lines were on screen at once */
	bool line_count_exceeded{false};
};
1133
1134
1135 static
1136 void
1137 verify_text_lines_and_characters (
1138         shared_ptr<SubtitleAsset> asset,
1139         int warning_length,
1140         int error_length,
1141         LinesCharactersResult* result
1142         )
1143 {
1144         class Event
1145         {
1146         public:
1147                 Event (Time time_, float position_, int characters_)
1148                         : time (time_)
1149                         , position (position_)
1150                         , characters (characters_)
1151                 {}
1152
1153                 Event (Time time_, shared_ptr<Event> start_)
1154                         : time (time_)
1155                         , start (start_)
1156                 {}
1157
1158                 Time time;
1159                 int position; //< position from 0 at top of screen to 100 at bottom
1160                 int characters;
1161                 shared_ptr<Event> start;
1162         };
1163
1164         vector<shared_ptr<Event>> events;
1165
1166         auto position = [](shared_ptr<const SubtitleString> sub) {
1167                 switch (sub->v_align()) {
1168                 case VAlign::TOP:
1169                         return lrintf(sub->v_position() * 100);
1170                 case VAlign::CENTER:
1171                         return lrintf((0.5f + sub->v_position()) * 100);
1172                 case VAlign::BOTTOM:
1173                         return lrintf((1.0f - sub->v_position()) * 100);
1174                 }
1175
1176                 return 0L;
1177         };
1178
1179         for (auto j: asset->subtitles()) {
1180                 auto text = dynamic_pointer_cast<const SubtitleString>(j);
1181                 if (text) {
1182                         auto in = make_shared<Event>(text->in(), position(text), text->text().length());
1183                         events.push_back(in);
1184                         events.push_back(make_shared<Event>(text->out(), in));
1185                 }
1186         }
1187
1188         std::sort(events.begin(), events.end(), [](shared_ptr<Event> const& a, shared_ptr<Event>const& b) {
1189                 return a->time < b->time;
1190         });
1191
1192         map<int, int> current;
1193         for (auto i: events) {
1194                 if (current.size() > 3) {
1195                         result->line_count_exceeded = true;
1196                 }
1197                 for (auto j: current) {
1198                         if (j.second > warning_length) {
1199                                 result->warning_length_exceeded = true;
1200                         }
1201                         if (j.second > error_length) {
1202                                 result->error_length_exceeded = true;
1203                         }
1204                 }
1205
1206                 if (i->start) {
1207                         /* end of a subtitle */
1208                         DCP_ASSERT (current.find(i->start->position) != current.end());
1209                         if (current[i->start->position] == i->start->characters) {
1210                                 current.erase(i->start->position);
1211                         } else {
1212                                 current[i->start->position] -= i->start->characters;
1213                         }
1214                 } else {
1215                         /* start of a subtitle */
1216                         if (current.find(i->position) == current.end()) {
1217                                 current[i->position] = i->characters;
1218                         } else {
1219                                 current[i->position] += i->characters;
1220                         }
1221                 }
1222         }
1223 }
1224
1225
1226 static
1227 void
1228 verify_text_details(dcp::Standard standard, vector<shared_ptr<Reel>> reels, vector<VerificationNote>& notes)
1229 {
1230         if (reels.empty()) {
1231                 return;
1232         }
1233
1234         if (reels[0]->main_subtitle()) {
1235                 verify_text_details(standard, reels, reels[0]->main_subtitle()->edit_rate().numerator, notes,
1236                         [](shared_ptr<Reel> reel) {
1237                                 return static_cast<bool>(reel->main_subtitle());
1238                         },
1239                         [](shared_ptr<Reel> reel) {
1240                                 return reel->main_subtitle()->asset()->raw_xml();
1241                         },
1242                         [](shared_ptr<Reel> reel) {
1243                                 return reel->main_subtitle()->actual_duration();
1244                         }
1245                 );
1246         }
1247
1248         for (auto i = 0U; i < reels[0]->closed_captions().size(); ++i) {
1249                 verify_text_details(standard, reels, reels[0]->closed_captions()[i]->edit_rate().numerator, notes,
1250                         [i](shared_ptr<Reel> reel) {
1251                                 return i < reel->closed_captions().size();
1252                         },
1253                         [i](shared_ptr<Reel> reel) {
1254                                 return reel->closed_captions()[i]->asset()->raw_xml();
1255                         },
1256                         [i](shared_ptr<Reel> reel) {
1257                                 return reel->closed_captions()[i]->actual_duration();
1258                         }
1259                 );
1260         }
1261
1262         verify_closed_caption_details (reels, notes);
1263 }
1264
1265
1266 void
1267 verify_extension_metadata(shared_ptr<const CPL> cpl, vector<VerificationNote>& notes)
1268 {
1269         DCP_ASSERT (cpl->file());
1270         cxml::Document doc ("CompositionPlaylist");
1271         doc.read_file (cpl->file().get());
1272
1273         auto missing = false;
1274         string malformed;
1275
1276         if (auto reel_list = doc.node_child("ReelList")) {
1277                 auto reels = reel_list->node_children("Reel");
1278                 if (!reels.empty()) {
1279                         if (auto asset_list = reels[0]->optional_node_child("AssetList")) {
1280                                 if (auto metadata = asset_list->optional_node_child("CompositionMetadataAsset")) {
1281                                         if (auto extension_list = metadata->optional_node_child("ExtensionMetadataList")) {
1282                                                 missing = true;
1283                                                 for (auto extension: extension_list->node_children("ExtensionMetadata")) {
1284                                                         if (extension->optional_string_attribute("scope").get_value_or("") != "http://isdcf.com/ns/cplmd/app") {
1285                                                                 continue;
1286                                                         }
1287                                                         missing = false;
1288                                                         if (auto name = extension->optional_node_child("Name")) {
1289                                                                 if (name->content() != "Application") {
1290                                                                         malformed = "<Name> should be 'Application'";
1291                                                                 }
1292                                                         }
1293                                                         if (auto property_list = extension->optional_node_child("PropertyList")) {
1294                                                                 if (auto property = property_list->optional_node_child("Property")) {
1295                                                                         if (auto name = property->optional_node_child("Name")) {
1296                                                                                 if (name->content() != "DCP Constraints Profile") {
1297                                                                                         malformed = "<Name> property should be 'DCP Constraints Profile'";
1298                                                                                 }
1299                                                                         }
1300                                                                         if (auto value = property->optional_node_child("Value")) {
1301                                                                                 if (value->content() != "SMPTE-RDD-52:2020-Bv2.1") {
1302                                                                                         malformed = "<Value> property should be 'SMPTE-RDD-52:2020-Bv2.1'";
1303                                                                                 }
1304                                                                         }
1305                                                                 }
1306                                                         }
1307                                                 }
1308                                         } else {
1309                                                 missing = true;
1310                                         }
1311                                 }
1312                         }
1313                 }
1314         }
1315
1316         if (missing) {
1317                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_EXTENSION_METADATA, cpl->id(), cpl->file().get()});
1318         } else if (!malformed.empty()) {
1319                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_EXTENSION_METADATA, malformed, cpl->file().get()});
1320         }
1321 }
1322
1323
1324 bool
1325 pkl_has_encrypted_assets(shared_ptr<const DCP> dcp, shared_ptr<const PKL> pkl)
1326 {
1327         vector<string> encrypted;
1328         for (auto i: dcp->cpls()) {
1329                 for (auto j: i->reel_file_assets()) {
1330                         if (j->asset_ref().resolved()) {
1331                                 auto mxf = dynamic_pointer_cast<MXF>(j->asset_ref().asset());
1332                                 if (mxf && mxf->encrypted()) {
1333                                         encrypted.push_back(j->asset_ref().id());
1334                                 }
1335                         }
1336                 }
1337         }
1338
1339         for (auto i: pkl->assets()) {
1340                 if (find(encrypted.begin(), encrypted.end(), i->id()) != encrypted.end()) {
1341                         return true;
1342                 }
1343         }
1344
1345         return false;
1346 }
1347
1348
1349 static
1350 void
1351 verify_reel(
1352         shared_ptr<const DCP> dcp,
1353         shared_ptr<const CPL> cpl,
1354         shared_ptr<const Reel> reel,
1355         optional<dcp::Size> main_picture_active_area,
1356         function<void (string, optional<boost::filesystem::path>)> stage,
1357         boost::filesystem::path xsd_dtd_directory,
1358         function<void (float)> progress,
1359         VerificationOptions options,
1360         vector<VerificationNote>& notes,
1361         State& state,
1362         bool* have_main_subtitle,
1363         bool* have_no_main_subtitle,
1364         size_t* most_closed_captions,
1365         size_t* fewest_closed_captions,
1366         map<Marker, Time>* markers_seen
1367         )
1368 {
1369         for (auto i: reel->assets()) {
1370                 if (i->duration() && (i->duration().get() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1371                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_DURATION, i->id()});
1372                 }
1373                 if ((i->intrinsic_duration() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1374                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_INTRINSIC_DURATION, i->id()});
1375                 }
1376                 auto file_asset = dynamic_pointer_cast<ReelFileAsset>(i);
1377                 if (i->encryptable() && !file_asset->hash()) {
1378                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_HASH, i->id()});
1379                 }
1380         }
1381
1382         if (dcp->standard() == Standard::SMPTE) {
1383                 boost::optional<int64_t> duration;
1384                 for (auto i: reel->assets()) {
1385                         if (!duration) {
1386                                 duration = i->actual_duration();
1387                         } else if (*duration != i->actual_duration()) {
1388                                 notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_ASSET_DURATION});
1389                                 break;
1390                         }
1391                 }
1392         }
1393
1394         if (reel->main_picture()) {
1395                 /* Check reel stuff */
1396                 auto const frame_rate = reel->main_picture()->frame_rate();
1397                 if (frame_rate.denominator != 1 ||
1398                     (frame_rate.numerator != 24 &&
1399                      frame_rate.numerator != 25 &&
1400                      frame_rate.numerator != 30 &&
1401                      frame_rate.numerator != 48 &&
1402                      frame_rate.numerator != 50 &&
1403                      frame_rate.numerator != 60 &&
1404                      frame_rate.numerator != 96)) {
1405                         notes.push_back({
1406                                 VerificationNote::Type::ERROR,
1407                                 VerificationNote::Code::INVALID_PICTURE_FRAME_RATE,
1408                                 String::compose("%1/%2", frame_rate.numerator, frame_rate.denominator)
1409                         });
1410                 }
1411                 /* Check asset */
1412                 if (reel->main_picture()->asset_ref().resolved()) {
1413                         verify_main_picture_asset(dcp, reel->main_picture(), stage, progress, options, notes);
1414                         auto const asset_size = reel->main_picture()->asset()->size();
1415                         if (main_picture_active_area) {
1416                                 if (main_picture_active_area->width > asset_size.width) {
1417                                         notes.push_back({
1418                                                         VerificationNote::Type::ERROR,
1419                                                         VerificationNote::Code::INVALID_MAIN_PICTURE_ACTIVE_AREA,
1420                                                         String::compose("width %1 is bigger than the asset width %2", main_picture_active_area->width, asset_size.width),
1421                                                         cpl->file().get()
1422                                                         });
1423                                 }
1424                                 if (main_picture_active_area->height > asset_size.height) {
1425                                         notes.push_back({
1426                                                         VerificationNote::Type::ERROR,
1427                                                         VerificationNote::Code::INVALID_MAIN_PICTURE_ACTIVE_AREA,
1428                                                         String::compose("height %1 is bigger than the asset height %2", main_picture_active_area->height, asset_size.height),
1429                                                         cpl->file().get()
1430                                                         });
1431                                 }
1432                         }
1433                 }
1434         }
1435
1436         if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
1437                 verify_main_sound_asset(dcp, reel->main_sound(), stage, progress, options, notes, state);
1438         }
1439
1440         if (reel->main_subtitle()) {
1441                 verify_main_subtitle_reel(reel->main_subtitle(), notes);
1442                 if (reel->main_subtitle()->asset_ref().resolved()) {
1443                         verify_subtitle_asset(reel->main_subtitle()->asset(), reel->main_subtitle()->duration(), stage, xsd_dtd_directory, notes, state);
1444                 }
1445                 *have_main_subtitle = true;
1446         } else {
1447                 *have_no_main_subtitle = true;
1448         }
1449
1450         for (auto i: reel->closed_captions()) {
1451                 verify_closed_caption_reel(i, notes);
1452                 if (i->asset_ref().resolved()) {
1453                         verify_closed_caption_asset(i->asset(), i->duration(), stage, xsd_dtd_directory, notes);
1454                 }
1455         }
1456
1457         if (reel->main_markers()) {
1458                 for (auto const& i: reel->main_markers()->get()) {
1459                         markers_seen->insert(i);
1460                 }
1461                 if (reel->main_markers()->entry_point()) {
1462                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::UNEXPECTED_ENTRY_POINT});
1463                 }
1464                 if (reel->main_markers()->duration()) {
1465                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::UNEXPECTED_DURATION});
1466                 }
1467         }
1468
1469         *fewest_closed_captions = std::min(*fewest_closed_captions, reel->closed_captions().size());
1470         *most_closed_captions = std::max(*most_closed_captions, reel->closed_captions().size());
1471
1472 }
1473
1474
1475 static
1476 void
1477 verify_cpl(
1478         shared_ptr<const DCP> dcp,
1479         shared_ptr<const CPL> cpl,
1480         function<void (string, optional<boost::filesystem::path>)> stage,
1481         boost::filesystem::path xsd_dtd_directory,
1482         function<void (float)> progress,
1483         VerificationOptions options,
1484         vector<VerificationNote>& notes,
1485         State& state
1486         )
1487 {
1488         stage("Checking CPL", cpl->file());
1489         validate_xml(cpl->file().get(), xsd_dtd_directory, notes);
1490
1491         if (cpl->any_encrypted() && !cpl->all_encrypted()) {
1492                 notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::PARTIALLY_ENCRYPTED});
1493         }
1494
1495         for (auto const& i: cpl->additional_subtitle_languages()) {
1496                 verify_language_tag(i, notes);
1497         }
1498
1499         if (!cpl->content_kind().scope() || *cpl->content_kind().scope() == "http://www.smpte-ra.org/schemas/429-7/2006/CPL#standard-content") {
1500                 /* This is a content kind from http://www.smpte-ra.org/schemas/429-7/2006/CPL#standard-content; make sure it's one
1501                  * of the approved ones.
1502                  */
1503                 auto all = ContentKind::all();
1504                 auto name = cpl->content_kind().name();
1505                 transform(name.begin(), name.end(), name.begin(), ::tolower);
1506                 auto iter = std::find_if(all.begin(), all.end(), [name](ContentKind const& k) { return !k.scope() && k.name() == name; });
1507                 if (iter == all.end()) {
1508                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_CONTENT_KIND, cpl->content_kind().name()});
1509                 }
1510         }
1511
1512         if (cpl->release_territory()) {
1513                 if (!cpl->release_territory_scope() || cpl->release_territory_scope().get() != "http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata#scope/release-territory/UNM49") {
1514                         auto terr = cpl->release_territory().get();
1515                         /* Must be a valid region tag, or "001" */
1516                         try {
1517                                 LanguageTag::RegionSubtag test(terr);
1518                         } catch (...) {
1519                                 if (terr != "001") {
1520                                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, terr});
1521                                 }
1522                         }
1523                 }
1524         }
1525
1526         if (dcp->standard() == Standard::SMPTE) {
1527                 if (!cpl->annotation_text()) {
1528                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1529                 } else if (cpl->annotation_text().get() != cpl->content_title_text()) {
1530                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1531                 }
1532         }
1533
1534         for (auto i: dcp->pkls()) {
1535                 /* Check that the CPL's hash corresponds to the PKL */
1536                 optional<string> h = i->hash(cpl->id());
1537                 if (h && make_digest(ArrayData(*cpl->file())) != *h) {
1538                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_CPL_HASHES, cpl->id(), cpl->file().get()});
1539                 }
1540
1541                 /* Check that any PKL with a single CPL has its AnnotationText the same as the CPL's ContentTitleText */
1542                 optional<string> required_annotation_text;
1543                 for (auto j: i->assets()) {
1544                         /* See if this is a CPL */
1545                         for (auto k: dcp->cpls()) {
1546                                 if (j->id() == k->id()) {
1547                                         if (!required_annotation_text) {
1548                                                 /* First CPL we have found; this is the required AnnotationText unless we find another */
1549                                                 required_annotation_text = cpl->content_title_text();
1550                                         } else {
1551                                                 /* There's more than one CPL so we don't care what the PKL's AnnotationText is */
1552                                                 required_annotation_text = boost::none;
1553                                         }
1554                                 }
1555                         }
1556                 }
1557
1558                 if (required_annotation_text && i->annotation_text() != required_annotation_text) {
1559                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL, i->id(), i->file().get()});
1560                 }
1561         }
1562
1563         /* set to true if any reel has a MainSubtitle */
1564         auto have_main_subtitle = false;
1565         /* set to true if any reel has no MainSubtitle */
1566         auto have_no_main_subtitle = false;
1567         /* fewest number of closed caption assets seen in a reel */
1568         size_t fewest_closed_captions = SIZE_MAX;
1569         /* most number of closed caption assets seen in a reel */
1570         size_t most_closed_captions = 0;
1571         map<Marker, Time> markers_seen;
1572
1573         auto const main_picture_active_area = cpl->main_picture_active_area();
1574         if (main_picture_active_area && (main_picture_active_area->width % 2)) {
1575                 notes.push_back({
1576                                 VerificationNote::Type::ERROR,
1577                                 VerificationNote::Code::INVALID_MAIN_PICTURE_ACTIVE_AREA,
1578                                 String::compose("width %1 is not a multiple of 2", main_picture_active_area->width),
1579                                 cpl->file().get()
1580                         });
1581         }
1582         if (main_picture_active_area && (main_picture_active_area->height % 2)) {
1583                 notes.push_back({
1584                                 VerificationNote::Type::ERROR,
1585                                 VerificationNote::Code::INVALID_MAIN_PICTURE_ACTIVE_AREA,
1586                                 String::compose("height %1 is not a multiple of 2", main_picture_active_area->height),
1587                                 cpl->file().get()
1588                         });
1589         }
1590
1591         for (auto reel: cpl->reels()) {
1592                 stage("Checking reel", optional<boost::filesystem::path>());
1593                 verify_reel(
1594                         dcp,
1595                         cpl,
1596                         reel,
1597                         main_picture_active_area,
1598                         stage,
1599                         xsd_dtd_directory,
1600                         progress,
1601                         options,
1602                         notes,
1603                         state,
1604                         &have_main_subtitle,
1605                         &have_no_main_subtitle,
1606                         &most_closed_captions,
1607                         &fewest_closed_captions,
1608                         &markers_seen
1609                         );
1610         }
1611
1612         verify_text_details(dcp->standard().get_value_or(dcp::Standard::SMPTE), cpl->reels(), notes);
1613
1614         if (dcp->standard() == Standard::SMPTE) {
1615                 if (auto msc = cpl->main_sound_configuration()) {
1616                         if (state.audio_channels && msc->channels() != *state.audio_channels) {
1617                                 notes.push_back({
1618                                                 VerificationNote::Type::ERROR,
1619                                                 VerificationNote::Code::INVALID_MAIN_SOUND_CONFIGURATION,
1620                                                 String::compose("MainSoundConfiguration has %1 channels but sound assets have %2", msc->channels(), *state.audio_channels),
1621                                                 cpl->file().get()
1622                                         });
1623                         }
1624                 }
1625
1626                 if (have_main_subtitle && have_no_main_subtitle) {
1627                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS});
1628                 }
1629
1630                 if (fewest_closed_captions != most_closed_captions) {
1631                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS});
1632                 }
1633
1634                 if (cpl->content_kind() == ContentKind::FEATURE) {
1635                         if (markers_seen.find(Marker::FFEC) == markers_seen.end()) {
1636                                 notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFEC_IN_FEATURE});
1637                         }
1638                         if (markers_seen.find(Marker::FFMC) == markers_seen.end()) {
1639                                 notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFMC_IN_FEATURE});
1640                         }
1641                 }
1642
1643                 auto ffoc = markers_seen.find(Marker::FFOC);
1644                 if (ffoc == markers_seen.end()) {
1645                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_FFOC});
1646                 } else if (ffoc->second.e != 1) {
1647                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_FFOC, raw_convert<string>(ffoc->second.e)});
1648                 }
1649
1650                 auto lfoc = markers_seen.find(Marker::LFOC);
1651                 if (lfoc == markers_seen.end()) {
1652                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_LFOC});
1653                 } else {
1654                         auto lfoc_time = lfoc->second.as_editable_units_ceil(lfoc->second.tcr);
1655                         if (lfoc_time != (cpl->reels().back()->duration() - 1)) {
1656                                 notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_LFOC, raw_convert<string>(lfoc_time)});
1657                         }
1658                 }
1659
1660                 LinesCharactersResult result;
1661                 for (auto reel: cpl->reels()) {
1662                         if (reel->main_subtitle() && reel->main_subtitle()->asset()) {
1663                                 verify_text_lines_and_characters(reel->main_subtitle()->asset(), 52, 79, &result);
1664                         }
1665                 }
1666
1667                 if (result.line_count_exceeded) {
1668                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT});
1669                 }
1670                 if (result.error_length_exceeded) {
1671                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH});
1672                 } else if (result.warning_length_exceeded) {
1673                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH});
1674                 }
1675
1676                 result = LinesCharactersResult();
1677                 for (auto reel: cpl->reels()) {
1678                         for (auto i: reel->closed_captions()) {
1679                                 if (i->asset()) {
1680                                         verify_text_lines_and_characters(i->asset(), 32, 32, &result);
1681                                 }
1682                         }
1683                 }
1684
1685                 if (result.line_count_exceeded) {
1686                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT});
1687                 }
1688                 if (result.error_length_exceeded) {
1689                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH});
1690                 }
1691
1692                 if (!cpl->read_composition_metadata()) {
1693                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA, cpl->id(), cpl->file().get()});
1694                 } else if (!cpl->version_number()) {
1695                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER, cpl->id(), cpl->file().get()});
1696                 }
1697
1698                 verify_extension_metadata(cpl, notes);
1699
1700                 if (cpl->any_encrypted()) {
1701                         cxml::Document doc("CompositionPlaylist");
1702                         DCP_ASSERT(cpl->file());
1703                         doc.read_file(cpl->file().get());
1704                         if (!doc.optional_node_child("Signature")) {
1705                                 notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT, cpl->id(), cpl->file().get()});
1706                         }
1707                 }
1708         }
1709 }
1710
1711
1712 static
1713 void
1714 verify_pkl(
1715         shared_ptr<const DCP> dcp,
1716         shared_ptr<const PKL> pkl,
1717         boost::filesystem::path xsd_dtd_directory,
1718         vector<VerificationNote>& notes
1719         )
1720 {
1721         validate_xml(pkl->file().get(), xsd_dtd_directory, notes);
1722
1723         if (pkl_has_encrypted_assets(dcp, pkl)) {
1724                 cxml::Document doc("PackingList");
1725                 doc.read_file(pkl->file().get());
1726                 if (!doc.optional_node_child("Signature")) {
1727                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT, pkl->id(), pkl->file().get()});
1728                 }
1729         }
1730
1731         set<string> uuid_set;
1732         for (auto asset: pkl->assets()) {
1733                 if (!uuid_set.insert(asset->id()).second) {
1734                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::DUPLICATE_ASSET_ID_IN_PKL, pkl->id(), pkl->file().get()});
1735                         break;
1736                 }
1737         }
1738 }
1739
1740
1741
1742 static
1743 void
1744 verify_assetmap(
1745         shared_ptr<const DCP> dcp,
1746         boost::filesystem::path xsd_dtd_directory,
1747         vector<VerificationNote>& notes
1748         )
1749 {
1750         auto asset_map = dcp->asset_map();
1751         DCP_ASSERT(asset_map);
1752
1753         validate_xml(asset_map->file().get(), xsd_dtd_directory, notes);
1754
1755         set<string> uuid_set;
1756         for (auto const& asset: asset_map->assets()) {
1757                 if (!uuid_set.insert(asset.id()).second) {
1758                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::DUPLICATE_ASSET_ID_IN_ASSETMAP, asset_map->id(), asset_map->file().get()});
1759                         break;
1760                 }
1761         }
1762 }
1763
1764
1765 vector<VerificationNote>
1766 dcp::verify (
1767         vector<boost::filesystem::path> directories,
1768         function<void (string, optional<boost::filesystem::path>)> stage,
1769         function<void (float)> progress,
1770         VerificationOptions options,
1771         optional<boost::filesystem::path> xsd_dtd_directory
1772         )
1773 {
1774         if (!xsd_dtd_directory) {
1775                 xsd_dtd_directory = resources_directory() / "xsd";
1776         }
1777         *xsd_dtd_directory = boost::filesystem::canonical (*xsd_dtd_directory);
1778
1779         vector<VerificationNote> notes;
1780         State state{};
1781
1782         vector<shared_ptr<DCP>> dcps;
1783         for (auto i: directories) {
1784                 dcps.push_back (make_shared<DCP>(i));
1785         }
1786
1787         for (auto dcp: dcps) {
1788                 stage ("Checking DCP", dcp->directory());
1789                 bool carry_on = true;
1790                 try {
1791                         dcp->read (&notes, true);
1792                 } catch (MissingAssetmapError& e) {
1793                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1794                         carry_on = false;
1795                 } catch (ReadError& e) {
1796                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1797                 } catch (XMLError& e) {
1798                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1799                 } catch (MXFFileError& e) {
1800                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1801                 } catch (BadURNUUIDError& e) {
1802                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1803                 } catch (cxml::Error& e) {
1804                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1805                 }
1806
1807                 if (!carry_on) {
1808                         continue;
1809                 }
1810
1811                 if (dcp->standard() != Standard::SMPTE) {
1812                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_STANDARD});
1813                 }
1814
1815                 for (auto cpl: dcp->cpls()) {
1816                         verify_cpl(
1817                                 dcp,
1818                                 cpl,
1819                                 stage,
1820                                 *xsd_dtd_directory,
1821                                 progress,
1822                                 options,
1823                                 notes,
1824                                 state
1825                                 );
1826                 }
1827
1828                 for (auto pkl: dcp->pkls()) {
1829                         stage("Checking PKL", pkl->file());
1830                         verify_pkl(dcp, pkl, *xsd_dtd_directory, notes);
1831                 }
1832
1833                 if (dcp->asset_map_file()) {
1834                         stage("Checking ASSETMAP", dcp->asset_map_file().get());
1835                         verify_assetmap(dcp, *xsd_dtd_directory, notes);
1836                 } else {
1837                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_ASSETMAP});
1838                 }
1839         }
1840
1841         return notes;
1842 }
1843
1844
1845 string
1846 dcp::note_to_string (VerificationNote note)
1847 {
1848         /** These strings should say what is wrong, incorporating any extra details (ID, filenames etc.).
1849          *
1850          *  e.g. "ClosedCaption asset has no <EntryPoint> tag.",
1851          *  not "ClosedCaption assets must have an <EntryPoint> tag."
1852          *
1853          *  It's OK to use XML tag names where they are clear.
1854          *  If both ID and filename are available, use only the ID.
1855          *  End messages with a full stop.
1856          *  Messages should not mention whether or not their errors are a part of Bv2.1.
1857          */
1858         switch (note.code()) {
1859         case VerificationNote::Code::FAILED_READ:
1860                 return *note.note();
1861         case VerificationNote::Code::MISMATCHED_CPL_HASHES:
1862                 return String::compose("The hash of the CPL %1 in the PKL does not agree with the CPL file.", note.note().get());
1863         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE:
1864                 return String::compose("The picture in a reel has an invalid frame rate %1.", note.note().get());
1865         case VerificationNote::Code::INCORRECT_PICTURE_HASH:
1866                 return String::compose("The hash of the picture asset %1 does not agree with the PKL file.", note.file()->filename());
1867         case VerificationNote::Code::MISMATCHED_PICTURE_HASHES:
1868                 return String::compose("The PKL and CPL hashes differ for the picture asset %1.", note.file()->filename());
1869         case VerificationNote::Code::INCORRECT_SOUND_HASH:
1870                 return String::compose("The hash of the sound asset %1 does not agree with the PKL file.", note.file()->filename());
1871         case VerificationNote::Code::MISMATCHED_SOUND_HASHES:
1872                 return String::compose("The PKL and CPL hashes differ for the sound asset %1.", note.file()->filename());
1873         case VerificationNote::Code::EMPTY_ASSET_PATH:
1874                 return "The asset map contains an empty asset path.";
1875         case VerificationNote::Code::MISSING_ASSET:
1876                 return String::compose("The file %1 for an asset in the asset map cannot be found.", note.file()->filename());
1877         case VerificationNote::Code::MISMATCHED_STANDARD:
1878                 return "The DCP contains both SMPTE and Interop parts.";
1879         case VerificationNote::Code::INVALID_XML:
1880                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
1881         case VerificationNote::Code::MISSING_ASSETMAP:
1882                 return "No valid ASSETMAP or ASSETMAP.xml was found.";
1883         case VerificationNote::Code::INVALID_INTRINSIC_DURATION:
1884                 return String::compose("The intrinsic duration of the asset %1 is less than 1 second.", note.note().get());
1885         case VerificationNote::Code::INVALID_DURATION:
1886                 return String::compose("The duration of the asset %1 is less than 1 second.", note.note().get());
1887         case VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1888                 return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place.", note.file()->filename());
1889         case VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1890                 return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
1891         case VerificationNote::Code::EXTERNAL_ASSET:
1892                 return String::compose("The asset %1 that this DCP refers to is not included in the DCP.  It may be a VF.", note.note().get());
1893         case VerificationNote::Code::THREED_ASSET_MARKED_AS_TWOD:
1894                 return String::compose("The asset %1 is 3D but its MXF is marked as 2D.", note.file()->filename());
1895         case VerificationNote::Code::INVALID_STANDARD:
1896                 return "This DCP does not use the SMPTE standard.";
1897         case VerificationNote::Code::INVALID_LANGUAGE:
1898                 return String::compose("The DCP specifies a language '%1' which does not conform to the RFC 5646 standard.", note.note().get());
1899         case VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS:
1900                 return String::compose("The size %1 of picture asset %2 is not allowed.", note.note().get(), note.file()->filename());
1901         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K:
1902                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 2K DCPs.", note.note().get(), note.file()->filename());
1903         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K:
1904                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 4K DCPs.", note.note().get(), note.file()->filename());
1905         case VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D:
1906                 return "3D 4K DCPs are not allowed.";
1907         case VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES:
1908                 return String::compose("The size %1 of the closed caption asset %2 is larger than the 256KB maximum.", note.note().get(), note.file()->filename());
1909         case VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES:
1910                 return String::compose("The size %1 of the timed text asset %2 is larger than the 115MB maximum.", note.note().get(), note.file()->filename());
1911         case VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES:
1912                 return String::compose("The size %1 of the fonts in timed text asset %2 is larger than the 10MB maximum.", note.note().get(), note.file()->filename());
1913         case VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE:
1914                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <Language> tag.", note.file()->filename());
1915         case VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES:
1916                 return "Some subtitle assets have different <Language> tags than others";
1917         case VerificationNote::Code::MISSING_SUBTITLE_START_TIME:
1918                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <StartTime> tag.", note.file()->filename());
1919         case VerificationNote::Code::INVALID_SUBTITLE_START_TIME:
1920                 return String::compose("The XML for a SMPTE subtitle asset %1 has a non-zero <StartTime> tag.", note.file()->filename());
1921         case VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME:
1922                 return "The first subtitle or closed caption is less than 4 seconds from the start of the DCP.";
1923         case VerificationNote::Code::INVALID_SUBTITLE_DURATION:
1924                 return "At least one subtitle lasts less than 15 frames.";
1925         case VerificationNote::Code::INVALID_SUBTITLE_SPACING:
1926                 return "At least one pair of subtitles is separated by less than 2 frames.";
1927         case VerificationNote::Code::SUBTITLE_OVERLAPS_REEL_BOUNDARY:
1928                 return "At least one subtitle extends outside of its reel.";
1929         case VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT:
1930                 return "There are more than 3 subtitle lines in at least one place in the DCP.";
1931         case VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH:
1932                 return "There are more than 52 characters in at least one subtitle line.";
1933         case VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH:
1934                 return "There are more than 79 characters in at least one subtitle line.";
1935         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT:
1936                 return "There are more than 3 closed caption lines in at least one place.";
1937         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH:
1938                 return "There are more than 32 characters in at least one closed caption line.";
1939         case VerificationNote::Code::INVALID_SOUND_FRAME_RATE:
1940                 return String::compose("The sound asset %1 has a sampling rate of %2", note.file()->filename(), note.note().get());
1941         case VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT:
1942                 return String::compose("The CPL %1 has no <AnnotationText> tag.", note.note().get());
1943         case VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT:
1944                 return String::compose("The CPL %1 has an <AnnotationText> which differs from its <ContentTitleText>.", note.note().get());
1945         case VerificationNote::Code::MISMATCHED_ASSET_DURATION:
1946                 return "All assets in a reel do not have the same duration.";
1947         case VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS:
1948                 return "At least one reel contains a subtitle asset, but some reel(s) do not.";
1949         case VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS:
1950                 return "At least one reel has closed captions, but reels have different numbers of closed caption assets.";
1951         case VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT:
1952                 return String::compose("The subtitle asset %1 has no <EntryPoint> tag.", note.note().get());
1953         case VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT:
1954                 return String::compose("The subtitle asset %1 has an <EntryPoint> other than 0.", note.note().get());
1955         case VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT:
1956                 return String::compose("The closed caption asset %1 has no <EntryPoint> tag.", note.note().get());
1957         case VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT:
1958                 return String::compose("The closed caption asset %1 has an <EntryPoint> other than 0.", note.note().get());
1959         case VerificationNote::Code::MISSING_HASH:
1960                 return String::compose("The asset %1 has no <Hash> tag in the CPL.", note.note().get());
1961         case VerificationNote::Code::MISSING_FFEC_IN_FEATURE:
1962                 return "The DCP is marked as a Feature but there is no FFEC (first frame of end credits) marker.";
1963         case VerificationNote::Code::MISSING_FFMC_IN_FEATURE:
1964                 return "The DCP is marked as a Feature but there is no FFMC (first frame of moving credits) marker.";
1965         case VerificationNote::Code::MISSING_FFOC:
1966                 return "There should be a FFOC (first frame of content) marker.";
1967         case VerificationNote::Code::MISSING_LFOC:
1968                 return "There should be a LFOC (last frame of content) marker.";
1969         case VerificationNote::Code::INCORRECT_FFOC:
1970                 return String::compose("The FFOC marker is %1 instead of 1", note.note().get());
1971         case VerificationNote::Code::INCORRECT_LFOC:
1972                 return String::compose("The LFOC marker is %1 instead of 1 less than the duration of the last reel.", note.note().get());
1973         case VerificationNote::Code::MISSING_CPL_METADATA:
1974                 return String::compose("The CPL %1 has no <CompositionMetadataAsset> tag.", note.note().get());
1975         case VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER:
1976                 return String::compose("The CPL %1 has no <VersionNumber> in its <CompositionMetadataAsset>.", note.note().get());
1977         case VerificationNote::Code::MISSING_EXTENSION_METADATA:
1978                 return String::compose("The CPL %1 has no <ExtensionMetadata> in its <CompositionMetadataAsset>.", note.note().get());
1979         case VerificationNote::Code::INVALID_EXTENSION_METADATA:
1980                 return String::compose("The CPL %1 has a malformed <ExtensionMetadata> (%2).", note.file()->filename(), note.note().get());
1981         case VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT:
1982                 return String::compose("The CPL %1, which has encrypted content, is not signed.", note.note().get());
1983         case VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT:
1984                 return String::compose("The PKL %1, which has encrypted content, is not signed.", note.note().get());
1985         case VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL:
1986                 return String::compose("The PKL %1 has only one CPL but its <AnnotationText> does not match the CPL's <ContentTitleText>.", note.note().get());
1987         case VerificationNote::Code::PARTIALLY_ENCRYPTED:
1988                 return "Some assets are encrypted but some are not.";
1989         case VerificationNote::Code::INVALID_JPEG2000_CODESTREAM:
1990                 return String::compose("The JPEG2000 codestream for at least one frame is invalid (%1).", note.note().get());
1991         case VerificationNote::Code::INVALID_JPEG2000_GUARD_BITS_FOR_2K:
1992                 return String::compose("The JPEG2000 codestream uses %1 guard bits in a 2K image instead of 1.", note.note().get());
1993         case VerificationNote::Code::INVALID_JPEG2000_GUARD_BITS_FOR_4K:
1994                 return String::compose("The JPEG2000 codestream uses %1 guard bits in a 4K image instead of 2.", note.note().get());
1995         case VerificationNote::Code::INVALID_JPEG2000_TILE_SIZE:
1996                 return "The JPEG2000 tile size is not the same as the image size.";
1997         case VerificationNote::Code::INVALID_JPEG2000_CODE_BLOCK_WIDTH:
1998                 return String::compose("The JPEG2000 codestream uses a code block width of %1 instead of 32.", note.note().get());
1999         case VerificationNote::Code::INVALID_JPEG2000_CODE_BLOCK_HEIGHT:
2000                 return String::compose("The JPEG2000 codestream uses a code block height of %1 instead of 32.", note.note().get());
2001         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER_COUNT_FOR_2K:
2002                 return String::compose("%1 POC markers found in 2K JPEG2000 codestream instead of 0.", note.note().get());
2003         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER_COUNT_FOR_4K:
2004                 return String::compose("%1 POC markers found in 4K JPEG2000 codestream instead of 1.", note.note().get());
2005         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER:
2006                 return String::compose("Incorrect POC marker content found (%1).", note.note().get());
2007         case VerificationNote::Code::INVALID_JPEG2000_POC_MARKER_LOCATION:
2008                 return "POC marker found outside main header.";
2009         case VerificationNote::Code::INVALID_JPEG2000_TILE_PARTS_FOR_2K:
2010                 return String::compose("The JPEG2000 codestream has %1 tile parts in a 2K image instead of 3.", note.note().get());
2011         case VerificationNote::Code::INVALID_JPEG2000_TILE_PARTS_FOR_4K:
2012                 return String::compose("The JPEG2000 codestream has %1 tile parts in a 4K image instead of 6.", note.note().get());
2013         case VerificationNote::Code::MISSING_JPEG200_TLM_MARKER:
2014                 return "No TLM marker was found in a JPEG2000 codestream.";
2015         case VerificationNote::Code::MISMATCHED_TIMED_TEXT_RESOURCE_ID:
2016                 return "The Resource ID in a timed text MXF did not match the ID of the contained XML.";
2017         case VerificationNote::Code::INCORRECT_TIMED_TEXT_ASSET_ID:
2018                 return "The Asset ID in a timed text MXF is the same as the Resource ID or that of the contained XML.";
2019         case VerificationNote::Code::MISMATCHED_TIMED_TEXT_DURATION:
2020         {
2021                 vector<string> parts;
2022                 boost::split (parts, note.note().get(), boost::is_any_of(" "));
2023                 DCP_ASSERT (parts.size() == 2);
2024                 return String::compose("The reel duration of some timed text (%1) is not the same as the ContainerDuration of its MXF (%2).", parts[0], parts[1]);
2025         }
2026         case VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED:
2027                 return "Some aspect of this DCP could not be checked because it is encrypted.";
2028         case VerificationNote::Code::EMPTY_TEXT:
2029                 return "There is an empty <Text> node in a subtitle or closed caption.";
2030         case VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_VALIGN:
2031                 return "Some closed <Text> or <Image> nodes have different vertical alignments within a <Subtitle>.";
2032         case VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ORDERING:
2033                 return "Some closed captions are not listed in the order of their vertical position.";
2034         case VerificationNote::Code::UNEXPECTED_ENTRY_POINT:
2035                 return "There is an <EntryPoint> node inside a <MainMarkers>.";
2036         case VerificationNote::Code::UNEXPECTED_DURATION:
2037                 return "There is an <Duration> node inside a <MainMarkers>.";
2038         case VerificationNote::Code::INVALID_CONTENT_KIND:
2039                 return String::compose("<ContentKind> has an invalid value %1.", note.note().get());
2040         case VerificationNote::Code::INVALID_MAIN_PICTURE_ACTIVE_AREA:
2041                 return String::compose("<MainPictureActiveaArea> has an invalid value: %1", note.note().get());
2042         case VerificationNote::Code::DUPLICATE_ASSET_ID_IN_PKL:
2043                 return String::compose("The PKL %1 has more than one asset with the same ID.", note.note().get());
2044         case VerificationNote::Code::DUPLICATE_ASSET_ID_IN_ASSETMAP:
2045                 return String::compose("The ASSETMAP %1 has more than one asset with the same ID.", note.note().get());
2046         case VerificationNote::Code::MISSING_SUBTITLE:
2047                 return String::compose("The subtitle asset %1 has no subtitles.", note.note().get());
2048         case VerificationNote::Code::INVALID_SUBTITLE_ISSUE_DATE:
2049                 return String::compose("<IssueDate> has an invalid value: %1", note.note().get());
2050         case VerificationNote::Code::MISMATCHED_SOUND_CHANNEL_COUNTS:
2051                 return String::compose("The sound assets do not all have the same channel count; the first to differ is %1", note.file()->filename());
2052         case VerificationNote::Code::INVALID_MAIN_SOUND_CONFIGURATION:
2053                 return String::compose("<MainSoundConfiguration> has an invalid value: %1", note.note().get());
2054         case VerificationNote::Code::MISSING_FONT:
2055                 return String::compose("The font file for font ID \"%1\" was not found, or was not referred to in the ASSETMAP.", note.note().get());
2056         case VerificationNote::Code::INVALID_JPEG2000_TILE_PART_SIZE:
2057                 return String::compose(
2058                         "Frame %1 has an image component that is too large (component %2 is %3 bytes in size).",
2059                         note.frame().get(), note.component().get(), note.size().get()
2060                         );
2061         case VerificationNote::Code::INCORRECT_SUBTITLE_NAMESPACE_COUNT:
2062                 return String::compose("The XML in the subtitle asset %1 has more than one namespace declaration.", note.note().get());
2063         case VerificationNote::Code::MISSING_LOAD_FONT_FOR_FONT:
2064                 return String::compose("A subtitle or closed caption refers to a font with ID %1 that does not have a corresponding <LoadFont> node", note.id().get());
2065         }
2066
2067         return "";
2068 }
2069
2070
2071 bool
2072 dcp::operator== (dcp::VerificationNote const& a, dcp::VerificationNote const& b)
2073 {
2074         return a.type() == b.type() && a.code() == b.code() && a.note() == b.note() && a.file() == b.file() && a.line() == b.line();
2075 }
2076
2077
2078 bool
2079 dcp::operator< (dcp::VerificationNote const& a, dcp::VerificationNote const& b)
2080 {
2081         if (a.type() != b.type()) {
2082                 return a.type() < b.type();
2083         }
2084
2085         if (a.code() != b.code()) {
2086                 return a.code() < b.code();
2087         }
2088
2089         if (a.note() != b.note()) {
2090                 return a.note().get_value_or("") < b.note().get_value_or("");
2091         }
2092
2093         if (a.file() != b.file()) {
2094                 return a.file().get_value_or("") < b.file().get_value_or("");
2095         }
2096
2097         return a.line().get_value_or(0) < b.line().get_value_or(0);
2098 }
2099
2100
2101 std::ostream&
2102 dcp::operator<< (std::ostream& s, dcp::VerificationNote const& note)
2103 {
2104         s << note_to_string (note);
2105         if (note.note()) {
2106                 s << " [" << note.note().get() << "]";
2107         }
2108         if (note.file()) {
2109                 s << " [" << note.file().get() << "]";
2110         }
2111         if (note.line()) {
2112                 s << " [" << note.line().get() << "]";
2113         }
2114         return s;
2115 }
2116