394326fe24b08063a3c743ca0a404fa2eb8dcdc1
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2021 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34
35 /** @file  src/verify.cc
36  *  @brief dcp::verify() method and associated code
37  */
38
39
40 #include "compose.hpp"
41 #include "cpl.h"
42 #include "dcp.h"
43 #include "exceptions.h"
44 #include "interop_subtitle_asset.h"
45 #include "mono_picture_asset.h"
46 #include "mono_picture_frame.h"
47 #include "raw_convert.h"
48 #include "reel.h"
49 #include "reel_closed_caption_asset.h"
50 #include "reel_interop_subtitle_asset.h"
51 #include "reel_markers_asset.h"
52 #include "reel_picture_asset.h"
53 #include "reel_sound_asset.h"
54 #include "reel_smpte_subtitle_asset.h"
55 #include "reel_subtitle_asset.h"
56 #include "smpte_subtitle_asset.h"
57 #include "stereo_picture_asset.h"
58 #include "stereo_picture_frame.h"
59 #include "verify.h"
60 #include "verify_j2k.h"
61 #include <libxml/parserInternals.h>
62 #include <xercesc/dom/DOMAttr.hpp>
63 #include <xercesc/dom/DOMDocument.hpp>
64 #include <xercesc/dom/DOMError.hpp>
65 #include <xercesc/dom/DOMErrorHandler.hpp>
66 #include <xercesc/dom/DOMException.hpp>
67 #include <xercesc/dom/DOMImplementation.hpp>
68 #include <xercesc/dom/DOMImplementationLS.hpp>
69 #include <xercesc/dom/DOMImplementationRegistry.hpp>
70 #include <xercesc/dom/DOMLSParser.hpp>
71 #include <xercesc/dom/DOMLocator.hpp>
72 #include <xercesc/dom/DOMNamedNodeMap.hpp>
73 #include <xercesc/dom/DOMNodeList.hpp>
74 #include <xercesc/framework/LocalFileInputSource.hpp>
75 #include <xercesc/framework/MemBufInputSource.hpp>
76 #include <xercesc/parsers/AbstractDOMParser.hpp>
77 #include <xercesc/parsers/XercesDOMParser.hpp>
78 #include <xercesc/sax/HandlerBase.hpp>
79 #include <xercesc/util/PlatformUtils.hpp>
80 #include <boost/algorithm/string.hpp>
81 #include <iostream>
82 #include <map>
83 #include <regex>
84 #include <set>
85 #include <vector>
86
87
88 using std::cout;
89 using std::dynamic_pointer_cast;
90 using std::list;
91 using std::make_shared;
92 using std::map;
93 using std::max;
94 using std::set;
95 using std::shared_ptr;
96 using std::string;
97 using std::vector;
98 using boost::optional;
99 using boost::function;
100
101
102 using namespace dcp;
103 using namespace xercesc;
104
105
106 static
107 string
108 xml_ch_to_string (XMLCh const * a)
109 {
110         char* x = XMLString::transcode(a);
111         string const o(x);
112         XMLString::release(&x);
113         return o;
114 }
115
116
117 class XMLValidationError
118 {
119 public:
120         XMLValidationError (SAXParseException const & e)
121                 : _message (xml_ch_to_string(e.getMessage()))
122                 , _line (e.getLineNumber())
123                 , _column (e.getColumnNumber())
124                 , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
125                 , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
126         {
127
128         }
129
130         string message () const {
131                 return _message;
132         }
133
134         uint64_t line () const {
135                 return _line;
136         }
137
138         uint64_t column () const {
139                 return _column;
140         }
141
142         string public_id () const {
143                 return _public_id;
144         }
145
146         string system_id () const {
147                 return _system_id;
148         }
149
150 private:
151         string _message;
152         uint64_t _line;
153         uint64_t _column;
154         string _public_id;
155         string _system_id;
156 };
157
158
159 class DCPErrorHandler : public ErrorHandler
160 {
161 public:
162         void warning(const SAXParseException& e) override
163         {
164                 maybe_add (XMLValidationError(e));
165         }
166
167         void error(const SAXParseException& e) override
168         {
169                 maybe_add (XMLValidationError(e));
170         }
171
172         void fatalError(const SAXParseException& e) override
173         {
174                 maybe_add (XMLValidationError(e));
175         }
176
177         void resetErrors() override {
178                 _errors.clear ();
179         }
180
181         list<XMLValidationError> errors () const {
182                 return _errors;
183         }
184
185 private:
186         void maybe_add (XMLValidationError e)
187         {
188                 /* XXX: nasty hack */
189                 if (
190                         e.message().find("schema document") != string::npos &&
191                         e.message().find("has different target namespace from the one specified in instance document") != string::npos
192                         ) {
193                         return;
194                 }
195
196                 _errors.push_back (e);
197         }
198
199         list<XMLValidationError> _errors;
200 };
201
202
203 class StringToXMLCh
204 {
205 public:
206         StringToXMLCh (string a)
207         {
208                 _buffer = XMLString::transcode(a.c_str());
209         }
210
211         StringToXMLCh (StringToXMLCh const&) = delete;
212         StringToXMLCh& operator= (StringToXMLCh const&) = delete;
213
214         ~StringToXMLCh ()
215         {
216                 XMLString::release (&_buffer);
217         }
218
219         XMLCh const * get () const {
220                 return _buffer;
221         }
222
223 private:
224         XMLCh* _buffer;
225 };
226
227
228 class LocalFileResolver : public EntityResolver
229 {
230 public:
231         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
232                 : _xsd_dtd_directory (xsd_dtd_directory)
233         {
234                 /* XXX: I'm not clear on what things need to be in this list; some XSDs are apparently, magically
235                  * found without being here.
236                  */
237                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
238                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
239                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
240                 add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
241                 add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
242                 add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
243                 add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
244                 add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
245                 add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
246                 add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "DCDMSubtitle-2010.xsd");
247                 add("http://www.smpte-ra.org/schemas/428-7/2014/DCST.xsd", "DCDMSubtitle-2014.xsd");
248                 add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
249                 add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
250                 add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
251         }
252
253         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id) override
254         {
255                 if (!system_id) {
256                         return 0;
257                 }
258                 auto system_id_str = xml_ch_to_string (system_id);
259                 auto p = _xsd_dtd_directory;
260                 if (_files.find(system_id_str) == _files.end()) {
261                         p /= system_id_str;
262                 } else {
263                         p /= _files[system_id_str];
264                 }
265                 StringToXMLCh ch (p.string());
266                 return new LocalFileInputSource(ch.get());
267         }
268
269 private:
270         void add (string uri, string file)
271         {
272                 _files[uri] = file;
273         }
274
275         std::map<string, string> _files;
276         boost::filesystem::path _xsd_dtd_directory;
277 };
278
279
280 static void
281 parse (XercesDOMParser& parser, boost::filesystem::path xml)
282 {
283         parser.parse(xml.c_str());
284 }
285
286
287 static void
288 parse (XercesDOMParser& parser, string xml)
289 {
290         xercesc::MemBufInputSource buf(reinterpret_cast<unsigned char const*>(xml.c_str()), xml.size(), "");
291         parser.parse(buf);
292 }
293
294
295 template <class T>
296 void
297 validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, vector<VerificationNote>& notes)
298 {
299         try {
300                 XMLPlatformUtils::Initialize ();
301         } catch (XMLException& e) {
302                 throw MiscError ("Failed to initialise xerces library");
303         }
304
305         DCPErrorHandler error_handler;
306
307         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
308         {
309                 XercesDOMParser parser;
310                 parser.setValidationScheme(XercesDOMParser::Val_Always);
311                 parser.setDoNamespaces(true);
312                 parser.setDoSchema(true);
313
314                 vector<string> schema;
315                 schema.push_back("xml.xsd");
316                 schema.push_back("xmldsig-core-schema.xsd");
317                 schema.push_back("SMPTE-429-7-2006-CPL.xsd");
318                 schema.push_back("SMPTE-429-8-2006-PKL.xsd");
319                 schema.push_back("SMPTE-429-9-2007-AM.xsd");
320                 schema.push_back("Main-Stereo-Picture-CPL.xsd");
321                 schema.push_back("PROTO-ASDCP-CPL-20040511.xsd");
322                 schema.push_back("PROTO-ASDCP-PKL-20040311.xsd");
323                 schema.push_back("PROTO-ASDCP-AM-20040311.xsd");
324                 schema.push_back("DCSubtitle.v1.mattsson.xsd");
325                 schema.push_back("DCDMSubtitle-2010.xsd");
326                 schema.push_back("DCDMSubtitle-2014.xsd");
327                 schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
328                 schema.push_back("SMPTE-429-16.xsd");
329                 schema.push_back("Dolby-2012-AD.xsd");
330                 schema.push_back("SMPTE-429-10-2008.xsd");
331                 schema.push_back("xlink.xsd");
332                 schema.push_back("SMPTE-335-2012.xsd");
333                 schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
334                 schema.push_back("isdcf-mca.xsd");
335                 schema.push_back("SMPTE-429-12-2008.xsd");
336
337                 /* XXX: I'm not especially clear what this is for, but it seems to be necessary.
338                  * Schemas that are not mentioned in this list are not read, and the things
339                  * they describe are not checked.
340                  */
341                 string locations;
342                 for (auto i: schema) {
343                         locations += String::compose("%1 %1 ", i, i);
344                 }
345
346                 parser.setExternalSchemaLocation(locations.c_str());
347                 parser.setValidationSchemaFullChecking(true);
348                 parser.setErrorHandler(&error_handler);
349
350                 LocalFileResolver resolver (xsd_dtd_directory);
351                 parser.setEntityResolver(&resolver);
352
353                 try {
354                         parser.resetDocumentPool();
355                         parse(parser, xml);
356                 } catch (XMLException& e) {
357                         throw MiscError(xml_ch_to_string(e.getMessage()));
358                 } catch (DOMException& e) {
359                         throw MiscError(xml_ch_to_string(e.getMessage()));
360                 } catch (...) {
361                         throw MiscError("Unknown exception from xerces");
362                 }
363         }
364
365         XMLPlatformUtils::Terminate ();
366
367         for (auto i: error_handler.errors()) {
368                 notes.push_back ({
369                         VerificationNote::Type::ERROR,
370                         VerificationNote::Code::INVALID_XML,
371                         i.message(),
372                         boost::trim_copy(i.public_id() + " " + i.system_id()),
373                         i.line()
374                 });
375         }
376 }
377
378
/** Outcome of verify_asset() */
enum class VerifyAssetResult
{
	/** Neither of the problems below was found */
	GOOD,
	/** The hash in the CPL is present and differs from the one in the PKL */
	CPL_PKL_DIFFER,
	/** The hash calculated from the asset differs from the one in the PKL */
	BAD
};
384
385
386 static VerifyAssetResult
387 verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
388 {
389         auto const actual_hash = reel_file_asset->asset_ref()->hash(progress);
390
391         auto pkls = dcp->pkls();
392         /* We've read this DCP in so it must have at least one PKL */
393         DCP_ASSERT (!pkls.empty());
394
395         auto asset = reel_file_asset->asset_ref().asset();
396
397         optional<string> pkl_hash;
398         for (auto i: pkls) {
399                 pkl_hash = i->hash (reel_file_asset->asset_ref()->id());
400                 if (pkl_hash) {
401                         break;
402                 }
403         }
404
405         DCP_ASSERT (pkl_hash);
406
407         auto cpl_hash = reel_file_asset->hash();
408         if (cpl_hash && *cpl_hash != *pkl_hash) {
409                 return VerifyAssetResult::CPL_PKL_DIFFER;
410         }
411
412         if (actual_hash != *pkl_hash) {
413                 return VerifyAssetResult::BAD;
414         }
415
416         return VerifyAssetResult::GOOD;
417 }
418
419
420 void
421 verify_language_tag (string tag, vector<VerificationNote>& notes)
422 {
423         try {
424                 LanguageTag test (tag);
425         } catch (LanguageTagError &) {
426                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, tag});
427         }
428 }
429
430
431 static void
432 verify_picture_asset (shared_ptr<const ReelFileAsset> reel_file_asset, boost::filesystem::path file, vector<VerificationNote>& notes, function<void (float)> progress)
433 {
434         int biggest_frame = 0;
435         auto asset = dynamic_pointer_cast<PictureAsset>(reel_file_asset->asset_ref().asset());
436         auto const duration = asset->intrinsic_duration ();
437
438         auto check_and_add = [&notes](vector<VerificationNote> const& j2k_notes) {
439                 for (auto i: j2k_notes) {
440                         if (find(notes.begin(), notes.end(), i) == notes.end()) {
441                                 notes.push_back (i);
442                         }
443                 }
444         };
445
446         if (auto mono_asset = dynamic_pointer_cast<MonoPictureAsset>(reel_file_asset->asset_ref().asset())) {
447                 auto reader = mono_asset->start_read ();
448                 for (int64_t i = 0; i < duration; ++i) {
449                         auto frame = reader->get_frame (i);
450                         biggest_frame = max(biggest_frame, frame->size());
451                         if (!mono_asset->encrypted() || mono_asset->key()) {
452                                 vector<VerificationNote> j2k_notes;
453                                 verify_j2k(frame, i, mono_asset->frame_rate().numerator, j2k_notes);
454                                 check_and_add (j2k_notes);
455                         }
456                         progress (float(i) / duration);
457                 }
458         } else if (auto stereo_asset = dynamic_pointer_cast<StereoPictureAsset>(asset)) {
459                 auto reader = stereo_asset->start_read ();
460                 for (int64_t i = 0; i < duration; ++i) {
461                         auto frame = reader->get_frame (i);
462                         biggest_frame = max(biggest_frame, max(frame->left()->size(), frame->right()->size()));
463                         if (!stereo_asset->encrypted() || stereo_asset->key()) {
464                                 vector<VerificationNote> j2k_notes;
465                                 verify_j2k(frame->left(), i, stereo_asset->frame_rate().numerator, j2k_notes);
466                                 verify_j2k(frame->right(), i, stereo_asset->frame_rate().numerator, j2k_notes);
467                                 check_and_add (j2k_notes);
468                         }
469                         progress (float(i) / duration);
470                 }
471
472         }
473
474         static const int max_frame =   rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
475         static const int risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
476         if (biggest_frame > max_frame) {
477                 notes.push_back ({
478                         VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
479                 });
480         } else if (biggest_frame > risky_frame) {
481                 notes.push_back ({
482                         VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
483                 });
484         }
485 }
486
487
488 static void
489 verify_main_picture_asset (
490         shared_ptr<const DCP> dcp,
491         shared_ptr<const ReelPictureAsset> reel_asset,
492         function<void (string, optional<boost::filesystem::path>)> stage,
493         function<void (float)> progress,
494         VerificationOptions options,
495         vector<VerificationNote>& notes
496         )
497 {
498         auto asset = reel_asset->asset();
499         auto const file = *asset->file();
500
501         if (options.check_asset_hashes && (!options.maximum_asset_size_for_hash_check || boost::filesystem::file_size(file) < *options.maximum_asset_size_for_hash_check)) {
502                 stage ("Checking picture asset hash", file);
503                 auto const r = verify_asset (dcp, reel_asset, progress);
504                 switch (r) {
505                         case VerifyAssetResult::BAD:
506                                 notes.push_back ({
507                                         VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_PICTURE_HASH, file
508                                 });
509                                 break;
510                         case VerifyAssetResult::CPL_PKL_DIFFER:
511                                 notes.push_back ({
512                                         VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_PICTURE_HASHES, file
513                                 });
514                                 break;
515                         default:
516                                 break;
517                 }
518         }
519
520         stage ("Checking picture frame sizes", asset->file());
521         verify_picture_asset (reel_asset, file, notes, progress);
522
523         /* Only flat/scope allowed by Bv2.1 */
524         if (
525                 asset->size() != Size(2048, 858) &&
526                 asset->size() != Size(1998, 1080) &&
527                 asset->size() != Size(4096, 1716) &&
528                 asset->size() != Size(3996, 2160)) {
529                 notes.push_back({
530                         VerificationNote::Type::BV21_ERROR,
531                         VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS,
532                         String::compose("%1x%2", asset->size().width, asset->size().height),
533                         file
534                 });
535         }
536
537         /* Only 24, 25, 48fps allowed for 2K */
538         if (
539                 (asset->size() == Size(2048, 858) || asset->size() == Size(1998, 1080)) &&
540                 (asset->edit_rate() != Fraction(24, 1) && asset->edit_rate() != Fraction(25, 1) && asset->edit_rate() != Fraction(48, 1))
541            ) {
542                 notes.push_back({
543                         VerificationNote::Type::BV21_ERROR,
544                         VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K,
545                         String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
546                         file
547                 });
548         }
549
550         if (asset->size() == Size(4096, 1716) || asset->size() == Size(3996, 2160)) {
551                 /* Only 24fps allowed for 4K */
552                 if (asset->edit_rate() != Fraction(24, 1)) {
553                         notes.push_back({
554                                 VerificationNote::Type::BV21_ERROR,
555                                 VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K,
556                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
557                                 file
558                         });
559                 }
560
561                 /* Only 2D allowed for 4K */
562                 if (dynamic_pointer_cast<const StereoPictureAsset>(asset)) {
563                         notes.push_back({
564                                 VerificationNote::Type::BV21_ERROR,
565                                 VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D,
566                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
567                                 file
568                         });
569
570                 }
571         }
572
573 }
574
575
576 struct State
577 {
578         boost::optional<string> subtitle_language;
579         boost::optional<int> audio_channels;
580 };
581
582
583 static void
584 verify_main_sound_asset (
585         shared_ptr<const DCP> dcp,
586         shared_ptr<const ReelSoundAsset> reel_asset,
587         function<void (string, optional<boost::filesystem::path>)> stage,
588         function<void (float)> progress,
589         VerificationOptions options,
590         vector<VerificationNote>& notes,
591         State& state
592         )
593 {
594         auto asset = reel_asset->asset();
595         auto const file = *asset->file();
596
597         if (options.check_asset_hashes && (!options.maximum_asset_size_for_hash_check || boost::filesystem::file_size(file) < *options.maximum_asset_size_for_hash_check)) {
598                 stage("Checking sound asset hash", file);
599                 auto const r = verify_asset (dcp, reel_asset, progress);
600                 switch (r) {
601                         case VerifyAssetResult::BAD:
602                                 notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_SOUND_HASH, file});
603                                 break;
604                         case VerifyAssetResult::CPL_PKL_DIFFER:
605                                 notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_SOUND_HASHES, file});
606                                 break;
607                         default:
608                                 break;
609                 }
610         }
611
612         if (!state.audio_channels) {
613                 state.audio_channels = asset->channels();
614         } else if (*state.audio_channels != asset->channels()) {
615                 notes.push_back({ VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_SOUND_CHANNEL_COUNTS, file });
616         }
617
618         stage ("Checking sound asset metadata", file);
619
620         if (auto lang = asset->language()) {
621                 verify_language_tag (*lang, notes);
622         }
623         if (asset->sampling_rate() != 48000) {
624                 notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SOUND_FRAME_RATE, raw_convert<string>(asset->sampling_rate()), file});
625         }
626 }
627
628
629 static void
630 verify_main_subtitle_reel (shared_ptr<const ReelSubtitleAsset> reel_asset, vector<VerificationNote>& notes)
631 {
632         /* XXX: is Language compulsory? */
633         if (reel_asset->language()) {
634                 verify_language_tag (*reel_asset->language(), notes);
635         }
636
637         if (!reel_asset->entry_point()) {
638                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT, reel_asset->id() });
639         } else if (reel_asset->entry_point().get()) {
640                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT, reel_asset->id() });
641         }
642 }
643
644
645 static void
646 verify_closed_caption_reel (shared_ptr<const ReelClosedCaptionAsset> reel_asset, vector<VerificationNote>& notes)
647 {
648         /* XXX: is Language compulsory? */
649         if (reel_asset->language()) {
650                 verify_language_tag (*reel_asset->language(), notes);
651         }
652
653         if (!reel_asset->entry_point()) {
654                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
655         } else if (reel_asset->entry_point().get()) {
656                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
657         }
658 }
659
660
661 /** Verify stuff that is common to both subtitles and closed captions */
662 void
663 verify_smpte_timed_text_asset (
664         shared_ptr<const SMPTESubtitleAsset> asset,
665         optional<int64_t> reel_asset_duration,
666         vector<VerificationNote>& notes
667         )
668 {
669         if (asset->language()) {
670                 verify_language_tag (*asset->language(), notes);
671         } else {
672                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE, *asset->file() });
673         }
674
675         auto const size = boost::filesystem::file_size(asset->file().get());
676         if (size > 115 * 1024 * 1024) {
677                 notes.push_back (
678                         { VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES, raw_convert<string>(size), *asset->file() }
679                         );
680         }
681
682         /* XXX: I'm not sure what Bv2.1_7.2.1 means when it says "the font resource shall not be larger than 10MB"
683          * but I'm hoping that checking for the total size of all fonts being <= 10MB will do.
684          */
685         auto fonts = asset->font_data ();
686         int total_size = 0;
687         for (auto i: fonts) {
688                 total_size += i.second.size();
689         }
690         if (total_size > 10 * 1024 * 1024) {
691                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES, raw_convert<string>(total_size), asset->file().get() });
692         }
693
694         if (!asset->start_time()) {
695                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_START_TIME, asset->file().get() });
696         } else if (asset->start_time() != Time()) {
697                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SUBTITLE_START_TIME, asset->file().get() });
698         }
699
700         if (reel_asset_duration && *reel_asset_duration != asset->intrinsic_duration()) {
701                 notes.push_back (
702                         {
703                                 VerificationNote::Type::BV21_ERROR,
704                                 VerificationNote::Code::MISMATCHED_TIMED_TEXT_DURATION,
705                                 String::compose("%1 %2", *reel_asset_duration, asset->intrinsic_duration()),
706                                 asset->file().get()
707                         });
708         }
709 }
710
711
712 /** Verify Interop subtitle-only stuff */
713 void
714 verify_interop_subtitle_asset(shared_ptr<const InteropSubtitleAsset> asset, vector<VerificationNote>& notes)
715 {
716         if (asset->subtitles().empty()) {
717                 notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_SUBTITLE, asset->id(), asset->file().get() });
718         }
719         auto const unresolved = asset->unresolved_fonts();
720         if (!unresolved.empty()) {
721                 notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_FONT, unresolved.front() });
722         }
723 }
724
725
726 /** Verify SMPTE subtitle-only stuff */
727 void
728 verify_smpte_subtitle_asset (
729         shared_ptr<const SMPTESubtitleAsset> asset,
730         vector<VerificationNote>& notes,
731         State& state
732         )
733 {
734         if (asset->language()) {
735                 if (!state.subtitle_language) {
736                         state.subtitle_language = *asset->language();
737                 } else if (state.subtitle_language != *asset->language()) {
738                         notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES });
739                 }
740         }
741
742         DCP_ASSERT (asset->resource_id());
743         auto xml_id = asset->xml_id();
744         if (xml_id) {
745                 if (asset->resource_id().get() != xml_id) {
746                         notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_TIMED_TEXT_RESOURCE_ID });
747                 }
748
749                 if (asset->id() == asset->resource_id().get() || asset->id() == xml_id) {
750                         notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_TIMED_TEXT_ASSET_ID });
751                 }
752         } else {
753                 notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED});
754         }
755
756         if (asset->raw_xml()) {
757                 /* Deluxe require this in their QC even if it seems never to be mentioned in any standard */
758                 cxml::Document doc("SubtitleReel");
759                 doc.read_string(*asset->raw_xml());
760                 auto issue_date = doc.string_child("IssueDate");
761                 std::regex reg("^\\d\\d\\d\\d-\\d\\d-\\d\\dT\\d\\d:\\d\\d:\\d\\d$");
762                 if (!std::regex_match(issue_date, reg)) {
763                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_ISSUE_DATE, issue_date});
764                 }
765         }
766 }
767
768
769 /** Verify all subtitle stuff */
770 static void
771 verify_subtitle_asset (
772         shared_ptr<const SubtitleAsset> asset,
773         optional<int64_t> reel_asset_duration,
774         function<void (string, optional<boost::filesystem::path>)> stage,
775         boost::filesystem::path xsd_dtd_directory,
776         vector<VerificationNote>& notes,
777         State& state
778         )
779 {
780         stage ("Checking subtitle XML", asset->file());
781         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
782          * gets passed through libdcp which may clean up and therefore hide errors.
783          */
784         if (asset->raw_xml()) {
785                 validate_xml (asset->raw_xml().get(), xsd_dtd_directory, notes);
786         } else {
787                 notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED});
788         }
789
790         auto namespace_count = [](shared_ptr<const SubtitleAsset> asset, string root_node) {
791                 cxml::Document doc(root_node);
792                 doc.read_string(asset->raw_xml().get());
793                 auto root = dynamic_cast<xmlpp::Element*>(doc.node())->cobj();
794                 int count = 0;
795                 for (auto ns = root->nsDef; ns != nullptr; ns = ns->next) {
796                         ++count;
797                 }
798                 return count;
799         };
800
801         auto interop = dynamic_pointer_cast<const InteropSubtitleAsset>(asset);
802         if (interop) {
803                 verify_interop_subtitle_asset(interop, notes);
804                 if (namespace_count(asset, "DCSubtitle") > 1) {
805                         notes.push_back({ VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_SUBTITLE_NAMESPACE_COUNT, asset->id() });
806                 }
807         }
808
809         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
810         if (smpte) {
811                 verify_smpte_timed_text_asset (smpte, reel_asset_duration, notes);
812                 verify_smpte_subtitle_asset (smpte, notes, state);
813                 /* This asset may be encrypted and in that case we'll have no raw_xml() */
814                 if (asset->raw_xml() && namespace_count(asset, "SubtitleReel") > 1) {
815                         notes.push_back({ VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_SUBTITLE_NAMESPACE_COUNT, asset->id()});
816                 }
817         }
818 }
819
820
821 /** Verify all closed caption stuff */
822 static void
823 verify_closed_caption_asset (
824         shared_ptr<const SubtitleAsset> asset,
825         optional<int64_t> reel_asset_duration,
826         function<void (string, optional<boost::filesystem::path>)> stage,
827         boost::filesystem::path xsd_dtd_directory,
828         vector<VerificationNote>& notes
829         )
830 {
831         stage ("Checking closed caption XML", asset->file());
832         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
833          * gets passed through libdcp which may clean up and therefore hide errors.
834          */
835         auto raw_xml = asset->raw_xml();
836         if (raw_xml) {
837                 validate_xml (*raw_xml, xsd_dtd_directory, notes);
838                 if (raw_xml->size() > 256 * 1024) {
839                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES, raw_convert<string>(raw_xml->size()), *asset->file()});
840                 }
841         } else {
842                 notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED});
843         }
844
845         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
846         if (smpte) {
847                 verify_smpte_timed_text_asset (smpte, reel_asset_duration, notes);
848         }
849 }
850
851
852 /** Check the timing of the individual subtitles and make sure there are no empty <Text> nodes etc. */
853 static
854 void
855 verify_text_details (
856         dcp::Standard standard,
857         vector<shared_ptr<Reel>> reels,
858         int edit_rate,
859         vector<VerificationNote>& notes,
860         std::function<bool (shared_ptr<Reel>)> check,
861         std::function<optional<string> (shared_ptr<Reel>)> xml,
862         std::function<int64_t (shared_ptr<Reel>)> duration,
863         std::function<std::string (shared_ptr<Reel>)> id
864         )
865 {
866         /* end of last subtitle (in editable units) */
867         optional<int64_t> last_out;
868         auto too_short = false;
869         auto too_close = false;
870         auto too_early = false;
871         auto reel_overlap = false;
872         auto empty_text = false;
873         /* current reel start time (in editable units) */
874         int64_t reel_offset = 0;
875         optional<string> missing_load_font_id;
876
877         std::function<void (cxml::ConstNodePtr, optional<int>, optional<Time>, int, bool, bool&, vector<string>&)> parse;
878
879         parse = [&parse, &last_out, &too_short, &too_close, &too_early, &empty_text, &reel_offset, &missing_load_font_id](
880                 cxml::ConstNodePtr node,
881                 optional<int> tcr,
882                 optional<Time> start_time,
883                 int er,
884                 bool first_reel,
885                 bool& has_text,
886                 vector<string>& font_ids
887                 ) {
888                 if (node->name() == "Subtitle") {
889                         Time in (node->string_attribute("TimeIn"), tcr);
890                         if (start_time) {
891                                 in -= *start_time;
892                         }
893                         Time out (node->string_attribute("TimeOut"), tcr);
894                         if (start_time) {
895                                 out -= *start_time;
896                         }
897                         if (first_reel && tcr && in < Time(0, 0, 4, 0, *tcr)) {
898                                 too_early = true;
899                         }
900                         auto length = out - in;
901                         if (length.as_editable_units_ceil(er) < 15) {
902                                 too_short = true;
903                         }
904                         if (last_out) {
905                                 /* XXX: this feels dubious - is it really what Bv2.1 means? */
906                                 auto distance = reel_offset + in.as_editable_units_ceil(er) - *last_out;
907                                 if (distance >= 0 && distance < 2) {
908                                         too_close = true;
909                                 }
910                         }
911                         last_out = reel_offset + out.as_editable_units_floor(er);
912                 } else if (node->name() == "Text") {
913                         std::function<bool (cxml::ConstNodePtr)> node_has_content = [&](cxml::ConstNodePtr node) {
914                                 if (!node->content().empty()) {
915                                         return true;
916                                 }
917                                 for (auto i: node->node_children()) {
918                                         if (node_has_content(i)) {
919                                                 return true;
920                                         }
921                                 }
922                                 return false;
923                         };
924                         if (!node_has_content(node)) {
925                                 empty_text = true;
926                         }
927                         has_text = true;
928                 } else if (node->name() == "LoadFont") {
929                         if (auto const id = node->optional_string_attribute("Id")) {
930                                 font_ids.push_back(*id);
931                         } else if (auto const id = node->optional_string_attribute("ID")) {
932                                 font_ids.push_back(*id);
933                         }
934                 } else if (node->name() == "Font") {
935                         if (auto const font_id = node->optional_string_attribute("Id")) {
936                                 if (std::find_if(font_ids.begin(), font_ids.end(), [font_id](string const& id) { return id == font_id; }) == font_ids.end()) {
937                                         missing_load_font_id = font_id;
938                                 }
939                         }
940                 }
941                 for (auto i: node->node_children()) {
942                         parse(i, tcr, start_time, er, first_reel, has_text, font_ids);
943                 }
944         };
945
946         for (auto i = 0U; i < reels.size(); ++i) {
947                 if (!check(reels[i])) {
948                         continue;
949                 }
950
951                 auto reel_xml = xml(reels[i]);
952                 if (!reel_xml) {
953                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED});
954                         continue;
955                 }
956
957                 /* We need to look at <Subtitle> instances in the XML being checked, so we can't use the subtitles
958                  * read in by libdcp's parser.
959                  */
960
961                 shared_ptr<cxml::Document> doc;
962                 optional<int> tcr;
963                 optional<Time> start_time;
964                 switch (standard) {
965                 case dcp::Standard::INTEROP:
966                         doc = make_shared<cxml::Document>("DCSubtitle");
967                         doc->read_string (*reel_xml);
968                         break;
969                 case dcp::Standard::SMPTE:
970                         doc = make_shared<cxml::Document>("SubtitleReel");
971                         doc->read_string (*reel_xml);
972                         tcr = doc->number_child<int>("TimeCodeRate");
973                         if (auto start_time_string = doc->optional_string_child("StartTime")) {
974                                 start_time = Time(*start_time_string, tcr);
975                         }
976                         break;
977                 }
978                 bool has_text = false;
979                 vector<string> font_ids;
980                 parse(doc, tcr, start_time, edit_rate, i == 0, has_text, font_ids);
981                 auto end = reel_offset + duration(reels[i]);
982                 if (last_out && *last_out > end) {
983                         reel_overlap = true;
984                 }
985                 reel_offset = end;
986
987                 if (standard == dcp::Standard::SMPTE && has_text && font_ids.empty()) {
988                         notes.push_back(dcp::VerificationNote(dcp::VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_LOAD_FONT).set_id(id(reels[i])));
989                 }
990         }
991
992         if (last_out && *last_out > reel_offset) {
993                 reel_overlap = true;
994         }
995
996         if (too_early) {
997                 notes.push_back({
998                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME
999                 });
1000         }
1001
1002         if (too_short) {
1003                 notes.push_back ({
1004                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_DURATION
1005                 });
1006         }
1007
1008         if (too_close) {
1009                 notes.push_back ({
1010                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_SPACING
1011                 });
1012         }
1013
1014         if (reel_overlap) {
1015                 notes.push_back ({
1016                         VerificationNote::Type::ERROR, VerificationNote::Code::SUBTITLE_OVERLAPS_REEL_BOUNDARY
1017                 });
1018         }
1019
1020         if (empty_text) {
1021                 notes.push_back ({
1022                         VerificationNote::Type::WARNING, VerificationNote::Code::EMPTY_TEXT
1023                 });
1024         }
1025
1026         if (missing_load_font_id) {
1027                 notes.push_back(dcp::VerificationNote(VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_LOAD_FONT_FOR_FONT).set_id(*missing_load_font_id));
1028         }
1029 }
1030
1031
1032 static
1033 void
1034 verify_closed_caption_details (
1035         vector<shared_ptr<Reel>> reels,
1036         vector<VerificationNote>& notes
1037         )
1038 {
1039         std::function<void (cxml::ConstNodePtr node, std::vector<cxml::ConstNodePtr>& text_or_image)> find_text_or_image;
1040         find_text_or_image = [&find_text_or_image](cxml::ConstNodePtr node, std::vector<cxml::ConstNodePtr>& text_or_image) {
1041                 for (auto i: node->node_children()) {
1042                         if (i->name() == "Text") {
1043                                 text_or_image.push_back (i);
1044                         } else {
1045                                 find_text_or_image (i, text_or_image);
1046                         }
1047                 }
1048         };
1049
1050         auto mismatched_valign = false;
1051         auto incorrect_order = false;
1052
1053         std::function<void (cxml::ConstNodePtr)> parse;
1054         parse = [&parse, &find_text_or_image, &mismatched_valign, &incorrect_order](cxml::ConstNodePtr node) {
1055                 if (node->name() == "Subtitle") {
1056                         vector<cxml::ConstNodePtr> text_or_image;
1057                         find_text_or_image (node, text_or_image);
1058                         optional<string> last_valign;
1059                         optional<float> last_vpos;
1060                         for (auto i: text_or_image) {
1061                                 auto valign = i->optional_string_attribute("VAlign");
1062                                 if (!valign) {
1063                                         valign = i->optional_string_attribute("Valign").get_value_or("center");
1064                                 }
1065                                 auto vpos = i->optional_number_attribute<float>("VPosition");
1066                                 if (!vpos) {
1067                                         vpos = i->optional_number_attribute<float>("Vposition").get_value_or(50);
1068                                 }
1069
1070                                 if (last_valign) {
1071                                         if (*last_valign != valign) {
1072                                                 mismatched_valign = true;
1073                                         }
1074                                 }
1075                                 last_valign = valign;
1076
1077                                 if (!mismatched_valign) {
1078                                         if (last_vpos) {
1079                                                 if (*last_valign == "top" || *last_valign == "center") {
1080                                                         if (*vpos < *last_vpos) {
1081                                                                 incorrect_order = true;
1082                                                         }
1083                                                 } else {
1084                                                         if (*vpos > *last_vpos) {
1085                                                                 incorrect_order = true;
1086                                                         }
1087                                                 }
1088                                         }
1089                                         last_vpos = vpos;
1090                                 }
1091                         }
1092                 }
1093
1094                 for (auto i: node->node_children()) {
1095                         parse(i);
1096                 }
1097         };
1098
1099         for (auto reel: reels) {
1100                 for (auto ccap: reel->closed_captions()) {
1101                         auto reel_xml = ccap->asset()->raw_xml();
1102                         if (!reel_xml) {
1103                                 notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED});
1104                                 continue;
1105                         }
1106
1107                         /* We need to look at <Subtitle> instances in the XML being checked, so we can't use the subtitles
1108                          * read in by libdcp's parser.
1109                          */
1110
1111                         shared_ptr<cxml::Document> doc;
1112                         optional<int> tcr;
1113                         optional<Time> start_time;
1114                         try {
1115                                 doc = make_shared<cxml::Document>("SubtitleReel");
1116                                 doc->read_string (*reel_xml);
1117                         } catch (...) {
1118                                 doc = make_shared<cxml::Document>("DCSubtitle");
1119                                 doc->read_string (*reel_xml);
1120                         }
1121                         parse (doc);
1122                 }
1123         }
1124
1125         if (mismatched_valign) {
1126                 notes.push_back ({
1127                         VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_VALIGN,
1128                 });
1129         }
1130
1131         if (incorrect_order) {
1132                 notes.push_back ({
1133                         VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ORDERING,
1134                 });
1135         }
1136 }
1137
1138
/** Results of verify_text_lines_and_characters(); every flag starts off false */
struct LinesCharactersResult
{
        /** true if some line was seen with more characters than the warning length */
        bool warning_length_exceeded { false };
        /** true if some line was seen with more characters than the error length */
        bool error_length_exceeded { false };
        /** true if more than 3 lines were seen on screen at the same time */
        bool line_count_exceeded { false };
};
1145
1146
1147 static
1148 void
1149 verify_text_lines_and_characters (
1150         shared_ptr<SubtitleAsset> asset,
1151         int warning_length,
1152         int error_length,
1153         LinesCharactersResult* result
1154         )
1155 {
1156         class Event
1157         {
1158         public:
1159                 Event (Time time_, float position_, int characters_)
1160                         : time (time_)
1161                         , position (position_)
1162                         , characters (characters_)
1163                 {}
1164
1165                 Event (Time time_, shared_ptr<Event> start_)
1166                         : time (time_)
1167                         , start (start_)
1168                 {}
1169
1170                 Time time;
1171                 int position; //< position from 0 at top of screen to 100 at bottom
1172                 int characters;
1173                 shared_ptr<Event> start;
1174         };
1175
1176         vector<shared_ptr<Event>> events;
1177
1178         auto position = [](shared_ptr<const SubtitleString> sub) {
1179                 switch (sub->v_align()) {
1180                 case VAlign::TOP:
1181                         return lrintf(sub->v_position() * 100);
1182                 case VAlign::CENTER:
1183                         return lrintf((0.5f + sub->v_position()) * 100);
1184                 case VAlign::BOTTOM:
1185                         return lrintf((1.0f - sub->v_position()) * 100);
1186                 }
1187
1188                 return 0L;
1189         };
1190
1191         for (auto j: asset->subtitles()) {
1192                 auto text = dynamic_pointer_cast<const SubtitleString>(j);
1193                 if (text) {
1194                         auto in = make_shared<Event>(text->in(), position(text), text->text().length());
1195                         events.push_back(in);
1196                         events.push_back(make_shared<Event>(text->out(), in));
1197                 }
1198         }
1199
1200         std::sort(events.begin(), events.end(), [](shared_ptr<Event> const& a, shared_ptr<Event>const& b) {
1201                 return a->time < b->time;
1202         });
1203
1204         map<int, int> current;
1205         for (auto i: events) {
1206                 if (current.size() > 3) {
1207                         result->line_count_exceeded = true;
1208                 }
1209                 for (auto j: current) {
1210                         if (j.second > warning_length) {
1211                                 result->warning_length_exceeded = true;
1212                         }
1213                         if (j.second > error_length) {
1214                                 result->error_length_exceeded = true;
1215                         }
1216                 }
1217
1218                 if (i->start) {
1219                         /* end of a subtitle */
1220                         DCP_ASSERT (current.find(i->start->position) != current.end());
1221                         if (current[i->start->position] == i->start->characters) {
1222                                 current.erase(i->start->position);
1223                         } else {
1224                                 current[i->start->position] -= i->start->characters;
1225                         }
1226                 } else {
1227                         /* start of a subtitle */
1228                         if (current.find(i->position) == current.end()) {
1229                                 current[i->position] = i->characters;
1230                         } else {
1231                                 current[i->position] += i->characters;
1232                         }
1233                 }
1234         }
1235 }
1236
1237
1238 static
1239 void
1240 verify_text_details(dcp::Standard standard, vector<shared_ptr<Reel>> reels, vector<VerificationNote>& notes)
1241 {
1242         if (reels.empty()) {
1243                 return;
1244         }
1245
1246         if (reels[0]->main_subtitle()) {
1247                 verify_text_details(standard, reels, reels[0]->main_subtitle()->edit_rate().numerator, notes,
1248                         [](shared_ptr<Reel> reel) {
1249                                 return static_cast<bool>(reel->main_subtitle());
1250                         },
1251                         [](shared_ptr<Reel> reel) {
1252                                 return reel->main_subtitle()->asset()->raw_xml();
1253                         },
1254                         [](shared_ptr<Reel> reel) {
1255                                 return reel->main_subtitle()->actual_duration();
1256                         },
1257                         [](shared_ptr<Reel> reel) {
1258                                 return reel->main_subtitle()->id();
1259                         }
1260                 );
1261         }
1262
1263         for (auto i = 0U; i < reels[0]->closed_captions().size(); ++i) {
1264                 verify_text_details(standard, reels, reels[0]->closed_captions()[i]->edit_rate().numerator, notes,
1265                         [i](shared_ptr<Reel> reel) {
1266                                 return i < reel->closed_captions().size();
1267                         },
1268                         [i](shared_ptr<Reel> reel) {
1269                                 return reel->closed_captions()[i]->asset()->raw_xml();
1270                         },
1271                         [i](shared_ptr<Reel> reel) {
1272                                 return reel->closed_captions()[i]->actual_duration();
1273                         },
1274                         [i](shared_ptr<Reel> reel) {
1275                                 return reel->closed_captions()[i]->id();
1276                         }
1277                 );
1278         }
1279
1280         verify_closed_caption_details (reels, notes);
1281 }
1282
1283
1284 void
1285 verify_extension_metadata(shared_ptr<const CPL> cpl, vector<VerificationNote>& notes)
1286 {
1287         DCP_ASSERT (cpl->file());
1288         cxml::Document doc ("CompositionPlaylist");
1289         doc.read_file (cpl->file().get());
1290
1291         auto missing = false;
1292         string malformed;
1293
1294         if (auto reel_list = doc.node_child("ReelList")) {
1295                 auto reels = reel_list->node_children("Reel");
1296                 if (!reels.empty()) {
1297                         if (auto asset_list = reels[0]->optional_node_child("AssetList")) {
1298                                 if (auto metadata = asset_list->optional_node_child("CompositionMetadataAsset")) {
1299                                         if (auto extension_list = metadata->optional_node_child("ExtensionMetadataList")) {
1300                                                 missing = true;
1301                                                 for (auto extension: extension_list->node_children("ExtensionMetadata")) {
1302                                                         if (extension->optional_string_attribute("scope").get_value_or("") != "http://isdcf.com/ns/cplmd/app") {
1303                                                                 continue;
1304                                                         }
1305                                                         missing = false;
1306                                                         if (auto name = extension->optional_node_child("Name")) {
1307                                                                 if (name->content() != "Application") {
1308                                                                         malformed = "<Name> should be 'Application'";
1309                                                                 }
1310                                                         }
1311                                                         if (auto property_list = extension->optional_node_child("PropertyList")) {
1312                                                                 if (auto property = property_list->optional_node_child("Property")) {
1313                                                                         if (auto name = property->optional_node_child("Name")) {
1314                                                                                 if (name->content() != "DCP Constraints Profile") {
1315                                                                                         malformed = "<Name> property should be 'DCP Constraints Profile'";
1316                                                                                 }
1317                                                                         }
1318                                                                         if (auto value = property->optional_node_child("Value")) {
1319                                                                                 if (value->content() != "SMPTE-RDD-52:2020-Bv2.1") {
1320                                                                                         malformed = "<Value> property should be 'SMPTE-RDD-52:2020-Bv2.1'";
1321                                                                                 }
1322                                                                         }
1323                                                                 }
1324                                                         }
1325                                                 }
1326                                         } else {
1327                                                 missing = true;
1328                                         }
1329                                 }
1330                         }
1331                 }
1332         }
1333
1334         if (missing) {
1335                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_EXTENSION_METADATA, cpl->id(), cpl->file().get()});
1336         } else if (!malformed.empty()) {
1337                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_EXTENSION_METADATA, malformed, cpl->file().get()});
1338         }
1339 }
1340
1341
1342 bool
1343 pkl_has_encrypted_assets(shared_ptr<const DCP> dcp, shared_ptr<const PKL> pkl)
1344 {
1345         vector<string> encrypted;
1346         for (auto i: dcp->cpls()) {
1347                 for (auto j: i->reel_file_assets()) {
1348                         if (j->asset_ref().resolved()) {
1349                                 auto mxf = dynamic_pointer_cast<MXF>(j->asset_ref().asset());
1350                                 if (mxf && mxf->encrypted()) {
1351                                         encrypted.push_back(j->asset_ref().id());
1352                                 }
1353                         }
1354                 }
1355         }
1356
1357         for (auto i: pkl->assets()) {
1358                 if (find(encrypted.begin(), encrypted.end(), i->id()) != encrypted.end()) {
1359                         return true;
1360                 }
1361         }
1362
1363         return false;
1364 }
1365
1366
1367 static
1368 void
1369 verify_reel(
1370         shared_ptr<const DCP> dcp,
1371         shared_ptr<const CPL> cpl,
1372         shared_ptr<const Reel> reel,
1373         optional<dcp::Size> main_picture_active_area,
1374         function<void (string, optional<boost::filesystem::path>)> stage,
1375         boost::filesystem::path xsd_dtd_directory,
1376         function<void (float)> progress,
1377         VerificationOptions options,
1378         vector<VerificationNote>& notes,
1379         State& state,
1380         bool* have_main_subtitle,
1381         bool* have_no_main_subtitle,
1382         size_t* most_closed_captions,
1383         size_t* fewest_closed_captions,
1384         map<Marker, Time>* markers_seen
1385         )
1386 {
1387         for (auto i: reel->assets()) {
1388                 if (i->duration() && (i->duration().get() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1389                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_DURATION, i->id()});
1390                 }
1391                 if ((i->intrinsic_duration() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1392                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_INTRINSIC_DURATION, i->id()});
1393                 }
1394                 auto file_asset = dynamic_pointer_cast<ReelFileAsset>(i);
1395                 if (i->encryptable() && !file_asset->hash()) {
1396                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_HASH, i->id()});
1397                 }
1398         }
1399
1400         if (dcp->standard() == Standard::SMPTE) {
1401                 boost::optional<int64_t> duration;
1402                 for (auto i: reel->assets()) {
1403                         if (!duration) {
1404                                 duration = i->actual_duration();
1405                         } else if (*duration != i->actual_duration()) {
1406                                 notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_ASSET_DURATION});
1407                                 break;
1408                         }
1409                 }
1410         }
1411
1412         if (reel->main_picture()) {
1413                 /* Check reel stuff */
1414                 auto const frame_rate = reel->main_picture()->frame_rate();
1415                 if (frame_rate.denominator != 1 ||
1416                     (frame_rate.numerator != 24 &&
1417                      frame_rate.numerator != 25 &&
1418                      frame_rate.numerator != 30 &&
1419                      frame_rate.numerator != 48 &&
1420                      frame_rate.numerator != 50 &&
1421                      frame_rate.numerator != 60 &&
1422                      frame_rate.numerator != 96)) {
1423                         notes.push_back({
1424                                 VerificationNote::Type::ERROR,
1425                                 VerificationNote::Code::INVALID_PICTURE_FRAME_RATE,
1426                                 String::compose("%1/%2", frame_rate.numerator, frame_rate.denominator)
1427                         });
1428                 }
1429                 /* Check asset */
1430                 if (reel->main_picture()->asset_ref().resolved()) {
1431                         verify_main_picture_asset(dcp, reel->main_picture(), stage, progress, options, notes);
1432                         auto const asset_size = reel->main_picture()->asset()->size();
1433                         if (main_picture_active_area) {
1434                                 if (main_picture_active_area->width > asset_size.width) {
1435                                         notes.push_back({
1436                                                         VerificationNote::Type::ERROR,
1437                                                         VerificationNote::Code::INVALID_MAIN_PICTURE_ACTIVE_AREA,
1438                                                         String::compose("width %1 is bigger than the asset width %2", main_picture_active_area->width, asset_size.width),
1439                                                         cpl->file().get()
1440                                                         });
1441                                 }
1442                                 if (main_picture_active_area->height > asset_size.height) {
1443                                         notes.push_back({
1444                                                         VerificationNote::Type::ERROR,
1445                                                         VerificationNote::Code::INVALID_MAIN_PICTURE_ACTIVE_AREA,
1446                                                         String::compose("height %1 is bigger than the asset height %2", main_picture_active_area->height, asset_size.height),
1447                                                         cpl->file().get()
1448                                                         });
1449                                 }
1450                         }
1451                 }
1452
1453         }
1454
1455         if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
1456                 verify_main_sound_asset(dcp, reel->main_sound(), stage, progress, options, notes, state);
1457         }
1458
1459         if (reel->main_subtitle()) {
1460                 verify_main_subtitle_reel(reel->main_subtitle(), notes);
1461                 if (reel->main_subtitle()->asset_ref().resolved()) {
1462                         verify_subtitle_asset(reel->main_subtitle()->asset(), reel->main_subtitle()->duration(), stage, xsd_dtd_directory, notes, state);
1463                 }
1464                 *have_main_subtitle = true;
1465         } else {
1466                 *have_no_main_subtitle = true;
1467         }
1468
1469         for (auto i: reel->closed_captions()) {
1470                 verify_closed_caption_reel(i, notes);
1471                 if (i->asset_ref().resolved()) {
1472                         verify_closed_caption_asset(i->asset(), i->duration(), stage, xsd_dtd_directory, notes);
1473                 }
1474         }
1475
1476         if (reel->main_markers()) {
1477                 for (auto const& i: reel->main_markers()->get()) {
1478                         markers_seen->insert(i);
1479                 }
1480                 if (reel->main_markers()->entry_point()) {
1481                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::UNEXPECTED_ENTRY_POINT});
1482                 }
1483                 if (reel->main_markers()->duration()) {
1484                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::UNEXPECTED_DURATION});
1485                 }
1486         }
1487
1488         *fewest_closed_captions = std::min(*fewest_closed_captions, reel->closed_captions().size());
1489         *most_closed_captions = std::max(*most_closed_captions, reel->closed_captions().size());
1490
1491 }
1492
1493
1494 static
1495 void
1496 verify_cpl(
1497         shared_ptr<const DCP> dcp,
1498         shared_ptr<const CPL> cpl,
1499         function<void (string, optional<boost::filesystem::path>)> stage,
1500         boost::filesystem::path xsd_dtd_directory,
1501         function<void (float)> progress,
1502         VerificationOptions options,
1503         vector<VerificationNote>& notes,
1504         State& state
1505         )
1506 {
1507         stage("Checking CPL", cpl->file());
1508         validate_xml(cpl->file().get(), xsd_dtd_directory, notes);
1509
1510         if (cpl->any_encrypted() && !cpl->all_encrypted()) {
1511                 notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::PARTIALLY_ENCRYPTED});
1512         }
1513
1514         for (auto const& i: cpl->additional_subtitle_languages()) {
1515                 verify_language_tag(i, notes);
1516         }
1517
1518         if (!cpl->content_kind().scope() || *cpl->content_kind().scope() == "http://www.smpte-ra.org/schemas/429-7/2006/CPL#standard-content") {
1519                 /* This is a content kind from http://www.smpte-ra.org/schemas/429-7/2006/CPL#standard-content; make sure it's one
1520                  * of the approved ones.
1521                  */
1522                 auto all = ContentKind::all();
1523                 auto name = cpl->content_kind().name();
1524                 transform(name.begin(), name.end(), name.begin(), ::tolower);
1525                 auto iter = std::find_if(all.begin(), all.end(), [name](ContentKind const& k) { return !k.scope() && k.name() == name; });
1526                 if (iter == all.end()) {
1527                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_CONTENT_KIND, cpl->content_kind().name()});
1528                 }
1529         }
1530
1531         if (cpl->release_territory()) {
1532                 if (!cpl->release_territory_scope() || cpl->release_territory_scope().get() != "http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata#scope/release-territory/UNM49") {
1533                         auto terr = cpl->release_territory().get();
1534                         /* Must be a valid region tag, or "001" */
1535                         try {
1536                                 LanguageTag::RegionSubtag test(terr);
1537                         } catch (...) {
1538                                 if (terr != "001") {
1539                                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, terr});
1540                                 }
1541                         }
1542                 }
1543         }
1544
1545         if (dcp->standard() == Standard::SMPTE) {
1546                 if (!cpl->annotation_text()) {
1547                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1548                 } else if (cpl->annotation_text().get() != cpl->content_title_text()) {
1549                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1550                 }
1551         }
1552
1553         for (auto i: dcp->pkls()) {
1554                 /* Check that the CPL's hash corresponds to the PKL */
1555                 optional<string> h = i->hash(cpl->id());
1556                 if (h && make_digest(ArrayData(*cpl->file())) != *h) {
1557                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_CPL_HASHES, cpl->id(), cpl->file().get()});
1558                 }
1559
1560                 /* Check that any PKL with a single CPL has its AnnotationText the same as the CPL's ContentTitleText */
1561                 optional<string> required_annotation_text;
1562                 for (auto j: i->assets()) {
1563                         /* See if this is a CPL */
1564                         for (auto k: dcp->cpls()) {
1565                                 if (j->id() == k->id()) {
1566                                         if (!required_annotation_text) {
1567                                                 /* First CPL we have found; this is the required AnnotationText unless we find another */
1568                                                 required_annotation_text = cpl->content_title_text();
1569                                         } else {
1570                                                 /* There's more than one CPL so we don't care what the PKL's AnnotationText is */
1571                                                 required_annotation_text = boost::none;
1572                                         }
1573                                 }
1574                         }
1575                 }
1576
1577                 if (required_annotation_text && i->annotation_text() != required_annotation_text) {
1578                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL, i->id(), i->file().get()});
1579                 }
1580         }
1581
1582         /* set to true if any reel has a MainSubtitle */
1583         auto have_main_subtitle = false;
1584         /* set to true if any reel has no MainSubtitle */
1585         auto have_no_main_subtitle = false;
1586         /* fewest number of closed caption assets seen in a reel */
1587         size_t fewest_closed_captions = SIZE_MAX;
1588         /* most number of closed caption assets seen in a reel */
1589         size_t most_closed_captions = 0;
1590         map<Marker, Time> markers_seen;
1591
1592         auto const main_picture_active_area = cpl->main_picture_active_area();
1593         if (main_picture_active_area && (main_picture_active_area->width % 2)) {
1594                 notes.push_back({
1595                                 VerificationNote::Type::ERROR,
1596                                 VerificationNote::Code::INVALID_MAIN_PICTURE_ACTIVE_AREA,
1597                                 String::compose("width %1 is not a multiple of 2", main_picture_active_area->width),
1598                                 cpl->file().get()
1599                         });
1600         }
1601         if (main_picture_active_area && (main_picture_active_area->height % 2)) {
1602                 notes.push_back({
1603                                 VerificationNote::Type::ERROR,
1604                                 VerificationNote::Code::INVALID_MAIN_PICTURE_ACTIVE_AREA,
1605                                 String::compose("height %1 is not a multiple of 2", main_picture_active_area->height),
1606                                 cpl->file().get()
1607                         });
1608         }
1609
1610         for (auto reel: cpl->reels()) {
1611                 stage("Checking reel", optional<boost::filesystem::path>());
1612                 verify_reel(
1613                         dcp,
1614                         cpl,
1615                         reel,
1616                         main_picture_active_area,
1617                         stage,
1618                         xsd_dtd_directory,
1619                         progress,
1620                         options,
1621                         notes,
1622                         state,
1623                         &have_main_subtitle,
1624                         &have_no_main_subtitle,
1625                         &most_closed_captions,
1626                         &fewest_closed_captions,
1627                         &markers_seen
1628                         );
1629         }
1630
1631         verify_text_details(dcp->standard().get_value_or(dcp::Standard::SMPTE), cpl->reels(), notes);
1632
1633         if (dcp->standard() == Standard::SMPTE) {
1634                 if (auto msc = cpl->main_sound_configuration()) {
1635                         if (state.audio_channels && msc->channels() != *state.audio_channels) {
1636                                 notes.push_back({
1637                                                 VerificationNote::Type::ERROR,
1638                                                 VerificationNote::Code::INVALID_MAIN_SOUND_CONFIGURATION,
1639                                                 String::compose("MainSoundConfiguration has %1 channels but sound assets have %2", msc->channels(), *state.audio_channels),
1640                                                 cpl->file().get()
1641                                         });
1642                         }
1643                 }
1644
1645                 if (have_main_subtitle && have_no_main_subtitle) {
1646                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS});
1647                 }
1648
1649                 if (fewest_closed_captions != most_closed_captions) {
1650                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS});
1651                 }
1652
1653                 if (cpl->content_kind() == ContentKind::FEATURE) {
1654                         if (markers_seen.find(Marker::FFEC) == markers_seen.end()) {
1655                                 notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFEC_IN_FEATURE});
1656                         }
1657                         if (markers_seen.find(Marker::FFMC) == markers_seen.end()) {
1658                                 notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFMC_IN_FEATURE});
1659                         }
1660                 }
1661
1662                 auto ffoc = markers_seen.find(Marker::FFOC);
1663                 if (ffoc == markers_seen.end()) {
1664                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_FFOC});
1665                 } else if (ffoc->second.e != 1) {
1666                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_FFOC, raw_convert<string>(ffoc->second.e)});
1667                 }
1668
1669                 auto lfoc = markers_seen.find(Marker::LFOC);
1670                 if (lfoc == markers_seen.end()) {
1671                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_LFOC});
1672                 } else {
1673                         auto lfoc_time = lfoc->second.as_editable_units_ceil(lfoc->second.tcr);
1674                         if (lfoc_time != (cpl->reels().back()->duration() - 1)) {
1675                                 notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_LFOC, raw_convert<string>(lfoc_time)});
1676                         }
1677                 }
1678
1679                 LinesCharactersResult result;
1680                 for (auto reel: cpl->reels()) {
1681                         if (reel->main_subtitle() && reel->main_subtitle()->asset()) {
1682                                 verify_text_lines_and_characters(reel->main_subtitle()->asset(), 52, 79, &result);
1683                         }
1684                 }
1685
1686                 if (result.line_count_exceeded) {
1687                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT});
1688                 }
1689                 if (result.error_length_exceeded) {
1690                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH});
1691                 } else if (result.warning_length_exceeded) {
1692                         notes.push_back({VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH});
1693                 }
1694
1695                 result = LinesCharactersResult();
1696                 for (auto reel: cpl->reels()) {
1697                         for (auto i: reel->closed_captions()) {
1698                                 if (i->asset()) {
1699                                         verify_text_lines_and_characters(i->asset(), 32, 32, &result);
1700                                 }
1701                         }
1702                 }
1703
1704                 if (result.line_count_exceeded) {
1705                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT});
1706                 }
1707                 if (result.error_length_exceeded) {
1708                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH});
1709                 }
1710
1711                 if (!cpl->read_composition_metadata()) {
1712                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA, cpl->id(), cpl->file().get()});
1713                 } else if (!cpl->version_number()) {
1714                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER, cpl->id(), cpl->file().get()});
1715                 }
1716
1717                 verify_extension_metadata(cpl, notes);
1718
1719                 if (cpl->any_encrypted()) {
1720                         cxml::Document doc("CompositionPlaylist");
1721                         DCP_ASSERT(cpl->file());
1722                         doc.read_file(cpl->file().get());
1723                         if (!doc.optional_node_child("Signature")) {
1724                                 notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT, cpl->id(), cpl->file().get()});
1725                         }
1726                 }
1727         }
1728 }
1729
1730
1731 static
1732 void
1733 verify_pkl(
1734         shared_ptr<const DCP> dcp,
1735         shared_ptr<const PKL> pkl,
1736         boost::filesystem::path xsd_dtd_directory,
1737         vector<VerificationNote>& notes
1738         )
1739 {
1740         validate_xml(pkl->file().get(), xsd_dtd_directory, notes);
1741
1742         if (pkl_has_encrypted_assets(dcp, pkl)) {
1743                 cxml::Document doc("PackingList");
1744                 doc.read_file(pkl->file().get());
1745                 if (!doc.optional_node_child("Signature")) {
1746                         notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT, pkl->id(), pkl->file().get()});
1747                 }
1748         }
1749
1750         set<string> uuid_set;
1751         for (auto asset: pkl->assets()) {
1752                 if (!uuid_set.insert(asset->id()).second) {
1753                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::DUPLICATE_ASSET_ID_IN_PKL, pkl->id(), pkl->file().get()});
1754                         break;
1755                 }
1756         }
1757 }
1758
1759
1760
1761 static
1762 void
1763 verify_assetmap(
1764         shared_ptr<const DCP> dcp,
1765         boost::filesystem::path xsd_dtd_directory,
1766         vector<VerificationNote>& notes
1767         )
1768 {
1769         auto asset_map = dcp->asset_map();
1770         DCP_ASSERT(asset_map);
1771
1772         validate_xml(asset_map->file().get(), xsd_dtd_directory, notes);
1773
1774         set<string> uuid_set;
1775         for (auto const& asset: asset_map->assets()) {
1776                 if (!uuid_set.insert(asset.id()).second) {
1777                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::DUPLICATE_ASSET_ID_IN_ASSETMAP, asset_map->id(), asset_map->file().get()});
1778                         break;
1779                 }
1780         }
1781 }
1782
1783
1784 vector<VerificationNote>
1785 dcp::verify (
1786         vector<boost::filesystem::path> directories,
1787         function<void (string, optional<boost::filesystem::path>)> stage,
1788         function<void (float)> progress,
1789         VerificationOptions options,
1790         optional<boost::filesystem::path> xsd_dtd_directory
1791         )
1792 {
1793         if (!xsd_dtd_directory) {
1794                 xsd_dtd_directory = resources_directory() / "xsd";
1795         }
1796         *xsd_dtd_directory = boost::filesystem::canonical (*xsd_dtd_directory);
1797
1798         vector<VerificationNote> notes;
1799         State state{};
1800
1801         vector<shared_ptr<DCP>> dcps;
1802         for (auto i: directories) {
1803                 dcps.push_back (make_shared<DCP>(i));
1804         }
1805
1806         for (auto dcp: dcps) {
1807                 stage ("Checking DCP", dcp->directory());
1808                 bool carry_on = true;
1809                 try {
1810                         dcp->read (&notes, true);
1811                 } catch (MissingAssetmapError& e) {
1812                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1813                         carry_on = false;
1814                 } catch (ReadError& e) {
1815                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1816                 } catch (XMLError& e) {
1817                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1818                 } catch (MXFFileError& e) {
1819                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1820                 } catch (BadURNUUIDError& e) {
1821                         notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1822                 } catch (cxml::Error& e) {
1823                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1824                 }
1825
1826                 if (!carry_on) {
1827                         continue;
1828                 }
1829
1830                 if (dcp->standard() != Standard::SMPTE) {
1831                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_STANDARD});
1832                 }
1833
1834                 for (auto cpl: dcp->cpls()) {
1835                         verify_cpl(
1836                                 dcp,
1837                                 cpl,
1838                                 stage,
1839                                 *xsd_dtd_directory,
1840                                 progress,
1841                                 options,
1842                                 notes,
1843                                 state
1844                                 );
1845                 }
1846
1847                 for (auto pkl: dcp->pkls()) {
1848                         stage("Checking PKL", pkl->file());
1849                         verify_pkl(dcp, pkl, *xsd_dtd_directory, notes);
1850                 }
1851
1852                 if (dcp->asset_map_file()) {
1853                         stage("Checking ASSETMAP", dcp->asset_map_file().get());
1854                         verify_assetmap(dcp, *xsd_dtd_directory, notes);
1855                 } else {
1856                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_ASSETMAP});
1857                 }
1858         }
1859
1860         return notes;
1861 }
1862
1863
1864 string
1865 dcp::note_to_string (VerificationNote note)
1866 {
1867         /** These strings should say what is wrong, incorporating any extra details (ID, filenames etc.).
1868          *
1869          *  e.g. "ClosedCaption asset has no <EntryPoint> tag.",
1870          *  not "ClosedCaption assets must have an <EntryPoint> tag."
1871          *
1872          *  It's OK to use XML tag names where they are clear.
1873          *  If both ID and filename are available, use only the ID.
1874          *  End messages with a full stop.
1875          *  Messages should not mention whether or not their errors are a part of Bv2.1.
1876          */
1877         switch (note.code()) {
1878         case VerificationNote::Code::FAILED_READ:
1879                 return *note.note();
1880         case VerificationNote::Code::MISMATCHED_CPL_HASHES:
1881                 return String::compose("The hash of the CPL %1 in the PKL does not agree with the CPL file.", note.note().get());
1882         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE:
1883                 return String::compose("The picture in a reel has an invalid frame rate %1.", note.note().get());
1884         case VerificationNote::Code::INCORRECT_PICTURE_HASH:
1885                 return String::compose("The hash of the picture asset %1 does not agree with the PKL file.", note.file()->filename());
1886         case VerificationNote::Code::MISMATCHED_PICTURE_HASHES:
1887                 return String::compose("The PKL and CPL hashes differ for the picture asset %1.", note.file()->filename());
1888         case VerificationNote::Code::INCORRECT_SOUND_HASH:
1889                 return String::compose("The hash of the sound asset %1 does not agree with the PKL file.", note.file()->filename());
1890         case VerificationNote::Code::MISMATCHED_SOUND_HASHES:
1891                 return String::compose("The PKL and CPL hashes differ for the sound asset %1.", note.file()->filename());
1892         case VerificationNote::Code::EMPTY_ASSET_PATH:
1893                 return "The asset map contains an empty asset path.";
1894         case VerificationNote::Code::MISSING_ASSET:
1895                 return String::compose("The file %1 for an asset in the asset map cannot be found.", note.file()->filename());
1896         case VerificationNote::Code::MISMATCHED_STANDARD:
1897                 return "The DCP contains both SMPTE and Interop parts.";
1898         case VerificationNote::Code::INVALID_XML:
1899                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
1900         case VerificationNote::Code::MISSING_ASSETMAP:
1901                 return "No valid ASSETMAP or ASSETMAP.xml was found.";
1902         case VerificationNote::Code::INVALID_INTRINSIC_DURATION:
1903                 return String::compose("The intrinsic duration of the asset %1 is less than 1 second.", note.note().get());
1904         case VerificationNote::Code::INVALID_DURATION:
1905                 return String::compose("The duration of the asset %1 is less than 1 second.", note.note().get());
1906         case VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1907                 return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place.", note.file()->filename());
1908         case VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1909                 return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
1910         case VerificationNote::Code::EXTERNAL_ASSET:
1911                 return String::compose("The asset %1 that this DCP refers to is not included in the DCP.  It may be a VF.", note.note().get());
1912         case VerificationNote::Code::THREED_ASSET_MARKED_AS_TWOD:
1913                 return String::compose("The asset %1 is 3D but its MXF is marked as 2D.", note.file()->filename());
1914         case VerificationNote::Code::INVALID_STANDARD:
1915                 return "This DCP does not use the SMPTE standard.";
1916         case VerificationNote::Code::INVALID_LANGUAGE:
1917                 return String::compose("The DCP specifies a language '%1' which does not conform to the RFC 5646 standard.", note.note().get());
1918         case VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS:
1919                 return String::compose("The size %1 of picture asset %2 is not allowed.", note.note().get(), note.file()->filename());
1920         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K:
1921                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 2K DCPs.", note.note().get(), note.file()->filename());
1922         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K:
1923                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 4K DCPs.", note.note().get(), note.file()->filename());
1924         case VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D:
1925                 return "3D 4K DCPs are not allowed.";
1926         case VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES:
1927                 return String::compose("The size %1 of the closed caption asset %2 is larger than the 256KB maximum.", note.note().get(), note.file()->filename());
1928         case VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES:
1929                 return String::compose("The size %1 of the timed text asset %2 is larger than the 115MB maximum.", note.note().get(), note.file()->filename());
1930         case VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES:
1931                 return String::compose("The size %1 of the fonts in timed text asset %2 is larger than the 10MB maximum.", note.note().get(), note.file()->filename());
1932         case VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE:
1933                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <Language> tag.", note.file()->filename());
1934         case VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES:
1935                 return "Some subtitle assets have different <Language> tags than others";
1936         case VerificationNote::Code::MISSING_SUBTITLE_START_TIME:
1937                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <StartTime> tag.", note.file()->filename());
1938         case VerificationNote::Code::INVALID_SUBTITLE_START_TIME:
1939                 return String::compose("The XML for a SMPTE subtitle asset %1 has a non-zero <StartTime> tag.", note.file()->filename());
1940         case VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME:
1941                 return "The first subtitle or closed caption is less than 4 seconds from the start of the DCP.";
1942         case VerificationNote::Code::INVALID_SUBTITLE_DURATION:
1943                 return "At least one subtitle lasts less than 15 frames.";
1944         case VerificationNote::Code::INVALID_SUBTITLE_SPACING:
1945                 return "At least one pair of subtitles is separated by less than 2 frames.";
1946         case VerificationNote::Code::SUBTITLE_OVERLAPS_REEL_BOUNDARY:
1947                 return "At least one subtitle extends outside of its reel.";
1948         case VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT:
1949                 return "There are more than 3 subtitle lines in at least one place in the DCP.";
1950         case VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH:
1951                 return "There are more than 52 characters in at least one subtitle line.";
1952         case VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH:
1953                 return "There are more than 79 characters in at least one subtitle line.";
1954         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT:
1955                 return "There are more than 3 closed caption lines in at least one place.";
1956         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH:
1957                 return "There are more than 32 characters in at least one closed caption line.";
1958         case VerificationNote::Code::INVALID_SOUND_FRAME_RATE:
1959                 return String::compose("The sound asset %1 has a sampling rate of %2", note.file()->filename(), note.note().get());
1960         case VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT:
1961                 return String::compose("The CPL %1 has no <AnnotationText> tag.", note.note().get());
1962         case VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT:
1963                 return String::compose("The CPL %1 has an <AnnotationText> which differs from its <ContentTitleText>.", note.note().get());
1964         case VerificationNote::Code::MISMATCHED_ASSET_DURATION:
1965                 return "All assets in a reel do not have the same duration.";
1966         case VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS:
1967                 return "At least one reel contains a subtitle asset, but some reel(s) do not.";
1968         case VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS:
1969                 return "At least one reel has closed captions, but reels have different numbers of closed caption assets.";
1970         case VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT:
1971                 return String::compose("The subtitle asset %1 has no <EntryPoint> tag.", note.note().get());
1972         case VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT:
1973                 return String::compose("The subtitle asset %1 has an <EntryPoint> other than 0.", note.note().get());
1974         case VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT:
1975                 return String::compose("The closed caption asset %1 has no <EntryPoint> tag.", note.note().get());
1976         case VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT:
1977                 return String::compose("The closed caption asset %1 has an <EntryPoint> other than 0.", note.note().get());
1978         case VerificationNote::Code::MISSING_HASH:
1979                 return String::compose("The asset %1 has no <Hash> tag in the CPL.", note.note().get());
1980         case VerificationNote::Code::MISSING_FFEC_IN_FEATURE:
1981                 return "The DCP is marked as a Feature but there is no FFEC (first frame of end credits) marker.";
1982         case VerificationNote::Code::MISSING_FFMC_IN_FEATURE:
1983                 return "The DCP is marked as a Feature but there is no FFMC (first frame of moving credits) marker.";
1984         case VerificationNote::Code::MISSING_FFOC:
1985                 return "There should be a FFOC (first frame of content) marker.";
1986         case VerificationNote::Code::MISSING_LFOC:
1987                 return "There should be a LFOC (last frame of content) marker.";
1988         case VerificationNote::Code::INCORRECT_FFOC:
1989                 return String::compose("The FFOC marker is %1 instead of 1", note.note().get());
1990         case VerificationNote::Code::INCORRECT_LFOC:
1991                 return String::compose("The LFOC marker is %1 instead of 1 less than the duration of the last reel.", note.note().get());
1992         case VerificationNote::Code::MISSING_CPL_METADATA:
1993                 return String::compose("The CPL %1 has no <CompositionMetadataAsset> tag.", note.note().get());
1994         case VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER:
1995                 return String::compose("The CPL %1 has no <VersionNumber> in its <CompositionMetadataAsset>.", note.note().get());
1996         case VerificationNote::Code::MISSING_EXTENSION_METADATA:
1997                 return String::compose("The CPL %1 has no <ExtensionMetadata> in its <CompositionMetadataAsset>.", note.note().get());
1998         case VerificationNote::Code::INVALID_EXTENSION_METADATA:
1999                 return String::compose("The CPL %1 has a malformed <ExtensionMetadata> (%2).", note.file()->filename(), note.note().get());
2000         case VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT:
2001                 return String::compose("The CPL %1, which has encrypted content, is not signed.", note.note().get());
2002         case VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT:
2003                 return String::compose("The PKL %1, which has encrypted content, is not signed.", note.note().get());
2004         case VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL:
2005                 return String::compose("The PKL %1 has only one CPL but its <AnnotationText> does not match the CPL's <ContentTitleText>.", note.note().get());
2006         case VerificationNote::Code::PARTIALLY_ENCRYPTED:
2007                 return "Some assets are encrypted but some are not.";
2008         case VerificationNote::Code::INVALID_JPEG2000_CODESTREAM:
2009                 return String::compose("The JPEG2000 codestream for at least one frame is invalid (%1).", note.note().get());
2010         case VerificationNote::Code::INVALID_JPEG2000_GUARD_BITS_FOR_2K:
2011                 return String::compose("The JPEG2000 codestream uses %1 guard bits in a 2K image instead of 1.", note.note().get());
2012         case VerificationNote::Code::INVALID_JPEG2000_GUARD_BITS_FOR_4K:
2013                 return String::compose("The JPEG2000 codestream uses %1 guard bits in a 4K image instead of 2.", note.note().get());
2014         case VerificationNote::Code::INVALID_JPEG2000_TILE_SIZE:
2015                 return "The JPEG2000 tile size is not the same as the image size.";
2016         case VerificationNote::Code::INVALID_JPEG2000_CODE_BLOCK_WIDTH:
2017                 return String::compose("The JPEG2000 codestream uses a code block width of %1 instead of 32.", note.note().get());
2018         case VerificationNote::Code::INVALID_JPEG2000_CODE_BLOCK_HEIGHT:
2019                 return String::compose("The JPEG2000 codestream uses a code block height of %1 instead of 32.", note.note().get());
2020         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER_COUNT_FOR_2K:
2021                 return String::compose("%1 POC markers found in 2K JPEG2000 codestream instead of 0.", note.note().get());
2022         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER_COUNT_FOR_4K:
2023                 return String::compose("%1 POC markers found in 4K JPEG2000 codestream instead of 1.", note.note().get());
2024         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER:
2025                 return String::compose("Incorrect POC marker content found (%1).", note.note().get());
2026         case VerificationNote::Code::INVALID_JPEG2000_POC_MARKER_LOCATION:
2027                 return "POC marker found outside main header.";
2028         case VerificationNote::Code::INVALID_JPEG2000_TILE_PARTS_FOR_2K:
2029                 return String::compose("The JPEG2000 codestream has %1 tile parts in a 2K image instead of 3.", note.note().get());
2030         case VerificationNote::Code::INVALID_JPEG2000_TILE_PARTS_FOR_4K:
2031                 return String::compose("The JPEG2000 codestream has %1 tile parts in a 4K image instead of 6.", note.note().get());
2032         case VerificationNote::Code::MISSING_JPEG200_TLM_MARKER:
2033                 return "No TLM marker was found in a JPEG2000 codestream.";
2034         case VerificationNote::Code::MISMATCHED_TIMED_TEXT_RESOURCE_ID:
2035                 return "The Resource ID in a timed text MXF did not match the ID of the contained XML.";
2036         case VerificationNote::Code::INCORRECT_TIMED_TEXT_ASSET_ID:
2037                 return "The Asset ID in a timed text MXF is the same as the Resource ID or that of the contained XML.";
2038         case VerificationNote::Code::MISMATCHED_TIMED_TEXT_DURATION:
2039         {
2040                 vector<string> parts;
2041                 boost::split (parts, note.note().get(), boost::is_any_of(" "));
2042                 DCP_ASSERT (parts.size() == 2);
2043                 return String::compose("The reel duration of some timed text (%1) is not the same as the ContainerDuration of its MXF (%2).", parts[0], parts[1]);
2044         }
2045         case VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED:
2046                 return "Some aspect of this DCP could not be checked because it is encrypted.";
2047         case VerificationNote::Code::EMPTY_TEXT:
2048                 return "There is an empty <Text> node in a subtitle or closed caption.";
2049         case VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_VALIGN:
2050                 return "Some closed <Text> or <Image> nodes have different vertical alignments within a <Subtitle>.";
2051         case VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ORDERING:
2052                 return "Some closed captions are not listed in the order of their vertical position.";
2053         case VerificationNote::Code::UNEXPECTED_ENTRY_POINT:
2054                 return "There is an <EntryPoint> node inside a <MainMarkers>.";
2055         case VerificationNote::Code::UNEXPECTED_DURATION:
2056                 return "There is an <Duration> node inside a <MainMarkers>.";
2057         case VerificationNote::Code::INVALID_CONTENT_KIND:
2058                 return String::compose("<ContentKind> has an invalid value %1.", note.note().get());
2059         case VerificationNote::Code::INVALID_MAIN_PICTURE_ACTIVE_AREA:
2060                 return String::compose("<MainPictureActiveaArea> has an invalid value: %1", note.note().get());
2061         case VerificationNote::Code::DUPLICATE_ASSET_ID_IN_PKL:
2062                 return String::compose("The PKL %1 has more than one asset with the same ID.", note.note().get());
2063         case VerificationNote::Code::DUPLICATE_ASSET_ID_IN_ASSETMAP:
2064                 return String::compose("The ASSETMAP %1 has more than one asset with the same ID.", note.note().get());
2065         case VerificationNote::Code::MISSING_SUBTITLE:
2066                 return String::compose("The subtitle asset %1 has no subtitles.", note.note().get());
2067         case VerificationNote::Code::INVALID_SUBTITLE_ISSUE_DATE:
2068                 return String::compose("<IssueDate> has an invalid value: %1", note.note().get());
2069         case VerificationNote::Code::MISMATCHED_SOUND_CHANNEL_COUNTS:
2070                 return String::compose("The sound assets do not all have the same channel count; the first to differ is %1", note.file()->filename());
2071         case VerificationNote::Code::INVALID_MAIN_SOUND_CONFIGURATION:
2072                 return String::compose("<MainSoundConfiguration> has an invalid value: %1", note.note().get());
2073         case VerificationNote::Code::MISSING_FONT:
2074                 return String::compose("The font file for font ID \"%1\" was not found, or was not referred to in the ASSETMAP.", note.note().get());
2075         case VerificationNote::Code::INVALID_JPEG2000_TILE_PART_SIZE:
2076                 return String::compose(
2077                         "Frame %1 has an image component that is too large (component %2 is %3 bytes in size).",
2078                         note.frame().get(), note.component().get(), note.size().get()
2079                         );
2080         case VerificationNote::Code::INCORRECT_SUBTITLE_NAMESPACE_COUNT:
2081                 return String::compose("The XML in the subtitle asset %1 has more than one namespace declaration.", note.note().get());
2082         case VerificationNote::Code::MISSING_LOAD_FONT_FOR_FONT:
2083                 return String::compose("A subtitle or closed caption refers to a font with ID %1 that does not have a corresponding <LoadFont> node", note.id().get());
2084         case VerificationNote::Code::MISSING_LOAD_FONT:
2085                 return String::compose("The SMPTE subtitle asset %1 has <Text> nodes but no <LoadFont> node", note.id().get());
2086         case VerificationNote::Code::MISMATCHED_ASSET_MAP_ID:
2087                 return String::compose("The asset with ID %1 in the asset map actually has an id of %2", note.id().get(), note.other_id().get());
2088         }
2089
2090         return "";
2091 }
2092
2093
2094 bool
2095 dcp::operator== (dcp::VerificationNote const& a, dcp::VerificationNote const& b)
2096 {
2097         return a.type() == b.type() && a.code() == b.code() && a.note() == b.note() && a.file() == b.file() && a.line() == b.line();
2098 }
2099
2100
2101 bool
2102 dcp::operator< (dcp::VerificationNote const& a, dcp::VerificationNote const& b)
2103 {
2104         if (a.type() != b.type()) {
2105                 return a.type() < b.type();
2106         }
2107
2108         if (a.code() != b.code()) {
2109                 return a.code() < b.code();
2110         }
2111
2112         if (a.note() != b.note()) {
2113                 return a.note().get_value_or("") < b.note().get_value_or("");
2114         }
2115
2116         if (a.file() != b.file()) {
2117                 return a.file().get_value_or("") < b.file().get_value_or("");
2118         }
2119
2120         return a.line().get_value_or(0) < b.line().get_value_or(0);
2121 }
2122
2123
2124 std::ostream&
2125 dcp::operator<< (std::ostream& s, dcp::VerificationNote const& note)
2126 {
2127         s << note_to_string (note);
2128         if (note.note()) {
2129                 s << " [" << note.note().get() << "]";
2130         }
2131         if (note.file()) {
2132                 s << " [" << note.file().get() << "]";
2133         }
2134         if (note.line()) {
2135                 s << " [" << note.line().get() << "]";
2136         }
2137         return s;
2138 }
2139