Replace std::list with std::vector in the API.
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2021 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34 #include "verify.h"
35 #include "dcp.h"
36 #include "cpl.h"
37 #include "reel.h"
38 #include "reel_closed_caption_asset.h"
39 #include "reel_picture_asset.h"
40 #include "reel_sound_asset.h"
41 #include "reel_subtitle_asset.h"
42 #include "interop_subtitle_asset.h"
43 #include "mono_picture_asset.h"
44 #include "mono_picture_frame.h"
45 #include "stereo_picture_asset.h"
46 #include "stereo_picture_frame.h"
47 #include "exceptions.h"
48 #include "compose.hpp"
49 #include "raw_convert.h"
50 #include "smpte_subtitle_asset.h"
51 #include <xercesc/util/PlatformUtils.hpp>
52 #include <xercesc/parsers/XercesDOMParser.hpp>
53 #include <xercesc/parsers/AbstractDOMParser.hpp>
54 #include <xercesc/sax/HandlerBase.hpp>
55 #include <xercesc/dom/DOMImplementation.hpp>
56 #include <xercesc/dom/DOMImplementationLS.hpp>
57 #include <xercesc/dom/DOMImplementationRegistry.hpp>
58 #include <xercesc/dom/DOMLSParser.hpp>
59 #include <xercesc/dom/DOMException.hpp>
60 #include <xercesc/dom/DOMDocument.hpp>
61 #include <xercesc/dom/DOMNodeList.hpp>
62 #include <xercesc/dom/DOMError.hpp>
63 #include <xercesc/dom/DOMLocator.hpp>
64 #include <xercesc/dom/DOMNamedNodeMap.hpp>
65 #include <xercesc/dom/DOMAttr.hpp>
66 #include <xercesc/dom/DOMErrorHandler.hpp>
67 #include <xercesc/framework/LocalFileInputSource.hpp>
68 #include <xercesc/framework/MemBufInputSource.hpp>
69 #include <boost/noncopyable.hpp>
70 #include <boost/algorithm/string.hpp>
71 #include <map>
72 #include <vector>
73 #include <iostream>
74
75 using std::list;
76 using std::vector;
77 using std::string;
78 using std::cout;
79 using std::map;
80 using std::max;
81 using std::shared_ptr;
82 using boost::optional;
83 using boost::function;
84 using std::dynamic_pointer_cast;
85
86 using namespace dcp;
87 using namespace xercesc;
88
89 static
90 string
91 xml_ch_to_string (XMLCh const * a)
92 {
93         char* x = XMLString::transcode(a);
94         string const o(x);
95         XMLString::release(&x);
96         return o;
97 }
98
99 class XMLValidationError
100 {
101 public:
102         XMLValidationError (SAXParseException const & e)
103                 : _message (xml_ch_to_string(e.getMessage()))
104                 , _line (e.getLineNumber())
105                 , _column (e.getColumnNumber())
106                 , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
107                 , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
108         {
109
110         }
111
112         string message () const {
113                 return _message;
114         }
115
116         uint64_t line () const {
117                 return _line;
118         }
119
120         uint64_t column () const {
121                 return _column;
122         }
123
124         string public_id () const {
125                 return _public_id;
126         }
127
128         string system_id () const {
129                 return _system_id;
130         }
131
132 private:
133         string _message;
134         uint64_t _line;
135         uint64_t _column;
136         string _public_id;
137         string _system_id;
138 };
139
140
141 class DCPErrorHandler : public ErrorHandler
142 {
143 public:
144         void warning(const SAXParseException& e)
145         {
146                 maybe_add (XMLValidationError(e));
147         }
148
149         void error(const SAXParseException& e)
150         {
151                 maybe_add (XMLValidationError(e));
152         }
153
154         void fatalError(const SAXParseException& e)
155         {
156                 maybe_add (XMLValidationError(e));
157         }
158
159         void resetErrors() {
160                 _errors.clear ();
161         }
162
163         list<XMLValidationError> errors () const {
164                 return _errors;
165         }
166
167 private:
168         void maybe_add (XMLValidationError e)
169         {
170                 /* XXX: nasty hack */
171                 if (
172                         e.message().find("schema document") != string::npos &&
173                         e.message().find("has different target namespace from the one specified in instance document") != string::npos
174                         ) {
175                         return;
176                 }
177
178                 _errors.push_back (e);
179         }
180
181         list<XMLValidationError> _errors;
182 };
183
184 class StringToXMLCh : public boost::noncopyable
185 {
186 public:
187         StringToXMLCh (string a)
188         {
189                 _buffer = XMLString::transcode(a.c_str());
190         }
191
192         ~StringToXMLCh ()
193         {
194                 XMLString::release (&_buffer);
195         }
196
197         XMLCh const * get () const {
198                 return _buffer;
199         }
200
201 private:
202         XMLCh* _buffer;
203 };
204
205 class LocalFileResolver : public EntityResolver
206 {
207 public:
208         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
209                 : _xsd_dtd_directory (xsd_dtd_directory)
210         {
211                 /* XXX: I'm not clear on what things need to be in this list; some XSDs are apparently, magically
212                  * found without being here.
213                  */
214                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
215                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
216                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
217                 add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
218                 add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
219                 add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
220                 add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
221                 add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
222                 add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
223                 add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "SMPTE-428-7-2010-DCST.xsd");
224                 add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
225                 add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
226                 add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
227         }
228
229         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
230         {
231                 if (!system_id) {
232                         return 0;
233                 }
234                 auto system_id_str = xml_ch_to_string (system_id);
235                 auto p = _xsd_dtd_directory;
236                 if (_files.find(system_id_str) == _files.end()) {
237                         p /= system_id_str;
238                 } else {
239                         p /= _files[system_id_str];
240                 }
241                 StringToXMLCh ch (p.string());
242                 return new LocalFileInputSource(ch.get());
243         }
244
245 private:
246         void add (string uri, string file)
247         {
248                 _files[uri] = file;
249         }
250
251         std::map<string, string> _files;
252         boost::filesystem::path _xsd_dtd_directory;
253 };
254
255
256 static void
257 parse (XercesDOMParser& parser, boost::filesystem::path xml)
258 {
259         parser.parse(xml.string().c_str());
260 }
261
262
263 static void
264 parse (XercesDOMParser& parser, std::string xml)
265 {
266         xercesc::MemBufInputSource buf(reinterpret_cast<unsigned char const*>(xml.c_str()), xml.size(), "");
267         parser.parse(buf);
268 }
269
270
271 template <class T>
272 void
273 validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, vector<VerificationNote>& notes)
274 {
275         try {
276                 XMLPlatformUtils::Initialize ();
277         } catch (XMLException& e) {
278                 throw MiscError ("Failed to initialise xerces library");
279         }
280
281         DCPErrorHandler error_handler;
282
283         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
284         {
285                 XercesDOMParser parser;
286                 parser.setValidationScheme(XercesDOMParser::Val_Always);
287                 parser.setDoNamespaces(true);
288                 parser.setDoSchema(true);
289
290                 vector<string> schema;
291                 schema.push_back("xml.xsd");
292                 schema.push_back("xmldsig-core-schema.xsd");
293                 schema.push_back("SMPTE-429-7-2006-CPL.xsd");
294                 schema.push_back("SMPTE-429-8-2006-PKL.xsd");
295                 schema.push_back("SMPTE-429-9-2007-AM.xsd");
296                 schema.push_back("Main-Stereo-Picture-CPL.xsd");
297                 schema.push_back("PROTO-ASDCP-CPL-20040511.xsd");
298                 schema.push_back("PROTO-ASDCP-PKL-20040311.xsd");
299                 schema.push_back("PROTO-ASDCP-AM-20040311.xsd");
300                 schema.push_back("DCSubtitle.v1.mattsson.xsd");
301                 schema.push_back("DCDMSubtitle-2010.xsd");
302                 schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
303                 schema.push_back("SMPTE-429-16.xsd");
304                 schema.push_back("Dolby-2012-AD.xsd");
305                 schema.push_back("SMPTE-429-10-2008.xsd");
306                 schema.push_back("xlink.xsd");
307                 schema.push_back("SMPTE-335-2012.xsd");
308                 schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
309                 schema.push_back("isdcf-mca.xsd");
310                 schema.push_back("SMPTE-429-12-2008.xsd");
311
312                 /* XXX: I'm not especially clear what this is for, but it seems to be necessary.
313                  * Schemas that are not mentioned in this list are not read, and the things
314                  * they describe are not checked.
315                  */
316                 string locations;
317                 for (auto i: schema) {
318                         locations += String::compose("%1 %1 ", i, i);
319                 }
320
321                 parser.setExternalSchemaLocation(locations.c_str());
322                 parser.setValidationSchemaFullChecking(true);
323                 parser.setErrorHandler(&error_handler);
324
325                 LocalFileResolver resolver (xsd_dtd_directory);
326                 parser.setEntityResolver(&resolver);
327
328                 try {
329                         parser.resetDocumentPool();
330                         parse(parser, xml);
331                 } catch (XMLException& e) {
332                         throw MiscError(xml_ch_to_string(e.getMessage()));
333                 } catch (DOMException& e) {
334                         throw MiscError(xml_ch_to_string(e.getMessage()));
335                 } catch (...) {
336                         throw MiscError("Unknown exception from xerces");
337                 }
338         }
339
340         XMLPlatformUtils::Terminate ();
341
342         for (auto i: error_handler.errors()) {
343                 notes.push_back (
344                         VerificationNote(
345                                 VerificationNote::VERIFY_ERROR,
346                                 VerificationNote::XML_VALIDATION_ERROR,
347                                 i.message(),
348                                 boost::trim_copy(i.public_id() + " " + i.system_id()),
349                                 i.line()
350                                 )
351                         );
352         }
353 }
354
355
/** Possible results of verify_asset() */
enum VerifyAssetResult
{
        /** None of the hash checks in verify_asset() failed */
        VERIFY_ASSET_RESULT_GOOD,
        /** The hash given in the CPL is not the same as the one in the PKL */
        VERIFY_ASSET_RESULT_CPL_PKL_DIFFER,
        /** The hash calculated from the asset is not the same as the one in the PKL */
        VERIFY_ASSET_RESULT_BAD
};
361
362
363 static VerifyAssetResult
364 verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
365 {
366         auto const actual_hash = reel_mxf->asset_ref()->hash(progress);
367
368         auto pkls = dcp->pkls();
369         /* We've read this DCP in so it must have at least one PKL */
370         DCP_ASSERT (!pkls.empty());
371
372         auto asset = reel_mxf->asset_ref().asset();
373
374         optional<string> pkl_hash;
375         for (auto i: pkls) {
376                 pkl_hash = i->hash (reel_mxf->asset_ref()->id());
377                 if (pkl_hash) {
378                         break;
379                 }
380         }
381
382         DCP_ASSERT (pkl_hash);
383
384         auto cpl_hash = reel_mxf->hash();
385         if (cpl_hash && *cpl_hash != *pkl_hash) {
386                 return VERIFY_ASSET_RESULT_CPL_PKL_DIFFER;
387         }
388
389         if (actual_hash != *pkl_hash) {
390                 return VERIFY_ASSET_RESULT_BAD;
391         }
392
393         return VERIFY_ASSET_RESULT_GOOD;
394 }
395
396
397 void
398 verify_language_tag (string tag, vector<VerificationNote>& notes)
399 {
400         try {
401                 dcp::LanguageTag test (tag);
402         } catch (dcp::LanguageTagError &) {
403                 notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::BAD_LANGUAGE, tag));
404         }
405 }
406
407
/** Possible results of the picture frame size checks */
enum VerifyPictureAssetResult
{
        /** No frame was bigger than either of the limits */
        VERIFY_PICTURE_ASSET_RESULT_GOOD,
        /** The biggest frame was over the lower ("risky") limit but not over the maximum */
        VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE,
        /** The biggest frame was over the maximum */
        VERIFY_PICTURE_ASSET_RESULT_BAD
};
414
415
416 int
417 biggest_frame_size (shared_ptr<const MonoPictureFrame> frame)
418 {
419         return frame->size ();
420 }
421
422 int
423 biggest_frame_size (shared_ptr<const StereoPictureFrame> frame)
424 {
425         return max(frame->left()->size(), frame->right()->size());
426 }
427
428
429 template <class A, class R, class F>
430 optional<VerifyPictureAssetResult>
431 verify_picture_asset_type (shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
432 {
433         auto asset = dynamic_pointer_cast<A>(reel_mxf->asset_ref().asset());
434         if (!asset) {
435                 return optional<VerifyPictureAssetResult>();
436         }
437
438         int biggest_frame = 0;
439         auto reader = asset->start_read ();
440         auto const duration = asset->intrinsic_duration ();
441         for (int64_t i = 0; i < duration; ++i) {
442                 shared_ptr<const F> frame = reader->get_frame (i);
443                 biggest_frame = max(biggest_frame, biggest_frame_size(frame));
444                 progress (float(i) / duration);
445         }
446
447         static const int max_frame =   rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
448         static const int risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
449         if (biggest_frame > max_frame) {
450                 return VERIFY_PICTURE_ASSET_RESULT_BAD;
451         } else if (biggest_frame > risky_frame) {
452                 return VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE;
453         }
454
455         return VERIFY_PICTURE_ASSET_RESULT_GOOD;
456 }
457
458
459 static VerifyPictureAssetResult
460 verify_picture_asset (shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
461 {
462         auto r = verify_picture_asset_type<MonoPictureAsset, MonoPictureAssetReader, MonoPictureFrame>(reel_mxf, progress);
463         if (!r) {
464                 r = verify_picture_asset_type<StereoPictureAsset, StereoPictureAssetReader, StereoPictureFrame>(reel_mxf, progress);
465         }
466
467         DCP_ASSERT (r);
468         return *r;
469 }
470
471
472 static void
473 verify_main_picture_asset (
474         shared_ptr<const DCP> dcp,
475         shared_ptr<const ReelPictureAsset> reel_asset,
476         function<void (string, optional<boost::filesystem::path>)> stage,
477         function<void (float)> progress,
478         vector<VerificationNote>& notes
479         )
480 {
481         auto asset = reel_asset->asset();
482         auto const file = *asset->file();
483         stage ("Checking picture asset hash", file);
484         auto const r = verify_asset (dcp, reel_asset, progress);
485         switch (r) {
486                 case VERIFY_ASSET_RESULT_BAD:
487                         notes.push_back (
488                                 VerificationNote(
489                                         VerificationNote::VERIFY_ERROR, VerificationNote::PICTURE_HASH_INCORRECT, file
490                                         )
491                                 );
492                         break;
493                 case VERIFY_ASSET_RESULT_CPL_PKL_DIFFER:
494                         notes.push_back (
495                                 VerificationNote(
496                                         VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_PICTURE_HASHES_DIFFER, file
497                                         )
498                                 );
499                         break;
500                 default:
501                         break;
502         }
503         stage ("Checking picture frame sizes", asset->file());
504         auto const pr = verify_picture_asset (reel_asset, progress);
505         switch (pr) {
506                 case VERIFY_PICTURE_ASSET_RESULT_BAD:
507                         notes.push_back (
508                                 VerificationNote(
509                                         VerificationNote::VERIFY_ERROR, VerificationNote::PICTURE_FRAME_TOO_LARGE_IN_BYTES, file
510                                         )
511                                 );
512                         break;
513                 case VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE:
514                         notes.push_back (
515                                 VerificationNote(
516                                         VerificationNote::VERIFY_WARNING, VerificationNote::PICTURE_FRAME_NEARLY_TOO_LARGE_IN_BYTES, file
517                                         )
518                                 );
519                         break;
520                 default:
521                         break;
522         }
523
524         /* Only flat/scope allowed by Bv2.1 */
525         if (
526                 asset->size() != dcp::Size(2048, 858) &&
527                 asset->size() != dcp::Size(1998, 1080) &&
528                 asset->size() != dcp::Size(4096, 1716) &&
529                 asset->size() != dcp::Size(3996, 2160)) {
530                 notes.push_back(
531                         VerificationNote(
532                                 VerificationNote::VERIFY_BV21_ERROR,
533                                 VerificationNote::PICTURE_ASSET_INVALID_SIZE_IN_PIXELS,
534                                 String::compose("%1x%2", asset->size().width, asset->size().height),
535                                 file
536                                 )
537                         );
538         }
539
540         /* Only 24, 25, 48fps allowed for 2K */
541         if (
542                 (asset->size() == dcp::Size(2048, 858) || asset->size() == dcp::Size(1998, 1080)) &&
543                 (asset->edit_rate() != dcp::Fraction(24, 1) && asset->edit_rate() != dcp::Fraction(25, 1) && asset->edit_rate() != dcp::Fraction(48, 1))
544            ) {
545                 notes.push_back(
546                         VerificationNote(
547                                 VerificationNote::VERIFY_BV21_ERROR,
548                                 VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_2K,
549                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
550                                 file
551                                 )
552                         );
553         }
554
555         if (asset->size() == dcp::Size(4096, 1716) || asset->size() == dcp::Size(3996, 2160)) {
556                 /* Only 24fps allowed for 4K */
557                 if (asset->edit_rate() != dcp::Fraction(24, 1)) {
558                         notes.push_back(
559                                 VerificationNote(
560                                         VerificationNote::VERIFY_BV21_ERROR,
561                                         VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_4K,
562                                         String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
563                                         file
564                                         )
565                                 );
566                 }
567
568                 /* Only 2D allowed for 4K */
569                 if (dynamic_pointer_cast<const StereoPictureAsset>(asset)) {
570                         notes.push_back(
571                                 VerificationNote(
572                                         VerificationNote::VERIFY_BV21_ERROR,
573                                         VerificationNote::PICTURE_ASSET_4K_3D,
574                                         file
575                                         )
576                                 );
577
578                 }
579         }
580
581 }
582
583
584 static void
585 verify_main_sound_asset (
586         shared_ptr<const DCP> dcp,
587         shared_ptr<const ReelSoundAsset> reel_asset,
588         function<void (string, optional<boost::filesystem::path>)> stage,
589         function<void (float)> progress,
590         vector<VerificationNote>& notes
591         )
592 {
593         auto asset = reel_asset->asset();
594         stage ("Checking sound asset hash", asset->file());
595         auto const r = verify_asset (dcp, reel_asset, progress);
596         switch (r) {
597                 case VERIFY_ASSET_RESULT_BAD:
598                         notes.push_back (
599                                 VerificationNote(
600                                         VerificationNote::VERIFY_ERROR, VerificationNote::SOUND_HASH_INCORRECT, *asset->file()
601                                         )
602                                 );
603                         break;
604                 case VERIFY_ASSET_RESULT_CPL_PKL_DIFFER:
605                         notes.push_back (
606                                 VerificationNote(
607                                         VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_SOUND_HASHES_DIFFER, *asset->file()
608                                         )
609                                 );
610                         break;
611                 default:
612                         break;
613         }
614
615         stage ("Checking sound asset metadata", asset->file());
616
617         verify_language_tag (asset->language(), notes);
618 }
619
620
621 static void
622 verify_main_subtitle_reel (shared_ptr<const ReelSubtitleAsset> reel_asset, vector<VerificationNote>& notes)
623 {
624         /* XXX: is Language compulsory? */
625         if (reel_asset->language()) {
626                 verify_language_tag (*reel_asset->language(), notes);
627         }
628 }
629
630
631 static void
632 verify_closed_caption_reel (shared_ptr<const ReelClosedCaptionAsset> reel_asset, vector<VerificationNote>& notes)
633 {
634         /* XXX: is Language compulsory? */
635         if (reel_asset->language()) {
636                 verify_language_tag (*reel_asset->language(), notes);
637         }
638 }
639
640
641 struct State
642 {
643         boost::optional<string> subtitle_language;
644 };
645
646
647 static void
648 verify_subtitle_asset (
649         shared_ptr<const SubtitleAsset> asset,
650         function<void (string, optional<boost::filesystem::path>)> stage,
651         boost::filesystem::path xsd_dtd_directory,
652         vector<VerificationNote>& notes,
653         State& state,
654         bool first_reel
655         )
656 {
657         stage ("Checking subtitle XML", asset->file());
658         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
659          * gets passed through libdcp which may clean up and therefore hide errors.
660          */
661         validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
662
663         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
664         if (smpte) {
665                 if (smpte->language()) {
666                         auto const language = *smpte->language();
667                         verify_language_tag (language, notes);
668                         if (!state.subtitle_language) {
669                                 state.subtitle_language = language;
670                         } else if (state.subtitle_language != language) {
671                                 notes.push_back (
672                                         VerificationNote(
673                                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::SUBTITLE_LANGUAGES_DIFFER, *asset->file()
674                                                 )
675                                         );
676                         }
677                 } else {
678                         notes.push_back (
679                                 VerificationNote(
680                                         VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_SUBTITLE_LANGUAGE, *asset->file()
681                                         )
682                                 );
683                 }
684                 if (boost::filesystem::file_size(*asset->file()) > 115 * 1024 * 1024) {
685                         notes.push_back (
686                                 VerificationNote(
687                                         VerificationNote::VERIFY_BV21_ERROR, VerificationNote::TIMED_TEXT_ASSET_TOO_LARGE_IN_BYTES, *asset->file()
688                                         )
689                                 );
690                 }
691                 /* XXX: I'm not sure what Bv2.1_7.2.1 means when it says "the font resource shall not be larger than 10MB"
692                  * but I'm hoping that checking for the total size of all fonts being <= 10MB will do.
693                  */
694                 auto fonts = asset->font_data ();
695                 int total_size = 0;
696                 for (auto i: fonts) {
697                         total_size += i.second.size();
698                 }
699                 if (total_size > 10 * 1024 * 1024) {
700                         notes.push_back (
701                                 VerificationNote(
702                                         VerificationNote::VERIFY_BV21_ERROR, VerificationNote::TIMED_TEXT_FONTS_TOO_LARGE_IN_BYTES, *asset->file()
703                                         )
704                                 );
705                 }
706
707                 if (!smpte->start_time()) {
708                         notes.push_back (
709                                 VerificationNote(
710                                         VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_SUBTITLE_START_TIME, *asset->file())
711                                 );
712                 } else if (smpte->start_time() != dcp::Time()) {
713                         notes.push_back (
714                                 VerificationNote(
715                                         VerificationNote::VERIFY_BV21_ERROR, VerificationNote::SUBTITLE_START_TIME_NON_ZERO, *asset->file())
716                                 );
717                 }
718
719                 if (first_reel) {
720                         auto subs = smpte->subtitles();
721                         sort (subs.begin(), subs.end(), [](shared_ptr<Subtitle> a, shared_ptr<Subtitle> b) {
722                                 return a->in() < b->in();
723                         });
724                         if (!subs.empty() && subs.front()->in() < dcp::Time(0, 0, 4, 0, 24)) {
725                                 notes.push_back(
726                                         VerificationNote(
727                                                 VerificationNote::VERIFY_WARNING, VerificationNote::FIRST_TEXT_TOO_EARLY
728                                                 )
729                                         );
730                         }
731                 }
732         }
733 }
734
735
736 static void
737 verify_closed_caption_asset (
738         shared_ptr<const SubtitleAsset> asset,
739         function<void (string, optional<boost::filesystem::path>)> stage,
740         boost::filesystem::path xsd_dtd_directory,
741         vector<VerificationNote>& notes,
742         State& state,
743         bool first_reel
744         )
745 {
746         verify_subtitle_asset (asset, stage, xsd_dtd_directory, notes, state, first_reel);
747
748         if (asset->raw_xml().size() > 256 * 1024) {
749                 notes.push_back (
750                         VerificationNote(
751                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::CLOSED_CAPTION_XML_TOO_LARGE_IN_BYTES, *asset->file()
752                                 )
753                         );
754         }
755 }
756
757
758 vector<VerificationNote>
759 dcp::verify (
760         vector<boost::filesystem::path> directories,
761         function<void (string, optional<boost::filesystem::path>)> stage,
762         function<void (float)> progress,
763         boost::filesystem::path xsd_dtd_directory
764         )
765 {
766         xsd_dtd_directory = boost::filesystem::canonical (xsd_dtd_directory);
767
768         vector<VerificationNote> notes;
769         State state;
770
771         vector<shared_ptr<DCP>> dcps;
772         for (auto i: directories) {
773                 dcps.push_back (shared_ptr<DCP> (new DCP (i)));
774         }
775
776         for (auto dcp: dcps) {
777                 stage ("Checking DCP", dcp->directory());
778                 try {
779                         dcp->read (&notes);
780                 } catch (ReadError& e) {
781                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
782                 } catch (XMLError& e) {
783                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
784                 } catch (MXFFileError& e) {
785                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
786                 } catch (cxml::Error& e) {
787                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
788                 }
789
790                 if (dcp->standard() != dcp::SMPTE) {
791                         notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::NOT_SMPTE));
792                 }
793
794                 for (auto cpl: dcp->cpls()) {
795                         stage ("Checking CPL", cpl->file());
796                         validate_xml (cpl->file().get(), xsd_dtd_directory, notes);
797
798                         for (auto const& i: cpl->additional_subtitle_languages()) {
799                                 verify_language_tag (i, notes);
800                         }
801
802                         if (cpl->release_territory()) {
803                                 verify_language_tag (cpl->release_territory().get(), notes);
804                         }
805
806                         /* Check that the CPL's hash corresponds to the PKL */
807                         for (auto i: dcp->pkls()) {
808                                 optional<string> h = i->hash(cpl->id());
809                                 if (h && make_digest(ArrayData(*cpl->file())) != *h) {
810                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::CPL_HASH_INCORRECT));
811                                 }
812                         }
813
814                         bool first_reel = true;
815                         for (auto reel: cpl->reels()) {
816                                 stage ("Checking reel", optional<boost::filesystem::path>());
817
818                                 for (auto i: reel->assets()) {
819                                         if (i->duration() && (i->duration().get() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
820                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::DURATION_TOO_SMALL, i->id()));
821                                         }
822                                         if ((i->intrinsic_duration() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
823                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INTRINSIC_DURATION_TOO_SMALL, i->id()));
824                                         }
825                                 }
826
827                                 if (reel->main_picture()) {
828                                         /* Check reel stuff */
829                                         auto const frame_rate = reel->main_picture()->frame_rate();
830                                         if (frame_rate.denominator != 1 ||
831                                             (frame_rate.numerator != 24 &&
832                                              frame_rate.numerator != 25 &&
833                                              frame_rate.numerator != 30 &&
834                                              frame_rate.numerator != 48 &&
835                                              frame_rate.numerator != 50 &&
836                                              frame_rate.numerator != 60 &&
837                                              frame_rate.numerator != 96)) {
838                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INVALID_PICTURE_FRAME_RATE));
839                                         }
840                                         /* Check asset */
841                                         if (reel->main_picture()->asset_ref().resolved()) {
842                                                 verify_main_picture_asset (dcp, reel->main_picture(), stage, progress, notes);
843                                         }
844                                 }
845
846                                 if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
847                                         verify_main_sound_asset (dcp, reel->main_sound(), stage, progress, notes);
848                                 }
849
850                                 if (reel->main_subtitle()) {
851                                         verify_main_subtitle_reel (reel->main_subtitle(), notes);
852                                         if (reel->main_subtitle()->asset_ref().resolved()) {
853                                                 verify_subtitle_asset (reel->main_subtitle()->asset(), stage, xsd_dtd_directory, notes, state, first_reel);
854                                         }
855                                 }
856
857                                 for (auto i: reel->closed_captions()) {
858                                         verify_closed_caption_reel (i, notes);
859                                         if (i->asset_ref().resolved()) {
860                                                 verify_closed_caption_asset (i->asset(), stage, xsd_dtd_directory, notes, state, first_reel);
861                                         }
862                                 }
863
864                                 first_reel = false;
865                         }
866                 }
867
868                 for (auto pkl: dcp->pkls()) {
869                         stage ("Checking PKL", pkl->file());
870                         validate_xml (pkl->file().get(), xsd_dtd_directory, notes);
871                 }
872
873                 if (dcp->asset_map_path()) {
874                         stage ("Checking ASSETMAP", dcp->asset_map_path().get());
875                         validate_xml (dcp->asset_map_path().get(), xsd_dtd_directory, notes);
876                 } else {
877                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::MISSING_ASSETMAP));
878                 }
879         }
880
881         return notes;
882 }
883
884 string
885 dcp::note_to_string (dcp::VerificationNote note)
886 {
887         switch (note.code()) {
888         case dcp::VerificationNote::GENERAL_READ:
889                 return *note.note();
890         case dcp::VerificationNote::CPL_HASH_INCORRECT:
891                 return "The hash of the CPL in the PKL does not agree with the CPL file.";
892         case dcp::VerificationNote::INVALID_PICTURE_FRAME_RATE:
893                 return "The picture in a reel has an invalid frame rate.";
894         case dcp::VerificationNote::PICTURE_HASH_INCORRECT:
895                 return dcp::String::compose("The hash of the picture asset %1 does not agree with the PKL file.", note.file()->filename());
896         case dcp::VerificationNote::PKL_CPL_PICTURE_HASHES_DIFFER:
897                 return dcp::String::compose("The PKL and CPL hashes differ for the picture asset %1.", note.file()->filename());
898         case dcp::VerificationNote::SOUND_HASH_INCORRECT:
899                 return dcp::String::compose("The hash of the sound asset %1 does not agree with the PKL file.", note.file()->filename());
900         case dcp::VerificationNote::PKL_CPL_SOUND_HASHES_DIFFER:
901                 return dcp::String::compose("The PKL and CPL hashes differ for the sound asset %1.", note.file()->filename());
902         case dcp::VerificationNote::EMPTY_ASSET_PATH:
903                 return "The asset map contains an empty asset path.";
904         case dcp::VerificationNote::MISSING_ASSET:
905                 return String::compose("The file for an asset in the asset map cannot be found; missing file is %1.", note.file()->filename());
906         case dcp::VerificationNote::MISMATCHED_STANDARD:
907                 return "The DCP contains both SMPTE and Interop parts.";
908         case dcp::VerificationNote::XML_VALIDATION_ERROR:
909                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
910         case dcp::VerificationNote::MISSING_ASSETMAP:
911                 return "No ASSETMAP or ASSETMAP.xml was found.";
912         case dcp::VerificationNote::INTRINSIC_DURATION_TOO_SMALL:
913                 return String::compose("The intrinsic duration of an asset is less than 1 second long: %1", note.note().get());
914         case dcp::VerificationNote::DURATION_TOO_SMALL:
915                 return String::compose("The duration of an asset is less than 1 second long: %1", note.note().get());
916         case dcp::VerificationNote::PICTURE_FRAME_TOO_LARGE_IN_BYTES:
917                 return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place.", note.file()->filename());
918         case dcp::VerificationNote::PICTURE_FRAME_NEARLY_TOO_LARGE_IN_BYTES:
919                 return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
920         case dcp::VerificationNote::EXTERNAL_ASSET:
921                 return String::compose("An asset that this DCP refers to is not included in the DCP.  It may be a VF.  Missing asset is %1.", note.note().get());
922         case dcp::VerificationNote::NOT_SMPTE:
923                 return "This DCP does not use the SMPTE standard, which is required for Bv2.1 compliance.";
924         case dcp::VerificationNote::BAD_LANGUAGE:
925                 return String::compose("The DCP specifies a language '%1' which does not conform to the RFC 5646 standard.", note.note().get());
926         case dcp::VerificationNote::PICTURE_ASSET_INVALID_SIZE_IN_PIXELS:
927                 return String::compose("A picture asset's size (%1) is not one of those allowed by Bv2.1 (2048x858, 1998x1080, 4096x1716 or 3996x2160)", note.note().get());
928         case dcp::VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_2K:
929                 return String::compose("A picture asset's frame rate (%1) is not one of those allowed for 2K DCPs by Bv2.1 (24, 25 or 48fps)", note.note().get());
930         case dcp::VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_4K:
931                 return String::compose("A picture asset's frame rate (%1) is not 24fps as required for 4K DCPs by Bv2.1", note.note().get());
932         case dcp::VerificationNote::PICTURE_ASSET_4K_3D:
933                 return "3D 4K DCPs are not allowed by Bv2.1";
934         case dcp::VerificationNote::CLOSED_CAPTION_XML_TOO_LARGE_IN_BYTES:
935                 return String::compose("The XML for the closed caption asset %1 is longer than the 256KB maximum required by Bv2.1", note.file()->filename());
936         case dcp::VerificationNote::TIMED_TEXT_ASSET_TOO_LARGE_IN_BYTES:
937                 return String::compose("The total size of the timed text asset %1 is larger than the 115MB maximum required by Bv2.1", note.file()->filename());
938         case dcp::VerificationNote::TIMED_TEXT_FONTS_TOO_LARGE_IN_BYTES:
939                 return String::compose("The total size of the fonts in timed text asset %1 is larger than the 10MB maximum required by Bv2.1", note.file()->filename());
940         case dcp::VerificationNote::MISSING_SUBTITLE_LANGUAGE:
941                 return String::compose("The XML for a SMPTE subtitle asset has no <Language> tag, which is required by Bv2.1", note.file()->filename());
942         case dcp::VerificationNote::SUBTITLE_LANGUAGES_DIFFER:
943                 return String::compose("Some subtitle assets have different <Language> tags than others", note.file()->filename());
944         case dcp::VerificationNote::MISSING_SUBTITLE_START_TIME:
945                 return String::compose("The XML for a SMPTE subtitle asset has no <StartTime> tag, which is required by Bv2.1", note.file()->filename());
946         case dcp::VerificationNote::SUBTITLE_START_TIME_NON_ZERO:
947                 return String::compose("The XML for a SMPTE subtitle asset has a non-zero <StartTime> tag, which is disallowed by Bv2.1", note.file()->filename());
948         case dcp::VerificationNote::FIRST_TEXT_TOO_EARLY:
949                 return "The first subtitle or closed caption is less than 4 seconds from the start of the DCP.";
950         }
951
952         return "";
953 }