When reading a DCP set up asset hashes from the CPL/PKL, not by digesting the actual...
authorCarl Hetherington <cth@carlh.net>
Mon, 26 Jun 2023 22:11:43 +0000 (00:11 +0200)
committerCarl Hetherington <cth@carlh.net>
Wed, 28 Jun 2023 23:25:40 +0000 (01:25 +0200)
Previously when reading a DCP we would recalculate asset hashes.  This meant that corrupted
DCPs could be loaded in and their hashes re-written (if nobody checked the new hashes against
the ones in the PKL/CPL).

It seems better to take the hashes from the metadata.  Then if the assets are corrupted
and the DCP is re-written the PKL/CPL hashes will be preserved, showing rather than hiding
the corruption.

src/dcp.cc
src/verify.cc
test/dcp_test.cc

index 0d0dc69087a37e4399812bc71180843c06bb274d..3b267090580e2b1b97bd0e6dadc9e43ebb5c2f77 100644 (file)
@@ -270,6 +270,48 @@ DCP::read (vector<dcp::VerificationNote>* notes, bool ignore_incorrect_picture_m
                }
        }
 
+       /* Set hashes for assets where we have an idea of what the hash should be in either a CPL or PKL.
+        * This means that when the hash is later read from these objects the result will be the one that
+        * it should be, rather the one that it currently is.  This should prevent errors being concealed
+        * when an asset is corrupted - the hash from the CPL/PKL will disagree with the actual hash of the
+        * file, revealing the problem.
+        */
+
+       auto hash_from_pkl = [this](string id) -> optional<string> {
+               for (auto pkl: _pkls) {
+                       if (auto pkl_hash = pkl->hash(id)) {
+                               return pkl_hash;
+                       }
+               }
+
+               return {};
+       };
+
+       auto hash_from_cpl_or_pkl = [this, &hash_from_pkl](string id) -> optional<string> {
+               for (auto cpl: cpls()) {
+                       for (auto reel_file_asset: cpl->reel_file_assets()) {
+                               if (reel_file_asset->asset_ref().id() == id && reel_file_asset->hash()) {
+                                       return reel_file_asset->hash();
+                               }
+                       }
+               }
+
+               return hash_from_pkl(id);
+       };
+
+       for (auto asset: other_assets) {
+               if (auto hash = hash_from_cpl_or_pkl(asset->id())) {
+                       asset->set_hash(*hash);
+               }
+       }
+
+       for (auto cpl: cpls()) {
+               if (auto hash = hash_from_pkl(cpl->id())) {
+                       cpl->set_hash(*hash);
+               }
+       }
+
+       /* Resolve references */
        resolve_refs (other_assets);
 
        /* While we've got the ASSETMAP lets look and see if this DCP refers to things that are not in its ASSETMAP */
index 394326fe24b08063a3c743ca0a404fa2eb8dcdc1..8f318af9f2674f47de2615de1a6605f6f98d35a4 100644 (file)
@@ -386,6 +386,12 @@ enum class VerifyAssetResult {
 static VerifyAssetResult
 verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
 {
+       /* When reading the DCP the hash will have been set to the one from the PKL/CPL.
+        * We want to calculate the hash of the actual file contents here, so that we
+        * can check it.  unset_hash() means that this calculation will happen on the
+        * call to hash().
+        */
+       reel_file_asset->asset_ref()->unset_hash();
        auto const actual_hash = reel_file_asset->asset_ref()->hash(progress);
 
        auto pkls = dcp->pkls();
index 9320d4f10df848a43d5fb5e3f9191c42f018b3da..51ce7961268ed461a23494d48824dfb4d0f5d2a2 100644 (file)
@@ -500,3 +500,46 @@ BOOST_AUTO_TEST_CASE (dcp_add_kdm_test)
        BOOST_CHECK_EQUAL (reels[2]->_kdms[0].keys()[1].id(), kdm_2_uuid_2);
 }
 
+
+BOOST_AUTO_TEST_CASE(hashes_preserved_when_loading_corrupted_dcp)
+{
+       boost::filesystem::path const dir = "build/test/hashes_preserved_when_loading_corrupted_dcp";
+       boost::filesystem::remove_all(dir);
+
+       auto dcp = make_simple(dir / "1");
+       dcp->write_xml();
+
+       auto asset_1_id = dcp::MonoPictureAsset(dir / "1" / "video.mxf").id();
+       auto asset_1_hash = dcp::MonoPictureAsset(dir / "1" / "video.mxf").hash();
+
+       /* Replace the hash in the CPL (the one that corresponds to the actual file)
+        * with an incorrect one new_hash.
+        */
+       string new_hash;
+       {
+               Editor editor(find_file(dir / "1", "cpl_"));
+               auto const after = "<Duration>24</Duration>";
+               editor.delete_lines_after(after, 1);
+
+               if (asset_1_hash[0] == 'A') {
+                       new_hash = 'B' + asset_1_hash.substr(1);
+               } else {
+                       new_hash = 'A' + asset_1_hash.substr(1);
+               }
+
+               editor.insert(after, dcp::String::compose("      <Hash>%1</Hash>", new_hash));
+       }
+
+       dcp::DCP read_back(dir / "1");
+       read_back.read();
+
+       BOOST_REQUIRE_EQUAL(read_back.cpls().size(), 1U);
+       auto cpl = read_back.cpls()[0];
+       BOOST_REQUIRE_EQUAL(cpl->reels().size(), 1U);
+       auto reel = cpl->reels()[0];
+       BOOST_REQUIRE(reel->main_picture());
+       /* Now the asset should think it has the wrong hash written to the PKL file; it shouldn't have
+        * checked the file again.
+        */
+       BOOST_CHECK_EQUAL(reel->main_picture()->asset_ref()->hash(), new_hash);
+}