From: Carl Hetherington Date: Mon, 26 Jun 2023 22:11:43 +0000 (+0200) Subject: When reading a DCP set up asset hashes from the CPL/PKL, not by digesting the actual... X-Git-Tag: v1.8.74~1 X-Git-Url: https://git.carlh.net/gitweb/?p=libdcp.git;a=commitdiff_plain;h=e702623781c8d5853b79a29ca8c5f495d0ade3d1 When reading a DCP set up asset hashes from the CPL/PKL, not by digesting the actual asset. Previously when reading a DCP we would recalculate asset hashes. This meant that corrupted DCPs could be loaded in and their hashes re-written (if nobody checked the new hashes against the ones in the PKL/CPL). It seems better to take the hashes from the metadata. Then if the assets are corrupted and the DCP is re-written the PKL/CPL hashes will be preserved, showing rather than hiding the corruption. --- diff --git a/src/dcp.cc b/src/dcp.cc index 0d0dc690..3b267090 100644 --- a/src/dcp.cc +++ b/src/dcp.cc @@ -270,6 +270,48 @@ DCP::read (vector* notes, bool ignore_incorrect_picture_m } } + /* Set hashes for assets where we have an idea of what the hash should be in either a CPL or PKL. + * This means that when the hash is later read from these objects the result will be the one that + * it should be, rather the one that it currently is. This should prevent errors being concealed + * when an asset is corrupted - the hash from the CPL/PKL will disagree with the actual hash of the + * file, revealing the problem. + */ + + auto hash_from_pkl = [this](string id) -> optional { + for (auto pkl: _pkls) { + if (auto pkl_hash = pkl->hash(id)) { + return pkl_hash; + } + } + + return {}; + }; + + auto hash_from_cpl_or_pkl = [this, &hash_from_pkl](string id) -> optional { + for (auto cpl: cpls()) { + for (auto reel_file_asset: cpl->reel_file_assets()) { + if (reel_file_asset->asset_ref().id() == id && reel_file_asset->hash()) { + return reel_file_asset->hash(); + } + } + } + + return hash_from_pkl(id); + }; + + for (auto asset: other_assets) { + if (auto hash = hash_from_cpl_or_pkl(asset->id())) { + asset->set_hash(*hash); + } + } + + for (auto cpl: cpls()) { + if (auto hash = hash_from_pkl(cpl->id())) { + cpl->set_hash(*hash); + } + } + + /* Resolve references */ resolve_refs (other_assets); /* While we've got the ASSETMAP lets look and see if this DCP refers to things that are not in its ASSETMAP */ diff --git a/src/verify.cc b/src/verify.cc index 394326fe..8f318af9 100644 --- a/src/verify.cc +++ b/src/verify.cc @@ -386,6 +386,12 @@ enum class VerifyAssetResult { static VerifyAssetResult verify_asset (shared_ptr dcp, shared_ptr reel_file_asset, function progress) { + /* When reading the DCP the hash will have been set to the one from the PKL/CPL. + * We want to calculate the hash of the actual file contents here, so that we + * can check it. unset_hash() means that this calculation will happen on the + * call to hash(). + */ + reel_file_asset->asset_ref()->unset_hash(); auto const actual_hash = reel_file_asset->asset_ref()->hash(progress); auto pkls = dcp->pkls(); diff --git a/test/dcp_test.cc b/test/dcp_test.cc index 9320d4f1..51ce7961 100644 --- a/test/dcp_test.cc +++ b/test/dcp_test.cc @@ -500,3 +500,46 @@ BOOST_AUTO_TEST_CASE (dcp_add_kdm_test) BOOST_CHECK_EQUAL (reels[2]->_kdms[0].keys()[1].id(), kdm_2_uuid_2); } + +BOOST_AUTO_TEST_CASE(hashes_preserved_when_loading_corrupted_dcp) +{ + boost::filesystem::path const dir = "build/test/hashes_preserved_when_loading_corrupted_dcp"; + boost::filesystem::remove_all(dir); + + auto dcp = make_simple(dir / "1"); + dcp->write_xml(); + + auto asset_1_id = dcp::MonoPictureAsset(dir / "1" / "video.mxf").id(); + auto asset_1_hash = dcp::MonoPictureAsset(dir / "1" / "video.mxf").hash(); + + /* Replace the hash in the CPL (the one that corresponds to the actual file) + * with an incorrect one new_hash. + */ + string new_hash; + { + Editor editor(find_file(dir / "1", "cpl_")); + auto const after = "24"; + editor.delete_lines_after(after, 1); + + if (asset_1_hash[0] == 'A') { + new_hash = 'B' + asset_1_hash.substr(1); + } else { + new_hash = 'A' + asset_1_hash.substr(1); + } + + editor.insert(after, dcp::String::compose(" %1", new_hash)); + } + + dcp::DCP read_back(dir / "1"); + read_back.read(); + + BOOST_REQUIRE_EQUAL(read_back.cpls().size(), 1U); + auto cpl = read_back.cpls()[0]; + BOOST_REQUIRE_EQUAL(cpl->reels().size(), 1U); + auto reel = cpl->reels()[0]; + BOOST_REQUIRE(reel->main_picture()); + /* Now the asset should think it has the wrong hash written to the PKL file; it shouldn't have + * checked the file again. + */ + BOOST_CHECK_EQUAL(reel->main_picture()->asset_ref()->hash(), new_hash); +}