diff options
| author | Carl Hetherington <cth@carlh.net> | 2022-05-23 19:53:17 +0200 |
|---|---|---|
| committer | Carl Hetherington <cth@carlh.net> | 2022-05-23 19:53:17 +0200 |
| commit | 692ef68b721c57fdd07de83adbfccaa93a903f30 (patch) | |
| tree | 049371c9026c9d701c9bc22bbf6358ce5b4aaad5 /src/lib | |
| parent | c4578e1197744272e78d5a19c92bc43e0e92e4cb (diff) | |
Try using streams.
Diffstat (limited to 'src/lib')
| -rw-r--r-- | src/lib/cuda_j2k_frame_encoder.cc | 27 | ||||
| -rw-r--r-- | src/lib/cuda_j2k_frame_encoder.h | 4 |
2 files changed, 16 insertions, 15 deletions
diff --git a/src/lib/cuda_j2k_frame_encoder.cc b/src/lib/cuda_j2k_frame_encoder.cc index 7fba7b58e..6aa894b87 100644 --- a/src/lib/cuda_j2k_frame_encoder.cc +++ b/src/lib/cuda_j2k_frame_encoder.cc @@ -45,19 +45,17 @@ CUDAJ2KFrameEncoder::CUDAJ2KFrameEncoder() } -CUDAJ2KFrameEncoder::Input::Input(DCPVideo const& vf) +CUDAJ2KFrameEncoder::Input::Input(DCPVideo const& vf, cudaStream_t stream) : _index(vf.index()) , _eyes(vf.eyes()) { - auto xyz = convert_to_xyz(vf.frame(), boost::bind(&Log::dcp_log, dcpomatic_log.get(), _1, _2)); + _xyz = convert_to_xyz(vf.frame(), boost::bind(&Log::dcp_log, dcpomatic_log.get(), _1, _2)); - void* pixel_data_h[] = { - xyz->data(0), - xyz->data(1), - xyz->data(2) - }; + for (int i = 0; i < 3; ++i) { + _pixel_data_h[i] = reinterpret_cast<uint8_t*>(_xyz->data(i)); + } - auto const pitch = xyz->size().width * 2; + auto const pitch = _xyz->size().width * 2; for (int i = 0; i < 3; ++i) { _pitch_in_bytes[i] = pitch; @@ -65,21 +63,22 @@ CUDAJ2KFrameEncoder::Input::Input(DCPVideo const& vf) reinterpret_cast<void**>(&_pixel_data_d[i]), &_pitch_in_bytes[i], pitch, - xyz->size().height + _xyz->size().height ); if (status != cudaSuccess) { throw CUDAError("cudaMallocPitch", status); } - status = cudaMemcpy2D( + status = cudaMemcpy2DAsync( _pixel_data_d[i], _pitch_in_bytes[i], - pixel_data_h[i], + _pixel_data_h[i], _pitch_in_bytes[i], pitch, - xyz->size().height, - cudaMemcpyHostToDevice + _xyz->size().height, + cudaMemcpyHostToDevice, + stream ); if (status != cudaSuccess) { @@ -122,7 +121,7 @@ CUDAJ2KFrameEncoder::Input::~Input() optional<dcp::ArrayData> CUDAJ2KFrameEncoder::encode(DCPVideo const& vf) { - auto input = Input(vf); + auto input = Input(vf, _stream); auto const size = vf.frame()->out_size(); DCPOMATIC_ASSERT(!_size || size == *_size); diff --git a/src/lib/cuda_j2k_frame_encoder.h b/src/lib/cuda_j2k_frame_encoder.h index b17ad69d9..5d1aeef07 100644 --- a/src/lib/cuda_j2k_frame_encoder.h +++ b/src/lib/cuda_j2k_frame_encoder.h @@ -52,7 +52,7 @@ private: class Input { public: - Input(DCPVideo const& vf); + Input(DCPVideo const& vf, cudaStream_t stream); Input(Input const& other) = delete; Input(Input&& other); ~Input(); @@ -72,6 +72,8 @@ private: } private: + std::shared_ptr<dcp::OpenJPEGImage> _xyz; + uint8_t* _pixel_data_h[3]; uint8_t* _pixel_data_d[3]; size_t _pitch_in_bytes[3]; nvjpeg2kImage_t _device_image; |
