diff options
| -rw-r--r-- | src/lib/cuda_j2k_frame_encoder.cc | 27 | ||||
| -rw-r--r-- | src/lib/cuda_j2k_frame_encoder.h | 4 |
2 files changed, 16 insertions, 15 deletions
diff --git a/src/lib/cuda_j2k_frame_encoder.cc b/src/lib/cuda_j2k_frame_encoder.cc index 7fba7b58e..6aa894b87 100644 --- a/src/lib/cuda_j2k_frame_encoder.cc +++ b/src/lib/cuda_j2k_frame_encoder.cc @@ -45,19 +45,17 @@ CUDAJ2KFrameEncoder::CUDAJ2KFrameEncoder() } -CUDAJ2KFrameEncoder::Input::Input(DCPVideo const& vf) +CUDAJ2KFrameEncoder::Input::Input(DCPVideo const& vf, cudaStream_t stream) : _index(vf.index()) , _eyes(vf.eyes()) { - auto xyz = convert_to_xyz(vf.frame(), boost::bind(&Log::dcp_log, dcpomatic_log.get(), _1, _2)); + _xyz = convert_to_xyz(vf.frame(), boost::bind(&Log::dcp_log, dcpomatic_log.get(), _1, _2)); - void* pixel_data_h[] = { - xyz->data(0), - xyz->data(1), - xyz->data(2) - }; + for (int i = 0; i < 3; ++i) { + _pixel_data_h[i] = reinterpret_cast<uint8_t*>(_xyz->data(i)); + } - auto const pitch = xyz->size().width * 2; + auto const pitch = _xyz->size().width * 2; for (int i = 0; i < 3; ++i) { _pitch_in_bytes[i] = pitch; @@ -65,21 +63,22 @@ CUDAJ2KFrameEncoder::Input::Input(DCPVideo const& vf) reinterpret_cast<void**>(&_pixel_data_d[i]), &_pitch_in_bytes[i], pitch, - xyz->size().height + _xyz->size().height ); if (status != cudaSuccess) { throw CUDAError("cudaMallocPitch", status); } - status = cudaMemcpy2D( + status = cudaMemcpy2DAsync( _pixel_data_d[i], _pitch_in_bytes[i], - pixel_data_h[i], + _pixel_data_h[i], _pitch_in_bytes[i], pitch, - xyz->size().height, - cudaMemcpyHostToDevice + _xyz->size().height, + cudaMemcpyHostToDevice, + stream ); if (status != cudaSuccess) { @@ -122,7 +121,7 @@ CUDAJ2KFrameEncoder::Input::~Input() optional<dcp::ArrayData> CUDAJ2KFrameEncoder::encode(DCPVideo const& vf) { - auto input = Input(vf); + auto input = Input(vf, _stream); auto const size = vf.frame()->out_size(); DCPOMATIC_ASSERT(!_size || size == *_size); diff --git a/src/lib/cuda_j2k_frame_encoder.h b/src/lib/cuda_j2k_frame_encoder.h index b17ad69d9..5d1aeef07 100644 --- a/src/lib/cuda_j2k_frame_encoder.h +++ b/src/lib/cuda_j2k_frame_encoder.h @@ -52,7 +52,7 @@ private: class Input { public: - Input(DCPVideo const& vf); + Input(DCPVideo const& vf, cudaStream_t stream); Input(Input const& other) = delete; Input(Input&& other); ~Input(); @@ -72,6 +72,8 @@ private: } private: + std::shared_ptr<dcp::OpenJPEGImage> _xyz; + uint8_t* _pixel_data_h[3]; uint8_t* _pixel_data_d[3]; size_t _pitch_in_bytes[3]; nvjpeg2kImage_t _device_image; |
