summary | refs | log | tree | commit | diff
path: root/src/lib
diff options
context:
space:
mode:
author: Carl Hetherington <cth@carlh.net> 2022-05-23 19:53:17 +0200
committer: Carl Hetherington <cth@carlh.net> 2022-05-23 19:53:17 +0200
commit: 692ef68b721c57fdd07de83adbfccaa93a903f30 (patch)
tree: 049371c9026c9d701c9bc22bbf6358ce5b4aaad5 /src/lib
parent: c4578e1197744272e78d5a19c92bc43e0e92e4cb (diff)
Try using streams.
Diffstat (limited to 'src/lib')
-rw-r--r-- src/lib/cuda_j2k_frame_encoder.cc 27
-rw-r--r-- src/lib/cuda_j2k_frame_encoder.h 4
2 files changed, 16 insertions, 15 deletions
diff --git a/src/lib/cuda_j2k_frame_encoder.cc b/src/lib/cuda_j2k_frame_encoder.cc
index 7fba7b58e..6aa894b87 100644
--- a/src/lib/cuda_j2k_frame_encoder.cc
+++ b/src/lib/cuda_j2k_frame_encoder.cc
@@ -45,19 +45,17 @@ CUDAJ2KFrameEncoder::CUDAJ2KFrameEncoder()
}
-CUDAJ2KFrameEncoder::Input::Input(DCPVideo const& vf)
+CUDAJ2KFrameEncoder::Input::Input(DCPVideo const& vf, cudaStream_t stream)
: _index(vf.index())
, _eyes(vf.eyes())
{
- auto xyz = convert_to_xyz(vf.frame(), boost::bind(&Log::dcp_log, dcpomatic_log.get(), _1, _2));
+ _xyz = convert_to_xyz(vf.frame(), boost::bind(&Log::dcp_log, dcpomatic_log.get(), _1, _2));
- void* pixel_data_h[] = {
- xyz->data(0),
- xyz->data(1),
- xyz->data(2)
- };
+ for (int i = 0; i < 3; ++i) {
+ _pixel_data_h[i] = reinterpret_cast<uint8_t*>(_xyz->data(i));
+ }
- auto const pitch = xyz->size().width * 2;
+ auto const pitch = _xyz->size().width * 2;
for (int i = 0; i < 3; ++i) {
_pitch_in_bytes[i] = pitch;
@@ -65,21 +63,22 @@ CUDAJ2KFrameEncoder::Input::Input(DCPVideo const& vf)
reinterpret_cast<void**>(&_pixel_data_d[i]),
&_pitch_in_bytes[i],
pitch,
- xyz->size().height
+ _xyz->size().height
);
if (status != cudaSuccess) {
throw CUDAError("cudaMallocPitch", status);
}
- status = cudaMemcpy2D(
+ status = cudaMemcpy2DAsync(
_pixel_data_d[i],
_pitch_in_bytes[i],
- pixel_data_h[i],
+ _pixel_data_h[i],
_pitch_in_bytes[i],
pitch,
- xyz->size().height,
- cudaMemcpyHostToDevice
+ _xyz->size().height,
+ cudaMemcpyHostToDevice,
+ stream
);
if (status != cudaSuccess) {
@@ -122,7 +121,7 @@ CUDAJ2KFrameEncoder::Input::~Input()
optional<dcp::ArrayData>
CUDAJ2KFrameEncoder::encode(DCPVideo const& vf)
{
- auto input = Input(vf);
+ auto input = Input(vf, _stream);
auto const size = vf.frame()->out_size();
DCPOMATIC_ASSERT(!_size || size == *_size);
diff --git a/src/lib/cuda_j2k_frame_encoder.h b/src/lib/cuda_j2k_frame_encoder.h
index b17ad69d9..5d1aeef07 100644
--- a/src/lib/cuda_j2k_frame_encoder.h
+++ b/src/lib/cuda_j2k_frame_encoder.h
@@ -52,7 +52,7 @@ private:
class Input
{
public:
- Input(DCPVideo const& vf);
+ Input(DCPVideo const& vf, cudaStream_t stream);
Input(Input const& other) = delete;
Input(Input&& other);
~Input();
@@ -72,6 +72,8 @@ private:
}
private:
+ std::shared_ptr<dcp::OpenJPEGImage> _xyz;
+ uint8_t* _pixel_data_h[3];
uint8_t* _pixel_data_d[3];
size_t _pitch_in_bytes[3];
nvjpeg2kImage_t _device_image;