}
-CUDAJ2KFrameEncoder::Input::Input(DCPVideo const& vf)
+CUDAJ2KFrameEncoder::Input::Input(DCPVideo const& vf, cudaStream_t stream)
: _index(vf.index())
, _eyes(vf.eyes())
{
- auto xyz = convert_to_xyz(vf.frame(), boost::bind(&Log::dcp_log, dcpomatic_log.get(), _1, _2));
+ _xyz = convert_to_xyz(vf.frame(), boost::bind(&Log::dcp_log, dcpomatic_log.get(), _1, _2));
- void* pixel_data_h[] = {
- xyz->data(0),
- xyz->data(1),
- xyz->data(2)
- };
+ for (int i = 0; i < 3; ++i) {
+ _pixel_data_h[i] = reinterpret_cast<uint8_t*>(_xyz->data(i));
+ }
- auto const pitch = xyz->size().width * 2;
+ auto const pitch = _xyz->size().width * 2;
for (int i = 0; i < 3; ++i) {
_pitch_in_bytes[i] = pitch;
reinterpret_cast<void**>(&_pixel_data_d[i]),
&_pitch_in_bytes[i],
pitch,
- xyz->size().height
+ _xyz->size().height
);
if (status != cudaSuccess) {
throw CUDAError("cudaMallocPitch", status);
}
- status = cudaMemcpy2D(
+ status = cudaMemcpy2DAsync(
_pixel_data_d[i],
_pitch_in_bytes[i],
- pixel_data_h[i],
+ _pixel_data_h[i],
_pitch_in_bytes[i],
pitch,
- xyz->size().height,
- cudaMemcpyHostToDevice
+ _xyz->size().height,
+ cudaMemcpyHostToDevice,
+ stream
);
if (status != cudaSuccess) {
optional<dcp::ArrayData>
CUDAJ2KFrameEncoder::encode(DCPVideo const& vf)
{
- auto input = Input(vf);
+ auto input = Input(vf, _stream);
auto const size = vf.frame()->out_size();
DCPOMATIC_ASSERT(!_size || size == *_size);
class Input
{
public:
- Input(DCPVideo const& vf);
+ Input(DCPVideo const& vf, cudaStream_t stream);
Input(Input const& other) = delete;
Input(Input&& other);
~Input();
}
private:
+ std::shared_ptr<dcp::OpenJPEGImage> _xyz;
+ uint8_t* _pixel_data_h[3];
uint8_t* _pixel_data_d[3];
size_t _pitch_in_bytes[3];
nvjpeg2kImage_t _device_image;