#include "player_video.h"
#include <dcp/openjpeg_image.h>
#include <nvjpeg2k.h>
+#include <vector>
+using std::vector;
using boost::optional;
nvjpeg2kEncoderCreateSimple(&_encoder_handle);
nvjpeg2kEncodeStateCreate(_encoder_handle, &_encoder_state);
nvjpeg2kEncodeParamsCreate(&_encoder_params);
-
- _pixel_data_d[0] = _pixel_data_d[1] = _pixel_data_d[2] = nullptr;
}
-void
-CUDAJ2KFrameEncoder::allocate(dcp::Size size)
+CUDAJ2KFrameEncoder::Frame::Frame(DCPVideo const& vf)
{
- for (int i = 0; i < 3; ++i) {
- _image_component_info[i].component_width = size.width;
- _image_component_info[i].component_height = size.height;
- _image_component_info[i].precision = 12;
- _image_component_info[i].sgn = 0;
- _pitch_in_bytes[i] = size.width * 2;
- }
+ auto xyz = convert_to_xyz(vf.frame(), boost::bind(&Log::dcp_log, dcpomatic_log.get(), _1, _2));
+
+ void* pixel_data_h[] = {
+ xyz->data(0),
+ xyz->data(1),
+ xyz->data(2)
+ };
+
+ auto const pitch = xyz->size().width * 2;
for (int i = 0; i < 3; ++i) {
- auto error = cudaMallocPitch(
+ _pitch_in_bytes[i] = pitch;
+ auto status = cudaMallocPitch(
reinterpret_cast<void**>(&_pixel_data_d[i]),
&_pitch_in_bytes[i],
- _image_component_info[i].component_width * 2,
- _image_component_info[i].component_height
+ pitch,
+ xyz->size().height
);
- if (error != cudaSuccess) {
- throw CUDAError("cudaMallocPitch", error);
+
+ if (status != cudaSuccess) {
+ throw CUDAError("cudaMallocPitch", status);
}
- }
- _allocated_size = size;
-}
+ status = cudaMemcpy2D(
+ _pixel_data_d[i],
+ _pitch_in_bytes[i],
+ pixel_data_h[i],
+ _pitch_in_bytes[i],
+ pitch,
+ xyz->size().height,
+ cudaMemcpyHostToDevice
+ );
+ if (status != cudaSuccess) {
+ throw CUDAError("cudaMemcpy2D", status);
+ }
-CUDAJ2KFrameEncoder::~CUDAJ2KFrameEncoder()
-{
- free();
+ cudaDeviceSynchronize();
+ }
+
+ _device_image.num_components = 3;
+ _device_image.pixel_data = reinterpret_cast<void**>(_pixel_data_d);
+ _device_image.pixel_type = NVJPEG2K_UINT16;
+ _device_image.pitch_in_bytes = reinterpret_cast<size_t*>(_pitch_in_bytes);
}
-void
-CUDAJ2KFrameEncoder::free()
+CUDAJ2KFrameEncoder::Frame::Frame(Frame&& other)
{
for (int i = 0; i < 3; ++i) {
- cudaFree(&_pixel_data_d[i]);
+ _pixel_data_d[i] = other._pixel_data_d[i];
+ other._pixel_data_d[i] = nullptr;
+ _pitch_in_bytes[i] = other._pitch_in_bytes[i];
}
- _pixel_data_d[0] = _pixel_data_d[1] = _pixel_data_d[2] = nullptr;
+ _device_image.num_components = other._device_image.num_components;
+ _device_image.pixel_data = reinterpret_cast<void**>(_pixel_data_d);
+ _device_image.pixel_type = NVJPEG2K_UINT16;
+ _device_image.pitch_in_bytes = reinterpret_cast<size_t*>(_pitch_in_bytes);
}
-optional<dcp::ArrayData>
-CUDAJ2KFrameEncoder::encode(DCPVideo const& vf)
+CUDAJ2KFrameEncoder::Frame::~Frame()
{
- std::cout << "cuda encode starts.\n";
+ cudaFree(_pixel_data_d[0]);
+ cudaFree(_pixel_data_d[1]);
+ cudaFree(_pixel_data_d[2]);
+}
- auto frame = vf.frame();
- auto size = frame->out_size();
- if (size != _allocated_size) {
- free();
- allocate(size);
- }
+vector<dcp::ArrayData>
+CUDAJ2KFrameEncoder::encode(DCPVideo const& vf)
+{
+ int constexpr BATCH_SIZE = 128;
- auto xyz = convert_to_xyz(vf.frame(), boost::bind(&Log::dcp_log, dcpomatic_log.get(), _1, _2));
+ auto const size = vf.frame()->out_size();
+ DCPOMATIC_ASSERT(!_size || size == *_size);
+ _size = size;
- for (int i = 0; i < 3; ++i) {
- _pixel_data_h[i] = reinterpret_cast<uint8_t*>(xyz->data(i));
+ DCPOMATIC_ASSERT(!_resolution || vf.resolution() == *_resolution);
+ _resolution = vf.resolution();
+
+ if (_batch.size() < BATCH_SIZE) {
+ _batch.push_back(Frame(vf));
+ return {};
}
- nvjpeg2kImage_t image_h;
- image_h.num_components = 3;
- image_h.pixel_data = reinterpret_cast<void**>(_pixel_data_h);
- image_h.pixel_type = NVJPEG2K_UINT16;
- image_h.pitch_in_bytes = reinterpret_cast<size_t*>(_pitch_in_bytes);
+ return flush();
+}
+
- nvjpeg2kImage_t image_d;
- image_d.num_components = 3;
- image_d.pixel_data = reinterpret_cast<void**>(_pixel_data_d);
- image_d.pixel_type = NVJPEG2K_UINT16;
- image_d.pitch_in_bytes = reinterpret_cast<size_t*>(_pitch_in_bytes);
+vector<dcp::ArrayData>
+CUDAJ2KFrameEncoder::flush()
+{
+ if (_batch.empty()) {
+ return {};
+ }
+ nvjpeg2kImageComponentInfo_t info[3];
for (int i = 0; i < 3; ++i) {
- auto status = cudaMemcpy2D(
- image_d.pixel_data[i],
- image_d.pitch_in_bytes[i],
- image_h.pixel_data[i],
- image_h.pitch_in_bytes[i],
- 2 * _image_component_info[i].component_width,
- _image_component_info[i].component_height,
- cudaMemcpyHostToDevice
- );
- if (status != cudaSuccess) {
- throw CUDAError("cudaMemcpy2D", status);
- }
- cudaDeviceSynchronize();
+ info[i].component_width = _size->width;
+ info[i].component_height = _size->height;
+ info[i].precision = 12;
+ info[i].sgn = 0;
}
nvjpeg2kEncodeConfig_t config;
memset(&config, 0, sizeof(config));
config.stream_type = NVJPEG2K_STREAM_J2K;
config.color_space = NVJPEG2K_COLORSPACE_SRGB;
- config.image_width = size.width;
- config.image_height = size.height;
+ config.image_width = _size->width;
+ config.image_height = _size->height;
config.num_components = 3;
- config.image_comp_info = reinterpret_cast<nvjpeg2kImageComponentInfo_t*>(&_image_component_info);
+ config.image_comp_info = reinterpret_cast<nvjpeg2kImageComponentInfo_t*>(&info);
config.code_block_w = 32;
config.code_block_h = 32;
config.irreversible = 0;
config.mct_mode = 1;
config.prog_order = NVJPEG2K_CPRL;
- config.num_resolutions = vf.resolution() == Resolution::FOUR_K ? 7 : 6;
+ config.num_resolutions = *_resolution == Resolution::FOUR_K ? 7 : 6;
auto status = nvjpeg2kEncodeParamsSetEncodeConfig(_encoder_params, &config);
if (status != NVJPEG2K_STATUS_SUCCESS) {
throw CUDAError("nvjpeg2kEncodeParamsSetQuality", status);
}
- status = nvjpeg2kEncode(_encoder_handle, _encoder_state, _encoder_params, &image_d, 0);
- if (status != NVJPEG2K_STATUS_SUCCESS) {
- throw CUDAError("nvjpeg2kEncode", status);
- }
+ vector<dcp::ArrayData> output;
- size_t compressed_size;
- status = nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encoder_state, nullptr, &compressed_size, 0);
+ for (auto const& frame: _batch) {
- dcp::ArrayData output(compressed_size);
- status = nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encoder_state, output.data(), &compressed_size, 0);
- cudaDeviceSynchronize();
+ auto x = frame.device_image();
- if (status != NVJPEG2K_STATUS_SUCCESS) {
- throw CUDAError("nvjpeg2kEncodeRetrieveBitstream", status);
+ status = nvjpeg2kEncode(_encoder_handle, _encoder_state, _encoder_params, frame.device_image(), 0);
+ if (status != NVJPEG2K_STATUS_SUCCESS) {
+ throw CUDAError("nvjpeg2kEncode", status);
+ }
+
+ size_t compressed_size;
+ status = nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encoder_state, nullptr, &compressed_size, 0);
+
+ dcp::ArrayData this_output(compressed_size);
+ status = nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encoder_state, this_output.data(), &compressed_size, 0);
+ if (status != NVJPEG2K_STATUS_SUCCESS) {
+ throw CUDAError("nvjpeg2kEncodeRetrieveBitstream", status);
+ }
+
+ output.push_back(this_output);
+ cudaStreamSynchronize(0);
}
- std::cout << "cuda encode ends with " << status << " " << output.size() << "\n";
+ _batch.clear();
return output;
}