summaryrefslogtreecommitdiff
path: root/src/lib/cuda_j2k_encoder_thread.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/cuda_j2k_encoder_thread.cc')
-rw-r--r--src/lib/cuda_j2k_encoder_thread.cc207
1 files changed, 207 insertions, 0 deletions
diff --git a/src/lib/cuda_j2k_encoder_thread.cc b/src/lib/cuda_j2k_encoder_thread.cc
new file mode 100644
index 000000000..466b78fc1
--- /dev/null
+++ b/src/lib/cuda_j2k_encoder_thread.cc
@@ -0,0 +1,207 @@
+/*
+ Copyright (C) 2025 Carl Hetherington <cth@carlh.net>
+
+ This file is part of DCP-o-matic.
+
+ DCP-o-matic is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ DCP-o-matic is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with DCP-o-matic. If not, see <http://www.gnu.org/licenses/>.
+
+*/
+
+
+#include "colour_conversion.h"
+#include "cuda_j2k_encoder_thread.h"
+#include "dcp_video.h"
+#include "exceptions.h"
+#include "resolution.h"
+#include <nvjpeg2k.h>
+
+
+using std::shared_ptr;
+
+
+CUDAJ2KEncoderThread::CUDAJ2KEncoderThread(J2KEncoder& encoder)
+ : J2KSyncEncoderThread(encoder)
+{
+ nvjpeg2kEncoderCreateSimple(&_encoder_handle);
+ nvjpeg2kEncodeStateCreate(_encoder_handle, &_encode_state);
+ nvjpeg2kEncodeParamsCreate(&_encode_params);
+
+ nvjpeg2kEncodeParamsSetInputFormat(_encode_params, NVJPEG2K_FORMAT_PLANAR);
+
+ cudaStreamCreateWithFlags(&_stream, cudaStreamNonBlocking);
+}
+
+
+shared_ptr<dcp::ArrayData>
+CUDAJ2KEncoderThread::encode(DCPVideo const& frame)
+{
+ auto xyz = convert_to_xyz(frame.frame());
+ auto const size = xyz->size();
+
+ /* {x,y,z}lp is 4-byte int representation */
+ int* xlp = xyz->data(0);
+ int* ylp = xyz->data(1);
+ int* zlp = xyz->data(2);
+
+ uint8_t* pixel_data_h[3];
+
+ /* {x,y,z}p is 2-byte int representation */
+ _xyz_x.resize(size.width * size.height);
+ int16_t* xp = _xyz_x.data();
+ pixel_data_h[0] = reinterpret_cast<uint8_t*>(xp);
+
+ _xyz_y.resize(size.width * size.height);
+ int16_t* yp = _xyz_y.data();
+ pixel_data_h[1] = reinterpret_cast<uint8_t*>(yp);
+
+ _xyz_z.resize(size.width * size.height);
+ int16_t* zp = _xyz_z.data();
+ pixel_data_h[2] = reinterpret_cast<uint8_t*>(zp);
+
+ for (int j = 0; j < size.width * size.height; ++j) {
+ *xp++ = static_cast<int16_t>(*xlp++);
+ *yp++ = static_cast<int16_t>(*ylp++);
+ *zp++ = static_cast<int16_t>(*zlp++);
+ }
+
+ allocate_pixel_data_d(size);
+
+ /* Copy to device */
+ size_t pitch_in_bytes_h[3];
+ for (int i = 0; i < 3; ++i) {
+ pitch_in_bytes_h[i] = size.width * 2;
+ auto const status = cudaMemcpy2D(
+ _pixel_data_d[i],
+ _pitch_in_bytes_d[i],
+ pixel_data_h[i],
+ pitch_in_bytes_h[i],
+ size.width * 2,
+ size.height,
+ cudaMemcpyHostToDevice
+ );
+
+ if (status != cudaSuccess) {
+ throw CUDAError("cudaMemcpy2D", status);
+ }
+ }
+
+ nvjpeg2kImage_t device_image;
+ device_image.num_components = 3;
+ device_image.pixel_data = reinterpret_cast<void**>(_pixel_data_d);
+ device_image.pixel_type = NVJPEG2K_INT16;
+ device_image.pitch_in_bytes = reinterpret_cast<size_t*>(_pitch_in_bytes_d);
+
+ nvjpeg2kImageComponentInfo_t info[3];
+ for (int i = 0; i < 3; ++i) {
+ info[i].component_width = size.width;
+ info[i].component_height = size.height;
+ info[i].precision = 12;
+ info[i].sgn = 0;
+ }
+
+ nvjpeg2kEncodeConfig_t config;
+ memset(&config, 0, sizeof(config));
+ config.stream_type = NVJPEG2K_STREAM_J2K;
+ config.color_space = NVJPEG2K_COLORSPACE_SRGB;
+ config.image_width = size.width;
+ config.image_height = size.height;
+ config.num_components = 3;
+ config.image_comp_info = reinterpret_cast<nvjpeg2kImageComponentInfo_t*>(&info);
+ config.code_block_w = 32;
+ config.code_block_h = 32;
+ config.irreversible = 0;
+ config.mct_mode = 1;
+ config.prog_order = NVJPEG2K_CPRL;
+ config.num_resolutions = frame.resolution() == Resolution::FOUR_K ? 7 : 6;
+ config.num_precincts_init = 7;
+ config.precinct_width[0] = 7;
+ config.precinct_height[0] = 7;
+ for (int i = 1; i < 6; ++i) {
+ config.precinct_width[i] = 8;
+ config.precinct_height[i] = 8;
+ }
+
+ auto status = nvjpeg2kEncodeParamsSetEncodeConfig(_encode_params, &config);
+ if (status != NVJPEG2K_STATUS_SUCCESS) {
+ throw CUDAError("nvjpeg2kEncodeParamsSetEncodeConfig", status);
+ }
+
+ // XXX: quality
+ status = nvjpeg2kEncodeParamsSetQuality(_encode_params, 50);
+ if (status != NVJPEG2K_STATUS_SUCCESS) {
+ throw CUDAError("nvjpeg2kEncodeParamsSetQuality", status);
+ }
+
+ status = nvjpeg2kEncode(_encoder_handle, _encode_state, _encode_params, &device_image, _stream);
+ if (status != NVJPEG2K_STATUS_SUCCESS) {
+ throw CUDAError("nvjpeg2kEncode", status);
+ }
+
+ size_t compressed_size;
+ status = nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encode_state, nullptr, &compressed_size, _stream);
+
+ auto output = std::make_shared<dcp::ArrayData>(compressed_size);
+ status = nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encode_state, output->data(), &compressed_size, _stream);
+ if (status != NVJPEG2K_STATUS_SUCCESS) {
+ throw CUDAError("nvjpeg2kEncodeRetrieveBitstream", status);
+ }
+
+ return output;
+}
+
+
+void
+CUDAJ2KEncoderThread::allocate_pixel_data_d(dcp::Size size)
+{
+ if (size == _pixel_data_d_size) {
+ return;
+ }
+
+ free_pixel_data_d();
+
+ for (int i = 0; i < 3; ++i) {
+ auto status = cudaMallocPitch(
+ reinterpret_cast<void**>(&_pixel_data_d[i]),
+ &_pitch_in_bytes_d[i],
+ size.width * 2,
+ size.height
+ );
+
+ if (status != cudaSuccess) {
+ throw CUDAError("cudaMallocPitch", status);
+ }
+ }
+
+ _pixel_data_d_size = size;
+}
+
+
+void
+CUDAJ2KEncoderThread::free_pixel_data_d()
+{
+ for (int i = 0; i < 3; ++i) {
+ cudaFree(_pixel_data_d[i]);
+ }
+}
+
+
+CUDAJ2KEncoderThread::~CUDAJ2KEncoderThread()
+{
+ free_pixel_data_d();
+
+ nvjpeg2kEncodeParamsDestroy(_encode_params);
+ nvjpeg2kEncodeStateDestroy(_encode_state);
+ nvjpeg2kEncoderDestroy(_encoder_handle);
+}
+