/*
    Copyright (C) 2025 Carl Hetherington <cth@carlh.net>

    This file is part of DCP-o-matic.

    DCP-o-matic is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    DCP-o-matic is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.

*/


#include "colour_conversion.h"
#include "cuda_j2k_encoder_thread.h"
#include "dcp_video.h"
#include "exceptions.h"
#include "resolution.h"
/* NOTE(review): the name of the single system header included here was lost
 * (only a bare `#include` survived); the headers below are the ones this file
 * visibly needs.  Confirm against the original.
 */
#include <nvjpeg2k.h>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>


using std::shared_ptr;


namespace {

/** Throw a CUDAError naming @p function if a CUDA runtime call failed */
void
check_cuda(char const* function, cudaError_t status)
{
	if (status != cudaSuccess) {
		throw CUDAError(function, status);
	}
}


/** Throw a CUDAError naming @p function if an nvJPEG2000 call failed */
void
check_nvjpeg2k(char const* function, nvjpeg2kStatus_t status)
{
	if (status != NVJPEG2K_STATUS_SUCCESS) {
		throw CUDAError(function, status);
	}
}

}


/** Create the nvJPEG2000 encoder handle, encode state, encode parameters and
 *  the CUDA stream that encode() uses.
 *
 *  @throws CUDAError if any of the creation calls fails.  Anything that was
 *  created before the failure is destroyed again, since the destructor does
 *  not run for a partially-constructed object.
 */
CUDAJ2KEncoderThread::CUDAJ2KEncoderThread(J2KEncoder& encoder)
	: J2KSyncEncoderThread(encoder)
{
	/* Make sure free_pixel_data_d() never sees an indeterminate pointer
	 * (harmless if the class declaration already initialises these).
	 */
	for (int i = 0; i < 3; ++i) {
		_pixel_data_d[i] = nullptr;
	}

	check_nvjpeg2k("nvjpeg2kEncoderCreateSimple", nvjpeg2kEncoderCreateSimple(&_encoder_handle));

	bool have_state = false;
	bool have_params = false;
	try {
		check_nvjpeg2k("nvjpeg2kEncodeStateCreate", nvjpeg2kEncodeStateCreate(_encoder_handle, &_encode_state));
		have_state = true;
		check_nvjpeg2k("nvjpeg2kEncodeParamsCreate", nvjpeg2kEncodeParamsCreate(&_encode_params));
		have_params = true;
		check_nvjpeg2k(
			"nvjpeg2kEncodeParamsSetInputFormat",
			nvjpeg2kEncodeParamsSetInputFormat(_encode_params, NVJPEG2K_FORMAT_PLANAR)
			);
		/* The stream is created last so there is never a stream to clean up below */
		check_cuda("cudaStreamCreateWithFlags", cudaStreamCreateWithFlags(&_stream, cudaStreamNonBlocking));
	} catch (...) {
		if (have_params) {
			nvjpeg2kEncodeParamsDestroy(_encode_params);
		}
		if (have_state) {
			nvjpeg2kEncodeStateDestroy(_encode_state);
		}
		nvjpeg2kEncoderDestroy(_encoder_handle);
		throw;
	}
}


/** Encode one frame to a JPEG2000 codestream using nvJPEG2000.
 *
 *  The frame is converted to XYZ, each plane is narrowed from 4-byte to 2-byte
 *  samples, the planes are copied to pitched device buffers and encoded on
 *  _stream.
 *
 *  @param frame Frame to encode.
 *  @return The compressed codestream.
 *  @throws CUDAError if any CUDA or nvJPEG2000 call fails.
 */
shared_ptr<dcp::ArrayData>
CUDAJ2KEncoderThread::encode(DCPVideo const& frame)
{
	auto xyz = convert_to_xyz(frame.frame());
	auto const size = xyz->size();
	auto const pixels = static_cast<size_t>(size.width) * static_cast<size_t>(size.height);

	/* Narrow each plane from the 4-byte int representation to 2-byte ints */
	_xyz_x.resize(pixels);
	_xyz_y.resize(pixels);
	_xyz_z.resize(pixels);
	int16_t* const narrow[3] = { _xyz_x.data(), _xyz_y.data(), _xyz_z.data() };
	for (int c = 0; c < 3; ++c) {
		int const* wide = xyz->data(c);
		for (size_t j = 0; j < pixels; ++j) {
			narrow[c][j] = static_cast<int16_t>(wide[j]);
		}
	}

	allocate_pixel_data_d(size);

	/* Copy to device.  This is queued on _stream so that it is ordered before
	 * the encode below; _stream is created with cudaStreamNonBlocking and so
	 * does not synchronise with copies made on the default stream.
	 */
	size_t const row_bytes = static_cast<size_t>(size.width) * sizeof(int16_t);
	for (int c = 0; c < 3; ++c) {
		check_cuda(
			"cudaMemcpy2DAsync",
			cudaMemcpy2DAsync(
				_pixel_data_d[c],
				_pitch_in_bytes_d[c],
				reinterpret_cast<uint8_t*>(narrow[c]),
				row_bytes,
				row_bytes,
				size.height,
				cudaMemcpyHostToDevice,
				_stream
				)
			);
	}

	nvjpeg2kImage_t device_image = {};
	device_image.num_components = 3;
	device_image.pixel_data = reinterpret_cast<void**>(_pixel_data_d);
	device_image.pixel_type = NVJPEG2K_INT16;
	device_image.pitch_in_bytes = reinterpret_cast<size_t*>(_pitch_in_bytes_d);

	nvjpeg2kImageComponentInfo_t info[3];
	for (int c = 0; c < 3; ++c) {
		info[c].component_width = size.width;
		info[c].component_height = size.height;
		info[c].precision = 12;
		info[c].sgn = 0;
	}

	nvjpeg2kEncodeConfig_t config;
	std::memset(&config, 0, sizeof(config));
	config.stream_type = NVJPEG2K_STREAM_J2K;
	config.color_space = NVJPEG2K_COLORSPACE_SRGB;
	config.image_width = size.width;
	config.image_height = size.height;
	config.num_components = 3;
	config.image_comp_info = info;
	config.code_block_w = 32;
	config.code_block_h = 32;
	/* NOTE(review): DCI JPEG2000 normally uses the irreversible transform;
	 * confirm that 0 is intended here.
	 */
	config.irreversible = 0;
	config.mct_mode = 1;
	config.prog_order = NVJPEG2K_CPRL;
	config.num_resolutions = frame.resolution() == Resolution::FOUR_K ? 7 : 6;

	/* Fill every precinct entry that we declare as initialised.  Previously
	 * only indices 0 to 5 were set, leaving the entry for the 7th (4K)
	 * resolution at zero.
	 * NOTE(review): 7 and 8 are presumably log2 sizes (128 and 256) -- confirm
	 * against the nvJPEG2000 documentation.
	 */
	int const precincts = 7;
	config.num_precincts_init = precincts;
	config.precinct_width[0] = 7;
	config.precinct_height[0] = 7;
	for (int i = 1; i < precincts; ++i) {
		config.precinct_width[i] = 8;
		config.precinct_height[i] = 8;
	}

	check_nvjpeg2k(
		"nvjpeg2kEncodeParamsSetEncodeConfig",
		nvjpeg2kEncodeParamsSetEncodeConfig(_encode_params, &config)
		);

	// XXX: quality
	check_nvjpeg2k("nvjpeg2kEncodeParamsSetQuality", nvjpeg2kEncodeParamsSetQuality(_encode_params, 50));

	check_nvjpeg2k(
		"nvjpeg2kEncode",
		nvjpeg2kEncode(_encoder_handle, _encode_state, _encode_params, &device_image, _stream)
		);

	/* First call (null buffer) asks for the size, second fetches the data */
	size_t compressed_size = 0;
	check_nvjpeg2k(
		"nvjpeg2kEncodeRetrieveBitstream",
		nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encode_state, nullptr, &compressed_size, _stream)
		);

	auto output = std::make_shared<dcp::ArrayData>(compressed_size);
	check_nvjpeg2k(
		"nvjpeg2kEncodeRetrieveBitstream",
		nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encode_state, output->data(), &compressed_size, _stream)
		);

	/* Everything above was queued on _stream; wait for it to finish before
	 * the caller reads the output.
	 */
	check_cuda("cudaStreamSynchronize", cudaStreamSynchronize(_stream));

	return output;
}


/** Make sure the three device-side plane buffers can hold an image of @p size
 *  with 2-byte samples, reallocating them if the size has changed since the
 *  last call.
 *
 *  @throws CUDAError if allocation fails; in that case no device buffers are
 *  left allocated.
 */
void
CUDAJ2KEncoderThread::allocate_pixel_data_d(dcp::Size size)
{
	if (size == _pixel_data_d_size) {
		return;
	}

	free_pixel_data_d();

	for (int i = 0; i < 3; ++i) {
		auto const status = cudaMallocPitch(
			reinterpret_cast<void**>(&_pixel_data_d[i]),
			&_pitch_in_bytes_d[i],
			static_cast<size_t>(size.width) * sizeof(int16_t),
			size.height
			);

		if (status != cudaSuccess) {
			/* Don't leave a partial set of buffers behind */
			free_pixel_data_d();
			throw CUDAError("cudaMallocPitch", status);
		}
	}

	_pixel_data_d_size = size;
}


/** Free the device-side plane buffers and forget their size, so that the next
 *  call to allocate_pixel_data_d() always reallocates.  Safe to call more than
 *  once.
 */
void
CUDAJ2KEncoderThread::free_pixel_data_d()
{
	for (int i = 0; i < 3; ++i) {
		cudaFree(_pixel_data_d[i]);
		/* Null the pointer so that a later free does not free it twice */
		_pixel_data_d[i] = nullptr;
	}

	_pixel_data_d_size = dcp::Size();
}


/** Release the device buffers, the nvJPEG2000 objects and the CUDA stream.
 *  Return values are ignored as nothing useful can be done about a failure
 *  here.
 */
CUDAJ2KEncoderThread::~CUDAJ2KEncoderThread()
{
	free_pixel_data_d();

	nvjpeg2kEncodeParamsDestroy(_encode_params);
	nvjpeg2kEncodeStateDestroy(_encode_state);
	nvjpeg2kEncoderDestroy(_encoder_handle);
	cudaStreamDestroy(_stream);
}