2 Copyright (C) 2022 Carl Hetherington <cth@carlh.net>
4 This file is part of DCP-o-matic.
6 DCP-o-matic is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 DCP-o-matic is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with DCP-o-matic. If not, see <http://www.gnu.org/licenses/>.
23 #include "cuda_j2k_frame_encoder.h"
24 #include "dcpomatic_log.h"
25 #include "dcp_video.h"
26 #include "exceptions.h"
27 #include "player_video.h"
28 #include <dcp/openjpeg_image.h>
34 using boost::optional;
37 CUDAJ2KFrameEncoder::CUDAJ2KFrameEncoder()
39 nvjpeg2kEncoderCreateSimple(&_encoder_handle);
40 nvjpeg2kEncodeStateCreate(_encoder_handle, &_encoder_state);
41 nvjpeg2kEncodeParamsCreate(&_encoder_params);
/** Convert the frame held by @p vf to XYZ with convert_to_xyz() and copy it to
 *  device memory, one allocation per component, then fill in _device_image so
 *  that it refers to those allocations.
 *
 *  NOTE(review): in this copy of the file the initialiser of pixel_data_h and
 *  most of the arguments to cudaMallocPitch and cudaMemcpy2D are not visible,
 *  so the comments below describe only what can be seen.
 *
 *  NOTE(review): if cudaMallocPitch or cudaMemcpy2D fails on a later component
 *  the exception is thrown without freeing allocations made for earlier
 *  components; as this is a constructor, ~Frame will not run to free them.
 *
 *  @param vf Frame to convert and upload.
 *  @throws CUDAError if cudaMallocPitch or cudaMemcpy2D does not return cudaSuccess.
 */
45 CUDAJ2KFrameEncoder::Frame::Frame(DCPVideo const& vf)
/* The bound Log::dcp_log callback is handed to convert_to_xyz; presumably it receives notes from the conversion -- confirm */
47 auto xyz = convert_to_xyz(vf.frame(), boost::bind(&Log::dcp_log, dcpomatic_log.get(), _1, _2));
/* Host-side source pointers; contents not visible here (presumably one pointer per XYZ component -- confirm) */
49 void* pixel_data_h[] = {
/* Bytes per row requested for each component: width * 2, presumably 2 bytes per sample to match NVJPEG2K_UINT16 below */
55 auto const pitch = xyz->size().width * 2;
/* One device allocation and one host-to-device copy per component */
57 for (int i = 0; i < 3; ++i) {
/* Start with the requested pitch; presumably cudaMallocPitch then writes the pitch it actually used (argument not visible -- confirm) */
58 _pitch_in_bytes[i] = pitch;
59 auto status = cudaMallocPitch(
60 reinterpret_cast<void**>(&_pixel_data_d[i]),
66 if (status != cudaSuccess) {
67 throw CUDAError("cudaMallocPitch", status);
70 status = cudaMemcpy2D(
77 cudaMemcpyHostToDevice
80 if (status != cudaSuccess) {
81 throw CUDAError("cudaMemcpy2D", status);
/* NOTE(review): the return value of cudaDeviceSynchronize is discarded here */
84 cudaDeviceSynchronize();
/* Describe the uploaded data to nvJPEG2000: three components of 16-bit samples, with the per-component pointers and pitches set up above */
87 _device_image.num_components = 3;
88 _device_image.pixel_data = reinterpret_cast<void**>(_pixel_data_d);
89 _device_image.pixel_type = NVJPEG2K_UINT16;
90 _device_image.pitch_in_bytes = reinterpret_cast<size_t*>(_pitch_in_bytes);
94 CUDAJ2KFrameEncoder::Frame::Frame(Frame&& other)
96 for (int i = 0; i < 3; ++i) {
97 _pixel_data_d[i] = other._pixel_data_d[i];
98 other._pixel_data_d[i] = nullptr;
99 _pitch_in_bytes[i] = other._pitch_in_bytes[i];
102 _device_image.num_components = other._device_image.num_components;
103 _device_image.pixel_data = reinterpret_cast<void**>(_pixel_data_d);
104 _device_image.pixel_type = NVJPEG2K_UINT16;
105 _device_image.pitch_in_bytes = reinterpret_cast<size_t*>(_pitch_in_bytes);
109 CUDAJ2KFrameEncoder::Frame::~Frame()
111 cudaFree(_pixel_data_d[0]);
112 cudaFree(_pixel_data_d[1]);
113 cudaFree(_pixel_data_d[2]);
/** Accept the frame @p vf for encoding.
 *
 *  Asserts that the output size and resolution of @p vf agree with any values
 *  already held in _size and _resolution, records the resolution and, while
 *  _batch holds fewer than BATCH_SIZE frames, constructs a Frame from @p vf
 *  (which uploads it to the device) and appends it to _batch.
 *
 *  NOTE(review): the remainder of this function -- what happens once the batch
 *  is full, where _size is assigned and what is returned -- is not visible in
 *  this copy of the file.
 *
 *  @param vf Frame to encode.
 */
117 vector<dcp::ArrayData>
118 CUDAJ2KFrameEncoder::encode(DCPVideo const& vf)
/* Number of frames that _batch may hold before this branch stops adding to it */
120 int constexpr BATCH_SIZE = 128;
/* Every frame given to this encoder must have the same output size as the one already recorded, if any */
122 auto const size = vf.frame()->out_size();
123 DCPOMATIC_ASSERT(!_size || size == *_size);
/* Likewise the resolution must not change between frames */
126 DCPOMATIC_ASSERT(!_resolution || vf.resolution() == *_resolution);
127 _resolution = vf.resolution();
129 if (_batch.size() < BATCH_SIZE) {
130 _batch.push_back(Frame(vf));
/** Encode every frame currently held in _batch with nvJPEG2000 and collect
 *  the resulting bitstreams.
 *
 *  The encoder is configured from _size and _resolution, then each frame is
 *  encoded in turn on stream 0 and its bitstream copied into a dcp::ArrayData
 *  which is appended to the output vector.
 *
 *  NOTE(review): the body of the early-out for an empty batch, the end of the
 *  loop and the final return are not visible in this copy of the file.
 *
 *  @throws CUDAError if a checked nvJPEG2000 call does not return NVJPEG2K_STATUS_SUCCESS.
 */
138 vector<dcp::ArrayData>
139 CUDAJ2KFrameEncoder::flush()
141 if (_batch.empty()) {
/* All three components share the frame's dimensions and use 12 bits of precision */
145 nvjpeg2kImageComponentInfo_t info[3];
146 for (int i = 0; i < 3; ++i) {
147 info[i].component_width = _size->width;
148 info[i].component_height = _size->height;
149 info[i].precision = 12;
/* Zero the whole config first so that fields not assigned below are 0 */
153 nvjpeg2kEncodeConfig_t config;
154 memset(&config, 0, sizeof(config));
155 config.stream_type = NVJPEG2K_STREAM_J2K;
/* NOTE(review): declared as sRGB although Frame uploads data from convert_to_xyz -- confirm this is intended */
156 config.color_space = NVJPEG2K_COLORSPACE_SRGB;
157 config.image_width = _size->width;
158 config.image_height = _size->height;
159 config.num_components = 3;
160 config.image_comp_info = reinterpret_cast<nvjpeg2kImageComponentInfo_t*>(&info);
161 config.code_block_w = 32;
162 config.code_block_h = 32;
/* presumably 0 selects the reversible transform -- confirm against the nvJPEG2000 documentation */
163 config.irreversible = 0;
165 config.prog_order = NVJPEG2K_CPRL;
/* 7 resolution levels for 4K content, 6 otherwise */
166 config.num_resolutions = *_resolution == Resolution::FOUR_K ? 7 : 6;
168 auto status = nvjpeg2kEncodeParamsSetEncodeConfig(_encoder_params, &config);
169 if (status != NVJPEG2K_STATUS_SUCCESS) {
170 throw CUDAError("nvjpeg2kEncodeParamsSetEncodeConfig", status);
/* Fixed quality value of 25; its units are defined by nvJPEG2000 and are not visible here */
174 status = nvjpeg2kEncodeParamsSetQuality(_encoder_params, 25);
175 if (status != NVJPEG2K_STATUS_SUCCESS) {
176 throw CUDAError("nvjpeg2kEncodeParamsSetQuality", status);
179 vector<dcp::ArrayData> output;
181 for (auto const& frame: _batch) {
/* NOTE(review): x is not used in any line visible here */
183 auto x = frame.device_image();
185 status = nvjpeg2kEncode(_encoder_handle, _encoder_state, _encoder_params, frame.device_image(), 0);
186 if (status != NVJPEG2K_STATUS_SUCCESS) {
187 throw CUDAError("nvjpeg2kEncode", status);
/* First call passes a null buffer; compressed_size is then used to size the output buffer, so presumably this call only queries the size */
/* NOTE(review): no check of the status from this first call is visible before it is overwritten by the second call */
190 size_t compressed_size;
191 status = nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encoder_state, nullptr, &compressed_size, 0);
/* Second call copies the bitstream into a buffer of the size just obtained */
193 dcp::ArrayData this_output(compressed_size);
194 status = nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encoder_state, this_output.data(), &compressed_size, 0);
195 if (status != NVJPEG2K_STATUS_SUCCESS) {
196 throw CUDAError("nvjpeg2kEncodeRetrieveBitstream", status);
199 output.push_back(this_output);
/* Wait for stream 0 (the stream passed to the calls above); the return value is discarded */
200 cudaStreamSynchronize(0);
/** Emit a timing log entry, via LOG_TIMING, recording the start of an encoder
 *  thread together with the value returned by thread_id().
 */
210 CUDAJ2KFrameEncoder::log_thread_start ()
212 LOG_TIMING("start-encoder-thread thread=%1", thread_id());