2 Copyright (C) 2022 Carl Hetherington <cth@carlh.net>
4 This file is part of DCP-o-matic.
6 DCP-o-matic is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 DCP-o-matic is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with DCP-o-matic. If not, see <http://www.gnu.org/licenses/>.
23 #include "cuda_j2k_frame_encoder.h"
24 #include "dcpomatic_log.h"
25 #include "dcp_video.h"
26 #include "exceptions.h"
27 #include "player_video.h"
28 #include <dcp/array_data.h>
29 #include <dcp/openjpeg_image.h>
38 using boost::optional;
41 vector<thread> CUDAJ2KFrameEncoder::_cuda_threads;
42 std::queue<CUDAJ2KFrameEncoder::Input> CUDAJ2KFrameEncoder::_input;
43 std::map<std::pair<int, Eyes>, dcp::ArrayData> CUDAJ2KFrameEncoder::_output;
44 boost::condition CUDAJ2KFrameEncoder::_input_condition;
45 boost::mutex CUDAJ2KFrameEncoder::_input_mutex;
46 boost::condition CUDAJ2KFrameEncoder::_output_condition;
47 boost::mutex CUDAJ2KFrameEncoder::_output_mutex;
50 CUDAJ2KFrameEncoder::CUDAJ2KFrameEncoder()
52 if (_cuda_threads.empty()) {
53 for (int i = 0; i < 8; ++i) {
54 _cuda_threads.push_back(std::thread(&CUDAJ2KFrameEncoder::cuda_thread));
61 CUDAJ2KFrameEncoder::cuda_thread()
63 nvjpeg2kEncoder_t encoder_handle;
64 nvjpeg2kEncodeState_t encoder_state;
65 nvjpeg2kEncodeParams_t encoder_params;
67 nvjpeg2kEncoderCreateSimple(&encoder_handle);
68 nvjpeg2kEncodeStateCreate(encoder_handle, &encoder_state);
69 nvjpeg2kEncodeParamsCreate(&encoder_params);
72 cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
75 boost::mutex::scoped_lock lm(_input_mutex);
76 while (_input.empty()) {
77 std::cout << "gpu starved.\n";
78 _input_condition.wait(lm);
81 auto input = std::move(_input.front());
85 nvjpeg2kImageComponentInfo_t info[3];
86 for (int i = 0; i < 3; ++i) {
87 info[i].component_width = input.size().width;
88 info[i].component_height = input.size().height;
89 info[i].precision = 12;
93 nvjpeg2kEncodeConfig_t config;
94 memset(&config, 0, sizeof(config));
95 config.stream_type = NVJPEG2K_STREAM_J2K;
96 config.color_space = NVJPEG2K_COLORSPACE_SRGB;
97 config.image_width = input.size().width;
98 config.image_height = input.size().height;
99 config.num_components = 3;
100 config.image_comp_info = reinterpret_cast<nvjpeg2kImageComponentInfo_t*>(&info);
101 config.code_block_w = 32;
102 config.code_block_h = 32;
103 config.irreversible = 0;
105 config.prog_order = NVJPEG2K_CPRL;
106 config.num_resolutions = input.resolution() == Resolution::FOUR_K ? 7 : 6;
107 config.enable_custom_precincts = 0;
108 config.precint_width[0] = 7;
109 config.precint_height[0] = 7;
110 for (int i = 1; i < 6; ++i) {
111 config.precint_width[i] = 8;
112 config.precint_height[i] = 8;
115 auto status = nvjpeg2kEncodeParamsSetEncodeConfig(encoder_params, &config);
116 if (status != NVJPEG2K_STATUS_SUCCESS) {
117 throw CUDAError("nvjpeg2kEncodeParamsSetEncodeConfig", status);
121 status = nvjpeg2kEncodeParamsSetQuality(encoder_params, 50);
122 if (status != NVJPEG2K_STATUS_SUCCESS) {
123 throw CUDAError("nvjpeg2kEncodeParamsSetQuality", status);
126 status = nvjpeg2kEncode(encoder_handle, encoder_state, encoder_params, input.device_image(), stream);
127 if (status != NVJPEG2K_STATUS_SUCCESS) {
128 throw CUDAError("nvjpeg2kEncode", status);
131 size_t compressed_size;
132 status = nvjpeg2kEncodeRetrieveBitstream(encoder_handle, encoder_state, nullptr, &compressed_size, stream);
134 dcp::ArrayData output(compressed_size);
135 status = nvjpeg2kEncodeRetrieveBitstream(encoder_handle, encoder_state, output.data(), &compressed_size, stream);
136 if (status != NVJPEG2K_STATUS_SUCCESS) {
137 throw CUDAError("nvjpeg2kEncodeRetrieveBitstream", status);
140 boost::mutex::scoped_lock lm2(_output_mutex);
141 _output[make_pair(input.index(), input.eyes())] = output;
142 _output_condition.notify_all();
145 cudaStreamDestroy(stream);
149 CUDAJ2KFrameEncoder::Input::Input(DCPVideo const& vf)
152 , _resolution(vf.resolution())
154 _xyz = convert_to_xyz(vf.frame(), boost::bind(&Log::dcp_log, dcpomatic_log.get(), _1, _2));
156 for (int i = 0; i < 3; ++i) {
157 _pixel_data_h[i] = reinterpret_cast<uint8_t*>(_xyz->data(i));
160 _size = _xyz->size();
162 auto const pitch = _size.width * 2;
164 for (int i = 0; i < 3; ++i) {
165 _pitch_in_bytes[i] = pitch;
166 auto status = cudaMallocPitch(
167 reinterpret_cast<void**>(&_pixel_data_d[i]),
173 if (status != cudaSuccess) {
174 throw CUDAError("cudaMallocPitch", status);
177 status = cudaMemcpy2D(
184 cudaMemcpyHostToDevice
187 if (status != cudaSuccess) {
188 throw CUDAError("cudaMemcpy2D", status);
192 _device_image.num_components = 3;
193 _device_image.pixel_data = reinterpret_cast<void**>(_pixel_data_d);
194 _device_image.pixel_type = NVJPEG2K_UINT16;
195 _device_image.pitch_in_bytes = reinterpret_cast<size_t*>(_pitch_in_bytes);
199 CUDAJ2KFrameEncoder::Input::Input(Input&& other)
200 : _index(other._index)
203 , _resolution(other._resolution)
205 for (int i = 0; i < 3; ++i) {
206 _pixel_data_d[i] = other._pixel_data_d[i];
207 other._pixel_data_d[i] = nullptr;
208 _pitch_in_bytes[i] = other._pitch_in_bytes[i];
211 _device_image.num_components = other._device_image.num_components;
212 _device_image.pixel_data = reinterpret_cast<void**>(_pixel_data_d);
213 _device_image.pixel_type = NVJPEG2K_UINT16;
214 _device_image.pitch_in_bytes = reinterpret_cast<size_t*>(_pitch_in_bytes);
218 CUDAJ2KFrameEncoder::Input::~Input()
220 cudaFree(_pixel_data_d[0]);
221 cudaFree(_pixel_data_d[1]);
222 cudaFree(_pixel_data_d[2]);
226 optional<dcp::ArrayData>
227 CUDAJ2KFrameEncoder::encode(DCPVideo const& vf)
229 auto input = Input(vf);
231 auto const size = vf.frame()->out_size();
232 DCPOMATIC_ASSERT(!_size || size == *_size);
235 DCPOMATIC_ASSERT(!_resolution || vf.resolution() == *_resolution);
236 _resolution = vf.resolution();
239 boost::mutex::scoped_lock lm (_input_mutex);
240 _input.push(std::move(input));
241 std::cout << "push input: " << _input.size() << "\n";
242 _input_condition.notify_all();
245 boost::mutex::scoped_lock lm(_output_mutex);
246 while (_output.find(make_pair(vf.index(), vf.eyes())) == _output.end()) {
247 _output_condition.wait(lm);
250 auto iter = _output.find(make_pair(vf.index(), vf.eyes()));
251 auto data = iter->second;
259 CUDAJ2KFrameEncoder::log_thread_start ()
261 LOG_TIMING("start-encoder-thread thread=%1", thread_id());
266 CUDAJ2KFrameEncoder::flush()