675b26f57f2421e4ad0823a19b339c000cc031a7
[dcpomatic.git] / src / lib / cuda_j2k_frame_encoder.cc
1 /*
2     Copyright (C) 2022 Carl Hetherington <cth@carlh.net>
3
4     This file is part of DCP-o-matic.
5
6     DCP-o-matic is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     DCP-o-matic is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.
18
19 */
20
21
#include "cross.h"
#include "cuda_j2k_frame_encoder.h"
#include "dcpomatic_log.h"
#include "dcp_video.h"
#include "exceptions.h"
#include "player_video.h"
#include <dcp/openjpeg_image.h>
#include <nvjpeg2k.h>
#include <cstdint>
#include <vector>
31
32
33 using std::vector;
34 using boost::optional;
35
36
37 CUDAJ2KFrameEncoder::CUDAJ2KFrameEncoder()
38 {
39         nvjpeg2kEncoderCreateSimple(&_encoder_handle);
40         nvjpeg2kEncodeStateCreate(_encoder_handle, &_encoder_state);
41         nvjpeg2kEncodeParamsCreate(&_encoder_params);
42 }
43
44
45 CUDAJ2KFrameEncoder::Frame::Frame(DCPVideo const& vf)
46 {
47         auto xyz = convert_to_xyz(vf.frame(), boost::bind(&Log::dcp_log, dcpomatic_log.get(), _1, _2));
48
49         void* pixel_data_h[] = {
50                 xyz->data(0),
51                 xyz->data(1),
52                 xyz->data(2)
53         };
54
55         auto const pitch = xyz->size().width * 2;
56
57         for (int i = 0; i < 3; ++i) {
58                 _pitch_in_bytes[i] = pitch;
59                 auto status = cudaMallocPitch(
60                         reinterpret_cast<void**>(&_pixel_data_d[i]),
61                         &_pitch_in_bytes[i],
62                         pitch,
63                         xyz->size().height
64                         );
65
66                 if (status != cudaSuccess) {
67                         throw CUDAError("cudaMallocPitch", status);
68                 }
69
70                 status = cudaMemcpy2D(
71                         _pixel_data_d[i],
72                         _pitch_in_bytes[i],
73                         pixel_data_h[i],
74                         _pitch_in_bytes[i],
75                         pitch,
76                         xyz->size().height,
77                         cudaMemcpyHostToDevice
78                         );
79
80                 if (status != cudaSuccess) {
81                         throw CUDAError("cudaMemcpy2D", status);
82                 }
83
84                 cudaDeviceSynchronize();
85         }
86
87         _device_image.num_components = 3;
88         _device_image.pixel_data = reinterpret_cast<void**>(_pixel_data_d);
89         _device_image.pixel_type = NVJPEG2K_UINT16;
90         _device_image.pitch_in_bytes = reinterpret_cast<size_t*>(_pitch_in_bytes);
91 }
92
93
94 CUDAJ2KFrameEncoder::Frame::Frame(Frame&& other)
95 {
96         for (int i = 0; i < 3; ++i) {
97                 _pixel_data_d[i] = other._pixel_data_d[i];
98                 other._pixel_data_d[i] = nullptr;
99                 _pitch_in_bytes[i] = other._pitch_in_bytes[i];
100         }
101
102         _device_image.num_components = other._device_image.num_components;
103         _device_image.pixel_data = reinterpret_cast<void**>(_pixel_data_d);
104         _device_image.pixel_type = NVJPEG2K_UINT16;
105         _device_image.pitch_in_bytes = reinterpret_cast<size_t*>(_pitch_in_bytes);
106 }
107
108
109 CUDAJ2KFrameEncoder::Frame::~Frame()
110 {
111         cudaFree(_pixel_data_d[0]);
112         cudaFree(_pixel_data_d[1]);
113         cudaFree(_pixel_data_d[2]);
114 }
115
116
117 vector<dcp::ArrayData>
118 CUDAJ2KFrameEncoder::encode(DCPVideo const& vf)
119 {
120         int constexpr BATCH_SIZE = 128;
121
122         auto const size = vf.frame()->out_size();
123         DCPOMATIC_ASSERT(!_size || size == *_size);
124         _size = size;
125
126         DCPOMATIC_ASSERT(!_resolution || vf.resolution() == *_resolution);
127         _resolution = vf.resolution();
128
129         if (_batch.size() < BATCH_SIZE) {
130                 _batch.push_back(Frame(vf));
131                 return {};
132         }
133
134         return flush();
135 }
136
137
138 vector<dcp::ArrayData>
139 CUDAJ2KFrameEncoder::flush()
140 {
141         if (_batch.empty()) {
142                 return {};
143         }
144
145         nvjpeg2kImageComponentInfo_t info[3];
146         for (int i = 0; i < 3; ++i) {
147                 info[i].component_width = _size->width;
148                 info[i].component_height = _size->height;
149                 info[i].precision = 12;
150                 info[i].sgn = 0;
151         }
152
153         nvjpeg2kEncodeConfig_t config;
154         memset(&config, 0, sizeof(config));
155         config.stream_type = NVJPEG2K_STREAM_J2K;
156         config.color_space = NVJPEG2K_COLORSPACE_SRGB;
157         config.image_width = _size->width;
158         config.image_height = _size->height;
159         config.num_components = 3;
160         config.image_comp_info = reinterpret_cast<nvjpeg2kImageComponentInfo_t*>(&info);
161         config.code_block_w = 32;
162         config.code_block_h = 32;
163         config.irreversible = 0;
164         config.mct_mode = 1;
165         config.prog_order = NVJPEG2K_CPRL;
166         config.num_resolutions = *_resolution == Resolution::FOUR_K ? 7 : 6;
167
168         auto status = nvjpeg2kEncodeParamsSetEncodeConfig(_encoder_params, &config);
169         if (status != NVJPEG2K_STATUS_SUCCESS) {
170                 throw CUDAError("nvjpeg2kEncodeParamsSetEncodeConfig", status);
171         }
172
173         // XXX: quality
174         status = nvjpeg2kEncodeParamsSetQuality(_encoder_params, 25);
175         if (status != NVJPEG2K_STATUS_SUCCESS) {
176                 throw CUDAError("nvjpeg2kEncodeParamsSetQuality", status);
177         }
178
179         vector<dcp::ArrayData> output;
180
181         for (auto const& frame: _batch) {
182
183                 auto x = frame.device_image();
184
185                 status = nvjpeg2kEncode(_encoder_handle, _encoder_state, _encoder_params, frame.device_image(), 0);
186                 if (status != NVJPEG2K_STATUS_SUCCESS) {
187                         throw CUDAError("nvjpeg2kEncode", status);
188                 }
189
190                 size_t compressed_size;
191                 status = nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encoder_state, nullptr, &compressed_size, 0);
192
193                 dcp::ArrayData this_output(compressed_size);
194                 status = nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encoder_state, this_output.data(), &compressed_size, 0);
195                 if (status != NVJPEG2K_STATUS_SUCCESS) {
196                         throw CUDAError("nvjpeg2kEncodeRetrieveBitstream", status);
197                 }
198
199                 output.push_back(this_output);
200                 cudaStreamSynchronize(0);
201         }
202
203         _batch.clear();
204
205         return output;
206 }
207
208
209 void
210 CUDAJ2KFrameEncoder::log_thread_start ()
211 {
212        LOG_TIMING("start-encoder-thread thread=%1", thread_id());
213 }