445548558bb931c9a00dff114c6c8767d3440bc7
[dcpomatic.git] / src / lib / cuda_j2k_frame_encoder.cc
1 /*
2     Copyright (C) 2022 Carl Hetherington <cth@carlh.net>
3
4     This file is part of DCP-o-matic.
5
6     DCP-o-matic is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     DCP-o-matic is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.
18
19 */
20
21
22 #include "cross.h"
23 #include "cuda_j2k_frame_encoder.h"
24 #include "dcpomatic_log.h"
25 #include "dcp_video.h"
26 #include "exceptions.h"
27 #include "player_video.h"
28 #include <dcp/openjpeg_image.h>
29 #include <nvjpeg2k.h>
30 #include <vector>
31
32
33 using std::make_pair;
34 using std::vector;
35 using boost::optional;
36
37
38 CUDAJ2KFrameEncoder::CUDAJ2KFrameEncoder()
39 {
40         nvjpeg2kEncoderCreateSimple(&_encoder_handle);
41         nvjpeg2kEncodeStateCreate(_encoder_handle, &_encoder_state);
42         nvjpeg2kEncodeParamsCreate(&_encoder_params);
43
44         cudaStreamCreateWithFlags(&_stream, cudaStreamNonBlocking);
45 }
46
47
48 CUDAJ2KFrameEncoder::~CUDAJ2KFrameEncoder()
49 {
50         cudaStreamDestroy(_stream);
51 }
52
53
54 CUDAJ2KFrameEncoder::Input::Input(DCPVideo const& vf, cudaStream_t stream)
55         : _index(vf.index())
56         , _eyes(vf.eyes())
57 {
58         _xyz = convert_to_xyz(vf.frame(), boost::bind(&Log::dcp_log, dcpomatic_log.get(), _1, _2));
59
60         for (int i = 0; i < 3; ++i) {
61                 _pixel_data_h[i] = reinterpret_cast<uint8_t*>(_xyz->data(i));
62         }
63
64         auto const pitch = _xyz->size().width * 2;
65
66         for (int i = 0; i < 3; ++i) {
67                 _pitch_in_bytes[i] = pitch;
68                 auto status = cudaMallocPitch(
69                         reinterpret_cast<void**>(&_pixel_data_d[i]),
70                         &_pitch_in_bytes[i],
71                         pitch,
72                         _xyz->size().height
73                         );
74
75                 if (status != cudaSuccess) {
76                         throw CUDAError("cudaMallocPitch", status);
77                 }
78
79                 status = cudaMemcpy2DAsync(
80                         _pixel_data_d[i],
81                         _pitch_in_bytes[i],
82                         _pixel_data_h[i],
83                         _pitch_in_bytes[i],
84                         pitch,
85                         _xyz->size().height,
86                         cudaMemcpyHostToDevice,
87                         stream
88                         );
89
90                 if (status != cudaSuccess) {
91                         throw CUDAError("cudaMemcpy2D", status);
92                 }
93         }
94
95         _device_image.num_components = 3;
96         _device_image.pixel_data = reinterpret_cast<void**>(_pixel_data_d);
97         _device_image.pixel_type = NVJPEG2K_UINT16;
98         _device_image.pitch_in_bytes = reinterpret_cast<size_t*>(_pitch_in_bytes);
99 }
100
101
102 CUDAJ2KFrameEncoder::Input::Input(Input&& other)
103         : _index(other._index)
104         , _eyes(other._eyes)
105 {
106         for (int i = 0; i < 3; ++i) {
107                 _pixel_data_d[i] = other._pixel_data_d[i];
108                 other._pixel_data_d[i] = nullptr;
109                 _pitch_in_bytes[i] = other._pitch_in_bytes[i];
110         }
111
112         _device_image.num_components = other._device_image.num_components;
113         _device_image.pixel_data = reinterpret_cast<void**>(_pixel_data_d);
114         _device_image.pixel_type = NVJPEG2K_UINT16;
115         _device_image.pitch_in_bytes = reinterpret_cast<size_t*>(_pitch_in_bytes);
116 }
117
118
119 CUDAJ2KFrameEncoder::Input::~Input()
120 {
121         cudaFree(_pixel_data_d[0]);
122         cudaFree(_pixel_data_d[1]);
123         cudaFree(_pixel_data_d[2]);
124 }
125
126
127 optional<dcp::ArrayData>
128 CUDAJ2KFrameEncoder::encode(DCPVideo const& vf)
129 {
130         auto input = Input(vf, _stream);
131
132         auto const size = vf.frame()->out_size();
133         DCPOMATIC_ASSERT(!_size || size == *_size);
134         _size = size;
135
136         DCPOMATIC_ASSERT(!_resolution || vf.resolution() == *_resolution);
137         _resolution = vf.resolution();
138
139         nvjpeg2kImageComponentInfo_t info[3];
140         for (int i = 0; i < 3; ++i) {
141                 info[i].component_width = _size->width;
142                 info[i].component_height = _size->height;
143                 info[i].precision = 12;
144                 info[i].sgn = 0;
145         }
146
147         nvjpeg2kEncodeConfig_t config;
148         memset(&config, 0, sizeof(config));
149         config.stream_type = NVJPEG2K_STREAM_J2K;
150         config.color_space = NVJPEG2K_COLORSPACE_SRGB;
151         config.image_width = _size->width;
152         config.image_height = _size->height;
153         config.num_components = 3;
154         config.image_comp_info = reinterpret_cast<nvjpeg2kImageComponentInfo_t*>(&info);
155         config.code_block_w = 32;
156         config.code_block_h = 32;
157         config.irreversible = 0;
158         config.mct_mode = 1;
159         config.prog_order = NVJPEG2K_CPRL;
160         config.num_resolutions = *_resolution == Resolution::FOUR_K ? 7 : 6;
161
162         auto status = nvjpeg2kEncodeParamsSetEncodeConfig(_encoder_params, &config);
163         if (status != NVJPEG2K_STATUS_SUCCESS) {
164                 throw CUDAError("nvjpeg2kEncodeParamsSetEncodeConfig", status);
165         }
166
167         // XXX: quality
168         status = nvjpeg2kEncodeParamsSetQuality(_encoder_params, 30);
169         if (status != NVJPEG2K_STATUS_SUCCESS) {
170                 throw CUDAError("nvjpeg2kEncodeParamsSetQuality", status);
171         }
172
173         status = nvjpeg2kEncode(_encoder_handle, _encoder_state, _encoder_params, input.device_image(), _stream);
174         if (status != NVJPEG2K_STATUS_SUCCESS) {
175                 throw CUDAError("nvjpeg2kEncode", status);
176         }
177
178         size_t compressed_size;
179         status = nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encoder_state, nullptr, &compressed_size, _stream);
180
181         dcp::ArrayData output(compressed_size);
182         status = nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encoder_state, output.data(), &compressed_size, _stream);
183         if (status != NVJPEG2K_STATUS_SUCCESS) {
184                 throw CUDAError("nvjpeg2kEncodeRetrieveBitstream", status);
185         }
186
187         return output;
188 }
189
190
191 void
192 CUDAJ2KFrameEncoder::log_thread_start ()
193 {
194        LOG_TIMING("start-encoder-thread thread=%1", thread_id());
195 }
196
197
198 void
199 CUDAJ2KFrameEncoder::flush()
200 {
201
202 }