fixup! Playback sort of works.
[dcpomatic.git] / src / lib / cuda_decoder.cc
1 /*
2     Copyright (C) 2022 Carl Hetherington <cth@carlh.net>
3
4     This file is part of DCP-o-matic.
5
6     DCP-o-matic is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     DCP-o-matic is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.
18
19 */
20
21
22 #include "cuda_decoder.h"
23 #include "dcpomatic_assert.h"
24 #include "dcpomatic_log.h"
25 #include "exceptions.h"
26 #include "image.h"
27 #include "scope_guard.h"
28 #include <dcp/openjpeg_image.h>
29 #include <nvjpeg2k.h>
30
31
32 using std::shared_ptr;
33 using std::string;
34
35
36 CUDADecoder* CUDADecoder::_instance = nullptr;
37
38
39 CUDADecoder::CUDADecoder()
40 {
41         _thread = boost::thread(std::bind(&CUDADecoder::thread, this));
42 }
43
44
45 CUDADecoder::~CUDADecoder()
46 {
47         try {
48                 _thread.interrupt();
49                 _thread.join();
50         } catch (...) {}
51
52         for (int i = 0; i < 3; ++i) {
53                 cudaFree(_device[i]);
54         }
55 }
56
57
58 void
59 CUDADecoder::check_jpeg2k(string name, nvjpeg2kStatus_t status)
60 {
61         if (status != NVJPEG2K_STATUS_SUCCESS) {
62                 throw CUDAError(name, status);
63         }
64 }
65
66
67 void
68 CUDADecoder::thread()
69 try
70 {
71         nvjpeg2kHandle_t handle;
72         check_jpeg2k("nvjpeg2kCreateSimple", nvjpeg2kCreateSimple(&handle));
73         ScopeGuard handle_guard([handle]() {
74                 nvjpeg2kDestroy(handle);
75         });
76
77         nvjpeg2kDecodeState_t state;
78         check_jpeg2k("nvjpeg2kDecodeStateCreate", nvjpeg2kDecodeStateCreate(handle, &state));
79         ScopeGuard state_guard([&state]() {
80                 nvjpeg2kDecodeStateDestroy(state);
81         });
82
83         nvjpeg2kStream_t jpeg2k_stream;
84         check_jpeg2k("nvjpeg2kStreamCreate", nvjpeg2kStreamCreate(&jpeg2k_stream));
85         ScopeGuard jpeg2k_stream_guard([&jpeg2k_stream]() {
86                 nvjpeg2kStreamDestroy(jpeg2k_stream);
87         });
88
89         while (true) {
90                 boost::mutex::scoped_lock lm(_mutex);
91                 while (_queue.empty()) {
92                         _queue_empty_condition.wait(lm);
93                 }
94
95                 auto input = std::move(_queue.front());
96                 _queue.pop();
97                 lm.unlock();
98
99                 auto output = decode_one(input, handle, state, jpeg2k_stream);
100
101                 lm.lock();
102                 _output[input.id] = output;
103                 _complete_condition.notify_all();
104         }
105 }
106 catch (CUDAError& e)
107 {
108         LOG_ERROR("CUDA error: %1 (aborting CUDADecoder)", e.what());
109 }
110 catch (boost::thread_interrupted&)
111 {
112
113 }
114 catch (std::exception& e)
115 {
116         LOG_ERROR("Aborting CUDADecoder thread: %1", e.what());
117 }
118 catch (...)
119 {
120         LOG_ERROR_NC("Aborting CUDADecoder thread: unknown error");
121 }
122
123
124
125 shared_ptr<Image>
126 CUDADecoder::decode_one(QueueItem const& input, nvjpeg2kHandle_t handle, nvjpeg2kDecodeState_t state, nvjpeg2kStream_t jpeg2k_stream)
127 {
128         try {
129                 check_jpeg2k("nvjpeg2kStreamParse", nvjpeg2kStreamParse(handle, input.data->data(), input.data->size(), 0, 0, jpeg2k_stream));
130
131                 nvjpeg2kImageInfo_t image_info;
132                 check_jpeg2k("nvjpeg2kStreamGetImageInfo", nvjpeg2kStreamGetImageInfo(jpeg2k_stream, &image_info));
133
134                 nvjpeg2kImageComponentInfo_t image_component_info[3];
135                 for (int i = 0; i < 3; ++i) {
136                         check_jpeg2k("nvjpeg2kStreamGetImageComponentInfo", nvjpeg2kStreamGetImageComponentInfo(jpeg2k_stream, &image_component_info[i], i));
137                 }
138
139                 dcp::Size size(image_component_info[0].component_width, image_component_info[0].component_height);
140                 if (size != _allocation) {
141                         for (int i = 0; i < 3; ++i) {
142                                 cudaFree(_device[i]);
143                                 _device[i] = nullptr;
144                                 auto status = cudaMallocPitch(reinterpret_cast<void**>(&_device[i]), &_pitch[i], size.width * 2, size.height);
145                                 if (status != cudaSuccess) {
146                                         throw CUDAError("cudaMallocPitch", status);
147                                 }
148                                 _host[i].resize(_pitch[i] * size.height / 2);
149                         }
150                         _allocation = size;
151                 }
152
153                 nvjpeg2kImage_t output_image;
154                 output_image.pixel_data = reinterpret_cast<void**>(_device);
155                 output_image.pixel_type = NVJPEG2K_UINT16;
156                 output_image.pitch_in_bytes = _pitch;
157                 output_image.num_components = 3;
158
159                 check_jpeg2k("nvjpeg2kDecode", nvjpeg2kDecode(handle, state, jpeg2k_stream, &output_image, 0));
160                 cudaDeviceSynchronize();
161
162                 for (int i = 0; i < 3; ++i) {
163                         auto status = cudaMemcpy(_host[i].data(), _device[i], _pitch[i] * size.height, cudaMemcpyDeviceToHost);
164                         if (status != cudaSuccess) {
165                                 throw CUDAError("cudaMemcpy", status);
166                         }
167                 }
168
169                 auto output = std::make_shared<Image>(input.pixel_format, size, input.alignment);
170                 for (int y = 0; y < size.height; ++y) {
171                         int p = y * _pitch[0] / 2;
172                         auto q = reinterpret_cast<uint16_t *>(output->data()[0] + y * output->stride()[0]);
173                         for (int x = 0; x < size.width; ++x) {
174                                 *q++ = _host[0][p] << 4;
175                                 *q++ = _host[1][p] << 4;
176                                 *q++ = _host[2][p] << 4;
177                                 ++p;
178                         }
179                 }
180
181                 return output;
182
183         } catch (CUDAError& e) {
184                 LOG_ERROR("CUDA error: %1", e.what());
185                 return {};
186         }
187 }
188
189
190 shared_ptr<Image>
191 CUDADecoder::decode(shared_ptr<const dcp::Data> j2k_data, int reduce, AVPixelFormat pixel_format, Image::Alignment alignment)
192 {
193         boost::mutex::scoped_lock lm(_mutex);
194         auto id = _next_id++;
195         _queue.push({id, j2k_data, reduce, pixel_format, alignment});
196         _queue_empty_condition.notify_all();
197
198         while (_output.find(id) == _output.end()) {
199                 _complete_condition.wait(lm);
200         }
201
202         auto iter = _output.find(id);
203         if (iter == _output.end()) {
204                 return {};
205         }
206
207         auto output = *iter;
208         _output.erase(iter);
209         return output.second;
210 }
211
212
213 CUDADecoder *
214 CUDADecoder::instance()
215 {
216         if (!_instance) {
217                 _instance = new CUDADecoder();
218         }
219
220         return _instance;
221 }
222