2 Copyright (C) 2022 Carl Hetherington <cth@carlh.net>
4 This file is part of DCP-o-matic.
6 DCP-o-matic is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 DCP-o-matic is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with DCP-o-matic. If not, see <http://www.gnu.org/licenses/>.
23 #include "dcpomatic_assert.h"
24 #include "exceptions.h"
25 #include <dcp/openjpeg_image.h>
29 using std::shared_ptr;
// Storage for the static CUDA::_instance pointer; it starts out as nullptr
// and is assigned elsewhere in this file with `_instance = new CUDA()`.
32 CUDA* CUDA::_instance = nullptr;
// Start the worker thread: _decode_thread runs CUDA::decode_thread() bound to
// this object.
// NOTE(review): the enclosing function header is not visible in this view —
// presumably this is the body of the CUDA constructor; confirm.
37 _decode_thread = std::thread(std::bind(&CUDA::decode_thread, this));
// Worker body: creates the nvjpeg2k objects once, then services requests taken
// from _decode_queue, publishing each result in _decode_output keyed by the
// request id.
// NOTE(review): the function header, the braces and some declarations are not
// visible in this view — presumably this is CUDA::decode_thread(); confirm
// against the full file before relying on the notes below.

// One-off setup: library handle, decode state and stream.  Each creation
// failure is reported by throwing CUDAError with the name of the failing call.
// NOTE(review): no matching destroy calls for these three objects are visible
// in this view.
44 nvjpeg2kHandle_t handle;
45 auto status = nvjpeg2kCreateSimple(&handle);
46 if (status != NVJPEG2K_STATUS_SUCCESS) {
47 throw CUDAError("nvjpeg2kCreateSimple", status);
50 nvjpeg2kDecodeState_t decode_state;
51 status = nvjpeg2kDecodeStateCreate(handle, &decode_state);
52 if (status != NVJPEG2K_STATUS_SUCCESS) {
53 throw CUDAError("nvjpeg2kDecodeStateCreate", status);
56 nvjpeg2kStream_t jpeg2k_stream;
57 status = nvjpeg2kStreamCreate(&jpeg2k_stream);
58 if (status != NVJPEG2K_STATUS_SUCCESS) {
59 throw CUDAError("nvjpeg2kStreamCreate", status);
// Take _decode_mutex and sleep on _decode_queue_empty_condition until there is
// at least one queued request.
63 boost::mutex::scoped_lock lm(_decode_mutex);
64 while (_decode_queue.empty()) {
65 _decode_queue_empty_condition.wait(lm);
// Move the oldest request out of the queue.
// NOTE(review): the pop of the queue and any unlock of `lm` are not visible
// here; presumably they follow this line — confirm.
68 auto input = std::move(_decode_queue.front());
// Debug output: size of the data to be decoded.
73 std::cout << "we got " << input.data->size() << " bytes.\n";
// Parse the queued data into jpeg2k_stream.  `status` is declared afresh here,
// so this code is in a nested scope (presumably the `try` matching the
// `catch (CUDAError&)` further down).
74 auto status = nvjpeg2kStreamParse(handle, input.data->data(), input.data->size(), 0, 0, jpeg2k_stream);
75 if (status != NVJPEG2K_STATUS_SUCCESS) {
76 throw CUDAError("nvjpeg2kStreamParse", status);
// Fetch the image-level information and print the component count.
79 nvjpeg2kImageInfo_t image_info;
80 status = nvjpeg2kStreamGetImageInfo(jpeg2k_stream, &image_info);
81 if (status != NVJPEG2K_STATUS_SUCCESS) {
82 throw CUDAError("nvjpeg2kStreamGetImageInfo", status);
84 std::cout << image_info.num_components << " components.\n";
// Query per-component information for components 0, 1 and 2.
// NOTE(review): the count is hard-coded to 3 and is not checked against
// image_info.num_components.
86 nvjpeg2kImageComponentInfo_t image_component_info[3];
87 for (int i = 0; i < 3; ++i) {
88 status = nvjpeg2kStreamGetImageComponentInfo(jpeg2k_stream, &image_component_info[i], i);
89 if (status != NVJPEG2K_STATUS_SUCCESS) {
90 throw CUDAError("nvjpeg2kStreamGetImageComponentInfo", status);
// The dimensions of component 0 are used for all three planes and for the
// output image; print them along with component 0's precision.
94 auto const width = image_component_info[0].component_width;
95 auto const height = image_component_info[0].component_height;
96 std::cout << width << "x" << height << " " << ((int)image_component_info[0].precision) << "\n";
// One device buffer per component, allocated with cudaMallocPitch: each row is
// width * 2 bytes (two bytes per uint16_t sample) and there are `height` rows.
// The row pitch chosen by CUDA is stored in pitch[i].
// NOTE(review): the declaration of `pitch` is not visible in this view.
98 uint16_t* decoded_d[3];
101 for (int i = 0; i < 3; ++i) {
102 printf("cudaMallocPitch %d %d\n", width * 2, height);
103 auto status = cudaMallocPitch(reinterpret_cast<void**>(&decoded_d[i]), &pitch[i], width * 2, height);
104 if (status != cudaSuccess) {
105 throw CUDAError("cudaMallocPitch", status);
// Describe the destination to nvjpeg2k: three planes of 16-bit samples, using
// the device pointers and pitches obtained above.
109 nvjpeg2kImage_t output_image;
110 output_image.pixel_data = reinterpret_cast<void**>(decoded_d);
111 output_image.pixel_type = NVJPEG2K_UINT16;
112 output_image.pitch_in_bytes = pitch;
113 output_image.num_components = 3;
// Run the decode, print its status for debugging and throw on failure.
115 status = nvjpeg2kDecode(handle, decode_state, jpeg2k_stream, &output_image, 0);
116 std::cout << "decode said " << status << "\n";
117 if (status != NVJPEG2K_STATUS_SUCCESS) {
119 throw CUDAError("nvjpeg2kDecode", status);
// Wait for the device before reading the results back.
// NOTE(review): the return value of cudaDeviceSynchronize() is not checked.
121 cudaDeviceSynchronize();
// Copy each plane from device to host.  `size` is the whole pitched allocation
// in bytes (pitch * height), so each host vector holds size / 2 uint16_t
// values, including any row padding.
123 std::vector<uint16_t> decoded_h[3];
124 for (int i = 0; i < 3; ++i) {
125 auto size = pitch[i] * height;
126 decoded_h[i].resize(size / 2);
127 auto status = cudaMemcpy(decoded_h[i].data(), decoded_d[i], size, cudaMemcpyDeviceToHost);
128 if (status != cudaSuccess) {
129 throw CUDAError("cudaMemcpy", status);
// Build the output Image with the pixel format and alignment requested by the
// caller, and the decoded width and height.
133 auto output = std::make_shared<Image>(input.pixel_format, dcp::Size(width, height), input.alignment);
// Interleave the three host planes into plane 0 of the output image: for each
// row, `q` starts at the beginning of that row (using the image's stride) and
// receives three uint16_t values per pixel, each shifted left by 4 bits.
// NOTE(review): the declaration and update of the index `p` are not visible in
// this view; the << 4 presumably scales 12-bit samples to 16 bits — confirm.
135 for (size_t y = 0; y < height; ++y) {
136 auto q = reinterpret_cast<uint16_t *>(output->data()[0] + y * output->stride()[0]);
137 for (size_t x = 0; x < width; ++x) {
138 *q++ = decoded_h[0][p] << 4;
139 *q++ = decoded_h[1][p] << 4;
140 *q++ = decoded_h[2][p] << 4;
// Release the three device buffers.
// NOTE(review): as far as is visible here, any CUDAError thrown between
// cudaMallocPitch and this point jumps to the catch below without reaching
// these cudaFree calls — check whether the device memory leaks on that path.
145 for (int i = 0; i < 3; ++i) {
146 cudaFree(decoded_d[i]);
// Publish the decoded image under the request's id and wake every thread
// waiting on _decode_complete_condition.
// NOTE(review): whether _decode_mutex is held at this point is not visible in
// this view — confirm.
150 _decode_output[input.id] = output;
151 _decode_complete_condition.notify_all();
// On CUDAError, publish an empty result for this id so that the waiting caller
// is still woken rather than blocking forever.
153 } catch (CUDAError&) {
155 _decode_output[input.id] = {};
156 _decode_complete_condition.notify_all();
// Queue a decode request and block until its result is available.
//
// @param j2k_data     Data to decode; passed to the worker via the queue.
// @param reduce       Stored in the queued request.  NOTE(review): the visible
//                     worker code never reads it — confirm whether it is honoured.
// @param pixel_format Pixel format used when the output Image is created.
// @param alignment    Alignment used when the output Image is created.
// @return The value stored in _decode_output for this request (`output.second`).
//         The worker stores an empty value when decoding threw CUDAError.
// NOTE(review): the return-type line, the braces and the declaration of
// `output` are not visible in this view.
164 CUDA::decode(shared_ptr<const dcp::Data> j2k_data, int reduce, AVPixelFormat pixel_format, Image::Alignment alignment)
// Under _decode_mutex: allocate a fresh id, enqueue the request and wake the
// worker that may be waiting on _decode_queue_empty_condition.
166 boost::mutex::scoped_lock lm(_decode_mutex);
167 auto id = _next_decode_id++;
168 _decode_queue.push({id, j2k_data, reduce, pixel_format, alignment});
169 _decode_queue_empty_condition.notify_all();
// Sleep (releasing the lock while waiting) until an entry for our id appears
// in _decode_output.
171 while (_decode_output.find(id) == _decode_output.end()) {
172 _decode_complete_condition.wait(lm);
// Look the entry up again.  The loop above only exits once the entry exists,
// so the "not found" branch below looks purely defensive.
// NOTE(review): the body of that branch is not visible in this view.
175 auto iter = _decode_output.find(id);
176 if (iter ==_decode_output.end()) {
// Remove the entry from the map and hand its stored value back to the caller.
181 _decode_output.erase(iter);
182 return output.second;
// Create the CUDA object with a raw `new` and store it in the static
// _instance pointer.
// NOTE(review): the enclosing function and any "already created" guard are not
// visible in this view; no matching delete is visible either — confirm.
190 _instance = new CUDA();