diff options
| author | Carl Hetherington <cth@carlh.net> | 2022-05-24 20:50:19 +0200 |
|---|---|---|
| committer | Carl Hetherington <cth@carlh.net> | 2022-05-24 20:50:19 +0200 |
| commit | d39f2ce23a8d12c11b0ccafc4568761802101ba0 (patch) | |
| tree | 2e081c8a33f9f0c037dbdfb6290f2505f97623a6 | |
| parent | 9d967309911e96d2ab752692fee7eb64c2b3a43a (diff) | |
Playback sort of works.
| -rw-r--r-- | src/lib/cuda.cc | 195 | ||||
| -rw-r--r-- | src/lib/cuda.h | 71 | ||||
| -rw-r--r-- | src/lib/exceptions.cc | 1 | ||||
| -rw-r--r-- | src/lib/j2k_image_proxy.cc | 6 | ||||
| -rw-r--r-- | src/lib/wscript | 6 |
5 files changed, 276 insertions, 3 deletions
diff --git a/src/lib/cuda.cc b/src/lib/cuda.cc new file mode 100644 index 000000000..d12dfe34a --- /dev/null +++ b/src/lib/cuda.cc @@ -0,0 +1,195 @@ +/* + Copyright (C) 2022 Carl Hetherington <cth@carlh.net> + + This file is part of DCP-o-matic. + + DCP-o-matic is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + DCP-o-matic is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with DCP-o-matic. If not, see <http://www.gnu.org/licenses/>. + +*/ + + +#include "cuda.h" +#include "dcpomatic_assert.h" +#include "exceptions.h" +#include <dcp/openjpeg_image.h> +#include <nvjpeg2k.h> + + +using std::shared_ptr; + + +CUDA* CUDA::_instance = nullptr; + + +CUDA::CUDA() +{ + _decode_thread = std::thread(std::bind(&CUDA::decode_thread, this)); +} + + +void +CUDA::decode_thread() +{ + nvjpeg2kHandle_t handle; + auto status = nvjpeg2kCreateSimple(&handle); + if (status != NVJPEG2K_STATUS_SUCCESS) { + throw CUDAError("nvjpeg2kCreateSimple", status); + } + + nvjpeg2kDecodeState_t decode_state; + status = nvjpeg2kDecodeStateCreate(handle, &decode_state); + if (status != NVJPEG2K_STATUS_SUCCESS) { + throw CUDAError("nvjpeg2kDecodeStateCreate", status); + } + + nvjpeg2kStream_t jpeg2k_stream; + status = nvjpeg2kStreamCreate(&jpeg2k_stream); + if (status != NVJPEG2K_STATUS_SUCCESS) { + throw CUDAError("nvjpeg2kStreamCreate", status); + } + + while (true) { + boost::mutex::scoped_lock lm(_decode_mutex); + while (_decode_queue.empty()) { + _decode_queue_empty_condition.wait(lm); + } + + auto input = std::move(_decode_queue.front()); + _decode_queue.pop(); + lm.unlock(); + + try { + std::cout << "we got " << input.data->size() << " bytes.\n"; + auto status = nvjpeg2kStreamParse(handle, input.data->data(), input.data->size(), 0, 0, jpeg2k_stream); + if (status != NVJPEG2K_STATUS_SUCCESS) { + throw CUDAError("nvjpeg2kStreamParse", status); + } + + nvjpeg2kImageInfo_t image_info; + status = nvjpeg2kStreamGetImageInfo(jpeg2k_stream, &image_info); + if (status != NVJPEG2K_STATUS_SUCCESS) { + throw CUDAError("nvjpeg2kStreamGetImageInfo", status); + } + std::cout << image_info.num_components << " components.\n"; + + nvjpeg2kImageComponentInfo_t image_component_info[3]; + for (int i = 0; i < 3; ++i) { + status = nvjpeg2kStreamGetImageComponentInfo(jpeg2k_stream, &image_component_info[i], i); + if (status != NVJPEG2K_STATUS_SUCCESS) { + throw CUDAError("nvjpeg2kStreamGetImageComponentInfo", status); + } + } + + auto const width = image_component_info[0].component_width; + auto const height = image_component_info[0].component_height; + std::cout << width << "x" << height << " " << ((int)image_component_info[0].precision) << "\n"; + + uint16_t* decoded_d[3]; + size_t pitch[3]; + + for (int i = 0; i < 3; ++i) { + printf("cudaMallocPitch %d %d\n", width * 2, height); + auto status = cudaMallocPitch(reinterpret_cast<void**>(&decoded_d[i]), &pitch[i], width * 2, height); + if (status != cudaSuccess) { + throw CUDAError("cudaMallocPitch", status); + } + } + + nvjpeg2kImage_t output_image; + output_image.pixel_data = reinterpret_cast<void**>(decoded_d); + output_image.pixel_type = NVJPEG2K_UINT16; + output_image.pitch_in_bytes = pitch; + output_image.num_components = 3; + + status = nvjpeg2kDecode(handle, decode_state, jpeg2k_stream, &output_image, 0); + std::cout << "decode said " << status << "\n"; + if (status != NVJPEG2K_STATUS_SUCCESS) { + abort(); + throw CUDAError("nvjpeg2kDecode", status); + } + cudaDeviceSynchronize(); + + std::vector<uint16_t> decoded_h[3]; + for (int i = 0; i < 3; ++i) { + auto size = pitch[i] * height; + decoded_h[i].resize(size / 2); + auto status = cudaMemcpy(decoded_h[i].data(), decoded_d[i], size, cudaMemcpyDeviceToHost); + if (status != cudaSuccess) { + throw CUDAError("cudaMemcpy", status); + } + } + + auto output = std::make_shared<Image>(input.pixel_format, dcp::Size(width, height), input.alignment); + int p = 0; + for (size_t y = 0; y < height; ++y) { + auto q = reinterpret_cast<uint16_t *>(output->data()[0] + y * output->stride()[0]); + for (size_t x = 0; x < width; ++x) { + *q++ = decoded_h[0][p] << 4; + *q++ = decoded_h[1][p] << 4; + *q++ = decoded_h[2][p] << 4; + ++p; + } + } + + for (int i = 0; i < 3; ++i) { + cudaFree(decoded_d[i]); + } + + lm.lock(); + _decode_output[input.id] = output; + _decode_complete_condition.notify_all(); + lm.unlock(); + } catch (CUDAError&) { + lm.lock(); + _decode_output[input.id] = {}; + _decode_complete_condition.notify_all(); + lm.unlock(); + } + } +} + + +shared_ptr<Image> +CUDA::decode(shared_ptr<const dcp::Data> j2k_data, int reduce, AVPixelFormat pixel_format, Image::Alignment alignment) +{ + boost::mutex::scoped_lock lm(_decode_mutex); + auto id = _next_decode_id++; + _decode_queue.push({id, j2k_data, reduce, pixel_format, alignment}); + _decode_queue_empty_condition.notify_all(); + + while (_decode_output.find(id) == _decode_output.end()) { + _decode_complete_condition.wait(lm); + } + + auto iter = _decode_output.find(id); + if (iter ==_decode_output.end()) { + return {}; + } + + auto output = *iter; + _decode_output.erase(iter); + return output.second; +} + + +CUDA * +CUDA::instance() +{ + if (!_instance) { + _instance = new CUDA(); + } + + return _instance; +} + diff --git a/src/lib/cuda.h b/src/lib/cuda.h new file mode 100644 index 000000000..8d89c7df7 --- /dev/null +++ b/src/lib/cuda.h @@ -0,0 +1,71 @@ +/* + Copyright (C) 2022 Carl Hetherington <cth@carlh.net> + + This file is part of DCP-o-matic. + + DCP-o-matic is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + DCP-o-matic is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with DCP-o-matic. If not, see <http://www.gnu.org/licenses/>. + +*/ + + +#include "image.h" +#include <dcp/array_data.h> +extern "C" { +#include <libavutil/pixfmt.h> +} +#include <boost/thread/condition.hpp> +#include <boost/thread/mutex.hpp> +#include <memory> +#include <queue> +#include <thread> + + +class CUDA +{ +public: + CUDA(); + + CUDA(CUDA &) = delete; + CUDA(CUDA &&) = delete; + + std::shared_ptr<Image> decode(std::shared_ptr<const dcp::Data> j2k_data, int reduce, AVPixelFormat pixel_format, Image::Alignment alignment); + + static CUDA* instance(); + +private: + void decode_thread(); + + typedef uint64_t ID; + + class DecodeQueueItem + { + public: + ID id; + std::shared_ptr<const dcp::Data> data; + int reduce; + AVPixelFormat pixel_format; + Image::Alignment alignment; + }; + + std::queue<DecodeQueueItem> _decode_queue; + std::map<ID, std::shared_ptr<Image>> _decode_output; + boost::condition _decode_queue_empty_condition; + boost::condition _decode_complete_condition; + boost::mutex _decode_mutex; + std::thread _decode_thread; + ID _next_decode_id = 0; + + static CUDA* _instance; +}; + diff --git a/src/lib/exceptions.cc b/src/lib/exceptions.cc index 4a43ac32b..2c9500577 100644 --- a/src/lib/exceptions.cc +++ b/src/lib/exceptions.cc @@ -183,5 +183,6 @@ CUDAError::CUDAError (string function, cudaError error) CUDAError::CUDAError (string function, int error) : runtime_error(String::compose("CUDA error on %1 (%2)", function, error)) { + std::cout << "CUDA error " << function << " " << error << "\n"; } diff --git a/src/lib/j2k_image_proxy.cc b/src/lib/j2k_image_proxy.cc index 269b01bca..26b2064d7 100644 --- a/src/lib/j2k_image_proxy.cc +++ b/src/lib/j2k_image_proxy.cc @@ -19,6 +19,9 @@ */ +#ifdef HAVE_NVJPEG2K_H +#include "cuda.h" +#endif #include "dcpomatic_assert.h" #include "dcpomatic_socket.h" #include "image.h" @@ -141,6 +144,7 @@ J2KImageProxy::prepare (Image::Alignment alignment, optional<dcp::Size> target_s } try { +#if 0 /* XXX: should check that potentially trashing _data here doesn't matter */ auto decompressed = dcp::decompress_j2k (const_cast<uint8_t*>(_data->data()), _data->size(), reduce); _image = make_shared<Image>(_pixel_format, decompressed->size(), alignment); @@ -166,6 +170,8 @@ J2KImageProxy::prepare (Image::Alignment alignment, optional<dcp::Size> target_s ++p; } } +#endif + _image = CUDA::instance()->decode(_data, 0, _pixel_format, alignment); } catch (dcp::J2KDecompressionError& e) { _image = make_shared<Image>(_pixel_format, _size, alignment); _image->make_black (); diff --git a/src/lib/wscript b/src/lib/wscript index 377bf1409..9dce415bb 100644 --- a/src/lib/wscript +++ b/src/lib/wscript @@ -45,8 +45,6 @@ sources = """ audio_ring_buffers.cc audio_stream.cc butler.cc - text_content.cc - text_decoder.cc case_insensitive_sorter.cc check_content_change_job.cc cinema.cc @@ -174,6 +172,8 @@ sources = """ string_text_file_decoder.cc subtitle_analysis.cc subtitle_encoder.cc + text_content.cc + text_decoder.cc text_ring_buffers.cc timer.cc transcode_job.cc @@ -229,7 +229,7 @@ def build(bld): obj.uselib += ' POLKIT' if bld.env.HAVE_NVJPEG2K_H: - obj.source += ' cuda_j2k_frame_encoder.cc' + obj.source += ' cuda.cc cuda_j2k_frame_encoder.cc' if bld.env.TARGET_WINDOWS_64 or bld.env.TARGET_WINDOWS_32: obj.uselib += ' WINSOCK2 DBGHELP SHLWAPI MSWSOCK BOOST_LOCALE SETUPAPI OLE32 UUID' |
