fixup! Playback sort of works.nvidia

author: Carl Hetherington <cth@carlh.net> 2022-05-24 22:33:06 +0200
committer: Carl Hetherington <cth@carlh.net> 2022-05-24 22:33:06 +0200
commit: 322acdcd0fa514aa4612edb1cba0a1bec792cba8 (patch)
tree: 31c503063501bf722dfdd481e4166b5b2f9b61b3 /src/lib/cuda_decoder.cc
parent: d39f2ce23a8d12c11b0ccafc4568761802101ba0 (diff)
1 files changed, 222 insertions, 0 deletions
diff --git a/src/lib/cuda_decoder.cc b/src/lib/cuda_decoder.cc
new file mode 100644
index 000000000..bc4c34232
--- /dev/null
+++ b/src/lib/cuda_decoder.cc
@@ -0,0 +1,222 @@
+/*
+    Copyright (C) 2022 Carl Hetherington <cth@carlh.net>
+
+    This file is part of DCP-o-matic.
+
+    DCP-o-matic is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    DCP-o-matic is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.
+
+*/
+
+
+#include "cuda_decoder.h"
+#include "dcpomatic_assert.h"
+#include "dcpomatic_log.h"
+#include "exceptions.h"
+#include "image.h"
+#include "scope_guard.h"
+#include <dcp/openjpeg_image.h>
+#include <nvjpeg2k.h>
+
+
+using std::shared_ptr;
+using std::string;
+
+
+CUDADecoder* CUDADecoder::_instance = nullptr;
+
+
+CUDADecoder::CUDADecoder()
+{
+	_thread = boost::thread(std::bind(&CUDADecoder::thread, this));
+}
+
+
+CUDADecoder::~CUDADecoder()
+{
+	try {
+		_thread.interrupt();
+		_thread.join();
+	} catch (...) {}
+
+	for (int i = 0; i < 3; ++i) {
+		cudaFree(_device[i]);
+	}
+}
+
+
+void
+CUDADecoder::check_jpeg2k(string name, nvjpeg2kStatus_t status)
+{
+	if (status != NVJPEG2K_STATUS_SUCCESS) {
+		throw CUDAError(name, status);
+	}
+}
+
+
+void
+CUDADecoder::thread()
+try
+{
+	nvjpeg2kHandle_t handle;
+	check_jpeg2k("nvjpeg2kCreateSimple", nvjpeg2kCreateSimple(&handle));
+	ScopeGuard handle_guard([handle]() {
+		nvjpeg2kDestroy(handle);
+	});
+
+	nvjpeg2kDecodeState_t state;
+	check_jpeg2k("nvjpeg2kDecodeStateCreate", nvjpeg2kDecodeStateCreate(handle, &state));
+	ScopeGuard state_guard([&state]() {
+		nvjpeg2kDecodeStateDestroy(state);
+	});
+
+	nvjpeg2kStream_t jpeg2k_stream;
+	check_jpeg2k("nvjpeg2kStreamCreate", nvjpeg2kStreamCreate(&jpeg2k_stream));
+	ScopeGuard jpeg2k_stream_guard([&jpeg2k_stream]() {
+		nvjpeg2kStreamDestroy(jpeg2k_stream);
+	});
+
+	while (true) {
+		boost::mutex::scoped_lock lm(_mutex);
+		while (_queue.empty()) {
+			_queue_empty_condition.wait(lm);
+		}
+
+		auto input = std::move(_queue.front());
+		_queue.pop();
+		lm.unlock();
+
+		auto output = decode_one(input, handle, state, jpeg2k_stream);
+
+		lm.lock();
+		_output[input.id] = output;
+		_complete_condition.notify_all();
+	}
+}
+catch (CUDAError& e)
+{
+	LOG_ERROR("CUDA error: %1 (aborting CUDADecoder)", e.what());
+}
+catch (boost::thread_interrupted&)
+{
+
+}
+catch (std::exception& e)
+{
+	LOG_ERROR("Aborting CUDADecoder thread: %1", e.what());
+}
+catch (...)
+{
+	LOG_ERROR_NC("Aborting CUDADecoder thread: unknown error");
+}
+
+
+
+shared_ptr<Image>
+CUDADecoder::decode_one(QueueItem const& input, nvjpeg2kHandle_t handle, nvjpeg2kDecodeState_t state, nvjpeg2kStream_t jpeg2k_stream)
+{
+	try {
+		check_jpeg2k("nvjpeg2kStreamParse", nvjpeg2kStreamParse(handle, input.data->data(), input.data->size(), 0, 0, jpeg2k_stream));
+
+		nvjpeg2kImageInfo_t image_info;
+		check_jpeg2k("nvjpeg2kStreamGetImageInfo", nvjpeg2kStreamGetImageInfo(jpeg2k_stream, &image_info));
+
+		nvjpeg2kImageComponentInfo_t image_component_info[3];
+		for (int i = 0; i < 3; ++i) {
+			check_jpeg2k("nvjpeg2kStreamGetImageComponentInfo", nvjpeg2kStreamGetImageComponentInfo(jpeg2k_stream, &image_component_info[i], i));
+		}
+
+		dcp::Size size(image_component_info[0].component_width, image_component_info[0].component_height);
+		if (size != _allocation) {
+			for (int i = 0; i < 3; ++i) {
+				cudaFree(_device[i]);
+				_device[i] = nullptr;
+				auto status = cudaMallocPitch(reinterpret_cast<void**>(&_device[i]), &_pitch[i], size.width * 2, size.height);
+				if (status != cudaSuccess) {
+					throw CUDAError("cudaMallocPitch", status);
+				}
+				_host[i].resize(_pitch[i] * size.height / 2);
+			}
+			_allocation = size;
+		}
+
+		nvjpeg2kImage_t output_image;
+		output_image.pixel_data = reinterpret_cast<void**>(_device);
+		output_image.pixel_type = NVJPEG2K_UINT16;
+		output_image.pitch_in_bytes = _pitch;
+		output_image.num_components = 3;
+
+		check_jpeg2k("nvjpeg2kDecode", nvjpeg2kDecode(handle, state, jpeg2k_stream, &output_image, 0));
+		cudaDeviceSynchronize();
+
+		for (int i = 0; i < 3; ++i) {
+			auto status = cudaMemcpy(_host[i].data(), _device[i], _pitch[i] * size.height, cudaMemcpyDeviceToHost);
+			if (status != cudaSuccess) {
+				throw CUDAError("cudaMemcpy", status);
+			}
+		}
+
+		auto output = std::make_shared<Image>(input.pixel_format, size, input.alignment);
+		for (int y = 0; y < size.height; ++y) {
+			int p = y * _pitch[0] / 2;
+			auto q = reinterpret_cast<uint16_t *>(output->data()[0] + y * output->stride()[0]);
+			for (int x = 0; x < size.width; ++x) {
+				*q++ = _host[0][p] << 4;
+				*q++ = _host[1][p] << 4;
+				*q++ = _host[2][p] << 4;
+				++p;
+			}
+		}
+
+		return output;
+
+	} catch (CUDAError& e) {
+		LOG_ERROR("CUDA error: %1", e.what());
+		return {};
+	}
+}
+
+
+shared_ptr<Image>
+CUDADecoder::decode(shared_ptr<const dcp::Data> j2k_data, int reduce, AVPixelFormat pixel_format, Image::Alignment alignment)
+{
+	boost::mutex::scoped_lock lm(_mutex);
+	auto id = _next_id++;
+	_queue.push({id, j2k_data, reduce, pixel_format, alignment});
+	_queue_empty_condition.notify_all();
+
+	while (_output.find(id) == _output.end()) {
+		_complete_condition.wait(lm);
+	}
+
+	auto iter = _output.find(id);
+	if (iter == _output.end()) {
+		return {};
+	}
+
+	auto output = *iter;
+	_output.erase(iter);
+	return output.second;
+}
+
+
+CUDADecoder *
+CUDADecoder::instance()
+{
+	if (!_instance) {
+		_instance = new CUDADecoder();
+	}
+
+	return _instance;
+}
+
author	Carl Hetherington <cth@carlh.net>	2022-05-24 22:33:06 +0200
committer	Carl Hetherington <cth@carlh.net>	2022-05-24 22:33:06 +0200
commit	322acdcd0fa514aa4612edb1cba0a1bec792cba8 (patch)
tree	31c503063501bf722dfdd481e4166b5b2f9b61b3 /src/lib/cuda_decoder.cc
parent	d39f2ce23a8d12c11b0ccafc4568761802101ba0 (diff)