diff options
| author | Carl Hetherington <cth@carlh.net> | 2022-05-20 16:55:09 +0200 |
|---|---|---|
| committer | Carl Hetherington <cth@carlh.net> | 2022-05-23 16:55:28 +0200 |
| commit | e7503b1a3c93121c27cec2804376f8621d793f6a (patch) | |
| tree | c0f73c0fde00863d1f749c78ff3c776a109a8b8d | |
| parent | 9bf074b427b2f6a2ac40e420c595a8d01577ff6d (diff) | |
wip: Add CUDA J2K frame encoder using libjpeg2k.
| -rw-r--r-- | src/lib/cuda_j2k_frame_encoder.cc | 188 | ||||
| -rw-r--r-- | src/lib/cuda_j2k_frame_encoder.h | 57 | ||||
| -rw-r--r-- | src/lib/exceptions.cc | 13 | ||||
| -rw-r--r-- | src/lib/exceptions.h | 11 | ||||
| -rw-r--r-- | src/lib/j2k_encoder.cc | 6 | ||||
| -rw-r--r-- | src/lib/wscript | 4 | ||||
| -rw-r--r-- | src/tools/wscript | 2 | ||||
| -rw-r--r-- | wscript | 5 |
8 files changed, 284 insertions, 2 deletions
diff --git a/src/lib/cuda_j2k_frame_encoder.cc b/src/lib/cuda_j2k_frame_encoder.cc new file mode 100644 index 000000000..b382e8d3f --- /dev/null +++ b/src/lib/cuda_j2k_frame_encoder.cc @@ -0,0 +1,188 @@ +/* + Copyright (C) 2022 Carl Hetherington <cth@carlh.net> + + This file is part of DCP-o-matic. + + DCP-o-matic is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + DCP-o-matic is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with DCP-o-matic. If not, see <http://www.gnu.org/licenses/>. + +*/ + + +#include "cross.h" +#include "cuda_j2k_frame_encoder.h" +#include "dcpomatic_log.h" +#include "dcp_video.h" +#include "exceptions.h" +#include "player_video.h" +#include <dcp/openjpeg_image.h> +#include <nvjpeg2k.h> + + +using boost::optional; + + +CUDAJ2KFrameEncoder::CUDAJ2KFrameEncoder() +{ + nvjpeg2kEncoderCreateSimple(&_encoder_handle); + nvjpeg2kEncodeStateCreate(_encoder_handle, &_encoder_state); + nvjpeg2kEncodeParamsCreate(&_encoder_params); + + _pixel_data_d[0] = _pixel_data_d[1] = _pixel_data_d[2] = nullptr; +} + + +void +CUDAJ2KFrameEncoder::allocate(dcp::Size size) +{ + for (int i = 0; i < 3; ++i) { + _image_component_info[i].component_width = size.width; + _image_component_info[i].component_height = size.height; + _image_component_info[i].precision = 12; + _image_component_info[i].sgn = 0; + _pitch_in_bytes[i] = size.width * 2; + } + + for (int i = 0; i < 3; ++i) { + auto error = cudaMallocPitch( + reinterpret_cast<void**>(&_pixel_data_d[i]), + &_pitch_in_bytes[i], + _image_component_info[i].component_width * 2, + _image_component_info[i].component_height + ); + if (error != cudaSuccess) { + throw CUDAError("cudaMallocPitch", error); + } + } + + _allocated_size = size; +} + + +CUDAJ2KFrameEncoder::~CUDAJ2KFrameEncoder() +{ + free(); +} + + +void +CUDAJ2KFrameEncoder::free() +{ + for (int i = 0; i < 3; ++i) { + cudaFree(&_pixel_data_d[i]); + } + + _pixel_data_d[0] = _pixel_data_d[1] = _pixel_data_d[2] = nullptr; +} + + +optional<dcp::ArrayData> +CUDAJ2KFrameEncoder::encode(DCPVideo const& vf) +{ + std::cout << "cuda encode starts.\n"; + + auto frame = vf.frame(); + auto size = frame->out_size(); + + if (size != _allocated_size) { + free(); + allocate(size); + } + + auto xyz = convert_to_xyz(vf.frame(), boost::bind(&Log::dcp_log, dcpomatic_log.get(), _1, _2)); + + for (int i = 0; i < 3; ++i) { + _pixel_data_h[i] = reinterpret_cast<uint8_t*>(xyz->data(i)); + } + + nvjpeg2kImage_t image_h; + image_h.num_components = 3; + image_h.pixel_data = reinterpret_cast<void**>(_pixel_data_h); + image_h.pixel_type = NVJPEG2K_UINT16; + image_h.pitch_in_bytes = reinterpret_cast<size_t*>(_pitch_in_bytes); + + nvjpeg2kImage_t image_d; + image_d.num_components = 3; + image_d.pixel_data = reinterpret_cast<void**>(_pixel_data_d); + image_d.pixel_type = NVJPEG2K_UINT16; + image_d.pitch_in_bytes = reinterpret_cast<size_t*>(_pitch_in_bytes); + + for (int i = 0; i < 3; ++i) { + auto status = cudaMemcpy2D( + image_d.pixel_data[i], + image_d.pitch_in_bytes[i], + image_h.pixel_data[i], + image_h.pitch_in_bytes[i], + 2 * _image_component_info[i].component_width, + _image_component_info[i].component_height, + cudaMemcpyHostToDevice + ); + if (status != cudaSuccess) { + throw CUDAError("cudaMemcpy2D", status); + } + cudaDeviceSynchronize(); + } + + nvjpeg2kEncodeConfig_t config; + memset(&config, 0, sizeof(config)); + config.stream_type = NVJPEG2K_STREAM_J2K; + config.color_space = NVJPEG2K_COLORSPACE_SRGB; + config.image_width = size.width; + config.image_height = size.height; + config.num_components = 3; + config.image_comp_info = reinterpret_cast<nvjpeg2kImageComponentInfo_t*>(&_image_component_info); + config.code_block_w = 32; + config.code_block_h = 32; + config.irreversible = 0; + config.mct_mode = 1; + config.prog_order = NVJPEG2K_CPRL; + config.num_resolutions = vf.resolution() == Resolution::FOUR_K ? 7 : 6; + + auto status = nvjpeg2kEncodeParamsSetEncodeConfig(_encoder_params, &config); + if (status != NVJPEG2K_STATUS_SUCCESS) { + throw CUDAError("nvjpeg2kEncodeParamsSetEncodeConfig", status); + } + + // XXX: quality + status = nvjpeg2kEncodeParamsSetQuality(_encoder_params, 25); + if (status != NVJPEG2K_STATUS_SUCCESS) { + throw CUDAError("nvjpeg2kEncodeParamsSetQuality", status); + } + + status = nvjpeg2kEncode(_encoder_handle, _encoder_state, _encoder_params, &image_d, 0); + if (status != NVJPEG2K_STATUS_SUCCESS) { + throw CUDAError("nvjpeg2kEncode", status); + } + + size_t compressed_size; + status = nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encoder_state, nullptr, &compressed_size, 0); + + dcp::ArrayData output(compressed_size); + status = nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encoder_state, output.data(), &compressed_size, 0); + cudaDeviceSynchronize(); + + if (status != NVJPEG2K_STATUS_SUCCESS) { + throw CUDAError("nvjpeg2kEncodeRetrieveBitstream", status); + } + + std::cout << "cuda encode ends with " << status << " " << output.size() << "\n"; + + return output; +} + + +void +CUDAJ2KFrameEncoder::log_thread_start () +{ + LOG_TIMING("start-encoder-thread thread=%1", thread_id()); +} diff --git a/src/lib/cuda_j2k_frame_encoder.h b/src/lib/cuda_j2k_frame_encoder.h new file mode 100644 index 000000000..46ac64607 --- /dev/null +++ b/src/lib/cuda_j2k_frame_encoder.h @@ -0,0 +1,57 @@ +/* + Copyright (C) 2022 Carl Hetherington <cth@carlh.net> + + This file is part of DCP-o-matic. + + DCP-o-matic is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + DCP-o-matic is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with DCP-o-matic. If not, see <http://www.gnu.org/licenses/>. + +*/ + + +#ifndef DCPOMATIC_CUDA_J2K_FRAME_ENCODER +#define DCPOMATIC_CUDA_J2K_FRAME_ENCODER + + +#include "j2k_frame_encoder.h" +#include <dcp/types.h> +#include <nvjpeg2k.h> +#include <vector> + + +class CUDAJ2KFrameEncoder : public J2KFrameEncoder +{ +public: + CUDAJ2KFrameEncoder(); + ~CUDAJ2KFrameEncoder(); + + boost::optional<dcp::ArrayData> encode(DCPVideo const &) override; + void log_thread_start() override; + +private: + void allocate(dcp::Size size); + void free(); + + nvjpeg2kEncoder_t _encoder_handle; + nvjpeg2kEncodeState_t _encoder_state; + nvjpeg2kEncodeParams_t _encoder_params; + uint8_t* _pixel_data_h[3]; + uint8_t* _pixel_data_d[3]; + size_t _pitch_in_bytes[3]; + nvjpeg2kImageComponentInfo_t _image_component_info[3]; + + dcp::Size _allocated_size; +}; + + +#endif diff --git a/src/lib/exceptions.cc b/src/lib/exceptions.cc index 66db9fda7..4a43ac32b 100644 --- a/src/lib/exceptions.cc +++ b/src/lib/exceptions.cc @@ -172,3 +172,16 @@ VerifyError::VerifyError (string m, int n) } + +CUDAError::CUDAError (string function, cudaError error) + : runtime_error(String::compose("CUDA error on %1 (%2)", function, static_cast<int>(error))) +{ + +} + + +CUDAError::CUDAError (string function, int error) + : runtime_error(String::compose("CUDA error on %1 (%2)", function, error)) +{ +} + diff --git a/src/lib/exceptions.h b/src/lib/exceptions.h index 7c9509800..0712c085b 100644 --- a/src/lib/exceptions.h +++ b/src/lib/exceptions.h @@ -32,6 +32,9 @@ extern "C" { #include <libavutil/pixfmt.h> } +#ifdef HAVE_NVJPEG2K_H +#include <cuda_runtime_api.h> +#endif #include <boost/filesystem.hpp> #include <boost/optional.hpp> #include <cstring> @@ -457,4 +460,12 @@ public: }; +class CUDAError : public std::runtime_error +{ +public: + CUDAError (std::string function, cudaError_t error); + CUDAError (std::string function, int); +}; + + #endif diff --git a/src/lib/j2k_encoder.cc b/src/lib/j2k_encoder.cc index f9969927f..11b360b34 100644 --- a/src/lib/j2k_encoder.cc +++ b/src/lib/j2k_encoder.cc @@ -27,6 +27,7 @@ #include "compose.hpp" #include "config.h" #include "cpu_j2k_frame_encoder.h" +#include "cuda_j2k_frame_encoder.h" #include "cross.h" #include "dcp_video.h" #include "dcpomatic_log.h" @@ -350,6 +351,7 @@ J2KEncoder::servers_list_changed () /* XXX: could re-use threads */ +#if 0 if (!Config::instance()->only_servers_encode ()) { for (int i = 0; i < Config::instance()->master_encoding_threads (); ++i) { auto worker = make_shared<CPUJ2KFrameEncoder>(); @@ -375,6 +377,10 @@ J2KEncoder::servers_list_changed () _threads->create_thread(boost::bind(&J2KEncoder::encoder_thread, this, worker)); } } +#endif + auto worker = make_shared<CUDAJ2KFrameEncoder>(); + _workers.push_back(worker); + _threads->create_thread(boost::bind(&J2KEncoder::encoder_thread, this, worker)); _writer->set_encoder_threads (_threads->size()); } diff --git a/src/lib/wscript b/src/lib/wscript index 1e8fd6f25..377bf1409 100644 --- a/src/lib/wscript +++ b/src/lib/wscript @@ -214,6 +214,7 @@ def build(bld): BOOST_FILESYSTEM BOOST_THREAD BOOST_DATETIME BOOST_SIGNALS2 BOOST_REGEX SAMPLERATE POSTPROC TIFF SSH DCP CXML GLIB LZMA XML++ CURL ZIP BZ2 FONTCONFIG PANGOMM CAIROMM XMLSEC SUB ICU NETTLE PNG JPEG LEQM_NRT + CUDA """ if bld.env.TARGET_OSX: @@ -227,6 +228,9 @@ def build(bld): if bld.env.TARGET_LINUX: obj.uselib += ' POLKIT' + if bld.env.HAVE_NVJPEG2K_H: + obj.source += ' cuda_j2k_frame_encoder.cc' + if bld.env.TARGET_WINDOWS_64 or bld.env.TARGET_WINDOWS_32: obj.uselib += ' WINSOCK2 DBGHELP SHLWAPI MSWSOCK BOOST_LOCALE SETUPAPI OLE32 UUID' obj.source += ' cross_windows.cc' diff --git a/src/tools/wscript b/src/tools/wscript index be5297beb..204058b2e 100644 --- a/src/tools/wscript +++ b/src/tools/wscript @@ -30,7 +30,7 @@ def configure(conf): def build(bld): uselib = 'BOOST_THREAD BOOST_DATETIME DCP XMLSEC CXML XMLPP AVFORMAT AVFILTER AVCODEC ' uselib += 'AVUTIL SWSCALE SWRESAMPLE POSTPROC CURL BOOST_FILESYSTEM SSH ZIP CAIROMM FONTCONFIG PANGOMM SUB ' - uselib += 'SNDFILE SAMPLERATE BOOST_REGEX ICU NETTLE RTAUDIO PNG JPEG LEQM_NRT ' + uselib += 'SNDFILE SAMPLERATE BOOST_REGEX ICU NETTLE RTAUDIO PNG JPEG LEQM_NRT CUDA' if bld.env.ENABLE_DISK: if bld.env.TARGET_LINUX: @@ -149,6 +149,7 @@ def configure(conf): pass conf.env.append_value('INCLUDES', os.path.join(conf.options.cuda_path, 'include')) + conf.env.append_value('LIBPATH', os.path.join(conf.options.cuda_path, 'targets', 'x86_64-linux', 'lib')) # # Windows/Linux/OS X specific @@ -561,7 +562,9 @@ def configure(conf): lib=deps, uselib_store='BOOST_PROCESS') - conf.check(header_name='nvjpeg2k.h', mandatory=False) + if conf.check(header_name='nvjpeg2k.h', mandatory=False): + conf.env.LIB_CUDA = ['cudart', 'nvjpeg2k'] + # Other stuff |
