From: Carl Hetherington Date: Mon, 23 May 2022 13:25:43 +0000 (+0200) Subject: encoder using batching X-Git-Url: https://git.carlh.net/gitweb/?p=dcpomatic.git;a=commitdiff_plain;h=f01378133e187043f499fcd9c354e9321343c7be encoder using batching --- diff --git a/src/lib/cuda_j2k_frame_encoder.cc b/src/lib/cuda_j2k_frame_encoder.cc index 675b26f57..3726f63a7 100644 --- a/src/lib/cuda_j2k_frame_encoder.cc +++ b/src/lib/cuda_j2k_frame_encoder.cc @@ -30,10 +30,20 @@ #include +using std::make_pair; using std::vector; using boost::optional; +boost::mutex CUDAJ2KFrameEncoder::_mutex; +boost::condition CUDAJ2KFrameEncoder::_condition; +std::vector CUDAJ2KFrameEncoder::_queue; +std::map, dcp::ArrayData> CUDAJ2KFrameEncoder::_output; + +boost::optional CUDAJ2KFrameEncoder::_size; +boost::optional CUDAJ2KFrameEncoder::_resolution; + + CUDAJ2KFrameEncoder::CUDAJ2KFrameEncoder() { nvjpeg2kEncoderCreateSimple(&_encoder_handle); @@ -42,7 +52,9 @@ CUDAJ2KFrameEncoder::CUDAJ2KFrameEncoder() } -CUDAJ2KFrameEncoder::Frame::Frame(DCPVideo const& vf) +CUDAJ2KFrameEncoder::Input::Input(DCPVideo const& vf) + : _index(vf.index()) + , _eyes(vf.eyes()) { auto xyz = convert_to_xyz(vf.frame(), boost::bind(&Log::dcp_log, dcpomatic_log.get(), _1, _2)); @@ -91,7 +103,9 @@ CUDAJ2KFrameEncoder::Frame::Frame(DCPVideo const& vf) } -CUDAJ2KFrameEncoder::Frame::Frame(Frame&& other) +CUDAJ2KFrameEncoder::Input::Input(Input&& other) + : _index(other._index) + , _eyes(other._eyes) { for (int i = 0; i < 3; ++i) { _pixel_data_d[i] = other._pixel_data_d[i]; @@ -106,7 +120,7 @@ CUDAJ2KFrameEncoder::Frame::Frame(Frame&& other) } -CUDAJ2KFrameEncoder::Frame::~Frame() +CUDAJ2KFrameEncoder::Input::~Input() { cudaFree(_pixel_data_d[0]); cudaFree(_pixel_data_d[1]); @@ -114,10 +128,12 @@ CUDAJ2KFrameEncoder::Frame::~Frame() } -vector +optional CUDAJ2KFrameEncoder::encode(DCPVideo const& vf) { - int constexpr BATCH_SIZE = 128; + auto input = Input(vf); + + boost::mutex::scoped_lock lm(_mutex); auto const size = vf.frame()->out_size(); DCPOMATIC_ASSERT(!_size || size == *_size); @@ -126,22 +142,27 @@ CUDAJ2KFrameEncoder::encode(DCPVideo const& vf) DCPOMATIC_ASSERT(!_resolution || vf.resolution() == *_resolution); _resolution = vf.resolution(); - if (_batch.size() < BATCH_SIZE) { - _batch.push_back(Frame(vf)); + _queue.push_back(std::move(input)); + if (_queue.size() < batch_size) { + std::cout << "queue is " << _queue.size() << " - waiting\n"; + _condition.wait(lm); + } else { + encode_queue(); + _condition.notify_all(); + } + + auto output = _output.find(make_pair(vf.index(), vf.eyes())); + if (output == _output.end()) { return {}; } - return flush(); + return output->second; } -vector -CUDAJ2KFrameEncoder::flush() +void +CUDAJ2KFrameEncoder::encode_queue() { - if (_batch.empty()) { - return {}; - } - nvjpeg2kImageComponentInfo_t info[3]; for (int i = 0; i < 3; ++i) { info[i].component_width = _size->width; @@ -176,11 +197,8 @@ CUDAJ2KFrameEncoder::flush() throw CUDAError("nvjpeg2kEncodeParamsSetQuality", status); } - vector output; - - for (auto const& frame: _batch) { - - auto x = frame.device_image(); + std::cout << "encoding queue of " << _queue.size() << "\n"; + for (auto const& frame: _queue) { status = nvjpeg2kEncode(_encoder_handle, _encoder_state, _encoder_params, frame.device_image(), 0); if (status != NVJPEG2K_STATUS_SUCCESS) { @@ -196,13 +214,11 @@ CUDAJ2KFrameEncoder::flush() throw CUDAError("nvjpeg2kEncodeRetrieveBitstream", status); } - output.push_back(this_output); + _output[make_pair(frame.index(), frame.eyes())] = this_output; cudaStreamSynchronize(0); } - _batch.clear(); - - return output; + _queue.clear(); } @@ -211,3 +227,12 @@ CUDAJ2KFrameEncoder::log_thread_start () { LOG_TIMING("start-encoder-thread thread=%1", thread_id()); } + + +void +CUDAJ2KFrameEncoder::flush() +{ + boost::mutex::scoped_lock lm(_mutex); + encode_queue(); + _condition.notify_all(); +} diff --git a/src/lib/cuda_j2k_frame_encoder.h b/src/lib/cuda_j2k_frame_encoder.h index 28e8a3c76..9a8666cf6 100644 --- a/src/lib/cuda_j2k_frame_encoder.h +++ b/src/lib/cuda_j2k_frame_encoder.h @@ -27,6 +27,9 @@ #include "types.h" #include #include +#include +#include +#include #include @@ -35,38 +38,56 @@ class CUDAJ2KFrameEncoder : public J2KFrameEncoder public: CUDAJ2KFrameEncoder(); - std::vector encode(DCPVideo const &) override; - std::vector flush() override; + boost::optional encode(DCPVideo const &) override; + void flush() override; void log_thread_start() override; + static int constexpr batch_size = 1; + private: + void encode_queue(); + nvjpeg2kEncoder_t _encoder_handle; nvjpeg2kEncodeState_t _encoder_state; nvjpeg2kEncodeParams_t _encoder_params; - class Frame + class Input { public: - Frame(DCPVideo const& vf); - Frame(Frame const& other) = delete; - Frame(Frame&& other); - ~Frame(); + Input(DCPVideo const& vf); + Input(Input const& other) = delete; + Input(Input&& other); + ~Input(); - Frame& operator=(Frame const& other) = delete; + Input& operator=(Input const& other) = delete; nvjpeg2kImage_t const* const device_image() const { return &_device_image; } + int index() const { + return _index; + } + + Eyes eyes() const { + return _eyes; + } + private: uint8_t* _pixel_data_d[3]; size_t _pitch_in_bytes[3]; nvjpeg2kImage_t _device_image; + int _index; + Eyes _eyes; }; - std::vector _batch; - boost::optional _size; - boost::optional _resolution; + static boost::mutex _mutex; + static boost::condition _condition; + static std::vector _queue; + static std::map, dcp::ArrayData> _output; + + static boost::optional _size; + static boost::optional _resolution; }; diff --git a/src/lib/j2k_encoder.cc b/src/lib/j2k_encoder.cc index 11b360b34..49f11377c 100644 --- a/src/lib/j2k_encoder.cc +++ b/src/lib/j2k_encoder.cc @@ -115,6 +115,10 @@ J2KEncoder::end () _full_condition.wait (lock); } + for (auto& worker: _workers) { + worker->flush(); + } + lock.unlock (); LOG_GENERAL_NC (N_("Terminating encoder threads")); @@ -378,9 +382,11 @@ J2KEncoder::servers_list_changed () } } #endif - auto worker = make_shared(); - _workers.push_back(worker); - _threads->create_thread(boost::bind(&J2KEncoder::encoder_thread, this, worker)); + for (int i = 0; i < CUDAJ2KFrameEncoder::batch_size; ++i) { + auto worker = make_shared(); + _workers.push_back(worker); + _threads->create_thread(boost::bind(&J2KEncoder::encoder_thread, this, worker)); + } _writer->set_encoder_threads (_threads->size()); } diff --git a/src/lib/j2k_frame_encoder.h b/src/lib/j2k_frame_encoder.h index 33f987653..c2779e238 100644 --- a/src/lib/j2k_frame_encoder.h +++ b/src/lib/j2k_frame_encoder.h @@ -37,6 +37,7 @@ public: virtual ~J2KFrameEncoder() {} virtual boost::optional encode (DCPVideo const &) = 0; + virtual void flush () {} virtual void log_thread_start () = 0; };