From f5f14a6422ddd68a52dd14686c1bd49159dbaa74 Mon Sep 17 00:00:00 2001 From: Carl Hetherington Date: Wed, 12 Aug 2020 01:12:31 +0200 Subject: [PATCH] wip: hacks which at least get GPU-decoded image on screen --- src/lib/butler.cc | 53 +---- src/lib/butler.h | 10 +- src/lib/cpu_player_video_preparer.cc | 58 ++++++ src/lib/cpu_player_video_preparer.h | 33 +++ src/lib/fastvideo.cc | 2 +- src/lib/fastvideo_player_video_preparer.cc | 221 +++++++++++++++++++++ src/lib/fastvideo_player_video_preparer.h | 49 +++++ src/lib/j2k_image_proxy.cc | 30 ++- src/lib/j2k_image_proxy.h | 5 + src/lib/player_video.h | 5 + src/lib/player_video_preparer.h | 17 +- src/lib/wscript | 3 +- test/fastvideo_test.cc | 59 +++++- 13 files changed, 479 insertions(+), 66 deletions(-) create mode 100644 src/lib/cpu_player_video_preparer.cc create mode 100644 src/lib/cpu_player_video_preparer.h create mode 100644 src/lib/fastvideo_player_video_preparer.cc create mode 100644 src/lib/fastvideo_player_video_preparer.h diff --git a/src/lib/butler.cc b/src/lib/butler.cc index 39da0bd5c..6b730236d 100644 --- a/src/lib/butler.cc +++ b/src/lib/butler.cc @@ -26,6 +26,8 @@ #include "cross.h" #include "compose.hpp" #include "exceptions.h" +#include "cpu_player_video_preparer.h" +#include "fastvideo_player_video_preparer.h" #include #include @@ -44,9 +46,9 @@ using namespace boost::placeholders; #endif /** Minimum video readahead in frames */ -#define MINIMUM_VIDEO_READAHEAD 10 +#define MINIMUM_VIDEO_READAHEAD 64 /** Maximum video readahead in frames; should never be exceeded (by much) unless there are bugs in Player */ -#define MAXIMUM_VIDEO_READAHEAD 48 +#define MAXIMUM_VIDEO_READAHEAD 128 /** Minimum audio readahead in frames */ #define MINIMUM_AUDIO_READAHEAD (48000 * MINIMUM_VIDEO_READAHEAD / 24) /** Maximum audio readahead in frames; should never be exceeded (by much) unless there are bugs in Player */ @@ -66,7 +68,6 @@ Butler::Butler ( bool fast ) : _player (player) - , _prepare_work (new boost::asio::io_service::work (_prepare_service)) , _pending_seek_accurate (false) , _suspended (0) , _finished (false) @@ -79,6 +80,8 @@ Butler::Butler ( , _aligned (aligned) , _fast (fast) { + _preparer.reset (new FastvideoPlayerVideoPreparer(pixel_format, aligned, fast)); + _player_video_connection = _player->Video.connect (bind (&Butler::video, this, _1, _2)); _player_audio_connection = _player->Audio.connect (bind (&Butler::audio, this, _1, _2, _3)); _player_text_connection = _player->Text.connect (bind (&Butler::text, this, _1, _2, _3, _4)); @@ -90,16 +93,6 @@ Butler::Butler ( #ifdef DCPOMATIC_LINUX pthread_setname_np (_thread.native_handle(), "butler"); #endif - - /* Create some threads to do work on the PlayerVideos we are creating; at present this is used to - multi-thread JPEG2000 decoding. - */ - - LOG_TIMING("start-prepare-threads %1", boost::thread::hardware_concurrency() * 2); - - for (size_t i = 0; i < boost::thread::hardware_concurrency() * 2; ++i) { - _prepare_pool.create_thread (bind (&boost::asio::io_service::run, &_prepare_service)); - } } Butler::~Butler () @@ -111,14 +104,12 @@ Butler::~Butler () _stop_thread = true; } - _prepare_work.reset (); - _prepare_pool.join_all (); - _prepare_service.stop (); - _thread.interrupt (); try { _thread.join (); } catch (...) {} + + _preparer.reset (); } /** Caller must hold a lock on _mutex */ @@ -297,32 +288,6 @@ Butler::seek_unlocked (DCPTime position, bool accurate) _summon.notify_all (); } -void -Butler::prepare (weak_ptr weak_video) -try -{ - shared_ptr video = weak_video.lock (); - /* If the weak_ptr cannot be locked the video obviously no longer requires any work */ - if (video) { - LOG_TIMING("start-prepare in %1", thread_id()); - video->prepare (_pixel_format, _aligned, _fast); - LOG_TIMING("finish-prepare in %1", thread_id()); - } -} -catch (std::exception& e) -{ - store_current (); - boost::mutex::scoped_lock lm (_mutex); - _died = true; - _died_message = e.what (); -} -catch (...) -{ - store_current (); - boost::mutex::scoped_lock lm (_mutex); - _died = true; -} - void Butler::video (shared_ptr video, DCPTime time) { @@ -333,7 +298,7 @@ Butler::video (shared_ptr video, DCPTime time) return; } - _prepare_service.post (bind (&Butler::prepare, this, weak_ptr(video))); + _preparer->request (video); _video.put (video, time); } diff --git a/src/lib/butler.h b/src/lib/butler.h index e13843c90..f389ebc88 100644 --- a/src/lib/butler.h +++ b/src/lib/butler.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2017 Carl Hetherington + Copyright (C) 2016-2020 Carl Hetherington This file is part of DCP-o-matic. @@ -28,10 +28,10 @@ #include #include #include -#include class Player; class PlayerVideo; +class PlayerVideoPreparer; class Butler : public ExceptionStore, public boost::noncopyable { @@ -93,10 +93,6 @@ private: AudioRingBuffers _audio; TextRingBuffers _closed_caption; - boost::thread_group _prepare_pool; - boost::asio::io_service _prepare_service; - boost::shared_ptr _prepare_work; - /** mutex to protect _pending_seek_position, _pending_seek_accurate, _finished, _died, _stop_thread */ boost::mutex _mutex; boost::condition _summon; @@ -123,6 +119,8 @@ private: */ boost::optional _awaiting; + boost::shared_ptr _preparer; + boost::signals2::scoped_connection _player_video_connection; boost::signals2::scoped_connection _player_audio_connection; boost::signals2::scoped_connection _player_text_connection; diff --git a/src/lib/cpu_player_video_preparer.cc b/src/lib/cpu_player_video_preparer.cc new file mode 100644 index 000000000..80cb5bc91 --- /dev/null +++ b/src/lib/cpu_player_video_preparer.cc @@ -0,0 +1,58 @@ +#include "cpu_player_video_preparer.h" +#include "cross.h" +#include "dcpomatic_log.h" +#include "player_video.h" +#include + + +using boost::bind; +using boost::shared_ptr; +using boost::weak_ptr; + + +CPUPlayerVideoPreparer::CPUPlayerVideoPreparer (boost::function pixel_format, bool aligned, bool fast) + : _work (new boost::asio::io_service::work(_service)) + , _pixel_format (pixel_format) + , _aligned (aligned) + , _fast (fast) +{ + LOG_TIMING("start-prepare-threads %1", boost::thread::hardware_concurrency() * 2); + + for (size_t i = 0; i < boost::thread::hardware_concurrency() * 2; ++i) { + _pool.create_thread (bind (&boost::asio::io_service::run, &_service)); + } +} + + +CPUPlayerVideoPreparer::~CPUPlayerVideoPreparer () +{ + _work.reset (); + _pool.join_all (); + _service.stop (); +} + + +void +CPUPlayerVideoPreparer::prepare (weak_ptr weak_video) +try +{ + shared_ptr video = weak_video.lock (); + /* If the weak_ptr cannot be locked the video obviously no longer requires any work */ + if (video) { + LOG_TIMING("start-prepare in %1", thread_id()); + video->prepare (_pixel_format, _aligned, _fast); + LOG_TIMING("finish-prepare in %1", thread_id()); + } +} +catch (...) +{ + store_current (); +} + + +void +CPUPlayerVideoPreparer::request (shared_ptr pv) +{ + _service.post (bind(&CPUPlayerVideoPreparer::prepare, this, weak_ptr(pv))); +} + diff --git a/src/lib/cpu_player_video_preparer.h b/src/lib/cpu_player_video_preparer.h new file mode 100644 index 000000000..7cb46dc10 --- /dev/null +++ b/src/lib/cpu_player_video_preparer.h @@ -0,0 +1,33 @@ +#ifndef DCPOMATIC_CPU_PLAYER_VIDEO_PREPARER_H +#define DCPOMATIC_CPU_PLAYER_VIDEO_PREPARER_H + +#include "player_video_preparer.h" +extern "C" { +#include +} +#include +#include +#include + + +class CPUPlayerVideoPreparer : public PlayerVideoPreparer +{ +public: + CPUPlayerVideoPreparer (boost::function pixel_format, bool aligned, bool fast); + ~CPUPlayerVideoPreparer (); + + void request (boost::shared_ptr pv); + +private: + void prepare (boost::weak_ptr pv); + + boost::thread_group _pool; + boost::asio::io_service _service; + boost::shared_ptr _work; + + boost::function _pixel_format; + bool _aligned; + bool _fast; +}; + +#endif diff --git a/src/lib/fastvideo.cc b/src/lib/fastvideo.cc index 2b236bd9b..8e67f4b26 100644 --- a/src/lib/fastvideo.cc +++ b/src/lib/fastvideo.cc @@ -129,7 +129,7 @@ fastvideo_decompress_j2k (dcp::Data data, int reduce) &decoder, ¶meters, FAST_RGB8, info.width, info.height, - max_batch_si\e, + max_batch_size, &buffer ); if (r != FAST_OK) { diff --git a/src/lib/fastvideo_player_video_preparer.cc b/src/lib/fastvideo_player_video_preparer.cc new file mode 100644 index 000000000..9d694df14 --- /dev/null +++ b/src/lib/fastvideo_player_video_preparer.cc @@ -0,0 +1,221 @@ +#include "dcpomatic_assert.h" +#include "exceptions.h" +#include "image.h" +#include "image_proxy.h" +#include "fastvideo_player_video_preparer.h" +#include "j2k_image_proxy.h" +#include "player_video.h" +#include +#include +#include + + +using boost::bind; +using boost::const_pointer_cast; +using boost::dynamic_pointer_cast; +using boost::shared_ptr; +using boost::weak_ptr; + + +FastvideoPlayerVideoPreparer::FastvideoPlayerVideoPreparer (boost::function pixel_format, bool aligned, bool fast) + : _stop_thread (false) + , _decoder (0) + , _setup_done (false) + , _cpu (pixel_format, aligned, fast) +{ + fastSdkParametersHandle_t sdk_parameters; + fastStatus_t r = fastGetSdkParametersHandle(&sdk_parameters); + if (r != FAST_OK) { + throw FastvideoError ("GetSdkParametersHandle", r); + } + r = fastDecoderJ2kLibraryInit(sdk_parameters); + if (r != FAST_OK) { + throw FastvideoError ("DecoderJ2kLibraryInit", r); + } + + _thread = boost::thread (bind(&FastvideoPlayerVideoPreparer::thread, this)); +} + + +FastvideoPlayerVideoPreparer::~FastvideoPlayerVideoPreparer () +{ + _stop_thread = true; + _work.notify_all (); + try { + _thread.join (); + } catch (...) {} + + if (_setup_done) { + fastDecoderJ2kDestroy(_decoder); + fastExportToHostDestroy(_adapter); + } +} + + +void +FastvideoPlayerVideoPreparer::request (shared_ptr pv) +{ + boost::mutex::scoped_lock lm (_mutex); + _queue.push (weak_ptr(pv)); + _work.notify_all (); +} + + +void +FastvideoPlayerVideoPreparer::thread () +{ + while (true) { + boost::mutex::scoped_lock lm (_mutex); + while (!_stop_thread && _queue.empty()) { + _work.wait (lm); + } + + if (_stop_thread) { + return; + } + + weak_ptr weak = _queue.front (); + _queue.pop (); + + lm.unlock (); + shared_ptr pv = weak.lock (); + if (pv) { + /* We're going to "prepare" just the image proxy here (if it interests us i.e. if it's J2K, + * then hand over the rest of the work to a CPUPlayerVideoPreparer. + */ + shared_ptr ip = pv->image_proxy (); + shared_ptr jp = dynamic_pointer_cast (ip); + if (jp) { + send_to_gpu (pv, jp); + } else { + _cpu.request (pv); + } + } + } +} + + +void +FastvideoPlayerVideoPreparer::send_to_gpu (shared_ptr pv, shared_ptr proxy) +{ + if (!_setup_done) { + std::cout << "setup.\n"; + setup (proxy->j2k()); + _setup_done = true; + } + + std::cout << "add to batch.\n"; + fastDecoderJ2kAddImageToBatch(_decoder, proxy->j2k().data().get(), proxy->j2k().size()); + _batch.push_back (pv); + + int free_slots = 0; + fastDecoderJ2kFreeSlotsInBatch(_decoder, &free_slots); + std::cout << free_slots << " left in batch.\n"; + if (free_slots == 0) { + /* Do some decoding */ + transform_and_extract (); + } + + /* XXX: maybe should hoover up any left-overs at some point */ +} + + +void +FastvideoPlayerVideoPreparer::transform_and_extract () +{ + fastDecoderJ2kReport_t report; + fastStatus_t r = fastDecoderJ2kTransformBatch(_decoder, &report); + if (r != FAST_OK) { + throw FastvideoError ("DecoderJ2kTransformBatch", r); + } + + size_t index = 0; + int images_left; + do { + DCPOMATIC_ASSERT (index <= _batch.size()); + shared_ptr pv = _batch[index].lock(); + if (pv) { + std::cout << "got a pv for batch number " << index << "\n"; + /* XXX: this should be memlocked or whatever fastMalloc does */ + shared_ptr proxy = const_pointer_cast(dynamic_pointer_cast(pv->image_proxy())); + DCPOMATIC_ASSERT (proxy); + dcp::Size const size = proxy->size(); + shared_ptr image(new dcpomatic::Image(AV_PIX_FMT_RGB24, size, true)); + fastExportParameters_t export_parameters; + export_parameters.convert = FAST_CONVERT_NONE; + fastStatus_t r = fastExportToHostCopy(_adapter, image->data()[0], size.width, image->stride()[0], size.height, &export_parameters); + if (r != FAST_OK) { + throw FastvideoError ("ExportToHostCopy", r); + } + + std::cout << "setting the proxy's image.\n"; + proxy->set_image (image); + + _cpu.request (pv); + } + + fastStatus_t r = fastDecoderJ2kGetNextDecodedImage(_decoder, &report, &images_left); + std::cout << images_left << " images left.\n"; + if (r != FAST_OK) { + throw FastvideoError ("DecoderJ2kGetNextDecodedImage", r); + } + + ++index; + + } while (images_left); + + _batch.clear (); +} + + +void +FastvideoPlayerVideoPreparer::setup (dcp::Data sample) +{ + fastJ2kImageInfo_t info; + fastStatus_t r = fastDecoderJ2kPredecode(&info, sample.data().get(), sample.size()); + if (r != FAST_OK) { + throw FastvideoError ("DecoderJ2kPredecode", r); + } + + fastDecoderJ2kStaticParameters_t parameters; + memset(¶meters, 0, sizeof(fastDecoderJ2kStaticParameters_t)); + + parameters.ResolutionLevels = 0; + parameters.verboseLevel = 1; + parameters.enableROI = 0; + + parameters.maxTileWidth = info.width; + parameters.maxTileHeight = info.height; + + parameters.windowX0 = 0; + parameters.windowY0 = 0; + parameters.windowWidth = info.width; + parameters.windowHeight = info.height; + + parameters.truncationLength = 0; + parameters.truncationMode = 0; + parameters.truncationRate = 0; + + parameters.DecodePasses = 0; + parameters.imageInfo = &info; + parameters.maxStreamSize = max_stream_size; + + r = fastDecoderJ2kCreate( + &_decoder, + ¶meters, + FAST_RGB8, info.width, info.height, + max_batch_size, + &_buffer + ); + + if (r != FAST_OK) { + fastTraceClose (); + throw FastvideoError ("J2kCreate", r); + } + + fastSurfaceFormat_t surface_format = FAST_RGB8; + r = fastExportToHostCreate(&_adapter, &surface_format, _buffer); + if (r != FAST_OK) { + throw FastvideoError ("ExportToHostCreate"); + } +} diff --git a/src/lib/fastvideo_player_video_preparer.h b/src/lib/fastvideo_player_video_preparer.h new file mode 100644 index 000000000..384d8edeb --- /dev/null +++ b/src/lib/fastvideo_player_video_preparer.h @@ -0,0 +1,49 @@ +#include "cpu_player_video_preparer.h" +#include "player_video_preparer.h" +#include +#include +#include +#include +#include +#include +#include +#include + + +class J2KImageProxy; + + +class FastvideoPlayerVideoPreparer : public PlayerVideoPreparer +{ +public: + FastvideoPlayerVideoPreparer (boost::function pixel_format, bool aligned, bool fast); + ~FastvideoPlayerVideoPreparer (); + + void request (boost::shared_ptr pv); + +private: + void thread (); + void setup (dcp::Data sample); + void send_to_gpu (boost::shared_ptr pv, boost::shared_ptr proxy); + void transform_and_extract (); + + std::queue > _queue; + boost::mutex _mutex; + boost::condition _work; + + boost::atomic _stop_thread; + boost::thread _thread; + + fastDecoderJ2kHandle_t _decoder; + fastDeviceSurfaceBufferHandle_t _buffer; + fastExportToHostHandle_t _adapter; + bool _setup_done; + + std::vector > _batch; + + CPUPlayerVideoPreparer _cpu; + + static const int max_batch_size = 16; + static const int max_stream_size = 1302083; +}; + diff --git a/src/lib/j2k_image_proxy.cc b/src/lib/j2k_image_proxy.cc index c8a561d25..18d597e50 100644 --- a/src/lib/j2k_image_proxy.cc +++ b/src/lib/j2k_image_proxy.cc @@ -127,9 +127,23 @@ J2KImageProxy::prepare (optional target_size) const { boost::mutex::scoped_lock lm (_mutex); - if (_image && target_size == _target_size) { - DCPOMATIC_ASSERT (_reduce); - return *_reduce; + if (target_size) { + std::cout << "target_size is " << target_size->width << " " << target_size->height << "\n"; + } else { + std::cout << "no target size.\n"; + } + + if (_target_size) { + std::cout << "_target_size is " << _target_size->width << " " << _target_size->height << "\n"; + } else { + std::cout << "no _target size.\n"; + } + + if (_image) {// && target_size == _target_size) { + std::cout << "hit.\n"; + return 0; + //DCPOMATIC_ASSERT (_reduce); + //return *_reduce; } int reduce = 0; @@ -146,6 +160,7 @@ J2KImageProxy::prepare (optional target_size) const } try { + std::cout << "cpu decompress.\n"; shared_ptr decompressed = dcp::decompress_j2k (_data, reduce); _image.reset (new Image (_pixel_format, decompressed->size(), true)); @@ -247,3 +262,12 @@ J2KImageProxy::memory_used () const } return m; } + + +void +J2KImageProxy::set_image (shared_ptr image) +{ + // XXX need _target_size to be set up + boost::mutex::scoped_lock lm (_mutex); + _image = image; +} diff --git a/src/lib/j2k_image_proxy.h b/src/lib/j2k_image_proxy.h index 78e0cbd9b..e9cb69f7a 100644 --- a/src/lib/j2k_image_proxy.h +++ b/src/lib/j2k_image_proxy.h @@ -58,7 +58,12 @@ public: void write_to_socket (boost::shared_ptr) const; /** @return true if our image is definitely the same as another, false if it is probably not */ bool same (boost::shared_ptr) const; + + /* these are kind of related; one prepares by calculating the image in-place, + * one by being given the image; they could be renamed, perhaps. + */ int prepare (boost::optional = boost::optional()) const; + void set_image (boost::shared_ptr image); dcp::Data j2k () const { return _data; diff --git a/src/lib/player_video.h b/src/lib/player_video.h index 9fd313a15..464bb4894 100644 --- a/src/lib/player_video.h +++ b/src/lib/player_video.h @@ -82,6 +82,7 @@ public: bool reset_metadata (boost::shared_ptr film, dcp::Size video_container_size, dcp::Size film_frame_size); bool has_j2k () const; + /* XXX: who's using this?! */ dcp::Data j2k () const; Eyes eyes () const { @@ -116,6 +117,10 @@ public: return _error; } + boost::shared_ptr image_proxy () const { + return _in; + } + private: void make_image (boost::function pixel_format, bool aligned, bool fast) const; diff --git a/src/lib/player_video_preparer.h b/src/lib/player_video_preparer.h index c16463477..821d9794c 100644 --- a/src/lib/player_video_preparer.h +++ b/src/lib/player_video_preparer.h @@ -1,14 +1,17 @@ -class PlayerVideoPreparer -{ -public: - virtual void request (boost::shared_ptr pv) = 0; -}; +#ifndef DCPOMATIC_PLAYER_VIDEO_PREPARER_H +#define DCPOMATIC_PLAYER_VIDEO_PREPARER_H + +#include "exception_store.h" +#include +class PlayerVideo; -class CPUPlayerVideoPreparer : public PlayerVideoPreparer +class PlayerVideoPreparer : public ExceptionStore { public: - void request (boost::shared_ptr pv); + virtual ~PlayerVideoPreparer () {} + virtual void request (boost::shared_ptr pv) = 0; }; +#endif diff --git a/src/lib/wscript b/src/lib/wscript index bbf72a038..3b14f8738 100644 --- a/src/lib/wscript +++ b/src/lib/wscript @@ -56,6 +56,7 @@ sources = """ content.cc content_factory.cc copy_dcp_details_to_film.cc + cpu_player_video_preparer.cc create_cli.cc cross_common.cc crypto.cc @@ -229,7 +230,7 @@ def build(bld): if bld.env.STATIC_DCPOMATIC: obj.uselib += ' XMLPP' if bld.env.ENABLE_FASTVIDEO: - obj.source += ' fastvideo.cc' + obj.source += ' fastvideo_player_video_preparer.cc' obj.target = 'dcpomatic2' diff --git a/test/fastvideo_test.cc b/test/fastvideo_test.cc index 65ea1bb7e..ba78900cb 100644 --- a/test/fastvideo_test.cc +++ b/test/fastvideo_test.cc @@ -1,10 +1,61 @@ -#include "lib/fastvideo.h" +#include "lib/colour_conversion.h" +#include "lib/content.h" +#include "lib/cross.h" +#include "lib/image.h" +#include "lib/fastvideo_player_video_preparer.h" +#include "lib/j2k_image_proxy.h" +#include "lib/player_video.h" +#include "lib/types.h" #include "test.h" +#include +#include #include +#include -BOOST_AUTO_TEST_CASE (fastvideo_decoder_test) + +using std::vector; +using boost::optional; +using boost::shared_ptr; +using boost::weak_ptr; + + +static +AVPixelFormat +pixel_format (AVPixelFormat) { - dcp::Data data ("test/data/sizing_card_flat.j2k"); - fastvideo_decompress_j2k (data, 0); + return AV_PIX_FMT_RGB24; +} + + +BOOST_AUTO_TEST_CASE (fastvideo_preparer_setup_teardown_test) +{ + FastvideoPlayerVideoPreparer preparer (&pixel_format, true, true); +} + + +BOOST_AUTO_TEST_CASE (fastvideo_preparer_simple_decode_test) +{ + FastvideoPlayerVideoPreparer preparer (&pixel_format, true, true); + + shared_ptr proxy(new J2KImageProxy("test/data/sizing_card_flat.j2k", dcp::Size(1998, 1080), AV_PIX_FMT_XYZ12)); + + vector > videos; + + for (int i = 0; i < 16; ++i) { + shared_ptr pv( + new PlayerVideo( + proxy, Crop(), optional(), dcp::Size(1998, 1080), dcp::Size(1998, 1080), + EYES_BOTH, PART_WHOLE, optional(), VIDEO_RANGE_FULL, weak_ptr(), optional(), false + ) + ); + + preparer.request (pv); + videos.push_back (pv); + } + + dcpomatic_sleep_seconds (5); + + std::cout << "____-> get an image.\n"; + videos[0]->image_proxy()->image().image->convert_pixel_format(dcp::YUV_TO_RGB_REC709, AV_PIX_FMT_RGBA, true, true)->as_png().write("/home/carl/foo.png"); } -- 2.30.2