Hacks to multi-thread GPU encoding.

author Carl Hetherington <cth@carlh.net>

Sun, 23 Aug 2020 18:27:33 +0000 (20:27 +0200)

committer Carl Hetherington <cth@carlh.net>

Sun, 13 Sep 2020 18:23:29 +0000 (20:23 +0200)
author Carl Hetherington <cth@carlh.net>
Sun, 23 Aug 2020 18:27:33 +0000 (20:27 +0200)
committer Carl Hetherington <cth@carlh.net>
Sun, 13 Sep 2020 18:23:29 +0000 (20:23 +0200)
diff --git a/src/lib/j2k_encoder.cc b/src/lib/j2k_encoder.cc

index 82e9ba76c7b28a12868f98f4cbe6faab09dd34a5..b40f5c95010255d3b1f6daf6052a460429c96443 100644 (file)
--- a/src/lib/j2k_encoder.cc
+++ b/src/lib/j2k_encoder.cc
@@ -38,14 +38,17 @@
  #include "encode_server_finder.h"
  #include "player.h"
  #include "player_video.h"
+#include "timer.h"
  #include "encode_server_description.h"
  #include "compose.hpp"
  #include <libcxml/cxml.h>
  #include <boost/foreach.hpp>
+#include <boost/thread/barrier.hpp>
  #include <iostream>
  
  #include "i18n.h"
  
+using std::min;
  using std::list;
  using std::cout;
  using std::exception;
@@ -56,6 +59,9 @@ using boost::optional;
  using dcp::Data;
  using namespace dcpomatic;
  
+/* XXX */
+#define GPU_THREADS 4
+
  /** @param film Film that we are encoding.
   *  @param writer Writer that we are using.
   */
@@ -64,6 +70,7 @@ J2KEncoder::J2KEncoder (shared_ptr<const Film> film, shared_ptr<Writer> writer)
         , _history (200)
         , _cpu_backend (new J2KEncoderCPUBackend())
         , _writer (writer)
+       , _gpu_barrier (GPU_THREADS)
  {
         servers_list_changed ();
  }
@@ -194,7 +201,8 @@ J2KEncoder::encode (shared_ptr<PlayerVideo> pv, DCPTime time)
         /* Wait until the queue has gone down a bit.  Allow one thing in the queue even
            when there are no threads.
         */
-       while (_queue.size() >= (threads * 2) + 1) {
+       /* XXX: needs correct handling for GPU */
+       while (_queue.size() >= (threads * 2) + 1 + 48) {
                 LOG_TIMING ("decoder-sleep queue=%1 threads=%2", _queue.size(), threads);
                 _full_condition.wait (queue_lock);
                 LOG_TIMING ("decoder-wake queue=%1 threads=%2", _queue.size(), threads);
@@ -218,11 +226,14 @@ J2KEncoder::encode (shared_ptr<PlayerVideo> pv, DCPTime time)
                 LOG_DEBUG_ENCODE("Frame @ %1 J2K", to_string(time));
                 /* This frame already has J2K data, so just write it */
                 _writer->write (pv->j2k(), position, pv->eyes ());
+               /*
         } else if (_last_player_video[pv->eyes()] && _writer->can_repeat(position) && pv->same (_last_player_video[pv->eyes()])) {
                 LOG_DEBUG_ENCODE("Frame @ %1 REPEAT", to_string(time));
                 _writer->repeat (position, pv->eyes ());
+               */
         } else {
                 LOG_DEBUG_ENCODE("Frame @ %1 ENCODE", to_string(time));
+               std::cout << "queue " << to_string(time) << "\n";
                 /* Queue this new frame for encoding */
                 LOG_TIMING ("add-frame-to-queue queue=%1", _queue.size ());
                 _queue.push_back (shared_ptr<DCPVideo> (
@@ -276,13 +287,14 @@ try
  
                 LOG_TIMING ("encoder-sleep thread=%1", thread_id ());
                 boost::mutex::scoped_lock lock (_queue_mutex);
-               while (static_cast<int>(_queue.size()) < backend->quantity()) {
+               while (_queue.empty()) {
                         _empty_condition.wait (lock);
                 }
  
                 LOG_TIMING ("encoder-wake thread=%1 queue=%2", thread_id(), _queue.size());
                 vector<shared_ptr<DCPVideo> > video;
-               for (int i = 0; i < backend->quantity(); ++i) {
+               int to_do = min(static_cast<int>(_queue.size()), backend->quantity());
+               for (int i = 0; i < to_do; ++i) {
                         video.push_back (_queue.front());
                         _queue.pop_front();
                 }
@@ -292,7 +304,8 @@ try
                    block has thread interruption disabled.
                 */
                 {
-                       boost::this_thread::disable_interruption dis;
+                       // XXX HACK!!
+                       //boost::this_thread::disable_interruption dis;
  
                         LOG_TIMING ("encoder-pop thread=%1 frames=%2 eyes=%3", thread_id(), video.size());
  
@@ -301,11 +314,11 @@ try
                         vector<Data> encoded = backend->encode (video);
  
                         if (!encoded.empty()) {
-                               DCPOMATIC_ASSERT (static_cast<int>(encoded.size()) == backend->quantity());
-                               for (int i = 0; i < backend->quantity(); ++i) {
+                               DCPOMATIC_ASSERT (encoded.size() == video.size());
+                               for (size_t i = 0; i < video.size(); ++i) {
                                         _writer->write (encoded[i], video[i]->index(), video[i]->frame()->eyes());
                                 }
-                               frames_done (backend->quantity());
+                               frames_done (video.size());
                         } else {
                                 lock.lock ();
                                 LOG_GENERAL (N_("[%1] J2KEncoder thread pushes %2 frames back onto queue after failure"), thread_id(), video.size());
@@ -325,8 +338,16 @@ catch (boost::thread_interrupted& e) {
         /* Ignore these and just stop the thread */
         _full_condition.notify_all ();
  }
+catch (std::exception& e)
+{
+       std::cout << "encoder thread crashed " << e.what() << "\n";
+       store_current ();
+       /* Wake anything waiting on _full_condition so it can see the exception */
+       _full_condition.notify_all ();
+}
  catch (...)
  {
+       std::cout << "encoder thread crashed.\n";
         store_current ();
         /* Wake anything waiting on _full_condition so it can see the exception */
         _full_condition.notify_all ();
@@ -363,8 +384,10 @@ J2KEncoder::servers_list_changed ()
                 }
         }
  
-       shared_ptr<J2KEncoderFastvideoBackend> fastvideo(new J2KEncoderFastvideoBackend());
-       _threads->create_thread(boost::bind(&J2KEncoder::encoder_thread, this, fastvideo));
+       for (int i = 0; i < GPU_THREADS; ++i) {
+               shared_ptr<J2KEncoderFastvideoBackend> fastvideo(new J2KEncoderFastvideoBackend(_gpu_barrier));
+               _threads->create_thread(boost::bind(&J2KEncoder::encoder_thread, this, fastvideo));
+       }
  
         _writer->set_encoder_threads (_threads->size());
  }
diff --git a/src/lib/j2k_encoder.h b/src/lib/j2k_encoder.h

index cc782d96d949ba7bd76114191eb5e28af45edbad..9d73ded70a57a9cab6d9574eeeb79aa5cd83a250 100644 (file)
--- a/src/lib/j2k_encoder.h
+++ b/src/lib/j2k_encoder.h
@@ -107,6 +107,8 @@ private:
         boost::shared_ptr<PlayerVideo> _last_player_video[EYES_COUNT];
         boost::optional<dcpomatic::DCPTime> _last_player_video_time;
  
+       boost::barrier _gpu_barrier;
+
         boost::signals2::scoped_connection _server_found_connection;
  };
  
diff --git a/src/lib/j2k_encoder_fastvideo_backend.cc b/src/lib/j2k_encoder_fastvideo_backend.cc

index 17775510aaade40590100d5f6f5c2ff602e1b3c2..cf814f13cd212e72ffae2812c3c13320832ffad0 100644 (file)
--- a/src/lib/j2k_encoder_fastvideo_backend.cc
+++ b/src/lib/j2k_encoder_fastvideo_backend.cc
@@ -17,8 +17,9 @@ using dcp::Data;
  
  
  
-J2KEncoderFastvideoBackend::J2KEncoderFastvideoBackend ()
-       : _setup_done (false)
+J2KEncoderFastvideoBackend::J2KEncoderFastvideoBackend (boost::barrier& barrier)
+       : _barrier (barrier)
+       , _setup_done (false)
  {
         fastSdkParametersHandle_t sdk_parameters;
         fastStatus_t r = fastGetSdkParametersHandle(&sdk_parameters);
@@ -99,14 +100,18 @@ J2KEncoderFastvideoBackend::setup (dcp::Size size)
  vector<Data>
  J2KEncoderFastvideoBackend::encode (vector<shared_ptr<DCPVideo> > video)
  {
-       DCPOMATIC_ASSERT (static_cast<int>(video.size()) == quantity());
+       DCPOMATIC_ASSERT (static_cast<int>(video.size()) <= quantity());
         std::cout << "FV: " << video.size() << " from " << video.front()->index() << "\n";
+       // XXX: works most of the time, not sure how it helps (need to ask) but screws
+       // things up when we're only encoding one frame
+       //_barrier.wait();
  
         if (!_setup_done) {
                 setup (video.front()->frame()->out_size());
                 _setup_done = true;
         }
  
+       std::cout << "hello dolly.\n";
         BOOST_FOREACH (shared_ptr<DCPVideo> i, video) {
                 shared_ptr<dcpomatic::Image> image = i->frame()->image(boost::bind(&PlayerVideo::keep_xyz_or_rgb, _1), true, false);
                 if (i->frame()->colour_conversion()) {
@@ -122,6 +127,7 @@ J2KEncoderFastvideoBackend::encode (vector<shared_ptr<DCPVideo> > video)
                         /* XXX */
                 }
  
+               std::cout << "here we go.\n";
                 fastStatus_t r = fastImportFromHostCopy(
                         _adapter,
                         _xyz_buffer,
@@ -147,12 +153,7 @@ J2KEncoderFastvideoBackend::encode (vector<shared_ptr<DCPVideo> > video)
         }
  
  
-       int free_slots = 0;
-       fastEncoderJ2kFreeSlotsInBatch(_encoder, &free_slots);
-       DCPOMATIC_ASSERT (free_slots == 0);
-
         fastEncoderJ2kReport_t report;
-
         fastEncoderJ2kOutput_t output;
         int const max_j2k_size = 1024 * 1024 * 2;
         output.bufferSize = max_j2k_size;
diff --git a/src/lib/j2k_encoder_fastvideo_backend.h b/src/lib/j2k_encoder_fastvideo_backend.h

index abf6c43553b5603341f5a7ec31449071f743587e..d65e6e1b488deb168c9a707d8660448ac7594ed4 100644 (file)
--- a/src/lib/j2k_encoder_fastvideo_backend.h
+++ b/src/lib/j2k_encoder_fastvideo_backend.h
@@ -7,23 +7,25 @@
  #include <fastvideo_encoder_j2k.h>
  #include <fastvideo_sdk.h>
  #include <boost/thread.hpp>
+#include <boost/thread/barrier.hpp>
  
  
  class J2KEncoderFastvideoBackend : public J2KEncoderBackend
  {
  public:
-       J2KEncoderFastvideoBackend ();
+       J2KEncoderFastvideoBackend (boost::barrier& barrier);
         ~J2KEncoderFastvideoBackend ();
  
         std::vector<dcp::Data> encode (std::vector<boost::shared_ptr<DCPVideo> > video);
  
         int quantity () const {
-               return 16;
+               return 4;
         }
  
  private:
         void setup (dcp::Size size);
  
+       boost::barrier& _barrier;
         bool _setup_done;
         fastImportFromHostHandle_t _adapter;
         fastDeviceSurfaceBufferHandle_t _src_buffer;
author	Carl Hetherington <cth@carlh.net>
	Sun, 23 Aug 2020 18:27:33 +0000 (20:27 +0200)
committer	Carl Hetherington <cth@carlh.net>
	Sun, 13 Sep 2020 18:23:29 +0000 (20:23 +0200)
src/lib/j2k_encoder.cc		patch \| blob \| history
src/lib/j2k_encoder.h		patch \| blob \| history
src/lib/j2k_encoder_fastvideo_backend.cc		patch \| blob \| history
src/lib/j2k_encoder_fastvideo_backend.h		patch \| blob \| history