Rearrange encoder threading.
[dcpomatic.git] / src / lib / j2k_encoder.cc
index 5c3fd477ef16dfeefb63d90bdb13bdbdd156bd0a..8c7a1ef1be60c8c69babcc68d9bb03a450eb315c 100644 (file)
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2012-2019 Carl Hetherington <cth@carlh.net>
+    Copyright (C) 2012-2021 Carl Hetherington <cth@carlh.net>
 
     This file is part of DCP-o-matic.
 
 
 */
 
+
 /** @file src/j2k_encoder.cc
  *  @brief J2K encoder class.
  */
 
-#include "j2k_encoder.h"
-#include "util.h"
-#include "film.h"
-#include "log.h"
-#include "dcpomatic_log.h"
+
+#include "compose.hpp"
 #include "config.h"
-#include "dcp_video.h"
 #include "cross.h"
-#include "writer.h"
+#include "dcp_video.h"
+#include "dcpomatic_log.h"
+#include "encode_server_description.h"
 #include "encode_server_finder.h"
-#include "player.h"
+#include "film.h"
+#include "cpu_j2k_encoder_thread.h"
+#include "grok_j2k_encoder_thread.h"
+#include "remote_j2k_encoder_thread.h"
+#include "j2k_encoder.h"
+#include "log.h"
 #include "player_video.h"
-#include "encode_server_description.h"
-#include "compose.hpp"
+#include "util.h"
+#include "writer.h"
 #include <libcxml/cxml.h>
-#include <boost/foreach.hpp>
 #include <iostream>
 
 #include "i18n.h"
 
-using std::list;
+
 using std::cout;
+using std::dynamic_pointer_cast;
 using std::exception;
-using boost::shared_ptr;
-using boost::weak_ptr;
+using std::list;
+using std::make_shared;
+using std::shared_ptr;
+using std::weak_ptr;
 using boost::optional;
 using dcp::Data;
 using namespace dcpomatic;
 
+
+static grk_plugin::GrokInitializer grokInitializer;
+
 /** @param film Film that we are encoding.
  *  @param writer Writer that we are using.
  */
-J2KEncoder::J2KEncoder (shared_ptr<const Film> film, shared_ptr<Writer> writer)
+J2KEncoder::J2KEncoder(shared_ptr<const Film> film, Writer& writer)
        : _film (film)
        , _history (200)
        , _writer (writer)
+       , _dcpomatic_context(film, writer, _history, Config::instance()->gpu_binary_location())
+       , _context(Config::instance()->enable_gpu() ? new grk_plugin::GrokContext(_dcpomatic_context) : nullptr)
 {
-       servers_list_changed ();
 }
 
+
 J2KEncoder::~J2KEncoder ()
 {
-       try {
-               terminate_threads ();
-       } catch (...) {
-               /* Destructors must not throw exceptions; anything bad
-                  happening now is too late to worry about anyway,
-                  I think.
-               */
-       }
+       _server_found_connection.disconnect();
+
+       terminate_threads();
+
+       delete _context;
 }
 
+
+void
+J2KEncoder::servers_list_changed()
+{
+       auto config = Config::instance();
+
+       auto const cpu = (config->enable_gpu() || config->only_servers_encode()) ? 0 : config->master_encoding_threads();
+       auto const gpu = config->enable_gpu() ? config->master_encoding_threads() : 0;
+
+       remake_threads(cpu, gpu, EncodeServerFinder::instance()->servers());
+}
+
+
 void
 J2KEncoder::begin ()
 {
-       weak_ptr<J2KEncoder> wp = shared_from_this ();
-       _server_found_connection = EncodeServerFinder::instance()->ServersListChanged.connect (
-               boost::bind (&J2KEncoder::call_servers_list_changed, wp)
+       _server_found_connection = EncodeServerFinder::instance()->ServersListChanged.connect(
+               boost::bind(&J2KEncoder::servers_list_changed, this)
                );
+       servers_list_changed ();
 }
 
-/* We don't want the servers-list-changed callback trying to do things
-   during destruction of J2KEncoder, and I think this is the neatest way
-   to achieve that.
-*/
+
 void
-J2KEncoder::call_servers_list_changed (weak_ptr<J2KEncoder> encoder)
+J2KEncoder::pause()
 {
-       shared_ptr<J2KEncoder> e = encoder.lock ();
-       if (e) {
-               e->servers_list_changed ();
+       if (!Config::instance()->enable_gpu()) {
+               return;
        }
+
+       terminate_threads ();
+
+       /* Something might have been thrown during terminate_threads */
+       rethrow ();
+
+       delete _context;
+       _context = nullptr;
 }
 
+
+void J2KEncoder::resume()
+{
+       if (!Config::instance()->enable_gpu()) {
+               return;
+       }
+
+       _context = new grk_plugin::GrokContext(_dcpomatic_context);
+       servers_list_changed();
+}
+
+
 void
-J2KEncoder::end ()
+J2KEncoder::end()
 {
        boost::mutex::scoped_lock lock (_queue_mutex);
 
@@ -106,16 +143,17 @@ J2KEncoder::end ()
        /* Keep waking workers until the queue is empty */
        while (!_queue.empty ()) {
                rethrow ();
-               _empty_condition.notify_all ();
                _full_condition.wait (lock);
        }
-
        lock.unlock ();
 
        LOG_GENERAL_NC (N_("Terminating encoder threads"));
 
        terminate_threads ();
 
+       /* Something might have been thrown during terminate_threads */
+       rethrow ();
+
        LOG_GENERAL (N_("Mopping up %1"), _queue.size());
 
        /* The following sequence of events can occur in the above code:
@@ -126,22 +164,33 @@ J2KEncoder::end ()
 
             So just mop up anything left in the queue here.
        */
-
-       for (list<shared_ptr<DCPVideo> >::iterator i = _queue.begin(); i != _queue.end(); ++i) {
-               LOG_GENERAL (N_("Encode left-over frame %1"), (*i)->index ());
-               try {
-                       _writer->write (
-                               (*i)->encode_locally(),
-                               (*i)->index(),
-                               (*i)->eyes()
-                               );
-                       frame_done ();
-               } catch (std::exception& e) {
-                       LOG_ERROR (N_("Local encode failed (%1)"), e.what ());
+       for (auto & i: _queue) {
+               if (Config::instance()->enable_gpu ()) {
+                       if (!_context->scheduleCompress(i)){
+                               LOG_GENERAL (N_("[%1] J2KEncoder thread pushes frame %2 back onto queue after failure"), thread_id(), i.index());
+                               // handle error
+                       }
+               }
+               else {
+                       LOG_GENERAL(N_("Encode left-over frame %1"), i.index());
+                       try {
+                               _writer.write(
+                                       make_shared<dcp::ArrayData>(i.encode_locally()),
+                                       i.index(),
+                                       i.eyes()
+                                       );
+                               frame_done ();
+                       } catch (std::exception& e) {
+                               LOG_ERROR (N_("Local encode failed (%1)"), e.what ());
+                       }
                }
        }
+
+       delete _context;
+       _context = nullptr;
 }
 
+
 /** @return an estimate of the current number of frames we are encoding per second,
  *  if known.
  */
@@ -151,6 +200,7 @@ J2KEncoder::current_encoding_rate () const
        return _history.rate ();
 }
 
+
 /** @return Number of video frames that have been queued for encoding */
 int
 J2KEncoder::video_frames_enqueued () const
@@ -162,6 +212,7 @@ J2KEncoder::video_frames_enqueued () const
        return _last_player_video_time->frames_floor (_film->video_frame_rate ());
 }
 
+
 /** Should be called when a frame has been encoded successfully */
 void
 J2KEncoder::frame_done ()
@@ -169,6 +220,7 @@ J2KEncoder::frame_done ()
        _history.event ();
 }
 
+
 /** Called to request encoding of the next video frame in the DCP.  This is called in order,
  *  so each time the supplied frame is the one after the previous one.
  *  pv represents one video frame, and could be empty if there is nothing to encode
@@ -184,8 +236,8 @@ J2KEncoder::encode (shared_ptr<PlayerVideo> pv, DCPTime time)
 
        size_t threads = 0;
        {
-               boost::mutex::scoped_lock threads_lock (_threads_mutex);
-               threads = _threads.size ();
+               boost::mutex::scoped_lock lm (_threads_mutex);
+               threads = _threads.size();
        }
 
        boost::mutex::scoped_lock queue_lock (_queue_mutex);
@@ -199,40 +251,40 @@ J2KEncoder::encode (shared_ptr<PlayerVideo> pv, DCPTime time)
                LOG_TIMING ("decoder-wake queue=%1 threads=%2", _queue.size(), threads);
        }
 
-       _writer->rethrow ();
+       _writer.rethrow();
        /* Re-throw any exception raised by one of our threads.  If more
           than one has thrown an exception, only one will be rethrown, I think;
           but then, if that happens something has gone badly wrong.
        */
        rethrow ();
 
-       Frame const position = time.frames_floor(_film->video_frame_rate());
+       auto const position = time.frames_floor(_film->video_frame_rate());
 
-       if (_writer->can_fake_write (position)) {
+       if (_writer.can_fake_write(position)) {
                /* We can fake-write this frame */
                LOG_DEBUG_ENCODE("Frame @ %1 FAKE", to_string(time));
-               _writer->fake_write (position, pv->eyes ());
+               _writer.fake_write(position, pv->eyes ());
                frame_done ();
        } else if (pv->has_j2k() && !_film->reencode_j2k()) {
                LOG_DEBUG_ENCODE("Frame @ %1 J2K", to_string(time));
                /* This frame already has J2K data, so just write it */
-               _writer->write (pv->j2k(), position, pv->eyes ());
-       } else if (_last_player_video[pv->eyes()] && _writer->can_repeat(position) && pv->same (_last_player_video[pv->eyes()])) {
+               _writer.write(pv->j2k(), position, pv->eyes ());
+               frame_done ();
+       } else if (_last_player_video[pv->eyes()] && _writer.can_repeat(position) && pv->same(_last_player_video[pv->eyes()])) {
                LOG_DEBUG_ENCODE("Frame @ %1 REPEAT", to_string(time));
-               _writer->repeat (position, pv->eyes ());
+               _writer.repeat(position, pv->eyes());
        } else {
                LOG_DEBUG_ENCODE("Frame @ %1 ENCODE", to_string(time));
                /* Queue this new frame for encoding */
                LOG_TIMING ("add-frame-to-queue queue=%1", _queue.size ());
-               _queue.push_back (shared_ptr<DCPVideo> (
-                                         new DCPVideo (
-                                                 pv,
-                                                 position,
-                                                 _film->video_frame_rate(),
-                                                 _film->j2k_bandwidth(),
-                                                 _film->resolution()
-                                                 )
-                                         ));
+               auto dcpv = DCPVideo(
+                               pv,
+                               position,
+                               _film->video_frame_rate(),
+                               _film->j2k_bandwidth(),
+                               _film->resolution()
+                               );
+               _queue.push_back (dcpv);
 
                /* The queue might not be empty any more, so notify anything which is
                   waiting on that.
@@ -244,187 +296,140 @@ J2KEncoder::encode (shared_ptr<PlayerVideo> pv, DCPTime time)
        _last_player_video_time = time;
 }
 
+
 void
 J2KEncoder::terminate_threads ()
 {
-       boost::mutex::scoped_lock threads_lock (_threads_mutex);
-
-       int n = 0;
-       for (list<boost::thread *>::iterator i = _threads.begin(); i != _threads.end(); ++i) {
-               /* Be careful not to throw in here otherwise _threads will not be clear()ed */
-               LOG_GENERAL ("Terminating thread %1 of %2", n + 1, _threads.size ());
-               (*i)->interrupt ();
-               if (!(*i)->joinable()) {
-                       LOG_ERROR_NC ("About to join() a non-joinable thread");
-               }
-               try {
-                       (*i)->join ();
-               } catch (boost::thread_interrupted& e) {
-                       /* This is to be expected (I think?) */
-               } catch (exception& e) {
-                       LOG_ERROR ("join() threw an exception: %1", e.what());
-               } catch (...) {
-                       LOG_ERROR_NC ("join() threw an exception");
-               }
-               delete *i;
-               LOG_GENERAL_NC ("Thread terminated");
-               ++n;
+       boost::mutex::scoped_lock lm(_threads_mutex);
+       boost::this_thread::disable_interruption dis;
+
+       for (auto& thread: _threads) {
+               thread->stop();
        }
 
-       _threads.clear ();
+       _threads.clear();
+       _ending = true;
 }
 
+
 void
-J2KEncoder::encoder_thread (optional<EncodeServerDescription> server)
-try
+J2KEncoder::remake_threads(int cpu, int gpu, list<EncodeServerDescription> servers)
 {
-       if (server) {
-               LOG_TIMING ("start-encoder-thread thread=%1 server=%2", thread_id (), server->host_name ());
-       } else {
-               LOG_TIMING ("start-encoder-thread thread=%1 server=localhost", thread_id ());
+       boost::mutex::scoped_lock lm (_threads_mutex);
+       if (_ending) {
+               return;
        }
 
-       /* Number of seconds that we currently wait between attempts
-          to connect to the server; not relevant for localhost
-          encodings.
-       */
-       int remote_backoff = 0;
+       auto remove_threads = [this](int wanted, int current, std::function<bool (shared_ptr<J2KEncoderThread>)> predicate) {
+               for (auto i = wanted; i < current; ++i) {
+                       auto iter = std::find_if(_threads.begin(), _threads.end(), predicate);
+                       if (iter != _threads.end()) {
+                               (*iter)->stop();
+                               _threads.erase(iter);
+                       }
+               }
+       };
+
+
+       /* CPU */
+
+       auto const is_cpu_thread = [](shared_ptr<J2KEncoderThread> thread) {
+               return static_cast<bool>(dynamic_pointer_cast<CPUJ2KEncoderThread>(thread));
+       };
+
+       auto const current_cpu_threads = std::count_if(_threads.begin(), _threads.end(), is_cpu_thread);
+
+       for (auto i = current_cpu_threads; i < cpu; ++i) {
+               auto thread = make_shared<CPUJ2KEncoderThread>(*this);
+               thread->start();
+               _threads.push_back(thread);
+       }
+
+       remove_threads(cpu, current_cpu_threads, is_cpu_thread);
+
+
+       /* GPU */
+
+       auto const is_grok_thread = [](shared_ptr<J2KEncoderThread> thread) {
+               return static_cast<bool>(dynamic_pointer_cast<GrokJ2KEncoderThread>(thread));
+       };
+
+       auto const current_gpu_threads = std::count_if(_threads.begin(), _threads.end(), is_grok_thread);
+
+       for (auto i = current_gpu_threads; i < gpu; ++i) {
+               auto thread = make_shared<GrokJ2KEncoderThread>(*this, _context);
+               thread->start();
+               _threads.push_back(thread);
+       }
+
+       remove_threads(gpu, current_gpu_threads, is_grok_thread);
 
-       while (true) {
 
-               LOG_TIMING ("encoder-sleep thread=%1", thread_id ());
-               boost::mutex::scoped_lock lock (_queue_mutex);
-               while (_queue.empty ()) {
-                       _empty_condition.wait (lock);
+       /* Remote */
+
+       for (auto const& server: servers) {
+               if (!server.current_link_version()) {
+                       continue;
                }
 
-               LOG_TIMING ("encoder-wake thread=%1 queue=%2", thread_id(), _queue.size());
-               shared_ptr<DCPVideo> vf = _queue.front ();
+               auto is_remote_thread = [server](shared_ptr<J2KEncoderThread> thread) {
+                       auto remote = dynamic_pointer_cast<RemoteJ2KEncoderThread>(thread);
+                       return remote && remote->server().host_name() == server.host_name();
+               };
 
-               /* We're about to commit to either encoding this frame or putting it back onto the queue,
-                  so we must not be interrupted until one or other of these things have happened.  This
-                  block has thread interruption disabled.
-               */
-               {
-                       boost::this_thread::disable_interruption dis;
-
-                       LOG_TIMING ("encoder-pop thread=%1 frame=%2 eyes=%3", thread_id(), vf->index(), (int) vf->eyes ());
-                       _queue.pop_front ();
-
-                       lock.unlock ();
-
-                       optional<Data> encoded;
-
-                       /* We need to encode this input */
-                       if (server) {
-                               try {
-                                       encoded = vf->encode_remotely (server.get ());
-
-                                       if (remote_backoff > 0) {
-                                               LOG_GENERAL ("%1 was lost, but now she is found; removing backoff", server->host_name ());
-                                       }
-
-                                       /* This job succeeded, so remove any backoff */
-                                       remote_backoff = 0;
-
-                               } catch (std::exception& e) {
-                                       if (remote_backoff < 60) {
-                                               /* back off more */
-                                               remote_backoff += 10;
-                                       }
-                                       LOG_ERROR (
-                                               N_("Remote encode of %1 on %2 failed (%3); thread sleeping for %4s"),
-                                               vf->index(), server->host_name(), e.what(), remote_backoff
-                                               );
-                               }
-
-                       } else {
-                               try {
-                                       LOG_TIMING ("start-local-encode thread=%1 frame=%2", thread_id(), vf->index());
-                                       encoded = vf->encode_locally ();
-                                       LOG_TIMING ("finish-local-encode thread=%1 frame=%2", thread_id(), vf->index());
-                               } catch (std::exception& e) {
-                                       /* This is very bad, so don't cope with it, just pass it on */
-                                       LOG_ERROR (N_("Local encode failed (%1)"), e.what ());
-                                       throw;
-                               }
-                       }
+               auto const current_threads = std::count_if(_threads.begin(), _threads.end(), is_remote_thread);
 
-                       if (encoded) {
-                               _writer->write (encoded.get(), vf->index (), vf->eyes ());
-                               frame_done ();
-                       } else {
-                               lock.lock ();
-                               LOG_GENERAL (N_("[%1] J2KEncoder thread pushes frame %2 back onto queue after failure"), thread_id(), vf->index());
-                               _queue.push_front (vf);
-                               lock.unlock ();
-                       }
+               auto const wanted_threads = server.threads();
+
+               if (wanted_threads > current_threads) {
+                       LOG_GENERAL(N_("Adding %1 worker threads for remote %2"), wanted_threads - current_threads, server.host_name());
+               } else if (wanted_threads < current_threads) {
+                       LOG_GENERAL(N_("Removing %1 worker threads for remote %2"), current_threads - wanted_threads, server.host_name());
                }
 
-               if (remote_backoff > 0) {
-                       boost::this_thread::sleep (boost::posix_time::seconds (remote_backoff));
+               for (auto i = current_threads; i < wanted_threads; ++i) {
+                       auto thread = make_shared<RemoteJ2KEncoderThread>(*this, server);
+                       thread->start();
+                       _threads.push_back(thread);
                }
 
-               /* The queue might not be full any more, so notify anything that is waiting on that */
-               lock.lock ();
-               _full_condition.notify_all ();
+               remove_threads(wanted_threads, current_threads, is_remote_thread);
        }
-}
-catch (boost::thread_interrupted& e) {
-       /* Ignore these and just stop the thread */
-       _full_condition.notify_all ();
-}
-catch (...)
-{
-       store_current ();
-       /* Wake anything waiting on _full_condition so it can see the exception */
-       _full_condition.notify_all ();
+
+       _writer.set_encoder_threads(_threads.size());
 }
 
-void
-J2KEncoder::servers_list_changed ()
+
+DCPVideo
+J2KEncoder::pop()
 {
-       terminate_threads ();
+       boost::mutex::scoped_lock lock(_queue_mutex);
+       while (_queue.empty()) {
+               _empty_condition.wait (lock);
+       }
 
-       /* XXX: could re-use threads */
+       LOG_TIMING("encoder-wake thread=%1 queue=%2", thread_id(), _queue.size());
 
-       boost::mutex::scoped_lock lm (_threads_mutex);
+       auto vf = _queue.front();
+       _queue.pop_front();
 
-#ifdef BOOST_THREAD_PLATFORM_WIN32
-       OSVERSIONINFO info;
-       info.dwOSVersionInfoSize = sizeof (OSVERSIONINFO);
-       GetVersionEx (&info);
-       bool const windows_xp = (info.dwMajorVersion == 5 && info.dwMinorVersion == 1);
-       if (windows_xp) {
-               LOG_GENERAL_NC (N_("Setting thread affinity for Windows XP"));
-       }
-#endif
-
-       if (!Config::instance()->only_servers_encode ()) {
-               for (int i = 0; i < Config::instance()->master_encoding_threads (); ++i) {
-                       boost::thread* t = new boost::thread (boost::bind (&J2KEncoder::encoder_thread, this, optional<EncodeServerDescription> ()));
-#ifdef DCPOMATIC_LINUX
-                       pthread_setname_np (t->native_handle(), "encode-worker");
-#endif
-                       _threads.push_back (t);
-#ifdef BOOST_THREAD_PLATFORM_WIN32
-                       if (windows_xp) {
-                               SetThreadAffinityMask (t->native_handle(), 1 << i);
-                       }
-#endif
-               }
-       }
+       _full_condition.notify_all();
+       return vf;
+}
 
-       BOOST_FOREACH (EncodeServerDescription i, EncodeServerFinder::instance()->servers()) {
-               if (!i.current_link_version()) {
-                       continue;
-               }
 
-               LOG_GENERAL (N_("Adding %1 worker threads for remote %2"), i.threads(), i.host_name ());
-               for (int j = 0; j < i.threads(); ++j) {
-                       _threads.push_back (new boost::thread (boost::bind (&J2KEncoder::encoder_thread, this, i)));
-               }
-       }
+void
+J2KEncoder::retry(DCPVideo video)
+{
+       boost::mutex::scoped_lock lock(_queue_mutex);
+       _queue.push_front(video);
+       _empty_condition.notify_all();
+}
 
-       _writer->set_encoder_threads (_threads.size ());
+
+void
+J2KEncoder::write(shared_ptr<const dcp::Data> data, int index, Eyes eyes)
+{
+       _writer.write(data, index, eyes);
+       frame_done();
 }