Work around deadlock when destroying J2KEncoder with a full writer queue (#2784). v2.16.83
authorCarl Hetherington <cth@carlh.net>
Sun, 5 May 2024 19:34:29 +0000 (21:34 +0200)
committerCarl Hetherington <cth@carlh.net>
Tue, 7 May 2024 23:33:41 +0000 (01:33 +0200)
This feels like a hack, but I can't think of a nicer way to do it.

The interruption disable makes sense because when we destroy encoder threads
during a DCP encode (because a remote server goes away, for example) we don't
want any frames to be lost due to the encode thread being interrupted between
taking the frame off the queue and sending it to the writer.

When we're destroying the encoder we don't care about this, but I can't see
how you'd differentiate.

Maybe the encoder queue could have two lists: to-do and in-progress;
the encoder thread atomically moves a frame from to-do to in-progress,
but then how do you know when the in-progress ones are orphaned and need
to be re-added to the main queue.

You could make the writer return saying "no" if the queue is full (rather
than blocking and waiting for the queue to empty) but that seems wasteful
as then the frame would be re-encoded.

src/lib/j2k_encoder.cc
src/lib/writer.cc
src/lib/writer.h
test/j2k_encoder_test.cc [new file with mode: 0644]
test/wscript

index 7c9777c1694ed77df52f5af315b3c928e08a2252..32d2fefc2830c86fec1f057179c70d5dd74ef7e6 100644 (file)
@@ -69,6 +69,16 @@ J2KEncoder::~J2KEncoder ()
 {
        _server_found_connection.disconnect();
 
+       /* One of our encoder threads may be waiting on Writer::write() to return, if that method
+        * is blocked with the writer queue full waiting for _full_condition.  In that case, the
+        * attempt to terminate the encoder threads below (in terminate_threads()) will fail because
+        * the encoder thread waiting for ::write() will have interruption disabled.
+        *
+        * To work around that, make the writer into a zombie to unblock any pending write()s and
+        * not block on any future ones.
+        */
+       _writer.zombify();
+
        boost::mutex::scoped_lock lm (_threads_mutex);
        terminate_threads ();
 }
index fbe2d248d5c952c3112f1e5f9f7184ae3b4073c3..7b9defd73071f3c8ad546ff7d5f1385d5f38b6c2 100644 (file)
@@ -144,6 +144,10 @@ Writer::write (shared_ptr<const Data> encoded, Frame frame, Eyes eyes)
 {
        boost::mutex::scoped_lock lock (_state_mutex);
 
+       if (_zombie) {
+               return;
+       }
+
        while (_queued_full_in_memory > _maximum_frames_in_memory) {
                /* There are too many full frames in memory; wake the main writer thread and
                   wait until it sorts everything out */
@@ -377,6 +381,9 @@ try
        while (true)
        {
                boost::mutex::scoped_lock lock (_state_mutex);
+               if (_zombie) {
+                       return;
+               }
 
                while (true) {
 
@@ -1042,3 +1049,17 @@ Writer::write_hanging_text (ReelWriter& reel)
        }
        _hanging_texts = new_hanging_texts;
 }
+
+
+/** Set the writer so that it has no queue and drops any pending or future requests to write images */
+void
+Writer::zombify()
+{
+       boost::mutex::scoped_lock lock(_state_mutex);
+
+       _queue.clear();
+       _queued_full_in_memory = 0;
+       _zombie = true;
+       _full_condition.notify_all();
+}
+
index 6e7f5ca9063889b4caa30a3e1ee9a2182528c605..f0f1fe69ac77d07ea8ce25df94a29667f9279d26 100644 (file)
@@ -127,6 +127,8 @@ public:
 
        void set_encoder_threads (int threads);
 
+       void zombify();
+
 private:
        friend struct ::writer_disambiguate_font_ids1;
        friend struct ::writer_disambiguate_font_ids2;
@@ -229,6 +231,8 @@ private:
        };
 
        std::vector<HangingText> _hanging_texts;
+
+       bool _zombie = false;
 };
 
 
diff --git a/test/j2k_encoder_test.cc b/test/j2k_encoder_test.cc
new file mode 100644 (file)
index 0000000..bc3bd97
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+    Copyright (C) 2024 Carl Hetherington <cth@carlh.net>
+
+    This file is part of DCP-o-matic.
+
+    DCP-o-matic is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    DCP-o-matic is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.
+
+*/
+
+
+#include "lib/config.h"
+#include "lib/cross.h"
+#include "lib/image.h"
+#include "lib/j2k_encoder.h"
+#include "lib/player_video.h"
+#include "lib/raw_image_proxy.h"
+#include "lib/writer.h"
+#include "test.h"
+extern "C" {
+#include <libavutil/pixfmt.h>
+}
+#include <boost/test/unit_test.hpp>
+
+
+using std::make_shared;
+using std::weak_ptr;
+using boost::optional;
+
+
+BOOST_AUTO_TEST_CASE(j2k_encoder_deadlock_test)
+{
+       ConfigRestorer cr;
+
+       auto film = new_test_film2("j2k_encoder_deadlock_test");
+
+       /* Don't call ::start() on this Writer, so it can never write anything */
+       Writer writer(film, {});
+       writer.set_encoder_threads(4);
+
+       /* We want to test the case where the writer queue fills, and this can't happen unless there
+        * are enough encoding threads (each of which will end up waiting for the writer to empty,
+        * which will never happen).
+        */
+       Config::instance()->set_master_encoding_threads(4);
+       J2KEncoder encoder(film, writer);
+       encoder.begin();
+
+       for (int i = 0; i < 26; ++i) {
+               auto image = make_shared<Image>(AV_PIX_FMT_RGB24, dcp::Size(1998, 1080), Image::Alignment::PADDED);
+               auto image_proxy = make_shared<RawImageProxy>(image);
+               encoder.encode(
+                       std::make_shared<PlayerVideo>(
+                               image_proxy,
+                               Crop(),
+                               optional<double>(),
+                               dcp::Size(1998, 1080),
+                               dcp::Size(1998, 1080),
+                               Eyes::BOTH,
+                               Part::WHOLE,
+                               optional<ColourConversion>(),
+                               VideoRange::VIDEO,
+                               weak_ptr<Content>(),
+                               optional<Frame>(),
+                               false
+                               ),
+                       {}
+                       );
+       }
+
+       dcpomatic_sleep_seconds(10);
+}
+
index bb66d1a5f952ff07d115ac63c9d647e6346829a6..cd23badc0610eac04f971087bab4466ffa0aad9b 100644 (file)
@@ -112,6 +112,7 @@ def build(bld):
                  interrupt_encoder_test.cc
                  isdcf_name_test.cc
                  j2k_bandwidth_test.cc
+                 j2k_encoder_test.cc
                  job_manager_test.cc
                  kdm_cli_test.cc
                  kdm_naming_test.cc