From d42bc731c125170efb1bb7b8c9f990a3e9fa5b57 Mon Sep 17 00:00:00 2001
From: Carl Hetherington
Date: Tue, 15 Nov 2022 00:44:37 +0100
Subject: [PATCH] Improve flushing behaviour when there is a lot of space to
 fill (#2364).

Previously a call to flush() could result in a lot of audio being emitted
from the decoder (if there is a big gap between the end of the audio and
the video).  This would end up being emitted in one chunk from the player,
crashing the audio analyser with an OOM in some cases.
---
 src/lib/audio_decoder.cc  |  6 +--
 src/lib/audio_decoder.h   |  2 +-
 src/lib/ffmpeg_decoder.cc | 83 +++++++++++++++++++++++++++------------
 src/lib/ffmpeg_decoder.h  | 18 ++++++++-
 4 files changed, 78 insertions(+), 31 deletions(-)

diff --git a/src/lib/audio_decoder.cc b/src/lib/audio_decoder.cc
index ca1faa010..ad3374762 100644
--- a/src/lib/audio_decoder.cc
+++ b/src/lib/audio_decoder.cc
@@ -52,14 +52,14 @@ AudioDecoder::AudioDecoder (Decoder* parent, shared_ptr<const AudioContent> cont
 
 /** @param time_already_delayed true if the delay should not be added to time */
 void
-AudioDecoder::emit (shared_ptr<const Film> film, AudioStreamPtr stream, shared_ptr<AudioBuffers> data, ContentTime time, bool time_already_delayed)
+AudioDecoder::emit(shared_ptr<const Film> film, AudioStreamPtr stream, shared_ptr<AudioBuffers> data, ContentTime time, bool flushing)
 {
 	if (ignore ()) {
 		return;
 	}
 
 	int const resampled_rate = _content->resampled_frame_rate(film);
-	if (!time_already_delayed) {
+	if (!flushing) {
 		time += ContentTime::from_seconds (_content->delay() / 1000.0);
 	}
 
@@ -119,7 +119,7 @@ AudioDecoder::emit (shared_ptr<const Film> film, AudioStreamPtr stream, shared_p
 		}
 	}
 
-	if (resampler) {
+	if (resampler && !flushing) {
 		auto ro = resampler->run (data);
 		if (ro->frames() == 0) {
 			return;
diff --git a/src/lib/audio_decoder.h b/src/lib/audio_decoder.h
index a8495aaa8..7417fee44 100644
--- a/src/lib/audio_decoder.h
+++ b/src/lib/audio_decoder.h
@@ -52,7 +52,7 @@ public:
 	AudioDecoder (Decoder* parent, std::shared_ptr<const AudioContent> content, bool fast);
 
 	boost::optional<dcpomatic::ContentTime> position (std::shared_ptr<const Film> film) const override;
-	void emit (std::shared_ptr<const Film> film, AudioStreamPtr stream, std::shared_ptr<AudioBuffers>, dcpomatic::ContentTime, bool time_already_delayed = false);
+	void emit(std::shared_ptr<const Film> film, AudioStreamPtr stream, std::shared_ptr<AudioBuffers>, dcpomatic::ContentTime, bool flushing = false);
 	void seek () override;
 	void flush ();
 
diff --git a/src/lib/ffmpeg_decoder.cc b/src/lib/ffmpeg_decoder.cc
index 8b07658ca..85f79b513 100644
--- a/src/lib/ffmpeg_decoder.cc
+++ b/src/lib/ffmpeg_decoder.cc
@@ -106,11 +106,41 @@ FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> film, shared_ptr<const FFm
 }
 
 
-bool
+FFmpegDecoder::FlushResult
 FFmpegDecoder::flush ()
 {
+	LOG_DEBUG_PLAYER("Flush FFmpeg decoder: current state %1", static_cast<int>(_flush_state));
+
+	switch (_flush_state) {
+	case FlushState::CODECS:
+		if (flush_codecs() == FlushResult::DONE) {
+			LOG_DEBUG_PLAYER_NC("Finished flushing codecs");
+			_flush_state = FlushState::AUDIO_DECODER;
+		}
+		break;
+	case FlushState::AUDIO_DECODER:
+		if (audio) {
+			audio->flush();
+		}
+		LOG_DEBUG_PLAYER_NC("Finished flushing audio decoder");
+		_flush_state = FlushState::FILL;
+		break;
+	case FlushState::FILL:
+		if (flush_fill() == FlushResult::DONE) {
+			LOG_DEBUG_PLAYER_NC("Finished flushing fills");
+			return FlushResult::DONE;
+		}
+		break;
+	}
+	return FlushResult::AGAIN;
+}
+
+
+/** @return true if we have finished flushing the codecs */
+FFmpegDecoder::FlushResult
+FFmpegDecoder::flush_codecs()
+{
 	bool did_something = false;
 	if (video) {
 		if (decode_and_process_video_packet(nullptr)) {
@@ -132,48 +162,49 @@ FFmpegDecoder::flush ()
 		}
 	}
 
-	if (did_something) {
-		/* We want to be called again */
-		return false;
-	}
+	return did_something ? FlushResult::AGAIN : FlushResult::DONE;
+}
+
+FFmpegDecoder::FlushResult
+FFmpegDecoder::flush_fill()
+{
 	/* Make sure all streams are the same length and round up to the next video frame */
 
+	bool did_something = false;
+
 	auto const frc = film()->active_frame_rate_change(_ffmpeg_content->position());
 	ContentTime full_length (_ffmpeg_content->full_length(film()), frc);
 	full_length = full_length.ceil (frc.source);
-	if (video) {
+	if (video && !video->ignore()) {
 		double const vfr = _ffmpeg_content->video_frame_rate().get();
 		auto const f = full_length.frames_round (vfr);
-		auto v = video->position(film()).get_value_or(ContentTime()).frames_round(vfr) + 1;
-		while (v < f) {
-			video->emit (film(), make_shared<RawImageProxy>(_black_image), v);
-			++v;
+		auto const v = video->position(film()).get_value_or(ContentTime()).frames_round(vfr) + 1;
+		if (v < f) {
+			video->emit(film(), make_shared<RawImageProxy>(_black_image), v);
+			did_something = true;
 		}
 	}
 
-	for (auto i: _ffmpeg_content->ffmpeg_audio_streams ()) {
-		auto a = audio->stream_position(film(), i);
-		/* Unfortunately if a is 0 that really means that we don't know the stream position since
-		   there has been no data on it since the last seek.  In this case we'll just do nothing
-		   here.  I'm not sure if that's the right idea.
-		*/
-		if (a > ContentTime()) {
-			while (a < full_length) {
+	if (audio && !audio->ignore()) {
+		for (auto i: _ffmpeg_content->ffmpeg_audio_streams ()) {
+			auto const a = audio->stream_position(film(), i);
+			/* Unfortunately if a is 0 that really means that we don't know the stream position since
+			   there has been no data on it since the last seek.  In this case we'll just do nothing
+			   here.  I'm not sure if that's the right idea.
+			*/
+			if (a > ContentTime() && a < full_length) {
+				LOG_DEBUG_PLAYER("Flush inserts silence at %1", to_string(a));
 				auto to_do = min (full_length - a, ContentTime::from_seconds (0.1));
 				auto silence = make_shared<AudioBuffers>(i->channels(), to_do.frames_ceil (i->frame_rate()));
 				silence->make_silent ();
 				audio->emit (film(), i, silence, a, true);
-				a += to_do;
+				did_something = true;
 			}
 		}
 	}
 
-	if (audio) {
-		audio->flush ();
-	}
-
-	return true;
+	return did_something ? FlushResult::AGAIN : FlushResult::DONE;
 }
 
 
@@ -199,7 +230,7 @@ FFmpegDecoder::pass ()
 		}
 
 		av_packet_free (&packet);
-		return flush ();
+		return flush() == FlushResult::DONE;
 	}
 
 	int const si = packet->stream_index;
diff --git a/src/lib/ffmpeg_decoder.h b/src/lib/ffmpeg_decoder.h
index 9de44333c..1e47e2fca 100644
--- a/src/lib/ffmpeg_decoder.h
+++ b/src/lib/ffmpeg_decoder.h
@@ -58,7 +58,12 @@ public:
 private:
 	friend struct ::ffmpeg_pts_offset_test;
 
-	bool flush ();
+	enum class FlushResult {
+		DONE,
+		AGAIN
+	};
+
+	FlushResult flush();
 
 	AVSampleFormat audio_sample_format (std::shared_ptr<FFmpegAudioStream> stream) const;
 	int bytes_per_audio_sample (std::shared_ptr<FFmpegAudioStream> stream) const;
@@ -77,6 +82,9 @@ private:
 
 	void maybe_add_subtitle ();
 
+	FlushResult flush_codecs();
+	FlushResult flush_fill();
+
 	VideoFilterGraphSet _filter_graphs;
 
 	dcpomatic::ContentTime _pts_offset;
@@ -87,4 +95,12 @@ private:
 	std::shared_ptr<Image> _black_image;
 
 	std::map<std::shared_ptr<FFmpegAudioStream>, boost::optional<dcpomatic::ContentTime>> _next_time;
+
+	enum class FlushState {
+		CODECS,
+		AUDIO_DECODER,
+		FILL,
+	};
+
+	FlushState _flush_state = FlushState::CODECS;
 };
-- 
2.30.2
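
The core idea of the patch is that flush() now does a bounded amount of work per
call and reports whether it should be called again, instead of filling the whole
gap between audio and video in one go.  The toy program below is a minimal
standalone sketch of that pattern; every name and number in it is invented for
illustration and none of DCP-o-matic's classes are used.

	#include <algorithm>
	#include <cstdio>

	enum class FlushResult { DONE, AGAIN };
	enum class FlushState { CODECS, AUDIO_DECODER, FILL };

	class ToyDecoder
	{
	public:
		/* The caller keeps calling pass() until it returns true, so the
		   flush is spread over many small calls rather than one big one. */
		bool pass()
		{
			return flush() == FlushResult::DONE;
		}

	private:
		FlushResult flush()
		{
			switch (_state) {
			case FlushState::CODECS:
				/* Drain a little data from the codecs on each call */
				if (--_codec_packets == 0) {
					_state = FlushState::AUDIO_DECODER;
				}
				break;
			case FlushState::AUDIO_DECODER:
				/* One-shot stage: flush the audio decoder, then move on */
				_state = FlushState::FILL;
				break;
			case FlushState::FILL:
				/* Emit at most a small chunk of black/silence per call */
				_missing_frames = std::max(0, _missing_frames - 100);
				if (_missing_frames == 0) {
					return FlushResult::DONE;
				}
				break;
			}
			return FlushResult::AGAIN;
		}

		FlushState _state = FlushState::CODECS;
		int _codec_packets = 3;      /* pretend work left in the codecs */
		int _missing_frames = 480;   /* pretend frames of silence still to fill */
	};

	int main()
	{
		ToyDecoder decoder;
		int calls = 1;
		while (!decoder.pass()) {
			++calls;
		}
		std::printf("flush completed over %d calls to pass()\n", calls);
		return 0;
	}

Driving the flush through pass() like this means the player receives many small
emissions (in the real patch, at most one black frame and 0.1 s of silence per
stream per call) instead of one huge buffer, which is what was overwhelming the
audio analyser.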