Use a separate AVFrame for each stream when decoding.

This appears to be what ffplay does, and it makes sense because (as far as I can see) a single frame may be built from multiple packets.
author: Carl Hetherington <cth@carlh.net> 2022-01-11 16:34:26 +0100
committer: Carl Hetherington <cth@carlh.net> 2022-01-11 22:16:04 +0100
commit: b703142e8750c509174b4d964009aecf93f3d834 (patch)
tree: 22936a3161af7df64acb42f95d0b08d32e9593dd /src/lib
parent: 5cb5c48441b20c478eb29001cebe3b08c0422341 (diff)
5 files changed, 60 insertions, 25 deletions
diff --git a/src/lib/ffmpeg.cc b/src/lib/ffmpeg.cc
index eed9ab94c..0f63ea172 100644
--- a/src/lib/ffmpeg.cc
+++ b/src/lib/ffmpeg.cc
@@ -72,7 +72,11 @@ FFmpeg::~FFmpeg ()
 		avcodec_free_context (&i);
 	}
 
-	av_frame_free (&_frame);
+	av_frame_free (&_video_frame);
+	for (auto& audio_frame: _audio_frame) {
+		av_frame_free (&audio_frame.second);
+	}
+
 	avformat_close_input (&_format_context);
 }
 
@@ -188,8 +192,8 @@ FFmpeg::setup_general ()
 		}
 	}
 
-	_frame = av_frame_alloc ();
-	if (_frame == 0) {
+	_video_frame = av_frame_alloc ();
+	if (_video_frame == nullptr) {
 		throw std::bad_alloc ();
 	}
 }
@@ -354,3 +358,23 @@ FFmpeg::pts_offset (vector<shared_ptr<FFmpegAudioStream>> audio_streams, optiona
 
 	return po;
 }
+
+
+AVFrame *
+FFmpeg::audio_frame (shared_ptr<const FFmpegAudioStream> stream)
+{
+	/* Return the AVFrame used to decode `stream', allocating it on first use (throws
+	   std::bad_alloc on failure).  The map slot is made before the frame is allocated so
+	   that a throwing insertion cannot leak a frame; a slot left null by a failed
+	   allocation is retried on the next call.
+	*/
+	auto& frame = _audio_frame[stream];
+	if (frame == nullptr) {
+		frame = av_frame_alloc ();
+		if (frame == nullptr) {
+			throw std::bad_alloc();
+		}
+	}
+	return frame;
+}
+
diff --git a/src/lib/ffmpeg.h b/src/lib/ffmpeg.h
index b0769971e..59b2dedd5 100644
--- a/src/lib/ffmpeg.h
+++ b/src/lib/ffmpeg.h
@@ -75,11 +75,14 @@ protected:
 
 	AVFormatContext* _format_context = nullptr;
 	std::vector<AVCodecContext*> _codec_context;
-	AVFrame* _frame = nullptr;
 
+	/** AVFrame used for decoding video */
+	AVFrame* _video_frame = nullptr;
 	/** Index of video stream within AVFormatContext */
 	boost::optional<int> _video_stream;
 
+	AVFrame* audio_frame (std::shared_ptr<const FFmpegAudioStream> stream);
+
 	/* It would appear (though not completely verified) that one must have
 	   a mutex around calls to avcodec_open* and avcodec_close... and here
 	   it is.
@@ -92,6 +95,9 @@ private:
 
 	static void ffmpeg_log_callback (void* ptr, int level, const char* fmt, va_list vl);
 	static std::weak_ptr<Log> _ffmpeg_log;
+
+	/** AVFrames used for decoding audio streams; accessed with audio_frame() */
+	std::map<std::shared_ptr<const FFmpegAudioStream>, AVFrame*> _audio_frame;
 };
 
 
diff --git a/src/lib/ffmpeg_decoder.cc b/src/lib/ffmpeg_decoder.cc
index ea961a894..e5685f661 100644
--- a/src/lib/ffmpeg_decoder.cc
+++ b/src/lib/ffmpeg_decoder.cc
@@ -122,7 +122,7 @@ FFmpegDecoder::flush ()
 			/* EOF can happen if we've already sent a flush packet */
 			throw DecodeError (N_("avcodec_send_packet"), N_("FFmpegDecoder::flush"), r);
 		}
-		r = avcodec_receive_frame (context, _frame);
+		r = avcodec_receive_frame (context, audio_frame(i));
 		if (r >= 0) {
 			process_audio_frame (i);
 			did_something = true;
@@ -447,10 +447,11 @@ FFmpegDecoder::audio_stream_from_index (int index) const
 void
 FFmpegDecoder::process_audio_frame (shared_ptr<FFmpegAudioStream> stream)
 {
-	auto data = deinterleave_audio (_frame);
+	auto frame = audio_frame (stream);
+	auto data = deinterleave_audio (frame);
 
 	ContentTime ct;
-	if (_frame->pts == AV_NOPTS_VALUE) {
+	if (frame->pts == AV_NOPTS_VALUE) {
 		/* In some streams we see not every frame coming through with a timestamp; for those
 		   that have AV_NOPTS_VALUE we need to work out the timestamp ourselves.  This is
 		   particularly noticeable with TrueHD streams (see #1111).
@@ -460,7 +461,7 @@ FFmpegDecoder::process_audio_frame (shared_ptr<FFmpegAudioStream> stream)
 		}
 	} else {
 		ct = ContentTime::from_seconds (
-			_frame->best_effort_timestamp *
+			frame->best_effort_timestamp *
 			av_q2d (stream->stream(_format_context)->time_base))
 			+ _pts_offset;
 	}
@@ -481,7 +482,7 @@ FFmpegDecoder::process_audio_frame (shared_ptr<FFmpegAudioStream> stream)
 			to_string(ct),
 			data->frames(),
 			stream->id(),
-			_frame->best_effort_timestamp,
+			frame->best_effort_timestamp,
 			av_q2d(stream->stream(_format_context)->time_base),
 			to_string(_pts_offset)
 			);
@@ -503,13 +504,14 @@ FFmpegDecoder::decode_and_process_audio_packet (AVPacket* packet)
 	}
 
 	auto context = _codec_context[stream->index(_format_context)];
+	auto frame = audio_frame (stream);
 
 	int r = avcodec_send_packet (context, packet);
 	if (r < 0) {
 		LOG_WARNING("avcodec_send_packet returned %1 for an audio packet", r);
 	}
 	while (r >= 0) {
-		r = avcodec_receive_frame (context, _frame);
+		r = avcodec_receive_frame (context, frame);
 		if (r == AVERROR(EAGAIN)) {
 			/* More input is required */
 			return;
@@ -536,7 +538,7 @@ FFmpegDecoder::decode_and_process_video_packet (AVPacket* packet)
 		LOG_WARNING("avcodec_send_packet returned %1 for a video packet", r);
 	}
 
-	r = avcodec_receive_frame (context, _frame);
+	r = avcodec_receive_frame (context, _video_frame);
 	if (r == AVERROR(EAGAIN) || r == AVERROR_EOF || (r < 0 && !packet)) {
 		/* More input is required, no more frames are coming, or we are flushing and there was
 		 * some error which we just want to ignore.
@@ -553,21 +555,21 @@ FFmpegDecoder::decode_and_process_video_packet (AVPacket* packet)
 	shared_ptr<VideoFilterGraph> graph;
 
 	auto i = _filter_graphs.begin();
-	while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
+	while (i != _filter_graphs.end() && !(*i)->can_process(dcp::Size(_video_frame->width, _video_frame->height), (AVPixelFormat) _video_frame->format)) {
 		++i;
 	}
 
 	if (i == _filter_graphs.end ()) {
 		dcp::Fraction vfr (lrint(_ffmpeg_content->video_frame_rate().get() * 1000), 1000);
-		graph = make_shared<VideoFilterGraph>(dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format, vfr);
+		graph = make_shared<VideoFilterGraph>(dcp::Size(_video_frame->width, _video_frame->height), (AVPixelFormat) _video_frame->format, vfr);
 		graph->setup (_ffmpeg_content->filters ());
 		_filter_graphs.push_back (graph);
-		LOG_GENERAL (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format);
+		LOG_GENERAL (N_("New graph for %1x%2, pixel format %3"), _video_frame->width, _video_frame->height, _video_frame->format);
 	} else {
 		graph = *i;
 	}
 
-	auto images = graph->process (_frame);
+	auto images = graph->process (_video_frame);
 
 	for (auto const& i: images) {
 
diff --git a/src/lib/ffmpeg_examiner.cc b/src/lib/ffmpeg_examiner.cc
index 0a236d836..5e53f0974 100644
--- a/src/lib/ffmpeg_examiner.cc
+++ b/src/lib/ffmpeg_examiner.cc
@@ -229,7 +229,7 @@ FFmpegExaminer::video_packet (AVCodecContext* context, string& temporal_referenc
 		throw DecodeError (N_("avcodec_send_packet"), N_("FFmpegExaminer::video_packet"), r);
 	}
 
-	r = avcodec_receive_frame (context, _frame);
+	r = avcodec_receive_frame (context, _video_frame);
 	if (r == AVERROR(EAGAIN)) {
 		/* More input is required */
 		return true;
@@ -239,16 +239,17 @@ FFmpegExaminer::video_packet (AVCodecContext* context, string& temporal_referenc
 	}
 
 	if (!_first_video) {
-		_first_video = frame_time (_format_context->streams[_video_stream.get()]);
+		_first_video = frame_time (_video_frame, _format_context->streams[_video_stream.get()]);
 	}
 	if (_need_video_length) {
 		_video_length = frame_time (
+			_video_frame,
 			_format_context->streams[_video_stream.get()]
 			).get_value_or (ContentTime ()).frames_round (video_frame_rate().get ());
 	}
 	if (temporal_reference.size() < (PULLDOWN_CHECK_FRAMES * 2)) {
-		temporal_reference += (_frame->top_field_first ? "T" : "B");
-		temporal_reference += (_frame->repeat_pict ? "3" : "2");
+		temporal_reference += (_video_frame->top_field_first ? "T" : "B");
+		temporal_reference += (_video_frame->repeat_pict ? "3" : "2");
 	}
 
 	return true;
@@ -271,22 +272,24 @@ FFmpegExaminer::audio_packet (AVCodecContext* context, shared_ptr<FFmpegAudioStr
 		throw DecodeError (N_("avcodec_send_packet"), N_("FFmpegExaminer::audio_packet"), r);
 	}
 
-	if (avcodec_receive_frame (context, _frame) < 0) {
+	auto frame = audio_frame (stream);
+
+	if (avcodec_receive_frame (context, frame) < 0) {
 		return;
 	}
 
-	stream->first_audio = frame_time (stream->stream(_format_context));
+	stream->first_audio = frame_time (frame, stream->stream(_format_context));
 }
 
 
 optional<ContentTime>
-FFmpegExaminer::frame_time (AVStream* s) const
+FFmpegExaminer::frame_time (AVFrame* frame, AVStream* stream) const
 {
 	optional<ContentTime> t;
 
-	int64_t const bet = _frame->best_effort_timestamp;
+	int64_t const bet = frame->best_effort_timestamp;
 	if (bet != AV_NOPTS_VALUE) {
-		t = ContentTime::from_seconds (bet * av_q2d (s->time_base));
+		t = ContentTime::from_seconds (bet * av_q2d(stream->time_base));
 	}
 
 	return t;
diff --git a/src/lib/ffmpeg_examiner.h b/src/lib/ffmpeg_examiner.h
index f978eb52b..36efc92b1 100644
--- a/src/lib/ffmpeg_examiner.h
+++ b/src/lib/ffmpeg_examiner.h
@@ -91,7 +91,7 @@ private:
 
 	std::string stream_name (AVStream* s) const;
 	std::string subtitle_stream_name (AVStream* s) const;
-	boost::optional<dcpomatic::ContentTime> frame_time (AVStream* s) const;
+	boost::optional<dcpomatic::ContentTime> frame_time (AVFrame* frame, AVStream* stream) const;
 
 	std::vector<std::shared_ptr<FFmpegSubtitleStream>> _subtitle_streams;
 	std::vector<std::shared_ptr<FFmpegAudioStream>> _audio_streams;
author	Carl Hetherington <cth@carlh.net>	2022-01-11 16:34:26 +0100
committer	Carl Hetherington <cth@carlh.net>	2022-01-11 22:16:04 +0100
commit	b703142e8750c509174b4d964009aecf93f3d834 (patch)
tree	22936a3161af7df64acb42f95d0b08d32e9593dd /src/lib
parent	5cb5c48441b20c478eb29001cebe3b08c0422341 (diff)