Improve FFmpeg sync, in theory.

author: Carl Hetherington <cth@carlh.net> 2013-06-26 16:35:43 +0100
committer: Carl Hetherington <cth@carlh.net> 2013-06-26 16:35:43 +0100
commit: d39bfb9a1360a83f3451b79d01fdc4f465ef21d3 (patch)
tree: 64ea69741155d15d114ad96daf0f90e24b3abe28 /src
parent: 7cd29c67a1977d67fc63f58c1d29826d7993a750 (diff)
12 files changed, 91 insertions, 42 deletions
diff --git a/src/lib/audio_decoder.cc b/src/lib/audio_decoder.cc
index 396471910..dc49a1846 100644
--- a/src/lib/audio_decoder.cc
+++ b/src/lib/audio_decoder.cc
@@ -33,7 +33,7 @@ using boost::shared_ptr;
 
 AudioDecoder::AudioDecoder (shared_ptr<const Film> f)
 	: Decoder (f)
-	, _next_audio_frame (0)
+	, _audio_position (0)
 {
 }
 
@@ -71,5 +71,5 @@ void
 AudioDecoder::audio (shared_ptr<const AudioBuffers> data, AudioContent::Frame frame)
 {
 	Audio (data, frame);
-	_next_audio_frame = frame + data->frames ();
+	_audio_position = frame + data->frames ();
 }
diff --git a/src/lib/audio_decoder.h b/src/lib/audio_decoder.h
index 168348c2e..ddfb296c9 100644
--- a/src/lib/audio_decoder.h
+++ b/src/lib/audio_decoder.h
@@ -43,7 +43,7 @@ public:
 protected:
 
 	void audio (boost::shared_ptr<const AudioBuffers>, AudioContent::Frame);
-	AudioContent::Frame _next_audio_frame;
+	AudioContent::Frame _audio_position;
 };
 
 #endif
diff --git a/src/lib/ffmpeg_content.cc b/src/lib/ffmpeg_content.cc
index 378bd98cb..1135cc9a3 100644
--- a/src/lib/ffmpeg_content.cc
+++ b/src/lib/ffmpeg_content.cc
@@ -296,7 +296,7 @@ FFmpegAudioStream::FFmpegAudioStream (shared_ptr<const cxml::Node> node)
 	frame_rate = node->number_child<int> ("FrameRate");
 	channels = node->number_child<int64_t> ("Channels");
 	mapping = AudioMapping (node->node_child ("Mapping"));
-	start = node->optional_number_child<Time> ("Start");
+	first_audio = node->optional_number_child<Time> ("FirstAudio");
 }
 
 void
@@ -306,8 +306,8 @@ FFmpegAudioStream::as_xml (xmlpp::Node* root) const
 	root->add_child("Id")->add_child_text (lexical_cast<string> (id));
 	root->add_child("FrameRate")->add_child_text (lexical_cast<string> (frame_rate));
 	root->add_child("Channels")->add_child_text (lexical_cast<string> (channels));
-	if (start) {
-		root->add_child("Start")->add_child_text (lexical_cast<string> (start));
+	if (first_audio) {
+		root->add_child("FirstAudio")->add_child_text (lexical_cast<string> (first_audio));
 	}
 	mapping.as_xml (root->add_child("Mapping"));
 }
diff --git a/src/lib/ffmpeg_content.h b/src/lib/ffmpeg_content.h
index fc45267ee..c5ccee77a 100644
--- a/src/lib/ffmpeg_content.h
+++ b/src/lib/ffmpeg_content.h
@@ -46,7 +46,7 @@ public:
         int frame_rate;
 	int channels;
 	AudioMapping mapping;
-	boost::optional<Time> start;
+	boost::optional<double> first_audio;
 };
 
 extern bool operator== (FFmpegAudioStream const & a, FFmpegAudioStream const & b);
@@ -134,6 +134,11 @@ public:
 
         void set_subtitle_stream (boost::shared_ptr<FFmpegSubtitleStream>);
         void set_audio_stream (boost::shared_ptr<FFmpegAudioStream>);
+
+	boost::optional<Time> first_video () const {
+		boost::mutex::scoped_lock lm (_mutex);
+		return _first_video;
+	}
 	
 private:
 	std::vector<boost::shared_ptr<FFmpegSubtitleStream> > _subtitle_streams;
diff --git a/src/lib/ffmpeg_decoder.cc b/src/lib/ffmpeg_decoder.cc
index 1d8a00866..2d1792390 100644
--- a/src/lib/ffmpeg_decoder.cc
+++ b/src/lib/ffmpeg_decoder.cc
@@ -66,8 +66,30 @@ FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegC
 	, _subtitle_codec (0)
 	, _decode_video (video)
 	, _decode_audio (audio)
+	, _pts_offset (0)
 {
 	setup_subtitle ();
+
+	if (video && audio && c->audio_stream() && c->first_video() && c->audio_stream()->first_audio) {
+		_pts_offset = compute_pts_offset (c->first_video().get(), c->audio_stream()->first_audio.get(), c->video_frame_rate());
+	}
+}
+
+double
+FFmpegDecoder::compute_pts_offset (double first_video, double first_audio, float video_frame_rate)
+{
+	assert (first_video >= 0);
+	assert (first_audio >= 0);
+	
+	double const old_first_video = first_video;
+	
+	/* Round the first video time up to the next frame boundary, unless it is already on one */
+	if (fabs (rint (first_video * video_frame_rate) - first_video * video_frame_rate) > 1e-6) {
+		first_video = ceil (first_video * video_frame_rate) / video_frame_rate;
+	}
+
+	/* Compute the required offset (also removing any common start delay) */
+	return first_video - old_first_video - min (first_video, first_audio);
 }
 
 FFmpegDecoder::~FFmpegDecoder ()
@@ -108,8 +130,8 @@ FFmpegDecoder::pass ()
 		}
 
 		/* Stop us being asked for any more data */
-		_next_video_frame = _ffmpeg_content->video_length ();
-		_next_audio_frame = _ffmpeg_content->audio_length ();
+		_video_position = _ffmpeg_content->video_length ();
+		_audio_position = _ffmpeg_content->audio_length ();
 		return;
 	}
 
@@ -267,11 +289,11 @@ FFmpegDecoder::seek (VideoContent::Frame frame)
 void
 FFmpegDecoder::seek_back ()
 {
-	if (_next_video_frame == 0) {
+	if (_video_position == 0) {
 		return;
 	}
 	
-	do_seek (_next_video_frame - 1, true, true);
+	do_seek (_video_position - 1, true, true);
 }
 
 void
@@ -327,17 +349,33 @@ FFmpegDecoder::decode_audio_packet ()
 		int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
 		if (decode_result >= 0) {
 			if (frame_finished) {
-			
-				/* Where we are in the source, in seconds */
-				double const source_pts_seconds = av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
-					* av_frame_get_best_effort_timestamp(_frame);
+
+				if (_audio_position == 0) {
+					/* Where we are in the source, in seconds, after subtracting the PTS offset */
+					double const pts = av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
+						* av_frame_get_best_effort_timestamp(_frame) - _pts_offset;
+
+					if (pts > 0) {
+						/* No audio emitted yet and the first audio is late: emit pts seconds of silence first */
+						shared_ptr<AudioBuffers> silence (
+							new AudioBuffers (
+								_ffmpeg_content->audio_channels(),
+								pts * _ffmpeg_content->content_audio_frame_rate()
+								)
+							);
+						
+						silence->make_silent ();
+						audio (silence, _audio_position);
+					}
+				}
+					
 				
 				int const data_size = av_samples_get_buffer_size (
 					0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
 					);
 				
 				assert (audio_codec_context()->channels == _ffmpeg_content->audio_channels());
-				Audio (deinterleave_audio (_frame->data, data_size), source_pts_seconds * _ffmpeg_content->content_audio_frame_rate());
+				audio (deinterleave_audio (_frame->data, data_size), _audio_position);
 			}
 			
 			copy_packet.data += decode_result;
@@ -389,8 +427,8 @@ FFmpegDecoder::decode_video_packet ()
 		int64_t const bet = av_frame_get_best_effort_timestamp (_frame);
 		if (bet != AV_NOPTS_VALUE) {
 
-			double const pts = bet * av_q2d (_format_context->streams[_video_stream]->time_base);
-			double const next = _next_video_frame / _ffmpeg_content->video_frame_rate();
+			double const pts = bet * av_q2d (_format_context->streams[_video_stream]->time_base) - _pts_offset;
+			double const next = _video_position / _ffmpeg_content->video_frame_rate();
 			double const one_frame = 1 / _ffmpeg_content->video_frame_rate ();
 			double delta = pts - next;
 
@@ -407,13 +445,13 @@ FFmpegDecoder::decode_video_packet ()
 					);
 				
 				black->make_black ();
-				video (image, false, _next_video_frame);
+				video (image, false, _video_position);
 				delta -= one_frame;
 			}
 
 			if (delta > -one_frame) {
 				/* This PTS is within a frame of being right; emit this (otherwise it will be dropped) */
-				video (image, false, _next_video_frame);
+				video (image, false, _video_position);
 			}
 		} else {
 			shared_ptr<const Film> film = _film.lock ();
@@ -450,8 +488,8 @@ FFmpegDecoder::setup_subtitle ()
 bool
 FFmpegDecoder::done () const
 {
-	bool const vd = !_decode_video || (_next_video_frame >= _ffmpeg_content->video_length());
-	bool const ad = !_decode_audio || !_ffmpeg_content->audio_stream() || (_next_audio_frame >= _ffmpeg_content->audio_length());
+	bool const vd = !_decode_video || (_video_position >= _ffmpeg_content->video_length());
+	bool const ad = !_decode_audio || !_ffmpeg_content->audio_stream() || (_audio_position >= _ffmpeg_content->audio_length());
 	return vd && ad;
 }
 	
diff --git a/src/lib/ffmpeg_decoder.h b/src/lib/ffmpeg_decoder.h
index a8eabb972..8f0482aad 100644
--- a/src/lib/ffmpeg_decoder.h
+++ b/src/lib/ffmpeg_decoder.h
@@ -38,6 +38,7 @@ extern "C" {
 #include "ffmpeg.h"
 
 class Film;
+class ffmpeg_pts_offset_test;
 
 /** @class FFmpegDecoder
  *  @brief A decoder using FFmpeg to decode content.
@@ -54,11 +55,14 @@ public:
 	bool done () const;
 
 private:
+	friend class ::ffmpeg_pts_offset_test;
 
 	/* No copy construction */
 	FFmpegDecoder (FFmpegDecoder const &);
 	FFmpegDecoder& operator= (FFmpegDecoder const &);
 
+	static double compute_pts_offset (double, double, float);
+
 	void setup_subtitle ();
 
 	AVSampleFormat audio_sample_format () const;
@@ -79,4 +83,6 @@ private:
 
 	bool _decode_video;
 	bool _decode_audio;
+
+	double _pts_offset;
 };
diff --git a/src/lib/ffmpeg_examiner.cc b/src/lib/ffmpeg_examiner.cc
index 6f1524f50..f45b0fe52 100644
--- a/src/lib/ffmpeg_examiner.cc
+++ b/src/lib/ffmpeg_examiner.cc
@@ -79,9 +79,9 @@ FFmpegExaminer::FFmpegExaminer (shared_ptr<const FFmpegContent> c)
 			}
 		} else {
 			for (size_t i = 0; i < _audio_streams.size(); ++i) {
-				if (_packet.stream_index == _audio_streams[i]->id && !_audio_streams[i]->start) {
+				if (_packet.stream_index == _audio_streams[i]->id && !_audio_streams[i]->first_audio) {
 					if (avcodec_decode_audio4 (context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
-						_audio_streams[i]->start = frame_time (_audio_streams[i]->id);
+						_audio_streams[i]->first_audio = frame_time (_audio_streams[i]->id);
 					}
 				}
 			}
@@ -90,7 +90,7 @@ FFmpegExaminer::FFmpegExaminer (shared_ptr<const FFmpegContent> c)
 		bool have_all_audio = true;
 		size_t i = 0;
 		while (i < _audio_streams.size() && have_all_audio) {
-			have_all_audio = _audio_streams[i]->start;
+			have_all_audio = _audio_streams[i]->first_audio;
 			++i;
 		}
 
@@ -102,14 +102,14 @@ FFmpegExaminer::FFmpegExaminer (shared_ptr<const FFmpegContent> c)
 	}
 }
 
-optional<Time>
+optional<double>
 FFmpegExaminer::frame_time (int stream) const
 {
-	optional<Time> t;
+	optional<double> t;
 	
 	int64_t const bet = av_frame_get_best_effort_timestamp (_frame);
 	if (bet != AV_NOPTS_VALUE) {
-		t = bet * av_q2d (_format_context->streams[stream]->time_base) * TIME_HZ;
+		t = bet * av_q2d (_format_context->streams[stream]->time_base);
 	}
 
 	return t;
diff --git a/src/lib/ffmpeg_examiner.h b/src/lib/ffmpeg_examiner.h
index 5cf9c2d0a..ec84865ed 100644
--- a/src/lib/ffmpeg_examiner.h
+++ b/src/lib/ffmpeg_examiner.h
@@ -41,15 +41,15 @@ public:
 		return _audio_streams;
 	}
 
-	boost::optional<Time> first_video () const {
+	boost::optional<double> first_video () const {
 		return _first_video;
 	}
 	
 private:
 	std::string stream_name (AVStream* s) const;
-	boost::optional<Time> frame_time (int) const;
+	boost::optional<double> frame_time (int) const;
 	
         std::vector<boost::shared_ptr<FFmpegSubtitleStream> > _subtitle_streams;
         std::vector<boost::shared_ptr<FFmpegAudioStream> > _audio_streams;
-	boost::optional<Time> _first_video;
+	boost::optional<double> _first_video;
 };
diff --git a/src/lib/imagemagick_decoder.cc b/src/lib/imagemagick_decoder.cc
index 49b9d4911..04d3d9df7 100644
--- a/src/lib/imagemagick_decoder.cc
+++ b/src/lib/imagemagick_decoder.cc
@@ -43,12 +43,12 @@ ImageMagickDecoder::ImageMagickDecoder (shared_ptr<const Film> f, shared_ptr<con
 void
 ImageMagickDecoder::pass ()
 {
-	if (_next_video_frame >= _imagemagick_content->video_length ()) {
+	if (_video_position >= _imagemagick_content->video_length ()) {
 		return;
 	}
 
 	if (_image) {
-		video (_image, true, _next_video_frame);
+		video (_image, true, _video_position);
 		return;
 	}
 
@@ -71,25 +71,25 @@ ImageMagickDecoder::pass ()
 
 	delete magick_image;
 
-	video (_image, false, _next_video_frame);
+	video (_image, false, _video_position);
 }
 
 void
 ImageMagickDecoder::seek (VideoContent::Frame frame)
 {
-	_next_video_frame = frame;
+	_video_position = frame;
 }
 
 void
 ImageMagickDecoder::seek_back ()
 {
-	if (_next_video_frame > 0) {
-		_next_video_frame--;
+	if (_video_position > 0) {
+		_video_position--;
 	}
 }
 
 bool
 ImageMagickDecoder::done () const
 {
-	return _next_video_frame >= _imagemagick_content->video_length ();
+	return _video_position >= _imagemagick_content->video_length ();
 }
diff --git a/src/lib/sndfile_decoder.cc b/src/lib/sndfile_decoder.cc
index 9030021e7..80a6afd2b 100644
--- a/src/lib/sndfile_decoder.cc
+++ b/src/lib/sndfile_decoder.cc
@@ -115,5 +115,5 @@ SndfileDecoder::audio_frame_rate () const
 bool
 SndfileDecoder::done () const
 {
-	return _next_audio_frame > _sndfile_content->audio_length ();
+	return _audio_position >= _sndfile_content->audio_length ();
 }
diff --git a/src/lib/video_decoder.cc b/src/lib/video_decoder.cc
index b5cc7d158..f61e63d4d 100644
--- a/src/lib/video_decoder.cc
+++ b/src/lib/video_decoder.cc
@@ -30,7 +30,7 @@ using boost::shared_ptr;
 
 VideoDecoder::VideoDecoder (shared_ptr<const Film> f)
 	: Decoder (f)
-	, _next_video_frame (0)
+	, _video_position (0)
 {
 
 }
@@ -39,7 +39,7 @@ void
 VideoDecoder::video (shared_ptr<const Image> image, bool same, VideoContent::Frame frame)
 {
         Video (image, same, frame);
-	_next_video_frame = frame + 1;
+	_video_position = frame + 1;
 }
 
 #if 0
diff --git a/src/lib/video_decoder.h b/src/lib/video_decoder.h
index c86248417..d24219d95 100644
--- a/src/lib/video_decoder.h
+++ b/src/lib/video_decoder.h
@@ -43,7 +43,7 @@ public:
 protected:
 
 	void video (boost::shared_ptr<const Image>, bool, VideoContent::Frame);
-	VideoContent::Frame _next_video_frame;
+	VideoContent::Frame _video_position;
 };
 
 #endif
author	Carl Hetherington <cth@carlh.net>	2013-06-26 16:35:43 +0100
committer	Carl Hetherington <cth@carlh.net>	2013-06-26 16:35:43 +0100
commit	d39bfb9a1360a83f3451b79d01fdc4f465ef21d3 (patch)
tree	64ea69741155d15d114ad96daf0f90e24b3abe28 /src
parent	7cd29c67a1977d67fc63f58c1d29826d7993a750 (diff)