Try again to sort out the audio padding / alignment.

author: Carl Hetherington <cth@carlh.net> 2012-10-22 21:04:50 +0100
committer: Carl Hetherington <cth@carlh.net> 2012-10-22 21:04:50 +0100
commit: 579dfce1bae018efd73c9f5f3f31770cb401d19a (patch)
tree: f7bb1b2ad0537515fe3478d9ffe46a081f3b3414 /src
parent: 71b7808830e17423453502ed5112b70bf90fff1b (diff)
9 files changed, 58 insertions, 39 deletions
diff --git a/src/lib/decoder.cc b/src/lib/decoder.cc
index 65e5ff722..c0ad85da3 100644
--- a/src/lib/decoder.cc
+++ b/src/lib/decoder.cc
@@ -112,7 +112,7 @@ Decoder::process_end ()
 	   in to get it to the right length.
 	*/
 
-	int64_t const video_length_in_audio_frames = ((int64_t) _fs->dcp_length() * _fs->target_sample_rate() / _fs->frames_per_second());
+	int64_t const video_length_in_audio_frames = ((int64_t) _fs->dcp_length() * _fs->audio_sample_rate() / _fs->frames_per_second());
 	int64_t const audio_short_by_frames = video_length_in_audio_frames - _audio_frames_processed;
 
 	_log->log (
@@ -266,7 +266,7 @@ Decoder::emit_audio (uint8_t* data, int size)
 	}
 
 	/* Update the number of audio frames we've pushed to the encoder */
-	_audio_frames_processed += frames;
+	_audio_frames_processed += audio->frames ();
 
 	Audio (audio);
 }
diff --git a/src/lib/ffmpeg_decoder.cc b/src/lib/ffmpeg_decoder.cc
index c74fee008..5eb32a014 100644
--- a/src/lib/ffmpeg_decoder.cc
+++ b/src/lib/ffmpeg_decoder.cc
@@ -66,8 +66,6 @@ FFmpegDecoder::FFmpegDecoder (boost::shared_ptr<const FilmState> s, boost::share
 	, _audio_codec (0)
 	, _subtitle_codec_context (0)
 	, _subtitle_codec (0)
-	, _first_video_pts (-1)
-	, _first_audio_pts (-1)
 {
 	setup_general ();
 	setup_video ();
@@ -213,6 +211,7 @@ bool
 FFmpegDecoder::do_pass ()
 {
 	int r = av_read_frame (_format_context, &_packet);
+	
 	if (r < 0) {
 		if (r != AVERROR_EOF) {
 			throw DecodeError ("error on av_read_frame");
@@ -245,10 +244,12 @@ FFmpegDecoder::do_pass ()
 		return true;
 	}
 
+	double const pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base) * _packet.pts;
+	
 	if (_packet.stream_index == _video_stream) {
 
-		if (_first_video_pts == -1) {
-			_first_video_pts = _packet.pts;
+		if (!_first_video) {
+			_first_video = pts_seconds;
 		}
 		
 		int frame_finished;
@@ -256,39 +257,33 @@ FFmpegDecoder::do_pass ()
 			process_video (_frame);
 		}
 
-	} else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio && (_first_video_pts != -1 && _packet.pts > _first_video_pts)) {
+	} else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio && _first_video && _first_video.get() <= pts_seconds) {
 
-		/* Note: We only decode audio if we've had our first video packet through, and if this
-		   packet comes after it.  Until then it is thrown away.
+		/* Note: We only decode audio if we've had our first video packet through, and if it
+		   was before this packet.  Until then audio is thrown away.
 		*/
-
-		if (_first_audio_pts == -1) {
-			_first_audio_pts = _packet.pts;
-
+		
+		if (!_first_audio) {
+			_first_audio = pts_seconds;
+			
 			/* This is our first audio packet, and if we've arrived here we must have had our
 			   first video packet.  Push some silence to make up the gap between our first
 			   video packet and our first audio.
 			*/
 			
-			AVStream* v = _format_context->streams[_video_stream];
-			AVStream* a = _format_context->streams[_audio_stream];
+			/* frames of silence that we must push */
+			int const s = rint ((_first_audio.get() - _first_video.get()) * audio_sample_rate ());
 			
-			assert (v->time_base.num == a->time_base.num);
-			assert (v->time_base.den == a->time_base.den);
-
-			/* samples of silence that we must push */
-			int const s = rint (av_q2d (v->time_base) * (_first_audio_pts - _first_video_pts) * audio_sample_rate ());
-
 			_log->log (
 				String::compose (
-					"First video at %1, first audio at %2, pushing %3 samples of silence",
-					_first_video_pts, _first_audio_pts, s
+					"First video at %1, first audio at %2, pushing %3 frames of silence for %4 channels (%5 bytes per sample)",
+					_first_video.get(), _first_audio.get(), s, audio_channels(), bytes_per_audio_sample()
 					)
 				);
-
+			
 			/* hence bytes */
 			int const b = s * audio_channels() * bytes_per_audio_sample();
-
+			
 			/* XXX: this assumes that it won't be too much, and there are shaky assumptions
 			   that all sound representations are silent with memset()ed zero data.
 			*/
@@ -296,7 +291,7 @@ FFmpegDecoder::do_pass ()
 			memset (silence, 0, b);
 			process_audio (silence, b);
 		}
-
+		
 		avcodec_get_frame_defaults (_frame);
 		
 		int frame_finished;
@@ -304,11 +299,11 @@ FFmpegDecoder::do_pass ()
 			int const data_size = av_samples_get_buffer_size (
 				0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
 				);
-
+			
 			assert (_audio_codec_context->channels == _fs->audio_channels());
 			process_audio (_frame->data[0], data_size);
 		}
-
+			
 	} else if (_subtitle_stream >= 0 && _packet.stream_index == _subtitle_stream && _opt->decode_subtitles) {
 
 		int got_subtitle;
diff --git a/src/lib/ffmpeg_decoder.h b/src/lib/ffmpeg_decoder.h
index ac4cd6fcf..29e06c7d4 100644
--- a/src/lib/ffmpeg_decoder.h
+++ b/src/lib/ffmpeg_decoder.h
@@ -25,6 +25,7 @@
 #include <string>
 #include <stdint.h>
 #include <boost/shared_ptr.hpp>
+#include <boost/optional.hpp>
 extern "C" {
 #include <libavcodec/avcodec.h>
 #include <libpostproc/postprocess.h>
@@ -105,6 +106,6 @@ private:
 
 	AVPacket _packet;
 
-	int64_t _first_video_pts;
-	int64_t _first_audio_pts;
+	boost::optional<double> _first_video;
+	boost::optional<double> _first_audio;
 };
diff --git a/src/lib/film_state.cc b/src/lib/film_state.cc
index 283830f59..0d92a16ee 100644
--- a/src/lib/film_state.cc
+++ b/src/lib/film_state.cc
@@ -348,8 +348,9 @@ FilmState::content_type () const
 	return VIDEO;
 }
 
+/** @return The sampling rate that we will resample the audio to */
 int
-FilmState::target_sample_rate () const
+FilmState::target_audio_sample_rate () const
 {
 	/* Resample to a DCI-approved sample rate */
 	double t = dcp_audio_sample_rate (_audio_sample_rate);
diff --git a/src/lib/film_state.h b/src/lib/film_state.h
index 8940d0e5d..14a8f7eb5 100644
--- a/src/lib/film_state.h
+++ b/src/lib/film_state.h
@@ -130,7 +130,7 @@ public:
 	std::string thumb_base (int) const;
 	int thumb_frame (int) const;
 
-	int target_sample_rate () const;
+	int target_audio_sample_rate () const;
 	
 	void write_metadata () const;
 	void read_metadata ();
diff --git a/src/lib/j2k_wav_encoder.cc b/src/lib/j2k_wav_encoder.cc
index 8747bb7ad..c0c785d11 100644
--- a/src/lib/j2k_wav_encoder.cc
+++ b/src/lib/j2k_wav_encoder.cc
@@ -49,6 +49,7 @@ J2KWAVEncoder::J2KWAVEncoder (shared_ptr<const FilmState> s, shared_ptr<const Op
 #ifdef HAVE_SWRESAMPLE	  
 	, _swr_context (0)
 #endif	  
+	, _audio_frames_written (0)
 	, _process_end (false)
 {
 	/* Create sound output files with .tmp suffixes; we will rename
@@ -218,11 +219,11 @@ J2KWAVEncoder::encoder_thread (ServerDescription* server)
 void
 J2KWAVEncoder::process_begin (int64_t audio_channel_layout)
 {
-	if (_fs->audio_sample_rate() != _fs->target_sample_rate()) {
+	if (_fs->audio_sample_rate() != _fs->target_audio_sample_rate()) {
 #ifdef HAVE_SWRESAMPLE
 
 		stringstream s;
-		s << "Will resample audio from " << _fs->audio_sample_rate() << " to " << _fs->target_sample_rate();
+		s << "Will resample audio from " << _fs->audio_sample_rate() << " to " << _fs->target_audio_sample_rate();
 		_log->log (s.str ());
 
 		/* We will be using planar float data when we call the resampler */
@@ -230,7 +231,7 @@ J2KWAVEncoder::process_begin (int64_t audio_channel_layout)
 			0,
 			audio_channel_layout,
 			AV_SAMPLE_FMT_FLTP,
-			_fs->target_sample_rate(),
+			_fs->target_audio_sample_rate(),
 			audio_channel_layout,
 			AV_SAMPLE_FMT_FLTP,
 			_fs->audio_sample_rate(),
@@ -321,7 +322,13 @@ J2KWAVEncoder::process_end ()
 
 		swr_free (&_swr_context);
 	}
-#endif	
+#endif
+
+	int const dcp_sr = dcp_audio_sample_rate (_fs->audio_sample_rate ());
+	int64_t const extra_audio_frames = dcp_sr - (_audio_frames_written % dcp_sr);
+	shared_ptr<AudioBuffers> silence (new AudioBuffers (_fs->audio_channels(), extra_audio_frames));
+	silence->make_silent ();
+	write_audio (silence);
 	
 	close_sound_files ();
 
@@ -344,7 +351,7 @@ J2KWAVEncoder::process_audio (shared_ptr<const AudioBuffers> audio)
 	if (_swr_context) {
 
 		/* Compute the resampled frames count and add 32 for luck */
-		int const max_resampled_frames = ceil (audio->frames() * _fs->target_sample_rate() / _fs->audio_sample_rate()) + 32;
+		int const max_resampled_frames = ceil (audio->frames() * _fs->target_audio_sample_rate() / _fs->audio_sample_rate()) + 32;
 
 		resampled.reset (new AudioBuffers (_fs->audio_channels(), max_resampled_frames));
 
@@ -368,10 +375,12 @@ J2KWAVEncoder::process_audio (shared_ptr<const AudioBuffers> audio)
 }
 
 void
-J2KWAVEncoder::write_audio (shared_ptr<const AudioBuffers> audio) const
+J2KWAVEncoder::write_audio (shared_ptr<const AudioBuffers> audio)
 {
 	for (int i = 0; i < _fs->audio_channels(); ++i) {
 		sf_write_float (_sound_files[i], audio->data(i), audio->frames());
 	}
+
+	_audio_frames_written += audio->frames ();
 }
 
diff --git a/src/lib/j2k_wav_encoder.h b/src/lib/j2k_wav_encoder.h
index 6733221de..69d445b09 100644
--- a/src/lib/j2k_wav_encoder.h
+++ b/src/lib/j2k_wav_encoder.h
@@ -57,7 +57,7 @@ public:
 
 private:
 
-	void write_audio (boost::shared_ptr<const AudioBuffers> audio) const;
+	void write_audio (boost::shared_ptr<const AudioBuffers> audio);
 	void encoder_thread (ServerDescription *);
 	void close_sound_files ();
 	void terminate_worker_threads ();
@@ -67,6 +67,7 @@ private:
 #endif	
 
 	std::vector<SNDFILE*> _sound_files;
+	int64_t _audio_frames_written;
 
 	bool _process_end;
 	std::list<boost::shared_ptr<DCPVideoFrame> > _queue;
diff --git a/src/lib/util.cc b/src/lib/util.cc
index 6221b8b62..81dc72b3d 100644
--- a/src/lib/util.cc
+++ b/src/lib/util.cc
@@ -735,6 +735,16 @@ AudioBuffers::set_frames (int f)
 }
 
 void
+AudioBuffers::make_silent () /* Overwrite every sample held in this buffer with zero. */
+{
+	for (int i = 0; i < _channels; ++i) { /* visit each of the _channels channels */
+		for (int j = 0; j < _frames; ++j) { /* visit each of the _frames frames in channel i */
+			_data[i][j] = 0;
+		}
+	}
+}
+
+void
 ensure_ui_thread ()
 {
 	assert (this_thread::get_id() == ui_thread);
diff --git a/src/lib/util.h b/src/lib/util.h
index 2265dfe70..1d9984d9d 100644
--- a/src/lib/util.h
+++ b/src/lib/util.h
@@ -222,6 +222,8 @@ public:
 
 	void set_frames (int f);
 
+	void make_silent ();
+
 private:
 	/* no copy construction */
 	AudioBuffers (AudioBuffers const &);
author	Carl Hetherington <cth@carlh.net>	2012-10-22 21:04:50 +0100
committer	Carl Hetherington <cth@carlh.net>	2012-10-22 21:04:50 +0100
commit	579dfce1bae018efd73c9f5f3f31770cb401d19a (patch)
tree	f7bb1b2ad0537515fe3478d9ffe46a081f3b3414 /src
parent	71b7808830e17423453502ed5112b70bf90fff1b (diff)