Merge branch 'resample-drop-frame'

author: Carl Hetherington <cth@carlh.net> 2012-10-02 16:14:29 +0100
committer: Carl Hetherington <cth@carlh.net> 2012-10-02 16:14:29 +0100
commit: c55d8bcda8f4da74bbc9489127354211cea8f2ff (patch)
tree: d81b7197b635c6a9bcf9889c3dc4e6b9bc45bde3 /src/lib
parent: 11c0aac8508ac1a54e63bdcb31a85c941a7fb546 (diff)
parent: 0f154f43bd0c88d1615e455bd8a169826a08c086 (diff)
11 files changed, 176 insertions, 134 deletions
diff --git a/src/lib/ab_transcoder.cc b/src/lib/ab_transcoder.cc
index aabaf2d03..95492a9d8 100644
--- a/src/lib/ab_transcoder.cc
+++ b/src/lib/ab_transcoder.cc
@@ -103,7 +103,7 @@ ABTranscoder::process_video (shared_ptr<Image> yuv, int frame, int index)
 void
 ABTranscoder::go ()
 {
-	_encoder->process_begin ();
+	_encoder->process_begin (_da->audio_channel_layout(), _da->audio_sample_format());
 	_da->process_begin ();
 	_db->process_begin ();
 	
diff --git a/src/lib/decoder.cc b/src/lib/decoder.cc
index e35517012..8aa5f77c6 100644
--- a/src/lib/decoder.cc
+++ b/src/lib/decoder.cc
@@ -70,9 +70,6 @@ Decoder::Decoder (boost::shared_ptr<const FilmState> s, boost::shared_ptr<const
 	, _video_frame (0)
 	, _buffer_src_context (0)
 	, _buffer_sink_context (0)
-#if HAVE_SWRESAMPLE	  
-	, _swr_context (0)
-#endif	  
 	, _have_setup_video_filters (false)
 	, _delay_line (0)
 	, _delay_in_bytes (0)
@@ -92,29 +89,6 @@ Decoder::~Decoder ()
 void
 Decoder::process_begin ()
 {
-	if (_fs->audio_sample_rate != dcp_audio_sample_rate (_fs->audio_sample_rate)) {
-#if HAVE_SWRESAMPLE		
-		_swr_context = swr_alloc_set_opts (
-			0,
-			audio_channel_layout(),
-			audio_sample_format(),
-			dcp_audio_sample_rate (_fs->audio_sample_rate),
-			audio_channel_layout(),
-			audio_sample_format(),
-			_fs->audio_sample_rate,
-			0, 0
-			);
-		
-		swr_init (_swr_context);
-#else
-		throw DecodeError ("Cannot resample audio as libswresample is not present");
-#endif		
-	} else {
-#if HAVE_SWRESAMPLE		
-		_swr_context = 0;
-#endif		
-	}
-
 	_delay_in_bytes = _fs->audio_delay * _fs->audio_sample_rate * _fs->audio_channels * _fs->bytes_per_sample() / 1000;
 	delete _delay_line;
 	_delay_line = new DelayLine (_delay_in_bytes);
@@ -126,35 +100,6 @@ Decoder::process_begin ()
 void
 Decoder::process_end ()
 {
-#if HAVE_SWRESAMPLE	
-	if (_swr_context) {
-
-		int mop = 0;
-		while (1) {
-			uint8_t buffer[256 * _fs->bytes_per_sample() * _fs->audio_channels];
-			uint8_t* out[1] = {
-				buffer
-			};
-
-			int const frames = swr_convert (_swr_context, out, 256, 0, 0);
-
-			if (frames < 0) {
-				throw DecodeError ("could not run sample-rate converter");
-			}
-
-			if (frames == 0) {
-				break;
-			}
-
-			mop += frames;
-			int available = _delay_line->feed (buffer, frames * _fs->audio_channels * _fs->bytes_per_sample());
-			Audio (buffer, available);
-		}
-
-		swr_free (&_swr_context);
-	}
-#endif	
-	
 	if (_delay_in_bytes < 0) {
 		uint8_t remainder[-_delay_in_bytes];
 		_delay_line->get_remaining (remainder);
@@ -167,18 +112,23 @@ Decoder::process_end ()
 	*/
 
 	int64_t const audio_short_by_frames =
-		((int64_t) decoding_frames() * dcp_audio_sample_rate (_fs->audio_sample_rate) / _fs->frames_per_second)
+		((int64_t) decoding_frames() * _fs->target_sample_rate() / _fs->frames_per_second)
 		- _audio_frames_processed;
 
 	if (audio_short_by_frames >= 0) {
-		int bytes = audio_short_by_frames * _fs->audio_channels * _fs->bytes_per_sample();
+
+		stringstream s;
+		s << "Adding " << audio_short_by_frames << " frames of silence to the end.";
+		_log->log (s.str ());
+
+		int64_t bytes = audio_short_by_frames * _fs->audio_channels * _fs->bytes_per_sample();
 		
-		int const silence_size = 64 * 1024;
+		int64_t const silence_size = 64 * 1024;
 		uint8_t silence[silence_size];
 		memset (silence, 0, silence_size);
 		
 		while (bytes) {
-			int const t = min (bytes, silence_size);
+			int64_t const t = min (bytes, silence_size);
 			Audio (silence, t);
 			bytes -= t;
 		}
@@ -241,16 +191,9 @@ Decoder::pass ()
 void
 Decoder::process_audio (uint8_t* data, int size)
 {
-	/* Here's samples per channel */
+	/* Total number of samples, summed over all channels */
 	int const samples = size / _fs->bytes_per_sample();
 
-#if HAVE_SWRESAMPLE	
-	/* And here's frames (where 1 frame is a collection of samples, 1 for each channel,
-	   so for 5.1 a frame would be 6 samples)
-	*/
-	int const frames = samples / _fs->audio_channels;
-#endif	
-
 	/* Maybe apply gain */
 	if (_fs->audio_gain != 0) {
 		float const linear_gain = pow (10, _fs->audio_gain / 20);
@@ -283,51 +226,12 @@ Decoder::process_audio (uint8_t* data, int size)
 		}
 	}
 
-	/* This is a buffer we might use if we are sample-rate converting;
-	   it will need freeing if so.
-	*/
-	uint8_t* out_buffer = 0;
-
-	/* Maybe sample-rate convert */
-#if HAVE_SWRESAMPLE	
-	if (_swr_context) {
-
-		uint8_t const * in[2] = {
-			data,
-			0
-		};
-
-		/* Compute the resampled frame count and add 32 for luck */
-		int const out_buffer_size_frames = ceil (frames * float (dcp_audio_sample_rate (_fs->audio_sample_rate)) / _fs->audio_sample_rate) + 32;
-		int const out_buffer_size_bytes = out_buffer_size_frames * _fs->audio_channels * _fs->bytes_per_sample();
-		out_buffer = new uint8_t[out_buffer_size_bytes];
-
-		uint8_t* out[2] = {
-			out_buffer, 
-			0
-		};
-
-		/* Resample audio */
-		int out_frames = swr_convert (_swr_context, out, out_buffer_size_frames, in, frames);
-		if (out_frames < 0) {
-			throw DecodeError ("could not run sample-rate converter");
-		}
-
-		/* And point our variables at the resampled audio */
-		data = out_buffer;
-		size = out_frames * _fs->audio_channels * _fs->bytes_per_sample();
-	}
-#endif	
-		
 	/* Update the number of audio frames we've pushed to the encoder */
 	_audio_frames_processed += size / (_fs->audio_channels * _fs->bytes_per_sample ());
 
 	/* Push into the delay line and then tell the world what we've got */
 	int available = _delay_line->feed (data, size);
 	Audio (data, available);
-
-	/* Delete the sample-rate conversion buffer, if it exists */
-	delete[] out_buffer;
 }
 
 /** Called by subclasses to tell the world that some video data is ready.
diff --git a/src/lib/decoder.h b/src/lib/decoder.h
index 14b25c7b0..19ef25ede 100644
--- a/src/lib/decoder.h
+++ b/src/lib/decoder.h
@@ -29,11 +29,6 @@
 #include <stdint.h>
 #include <boost/shared_ptr.hpp>
 #include <sigc++/sigc++.h>
-#ifdef HAVE_SWRESAMPLE
-extern "C" {
-#include <libswresample/swresample.h>
-}
-#endif
 #include "util.h"
 
 class Job;
@@ -134,10 +129,6 @@ private:
 	AVFilterContext* _buffer_src_context;
 	AVFilterContext* _buffer_sink_context;
 
-#if HAVE_SWRESAMPLE	
-	SwrContext* _swr_context;
-#endif	
-
 	bool _have_setup_video_filters;
 	DelayLine* _delay_line;
 	int _delay_in_bytes;
diff --git a/src/lib/encoder.h b/src/lib/encoder.h
index 539b2912c..ea356cec4 100644
--- a/src/lib/encoder.h
+++ b/src/lib/encoder.h
@@ -28,6 +28,9 @@
 #include <boost/thread/mutex.hpp>
 #include <list>
 #include <stdint.h>
+extern "C" {
+#include <libavutil/samplefmt.h>
+}
 
 class FilmState;
 class Options;
@@ -50,7 +53,7 @@ public:
 	Encoder (boost::shared_ptr<const FilmState> s, boost::shared_ptr<const Options> o, Log* l);
 
 	/** Called to indicate that a processing run is about to begin */
-	virtual void process_begin () = 0;
+	virtual void process_begin (int64_t audio_channel_layout, AVSampleFormat audio_sample_format) = 0;
 
 	/** Called with a frame of video.
 	 *  @param i Video frame image.
diff --git a/src/lib/film_state.cc b/src/lib/film_state.cc
index e472434ce..0c1ac87dc 100644
--- a/src/lib/film_state.cc
+++ b/src/lib/film_state.cc
@@ -35,6 +35,7 @@
 #include "format.h"
 #include "dcp_content_type.h"
 #include "util.h"
+#include "exceptions.h"
 
 using namespace std;
 using namespace boost;
@@ -278,3 +279,23 @@ FilmState::bytes_per_sample () const
 
 	return 0;
 }
+
+int
+FilmState::target_sample_rate () const
+{
+	double t = dcp_audio_sample_rate (audio_sample_rate);
+	if (rint (frames_per_second) != frames_per_second) {
+		if (fabs (frames_per_second - 23.976) < 1e-6) {
+			/* 24fps drop-frame ie 24 * 1000 / 1001 frames per second;
+			   hence we need to resample the audio to dcp_audio_sample_rate * 1000 / 1001
+			   so that when we play it back at dcp_audio_sample_rate it is sped up
+			   by the same amount that the video is
+			*/
+			t *= double(1000) / 1001;
+		} else {
+			throw EncodeError ("unknown fractional frame rate");
+		}
+	}
+
+	return rint (t);
+}
diff --git a/src/lib/film_state.h b/src/lib/film_state.h
index 12d44cdce..8dc0ce11b 100644
--- a/src/lib/film_state.h
+++ b/src/lib/film_state.h
@@ -80,6 +80,7 @@ public:
 	int thumb_frame (int) const;
 
 	int bytes_per_sample () const;
+	int target_sample_rate () const;
 	
 	void write_metadata (std::ofstream &) const;
 	void read_metadata (std::string, std::string);
diff --git a/src/lib/j2k_still_encoder.h b/src/lib/j2k_still_encoder.h
index d4d68724e..755c68877 100644
--- a/src/lib/j2k_still_encoder.h
+++ b/src/lib/j2k_still_encoder.h
@@ -36,7 +36,7 @@ class J2KStillEncoder : public Encoder
 public:
 	J2KStillEncoder (boost::shared_ptr<const FilmState>, boost::shared_ptr<const Options>, Log *);
 
-	void process_begin () {}
+	void process_begin (int64_t audio_channel_layout, AVSampleFormat audio_sample_format) {}
 	void process_video (boost::shared_ptr<Image>, int);
 	void process_audio (uint8_t *, int) {}
 	void process_end () {}
diff --git a/src/lib/j2k_wav_encoder.cc b/src/lib/j2k_wav_encoder.cc
index 08c796350..87514bf14 100644
--- a/src/lib/j2k_wav_encoder.cc
+++ b/src/lib/j2k_wav_encoder.cc
@@ -46,6 +46,9 @@ using namespace boost;
 
 J2KWAVEncoder::J2KWAVEncoder (shared_ptr<const FilmState> s, shared_ptr<const Options> o, Log* l)
 	: Encoder (s, o, l)
+#ifdef HAVE_SWRESAMPLE	  
+	, _swr_context (0)
+#endif	  
 	, _deinterleave_buffer_size (8192)
 	, _deinterleave_buffer (0)
 	, _process_end (false)
@@ -216,8 +219,36 @@ J2KWAVEncoder::encoder_thread (ServerDescription* server)
 }
 
 void
-J2KWAVEncoder::process_begin ()
+J2KWAVEncoder::process_begin (int64_t audio_channel_layout, AVSampleFormat audio_sample_format)
 {
+	if ((_fs->audio_sample_rate != dcp_audio_sample_rate (_fs->audio_sample_rate)) || (rint (_fs->frames_per_second) != _fs->frames_per_second)) {
+#ifdef HAVE_SWRESAMPLE
+
+		stringstream s;
+		s << "Will resample audio from " << _fs->audio_sample_rate << " to " << _fs->target_sample_rate();
+		_log->log (s.str ());
+		
+		_swr_context = swr_alloc_set_opts (
+			0,
+			audio_channel_layout,
+			audio_sample_format,
+			_fs->target_sample_rate(),
+			audio_channel_layout,
+			audio_sample_format,
+			_fs->audio_sample_rate,
+			0, 0
+			);
+		
+		swr_init (_swr_context);
+#else
+		throw EncodeError ("Cannot resample audio as libswresample is not present");
+#endif
+	} else {
+#ifdef HAVE_SWRESAMPLE
+		_swr_context = 0;
+#endif		
+	}
+	
 	for (int i = 0; i < Config::instance()->num_local_encoding_threads (); ++i) {
 		_worker_threads.push_back (new boost::thread (boost::bind (&J2KWAVEncoder::encoder_thread, this, (ServerDescription *) 0)));
 	}
@@ -270,6 +301,33 @@ J2KWAVEncoder::process_end ()
 			_log->log (String::compose ("Local encode failed (%1)", e.what ()));
 		}
 	}
+
+#if HAVE_SWRESAMPLE	
+	if (_swr_context) {
+
+		while (1) {
+			uint8_t buffer[256 * _fs->bytes_per_sample() * _fs->audio_channels];
+			uint8_t* out[2] = {
+				buffer,
+				0
+			};
+
+			int const frames = swr_convert (_swr_context, out, 256, 0, 0);
+
+			if (frames < 0) {
+				throw EncodeError ("could not run sample-rate converter");
+			}
+
+			if (frames == 0) {
+				break;
+			}
+
+			write_audio (buffer, frames * _fs->bytes_per_sample() * _fs->audio_channels);
+		}
+
+		swr_free (&_swr_context);
+	}
+#endif	
 	
 	close_sound_files ();
 
@@ -283,39 +341,92 @@ J2KWAVEncoder::process_end ()
 }
 
 void
-J2KWAVEncoder::process_audio (uint8_t* data, int data_size)
+J2KWAVEncoder::process_audio (uint8_t* data, int size)
 {
-	/* Size of a sample in bytes */
-	int const sample_size = 2;
+	/* This is a buffer we might use if we are sample-rate converting;
+	   it will need freeing if so.
+	*/
+	uint8_t* out_buffer = 0;
 	
-	/* XXX: we are assuming that sample_size is right, the _deinterleave_buffer_size is a multiple
-	   of the sample size and that data_size is a multiple of _fs->audio_channels * sample_size.
+	/* Maybe sample-rate convert */
+#if HAVE_SWRESAMPLE	
+	if (_swr_context) {
+
+		uint8_t const * in[2] = {
+			data,
+			0
+		};
+
+		/* Here's the total number of samples, summed over all channels */
+		int const samples = size / _fs->bytes_per_sample();
+		
+		/* And here's frames (where 1 frame is a collection of samples, 1 for each channel,
+		   so for 5.1 a frame would be 6 samples)
+		*/
+		int const frames = samples / _fs->audio_channels;
+
+		/* Compute the resampled frame count and add 32 for luck */
+		int const out_buffer_size_frames = ceil (frames * _fs->target_sample_rate() / _fs->audio_sample_rate) + 32;
+		int const out_buffer_size_bytes = out_buffer_size_frames * _fs->audio_channels * _fs->bytes_per_sample();
+		out_buffer = new uint8_t[out_buffer_size_bytes];
+
+		uint8_t* out[2] = {
+			out_buffer, 
+			0
+		};
+
+		/* Resample audio */
+		int out_frames = swr_convert (_swr_context, out, out_buffer_size_frames, in, frames);
+		if (out_frames < 0) {
+			throw EncodeError ("could not run sample-rate converter");
+		}
+
+		/* And point our variables at the resampled audio */
+		data = out_buffer;
+		size = out_frames * _fs->audio_channels * _fs->bytes_per_sample();
+	}
+#endif
+
+	write_audio (data, size);
+
+	/* Delete the sample-rate conversion buffer, if it exists */
+	delete[] out_buffer;
+}
+
+void
+J2KWAVEncoder::write_audio (uint8_t* data, int size)
+{
+	/* XXX: we are assuming that the _deinterleave_buffer_size is a multiple
+	   of the sample size and that size is a multiple of _fs->audio_channels * _fs->bytes_per_sample().
 	*/
+
+	assert ((size % (_fs->audio_channels * _fs->bytes_per_sample())) == 0);
+	assert ((_deinterleave_buffer_size % _fs->bytes_per_sample()) == 0);
 	
 	/* XXX: this code is very tricksy and it must be possible to make it simpler ... */
 	
 	/* Number of bytes left to read this time */
-	int remaining = data_size;
+	int remaining = size;
 	/* Our position in the output buffers, in bytes */
 	int position = 0;
 	while (remaining > 0) {
 		/* How many bytes of the deinterleaved data to do this time */
 		int this_time = min (remaining / _fs->audio_channels, _deinterleave_buffer_size);
 		for (int i = 0; i < _fs->audio_channels; ++i) {
-			for (int j = 0; j < this_time; j += sample_size) {
-				for (int k = 0; k < sample_size; ++k) {
+			for (int j = 0; j < this_time; j += _fs->bytes_per_sample()) {
+				for (int k = 0; k < _fs->bytes_per_sample(); ++k) {
 					int const to = j + k;
-					int const from = position + (i * sample_size) + (j * _fs->audio_channels) + k;
+					int const from = position + (i * _fs->bytes_per_sample()) + (j * _fs->audio_channels) + k;
 					_deinterleave_buffer[to] = data[from];
 				}
 			}
 			
 			switch (_fs->audio_sample_format) {
 			case AV_SAMPLE_FMT_S16:
-				sf_write_short (_sound_files[i], (const short *) _deinterleave_buffer, this_time / sample_size);
+				sf_write_short (_sound_files[i], (const short *) _deinterleave_buffer, this_time / _fs->bytes_per_sample());
 				break;
 			default:
-				throw DecodeError ("unknown audio sample format");
+				throw EncodeError ("unknown audio sample format");
 			}
 		}
 		
@@ -323,3 +434,4 @@ J2KWAVEncoder::process_audio (uint8_t* data, int data_size)
 		remaining -= this_time * _fs->audio_channels;
 	}
 }
+
diff --git a/src/lib/j2k_wav_encoder.h b/src/lib/j2k_wav_encoder.h
index 1c2f50065..e11358c2c 100644
--- a/src/lib/j2k_wav_encoder.h
+++ b/src/lib/j2k_wav_encoder.h
@@ -26,6 +26,11 @@
 #include <boost/thread/condition.hpp>
 #include <boost/thread/mutex.hpp>
 #include <boost/thread.hpp>
+#ifdef HAVE_SWRESAMPLE
+extern "C" {
+#include <libswresample/swresample.h>
+}
+#endif
 #include <sndfile.h>
 #include "encoder.h"
 
@@ -43,17 +48,22 @@ public:
 	J2KWAVEncoder (boost::shared_ptr<const FilmState>, boost::shared_ptr<const Options>, Log *);
 	~J2KWAVEncoder ();
 
-	void process_begin ();
+	void process_begin (int64_t audio_channel_layout, AVSampleFormat audio_sample_format);
 	void process_video (boost::shared_ptr<Image>, int);
 	void process_audio (uint8_t *, int);
 	void process_end ();
 
-private:	
+private:
 
+	void write_audio (uint8_t* data, int size);
 	void encoder_thread (ServerDescription *);
 	void close_sound_files ();
 	void terminate_worker_threads ();
 
+#if HAVE_SWRESAMPLE	
+	SwrContext* _swr_context;
+#endif	
+
 	std::vector<SNDFILE*> _sound_files;
 	int _deinterleave_buffer_size;
 	uint8_t* _deinterleave_buffer;
diff --git a/src/lib/tiff_encoder.h b/src/lib/tiff_encoder.h
index ec8e38011..ef1ce25d2 100644
--- a/src/lib/tiff_encoder.h
+++ b/src/lib/tiff_encoder.h
@@ -36,7 +36,7 @@ class TIFFEncoder : public Encoder
 public:
 	TIFFEncoder (boost::shared_ptr<const FilmState> s, boost::shared_ptr<const Options> o, Log* l);
 
-	void process_begin () {}
+	void process_begin (int64_t audio_channel_layout, AVSampleFormat audio_sample_format) {}
 	void process_video (boost::shared_ptr<Image>, int);
 	void process_audio (uint8_t *, int) {}
 	void process_end () {}
diff --git a/src/lib/transcoder.cc b/src/lib/transcoder.cc
index 3d71b68f5..b74d09174 100644
--- a/src/lib/transcoder.cc
+++ b/src/lib/transcoder.cc
@@ -57,7 +57,7 @@ Transcoder::Transcoder (shared_ptr<const FilmState> s, shared_ptr<const Options>
 void
 Transcoder::go ()
 {
-	_encoder->process_begin ();
+	_encoder->process_begin (_decoder->audio_channel_layout(), _decoder->audio_sample_format());
 	try {
 		_decoder->go ();
 	} catch (...) {
author	Carl Hetherington <cth@carlh.net>	2012-10-02 16:14:29 +0100
committer	Carl Hetherington <cth@carlh.net>	2012-10-02 16:14:29 +0100
commit	c55d8bcda8f4da74bbc9489127354211cea8f2ff (patch)
tree	d81b7197b635c6a9bcf9889c3dc4e6b9bc45bde3 /src/lib
parent	11c0aac8508ac1a54e63bdcb31a85c941a7fb546 (diff)
parent	0f154f43bd0c88d1615e455bd8a169826a08c086 (diff)