From 579dfce1bae018efd73c9f5f3f31770cb401d19a Mon Sep 17 00:00:00 2001 From: Carl Hetherington Date: Mon, 22 Oct 2012 21:04:50 +0100 Subject: Try again to sort out the audio padding / alignment. --- src/lib/decoder.cc | 4 ++-- src/lib/ffmpeg_decoder.cc | 47 +++++++++++++++++++++------------------------- src/lib/ffmpeg_decoder.h | 5 +++-- src/lib/film_state.cc | 3 ++- src/lib/film_state.h | 2 +- src/lib/j2k_wav_encoder.cc | 21 +++++++++++++++------ src/lib/j2k_wav_encoder.h | 3 ++- src/lib/util.cc | 10 ++++++++++ src/lib/util.h | 2 ++ 9 files changed, 58 insertions(+), 39 deletions(-) (limited to 'src/lib') diff --git a/src/lib/decoder.cc b/src/lib/decoder.cc index 65e5ff722..c0ad85da3 100644 --- a/src/lib/decoder.cc +++ b/src/lib/decoder.cc @@ -112,7 +112,7 @@ Decoder::process_end () in to get it to the right length. */ - int64_t const video_length_in_audio_frames = ((int64_t) _fs->dcp_length() * _fs->target_sample_rate() / _fs->frames_per_second()); + int64_t const video_length_in_audio_frames = ((int64_t) _fs->dcp_length() * _fs->audio_sample_rate() / _fs->frames_per_second()); int64_t const audio_short_by_frames = video_length_in_audio_frames - _audio_frames_processed; _log->log ( @@ -266,7 +266,7 @@ Decoder::emit_audio (uint8_t* data, int size) } /* Update the number of audio frames we've pushed to the encoder */ - _audio_frames_processed += frames; + _audio_frames_processed += audio->frames (); Audio (audio); } diff --git a/src/lib/ffmpeg_decoder.cc b/src/lib/ffmpeg_decoder.cc index c74fee008..5eb32a014 100644 --- a/src/lib/ffmpeg_decoder.cc +++ b/src/lib/ffmpeg_decoder.cc @@ -66,8 +66,6 @@ FFmpegDecoder::FFmpegDecoder (boost::shared_ptr s, boost::share , _audio_codec (0) , _subtitle_codec_context (0) , _subtitle_codec (0) - , _first_video_pts (-1) - , _first_audio_pts (-1) { setup_general (); setup_video (); @@ -213,6 +211,7 @@ bool FFmpegDecoder::do_pass () { int r = av_read_frame (_format_context, &_packet); + if (r < 0) { if (r != AVERROR_EOF) { throw DecodeError ("error on av_read_frame"); @@ -245,10 +244,12 @@ FFmpegDecoder::do_pass () return true; } + double const pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base) * _packet.pts; + if (_packet.stream_index == _video_stream) { - if (_first_video_pts == -1) { - _first_video_pts = _packet.pts; + if (!_first_video) { + _first_video = pts_seconds; } int frame_finished; @@ -256,39 +257,33 @@ FFmpegDecoder::do_pass () process_video (_frame); } - } else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio && (_first_video_pts != -1 && _packet.pts > _first_video_pts)) { + } else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio && _first_video && _first_video.get() <= pts_seconds) { - /* Note: We only decode audio if we've had our first video packet through, and if this - packet comes after it. Until then it is thrown away. + /* Note: We only decode audio if we've had our first video packet through, and if it + was before this packet. Until then audio is thrown away. */ - - if (_first_audio_pts == -1) { - _first_audio_pts = _packet.pts; - + + if (!_first_audio) { + _first_audio = pts_seconds; + /* This is our first audio packet, and if we've arrived here we must have had our first video packet. Push some silence to make up the gap between our first video packet and our first audio. */ - AVStream* v = _format_context->streams[_video_stream]; - AVStream* a = _format_context->streams[_audio_stream]; + /* frames of silence that we must push */ + int const s = rint ((_first_audio.get() - _first_video.get()) * audio_sample_rate ()); - assert (v->time_base.num == a->time_base.num); - assert (v->time_base.den == a->time_base.den); - - /* samples of silence that we must push */ - int const s = rint (av_q2d (v->time_base) * (_first_audio_pts - _first_video_pts) * audio_sample_rate ()); - _log->log ( String::compose ( - "First video at %1, first audio at %2, pushing %3 samples of silence", - _first_video_pts, _first_audio_pts, s + "First video at %1, first audio at %2, pushing %3 frames of silence for %4 channels (%5 bytes per sample)", + _first_video.get(), _first_audio.get(), s, audio_channels(), bytes_per_audio_sample() ) ); - + /* hence bytes */ int const b = s * audio_channels() * bytes_per_audio_sample(); - + /* XXX: this assumes that it won't be too much, and there are shaky assumptions that all sound representations are silent with memset()ed zero data. */ @@ -296,7 +291,7 @@ FFmpegDecoder::do_pass () memset (silence, 0, b); process_audio (silence, b); } - + avcodec_get_frame_defaults (_frame); int frame_finished; @@ -304,11 +299,11 @@ FFmpegDecoder::do_pass () int const data_size = av_samples_get_buffer_size ( 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1 ); - + assert (_audio_codec_context->channels == _fs->audio_channels()); process_audio (_frame->data[0], data_size); } - + } else if (_subtitle_stream >= 0 && _packet.stream_index == _subtitle_stream && _opt->decode_subtitles) { int got_subtitle; diff --git a/src/lib/ffmpeg_decoder.h b/src/lib/ffmpeg_decoder.h index ac4cd6fcf..29e06c7d4 100644 --- a/src/lib/ffmpeg_decoder.h +++ b/src/lib/ffmpeg_decoder.h @@ -25,6 +25,7 @@ #include #include #include +#include extern "C" { #include #include @@ -105,6 +106,6 @@ private: AVPacket _packet; - int64_t _first_video_pts; - int64_t _first_audio_pts; + boost::optional _first_video; + boost::optional _first_audio; }; diff --git a/src/lib/film_state.cc b/src/lib/film_state.cc index 283830f59..0d92a16ee 100644 --- a/src/lib/film_state.cc +++ b/src/lib/film_state.cc @@ -348,8 +348,9 @@ FilmState::content_type () const return VIDEO; } +/** @return The sampling rate that we will resample the audio to */ int -FilmState::target_sample_rate () const +FilmState::target_audio_sample_rate () const { /* Resample to a DCI-approved sample rate */ double t = dcp_audio_sample_rate (_audio_sample_rate); diff --git a/src/lib/film_state.h b/src/lib/film_state.h index 8940d0e5d..14a8f7eb5 100644 --- a/src/lib/film_state.h +++ b/src/lib/film_state.h @@ -130,7 +130,7 @@ public: std::string thumb_base (int) const; int thumb_frame (int) const; - int target_sample_rate () const; + int target_audio_sample_rate () const; void write_metadata () const; void read_metadata (); diff --git a/src/lib/j2k_wav_encoder.cc b/src/lib/j2k_wav_encoder.cc index 8747bb7ad..c0c785d11 100644 --- a/src/lib/j2k_wav_encoder.cc +++ b/src/lib/j2k_wav_encoder.cc @@ -49,6 +49,7 @@ J2KWAVEncoder::J2KWAVEncoder (shared_ptr s, shared_ptraudio_sample_rate() != _fs->target_sample_rate()) { + if (_fs->audio_sample_rate() != _fs->target_audio_sample_rate()) { #ifdef HAVE_SWRESAMPLE stringstream s; - s << "Will resample audio from " << _fs->audio_sample_rate() << " to " << _fs->target_sample_rate(); + s << "Will resample audio from " << _fs->audio_sample_rate() << " to " << _fs->target_audio_sample_rate(); _log->log (s.str ()); /* We will be using planar float data when we call the resampler */ @@ -230,7 +231,7 @@ J2KWAVEncoder::process_begin (int64_t audio_channel_layout) 0, audio_channel_layout, AV_SAMPLE_FMT_FLTP, - _fs->target_sample_rate(), + _fs->target_audio_sample_rate(), audio_channel_layout, AV_SAMPLE_FMT_FLTP, _fs->audio_sample_rate(), @@ -321,7 +322,13 @@ J2KWAVEncoder::process_end () swr_free (&_swr_context); } -#endif +#endif + + int const dcp_sr = dcp_audio_sample_rate (_fs->audio_sample_rate ()); + int64_t const extra_audio_frames = dcp_sr - (_audio_frames_written % dcp_sr); + shared_ptr silence (new AudioBuffers (_fs->audio_channels(), extra_audio_frames)); + silence->make_silent (); + write_audio (silence); close_sound_files (); @@ -344,7 +351,7 @@ J2KWAVEncoder::process_audio (shared_ptr audio) if (_swr_context) { /* Compute the resampled frames count and add 32 for luck */ - int const max_resampled_frames = ceil (audio->frames() * _fs->target_sample_rate() / _fs->audio_sample_rate()) + 32; + int const max_resampled_frames = ceil (audio->frames() * _fs->target_audio_sample_rate() / _fs->audio_sample_rate()) + 32; resampled.reset (new AudioBuffers (_fs->audio_channels(), max_resampled_frames)); @@ -368,10 +375,12 @@ J2KWAVEncoder::process_audio (shared_ptr audio) } void -J2KWAVEncoder::write_audio (shared_ptr audio) const +J2KWAVEncoder::write_audio (shared_ptr audio) { for (int i = 0; i < _fs->audio_channels(); ++i) { sf_write_float (_sound_files[i], audio->data(i), audio->frames()); } + + _audio_frames_written += audio->frames (); } diff --git a/src/lib/j2k_wav_encoder.h b/src/lib/j2k_wav_encoder.h index 6733221de..69d445b09 100644 --- a/src/lib/j2k_wav_encoder.h +++ b/src/lib/j2k_wav_encoder.h @@ -57,7 +57,7 @@ public: private: - void write_audio (boost::shared_ptr audio) const; + void write_audio (boost::shared_ptr audio); void encoder_thread (ServerDescription *); void close_sound_files (); void terminate_worker_threads (); @@ -67,6 +67,7 @@ private: #endif std::vector _sound_files; + int64_t _audio_frames_written; bool _process_end; std::list > _queue; diff --git a/src/lib/util.cc b/src/lib/util.cc index 6221b8b62..81dc72b3d 100644 --- a/src/lib/util.cc +++ b/src/lib/util.cc @@ -734,6 +734,16 @@ AudioBuffers::set_frames (int f) _frames = f; } +void +AudioBuffers::make_silent () +{ + for (int i = 0; i < _channels; ++i) { + for (int j = 0; j < _frames; ++j) { + _data[i][j] = 0; + } + } +} + void ensure_ui_thread () { diff --git a/src/lib/util.h b/src/lib/util.h index 2265dfe70..1d9984d9d 100644 --- a/src/lib/util.h +++ b/src/lib/util.h @@ -222,6 +222,8 @@ public: void set_frames (int f); + void make_silent (); + private: /* no copy construction */ AudioBuffers (AudioBuffers const &); -- cgit v1.2.3