+ /* We only decode audio once our first video packet has been through, and only if
+ that first video packet was not later than this packet. Until then audio is
+ thrown away.
+ */
+
+ if (_first_video && _first_video.get() <= source_pts_seconds) {
+
+ if (!_first_audio) {
+ _first_audio = source_pts_seconds;
+
+ /* This is our first audio frame, and if we've arrived here we must have had our
+ first video frame. Push some silence to make up any gap between our first
+ video frame and our first audio.
+ */
+
+ /* frames of silence that we must push */
+ int const s = rint ((_first_audio.get() - _first_video.get()) * audio_sample_rate ());
+
+ _film->log()->log (
+ String::compose (
+ "First video at %1, first audio at %2, pushing %3 frames of silence for %4 channels (%5 bytes per sample)",
+ _first_video.get(), _first_audio.get(), s, audio_channels(), bytes_per_audio_sample()
+ )
+ );
+
+ if (s) {
+ /* hence bytes */
+ int const b = s * audio_channels() * bytes_per_audio_sample();
+
+ /* XXX: this assumes that b won't be too large for the local array below, and
+ makes the shaky assumption that every sound representation is silent when
+ filled with memset()ed zero data.
+ */
+ uint8_t silence[b];
+ memset (silence, 0, b);
+ process_audio (silence, b);
+ }
+ }
+
+ int const data_size = av_samples_get_buffer_size (
+ 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
+ );
+
+ assert (_audio_codec_context->channels == _film->audio_channels());
+ process_audio (_frame->data[0], data_size);
+ }
+ }
+
+ } else if (_subtitle_stream >= 0 && _packet.stream_index == _subtitle_stream && _opt->decode_subtitles && _first_video) {