diff --git a/src/lib/ffmpeg_decoder.cc b/src/lib/ffmpeg_decoder.cc
index ba875711a..198925bd3 100644
--- a/src/lib/ffmpeg_decoder.cc
+++ b/src/lib/ffmpeg_decoder.cc
@@ -49,6 +49,7 @@ extern "C" {
 #include "ffmpeg_decoder.h"
 #include "subtitle.h"
 
+using std::cout;
 using std::string;
 using std::vector;
 using std::stringstream;
@@ -87,8 +88,9 @@ FFmpegDecoder::~FFmpegDecoder ()
 	if (_subtitle_codec_context) {
 		avcodec_close (_subtitle_codec_context);
 	}
-	
+
 	av_free (_frame);
+	avformat_close_input (&_format_context);
 }
@@ -158,7 +160,7 @@ FFmpegDecoder::setup_video ()
 
 	/* I think this prevents problems with green hash on decodes and
 	   "changing frame properties on the fly is not supported by all filters"
-	   messages with some content.
+	   messages with some content.  Although I'm not sure; needs checking.
 	*/
 	AVDictionary* opts = 0;
 	av_dict_set (&opts, "threads", "1", 0);
@@ -216,13 +218,16 @@ FFmpegDecoder::setup_subtitle ()
 
 
 bool
-FFmpegDecoder::do_pass ()
+FFmpegDecoder::pass ()
 {
 	int r = av_read_frame (_format_context, &_packet);
 	if (r < 0) {
 		if (r != AVERROR_EOF) {
-			throw DecodeError ("error on av_read_frame");
+			/* Maybe we should fail here, but for now we'll just finish off instead */
+			char buf[256];
+			av_strerror (r, buf, sizeof(buf));
+			_film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
 		}
 
 		/* Get any remaining frames */
 
@@ -252,64 +257,109 @@ FFmpegDecoder::do_pass ()
 		return true;
 	}
 
-	double const pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base) * _packet.pts;
+	avcodec_get_frame_defaults (_frame);
 
 	if (_packet.stream_index == _video_stream) {
 
-		if (!_first_video) {
-			_first_video = pts_seconds;
-		}
-
 		int frame_finished;
-		if (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
-			process_video (_frame);
-		}
+		int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
+		if (r >= 0 && frame_finished) {
 
-	} else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio && _first_video && _first_video.get() <= pts_seconds) {
+			if (r != _packet.size) {
+				_film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
+			}
 
-		/* Note: We only decode audio if we've had our first video packet through, and if it
-		   was before this packet. Until then audio is thrown away.
-		*/
-		
-		if (!_first_audio) {
-			_first_audio = pts_seconds;
-			
-			/* This is our first audio packet, and if we've arrived here we must have had our
-			   first video packet. Push some silence to make up the gap between our first
-			   video packet and our first audio.
-			*/
-			
-			/* frames of silence that we must push */
-			int const s = rint ((_first_audio.get() - _first_video.get()) * audio_sample_rate ());
-			
-			_film->log()->log (
-				String::compose (
-					"First video at %1, first audio at %2, pushing %3 frames of silence for %4 channels (%5 bytes per sample)",
-					_first_video.get(), _first_audio.get(), s, audio_channels(), bytes_per_audio_sample()
-					)
-				);
-			
-			/* hence bytes */
-			int const b = s * audio_channels() * bytes_per_audio_sample();
-			
-			/* XXX: this assumes that it won't be too much, and there are shaky assumptions
-			   that all sound representations are silent with memset()ed zero data.
-			*/
-			uint8_t silence[b];
-			memset (silence, 0, b);
-			process_audio (silence, b);
+			/* Where we are in the output, in seconds */
+			double const out_pts_seconds = video_frame_index() / frames_per_second();
+
+			/* Where we are in the source, in seconds */
+			double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
+				* av_frame_get_best_effort_timestamp(_frame);
+
+			if (!_first_video) {
+				_first_video = source_pts_seconds;
+			}
+
+			/* Difference between where we are and where we should be */
+			double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
+			double const one_frame = 1 / frames_per_second();
+
+			/* Insert frames if required to get out_pts_seconds up to source_pts_seconds */
+			if (delta > one_frame) {
+				int const extra = rint (delta / one_frame);
+				for (int i = 0; i < extra; ++i) {
+					repeat_last_video ();
+					_film->log()->log (
+						String::compose (
+							"Extra frame inserted at %1s; DCP frame %2, source PTS %3",
+							out_pts_seconds, video_frame_index(), source_pts_seconds
+							)
+						);
+				}
+			}
+
+			if (delta > -one_frame) {
+				/* Process this frame */
+				process_video (_frame);
+			} else {
+				/* Otherwise we are omitting a frame to keep things right */
+				_film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
+			}
 		}
-		
-		avcodec_get_frame_defaults (_frame);
-		
+
+	} else if (_audio_stream >= 0 && _packet.stream_index == _audio_stream && _opt->decode_audio) {
+
 		int frame_finished;
 		if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
-			int const data_size = av_samples_get_buffer_size (
-				0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
-				);
+
+			/* Where we are in the source, in seconds */
+			double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
+				* av_frame_get_best_effort_timestamp(_frame);
+
+			/* We only decode audio if we've had our first video packet through, and if it
+			   was before this packet. Until then audio is thrown away.
+			*/
+
+			if (_first_video && _first_video.get() <= source_pts_seconds) {
+
+				if (!_first_audio) {
+					_first_audio = source_pts_seconds;
+
+					/* This is our first audio frame, and if we've arrived here we must have had our
+					   first video frame. Push some silence to make up any gap between our first
+					   video frame and our first audio.
+					*/
 
-				assert (_audio_codec_context->channels == _film->audio_channels());
-				process_audio (_frame->data[0], data_size);
+
+					/* frames of silence that we must push */
+					int const s = rint ((_first_audio.get() - _first_video.get()) * audio_sample_rate ());
+
+					_film->log()->log (
+						String::compose (
+							"First video at %1, first audio at %2, pushing %3 frames of silence for %4 channels (%5 bytes per sample)",
+							_first_video.get(), _first_audio.get(), s, audio_channels(), bytes_per_audio_sample()
+							)
+						);
+
+					if (s) {
+						/* hence bytes */
+						int const b = s * audio_channels() * bytes_per_audio_sample();
+
+						/* XXX: this assumes the buffer will fit on the stack, and makes the
+						   shaky assumption that every sample format represents silence as
+						   zeroed data.
+						*/
+						uint8_t silence[b];
+						memset (silence, 0, b);
+						process_audio (silence, b);
+					}
+				}
+
+				int const data_size = av_samples_get_buffer_size (
+					0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
+					);
+
+				assert (_audio_codec_context->channels == _film->audio_channels());
+				process_audio (_frame->data[0], data_size);
+			}
 		}
 
 	} else if (_subtitle_stream >= 0 && _packet.stream_index == _subtitle_stream && _opt->decode_subtitles && _first_video) {
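
A few details of the patch are worth illustrating outside the diff. First, the error handling at the top of pass(): rather than throwing DecodeError, the new code renders av_read_frame()'s return code into readable text with av_strerror() and logs it, so a read error finishes the decode gracefully instead of aborting it. A minimal sketch of that FFmpeg idiom follows; the file name and the surrounding main() are illustrative only, not part of the patch.

    /* Sketch: logging an FFmpeg error code in readable form, as pass() now does */
    extern "C" {
    #include <libavformat/avformat.h>
    #include <libavutil/error.h>
    }
    #include <cstdio>

    int main ()
    {
    	AVFormatContext* fc = 0;
    	/* "does-not-exist.mov" is an illustrative input, chosen to force an error */
    	int const r = avformat_open_input (&fc, "does-not-exist.mov", 0, 0);
    	if (r < 0) {
    		char buf[256];
    		av_strerror (r, buf, sizeof(buf));
    		fprintf (stderr, "error on avformat_open_input (%s) (%d)\n", buf, r);
    	}
    	return 0;
    }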
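Second, the heart of the big hunk: the video branch compares where the source claims to be (its best-effort PTS, relative to the first video frame) with where the output is (frames emitted divided by the frame rate). If the source is more than a frame ahead it repeats the last frame to pad the output; if it is more than a frame behind it drops the current frame; within that one-frame dead band, timestamp jitter is tolerated. Below is a stand-alone sketch of the same bookkeeping, with hypothetical emit/repeat callbacks standing in for process_video() and repeat_last_video().

    #include <cmath>
    #include <cstdint>
    #include <functional>

    struct SyncState {
    	double first_video_pts = -1;   /* source PTS of the first decoded frame, in seconds */
    	int64_t frames_out = 0;        /* frames emitted to the output so far */
    };

    /* Decide what to do with one decoded frame whose source PTS is `pts' seconds */
    void handle_frame (SyncState& s, double pts, double fps,
    		   std::function<void()> emit, std::function<void()> repeat)
    {
    	if (s.first_video_pts < 0) {
    		s.first_video_pts = pts;
    	}

    	double const out_pts = s.frames_out / fps;               /* where the output is */
    	double const delta = pts - s.first_video_pts - out_pts;  /* source lead over output */
    	double const one_frame = 1 / fps;

    	if (delta > one_frame) {
    		/* Source is ahead: pad the output by repeating the previous frame */
    		int const extra = std::lrint (delta / one_frame);
    		for (int i = 0; i < extra; ++i) {
    			repeat ();
    			++s.frames_out;
    		}
    	}

    	if (delta > -one_frame) {
    		/* On time (or we just padded): emit this frame */
    		emit ();
    		++s.frames_out;
    	}
    	/* else: source is behind, so drop this frame to stay in sync */
    }

As in the patch, delta is not recomputed after padding: the repeated frames raise frames_out, so the next call's arithmetic absorbs any residue.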
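Finally, the silence-padding arithmetic in the audio branch: the gap between first video and first audio is converted from seconds to sample frames (gap times sample rate) and then to bytes (frames times channels times bytes per sample, which is the byte count of packed interleaved audio, as the patch's sums imply). A worked example with assumed figures; the 48kHz, 6-channel, 16-bit numbers below do not come from the patch.

    #include <cmath>
    #include <cstdio>

    int main ()
    {
    	double const first_video = 0.040;   /* seconds; assumed for illustration */
    	double const first_audio = 0.120;   /* seconds; assumed for illustration */
    	int const sample_rate = 48000;      /* assumed */
    	int const channels = 6;             /* assumed */
    	int const bytes_per_sample = 2;     /* assumed: 16-bit samples */

    	/* frames of silence that we must push */
    	int const s = std::lrint ((first_audio - first_video) * sample_rate);
    	/* hence bytes */
    	int const b = s * channels * bytes_per_sample;

    	printf ("%d frames of silence = %d bytes\n", s, b);  /* 3840 frames = 46080 bytes */
    	return 0;
    }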