2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
35 #include <libswscale/swscale.h>
36 #include <libpostproc/postprocess.h>
41 #include "transcoder.h"
45 #include "exceptions.h"
49 #include "ffmpeg_decoder.h"
50 #include "filter_graph.h"
58 using std::stringstream;
60 using boost::shared_ptr;
61 using boost::optional;
62 using boost::dynamic_pointer_cast;
/* Construct a decoder for the given film using the given decode options.
   The raw FFmpeg codec contexts start out null; they are filled in by the
   setup_*() methods and closed again in the destructor.
   NOTE(review): several initialiser-list entries are missing from this view
   of the file. */
65 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, DecodeOptions o)
72 , _video_codec_context (0)
74 , _audio_codec_context (0)
76 , _subtitle_codec_context (0)
/* Close whichever codec contexts were opened (each may legitimately be null
   if the corresponding setup_*() was never run) and then shut down the
   format context. */
89 FFmpegDecoder::~FFmpegDecoder ()
91 if (_audio_codec_context) {
92 avcodec_close (_audio_codec_context);
95 if (_video_codec_context) {
96 avcodec_close (_video_codec_context);
99 if (_subtitle_codec_context) {
100 avcodec_close (_subtitle_codec_context);
/* avformat_close_input also frees _format_context and sets it to 0 */
105 avformat_close_input (&_format_context);
/* Open the content file, read its stream information and collect the
   available video, audio and subtitle streams.
   @throws OpenFileError if the content cannot be opened.
   @throws DecodeError if stream information cannot be found, no video
   stream exists, or a frame cannot be allocated. */
109 FFmpegDecoder::setup_general ()
113 if (avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0) < 0) {
114 throw OpenFileError (_film->content_path ());
117 if (avformat_find_stream_info (_format_context, 0) < 0) {
118 throw DecodeError (_("could not find stream information"));
121 /* Find video, audio and subtitle streams and choose the first of each */
123 for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
124 AVStream* s = _format_context->streams[i];
125 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
127 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
129 /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
130 so bodge it here. No idea why we should have to do this.
133 if (s->codec->channel_layout == 0) {
134 s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
/* Remember every audio stream we find; a current one is chosen later
   via set_audio_stream() */
137 _audio_streams.push_back (
138 shared_ptr<AudioStream> (
139 new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
143 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
144 _subtitle_streams.push_back (
145 shared_ptr<SubtitleStream> (
146 new SubtitleStream (stream_name (s), i)
/* Video is mandatory; audio and subtitles are optional */
152 if (_video_stream < 0) {
153 throw DecodeError (N_("could not find video stream"));
/* A single AVFrame is reused for both video and audio decoding */
156 _frame = avcodec_alloc_frame ();
158 throw DecodeError (N_("could not allocate frame"));
/* Find and open a decoder for the chosen video stream.
   @throws DecodeError if no decoder is available or it cannot be opened. */
163 FFmpegDecoder::setup_video ()
165 _video_codec_context = _format_context->streams[_video_stream]->codec;
166 _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
168 if (_video_codec == 0) {
169 throw DecodeError (_("could not find video decoder"));
172 if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
173 throw DecodeError (N_("could not open video decoder"));
/* Find and open a decoder for the current audio stream, if there is one;
   a no-op when no audio stream is selected.
   @throws DecodeError if no decoder is available or it cannot be opened. */
178 FFmpegDecoder::setup_audio ()
180 if (!_audio_stream) {
/* Only FFmpeg-sourced audio streams carry an id we can index the format
   context with */
184 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
187 _audio_codec_context = _format_context->streams[ffa->id()]->codec;
188 _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
190 if (_audio_codec == 0) {
191 throw DecodeError (_("could not find audio decoder"));
194 if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
195 throw DecodeError (N_("could not open audio decoder"));
/* Find and open a decoder for the current subtitle stream, if there is one
   and its id is a valid stream index; otherwise a no-op.
   @throws DecodeError if no decoder is available or it cannot be opened. */
200 FFmpegDecoder::setup_subtitle ()
202 if (!_subtitle_stream || _subtitle_stream->id() >= int (_format_context->nb_streams)) {
206 _subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
207 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
209 if (_subtitle_codec == 0) {
210 throw DecodeError (_("could not find subtitle decoder"));
213 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
214 throw DecodeError (N_("could not open subtitle decoder"));
/* Read and decode one packet from the content, dispatching it to the video,
   audio or subtitle path according to its stream index.  On end-of-file
   (or read error, which is logged and treated the same) any frames still
   buffered inside the codecs are flushed out. */
220 FFmpegDecoder::pass ()
222 int r = av_read_frame (_format_context, &_packet);
225 if (r != AVERROR_EOF) {
226 /* Maybe we should fail here, but for now we'll just finish off instead */
228 av_strerror (r, buf, sizeof(buf));
229 _film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
232 /* Get any remaining frames */
237 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
241 if (_opt.decode_video) {
/* Keep draining the video codec until it stops producing frames */
242 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
243 filter_and_emit_video (_frame);
247 if (_audio_stream && _opt.decode_audio) {
248 decode_audio_packet ();
254 avcodec_get_frame_defaults (_frame);
256 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
258 if (_packet.stream_index == _video_stream && _opt.decode_video) {
261 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
262 if (r >= 0 && frame_finished) {
/* A partial consume of the packet is unexpected; log it */
264 if (r != _packet.size) {
265 _film->log()->log (String::compose (N_("Used only %1 bytes of %2 in packet"), r, _packet.size));
268 if (_opt.video_sync) {
271 filter_and_emit_video (_frame);
275 } else if (ffa && _packet.stream_index == ffa->id() && _opt.decode_audio) {
276 decode_audio_packet ();
/* Subtitles are only decoded once video has started (_first_video set) */
277 } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt.decode_subtitles && _first_video) {
281 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
282 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
283 indicate that the previous subtitle should stop.
285 if (sub.num_rects > 0) {
286 shared_ptr<TimedSubtitle> ts;
288 emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub)));
290 /* some problem with the subtitle; we probably didn't understand it */
/* Empty subtitle: tell listeners to clear the current one */
293 emit_subtitle (shared_ptr<TimedSubtitle> ());
295 avsubtitle_free (&sub);
299 av_free_packet (&_packet);
303 /** @param data pointer to array of pointers to buffers.
304 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
* @param size total size of the audio data, in bytes.
* @return the samples deinterleaved into one float buffer per channel,
* scaled so that integer formats map onto [-1, 1).
*/
306 shared_ptr<AudioBuffers>
307 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
309 assert (_film->audio_channels());
310 assert (bytes_per_audio_sample());
312 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
315 /* Deinterleave and convert to float */
/* size must be a whole number of whole-channel sample groups */
317 assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
319 int const total_samples = size / bytes_per_audio_sample();
320 int const frames = total_samples / _film->audio_channels();
321 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
323 switch (audio_sample_format()) {
324 case AV_SAMPLE_FMT_S16:
/* Interleaved signed 16-bit: walk samples, bumping channel then frame */
326 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
329 for (int i = 0; i < total_samples; ++i) {
330 audio->data(channel)[sample] = float(*p++) / (1 << 15);
333 if (channel == _film->audio_channels()) {
341 case AV_SAMPLE_FMT_S16P:
/* Planar signed 16-bit: one source buffer per channel */
343 int16_t** p = reinterpret_cast<int16_t **> (data);
344 for (int i = 0; i < _film->audio_channels(); ++i) {
345 for (int j = 0; j < frames; ++j) {
346 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
352 case AV_SAMPLE_FMT_S32:
/* Interleaved signed 32-bit */
354 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
357 for (int i = 0; i < total_samples; ++i) {
358 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
361 if (channel == _film->audio_channels()) {
369 case AV_SAMPLE_FMT_FLT:
/* Interleaved float: no scaling needed */
371 float* p = reinterpret_cast<float*> (data[0]);
374 for (int i = 0; i < total_samples; ++i) {
375 audio->data(channel)[sample] = *p++;
378 if (channel == _film->audio_channels()) {
386 case AV_SAMPLE_FMT_FLTP:
/* Planar float: straight memcpy per channel */
388 float** p = reinterpret_cast<float**> (data);
389 for (int i = 0; i < _film->audio_channels(); ++i) {
390 memcpy (audio->data(i), p[i], frames * sizeof(float));
396 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/* @return the video frame rate, preferring the stream's average frame rate
   and falling back to r_frame_rate when the average is not set. */
403 FFmpegDecoder::frames_per_second () const
405 AVStream* s = _format_context->streams[_video_stream];
407 if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
408 return av_q2d (s->avg_frame_rate);
411 return av_q2d (s->r_frame_rate);
/* @return the sample format of the audio codec, or a zero-valued
   AVSampleFormat if no audio codec context has been set up. */
415 FFmpegDecoder::audio_sample_format () const
417 if (_audio_codec_context == 0) {
418 return (AVSampleFormat) 0;
421 return _audio_codec_context->sample_fmt;
/* @return the video frame size in pixels, straight from the codec context. */
425 FFmpegDecoder::native_size () const
427 return libdcp::Size (_video_codec_context->width, _video_codec_context->height);
/* @return the pixel format of the decoded video. */
431 FFmpegDecoder::pixel_format () const
433 return _video_codec_context->pix_fmt;
/* @return numerator of the video codec's time base. */
437 FFmpegDecoder::time_base_numerator () const
439 return _video_codec_context->time_base.num;
/* @return denominator of the video codec's time base. */
443 FFmpegDecoder::time_base_denominator () const
445 return _video_codec_context->time_base.den;
/* @return numerator of the video's sample (pixel) aspect ratio. */
449 FFmpegDecoder::sample_aspect_ratio_numerator () const
451 return _video_codec_context->sample_aspect_ratio.num;
/* @return denominator of the video's sample (pixel) aspect ratio. */
455 FFmpegDecoder::sample_aspect_ratio_denominator () const
457 return _video_codec_context->sample_aspect_ratio.den;
/* Build a human-readable name for a stream from its "language" and "title"
   metadata entries; falls back to something else (not visible in this view)
   when both are empty. */
461 FFmpegDecoder::stream_name (AVStream* s) const
465 AVDictionaryEntry const * lang = av_dict_get (s->metadata, N_("language"), 0, 0);
470 AVDictionaryEntry const * title = av_dict_get (s->metadata, N_("title"), 0, 0);
472 if (!n.str().empty()) {
478 if (n.str().empty()) {
/* @return size in bytes of one sample in the current audio sample format. */
486 FFmpegDecoder::bytes_per_audio_sample () const
488 return av_get_bytes_per_sample (audio_sample_format ());
/* Switch to a different audio stream; delegates stream bookkeeping to the
   base class (the codec re-setup is not visible in this view). */
492 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
494 AudioDecoder::set_audio_stream (s);
/* Switch to a different subtitle stream; delegates stream bookkeeping to the
   base class (the codec re-setup is not visible in this view). */
499 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
501 VideoDecoder::set_subtitle_stream (s);
/* Push a decoded frame through a FilterGraph matching its size and pixel
   format (creating and caching a new graph on first sight of a given
   size/format) and emit each resulting image.  The graph list is protected
   by _filter_graphs_mutex. */
507 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
509 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
511 shared_ptr<FilterGraph> graph;
/* Look for an existing graph that can handle this frame */
513 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
514 while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
518 if (i == _filter_graphs.end ()) {
519 graph.reset (new FilterGraph (_film, this, libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format));
520 _filter_graphs.push_back (graph);
521 _film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), frame->width, frame->height, frame->format));
/* One input frame may produce zero or more output images */
526 list<shared_ptr<Image> > images = graph->process (frame);
528 for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
529 emit_video (*i, frame_time ());
/* Seek to time p (seconds), forwards-biased. */
534 FFmpegDecoder::seek (double p)
536 return do_seek (p, false);
/* Re-seek to the last source time, backwards-biased, so that the same frame
   can be fetched again. */
540 FFmpegDecoder::seek_to_last ()
542 /* This AVSEEK_FLAG_BACKWARD in do_seek is a bit of a hack; without it, if we ask for a seek to the same place as last time
543 (used when we change decoder parameters and want to re-fetch the frame) we end up going forwards rather than
544 staying in the same place.
546 return do_seek (last_source_time(), true);
/* Seek to time p (seconds), converting to the video stream's time base and
   flushing codec buffers so stale frames are not emitted afterwards.
   @param backwards pass AVSEEK_FLAG_BACKWARD to av_seek_frame. */
550 FFmpegDecoder::do_seek (double p, bool backwards)
552 int64_t const vt = p / av_q2d (_format_context->streams[_video_stream]->time_base);
554 int const r = av_seek_frame (_format_context, _video_stream, vt, backwards ? AVSEEK_FLAG_BACKWARD : 0);
556 avcodec_flush_buffers (_video_codec_context);
557 if (_subtitle_codec_context) {
558 avcodec_flush_buffers (_subtitle_codec_context);
/* Factory: deserialise an FFmpegAudioStream from its string form t.
   @param v metadata version; when absent (< 1) the string carries no type
   tag, and only FFmpeg streams existed anyway.
   @return the new stream, or an empty pointer if the string describes a
   non-ffmpeg stream type. */
564 shared_ptr<FFmpegAudioStream>
565 FFmpegAudioStream::create (string t, optional<int> v)
568 /* version < 1; no type in the string, and there's only FFmpeg streams anyway */
569 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
575 if (type != N_("ffmpeg")) {
576 return shared_ptr<FFmpegAudioStream> ();
579 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
/* Deserialising constructor: parse id, sample rate, channel layout and name
   out of the string t according to the metadata version. */
582 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
/* Old (pre-version-1) form stored a channel count rather than a layout */
590 n >> _id >> channels;
591 _channel_layout = av_get_default_channel_layout (channels);
595 /* Current (marked version 1) */
596 n >> type >> _id >> _sample_rate >> _channel_layout;
597 assert (type == N_("ffmpeg"));
/* The name is whatever remains after the leading fields; strip them off
   one space-delimited token at a time */
600 for (int i = 0; i < name_index; ++i) {
601 size_t const s = t.find (' ');
602 if (s != string::npos) {
603 t = t.substr (s + 1);
/* Serialise in the version-1 form parsed by create(): type, id, sample
   rate, channel layout, then the name. */
611 FFmpegAudioStream::to_string () const
613 return String::compose (N_("ffmpeg %1 %2 %3 %4"), _id, _sample_rate, _channel_layout, _name);
/* Emit the current video frame while keeping output time in step with the
   source PTS: insert repeated frames when the output lags behind, and drop
   a frame when it runs ahead. */
617 FFmpegDecoder::out_with_sync ()
619 /* Where we are in the output, in seconds */
620 double const out_pts_seconds = video_frame() / frames_per_second();
622 /* Where we are in the source, in seconds */
623 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
624 * av_frame_get_best_effort_timestamp(_frame);
627 String::compose (N_("Source video frame ready; source at %1, output at %2"), source_pts_seconds, out_pts_seconds),
/* Remember the PTS of the very first video frame; deltas below are
   measured relative to it */
632 _first_video = source_pts_seconds;
635 /* Difference between where we are and where we should be */
636 double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
637 double const one_frame = 1 / frames_per_second();
639 /* Insert frames if required to get out_pts_seconds up to pts_seconds */
640 if (delta > one_frame) {
641 int const extra = rint (delta / one_frame);
642 for (int i = 0; i < extra; ++i) {
643 repeat_last_video ();
646 N_("Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)"),
647 out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
653 if (delta > -one_frame) {
654 /* Process this frame */
655 filter_and_emit_video (_frame);
657 /* Otherwise we are omitting a frame to keep things right */
658 _film->log()->log (String::compose (N_("Frame removed at %1s"), out_pts_seconds));
/* React to a change of film property; the visible branch discards all cached
   filter graphs (presumably because a filter-affecting property changed —
   TODO confirm which property triggers this, the switch is not visible). */
663 FFmpegDecoder::film_changed (Film::Property p)
669 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
670 _filter_graphs.clear ();
680 /** @return Length (in video frames) according to our content's header */
682 FFmpegDecoder::length () const
/* _format_context->duration is in AV_TIME_BASE units */
684 return (double(_format_context->duration) / AV_TIME_BASE) * frames_per_second();
/* @return the best-effort timestamp of the current frame, converted from the
   video stream's time base into seconds. */
688 FFmpegDecoder::frame_time () const
690 return av_frame_get_best_effort_timestamp(_frame) * av_q2d (_format_context->streams[_video_stream]->time_base);
/* Decode the audio in _packet, which may contain several audio frames, and
   emit it once the first video frame has gone through (padding with silence
   to line the first audio up with the first video).  Works on a shallow copy
   of _packet, advancing its data/size as avcodec_decode_audio4 consumes it. */
694 FFmpegDecoder::decode_audio_packet ()
696 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
699 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
703 AVPacket copy_packet = _packet;
705 while (copy_packet.size > 0) {
/* Fixed mojibake: "©_packet" was an HTML-entity mangling of "&copy_packet" */
708 int const decode_result = avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &copy_packet);
709 if (decode_result >= 0 && frame_finished) {
711 /* Where we are in the source, in seconds */
712 double const source_pts_seconds = av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
713 * av_frame_get_best_effort_timestamp(_frame);
715 /* We only decode audio if we've had our first video packet through, and if it
716 was before this packet. Until then audio is thrown away.
719 if ((_first_video && _first_video.get() <= source_pts_seconds) || !_opt.decode_video) {
721 if (!_first_audio && _opt.decode_video) {
722 _first_audio = source_pts_seconds;
724 /* This is our first audio frame, and if we've arrived here we must have had our
725 first video frame. Push some silence to make up any gap between our first
726 video frame and our first audio.
729 /* frames of silence that we must push */
730 int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
734 N_("First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)"),
735 _first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
740 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
741 audio->make_silent ();
/* Size in bytes of the decoded frame, for deinterleaving */
746 int const data_size = av_samples_get_buffer_size (
747 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
750 assert (_audio_codec_context->channels == _film->audio_channels());
751 Audio (deinterleave_audio (_frame->data, data_size));
/* Advance past the bytes the decoder consumed */
755 if (decode_result >= 0) {
756 copy_packet.data += decode_result;
757 copy_packet.size -= decode_result;