2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
35 #include <libswscale/swscale.h>
36 #include <libpostproc/postprocess.h>
41 #include "transcoder.h"
44 #include "exceptions.h"
48 #include "ffmpeg_decoder.h"
49 #include "filter_graph.h"
57 using std::stringstream;
59 using boost::shared_ptr;
60 using boost::optional;
61 using boost::dynamic_pointer_cast;
/* Class-wide mutex serialising FFmpeg codec setup/teardown (avcodec_open2 /
   avcodec_close are not thread-safe); shared by all FFmpegDecoder instances. */
64 boost::mutex FFmpegDecoder::_mutex;
/** Construct a decoder for some FFmpeg-readable content.
 *  @param f Film that this content is part of.
 *  @param c The content to decode.
 *  @param video true to decode the video stream.
 *  @param audio true to decode the selected audio stream.
 *  @param subtitles true to decode the selected subtitle stream.
 *  @param video_sync true to enable audio/video sync correction (see out_with_sync).
 */
66 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio, bool subtitles, bool video_sync)
/* Codec contexts start null; they are filled in by the setup_* methods and the
   destructor only closes the ones that are non-null. */
74 	, _video_codec_context (0)
76 	, _audio_codec_context (0)
78 	, _subtitle_codec_context (0)
/* Flags recording which of the streams this decoder should actually decode. */
80 	, _decode_video (video)
81 	, _decode_audio (audio)
82 	, _decode_subtitles (subtitles)
83 	, _video_sync (video_sync)
/** Close any codec contexts that were opened, then close the format context.
 *  Holds the class-wide mutex because avcodec_close is not thread-safe.
 */
95 FFmpegDecoder::~FFmpegDecoder ()
97 	boost::mutex::scoped_lock lm (_mutex);
99 	if (_audio_codec_context) {
100 		avcodec_close (_audio_codec_context);
103 	if (_video_codec_context) {
104 		avcodec_close (_video_codec_context);
107 	if (_subtitle_codec_context) {
108 		avcodec_close (_subtitle_codec_context);
/* Closes and frees _format_context, zeroing the pointer. */
113 	avformat_close_input (&_format_context);
/** Open the content's file with libavformat and catalogue its streams.
 *  Populates _audio_streams / _subtitle_streams and finds the video stream.
 *  @throws OpenFileError if the file cannot be opened.
 *  @throws DecodeError if stream info, a video stream or a frame cannot be obtained.
 */
117 FFmpegDecoder::setup_general ()
121 	if (avformat_open_input (&_format_context, _ffmpeg_content->file().string().c_str(), 0, 0) < 0) {
122 		throw OpenFileError (_ffmpeg_content->file().string ());
125 	if (avformat_find_stream_info (_format_context, 0) < 0) {
126 		throw DecodeError (_("could not find stream information"));
129 	/* Find video, audio and subtitle streams */
131 	for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
132 		AVStream* s = _format_context->streams[i];
133 		if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
135 		} else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
137 			/* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
138 			   so bodge it here.  No idea why we should have to do this.
141 			if (s->codec->channel_layout == 0) {
142 				s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
145 			_audio_streams.push_back (
146 				FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
149 		} else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
150 			_subtitle_streams.push_back (FFmpegSubtitleStream (stream_name (s), i));
/* A video stream is mandatory; audio and subtitle streams are optional. */
154 	if (_video_stream < 0) {
155 		throw DecodeError (N_("could not find video stream"));
158 	_frame = avcodec_alloc_frame ();
160 		throw DecodeError (N_("could not allocate frame"));
/** Find and open a decoder for the video stream located by setup_general.
 *  Takes the class-wide mutex (avcodec_open2 is not thread-safe).
 *  @throws DecodeError if no decoder is found or it cannot be opened.
 */
165 FFmpegDecoder::setup_video ()
167 	boost::mutex::scoped_lock lm (_mutex);
169 	_video_codec_context = _format_context->streams[_video_stream]->codec;
170 	_video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
172 	if (_video_codec == 0) {
173 		throw DecodeError (_("could not find video decoder"));
176 	if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
177 		throw DecodeError (N_("could not open video decoder"));
/** Find and open a decoder for the content's selected audio stream, if any.
 *  A no-op when the content has no audio stream.
 *  @throws DecodeError if no decoder is found or it cannot be opened.
 */
182 FFmpegDecoder::setup_audio ()
184 	boost::mutex::scoped_lock lm (_mutex);
/* Nothing to do if the content has no audio stream selected. */
186 	if (!_ffmpeg_content->audio_stream ()) {
190 	_audio_codec_context = _format_context->streams[_ffmpeg_content->audio_stream()->id]->codec;
191 	_audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
193 	if (_audio_codec == 0) {
194 		throw DecodeError (_("could not find audio decoder"));
197 	if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
198 		throw DecodeError (N_("could not open audio decoder"));
/** Find and open a decoder for the content's selected subtitle stream, if any.
 *  A no-op when there is no subtitle stream, or when the stored stream id is
 *  out of range for this file (the id may come from stale content metadata).
 *  @throws DecodeError if no decoder is found or it cannot be opened.
 */
203 FFmpegDecoder::setup_subtitle ()
205 	boost::mutex::scoped_lock lm (_mutex);
207 	if (!_ffmpeg_content->subtitle_stream() || _ffmpeg_content->subtitle_stream()->id >= int (_format_context->nb_streams)) {
211 	_subtitle_codec_context = _format_context->streams[_ffmpeg_content->subtitle_stream()->id]->codec;
212 	_subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
214 	if (_subtitle_codec == 0) {
215 		throw DecodeError (_("could not find subtitle decoder"));
218 	if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
219 		throw DecodeError (N_("could not open subtitle decoder"));
/** Read the next packet from the file and dispatch it to the appropriate
 *  decoder (video / audio / subtitle).  On EOF, flush any frames still
 *  buffered inside the decoders.
 */
225 FFmpegDecoder::pass ()
227 	int r = av_read_frame (_format_context, &_packet);
/* r < 0: either genuine EOF or a read error. */
230 		if (r != AVERROR_EOF) {
231 			/* Maybe we should fail here, but for now we'll just finish off instead */
233 			av_strerror (r, buf, sizeof(buf));
234 			_film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
237 		/* Get any remaining frames */
242 		/* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Drain the video decoder: keep calling with the (empty) packet until no
   more complete frames come out. */
247 		while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
248 			filter_and_emit_video (_frame);
252 		if (_ffmpeg_content->audio_stream() && _decode_audio) {
253 			decode_audio_packet ();
259 	avcodec_get_frame_defaults (_frame);
261 	if (_packet.stream_index == _video_stream && _decode_video) {
264 		int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
265 		if (r >= 0 && frame_finished) {
/* avcodec_decode_video2 returns the number of bytes consumed; log if it did
   not consume the whole packet. */
267 			if (r != _packet.size) {
268 				_film->log()->log (String::compose (N_("Used only %1 bytes of %2 in packet"), r, _packet.size));
274 			filter_and_emit_video (_frame);
278 	} else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->id && _decode_audio) {
279 		decode_audio_packet ();
/* Subtitles are only decoded once the first video frame has been seen
   (_first_video), so that their timing can be related to the video. */
280 	} else if (_ffmpeg_content->subtitle_stream() && _packet.stream_index == _ffmpeg_content->subtitle_stream()->id && _decode_subtitles && _first_video) {
/* NOTE(review): avcodec_decode_subtitle2 returns a negative error code on
   failure, which is truthy here; the got_subtitle check guards the common
   case, but `>= 0` would be the safer test — confirm against FFmpeg docs. */
284 		if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
285 			/* Sometimes we get an empty AVSubtitle, which is used by some codecs to
286 			   indicate that the previous subtitle should stop.
288 			if (sub.num_rects > 0) {
289 				shared_ptr<TimedSubtitle> ts;
291 					emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub)));
293 					/* some problem with the subtitle; we probably didn't understand it */
/* Empty subtitle: emit a null pointer to mean "stop showing the previous one". */
296 				emit_subtitle (shared_ptr<TimedSubtitle> ());
298 			avsubtitle_free (&sub);
/* Every packet from av_read_frame must be freed. */
302 	av_free_packet (&_packet);
306 /** @param data pointer to array of pointers to buffers.
307  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 *  @param size total size of the audio data, in bytes.
 *  @return The audio deinterleaved (where necessary) and converted to float
 *  samples in the range [-1, 1), one buffer per channel.
 *  @throws DecodeError on an unrecognised sample format.
 */
309 shared_ptr<AudioBuffers>
310 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
312 	assert (_ffmpeg_content->audio_channels());
313 	assert (bytes_per_audio_sample());
315 	/* Deinterleave and convert to float */
/* size must be a whole number of frames (one sample per channel). */
317 	assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
319 	int const total_samples = size / bytes_per_audio_sample();
320 	int const frames = total_samples / _ffmpeg_content->audio_channels();
321 	shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
323 	switch (audio_sample_format()) {
/* Interleaved signed 16-bit: walk the single buffer, round-robin over channels. */
324 	case AV_SAMPLE_FMT_S16:
326 		int16_t* p = reinterpret_cast<int16_t *> (data[0]);
329 		for (int i = 0; i < total_samples; ++i) {
330 			audio->data(channel)[sample] = float(*p++) / (1 << 15);
333 			if (channel == _ffmpeg_content->audio_channels()) {
/* Planar signed 16-bit: one buffer per channel, converted sample by sample. */
341 	case AV_SAMPLE_FMT_S16P:
343 		int16_t** p = reinterpret_cast<int16_t **> (data);
344 		for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
345 			for (int j = 0; j < frames; ++j) {
346 				audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
/* Interleaved signed 32-bit.
   NOTE(review): (1 << 31) overflows a signed int, which is undefined
   behaviour in C++; this should be 2147483648.0f (or 1u << 31) — verify
   and fix upstream. */
352 	case AV_SAMPLE_FMT_S32:
354 		int32_t* p = reinterpret_cast<int32_t *> (data[0]);
357 		for (int i = 0; i < total_samples; ++i) {
358 			audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
361 			if (channel == _ffmpeg_content->audio_channels()) {
/* Interleaved float: already in the right format, just deinterleave. */
369 	case AV_SAMPLE_FMT_FLT:
371 		float* p = reinterpret_cast<float*> (data[0]);
374 		for (int i = 0; i < total_samples; ++i) {
375 			audio->data(channel)[sample] = *p++;
378 			if (channel == _ffmpeg_content->audio_channels()) {
/* Planar float: already deinterleaved, so each channel is a straight memcpy. */
386 	case AV_SAMPLE_FMT_FLTP:
388 		float** p = reinterpret_cast<float**> (data);
389 		for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
390 			memcpy (audio->data(i), p[i], frames * sizeof(float));
396 		throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/** @return Frame rate of the video stream, preferring the container's average
 *  frame rate and falling back to r_frame_rate when that is unset.
 */
403 FFmpegDecoder::video_frame_rate () const
405 	AVStream* s = _format_context->streams[_video_stream];
407 	if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
408 		return av_q2d (s->avg_frame_rate);
411 	return av_q2d (s->r_frame_rate);
/** @return Sample format of the audio stream, or (AVSampleFormat) 0 if no
 *  audio codec context has been set up.
 */
415 FFmpegDecoder::audio_sample_format () const
417 	if (_audio_codec_context == 0) {
418 		return (AVSampleFormat) 0;
421 	return _audio_codec_context->sample_fmt;
/** @return Native size of the video, in pixels, from the codec context. */
425 FFmpegDecoder::native_size () const
427 	return libdcp::Size (_video_codec_context->width, _video_codec_context->height);
/** @return Pixel format of the video, from the codec context. */
431 FFmpegDecoder::pixel_format () const
433 	return _video_codec_context->pix_fmt;
/** @return Numerator of the video codec's time base. */
437 FFmpegDecoder::time_base_numerator () const
439 	return _video_codec_context->time_base.num;
/** @return Denominator of the video codec's time base. */
443 FFmpegDecoder::time_base_denominator () const
445 	return _video_codec_context->time_base.den;
/** @return Numerator of the video's sample (pixel) aspect ratio. */
449 FFmpegDecoder::sample_aspect_ratio_numerator () const
451 	return _video_codec_context->sample_aspect_ratio.num;
/** @return Denominator of the video's sample (pixel) aspect ratio. */
455 FFmpegDecoder::sample_aspect_ratio_denominator () const
457 	return _video_codec_context->sample_aspect_ratio.den;
/** Build a human-readable name for a stream from its "language" and "title"
 *  metadata entries, with a fallback when neither is present.
 *  @param s Stream to name.
 */
461 FFmpegDecoder::stream_name (AVStream* s) const
465 	AVDictionaryEntry const * lang = av_dict_get (s->metadata, N_("language"), 0, 0);
470 	AVDictionaryEntry const * title = av_dict_get (s->metadata, N_("title"), 0, 0);
/* Separate the title from the language if we already have something. */
472 		if (!n.str().empty()) {
/* Neither metadata entry produced anything; presumably a generic fallback
   name is used here — TODO confirm (body partly elided in this view). */
478 	if (n.str().empty()) {
/** @return Number of bytes per sample for the current audio sample format. */
486 FFmpegDecoder::bytes_per_audio_sample () const
488 	return av_get_bytes_per_sample (audio_sample_format ());
/** Pass a decoded frame through a FilterGraph matching its size/pixel format
 *  (creating and caching one if necessary) and emit the resulting images.
 *  @param frame Decoded video frame.
 */
492 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
494 	boost::mutex::scoped_lock lm (_filter_graphs_mutex);
496 	shared_ptr<FilterGraph> graph;
/* Look for a cached graph that can handle this frame's size and format. */
498 	list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
499 	while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
/* No suitable graph cached: make one and remember it for later frames. */
503 	if (i == _filter_graphs.end ()) {
504 		graph.reset (new FilterGraph (_film, this, libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format));
505 		_filter_graphs.push_back (graph);
506 		_film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), frame->width, frame->height, frame->format));
/* One input frame can produce zero or more filtered images. */
511 	list<shared_ptr<Image> > images = graph->process (frame);
513 	for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
514 		emit_video (*i, frame_time ());
/** Seek to a position in the content.
 *  @param p Position in seconds.
 */
519 FFmpegDecoder::seek (double p)
521 	return do_seek (p, false);
/** Re-seek to the last source time that was decoded, so the same frame can be
 *  fetched again (e.g. after decoder parameters change).
 */
525 FFmpegDecoder::seek_to_last ()
527 	/* This AVSEEK_FLAG_BACKWARD in do_seek is a bit of a hack; without it, if we ask for a seek to the same place as last time
528 	   (used when we change decoder parameters and want to re-fetch the frame) we end up going forwards rather than
529 	   staying in the same place.
531 	return do_seek (last_source_time(), true);
/** Seek the format context and flush decoder buffers.
 *  @param p Position in seconds.
 *  @param backwards true to pass AVSEEK_FLAG_BACKWARD (seek to at-or-before p).
 */
535 FFmpegDecoder::do_seek (double p, bool backwards)
/* Convert seconds to the video stream's time base units. */
537 	int64_t const vt = p / av_q2d (_format_context->streams[_video_stream]->time_base);
/* NOTE(review): r looks unused in the visible code — the seek result may be
   checked on a line elided from this view; confirm. */
539 	int const r = av_seek_frame (_format_context, _video_stream, vt, backwards ? AVSEEK_FLAG_BACKWARD : 0);
/* Discard any frames buffered in the decoders from before the seek. */
541 	avcodec_flush_buffers (_video_codec_context);
542 	if (_subtitle_codec_context) {
543 		avcodec_flush_buffers (_subtitle_codec_context);
/** Emit the current video frame, inserting repeated frames or dropping frames
 *  as needed to keep the output in sync with the source's presentation
 *  timestamps.
 */
550 FFmpegDecoder::out_with_sync ()
552 	/* Where we are in the output, in seconds */
553 	double const out_pts_seconds = video_frame() / video_frame_rate();
555 	/* Where we are in the source, in seconds */
556 	double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
557 		* av_frame_get_best_effort_timestamp(_frame);
560 		String::compose (N_("Source video frame ready; source at %1, output at %2"), source_pts_seconds, out_pts_seconds),
/* Record the PTS of the first video frame; later deltas are measured from it. */
565 		_first_video = source_pts_seconds;
568 	/* Difference between where we are and where we should be */
569 	double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
570 	double const one_frame = 1 / video_frame_rate();
572 	/* Insert frames if required to get out_pts_seconds up to pts_seconds */
573 	if (delta > one_frame) {
574 		int const extra = rint (delta / one_frame);
575 		for (int i = 0; i < extra; ++i) {
576 			repeat_last_video (frame_time ());
579 				N_("Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)"),
580 				out_pts_seconds, video_frame(), source_pts_seconds, video_frame_rate()
/* Within one frame of where we should be: emit this frame normally. */
586 	if (delta > -one_frame) {
587 		/* Process this frame */
588 		filter_and_emit_video (_frame);
590 		/* Otherwise we are omitting a frame to keep things right */
591 		_film->log()->log (String::compose (N_("Frame removed at %1s"), out_pts_seconds));
/** Called when a Film property changes.  For the properties handled here
 *  (the switch is elided from this view — TODO confirm which), the cached
 *  filter graphs are discarded so they are rebuilt with the new settings.
 *  @param p Property that changed.
 */
596 FFmpegDecoder::film_changed (Film::Property p)
602 		boost::mutex::scoped_lock lm (_filter_graphs_mutex);
603 		_filter_graphs.clear ();
612 /** @return Length (in video frames) according to our content's header */
614 FFmpegDecoder::video_length () const
/* Container duration is in AV_TIME_BASE units; convert to seconds then frames. */
616 	return (double(_format_context->duration) / AV_TIME_BASE) * video_frame_rate();
/** @return Best-effort presentation time of the current frame, in seconds,
 *  using the video stream's time base.
 */
620 FFmpegDecoder::frame_time () const
622 	return av_frame_get_best_effort_timestamp(_frame) * av_q2d (_format_context->streams[_video_stream]->time_base);
626 FFmpegDecoder::decode_audio_packet ()
628 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
632 AVPacket copy_packet = _packet;
634 while (copy_packet.size > 0) {
637 int const decode_result = avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, ©_packet);
638 if (decode_result >= 0 && frame_finished) {
640 /* Where we are in the source, in seconds */
641 double const source_pts_seconds = av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
642 * av_frame_get_best_effort_timestamp(_frame);
644 /* We only decode audio if we've had our first video packet through, and if it
645 was before this packet. Until then audio is thrown away.
648 if ((_first_video && _first_video.get() <= source_pts_seconds) || !_decode_video) {
650 if (!_first_audio && _decode_video) {
651 _first_audio = source_pts_seconds;
653 /* This is our first audio frame, and if we've arrived here we must have had our
654 first video frame. Push some silence to make up any gap between our first
655 video frame and our first audio.
658 /* frames of silence that we must push */
659 int const s = rint ((_first_audio.get() - _first_video.get()) * _ffmpeg_content->audio_frame_rate ());
663 N_("First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)"),
664 _first_video.get(), _first_audio.get(), s, _ffmpeg_content->audio_channels(), bytes_per_audio_sample()
669 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), s));
670 audio->make_silent ();
675 int const data_size = av_samples_get_buffer_size (
676 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
679 assert (_audio_codec_context->channels == _ffmpeg_content->audio_channels());
680 Audio (deinterleave_audio (_frame->data, data_size));
684 if (decode_result >= 0) {
685 copy_packet.data += decode_result;
686 copy_packet.size -= decode_result;