2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
35 #include <libswscale/swscale.h>
36 #include <libpostproc/postprocess.h>
41 #include "transcoder.h"
44 #include "exceptions.h"
48 #include "ffmpeg_decoder.h"
49 #include "filter_graph.h"
57 using std::stringstream;
59 using boost::shared_ptr;
60 using boost::optional;
61 using boost::dynamic_pointer_cast;
/* Class-wide mutex, taken by the setup_* methods and the destructor around the
   avcodec open/close calls so that they are serialised across all decoder instances. */
64 boost::mutex FFmpegDecoder::_mutex;
/** Construct an FFmpegDecoder for a piece of FFmpeg content.
 *  @param f Film that the content is part of.
 *  @param c Content to decode.
 *  @param video true to decode video.
 *  @param audio true to decode audio.
 *  @param subtitles true to decode subtitles.
 *  @param video_sync true to try to keep video in sync with its source timestamps
 *  (see out_with_sync below).
 */
66 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio, bool subtitles, bool video_sync)
/* The codec contexts start null; they are filled in by the setup_* methods,
   and the destructor uses the null check to see which were ever opened. */
74 , _video_codec_context (0)
76 , _audio_codec_context (0)
78 , _subtitle_codec_context (0)
/* Flags recording which kinds of content this decoder should emit */
80 , _decode_video (video)
81 , _decode_audio (audio)
82 , _decode_subtitles (subtitles)
83 , _video_sync (video_sync)
/** Destructor: close whichever codec contexts were opened, then close the
 *  input container.  Serialised with the class-wide _mutex, matching the
 *  locking done in the setup_* methods.
 */
95 FFmpegDecoder::~FFmpegDecoder ()
97 boost::mutex::scoped_lock lm (_mutex);
/* Each context is only closed if it was actually set up */
99 if (_audio_codec_context) {
100 avcodec_close (_audio_codec_context);
103 if (_video_codec_context) {
104 avcodec_close (_video_codec_context);
107 if (_subtitle_codec_context) {
108 avcodec_close (_subtitle_codec_context);
/* Closes the file and frees _format_context */
113 avformat_close_input (&_format_context);
/** Open the content's file with FFmpeg, read its stream information and
 *  record the audio and subtitle streams that it contains.
 *  @throws OpenFileError if the file cannot be opened.
 *  @throws DecodeError if stream information cannot be read, if no video
 *  stream is found, or if the decode frame cannot be allocated.
 */
117 FFmpegDecoder::setup_general ()
121 if (avformat_open_input (&_format_context, _ffmpeg_content->file().string().c_str(), 0, 0) < 0) {
122 throw OpenFileError (_ffmpeg_content->file().string ());
125 if (avformat_find_stream_info (_format_context, 0) < 0) {
126 throw DecodeError (_("could not find stream information"));
129 /* Find video, audio and subtitle streams */
131 for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
132 AVStream* s = _format_context->streams[i];
133 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
135 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
137 /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
138 so bodge it here. No idea why we should have to do this.
/* Derive a default layout from the channel count when FFmpeg gives us none */
141 if (s->codec->channel_layout == 0) {
142 s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
/* Record this audio stream's name, index, sample rate and channel layout */
145 _audio_streams.push_back (
146 FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
149 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
150 _subtitle_streams.push_back (FFmpegSubtitleStream (stream_name (s), i));
/* A video stream is mandatory; audio/subtitles are optional */
154 if (_video_stream < 0) {
155 throw DecodeError (N_("could not find video stream"));
/* Frame used for all subsequent decode calls */
158 _frame = avcodec_alloc_frame ();
160 throw DecodeError (N_("could not allocate frame"));
/** Find and open a decoder for the video stream.
 *  @throws DecodeError if no suitable decoder can be found or opened.
 */
165 FFmpegDecoder::setup_video ()
/* Serialise avcodec open against other instances */
167 boost::mutex::scoped_lock lm (_mutex);
169 _video_codec_context = _format_context->streams[_video_stream]->codec;
170 _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
172 if (_video_codec == 0) {
173 throw DecodeError (_("could not find video decoder"));
176 if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
177 throw DecodeError (N_("could not open video decoder"));
/** Find and open a decoder for the content's selected audio stream, if any.
 *  Does nothing when the content has no audio stream selected.
 *  @throws DecodeError if a decoder cannot be found or opened.
 */
182 FFmpegDecoder::setup_audio ()
/* Serialise avcodec open against other instances */
184 boost::mutex::scoped_lock lm (_mutex);
/* No audio stream selected: nothing to do */
186 if (!_ffmpeg_content->audio_stream ()) {
190 _audio_codec_context = _format_context->streams[_ffmpeg_content->audio_stream()->id]->codec;
191 _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
193 if (_audio_codec == 0) {
194 throw DecodeError (_("could not find audio decoder"));
197 if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
198 throw DecodeError (N_("could not open audio decoder"));
/** Find and open a decoder for the content's selected subtitle stream, if any.
 *  Does nothing when no subtitle stream is selected, or when the selected
 *  stream's id is out of range for this file (a guard against stale stream
 *  ids — presumably from content settings made against a different file;
 *  TODO confirm).
 *  @throws DecodeError if a decoder cannot be found or opened.
 */
203 FFmpegDecoder::setup_subtitle ()
/* Serialise avcodec open against other instances */
205 boost::mutex::scoped_lock lm (_mutex);
207 if (!_ffmpeg_content->subtitle_stream() || _ffmpeg_content->subtitle_stream()->id >= int (_format_context->nb_streams)) {
211 _subtitle_codec_context = _format_context->streams[_ffmpeg_content->subtitle_stream()->id]->codec;
212 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
214 if (_subtitle_codec == 0) {
215 throw DecodeError (_("could not find subtitle decoder"));
218 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
219 throw DecodeError (N_("could not open subtitle decoder"));
/** Read the next packet from the file and decode it, emitting any video,
 *  audio or subtitle data that results.  At end-of-file, drain any frames
 *  still buffered inside the video decoder before finishing.
 */
225 FFmpegDecoder::pass ()
227 int r = av_read_frame (_format_context, &_packet);
/* r < 0 here: either normal EOF or a read error */
230 if (r != AVERROR_EOF) {
231 /* Maybe we should fail here, but for now we'll just finish off instead */
233 av_strerror (r, buf, sizeof(buf));
234 _film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
237 /* Get any remaining frames */
242 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Flush: keep decoding until the codec has no more buffered video frames */
247 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
248 filter_and_emit_video (_frame);
/* Also drain any buffered audio */
252 if (_ffmpeg_content->audio_stream() && _decode_audio) {
253 decode_audio_packet ();
/* Normal (non-EOF) path: reset the frame then dispatch the packet by stream */
259 avcodec_get_frame_defaults (_frame);
261 if (_packet.stream_index == _video_stream && _decode_video) {
264 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
265 if (r >= 0 && frame_finished) {
/* The decoder consuming fewer bytes than the packet holds is unexpected; log it */
267 if (r != _packet.size) {
268 _film->log()->log (String::compose (N_("Used only %1 bytes of %2 in packet"), r, _packet.size));
274 filter_and_emit_video (_frame);
278 } else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->id && _decode_audio) {
279 decode_audio_packet ();
/* Subtitles are ignored until the first video frame has been seen —
   presumably so they can be timed relative to it; TODO confirm */
280 } else if (_ffmpeg_content->subtitle_stream() && _packet.stream_index == _ffmpeg_content->subtitle_stream()->id && _decode_subtitles && _first_video) {
284 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
285 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
286 indicate that the previous subtitle should stop.
288 if (sub.num_rects > 0) {
289 shared_ptr<TimedSubtitle> ts;
291 emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub)));
293 /* some problem with the subtitle; we probably didn't understand it */
/* Empty subtitle: emit a null pointer to clear the current subtitle */
296 emit_subtitle (shared_ptr<TimedSubtitle> ());
298 avsubtitle_free (&sub);
/* Release the packet's buffer before the next pass */
302 av_free_packet (&_packet);
306 /** @param data pointer to array of pointers to buffers.
307 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 * @param size total size of the sample data, in bytes.
 * @return the audio deinterleaved and converted to float, one AudioBuffers
 * channel per source channel, with integer formats scaled into [-1, 1).
309 shared_ptr<AudioBuffers>
310 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
312 assert (_ffmpeg_content->audio_channels());
313 assert (bytes_per_audio_sample());
315 /* Deinterleave and convert to float */
/* size must be a whole number of frames (sample-size × channel-count) */
317 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
319 int const total_samples = size / bytes_per_audio_sample();
320 int const frames = total_samples / _ffmpeg_content->audio_channels();
321 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
323 switch (audio_sample_format()) {
/* Interleaved signed 16-bit: walk all samples, cycling the target channel */
324 case AV_SAMPLE_FMT_S16:
326 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
329 for (int i = 0; i < total_samples; ++i) {
330 audio->data(channel)[sample] = float(*p++) / (1 << 15);
333 if (channel == _ffmpeg_content->audio_channels()) {
/* Planar signed 16-bit: one source buffer per channel */
341 case AV_SAMPLE_FMT_S16P:
343 int16_t** p = reinterpret_cast<int16_t **> (data);
344 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
345 for (int j = 0; j < frames; ++j) {
346 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
/* Interleaved signed 32-bit */
352 case AV_SAMPLE_FMT_S32:
354 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
357 for (int i = 0; i < total_samples; ++i) {
/* NOTE(review): (1 << 31) overflows a 32-bit int — shifting 1 into the sign
   bit is undefined behaviour in C++; should be (1u << 31) or 2147483648.0 */
358 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
361 if (channel == _ffmpeg_content->audio_channels()) {
/* Interleaved float: no scaling needed, just deinterleave */
369 case AV_SAMPLE_FMT_FLT:
371 float* p = reinterpret_cast<float*> (data[0]);
374 for (int i = 0; i < total_samples; ++i) {
375 audio->data(channel)[sample] = *p++;
378 if (channel == _ffmpeg_content->audio_channels()) {
/* Planar float: each channel can be copied wholesale */
386 case AV_SAMPLE_FMT_FLTP:
388 float** p = reinterpret_cast<float**> (data);
389 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
390 memcpy (audio->data(i), p[i], frames * sizeof(float));
/* Any other sample format is unsupported */
396 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/** @return the video frame rate in frames per second, preferring the stream's
 *  average frame rate and falling back to r_frame_rate when the average is unset.
 */
403 FFmpegDecoder::video_frame_rate () const
405 AVStream* s = _format_context->streams[_video_stream];
407 if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
408 return av_q2d (s->avg_frame_rate);
411 return av_q2d (s->r_frame_rate);
/** @return the sample format of our audio stream, or a zero-valued
 *  AVSampleFormat if no audio codec context has been set up.
 */
415 FFmpegDecoder::audio_sample_format () const
417 if (_audio_codec_context == 0) {
418 return (AVSampleFormat) 0;
421 return _audio_codec_context->sample_fmt;
/** @return the native size (width x height) of the video, from its codec context */
425 FFmpegDecoder::native_size () const
427 return libdcp::Size (_video_codec_context->width, _video_codec_context->height);
/** @return the pixel format of the video */
431 FFmpegDecoder::pixel_format () const
433 return _video_codec_context->pix_fmt;
/** @return numerator of the video codec's time base */
437 FFmpegDecoder::time_base_numerator () const
439 return _video_codec_context->time_base.num;
/** @return denominator of the video codec's time base */
443 FFmpegDecoder::time_base_denominator () const
445 return _video_codec_context->time_base.den;
/** @return numerator of the video's sample aspect ratio */
449 FFmpegDecoder::sample_aspect_ratio_numerator () const
451 return _video_codec_context->sample_aspect_ratio.num;
/** @return denominator of the video's sample aspect ratio */
455 FFmpegDecoder::sample_aspect_ratio_denominator () const
457 return _video_codec_context->sample_aspect_ratio.den;
/** Build a human-readable name for a stream from its "language" and "title"
 *  metadata entries, with a fallback when both are empty.
 *  @param s Stream to name.
 */
461 FFmpegDecoder::stream_name (AVStream* s) const
465 AVDictionaryEntry const * lang = av_dict_get (s->metadata, N_("language"), 0, 0);
470 AVDictionaryEntry const * title = av_dict_get (s->metadata, N_("title"), 0, 0);
/* Separate the title from any language text already accumulated */
472 if (!n.str().empty()) {
/* Neither metadata entry produced anything: use a fallback name */
478 if (n.str().empty()) {
/** @return the size in bytes of one sample of our audio, as reported by FFmpeg */
486 FFmpegDecoder::bytes_per_audio_sample () const
488 return av_get_bytes_per_sample (audio_sample_format ());
/** Run a decoded frame through a filter graph matching its size and pixel
 *  format (creating and caching a new graph if none matches) and emit the
 *  resulting image(s).
 *  @param frame Frame to process.
 */
492 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
/* Protect the cached list of filter graphs */
494 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
496 shared_ptr<FilterGraph> graph;
/* Look for an existing graph that can handle this frame's size/format */
498 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
499 while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
503 if (i == _filter_graphs.end ()) {
504 graph.reset (new FilterGraph (_film, this, libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format));
505 _filter_graphs.push_back (graph);
506 _film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), frame->width, frame->height, frame->format));
/* A graph may buffer, so one input frame can yield zero or more images */
511 list<shared_ptr<Image> > images = graph->process (frame);
513 for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
514 emit_video (*i, frame_time ());
/** Seek the video stream to a given time.
 *  @param p Time to seek to, in seconds.
 */
519 FFmpegDecoder::seek (double p)
521 /* This use of AVSEEK_FLAG_BACKWARD is a bit of a hack; without it, if we ask for a seek to the same place as last time
522 (used when we change decoder parameters and want to re-fetch the frame) we end up going forwards rather than
523 staying in the same place.
525 bool const backwards = (p == last_content_time());
/* Convert seconds into the video stream's time-base units */
527 int64_t const vt = p / av_q2d (_format_context->streams[_video_stream]->time_base);
529 int const r = av_seek_frame (_format_context, _video_stream, vt, backwards ? AVSEEK_FLAG_BACKWARD : 0);
/* Discard any frames buffered in the decoders from before the seek */
531 avcodec_flush_buffers (_video_codec_context);
532 if (_subtitle_codec_context) {
533 avcodec_flush_buffers (_subtitle_codec_context);
/** Emit the current video frame, keeping the output in sync with the source's
 *  timestamps: repeat the previous frame when the source has jumped ahead of
 *  where our output should be, and drop this frame when it is too early.
 */
540 FFmpegDecoder::out_with_sync ()
542 /* Where we are in the output, in seconds */
543 double const out_pts_seconds = video_frame() / video_frame_rate();
545 /* Where we are in the source, in seconds */
546 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
547 * av_frame_get_best_effort_timestamp(_frame);
550 String::compose (N_("Source video frame ready; source at %1, output at %2"), source_pts_seconds, out_pts_seconds),
/* Remember the PTS of the first video frame; later deltas are relative to it */
555 _first_video = source_pts_seconds;
558 /* Difference between where we are and where we should be */
559 double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
560 double const one_frame = 1 / video_frame_rate();
562 /* Insert frames if required to get out_pts_seconds up to pts_seconds */
563 if (delta > one_frame) {
564 int const extra = rint (delta / one_frame);
565 for (int i = 0; i < extra; ++i) {
566 repeat_last_video (frame_time ());
569 N_("Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)"),
570 out_pts_seconds, video_frame(), source_pts_seconds, video_frame_rate()
/* Within one frame of the right place: emit this frame normally */
576 if (delta > -one_frame) {
577 /* Process this frame */
578 filter_and_emit_video (_frame);
580 /* Otherwise we are omitting a frame to keep things right */
581 _film->log()->log (String::compose (N_("Frame removed at %1s"), out_pts_seconds));
/** Called when a property of the Film changes.
 *  Discards the cached filter graphs — presumably because they were built
 *  from film settings that may now be stale; TODO confirm which properties
 *  trigger this (the dispatch on p is not visible here).
 *  @param p The property that changed.
 */
586 FFmpegDecoder::film_changed (Film::Property p)
592 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
593 _filter_graphs.clear ();
602 /** @return Length (in video frames) according to our content's header */
604 FFmpegDecoder::video_length () const
/* Container duration is in AV_TIME_BASE units; convert to seconds then frames */
606 return (double(_format_context->duration) / AV_TIME_BASE) * video_frame_rate();
/** @return the current frame's best-effort timestamp converted to seconds
 *  using the video stream's time base.
 */
610 FFmpegDecoder::frame_time () const
612 return av_frame_get_best_effort_timestamp(_frame) * av_q2d (_format_context->streams[_video_stream]->time_base);
/** Decode the audio in the current _packet and emit it, discarding audio
 *  that arrives before the first video frame and padding with silence so
 *  that the first emitted audio lines up with the first video frame.
 */
616 FFmpegDecoder::decode_audio_packet ()
618 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
/* Work on a shallow copy so we can advance data/size without touching _packet */
622 AVPacket copy_packet = _packet;
624 while (copy_packet.size > 0) {
/* NOTE(review): "©_packet" is mojibake — this should read "&copy_packet"
   (the HTML entity "&copy" was decoded somewhere in the file's history) */
627 int const decode_result = avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, ©_packet);
628 if (decode_result >= 0 && frame_finished) {
630 /* Where we are in the source, in seconds */
631 double const source_pts_seconds = av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
632 * av_frame_get_best_effort_timestamp(_frame);
634 /* We only decode audio if we've had our first video packet through, and if it
635 was before this packet. Until then audio is thrown away.
/* When not decoding video at all, audio is emitted unconditionally */
638 if ((_first_video && _first_video.get() <= source_pts_seconds) || !_decode_video) {
640 if (!_first_audio && _decode_video) {
641 _first_audio = source_pts_seconds;
643 /* This is our first audio frame, and if we've arrived here we must have had our
644 first video frame. Push some silence to make up any gap between our first
645 video frame and our first audio.
648 /* frames of silence that we must push */
649 int const s = rint ((_first_audio.get() - _first_video.get()) * _ffmpeg_content->audio_frame_rate ());
653 N_("First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)"),
654 _first_video.get(), _first_audio.get(), s, _ffmpeg_content->audio_channels(), bytes_per_audio_sample()
659 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), s));
660 audio->make_silent ();
/* Size in bytes of the decoded frame's sample data (align = 1) */
665 int const data_size = av_samples_get_buffer_size (
666 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
669 assert (_audio_codec_context->channels == _ffmpeg_content->audio_channels());
670 Audio (deinterleave_audio (_frame->data, data_size));
/* Advance past the bytes the decoder consumed and try for another frame */
674 if (decode_result >= 0) {
675 copy_packet.data += decode_result;
676 copy_packet.size -= decode_result;