2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
35 #include <libswscale/swscale.h>
36 #include <libpostproc/postprocess.h>
41 #include "transcoder.h"
45 #include "exceptions.h"
49 #include "ffmpeg_decoder.h"
50 #include "filter_graph.h"
58 using std::stringstream;
60 using boost::shared_ptr;
61 using boost::optional;
62 using boost::dynamic_pointer_cast;
/** Construct an FFmpegDecoder for some content of a Film.
 *  The codec context pointers are zero-initialised here; they are filled in
 *  later by the setup_*() methods, and the destructor uses the zero value to
 *  tell whether each context needs closing.
 *  @param f Film whose content is to be decoded.
 *  @param o Options controlling the decode.
 */
65 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, DecodeOptions o)
72 , _video_codec_context (0)
74 , _audio_codec_context (0)
76 , _subtitle_codec_context (0)
/** Destructor: close whichever codec contexts were opened (each pointer is
 *  non-zero only if the corresponding setup_*() succeeded), then close the
 *  container via avformat_close_input().
 */
89 FFmpegDecoder::~FFmpegDecoder ()
91 if (_audio_codec_context) {
92 avcodec_close (_audio_codec_context);
95 if (_video_codec_context) {
96 avcodec_close (_video_codec_context);
99 if (_subtitle_codec_context) {
100 avcodec_close (_subtitle_codec_context);
105 avformat_close_input (&_format_context);
/** Open the content file, read its stream information and catalogue the
 *  video, audio and subtitle streams that it contains.
 *  @throws OpenFileError if the content file cannot be opened.
 *  @throws DecodeError if stream information cannot be found, if there is no
 *  video stream, or if an AVFrame cannot be allocated.
 */
109 FFmpegDecoder::setup_general ()
113 if (avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0) < 0) {
114 throw OpenFileError (_film->content_path ());
117 if (avformat_find_stream_info (_format_context, 0) < 0) {
118 throw DecodeError (_("could not find stream information"));
121 /* Find video, audio and subtitle streams and choose the first of each */
123 for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
124 AVStream* s = _format_context->streams[i];
125 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
127 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
129 /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
130 so bodge it here. No idea why we should have to do this.
133 if (s->codec->channel_layout == 0) {
134 s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
/* Record each audio stream with its FFmpeg stream index, sample rate and channel layout */
137 _audio_streams.push_back (
138 shared_ptr<AudioStream> (
139 new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
143 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
144 _subtitle_streams.push_back (
145 shared_ptr<SubtitleStream> (
146 new SubtitleStream (stream_name (s), i)
/* A video stream is mandatory; audio and subtitles are optional */
152 if (_video_stream < 0) {
153 throw DecodeError (N_("could not find video stream"));
156 _frame = avcodec_alloc_frame ();
158 throw DecodeError (N_("could not allocate frame"));
/** Find and open a decoder for the chosen video stream, filling in
 *  _video_codec_context and _video_codec.
 *  @throws DecodeError if no decoder is available or it cannot be opened.
 */
163 FFmpegDecoder::setup_video ()
165 _video_codec_context = _format_context->streams[_video_stream]->codec;
166 _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
168 if (_video_codec == 0) {
169 throw DecodeError (_("could not find video decoder"));
172 if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
173 throw DecodeError (N_("could not open video decoder"));
/** Find and open a decoder for the currently-selected audio stream, if any.
 *  A no-op when no audio stream has been selected.
 *  @throws DecodeError if no decoder is available or it cannot be opened.
 */
178 FFmpegDecoder::setup_audio ()
180 if (!_audio_stream) {
/* Only FFmpegAudioStreams carry an FFmpeg stream id that we can decode from */
184 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
187 _audio_codec_context = _format_context->streams[ffa->id()]->codec;
188 _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
190 if (_audio_codec == 0) {
191 throw DecodeError (_("could not find audio decoder"));
194 if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
195 throw DecodeError (N_("could not open audio decoder"));
/** Find and open a decoder for the currently-selected subtitle stream, if any.
 *  A no-op when no subtitle stream is selected or its id is out of range for
 *  this container (which can happen with stale stream selections).
 *  @throws DecodeError if no decoder is available or it cannot be opened.
 */
200 FFmpegDecoder::setup_subtitle ()
202 if (!_subtitle_stream || _subtitle_stream->id() >= int (_format_context->nb_streams)) {
206 _subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
207 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
209 if (_subtitle_codec == 0) {
210 throw DecodeError (_("could not find subtitle decoder"));
213 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
214 throw DecodeError (N_("could not open subtitle decoder"));
/** Read and process one packet from the content.  On EOF (or read error,
 *  which is logged and treated as EOF) the decoders are flushed to emit any
 *  buffered frames.  Otherwise the packet is dispatched by stream index to
 *  the video, audio or subtitle decode path.
 */
220 FFmpegDecoder::pass ()
222 int r = av_read_frame (_format_context, &_packet);
225 if (r != AVERROR_EOF) {
226 /* Maybe we should fail here, but for now we'll just finish off instead */
228 av_strerror (r, buf, sizeof(buf));
229 _film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
232 /* Get any remaining frames */
237 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Flush the video decoder: keep decoding until no more finished frames come out */
241 if (_opt.decode_video) {
242 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
243 filter_and_emit_video (_frame);
/* Flush the audio decoder in the same way */
247 if (_audio_stream && _opt.decode_audio) {
248 while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
249 int const data_size = av_samples_get_buffer_size (
250 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
253 assert (_audio_codec_context->channels == _film->audio_channels());
254 Audio (deinterleave_audio (_frame->data, data_size));
261 avcodec_get_frame_defaults (_frame);
263 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
265 if (_packet.stream_index == _video_stream && _opt.decode_video) {
268 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
269 if (r >= 0 && frame_finished) {
271 if (r != _packet.size) {
272 _film->log()->log (String::compose (N_("Used only %1 bytes of %2 in packet"), r, _packet.size));
/* With video_sync on, frame emission is handled by out_with_sync() (which may
   repeat or drop frames); otherwise emit directly */
275 if (_opt.video_sync) {
278 filter_and_emit_video (_frame);
282 } else if (ffa && _packet.stream_index == ffa->id() && _opt.decode_audio) {
285 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
287 /* Where we are in the source, in seconds */
288 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
289 * av_frame_get_best_effort_timestamp(_frame);
291 /* We only decode audio if we've had our first video packet through, and if it
292 was before this packet. Until then audio is thrown away.
295 if ((_first_video && _first_video.get() <= source_pts_seconds) || !_opt.decode_video) {
297 if (!_first_audio && _opt.decode_video) {
298 _first_audio = source_pts_seconds;
300 /* This is our first audio frame, and if we've arrived here we must have had our
301 first video frame. Push some silence to make up any gap between our first
302 video frame and our first audio.
305 /* frames of silence that we must push */
306 int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
310 N_("First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)"),
311 _first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
316 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
317 audio->make_silent ();
322 int const data_size = av_samples_get_buffer_size (
323 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
326 assert (_audio_codec_context->channels == _film->audio_channels());
327 Audio (deinterleave_audio (_frame->data, data_size));
/* Subtitles are only decoded once video has started, so they can be timed against it */
331 } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt.decode_subtitles && _first_video) {
335 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
336 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
337 indicate that the previous subtitle should stop.
339 if (sub.num_rects > 0) {
340 shared_ptr<TimedSubtitle> ts;
342 emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub)));
344 /* some problem with the subtitle; we probably didn't understand it */
/* Empty subtitle: emit a null TimedSubtitle to clear the current one */
347 emit_subtitle (shared_ptr<TimedSubtitle> ());
349 avsubtitle_free (&sub);
353 av_free_packet (&_packet);
357 /** @param data pointer to array of pointers to buffers.
358  * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 * @param size total size of the audio data, in bytes.
 * @return the audio deinterleaved (where necessary) and converted to float,
 * one AudioBuffers channel per source channel.
360 shared_ptr<AudioBuffers>
361 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
363 assert (_film->audio_channels());
364 assert (bytes_per_audio_sample());
366 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
369 /* Deinterleave and convert to float */
/* size must hold a whole number of frames (one sample for every channel) */
371 assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
373 int const total_samples = size / bytes_per_audio_sample();
374 int const frames = total_samples / _film->audio_channels();
375 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
377 switch (audio_sample_format()) {
/* Interleaved signed 16-bit: walk data[0], round-robining across channels,
   scaling by 2^15 to get floats in [-1, 1) */
378 case AV_SAMPLE_FMT_S16:
380 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
383 for (int i = 0; i < total_samples; ++i) {
384 audio->data(channel)[sample] = float(*p++) / (1 << 15);
387 if (channel == _film->audio_channels()) {
/* Planar signed 16-bit: one source buffer per channel, no deinterleaving needed */
395 case AV_SAMPLE_FMT_S16P:
397 int16_t** p = reinterpret_cast<int16_t **> (data);
398 for (int i = 0; i < _film->audio_channels(); ++i) {
399 for (int j = 0; j < frames; ++j) {
400 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
/* Interleaved signed 32-bit, scaled by 2^31 */
406 case AV_SAMPLE_FMT_S32:
408 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
411 for (int i = 0; i < total_samples; ++i) {
412 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
415 if (channel == _film->audio_channels()) {
/* Interleaved float: copy straight through */
423 case AV_SAMPLE_FMT_FLT:
425 float* p = reinterpret_cast<float*> (data[0]);
428 for (int i = 0; i < total_samples; ++i) {
429 audio->data(channel)[sample] = *p++;
432 if (channel == _film->audio_channels()) {
/* Planar float: memcpy each channel's buffer directly */
440 case AV_SAMPLE_FMT_FLTP:
442 float** p = reinterpret_cast<float**> (data);
443 for (int i = 0; i < _film->audio_channels(); ++i) {
444 memcpy (audio->data(i), p[i], frames * sizeof(float));
450 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/** @return the video frame rate, preferring the container's average frame
 *  rate and falling back to r_frame_rate when the average is not available.
 */
457 FFmpegDecoder::frames_per_second () const
459 AVStream* s = _format_context->streams[_video_stream];
461 if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
462 return av_q2d (s->avg_frame_rate);
465 return av_q2d (s->r_frame_rate);
/** @return the sample format of the audio stream, or AVSampleFormat(0)
 *  if no audio codec context has been set up.
 */
469 FFmpegDecoder::audio_sample_format () const
471 if (_audio_codec_context == 0) {
472 return (AVSampleFormat) 0;
475 return _audio_codec_context->sample_fmt;
/** @return the video's native size in pixels, as reported by the codec context */
479 FFmpegDecoder::native_size () const
481 return libdcp::Size (_video_codec_context->width, _video_codec_context->height);
/** @return the pixel format of the video stream */
485 FFmpegDecoder::pixel_format () const
487 return _video_codec_context->pix_fmt;
/** @return numerator of the video codec's time base */
491 FFmpegDecoder::time_base_numerator () const
493 return _video_codec_context->time_base.num;
/** @return denominator of the video codec's time base */
497 FFmpegDecoder::time_base_denominator () const
499 return _video_codec_context->time_base.den;
/** @return numerator of the video's sample (pixel) aspect ratio */
503 FFmpegDecoder::sample_aspect_ratio_numerator () const
505 return _video_codec_context->sample_aspect_ratio.num;
/** @return denominator of the video's sample (pixel) aspect ratio */
509 FFmpegDecoder::sample_aspect_ratio_denominator () const
511 return _video_codec_context->sample_aspect_ratio.den;
/** Build a human-readable name for a stream from its "language" and "title"
 *  metadata entries, falling back when both are absent.
 *  @param s Stream to name.
 *  @return the composed name.
 */
515 FFmpegDecoder::stream_name (AVStream* s) const
519 AVDictionaryEntry const * lang = av_dict_get (s->metadata, N_("language"), 0, 0);
524 AVDictionaryEntry const * title = av_dict_get (s->metadata, N_("title"), 0, 0);
526 if (!n.str().empty()) {
532 if (n.str().empty()) {
/** @return number of bytes in one sample of the current audio sample format */
540 FFmpegDecoder::bytes_per_audio_sample () const
542 return av_get_bytes_per_sample (audio_sample_format ());
/** Change the audio stream to decode.  Delegates stream bookkeeping to
 *  AudioDecoder; presumably the audio codec is (re-)set up afterwards —
 *  TODO confirm against the full implementation.
 *  @param s Stream to use.
 */
546 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
548 AudioDecoder::set_audio_stream (s);
/** Change the subtitle stream to decode.  Delegates stream bookkeeping to
 *  VideoDecoder; presumably the subtitle codec is (re-)set up afterwards —
 *  TODO confirm against the full implementation.
 *  @param s Stream to use.
 */
553 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
555 VideoDecoder::set_subtitle_stream (s);
/** Push a decoded frame through a FilterGraph matched to its size and pixel
 *  format (creating and caching a new graph on first sight of a given
 *  size/format combination) and emit the resulting images.
 *  @param frame Frame to process.
 */
561 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
/* _filter_graphs is shared state; guard both the lookup and any insertion */
563 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
565 shared_ptr<FilterGraph> graph;
567 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
568 while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
572 if (i == _filter_graphs.end ()) {
573 graph.reset (new FilterGraph (_film, this, libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format));
574 _filter_graphs.push_back (graph);
575 _film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), frame->width, frame->height, frame->format));
/* A graph may emit zero or more images for one input frame */
580 list<shared_ptr<Image> > images = graph->process (frame);
582 for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
583 emit_video (*i, frame_time ());
/** Seek to a position in the content.
 *  @param p Position in seconds.
 */
588 FFmpegDecoder::seek (double p)
590 return do_seek (p, false);
/** Seek back to the position of the last frame that was decoded, so that it
 *  can be re-fetched (e.g. after a change of decoder parameters).
 */
594 FFmpegDecoder::seek_to_last ()
596 /* This AVSEEK_FLAG_BACKWARD in do_seek is a bit of a hack; without it, if we ask for a seek to the same place as last time
597 (used when we change decoder parameters and want to re-fetch the frame) we end up going forwards rather than
598 staying in the same place.
600 return do_seek (last_source_time(), true);
/** Seek the container to a time, then flush the decoders so that no stale
 *  buffered frames are emitted after the seek.
 *  @param p Position in seconds.
 *  @param backwards true to pass AVSEEK_FLAG_BACKWARD to av_seek_frame.
 */
604 FFmpegDecoder::do_seek (double p, bool backwards)
/* Convert seconds to the video stream's time base units */
606 int64_t const vt = p / av_q2d (_format_context->streams[_video_stream]->time_base);
608 int const r = av_seek_frame (_format_context, _video_stream, vt, backwards ? AVSEEK_FLAG_BACKWARD : 0);
610 avcodec_flush_buffers (_video_codec_context);
611 if (_subtitle_codec_context) {
612 avcodec_flush_buffers (_subtitle_codec_context);
/** Factory: build an FFmpegAudioStream from its serialised string form.
 *  @param t Serialised representation.
 *  @param v Serialisation version; when absent (< 1) the string carries no
 *  type tag, since only FFmpeg streams existed then.
 *  @return the stream, or a null pointer if the string describes some other
 *  stream type.
 */
618 shared_ptr<FFmpegAudioStream>
619 FFmpegAudioStream::create (string t, optional<int> v)
622 /* version < 1; no type in the string, and there's only FFmpeg streams anyway */
623 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
629 if (type != N_("ffmpeg")) {
630 return shared_ptr<FFmpegAudioStream> ();
633 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
/** Construct an FFmpegAudioStream from its serialised string form.
 *  Handles both the old (pre-version-1) layout, where a channel count is
 *  stored and the layout must be derived, and the current version-1 layout
 *  which stores the channel layout directly.  The stream name is whatever
 *  remains of the string after the leading fields.
 *  @param t Serialised representation.
 *  @param version Serialisation version.
 */
636 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
644 n >> _id >> channels;
/* Old serialisations stored a channel count only; derive a default layout from it */
645 _channel_layout = av_get_default_channel_layout (channels);
649 /* Current (marked version 1) */
650 n >> type >> _id >> _sample_rate >> _channel_layout;
651 assert (type == N_("ffmpeg"));
/* Strip the leading space-separated fields off t, leaving just the name */
654 for (int i = 0; i < name_index; ++i) {
655 size_t const s = t.find (' ');
656 if (s != string::npos) {
657 t = t.substr (s + 1);
/** @return serialised form of this stream: type tag, id, sample rate,
 *  channel layout and name (the inverse of the deserialising constructor) */
665 FFmpegAudioStream::to_string () const
667 return String::compose (N_("ffmpeg %1 %2 %3 %4"), _id, _sample_rate, _channel_layout, _name);
/** Emit the current video frame while keeping the output in sync with the
 *  source timestamps: if the output has fallen behind the source PTS, repeat
 *  the last frame as many times as needed; if it has got ahead by a frame or
 *  more, drop this frame.
 */
671 FFmpegDecoder::out_with_sync ()
673 /* Where we are in the output, in seconds */
674 double const out_pts_seconds = video_frame() / frames_per_second();
676 /* Where we are in the source, in seconds */
677 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
678 * av_frame_get_best_effort_timestamp(_frame);
681 String::compose (N_("Source video frame ready; source at %1, output at %2"), source_pts_seconds, out_pts_seconds),
/* Remember the PTS of the first video frame; later timings are relative to it */
686 _first_video = source_pts_seconds;
689 /* Difference between where we are and where we should be */
690 double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
691 double const one_frame = 1 / frames_per_second();
693 /* Insert frames if required to get out_pts_seconds up to pts_seconds */
694 if (delta > one_frame) {
695 int const extra = rint (delta / one_frame);
696 for (int i = 0; i < extra; ++i) {
697 repeat_last_video ();
700 N_("Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)"),
701 out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
707 if (delta > -one_frame) {
708 /* Process this frame */
709 filter_and_emit_video (_frame);
711 /* Otherwise we are omitting a frame to keep things right */
712 _film->log()->log (String::compose (N_("Frame removed at %1s"), out_pts_seconds));
/** Handle a change to a Film property: discard the cached filter graphs
 *  (under the graphs mutex) so they are rebuilt with the new settings.
 *  NOTE(review): the visible code clears unconditionally; the full
 *  implementation may filter on which property changed — confirm.
 *  @param p Property that changed.
 */
717 FFmpegDecoder::film_changed (Film::Property p)
723 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
724 _filter_graphs.clear ();
734 /** @return Length (in video frames) according to our content's header */
/* Container duration is in AV_TIME_BASE units; convert to seconds, then frames */
736 FFmpegDecoder::length () const
738 return (double(_format_context->duration) / AV_TIME_BASE) * frames_per_second();
/** @return best-effort timestamp of the current frame, converted from the
 *  video stream's time base to seconds */
742 FFmpegDecoder::frame_time () const
744 return av_frame_get_best_effort_timestamp(_frame) * av_q2d (_format_context->streams[_video_stream]->time_base);