2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
35 #include <libswscale/swscale.h>
36 #include <libpostproc/postprocess.h>
41 #include "transcoder.h"
45 #include "exceptions.h"
49 #include "ffmpeg_decoder.h"
50 #include "filter_graph.h"
56 using std::stringstream;
58 using boost::shared_ptr;
59 using boost::optional;
60 using boost::dynamic_pointer_cast;
/** Construct an FFmpegDecoder for film @p f with decode options @p o.
 *  NOTE(review): only part of the initialiser list is visible in this view.
 *  The three FFmpeg codec-context pointers are zero-initialised here so the
 *  destructor can safely test them before closing.
 */
63 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, DecodeOptions o)
70 , _video_codec_context (0)
72 , _audio_codec_context (0)
74 , _subtitle_codec_context (0)
/** Close any codec contexts that were opened, then close the input container. */
87 FFmpegDecoder::~FFmpegDecoder ()
/* Each context may still be null if the corresponding setup_*() was never
   run (e.g. no audio or subtitle stream), so test before closing. */
89 if (_audio_codec_context) {
90 avcodec_close (_audio_codec_context);
93 if (_video_codec_context) {
94 avcodec_close (_video_codec_context);
97 if (_subtitle_codec_context) {
98 avcodec_close (_subtitle_codec_context);
/* Frees the AVFormatContext and nulls _format_context. */
103 avformat_close_input (&_format_context);
/** Open the content file, probe its streams, record the audio/subtitle
 *  streams that are present and allocate the shared decode frame.
 *  @throws OpenFileError if the content cannot be opened.
 *  @throws DecodeError if stream info, a video stream or the frame
 *  cannot be obtained.
 */
107 FFmpegDecoder::setup_general ()
111 if (avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0) < 0) {
112 throw OpenFileError (_film->content_path ());
115 if (avformat_find_stream_info (_format_context, 0) < 0) {
116 throw DecodeError ("could not find stream information");
119 /* Find video, audio and subtitle streams and choose the first of each */
121 for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
122 AVStream* s = _format_context->streams[i];
123 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
125 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
127 /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
128 so bodge it here. No idea why we should have to do this.
/* Derive a layout from the channel count when the demuxer left it unset. */
131 if (s->codec->channel_layout == 0) {
132 s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
/* Remember every audio stream so the user can pick one later. */
135 _audio_streams.push_back (
136 shared_ptr<AudioStream> (
137 new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
141 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
142 _subtitle_streams.push_back (
143 shared_ptr<SubtitleStream> (
144 new SubtitleStream (stream_name (s), i)
/* Video is mandatory; audio/subtitles are optional. */
150 if (_video_stream < 0) {
151 throw DecodeError ("could not find video stream");
/* One AVFrame is reused for video, audio and subtitle decoding. */
154 _frame = avcodec_alloc_frame ();
156 throw DecodeError ("could not allocate frame");
161 FFmpegDecoder::setup_video ()
163 _video_codec_context = _format_context->streams[_video_stream]->codec;
164 _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
166 if (_video_codec == 0) {
167 throw DecodeError ("could not find video decoder");
170 if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
171 throw DecodeError ("could not open video decoder");
/** Find and open a decoder for the selected audio stream, if any.
 *  A no-op when no audio stream has been selected.
 *  @throws DecodeError if a decoder cannot be found or opened.
 */
176 FFmpegDecoder::setup_audio ()
178 if (!_audio_stream) {
/* Only FFmpeg-backed audio streams can be decoded here. */
182 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
185 _audio_codec_context = _format_context->streams[ffa->id()]->codec;
186 _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
188 if (_audio_codec == 0) {
189 throw DecodeError ("could not find audio decoder");
192 if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
193 throw DecodeError ("could not open audio decoder");
/** Find and open a decoder for the selected subtitle stream, if any.
 *  Also bails out if the stored stream id is out of range for this
 *  container (e.g. stale saved state).
 *  @throws DecodeError if a decoder cannot be found or opened.
 */
198 FFmpegDecoder::setup_subtitle ()
200 if (!_subtitle_stream || _subtitle_stream->id() >= int (_format_context->nb_streams)) {
204 _subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
205 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
207 if (_subtitle_codec == 0) {
208 throw DecodeError ("could not find subtitle decoder");
211 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
212 throw DecodeError ("could not open subtitle decoder");
/** Read and decode one packet from the container, emitting video, audio
 *  and/or subtitle data as appropriate.  On EOF, flushes the decoders to
 *  drain any buffered frames.
 *  NOTE(review): several lines of this function are elided in this view;
 *  comments below describe only the visible code.
 */
218 FFmpegDecoder::pass ()
220 int r = av_read_frame (_format_context, &_packet);
/* A negative return means EOF or a read error; either way we drain the
   decoders below rather than aborting. */
223 if (r != AVERROR_EOF) {
224 /* Maybe we should fail here, but for now we'll just finish off instead */
226 av_strerror (r, buf, sizeof(buf));
227 _film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
230 /* Get any remaining frames */
235 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Flush: keep feeding the (empty) packet until the codec has no more
   buffered frames to give us. */
239 if (_opt.decode_video) {
240 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
241 filter_and_emit_video (_frame);
245 if (_audio_stream && _opt.decode_audio) {
246 while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
247 int const data_size = av_samples_get_buffer_size (
248 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
251 assert (_audio_codec_context->channels == _film->audio_channels());
252 Audio (deinterleave_audio (_frame->data, data_size));
/* Normal (non-EOF) path: reset the frame and dispatch on stream type. */
259 avcodec_get_frame_defaults (_frame);
261 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
263 if (_packet.stream_index == _video_stream && _opt.decode_video) {
266 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
267 if (r >= 0 && frame_finished) {
/* avcodec_decode_video2 returns the number of bytes consumed; log if
   the whole packet was not used. */
269 if (r != _packet.size) {
270 _film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
273 if (_opt.video_sync) {
276 filter_and_emit_video (_frame);
280 } else if (ffa && _packet.stream_index == ffa->id() && _opt.decode_audio) {
283 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
285 /* Where we are in the source, in seconds */
286 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
287 * av_frame_get_best_effort_timestamp(_frame);
289 /* We only decode audio if we've had our first video packet through, and if it
290 was before this packet. Until then audio is thrown away.
293 if ((_first_video && _first_video.get() <= source_pts_seconds) || !_opt.decode_video) {
295 if (!_first_audio && _opt.decode_video) {
296 _first_audio = source_pts_seconds;
298 /* This is our first audio frame, and if we've arrived here we must have had our
299 first video frame. Push some silence to make up any gap between our first
300 video frame and our first audio.
303 /* frames of silence that we must push */
304 int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
308 "First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)",
309 _first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
314 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
315 audio->make_silent ();
320 int const data_size = av_samples_get_buffer_size (
321 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
324 assert (_audio_codec_context->channels == _film->audio_channels());
325 Audio (deinterleave_audio (_frame->data, data_size));
/* Subtitles are only decoded once video has started, so that they can be
   timed relative to the first video frame. */
329 } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt.decode_subtitles && _first_video) {
333 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
334 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
335 indicate that the previous subtitle should stop.
337 if (sub.num_rects > 0) {
338 shared_ptr<TimedSubtitle> ts;
340 emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub)));
342 /* some problem with the subtitle; we probably didn't understand it */
/* Empty AVSubtitle: signal "clear the current subtitle". */
345 emit_subtitle (shared_ptr<TimedSubtitle> ());
347 avsubtitle_free (&sub);
/* Always release the packet's buffers before returning. */
351 av_free_packet (&_packet);
355 /** @param data pointer to array of pointers to buffers.
356 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
* @param size size of the decoded data, in bytes.
* @return deinterleaved audio converted to float samples in [-1, 1).
* @throws DecodeError on an unhandled sample format.
358 shared_ptr<AudioBuffers>
359 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
361 assert (_film->audio_channels());
362 assert (bytes_per_audio_sample());
364 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
367 /* Deinterleave and convert to float */
/* size must be a whole number of interleaved sample frames. */
369 assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
371 int const total_samples = size / bytes_per_audio_sample();
372 int const frames = total_samples / _film->audio_channels();
373 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
375 switch (audio_sample_format()) {
376 case AV_SAMPLE_FMT_S16:
/* Interleaved signed 16-bit: walk all samples, rotating through channels;
   scale by 2^15 to reach [-1, 1). */
378 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
381 for (int i = 0; i < total_samples; ++i) {
382 audio->data(channel)[sample] = float(*p++) / (1 << 15);
385 if (channel == _film->audio_channels()) {
393 case AV_SAMPLE_FMT_S16P:
/* Planar signed 16-bit: one source buffer per channel. */
395 int16_t** p = reinterpret_cast<int16_t **> (data);
396 for (int i = 0; i < _film->audio_channels(); ++i) {
397 for (int j = 0; j < frames; ++j) {
398 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
404 case AV_SAMPLE_FMT_S32:
/* Interleaved signed 32-bit; scale by 2^31. */
406 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
409 for (int i = 0; i < total_samples; ++i) {
410 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
413 if (channel == _film->audio_channels()) {
421 case AV_SAMPLE_FMT_FLT:
/* Interleaved float: already in the target representation. */
423 float* p = reinterpret_cast<float*> (data[0]);
426 for (int i = 0; i < total_samples; ++i) {
427 audio->data(channel)[sample] = *p++;
430 if (channel == _film->audio_channels()) {
438 case AV_SAMPLE_FMT_FLTP:
/* Planar float: straight per-channel memcpy. */
440 float** p = reinterpret_cast<float**> (data);
441 for (int i = 0; i < _film->audio_channels(); ++i) {
442 memcpy (audio->data(i), p[i], frames * sizeof(float));
448 throw DecodeError (String::compose ("Unrecognised audio sample format (%1)", static_cast<int> (audio_sample_format())));
/** @return the video stream's frame rate, preferring the container's
 *  average frame rate and falling back to the "real" base frame rate
 *  when the average is unset.
 */
455 FFmpegDecoder::frames_per_second () const
457 AVStream* s = _format_context->streams[_video_stream];
459 if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
460 return av_q2d (s->avg_frame_rate);
463 return av_q2d (s->r_frame_rate);
/** @return the sample format of the audio codec context, or a
 *  zero-valued AVSampleFormat when no audio codec has been set up.
 */
467 FFmpegDecoder::audio_sample_format () const
469 if (_audio_codec_context == 0) {
470 return (AVSampleFormat) 0;
473 return _audio_codec_context->sample_fmt;
/** @return the video's native size in pixels, from the codec context. */
477 FFmpegDecoder::native_size () const
479 return libdcp::Size (_video_codec_context->width, _video_codec_context->height);
/** @return the video codec context's pixel format. */
483 FFmpegDecoder::pixel_format () const
485 return _video_codec_context->pix_fmt;
/** @return numerator of the video codec context's time base. */
489 FFmpegDecoder::time_base_numerator () const
491 return _video_codec_context->time_base.num;
/** @return denominator of the video codec context's time base. */
495 FFmpegDecoder::time_base_denominator () const
497 return _video_codec_context->time_base.den;
/** @return numerator of the video codec context's sample aspect ratio. */
501 FFmpegDecoder::sample_aspect_ratio_numerator () const
503 return _video_codec_context->sample_aspect_ratio.num;
/** @return denominator of the video codec context's sample aspect ratio. */
507 FFmpegDecoder::sample_aspect_ratio_denominator () const
509 return _video_codec_context->sample_aspect_ratio.den;
/** Build a human-readable name for stream @p s from its "language" and
 *  "title" metadata entries.
 *  NOTE(review): the lines that append lang/title into the stream are
 *  elided in this view; only the metadata lookups and emptiness checks
 *  are visible.
 */
513 FFmpegDecoder::stream_name (AVStream* s) const
517 AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
522 AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
524 if (!n.str().empty()) {
530 if (n.str().empty()) {
/** @return bytes per sample for the current audio sample format. */
538 FFmpegDecoder::bytes_per_audio_sample () const
540 return av_get_bytes_per_sample (audio_sample_format ());
/** Select audio stream @p s, delegating to the base class.
 *  NOTE(review): any follow-up work after the base call is elided here.
 */
544 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
546 AudioDecoder::set_audio_stream (s);
/** Select subtitle stream @p s, delegating to the base class.
 *  NOTE(review): any follow-up work after the base call is elided here.
 */
551 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
553 VideoDecoder::set_subtitle_stream (s);
/** Push @p frame through a FilterGraph matched to its size and pixel
 *  format (creating and caching one if none exists), then emit each
 *  resulting image.  The graph list is protected by _filter_graphs_mutex.
 */
559 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
561 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
563 shared_ptr<FilterGraph> graph;
/* Look for an existing graph that can handle this frame's geometry and
   pixel format. */
565 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
566 while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
/* None found: build one and cache it for subsequent frames. */
570 if (i == _filter_graphs.end ()) {
571 graph.reset (new FilterGraph (_film, this, libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format));
572 _filter_graphs.push_back (graph);
573 _film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
/* One input frame may yield several filtered images. */
578 list<shared_ptr<Image> > images = graph->process (frame);
580 for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
581 emit_video (*i, frame_time ());
/** Seek to @p p seconds, forwards-biased (no AVSEEK_FLAG_BACKWARD). */
586 FFmpegDecoder::seek (double p)
588 return do_seek (p, false);
/** Re-seek to the last source time, backwards-biased so a repeated seek
 *  to the same position re-fetches the same frame.
 */
592 FFmpegDecoder::seek_to_last ()
594 /* This AVSEEK_FLAG_BACKWARD in do_seek is a bit of a hack; without it, if we ask for a seek to the same place as last time
595 (used when we change decoder parameters and want to re-fetch the frame) we end up going forwards rather than
596 staying in the same place.
598 return do_seek (last_source_time(), true);
/** Seek the container to @p p seconds and flush the codec buffers so
 *  stale frames are not emitted after the jump.
 *  @param backwards pass AVSEEK_FLAG_BACKWARD to av_seek_frame.
 */
602 FFmpegDecoder::do_seek (double p, bool backwards)
/* Convert seconds into the video stream's time base units. */
604 int64_t const vt = p / av_q2d (_format_context->streams[_video_stream]->time_base);
606 int const r = av_seek_frame (_format_context, _video_stream, vt, backwards ? AVSEEK_FLAG_BACKWARD : 0);
608 avcodec_flush_buffers (_video_codec_context);
609 if (_subtitle_codec_context) {
610 avcodec_flush_buffers (_subtitle_codec_context);
/** Factory: deserialise an FFmpegAudioStream from string @p t.
 *  @param v state-file version; before version 1 the string carried no
 *  type tag (all streams were FFmpeg streams).
 *  @return the stream, or an empty pointer when the type tag is not
 *  "ffmpeg".
 */
616 shared_ptr<FFmpegAudioStream>
617 FFmpegAudioStream::create (string t, optional<int> v)
620 /* version < 1; no type in the string, and there's only FFmpeg streams anyway */
621 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
/* Version >= 1: the leading type token must identify an FFmpeg stream. */
627 if (type != "ffmpeg") {
628 return shared_ptr<FFmpegAudioStream> ();
631 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
/** Deserialising constructor: parse id/sample-rate/channel-layout from
 *  the space-separated string @p t according to @p version, then strip
 *  the leading tokens so the remainder is the stream name.
 */
634 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
/* Legacy format stored a channel count; synthesise a layout from it. */
642 n >> _id >> channels;
643 _channel_layout = av_get_default_channel_layout (channels);
647 /* Current (marked version 1) */
648 n >> type >> _id >> _sample_rate >> _channel_layout;
649 assert (type == "ffmpeg");
/* Drop the first name_index space-separated tokens; what is left of t
   is the stream's display name. */
652 for (int i = 0; i < name_index; ++i) {
653 size_t const s = t.find (' ');
654 if (s != string::npos) {
655 t = t.substr (s + 1);
/** Serialise to the version-1 string format read back by create(). */
663 FFmpegAudioStream::to_string () const
665 return String::compose ("ffmpeg %1 %2 %3 %4", _id, _sample_rate, _channel_layout, _name);
/** Emit the current video frame while keeping output time locked to the
 *  source PTS: duplicate frames when the output lags by more than one
 *  frame, drop the frame when it is ahead.
 *  NOTE(review): some lines are elided in this view; comments describe
 *  only the visible code.
 */
669 FFmpegDecoder::out_with_sync ()
671 /* Where we are in the output, in seconds */
672 double const out_pts_seconds = video_frame() / frames_per_second();
674 /* Where we are in the source, in seconds */
675 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
676 * av_frame_get_best_effort_timestamp(_frame);
679 String::compose ("Source video frame ready; source at %1, output at %2", source_pts_seconds, out_pts_seconds),
/* Remember the PTS of the first video frame as the sync origin. */
684 _first_video = source_pts_seconds;
687 /* Difference between where we are and where we should be */
688 double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
689 double const one_frame = 1 / frames_per_second();
691 /* Insert frames if required to get out_pts_seconds up to pts_seconds */
692 if (delta > one_frame) {
693 int const extra = rint (delta / one_frame);
694 for (int i = 0; i < extra; ++i) {
695 repeat_last_video ();
698 "Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)",
699 out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
/* Within one frame of where we should be: emit it; otherwise drop it. */
705 if (delta > -one_frame) {
706 /* Process this frame */
707 filter_and_emit_video (_frame);
709 /* Otherwise we are omitting a frame to keep things right */
710 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
/** Handle a Film property change: discard cached filter graphs so they
 *  are rebuilt with the new settings.
 *  NOTE(review): the property check guarding this is elided in this view.
 */
715 FFmpegDecoder::film_changed (Film::Property p)
721 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
722 _filter_graphs.clear ();
732 /** @return Length (in video frames) according to our content's header */
734 FFmpegDecoder::length () const
/* AVFormatContext::duration is in AV_TIME_BASE units; convert to seconds
   then multiply by the frame rate. */
736 return (double(_format_context->duration) / AV_TIME_BASE) * frames_per_second();
/** @return the current frame's best-effort timestamp, converted to
 *  seconds using the video stream's time base.
 */
740 FFmpegDecoder::frame_time () const
742 return av_frame_get_best_effort_timestamp(_frame) * av_q2d (_format_context->streams[_video_stream]->time_base);