2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
35 #include <libswscale/swscale.h>
36 #include <libpostproc/postprocess.h>
41 #include "transcoder.h"
45 #include "exceptions.h"
49 #include "ffmpeg_decoder.h"
50 #include "filter_graph.h"
56 using std::stringstream;
58 using boost::shared_ptr;
59 using boost::optional;
60 using boost::dynamic_pointer_cast;
/** Construct a decoder which uses FFmpeg to decode a film's content.
 *  @param f Film; forwarded to the VideoDecoder and AudioDecoder base constructors.
 *  @param o Decode options; forwarded likewise.
 *  @param j Job; forwarded likewise.
 *
 *  The video, audio and subtitle codec context pointers are initialised to 0.
 *  NOTE(review): other initialisers and the constructor body are not visible in this
 *  listing -- confirm against the full file.
 */
62 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const DecodeOptions> o, Job* j)
64 , VideoDecoder (f, o, j)
65 , AudioDecoder (f, o, j)
69 , _video_codec_context (0)
71 , _audio_codec_context (0)
73 , _subtitle_codec_context (0)
/** Tear down the FFmpeg state.
 *  Each of the audio, video and subtitle codec contexts is closed with avcodec_close,
 *  but only if its pointer is non-zero (i.e. it was set up); the input held by
 *  _format_context is then closed with avformat_close_input.
 */
86 FFmpegDecoder::~FFmpegDecoder ()
88 if (_audio_codec_context) {
89 avcodec_close (_audio_codec_context);
92 if (_video_codec_context) {
93 avcodec_close (_video_codec_context);
96 if (_subtitle_codec_context) {
97 avcodec_close (_subtitle_codec_context);
102 avformat_close_input (&_format_context);
/** Open the film's content and enumerate its streams.
 *
 *  Opens _film->content_path() with avformat_open_input and reads the stream
 *  information, then walks every stream in the container:
 *   - audio streams are appended to _audio_streams as FFmpegAudioStream objects built
 *     from the stream name, index, sample rate and channel layout (a default layout is
 *     derived from the channel count when the codec reports a layout of 0);
 *   - subtitle streams are appended to _subtitle_streams as SubtitleStream objects;
 *   - NOTE(review): what is done for video streams is not visible in this listing.
 *  Afterwards a frame is allocated into _frame.
 *
 *  @throws OpenFileError if the content cannot be opened.
 *  @throws DecodeError if stream information cannot be found, if _video_stream is still
 *  negative after the walk, or (presumably, the condition is not visible) if the frame
 *  could not be allocated.
 */
106 FFmpegDecoder::setup_general ()
110 if (avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0) < 0) {
111 throw OpenFileError (_film->content_path ());
114 if (avformat_find_stream_info (_format_context, 0) < 0) {
115 throw DecodeError ("could not find stream information");
118 /* Find video, audio and subtitle streams and choose the first of each */
120 for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
121 AVStream* s = _format_context->streams[i];
122 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
124 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
126 /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
127 so bodge it here. No idea why we should have to do this.
130 if (s->codec->channel_layout == 0) {
131 s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
134 _audio_streams.push_back (
135 shared_ptr<AudioStream> (
136 new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
140 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
141 _subtitle_streams.push_back (
142 shared_ptr<SubtitleStream> (
143 new SubtitleStream (stream_name (s), i)
149 if (_video_stream < 0) {
150 throw DecodeError ("could not find video stream");
153 _frame = avcodec_alloc_frame ();
155 throw DecodeError ("could not allocate frame");
/** Find and open a decoder for the video stream.
 *  Takes the codec context of the stream at index _video_stream, looks up a decoder
 *  for its codec_id and opens it.
 *  @throws DecodeError if no decoder is found or it cannot be opened.
 */
160 FFmpegDecoder::setup_video ()
162 _video_codec_context = _format_context->streams[_video_stream]->codec;
163 _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
165 if (_video_codec == 0) {
166 throw DecodeError ("could not find video decoder");
169 if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
170 throw DecodeError ("could not open video decoder");
/** Find and open a decoder for the selected audio stream.
 *  The case of no audio stream being selected is handled first (presumably by returning
 *  early; that statement is not visible here).  Otherwise _audio_stream is cast to
 *  FFmpegAudioStream and its id() is used as the index into the container's streams.
 *  NOTE(review): the result of the dynamic_pointer_cast is dereferenced; whether it is
 *  checked first is not visible in this listing.
 *  @throws DecodeError if no decoder is found or it cannot be opened.
 */
175 FFmpegDecoder::setup_audio ()
177 if (!_audio_stream) {
181 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
184 _audio_codec_context = _format_context->streams[ffa->id()]->codec;
185 _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
187 if (_audio_codec == 0) {
188 throw DecodeError ("could not find audio decoder");
191 if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
192 throw DecodeError ("could not open audio decoder");
/** Find and open a decoder for the selected subtitle stream.
 *  The case of no subtitle stream being selected is handled first (presumably by
 *  returning early; that statement is not visible here).  Otherwise the stream's id()
 *  is used as the index into the container's streams.
 *  @throws DecodeError if no decoder is found or it cannot be opened.
 */
197 FFmpegDecoder::setup_subtitle ()
199 if (!_subtitle_stream) {
203 _subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
204 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
206 if (_subtitle_codec == 0) {
207 throw DecodeError ("could not find subtitle decoder");
210 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
211 throw DecodeError ("could not open subtitle decoder");
/** Read one packet from the content with av_read_frame and decode it.
 *
 *  End-of-content / read-failure path (the enclosing condition is not visible here):
 *  a result other than AVERROR_EOF is logged, then the decoders are drained -- any
 *  remaining video frames go through filter_and_emit_video and, when an audio stream is
 *  selected and _opt->decode_audio is set, remaining audio is deinterleaved and passed
 *  to Audio().
 *
 *  Normal path: _frame is reset to defaults and the packet is dispatched on its stream index.
 *   - Video: decoded with avcodec_decode_video2; if the decoder consumed fewer bytes than
 *     the packet holds, that is logged.  The frame goes to filter_and_emit_video; the
 *     _opt->video_sync branch is not visible here (presumably out_with_sync()).
 *   - Audio (only if _opt->decode_audio): the packet's position in seconds is computed
 *     from the stream time base.  Audio is only used once _first_video is set and is not
 *     later than that position.  For the first such packet _first_audio is recorded, the
 *     number of frames of silence needed to cover the gap back to _first_video is
 *     computed and logged, and a silent AudioBuffers of that length is built.  The decoded
 *     data is then deinterleaved and passed to Audio().
 *   - Subtitle (only if _opt->decode_subtitles and _first_video is set): a subtitle with
 *     at least one rect is emitted as a TimedSubtitle; otherwise a null TimedSubtitle
 *     pointer is emitted.  The AVSubtitle is released with avsubtitle_free.
 *  Finally the packet is released with av_free_packet.
 *
 *  NOTE(review): the result of avcodec_decode_subtitle2 is tested as a boolean.  FFmpeg
 *  documents a negative return on error, which is truthy here, so a failed decode is not
 *  filtered out by this test -- consider comparing against `>= 0`.
 *  NOTE(review): only _frame->data[0] is handed to deinterleave_audio; for planar sample
 *  formats FFmpeg keeps each channel in its own data[] plane -- confirm this is safe.
 */
217 FFmpegDecoder::pass ()
219 int r = av_read_frame (_format_context, &_packet);
222 if (r != AVERROR_EOF) {
223 /* Maybe we should fail here, but for now we'll just finish off instead */
225 av_strerror (r, buf, sizeof(buf));
226 _film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
229 /* Get any remaining frames */
234 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
238 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
239 filter_and_emit_video (_frame);
242 if (_audio_stream && _opt->decode_audio) {
243 while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
244 int const data_size = av_samples_get_buffer_size (
245 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
248 assert (_audio_codec_context->channels == _film->audio_channels());
249 Audio (deinterleave_audio (_frame->data[0], data_size));
256 avcodec_get_frame_defaults (_frame);
258 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
260 if (_packet.stream_index == _video_stream) {
263 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
264 if (r >= 0 && frame_finished) {
266 if (r != _packet.size) {
267 _film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
270 if (_opt->video_sync) {
273 filter_and_emit_video (_frame);
277 } else if (ffa && _packet.stream_index == ffa->id() && _opt->decode_audio) {
280 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
282 /* Where we are in the source, in seconds */
283 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
284 * av_frame_get_best_effort_timestamp(_frame);
286 /* We only decode audio if we've had our first video packet through, and if it
287 was before this packet. Until then audio is thrown away.
290 if (_first_video && _first_video.get() <= source_pts_seconds) {
293 _first_audio = source_pts_seconds;
295 /* This is our first audio frame, and if we've arrived here we must have had our
296 first video frame. Push some silence to make up any gap between our first
297 video frame and our first audio.
300 /* frames of silence that we must push */
301 int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
305 "First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)",
306 _first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
311 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
312 audio->make_silent ();
317 int const data_size = av_samples_get_buffer_size (
318 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
321 assert (_audio_codec_context->channels == _film->audio_channels());
322 Audio (deinterleave_audio (_frame->data[0], data_size));
326 } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt->decode_subtitles && _first_video) {
330 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
331 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
332 indicate that the previous subtitle should stop.
334 if (sub.num_rects > 0) {
335 shared_ptr<TimedSubtitle> ts;
337 emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub)));
339 /* some problem with the subtitle; we probably didn't understand it */
342 emit_subtitle (shared_ptr<TimedSubtitle> ());
344 avsubtitle_free (&sub);
348 av_free_packet (&_packet);
352 shared_ptr<AudioBuffers>
353 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
355 assert (_film->audio_channels());
356 assert (bytes_per_audio_sample());
358 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
361 /* Deinterleave and convert to float */
363 assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
365 int const total_samples = size / bytes_per_audio_sample();
366 int const frames = total_samples / _film->audio_channels();
367 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
369 switch (audio_sample_format()) {
370 case AV_SAMPLE_FMT_S16:
372 int16_t* p = reinterpret_cast<int16_t *> (data);
375 for (int i = 0; i < total_samples; ++i) {
376 audio->data(channel)[sample] = float(*p++) / (1 << 15);
379 if (channel == _film->audio_channels()) {
387 case AV_SAMPLE_FMT_S16P:
389 int16_t* p = reinterpret_cast<int16_t *> (data);
390 for (int i = 0; i < _film->audio_channels(); ++i) {
391 for (int j = 0; j < frames; ++j) {
392 audio->data(i)[j] = static_cast<float>(*p++) / (1 << 15);
398 case AV_SAMPLE_FMT_S32:
400 int32_t* p = reinterpret_cast<int32_t *> (data);
403 for (int i = 0; i < total_samples; ++i) {
404 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
407 if (channel == _film->audio_channels()) {
415 case AV_SAMPLE_FMT_FLT:
417 float* p = reinterpret_cast<float*> (data);
420 for (int i = 0; i < total_samples; ++i) {
421 audio->data(channel)[sample] = *p++;
424 if (channel == _film->audio_channels()) {
432 case AV_SAMPLE_FMT_FLTP:
434 float* p = reinterpret_cast<float*> (data);
435 for (int i = 0; i < _film->audio_channels(); ++i) {
436 memcpy (audio->data(i), p, frames * sizeof(float));
443 throw DecodeError (String::compose ("Unrecognised audio sample format (%1)", static_cast<int> (audio_sample_format())));
/** @return Frame rate of the video stream: its avg_frame_rate when both the numerator
 *  and denominator of that rational are non-zero, otherwise its r_frame_rate.
 */
450 FFmpegDecoder::frames_per_second () const
452 AVStream* s = _format_context->streams[_video_stream];
454 if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
455 return av_q2d (s->avg_frame_rate);
458 return av_q2d (s->r_frame_rate);
/** @return Sample format reported by the audio codec context, or AVSampleFormat value 0
 *  when no audio codec context has been set up.
 */
462 FFmpegDecoder::audio_sample_format () const
464 if (_audio_codec_context == 0) {
465 return (AVSampleFormat) 0;
468 return _audio_codec_context->sample_fmt;
/** @return Width and height reported by the video codec context, as a libdcp::Size.
 *  Dereferences _video_codec_context, so the video decoder must already be set up.
 */
472 FFmpegDecoder::native_size () const
474 return libdcp::Size (_video_codec_context->width, _video_codec_context->height);
/** @return Pixel format (pix_fmt) reported by the video codec context. */
478 FFmpegDecoder::pixel_format () const
480 return _video_codec_context->pix_fmt;
/** @return Numerator of the video codec context's time base. */
484 FFmpegDecoder::time_base_numerator () const
486 return _video_codec_context->time_base.num;
/** @return Denominator of the video codec context's time base. */
490 FFmpegDecoder::time_base_denominator () const
492 return _video_codec_context->time_base.den;
/** @return Numerator of the sample aspect ratio reported by the video codec context. */
496 FFmpegDecoder::sample_aspect_ratio_numerator () const
498 return _video_codec_context->sample_aspect_ratio.num;
/** @return Denominator of the sample aspect ratio reported by the video codec context. */
502 FFmpegDecoder::sample_aspect_ratio_denominator () const
504 return _video_codec_context->sample_aspect_ratio.den;
/** Build a name for a stream from its metadata.
 *  Looks up the "language" and "title" entries in the stream's metadata dictionary and
 *  accumulates the result in a stream `n'; there are separate cases for n being non-empty
 *  and empty at later points.
 *  NOTE(review): how the entries are combined and what is used when both are absent
 *  is not visible in this listing -- confirm against the full file.
 *  @param s Stream to name.
 */
508 FFmpegDecoder::stream_name (AVStream* s) const
512 AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
517 AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
519 if (!n.str().empty()) {
525 if (n.str().empty()) {
/** @return Number of bytes in one sample of the current audio_sample_format(), as
 *  given by av_get_bytes_per_sample.
 */
533 FFmpegDecoder::bytes_per_audio_sample () const
535 return av_get_bytes_per_sample (audio_sample_format ());
/** Select the audio stream to decode.
 *  Passes the stream on to AudioDecoder::set_audio_stream.
 *  NOTE(review): any further work done after that call is not visible in this listing.
 *  @param s Stream to use.
 */
539 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
541 AudioDecoder::set_audio_stream (s);
/** Select the subtitle stream to decode.
 *  Passes the stream on to VideoDecoder::set_subtitle_stream.
 *  NOTE(review): any further work done after that call is not visible in this listing.
 *  @param s Stream to use.
 */
546 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
548 VideoDecoder::set_subtitle_stream (s);
/** Run a decoded frame through a filter graph and emit the resulting images.
 *
 *  With _filter_graphs_mutex held, _filter_graphs is searched for a graph that
 *  can_process the frame's size and pixel format.  If the search reaches the end of the
 *  list, a new FilterGraph is created for that size and format, appended to the list and
 *  logged.  (The handling of a successful search is not visible in this listing;
 *  presumably the graph that was found is used.)
 *
 *  Every image returned by the graph's process() is passed to emit_video together with
 *  frame_time().
 *
 *  @param frame Decoded video frame.
 */
554 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
556 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
558 shared_ptr<FilterGraph> graph;
560 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
561 while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
565 if (i == _filter_graphs.end ()) {
566 graph.reset (new FilterGraph (_film, this, libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format));
567 _filter_graphs.push_back (graph);
568 _film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
573 list<shared_ptr<Image> > images = graph->process (frame);
575 for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
576 emit_video (*i, frame_time ());
/** Seek to a position in the content.
 *  @param p Position, in the units expected by do_seek (it is divided by the video
 *  stream's time base there).
 *  @return Whatever do_seek returns for a seek without AVSEEK_FLAG_BACKWARD.
 */
581 FFmpegDecoder::seek (double p)
583 return do_seek (p, false);
/** Seek back to last_source_time(), asking do_seek for a backwards seek.
 *  @return Whatever do_seek returns.
 */
587 FFmpegDecoder::seek_to_last ()
589 /* This AVSEEK_FLAG_BACKWARD in do_seek is a bit of a hack; without it, if we ask for a seek to the same place as last time
590 (used when we change decoder parameters and want to re-fetch the frame) we end up going forwards rather than
591 staying in the same place.
593 return do_seek (last_source_time(), true);
/** Seek the container on the video stream and flush decoder state.
 *
 *  p is converted to a timestamp in the video stream's time base by dividing by that
 *  time base, then av_seek_frame is called on the video stream.  Afterwards the video
 *  codec's buffers are flushed, and the subtitle codec's too if one has been set up.
 *
 *  NOTE(review): the audio codec context is not flushed in the lines visible here --
 *  confirm whether that is intentional.
 *  NOTE(review): how the av_seek_frame result `r' is turned into the return value is not
 *  visible in this listing.
 *
 *  @param p Position to seek to.
 *  @param backwards true to pass AVSEEK_FLAG_BACKWARD to av_seek_frame, false to pass no flags.
 */
597 FFmpegDecoder::do_seek (double p, bool backwards)
599 int64_t const vt = p / av_q2d (_format_context->streams[_video_stream]->time_base);
601 int const r = av_seek_frame (_format_context, _video_stream, vt, backwards ? AVSEEK_FLAG_BACKWARD : 0);
603 avcodec_flush_buffers (_video_codec_context);
604 if (_subtitle_codec_context) {
605 avcodec_flush_buffers (_subtitle_codec_context);
/** Create an FFmpegAudioStream from its string representation.
 *
 *  For old (version < 1) strings there is no type field, so a stream is always created.
 *  Otherwise a type is read from the string (the parsing is not visible in this listing)
 *  and a stream is only created if that type is "ffmpeg".
 *
 *  @param t String representation of the stream.
 *  @param v Version of the representation; the condition selecting the version < 1 path
 *  is not visible here.
 *  @return New stream, or a null pointer if the type is not "ffmpeg".
 */
611 shared_ptr<FFmpegAudioStream>
612 FFmpegAudioStream::create (string t, optional<int> v)
615 /* version < 1; no type in the string, and there's only FFmpeg streams anyway */
616 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
622 if (type != "ffmpeg") {
623 return shared_ptr<FFmpegAudioStream> ();
626 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
/** Construct a stream by parsing its string representation.
 *
 *  Two layouts are handled (the condition choosing between them is not visible here):
 *   - an older one holding an id and a channel count, from which the channel layout
 *     is derived with av_get_default_channel_layout;
 *   - the current one (marked version 1) holding a type, id, sample rate and channel
 *     layout, where the type must be "ffmpeg".
 *  The loop at the end strips name_index space-separated fields from the front of t,
 *  stopping short if no space is left (what is then done with the remainder is not
 *  visible in this listing; presumably it becomes the stream's name).
 *
 *  @param t String representation of the stream.
 *  @param version Version of the representation.
 */
629 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
637 n >> _id >> channels;
638 _channel_layout = av_get_default_channel_layout (channels);
642 /* Current (marked version 1) */
643 n >> type >> _id >> _sample_rate >> _channel_layout;
644 assert (type == "ffmpeg");
647 for (int i = 0; i < name_index; ++i) {
648 size_t const s = t.find (' ');
649 if (s != string::npos) {
650 t = t.substr (s + 1);
/** @return String representation of this stream: the literal "ffmpeg" followed by the
 *  id, sample rate, channel layout and name, separated by spaces.  This is the field
 *  order read back by the current-version branch of the string constructor.
 */
658 FFmpegAudioStream::to_string () const
660 return String::compose ("ffmpeg %1 %2 %3 %4", _id, _sample_rate, _channel_layout, _name);
/** Emit the current video frame, repeating or dropping frames to keep the output in
 *  step with the source timestamps.
 *
 *  The output position is video_frame() / frames_per_second(); the source position is
 *  the frame's best-effort timestamp scaled by the stream's time base.  _first_video is
 *  set to the source position (presumably only for the first frame; the condition is
 *  not visible here).  delta is how far the source, measured from _first_video, is ahead
 *  of the output:
 *   - if delta exceeds one frame period, the last video frame is repeated
 *     rint (delta / one_frame) times and this is logged;
 *   - if delta is greater than minus one frame period, the current frame is emitted
 *     through filter_and_emit_video;
 *   - otherwise the frame is omitted and this is logged.
 */
664 FFmpegDecoder::out_with_sync ()
666 /* Where we are in the output, in seconds */
667 double const out_pts_seconds = video_frame() / frames_per_second();
669 /* Where we are in the source, in seconds */
670 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
671 * av_frame_get_best_effort_timestamp(_frame);
674 String::compose ("Source video frame ready; source at %1, output at %2", source_pts_seconds, out_pts_seconds),
679 _first_video = source_pts_seconds;
682 /* Difference between where we are and where we should be */
683 double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
684 double const one_frame = 1 / frames_per_second();
686 /* Insert frames if required to get out_pts_seconds up to pts_seconds */
687 if (delta > one_frame) {
688 int const extra = rint (delta / one_frame);
689 for (int i = 0; i < extra; ++i) {
690 repeat_last_video ();
693 "Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)",
694 out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
700 if (delta > -one_frame) {
701 /* Process this frame */
702 filter_and_emit_video (_frame);
704 /* Otherwise we are omitting a frame to keep things right */
705 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
/** React to a change in a property of the film.
 *  Discards all cached filter graphs, holding _filter_graphs_mutex while doing so.
 *  NOTE(review): which properties trigger this, and anything else done here, is not
 *  visible in this listing.
 *  @param p Property that changed.
 */
710 FFmpegDecoder::film_changed (Film::Property p)
716 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
717 _filter_graphs.clear ();
727 /** @return Length (in video frames) according to our content's header; computed as the container's duration divided by AV_TIME_BASE, multiplied by frames_per_second() */
729 FFmpegDecoder::length () const
731 return (double(_format_context->duration) / AV_TIME_BASE) * frames_per_second();
/** @return Best-effort timestamp of the frame currently in _frame, multiplied by the
 *  video stream's time base (the same calculation that out_with_sync labels as seconds).
 */
735 FFmpegDecoder::frame_time () const
737 return av_frame_get_best_effort_timestamp(_frame) * av_q2d (_format_context->streams[_video_stream]->time_base);