2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
35 #include <libswscale/swscale.h>
36 #include <libpostproc/postprocess.h>
41 #include "transcoder.h"
45 #include "exceptions.h"
49 #include "ffmpeg_decoder.h"
50 #include "filter_graph.h"
56 using std::stringstream;
58 using boost::shared_ptr;
59 using boost::optional;
60 using boost::dynamic_pointer_cast;
/** Construct a decoder for the content of Film `f'.  The three codec context
 *  pointers are zero-initialised so that the destructor can tell which of
 *  them were actually opened by the setup_* methods.
 *  NOTE(review): the initialiser list appears truncated in this view.
 */
63 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, DecodeOptions o)
70 	, _video_codec_context (0)	/* opened by setup_video() */
72 	, _audio_codec_context (0)	/* opened by setup_audio() */
74 	, _subtitle_codec_context (0)	/* opened by setup_subtitle() */
/** Close whichever codec contexts were opened during setup, then close the
 *  format context.  Null checks guard against streams that were never set up.
 */
87 FFmpegDecoder::~FFmpegDecoder ()
89 	if (_audio_codec_context) {
90 		avcodec_close (_audio_codec_context);
93 	if (_video_codec_context) {
94 		avcodec_close (_video_codec_context);
97 	if (_subtitle_codec_context) {
98 		avcodec_close (_subtitle_codec_context);
103 	avformat_close_input (&_format_context);
/** Open the content file, read its stream information and record the video,
 *  audio and subtitle streams that it contains.
 *  @throws OpenFileError if the content cannot be opened.
 *  @throws DecodeError if stream information, a video stream or a frame
 *  buffer cannot be obtained.
 */
107 FFmpegDecoder::setup_general ()
111 	if (avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0) < 0) {
112 		throw OpenFileError (_film->content_path ());
115 	if (avformat_find_stream_info (_format_context, 0) < 0) {
116 		throw DecodeError ("could not find stream information");
119 	/* Find video, audio and subtitle streams and choose the first of each */
121 	for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
122 		AVStream* s = _format_context->streams[i];
123 		if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
125 		} else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
127 			/* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
128 			   so bodge it here.  No idea why we should have to do this.
131 			if (s->codec->channel_layout == 0) {
132 				s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
135 			_audio_streams.push_back (
136 				shared_ptr<AudioStream> (
137 					new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
141 		} else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
142 			_subtitle_streams.push_back (
143 				shared_ptr<SubtitleStream> (
144 					new SubtitleStream (stream_name (s), i)
				/* A video stream is compulsory; audio and subtitles are optional */
150 	if (_video_stream < 0) {
151 		throw DecodeError ("could not find video stream");
	/* Allocate the AVFrame that all subsequent decode calls will fill in */
154 	_frame = avcodec_alloc_frame ();
156 		throw DecodeError ("could not allocate frame");
/** Find and open a decoder for the chosen video stream.
 *  @throws DecodeError if no suitable decoder exists or it cannot be opened.
 */
161 FFmpegDecoder::setup_video ()
163 	_video_codec_context = _format_context->streams[_video_stream]->codec;
164 	_video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
166 	if (_video_codec == 0) {
167 		throw DecodeError ("could not find video decoder");
170 	if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
171 		throw DecodeError ("could not open video decoder");
/** Find and open a decoder for the chosen audio stream, if there is one.
 *  Does nothing when no audio stream has been selected.
 *  @throws DecodeError if no suitable decoder exists or it cannot be opened.
 */
176 FFmpegDecoder::setup_audio ()
178 	if (!_audio_stream) {
	/* Only FFmpegAudioStreams can be decoded here, so downcast to get the stream id */
182 	shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
185 	_audio_codec_context = _format_context->streams[ffa->id()]->codec;
186 	_audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
188 	if (_audio_codec == 0) {
189 		throw DecodeError ("could not find audio decoder");
192 	if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
193 		throw DecodeError ("could not open audio decoder");
/** Find and open a decoder for the chosen subtitle stream, if there is one.
 *  Does nothing when no subtitle stream has been selected.
 *  @throws DecodeError if no suitable decoder exists or it cannot be opened.
 */
198 FFmpegDecoder::setup_subtitle ()
200 	if (!_subtitle_stream) {
204 	_subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
205 	_subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
207 	if (_subtitle_codec == 0) {
208 		throw DecodeError ("could not find subtitle decoder");
211 	if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
212 		throw DecodeError ("could not open subtitle decoder");
/** Read and decode one packet from the content, emitting any video, audio or
 *  subtitle data that results.  On end-of-file the decoders are drained of
 *  their remaining buffered frames.
 *  NOTE(review): parts of this method appear elided in this view; the control
 *  flow described below is inferred only from the visible lines.
 */
218 FFmpegDecoder::pass ()
220 	int r = av_read_frame (_format_context, &_packet);
		/* r < 0 here: either genuine EOF or a read error */
223 		if (r != AVERROR_EOF) {
224 			/* Maybe we should fail here, but for now we'll just finish off instead */
226 			av_strerror (r, buf, sizeof(buf));
227 			_film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
230 		/* Get any remaining frames */
235 		/* XXX: should we reset _packet.data and size after each *_decode_* call? */
		/* Drain buffered video frames from the decoder */
239 		while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
240 			filter_and_emit_video (_frame);
		/* Drain buffered audio frames, if we are decoding audio */
243 		if (_audio_stream && _opt.decode_audio) {
244 			while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
245 				int const data_size = av_samples_get_buffer_size (
246 					0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
				/* The content's channel count is assumed to match the film's */
249 				assert (_audio_codec_context->channels == _film->audio_channels());
250 				Audio (deinterleave_audio (_frame->data, data_size));
	/* Normal (non-EOF) path: reset the frame then dispatch on stream type */
257 	avcodec_get_frame_defaults (_frame);
259 	shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
261 	if (_packet.stream_index == _video_stream) {
264 		int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
265 		if (r >= 0 && frame_finished) {
267 			if (r != _packet.size) {
268 				_film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
			/* Either sync video against its PTS or emit it directly */
271 			if (_opt.video_sync) {
274 				filter_and_emit_video (_frame);
278 	} else if (ffa && _packet.stream_index == ffa->id() && _opt.decode_audio) {
281 		if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
283 			/* Where we are in the source, in seconds */
284 			double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
285 				* av_frame_get_best_effort_timestamp(_frame);
287 			/* We only decode audio if we've had our first video packet through, and if it
288 			   was before this packet.  Until then audio is thrown away.
291 			if (_first_video && _first_video.get() <= source_pts_seconds) {
294 					_first_audio = source_pts_seconds;
296 					/* This is our first audio frame, and if we've arrived here we must have had our
297 					   first video frame.  Push some silence to make up any gap between our first
298 					   video frame and our first audio.
301 					/* frames of silence that we must push */
302 					int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
306 							"First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)",
307 							_first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
312 					shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
313 					audio->make_silent ();
				/* Now deinterleave and emit the decoded audio itself */
318 				int const data_size = av_samples_get_buffer_size (
319 					0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
322 				assert (_audio_codec_context->channels == _film->audio_channels());
323 				Audio (deinterleave_audio (_frame->data, data_size));
	/* Subtitles are only considered once video has started (_first_video set) */
327 	} else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt.decode_subtitles && _first_video) {
331 		if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
332 			/* Sometimes we get an empty AVSubtitle, which is used by some codecs to
333 			   indicate that the previous subtitle should stop.
335 			if (sub.num_rects > 0) {
336 				shared_ptr<TimedSubtitle> ts;
338 					emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub)));
340 					/* some problem with the subtitle; we probably didn't understand it */
				/* An empty subtitle means "clear the current subtitle" */
343 				emit_subtitle (shared_ptr<TimedSubtitle> ());
345 			avsubtitle_free (&sub);
	/* Release the packet's buffer before the next pass() */
349 	av_free_packet (&_packet);
353 /** @param data pointer to array of pointers to buffers.
354  *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 *  @param size size of the decoded data, in bytes.
 *  @return the audio deinterleaved and converted to float samples in [-1, 1).
 *  @throws DecodeError if the sample format is not one of the handled cases.
 */
356 shared_ptr<AudioBuffers>
357 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
359 	assert (_film->audio_channels());
360 	assert (bytes_per_audio_sample());
362 	shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
365 	/* Deinterleave and convert to float */
	/* size must be a whole number of frames (samples * channels) */
367 	assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
369 	int const total_samples = size / bytes_per_audio_sample();
370 	int const frames = total_samples / _film->audio_channels();
371 	shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
373 	switch (audio_sample_format()) {
374 	case AV_SAMPLE_FMT_S16:
		/* Interleaved signed 16-bit: walk samples, scale by 2^-15 */
376 		int16_t* p = reinterpret_cast<int16_t *> (data[0]);
379 		for (int i = 0; i < total_samples; ++i) {
380 			audio->data(channel)[sample] = float(*p++) / (1 << 15);
			/* Advance channel; wrap to the next frame when all channels done */
383 			if (channel == _film->audio_channels()) {
391 	case AV_SAMPLE_FMT_S16P:
		/* Planar signed 16-bit: one input buffer per channel */
393 		int16_t** p = reinterpret_cast<int16_t **> (data);
394 		for (int i = 0; i < _film->audio_channels(); ++i) {
395 			for (int j = 0; j < frames; ++j) {
396 				audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
402 	case AV_SAMPLE_FMT_S32:
		/* Interleaved signed 32-bit: scale by 2^-31 */
404 		int32_t* p = reinterpret_cast<int32_t *> (data[0]);
407 		for (int i = 0; i < total_samples; ++i) {
408 			audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
411 			if (channel == _film->audio_channels()) {
419 	case AV_SAMPLE_FMT_FLT:
		/* Interleaved float: copy samples straight through */
421 		float* p = reinterpret_cast<float*> (data[0]);
424 		for (int i = 0; i < total_samples; ++i) {
425 			audio->data(channel)[sample] = *p++;
428 			if (channel == _film->audio_channels()) {
436 	case AV_SAMPLE_FMT_FLTP:
		/* Planar float: memcpy each channel's plane */
438 		float** p = reinterpret_cast<float**> (data);
439 		for (int i = 0; i < _film->audio_channels(); ++i) {
440 			memcpy (audio->data(i), p[i], frames * sizeof(float));
446 		throw DecodeError (String::compose ("Unrecognised audio sample format (%1)", static_cast<int> (audio_sample_format())));
/** @return the video frame rate, preferring the container's average frame
 *  rate and falling back to the stream's "real" base frame rate when the
 *  average is unset.
 */
453 FFmpegDecoder::frames_per_second () const
455 	AVStream* s = _format_context->streams[_video_stream];
457 	if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
458 		return av_q2d (s->avg_frame_rate);
461 	return av_q2d (s->r_frame_rate);
/** @return the sample format of the audio stream, or a zero-valued
 *  AVSampleFormat when no audio codec context has been opened.
 */
465 FFmpegDecoder::audio_sample_format () const
467 	if (_audio_codec_context == 0) {
468 		return (AVSampleFormat) 0;
471 	return _audio_codec_context->sample_fmt;
/** @return the video's native size in pixels, from the video codec context */
475 FFmpegDecoder::native_size () const
477 	return libdcp::Size (_video_codec_context->width, _video_codec_context->height);
/** @return the video's pixel format, from the video codec context */
481 FFmpegDecoder::pixel_format () const
483 	return _video_codec_context->pix_fmt;
/** @return the numerator of the video codec context's time base */
487 FFmpegDecoder::time_base_numerator () const
489 	return _video_codec_context->time_base.num;
/** @return the denominator of the video codec context's time base */
493 FFmpegDecoder::time_base_denominator () const
495 	return _video_codec_context->time_base.den;
/** @return the numerator of the video's sample (pixel) aspect ratio */
499 FFmpegDecoder::sample_aspect_ratio_numerator () const
501 	return _video_codec_context->sample_aspect_ratio.num;
/** @return the denominator of the video's sample (pixel) aspect ratio */
505 FFmpegDecoder::sample_aspect_ratio_denominator () const
507 	return _video_codec_context->sample_aspect_ratio.den;
/** Build a human-readable name for a stream from its "language" and "title"
 *  metadata entries, if present.
 *  NOTE(review): parts of the string assembly appear elided in this view.
 */
511 FFmpegDecoder::stream_name (AVStream* s) const
515 	AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
520 	AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
		/* Separate language and title when both were found */
522 		if (!n.str().empty()) {
	/* Fall back to a default when no metadata produced a name */
528 	if (n.str().empty()) {
/** @return the size in bytes of one audio sample in the current sample format */
536 FFmpegDecoder::bytes_per_audio_sample () const
538 	return av_get_bytes_per_sample (audio_sample_format ());
/** Select the audio stream to decode, delegating to the base class first.
 *  NOTE(review): the rest of the body (presumably re-running audio setup)
 *  appears elided in this view.
 */
542 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
544 	AudioDecoder::set_audio_stream (s);
/** Select the subtitle stream to decode, delegating to the base class first.
 *  NOTE(review): the rest of the body (presumably re-running subtitle setup)
 *  appears elided in this view.
 */
549 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
551 	VideoDecoder::set_subtitle_stream (s);
/** Run a decoded frame through a FilterGraph matched to its size and pixel
 *  format (creating and caching a new graph on first sight of a given
 *  size/format combination), then emit each resulting image.
 */
557 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
	/* _filter_graphs is shared state; guard look-up and insertion */
559 	boost::mutex::scoped_lock lm (_filter_graphs_mutex);
561 	shared_ptr<FilterGraph> graph;
563 	list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
564 	while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
568 	if (i == _filter_graphs.end ()) {
569 		graph.reset (new FilterGraph (_film, this, libdcp::Size (frame->width, frame->height), (AVPixelFormat) frame->format));
570 		_filter_graphs.push_back (graph);
571 		_film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
576 	list<shared_ptr<Image> > images = graph->process (frame);
578 	for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
579 		emit_video (*i, frame_time ());
/** Seek to time `p' (in seconds) without the backwards-seek flag */
584 FFmpegDecoder::seek (double p)
586 	return do_seek (p, false);
/** Seek back to the source time of the last frame, so that it can be
 *  re-fetched (e.g. after a change of decoder parameters).
 */
590 FFmpegDecoder::seek_to_last ()
592 	/* This AVSEEK_FLAG_BACKWARD in do_seek is a bit of a hack; without it, if we ask for a seek to the same place as last time
593 	   (used when we change decoder parameters and want to re-fetch the frame) we end up going forwards rather than
594 	   staying in the same place.
596 	return do_seek (last_source_time(), true);
/** Seek the format context to time `p' (in seconds) on the video stream and
 *  flush the video (and, if open, subtitle) decoders so no stale frames
 *  survive the seek.
 *  @param backwards true to pass AVSEEK_FLAG_BACKWARD to av_seek_frame.
 */
600 FFmpegDecoder::do_seek (double p, bool backwards)
	/* Convert seconds to the video stream's time base units */
602 	int64_t const vt = p / av_q2d (_format_context->streams[_video_stream]->time_base);
604 	int const r = av_seek_frame (_format_context, _video_stream, vt, backwards ? AVSEEK_FLAG_BACKWARD : 0);
606 	avcodec_flush_buffers (_video_codec_context);
607 	if (_subtitle_codec_context) {
608 		avcodec_flush_buffers (_subtitle_codec_context);
/** Deserialise an FFmpegAudioStream from its string form `t'.
 *  @param v metadata version; when absent (pre-version-1) the string has no
 *  leading type token, and only FFmpeg streams existed anyway.
 *  @return the stream, or a null pointer if the string is of some other
 *  (non-"ffmpeg") stream type.
 */
614 shared_ptr<FFmpegAudioStream>
615 FFmpegAudioStream::create (string t, optional<int> v)
618 		/* version < 1; no type in the string, and there's only FFmpeg streams anyway */
619 		return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
625 	if (type != "ffmpeg") {
626 		return shared_ptr<FFmpegAudioStream> ();
629 	return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
/** Construct an FFmpegAudioStream by parsing serialised form `t'.
 *  Handles both the old (pre-version-1) format, which stores a channel count
 *  that must be converted to a channel layout, and the current version-1
 *  format ("ffmpeg <id> <sample-rate> <channel-layout> <name>").
 *  NOTE(review): parts of the parsing appear elided in this view.
 */
632 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
640 		n >> _id >> channels;
		/* Old metadata stored a channel count; derive a layout from it */
641 		_channel_layout = av_get_default_channel_layout (channels);
645 		/* Current (marked version 1) */
646 		n >> type >> _id >> _sample_rate >> _channel_layout;
647 		assert (type == "ffmpeg");
	/* The name is whatever follows the first `name_index' space-separated fields */
650 	for (int i = 0; i < name_index; ++i) {
651 		size_t const s = t.find (' ');
652 		if (s != string::npos) {
653 			t = t.substr (s + 1);
/** @return this stream serialised as "ffmpeg <id> <sample-rate> <channel-layout> <name>" */
661 FFmpegAudioStream::to_string () const
663 	return String::compose ("ffmpeg %1 %2 %3 %4", _id, _sample_rate, _channel_layout, _name);
/** Emit the current video frame while keeping the output in sync with the
 *  source's presentation timestamps: insert repeated frames when the output
 *  lags the source by more than a frame, and drop the frame when the output
 *  is ahead.
 *  NOTE(review): parts of this method appear elided in this view.
 */
667 FFmpegDecoder::out_with_sync ()
669 	/* Where we are in the output, in seconds */
670 	double const out_pts_seconds = video_frame() / frames_per_second();
672 	/* Where we are in the source, in seconds */
673 	double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
674 		* av_frame_get_best_effort_timestamp(_frame);
677 		String::compose ("Source video frame ready; source at %1, output at %2", source_pts_seconds, out_pts_seconds),
	/* Record the PTS of the first video frame; later deltas are relative to it */
682 		_first_video = source_pts_seconds;
685 	/* Difference between where we are and where we should be */
686 	double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
687 	double const one_frame = 1 / frames_per_second();
689 	/* Insert frames if required to get out_pts_seconds up to pts_seconds */
690 	if (delta > one_frame) {
691 		int const extra = rint (delta / one_frame);
692 		for (int i = 0; i < extra; ++i) {
693 			repeat_last_video ();
696 				"Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)",
697 				out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
703 	if (delta > -one_frame) {
704 		/* Process this frame */
705 		filter_and_emit_video (_frame);
707 		/* Otherwise we are omitting a frame to keep things right */
708 		_film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
/** Called when a property of the Film changes.  Discards the cached filter
 *  graphs so they are rebuilt with the new settings.
 *  NOTE(review): the property dispatch appears elided in this view.
 */
713 FFmpegDecoder::film_changed (Film::Property p)
		/* Same mutex as filter_and_emit_video: the graph cache is shared state */
719 		boost::mutex::scoped_lock lm (_filter_graphs_mutex);
720 		_filter_graphs.clear ();
730 /** @return Length (in video frames) according to our content's header */
732 FFmpegDecoder::length () const
	/* AVFormatContext::duration is in AV_TIME_BASE units; convert to seconds then frames */
734 	return (double(_format_context->duration) / AV_TIME_BASE) * frames_per_second();
/** @return the current frame's best-effort timestamp converted to seconds
 *  using the video stream's time base.
 */
738 FFmpegDecoder::frame_time () const
740 	return av_frame_get_best_effort_timestamp(_frame) * av_q2d (_format_context->streams[_video_stream]->time_base);