2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
35 #include <libswscale/swscale.h>
36 #include <libpostproc/postprocess.h>
41 #include "transcoder.h"
45 #include "exceptions.h"
49 #include "ffmpeg_decoder.h"
50 #include "filter_graph.h"
56 using std::stringstream;
58 using boost::shared_ptr;
59 using boost::optional;
60 using boost::dynamic_pointer_cast;
/* Construct an FFmpegDecoder.
 *
 * @param f Film whose content will be decoded.
 * @param o Decode options, forwarded to the Video/Audio decoder bases.
 * @param j Job this decode belongs to, forwarded to the bases.
 *
 * NOTE(review): this excerpt omits part of the initialiser list and body;
 * the visible lines are only a fragment of the full definition.
 */
62 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const DecodeOptions> o, Job* j)
64 	, VideoDecoder (f, o, j)
65 	, AudioDecoder (f, o, j)
/* Codec contexts start null; setup_video()/setup_audio()/setup_subtitle()
   fill them in, and the destructor only closes non-null ones. */
69 	, _video_codec_context (0)
71 	, _audio_codec_context (0)
73 	, _subtitle_codec_context (0)
/* Listen for Film property changes so we can react (see film_changed()). */
81 	_film_connection = f->Changed.connect (bind (&FFmpegDecoder::film_changed, this, _1));
/* Destructor: close each codec context that was actually opened (they are
   null if the corresponding setup_*() never ran), then close the input.
   NOTE(review): closing braces between the if-blocks are elided in this
   excerpt. */
88 FFmpegDecoder::~FFmpegDecoder ()
90 	if (_audio_codec_context) {
91 		avcodec_close (_audio_codec_context);
94 	if (_video_codec_context) {
95 		avcodec_close (_video_codec_context);
98 	if (_subtitle_codec_context) {
99 		avcodec_close (_subtitle_codec_context);
/* avformat_close_input also frees _format_context and nulls the pointer. */
104 	avformat_close_input (&_format_context);
/* Open the content file, probe its streams, record the audio/subtitle
 * streams we find, and allocate the shared decode frame.
 *
 * @throws OpenFileError if the content cannot be opened.
 * @throws DecodeError if stream info, a video stream, or the frame
 *         allocation fails.
 */
108 FFmpegDecoder::setup_general ()
112 	if (avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0) < 0) {
113 		throw OpenFileError (_film->content_path ());
116 	if (avformat_find_stream_info (_format_context, 0) < 0) {
117 		throw DecodeError ("could not find stream information");
120 	/* Find video, audio and subtitle streams and choose the first of each */
122 	for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
123 		AVStream* s = _format_context->streams[i];
124 		if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
/* NOTE(review): the statement recording the video stream index is elided
   in this excerpt; _video_stream is evidently set somewhere here. */
126 		} else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
128 			/* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
129 			   so bodge it here.  No idea why we should have to do this.
/* Derive a default layout from the channel count when FFmpeg left it 0. */
132 			if (s->codec->channel_layout == 0) {
133 				s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
136 			_audio_streams.push_back (
137 				shared_ptr<AudioStream> (
138 					new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
142 		} else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
143 			_subtitle_streams.push_back (
144 				shared_ptr<SubtitleStream> (
145 					new SubtitleStream (stream_name (s), i)
/* A video stream is mandatory; audio and subtitles are optional. */
151 	if (_video_stream < 0) {
152 		throw DecodeError ("could not find video stream");
/* One AVFrame is reused for every decode call in pass(). */
155 	_frame = avcodec_alloc_frame ();
157 		throw DecodeError ("could not allocate frame");
/* Find and open the decoder for the chosen video stream.
 * @throws DecodeError if no decoder is found or it cannot be opened.
 */
162 FFmpegDecoder::setup_video ()
164 	_video_codec_context = _format_context->streams[_video_stream]->codec;
165 	_video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
167 	if (_video_codec == 0) {
168 		throw DecodeError ("could not find video decoder");
171 	if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
172 		throw DecodeError ("could not open video decoder");
/* Find and open the decoder for the selected audio stream, if any.
 * No-op when there is no audio stream.
 * @throws DecodeError if no decoder is found or it cannot be opened.
 */
177 FFmpegDecoder::setup_audio ()
179 	if (!_audio_stream) {
/* The generic _audio_stream must really be an FFmpeg stream here;
   presumably an assert on ffa follows in an elided line — verify. */
183 	shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
186 	_audio_codec_context = _format_context->streams[ffa->id()]->codec;
187 	_audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
189 	if (_audio_codec == 0) {
190 		throw DecodeError ("could not find audio decoder");
193 	if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
194 		throw DecodeError ("could not open audio decoder");
/* Find and open the decoder for the selected subtitle stream, if any.
 * No-op when there is no subtitle stream.
 * @throws DecodeError if no decoder is found or it cannot be opened.
 */
199 FFmpegDecoder::setup_subtitle ()
201 	if (!_subtitle_stream) {
205 	_subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
206 	_subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
208 	if (_subtitle_codec == 0) {
209 		throw DecodeError ("could not find subtitle decoder");
212 	if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
213 		throw DecodeError ("could not open subtitle decoder");
/* Read one packet from the input and dispatch it to the video, audio or
 * subtitle path.  On read failure/EOF, drain the codecs of any buffered
 * frames instead.  NOTE(review): many lines (braces, declarations such as
 * frame_finished/buf/sub, and the return statements) are elided in this
 * excerpt; comments below describe only what is visible.
 */
219 FFmpegDecoder::pass ()
221 	int r = av_read_frame (_format_context, &_packet);
/* --- read failed: log non-EOF errors, then flush delayed frames --- */
224 		if (r != AVERROR_EOF) {
225 			/* Maybe we should fail here, but for now we'll just finish off instead */
227 			av_strerror (r, buf, sizeof(buf));
228 			_film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
231 		/* Get any remaining frames */
236 		/* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Drain buffered video frames from the codec. */
240 		while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
241 			filter_and_emit_video (_frame);
/* Drain buffered audio frames, if we are decoding audio at all. */
244 		if (_audio_stream && _opt->decode_audio) {
245 			while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
246 				int const data_size = av_samples_get_buffer_size (
247 					0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
250 				assert (_audio_codec_context->channels == _film->audio_channels());
251 				Audio (deinterleave_audio (_frame->data[0], data_size));
/* --- normal path: a packet was read successfully --- */
258 	avcodec_get_frame_defaults (_frame);
260 	shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
262 	if (_packet.stream_index == _video_stream) {
265 		int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
266 		if (r >= 0 && frame_finished) {
268 			if (r != _packet.size) {
269 				_film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
/* With video_sync on, out_with_sync() repeats/drops frames as needed;
   otherwise frames are emitted directly. */
272 			if (_opt->video_sync) {
275 				filter_and_emit_video (_frame);
279 	} else if (ffa && _packet.stream_index == ffa->id() && _opt->decode_audio) {
282 		if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
284 			/* Where we are in the source, in seconds */
285 			double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
286 				* av_frame_get_best_effort_timestamp(_frame);
288 			/* We only decode audio if we've had our first video packet through, and if it
289 			   was before this packet.  Until then audio is thrown away.
292 			if (_first_video && _first_video.get() <= source_pts_seconds) {
295 					_first_audio = source_pts_seconds;
297 					/* This is our first audio frame, and if we've arrived here we must have had our
298 					   first video frame.  Push some silence to make up any gap between our first
299 					   video frame and our first audio.
302 					/* frames of silence that we must push */
303 					int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
307 							"First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)",
308 							_first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
313 					shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
314 					audio->make_silent ();
/* Emit the decoded (non-silence) audio for this packet. */
319 				int const data_size = av_samples_get_buffer_size (
320 					0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
323 				assert (_audio_codec_context->channels == _film->audio_channels());
324 				Audio (deinterleave_audio (_frame->data[0], data_size));
/* Subtitles are only processed once video has started (_first_video set). */
328 	} else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt->decode_subtitles && _first_video) {
332 		if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
333 			/* Sometimes we get an empty AVSubtitle, which is used by some codecs to
334 			   indicate that the previous subtitle should stop.
336 			if (sub.num_rects > 0) {
337 				shared_ptr<TimedSubtitle> ts;
339 					emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
341 					/* some problem with the subtitle; we probably didn't understand it */
/* An empty AVSubtitle clears the current subtitle. */
344 				emit_subtitle (shared_ptr<TimedSubtitle> ());
346 			avsubtitle_free (&sub);
/* Always release the packet's buffers before returning. */
350 	av_free_packet (&_packet);
/* Convert a buffer of interleaved (or planar-float) samples from the codec
 * into per-channel float AudioBuffers.
 *
 * @param data Raw sample data from the decoded frame (_frame->data[0]).
 * @param size Size of the buffer in bytes; must be a whole number of
 *             whole-channel sample frames.
 * @return Newly-allocated AudioBuffers holding the deinterleaved floats.
 *
 * NOTE(review): the channel/sample bookkeeping (increments, resets and the
 * switch's remaining cases/default) is elided in this excerpt.
 */
354 shared_ptr<AudioBuffers>
355 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
357 	assert (_film->audio_channels());
358 	assert (bytes_per_audio_sample());
360 	shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
363 	/* Deinterleave and convert to float */
365 	assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
367 	int const total_samples = size / bytes_per_audio_sample();
368 	int const frames = total_samples / _film->audio_channels();
369 	shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
371 	switch (audio_sample_format()) {
/* 16-bit signed interleaved: scale to [-1, 1) by 2^15. */
372 	case AV_SAMPLE_FMT_S16:
374 		int16_t* p = (int16_t *) data;
377 		for (int i = 0; i < total_samples; ++i) {
378 			audio->data(channel)[sample] = float(*p++) / (1 << 15);
381 			if (channel == _film->audio_channels()) {
/* 32-bit signed interleaved: scale by 2^31.
   NOTE(review): (1 << 31) overflows a 32-bit int (undefined behaviour /
   INT_MIN); this should be written as 2147483648.0f — flag for a fix. */
389 	case AV_SAMPLE_FMT_S32:
391 		int32_t* p = (int32_t *) data;
394 		for (int i = 0; i < total_samples; ++i) {
395 			audio->data(channel)[sample] = float(*p++) / (1 << 31);
398 			if (channel == _film->audio_channels()) {
/* Planar float: one contiguous plane per channel; copy each directly.
   NOTE(review): the advance of p between planes is elided — presumably
   p += frames per iteration; verify against the full source. */
405 	case AV_SAMPLE_FMT_FLTP:
407 		float* p = reinterpret_cast<float*> (data);
408 		for (int i = 0; i < _film->audio_channels(); ++i) {
409 			memcpy (audio->data(i), p, frames * sizeof(float));
/* @return Video frame rate: the stream's average frame rate when FFmpeg
 * provides one, falling back to the stream's "real" base frame rate.
 */
423 FFmpegDecoder::frames_per_second () const
425 	AVStream* s = _format_context->streams[_video_stream];
427 	if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
428 		return av_q2d (s->avg_frame_rate);
431 	return av_q2d (s->r_frame_rate);
/* @return Sample format of the audio codec, or (AVSampleFormat) 0 if no
 * audio codec context has been set up.
 */
435 FFmpegDecoder::audio_sample_format () const
437 	if (_audio_codec_context == 0) {
438 		return (AVSampleFormat) 0;
441 	return _audio_codec_context->sample_fmt;
/* @return Native video dimensions as reported by the video codec context. */
445 FFmpegDecoder::native_size () const
447 	return Size (_video_codec_context->width, _video_codec_context->height);
/* @return Pixel format of the decoded video. */
451 FFmpegDecoder::pixel_format () const
453 	return _video_codec_context->pix_fmt;
/* @return Numerator of the video codec's time base. */
457 FFmpegDecoder::time_base_numerator () const
459 	return _video_codec_context->time_base.num;
/* @return Denominator of the video codec's time base. */
463 FFmpegDecoder::time_base_denominator () const
465 	return _video_codec_context->time_base.den;
/* @return Numerator of the video sample (pixel) aspect ratio. */
469 FFmpegDecoder::sample_aspect_ratio_numerator () const
471 	return _video_codec_context->sample_aspect_ratio.num;
/* @return Denominator of the video sample (pixel) aspect ratio. */
475 FFmpegDecoder::sample_aspect_ratio_denominator () const
477 	return _video_codec_context->sample_aspect_ratio.den;
/* Build a human-readable name for a stream from its "language" and "title"
 * metadata entries, with some fallback when both are empty.
 * NOTE(review): the stringstream declaration, the appends to n, and the
 * fallback value are elided in this excerpt.
 */
481 FFmpegDecoder::stream_name (AVStream* s) const
485 	AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
490 	AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
492 		if (!n.str().empty()) {
498 	if (n.str().empty()) {
/* @return Bytes per single (mono) audio sample for the current format. */
506 FFmpegDecoder::bytes_per_audio_sample () const
508 	return av_get_bytes_per_sample (audio_sample_format ());
/* Select a new audio stream; delegates to the base class first.
   NOTE(review): follow-up work (e.g. re-running setup_audio) is elided. */
512 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
514 	AudioDecoder::set_audio_stream (s);
/* Select a new subtitle stream; delegates to the base class first.
   NOTE(review): follow-up work (e.g. re-running setup_subtitle) is elided. */
519 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
521 	VideoDecoder::set_subtitle_stream (s);
/* Push a decoded frame through the appropriate FilterGraph (creating and
 * caching a graph per size/pixel-format combination) and emit the resulting
 * images with their source-frame timestamps.
 *
 * @param frame Decoded AVFrame to filter and emit.
 */
527 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
/* Guard the graph cache: film_changed() can clear it from another thread. */
529 	boost::mutex::scoped_lock lm (_filter_graphs_mutex);
531 	shared_ptr<FilterGraph> graph;
/* Look for an existing graph matching this frame's size and pixel format. */
533 	list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
534 	while (i != _filter_graphs.end() && !(*i)->can_process (Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
538 	if (i == _filter_graphs.end ()) {
539 		graph.reset (new FilterGraph (_film, this, Size (frame->width, frame->height), (AVPixelFormat) frame->format));
540 		_filter_graphs.push_back (graph);
541 		_film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
546 	list<shared_ptr<Image> > images = graph->process (frame);
/* Convert the frame's best-effort PTS into a source frame index. */
548 	SourceFrame const sf = av_q2d (_format_context->streams[_video_stream]->time_base)
549 		* av_frame_get_best_effort_timestamp(_frame) * frames_per_second();
550 
551 	for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
/* Seek the video stream to (roughly) source frame f and flush codec
 * buffers so stale frames are not emitted after the seek.
 *
 * @param f Target source frame.
 */
557 FFmpegDecoder::seek (SourceFrame f)
/* Convert the frame index into the stream's time-base units. */
559 	int64_t const vt = static_cast<int64_t>(f) / (av_q2d (_format_context->streams[_video_stream]->time_base) * frames_per_second());
561 	/* This AVSEEK_FLAG_BACKWARD is a bit of a hack; without it, if we ask for a seek to the same place as last time
562 	   (used when we change decoder parameters and want to re-fetch the frame) we end up going forwards rather than
563 	   staying in the same place.
565 	int const r = av_seek_frame (_format_context, _video_stream, vt, (f == last_source_frame() ? AVSEEK_FLAG_BACKWARD : 0));
567 	avcodec_flush_buffers (_video_codec_context);
568 	if (_subtitle_codec_context) {
569 		avcodec_flush_buffers (_subtitle_codec_context);
/* Factory: build an FFmpegAudioStream from its serialised string form.
 *
 * @param t Serialised representation.
 * @param v Serialisation version; versions < 1 had no type tag because all
 *          streams were FFmpeg streams.
 * @return New stream, or a null pointer when the type tag is not "ffmpeg".
 *
 * NOTE(review): the version check and the extraction of `type` are elided
 * in this excerpt.
 */
575 shared_ptr<FFmpegAudioStream>
576 FFmpegAudioStream::create (string t, optional<int> v)
579 		/* version < 1; no type in the string, and there's only FFmpeg streams anyway */
580 		return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
586 	if (type != "ffmpeg") {
587 		return shared_ptr<FFmpegAudioStream> ();
590 	return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
/* Deserialise an FFmpegAudioStream from its string form.
 *
 * @param t Serialised representation.
 * @param version Format version: absent/old versions carried id + channel
 *        count (layout derived via av_get_default_channel_layout); version 1
 *        carries type, id, sample rate and channel layout explicitly.
 *
 * NOTE(review): the stringstream `n`, version branching and the recovery of
 * _name from the tail of t are partly elided in this excerpt.
 */
593 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
601 		n >> _id >> channels;
602 		_channel_layout = av_get_default_channel_layout (channels);
606 		/* Current (marked version 1) */
607 		n >> type >> _id >> _sample_rate >> _channel_layout;
608 		assert (type == "ffmpeg");
/* Strip the leading fields from t, leaving the (possibly space-containing)
   stream name. */
611 	for (int i = 0; i < name_index; ++i) {
612 		size_t const s = t.find (' ');
613 		if (s != string::npos) {
614 			t = t.substr (s + 1);
/* Serialise this stream as "ffmpeg <id> <sample_rate> <channel_layout> <name>";
   the inverse of the version-1 parsing in the constructor. */
622 FFmpegAudioStream::to_string () const
624 	return String::compose ("ffmpeg %1 %2 %3 %4", _id, _sample_rate, _channel_layout, _name);
/* Emit the current video frame while keeping output time in step with the
 * source PTS: repeat frames to fill gaps, drop frames when we are ahead.
 * NOTE(review): several lines (the _first_video-unset branch, log calls'
 * scaffolding and closing braces) are elided in this excerpt.
 */
628 FFmpegDecoder::out_with_sync ()
630 	/* Where we are in the output, in seconds */
631 	double const out_pts_seconds = video_frame() / frames_per_second();
633 	/* Where we are in the source, in seconds */
634 	double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
635 		* av_frame_get_best_effort_timestamp(_frame);
638 		String::compose ("Source video frame ready; source at %1, output at %2", source_pts_seconds, out_pts_seconds),
/* Record the PTS of the first video frame; all sync is relative to it. */
643 		_first_video = source_pts_seconds;
646 	/* Difference between where we are and where we should be */
647 	double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
648 	double const one_frame = 1 / frames_per_second();
650 	/* Insert frames if required to get out_pts_seconds up to pts_seconds */
651 	if (delta > one_frame) {
652 		int const extra = rint (delta / one_frame);
653 		for (int i = 0; i < extra; ++i) {
654 			repeat_last_video ();
657 				"Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)",
658 				out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
/* Within one frame of the right place: emit normally; otherwise drop. */
664 	if (delta > -one_frame) {
665 		/* Process this frame */
666 		filter_and_emit_video (_frame);
668 		/* Otherwise we are omitting a frame to keep things right */
669 		_film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
/* React to a Film property change by discarding cached filter graphs so
 * they are rebuilt with the new settings on the next frame.
 * NOTE(review): the switch/case over `p` selecting which properties trigger
 * this is elided in this excerpt.
 */
674 FFmpegDecoder::film_changed (Film::Property p)
680 			boost::mutex::scoped_lock lm (_filter_graphs_mutex);
681 			_filter_graphs.clear ();
691 /** @return Length (in video frames) according to our content's header */
693 FFmpegDecoder::length () const
/* duration is in AV_TIME_BASE units; convert to seconds, then to frames. */
695 	return (double(_format_context->duration) / AV_TIME_BASE) * frames_per_second();