2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
35 #include <libswscale/swscale.h>
36 #include <libpostproc/postprocess.h>
41 #include "transcoder.h"
45 #include "exceptions.h"
49 #include "ffmpeg_decoder.h"
50 #include "filter_graph.h"
56 using std::stringstream;
58 using boost::shared_ptr;
59 using boost::optional;
60 using boost::dynamic_pointer_cast;
/** Construct an FFmpeg-based decoder.
 *  @param f Film whose content is to be decoded.
 *  @param o Decode options.
 *  @param j Job that this decoder is running in, or 0.
 *
 *  NOTE(review): this view of the file is missing lines (e.g. the base Decoder
 *  initialiser and the setup_* calls that presumably follow) — confirm against
 *  the full source.
 */
62 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const DecodeOptions> o, Job* j)
64 , VideoDecoder (f, o, j)
65 , AudioDecoder (f, o, j)
/* libav contexts start null; they are filled in by setup_video / setup_audio / setup_subtitle
   and checked for null in the destructor before being closed. */
69 , _video_codec_context (0)
71 , _audio_codec_context (0)
73 , _subtitle_codec_context (0)
/* Listen for Film changes so that cached per-film state can be invalidated (see film_changed) */
81 _film_connection = f->Changed.connect (bind (&FFmpegDecoder::film_changed, this, _1));
/** Destructor: close whichever codec contexts were opened, then the input itself.
 *  Each context is null-checked because the corresponding stream may not exist
 *  (e.g. content with no audio or subtitles).
 */
84 FFmpegDecoder::~FFmpegDecoder ()
86 if (_audio_codec_context) {
87 avcodec_close (_audio_codec_context);
90 if (_video_codec_context) {
91 avcodec_close (_video_codec_context);
94 if (_subtitle_codec_context) {
95 avcodec_close (_subtitle_codec_context);
/* Closes the demuxer and frees _format_context (sets it to 0) */
100 avformat_close_input (&_format_context);
/** Open the content file, probe its streams and record the video, audio and
 *  subtitle streams that were found.
 *  @throws OpenFileError if the content cannot be opened.
 *  @throws DecodeError if stream information or a video stream cannot be found,
 *  or a frame cannot be allocated.
 */
104 FFmpegDecoder::setup_general ()
108 if (avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0) < 0) {
109 throw OpenFileError (_film->content_path ());
112 if (avformat_find_stream_info (_format_context, 0) < 0) {
113 throw DecodeError ("could not find stream information");
116 /* Find video, audio and subtitle streams and choose the first of each */
118 for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
119 AVStream* s = _format_context->streams[i];
120 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
122 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
124 /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
125 so bodge it here. No idea why we should have to do this.
/* Derive a default layout from the channel count when the demuxer left it unset */
128 if (s->codec->channel_layout == 0) {
129 s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
132 _audio_streams.push_back (
133 shared_ptr<AudioStream> (
134 new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
138 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
139 _subtitle_streams.push_back (
140 shared_ptr<SubtitleStream> (
141 new SubtitleStream (stream_name (s), i)
/* Video is mandatory; audio/subtitles are optional */
147 if (_video_stream < 0) {
148 throw DecodeError ("could not find video stream");
/* Shared AVFrame reused for every decode call in pass() */
151 _frame = avcodec_alloc_frame ();
153 throw DecodeError ("could not allocate frame");
/** Find and open a decoder for the chosen video stream.
 *  @throws DecodeError if no decoder is available or it cannot be opened.
 */
158 FFmpegDecoder::setup_video ()
160 _video_codec_context = _format_context->streams[_video_stream]->codec;
161 _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
163 if (_video_codec == 0) {
164 throw DecodeError ("could not find video decoder");
167 /* I think this prevents problems with green hash on decodes and
168 "changing frame properties on the fly is not supported by all filters"
169 messages with some content. Although I'm not sure; needs checking.
/* Force single-threaded decode (see note above) */
171 AVDictionary* opts = 0;
172 av_dict_set (&opts, "threads", "1", 0);
174 if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
175 throw DecodeError ("could not open video decoder");
/** Find and open a decoder for the currently-selected audio stream, if any.
 *  A no-op when there is no audio stream.
 *  @throws DecodeError if no decoder is available or it cannot be opened.
 */
180 FFmpegDecoder::setup_audio ()
182 if (!_audio_stream) {
/* Only FFmpegAudioStreams make sense here; the cast result is presumably
   checked on a line not visible in this view — confirm against full source. */
186 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
189 _audio_codec_context = _format_context->streams[ffa->id()]->codec;
190 _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
192 if (_audio_codec == 0) {
193 throw DecodeError ("could not find audio decoder");
196 if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
197 throw DecodeError ("could not open audio decoder");
/** Find and open a decoder for the currently-selected subtitle stream, if any.
 *  A no-op when there is no subtitle stream.
 *  @throws DecodeError if no decoder is available or it cannot be opened.
 */
202 FFmpegDecoder::setup_subtitle ()
204 if (!_subtitle_stream) {
208 _subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
209 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
211 if (_subtitle_codec == 0) {
212 throw DecodeError ("could not find subtitle decoder");
215 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
216 throw DecodeError ("could not open subtitle decoder");
/** Decode one packet from the content and emit whatever it produces:
 *  video frames (via filter_and_emit_video / out_with_sync), audio
 *  (via the Audio signal) or subtitles (via emit_subtitle).
 *  On end-of-file, drains the remaining buffered frames from the
 *  video and audio decoders before finishing.
 */
222 FFmpegDecoder::pass ()
224 int r = av_read_frame (_format_context, &_packet);
/* r < 0 here (error or EOF); this branch presumably sits inside an `if (r < 0)` —
   the guard line is not visible in this view. */
227 if (r != AVERROR_EOF) {
228 /* Maybe we should fail here, but for now we'll just finish off instead */
230 av_strerror (r, buf, sizeof(buf));
231 _film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
234 /* Get any remaining frames */
239 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Flush the video decoder: keep decoding until it stops producing frames */
243 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
244 filter_and_emit_video (_frame);
/* Flush the audio decoder likewise, if audio is being decoded */
247 if (_audio_stream && _opt->decode_audio) {
248 while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
249 int const data_size = av_samples_get_buffer_size (
250 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
253 assert (_audio_codec_context->channels == _film->audio_channels());
254 Audio (deinterleave_audio (_frame->data[0], data_size));
/* --- normal (non-EOF) path: dispatch the packet by stream --- */
261 avcodec_get_frame_defaults (_frame);
263 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
265 if (_packet.stream_index == _video_stream) {
268 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
269 if (r >= 0 && frame_finished) {
/* A partial consume is unexpected for video; log it but carry on */
271 if (r != _packet.size) {
272 _film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
/* With video sync enabled, out_with_sync() handles frame repeat/drop; presumably
   the else branch (plain emit) is on lines not visible here. */
275 if (_opt->video_sync) {
278 filter_and_emit_video (_frame);
282 } else if (ffa && _packet.stream_index == ffa->id() && _opt->decode_audio) {
285 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
287 /* Where we are in the source, in seconds */
288 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
289 * av_frame_get_best_effort_timestamp(_frame);
291 /* We only decode audio if we've had our first video packet through, and if it
292 was before this packet. Until then audio is thrown away.
295 if (_first_video && _first_video.get() <= source_pts_seconds) {
/* Remember when audio started; presumably guarded by !_first_audio on a hidden line */
298 _first_audio = source_pts_seconds;
300 /* This is our first audio frame, and if we've arrived here we must have had our
301 first video frame. Push some silence to make up any gap between our first
302 video frame and our first audio.
305 /* frames of silence that we must push */
306 int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
310 "First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)",
311 _first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
316 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
317 audio->make_silent ();
/* Deinterleave this packet's samples and emit them */
322 int const data_size = av_samples_get_buffer_size (
323 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
326 assert (_audio_codec_context->channels == _film->audio_channels());
327 Audio (deinterleave_audio (_frame->data[0], data_size));
331 } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt->decode_subtitles && _first_video) {
335 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
336 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
337 indicate that the previous subtitle should stop.
339 if (sub.num_rects > 0) {
340 shared_ptr<TimedSubtitle> ts;
342 emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
344 /* some problem with the subtitle; we probably didn't understand it */
/* Empty subtitle: emit a null pointer to signal "clear current subtitle" */
347 emit_subtitle (shared_ptr<TimedSubtitle> ());
349 avsubtitle_free (&sub);
/* Packet is always released, whichever branch consumed it */
353 av_free_packet (&_packet);
/** Convert a block of interleaved audio samples from the decoder's native
 *  sample format into per-channel float buffers.
 *  @param data Pointer to the interleaved (or, for FLTP, planar) sample data.
 *  @param size Size of the data in bytes; must be a whole number of frames.
 *  @return New AudioBuffers containing the deinterleaved float samples.
 */
357 shared_ptr<AudioBuffers>
358 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
360 assert (_film->audio_channels());
361 assert (bytes_per_audio_sample());
363 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
366 /* Deinterleave and convert to float */
/* size must be an exact multiple of one whole frame's worth of bytes */
368 assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
370 int const total_samples = size / bytes_per_audio_sample();
371 int const frames = total_samples / _film->audio_channels();
372 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
374 switch (audio_sample_format()) {
375 case AV_SAMPLE_FMT_S16:
377 int16_t* p = (int16_t *) data;
/* Walk the interleaved samples, scaling 16-bit ints into [-1, 1);
   channel/sample bookkeeping lines are not all visible in this view */
380 for (int i = 0; i < total_samples; ++i) {
381 audio->data(channel)[sample] = float(*p++) / (1 << 15);
/* Wrap to channel 0 and advance one frame after the last channel */
384 if (channel == _film->audio_channels()) {
392 case AV_SAMPLE_FMT_S32:
394 int32_t* p = (int32_t *) data;
397 for (int i = 0; i < total_samples; ++i) {
398 audio->data(channel)[sample] = float(*p++) / (1 << 31);
401 if (channel == _film->audio_channels()) {
/* Planar float: each channel's samples are already contiguous, so just copy.
   NOTE(review): this assumes the planes are laid out back-to-back from `data`;
   confirm against the caller, which passes _frame->data[0] only. */
408 case AV_SAMPLE_FMT_FLTP:
410 float* p = reinterpret_cast<float*> (data);
411 for (int i = 0; i < _film->audio_channels(); ++i) {
412 memcpy (audio->data(i), p, frames * sizeof(float));
/** @return Frame rate of the video stream, preferring the demuxer's average
 *  frame rate and falling back to the "real" base frame rate when the
 *  average is not set.
 */
426 FFmpegDecoder::frames_per_second () const
428 AVStream* s = _format_context->streams[_video_stream];
430 if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
431 return av_q2d (s->avg_frame_rate);
434 return av_q2d (s->r_frame_rate);
/** @return Sample format of the audio codec, or sample format 0 if no
 *  audio codec context has been set up.
 */
438 FFmpegDecoder::audio_sample_format () const
440 if (_audio_codec_context == 0) {
441 return (AVSampleFormat) 0;
444 return _audio_codec_context->sample_fmt;
/** @return Native pixel dimensions of the video, from the codec context. */
448 FFmpegDecoder::native_size () const
450 return Size (_video_codec_context->width, _video_codec_context->height);
/** @return Pixel format of the decoded video. */
454 FFmpegDecoder::pixel_format () const
456 return _video_codec_context->pix_fmt;
/** @return Numerator of the video codec's time base. */
460 FFmpegDecoder::time_base_numerator () const
462 return _video_codec_context->time_base.num;
/** @return Denominator of the video codec's time base. */
466 FFmpegDecoder::time_base_denominator () const
468 return _video_codec_context->time_base.den;
/** @return Numerator of the video's sample (pixel) aspect ratio. */
472 FFmpegDecoder::sample_aspect_ratio_numerator () const
474 return _video_codec_context->sample_aspect_ratio.num;
/** @return Denominator of the video's sample (pixel) aspect ratio. */
478 FFmpegDecoder::sample_aspect_ratio_denominator () const
480 return _video_codec_context->sample_aspect_ratio.den;
/** Build a human-readable name for a stream from its metadata.
 *  @param s Stream to name.
 *  @return Name assembled from the stream's "language" and "title" metadata
 *  entries; the fallback for streams with neither is on lines not visible
 *  in this view.
 */
484 FFmpegDecoder::stream_name (AVStream* s) const
488 AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
493 AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
/* Separate the title from any language prefix already written */
495 if (!n.str().empty()) {
501 if (n.str().empty()) {
/** @return Number of bytes per sample for the current audio sample format. */
509 FFmpegDecoder::bytes_per_audio_sample () const
511 return av_get_bytes_per_sample (audio_sample_format ());
/** Select an audio stream; delegates the bookkeeping to the base class.
 *  (Re-initialisation of the audio codec presumably follows on lines not
 *  visible in this view.)
 */
515 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
517 AudioDecoder::set_audio_stream (s);
/** Select a subtitle stream; delegates the bookkeeping to the base class.
 *  (Re-initialisation of the subtitle codec presumably follows on lines not
 *  visible in this view.)
 */
522 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
524 VideoDecoder::set_subtitle_stream (s);
/** Run a decoded frame through the appropriate filter graph (creating and
 *  caching one for its size/pixel-format if necessary) and emit the
 *  resulting images.
 *  @param frame Frame to filter and emit.
 */
529 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
/* Guard the filter-graph cache; film_changed() clears it from another path */
531 boost::mutex::scoped_lock lm (_filter_graphs_mutex)
533 shared_ptr<FilterGraph> graph;
/* Look for an existing graph that can handle this frame's geometry and format */
535 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
536 while (i != _filter_graphs.end() && !(*i)->can_process (Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
540 if (i == _filter_graphs.end ()) {
541 graph.reset (new FilterGraph (_film, this, Size (frame->width, frame->height), (AVPixelFormat) frame->format));
542 _filter_graphs.push_back (graph);
543 _film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
548 list<shared_ptr<Image> > images = graph->process (frame);
/* Compute the source frame index from the frame's best-effort PTS */
550 SourceFrame const sf = av_q2d (_format_context->streams[_video_stream]->time_base)
551 * av_frame_get_best_effort_timestamp(_frame) * frames_per_second();
553 for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
/** Seek the demuxer to (approximately) a given source frame and flush the
 *  video decoder's buffered state so stale frames are not emitted.
 *  @param f Source frame to seek to.
 */
559 FFmpegDecoder::seek (SourceFrame f)
/* Convert the frame index to a timestamp in the video stream's time base */
561 int64_t const t = static_cast<int64_t>(f) / (av_q2d (_format_context->streams[_video_stream]->time_base) * frames_per_second());
562 int const r = av_seek_frame (_format_context, _video_stream, t, 0);
563 avcodec_flush_buffers (_video_codec_context);
/** Factory: deserialise an FFmpegAudioStream from its string form.
 *  @param t Serialised stream description.
 *  @param v Metadata version of the serialised form, if any.
 *  @return New stream, or a null pointer if the string describes a stream of
 *  some other type.
 */
567 shared_ptr<FFmpegAudioStream>
568 FFmpegAudioStream::create (string t, optional<int> v)
571 /* version < 1; no type in the string, and there's only FFmpeg streams anyway */
572 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
/* version >= 1: the string starts with a type tag; reject non-ffmpeg streams */
578 if (type != "ffmpeg") {
579 return shared_ptr<FFmpegAudioStream> ();
582 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
/** Construct an FFmpegAudioStream by parsing its serialised string form.
 *  @param t Serialised stream description.
 *  @param version Metadata version; pre-1 strings carry a raw channel count
 *  instead of a channel layout, and no type tag.
 */
585 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
/* Legacy (pre-version-1) format: "<id> <channels> <name...>" */
593 n >> _id >> channels;
594 _channel_layout = av_get_default_channel_layout (channels);
598 /* Current (marked version 1) */
599 n >> type >> _id >> _sample_rate >> _channel_layout;
600 assert (type == "ffmpeg");
/* The name is whatever remains after the leading numeric fields; strip
   name_index space-separated tokens off the front of t to find it */
603 for (int i = 0; i < name_index; ++i) {
604 size_t const s = t.find (' ');
605 if (s != string::npos) {
606 t = t.substr (s + 1);
/** @return Serialised form of this stream; the inverse of the parsing
 *  constructor (version 1 format: type, id, sample rate, layout, name).
 */
614 FFmpegAudioStream::to_string () const
616 return String::compose ("ffmpeg %1 %2 %3 %4", _id, _sample_rate, _channel_layout, _name);
/** Emit the current video frame while keeping the output in sync with the
 *  source timestamps: repeat the last frame when the output has fallen
 *  behind by more than a frame, or drop this frame when it has run ahead.
 */
621 FFmpegDecoder::out_with_sync ()
623 /* Where we are in the output, in seconds */
624 double const out_pts_seconds = video_frame() / frames_per_second();
626 /* Where we are in the source, in seconds */
627 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
628 * av_frame_get_best_effort_timestamp(_frame);
631 String::compose ("Source video frame ready; source at %1, output at %2", source_pts_seconds, out_pts_seconds),
/* Record the PTS of the very first video frame; all sync is relative to it */
636 _first_video = source_pts_seconds;
639 /* Difference between where we are and where we should be */
640 double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
641 double const one_frame = 1 / frames_per_second();
643 /* Insert frames if required to get out_pts_seconds up to pts_seconds */
644 if (delta > one_frame) {
645 int const extra = rint (delta / one_frame);
646 for (int i = 0; i < extra; ++i) {
647 repeat_last_video ();
650 "Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)",
651 out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
/* Within a frame of where we should be: emit normally; more than a frame
   ahead: skip this frame entirely */
657 if (delta > -one_frame) {
658 /* Process this frame */
659 filter_and_emit_video (_frame);
661 /* Otherwise we are omitting a frame to keep things right */
662 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
/** Called when a property of the Film changes.  Clears the cached filter
 *  graphs so they are rebuilt with the new settings.  (The switch on @p p
 *  selecting which properties trigger this is on lines not visible in this
 *  view.)
 *  @param p The Film property that changed.
 */
667 FFmpegDecoder::film_changed (Film::Property p)
672 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
673 _filter_graphs.clear ();