2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
35 #include <libswscale/swscale.h>
36 #include <libpostproc/postprocess.h>
41 #include "transcoder.h"
45 #include "exceptions.h"
49 #include "ffmpeg_decoder.h"
50 #include "filter_graph.h"
56 using std::stringstream;
58 using boost::shared_ptr;
59 using boost::optional;
60 using boost::dynamic_pointer_cast;
/** Construct a decoder for a film's content.
 *  @param f Film; passed on to the VideoDecoder and AudioDecoder bases.
 *  @param o Options; passed on to the same bases.
 *  @param j Job; passed on to the same bases.
 *
 *  The video, audio and subtitle codec context pointers all start out null.
 */
62 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j)
64 , VideoDecoder (f, o, j)
65 , AudioDecoder (f, o, j)
69 , _video_codec_context (0)
71 , _audio_codec_context (0)
73 , _subtitle_codec_context (0)
/** Close each of the audio, video and subtitle codec contexts that is non-null,
 *  then close the input held by _format_context.
 *
 *  NOTE(review): nothing visible here releases _frame, which setup_general
 *  allocates with avcodec_alloc_frame -- confirm that it is freed.
 */
82 FFmpegDecoder::~FFmpegDecoder ()
84 if (_audio_codec_context) {
85 avcodec_close (_audio_codec_context);
88 if (_video_codec_context) {
89 avcodec_close (_video_codec_context);
92 if (_subtitle_codec_context) {
93 avcodec_close (_subtitle_codec_context);
98 avformat_close_input (&_format_context);
/** Open the film's content file with libavformat, read its stream information
 *  and look through its streams.
 *
 *  Every audio stream is added to _audio_streams as an FFmpegAudioStream (a
 *  default channel layout for its channel count is filled in first if the codec
 *  reports a layout of 0) and every subtitle stream is added to
 *  _subtitle_streams.  _frame is then allocated with avcodec_alloc_frame.
 *
 *  @throws OpenFileError if avformat_open_input does not return 0.
 *  @throws DecodeError if stream information cannot be found, if _video_stream
 *  is still negative after the streams have been examined, or (presumably when
 *  the allocation fails) with "could not allocate frame".
 */
102 FFmpegDecoder::setup_general ()
108 if ((r = avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0)) != 0) {
109 throw OpenFileError (_film->content_path ());
112 if (avformat_find_stream_info (_format_context, 0) < 0) {
113 throw DecodeError ("could not find stream information");
116 /* Find video, audio and subtitle streams and choose the first of each */
118 for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
119 AVStream* s = _format_context->streams[i];
120 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
122 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
124 /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
125 so bodge it here. No idea why we should have to do this.
128 if (s->codec->channel_layout == 0) {
129 s->codec->channel_layout = av_get_default_channel_layout (s->codec->channels);
132 _audio_streams.push_back (
133 shared_ptr<AudioStream> (
134 new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
138 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
139 _subtitle_streams.push_back (
140 shared_ptr<SubtitleStream> (
141 new SubtitleStream (stream_name (s), i)
147 if (_video_stream < 0) {
148 throw DecodeError ("could not find video stream");
151 _frame = avcodec_alloc_frame ();
153 throw DecodeError ("could not allocate frame");
158 FFmpegDecoder::setup_video ()
160 _video_codec_context = _format_context->streams[_video_stream]->codec;
161 _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
163 if (_video_codec == 0) {
164 throw DecodeError ("could not find video decoder");
167 /* I think this prevents problems with green hash on decodes and
168 "changing frame properties on the fly is not supported by all filters"
169 messages with some content. Although I'm not sure; needs checking.
171 AVDictionary* opts = 0;
172 av_dict_set (&opts, "threads", "1", 0);
174 if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
175 throw DecodeError ("could not open video decoder");
/** Find and open a decoder for the selected audio stream.
 *
 *  _audio_stream is downcast to FFmpegAudioStream and its id() is used as the
 *  index of the stream within _format_context.  The stream's codec context is
 *  stored in _audio_codec_context and its decoder in _audio_codec.
 *  The first test is on _audio_stream being unset (presumably there is then
 *  nothing to set up).
 *
 *  @throws DecodeError if no decoder is found for the codec, or if the decoder
 *  cannot be opened.
 */
180 FFmpegDecoder::setup_audio ()
182 if (!_audio_stream) {
186 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
189 _audio_codec_context = _format_context->streams[ffa->id()]->codec;
190 _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
192 if (_audio_codec == 0) {
193 throw DecodeError ("could not find audio decoder");
196 if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
197 throw DecodeError ("could not open audio decoder");
/** Find and open a decoder for the selected subtitle stream.
 *
 *  The id() of _subtitle_stream is used as the index of the stream within
 *  _format_context.  The stream's codec context is stored in
 *  _subtitle_codec_context and its decoder in _subtitle_codec.
 *  The first test is on _subtitle_stream being unset (presumably there is then
 *  nothing to set up).
 *
 *  @throws DecodeError if no decoder is found for the codec, or if the decoder
 *  cannot be opened.
 */
202 FFmpegDecoder::setup_subtitle ()
204 if (!_subtitle_stream) {
208 _subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
209 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
211 if (_subtitle_codec == 0) {
212 throw DecodeError ("could not find subtitle decoder");
215 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
216 throw DecodeError ("could not open subtitle decoder");
/** Read the next packet from the input with av_read_frame and decode it.
 *
 *  End of input / read problems: a result other than AVERROR_EOF is logged with
 *  its av_strerror text.  The video decoder is then run repeatedly, emitting each
 *  finished frame, until it stops producing them; the same is done for audio if
 *  there is an audio stream and _opt->decode_audio is set.
 *
 *  Otherwise the packet is handled according to its stream index:
 *
 *  - Video: the frame's source timestamp (best-effort timestamp scaled by the
 *    stream's time base), taken relative to _first_video, is compared with the
 *    output position video_frame() / frames_per_second().  If the source is more
 *    than one frame ahead, the last frame is repeated rint (delta / one_frame)
 *    times.  The new frame is filtered and emitted if delta > -one_frame; a
 *    "Frame removed" line is logged for the other case.
 *
 *  - Audio (only if _opt->decode_audio): a decoded frame is used only once
 *    _first_video is set and is not later than the frame's timestamp.  An
 *    AudioBuffers of silence is made, sized from the gap between the first
 *    video and first audio timestamps at the stream's sample rate; the frame's
 *    own samples go through deinterleave_audio to Audio().
 *
 *  - Subtitles (only if _opt->decode_subtitles and _first_video is set): a
 *    TimedSubtitle offset by _first_video is emitted for a subtitle that has
 *    rects, and an empty pointer is also emitted (NOTE(review): this looks like
 *    the "previous subtitle should stop" case described below -- confirm).
 *
 *  The packet is released with av_free_packet at the end.
 *
 *  NOTE(review): the result of avcodec_decode_subtitle2 is tested only for being
 *  non-zero, whereas the video and audio decode calls in this function test for
 *  `>= 0`.  FFmpeg documents a negative return as an error for the subtitle call
 *  too, so `>= 0` looks like what was meant -- confirm.
 *
 *  NOTE(review): the value from av_samples_get_buffer_size is passed straight to
 *  deinterleave_audio; FFmpeg documents a negative error code as a possible
 *  result -- confirm whether that needs handling.
 */
222 FFmpegDecoder::pass ()
224 int r = av_read_frame (_format_context, &_packet);
227 if (r != AVERROR_EOF) {
228 /* Maybe we should fail here, but for now we'll just finish off instead */
230 av_strerror (r, buf, sizeof(buf));
231 _film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
234 /* Get any remaining frames */
239 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
243 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
244 filter_and_emit_video (_frame);
247 if (_audio_stream && _opt->decode_audio) {
248 while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
249 int const data_size = av_samples_get_buffer_size (
250 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
253 assert (_audio_codec_context->channels == _film->audio_channels());
254 Audio (deinterleave_audio (_frame->data[0], data_size));
261 avcodec_get_frame_defaults (_frame);
263 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
265 if (_packet.stream_index == _video_stream) {
268 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
269 if (r >= 0 && frame_finished) {
271 if (r != _packet.size) {
272 _film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
275 /* Where we are in the output, in seconds */
276 double const out_pts_seconds = video_frame() / frames_per_second();
278 /* Where we are in the source, in seconds */
279 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
280 * av_frame_get_best_effort_timestamp(_frame);
283 String::compose ("Source video frame ready; source at %1, output at %2", source_pts_seconds, out_pts_seconds),
288 _first_video = source_pts_seconds;
291 /* Difference between where we are and where we should be */
292 double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
293 double const one_frame = 1 / frames_per_second();
295 /* Insert frames if required to get out_pts_seconds up to pts_seconds */
296 if (delta > one_frame) {
297 int const extra = rint (delta / one_frame);
298 for (int i = 0; i < extra; ++i) {
299 repeat_last_video ();
302 "Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)",
303 out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
309 if (delta > -one_frame) {
310 /* Process this frame */
311 filter_and_emit_video (_frame);
313 /* Otherwise we are omitting a frame to keep things right */
314 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
318 } else if (ffa && _packet.stream_index == ffa->id() && _opt->decode_audio) {
321 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
323 /* Where we are in the source, in seconds */
324 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
325 * av_frame_get_best_effort_timestamp(_frame);
327 /* We only decode audio if we've had our first video packet through, and if it
328 was before this packet. Until then audio is thrown away.
331 if (_first_video && _first_video.get() <= source_pts_seconds) {
334 _first_audio = source_pts_seconds;
336 /* This is our first audio frame, and if we've arrived here we must have had our
337 first video frame. Push some silence to make up any gap between our first
338 video frame and our first audio.
341 /* frames of silence that we must push */
342 int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
346 "First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)",
347 _first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
352 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
353 audio->make_silent ();
358 int const data_size = av_samples_get_buffer_size (
359 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
362 assert (_audio_codec_context->channels == _film->audio_channels());
363 Audio (deinterleave_audio (_frame->data[0], data_size));
367 } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt->decode_subtitles && _first_video) {
371 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
372 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
373 indicate that the previous subtitle should stop.
375 if (sub.num_rects > 0) {
376 shared_ptr<TimedSubtitle> ts;
378 emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
380 /* some problem with the subtitle; we probably didn't understand it */
383 emit_subtitle (shared_ptr<TimedSubtitle> ());
385 avsubtitle_free (&sub);
389 av_free_packet (&_packet);
393 shared_ptr<AudioBuffers>
394 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
396 assert (_film->audio_channels());
397 assert (bytes_per_audio_sample());
399 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
402 /* Deinterleave and convert to float */
404 assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
406 int const total_samples = size / bytes_per_audio_sample();
407 int const frames = total_samples / _film->audio_channels();
408 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
410 switch (audio_sample_format()) {
411 case AV_SAMPLE_FMT_S16:
413 int16_t* p = (int16_t *) data;
416 for (int i = 0; i < total_samples; ++i) {
417 audio->data(channel)[sample] = float(*p++) / (1 << 15);
420 if (channel == _film->audio_channels()) {
428 case AV_SAMPLE_FMT_S32:
430 int32_t* p = (int32_t *) data;
433 for (int i = 0; i < total_samples; ++i) {
434 audio->data(channel)[sample] = float(*p++) / (1 << 31);
437 if (channel == _film->audio_channels()) {
444 case AV_SAMPLE_FMT_FLTP:
446 float* p = reinterpret_cast<float*> (data);
447 for (int i = 0; i < _film->audio_channels(); ++i) {
448 memcpy (audio->data(i), p, frames * sizeof(float));
/** @return Frame rate of the video stream: its avg_frame_rate when both the
 *  numerator and denominator of that are non-zero, otherwise its r_frame_rate.
 */
462 FFmpegDecoder::frames_per_second () const
464 AVStream* s = _format_context->streams[_video_stream];
466 if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
467 return av_q2d (s->avg_frame_rate);
470 return av_q2d (s->r_frame_rate);
/** @return Sample format of the audio codec context, or (AVSampleFormat) 0 if
 *  there is no audio codec context.
 */
474 FFmpegDecoder::audio_sample_format () const
476 if (_audio_codec_context == 0) {
477 return (AVSampleFormat) 0;
480 return _audio_codec_context->sample_fmt;
/** @return Size made from the width and height held by the video codec context. */
484 FFmpegDecoder::native_size () const
486 return Size (_video_codec_context->width, _video_codec_context->height);
/** @return Pixel format (pix_fmt) held by the video codec context. */
490 FFmpegDecoder::pixel_format () const
492 return _video_codec_context->pix_fmt;
/** @return Numerator of the video codec context's time base. */
496 FFmpegDecoder::time_base_numerator () const
498 return _video_codec_context->time_base.num;
/** @return Denominator of the video codec context's time base. */
502 FFmpegDecoder::time_base_denominator () const
504 return _video_codec_context->time_base.den;
/** @return Numerator of the video codec context's sample aspect ratio. */
508 FFmpegDecoder::sample_aspect_ratio_numerator () const
510 return _video_codec_context->sample_aspect_ratio.num;
/** @return Denominator of the video codec context's sample aspect ratio. */
514 FFmpegDecoder::sample_aspect_ratio_denominator () const
516 return _video_codec_context->sample_aspect_ratio.den;
/** Make a name for a stream, looking up the "language" and "title" entries of
 *  its metadata.
 *  @param s Stream whose metadata is consulted.
 *
 *  The name is built up in n; there are separate cases for n being non-empty
 *  once the title has been looked up and for n being empty at the end.
 */
520 FFmpegDecoder::stream_name (AVStream* s) const
524 AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
529 AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
531 if (!n.str().empty()) {
537 if (n.str().empty()) {
/** @return What av_get_bytes_per_sample gives for audio_sample_format(). */
545 FFmpegDecoder::bytes_per_audio_sample () const
547 return av_get_bytes_per_sample (audio_sample_format ());
/** Select an audio stream.
 *  @param s Stream to use; handed on to AudioDecoder::set_audio_stream.
 */
551 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
553 AudioDecoder::set_audio_stream (s);
/** Select a subtitle stream.
 *  @param s Stream to use; handed on to VideoDecoder::set_subtitle_stream.
 */
558 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
560 VideoDecoder::set_subtitle_stream (s);
/** Run a decoded frame through a filter graph and go through the images that
 *  come out.
 *  @param frame Decoded video frame.
 *
 *  _filter_graphs is searched for a graph that can_process the frame's size and
 *  pixel format.  If the search reaches the end of the list a new FilterGraph is
 *  made for that size and format (with _opt->apply_crop), added to the list and
 *  logged.  The graph's process() result is a list of images, which is then
 *  iterated over.
 */
565 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
567 shared_ptr<FilterGraph> graph;
569 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
570 while (i != _filter_graphs.end() && !(*i)->can_process (Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
574 if (i == _filter_graphs.end ()) {
575 graph.reset (new FilterGraph (_film, this, _opt->apply_crop, Size (frame->width, frame->height), (AVPixelFormat) frame->format));
576 _filter_graphs.push_back (graph);
577 _film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
582 list<shared_ptr<Image> > images = graph->process (frame);
584 for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
/** Make an FFmpegAudioStream from its string form.
 *  @param t String describing the stream.
 *  @param v Version of the string's format, if there is one; passed on to the
 *  FFmpegAudioStream constructor.
 *  @return The new stream, or an empty pointer if the string's type is not
 *  "ffmpeg".  In the version < 1 case there is no type in the string to check.
 */
589 shared_ptr<FFmpegAudioStream>
590 FFmpegAudioStream::create (string t, optional<int> v)
593 /* version < 1; no type in the string, and there's only FFmpeg streams anyway */
594 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
600 if (type != "ffmpeg") {
601 return shared_ptr<FFmpegAudioStream> ();
604 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
/** Construct an FFmpegAudioStream by parsing its string form.
 *  @param t String to parse.
 *  @param version Version of the string's format, if there is one.
 *
 *  Two layouts are read.  One holds an id and a channel count, from which
 *  _channel_layout is set to the default layout for that many channels.  The
 *  current one (marked version 1) holds a type, which must be "ffmpeg", then the
 *  id, sample rate and channel layout.  After that, name_index space-separated
 *  fields are cut from the front of t.
 */
607 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
615 n >> _id >> channels;
616 _channel_layout = av_get_default_channel_layout (channels);
620 /* Current (marked version 1) */
621 n >> type >> _id >> _sample_rate >> _channel_layout;
622 assert (type == "ffmpeg");
625 for (int i = 0; i < name_index; ++i) {
626 size_t const s = t.find (' ');
627 if (s != string::npos) {
628 t = t.substr (s + 1);
/** @return String form of this stream: "ffmpeg" followed by _id, _sample_rate,
 *  _channel_layout and _name, separated by spaces.
 */
636 FFmpegAudioStream::to_string () const
638 return String::compose ("ffmpeg %1 %2 %3 %4", _id, _sample_rate, _channel_layout, _name);