2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
35 #include <libswscale/swscale.h>
36 #include <libpostproc/postprocess.h>
41 #include "transcoder.h"
45 #include "exceptions.h"
49 #include "ffmpeg_decoder.h"
50 #include "filter_graph.h"
56 using std::stringstream;
58 using boost::shared_ptr;
59 using boost::optional;
/** Construct an FFmpeg-based decoder for some content.
 *  @param f Film that the content belongs to.
 *  @param o Decoding options.
 *  @param j Job that this decode is part of.
 */
FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j)
	/* Codec contexts start as 0 so the destructor can tell which were opened */
	, _video_codec_context (0)
	, _audio_codec_context (0)
	, _subtitle_codec_context (0)
/** Close whichever codec contexts were opened, then the format context */
FFmpegDecoder::~FFmpegDecoder ()
	if (_audio_codec_context) {
		avcodec_close (_audio_codec_context);

	if (_video_codec_context) {
		avcodec_close (_video_codec_context);

	if (_subtitle_codec_context) {
		avcodec_close (_subtitle_codec_context);

	avformat_close_input (&_format_context);
/** Open the content file and catalogue its video, audio and subtitle streams.
 *  @throws OpenFileError if the content cannot be opened.
 *  @throws DecodeError if stream information cannot be found, there is no
 *  video stream, or a frame cannot be allocated.
 */
FFmpegDecoder::setup_general ()
	if ((r = avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0)) != 0) {
		throw OpenFileError (_film->content_path ());

	if (avformat_find_stream_info (_format_context, 0) < 0) {
		throw DecodeError ("could not find stream information");

	/* Find video, audio and subtitle streams and choose the first of each */

	for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
		AVStream* s = _format_context->streams[i];
		if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
		} else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
			/* Record each audio stream with its sample rate and channel layout */
			_audio_streams.push_back (AudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout));
		} else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
			_subtitle_streams.push_back (SubtitleStream (stream_name (s), i));

	/* Video is compulsory; audio and subtitles are optional */
	if (_video_stream < 0) {
		throw DecodeError ("could not find video stream");

	/* avcodec_alloc_frame() returns 0 on allocation failure */
	_frame = avcodec_alloc_frame ();
		throw DecodeError ("could not allocate frame");
/** Find and open a decoder for the video stream.
 *  @throws DecodeError if no suitable decoder can be found or opened.
 */
FFmpegDecoder::setup_video ()
	_video_codec_context = _format_context->streams[_video_stream]->codec;
	_video_codec = avcodec_find_decoder (_video_codec_context->codec_id);

	if (_video_codec == 0) {
		throw DecodeError ("could not find video decoder");

	/* I think this prevents problems with green hash on decodes and
	   "changing frame properties on the fly is not supported by all filters"
	   messages with some content.  Although I'm not sure; needs checking.
	*/
	/* Force single-threaded decoding (see note above) */
	AVDictionary* opts = 0;
	av_dict_set (&opts, "threads", "1", 0);

	if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
		throw DecodeError ("could not open video decoder");
/** Find and open a decoder for the chosen audio stream, doing nothing if
 *  no audio stream has been selected.
 *  @throws DecodeError if no suitable decoder can be found or opened.
 */
FFmpegDecoder::setup_audio ()
	/* No audio stream selected: nothing to set up */
	if (!_audio_stream) {

	_audio_codec_context = _format_context->streams[_audio_stream.get().id()]->codec;
	_audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);

	if (_audio_codec == 0) {
		throw DecodeError ("could not find audio decoder");

	if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
		throw DecodeError ("could not open audio decoder");

	/* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
	   so bodge it here.  No idea why we should have to do this.
	*/

	if (_audio_codec_context->channel_layout == 0) {
		_audio_codec_context->channel_layout = av_get_default_channel_layout (_audio_stream.get().channels());
/** Find and open a decoder for the chosen subtitle stream, doing nothing if
 *  no subtitle stream has been selected.
 *  @throws DecodeError if no suitable decoder can be found or opened.
 */
FFmpegDecoder::setup_subtitle ()
	/* No subtitle stream selected: nothing to set up */
	if (!_subtitle_stream) {

	_subtitle_codec_context = _format_context->streams[_subtitle_stream.get().id()]->codec;
	_subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);

	if (_subtitle_codec == 0) {
		throw DecodeError ("could not find subtitle decoder");

	if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
		throw DecodeError ("could not open subtitle decoder");
/** Read and process one packet from the content, emitting video, audio or
 *  subtitle data as appropriate.  On EOF, flushes any frames that the
 *  decoders are still buffering.
 */
FFmpegDecoder::pass ()
	int r = av_read_frame (_format_context, &_packet);

	/* A non-EOF error from av_read_frame: log it rather than failing */
	if (r != AVERROR_EOF) {
		/* Maybe we should fail here, but for now we'll just finish off instead */
		av_strerror (r, buf, sizeof(buf));
		_film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));

	/* Get any remaining frames */

	/* XXX: should we reset _packet.data and size after each *_decode_* call? */

	/* Drain frames that the video decoder is still holding */
	while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
		filter_and_emit_video (_frame);

	if (_audio_stream && _opt->decode_audio && _film->use_content_audio()) {
		/* Likewise drain the audio decoder */
		while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
			/* Size in bytes of the interleaved data for this frame */
			int const data_size = av_samples_get_buffer_size (
				0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
			assert (_audio_codec_context->channels == _film->audio_channels());
			Audio (deinterleave_audio (_frame->data[0], data_size));

	/* Reset the frame before re-using it for the next decode */
	avcodec_get_frame_defaults (_frame);

	if (_packet.stream_index == _video_stream) {
		int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
		if (r >= 0 && frame_finished) {
			if (r != _packet.size) {
				_film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));

			/* Where we are in the output, in seconds */
			double const out_pts_seconds = video_frame() / frames_per_second();

			/* Where we are in the source, in seconds */
			double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
				* av_frame_get_best_effort_timestamp(_frame);

			/* Record the source PTS that acts as the sync origin (_first_video) */
			_first_video = source_pts_seconds;

			/* Difference between where we are and where we should be */
			double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
			double const one_frame = 1 / frames_per_second();

			/* Insert frames if required to get out_pts_seconds up to pts_seconds */
			if (delta > one_frame) {
				int const extra = rint (delta / one_frame);
				for (int i = 0; i < extra; ++i) {
					repeat_last_video ();
					"Extra frame inserted at %1s; source frame %2, source PTS %3",
					out_pts_seconds, video_frame(), source_pts_seconds

			/* Frames within one frame-period of where they should be are used */
			if (delta > -one_frame) {
				/* Process this frame */
				filter_and_emit_video (_frame);
			/* Otherwise we are omitting a frame to keep things right */
				_film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));

	} else if (_audio_stream && _packet.stream_index == _audio_stream.get().id() && _opt->decode_audio && _film->use_content_audio()) {
		if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {

			/* Where we are in the source, in seconds */
			double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
				* av_frame_get_best_effort_timestamp(_frame);

			/* We only decode audio if we've had our first video packet through, and if it
			   was before this packet.  Until then audio is thrown away.
			*/
			if (_first_video && _first_video.get() <= source_pts_seconds) {
				_first_audio = source_pts_seconds;

				/* This is our first audio frame, and if we've arrived here we must have had our
				   first video frame.  Push some silence to make up any gap between our first
				   video frame and our first audio.
				*/
				/* frames of silence that we must push */
				int const s = rint ((_first_audio.get() - _first_video.get()) * _audio_stream.get().sample_rate ());
					"First video at %1, first audio at %2, pushing %3 frames of silence for %4 channels (%5 bytes per sample)",
					_first_video.get(), _first_audio.get(), s, _audio_stream.get().channels(), bytes_per_audio_sample()

				shared_ptr<AudioBuffers> audio (new AudioBuffers (_audio_stream.get().channels(), s));
				audio->make_silent ();

				/* Size in bytes of the interleaved data for this frame */
				int const data_size = av_samples_get_buffer_size (
					0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
				assert (_audio_codec_context->channels == _film->audio_channels());
				Audio (deinterleave_audio (_frame->data[0], data_size));

	} else if (_subtitle_stream && _packet.stream_index == _subtitle_stream.get().id() && _opt->decode_subtitles && _first_video) {
		/* NOTE(review): avcodec_decode_subtitle2() returns a negative value on
		   error, which is truthy in this condition; this probably ought to be a
		   ">= 0" check — confirm against the FFmpeg API documentation.
		*/
		if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
			/* Sometimes we get an empty AVSubtitle, which is used by some codecs to
			   indicate that the previous subtitle should stop.
			*/
			if (sub.num_rects > 0) {
				emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
				/* Empty subtitle: emit a null pointer to stop the previous one */
				emit_subtitle (shared_ptr<TimedSubtitle> ());
			avsubtitle_free (&sub);

	av_free_packet (&_packet);
365 shared_ptr<AudioBuffers>
366 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
368 assert (_film->audio_channels());
369 assert (bytes_per_audio_sample());
371 /* Deinterleave and convert to float */
373 assert ((size % (bytes_per_audio_sample() * _audio_stream.get().channels())) == 0);
375 int const total_samples = size / bytes_per_audio_sample();
376 int const frames = total_samples / _film->audio_channels();
377 shared_ptr<AudioBuffers> audio (new AudioBuffers (_audio_stream.get().channels(), frames));
379 switch (audio_sample_format()) {
380 case AV_SAMPLE_FMT_S16:
382 int16_t* p = (int16_t *) data;
385 for (int i = 0; i < total_samples; ++i) {
386 audio->data(channel)[sample] = float(*p++) / (1 << 15);
389 if (channel == _film->audio_channels()) {
397 case AV_SAMPLE_FMT_S32:
399 int32_t* p = (int32_t *) data;
402 for (int i = 0; i < total_samples; ++i) {
403 audio->data(channel)[sample] = float(*p++) / (1 << 31);
406 if (channel == _film->audio_channels()) {
413 case AV_SAMPLE_FMT_FLTP:
415 float* p = reinterpret_cast<float*> (data);
416 for (int i = 0; i < _film->audio_channels(); ++i) {
417 memcpy (audio->data(i), p, frames * sizeof(float));
/** @return Frames per second of the content's video, preferring the
 *  container's average frame rate when it is set.
 */
FFmpegDecoder::frames_per_second () const
	AVStream* s = _format_context->streams[_video_stream];

	/* Use the average frame rate if the container supplies one... */
	if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
		return av_q2d (s->avg_frame_rate);

	/* ...otherwise fall back to the stream's base frame rate */
	return av_q2d (s->r_frame_rate);
/** @return The sample format of the content's audio, or a zero value if
 *  no audio codec context has been set up.
 */
FFmpegDecoder::audio_sample_format () const
	if (_audio_codec_context == 0) {
		return (AVSampleFormat) 0;

	return _audio_codec_context->sample_fmt;
/** @return Native size of the content's video frames, as reported by the codec context */
FFmpegDecoder::native_size () const
	return Size (_video_codec_context->width, _video_codec_context->height);
/** @return Pixel format of the content's video */
FFmpegDecoder::pixel_format () const
	return _video_codec_context->pix_fmt;
/** @return Numerator of the video codec context's time base */
FFmpegDecoder::time_base_numerator () const
	return _video_codec_context->time_base.num;
/** @return Denominator of the video codec context's time base */
FFmpegDecoder::time_base_denominator () const
	return _video_codec_context->time_base.den;
/** @return Numerator of the video's sample aspect ratio */
FFmpegDecoder::sample_aspect_ratio_numerator () const
	return _video_codec_context->sample_aspect_ratio.num;
/** @return Denominator of the video's sample aspect ratio */
FFmpegDecoder::sample_aspect_ratio_denominator () const
	return _video_codec_context->sample_aspect_ratio.den;
/** Build a human-readable name for a stream.
 *  @param s Stream.
 *  @return Name made up from the stream's "language" and "title" metadata
 *  entries, where present (n is a stringstream that accumulates the parts).
 */
FFmpegDecoder::stream_name (AVStream* s) const
	AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);

	AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
	/* Separate the title from a language part that is already present */
	if (!n.str().empty()) {

	/* Fall back to a generic name if there was no usable metadata */
	if (n.str().empty()) {
/** @return Number of bytes per sample of the current audio sample format */
FFmpegDecoder::bytes_per_audio_sample () const
	return av_get_bytes_per_sample (audio_sample_format ());
/** Choose the audio stream to decode.
 *  @param s Stream to use, or no value for no audio.
 */
FFmpegDecoder::set_audio_stream (optional<AudioStream> s)
	/* Tell the base class first */
	Decoder::set_audio_stream (s);
/** Choose the subtitle stream to decode.
 *  @param s Stream to use, or no value for no subtitles.
 */
FFmpegDecoder::set_subtitle_stream (optional<SubtitleStream> s)
	/* Tell the base class first */
	Decoder::set_subtitle_stream (s);
/** Pass a decoded frame through a suitable FilterGraph and emit the
 *  resulting image(s).  Graphs are cached in _filter_graphs and re-used
 *  when one can already process the frame's size and pixel format.
 *  @param frame Frame to process.
 */
FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
	shared_ptr<FilterGraph> graph;

	/* Look for an existing graph that can handle this frame's size and
	   pixel format.
	*/
	list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
	while (i != _filter_graphs.end() && !(*i)->can_process (Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {

	if (i == _filter_graphs.end ()) {
		/* None found: build a new graph for this size/format and cache it */
		graph.reset (new FilterGraph (_film, this, _opt->apply_crop, Size (frame->width, frame->height), (AVPixelFormat) frame->format));
		_filter_graphs.push_back (graph);
		_film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));

	/* Run the frame through the graph; it may yield zero or more images */
	list<shared_ptr<Image> > images = graph->process (frame);

	for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {