/*
    Copyright (C) 2012 Carl Hetherington <cth@carlh.net>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

*/
/** @file  src/ffmpeg_decoder.cc
 *  @brief A decoder using FFmpeg to decode content.
 */
#include <string>
#include <sstream>
#include <stdint.h>
#include <boost/shared_ptr.hpp>
#include <boost/optional.hpp>
#include <boost/lexical_cast.hpp>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libpostproc/postprocess.h>
}
#include "transcoder.h"
#include "exceptions.h"
#include "ffmpeg_decoder.h"

using std::string;
using std::stringstream;
using boost::shared_ptr;
using boost::optional;
/** Construct an FFmpeg decoder for some content.
 *  @param f Film whose content we are to decode.
 *  @param o Decoding options.
 *  @param j Job that this decoding is associated with, or 0.
 *
 *  NOTE(review): only part of the initialiser list is visible in this
 *  fragment; the codec context pointers are zero-initialised here so that
 *  the destructor can test them before calling avcodec_close().
 */
59 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j)
64 , _video_codec_context (0)
66 , _audio_codec_context (0)
68 , _subtitle_codec_context (0)
77 FFmpegDecoder::~FFmpegDecoder ()
79 if (_audio_codec_context) {
80 avcodec_close (_audio_codec_context);
83 if (_video_codec_context) {
84 avcodec_close (_video_codec_context);
87 if (_subtitle_codec_context) {
88 avcodec_close (_subtitle_codec_context);
93 avformat_close_input (&_format_context);
97 FFmpegDecoder::setup_general ()
103 if ((r = avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0)) != 0) {
104 throw OpenFileError (_film->content_path ());
107 if (avformat_find_stream_info (_format_context, 0) < 0) {
108 throw DecodeError ("could not find stream information");
111 /* Find video, audio and subtitle streams and choose the first of each */
113 for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
114 AVStream* s = _format_context->streams[i];
115 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
117 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
118 _audio_streams.push_back (AudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout));
119 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
120 _subtitle_streams.push_back (SubtitleStream (stream_name (s), i));
124 if (_video_stream < 0) {
125 throw DecodeError ("could not find video stream");
128 _frame = avcodec_alloc_frame ();
130 throw DecodeError ("could not allocate frame");
135 FFmpegDecoder::setup_video ()
137 _video_codec_context = _format_context->streams[_video_stream]->codec;
138 _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
140 if (_video_codec == 0) {
141 throw DecodeError ("could not find video decoder");
144 /* I think this prevents problems with green hash on decodes and
145 "changing frame properties on the fly is not supported by all filters"
146 messages with some content. Although I'm not sure; needs checking.
148 AVDictionary* opts = 0;
149 av_dict_set (&opts, "threads", "1", 0);
151 if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
152 throw DecodeError ("could not open video decoder");
157 FFmpegDecoder::setup_audio ()
159 if (!_audio_stream) {
163 _audio_codec_context = _format_context->streams[_audio_stream.get().id()]->codec;
164 _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
166 if (_audio_codec == 0) {
167 throw DecodeError ("could not find audio decoder");
170 if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
171 throw DecodeError ("could not open audio decoder");
174 /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
175 so bodge it here. No idea why we should have to do this.
178 if (_audio_codec_context->channel_layout == 0) {
179 _audio_codec_context->channel_layout = av_get_default_channel_layout (_audio_stream.get().channels());
184 FFmpegDecoder::setup_subtitle ()
186 if (!_subtitle_stream) {
190 _subtitle_codec_context = _format_context->streams[_subtitle_stream.get().id()]->codec;
191 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
193 if (_subtitle_codec == 0) {
194 throw DecodeError ("could not find subtitle decoder");
197 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
198 throw DecodeError ("could not open subtitle decoder");
/** Read and decode one packet from the content, passing the results to
 *  process_video() / process_audio() / process_subtitle(), and keeping
 *  audio/video in sync by repeating or dropping video frames.
 *
 *  NOTE(review): this fragment is missing lines (braces and some local
 *  declarations are not visible), so the comments below describe only
 *  what the visible code demonstrates.
 */
204 FFmpegDecoder::pass ()
206 int r = av_read_frame (_format_context, &_packet);
/* Non-zero return from av_read_frame means no more packets; anything
   other than a clean EOF is logged (but not treated as fatal).
*/
209 if (r != AVERROR_EOF) {
210 /* Maybe we should fail here, but for now we'll just finish off instead */
212 av_strerror (r, buf, sizeof(buf));
213 _film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
216 /* Get any remaining frames */
221 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Flush the video decoder of any buffered frames */
225 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
226 process_video (_frame);
/* ...and the audio decoder, if content audio is in use */
229 if (_audio_stream && _opt->decode_audio && _film->use_content_audio()) {
230 while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
/* Size in bytes of the decoded audio in _frame */
231 int const data_size = av_samples_get_buffer_size (
232 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
235 assert (_audio_codec_context->channels == _film->audio_channels());
236 process_audio (deinterleave_audio (_frame->data[0], data_size));
/* Normal case: we have a packet; reset _frame before decoding into it */
243 avcodec_get_frame_defaults (_frame);
245 if (_packet.stream_index == _video_stream) {
248 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
249 if (r >= 0 && frame_finished) {
/* The decoder should have consumed the whole packet; log if not */
251 if (r != _packet.size) {
252 _film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
255 /* Where we are in the output, in seconds */
256 double const out_pts_seconds = video_frame() / frames_per_second();
258 /* Where we are in the source, in seconds */
259 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
260 * av_frame_get_best_effort_timestamp(_frame);
/* Remember the PTS of the first video frame we see */
263 _first_video = source_pts_seconds;
266 /* Difference between where we are and where we should be */
267 double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
268 double const one_frame = 1 / frames_per_second();
270 /* Insert frames if required to get out_pts_seconds up to pts_seconds */
271 if (delta > one_frame) {
272 int const extra = rint (delta / one_frame);
273 for (int i = 0; i < extra; ++i) {
274 repeat_last_video ();
277 "Extra frame inserted at %1s; source frame %2, source PTS %3",
278 out_pts_seconds, video_frame(), source_pts_seconds
/* Only emit this frame if we are not more than a frame ahead */
284 if (delta > -one_frame) {
285 /* Process this frame */
286 process_video (_frame);
288 /* Otherwise we are omitting a frame to keep things right */
289 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
293 } else if (_audio_stream && _packet.stream_index == _audio_stream.get().id() && _opt->decode_audio && _film->use_content_audio()) {
296 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
298 /* Where we are in the source, in seconds */
299 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
300 * av_frame_get_best_effort_timestamp(_frame);
302 /* We only decode audio if we've had our first video packet through, and if it
303 was before this packet. Until then audio is thrown away.
306 if (_first_video && _first_video.get() <= source_pts_seconds) {
/* Remember the PTS of the first audio we keep */
309 _first_audio = source_pts_seconds;
311 /* This is our first audio frame, and if we've arrived here we must have had our
312 first video frame. Push some silence to make up any gap between our first
313 video frame and our first audio.
316 /* frames of silence that we must push */
317 int const s = rint ((_first_audio.get() - _first_video.get()) * _audio_stream.get().sample_rate ());
321 "First video at %1, first audio at %2, pushing %3 frames of silence for %4 channels (%5 bytes per sample)",
322 _first_video.get(), _first_audio.get(), s, _audio_stream.get().channels(), bytes_per_audio_sample()
327 shared_ptr<AudioBuffers> audio (new AudioBuffers (_audio_stream.get().channels(), s));
328 audio->make_silent ();
329 process_audio (audio);
/* Now the audio data from this packet itself */
333 int const data_size = av_samples_get_buffer_size (
334 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
337 assert (_audio_codec_context->channels == _film->audio_channels());
338 process_audio (deinterleave_audio (_frame->data[0], data_size));
/* Subtitles are only decoded once we have seen our first video frame */
342 } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream.get().id() && _opt->decode_subtitles && _first_video) {
346 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
347 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
348 indicate that the previous subtitle should stop.
350 if (sub.num_rects > 0) {
351 process_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
353 process_subtitle (shared_ptr<TimedSubtitle> ());
355 avsubtitle_free (&sub);
/* Free the packet that av_read_frame gave us */
359 av_free_packet (&_packet);
/** Deinterleave audio data that has come from the decoder and convert it
 *  to our float format.
 *  @param data Sample data from the decoder (interleaved for S16/S32;
 *  for FLTP this is plane 0 — presumably the planes are contiguous; TODO confirm).
 *  @param size Size of the data, in bytes.
 *  @return Deinterleaved float audio.
 *
 *  NOTE(review): this fragment is missing lines (the channel/sample
 *  book-keeping inside the loops is not visible).
 */
363 shared_ptr<AudioBuffers>
364 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
366 assert (_film->audio_channels());
367 assert (bytes_per_audio_sample());
369 /* Deinterleave and convert to float */
/* The data must be a whole number of frames (one sample per channel) */
371 assert ((size % (bytes_per_audio_sample() * _audio_stream.get().channels())) == 0);
373 int const total_samples = size / bytes_per_audio_sample();
374 int const frames = total_samples / _film->audio_channels();
375 shared_ptr<AudioBuffers> audio (new AudioBuffers (_audio_stream.get().channels(), frames));
377 switch (audio_sample_format()) {
/* Interleaved signed 16-bit: scale each sample by 2^15 into float */
378 case AV_SAMPLE_FMT_S16:
380 int16_t* p = (int16_t *) data;
383 for (int i = 0; i < total_samples; ++i) {
384 audio->data(channel)[sample] = float(*p++) / (1 << 15);
387 if (channel == _film->audio_channels()) {
/* Interleaved signed 32-bit.
   NOTE(review): (1 << 31) overflows a 32-bit int, which is undefined
   behaviour; the divisor should probably be 2147483648.0f — confirm
   before relying on this path.
*/
395 case AV_SAMPLE_FMT_S32:
397 int32_t* p = (int32_t *) data;
400 for (int i = 0; i < total_samples; ++i) {
401 audio->data(channel)[sample] = float(*p++) / (1 << 31);
404 if (channel == _film->audio_channels()) {
/* Planar float: one plane per channel, already in our format, so copy
   each plane straight into the corresponding output channel.
*/
411 case AV_SAMPLE_FMT_FLTP:
413 float* p = reinterpret_cast<float*> (data);
414 for (int i = 0; i < _film->audio_channels(); ++i) {
415 memcpy (audio->data(i), p, frames * sizeof(float));
429 FFmpegDecoder::frames_per_second () const
431 AVStream* s = _format_context->streams[_video_stream];
433 if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
434 return av_q2d (s->avg_frame_rate);
437 return av_q2d (s->r_frame_rate);
441 FFmpegDecoder::audio_sample_format () const
443 if (_audio_codec_context == 0) {
444 return (AVSampleFormat) 0;
447 return _audio_codec_context->sample_fmt;
451 FFmpegDecoder::native_size () const
453 return Size (_video_codec_context->width, _video_codec_context->height);
/** @return The pixel format of the video, as reported by the video codec
 *  context.  The return type is declared in the header, which is not
 *  visible here.
 */
457 FFmpegDecoder::pixel_format () const
459 return _video_codec_context->pix_fmt;
463 FFmpegDecoder::time_base_numerator () const
465 return _video_codec_context->time_base.num;
469 FFmpegDecoder::time_base_denominator () const
471 return _video_codec_context->time_base.den;
475 FFmpegDecoder::sample_aspect_ratio_numerator () const
477 return _video_codec_context->sample_aspect_ratio.num;
481 FFmpegDecoder::sample_aspect_ratio_denominator () const
483 return _video_codec_context->sample_aspect_ratio.den;
487 FFmpegDecoder::stream_name (AVStream* s) const
491 AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
496 AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
498 if (!n.str().empty()) {
504 if (n.str().empty()) {
512 FFmpegDecoder::bytes_per_audio_sample () const
514 return av_get_bytes_per_sample (audio_sample_format ());
518 FFmpegDecoder::set_audio_stream (optional<AudioStream> s)
520 Decoder::set_audio_stream (s);
525 FFmpegDecoder::set_subtitle_stream (optional<SubtitleStream> s)
527 Decoder::set_subtitle_stream (s);