2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
35 #include <libswscale/swscale.h>
36 #include <libpostproc/postprocess.h>
41 #include "transcoder.h"
45 #include "exceptions.h"
49 #include "ffmpeg_decoder.h"
50 #include "filter_graph.h"
56 using std::stringstream;
58 using boost::shared_ptr;
59 using boost::optional;
60 using boost::dynamic_pointer_cast;
/* Construct a decoder which uses FFmpeg to decode the content of Film f.
 * @param f Film whose content is to be decoded.
 * @param o Decode options shared with the base decoders.
 * @param j Job that this decode is associated with (may be observed for progress).
 *
 * NOTE(review): this listing has lines elided (the first base-class initialiser
 * and the constructor body are not visible), so comments cover only what is shown.
 * The three codec-context pointers are zero-initialised so the destructor can
 * safely test them before closing.
 */
62 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j)
64 	, VideoDecoder (f, o, j)
65 	, AudioDecoder (f, o, j)
69 	, _video_codec_context (0)
71 	, _audio_codec_context (0)
73 	, _subtitle_codec_context (0)
/* Destructor: close whichever codec contexts were actually opened, then the
 * format context.  Each pointer is checked because setup_audio() /
 * setup_subtitle() return early when there is no such stream, leaving the
 * corresponding context at 0.
 */
82 FFmpegDecoder::~FFmpegDecoder ()
84 	if (_audio_codec_context) {
85 		avcodec_close (_audio_codec_context);
88 	if (_video_codec_context) {
89 		avcodec_close (_video_codec_context);
92 	if (_subtitle_codec_context) {
93 		avcodec_close (_subtitle_codec_context);
	/* Closes and frees _format_context, setting it to 0 */
98 	avformat_close_input (&_format_context);
/* Open the content file, probe it for streams and record what we find.
 * Picks the first video stream, collects all audio and subtitle streams,
 * and allocates the AVFrame used for decoding.
 * @throws OpenFileError if the content cannot be opened.
 * @throws DecodeError if stream info cannot be read, no video stream exists,
 *         or the frame cannot be allocated.
 */
102 FFmpegDecoder::setup_general ()
108 	if ((r = avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0)) != 0) {
109 		throw OpenFileError (_film->content_path ());
112 	if (avformat_find_stream_info (_format_context, 0) < 0) {
113 		throw DecodeError ("could not find stream information");
116 	/* Find video, audio and subtitle streams and choose the first of each */
118 	for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
119 		AVStream* s = _format_context->streams[i];
120 		if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
			/* NOTE(review): the line recording _video_stream = i is elided
			   from this listing; presumably it lives here — confirm. */
122 		} else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
123 			_audio_streams.push_back (
124 				shared_ptr<AudioStream> (
125 					new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
128 		} else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
129 			_subtitle_streams.push_back (
130 				shared_ptr<SubtitleStream> (
131 					new SubtitleStream (stream_name (s), i)
	/* A video stream is mandatory; audio and subtitles are optional */
137 	if (_video_stream < 0) {
138 		throw DecodeError ("could not find video stream");
	/* Single AVFrame reused for every decoded video/audio frame */
141 	_frame = avcodec_alloc_frame ();
143 		throw DecodeError ("could not allocate frame");
/* Find and open a decoder for the chosen video stream.
 * @throws DecodeError if no decoder exists or it cannot be opened.
 */
148 FFmpegDecoder::setup_video ()
150 	_video_codec_context = _format_context->streams[_video_stream]->codec;
151 	_video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
153 	if (_video_codec == 0) {
154 		throw DecodeError ("could not find video decoder");
157 	/* I think this prevents problems with green hash on decodes and
158 	   "changing frame properties on the fly is not supported by all filters"
159 	   messages with some content. Although I'm not sure; needs checking.
	/* Force single-threaded decoding via the codec's "threads" option */
161 	AVDictionary* opts = 0;
162 	av_dict_set (&opts, "threads", "1", 0);
	/* NOTE(review): no av_dict_free (&opts) is visible in this listing —
	   possible small leak; confirm against the full source. */
164 	if (avcodec_open2 (_video_codec_context, _video_codec, &opts) < 0) {
165 		throw DecodeError ("could not open video decoder");
/* Find and open a decoder for the currently-selected audio stream, if any.
 * A no-op when no audio stream is selected.
 * @throws DecodeError if the decoder is missing or cannot be opened.
 */
170 FFmpegDecoder::setup_audio ()
172 	if (!_audio_stream) {
	/* Only FFmpegAudioStreams can be decoded here; the cast is assumed to
	   succeed for streams created by setup_general() */
176 	shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
179 	_audio_codec_context = _format_context->streams[ffa->id()]->codec;
180 	_audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
182 	if (_audio_codec == 0) {
183 		throw DecodeError ("could not find audio decoder");
186 	if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
187 		throw DecodeError ("could not open audio decoder");
190 	/* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
191 	   so bodge it here. No idea why we should have to do this.
194 	if (_audio_codec_context->channel_layout == 0) {
195 		_audio_codec_context->channel_layout = av_get_default_channel_layout (ffa->channels());
/* Find and open a decoder for the currently-selected subtitle stream, if any.
 * A no-op when no subtitle stream is selected.
 * @throws DecodeError if the decoder is missing or cannot be opened.
 */
200 FFmpegDecoder::setup_subtitle ()
202 	if (!_subtitle_stream) {
206 	_subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
207 	_subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
209 	if (_subtitle_codec == 0) {
210 		throw DecodeError ("could not find subtitle decoder");
213 	if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
214 		throw DecodeError ("could not open subtitle decoder");
/* Read and decode one packet from the content.  On EOF (or read error) the
 * codecs are flushed of any buffered frames; otherwise the packet is routed
 * to the video, audio or subtitle path depending on its stream index.
 *
 * Video path: uses the frame's best-effort PTS to keep output in sync with
 * the source, inserting repeated frames or dropping frames as needed.
 * Audio path: discarded until the first video frame has been seen; silence is
 * pushed to cover any gap between first video and first audio.
 *
 * NOTE(review): this listing has lines elided throughout (returns, closing
 * braces, some declarations such as `buf` and `frame_finished`); comments
 * describe only the visible logic.
 */
220 FFmpegDecoder::pass ()
222 	int r = av_read_frame (_format_context, &_packet);
	/* r < 0 here means EOF or a read error */
225 		if (r != AVERROR_EOF) {
226 			/* Maybe we should fail here, but for now we'll just finish off instead */
228 			av_strerror (r, buf, sizeof(buf));
229 			_film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
232 		/* Get any remaining frames */
237 		/* XXX: should we reset _packet.data and size after each *_decode_* call? */
		/* Flush any frames the video codec has buffered */
241 		while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
242 			filter_and_emit_video (_frame);
		/* Flush any buffered audio frames too, if we are decoding audio */
245 		if (_audio_stream && _opt->decode_audio) {
246 			while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
247 				int const data_size = av_samples_get_buffer_size (
248 					0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
251 				assert (_audio_codec_context->channels == _film->audio_channels());
252 				Audio (deinterleave_audio (_frame->data[0], data_size));
	/* Reset _frame before reusing it for this packet's decode */
259 	avcodec_get_frame_defaults (_frame);
261 	shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
263 	if (_packet.stream_index == _video_stream) {
266 		int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
267 		if (r >= 0 && frame_finished) {
			/* avcodec_decode_video2 returns the number of bytes it consumed */
269 			if (r != _packet.size) {
270 				_film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
273 			/* Where we are in the output, in seconds */
274 			double const out_pts_seconds = video_frame() / frames_per_second();
276 			/* Where we are in the source, in seconds */
277 			double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
278 				* av_frame_get_best_effort_timestamp(_frame);
			/* Remember the PTS of the first video frame we see */
281 				_first_video = source_pts_seconds;
284 			/* Difference between where we are and where we should be */
285 			double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
286 			double const one_frame = 1 / frames_per_second();
288 			/* Insert frames if required to get out_pts_seconds up to pts_seconds */
289 			if (delta > one_frame) {
290 				int const extra = rint (delta / one_frame);
291 				for (int i = 0; i < extra; ++i) {
292 					repeat_last_video ();
295 						"Extra frame inserted at %1s; source frame %2, source PTS %3",
296 						out_pts_seconds, video_frame(), source_pts_seconds
			/* Frames more than one_frame early are dropped to stay in sync */
302 			if (delta > -one_frame) {
303 				/* Process this frame */
304 				filter_and_emit_video (_frame);
306 				/* Otherwise we are omitting a frame to keep things right */
307 				_film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
311 	} else if (ffa && _packet.stream_index == ffa->id() && _opt->decode_audio) {
314 		if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
316 			/* Where we are in the source, in seconds */
317 			double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
318 				* av_frame_get_best_effort_timestamp(_frame);
320 			/* We only decode audio if we've had our first video packet through, and if it
321 			   was before this packet. Until then audio is thrown away.
324 			if (_first_video && _first_video.get() <= source_pts_seconds) {
				/* Remember the PTS of the first audio we keep */
327 					_first_audio = source_pts_seconds;
329 					/* This is our first audio frame, and if we've arrived here we must have had our
330 					   first video frame. Push some silence to make up any gap between our first
331 					   video frame and our first audio.
334 					/* frames of silence that we must push */
335 					int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
339 							"First video at %1, first audio at %2, pushing %3 frames of silence for %4 channels (%5 bytes per sample)",
340 							_first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
345 						shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
346 						audio->make_silent ();
				/* Deinterleave and emit the decoded samples */
351 				int const data_size = av_samples_get_buffer_size (
352 					0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
355 				assert (_audio_codec_context->channels == _film->audio_channels());
356 				Audio (deinterleave_audio (_frame->data[0], data_size));
	/* Subtitles are only decoded once video has started, so they can be timed
	   relative to the first video frame */
360 	} else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt->decode_subtitles && _first_video) {
364 		if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) && got_subtitle) {
365 			/* Sometimes we get an empty AVSubtitle, which is used by some codecs to
366 			   indicate that the previous subtitle should stop.
368 			if (sub.num_rects > 0) {
369 				emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
				/* Empty subtitle => clear the current one */
371 				emit_subtitle (shared_ptr<TimedSubtitle> ());
373 			avsubtitle_free (&sub);
	/* Release the packet's payload before the next pass */
377 	av_free_packet (&_packet);
/* Convert a buffer of interleaved audio samples into per-channel float
 * buffers in the range [-1, 1).
 * @param data Interleaved sample data as produced by the audio codec.
 * @param size Size of data in bytes; must be a whole number of
 *             (channels x bytes-per-sample) frames.
 * @return New AudioBuffers containing the deinterleaved floats.
 *
 * NOTE(review): the per-sample channel/sample bookkeeping lines (increments,
 * wrap-around resets) and the default switch case are elided from this
 * listing; comments describe only the visible structure.
 */
381 shared_ptr<AudioBuffers>
382 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
384 	assert (_film->audio_channels());
385 	assert (bytes_per_audio_sample());
387 	shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
390 	/* Deinterleave and convert to float */
	/* size must be an exact number of whole audio frames */
392 	assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
394 	int const total_samples = size / bytes_per_audio_sample();
395 	int const frames = total_samples / _film->audio_channels();
396 	shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
398 	switch (audio_sample_format()) {
399 	case AV_SAMPLE_FMT_S16:
		/* Interleaved signed 16-bit: scale by 2^15 */
401 		int16_t* p = (int16_t *) data;
404 		for (int i = 0; i < total_samples; ++i) {
405 			audio->data(channel)[sample] = float(*p++) / (1 << 15);
			/* Wrap to the next sample once every channel has been filled */
408 			if (channel == _film->audio_channels()) {
416 	case AV_SAMPLE_FMT_S32:
		/* Interleaved signed 32-bit: scale by 2^31 */
418 		int32_t* p = (int32_t *) data;
421 		for (int i = 0; i < total_samples; ++i) {
422 			audio->data(channel)[sample] = float(*p++) / (1 << 31);
425 			if (channel == _film->audio_channels()) {
432 	case AV_SAMPLE_FMT_FLTP:
		/* Planar float: each channel is already contiguous, so just copy.
		   NOTE(review): the advance of p between channels is not visible here —
		   confirm p is stepped by `frames` per iteration in the full source. */
434 		float* p = reinterpret_cast<float*> (data);
435 		for (int i = 0; i < _film->audio_channels(); ++i) {
436 			memcpy (audio->data(i), p, frames * sizeof(float));
/* @return Frame rate of the video stream, preferring the container's average
 * frame rate and falling back to the stream's "real" base frame rate when the
 * average is not set.
 */
450 FFmpegDecoder::frames_per_second () const
452 	AVStream* s = _format_context->streams[_video_stream];
454 	if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
455 		return av_q2d (s->avg_frame_rate);
458 	return av_q2d (s->r_frame_rate);
/* @return Sample format of the open audio codec, or (AVSampleFormat) 0 if no
 * audio codec has been set up.
 */
462 FFmpegDecoder::audio_sample_format () const
464 	if (_audio_codec_context == 0) {
465 		return (AVSampleFormat) 0;
468 	return _audio_codec_context->sample_fmt;
/* @return Native picture size of the video stream, in pixels.
 * Assumes setup_video() has been called (dereferences _video_codec_context).
 */
472 FFmpegDecoder::native_size () const
474 	return Size (_video_codec_context->width, _video_codec_context->height);
/* @return Pixel format of the video stream.
 * Assumes setup_video() has been called.
 */
478 FFmpegDecoder::pixel_format () const
480 	return _video_codec_context->pix_fmt;
/* @return Numerator of the video codec's time base. */
484 FFmpegDecoder::time_base_numerator () const
486 	return _video_codec_context->time_base.num;
/* @return Denominator of the video codec's time base. */
490 FFmpegDecoder::time_base_denominator () const
492 	return _video_codec_context->time_base.den;
/* @return Numerator of the video's sample (pixel) aspect ratio. */
496 FFmpegDecoder::sample_aspect_ratio_numerator () const
498 	return _video_codec_context->sample_aspect_ratio.num;
/* @return Denominator of the video's sample (pixel) aspect ratio. */
502 FFmpegDecoder::sample_aspect_ratio_denominator () const
504 	return _video_codec_context->sample_aspect_ratio.den;
/* Build a human-readable name for a stream from its "language" and "title"
 * metadata entries.
 * NOTE(review): the lines that append lang/title to the stream `n` and the
 * fallback when both are empty are elided from this listing — the fallback
 * presumably substitutes a default label; confirm against the full source.
 * @param s Stream to name.
 * @return Composed stream name.
 */
508 FFmpegDecoder::stream_name (AVStream* s) const
512 	AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
517 	AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
		/* Separate language and title parts if we already have something */
519 		if (!n.str().empty()) {
	/* Fall back when neither metadata entry yielded any text */
525 	if (n.str().empty()) {
/* @return Number of bytes per sample for the current audio sample format. */
533 FFmpegDecoder::bytes_per_audio_sample () const
535 	return av_get_bytes_per_sample (audio_sample_format ());
/* Select audio stream s, delegating to the base class and then (per the
 * elided lines, presumably) re-running audio setup for the new stream.
 * @param s Stream to use for audio, or an empty pointer for none.
 */
539 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
541 	AudioDecoder::set_audio_stream (s);
/* Select subtitle stream s, delegating to the base class and then (per the
 * elided lines, presumably) re-running subtitle setup for the new stream.
 * @param s Stream to use for subtitles, or an empty pointer for none.
 */
546 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
548 	VideoDecoder::set_subtitle_stream (s);
/* Run a decoded frame through a FilterGraph matching its size and pixel
 * format (creating and caching a new graph on first sight of a given
 * size/format combination) and emit each resulting image.
 * @param frame Decoded video frame to filter and emit.
 */
553 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
555 	shared_ptr<FilterGraph> graph;
	/* Look for an existing graph that can handle this frame's size/format */
557 	list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
558 	while (i != _filter_graphs.end() && !(*i)->can_process (Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
562 	if (i == _filter_graphs.end ()) {
		/* None found: build one and cache it for subsequent frames */
563 		graph.reset (new FilterGraph (_film, this, _opt->apply_crop, Size (frame->width, frame->height), (AVPixelFormat) frame->format));
564 		_filter_graphs.push_back (graph);
565 		_film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
570 	list<shared_ptr<Image> > images = graph->process (frame);
	/* Emit every image the graph produced (a graph may output 0..N images per input) */
572 	for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
/* Factory: deserialise an FFmpegAudioStream from its string form t.
 * @param t Serialised stream description.
 * @param v Metadata version; unset/old versions have no leading type token.
 * @return New stream, or an empty pointer when t describes a non-FFmpeg
 *         stream (version >= 1 only).
 */
577 shared_ptr<FFmpegAudioStream>
578 FFmpegAudioStream::create (string t, optional<int> v)
581 		/* version < 1; no type in the string, and there's only FFmpeg streams anyway */
582 		return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
	/* version >= 1: the first token names the stream type */
588 	if (type != "ffmpeg") {
589 		return shared_ptr<FFmpegAudioStream> ();
592 	return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
/* Deserialising constructor.
 * @param t Serialised description (see to_string() for the current layout).
 * @param version Metadata version: pre-1 strings carry id and channel count
 *        (channel layout is reconstructed from the count); version 1 strings
 *        carry "ffmpeg <id> <sample_rate> <channel_layout> <name>".
 * NOTE(review): lines choosing name_index per version and assigning _name are
 * elided from this listing.
 */
595 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
		/* Legacy (pre-version-1) layout: "<id> <channels> ..." */
603 		n >> _id >> channels;
604 		_channel_layout = av_get_default_channel_layout (channels);
608 		/* Current (marked version 1) */
609 		n >> type >> _id >> _sample_rate >> _channel_layout;
610 		assert (type == "ffmpeg");
	/* Strip the leading numeric/type fields from t, leaving just the name */
613 	for (int i = 0; i < name_index; ++i) {
614 		size_t const s = t.find (' ');
615 		if (s != string::npos) {
616 			t = t.substr (s + 1);
/* Serialise this stream as "ffmpeg <id> <sample_rate> <channel_layout> <name>"
 * (the version-1 layout parsed by the constructor above).
 */
624 FFmpegAudioStream::to_string () const
626 	return String::compose ("ffmpeg %1 %2 %3 %4", _id, _sample_rate, _channel_layout, _name);