2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
35 #include <libswscale/swscale.h>
36 #include <libpostproc/postprocess.h>
41 #include "transcoder.h"
45 #include "exceptions.h"
49 #include "ffmpeg_decoder.h"
50 #include "filter_graph.h"
56 using std::stringstream;
58 using boost::shared_ptr;
59 using boost::optional;
60 using boost::dynamic_pointer_cast;
// Construct an FFmpeg-backed decoder from a film f, options o and job pointer j.
// The same three arguments are forwarded to both the VideoDecoder and the
// AudioDecoder base, and the video, audio and subtitle codec contexts start
// out null (0); the destructor only closes contexts that are non-null.
// NOTE(review): further initialisers and the constructor body are not visible
// in this extract.
62 FFmpegDecoder::FFmpegDecoder (shared_ptr<Film> f, shared_ptr<const Options> o, Job* j)
64 , VideoDecoder (f, o, j)
65 , AudioDecoder (f, o, j)
69 , _video_codec_context (0)
71 , _audio_codec_context (0)
73 , _subtitle_codec_context (0)
// Destructor: closes each of the audio, video and subtitle codec contexts that
// is non-null, then closes the input with avformat_close_input.
82 FFmpegDecoder::~FFmpegDecoder ()
// A context is only non-null once the matching setup_* call has assigned it.
84 if (_audio_codec_context) {
85 avcodec_close (_audio_codec_context);
88 if (_video_codec_context) {
89 avcodec_close (_video_codec_context);
92 if (_subtitle_codec_context) {
93 avcodec_close (_subtitle_codec_context);
// The format context is closed last, after the codec contexts.
98 avformat_close_input (&_format_context);
// Open the film's content file and enumerate its streams.
// Throws OpenFileError if avformat_open_input returns non-zero, and DecodeError
// if stream information cannot be found, if no video stream was recorded
// (_video_stream < 0), or on the frame-allocation failure path.
// Every audio stream is appended to _audio_streams as an FFmpegAudioStream and
// every subtitle stream to _subtitle_streams.
// NOTE(review): the declaration of r, the video-stream assignment and the check
// guarding the final throw are not visible in this extract.
102 FFmpegDecoder::setup_general ()
108 if ((r = avformat_open_input (&_format_context, _film->content_path().c_str(), 0, 0)) != 0) {
109 throw OpenFileError (_film->content_path ());
112 if (avformat_find_stream_info (_format_context, 0) < 0) {
113 throw DecodeError ("could not find stream information");
116 /* Find video, audio and subtitle streams and choose the first of each */
118 for (uint32_t i = 0; i < _format_context->nb_streams; ++i) {
119 AVStream* s = _format_context->streams[i];
120 if (s->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
// Audio streams are recorded with their name, index, sample rate and channel layout.
122 } else if (s->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
123 _audio_streams.push_back (
124 shared_ptr<AudioStream> (
125 new FFmpegAudioStream (stream_name (s), i, s->codec->sample_rate, s->codec->channel_layout)
// Subtitle streams only need a name and the stream index.
128 } else if (s->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) {
129 _subtitle_streams.push_back (
130 shared_ptr<SubtitleStream> (
131 new SubtitleStream (stream_name (s), i)
// A negative _video_stream means no video stream was recorded above.
137 if (_video_stream < 0) {
138 throw DecodeError ("could not find video stream");
// _frame is the AVFrame that pass() hands to the video and audio decode calls.
141 _frame = avcodec_alloc_frame ();
143 throw DecodeError ("could not allocate frame");
// Find and open a decoder for the stream indexed by _video_stream.
// Throws DecodeError if no decoder matches the stream's codec id or if
// avcodec_open2 fails.
148 FFmpegDecoder::setup_video ()
// The codec context is taken from the stream held by _format_context.
150 _video_codec_context = _format_context->streams[_video_stream]->codec;
151 _video_codec = avcodec_find_decoder (_video_codec_context->codec_id);
153 if (_video_codec == 0) {
154 throw DecodeError ("could not find video decoder");
157 if (avcodec_open2 (_video_codec_context, _video_codec, 0) < 0) {
158 throw DecodeError ("could not open video decoder");
// Find and open a decoder for the currently selected audio stream.
// Throws DecodeError if no decoder matches the stream's codec id or if
// avcodec_open2 fails.
163 FFmpegDecoder::setup_audio ()
// NOTE(review): the body of this check is not visible; presumably it returns
// early when no audio stream is selected.
165 if (!_audio_stream) {
// The FFmpeg-specific stream type is needed for id() and channels() below.
169 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
172 _audio_codec_context = _format_context->streams[ffa->id()]->codec;
173 _audio_codec = avcodec_find_decoder (_audio_codec_context->codec_id);
175 if (_audio_codec == 0) {
176 throw DecodeError ("could not find audio decoder");
179 if (avcodec_open2 (_audio_codec_context, _audio_codec, 0) < 0) {
180 throw DecodeError ("could not open audio decoder");
183 /* This is a hack; sometimes it seems that _audio_codec_context->channel_layout isn't set up,
184 so bodge it here. No idea why we should have to do this.
// A zero layout is replaced by FFmpeg's default layout for the stream's channel count.
187 if (_audio_codec_context->channel_layout == 0) {
188 _audio_codec_context->channel_layout = av_get_default_channel_layout (ffa->channels());
// Find and open a decoder for the currently selected subtitle stream.
// Throws DecodeError if no decoder matches the stream's codec id or if
// avcodec_open2 fails.
193 FFmpegDecoder::setup_subtitle ()
// NOTE(review): the body of this check is not visible; presumably it returns
// early when no subtitle stream is selected.
195 if (!_subtitle_stream) {
199 _subtitle_codec_context = _format_context->streams[_subtitle_stream->id()]->codec;
200 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
202 if (_subtitle_codec == 0) {
203 throw DecodeError ("could not find subtitle decoder");
206 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
207 throw DecodeError ("could not open subtitle decoder");
// Read one packet from the input with av_read_frame and handle it according to
// the stream it belongs to:
//  - video: decode, then repeat or drop frames so that output time tracks the
//    source timestamps, and emit through filter_and_emit_video;
//  - audio: decode, discard anything before the first video frame, pad the gap
//    between first video and first audio with silence, then emit via Audio();
//  - subtitle: decode and emit a TimedSubtitle (or an empty pointer).
// The packet is released with av_free_packet at the end.
// When the read reports something other than AVERROR_EOF the error is logged,
// and the decoders are then drained of any remaining frames.
// NOTE(review): the return type, the check that selects the drain path, and
// several local declarations (buf, frame_finished, sub, got_subtitle) are not
// visible in this extract.
213 FFmpegDecoder::pass ()
215 int r = av_read_frame (_format_context, &_packet);
218 if (r != AVERROR_EOF) {
219 /* Maybe we should fail here, but for now we'll just finish off instead */
221 av_strerror (r, buf, sizeof(buf));
222 _film->log()->log (String::compose ("error on av_read_frame (%1) (%2)", buf, r));
225 /* Get any remaining frames */
230 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
// Keep decoding for as long as the video decoder still produces finished frames.
234 while (avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
235 filter_and_emit_video (_frame);
// Likewise for audio, but only when a stream is selected and audio decoding is enabled.
238 if (_audio_stream && _opt->decode_audio) {
239 while (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
240 int const data_size = av_samples_get_buffer_size (
241 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
244 assert (_audio_codec_context->channels == _film->audio_channels());
245 Audio (deinterleave_audio (_frame->data[0], data_size));
252 avcodec_get_frame_defaults (_frame);
// ffa is only used by the audio branch below.
254 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
256 if (_packet.stream_index == _video_stream) {
259 int const r = avcodec_decode_video2 (_video_codec_context, _frame, &frame_finished, &_packet);
260 if (r >= 0 && frame_finished) {
// r is compared with the packet size; a mismatch is only logged.
262 if (r != _packet.size) {
263 _film->log()->log (String::compose ("Used only %1 bytes of %2 in packet", r, _packet.size));
266 /* Where we are in the output, in seconds */
267 double const out_pts_seconds = video_frame() / frames_per_second();
269 /* Where we are in the source, in seconds */
270 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
271 * av_frame_get_best_effort_timestamp(_frame);
274 String::compose ("Source video frame ready; source at %1, output at %2", source_pts_seconds, out_pts_seconds),
// _first_video records the source time of the first decoded video frame; later
// timestamps are measured relative to it.
279 _first_video = source_pts_seconds;
282 /* Difference between where we are and where we should be */
283 double const delta = source_pts_seconds - _first_video.get() - out_pts_seconds;
284 double const one_frame = 1 / frames_per_second();
286 /* Insert frames if required to get out_pts_seconds up to pts_seconds */
287 if (delta > one_frame) {
288 int const extra = rint (delta / one_frame);
289 for (int i = 0; i < extra; ++i) {
290 repeat_last_video ();
293 "Extra video frame inserted at %1s; source frame %2, source PTS %3 (at %4 fps)",
294 out_pts_seconds, video_frame(), source_pts_seconds, frames_per_second()
// A frame is emitted unless the output is more than one frame ahead of the source.
300 if (delta > -one_frame) {
301 /* Process this frame */
302 filter_and_emit_video (_frame);
304 /* Otherwise we are omitting a frame to keep things right */
305 _film->log()->log (String::compose ("Frame removed at %1s", out_pts_seconds));
309 } else if (ffa && _packet.stream_index == ffa->id() && _opt->decode_audio) {
312 if (avcodec_decode_audio4 (_audio_codec_context, _frame, &frame_finished, &_packet) >= 0 && frame_finished) {
314 /* Where we are in the source, in seconds */
315 double const source_pts_seconds = av_q2d (_format_context->streams[_packet.stream_index]->time_base)
316 * av_frame_get_best_effort_timestamp(_frame);
318 /* We only decode audio if we've had our first video packet through, and if it
319 was before this packet. Until then audio is thrown away.
322 if (_first_video && _first_video.get() <= source_pts_seconds) {
325 _first_audio = source_pts_seconds;
327 /* This is our first audio frame, and if we've arrived here we must have had our
328 first video frame. Push some silence to make up any gap between our first
329 video frame and our first audio.
332 /* frames of silence that we must push */
333 int const s = rint ((_first_audio.get() - _first_video.get()) * ffa->sample_rate ());
337 "First video at %1, first audio at %2, pushing %3 audio frames of silence for %4 channels (%5 bytes per sample)",
338 _first_video.get(), _first_audio.get(), s, ffa->channels(), bytes_per_audio_sample()
343 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), s));
344 audio->make_silent ();
349 int const data_size = av_samples_get_buffer_size (
350 0, _audio_codec_context->channels, _frame->nb_samples, audio_sample_format (), 1
353 assert (_audio_codec_context->channels == _film->audio_channels());
354 Audio (deinterleave_audio (_frame->data[0], data_size));
// Subtitles are only handled once the first video frame has been seen, because
// TimedSubtitle is constructed with _first_video.
358 } else if (_subtitle_stream && _packet.stream_index == _subtitle_stream->id() && _opt->decode_subtitles && _first_video) {
// avcodec_decode_subtitle2 returns a negative value on error, so test it with
// >= 0 like the video and audio decode calls above, not for truthiness.
362 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) >= 0 && got_subtitle) {
363 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
364 indicate that the previous subtitle should stop.
366 if (sub.num_rects > 0) {
367 shared_ptr<TimedSubtitle> ts;
369 emit_subtitle (shared_ptr<TimedSubtitle> (new TimedSubtitle (sub, _first_video.get())));
371 /* some problem with the subtitle; we probably didn't understand it */
// An empty pointer is emitted here; NOTE(review): the enclosing branch is not
// visible, presumably this is the empty-AVSubtitle case described above.
374 emit_subtitle (shared_ptr<TimedSubtitle> ());
376 avsubtitle_free (&sub);
// Release the packet's buffers.
380 av_free_packet (&_packet);
// Convert a buffer of decoded audio into per-channel float buffers.
// @param data Decoded samples in the format reported by audio_sample_format().
// @param size Size of data in bytes; asserted to be a whole number of
//             (bytes per sample * channels) units.
// @return AudioBuffers with ffa->channels() channels of
//         size / bytes_per_audio_sample() / _film->audio_channels() frames.
// S16 and S32 input is read sample by sample and scaled by 2^15 and 2^31
// respectively; FLTP input is copied with memcpy, one block of frames per channel.
// NOTE(review): the per-sample channel/sample bookkeeping, the break statements
// and any default case are not visible in this extract.
384 shared_ptr<AudioBuffers>
385 FFmpegDecoder::deinterleave_audio (uint8_t* data, int size)
// Both values are used as divisors below, so they must be non-zero.
387 assert (_film->audio_channels());
388 assert (bytes_per_audio_sample());
390 shared_ptr<FFmpegAudioStream> ffa = dynamic_pointer_cast<FFmpegAudioStream> (_audio_stream);
393 /* Deinterleave and convert to float */
395 assert ((size % (bytes_per_audio_sample() * ffa->channels())) == 0);
// total_samples counts samples across all channels; frames is samples per channel.
397 int const total_samples = size / bytes_per_audio_sample();
398 int const frames = total_samples / _film->audio_channels();
399 shared_ptr<AudioBuffers> audio (new AudioBuffers (ffa->channels(), frames));
401 switch (audio_sample_format()) {
402 case AV_SAMPLE_FMT_S16:
404 int16_t* p = (int16_t *) data;
407 for (int i = 0; i < total_samples; ++i) {
408 audio->data(channel)[sample] = float(*p++) / (1 << 15);
411 if (channel == _film->audio_channels()) {
419 case AV_SAMPLE_FMT_S32:
421 int32_t* p = (int32_t *) data;
424 for (int i = 0; i < total_samples; ++i) {
// The divisor must be unsigned: 1 << 31 overflows a signed 32-bit int and, where
// it evaluates at all, gives a negative value that inverts the sample polarity.
425 audio->data(channel)[sample] = float(*p++) / (1U << 31);
428 if (channel == _film->audio_channels()) {
435 case AV_SAMPLE_FMT_FLTP:
437 float* p = reinterpret_cast<float*> (data);
438 for (int i = 0; i < _film->audio_channels(); ++i) {
439 memcpy (audio->data(i), p, frames * sizeof(float));
// Frame rate of the stream indexed by _video_stream.
// Uses the stream's avg_frame_rate when both its numerator and denominator are
// non-zero, and falls back to r_frame_rate otherwise.
453 FFmpegDecoder::frames_per_second () const
455 AVStream* s = _format_context->streams[_video_stream];
457 if (s->avg_frame_rate.num && s->avg_frame_rate.den) {
458 return av_q2d (s->avg_frame_rate);
461 return av_q2d (s->r_frame_rate);
// Sample format of the open audio codec context.
// Returns (AVSampleFormat) 0 when no audio codec context has been set up.
465 FFmpegDecoder::audio_sample_format () const
467 if (_audio_codec_context == 0) {
468 return (AVSampleFormat) 0;
471 return _audio_codec_context->sample_fmt;
// Width and height of the video as reported by the video codec context.
// Dereferences _video_codec_context without a null check.
475 FFmpegDecoder::native_size () const
477 return Size (_video_codec_context->width, _video_codec_context->height);
// Pixel format reported by the video codec context.
// Dereferences _video_codec_context without a null check.
481 FFmpegDecoder::pixel_format () const
483 return _video_codec_context->pix_fmt;
// Numerator of the video codec context's time base.
487 FFmpegDecoder::time_base_numerator () const
489 return _video_codec_context->time_base.num;
// Denominator of the video codec context's time base.
493 FFmpegDecoder::time_base_denominator () const
495 return _video_codec_context->time_base.den;
// Numerator of the video codec context's sample aspect ratio.
499 FFmpegDecoder::sample_aspect_ratio_numerator () const
501 return _video_codec_context->sample_aspect_ratio.num;
// Denominator of the video codec context's sample aspect ratio.
505 FFmpegDecoder::sample_aspect_ratio_denominator () const
507 return _video_codec_context->sample_aspect_ratio.den;
// Build a name for stream s using its "language" and "title" metadata entries.
// The name is accumulated in the stream n, whose emptiness is tested twice below.
// NOTE(review): how the two entries are combined, and what is produced when n
// is still empty at the end, is not visible in this extract.
511 FFmpegDecoder::stream_name (AVStream* s) const
515 AVDictionaryEntry const * lang = av_dict_get (s->metadata, "language", 0, 0);
520 AVDictionaryEntry const * title = av_dict_get (s->metadata, "title", 0, 0);
522 if (!n.str().empty()) {
528 if (n.str().empty()) {
// Size in bytes of one sample in the current audio sample format, as given by
// av_get_bytes_per_sample.
536 FFmpegDecoder::bytes_per_audio_sample () const
538 return av_get_bytes_per_sample (audio_sample_format ());
// Select the audio stream s by forwarding it to AudioDecoder::set_audio_stream.
// NOTE(review): any statements after the base-class call are not visible in
// this extract.
542 FFmpegDecoder::set_audio_stream (shared_ptr<AudioStream> s)
544 AudioDecoder::set_audio_stream (s);
// Select the subtitle stream s by forwarding it to VideoDecoder::set_subtitle_stream.
// NOTE(review): any statements after the base-class call are not visible in
// this extract.
549 FFmpegDecoder::set_subtitle_stream (shared_ptr<SubtitleStream> s)
551 VideoDecoder::set_subtitle_stream (s);
// Run a decoded frame through a filter graph and iterate over the resulting images.
// Graphs are kept in _filter_graphs; the list is searched for one that can
// process this frame's size and pixel format, and a new graph is created,
// stored and logged when none is found.
// NOTE(review): the loop bodies and the assignment of an existing graph to
// `graph` are not visible in this extract.
556 FFmpegDecoder::filter_and_emit_video (AVFrame* frame)
558 shared_ptr<FilterGraph> graph;
560 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
// Stop at the first graph that accepts this frame's dimensions and pixel format.
561 while (i != _filter_graphs.end() && !(*i)->can_process (Size (frame->width, frame->height), (AVPixelFormat) frame->format)) {
// Reaching the end means no existing graph matched.
565 if (i == _filter_graphs.end ()) {
566 graph.reset (new FilterGraph (_film, this, _opt->apply_crop, Size (frame->width, frame->height), (AVPixelFormat) frame->format));
567 _filter_graphs.push_back (graph);
568 _film->log()->log (String::compose ("New graph for %1x%2, pixel format %3", frame->width, frame->height, frame->format));
// process() returns a list of images for this frame.
573 list<shared_ptr<Image> > images = graph->process (frame);
575 for (list<shared_ptr<Image> >::iterator i = images.begin(); i != images.end(); ++i) {
// Factory: build an FFmpegAudioStream from its string form t and an optional
// version v.
// Returns an empty pointer when the type read from the string is not "ffmpeg";
// otherwise returns a new stream constructed from (t, v).
// NOTE(review): the version check selecting the first return, and the code that
// reads `type` from t, are not visible in this extract.
580 shared_ptr<FFmpegAudioStream>
581 FFmpegAudioStream::create (string t, optional<int> v)
584 /* version < 1; no type in the string, and there's only FFmpeg streams anyway */
585 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
// A string describing some other kind of stream is not ours to construct.
591 if (type != "ffmpeg") {
592 return shared_ptr<FFmpegAudioStream> ();
595 return shared_ptr<FFmpegAudioStream> (new FFmpegAudioStream (t, v));
// Construct a stream by parsing its string form t.
// Two layouts are read from the stream n:
//  - an older one holding an id and a channel count, from which the channel
//    layout is derived with av_get_default_channel_layout;
//  - the current one (marked version 1) holding type, id, sample rate and
//    channel layout, where type is asserted to be "ffmpeg".
// After the numeric fields, name_index space-separated fields are stripped from
// the front of t.
// NOTE(review): the version test, the declarations of n, channels, type and
// name_index, and the final assignment of the name are not visible here.
598 FFmpegAudioStream::FFmpegAudioStream (string t, optional<int> version)
606 n >> _id >> channels;
607 _channel_layout = av_get_default_channel_layout (channels);
611 /* Current (marked version 1) */
612 n >> type >> _id >> _sample_rate >> _channel_layout;
613 assert (type == "ffmpeg");
// Each iteration drops everything up to and including the next space in t.
616 for (int i = 0; i < name_index; ++i) {
617 size_t const s = t.find (' ');
618 if (s != string::npos) {
619 t = t.substr (s + 1);
// Serialise this stream as "ffmpeg <id> <sample rate> <channel layout> <name>".
// The first four fields are in the order the version-1 branch of the
// constructor reads them.
627 FFmpegAudioStream::to_string () const
629 return String::compose ("ffmpeg %1 %2 %3 %4", _id, _sample_rate, _channel_layout, _name);
632 /** @return Length (in video frames) according to our content's header */
// Computed as (format context duration / AV_TIME_BASE) * frames_per_second().
// NOTE(review): assumes duration holds a valid value in AV_TIME_BASE units;
// an unknown duration is not handled here.
634 FFmpegDecoder::length () const
636 return (double(_format_context->duration) / AV_TIME_BASE) * frames_per_second();