2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/** @file src/ffmpeg_decoder.cc
 *  @brief A decoder using FFmpeg to decode content.
 */
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
37 #include "exceptions.h"
41 #include "ffmpeg_decoder.h"
42 #include "filter_graph.h"
43 #include "audio_buffers.h"
44 #include "ffmpeg_content.h"
51 using std::stringstream;
55 using boost::shared_ptr;
56 using boost::optional;
57 using boost::dynamic_pointer_cast;
60 FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log> log, bool video, bool audio, bool subtitles)
65 , _subtitle_codec_context (0)
67 , _decode_video (video)
68 , _decode_audio (audio)
69 , _decode_subtitles (subtitles)
74 /* Audio and video frame PTS values may not start with 0. We want
75 to fiddle them so that:
77 1. One of them starts at time 0.
78 2. The first video PTS value ends up on a frame boundary.
80 Then we remove big initial gaps in PTS and we allow our
81 insertion of black frames to work.
83 We will do pts_to_use = pts_from_ffmpeg + pts_offset;
86 bool const have_video = video && c->first_video();
87 bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;
89 /* First, make one of them start at 0 */
91 if (have_audio && have_video) {
92 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
93 } else if (have_video) {
94 _pts_offset = - c->first_video().get();
95 } else if (have_audio) {
96 _pts_offset = - c->audio_stream()->first_audio.get();
99 /* Now adjust both so that the video pts starts on a frame */
100 if (have_video && have_audio) {
101 ContentTime first_video = c->first_video().get() + _pts_offset;
102 ContentTime const old_first_video = first_video;
103 _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
107 FFmpegDecoder::~FFmpegDecoder ()
109 boost::mutex::scoped_lock lm (_mutex);
111 if (_subtitle_codec_context) {
112 avcodec_close (_subtitle_codec_context);
117 FFmpegDecoder::flush ()
119 /* Get any remaining frames */
124 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
127 while (decode_video_packet ()) {}
130 if (_ffmpeg_content->audio_stream() && _decode_audio) {
131 decode_audio_packet ();
132 AudioDecoder::flush ();
137 FFmpegDecoder::pass ()
139 int r = av_read_frame (_format_context, &_packet);
142 if (r != AVERROR_EOF) {
143 /* Maybe we should fail here, but for now we'll just finish off instead */
145 av_strerror (r, buf, sizeof(buf));
146 _log->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
153 int const si = _packet.stream_index;
155 if (si == _video_stream && _decode_video) {
156 decode_video_packet ();
157 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si) && _decode_audio) {
158 decode_audio_packet ();
159 } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si) && _decode_subtitles) {
160 decode_subtitle_packet ();
163 av_free_packet (&_packet);
167 /** @param data pointer to array of pointers to buffers.
168 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
170 shared_ptr<AudioBuffers>
171 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
173 assert (_ffmpeg_content->audio_channels());
174 assert (bytes_per_audio_sample());
176 /* Deinterleave and convert to float */
178 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
180 int const total_samples = size / bytes_per_audio_sample();
181 int const frames = total_samples / _ffmpeg_content->audio_channels();
182 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
184 switch (audio_sample_format()) {
185 case AV_SAMPLE_FMT_U8:
187 uint8_t* p = reinterpret_cast<uint8_t *> (data[0]);
190 for (int i = 0; i < total_samples; ++i) {
191 audio->data(channel)[sample] = float(*p++) / (1 << 23);
194 if (channel == _ffmpeg_content->audio_channels()) {
202 case AV_SAMPLE_FMT_S16:
204 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
207 for (int i = 0; i < total_samples; ++i) {
208 audio->data(channel)[sample] = float(*p++) / (1 << 15);
211 if (channel == _ffmpeg_content->audio_channels()) {
219 case AV_SAMPLE_FMT_S16P:
221 int16_t** p = reinterpret_cast<int16_t **> (data);
222 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
223 for (int j = 0; j < frames; ++j) {
224 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
230 case AV_SAMPLE_FMT_S32:
232 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
235 for (int i = 0; i < total_samples; ++i) {
236 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
239 if (channel == _ffmpeg_content->audio_channels()) {
247 case AV_SAMPLE_FMT_FLT:
249 float* p = reinterpret_cast<float*> (data[0]);
252 for (int i = 0; i < total_samples; ++i) {
253 audio->data(channel)[sample] = *p++;
256 if (channel == _ffmpeg_content->audio_channels()) {
264 case AV_SAMPLE_FMT_FLTP:
266 float** p = reinterpret_cast<float**> (data);
267 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
268 memcpy (audio->data(i), p[i], frames * sizeof(float));
274 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
281 FFmpegDecoder::audio_sample_format () const
283 if (!_ffmpeg_content->audio_stream()) {
284 return (AVSampleFormat) 0;
287 return audio_codec_context()->sample_fmt;
291 FFmpegDecoder::bytes_per_audio_sample () const
293 return av_get_bytes_per_sample (audio_sample_format ());
297 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
300 optional<ContentTime> last_video;
301 optional<ContentTime> last_audio;
303 while (!finished (last_video, last_audio, frames_read)) {
304 int r = av_read_frame (_format_context, &_packet);
306 /* We should flush our decoders here, possibly yielding a few more frames,
307 but the consequence of having to do that is too hideous to contemplate.
308 Instead we give up and say that you can't seek too close to the end
316 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
318 if (_packet.stream_index == _video_stream) {
320 avcodec_get_frame_defaults (_frame);
323 r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
324 if (r >= 0 && finished) {
325 last_video = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
328 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
329 AVPacket copy_packet = _packet;
330 while (copy_packet.size > 0) {
333 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
334 if (r >= 0 && finished) {
335 last_audio = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
338 copy_packet.data += r;
339 copy_packet.size -= r;
343 av_free_packet (&_packet);
350 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
352 return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
356 FFmpegDecoder::seek_final_finished (int n, int done) const
362 FFmpegDecoder::seek_and_flush (ContentTime t)
364 ContentTime const u = t - _pts_offset;
365 int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);
367 if (_ffmpeg_content->audio_stream ()) {
369 s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))
373 /* Ridiculous empirical hack */
379 av_seek_frame (_format_context, _video_stream, s, 0);
381 avcodec_flush_buffers (video_codec_context());
382 if (audio_codec_context ()) {
383 avcodec_flush_buffers (audio_codec_context ());
385 if (_subtitle_codec_context) {
386 avcodec_flush_buffers (_subtitle_codec_context);
391 FFmpegDecoder::seek (ContentTime time, bool accurate)
393 Decoder::seek (time, accurate);
395 AudioDecoder::seek (time, accurate);
398 /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
399 a number plucked from the air) earlier than we want to end up. The loop below
400 will hopefully then step through to where we want to be.
403 ContentTime pre_roll = accurate ? ContentTime::from_seconds (0.2) : ContentTime (0);
404 ContentTime initial_seek = time - pre_roll;
405 if (initial_seek < ContentTime (0)) {
406 initial_seek = ContentTime (0);
409 /* Initial seek time in the video stream's timebase */
411 seek_and_flush (initial_seek);
418 int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
420 seek_and_flush (initial_seek);
422 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
427 FFmpegDecoder::decode_audio_packet ()
429 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
433 AVPacket copy_packet = _packet;
435 while (copy_packet.size > 0) {
438 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, ©_packet);
440 if (decode_result < 0) {
441 _log->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
445 if (frame_finished) {
446 ContentTime const ct = ContentTime::from_seconds (
447 av_frame_get_best_effort_timestamp (_frame) *
448 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
451 int const data_size = av_samples_get_buffer_size (
452 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
455 audio (deinterleave_audio (_frame->data, data_size), ct);
458 copy_packet.data += decode_result;
459 copy_packet.size -= decode_result;
464 FFmpegDecoder::decode_video_packet ()
467 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
471 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
473 shared_ptr<FilterGraph> graph;
475 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
476 while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
480 if (i == _filter_graphs.end ()) {
481 graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
482 _filter_graphs.push_back (graph);
483 _log->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
488 list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
490 string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;
492 for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
494 shared_ptr<Image> image = i->first;
495 if (!post_process.empty ()) {
496 image = image->post_process (post_process, true);
499 if (i->second != AV_NOPTS_VALUE) {
500 video (image, false, ContentTime::from_seconds (i->second * av_q2d (_format_context->streams[_video_stream]->time_base)) + _pts_offset);
502 _log->log ("Dropping frame without PTS");
511 FFmpegDecoder::setup_subtitle ()
513 boost::mutex::scoped_lock lm (_mutex);
515 if (!_ffmpeg_content->subtitle_stream()) {
519 _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
520 if (_subtitle_codec_context == 0) {
521 throw DecodeError (N_("could not find subtitle stream"));
524 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
526 if (_subtitle_codec == 0) {
527 throw DecodeError (N_("could not find subtitle decoder"));
530 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
531 throw DecodeError (N_("could not open subtitle decoder"));
536 FFmpegDecoder::decode_subtitle_packet ()
540 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
544 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
545 indicate that the previous subtitle should stop.
547 if (sub.num_rects <= 0) {
548 image_subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), ContentTime (), ContentTime ());
550 } else if (sub.num_rects > 1) {
551 throw DecodeError (_("multi-part subtitles not yet supported"));
554 /* Subtitle PTS (within the source, not taking into account any of the
555 source that we may have chopped off for the DCP)
557 ContentTime packet_time = ContentTime::from_seconds (static_cast<double> (sub.pts) / AV_TIME_BASE) + _pts_offset;
559 /* hence start time for this sub */
560 ContentTime const from = packet_time + ContentTime::from_seconds (sub.start_display_time / 1e3);
561 ContentTime const to = packet_time + ContentTime::from_seconds (sub.end_display_time / 1e3);
563 AVSubtitleRect const * rect = sub.rects[0];
565 if (rect->type != SUBTITLE_BITMAP) {
566 throw DecodeError (_("non-bitmap subtitles not yet supported"));
569 /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
570 G, third B, fourth A.
572 shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
574 /* Start of the first line in the subtitle */
575 uint8_t* sub_p = rect->pict.data[0];
576 /* sub_p looks up into a BGRA palette which is here
577 (i.e. first byte B, second G, third R, fourth A)
579 uint32_t const * palette = (uint32_t *) rect->pict.data[1];
580 /* Start of the output data */
581 uint32_t* out_p = (uint32_t *) image->data()[0];
583 for (int y = 0; y < rect->h; ++y) {
584 uint8_t* sub_line_p = sub_p;
585 uint32_t* out_line_p = out_p;
586 for (int x = 0; x < rect->w; ++x) {
587 uint32_t const p = palette[*sub_line_p++];
588 *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
590 sub_p += rect->pict.linesize[0];
591 out_p += image->stride()[0] / sizeof (uint32_t);
594 dcp::Size const vs = _ffmpeg_content->video_size ();
598 dcpomatic::Rect<double> (
599 static_cast<double> (rect->x) / vs.width,
600 static_cast<double> (rect->y) / vs.height,
601 static_cast<double> (rect->w) / vs.width,
602 static_cast<double> (rect->h) / vs.height
609 avsubtitle_free (&sub);