2 Copyright (C) 2012-2014 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
32 #include <libavcodec/avcodec.h>
33 #include <libavformat/avformat.h>
36 #include "exceptions.h"
40 #include "ffmpeg_decoder.h"
41 #include "ffmpeg_audio_stream.h"
42 #include "ffmpeg_subtitle_stream.h"
43 #include "filter_graph.h"
44 #include "audio_buffers.h"
45 #include "ffmpeg_content.h"
46 #include "image_proxy.h"
53 using std::stringstream;
57 using boost::shared_ptr;
58 using boost::optional;
59 using boost::dynamic_pointer_cast;
/** Construct an FFmpegDecoder.
 *  @param c Content to decode.
 *  @param log Log to write decode messages to.
 *
 *  The constructor computes _pts_offset, which is added to every PTS that
 *  comes out of FFmpeg before it is used (pts_to_use = pts_from_ffmpeg + _pts_offset).
 */
62 FFmpegDecoder::FFmpegDecoder (shared_ptr<const FFmpegContent> c, shared_ptr<Log> log)
67 , _subtitle_codec_context (0)
72 /* Audio and video frame PTS values may not start with 0. We want
73 to fiddle them so that:
75 1. One of them starts at time 0.
76 2. The first video PTS value ends up on a frame boundary.
78 Then we remove big initial gaps in PTS and we allow our
79 insertion of black frames to work.
81 We will do pts_to_use = pts_from_ffmpeg + pts_offset;
84 bool const have_video = c->first_video();
85 bool const have_audio = c->audio_stream () && c->audio_stream()->first_audio;
87 /* First, make one of them start at 0 */
89 if (have_audio && have_video) {
/* Shift by the earlier of the two first-PTS values so neither goes negative */
90 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
91 } else if (have_video) {
92 _pts_offset = - c->first_video().get();
93 } else if (have_audio) {
94 _pts_offset = - c->audio_stream()->first_audio.get();
97 /* Now adjust both so that the video pts starts on a frame */
98 if (have_video && have_audio) {
99 ContentTime first_video = c->first_video().get() + _pts_offset;
100 ContentTime const old_first_video = first_video;
/* Add the difference between the rounded-up first video time and the raw one */
101 _pts_offset += first_video.round_up (c->video_frame_rate ()) - old_first_video;
/** Destructor: close the subtitle codec context, if setup_subtitle() opened one.
 *  _mutex is taken to serialise against other users of the codec contexts.
 */
105 FFmpegDecoder::~FFmpegDecoder ()
107 boost::mutex::scoped_lock lm (_mutex);
109 if (_subtitle_codec_context) {
110 avcodec_close (_subtitle_codec_context);
/** Flush any remaining frames out of the video and audio decoders at
 *  end-of-stream, then flush the base AudioDecoder.
 */
115 FFmpegDecoder::flush ()
117 /* Get any remaining frames */
122 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Drain the video decoder until it produces no more pictures */
124 while (decode_video_packet ()) {}
126 if (_ffmpeg_content->audio_stream()) {
127 decode_audio_packet ();
128 AudioDecoder::flush ();
/** Read the next packet from the file and dispatch it to the decoder
 *  for whichever stream (video / audio / subtitle) it belongs to.
 */
133 FFmpegDecoder::pass ()
135 int r = av_read_frame (_format_context, &_packet);
/* r < 0 here means either EOF or a read error */
138 if (r != AVERROR_EOF) {
139 /* Maybe we should fail here, but for now we'll just finish off instead */
141 av_strerror (r, buf, sizeof(buf));
142 _log->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
149 int const si = _packet.stream_index;
151 if (si == _video_stream) {
152 decode_video_packet ();
153 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si)) {
154 decode_audio_packet ();
155 } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si)) {
156 decode_subtitle_packet ();
/* av_read_frame allocated the packet's data; release it */
159 av_free_packet (&_packet);
163 /** @param data pointer to array of pointers to buffers.
164 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
166 shared_ptr<AudioBuffers>
167 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
169 assert (_ffmpeg_content->audio_channels());
170 assert (bytes_per_audio_sample());
172 /* Deinterleave and convert to float */
174 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
176 int const total_samples = size / bytes_per_audio_sample();
177 int const frames = total_samples / _ffmpeg_content->audio_channels();
178 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
180 switch (audio_sample_format()) {
181 case AV_SAMPLE_FMT_U8:
183 uint8_t* p = reinterpret_cast<uint8_t *> (data[0]);
186 for (int i = 0; i < total_samples; ++i) {
187 audio->data(channel)[sample] = float(*p++) / (1 << 23);
190 if (channel == _ffmpeg_content->audio_channels()) {
198 case AV_SAMPLE_FMT_S16:
200 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
203 for (int i = 0; i < total_samples; ++i) {
204 audio->data(channel)[sample] = float(*p++) / (1 << 15);
207 if (channel == _ffmpeg_content->audio_channels()) {
215 case AV_SAMPLE_FMT_S16P:
217 int16_t** p = reinterpret_cast<int16_t **> (data);
218 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
219 for (int j = 0; j < frames; ++j) {
220 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
226 case AV_SAMPLE_FMT_S32:
228 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
231 for (int i = 0; i < total_samples; ++i) {
232 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
235 if (channel == _ffmpeg_content->audio_channels()) {
243 case AV_SAMPLE_FMT_FLT:
245 float* p = reinterpret_cast<float*> (data[0]);
248 for (int i = 0; i < total_samples; ++i) {
249 audio->data(channel)[sample] = *p++;
252 if (channel == _ffmpeg_content->audio_channels()) {
260 case AV_SAMPLE_FMT_FLTP:
262 float** p = reinterpret_cast<float**> (data);
263 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
264 memcpy (audio->data(i), p[i], frames * sizeof(float));
270 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/** @return the sample format of our audio stream's codec, or a zero value
 *  if there is no audio stream.
 *  NOTE(review): (AVSampleFormat) 0 is AV_SAMPLE_FMT_U8, not AV_SAMPLE_FMT_NONE
 *  (which is -1) -- confirm callers treat this sentinel correctly.
 */
277 FFmpegDecoder::audio_sample_format () const
279 if (!_ffmpeg_content->audio_stream()) {
280 return (AVSampleFormat) 0;
283 return audio_codec_context()->sample_fmt;
/** @return the size, in bytes, of one sample in our audio stream's format */
287 FFmpegDecoder::bytes_per_audio_sample () const
289 return av_get_bytes_per_sample (audio_sample_format ());
293 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
296 optional<ContentTime> last_video;
297 optional<ContentTime> last_audio;
299 while (!finished (last_video, last_audio, frames_read)) {
300 int r = av_read_frame (_format_context, &_packet);
302 /* We should flush our decoders here, possibly yielding a few more frames,
303 but the consequence of having to do that is too hideous to contemplate.
304 Instead we give up and say that you can't seek too close to the end
312 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
314 if (_packet.stream_index == _video_stream) {
316 avcodec_get_frame_defaults (_frame);
319 r = avcodec_decode_video2 (video_codec_context(), _frame, &got_picture, &_packet);
320 if (r >= 0 && got_picture) {
321 last_video = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
324 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
325 AVPacket copy_packet = _packet;
326 while (copy_packet.size > 0) {
329 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &got_frame, &_packet);
330 if (r >= 0 && got_frame) {
331 last_audio = ContentTime::from_seconds (av_frame_get_best_effort_timestamp (_frame) * time_base) + _pts_offset;
334 copy_packet.data += r;
335 copy_packet.size -= r;
339 av_free_packet (&_packet);
/** Predicate for minimal_run: true once decoding has reached or passed the
 *  requested seek time on either the video or the audio stream.
 */
346 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
348 return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
352 FFmpegDecoder::seek_final_finished (int n, int done) const
/** Seek the format context to content time t and flush all codec buffers,
 *  so that no stale pre-seek frames are emitted.
 *  @param t target time; _pts_offset is removed to get back to FFmpeg's timeline.
 */
358 FFmpegDecoder::seek_and_flush (ContentTime t)
360 ContentTime const u = t - _pts_offset;
/* Convert to the video stream's timebase units for av_seek_frame */
361 int64_t s = u.seconds() / av_q2d (_format_context->streams[_video_stream]->time_base);
363 if (_ffmpeg_content->audio_stream ()) {
364 s = min (
365 s, int64_t (u.seconds() / av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base))
369 /* Ridiculous empirical hack */
375 av_seek_frame (_format_context, _video_stream, s, 0);
377 avcodec_flush_buffers (video_codec_context());
378 if (audio_codec_context ()) {
379 avcodec_flush_buffers (audio_codec_context ());
381 if (_subtitle_codec_context) {
382 avcodec_flush_buffers (_subtitle_codec_context);
/** Seek to a content time.
 *  @param time target time.
 *  @param accurate if true, pre-roll 2s before the target and step forward
 *  with minimal_run so we land exactly where requested.
 */
387 FFmpegDecoder::seek (ContentTime time, bool accurate)
389 VideoDecoder::seek (time, accurate);
390 AudioDecoder::seek (time, accurate);
392 /* If we are doing an accurate seek, our initial shot will be 2s (2 being
393 a number plucked from the air) earlier than we want to end up. The loop below
394 will hopefully then step through to where we want to be.
397 ContentTime pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
398 ContentTime initial_seek = time - pre_roll;
399 if (initial_seek < ContentTime (0)) {
400 initial_seek = ContentTime (0);
403 /* Initial seek time in the video stream's timebase */
405 seek_and_flush (initial_seek);
/* Count how many packets it takes to overrun the target... */
412 int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
/* ...then re-seek and run again, stopping one packet earlier */
414 seek_and_flush (initial_seek);
416 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
421 FFmpegDecoder::decode_audio_packet ()
423 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
427 AVPacket copy_packet = _packet;
429 while (copy_packet.size > 0) {
432 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, ©_packet);
434 if (decode_result < 0) {
435 _log->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
439 if (frame_finished) {
440 ContentTime const ct = ContentTime::from_seconds (
441 av_frame_get_best_effort_timestamp (_frame) *
442 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base))
445 int const data_size = av_samples_get_buffer_size (
446 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
449 audio (deinterleave_audio (_frame->data, data_size), ct);
452 copy_packet.data += decode_result;
453 copy_packet.size -= decode_result;
/** Decode the video packet currently in _packet, push the resulting picture
 *  through a (possibly newly-created) filter graph and emit each output image
 *  via video().
 *  @return presumably true if a picture was decoded -- body is partly
 *  elided here, so confirm against the full source.
 */
458 FFmpegDecoder::decode_video_packet ()
461 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
465 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
467 shared_ptr<FilterGraph> graph;
/* Look for an existing graph matching this frame's size and pixel format */
469 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
470 while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
474 if (i == _filter_graphs.end ()) {
/* None found: make a new graph and remember it for later frames */
475 graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
476 _filter_graphs.push_back (graph);
477 _log->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
482 list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
484 for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
486 shared_ptr<Image> image = i->first;
488 if (i->second != AV_NOPTS_VALUE) {
/* Convert the stream-timebase PTS to seconds and then to a frame index */
489 double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset.seconds ();
490 video (shared_ptr<ImageProxy> (new RawImageProxy (image)), rint (pts * _ffmpeg_content->video_frame_rate ()));
492 _log->log ("Dropping frame without PTS");
/** Find and open a decoder for our subtitle stream, if there is one.
 *  Sets _subtitle_codec_context (closed again in the destructor).
 *  @throws DecodeError if the stream, decoder or open fails.
 */
501 FFmpegDecoder::setup_subtitle ()
503 boost::mutex::scoped_lock lm (_mutex);
505 if (!_ffmpeg_content->subtitle_stream()) {
509 _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
510 if (_subtitle_codec_context == 0) {
511 throw DecodeError (N_("could not find subtitle stream"));
514 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
516 if (_subtitle_codec == 0) {
517 throw DecodeError (N_("could not find subtitle decoder"));
520 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
521 throw DecodeError (N_("could not open subtitle decoder"));
/** Decode the subtitle packet currently in _packet and emit it via
 *  image_subtitle() as an RGBA image plus a proportional on-screen rectangle.
 *  Only single-rect bitmap subtitles are handled.
 */
526 FFmpegDecoder::decode_subtitle_packet ()
530 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
534 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
535 indicate that the previous subtitle should stop.
537 if (sub.num_rects <= 0) {
/* Emit an empty subtitle to clear the previous one */
538 image_subtitle (ContentTime (), ContentTime (), shared_ptr<Image> (), dcpomatic::Rect<double> ());
540 } else if (sub.num_rects > 1) {
541 throw DecodeError (_("multi-part subtitles not yet supported"));
544 /* Subtitle PTS (within the source, not taking into account any of the
545 source that we may have chopped off for the DCP)
547 ContentTime packet_time = ContentTime::from_seconds (static_cast<double> (sub.pts) / AV_TIME_BASE) + _pts_offset;
549 /* hence start time for this sub */
/* display times are in milliseconds relative to the packet time */
550 ContentTime const from = packet_time + ContentTime::from_seconds (sub.start_display_time / 1e3);
551 ContentTime const to = packet_time + ContentTime::from_seconds (sub.end_display_time / 1e3);
553 AVSubtitleRect const * rect = sub.rects[0];
555 if (rect->type != SUBTITLE_BITMAP) {
557 // throw DecodeError (_("non-bitmap subtitles not yet supported"));
561 /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
562 G, third B, fourth A.
564 shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
566 /* Start of the first line in the subtitle */
567 uint8_t* sub_p = rect->pict.data[0];
568 /* sub_p looks up into a BGRA palette which is here
569 (i.e. first byte B, second G, third R, fourth A)
571 uint32_t const * palette = (uint32_t *) rect->pict.data[1];
572 /* Start of the output data */
573 uint32_t* out_p = (uint32_t *) image->data()[0];
575 for (int y = 0; y < rect->h; ++y) {
576 uint8_t* sub_line_p = sub_p;
577 uint32_t* out_line_p = out_p;
578 for (int x = 0; x < rect->w; ++x) {
579 uint32_t const p = palette[*sub_line_p++];
/* Swap the B and R bytes of each palette entry to go BGRA -> RGBA */
580 *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
582 sub_p += rect->pict.linesize[0];
583 out_p += image->stride()[0] / sizeof (uint32_t);
/* Express the subtitle rectangle as proportions of the video frame size */
586 dcp::Size const vs = _ffmpeg_content->video_size ();
592 dcpomatic::Rect<double> (
593 static_cast<double> (rect->x) / vs.width,
594 static_cast<double> (rect->y) / vs.height,
595 static_cast<double> (rect->w) / vs.width,
596 static_cast<double> (rect->h) / vs.height
/* AVSubtitle owns heap data which must be freed after use */
600 avsubtitle_free (&sub);