2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
38 #include "exceptions.h"
42 #include "ffmpeg_decoder.h"
43 #include "filter_graph.h"
44 #include "audio_buffers.h"
45 #include "ffmpeg_content.h"
52 using std::stringstream;
56 using boost::shared_ptr;
57 using boost::optional;
58 using boost::dynamic_pointer_cast;
/** Construct an FFmpegDecoder and compute _pts_offset.
 *  @param f Film; stored as a weak pointer and used for logging elsewhere in this class.
 *  @param c FFmpeg content to decode.
 *  @param video true to decode the video stream.
 *  @param audio true to decode the audio stream.
 *
 *  NOTE(review): this listing is an excerpt — some member initialisers,
 *  braces and statements are elided; comments below annotate visible code only.
 */
61 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio)
67 , _subtitle_codec_context (0)
69 , _decode_video (video)
70 , _decode_audio (audio)
75 /* Audio and video frame PTS values may not start with 0. We want
76 to fiddle them so that:
78 1. One of them starts at time 0.
79 2. The first video PTS value ends up on a frame boundary.
81 Then we remove big initial gaps in PTS and we allow our
82 insertion of black frames to work.
84 We will do pts_to_use = pts_from_ffmpeg + pts_offset;
/* A stream only counts if it exists, we were asked to decode it, and it has a known first PTS */
87 bool const have_video = video && c->first_video();
88 bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;
90 /* First, make one of them start at 0 */
92 if (have_audio && have_video) {
/* Shift by the earlier of the two first PTS values so the earlier stream starts at 0 */
93 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
94 } else if (have_video) {
95 _pts_offset = - c->first_video().get();
96 } else if (have_audio) {
97 _pts_offset = - c->audio_stream()->first_audio.get();
100 /* Now adjust both so that the video pts starts on a frame */
101 if (have_video && have_audio) {
102 double first_video = c->first_video().get() + _pts_offset;
103 double const old_first_video = first_video;
105 /* Round the first video up to a frame boundary */
/* 1e-6 tolerance: only round if we are not already (numerically) on a frame boundary */
106 if (fabs (rint (first_video * c->video_frame_rate()) - first_video * c->video_frame_rate()) > 1e-6) {
107 first_video = ceil (first_video * c->video_frame_rate()) / c->video_frame_rate ();
/* Apply the same rounding shift to the shared offset so audio stays in sync with video */
110 _pts_offset += first_video - old_first_video;
/** Destructor: under _mutex, close the subtitle codec context if
 *  setup_subtitle() opened one.  Video/audio codec contexts are
 *  presumably released elsewhere (not visible here) — TODO confirm.
 */
114 FFmpegDecoder::~FFmpegDecoder ()
116 boost::mutex::scoped_lock lm (_mutex);
118 if (_subtitle_codec_context) {
119 avcodec_close (_subtitle_codec_context);
/** Drain frames still buffered inside the codecs at end-of-stream,
 *  then flush the base AudioDecoder.
 */
124 FFmpegDecoder::flush ()
126 /* Get any remaining frames */
131 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Keep calling the video decoder with the current (end-of-stream) packet until it yields nothing */
134 while (decode_video_packet ()) {}
/* Only drain audio if there is an audio stream and we were asked to decode it */
137 if (_ffmpeg_content->audio_stream() && _decode_audio) {
138 decode_audio_packet ();
/* Let the base class emit anything it still holds */
141 AudioDecoder::flush ();
/** Read the next packet from the container and dispatch it to the
 *  video, audio or subtitle decode routine by stream index.
 */
145 FFmpegDecoder::pass ()
147 int r = av_read_frame (_format_context, &_packet);
/* Non-EOF read failures are logged but treated as non-fatal */
150 if (r != AVERROR_EOF) {
151 /* Maybe we should fail here, but for now we'll just finish off instead */
153 av_strerror (r, buf, sizeof(buf));
154 shared_ptr<const Film> film = _film.lock ();
156 film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
163 shared_ptr<const Film> film = _film.lock ();
166 int const si = _packet.stream_index;
/* Route by stream index; subtitles are only decoded when the film wants them */
168 if (si == _video_stream && _decode_video) {
169 decode_video_packet ();
170 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si) && _decode_audio) {
171 decode_audio_packet ();
172 } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si) && film->with_subtitles ()) {
173 decode_subtitle_packet ();
/* _packet's buffers were allocated by av_read_frame; release them */
176 av_free_packet (&_packet);
180 /** @param data pointer to array of pointers to buffers.
181 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
/* @param size total payload size in bytes (interleaved formats).
 * @return freshly-allocated AudioBuffers holding float samples in [-1, 1).
 * Interleaved cases below index with `channel`/`sample` counters whose
 * declarations (and the case-closing code) are elided in this excerpt.
 */
183 shared_ptr<AudioBuffers>
184 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
186 assert (_ffmpeg_content->audio_channels());
187 assert (bytes_per_audio_sample());
189 /* Deinterleave and convert to float */
/* size must be a whole number of (sample * channel) groups */
191 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
193 int const total_samples = size / bytes_per_audio_sample();
194 int const frames = total_samples / _ffmpeg_content->audio_channels();
195 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
197 switch (audio_sample_format()) {
198 case AV_SAMPLE_FMT_S16:
/* Interleaved signed 16-bit: scale by 2^15 to get float in [-1, 1) */
200 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
203 for (int i = 0; i < total_samples; ++i) {
204 audio->data(channel)[sample] = float(*p++) / (1 << 15);
207 if (channel == _ffmpeg_content->audio_channels()) {
215 case AV_SAMPLE_FMT_S16P:
/* Planar signed 16-bit: one source buffer per channel */
217 int16_t** p = reinterpret_cast<int16_t **> (data);
218 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
219 for (int j = 0; j < frames; ++j) {
220 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
226 case AV_SAMPLE_FMT_S32:
/* Interleaved signed 32-bit */
228 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
231 for (int i = 0; i < total_samples; ++i) {
/* NOTE(review): (1 << 31) overflows signed int (undefined behaviour);
   the divisor should be written as 2147483648.0f — confirm and fix */
232 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
235 if (channel == _ffmpeg_content->audio_channels()) {
243 case AV_SAMPLE_FMT_FLT:
/* Interleaved float: already normalised, copy sample-by-sample */
245 float* p = reinterpret_cast<float*> (data[0]);
248 for (int i = 0; i < total_samples; ++i) {
249 audio->data(channel)[sample] = *p++;
252 if (channel == _ffmpeg_content->audio_channels()) {
260 case AV_SAMPLE_FMT_FLTP:
/* Planar float: each channel is contiguous, so a memcpy per channel suffices */
262 float** p = reinterpret_cast<float**> (data);
263 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
264 memcpy (audio->data(i), p[i], frames * sizeof(float));
/* Any other sample format is unsupported */
270 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/** @return sample format of the content's audio codec, or a zero value
 *  if there is no audio stream.
 *  NOTE(review): (AVSampleFormat) 0 is AV_SAMPLE_FMT_U8, not
 *  AV_SAMPLE_FMT_NONE (which is -1) — confirm callers expect this.
 */
277 FFmpegDecoder::audio_sample_format () const
279 if (!_ffmpeg_content->audio_stream()) {
280 return (AVSampleFormat) 0;
283 return audio_codec_context()->sample_fmt;
/** @return bytes per sample for the current audio sample format
 *  (0 if the format is unknown, per av_get_bytes_per_sample).
 */
287 FFmpegDecoder::bytes_per_audio_sample () const
289 return av_get_bytes_per_sample (audio_sample_format ());
/** Decode packets with as little work as possible until the supplied
 *  predicate says we are done; used during seeking.
 *  @param finished callback taking (last video time, last audio time,
 *  frames read) and returning true when the run should stop.
 *  Inside the loop a local int (declaration elided here) shadows the
 *  `finished` parameter as the got-frame flag for avcodec — TODO confirm.
 */
293 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
296 optional<ContentTime> last_video;
297 optional<ContentTime> last_audio;
299 while (!finished (last_video, last_audio, frames_read)) {
300 int r = av_read_frame (_format_context, &_packet);
302 /* We should flush our decoders here, possibly yielding a few more frames,
303 but the consequence of having to do that is too hideous to contemplate.
304 Instead we give up and say that you can't seek too close to the end
/* Timebase of whichever stream this packet belongs to, in seconds */
312 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
314 if (_packet.stream_index == _video_stream) {
/* NOTE(review): avcodec_get_frame_defaults is deprecated in later FFmpeg */
316 avcodec_get_frame_defaults (_frame);
319 r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
320 if (r >= 0 && finished) {
/* Convert best-effort PTS to ContentTime, applying the global offset */
322 (av_frame_get_best_effort_timestamp (_frame) * time_base + _pts_offset) * TIME_HZ
326 } else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->index (_format_context)) {
327 AVPacket copy_packet = _packet;
328 while (copy_packet.size > 0) {
/* NOTE(review): this decodes &_packet but advances copy_packet below, so the
   same full packet appears to be re-decoded each iteration; decode_audio_packet()
   passes the copy instead — suspected bug, should probably be &copy_packet. */
331 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
332 if (r >= 0 && finished) {
334 (av_frame_get_best_effort_timestamp (_frame) * time_base + _pts_offset) * TIME_HZ
/* Step past the bytes the decoder reports as consumed */
338 copy_packet.data += r;
339 copy_packet.size -= r;
/* Release the packet read by av_read_frame */
343 av_free_packet (&_packet);
/** Predicate for minimal_run() while seeking: true once we have decoded
 *  a video or an audio frame at or beyond the seek target.
 */
350 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
352 return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
/** Predicate for the final stage of a seek.  Body elided in this
 *  excerpt; presumably returns true once `done` frames have been
 *  read relative to `n` — TODO confirm against full source.
 */
356 FFmpegDecoder::seek_final_finished (int n, int done) const
/** Seek the format context to ContentTime t (converted into the video
 *  stream's timebase, compensating for _pts_offset) and flush every
 *  open codec so no stale frames survive the seek.
 */
362 FFmpegDecoder::seek_and_flush (ContentTime t)
/* Target in video-stream timebase units: seconds first, then divide by the timebase */
364 int64_t s = ((double (t) / TIME_HZ) - _pts_offset) /
365 av_q2d (_format_context->streams[_video_stream]->time_base);
/* The audio stream generally has a different timebase; convert separately */
367 if (_ffmpeg_content->audio_stream ()) {
370 ((double (t) / TIME_HZ) - _pts_offset) /
371 av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base)
376 /* Ridiculous empirical hack */
/* BACKWARD flag: land on a keyframe at or before the requested position */
379 av_seek_frame (_format_context, _video_stream, s, AVSEEK_FLAG_BACKWARD);
/* Drop any frames buffered inside the codecs */
381 avcodec_flush_buffers (video_codec_context());
382 if (audio_codec_context ()) {
383 avcodec_flush_buffers (audio_codec_context ());
385 if (_subtitle_codec_context) {
386 avcodec_flush_buffers (_subtitle_codec_context);
/** Seek to `time`.  For an accurate seek we deliberately undershoot by
 *  200ms and then step forward with minimal_run() until we pass the
 *  target, so that the next decoded frame is exactly the one wanted.
 *  @param time Seek target.
 *  @param accurate true for a frame-accurate (two-pass) seek.
 */
391 FFmpegDecoder::seek (ContentTime time, bool accurate)
/* Let the base classes update their own state first */
393 Decoder::seek (time, accurate);
394 AudioDecoder::seek (time, accurate);
396 /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
397 a number plucked from the air) earlier than we want to end up. The loop below
398 will hopefully then step through to where we want to be.
401 ContentTime pre_roll = accurate ? (0.2 * TIME_HZ) : 0;
402 ContentTime initial_seek = time - pre_roll;
/* Clamp: don't ask for a negative time */
403 if (initial_seek < 0) {
407 /* Initial seek time in the video stream's timebase */
409 seek_and_flush (initial_seek);
/* Count how many frames it takes to overrun the target... */
416 int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
/* ...then seek again and replay N-1 frames so we stop just before it */
418 seek_and_flush (initial_seek);
420 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
/** Decode the current _packet as audio, emitting each complete frame
 *  via audio() with its ContentTime.  A single AVPacket may hold several
 *  audio frames, so we loop over a shallow copy whose data/size members
 *  advance as the decoder consumes bytes; on decoder error we log and
 *  give up on the rest of the packet.
 */
425 FFmpegDecoder::decode_audio_packet ()
427 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
431 AVPacket copy_packet = _packet;
433 while (copy_packet.size > 0) {
/* FIX(review): was the mojibake "©_packet" (HTML entity &copy; for "&copy_packet");
   avcodec_decode_audio4 must receive the address of the advancing copy. */
436 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, &copy_packet);
438 if (decode_result < 0) {
439 shared_ptr<const Film> film = _film.lock ();
441 film->log()->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
445 if (frame_finished) {
/* Frame timestamp: best-effort PTS in the audio stream's timebase,
   converted to seconds, shifted by _pts_offset, scaled to ContentTime */
446 ContentTime const ct = (
447 av_frame_get_best_effort_timestamp (_frame) *
448 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base)
/* Byte size of the decoded samples (align = 1, i.e. no padding) */
452 int const data_size = av_samples_get_buffer_size (
453 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
456 audio (deinterleave_audio (_frame->data, data_size), ct);
/* Step past the bytes the decoder reports as consumed */
459 copy_packet.data += decode_result;
460 copy_packet.size -= decode_result;
/** Decode the current _packet as video.  Decoded frames are pushed
 *  through a (cached) FilterGraph matching their size/pixel format,
 *  optionally post-processed, and emitted via video() with a frame
 *  index derived from their PTS.  Frames without a PTS are dropped.
 */
465 FFmpegDecoder::decode_video_packet ()
/* Bail out early if the decoder failed or produced no complete frame yet */
468 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
/* The graph cache is shared; guard it for the whole lookup/insert */
472 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
474 shared_ptr<FilterGraph> graph;
/* Find an existing graph that can handle this frame's size and pixel format */
476 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
477 while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
481 if (i == _filter_graphs.end ()) {
482 shared_ptr<const Film> film = _film.lock ();
/* No match: build and cache a new graph for this geometry/format */
485 graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
486 _filter_graphs.push_back (graph);
488 film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
/* A single input frame can yield several (image, PTS) pairs from the graph */
493 list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
495 string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;
497 for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
499 shared_ptr<Image> image = i->first;
500 if (!post_process.empty ()) {
501 image = image->post_process (post_process, true);
504 if (i->second != AV_NOPTS_VALUE) {
/* PTS (stream timebase) -> seconds -> frame index at the content's frame rate */
505 double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset;
506 VideoFrame const f = rint (pts * _ffmpeg_content->video_frame_rate ());
507 video (image, false, f);
/* No PTS: we cannot place the frame on the timeline, so drop it (logged) */
509 shared_ptr<const Film> film = _film.lock ();
511 film->log()->log ("Dropping frame without PTS");
/** Locate and open the subtitle codec for the content's subtitle
 *  stream, storing the context in _subtitle_codec_context (closed in
 *  the destructor).  No-op if the content has no subtitle stream.
 *  @throws DecodeError if the stream, decoder or open step fails.
 */
520 FFmpegDecoder::setup_subtitle ()
522 boost::mutex::scoped_lock lm (_mutex);
524 if (!_ffmpeg_content->subtitle_stream()) {
/* Context is owned by the AVStream; we only borrow (and later avcodec_close) it */
528 _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
529 if (_subtitle_codec_context == 0) {
530 throw DecodeError (N_("could not find subtitle stream"));
533 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
535 if (_subtitle_codec == 0) {
536 throw DecodeError (N_("could not find subtitle decoder"));
539 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
540 throw DecodeError (N_("could not open subtitle decoder"));
/** Decode the current _packet as a (bitmap) subtitle and emit it via
 *  image_subtitle() as an RGBA image plus its on-screen rectangle,
 *  with display times derived from the subtitle's PTS.
 *  @throws DecodeError for multi-part or non-bitmap subtitles.
 */
545 FFmpegDecoder::decode_subtitle_packet ()
549 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
553 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
554 indicate that the previous subtitle should stop.
/* Empty subtitle: emit a null image to clear the previous one */
556 if (sub.num_rects <= 0) {
557 image_subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), 0, 0);
559 } else if (sub.num_rects > 1) {
560 throw DecodeError (_("multi-part subtitles not yet supported"));
563 /* Subtitle PTS in seconds (within the source, not taking into account any of the
564 source that we may have chopped off for the DCP)
/* sub.pts is in AV_TIME_BASE units; bring it to seconds and apply the offset */
566 double const packet_time = (static_cast<double> (sub.pts) / AV_TIME_BASE) + _pts_offset;
568 /* hence start time for this sub */
/* start/end_display_time are milliseconds relative to packet_time */
569 ContentTime const from = (packet_time + (double (sub.start_display_time) / 1e3)) * TIME_HZ;
570 ContentTime const to = (packet_time + (double (sub.end_display_time) / 1e3)) * TIME_HZ;
/* Only one rect possible here (num_rects checked above) */
572 AVSubtitleRect const * rect = sub.rects[0];
574 if (rect->type != SUBTITLE_BITMAP) {
575 throw DecodeError (_("non-bitmap subtitles not yet supported"));
578 /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
579 G, third B, fourth A.
581 shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
583 /* Start of the first line in the subtitle */
584 uint8_t* sub_p = rect->pict.data[0];
585 /* sub_p looks up into a BGRA palette which is here
586 (i.e. first byte B, second G, third R, fourth A)
588 uint32_t const * palette = (uint32_t *) rect->pict.data[1];
589 /* Start of the output data */
590 uint32_t* out_p = (uint32_t *) image->data()[0];
/* Expand the paletted bitmap row by row, swizzling BGRA -> RGBA per pixel */
592 for (int y = 0; y < rect->h; ++y) {
593 uint8_t* sub_line_p = sub_p;
594 uint32_t* out_line_p = out_p;
595 for (int x = 0; x < rect->w; ++x) {
596 uint32_t const p = palette[*sub_line_p++];
/* Swap R and B, keep G and A in place */
597 *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
/* Source rows use the codec's linesize; destination rows use the Image stride */
599 sub_p += rect->pict.linesize[0];
600 out_p += image->stride()[0] / sizeof (uint32_t);
/* Express the rect's position/size as fractions of the video frame */
603 dcp::Size const vs = _ffmpeg_content->video_size ();
607 dcpomatic::Rect<double> (
608 static_cast<double> (rect->x) / vs.width,
609 static_cast<double> (rect->y) / vs.height,
610 static_cast<double> (rect->w) / vs.width,
611 static_cast<double> (rect->h) / vs.height
/* sub owns codec-allocated rects; free them now we have copied the data */
618 avsubtitle_free (&sub);