2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
38 #include "exceptions.h"
42 #include "ffmpeg_decoder.h"
43 #include "filter_graph.h"
44 #include "audio_buffers.h"
45 #include "ffmpeg_content.h"
52 using std::stringstream;
56 using boost::shared_ptr;
57 using boost::optional;
58 using boost::dynamic_pointer_cast;
/* Construct a decoder for FFmpeg content.
   @param f Film (used for logging; held weakly).
   @param c The FFmpeg content to decode.
   @param video true to decode the video stream.
   @param audio true to decode the audio stream.
   NOTE(review): several initialiser-list entries and the opening brace are
   elided in this excerpt; the members shown are flag/zero initialised.
*/
61 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio)
67 , _subtitle_codec_context (0)
69 , _decode_video (video)
70 , _decode_audio (audio)
71 , _video_pts_offset (0)
72 , _audio_pts_offset (0)
76 /* Audio and video frame PTS values may not start with 0. We want
77 to fiddle them so that:
79 1. One of them starts at time 0.
80 2. The first video PTS value ends up on a frame boundary.
82 Then we remove big initial gaps in PTS and we allow our
83 insertion of black frames to work.
86 audio_pts_to_use = audio_pts_from_ffmpeg + audio_pts_offset;
87 video_pts_to_use = video_pts_from_ffmpeg + video_pts_offset;
/* Which streams are actually present AND requested for decode */
90 bool const have_video = video && c->first_video();
91 bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;
93 /* First, make one of them start at 0 */
/* Offset both streams by the same amount so the earlier of the two
   first-PTS values lands at time 0 and relative sync is preserved. */
95 if (have_audio && have_video) {
96 _video_pts_offset = _audio_pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
97 } else if (have_video) {
98 _video_pts_offset = - c->first_video().get();
99 } else if (have_audio) {
100 _audio_pts_offset = - c->audio_stream()->first_audio.get();
103 /* Now adjust both so that the video pts starts on a frame */
104 if (have_video && have_audio) {
105 double first_video = c->first_video().get() + _video_pts_offset;
106 double const old_first_video = first_video;
108 /* Round the first video up to a frame boundary */
/* The 1e-6 epsilon avoids "rounding up" a value that is already on a
   frame boundary except for floating-point noise. */
109 if (fabs (rint (first_video * c->video_frame_rate()) - first_video * c->video_frame_rate()) > 1e-6) {
110 first_video = ceil (first_video * c->video_frame_rate()) / c->video_frame_rate ();
/* Shift both offsets by the same delta so A/V sync is unchanged */
113 _video_pts_offset += first_video - old_first_video;
114 _audio_pts_offset += first_video - old_first_video;
/* Destructor: close the subtitle codec context (if setup_subtitle() opened
   one) under the object mutex.  avcodec_close() releases the codec's
   internal state; the context itself belongs to the AVFormatContext. */
118 FFmpegDecoder::~FFmpegDecoder ()
120 boost::mutex::scoped_lock lm (_mutex);
122 if (_subtitle_codec_context) {
123 avcodec_close (_subtitle_codec_context);
/* Drain any frames still buffered inside the video/audio decoders
   (decoders can hold frames back; calling decode with the now-empty
   _packet flushes them out), then flush the base AudioDecoder. */
128 FFmpegDecoder::flush ()
130 /* Get any remaining frames */
135 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
/* Keep decoding until the video decoder has nothing more to give */
138 while (decode_video_packet ()) {}
141 if (_ffmpeg_content->audio_stream() && _decode_audio) {
142 decode_audio_packet ();
145 AudioDecoder::flush ();
/* Read the next packet from the container and dispatch it to the
   appropriate decode_*_packet() method based on its stream index.
   On EOF (or any read error, which we treat leniently) the decoder
   is flushed instead of failing hard. */
149 FFmpegDecoder::pass ()
151 int r = av_read_frame (_format_context, &_packet);
154 if (r != AVERROR_EOF) {
155 /* Maybe we should fail here, but for now we'll just finish off instead */
/* NOTE(review): `buf` is declared on a line elided from this excerpt —
   presumably a local char array for av_strerror(); confirm. */
157 av_strerror (r, buf, sizeof(buf));
158 shared_ptr<const Film> film = _film.lock ();
160 film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
/* Reset _frame before handing it to a decoder */
167 avcodec_get_frame_defaults (_frame);
169 shared_ptr<const Film> film = _film.lock ();
172 int const si = _packet.stream_index;
/* Route the packet: video / audio / subtitle, honouring the decode flags
   and the film's with-subtitles setting */
174 if (si == _video_stream && _decode_video) {
175 decode_video_packet ();
176 } else if (_ffmpeg_content->audio_stream() && si == _ffmpeg_content->audio_stream()->index (_format_context) && _decode_audio) {
177 decode_audio_packet ();
178 } else if (_ffmpeg_content->subtitle_stream() && si == _ffmpeg_content->subtitle_stream()->index (_format_context) && film->with_subtitles ()) {
179 decode_subtitle_packet ();
/* av_read_frame allocated the packet's payload; release it */
182 av_free_packet (&_packet);
186 /** @param data pointer to array of pointers to buffers.
187 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
/* @param size total size of the sample data in bytes.
   @return AudioBuffers holding the samples converted to float in [-1, 1),
   one plane per channel.  Throws DecodeError on an unhandled sample format.
   NOTE(review): the interleaved cases use `channel`/`sample` counters whose
   declarations and increments are on lines elided from this excerpt. */
189 shared_ptr<AudioBuffers>
190 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
191 /* Sanity: we must know the channel count and sample size */
192 assert (_ffmpeg_content->audio_channels());
193 assert (bytes_per_audio_sample());
195 /* Deinterleave and convert to float */
/* size must be a whole number of (sample * channel) units */
197 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
199 int const total_samples = size / bytes_per_audio_sample();
200 int const frames = total_samples / _ffmpeg_content->audio_channels();
201 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
203 switch (audio_sample_format()) {
/* Interleaved signed 16-bit: walk all samples, scaling to float */
204 case AV_SAMPLE_FMT_S16:
206 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
209 for (int i = 0; i < total_samples; ++i) {
210 audio->data(channel)[sample] = float(*p++) / (1 << 15);
213 if (channel == _ffmpeg_content->audio_channels()) {
/* Planar signed 16-bit: one input plane per channel */
221 case AV_SAMPLE_FMT_S16P:
223 int16_t** p = reinterpret_cast<int16_t **> (data);
224 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
225 for (int j = 0; j < frames; ++j) {
226 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
/* Interleaved signed 32-bit.
   NOTE(review): (1 << 31) overflows a 32-bit int (undefined behaviour);
   2147483648.0 (or 1LL << 31) would be the safe divisor — confirm/fix. */
232 case AV_SAMPLE_FMT_S32:
234 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
237 for (int i = 0; i < total_samples; ++i) {
238 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
241 if (channel == _ffmpeg_content->audio_channels()) {
/* Interleaved float: already in the right scale, just deinterleave */
249 case AV_SAMPLE_FMT_FLT:
251 float* p = reinterpret_cast<float*> (data[0]);
254 for (int i = 0; i < total_samples; ++i) {
255 audio->data(channel)[sample] = *p++;
258 if (channel == _ffmpeg_content->audio_channels()) {
/* Planar float: straight per-channel memcpy */
266 case AV_SAMPLE_FMT_FLTP:
268 float** p = reinterpret_cast<float**> (data);
269 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
270 memcpy (audio->data(i), p[i], frames * sizeof(float));
276 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/* @return the sample format of the audio stream's codec context, or
   AVSampleFormat 0 (AV_SAMPLE_FMT_NONE is -1, so this is actually the
   first real format value) when the content has no audio stream. */
283 FFmpegDecoder::audio_sample_format () const
285 if (!_ffmpeg_content->audio_stream()) {
286 return (AVSampleFormat) 0;
289 return audio_codec_context()->sample_fmt;
/* @return bytes per (single-channel) audio sample for the current
   sample format, via FFmpeg's lookup. */
293 FFmpegDecoder::bytes_per_audio_sample () const
295 return av_get_bytes_per_sample (audio_sample_format ());
/* Decode packets (without emitting them) until `finished` — called with the
   last-seen video time, last-seen audio time and the number of frames read —
   returns true.  Used by seek() to step up to a target time.
   NOTE(review): `frames_read` and the local int used for the decoders'
   got-frame flag are declared on lines elided from this excerpt; the
   boost::function parameter is also named `finished`, so `&finished` at
   the decode calls presumably refers to that elided int — confirm. */
299 FFmpegDecoder::minimal_run (boost::function<bool (ContentTime, ContentTime, int)> finished)
302 ContentTime last_video = 0;
303 ContentTime last_audio = 0;
305 while (!finished (last_video, last_audio, frames_read)) {
306 int r = av_read_frame (_format_context, &_packet);
308 /* We should flush our decoders here, possibly yielding a few more frames,
309 but the consequence of having to do that is too hideous to contemplate.
310 Instead we give up and say that you can't seek too close to the end
/* Time base of whichever stream this packet belongs to */
318 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
320 if (_packet.stream_index == _video_stream) {
322 avcodec_get_frame_defaults (_frame);
325 r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
326 if (r >= 0 && finished) {
/* Convert the frame's best-effort PTS to TIME_HZ units, applying our offset */
328 (av_frame_get_best_effort_timestamp (_frame) * time_base + _video_pts_offset) * TIME_HZ
332 } else if (_ffmpeg_content->audio_stream() && _packet.stream_index == _ffmpeg_content->audio_stream()->index (_format_context)) {
/* Audio packets may hold several frames; loop, advancing copy_packet */
333 AVPacket copy_packet = _packet;
334 while (copy_packet.size > 0) {
/* NOTE(review): this passes &_packet while the loop advances copy_packet —
   it almost certainly should pass &copy_packet, otherwise the same data is
   decoded repeatedly.  Flagging rather than changing, as surrounding lines
   are elided. */
337 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
338 if (r >= 0 && finished) {
340 (av_frame_get_best_effort_timestamp (_frame) * time_base + _audio_pts_offset) * TIME_HZ
344 copy_packet.data += r;
345 copy_packet.size -= r;
349 av_free_packet (&_packet);
/* Predicate for minimal_run(): true once either stream has reached or
   passed the seek target time. */
356 FFmpegDecoder::seek_overrun_finished (ContentTime seek, ContentTime last_video, ContentTime last_audio) const
358 return last_video >= seek || last_audio >= seek;
/* Predicate for minimal_run() when stepping to the final seek position:
   finished once `done` of the required `n` frames have been read.
   (Body not visible in this excerpt — presumably `return n == done;`;
   confirm against the full source.) */
362 FFmpegDecoder::seek_final_finished (int n, int done) const
/* Seek the container to (at or before) ContentTime t on both the video
   stream and, if present, the audio stream, then flush all codec buffers
   so no stale frames survive the seek.  The target is converted from
   TIME_HZ units into each stream's own time base, undoing our PTS offsets. */
368 FFmpegDecoder::seek_and_flush (ContentTime t)
370 int64_t const initial_v = ((double (t) / TIME_HZ) - _video_pts_offset) /
371 av_q2d (_format_context->streams[_video_stream]->time_base);
/* AVSEEK_FLAG_BACKWARD: land on the keyframe at or before the target */
373 av_seek_frame (_format_context, _video_stream, initial_v, AVSEEK_FLAG_BACKWARD);
375 shared_ptr<FFmpegAudioStream> as = _ffmpeg_content->audio_stream ();
377 int64_t initial_a = ((double (t) / TIME_HZ) - _audio_pts_offset) /
378 av_q2d (as->stream(_format_context)->time_base);
380 av_seek_frame (_format_context, as->index (_format_context), initial_a, AVSEEK_FLAG_BACKWARD);
/* Discard any decoded-but-undelivered frames in each codec */
383 avcodec_flush_buffers (video_codec_context());
384 if (audio_codec_context ()) {
385 avcodec_flush_buffers (audio_codec_context ());
387 if (_subtitle_codec_context) {
388 avcodec_flush_buffers (_subtitle_codec_context);
/* Seek to ContentTime `time`.  When `accurate`, we deliberately undershoot
   by a pre-roll, then decode forward (minimal_run) counting frames until we
   overrun the target, re-seek, and step exactly N-1 frames so the next
   frame delivered is the one at `time`. */
393 FFmpegDecoder::seek (ContentTime time, bool accurate)
395 Decoder::seek (time, accurate);
397 /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
398 a number plucked from the air) earlier than we want to end up. The loop below
399 will hopefully then step through to where we want to be.
402 ContentTime pre_roll = accurate ? (0.2 * TIME_HZ) : 0;
403 ContentTime initial_seek = time - pre_roll;
/* Clamp: can't seek before the start of the content */
404 if (initial_seek < 0) {
408 /* Initial seek time in the video stream's timebase */
410 seek_and_flush (initial_seek);
412 if (time == 0 || !accurate) {
413 /* We're already there, or we're as close as we need to be */
/* Count how many frames it takes to pass the target from the undershoot */
417 int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
/* Go back to the undershoot and consume exactly N-1 frames */
419 seek_and_flush (initial_seek);
421 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
426 FFmpegDecoder::decode_audio_packet ()
428 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
432 AVPacket copy_packet = _packet;
434 while (copy_packet.size > 0) {
437 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, ©_packet);
439 if (decode_result < 0) {
440 shared_ptr<const Film> film = _film.lock ();
442 film->log()->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
446 if (frame_finished) {
447 ContentTime const t = rint (
448 (av_q2d (_format_context->streams[copy_packet.stream_index]->time_base)
449 * av_frame_get_best_effort_timestamp(_frame) + _audio_pts_offset) * TIME_HZ
452 int const data_size = av_samples_get_buffer_size (
453 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
456 audio (deinterleave_audio (_frame->data, data_size), t);
459 copy_packet.data += decode_result;
460 copy_packet.size -= decode_result;
/* Decode the video payload of _packet.  Returns false when the decoder
   produced no frame (error or need-more-data); otherwise the decoded frame
   is run through a (cached, lazily-created) FilterGraph, optionally
   post-processed, and emitted via video() with its PTS converted to
   TIME_HZ units.
   NOTE(review): `frame_finished` and the function's return statements are
   on lines elided from this excerpt. */
465 FFmpegDecoder::decode_video_packet ()
468 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
/* The graph cache is shared, so guard it */
472 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
474 shared_ptr<FilterGraph> graph;
/* Look for an existing graph that can handle this frame's size/format */
476 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
477 while (i != _filter_graphs.end() && !(*i)->can_process (libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
481 if (i == _filter_graphs.end ()) {
482 shared_ptr<const Film> film = _film.lock ();
/* No match: build a new graph for this size/format and cache it */
485 graph.reset (new FilterGraph (_ffmpeg_content, libdcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
486 _filter_graphs.push_back (graph);
488 film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
/* A graph may emit zero or more (image, pts) pairs per input frame */
493 list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
495 string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;
497 for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
499 shared_ptr<Image> image = i->first;
500 if (!post_process.empty ()) {
501 image = image->post_process (post_process, true);
/* Only emit frames that carry a usable PTS; others are dropped with a log */
504 if (i->second != AV_NOPTS_VALUE) {
505 ContentTime const t = rint ((i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _video_pts_offset) * TIME_HZ);
506 video (image, false, t);
508 shared_ptr<const Film> film = _film.lock ();
510 film->log()->log ("Dropping frame without PTS");
/* Locate and open the subtitle stream's decoder, storing its codec context
   in _subtitle_codec_context (closed later by the destructor).  Does nothing
   if there is no subtitle stream or its index is out of range for this
   format context.  Throws DecodeError on decoder lookup/open failure. */
519 FFmpegDecoder::setup_subtitle ()
521 boost::mutex::scoped_lock lm (_mutex);
523 if (!_ffmpeg_content->subtitle_stream() || _ffmpeg_content->subtitle_stream()->index (_format_context) >= int (_format_context->nb_streams)) {
/* The context is owned by the AVFormatContext; we only open/close it */
527 _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
528 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
530 if (_subtitle_codec == 0) {
531 throw DecodeError (_("could not find subtitle decoder"));
534 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
535 throw DecodeError (N_("could not open subtitle decoder"));
/* Decode the subtitle payload of _packet.  An empty AVSubtitle clears the
   current subtitle; a single bitmap rect is converted from its paletted
   form to an RGBA Image and emitted via subtitle() with its display window
   ([from, to]) and position/size expressed as fractions of the video frame.
   Multi-rect and non-bitmap subtitles are rejected with DecodeError.
   NOTE(review): `sub` and `got_subtitle` are declared on lines elided from
   this excerpt. */
540 FFmpegDecoder::decode_subtitle_packet ()
544 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
548 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
549 indicate that the previous subtitle should stop.
/* Empty image + zero times = "remove the current subtitle" */
551 if (sub.num_rects <= 0) {
552 subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), 0, 0);
554 } else if (sub.num_rects > 1) {
555 throw DecodeError (_("multi-part subtitles not yet supported"));
558 /* Subtitle PTS in seconds (within the source, not taking into account any of the
559 source that we may have chopped off for the DCP)
561 double const packet_time = static_cast<double> (sub.pts) / AV_TIME_BASE;
563 /* hence start time for this sub */
/* start/end_display_time are offsets in milliseconds from packet_time */
564 DCPTime const from = (packet_time + (double (sub.start_display_time) / 1e3)) * TIME_HZ;
565 DCPTime const to = (packet_time + (double (sub.end_display_time) / 1e3)) * TIME_HZ;
567 AVSubtitleRect const * rect = sub.rects[0];
569 if (rect->type != SUBTITLE_BITMAP) {
570 throw DecodeError (_("non-bitmap subtitles not yet supported"));
573 shared_ptr<Image> image (new Image (PIX_FMT_RGBA, libdcp::Size (rect->w, rect->h), true));
575 /* Start of the first line in the subtitle */
576 uint8_t* sub_p = rect->pict.data[0];
577 /* sub_p looks up into a RGB palette which is here */
578 uint32_t const * palette = (uint32_t *) rect->pict.data[1];
579 /* Start of the output data */
580 uint32_t* out_p = (uint32_t *) image->data()[0];
/* Expand the paletted bitmap to RGBA, one row at a time, honouring both
   the source linesize and the output image stride */
582 for (int y = 0; y < rect->h; ++y) {
583 uint8_t* sub_line_p = sub_p;
584 uint32_t* out_line_p = out_p;
585 for (int x = 0; x < rect->w; ++x) {
586 *out_line_p++ = palette[*sub_line_p++];
588 sub_p += rect->pict.linesize[0];
589 out_p += image->stride()[0] / sizeof (uint32_t);
/* Express the rect as fractions of the video size so later scaling works */
592 libdcp::Size const vs = _ffmpeg_content->video_size ();
596 dcpomatic::Rect<double> (
597 static_cast<double> (rect->x) / vs.width,
598 static_cast<double> (rect->y) / vs.height,
599 static_cast<double> (rect->w) / vs.width,
600 static_cast<double> (rect->h) / vs.height
/* FFmpeg allocated sub's rects; free them */
607 avsubtitle_free (&sub);