2 Copyright (C) 2012 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 /** @file src/ffmpeg_decoder.cc
21 * @brief A decoder using FFmpeg to decode content.
30 #include <boost/lexical_cast.hpp>
33 #include <libavcodec/avcodec.h>
34 #include <libavformat/avformat.h>
38 #include "exceptions.h"
42 #include "ffmpeg_decoder.h"
43 #include "filter_graph.h"
44 #include "audio_buffers.h"
45 #include "ffmpeg_content.h"
52 using std::stringstream;
56 using boost::shared_ptr;
57 using boost::optional;
58 using boost::dynamic_pointer_cast;
/* Construct a decoder for FFmpeg-examinable content.
 *  f      film that this content is part of.
 *  c      the FFmpeg content to decode.
 *  video  true to decode the video stream.
 *  audio  true to decode the audio stream.
 *
 * NOTE(review): this listing is missing some lines (initialisers, braces),
 * so comments below describe only what is visible.
 */
61 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> f, shared_ptr<const FFmpegContent> c, bool video, bool audio)
67 , _subtitle_codec_context (0)
69 , _decode_video (video)
70 , _decode_audio (audio)
75 /* Audio and video frame PTS values may not start with 0. We want
76 to fiddle them so that:
78 1. One of them starts at time 0.
79 2. The first video PTS value ends up on a frame boundary.
81 Then we remove big initial gaps in PTS and we allow our
82 insertion of black frames to work.
84 We will do pts_to_use = pts_from_ffmpeg + pts_offset;
/* Note that have_video/have_audio require both that the stream exists and
   that we were asked to decode it.
*/
87 bool const have_video = video && c->first_video();
88 bool const have_audio = _decode_audio && c->audio_stream () && c->audio_stream()->first_audio;
90 /* First, make one of them start at 0 */
92 if (have_audio && have_video) {
93 _pts_offset = - min (c->first_video().get(), c->audio_stream()->first_audio.get());
94 } else if (have_video) {
95 _pts_offset = - c->first_video().get();
96 } else if (have_audio) {
97 _pts_offset = - c->audio_stream()->first_audio.get();
100 /* Now adjust both so that the video pts starts on a frame */
101 if (have_video && have_audio) {
102 double first_video = c->first_video().get() + _pts_offset;
103 double const old_first_video = first_video;
105 /* Round the first video up to a frame boundary */
106 if (fabs (rint (first_video * c->video_frame_rate()) - first_video * c->video_frame_rate()) > 1e-6) {
107 first_video = ceil (first_video * c->video_frame_rate()) / c->video_frame_rate ();
/* Shift both streams by the amount we had to round the video up by */
110 _pts_offset += first_video - old_first_video;
/* Destructor: close the subtitle codec context (if we opened one in
 * setup_subtitle()) under the decoder mutex.
 */
114 FFmpegDecoder::~FFmpegDecoder ()
116 boost::mutex::scoped_lock lm (_mutex);
118 if (_subtitle_codec_context) {
119 avcodec_close (_subtitle_codec_context);
/* Drain any frames still buffered inside the video/audio decoders at
 * end-of-stream, then flush the parent AudioDecoder.
 */
124 FFmpegDecoder::flush ()
126 /* Get any remaining frames */
131 /* XXX: should we reset _packet.data and size after each *_decode_* call? */
134 while (decode_video_packet ()) {}
137 if (_ffmpeg_content->audio_stream() && _decode_audio) {
138 decode_audio_packet ();
139 AudioDecoder::flush ();
/* Read the next packet from the format context and dispatch it to the
 * appropriate decode_*_packet() method depending on its stream index.
 * On read error (including EOF) we log non-EOF errors and finish up.
 */
144 FFmpegDecoder::pass ()
146 int r = av_read_frame (_format_context, &_packet);
149 if (r != AVERROR_EOF) {
150 /* Maybe we should fail here, but for now we'll just finish off instead */
152 av_strerror (r, buf, sizeof(buf));
153 shared_ptr<const Film> film = _film.lock ();
155 film->log()->log (String::compose (N_("error on av_read_frame (%1) (%2)"), buf, r));
162 shared_ptr<const Film> film = _film.lock ();
165 int const si = _packet.stream_index;
/* Route the packet: video, audio or subtitle; anything else is dropped */
167 if (si == _video_stream && _decode_video) {
168 decode_video_packet ();
169 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, si) && _decode_audio) {
170 decode_audio_packet ();
171 } else if (_ffmpeg_content->subtitle_stream() && _ffmpeg_content->subtitle_stream()->uses_index (_format_context, si) && film->with_subtitles ()) {
172 decode_subtitle_packet ();
/* av_read_frame allocated the packet's payload; free it now we are done */
175 av_free_packet (&_packet);
179 /** @param data pointer to array of pointers to buffers.
180 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
/* @param size total payload size in bytes (interleaved formats) as reported
 *        by av_samples_get_buffer_size().
 * @return AudioBuffers with one float plane per channel, samples scaled
 *         to [-1, 1).
 */
182 shared_ptr<AudioBuffers>
183 FFmpegDecoder::deinterleave_audio (uint8_t** data, int size)
185 assert (_ffmpeg_content->audio_channels());
186 assert (bytes_per_audio_sample());
188 /* Deinterleave and convert to float */
190 assert ((size % (bytes_per_audio_sample() * _ffmpeg_content->audio_channels())) == 0);
192 int const total_samples = size / bytes_per_audio_sample();
193 int const frames = total_samples / _ffmpeg_content->audio_channels();
194 shared_ptr<AudioBuffers> audio (new AudioBuffers (_ffmpeg_content->audio_channels(), frames));
196 switch (audio_sample_format()) {
/* Interleaved signed 16-bit: walk samples, wrapping channel back to 0
   after each frame (bookkeeping lines not visible in this listing).
*/
197 case AV_SAMPLE_FMT_S16:
199 int16_t* p = reinterpret_cast<int16_t *> (data[0]);
202 for (int i = 0; i < total_samples; ++i) {
203 audio->data(channel)[sample] = float(*p++) / (1 << 15);
206 if (channel == _ffmpeg_content->audio_channels()) {
/* Planar signed 16-bit: one input buffer per channel */
214 case AV_SAMPLE_FMT_S16P:
216 int16_t** p = reinterpret_cast<int16_t **> (data);
217 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
218 for (int j = 0; j < frames; ++j) {
219 audio->data(i)[j] = static_cast<float>(p[i][j]) / (1 << 15);
/* Interleaved signed 32-bit */
225 case AV_SAMPLE_FMT_S32:
227 int32_t* p = reinterpret_cast<int32_t *> (data[0]);
230 for (int i = 0; i < total_samples; ++i) {
231 audio->data(channel)[sample] = static_cast<float>(*p++) / (1 << 31);
234 if (channel == _ffmpeg_content->audio_channels()) {
/* Interleaved float: copy straight through */
242 case AV_SAMPLE_FMT_FLT:
244 float* p = reinterpret_cast<float*> (data[0]);
247 for (int i = 0; i < total_samples; ++i) {
248 audio->data(channel)[sample] = *p++;
251 if (channel == _ffmpeg_content->audio_channels()) {
/* Planar float: one memcpy per channel */
259 case AV_SAMPLE_FMT_FLTP:
261 float** p = reinterpret_cast<float**> (data);
262 for (int i = 0; i < _ffmpeg_content->audio_channels(); ++i) {
263 memcpy (audio->data(i), p[i], frames * sizeof(float));
/* Any other sample format is an error */
269 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format())));
/* Return the sample format of our audio stream, or a zero-valued
 * AVSampleFormat if the content has no audio stream.
 */
276 FFmpegDecoder::audio_sample_format () const
278 if (!_ffmpeg_content->audio_stream()) {
279 return (AVSampleFormat) 0;
282 return audio_codec_context()->sample_fmt;
/* Size in bytes of one sample in our audio stream's sample format */
286 FFmpegDecoder::bytes_per_audio_sample () const
288 return av_get_bytes_per_sample (audio_sample_format ());
292 FFmpegDecoder::minimal_run (boost::function<bool (optional<ContentTime>, optional<ContentTime>, int)> finished)
295 optional<ContentTime> last_video;
296 optional<ContentTime> last_audio;
298 while (!finished (last_video, last_audio, frames_read)) {
299 int r = av_read_frame (_format_context, &_packet);
301 /* We should flush our decoders here, possibly yielding a few more frames,
302 but the consequence of having to do that is too hideous to contemplate.
303 Instead we give up and say that you can't seek too close to the end
311 double const time_base = av_q2d (_format_context->streams[_packet.stream_index]->time_base);
313 if (_packet.stream_index == _video_stream) {
315 avcodec_get_frame_defaults (_frame);
318 r = avcodec_decode_video2 (video_codec_context(), _frame, &finished, &_packet);
319 if (r >= 0 && finished) {
321 (av_frame_get_best_effort_timestamp (_frame) * time_base + _pts_offset) * TIME_HZ
325 } else if (_ffmpeg_content->audio_stream() && _ffmpeg_content->audio_stream()->uses_index (_format_context, _packet.stream_index)) {
326 AVPacket copy_packet = _packet;
327 while (copy_packet.size > 0) {
330 r = avcodec_decode_audio4 (audio_codec_context(), _frame, &finished, &_packet);
331 if (r >= 0 && finished) {
333 (av_frame_get_best_effort_timestamp (_frame) * time_base + _pts_offset) * TIME_HZ
337 copy_packet.data += r;
338 copy_packet.size -= r;
342 av_free_packet (&_packet);
/* Predicate for minimal_run(): stop once either stream has produced a
 * frame at or beyond the seek target time.
 */
349 FFmpegDecoder::seek_overrun_finished (ContentTime seek, optional<ContentTime> last_video, optional<ContentTime> last_audio) const
351 return (last_video && last_video.get() >= seek) || (last_audio && last_audio.get() >= seek);
/* Predicate for minimal_run(): stop after `done' of the `n' packets
 * counted by the overrun pass have been re-read (body not visible in
 * this listing).
 */
355 FFmpegDecoder::seek_final_finished (int n, int done) const
/* Seek the format context to ContentTime t (converting t into the video
 * stream's timebase, undoing our _pts_offset) and flush all open codec
 * contexts so no stale frames survive the seek.
 */
361 FFmpegDecoder::seek_and_flush (ContentTime t)
363 int64_t s = ((double (t) / TIME_HZ) - _pts_offset) /
364 av_q2d (_format_context->streams[_video_stream]->time_base);
366 if (_ffmpeg_content->audio_stream ()) {
/* Same conversion but in the audio stream's timebase */
369 ((double (t) / TIME_HZ) - _pts_offset) /
370 av_q2d (_ffmpeg_content->audio_stream()->stream(_format_context)->time_base)
/* NOTE(review): leftover debug print to stdout; should go to the log or be removed */
375 cout << "S&F " << t << "\n";
377 /* Ridiculous empirical hack */
383 av_seek_frame (_format_context, _video_stream, s, 0);
385 avcodec_flush_buffers (video_codec_context());
386 if (audio_codec_context ()) {
387 avcodec_flush_buffers (audio_codec_context ());
389 if (_subtitle_codec_context) {
390 avcodec_flush_buffers (_subtitle_codec_context);
/* Seek to `time'.  If `accurate', seek 200ms early and then use
 * minimal_run() to step forward to exactly the requested position;
 * otherwise just do a single coarse seek.
 */
395 FFmpegDecoder::seek (ContentTime time, bool accurate)
397 Decoder::seek (time, accurate);
399 AudioDecoder::seek (time, accurate);
402 /* If we are doing an accurate seek, our initial shot will be 200ms (200 being
403 a number plucked from the air) earlier than we want to end up. The loop below
404 will hopefully then step through to where we want to be.
407 ContentTime pre_roll = accurate ? (0.2 * TIME_HZ) : 0;
408 ContentTime initial_seek = time - pre_roll;
409 if (initial_seek < 0) {
413 /* Initial seek time in the video stream's timebase */
415 seek_and_flush (initial_seek);
/* Accurate path: find how many packets it takes to overrun the target,
   then seek again and run one fewer so we land just before it.
*/
422 int const N = minimal_run (boost::bind (&FFmpegDecoder::seek_overrun_finished, this, time, _1, _2));
424 seek_and_flush (initial_seek);
426 minimal_run (boost::bind (&FFmpegDecoder::seek_final_finished, this, N - 1, _3));
431 FFmpegDecoder::decode_audio_packet ()
433 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
437 AVPacket copy_packet = _packet;
439 while (copy_packet.size > 0) {
442 int const decode_result = avcodec_decode_audio4 (audio_codec_context(), _frame, &frame_finished, ©_packet);
444 if (decode_result < 0) {
445 shared_ptr<const Film> film = _film.lock ();
447 film->log()->log (String::compose ("avcodec_decode_audio4 failed (%1)", decode_result));
451 if (frame_finished) {
452 ContentTime const ct = (
453 av_frame_get_best_effort_timestamp (_frame) *
454 av_q2d (_ffmpeg_content->audio_stream()->stream (_format_context)->time_base)
458 int const data_size = av_samples_get_buffer_size (
459 0, audio_codec_context()->channels, _frame->nb_samples, audio_sample_format (), 1
462 audio (deinterleave_audio (_frame->data, data_size), ct);
465 copy_packet.data += decode_result;
466 copy_packet.size -= decode_result;
/* Decode the video frame (if any) in the current _packet, push it through
 * a (cached) FilterGraph matching its size/pixel format, optionally apply
 * post-process filters, and emit each resulting image via video() at the
 * frame index implied by its PTS.  Frames without a PTS are dropped.
 */
471 FFmpegDecoder::decode_video_packet ()
474 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, &_packet) < 0 || !frame_finished) {
478 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
480 shared_ptr<FilterGraph> graph;
/* Look for an existing graph that can handle this frame's size/format */
482 list<shared_ptr<FilterGraph> >::iterator i = _filter_graphs.begin();
483 while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
487 if (i == _filter_graphs.end ()) {
488 shared_ptr<const Film> film = _film.lock ();
/* None found: build one and cache it */
491 graph.reset (new FilterGraph (_ffmpeg_content, dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format));
492 _filter_graphs.push_back (graph);
494 film->log()->log (String::compose (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format));
499 list<pair<shared_ptr<Image>, int64_t> > images = graph->process (_frame);
501 string post_process = Filter::ffmpeg_strings (_ffmpeg_content->filters()).second;
503 for (list<pair<shared_ptr<Image>, int64_t> >::iterator i = images.begin(); i != images.end(); ++i) {
505 shared_ptr<Image> image = i->first;
506 if (!post_process.empty ()) {
507 image = image->post_process (post_process, true);
510 if (i->second != AV_NOPTS_VALUE) {
/* PTS in seconds, corrected by our offset; round to the nearest frame */
511 double const pts = i->second * av_q2d (_format_context->streams[_video_stream]->time_base) + _pts_offset;
512 VideoFrame const f = rint (pts * _ffmpeg_content->video_frame_rate ());
513 video (image, false, f);
515 shared_ptr<const Film> film = _film.lock ();
517 film->log()->log ("Dropping frame without PTS");
/* Find and open a decoder for the content's subtitle stream, if there is
 * one.  Throws DecodeError if the stream, decoder or open fails.
 */
526 FFmpegDecoder::setup_subtitle ()
528 boost::mutex::scoped_lock lm (_mutex);
530 if (!_ffmpeg_content->subtitle_stream()) {
534 _subtitle_codec_context = _ffmpeg_content->subtitle_stream()->stream(_format_context)->codec;
535 if (_subtitle_codec_context == 0) {
536 throw DecodeError (N_("could not find subtitle stream"));
539 _subtitle_codec = avcodec_find_decoder (_subtitle_codec_context->codec_id);
541 if (_subtitle_codec == 0) {
542 throw DecodeError (N_("could not find subtitle decoder"));
545 if (avcodec_open2 (_subtitle_codec_context, _subtitle_codec, 0) < 0) {
546 throw DecodeError (N_("could not open subtitle decoder"));
/* Decode a subtitle packet: convert the single bitmap rect (only one is
 * supported) from its paletted form to RGBA and emit it via
 * image_subtitle() with its display period and position, the latter
 * expressed as proportions of the video frame.
 */
551 FFmpegDecoder::decode_subtitle_packet ()
555 if (avcodec_decode_subtitle2 (_subtitle_codec_context, &sub, &got_subtitle, &_packet) < 0 || !got_subtitle) {
559 /* Sometimes we get an empty AVSubtitle, which is used by some codecs to
560 indicate that the previous subtitle should stop.
562 if (sub.num_rects <= 0) {
/* Empty image => "clear the current subtitle" */
563 image_subtitle (shared_ptr<Image> (), dcpomatic::Rect<double> (), 0, 0);
565 } else if (sub.num_rects > 1) {
566 throw DecodeError (_("multi-part subtitles not yet supported"));
569 /* Subtitle PTS in seconds (within the source, not taking into account any of the
570 source that we may have chopped off for the DCP)
572 double const packet_time = (static_cast<double> (sub.pts) / AV_TIME_BASE) + _pts_offset;
574 /* hence start time for this sub */
575 ContentTime const from = (packet_time + (double (sub.start_display_time) / 1e3)) * TIME_HZ;
576 ContentTime const to = (packet_time + (double (sub.end_display_time) / 1e3)) * TIME_HZ;
578 AVSubtitleRect const * rect = sub.rects[0];
580 if (rect->type != SUBTITLE_BITMAP) {
581 throw DecodeError (_("non-bitmap subtitles not yet supported"));
584 /* Note RGBA is expressed little-endian, so the first byte in the word is R, second
585 G, third B, fourth A.
587 shared_ptr<Image> image (new Image (PIX_FMT_RGBA, dcp::Size (rect->w, rect->h), true));
589 /* Start of the first line in the subtitle */
590 uint8_t* sub_p = rect->pict.data[0];
591 /* sub_p looks up into a BGRA palette which is here
592 (i.e. first byte B, second G, third R, fourth A)
594 uint32_t const * palette = (uint32_t *) rect->pict.data[1];
595 /* Start of the output data */
596 uint32_t* out_p = (uint32_t *) image->data()[0];
/* Convert each paletted pixel to RGBA, swapping the B and R bytes */
598 for (int y = 0; y < rect->h; ++y) {
599 uint8_t* sub_line_p = sub_p;
600 uint32_t* out_line_p = out_p;
601 for (int x = 0; x < rect->w; ++x) {
602 uint32_t const p = palette[*sub_line_p++];
603 *out_line_p++ = ((p & 0xff) << 16) | (p & 0xff00) | ((p & 0xff0000) >> 16) | (p & 0xff000000);
/* Advance to the next source and destination lines (strides may differ) */
605 sub_p += rect->pict.linesize[0];
606 out_p += image->stride()[0] / sizeof (uint32_t);
/* Position/size of the subtitle as a proportion of the video frame */
609 dcp::Size const vs = _ffmpeg_content->video_size ();
613 dcpomatic::Rect<double> (
614 static_cast<double> (rect->x) / vs.width,
615 static_cast<double> (rect->y) / vs.height,
616 static_cast<double> (rect->w) / vs.width,
617 static_cast<double> (rect->h) / vs.height
/* Free the AVSubtitle's internals now we have copied what we need */
624 avsubtitle_free (&sub);