2 Copyright (C) 2012-2018 Carl Hetherington <cth@carlh.net>
4 This file is part of DCP-o-matic.
6 DCP-o-matic is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 DCP-o-matic is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with DCP-o-matic. If not, see <http://www.gnu.org/licenses/>.
22 /** @file src/ffmpeg_decoder.cc
23 * @brief A decoder using FFmpeg to decode content.
27 #include "audio_buffers.h"
28 #include "audio_content.h"
29 #include "audio_decoder.h"
30 #include "compose.hpp"
31 #include "dcpomatic_log.h"
32 #include "exceptions.h"
33 #include "ffmpeg_audio_stream.h"
34 #include "ffmpeg_content.h"
35 #include "ffmpeg_decoder.h"
36 #include "ffmpeg_subtitle_stream.h"
39 #include "frame_interval_checker.h"
42 #include "raw_image_proxy.h"
43 #include "text_content.h"
44 #include "text_decoder.h"
46 #include "video_decoder.h"
47 #include "video_filter_graph.h"
48 #include <dcp/subtitle_string.h>
49 #include <sub/ssa_reader.h>
50 #include <sub/subtitle.h>
51 #include <sub/collect.h>
53 #include <libavcodec/avcodec.h>
54 #include <libavformat/avformat.h>
56 #include <boost/algorithm/string.hpp>
66 using std::dynamic_pointer_cast;
67 using std::make_shared;
69 using std::shared_ptr;
72 using boost::optional;
74 using namespace dcpomatic;
/* Construct a decoder for some FFmpeg content.
 *
 * Creates the sub-decoders (video / audio / text) that the content calls for,
 * computes the PTS offset used to map FFmpeg timestamps to content time, and
 * prepares a small black image used to pad out missing video during flush.
 *
 * NOTE(review): several lines of this chunk appear to have been elided
 * (initialiser list entries, braces); comments describe only what is visible.
 */
77 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> film, shared_ptr<const FFmpegContent> c, bool fast)
80 , _filter_graphs(c->filters(), dcp::Fraction(lrint(_ffmpeg_content->video_frame_rate().get_value_or(24) * 1000), 1000))
82 if (c->video && c->video->use()) {
83 video = make_shared<VideoDecoder>(this, c);
/* Offset applied to all timestamps so that content time starts at zero */
84 _pts_offset = pts_offset (c->ffmpeg_audio_streams(), c->first_video(), c->active_video_frame_rate(film));
85 /* It doesn't matter what size or pixel format this is, it just needs to be black */
86 _black_image = make_shared<Image>(AV_PIX_FMT_RGB24, dcp::Size (128, 128), Image::Alignment::PADDED);
87 _black_image->make_black ();
93 audio = make_shared<AudioDecoder>(this, c->audio, fast);
97 text.push_back (make_shared<TextDecoder>(this, c->only_text()));
98 /* XXX: we should be calling maybe_set_position() on this TextDecoder, but we can't easily find
99 * the time of the first subtitle at this point.
/* Start with no known next-timestamp for any audio stream; these are filled
   in as audio frames are processed (see process_audio_frame()) */
103 for (auto i: c->ffmpeg_audio_streams()) {
104 _next_time[i] = boost::optional<dcpomatic::ContentTime>();
/* Flush the decoder in stages: first drain the codecs of buffered frames,
 * then flush the audio decoder, then pad out the remaining time with black
 * video and silence (flush_fill()).
 * @return FlushResult::DONE when flushing has completely finished, otherwise
 * FlushResult::AGAIN (caller should call again to make more progress).
 */
109 FFmpegDecoder::FlushResult
110 FFmpegDecoder::flush ()
112 LOG_DEBUG_PLAYER("Flush FFmpeg decoder: current state %1", static_cast<int>(_flush_state));
/* State machine: CODECS -> AUDIO_DECODER -> FILL; one step per call */
114 switch (_flush_state) {
115 case FlushState::CODECS:
116 if (flush_codecs() == FlushResult::DONE) {
117 LOG_DEBUG_PLAYER_NC("Finished flushing codecs");
118 _flush_state = FlushState::AUDIO_DECODER;
121 case FlushState::AUDIO_DECODER:
125 LOG_DEBUG_PLAYER_NC("Finished flushing audio decoder");
126 _flush_state = FlushState::FILL;
128 case FlushState::FILL:
129 if (flush_fill() == FlushResult::DONE) {
130 LOG_DEBUG_PLAYER_NC("Finished flushing fills");
131 return FlushResult::DONE;
/* Not finished yet: more flushing to do on a subsequent call */
136 return FlushResult::AGAIN;
140 /** Send flush (null) packets to the video and audio codecs and process any frames
    they return.  @return FlushResult::DONE once the codecs have nothing more to
    give, otherwise FlushResult::AGAIN. */
141 FFmpegDecoder::FlushResult
142 FFmpegDecoder::flush_codecs()
144 bool did_something = false;
/* A null packet asks the video codec to emit any frames it has buffered */
146 if (decode_and_process_video_packet(nullptr)) {
147 did_something = true;
/* Likewise drain each audio codec */
151 for (auto i: ffmpeg_content()->ffmpeg_audio_streams()) {
152 auto context = _codec_context[i->index(_format_context)];
153 int r = avcodec_send_packet (context, nullptr);
154 if (r < 0 && r != AVERROR_EOF) {
155 /* EOF can happen if we've already sent a flush packet */
156 throw DecodeError (N_("avcodec_send_packet"), N_("FFmpegDecoder::flush"), r);
158 r = avcodec_receive_frame (context, audio_frame(i));
160 process_audio_frame (i);
161 did_something = true;
/* As long as anything came out, there may be more: report AGAIN */
165 return did_something ? FlushResult::AGAIN : FlushResult::DONE;
/* Pad the ends of the video and audio streams with black frames and silence
 * so that every stream reaches the content's full length (rounded up to the
 * next video frame).
 * @return FlushResult::AGAIN if anything was emitted, FlushResult::DONE when
 * all streams have reached full length.
 */
169 FFmpegDecoder::FlushResult
170 FFmpegDecoder::flush_fill()
172 /* Make sure all streams are the same length and round up to the next video frame */
174 bool did_something = false;
176 auto const frc = film()->active_frame_rate_change(_ffmpeg_content->position());
177 ContentTime full_length (_ffmpeg_content->full_length(film()), frc);
178 full_length = full_length.ceil (frc.source);
179 if (video && !video->ignore()) {
180 double const vfr = _ffmpeg_content->video_frame_rate().get();
181 auto const f = full_length.frames_round (vfr);
/* Next video frame index to emit (one past the current position) */
182 auto const v = video->position(film()).get_value_or(ContentTime()).frames_round(vfr) + 1;
/* Emit the pre-made black image to stand in for the missing frame */
184 video->emit(film(), make_shared<const RawImageProxy>(_black_image), v);
185 did_something = true;
189 if (audio && !audio->ignore()) {
190 for (auto i: _ffmpeg_content->ffmpeg_audio_streams ()) {
191 auto const a = audio->stream_position(film(), i);
192 /* Unfortunately if a is 0 that really means that we don't know the stream position since
193 there has been no data on it since the last seek. In this case we'll just do nothing
194 here. I'm not sure if that's the right idea.
196 if (a > ContentTime() && a < full_length) {
197 LOG_DEBUG_PLAYER("Flush inserts silence at %1", to_string(a));
/* Insert silence in chunks of at most 0.1s so each call stays cheap */
198 auto to_do = min (full_length - a, ContentTime::from_seconds (0.1));
199 auto silence = make_shared<AudioBuffers>(i->channels(), to_do.frames_ceil (i->frame_rate()));
200 silence->make_silent ();
201 audio->emit (film(), i, silence, a, true);
202 did_something = true;
207 return did_something ? FlushResult::AGAIN : FlushResult::DONE;
212 FFmpegDecoder::pass ()
214 auto packet = av_packet_alloc();
215 DCPOMATIC_ASSERT (packet);
217 int r = av_read_frame (_format_context, packet);
219 /* AVERROR_INVALIDDATA can apparently be returned sometimes even when av_read_frame
220 has pretty-much succeeded (and hence generated data which should be processed).
221 Hence it makes sense to continue here in that case.
223 if (r < 0 && r != AVERROR_INVALIDDATA) {
224 LOG_DEBUG_PLAYER("FFpmegDecoder::pass flushes because av_read_frame returned %1", r);
225 if (r != AVERROR_EOF) {
226 /* Maybe we should fail here, but for now we'll just finish off instead */
228 av_strerror (r, buf, sizeof(buf));
229 LOG_ERROR (N_("error on av_read_frame (%1) (%2)"), &buf[0], r);
232 av_packet_free (&packet);
233 return flush() == FlushResult::DONE;
236 int const si = packet->stream_index;
237 auto fc = _ffmpeg_content;
239 if (_video_stream && si == _video_stream.get() && video && !video->ignore()) {
240 decode_and_process_video_packet (packet);
241 } else if (fc->subtitle_stream() && fc->subtitle_stream()->uses_index(_format_context, si) && !only_text()->ignore()) {
242 decode_and_process_subtitle_packet (packet);
244 decode_and_process_audio_packet (packet);
247 av_packet_free (&packet);
252 /** @param data pointer to array of pointers to buffers.
253 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
256 shared_ptr<AudioBuffers>
257 deinterleave_audio(shared_ptr<FFmpegAudioStream> stream, AVFrame* frame)
259 auto format = static_cast<AVSampleFormat>(frame->format);
261 /* XXX: can't we use swr_convert() to do the format conversion? */
263 int const channels = frame->channels;
264 int const frames = frame->nb_samples;
265 int const total_samples = frames * channels;
266 auto audio = make_shared<AudioBuffers>(channels, frames);
267 auto data = audio->data();
270 case AV_SAMPLE_FMT_U8:
272 auto p = reinterpret_cast<uint8_t *> (frame->data[0]);
275 for (int i = 0; i < total_samples; ++i) {
276 data[channel][sample] = float(*p++) / (1 << 23);
279 if (channel == channels) {
287 case AV_SAMPLE_FMT_S16:
289 auto p = reinterpret_cast<int16_t *> (frame->data[0]);
292 for (int i = 0; i < total_samples; ++i) {
293 data[channel][sample] = float(*p++) / (1 << 15);
296 if (channel == channels) {
304 case AV_SAMPLE_FMT_S16P:
306 auto p = reinterpret_cast<int16_t **> (frame->data);
307 for (int i = 0; i < channels; ++i) {
308 for (int j = 0; j < frames; ++j) {
309 data[i][j] = static_cast<float>(p[i][j]) / (1 << 15);
315 case AV_SAMPLE_FMT_S32:
317 auto p = reinterpret_cast<int32_t *> (frame->data[0]);
320 for (int i = 0; i < total_samples; ++i) {
321 data[channel][sample] = static_cast<float>(*p++) / 2147483648;
324 if (channel == channels) {
332 case AV_SAMPLE_FMT_S32P:
334 auto p = reinterpret_cast<int32_t **> (frame->data);
335 for (int i = 0; i < channels; ++i) {
336 for (int j = 0; j < frames; ++j) {
337 data[i][j] = static_cast<float>(p[i][j]) / 2147483648;
343 case AV_SAMPLE_FMT_FLT:
345 auto p = reinterpret_cast<float*> (frame->data[0]);
348 for (int i = 0; i < total_samples; ++i) {
349 data[channel][sample] = *p++;
352 if (channel == channels) {
360 case AV_SAMPLE_FMT_FLTP:
362 auto p = reinterpret_cast<float**> (frame->data);
363 DCPOMATIC_ASSERT(channels <= stream->channels());
364 /* Sometimes there aren't as many channels in the frame as in the stream */
365 for (int i = 0; i < channels; ++i) {
366 memcpy (data[i], p[i], frames * sizeof(float));
368 for (int i = channels; i < stream->channels(); ++i) {
369 audio->make_silent (i);
375 throw DecodeError (String::compose(_("Unrecognised audio sample format (%1)"), static_cast<int>(format)));
383 FFmpegDecoder::audio_sample_format (shared_ptr<FFmpegAudioStream> stream) const
385 return static_cast<AVSampleFormat>(stream->stream(_format_context)->codecpar->format);
390 FFmpegDecoder::bytes_per_audio_sample (shared_ptr<FFmpegAudioStream> stream) const
392 return av_get_bytes_per_sample (audio_sample_format (stream));
/* Seek the content to (close to) the given time.
 *
 * Picks a stream to seek on (video if present, otherwise the active audio
 * stream), performs the seek with pre-roll if accuracy is requested, then
 * flushes all codec buffers and resets per-stream timestamp state so that
 * no pre-seek data leaks through.
 *
 * NOTE(review): the av_seek_frame()/avformat seek call itself appears to have
 * been elided from this chunk (only its timestamp argument is visible at the
 * line numbered 433).
 */
397 FFmpegDecoder::seek (ContentTime time, bool accurate)
399 Decoder::seek (time, accurate);
401 /* If we are doing an `accurate' seek, we need to use pre-roll, as
402 we don't really know what the seek will give us.
405 auto pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
408 /* XXX: it seems debatable whether PTS should be used here...
409 http://www.mjbshaw.com/2012/04/seeking-in-ffmpeg-know-your-timestamp.html
412 optional<int> stream;
415 stream = _video_stream;
417 DCPOMATIC_ASSERT (_ffmpeg_content->audio);
418 auto s = dynamic_pointer_cast<FFmpegAudioStream>(_ffmpeg_content->audio->stream());
420 stream = s->index (_format_context);
424 DCPOMATIC_ASSERT (stream);
/* Convert content time to stream time-base units for the seek */
426 auto u = time - _pts_offset;
427 if (u < ContentTime ()) {
433 u.seconds() / av_q2d (_format_context->streams[stream.get()]->time_base),
437 /* Force re-creation of filter graphs to reset them and hence to make sure
438 they don't have any pre-seek frames knocking about.
440 _filter_graphs.clear();
/* Discard any frames buffered inside the codecs */
442 if (video_codec_context ()) {
443 avcodec_flush_buffers (video_codec_context());
446 for (auto i: ffmpeg_content()->ffmpeg_audio_streams()) {
447 avcodec_flush_buffers (_codec_context[i->index(_format_context)]);
450 if (subtitle_codec_context ()) {
451 avcodec_flush_buffers (subtitle_codec_context ());
/* Forget the in-progress subtitle and all per-stream next-timestamps;
   they will be re-established from the first post-seek data */
454 _have_current_subtitle = false;
456 for (auto& i: _next_time) {
457 i.second = boost::optional<dcpomatic::ContentTime>();
462 shared_ptr<FFmpegAudioStream>
463 FFmpegDecoder::audio_stream_from_index (int index) const
465 /* XXX: inefficient */
466 auto streams = ffmpeg_content()->ffmpeg_audio_streams();
467 auto stream = streams.begin();
468 while (stream != streams.end() && !(*stream)->uses_index(_format_context, index)) {
472 if (stream == streams.end ()) {
/* Deinterleave a decoded audio frame for the given stream, work out its
 * timestamp (coping with frames that carry AV_NOPTS_VALUE), trim anything
 * before content time zero, and emit the result to the audio decoder.
 */
481 FFmpegDecoder::process_audio_frame (shared_ptr<FFmpegAudioStream> stream)
483 auto frame = audio_frame (stream);
484 auto data = deinterleave_audio(stream, frame);
486 auto const time_base = stream->stream(_format_context)->time_base;
489 if (frame->pts == AV_NOPTS_VALUE) {
490 /* In some streams we see not every frame coming through with a timestamp; for those
491 that have AV_NOPTS_VALUE we need to work out the timestamp ourselves. This is
492 particularly noticeable with TrueHD streams (see #1111).
/* Prefer the running position we predicted after the previous frame */
494 if (_next_time[stream]) {
495 ct = *_next_time[stream];
/* Otherwise fall back to FFmpeg's best-effort timestamp estimate */
498 ct = ContentTime::from_seconds (
499 frame->best_effort_timestamp *
503 "Process audio with timestamp %1 (BET %2, timebase %3/%4, (PTS offset %5)",
505 frame->best_effort_timestamp,
508 to_string(_pts_offset)
/* Predict the timestamp of the next frame from this one's length */
512 _next_time[stream] = ct + ContentTime::from_frames(data->frames(), stream->frame_rate());
514 if (ct < ContentTime()) {
515 /* Discard audio data that comes before time 0 */
516 auto const remove = min (int64_t(data->frames()), (-ct).frames_ceil(double(stream->frame_rate())));
517 data->move (data->frames() - remove, remove, 0);
518 data->set_frames (data->frames() - remove);
519 ct += ContentTime::from_frames (remove, stream->frame_rate());
/* Still negative after trimming: log the suspect timestamp for debugging */
522 if (ct < ContentTime()) {
524 "Crazy timestamp %1 for %2 samples in stream %3 (ts=%4 tb=%5, off=%6)",
528 frame->best_effort_timestamp,
530 to_string(_pts_offset)
534 /* Give this data provided there is some, and its time is sane */
535 if (ct >= ContentTime() && data->frames() > 0) {
536 audio->emit (film(), stream, data, ct);
/* Send one audio packet to the appropriate codec context and process any
 * frame(s) that come back.  Packets on stream indices we don't know about
 * are ignored (audio_stream_from_index() returns null for those).
 * NOTE(review): the receive loop's structure (and the early return for an
 * unknown stream) appears partially elided from this chunk.
 */
542 FFmpegDecoder::decode_and_process_audio_packet (AVPacket* packet)
544 auto stream = audio_stream_from_index (packet->stream_index);
549 auto context = _codec_context[stream->index(_format_context)];
550 auto frame = audio_frame (stream);
552 LOG_DEBUG_PLAYER("Send audio packet on stream %1", stream->index(_format_context));
553 int r = avcodec_send_packet (context, packet);
/* Errors from send are logged but not fatal; see comment below */
555 LOG_WARNING("avcodec_send_packet returned %1 for an audio packet", r);
558 r = avcodec_receive_frame (context, frame);
559 if (r == AVERROR(EAGAIN)) {
560 /* More input is required */
561 LOG_DEBUG_PLAYER_NC("EAGAIN after trying to receive audio frame");
565 /* We choose to be relaxed here about other errors; it seems that there may be valid
566 * data to decode even if an error occurred. #352 may be related (though this was
567 * when we were using an old version of the FFmpeg API).
569 process_audio_frame (stream);
/* Send one video packet (or nullptr to flush) to the video codec and process
 * every frame that comes back via process_video_frame().
 * NOTE(review): the enclosing send/receive loop structure (the EAGAIN re-send
 * handling referred to by the comment at line 588) appears partially elided
 * from this chunk.
 */
575 FFmpegDecoder::decode_and_process_video_packet (AVPacket* packet)
577 DCPOMATIC_ASSERT (_video_stream);
579 auto context = video_codec_context();
581 bool pending = false;
583 int r = avcodec_send_packet (context, packet);
585 LOG_WARNING("avcodec_send_packet returned %1 for a video packet", r);
588 /* EAGAIN means we should call avcodec_receive_frame and then re-send the same packet */
589 pending = r == AVERROR(EAGAIN);
592 r = avcodec_receive_frame (context, _video_frame);
/* During a flush (null packet) any error simply ends the drain */
593 if (r == AVERROR(EAGAIN) || r == AVERROR_EOF || (r < 0 && !packet)) {
594 /* More input is required, no more frames are coming, or we are flushing and there was
595 * some error which we just want to ignore.
599 throw DecodeError (N_("avcodec_receive_frame"), N_("FFmpeg::decode_and_process_video_packet"), r);
602 process_video_frame ();
/* Push the current decoded video frame through the filter graph appropriate
 * to its size/pixel format, then emit each resulting image at a frame index
 * derived from its PTS.  Frames without a PTS are dropped (with a warning).
 */
611 FFmpegDecoder::process_video_frame ()
/* Filter graphs are keyed on (size, pixel format); one is created on demand */
613 auto graph = _filter_graphs.get(dcp::Size(_video_frame->width, _video_frame->height), static_cast<AVPixelFormat>(_video_frame->format));
614 auto images = graph->process (_video_frame);
616 for (auto const& i: images) {
618 auto image = i.first;
620 if (i.second != AV_NOPTS_VALUE) {
/* Convert stream-timebase PTS to seconds of content time */
621 double const pts = i.second * av_q2d(_format_context->streams[_video_stream.get()]->time_base) + _pts_offset.seconds();
625 make_shared<RawImageProxy>(image),
626 llrint(pts * _ffmpeg_content->active_video_frame_rate(film()))
629 LOG_WARNING_NC ("Dropping frame without PTS");
/* Decode one subtitle packet: stop any subtitle that is currently showing,
 * then emit the new subtitle's rects (bitmap or ASS) with times adjusted by
 * the PTS offset.
 */
636 FFmpegDecoder::decode_and_process_subtitle_packet (AVPacket* packet)
640 if (avcodec_decode_subtitle2 (subtitle_codec_context(), &sub, &got_subtitle, packet) < 0 || !got_subtitle) {
644 auto sub_period = subtitle_period (packet, ffmpeg_content()->subtitle_stream()->stream(_format_context), sub);
646 /* Stop any current subtitle, either at the time it was supposed to stop, or now if now is sooner */
647 if (_have_current_subtitle) {
648 if (_current_subtitle_to) {
649 only_text()->emit_stop (min(*_current_subtitle_to, sub_period.from + _pts_offset));
651 only_text()->emit_stop (sub_period.from + _pts_offset);
653 _have_current_subtitle = false;
656 if (sub.num_rects <= 0) {
657 /* Nothing new in this subtitle */
658 avsubtitle_free (&sub);
662 /* Subtitle PTS (within the source, not taking into account any of the
663 source that we may have chopped off for the DCP).
666 from = sub_period.from + _pts_offset;
/* Remember when this subtitle should stop (if the period gives an end time) */
668 _current_subtitle_to = *sub_period.to + _pts_offset;
670 _current_subtitle_to = optional<ContentTime>();
671 _have_current_subtitle = true;
/* Collect bitmap rects into one ContentBitmapText; ASS rects are emitted
   directly by process_ass_subtitle() */
674 ContentBitmapText bitmap_text(from);
675 for (unsigned int i = 0; i < sub.num_rects; ++i) {
676 auto const rect = sub.rects[i];
678 switch (rect->type) {
681 case SUBTITLE_BITMAP:
682 bitmap_text.subs.push_back(process_bitmap_subtitle(rect));
685 cout << "XXX: SUBTITLE_TEXT " << rect->text << "\n";
688 process_ass_subtitle (rect->ass, from);
693 if (!bitmap_text.subs.empty()) {
694 only_text()->emit_bitmap_start(bitmap_text);
/* If the subtitle has a known end time, emit the stop straight away */
697 if (_current_subtitle_to) {
698 only_text()->emit_stop (*_current_subtitle_to);
701 avsubtitle_free (&sub);
/* Convert one bitmap subtitle rect (paletted image) to a BGRA Image, applying
 * the user's colour remapping from the subtitle stream, and compute the rect's
 * position/size as fractions of the video frame.
 */
706 FFmpegDecoder::process_bitmap_subtitle (AVSubtitleRect const * rect)
708 /* Note BGRA is expressed little-endian, so the first byte in the word is B, second
709 G, third R, fourth A.
711 auto image = make_shared<Image>(AV_PIX_FMT_BGRA, dcp::Size (rect->w, rect->h), Image::Alignment::PADDED);
713 #ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
714 /* Start of the first line in the subtitle */
715 auto sub_p = rect->pict.data[0];
716 /* sub_p looks up into a BGRA palette which is at rect->pict.data[1];
717 (i.e. first byte B, second G, third R, fourth A)
719 auto const palette = rect->pict.data[1];
721 /* Start of the first line in the subtitle */
722 auto sub_p = rect->data[0];
723 /* sub_p looks up into a BGRA palette which is at rect->data[1].
724 (first byte B, second G, third R, fourth A)
726 auto const* palette = rect->data[1];
728 /* And the stream has a map of those palette colours to colours
729 chosen by the user; created a `mapped' palette from those settings.
731 auto colour_map = ffmpeg_content()->subtitle_stream()->colours();
732 vector<RGBA> mapped_palette (rect->nb_colors);
/* NOTE(review): this loop reads palette[0..3] but the advance of `palette`
   to the next 4-byte entry is not visible in this chunk — it appears to have
   been elided; without it every palette slot would get colour 0's value. */
733 for (int i = 0; i < rect->nb_colors; ++i) {
734 RGBA c (palette[2], palette[1], palette[0], palette[3]);
735 auto j = colour_map.find (c);
736 if (j != colour_map.end ()) {
737 mapped_palette[i] = j->second;
739 /* This colour was not found in the FFmpegSubtitleStream's colour map; probably because
740 it is from a project that was created before this stuff was added. Just use the
741 colour straight from the original palette.
743 mapped_palette[i] = c;
748 /* Start of the output data */
749 auto out_p = image->data()[0];
/* Expand the paletted bitmap into BGRA, one row at a time */
751 for (int y = 0; y < rect->h; ++y) {
752 auto sub_line_p = sub_p;
753 auto out_line_p = out_p;
754 for (int x = 0; x < rect->w; ++x) {
755 auto const p = mapped_palette[*sub_line_p++];
761 #ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
762 sub_p += rect->pict.linesize[0];
764 sub_p += rect->linesize[0];
766 out_p += image->stride()[0];
/* Work out the video size to express the rect as proportions of it */
769 int target_width = subtitle_codec_context()->width;
770 if (target_width == 0 && video_codec_context()) {
771 /* subtitle_codec_context()->width == 0 has been seen in the wild but I don't
772 know if it's supposed to mean something from FFmpeg's point of view.
774 target_width = video_codec_context()->width;
776 int target_height = subtitle_codec_context()->height;
777 if (target_height == 0 && video_codec_context()) {
778 target_height = video_codec_context()->height;
780 DCPOMATIC_ASSERT (target_width);
781 DCPOMATIC_ASSERT (target_height);
782 dcpomatic::Rect<double> const scaled_rect (
783 static_cast<double>(rect->x) / target_width,
784 static_cast<double>(rect->y) / target_height,
785 static_cast<double>(rect->w) / target_width,
786 static_cast<double>(rect->h) / target_height
789 return { image, scaled_rect };
794 FFmpegDecoder::process_ass_subtitle (string ass, ContentTime from)
796 /* We have no styles and no Format: line, so I'm assuming that FFmpeg
797 produces a single format of Dialogue: lines...
802 for (size_t i = 0; i < ass.length(); ++i) {
803 if (commas < 9 && ass[i] == ',') {
805 } else if (commas == 9) {
814 sub::RawSubtitle base;
815 auto raw = sub::SSAReader::parse_line (
818 _ffmpeg_content->video->size().width,
819 _ffmpeg_content->video->size().height,
823 for (auto const& i: sub::collect<vector<sub::Subtitle>>(raw)) {
824 only_text()->emit_plain_start (from, i);