/*
    Copyright (C) 2012-2018 Carl Hetherington <cth@carlh.net>

    This file is part of DCP-o-matic.

    DCP-o-matic is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    DCP-o-matic is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.

*/
/** @file  src/ffmpeg_decoder.cc
 *  @brief A decoder using FFmpeg to decode content.
 */
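/* Roughly speaking, a caller drives this decoder with something like
 *
 *     FFmpegDecoder decoder (film, content, fast);
 *     while (!decoder.pass()) {}
 *
 * pass() reads one packet per call and dispatches it to the video, audio or
 * subtitle path; it returns true once end-of-file has been reached and the
 * post-EOF flush has completed.
 */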
27 #include "audio_buffers.h"
28 #include "audio_content.h"
29 #include "audio_decoder.h"
30 #include "compose.hpp"
31 #include "dcpomatic_log.h"
32 #include "exceptions.h"
33 #include "ffmpeg_audio_stream.h"
34 #include "ffmpeg_content.h"
35 #include "ffmpeg_decoder.h"
36 #include "ffmpeg_subtitle_stream.h"
39 #include "frame_interval_checker.h"
42 #include "raw_image_proxy.h"
43 #include "text_content.h"
44 #include "text_decoder.h"
46 #include "video_decoder.h"
47 #include "video_filter_graph.h"
48 #include <dcp/subtitle_string.h>
49 #include <sub/ssa_reader.h>
50 #include <sub/subtitle.h>
51 #include <sub/collect.h>
53 #include <libavcodec/avcodec.h>
54 #include <libavformat/avformat.h>
56 #include <boost/algorithm/string.hpp>
using std::cout;
using std::dynamic_pointer_cast;
using std::make_shared;
using std::min;
using std::shared_ptr;
using std::string;
using std::vector;
using boost::optional;
using namespace dcpomatic;
FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> film, shared_ptr<const FFmpegContent> c, bool fast)
	: FFmpeg (c)
	, Decoder (film)
	, _filter_graphs(c->filters(), dcp::Fraction(lrint(_ffmpeg_content->video_frame_rate().get_value_or(24) * 1000), 1000))
{
	if (c->video && c->video->use()) {
		video = make_shared<VideoDecoder>(this, c);
		_pts_offset = pts_offset (c->ffmpeg_audio_streams(), c->first_video(), c->active_video_frame_rate(film));
		/* It doesn't matter what size or pixel format this is, it just needs to be black */
		_black_image = make_shared<Image>(AV_PIX_FMT_RGB24, dcp::Size (128, 128), Image::Alignment::PADDED);
		_black_image->make_black ();
	}

	if (c->audio) {
		audio = make_shared<AudioDecoder>(this, c->audio, fast);
	}

	if (c->only_text()) {
		text.push_back (make_shared<TextDecoder>(this, c->only_text()));
		/* XXX: we should be calling maybe_set_position() on this TextDecoder, but we can't easily find
		 * the time of the first subtitle at this point.
		 */
	}

	for (auto i: c->ffmpeg_audio_streams()) {
		_next_time[i] = boost::optional<dcpomatic::ContentTime>();
	}
}
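/* Flushing proceeds through a small state machine, one step per call: first
 * CODECS (drain any frames buffered inside libavcodec), then AUDIO_DECODER
 * (let our AudioDecoder emit anything it is holding back), then FILL (pad
 * video with black and audio with silence up to the content length).  Each
 * call returns AGAIN until the final state reports DONE.
 */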
FFmpegDecoder::FlushResult
FFmpegDecoder::flush ()
{
	LOG_DEBUG_PLAYER("Flush FFmpeg decoder: current state %1", static_cast<int>(_flush_state));

	switch (_flush_state) {
	case FlushState::CODECS:
		if (flush_codecs() == FlushResult::DONE) {
			LOG_DEBUG_PLAYER_NC("Finished flushing codecs");
			_flush_state = FlushState::AUDIO_DECODER;
		}
		break;
	case FlushState::AUDIO_DECODER:
		if (audio) {
			audio->flush();
		}
		LOG_DEBUG_PLAYER_NC("Finished flushing audio decoder");
		_flush_state = FlushState::FILL;
		break;
	case FlushState::FILL:
		if (flush_fill() == FlushResult::DONE) {
			LOG_DEBUG_PLAYER_NC("Finished flushing fills");
			return FlushResult::DONE;
		}
		break;
	}

	return FlushResult::AGAIN;
}
/** @return DONE if we have finished flushing the codecs, otherwise AGAIN */
FFmpegDecoder::FlushResult
FFmpegDecoder::flush_codecs()
{
	bool did_something = false;

	if (decode_and_process_video_packet(nullptr)) {
		did_something = true;
	}

	for (auto i: ffmpeg_content()->ffmpeg_audio_streams()) {
		auto context = _codec_context[i->index(_format_context)];
		int r = avcodec_send_packet (context, nullptr);
		if (r < 0 && r != AVERROR_EOF) {
			/* EOF can happen if we've already sent a flush packet */
			throw DecodeError (N_("avcodec_send_packet"), N_("FFmpegDecoder::flush"), r);
		}
		r = avcodec_receive_frame (context, audio_frame(i));
		if (r >= 0) {
			process_audio_frame (i);
			did_something = true;
		}
	}

	return did_something ? FlushResult::AGAIN : FlushResult::DONE;
}
FFmpegDecoder::FlushResult
FFmpegDecoder::flush_fill()
{
	/* Make sure all streams are the same length and round up to the next video frame */

	bool did_something = false;

	auto const frc = film()->active_frame_rate_change(_ffmpeg_content->position());
	ContentTime full_length (_ffmpeg_content->full_length(film()), frc);
	full_length = full_length.ceil (frc.source);
	if (video && !video->ignore()) {
		double const vfr = _ffmpeg_content->video_frame_rate().get();
		auto const v = video->position(film()).get_value_or(ContentTime()) + ContentTime::from_frames(1, vfr);
		if (v < full_length) {
			video->emit(film(), make_shared<const RawImageProxy>(_black_image), v);
			did_something = true;
		}
	}

	if (audio && !audio->ignore()) {
		for (auto i: _ffmpeg_content->ffmpeg_audio_streams ()) {
			auto const a = audio->stream_position(film(), i);
			/* Unfortunately if a is 0 that really means that we don't know the stream position since
			   there has been no data on it since the last seek.  In this case we'll just do nothing
			   here.  I'm not sure if that's the right idea.
			*/
			if (a > ContentTime() && a < full_length) {
				LOG_DEBUG_PLAYER("Flush inserts silence at %1", to_string(a));
				auto to_do = min (full_length - a, ContentTime::from_seconds (0.1));
				auto silence = make_shared<AudioBuffers>(i->channels(), to_do.frames_ceil (i->frame_rate()));
				silence->make_silent ();
				audio->emit (film(), i, silence, a, true);
				did_something = true;
			}
		}
	}

	return did_something ? FlushResult::AGAIN : FlushResult::DONE;
}
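/** Read and decode one packet from the content.  @return true if the decode
 *  is complete, i.e. we have reached end-of-file and finished flushing.
 */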
bool
FFmpegDecoder::pass ()
{
	auto packet = av_packet_alloc();
	DCPOMATIC_ASSERT (packet);

	int r = av_read_frame (_format_context, packet);

	/* AVERROR_INVALIDDATA can apparently be returned sometimes even when av_read_frame
	   has pretty-much succeeded (and hence generated data which should be processed).
	   Hence it makes sense to continue here in that case.
	*/
	if (r < 0 && r != AVERROR_INVALIDDATA) {
		LOG_DEBUG_PLAYER("FFmpegDecoder::pass flushes because av_read_frame returned %1", r);
		if (r != AVERROR_EOF) {
			/* Maybe we should fail here, but for now we'll just finish off instead */
			char buf[256];
			av_strerror (r, buf, sizeof(buf));
			LOG_ERROR (N_("error on av_read_frame (%1) (%2)"), &buf[0], r);
		}

		av_packet_free (&packet);
		return flush() == FlushResult::DONE;
	}

	int const si = packet->stream_index;
	auto fc = _ffmpeg_content;

	if (_video_stream && si == _video_stream.get() && video && !video->ignore()) {
		decode_and_process_video_packet (packet);
	} else if (fc->subtitle_stream() && fc->subtitle_stream()->uses_index(_format_context, si) && !only_text()->ignore()) {
		decode_and_process_subtitle_packet (packet);
	} else {
		decode_and_process_audio_packet (packet);
	}

	av_packet_free (&packet);
	return false;
}
/** Deinterleave audio from an AVFrame into a new set of AudioBuffers.  For
 *  non-planar formats only frame->data[0] is used; for planar formats there
 *  is one plane per channel.
 */
shared_ptr<AudioBuffers>
deinterleave_audio(AVFrame* frame)
{
	auto format = static_cast<AVSampleFormat>(frame->format);

	/* XXX: can't we use swr_convert() to do the format conversion? */
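	/* The integer cases below scale samples to [-1, 1) by dividing by
	   2^(bits - 1); for example the 16-bit sample 16384 becomes
	   16384 / 32768 = 0.5.

	   A swr-based version (a sketch only, untested) might replace the whole
	   switch with something like:

	       SwrContext* swr = nullptr;
	       swr_alloc_set_opts2(
	           &swr,
	           &frame->ch_layout, AV_SAMPLE_FMT_FLTP, frame->sample_rate,
	           &frame->ch_layout, format, frame->sample_rate,
	           0, nullptr
	           );
	       swr_init(swr);
	       swr_convert(swr, out_planes, frame->nb_samples, const_cast<uint8_t const**>(frame->data), frame->nb_samples);
	       swr_free(&swr);

	   where out_planes would point at the AudioBuffers' float data, suitably
	   cast to uint8_t*.
	*/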
	int const channels = frame->ch_layout.nb_channels;
	int const frames = frame->nb_samples;
	int const total_samples = frames * channels;
	auto audio = make_shared<AudioBuffers>(channels, frames);
	auto data = audio->data();

	switch (format) {
	case AV_SAMPLE_FMT_U8:
	{
		auto p = reinterpret_cast<uint8_t *> (frame->data[0]);
		int sample = 0;
		int channel = 0;
		for (int i = 0; i < total_samples; ++i) {
			/* U8 samples are unsigned, centred on 128 */
			data[channel][sample] = (float(*p++) - 128) / 128;

			++channel;
			if (channel == channels) {
				channel = 0;
				++sample;
			}
		}
	}
	break;

	case AV_SAMPLE_FMT_S16:
	{
		auto p = reinterpret_cast<int16_t *> (frame->data[0]);
		int sample = 0;
		int channel = 0;
		for (int i = 0; i < total_samples; ++i) {
			data[channel][sample] = float(*p++) / (1 << 15);

			++channel;
			if (channel == channels) {
				channel = 0;
				++sample;
			}
		}
	}
	break;

	case AV_SAMPLE_FMT_S16P:
	{
		auto p = reinterpret_cast<int16_t **> (frame->data);
		for (int i = 0; i < channels; ++i) {
			for (int j = 0; j < frames; ++j) {
				data[i][j] = static_cast<float>(p[i][j]) / (1 << 15);
			}
		}
	}
	break;

	case AV_SAMPLE_FMT_S32:
	{
		auto p = reinterpret_cast<int32_t *> (frame->data[0]);
		int sample = 0;
		int channel = 0;
		for (int i = 0; i < total_samples; ++i) {
			data[channel][sample] = static_cast<float>(*p++) / 2147483648;

			++channel;
			if (channel == channels) {
				channel = 0;
				++sample;
			}
		}
	}
	break;

	case AV_SAMPLE_FMT_S32P:
	{
		auto p = reinterpret_cast<int32_t **> (frame->data);
		for (int i = 0; i < channels; ++i) {
			for (int j = 0; j < frames; ++j) {
				data[i][j] = static_cast<float>(p[i][j]) / 2147483648;
			}
		}
	}
	break;

	case AV_SAMPLE_FMT_FLT:
	{
		auto p = reinterpret_cast<float*> (frame->data[0]);
		int sample = 0;
		int channel = 0;
		for (int i = 0; i < total_samples; ++i) {
			data[channel][sample] = *p++;

			++channel;
			if (channel == channels) {
				channel = 0;
				++sample;
			}
		}
	}
	break;

	case AV_SAMPLE_FMT_FLTP:
	{
		auto p = reinterpret_cast<float**> (frame->data);
		for (int i = 0; i < channels; ++i) {
			memcpy (data[i], p[i], frames * sizeof(float));
		}
	}
	break;

	default:
		throw DecodeError (String::compose(_("Unrecognised audio sample format (%1)"), static_cast<int>(format)));
	}

	return audio;
}
AVSampleFormat
FFmpegDecoder::audio_sample_format (shared_ptr<FFmpegAudioStream> stream) const
{
	return static_cast<AVSampleFormat>(stream->stream(_format_context)->codecpar->format);
}

int
FFmpegDecoder::bytes_per_audio_sample (shared_ptr<FFmpegAudioStream> stream) const
{
	return av_get_bytes_per_sample (audio_sample_format (stream));
}
void
FFmpegDecoder::seek (ContentTime time, bool accurate)
{
	Decoder::seek (time, accurate);

	/* If we are doing an `accurate' seek, we need to use pre-roll, as
	   we don't really know what the seek will give us.
	*/

	auto pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
	time -= pre_roll;

	/* XXX: it seems debatable whether PTS should be used here...
	   http://www.mjbshaw.com/2012/04/seeking-in-ffmpeg-know-your-timestamp.html
	*/

	optional<int> stream;

	if (_video_stream) {
		stream = _video_stream;
	} else {
		DCPOMATIC_ASSERT (_ffmpeg_content->audio);
		auto s = dynamic_pointer_cast<FFmpegAudioStream>(_ffmpeg_content->audio->stream());
		if (s) {
			stream = s->index (_format_context);
		}
	}

	DCPOMATIC_ASSERT (stream);
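	/* av_seek_frame() wants the target expressed in the stream's time base,
	   hence the division by av_q2d(time_base) below: with the common 90kHz
	   time base (1/90000), a seek to 8s becomes timestamp 720000.
	*/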
	auto u = time - _pts_offset;
	if (u < ContentTime ()) {
		u = ContentTime ();
	}
	av_seek_frame (
		_format_context,
		stream.get(),
		u.seconds() / av_q2d (_format_context->streams[stream.get()]->time_base),
		AVSEEK_FLAG_BACKWARD
		);

	/* Force re-creation of filter graphs to reset them and hence to make sure
	   they don't have any pre-seek frames knocking about.
	*/
	_filter_graphs.clear();

	if (video_codec_context ()) {
		avcodec_flush_buffers (video_codec_context());
	}

	for (auto i: ffmpeg_content()->ffmpeg_audio_streams()) {
		avcodec_flush_buffers (_codec_context[i->index(_format_context)]);
	}

	if (subtitle_codec_context ()) {
		avcodec_flush_buffers (subtitle_codec_context ());
	}

	_have_current_subtitle = false;

	for (auto& i: _next_time) {
		i.second = boost::optional<dcpomatic::ContentTime>();
	}
}
shared_ptr<FFmpegAudioStream>
FFmpegDecoder::audio_stream_from_index (int index) const
{
	/* XXX: inefficient */
	auto streams = ffmpeg_content()->ffmpeg_audio_streams();
	auto stream = streams.begin();
	while (stream != streams.end() && !(*stream)->uses_index(_format_context, index)) {
		++stream;
	}

	if (stream == streams.end ()) {
		return {};
	}

	return *stream;
}
void
FFmpegDecoder::process_audio_frame (shared_ptr<FFmpegAudioStream> stream)
{
	auto frame = audio_frame (stream);
	auto data = deinterleave_audio(frame);

	auto const time_base = stream->stream(_format_context)->time_base;

	ContentTime ct;
	if (frame->pts == AV_NOPTS_VALUE) {
		/* In some streams we see not every frame coming through with a timestamp; for those
		   that have AV_NOPTS_VALUE we need to work out the timestamp ourselves.  This is
		   particularly noticeable with TrueHD streams (see #1111).
		*/
		if (_next_time[stream]) {
			ct = *_next_time[stream];
		}
	} else {
		ct = ContentTime::from_seconds (frame->best_effort_timestamp * av_q2d(time_base)) + _pts_offset;
		LOG_DEBUG_PLAYER(
			"Process audio with timestamp %1 (BET %2, timebase %3/%4, PTS offset %5)",
			to_string(ct),
			frame->best_effort_timestamp,
			time_base.num,
			time_base.den,
			to_string(_pts_offset)
			);
	}

	_next_time[stream] = ct + ContentTime::from_frames(data->frames(), stream->frame_rate());
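	/* For example, with a 48kHz stream, a frame of 1536 samples at 1s puts
	   the next expected timestamp at 1s + 1536/48000 = 1.032s; this stands in
	   for the PTS if the following frame arrives with AV_NOPTS_VALUE.
	*/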
	if (ct < ContentTime()) {
		/* Discard audio data that comes before time 0 */
		auto const remove = min (int64_t(data->frames()), (-ct).frames_ceil(double(stream->frame_rate())));
		data->move (data->frames() - remove, remove, 0);
		data->set_frames (data->frames() - remove);
		ct += ContentTime::from_frames (remove, stream->frame_rate());
	}

	if (ct < ContentTime()) {
		LOG_WARNING (
			"Crazy timestamp %1 for %2 samples in stream %3 (ts=%4 tb=%5, off=%6)",
			to_string(ct),
			data->frames(),
			stream->index(_format_context),
			frame->best_effort_timestamp,
			String::compose("%1/%2", time_base.num, time_base.den),
			to_string(_pts_offset)
			);
	}

	/* Give this data provided there is some, and its time is sane */
	if (ct >= ContentTime() && data->frames() > 0) {
		audio->emit (film(), stream, data, ct);
	}
}
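/* Decode one audio packet using FFmpeg's send/receive API: the packet is
 * sent once, then avcodec_receive_frame() is called until it reports
 * EAGAIN (more input required).  A single packet can yield zero, one or
 * several frames.
 */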
void
FFmpegDecoder::decode_and_process_audio_packet (AVPacket* packet)
{
	auto stream = audio_stream_from_index (packet->stream_index);
	if (!stream) {
		return;
	}

	auto context = _codec_context[stream->index(_format_context)];
	auto frame = audio_frame (stream);

	LOG_DEBUG_PLAYER("Send audio packet on stream %1", stream->index(_format_context));
	int r = avcodec_send_packet (context, packet);
	if (r < 0) {
		LOG_WARNING("avcodec_send_packet returned %1 for an audio packet", r);
	}
	while (r >= 0) {
		r = avcodec_receive_frame (context, frame);
		if (r == AVERROR(EAGAIN)) {
			/* More input is required */
			LOG_DEBUG_PLAYER_NC("EAGAIN after trying to receive audio frame");
			return;
		}

		/* We choose to be relaxed here about other errors; it seems that there may be valid
		 * data to decode even if an error occurred.  #352 may be related (though this was
		 * when we were using an old version of the FFmpeg API).
		 */
		process_audio_frame (stream);
	}
}
bool
FFmpegDecoder::decode_and_process_video_packet (AVPacket* packet)
{
	DCPOMATIC_ASSERT (_video_stream);

	auto context = video_codec_context();

	bool pending = false;
	do {
		int r = avcodec_send_packet (context, packet);
		if (r < 0) {
			LOG_WARNING("avcodec_send_packet returned %1 for a video packet", r);
		}

		/* EAGAIN means we should call avcodec_receive_frame and then re-send the same packet */
		pending = r == AVERROR(EAGAIN);

		while (true) {
			r = avcodec_receive_frame (context, _video_frame);
			if (r == AVERROR(EAGAIN) || r == AVERROR_EOF || (r < 0 && !packet)) {
				/* More input is required, no more frames are coming, or we are flushing and there was
				 * some error which we just want to ignore.
				 */
				return false;
			} else if (r < 0) {
				throw DecodeError (N_("avcodec_receive_frame"), N_("FFmpeg::decode_and_process_video_packet"), r);
			}

			process_video_frame ();
		}
	} while (pending);

	return true;
}
void
FFmpegDecoder::process_video_frame ()
{
	auto graph = _filter_graphs.get(dcp::Size(_video_frame->width, _video_frame->height), static_cast<AVPixelFormat>(_video_frame->format));
	auto images = graph->process (_video_frame);

	for (auto const& i: images) {

		auto image = i.first;

		if (i.second != AV_NOPTS_VALUE) {
			double const pts = i.second * av_q2d(_format_context->streams[_video_stream.get()]->time_base) + _pts_offset.seconds();

			video->emit (
				film(),
				make_shared<RawImageProxy>(image),
				ContentTime::from_seconds(pts)
				);
		} else {
			LOG_WARNING_NC ("Dropping frame without PTS");
		}
	}
}
void
FFmpegDecoder::decode_and_process_subtitle_packet (AVPacket* packet)
{
	auto context = subtitle_codec_context();

	int got_subtitle;
	AVSubtitle sub;
	if (avcodec_decode_subtitle2(context, &sub, &got_subtitle, packet) < 0 || !got_subtitle) {
		return;
	}

	auto sub_period = subtitle_period (packet, ffmpeg_content()->subtitle_stream()->stream(_format_context), sub);

	/* Stop any current subtitle, either at the time it was supposed to stop, or now if now is sooner */
	if (_have_current_subtitle) {
		if (_current_subtitle_to) {
			only_text()->emit_stop (min(*_current_subtitle_to, sub_period.from + _pts_offset));
		} else {
			only_text()->emit_stop (sub_period.from + _pts_offset);
		}
		_have_current_subtitle = false;
	}

	if (sub.num_rects <= 0) {
		/* Nothing new in this subtitle */
		avsubtitle_free (&sub);
		return;
	}

	/* Subtitle PTS (within the source, not taking into account any of the
	   source that we may have chopped off for the DCP).
	*/
	auto const from = sub_period.from + _pts_offset;
	if (sub_period.to) {
		_current_subtitle_to = *sub_period.to + _pts_offset;
	} else {
		_current_subtitle_to = optional<ContentTime>();
	}
	_have_current_subtitle = true;

	ContentBitmapText bitmap_text(from);
	for (unsigned int i = 0; i < sub.num_rects; ++i) {
		auto const rect = sub.rects[i];

		switch (rect->type) {
		case SUBTITLE_NONE:
			break;
		case SUBTITLE_BITMAP:
			bitmap_text.subs.push_back(process_bitmap_subtitle(rect));
			break;
		case SUBTITLE_TEXT:
			cout << "XXX: SUBTITLE_TEXT " << rect->text << "\n";
			break;
		case SUBTITLE_ASS:
			process_ass_subtitle (rect->ass, from);
			break;
		}
	}

	if (!bitmap_text.subs.empty()) {
		only_text()->emit_bitmap_start(bitmap_text);
	}

	if (_current_subtitle_to) {
		only_text()->emit_stop (*_current_subtitle_to);
	}

	avsubtitle_free (&sub);
}
BitmapText
FFmpegDecoder::process_bitmap_subtitle (AVSubtitleRect const * rect)
{
	/* Note BGRA is expressed little-endian, so the first byte in the word is B, second
	   G, third R, fourth A.
	*/
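	/* So, for example, a fully-opaque pure red pixel is the byte sequence
	   00 00 ff ff.
	*/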
	auto image = make_shared<Image>(AV_PIX_FMT_BGRA, dcp::Size (rect->w, rect->h), Image::Alignment::PADDED);

#ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
	/* Start of the first line in the subtitle */
	auto sub_p = rect->pict.data[0];
	/* sub_p looks up into a BGRA palette which is at rect->pict.data[1];
	   (i.e. first byte B, second G, third R, fourth A)
	*/
	auto const* palette = rect->pict.data[1];
#else
	/* Start of the first line in the subtitle */
	auto sub_p = rect->data[0];
	/* sub_p looks up into a BGRA palette which is at rect->data[1].
	   (first byte B, second G, third R, fourth A)
	*/
	auto const* palette = rect->data[1];
#endif
	/* And the stream has a map of those palette colours to colours
	   chosen by the user; create a `mapped' palette from those settings.
	*/
	auto colour_map = ffmpeg_content()->subtitle_stream()->colours();
	vector<RGBA> mapped_palette (rect->nb_colors);
	for (int i = 0; i < rect->nb_colors; ++i) {
		RGBA c (palette[2], palette[1], palette[0], palette[3]);
		auto j = colour_map.find (c);
		if (j != colour_map.end ()) {
			mapped_palette[i] = j->second;
		} else {
			/* This colour was not found in the FFmpegSubtitleStream's colour map; probably because
			   it is from a project that was created before this stuff was added.  Just use the
			   colour straight from the original palette.
			*/
			mapped_palette[i] = c;
		}
		palette += 4;
	}
	/* Start of the output data */
	auto out_p = image->data()[0];

	for (int y = 0; y < rect->h; ++y) {
		auto sub_line_p = sub_p;
		auto out_line_p = out_p;
		for (int x = 0; x < rect->w; ++x) {
			auto const p = mapped_palette[*sub_line_p++];
			*out_line_p++ = p.b;
			*out_line_p++ = p.g;
			*out_line_p++ = p.r;
			*out_line_p++ = p.a;
		}
#ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
		sub_p += rect->pict.linesize[0];
#else
		sub_p += rect->linesize[0];
#endif
		out_p += image->stride()[0];
	}
	int target_width = subtitle_codec_context()->width;
	if (target_width == 0 && video_codec_context()) {
		/* subtitle_codec_context()->width == 0 has been seen in the wild but I don't
		   know if it's supposed to mean something from FFmpeg's point of view.
		*/
		target_width = video_codec_context()->width;
	}
	int target_height = subtitle_codec_context()->height;
	if (target_height == 0 && video_codec_context()) {
		target_height = video_codec_context()->height;
	}

	int x_offset = 0;
	int y_offset = 0;
	if (_ffmpeg_content->video && _ffmpeg_content->video->use()) {
		auto const crop = _ffmpeg_content->video->actual_crop();
		target_width -= crop.left + crop.right;
		target_height -= crop.top + crop.bottom;
		x_offset = -crop.left;
		y_offset = -crop.top;
	}

	DCPOMATIC_ASSERT(target_width > 0);
	DCPOMATIC_ASSERT(target_height > 0);
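	/* The rectangle is expressed as proportions of the (cropped) target; for
	   example a 100-pixel-wide subtitle at x = 192 in a 1920-wide frame gives
	   x = 0.1 and width ~= 0.052.
	*/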
	dcpomatic::Rect<double> const scaled_rect (
		static_cast<double>(rect->x + x_offset) / target_width,
		static_cast<double>(rect->y + y_offset) / target_height,
		static_cast<double>(rect->w) / target_width,
		static_cast<double>(rect->h) / target_height
		);

	return { image, scaled_rect };
}
void
FFmpegDecoder::process_ass_subtitle (string ass, ContentTime from)
{
	/* We have no styles and no Format: line, so I'm assuming that FFmpeg
	   produces a single format of Dialogue: lines...
	*/
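	/* For example, given a line like

	       Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,Hello world

	   the text is everything after the ninth comma, i.e. "Hello world".
	*/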
	int commas = 0;
	string text;
	for (size_t i = 0; i < ass.length(); ++i) {
		if (commas < 9 && ass[i] == ',') {
			++commas;
		} else if (commas == 9) {
			text += ass[i];
		}
	}

	if (text.empty ()) {
		return;
	}

	sub::RawSubtitle base;
	auto video_size = _ffmpeg_content->video->size();
	DCPOMATIC_ASSERT(video_size);

	auto raw = sub::SSAReader::parse_line (
		base,
		text,
		video_size->width,
		video_size->height
		);

	for (auto const& i: sub::collect<vector<sub::Subtitle>>(raw)) {
		only_text()->emit_plain_start (from, i);
	}
}