2 Copyright (C) 2012-2018 Carl Hetherington <cth@carlh.net>
4 This file is part of DCP-o-matic.
6 DCP-o-matic is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 DCP-o-matic is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with DCP-o-matic. If not, see <http://www.gnu.org/licenses/>.
22 /** @file src/ffmpeg_decoder.cc
23 * @brief A decoder using FFmpeg to decode content.
27 #include "audio_buffers.h"
28 #include "audio_content.h"
29 #include "audio_decoder.h"
30 #include "compose.hpp"
31 #include "dcpomatic_log.h"
32 #include "exceptions.h"
33 #include "ffmpeg_audio_stream.h"
34 #include "ffmpeg_content.h"
35 #include "ffmpeg_decoder.h"
36 #include "ffmpeg_subtitle_stream.h"
39 #include "frame_interval_checker.h"
42 #include "raw_image_proxy.h"
43 #include "text_content.h"
44 #include "text_decoder.h"
46 #include "video_decoder.h"
47 #include "video_filter_graph.h"
48 #include <dcp/subtitle_string.h>
49 #include <sub/ssa_reader.h>
50 #include <sub/subtitle.h>
51 #include <sub/collect.h>
53 #include <libavcodec/avcodec.h>
54 #include <libavformat/avformat.h>
56 #include <boost/algorithm/string.hpp>
66 using std::dynamic_pointer_cast;
67 using std::make_shared;
69 using std::shared_ptr;
72 using boost::optional;
74 using namespace dcpomatic;
/** Construct an FFmpegDecoder for some FFmpegContent, creating the sub-decoders
 *  (video / audio / text) that the content requires.
 *  @param film Film that the content is part of.
 *  @param c Content to decode.
 *  @param fast true to decode audio "fast" (passed through to AudioDecoder).
 *
 *  NOTE(review): this excerpt is missing lines (initializer list and the guard
 *  conditions around the audio/text decoder creation — presumably `if (c->audio)`
 *  and a check for text streams); comments below describe only what is visible.
 */
FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> film, shared_ptr<const FFmpegContent> c, bool fast)
	if (c->video && c->video->use()) {
		video = make_shared<VideoDecoder>(this, c);
		/* PTS offset maps the stream's raw timestamps to content time */
		_pts_offset = pts_offset (c->ffmpeg_audio_streams(), c->first_video(), c->active_video_frame_rate(film));
		/* It doesn't matter what size or pixel format this is, it just needs to be black */
		_black_image = make_shared<Image>(AV_PIX_FMT_RGB24, dcp::Size (128, 128), Image::Alignment::PADDED);
		_black_image->make_black ();

	audio = make_shared<AudioDecoder>(this, c->audio, fast);

	text.push_back (make_shared<TextDecoder>(this, c->only_text()));
	/* XXX: we should be calling maybe_set_position() on this TextDecoder, but we can't easily find
	 * the time of the first subtitle at this point.

	/* Start with an unknown "next expected timestamp" for every audio stream;
	   these are filled in as frames arrive (see process_audio_frame) */
	for (auto i: c->ffmpeg_audio_streams()) {
		_next_time[i] = boost::optional<dcpomatic::ContentTime>();
/** Flush the decoders at end-of-stream: drain one round of buffered video and
 *  audio from libavcodec, then pad every stream out with black video / silent
 *  audio so that all streams finish at the same (rounded-up) length.
 *
 *  NOTE(review): lines are missing from this excerpt (return type/statements,
 *  several closing braces), so the exact control flow between the "drain" and
 *  "pad" phases cannot be confirmed here.
 */
FFmpegDecoder::flush ()
	/* Flush video and audio once */

	bool did_something = false;
	/* A null packet puts the video codec into draining mode */
	if (decode_and_process_video_packet(nullptr)) {
		did_something = true;

	for (auto i: ffmpeg_content()->ffmpeg_audio_streams()) {
		auto context = _codec_context[i->index(_format_context)];
		/* Null packet -> enter draining mode for this audio codec too */
		int r = avcodec_send_packet (context, nullptr);
		if (r < 0 && r != AVERROR_EOF) {
			/* EOF can happen if we've already sent a flush packet */
			throw DecodeError (N_("avcodec_send_packet"), N_("FFmpegDecoder::flush"), r);
		r = avcodec_receive_frame (context, audio_frame(i));
			process_audio_frame (i);
			did_something = true;

		/* We want to be called again */

	/* Make sure all streams are the same length and round up to the next video frame */

	auto const frc = film()->active_frame_rate_change(_ffmpeg_content->position());
	ContentTime full_length (_ffmpeg_content->full_length(film()), frc);
	full_length = full_length.ceil (frc.source);
		double const vfr = _ffmpeg_content->video_frame_rate().get();
		auto const f = full_length.frames_round (vfr);
		/* Next video frame index to emit: one past the last one we produced */
		auto v = video->position(film()).get_value_or(ContentTime()).frames_round(vfr) + 1;
			/* Pad with pre-made black frames up to the full length */
			video->emit (film(), make_shared<const RawImageProxy>(_black_image), v);

	for (auto i: _ffmpeg_content->ffmpeg_audio_streams ()) {
		auto a = audio->stream_position(film(), i);
		/* Unfortunately if a is 0 that really means that we don't know the stream position since
		   there has been no data on it since the last seek. In this case we'll just do nothing
		   here. I'm not sure if that's the right idea.
		if (a > ContentTime()) {
			while (a < full_length) {
				/* Emit silence in chunks of at most 0.1s until we reach full length */
				auto to_do = min (full_length - a, ContentTime::from_seconds (0.1));
				auto silence = make_shared<AudioBuffers>(i->channels(), to_do.frames_ceil (i->frame_rate()));
				silence->make_silent ();
				audio->emit (film(), i, silence, a, true);
/** Read the next packet from the container and dispatch it to the appropriate
 *  decode-and-process method (video, subtitle or audio) depending on its
 *  stream index.
 *
 *  NOTE(review): return statements are not visible in this excerpt; the
 *  EOF/flush path presumably returns via flush() but that cannot be confirmed
 *  here.
 */
FFmpegDecoder::pass ()
	auto packet = av_packet_alloc();
	DCPOMATIC_ASSERT (packet);

	int r = av_read_frame (_format_context, packet);

	/* AVERROR_INVALIDDATA can apparently be returned sometimes even when av_read_frame
	   has pretty-much succeeded (and hence generated data which should be processed).
	   Hence it makes sense to continue here in that case.
	if (r < 0 && r != AVERROR_INVALIDDATA) {
		if (r != AVERROR_EOF) {
			/* Maybe we should fail here, but for now we'll just finish off instead */
			av_strerror (r, buf, sizeof(buf));
			LOG_ERROR (N_("error on av_read_frame (%1) (%2)"), &buf[0], r);
		/* Free the packet before bailing out — av_packet_alloc'd above */
		av_packet_free (&packet);

	int const si = packet->stream_index;
	auto fc = _ffmpeg_content;

	/* Route the packet: video stream (unless the video decoder is ignoring),
	   then subtitle stream, otherwise try it as audio */
	if (_video_stream && si == _video_stream.get() && video && !video->ignore()) {
		decode_and_process_video_packet (packet);
	} else if (fc->subtitle_stream() && fc->subtitle_stream()->uses_index(_format_context, si) && !only_text()->ignore()) {
		decode_and_process_subtitle_packet (packet);
		decode_and_process_audio_packet (packet);

	av_packet_free (&packet);
/** Convert an AVFrame's audio into de-interleaved float AudioBuffers.
 *
 *  @param stream Stream the frame belongs to (used for the expected channel count).
 *  @param frame Decoded audio frame; its format member selects the conversion below.
 *  @return AudioBuffers with one float array per channel.
 *
 *  Planar formats (…P) already have one buffer per channel in frame->data[];
 *  interleaved formats have all channels woven into frame->data[0] and are
 *  unpicked sample-by-sample.
 *
 *  @param data pointer to array of pointers to buffers.
 *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 */
shared_ptr<AudioBuffers>
deinterleave_audio(shared_ptr<FFmpegAudioStream> stream, AVFrame* frame)
	auto format = static_cast<AVSampleFormat>(frame->format);

	/* XXX: can't we use swr_convert() to do the format conversion? */

	int const channels = frame->channels;
	int const frames = frame->nb_samples;
	int const total_samples = frames * channels;
	auto audio = make_shared<AudioBuffers>(channels, frames);
	auto data = audio->data();

	case AV_SAMPLE_FMT_U8:
		auto p = reinterpret_cast<uint8_t *> (frame->data[0]);
		for (int i = 0; i < total_samples; ++i) {
			/* NOTE(review): dividing an unsigned 8-bit sample by (1 << 23)
			   looks wrong (U8 is biased around 128 with 8-bit range) — but
			   the surrounding lines are missing here, so leaving as-is;
			   verify against the full source. */
			data[channel][sample] = float(*p++) / (1 << 23);
			if (channel == channels) {

	case AV_SAMPLE_FMT_S16:
		auto p = reinterpret_cast<int16_t *> (frame->data[0]);
		for (int i = 0; i < total_samples; ++i) {
			/* Scale signed 16-bit to [-1, 1) */
			data[channel][sample] = float(*p++) / (1 << 15);
			if (channel == channels) {

	case AV_SAMPLE_FMT_S16P:
		auto p = reinterpret_cast<int16_t **> (frame->data);
		for (int i = 0; i < channels; ++i) {
			for (int j = 0; j < frames; ++j) {
				data[i][j] = static_cast<float>(p[i][j]) / (1 << 15);

	case AV_SAMPLE_FMT_S32:
		auto p = reinterpret_cast<int32_t *> (frame->data[0]);
		for (int i = 0; i < total_samples; ++i) {
			/* Scale signed 32-bit to [-1, 1); 2147483648 == 2^31 */
			data[channel][sample] = static_cast<float>(*p++) / 2147483648;
			if (channel == channels) {

	case AV_SAMPLE_FMT_S32P:
		auto p = reinterpret_cast<int32_t **> (frame->data);
		for (int i = 0; i < channels; ++i) {
			for (int j = 0; j < frames; ++j) {
				data[i][j] = static_cast<float>(p[i][j]) / 2147483648;

	case AV_SAMPLE_FMT_FLT:
		auto p = reinterpret_cast<float*> (frame->data[0]);
		for (int i = 0; i < total_samples; ++i) {
			/* Already float; just de-interleave */
			data[channel][sample] = *p++;
			if (channel == channels) {

	case AV_SAMPLE_FMT_FLTP:
		auto p = reinterpret_cast<float**> (frame->data);
		DCPOMATIC_ASSERT(channels <= stream->channels());
		/* Sometimes there aren't as many channels in the frame as in the stream */
		for (int i = 0; i < channels; ++i) {
			memcpy (data[i], p[i], frames * sizeof(float));
		/* Fill the stream channels the frame didn't supply with silence */
		for (int i = channels; i < stream->channels(); ++i) {
			audio->make_silent (i);

		throw DecodeError (String::compose(_("Unrecognised audio sample format (%1)"), static_cast<int>(format)));
/** @return the sample format of the given audio stream, read from its codec parameters. */
FFmpegDecoder::audio_sample_format (shared_ptr<FFmpegAudioStream> stream) const
	return static_cast<AVSampleFormat>(stream->stream(_format_context)->codecpar->format);
/** @return bytes per (single-channel) sample for the given audio stream's format. */
FFmpegDecoder::bytes_per_audio_sample (shared_ptr<FFmpegAudioStream> stream) const
	return av_get_bytes_per_sample (audio_sample_format (stream));
/** Seek within the content, then flush all codec and filter-graph state so
 *  that no pre-seek data leaks into the post-seek output.
 *  @param time Content time to seek to.
 *  @param accurate true to seek with 2s of pre-roll so that an exact frame can
 *  be produced after decoding forwards from the (keyframe-aligned) seek point.
 *
 *  NOTE(review): the actual av_seek_frame/avformat_seek_file call is not
 *  visible in this excerpt (only the timestamp computation on the stream
 *  time_base); the stream-selection branches' conditions are also missing.
 */
FFmpegDecoder::seek (ContentTime time, bool accurate)
	Decoder::seek (time, accurate);

	/* If we are doing an `accurate' seek, we need to use pre-roll, as
	   we don't really know what the seek will give us.

	auto pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);

	/* XXX: it seems debatable whether PTS should be used here...
	   http://www.mjbshaw.com/2012/04/seeking-in-ffmpeg-know-your-timestamp.html

	/* Pick a stream to seek on: the video stream if there is one, otherwise
	   the active audio stream */
	optional<int> stream;

		stream = _video_stream;
		DCPOMATIC_ASSERT (_ffmpeg_content->audio);
		auto s = dynamic_pointer_cast<FFmpegAudioStream>(_ffmpeg_content->audio->stream());
			stream = s->index (_format_context);

	DCPOMATIC_ASSERT (stream);

	/* Convert content time to the chosen stream's timestamp units,
	   removing the PTS offset and clamping below zero */
	auto u = time - _pts_offset;
	if (u < ContentTime ()) {
		u.seconds() / av_q2d (_format_context->streams[stream.get()]->time_base),

	/* Force re-creation of filter graphs to reset them and hence to make sure
	   they don't have any pre-seek frames knocking about.
	_filter_graphs.clear();

	if (video_codec_context ()) {
		avcodec_flush_buffers (video_codec_context());

	for (auto i: ffmpeg_content()->ffmpeg_audio_streams()) {
		avcodec_flush_buffers (_codec_context[i->index(_format_context)]);

	if (subtitle_codec_context ()) {
		avcodec_flush_buffers (subtitle_codec_context ());

	/* Discard any in-progress subtitle and forget per-stream audio timestamps;
	   they will be re-established from the first post-seek frames */
	_have_current_subtitle = false;

	for (auto& i: _next_time) {
		i.second = boost::optional<dcpomatic::ContentTime>();
/** Find the FFmpegAudioStream that uses a given FFmpeg stream index.
 *  @param index Stream index within the AVFormatContext.
 *  @return matching stream (return statements not visible in this excerpt;
 *  presumably a null pointer when no stream matches — verify in full source).
 */
shared_ptr<FFmpegAudioStream>
FFmpegDecoder::audio_stream_from_index (int index) const
	/* XXX: inefficient */
	/* Linear search over the content's audio streams for one using this index */
	auto streams = ffmpeg_content()->ffmpeg_audio_streams();
	auto stream = streams.begin();
	while (stream != streams.end() && !(*stream)->uses_index(_format_context, index)) {

	if (stream == streams.end ()) {
/** Take the decoded audio frame for a stream, work out its content timestamp,
 *  trim anything before time 0, and emit it to the AudioDecoder.
 *  @param stream Stream whose current decoded frame (audio_frame(stream))
 *  should be processed.
 */
FFmpegDecoder::process_audio_frame (shared_ptr<FFmpegAudioStream> stream)
	auto frame = audio_frame (stream);
	auto data = deinterleave_audio(stream, frame);

	auto const time_base = stream->stream(_format_context)->time_base;

	if (frame->pts == AV_NOPTS_VALUE) {
		/* In some streams we see not every frame coming through with a timestamp; for those
		   that have AV_NOPTS_VALUE we need to work out the timestamp ourselves. This is
		   particularly noticeable with TrueHD streams (see #1111).
		if (_next_time[stream]) {
			/* Continue on from the end of the previous frame */
			ct = *_next_time[stream];
			/* No previous frame either: fall back to the best-effort timestamp */
			ct = ContentTime::from_seconds (
				frame->best_effort_timestamp *
		"Process audio with timestamp %1 (BET %2, timebase %3/%4, (PTS offset %5)",
		frame->best_effort_timestamp,
		to_string(_pts_offset)

	/* Remember where the next frame of this stream should start, for the
	   AV_NOPTS_VALUE case above */
	_next_time[stream] = ct + ContentTime::from_frames(data->frames(), stream->frame_rate());

	if (ct < ContentTime()) {
		/* Discard audio data that comes before time 0 */
		auto const remove = min (int64_t(data->frames()), (-ct).frames_ceil(double(stream->frame_rate())));
		data->move (data->frames() - remove, remove, 0);
		data->set_frames (data->frames() - remove);
		ct += ContentTime::from_frames (remove, stream->frame_rate());

	/* Still negative after trimming: log it rather than emitting bad data */
	if (ct < ContentTime()) {
			"Crazy timestamp %1 for %2 samples in stream %3 (ts=%4 tb=%5, off=%6)",
			frame->best_effort_timestamp,
			to_string(_pts_offset)

	/* Give this data provided there is some, and its time is sane */
	if (ct >= ContentTime() && data->frames() > 0) {
		audio->emit (film(), stream, data, ct);
/** Send an audio packet to the matching stream's codec and process any frames
 *  that come out.
 *  @param packet Packet to decode (its stream_index selects the stream).
 *
 *  NOTE(review): the receive loop structure and early-return lines are not
 *  visible in this excerpt.
 */
FFmpegDecoder::decode_and_process_audio_packet (AVPacket* packet)
	auto stream = audio_stream_from_index (packet->stream_index);

	auto context = _codec_context[stream->index(_format_context)];
	auto frame = audio_frame (stream);

	LOG_DEBUG_PLAYER("Send audio packet on stream %1", stream->index(_format_context));
	int r = avcodec_send_packet (context, packet);
		/* Non-fatal: log and carry on; there may still be frames to receive */
		LOG_WARNING("avcodec_send_packet returned %1 for an audio packet", r);
		r = avcodec_receive_frame (context, frame);
		if (r == AVERROR(EAGAIN)) {
			/* More input is required */
			LOG_DEBUG_PLAYER_NC("EAGAIN after trying to receive audio frame");

		/* We choose to be relaxed here about other errors; it seems that there may be valid
		 * data to decode even if an error occurred. #352 may be related (though this was
		 * when we were using an old version of the FFmpeg API).
		process_audio_frame (stream);
/** Send a video packet (or nullptr to flush) to the video codec and process
 *  any frames that come out.
 *  @param packet Packet to decode, or nullptr to put the codec into draining mode.
 *
 *  NOTE(review): the receive loop structure and return statements are not
 *  visible in this excerpt; `pending` suggests the same packet is re-sent
 *  after an EAGAIN, but that re-send is not visible here.
 */
FFmpegDecoder::decode_and_process_video_packet (AVPacket* packet)
	DCPOMATIC_ASSERT (_video_stream);

	auto context = video_codec_context();

	bool pending = false;
		int r = avcodec_send_packet (context, packet);
			LOG_WARNING("avcodec_send_packet returned %1 for a video packet", r);

		/* EAGAIN means we should call avcodec_receive_frame and then re-send the same packet */
		pending = r == AVERROR(EAGAIN);

		r = avcodec_receive_frame (context, _video_frame);
		if (r == AVERROR(EAGAIN) || r == AVERROR_EOF || (r < 0 && !packet)) {
			/* More input is required, no more frames are coming, or we are flushing and there was
			 * some error which we just want to ignore.
			throw DecodeError (N_("avcodec_receive_frame"), N_("FFmpeg::decode_and_process_video_packet"), r);

		process_video_frame ();
/** Push the current decoded video frame through a (possibly newly-created)
 *  filter graph and emit the resulting images with their content frame index.
 */
FFmpegDecoder::process_video_frame ()
	shared_ptr<VideoFilterGraph> graph;

	/* Look for an existing graph that can handle this frame's size/pixel format */
	auto i = _filter_graphs.begin();
	while (i != _filter_graphs.end() && !(*i)->can_process(dcp::Size(_video_frame->width, _video_frame->height), (AVPixelFormat) _video_frame->format)) {

	if (i == _filter_graphs.end ()) {
		/* None found: build and cache a new graph for this format */
		dcp::Fraction vfr (lrint(_ffmpeg_content->video_frame_rate().get() * 1000), 1000);
		graph = make_shared<VideoFilterGraph>(dcp::Size(_video_frame->width, _video_frame->height), (AVPixelFormat) _video_frame->format, vfr);
		graph->setup (_ffmpeg_content->filters ());
		_filter_graphs.push_back (graph);
		LOG_GENERAL (N_("New graph for %1x%2, pixel format %3"), _video_frame->width, _video_frame->height, _video_frame->format);

	auto images = graph->process (_video_frame);

	for (auto const& i: images) {
		auto image = i.first;

		if (i.second != AV_NOPTS_VALUE) {
			/* Convert stream PTS to seconds of content time, then to a frame index */
			double const pts = i.second * av_q2d(_format_context->streams[_video_stream.get()]->time_base) + _pts_offset.seconds();
				make_shared<RawImageProxy>(image),
				llrint(pts * _ffmpeg_content->active_video_frame_rate(film()))
			LOG_WARNING_NC ("Dropping frame without PTS");
/** Decode a subtitle packet, close any currently-displayed subtitle, and emit
 *  the new subtitle's rects (bitmap or ASS) via the TextDecoder.
 *  @param packet Subtitle packet to decode.
 */
FFmpegDecoder::decode_and_process_subtitle_packet (AVPacket* packet)
	if (avcodec_decode_subtitle2 (subtitle_codec_context(), &sub, &got_subtitle, packet) < 0 || !got_subtitle) {

	auto sub_period = subtitle_period (packet, ffmpeg_content()->subtitle_stream()->stream(_format_context), sub);

	/* Stop any current subtitle, either at the time it was supposed to stop, or now if now is sooner */
	if (_have_current_subtitle) {
		if (_current_subtitle_to) {
			only_text()->emit_stop (min(*_current_subtitle_to, sub_period.from + _pts_offset));
			only_text()->emit_stop (sub_period.from + _pts_offset);
		_have_current_subtitle = false;

	if (sub.num_rects <= 0) {
		/* Nothing new in this subtitle */
		avsubtitle_free (&sub);

	/* Subtitle PTS (within the source, not taking into account any of the
	   source that we may have chopped off for the DCP).

	from = sub_period.from + _pts_offset;
		/* End time is known now; record it so a later packet (or the lines
		   below) can stop this subtitle */
		_current_subtitle_to = *sub_period.to + _pts_offset;
		_current_subtitle_to = optional<ContentTime>();
	_have_current_subtitle = true;

	ContentBitmapText bitmap_text(from);
	for (unsigned int i = 0; i < sub.num_rects; ++i) {
		auto const rect = sub.rects[i];

		switch (rect->type) {
		case SUBTITLE_BITMAP:
			bitmap_text.subs.push_back(process_bitmap_subtitle(rect));
			/* XXX: this case is apparently unhandled; just logged to stdout */
			cout << "XXX: SUBTITLE_TEXT " << rect->text << "\n";
			process_ass_subtitle (rect->ass, from);

	if (!bitmap_text.subs.empty()) {
		only_text()->emit_bitmap_start(bitmap_text);

	if (_current_subtitle_to) {
		only_text()->emit_stop (*_current_subtitle_to);

	avsubtitle_free (&sub);
/** Convert a bitmap AVSubtitleRect (paletted image) to a BGRA Image plus its
 *  position/size expressed as fractions of the target picture.
 *  @param rect Bitmap subtitle rect from FFmpeg.
 *  @return image and its rect scaled into [0,1] coordinates (the visible
 *  return statement constructs this pair).
 *
 *  NOTE(review): the inner pixel-writing lines (copying the mapped palette
 *  colour into out_line_p) are missing from this excerpt.
 */
FFmpegDecoder::process_bitmap_subtitle (AVSubtitleRect const * rect)
	/* Note BGRA is expressed little-endian, so the first byte in the word is B, second
	   G, third R, fourth A.

	auto image = make_shared<Image>(AV_PIX_FMT_BGRA, dcp::Size (rect->w, rect->h), Image::Alignment::PADDED);

#ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
	/* Start of the first line in the subtitle */
	auto sub_p = rect->pict.data[0];
	/* sub_p looks up into a BGRA palette which is at rect->pict.data[1];
	   (i.e. first byte B, second G, third R, fourth A)
	auto const palette = rect->pict.data[1];
	/* Start of the first line in the subtitle */
	auto sub_p = rect->data[0];
	/* sub_p looks up into a BGRA palette which is at rect->data[1].
	   (first byte B, second G, third R, fourth A)
	auto const* palette = rect->data[1];
	/* And the stream has a map of those palette colours to colours
	   chosen by the user; created a `mapped' palette from those settings.
	auto colour_map = ffmpeg_content()->subtitle_stream()->colours();
	vector<RGBA> mapped_palette (rect->nb_colors);
	for (int i = 0; i < rect->nb_colors; ++i) {
		/* Palette entries are stored B,G,R,A; build an RGBA from them */
		RGBA c (palette[2], palette[1], palette[0], palette[3]);
		auto j = colour_map.find (c);
		if (j != colour_map.end ()) {
			mapped_palette[i] = j->second;
			/* This colour was not found in the FFmpegSubtitleStream's colour map; probably because
			   it is from a project that was created before this stuff was added.  Just use the
			   colour straight from the original palette.
			mapped_palette[i] = c;

	/* Start of the output data */
	auto out_p = image->data()[0];

	for (int y = 0; y < rect->h; ++y) {
		auto sub_line_p = sub_p;
		auto out_line_p = out_p;
		for (int x = 0; x < rect->w; ++x) {
			/* Each source byte is an index into the mapped palette */
			auto const p = mapped_palette[*sub_line_p++];
#ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
		sub_p += rect->pict.linesize[0];
		sub_p += rect->linesize[0];
		/* Output image may be padded, so advance by its stride, not rect->w*4 */
		out_p += image->stride()[0];

	int target_width = subtitle_codec_context()->width;
	if (target_width == 0 && video_codec_context()) {
		/* subtitle_codec_context()->width == 0 has been seen in the wild but I don't
		   know if it's supposed to mean something from FFmpeg's point of view.
		target_width = video_codec_context()->width;
	int target_height = subtitle_codec_context()->height;
	if (target_height == 0 && video_codec_context()) {
		target_height = video_codec_context()->height;
	DCPOMATIC_ASSERT (target_width);
	DCPOMATIC_ASSERT (target_height);
	/* Express the rect's position and size as fractions of the target picture */
	dcpomatic::Rect<double> const scaled_rect (
		static_cast<double>(rect->x) / target_width,
		static_cast<double>(rect->y) / target_height,
		static_cast<double>(rect->w) / target_width,
		static_cast<double>(rect->h) / target_height

	return { image, scaled_rect };
777 FFmpegDecoder::process_ass_subtitle (string ass, ContentTime from)
779 /* We have no styles and no Format: line, so I'm assuming that FFmpeg
780 produces a single format of Dialogue: lines...
785 for (size_t i = 0; i < ass.length(); ++i) {
786 if (commas < 9 && ass[i] == ',') {
788 } else if (commas == 9) {
797 sub::RawSubtitle base;
798 auto raw = sub::SSAReader::parse_line (
801 _ffmpeg_content->video->size().width,
802 _ffmpeg_content->video->size().height,
806 for (auto const& i: sub::collect<vector<sub::Subtitle>>(raw)) {
807 only_text()->emit_plain_start (from, i);