/*
    Copyright (C) 2012-2018 Carl Hetherington <cth@carlh.net>

    This file is part of DCP-o-matic.

    DCP-o-matic is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    DCP-o-matic is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.

*/
/** @file  src/ffmpeg_decoder.cc
 *  @brief A decoder using FFmpeg to decode content.
 */
26 #include "exceptions.h"
30 #include "dcpomatic_log.h"
31 #include "ffmpeg_decoder.h"
32 #include "text_decoder.h"
33 #include "ffmpeg_audio_stream.h"
34 #include "ffmpeg_subtitle_stream.h"
35 #include "video_filter_graph.h"
36 #include "audio_buffers.h"
37 #include "ffmpeg_content.h"
38 #include "raw_image_proxy.h"
39 #include "video_decoder.h"
41 #include "audio_decoder.h"
42 #include "compose.hpp"
43 #include "text_content.h"
44 #include "audio_content.h"
45 #include "frame_interval_checker.h"
46 #include <dcp/subtitle_string.h>
47 #include <sub/ssa_reader.h>
48 #include <sub/subtitle.h>
49 #include <sub/collect.h>
51 #include <libavcodec/avcodec.h>
52 #include <libavformat/avformat.h>
54 #include <boost/algorithm/string.hpp>
70 using std::shared_ptr;
71 using std::make_shared;
73 using boost::is_any_of;
75 using boost::optional;
76 using std::dynamic_pointer_cast;
78 using namespace dcpomatic;
81 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> film, shared_ptr<const FFmpegContent> c, bool fast)
84 , _have_current_subtitle (false)
86 if (c->video && c->video->use()) {
87 video = make_shared<VideoDecoder>(this, c);
88 _pts_offset = pts_offset (c->ffmpeg_audio_streams(), c->first_video(), c->active_video_frame_rate(film));
89 /* It doesn't matter what size or pixel format this is, it just needs to be black */
90 _black_image.reset (new Image (AV_PIX_FMT_RGB24, dcp::Size (128, 128), true));
91 _black_image->make_black ();
97 audio = make_shared<AudioDecoder>(this, c->audio, fast);
100 if (c->only_text()) {
101 /* XXX: this time here should be the time of the first subtitle, not 0 */
102 text.push_back (make_shared<TextDecoder>(this, c->only_text(), ContentTime()));
105 for (auto i: c->ffmpeg_audio_streams()) {
106 _next_time[i] = boost::optional<dcpomatic::ContentTime>();
112 FFmpegDecoder::flush ()
114 /* Flush video and audio once */
116 bool did_something = false;
119 av_init_packet (&packet);
120 packet.data = nullptr;
122 if (decode_and_process_video_packet(&packet)) {
123 did_something = true;
127 for (auto i: ffmpeg_content()->ffmpeg_audio_streams()) {
129 av_init_packet (&packet);
130 packet.data = nullptr;
132 auto result = decode_audio_packet (i, &packet);
134 process_audio_frame (i);
135 did_something = true;
140 /* We want to be called again */
144 /* Make sure all streams are the same length and round up to the next video frame */
146 auto const frc = film()->active_frame_rate_change(_ffmpeg_content->position());
147 ContentTime full_length (_ffmpeg_content->full_length(film()), frc);
148 full_length = full_length.ceil (frc.source);
150 double const vfr = _ffmpeg_content->video_frame_rate().get();
151 auto const f = full_length.frames_round (vfr);
152 auto v = video->position(film()).get_value_or(ContentTime()).frames_round(vfr) + 1;
154 video->emit (film(), shared_ptr<const ImageProxy> (new RawImageProxy (_black_image)), v);
159 for (auto i: _ffmpeg_content->ffmpeg_audio_streams ()) {
160 auto a = audio->stream_position(film(), i);
161 /* Unfortunately if a is 0 that really means that we don't know the stream position since
162 there has been no data on it since the last seek. In this case we'll just do nothing
163 here. I'm not sure if that's the right idea.
165 if (a > ContentTime()) {
166 while (a < full_length) {
167 auto to_do = min (full_length - a, ContentTime::from_seconds (0.1));
168 auto silence = make_shared<AudioBuffers>(i->channels(), to_do.frames_ceil (i->frame_rate()));
169 silence->make_silent ();
170 audio->emit (film(), i, silence, a, true);
185 FFmpegDecoder::pass ()
187 auto packet = av_packet_alloc();
188 DCPOMATIC_ASSERT (packet);
190 int r = av_read_frame (_format_context, packet);
192 /* AVERROR_INVALIDDATA can apparently be returned sometimes even when av_read_frame
193 has pretty-much succeeded (and hence generated data which should be processed).
194 Hence it makes sense to continue here in that case.
196 if (r < 0 && r != AVERROR_INVALIDDATA) {
197 if (r != AVERROR_EOF) {
198 /* Maybe we should fail here, but for now we'll just finish off instead */
200 av_strerror (r, buf, sizeof(buf));
201 LOG_ERROR (N_("error on av_read_frame (%1) (%2)"), &buf[0], r);
204 av_packet_free (&packet);
208 int const si = packet->stream_index;
209 auto fc = _ffmpeg_content;
211 if (_video_stream && si == _video_stream.get() && video && !video->ignore()) {
212 decode_and_process_video_packet (packet);
213 } else if (fc->subtitle_stream() && fc->subtitle_stream()->uses_index(_format_context, si) && !only_text()->ignore()) {
214 decode_and_process_subtitle_packet (packet);
216 decode_and_process_audio_packet (packet);
219 av_packet_free (&packet);
224 /** @param data pointer to array of pointers to buffers.
225 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
227 shared_ptr<AudioBuffers>
228 FFmpegDecoder::deinterleave_audio (shared_ptr<FFmpegAudioStream> stream) const
230 DCPOMATIC_ASSERT (bytes_per_audio_sample (stream));
232 DCPOMATIC_DISABLE_WARNINGS
233 int const size = av_samples_get_buffer_size (
234 0, stream->stream(_format_context)->codec->channels, _frame->nb_samples, audio_sample_format (stream), 1
236 DCPOMATIC_ENABLE_WARNINGS
238 /* XXX: can't we just use _frame->nb_samples directly here? */
239 /* XXX: can't we use swr_convert() to do the format conversion? */
241 /* Deinterleave and convert to float */
243 /* total_samples and frames will be rounded down here, so if there are stray samples at the end
244 of the block that do not form a complete sample or frame they will be dropped.
246 int const total_samples = size / bytes_per_audio_sample (stream);
247 int const channels = stream->channels();
248 int const frames = total_samples / channels;
249 auto audio = make_shared<AudioBuffers>(channels, frames);
250 auto data = audio->data();
252 switch (audio_sample_format (stream)) {
253 case AV_SAMPLE_FMT_U8:
255 uint8_t* p = reinterpret_cast<uint8_t *> (_frame->data[0]);
258 for (int i = 0; i < total_samples; ++i) {
259 data[channel][sample] = float(*p++) / (1 << 23);
262 if (channel == channels) {
270 case AV_SAMPLE_FMT_S16:
272 int16_t* p = reinterpret_cast<int16_t *> (_frame->data[0]);
275 for (int i = 0; i < total_samples; ++i) {
276 data[channel][sample] = float(*p++) / (1 << 15);
279 if (channel == channels) {
287 case AV_SAMPLE_FMT_S16P:
289 int16_t** p = reinterpret_cast<int16_t **> (_frame->data);
290 for (int i = 0; i < channels; ++i) {
291 for (int j = 0; j < frames; ++j) {
292 data[i][j] = static_cast<float>(p[i][j]) / (1 << 15);
298 case AV_SAMPLE_FMT_S32:
300 int32_t* p = reinterpret_cast<int32_t *> (_frame->data[0]);
303 for (int i = 0; i < total_samples; ++i) {
304 data[channel][sample] = static_cast<float>(*p++) / 2147483648;
307 if (channel == channels) {
315 case AV_SAMPLE_FMT_S32P:
317 int32_t** p = reinterpret_cast<int32_t **> (_frame->data);
318 for (int i = 0; i < channels; ++i) {
319 for (int j = 0; j < frames; ++j) {
320 data[i][j] = static_cast<float>(p[i][j]) / 2147483648;
326 case AV_SAMPLE_FMT_FLT:
328 float* p = reinterpret_cast<float*> (_frame->data[0]);
331 for (int i = 0; i < total_samples; ++i) {
332 data[channel][sample] = *p++;
335 if (channel == channels) {
343 case AV_SAMPLE_FMT_FLTP:
345 float** p = reinterpret_cast<float**> (_frame->data);
346 DCPOMATIC_ASSERT (_frame->channels <= channels);
347 /* Sometimes there aren't as many channels in the _frame as in the stream */
348 for (int i = 0; i < _frame->channels; ++i) {
349 memcpy (data[i], p[i], frames * sizeof(float));
351 for (int i = _frame->channels; i < channels; ++i) {
352 audio->make_silent (i);
358 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format (stream))));
366 FFmpegDecoder::audio_sample_format (shared_ptr<FFmpegAudioStream> stream) const
368 DCPOMATIC_DISABLE_WARNINGS
369 return stream->stream (_format_context)->codec->sample_fmt;
370 DCPOMATIC_ENABLE_WARNINGS
375 FFmpegDecoder::bytes_per_audio_sample (shared_ptr<FFmpegAudioStream> stream) const
377 return av_get_bytes_per_sample (audio_sample_format (stream));
382 FFmpegDecoder::seek (ContentTime time, bool accurate)
384 Decoder::seek (time, accurate);
386 /* If we are doing an `accurate' seek, we need to use pre-roll, as
387 we don't really know what the seek will give us.
390 auto pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
393 /* XXX: it seems debatable whether PTS should be used here...
394 http://www.mjbshaw.com/2012/04/seeking-in-ffmpeg-know-your-timestamp.html
397 optional<int> stream;
400 stream = _video_stream;
402 DCPOMATIC_ASSERT (_ffmpeg_content->audio);
403 auto s = dynamic_pointer_cast<FFmpegAudioStream>(_ffmpeg_content->audio->stream());
405 stream = s->index (_format_context);
409 DCPOMATIC_ASSERT (stream);
411 auto u = time - _pts_offset;
412 if (u < ContentTime ()) {
418 u.seconds() / av_q2d (_format_context->streams[stream.get()]->time_base),
423 /* Force re-creation of filter graphs to reset them and hence to make sure
424 they don't have any pre-seek frames knocking about.
426 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
427 _filter_graphs.clear ();
430 if (video_codec_context ()) {
431 avcodec_flush_buffers (video_codec_context());
434 DCPOMATIC_DISABLE_WARNINGS
435 for (auto i: ffmpeg_content()->ffmpeg_audio_streams()) {
436 avcodec_flush_buffers (i->stream(_format_context)->codec);
438 DCPOMATIC_ENABLE_WARNINGS
440 if (subtitle_codec_context ()) {
441 avcodec_flush_buffers (subtitle_codec_context ());
444 _have_current_subtitle = false;
446 for (auto& i: _next_time) {
447 i.second = boost::optional<dcpomatic::ContentTime>();
452 shared_ptr<FFmpegAudioStream>
453 FFmpegDecoder::audio_stream_from_index (int index) const
455 /* XXX: inefficient */
456 auto streams = ffmpeg_content()->ffmpeg_audio_streams();
457 auto stream = streams.begin();
458 while (stream != streams.end() && !(*stream)->uses_index(_format_context, index)) {
462 if (stream == streams.end ()) {
471 FFmpegDecoder::process_audio_frame (shared_ptr<FFmpegAudioStream> stream)
473 auto data = deinterleave_audio (stream);
476 if (_frame->pts == AV_NOPTS_VALUE) {
477 /* In some streams we see not every frame coming through with a timestamp; for those
478 that have AV_NOPTS_VALUE we need to work out the timestamp ourselves. This is
479 particularly noticeable with TrueHD streams (see #1111).
481 if (_next_time[stream]) {
482 ct = *_next_time[stream];
485 ct = ContentTime::from_seconds (
486 _frame->best_effort_timestamp *
487 av_q2d (stream->stream(_format_context)->time_base))
491 _next_time[stream] = ct + ContentTime::from_frames(data->frames(), stream->frame_rate());
493 if (ct < ContentTime()) {
494 /* Discard audio data that comes before time 0 */
495 auto const remove = min (int64_t(data->frames()), (-ct).frames_ceil(double(stream->frame_rate())));
496 data->move (data->frames() - remove, remove, 0);
497 data->set_frames (data->frames() - remove);
498 ct += ContentTime::from_frames (remove, stream->frame_rate());
501 if (ct < ContentTime()) {
503 "Crazy timestamp %1 for %2 samples in stream %3 (ts=%4 tb=%5, off=%6)",
507 _frame->best_effort_timestamp,
508 av_q2d(stream->stream(_format_context)->time_base),
509 to_string(_pts_offset)
513 /* Give this data provided there is some, and its time is sane */
514 if (ct >= ContentTime() && data->frames() > 0) {
515 audio->emit (film(), stream, data, ct);
521 FFmpegDecoder::decode_audio_packet (shared_ptr<FFmpegAudioStream> stream, AVPacket* packet)
524 DCPOMATIC_DISABLE_WARNINGS
525 int decode_result = avcodec_decode_audio4 (stream->stream(_format_context)->codec, _frame, &frame_finished, packet);
526 DCPOMATIC_ENABLE_WARNINGS
527 if (decode_result < 0) {
528 /* avcodec_decode_audio4 can sometimes return an error even though it has decoded
529 some valid data; for example dca_subframe_footer can return AVERROR_INVALIDDATA
530 if it overreads the auxiliary data. ffplay carries on if frame_finished is true,
531 even in the face of such an error, so I think we should too.
533 Returning from the method here caused mantis #352.
535 LOG_WARNING ("avcodec_decode_audio4 failed (%1)", decode_result);
537 return make_pair(decode_result, frame_finished);
542 FFmpegDecoder::decode_and_process_audio_packet (AVPacket* packet)
544 auto stream = audio_stream_from_index (packet->stream_index);
549 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
550 several times. Make a simple copy so we can alter data and size.
552 AVPacket copy_packet = *packet;
554 while (copy_packet.size > 0) {
555 auto result = decode_audio_packet (stream, ©_packet);
556 if (result.first < 0) {
557 /* avcodec_decode_audio4 can sometimes return an error even though it has decoded
558 some valid data; for example dca_subframe_footer can return AVERROR_INVALIDDATA
559 if it overreads the auxiliary data. ffplay carries on if frame_finished is true,
560 even in the face of such an error, so I think we should too.
562 Returning from the method here caused mantis #352.
567 process_audio_frame (stream);
574 copy_packet.data += result.first;
575 copy_packet.size -= result.first;
581 FFmpegDecoder::decode_and_process_video_packet (AVPacket* packet)
583 DCPOMATIC_ASSERT (_video_stream);
586 DCPOMATIC_DISABLE_WARNINGS
587 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, packet) < 0 || !frame_finished) {
590 DCPOMATIC_ENABLE_WARNINGS
592 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
594 shared_ptr<VideoFilterGraph> graph;
596 auto i = _filter_graphs.begin();
597 while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
601 if (i == _filter_graphs.end ()) {
602 dcp::Fraction vfr (lrint(_ffmpeg_content->video_frame_rate().get() * 1000), 1000);
603 graph = make_shared<VideoFilterGraph>(dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format, vfr);
604 graph->setup (_ffmpeg_content->filters ());
605 _filter_graphs.push_back (graph);
606 LOG_GENERAL (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format);
611 auto images = graph->process (_frame);
613 for (auto const& i: images) {
615 auto image = i.first;
617 if (i.second != AV_NOPTS_VALUE) {
618 double const pts = i.second * av_q2d(_format_context->streams[_video_stream.get()]->time_base) + _pts_offset.seconds();
622 make_shared<RawImageProxy>(image),
623 llrint(pts * _ffmpeg_content->active_video_frame_rate(film()))
626 LOG_WARNING_NC ("Dropping frame without PTS");
635 FFmpegDecoder::decode_and_process_subtitle_packet (AVPacket* packet)
639 if (avcodec_decode_subtitle2 (subtitle_codec_context(), &sub, &got_subtitle, packet) < 0 || !got_subtitle) {
643 /* Stop any current subtitle, either at the time it was supposed to stop, or now if now is sooner */
644 if (_have_current_subtitle) {
645 if (_current_subtitle_to) {
646 only_text()->emit_stop (min(*_current_subtitle_to, subtitle_period(sub).from + _pts_offset));
648 only_text()->emit_stop (subtitle_period(sub).from + _pts_offset);
650 _have_current_subtitle = false;
653 if (sub.num_rects <= 0) {
654 /* Nothing new in this subtitle */
658 /* Subtitle PTS (within the source, not taking into account any of the
659 source that we may have chopped off for the DCP).
661 auto sub_period = subtitle_period (sub);
663 from = sub_period.from + _pts_offset;
665 _current_subtitle_to = *sub_period.to + _pts_offset;
667 _current_subtitle_to = optional<ContentTime>();
668 _have_current_subtitle = true;
671 for (unsigned int i = 0; i < sub.num_rects; ++i) {
672 auto const rect = sub.rects[i];
674 switch (rect->type) {
677 case SUBTITLE_BITMAP:
678 process_bitmap_subtitle (rect, from);
681 cout << "XXX: SUBTITLE_TEXT " << rect->text << "\n";
684 process_ass_subtitle (rect->ass, from);
689 if (_current_subtitle_to) {
690 only_text()->emit_stop (*_current_subtitle_to);
693 avsubtitle_free (&sub);
698 FFmpegDecoder::process_bitmap_subtitle (AVSubtitleRect const * rect, ContentTime from)
700 /* Note BGRA is expressed little-endian, so the first byte in the word is B, second
701 G, third R, fourth A.
703 auto image = make_shared<Image>(AV_PIX_FMT_BGRA, dcp::Size (rect->w, rect->h), true);
705 #ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
706 /* Start of the first line in the subtitle */
707 auto sub_p = rect->pict.data[0];
708 /* sub_p looks up into a BGRA palette which is at rect->pict.data[1];
709 (i.e. first byte B, second G, third R, fourth A)
711 auto const palette = rect->pict.data[1];
713 /* Start of the first line in the subtitle */
714 auto sub_p = rect->data[0];
715 /* sub_p looks up into a BGRA palette which is at rect->data[1].
716 (first byte B, second G, third R, fourth A)
718 auto const* palette = rect->data[1];
720 /* And the stream has a map of those palette colours to colours
721 chosen by the user; created a `mapped' palette from those settings.
723 auto colour_map = ffmpeg_content()->subtitle_stream()->colours();
724 vector<RGBA> mapped_palette (rect->nb_colors);
725 for (int i = 0; i < rect->nb_colors; ++i) {
726 RGBA c (palette[2], palette[1], palette[0], palette[3]);
727 auto j = colour_map.find (c);
728 if (j != colour_map.end ()) {
729 mapped_palette[i] = j->second;
731 /* This colour was not found in the FFmpegSubtitleStream's colour map; probably because
732 it is from a project that was created before this stuff was added. Just use the
733 colour straight from the original palette.
735 mapped_palette[i] = c;
740 /* Start of the output data */
741 auto out_p = image->data()[0];
743 for (int y = 0; y < rect->h; ++y) {
744 auto sub_line_p = sub_p;
745 auto out_line_p = out_p;
746 for (int x = 0; x < rect->w; ++x) {
747 auto const p = mapped_palette[*sub_line_p++];
753 #ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
754 sub_p += rect->pict.linesize[0];
756 sub_p += rect->linesize[0];
758 out_p += image->stride()[0];
761 int target_width = subtitle_codec_context()->width;
762 if (target_width == 0 && video_codec_context()) {
763 /* subtitle_codec_context()->width == 0 has been seen in the wild but I don't
764 know if it's supposed to mean something from FFmpeg's point of view.
766 target_width = video_codec_context()->width;
768 int target_height = subtitle_codec_context()->height;
769 if (target_height == 0 && video_codec_context()) {
770 target_height = video_codec_context()->height;
772 DCPOMATIC_ASSERT (target_width);
773 DCPOMATIC_ASSERT (target_height);
774 dcpomatic::Rect<double> const scaled_rect (
775 static_cast<double>(rect->x) / target_width,
776 static_cast<double>(rect->y) / target_height,
777 static_cast<double>(rect->w) / target_width,
778 static_cast<double>(rect->h) / target_height
781 only_text()->emit_bitmap_start (from, image, scaled_rect);
786 FFmpegDecoder::process_ass_subtitle (string ass, ContentTime from)
788 /* We have no styles and no Format: line, so I'm assuming that FFmpeg
789 produces a single format of Dialogue: lines...
794 for (size_t i = 0; i < ass.length(); ++i) {
795 if (commas < 9 && ass[i] == ',') {
797 } else if (commas == 9) {
806 sub::RawSubtitle base;
807 auto raw = sub::SSAReader::parse_line (
810 _ffmpeg_content->video->size().width,
811 _ffmpeg_content->video->size().height
814 for (auto const& i: sub::collect<vector<sub::Subtitle>>(raw)) {
815 only_text()->emit_plain_start (from, i);