/*
    Copyright (C) 2012-2018 Carl Hetherington <cth@carlh.net>

    This file is part of DCP-o-matic.

    DCP-o-matic is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    DCP-o-matic is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.

*/


/** @file  src/ffmpeg_decoder.cc
 *  @brief A decoder using FFmpeg to decode content.
 */
26 #include "exceptions.h"
30 #include "dcpomatic_log.h"
31 #include "ffmpeg_decoder.h"
32 #include "text_decoder.h"
33 #include "ffmpeg_audio_stream.h"
34 #include "ffmpeg_subtitle_stream.h"
35 #include "video_filter_graph.h"
36 #include "audio_buffers.h"
37 #include "ffmpeg_content.h"
38 #include "raw_image_proxy.h"
39 #include "video_decoder.h"
41 #include "audio_decoder.h"
42 #include "compose.hpp"
43 #include "text_content.h"
44 #include "audio_content.h"
45 #include "frame_interval_checker.h"
46 #include <dcp/subtitle_string.h>
47 #include <sub/ssa_reader.h>
48 #include <sub/subtitle.h>
49 #include <sub/collect.h>
51 #include <libavcodec/avcodec.h>
52 #include <libavformat/avformat.h>
54 #include <boost/algorithm/string.hpp>
70 using std::shared_ptr;
71 using std::make_shared;
73 using boost::is_any_of;
75 using boost::optional;
76 using std::dynamic_pointer_cast;
78 using namespace dcpomatic;
/** Construct an FFmpegDecoder for a piece of FFmpeg content.
 *  Sets up the sub-decoders (video/audio/text) that the content actually uses.
 *  NOTE(review): this extract has gaps in its embedded numbering, so some lines
 *  (braces, else-branches, initialiser-list entries) are missing from view.
 */
81 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> film, shared_ptr<const FFmpegContent> c, bool fast)
84 , _have_current_subtitle (false)
86 if (c->video && c->video->use()) {
87 video = make_shared<VideoDecoder>(this, c);
88 _pts_offset = pts_offset (c->ffmpeg_audio_streams(), c->first_video(), c->active_video_frame_rate(film));
/* A black frame used to pad the end of the video stream out to the full length in flush() */
89 /* It doesn't matter what size or pixel format this is, it just needs to be black */
90 _black_image.reset (new Image (AV_PIX_FMT_RGB24, dcp::Size (128, 128), true));
91 _black_image->make_black ();
97 audio = make_shared<AudioDecoder>(this, c->audio, fast);
100 if (c->only_text()) {
101 /* XXX: this time here should be the time of the first subtitle, not 0 */
102 text.push_back (make_shared<TextDecoder>(this, c->only_text(), ContentTime()));
/* No timestamp seen yet on any audio stream; filled in as frames arrive (see process_audio_frame) */
105 for (auto i: c->ffmpeg_audio_streams()) {
106 _next_time[i] = boost::optional<dcpomatic::ContentTime>();
/** Flush the decoders at end-of-stream: drain any buffered video/audio frames
 *  by feeding null packets, then pad every stream (black video, silent audio)
 *  so they all reach the content's full length, rounded up to a video frame.
 *  NOTE(review): lines are missing from this extract (e.g. the AVPacket
 *  declaration before av_init_packet and several closing braces).
 */
112 FFmpegDecoder::flush ()
114 /* Flush video and audio once */
116 bool did_something = false;
/* A packet with null data tells the codec to emit its remaining buffered frames */
119 av_init_packet (&packet);
120 packet.data = nullptr;
122 if (decode_and_process_video_packet(&packet)) {
123 did_something = true;
/* Drain each audio stream's codec in the same way */
127 for (auto i: ffmpeg_content()->ffmpeg_audio_streams()) {
129 av_init_packet (&packet);
130 packet.data = nullptr;
132 auto result = decode_audio_packet (i, &packet);
134 process_audio_frame (i);
135 did_something = true;
/* While the drain is still producing data, ask to be called again */
140 /* We want to be called again */
144 /* Make sure all streams are the same length and round up to the next video frame */
146 auto const frc = film()->active_frame_rate_change(_ffmpeg_content->position());
147 ContentTime full_length (_ffmpeg_content->full_length(film()), frc);
148 full_length = full_length.ceil (frc.source);
150 double const vfr = _ffmpeg_content->video_frame_rate().get();
151 auto const f = full_length.frames_round (vfr);
152 auto v = video->position(film()).get_value_or(ContentTime()).frames_round(vfr) + 1;
/* Emit black frames to pad the video out to the full length */
154 video->emit (film(), shared_ptr<const ImageProxy> (new RawImageProxy (_black_image)), v);
/* Pad each audio stream with silence, in chunks of up to 0.1s */
159 for (auto i: _ffmpeg_content->ffmpeg_audio_streams ()) {
160 auto a = audio->stream_position(film(), i);
161 /* Unfortunately if a is 0 that really means that we don't know the stream position since
162 there has been no data on it since the last seek. In this case we'll just do nothing
163 here. I'm not sure if that's the right idea.
165 if (a > ContentTime()) {
166 while (a < full_length) {
167 auto to_do = min (full_length - a, ContentTime::from_seconds (0.1));
168 auto silence = make_shared<AudioBuffers>(i->channels(), to_do.frames_ceil (i->frame_rate()));
169 silence->make_silent ();
170 audio->emit (film(), i, silence, a, true);
/** Read and process the next packet from the format context, dispatching it
 *  to the video, subtitle or audio path depending on its stream index.
 *  On EOF (or a real read error) the packet is freed and flush() handling
 *  takes over; AVERROR_INVALIDDATA alone is tolerated and processing continues.
 */
185 FFmpegDecoder::pass ()
187 auto packet = av_packet_alloc();
188 DCPOMATIC_ASSERT (packet);
190 int r = av_read_frame (_format_context, packet);
192 /* AVERROR_INVALIDDATA can apparently be returned sometimes even when av_read_frame
193 has pretty-much succeeded (and hence generated data which should be processed).
194 Hence it makes sense to continue here in that case.
196 if (r < 0 && r != AVERROR_INVALIDDATA) {
197 if (r != AVERROR_EOF) {
198 /* Maybe we should fail here, but for now we'll just finish off instead */
200 av_strerror (r, buf, sizeof(buf));
201 LOG_ERROR (N_("error on av_read_frame (%1) (%2)"), &buf[0], r);
204 av_packet_free (&packet);
/* Dispatch on stream index; ignored decoders are skipped */
208 int const si = packet->stream_index;
209 auto fc = _ffmpeg_content;
211 if (_video_stream && si == _video_stream.get() && video && !video->ignore()) {
212 decode_and_process_video_packet (packet);
213 } else if (fc->subtitle_stream() && fc->subtitle_stream()->uses_index(_format_context, si) && !only_text()->ignore()) {
214 decode_and_process_subtitle_packet (packet);
216 decode_and_process_audio_packet (packet);
219 av_packet_free (&packet);
224 /** @param data pointer to array of pointers to buffers.
225 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
/* Convert the current _frame's samples to de-interleaved float AudioBuffers,
   scaling each integer sample format into the [-1, 1) float range. */
227 shared_ptr<AudioBuffers>
228 FFmpegDecoder::deinterleave_audio (shared_ptr<FFmpegAudioStream> stream) const
230 DCPOMATIC_ASSERT (bytes_per_audio_sample (stream));
232 DCPOMATIC_DISABLE_WARNINGS
233 int const size = av_samples_get_buffer_size (
234 0, stream->stream(_format_context)->codec->channels, _frame->nb_samples, audio_sample_format (stream), 1
236 DCPOMATIC_ENABLE_WARNINGS
237 DCPOMATIC_ASSERT (size >= 0);
239 /* XXX: can't we just use _frame->nb_samples directly here? */
240 /* XXX: can't we use swr_convert() to do the format conversion? */
242 /* Deinterleave and convert to float */
244 /* total_samples and frames will be rounded down here, so if there are stray samples at the end
245 of the block that do not form a complete sample or frame they will be dropped.
247 int const total_samples = size / bytes_per_audio_sample (stream);
248 int const channels = stream->channels();
249 int const frames = total_samples / channels;
250 auto audio = make_shared<AudioBuffers>(channels, frames);
251 auto data = audio->data();
253 switch (audio_sample_format (stream)) {
254 case AV_SAMPLE_FMT_U8:
256 uint8_t* p = reinterpret_cast<uint8_t *> (_frame->data[0]);
259 for (int i = 0; i < total_samples; ++i) {
/* NOTE(review): scaling unsigned 8-bit samples by 1/(1 << 23) with no 128 offset
   looks suspicious (other formats divide by their own full-scale value) — confirm
   against upstream before changing. */
260 data[channel][sample] = float(*p++) / (1 << 23);
263 if (channel == channels) {
271 case AV_SAMPLE_FMT_S16:
273 int16_t* p = reinterpret_cast<int16_t *> (_frame->data[0]);
276 for (int i = 0; i < total_samples; ++i) {
277 data[channel][sample] = float(*p++) / (1 << 15);
280 if (channel == channels) {
/* Planar 16-bit: one buffer per channel, no de-interleave needed */
288 case AV_SAMPLE_FMT_S16P:
290 int16_t** p = reinterpret_cast<int16_t **> (_frame->data);
291 for (int i = 0; i < channels; ++i) {
292 for (int j = 0; j < frames; ++j) {
293 data[i][j] = static_cast<float>(p[i][j]) / (1 << 15);
299 case AV_SAMPLE_FMT_S32:
301 int32_t* p = reinterpret_cast<int32_t *> (_frame->data[0]);
304 for (int i = 0; i < total_samples; ++i) {
305 data[channel][sample] = static_cast<float>(*p++) / 2147483648;
308 if (channel == channels) {
316 case AV_SAMPLE_FMT_S32P:
318 int32_t** p = reinterpret_cast<int32_t **> (_frame->data);
319 for (int i = 0; i < channels; ++i) {
320 for (int j = 0; j < frames; ++j) {
321 data[i][j] = static_cast<float>(p[i][j]) / 2147483648;
/* Float formats need no scaling, only (for FLT) de-interleaving */
327 case AV_SAMPLE_FMT_FLT:
329 float* p = reinterpret_cast<float*> (_frame->data[0]);
332 for (int i = 0; i < total_samples; ++i) {
333 data[channel][sample] = *p++;
336 if (channel == channels) {
344 case AV_SAMPLE_FMT_FLTP:
346 float** p = reinterpret_cast<float**> (_frame->data);
347 DCPOMATIC_ASSERT (_frame->channels <= channels);
348 /* Sometimes there aren't as many channels in the _frame as in the stream */
349 for (int i = 0; i < _frame->channels; ++i) {
350 memcpy (data[i], p[i], frames * sizeof(float));
351 /* Fill channels present in the stream but absent from this frame with silence */
352 for (int i = _frame->channels; i < channels; ++i) {
353 audio->make_silent (i);
359 throw DecodeError (String::compose (_("Unrecognised audio sample format (%1)"), static_cast<int> (audio_sample_format (stream))));
/** @return the AVSampleFormat of the given audio stream's codec context
 *  (uses the deprecated AVStream::codec field, hence the warning suppression).
 */
367 FFmpegDecoder::audio_sample_format (shared_ptr<FFmpegAudioStream> stream) const
369 DCPOMATIC_DISABLE_WARNINGS
370 return stream->stream (_format_context)->codec->sample_fmt;
371 DCPOMATIC_ENABLE_WARNINGS
/** @return bytes per sample for the given stream's sample format, as reported by FFmpeg */
376 FFmpegDecoder::bytes_per_audio_sample (shared_ptr<FFmpegAudioStream> stream) const
378 return av_get_bytes_per_sample (audio_sample_format (stream));
/** Seek within the content.
 *  @param time Target content time.
 *  @param accurate If true, seek 2s early (pre-roll) so that decoding can catch
 *  up to exactly the requested time; also resets filter graphs, flushes codec
 *  buffers and clears subtitle / per-stream timestamp state.
 */
383 FFmpegDecoder::seek (ContentTime time, bool accurate)
385 Decoder::seek (time, accurate);
387 /* If we are doing an `accurate' seek, we need to use pre-roll, as
388 we don't really know what the seek will give us.
391 auto pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
394 /* XXX: it seems debatable whether PTS should be used here...
395 http://www.mjbshaw.com/2012/04/seeking-in-ffmpeg-know-your-timestamp.html
/* Pick the stream whose time base we will seek in: video if we have one,
   otherwise the active audio stream */
398 optional<int> stream;
401 stream = _video_stream;
403 DCPOMATIC_ASSERT (_ffmpeg_content->audio);
404 auto s = dynamic_pointer_cast<FFmpegAudioStream>(_ffmpeg_content->audio->stream());
406 stream = s->index (_format_context);
410 DCPOMATIC_ASSERT (stream);
/* Convert the target time to stream time-base units, clamping below zero */
412 auto u = time - _pts_offset;
413 if (u < ContentTime ()) {
419 u.seconds() / av_q2d (_format_context->streams[stream.get()]->time_base),
424 /* Force re-creation of filter graphs to reset them and hence to make sure
425 they don't have any pre-seek frames knocking about.
427 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
428 _filter_graphs.clear ();
/* Flush all codec buffers so no pre-seek frames survive */
431 if (video_codec_context ()) {
432 avcodec_flush_buffers (video_codec_context());
435 DCPOMATIC_DISABLE_WARNINGS
436 for (auto i: ffmpeg_content()->ffmpeg_audio_streams()) {
437 avcodec_flush_buffers (i->stream(_format_context)->codec);
439 DCPOMATIC_ENABLE_WARNINGS
441 if (subtitle_codec_context ()) {
442 avcodec_flush_buffers (subtitle_codec_context ());
445 _have_current_subtitle = false;
/* Forget per-stream next-timestamp guesses; they are recomputed from new data */
447 for (auto& i: _next_time) {
448 i.second = boost::optional<dcpomatic::ContentTime>();
/** Linear search for the audio stream which uses the given format-context
 *  stream index.  @return the stream, or (per the end() check) a null/empty
 *  result when no stream matches.
 */
453 shared_ptr<FFmpegAudioStream>
454 FFmpegDecoder::audio_stream_from_index (int index) const
456 /* XXX: inefficient */
457 auto streams = ffmpeg_content()->ffmpeg_audio_streams();
458 auto stream = streams.begin();
459 while (stream != streams.end() && !(*stream)->uses_index(_format_context, index)) {
463 if (stream == streams.end ()) {
/** Take the decoded audio in _frame for the given stream, work out its
 *  ContentTime (falling back to a predicted timestamp when the frame has
 *  AV_NOPTS_VALUE), trim anything before time 0, and emit it.
 */
472 FFmpegDecoder::process_audio_frame (shared_ptr<FFmpegAudioStream> stream)
474 auto data = deinterleave_audio (stream);
477 if (_frame->pts == AV_NOPTS_VALUE) {
478 /* In some streams we see not every frame coming through with a timestamp; for those
479 that have AV_NOPTS_VALUE we need to work out the timestamp ourselves. This is
480 particularly noticeable with TrueHD streams (see #1111).
482 if (_next_time[stream]) {
483 ct = *_next_time[stream];
/* Frame carries a timestamp: convert from stream time-base to seconds */
486 ct = ContentTime::from_seconds (
487 _frame->best_effort_timestamp *
488 av_q2d (stream->stream(_format_context)->time_base))
/* Predict the timestamp of the next frame on this stream */
492 _next_time[stream] = ct + ContentTime::from_frames(data->frames(), stream->frame_rate());
494 if (ct < ContentTime()) {
495 /* Discard audio data that comes before time 0 */
496 auto const remove = min (int64_t(data->frames()), (-ct).frames_ceil(double(stream->frame_rate())));
497 data->move (data->frames() - remove, remove, 0);
498 data->set_frames (data->frames() - remove);
499 ct += ContentTime::from_frames (remove, stream->frame_rate());
/* Still negative after trimming: log diagnostics rather than emitting bad data */
502 if (ct < ContentTime()) {
504 "Crazy timestamp %1 for %2 samples in stream %3 (ts=%4 tb=%5, off=%6)",
508 _frame->best_effort_timestamp,
509 av_q2d(stream->stream(_format_context)->time_base),
510 to_string(_pts_offset)
514 /* Give this data provided there is some, and its time is sane */
515 if (ct >= ContentTime() && data->frames() > 0) {
516 audio->emit (film(), stream, data, ct);
/** Decode one audio packet into _frame.
 *  @return pair of (avcodec_decode_audio4 result: bytes consumed or negative
 *  error, frame_finished flag).  Errors are logged but not fatal — see the
 *  comment below and mantis #352.
 */
522 FFmpegDecoder::decode_audio_packet (shared_ptr<FFmpegAudioStream> stream, AVPacket* packet)
525 DCPOMATIC_DISABLE_WARNINGS
526 int decode_result = avcodec_decode_audio4 (stream->stream(_format_context)->codec, _frame, &frame_finished, packet);
527 DCPOMATIC_ENABLE_WARNINGS
528 if (decode_result < 0) {
529 /* avcodec_decode_audio4 can sometimes return an error even though it has decoded
530 some valid data; for example dca_subframe_footer can return AVERROR_INVALIDDATA
531 if it overreads the auxiliary data. ffplay carries on if frame_finished is true,
532 even in the face of such an error, so I think we should too.
534 Returning from the method here caused mantis #352.
536 LOG_WARNING ("avcodec_decode_audio4 failed (%1)", decode_result);
538 return make_pair(decode_result, frame_finished);
/** Decode an audio packet, which may contain several frames, and process each
 *  decoded frame as it appears.  A shallow copy of the packet is advanced by
 *  the number of bytes each decode call consumes.
 *  Fix: `©_packet` was an HTML-entity mojibake of `&copy_packet` — restored
 *  the address-of on the local copy declared above.
 */
543 FFmpegDecoder::decode_and_process_audio_packet (AVPacket* packet)
545 auto stream = audio_stream_from_index (packet->stream_index);
550 /* Audio packets can contain multiple frames, so we may have to call avcodec_decode_audio4
551 several times. Make a simple copy so we can alter data and size.
553 AVPacket copy_packet = *packet;
555 while (copy_packet.size > 0) {
556 auto result = decode_audio_packet (stream, &copy_packet);
557 if (result.first < 0) {
558 /* avcodec_decode_audio4 can sometimes return an error even though it has decoded
559 some valid data; for example dca_subframe_footer can return AVERROR_INVALIDDATA
560 if it overreads the auxiliary data. ffplay carries on if frame_finished is true,
561 even in the face of such an error, so I think we should too.
563 Returning from the method here caused mantis #352.
568 process_audio_frame (stream);
/* Advance past the bytes consumed by this decode call */
575 copy_packet.data += result.first;
576 copy_packet.size -= result.first;
/** Decode one video packet, push the resulting frame through a (cached or
 *  newly-created) filter graph matching its size/pixel-format, and emit each
 *  filtered image with a frame index derived from its PTS.  Frames with no
 *  PTS are dropped with a warning.
 */
582 FFmpegDecoder::decode_and_process_video_packet (AVPacket* packet)
584 DCPOMATIC_ASSERT (_video_stream);
587 DCPOMATIC_DISABLE_WARNINGS
588 if (avcodec_decode_video2 (video_codec_context(), _frame, &frame_finished, packet) < 0 || !frame_finished) {
591 DCPOMATIC_ENABLE_WARNINGS
593 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
/* Find an existing filter graph that can handle this frame's size and pixel format */
595 shared_ptr<VideoFilterGraph> graph;
597 auto i = _filter_graphs.begin();
598 while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
602 if (i == _filter_graphs.end ()) {
603 dcp::Fraction vfr (lrint(_ffmpeg_content->video_frame_rate().get() * 1000), 1000);
604 graph = make_shared<VideoFilterGraph>(dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format, vfr);
605 graph->setup (_ffmpeg_content->filters ());
606 _filter_graphs.push_back (graph);
607 LOG_GENERAL (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format);
612 auto images = graph->process (_frame);
614 for (auto const& i: images) {
616 auto image = i.first;
618 if (i.second != AV_NOPTS_VALUE) {
/* Convert the frame's PTS (stream time-base units) to seconds, then to a frame index */
619 double const pts = i.second * av_q2d(_format_context->streams[_video_stream.get()]->time_base) + _pts_offset.seconds();
623 make_shared<RawImageProxy>(image),
624 llrint(pts * _ffmpeg_content->active_video_frame_rate(film()))
627 LOG_WARNING_NC ("Dropping frame without PTS");
/** Decode one subtitle packet.  A new subtitle implicitly stops the current
 *  one; empty subtitles (num_rects == 0) act purely as stops.  Each rect is
 *  dispatched by type (bitmap or ASS); SUBTITLE_TEXT is unhandled beyond a
 *  debug print.
 */
636 FFmpegDecoder::decode_and_process_subtitle_packet (AVPacket* packet)
640 if (avcodec_decode_subtitle2 (subtitle_codec_context(), &sub, &got_subtitle, packet) < 0 || !got_subtitle) {
644 /* Stop any current subtitle, either at the time it was supposed to stop, or now if now is sooner */
645 if (_have_current_subtitle) {
646 if (_current_subtitle_to) {
647 only_text()->emit_stop (min(*_current_subtitle_to, subtitle_period(sub).from + _pts_offset));
649 only_text()->emit_stop (subtitle_period(sub).from + _pts_offset);
651 _have_current_subtitle = false;
654 if (sub.num_rects <= 0) {
655 /* Nothing new in this subtitle */
659 /* Subtitle PTS (within the source, not taking into account any of the
660 source that we may have chopped off for the DCP).
662 auto sub_period = subtitle_period (sub);
/* Record when this subtitle ends, if known; otherwise it runs until the next one */
664 from = sub_period.from + _pts_offset;
666 _current_subtitle_to = *sub_period.to + _pts_offset;
668 _current_subtitle_to = optional<ContentTime>();
669 _have_current_subtitle = true;
672 for (unsigned int i = 0; i < sub.num_rects; ++i) {
673 auto const rect = sub.rects[i];
675 switch (rect->type) {
678 case SUBTITLE_BITMAP:
679 process_bitmap_subtitle (rect, from);
682 cout << "XXX: SUBTITLE_TEXT " << rect->text << "\n";
685 process_ass_subtitle (rect->ass, from);
690 if (_current_subtitle_to) {
691 only_text()->emit_stop (*_current_subtitle_to);
694 avsubtitle_free (&sub);
/** Convert a paletted bitmap subtitle rect into a BGRA Image (applying the
 *  user's colour remapping from the FFmpegSubtitleStream) and emit it with a
 *  position/size expressed as fractions of the target frame.
 *  @param rect FFmpeg subtitle rect of type SUBTITLE_BITMAP.
 *  @param from Content time at which the subtitle starts.
 */
699 FFmpegDecoder::process_bitmap_subtitle (AVSubtitleRect const * rect, ContentTime from)
701 /* Note BGRA is expressed little-endian, so the first byte in the word is B, second
702 G, third R, fourth A.
704 auto image = make_shared<Image>(AV_PIX_FMT_BGRA, dcp::Size (rect->w, rect->h), true);
706 #ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
707 /* Start of the first line in the subtitle */
708 auto sub_p = rect->pict.data[0];
709 /* sub_p looks up into a BGRA palette which is at rect->pict.data[1];
710 (i.e. first byte B, second G, third R, fourth A)
712 auto const palette = rect->pict.data[1];
714 /* Start of the first line in the subtitle */
715 auto sub_p = rect->data[0];
716 /* sub_p looks up into a BGRA palette which is at rect->data[1].
717 (first byte B, second G, third R, fourth A)
719 auto const* palette = rect->data[1];
721 /* And the stream has a map of those palette colours to colours
722 chosen by the user; created a `mapped' palette from those settings.
724 auto colour_map = ffmpeg_content()->subtitle_stream()->colours();
725 vector<RGBA> mapped_palette (rect->nb_colors);
/* NOTE(review): this loop reads palette[0..3] each iteration; the per-entry
   advance of `palette` appears to be missing from this extract — confirm
   against upstream. */
726 for (int i = 0; i < rect->nb_colors; ++i) {
727 RGBA c (palette[2], palette[1], palette[0], palette[3]);
728 auto j = colour_map.find (c);
729 if (j != colour_map.end ()) {
730 mapped_palette[i] = j->second;
732 /* This colour was not found in the FFmpegSubtitleStream's colour map; probably because
733 it is from a project that was created before this stuff was added. Just use the
734 colour straight from the original palette.
736 mapped_palette[i] = c;
741 /* Start of the output data */
742 auto out_p = image->data()[0];
/* Expand each palette index into a BGRA pixel, line by line */
744 for (int y = 0; y < rect->h; ++y) {
745 auto sub_line_p = sub_p;
746 auto out_line_p = out_p;
747 for (int x = 0; x < rect->w; ++x) {
748 auto const p = mapped_palette[*sub_line_p++];
754 #ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
755 sub_p += rect->pict.linesize[0];
757 sub_p += rect->linesize[0];
759 out_p += image->stride()[0];
/* Work out the frame size to express the rect against; fall back to the video
   codec's size when the subtitle codec reports zero */
762 int target_width = subtitle_codec_context()->width;
763 if (target_width == 0 && video_codec_context()) {
764 /* subtitle_codec_context()->width == 0 has been seen in the wild but I don't
765 know if it's supposed to mean something from FFmpeg's point of view.
767 target_width = video_codec_context()->width;
769 int target_height = subtitle_codec_context()->height;
770 if (target_height == 0 && video_codec_context()) {
771 target_height = video_codec_context()->height;
773 DCPOMATIC_ASSERT (target_width);
774 DCPOMATIC_ASSERT (target_height);
775 dcpomatic::Rect<double> const scaled_rect (
776 static_cast<double>(rect->x) / target_width,
777 static_cast<double>(rect->y) / target_height,
778 static_cast<double>(rect->w) / target_width,
779 static_cast<double>(rect->h) / target_height
782 only_text()->emit_bitmap_start (from, image, scaled_rect);
/** Parse an ASS `Dialogue:' line from FFmpeg, skipping the first nine
 *  comma-separated fields to reach the subtitle text, then emit each
 *  collected subtitle.  (Definition continues past the end of this extract.)
 *  @param ass Raw ASS line as produced by FFmpeg.
 *  @param from Content time at which the subtitle starts.
 */
787 FFmpegDecoder::process_ass_subtitle (string ass, ContentTime from)
789 /* We have no styles and no Format: line, so I'm assuming that FFmpeg
790 produces a single format of Dialogue: lines...
/* Count commas to locate the start of the text field (after the 9th comma) */
795 for (size_t i = 0; i < ass.length(); ++i) {
796 if (commas < 9 && ass[i] == ',') {
798 } else if (commas == 9) {
807 sub::RawSubtitle base;
808 auto raw = sub::SSAReader::parse_line (
/* Video dimensions give the reader its coordinate space */
811 _ffmpeg_content->video->size().width,
812 _ffmpeg_content->video->size().height
815 for (auto const& i: sub::collect<vector<sub::Subtitle>>(raw)) {
816 only_text()->emit_plain_start (from, i);