/*
    Copyright (C) 2012-2018 Carl Hetherington <cth@carlh.net>

    This file is part of DCP-o-matic.

    DCP-o-matic is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    DCP-o-matic is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.

*/

/** @file  src/ffmpeg_decoder.cc
 *  @brief A decoder using FFmpeg to decode content.
 */
27 #include "audio_buffers.h"
28 #include "audio_content.h"
29 #include "audio_decoder.h"
30 #include "compose.hpp"
31 #include "dcpomatic_log.h"
32 #include "exceptions.h"
33 #include "ffmpeg_audio_stream.h"
34 #include "ffmpeg_content.h"
35 #include "ffmpeg_decoder.h"
36 #include "ffmpeg_subtitle_stream.h"
39 #include "frame_interval_checker.h"
42 #include "raw_image_proxy.h"
43 #include "text_content.h"
44 #include "text_decoder.h"
46 #include "video_decoder.h"
47 #include "video_filter_graph.h"
48 #include <dcp/subtitle_string.h>
49 #include <sub/ssa_reader.h>
50 #include <sub/subtitle.h>
51 #include <sub/collect.h>
53 #include <libavcodec/avcodec.h>
54 #include <libavformat/avformat.h>
56 #include <boost/algorithm/string.hpp>

using std::cout;
using std::dynamic_pointer_cast;
using std::make_shared;
using std::min;
using std::shared_ptr;
using std::string;
using std::vector;
using boost::optional;
using namespace dcpomatic;


FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> film, shared_ptr<const FFmpegContent> c, bool fast)
	: FFmpeg (c)
	, Decoder (film)
{
	if (c->video && c->video->use()) {
		video = make_shared<VideoDecoder>(this, c);
		_pts_offset = pts_offset (c->ffmpeg_audio_streams(), c->first_video(), c->active_video_frame_rate(film));
		/* It doesn't matter what size or pixel format this is, it just needs to be black */
		_black_image = make_shared<Image>(AV_PIX_FMT_RGB24, dcp::Size (128, 128), Image::Alignment::PADDED);
		_black_image->make_black ();
	}

	if (c->audio) {
		audio = make_shared<AudioDecoder>(this, c->audio, fast);
	}

	if (c->only_text()) {
		/* XXX: this time here should be the time of the first subtitle, not 0 */
		text.push_back (make_shared<TextDecoder>(this, c->only_text(), ContentTime()));
	}
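
	/* _next_time maps each audio stream to the expected start time of its next audio frame;
	   process_audio_frame() falls back to this when a frame arrives without a usable PTS.
	*/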
	for (auto i: c->ffmpeg_audio_streams()) {
		_next_time[i] = boost::optional<dcpomatic::ContentTime>();
	}
}
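

/** Flush the decoders at end-of-stream, then pad video with black frames and audio with
 *  silence up to the content's full length.
 *  @return true when flushing has finished and there is nothing more to emit.
 */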
bool
FFmpegDecoder::flush ()
{
	/* Flush video and audio once */

	bool did_something = false;
	if (video) {
		if (decode_and_process_video_packet(nullptr)) {
			did_something = true;
		}
	}

	for (auto i: ffmpeg_content()->ffmpeg_audio_streams()) {
		auto context = _codec_context[i->index(_format_context)];
		int r = avcodec_send_packet (context, nullptr);
		if (r < 0 && r != AVERROR_EOF) {
			/* EOF can happen if we've already sent a flush packet */
			throw DecodeError (N_("avcodec_send_packet"), N_("FFmpegDecoder::flush"), r);
		}
		r = avcodec_receive_frame (context, _frame);
		if (r >= 0) {
			process_audio_frame (i);
			did_something = true;
		}
	}

	if (did_something) {
		/* We want to be called again */
		return false;
	}

	/* Make sure all streams are the same length and round up to the next video frame */

	auto const frc = film()->active_frame_rate_change(_ffmpeg_content->position());
	ContentTime full_length (_ffmpeg_content->full_length(film()), frc);
	full_length = full_length.ceil (frc.source);
	if (video) {
		double const vfr = _ffmpeg_content->video_frame_rate().get();
		auto const f = full_length.frames_round (vfr);
		auto v = video->position(film()).get_value_or(ContentTime()).frames_round(vfr) + 1;
		while (v < f) {
			video->emit (film(), shared_ptr<const ImageProxy> (new RawImageProxy (_black_image)), v);
			++v;
		}
	}

	for (auto i: _ffmpeg_content->ffmpeg_audio_streams ()) {
		auto a = audio->stream_position(film(), i);
		/* Unfortunately if a is 0 that really means that we don't know the stream position since
		   there has been no data on it since the last seek.  In this case we'll just do nothing
		   here.  I'm not sure if that's the right idea.
		*/
		if (a > ContentTime()) {
			while (a < full_length) {
				auto to_do = min (full_length - a, ContentTime::from_seconds (0.1));
				auto silence = make_shared<AudioBuffers>(i->channels(), to_do.frames_ceil (i->frame_rate()));
				silence->make_silent ();
				audio->emit (film(), i, silence, a, true);
				a += to_do;
			}
		}
	}

	return true;
}
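

/** Read and process one packet from the content, dispatching it to the video, audio or
 *  subtitle handler as appropriate.
 *  @return true when the end of the content has been reached and flushing is complete.
 */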
bool
FFmpegDecoder::pass ()
{
	auto packet = av_packet_alloc();
	DCPOMATIC_ASSERT (packet);

	int r = av_read_frame (_format_context, packet);

	/* AVERROR_INVALIDDATA can apparently be returned sometimes even when av_read_frame
	   has pretty-much succeeded (and hence generated data which should be processed).
	   Hence it makes sense to continue here in that case.
	*/
	if (r < 0 && r != AVERROR_INVALIDDATA) {
		if (r != AVERROR_EOF) {
			/* Maybe we should fail here, but for now we'll just finish off instead */
			char buf[256];
			av_strerror (r, buf, sizeof(buf));
			LOG_ERROR (N_("error on av_read_frame (%1) (%2)"), &buf[0], r);
		}

		av_packet_free (&packet);
		return flush ();
	}

	int const si = packet->stream_index;
	auto fc = _ffmpeg_content;

	if (_video_stream && si == _video_stream.get() && video && !video->ignore()) {
		decode_and_process_video_packet (packet);
	} else if (fc->subtitle_stream() && fc->subtitle_stream()->uses_index(_format_context, si) && !only_text()->ignore()) {
		decode_and_process_subtitle_packet (packet);
	} else {
		decode_and_process_audio_packet (packet);
	}

	av_packet_free (&packet);
	return false;
}


/** @param frame Audio frame to deinterleave.
 *  Only the first data buffer is used for non-planar (interleaved) formats; otherwise there is
 *  one buffer per channel.
 */
shared_ptr<AudioBuffers>
FFmpegDecoder::deinterleave_audio (AVFrame* frame)
{
	auto format = static_cast<AVSampleFormat>(frame->format);

	/* XXX: can't we use swr_convert() to do the format conversion? */

	int const channels = frame->channels;
	int const frames = frame->nb_samples;
	int const total_samples = frames * channels;
	auto audio = make_shared<AudioBuffers>(channels, frames);
	auto data = audio->data();
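
	/* Convert the frame's samples to float, one AudioBuffers channel per source channel.
	   Interleaved formats (U8, S16, S32, FLT) are walked sample-by-sample, rotating through
	   the channels; planar formats (S16P, S32P, FLTP) already have one buffer per channel.
	   Integer samples are scaled so that full scale maps to +/- 1.0.
	*/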
	switch (format) {
	case AV_SAMPLE_FMT_U8:
	{
		auto p = reinterpret_cast<uint8_t *> (frame->data[0]);
		int sample = 0;
		int channel = 0;
		for (int i = 0; i < total_samples; ++i) {
			data[channel][sample] = float(*p++) / (1 << 23);
			++channel;
			if (channel == channels) {
				channel = 0;
				++sample;
			}
		}
	}
	break;
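
	/* XXX: the (1 << 23) divisor above looks suspect for 8-bit unsigned samples; mapping them
	   to the usual +/- 1.0 range would be more like float(*p++ - 128) / (1 << 7).
	*/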

	case AV_SAMPLE_FMT_S16:
	{
		auto p = reinterpret_cast<int16_t *> (frame->data[0]);
		int sample = 0;
		int channel = 0;
		for (int i = 0; i < total_samples; ++i) {
			data[channel][sample] = float(*p++) / (1 << 15);
			++channel;
			if (channel == channels) {
				channel = 0;
				++sample;
			}
		}
	}
	break;

	case AV_SAMPLE_FMT_S16P:
	{
		auto p = reinterpret_cast<int16_t **> (frame->data);
		for (int i = 0; i < channels; ++i) {
			for (int j = 0; j < frames; ++j) {
				data[i][j] = static_cast<float>(p[i][j]) / (1 << 15);
			}
		}
	}
	break;

	case AV_SAMPLE_FMT_S32:
	{
		auto p = reinterpret_cast<int32_t *> (frame->data[0]);
		int sample = 0;
		int channel = 0;
		for (int i = 0; i < total_samples; ++i) {
			data[channel][sample] = static_cast<float>(*p++) / 2147483648;
			++channel;
			if (channel == channels) {
				channel = 0;
				++sample;
			}
		}
	}
	break;

	case AV_SAMPLE_FMT_S32P:
	{
		auto p = reinterpret_cast<int32_t **> (frame->data);
		for (int i = 0; i < channels; ++i) {
			for (int j = 0; j < frames; ++j) {
				data[i][j] = static_cast<float>(p[i][j]) / 2147483648;
			}
		}
	}
	break;

	case AV_SAMPLE_FMT_FLT:
	{
		auto p = reinterpret_cast<float*> (frame->data[0]);
		int sample = 0;
		int channel = 0;
		for (int i = 0; i < total_samples; ++i) {
			data[channel][sample] = *p++;
			++channel;
			if (channel == channels) {
				channel = 0;
				++sample;
			}
		}
	}
	break;

	case AV_SAMPLE_FMT_FLTP:
	{
		auto p = reinterpret_cast<float**> (frame->data);
		DCPOMATIC_ASSERT (frame->channels <= channels);
		/* Sometimes there aren't as many channels in the frame as in the stream */
		for (int i = 0; i < frame->channels; ++i) {
			memcpy (data[i], p[i], frames * sizeof(float));
		}
		for (int i = frame->channels; i < channels; ++i) {
			audio->make_silent (i);
		}
	}
	break;

	default:
		throw DecodeError (String::compose(_("Unrecognised audio sample format (%1)"), static_cast<int>(format)));
	}

	return audio;
}


AVSampleFormat
FFmpegDecoder::audio_sample_format (shared_ptr<FFmpegAudioStream> stream) const
{
	return static_cast<AVSampleFormat>(stream->stream(_format_context)->codecpar->format);
}


int
FFmpegDecoder::bytes_per_audio_sample (shared_ptr<FFmpegAudioStream> stream) const
{
	return av_get_bytes_per_sample (audio_sample_format (stream));
}
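

/** Seek to the given time in the content.
 *  @param accurate true to seek with some pre-roll, since we don't know exactly what
 *  the demuxer's seek will give us.
 */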
void
FFmpegDecoder::seek (ContentTime time, bool accurate)
{
	Decoder::seek (time, accurate);

	/* If we are doing an `accurate' seek, we need to use pre-roll, as
	   we don't really know what the seek will give us.
	*/

	auto pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
	time -= pre_roll;

	/* XXX: it seems debatable whether PTS should be used here...
	   http://www.mjbshaw.com/2012/04/seeking-in-ffmpeg-know-your-timestamp.html
	*/

	optional<int> stream;

	if (_video_stream) {
		stream = _video_stream;
	} else {
		DCPOMATIC_ASSERT (_ffmpeg_content->audio);
		auto s = dynamic_pointer_cast<FFmpegAudioStream>(_ffmpeg_content->audio->stream());
		if (s) {
			stream = s->index (_format_context);
		}
	}

	DCPOMATIC_ASSERT (stream);
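
	/* av_seek_frame() below takes its target in the chosen stream's time_base units, so the
	   seek time in seconds is divided by the time_base (as a double).  AVSEEK_FLAG_BACKWARD
	   asks for the nearest seek point at or before the target.
	*/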
	auto u = time - _pts_offset;
	if (u < ContentTime ()) {
		u = ContentTime ();
	}
	av_seek_frame (
		_format_context,
		stream.get(),
		u.seconds() / av_q2d (_format_context->streams[stream.get()]->time_base),
		AVSEEK_FLAG_BACKWARD
		);

	/* Force re-creation of filter graphs to reset them and hence to make sure
	   they don't have any pre-seek frames knocking about.
	*/
	boost::mutex::scoped_lock lm (_filter_graphs_mutex);
	_filter_graphs.clear ();

	if (video_codec_context ()) {
		avcodec_flush_buffers (video_codec_context());
	}

	for (auto i: ffmpeg_content()->ffmpeg_audio_streams()) {
		avcodec_flush_buffers (_codec_context[i->index(_format_context)]);
	}

	if (subtitle_codec_context ()) {
		avcodec_flush_buffers (subtitle_codec_context ());
	}

	_have_current_subtitle = false;

	for (auto& i: _next_time) {
		i.second = boost::optional<dcpomatic::ContentTime>();
	}

	/* We find that we get some errors from av_send_packet after a seek.  Perhaps we should ignore
	 * all of them (which seems risky), or perhaps we should have some proper fix.  But instead
	 * let's ignore the next 2 errors.
	 */
	_errors_to_ignore = 2;
}


shared_ptr<FFmpegAudioStream>
FFmpegDecoder::audio_stream_from_index (int index) const
{
	/* XXX: inefficient */
	auto streams = ffmpeg_content()->ffmpeg_audio_streams();
	auto stream = streams.begin();
	while (stream != streams.end() && !(*stream)->uses_index(_format_context, index)) {
		++stream;
	}

	if (stream == streams.end ()) {
		return {};
	}

	return *stream;
}


void
FFmpegDecoder::process_audio_frame (shared_ptr<FFmpegAudioStream> stream)
{
	auto data = deinterleave_audio (_frame);

	ContentTime ct;
	if (_frame->pts == AV_NOPTS_VALUE) {
		/* In some streams we see not every frame coming through with a timestamp; for those
		   that have AV_NOPTS_VALUE we need to work out the timestamp ourselves.  This is
		   particularly noticeable with TrueHD streams (see #1111).
		*/
		if (_next_time[stream]) {
			ct = *_next_time[stream];
		}
	} else {
		ct = ContentTime::from_seconds (
			_frame->best_effort_timestamp *
			av_q2d (stream->stream(_format_context)->time_base))
			+ _pts_offset;
	}

	_next_time[stream] = ct + ContentTime::from_frames(data->frames(), stream->frame_rate());

	if (ct < ContentTime()) {
		/* Discard audio data that comes before time 0 */
		auto const remove = min (int64_t(data->frames()), (-ct).frames_ceil(double(stream->frame_rate())));
		data->move (data->frames() - remove, remove, 0);
		data->set_frames (data->frames() - remove);
		ct += ContentTime::from_frames (remove, stream->frame_rate());
	}

	if (ct < ContentTime()) {
		LOG_WARNING (
			"Crazy timestamp %1 for %2 samples in stream %3 (ts=%4 tb=%5, off=%6)",
			to_string(ct),
			data->frames(),
			stream->name,
			_frame->best_effort_timestamp,
			av_q2d(stream->stream(_format_context)->time_base),
			to_string(_pts_offset)
			);
	}

	/* Give this data provided there is some, and its time is sane */
	if (ct >= ContentTime() && data->frames() > 0) {
		audio->emit (film(), stream, data, ct);
	}
}


void
FFmpegDecoder::decode_and_process_audio_packet (AVPacket* packet)
{
	auto stream = audio_stream_from_index (packet->stream_index);
	if (!stream) {
		return;
	}

	auto context = _codec_context[stream->index(_format_context)];

	int r = avcodec_send_packet (context, packet);
	if (r < 0) {
		/* We could cope with AVERROR(EAGAIN) and re-send the packet but I think it should never happen.
		 * Likewise I think AVERROR_EOF should not happen.
		 */
		if (_errors_to_ignore > 0) {
			/* We see errors here after a seek, which are hopefully nothing to worry about */
			--_errors_to_ignore;
			LOG_GENERAL("Ignoring error %1 avcodec_send_packet after seek; will ignore %2 more", r, _errors_to_ignore);
			return;
		}
		throw DecodeError (N_("avcodec_send_packet"), N_("FFmpegDecoder::decode_and_process_audio_packet"), r);
	}
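
	/* One sent packet can produce zero or more decoded frames, so keep calling
	   avcodec_receive_frame() until it asks for more input.
	*/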
	while (true) {
		r = avcodec_receive_frame (context, _frame);
		if (r == AVERROR(EAGAIN)) {
			/* More input is required */
			return;
		}

		/* We choose to be relaxed here about other errors; it seems that there may be valid
		 * data to decode even if an error occurred.  #352 may be related (though this was
		 * when we were using an old version of the FFmpeg API).
		 */
		process_audio_frame (stream);
	}
}


bool
FFmpegDecoder::decode_and_process_video_packet (AVPacket* packet)
{
	DCPOMATIC_ASSERT (_video_stream);

	auto context = video_codec_context();

	int r = avcodec_send_packet (context, packet);
	if (r < 0 && !(r == AVERROR_EOF && !packet)) {
		/* We could cope with AVERROR(EAGAIN) and re-send the packet but I think it should never happen.
		 * AVERROR_EOF can happen during flush if we've already sent a flush packet.
		 */
		throw DecodeError (N_("avcodec_send_packet"), N_("FFmpegDecoder::decode_and_process_video_packet"), r);
	}

	r = avcodec_receive_frame (context, _frame);
	if (r == AVERROR(EAGAIN) || r == AVERROR_EOF || (r < 0 && !packet)) {
		/* More input is required, no more frames are coming, or we are flushing and there was
		 * some error which we just want to ignore.
		 */
		return false;
	} else if (r < 0) {
		throw DecodeError (N_("avcodec_receive_frame"), N_("FFmpeg::decode_and_process_video_packet"), r);
	}

	/* We assume we'll only get one frame here, which I think is safe */

	boost::mutex::scoped_lock lm (_filter_graphs_mutex);

	shared_ptr<VideoFilterGraph> graph;
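
	/* Re-use an existing filter graph that can handle this frame's size and pixel format
	   if we have one; otherwise build a new one and cache it.
	*/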
	auto i = _filter_graphs.begin();
	while (i != _filter_graphs.end() && !(*i)->can_process (dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format)) {
		++i;
	}

	if (i == _filter_graphs.end ()) {
		dcp::Fraction vfr (lrint(_ffmpeg_content->video_frame_rate().get() * 1000), 1000);
		graph = make_shared<VideoFilterGraph>(dcp::Size (_frame->width, _frame->height), (AVPixelFormat) _frame->format, vfr);
		graph->setup (_ffmpeg_content->filters ());
		_filter_graphs.push_back (graph);
		LOG_GENERAL (N_("New graph for %1x%2, pixel format %3"), _frame->width, _frame->height, _frame->format);
	} else {
		graph = *i;
	}

	auto images = graph->process (_frame);
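
	/* Each result is an (image, PTS) pair; the PTS is in the video stream's time_base units
	   and is converted to seconds and then to a content frame index below.
	*/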
	for (auto const& i: images) {

		auto image = i.first;

		if (i.second != AV_NOPTS_VALUE) {
			double const pts = i.second * av_q2d(_format_context->streams[_video_stream.get()]->time_base) + _pts_offset.seconds();

			video->emit (
				film(),
				make_shared<RawImageProxy>(image),
				llrint(pts * _ffmpeg_content->active_video_frame_rate(film()))
				);
		} else {
			LOG_WARNING_NC ("Dropping frame without PTS");
		}
	}

	return true;
}


void
FFmpegDecoder::decode_and_process_subtitle_packet (AVPacket* packet)
{
	int got_subtitle;
	AVSubtitle sub;
	if (avcodec_decode_subtitle2 (subtitle_codec_context(), &sub, &got_subtitle, packet) < 0 || !got_subtitle) {
		return;
	}

	auto sub_period = subtitle_period (packet, ffmpeg_content()->subtitle_stream()->stream(_format_context), sub);

	/* Stop any current subtitle, either at the time it was supposed to stop, or now if now is sooner */
	if (_have_current_subtitle) {
		if (_current_subtitle_to) {
			only_text()->emit_stop (min(*_current_subtitle_to, sub_period.from + _pts_offset));
		} else {
			only_text()->emit_stop (sub_period.from + _pts_offset);
		}
		_have_current_subtitle = false;
	}

	if (sub.num_rects <= 0) {
		/* Nothing new in this subtitle */
		avsubtitle_free (&sub);
		return;
	}

	/* Subtitle PTS (within the source, not taking into account any of the
	   source that we may have chopped off for the DCP).
	*/
	ContentTime const from = sub_period.from + _pts_offset;
	if (sub_period.to) {
		_current_subtitle_to = *sub_period.to + _pts_offset;
	} else {
		_current_subtitle_to = optional<ContentTime>();
	}
	_have_current_subtitle = true;
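
	/* A single AVSubtitle may contain several rectangles; bitmap and ASS rectangles are
	   handled below, while plain-text ones are not yet supported.
	*/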
	for (unsigned int i = 0; i < sub.num_rects; ++i) {
		auto const rect = sub.rects[i];

		switch (rect->type) {
		case SUBTITLE_NONE:
			break;
		case SUBTITLE_BITMAP:
			process_bitmap_subtitle (rect, from);
			break;
		case SUBTITLE_TEXT:
			cout << "XXX: SUBTITLE_TEXT " << rect->text << "\n";
			break;
		case SUBTITLE_ASS:
			process_ass_subtitle (rect->ass, from);
			break;
		}
	}

	if (_current_subtitle_to) {
		only_text()->emit_stop (*_current_subtitle_to);
	}

	avsubtitle_free (&sub);
}


void
FFmpegDecoder::process_bitmap_subtitle (AVSubtitleRect const * rect, ContentTime from)
{
	/* Note BGRA is expressed little-endian, so the first byte in the word is B, second
	   G, third R, fourth A.
	*/
	auto image = make_shared<Image>(AV_PIX_FMT_BGRA, dcp::Size (rect->w, rect->h), Image::Alignment::PADDED);

#ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
	/* Start of the first line in the subtitle */
	auto sub_p = rect->pict.data[0];
	/* sub_p looks up into a BGRA palette which is at rect->pict.data[1];
	   (i.e. first byte B, second G, third R, fourth A)
	*/
	auto const palette = rect->pict.data[1];
#else
	/* Start of the first line in the subtitle */
	auto sub_p = rect->data[0];
	/* sub_p looks up into a BGRA palette which is at rect->data[1].
	   (first byte B, second G, third R, fourth A)
	*/
	auto const* palette = rect->data[1];
#endif
	/* And the stream has a map of those palette colours to colours
	   chosen by the user; create a `mapped' palette from those settings.
	*/
	auto colour_map = ffmpeg_content()->subtitle_stream()->colours();
	vector<RGBA> mapped_palette (rect->nb_colors);
	for (int i = 0; i < rect->nb_colors; ++i) {
		RGBA c (palette[2], palette[1], palette[0], palette[3]);
		auto j = colour_map.find (c);
		if (j != colour_map.end ()) {
			mapped_palette[i] = j->second;
		} else {
			/* This colour was not found in the FFmpegSubtitleStream's colour map; probably because
			   it is from a project that was created before this stuff was added.  Just use the
			   colour straight from the original palette.
			*/
			mapped_palette[i] = c;
		}
		palette += 4;
	}
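
	/* Each pixel of the subtitle is an index into the (now remapped) palette; expand it
	   out to one BGRA word per pixel in the output image.
	*/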
	/* Start of the output data */
	auto out_p = image->data()[0];

	for (int y = 0; y < rect->h; ++y) {
		auto sub_line_p = sub_p;
		auto out_line_p = out_p;
		for (int x = 0; x < rect->w; ++x) {
			auto const p = mapped_palette[*sub_line_p++];
			*out_line_p++ = p.b;
			*out_line_p++ = p.g;
			*out_line_p++ = p.r;
			*out_line_p++ = p.a;
		}
#ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
		sub_p += rect->pict.linesize[0];
#else
		sub_p += rect->linesize[0];
#endif
		out_p += image->stride()[0];
	}
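
	/* The rectangle's position and size are passed on as proportions of the video frame
	   rather than in pixels.
	*/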
	int target_width = subtitle_codec_context()->width;
	if (target_width == 0 && video_codec_context()) {
		/* subtitle_codec_context()->width == 0 has been seen in the wild but I don't
		   know if it's supposed to mean something from FFmpeg's point of view.
		*/
		target_width = video_codec_context()->width;
	}
	int target_height = subtitle_codec_context()->height;
	if (target_height == 0 && video_codec_context()) {
		target_height = video_codec_context()->height;
	}
	DCPOMATIC_ASSERT (target_width);
	DCPOMATIC_ASSERT (target_height);
	dcpomatic::Rect<double> const scaled_rect (
		static_cast<double>(rect->x) / target_width,
		static_cast<double>(rect->y) / target_height,
		static_cast<double>(rect->w) / target_width,
		static_cast<double>(rect->h) / target_height
		);

	only_text()->emit_bitmap_start (from, image, scaled_rect);
}


void
FFmpegDecoder::process_ass_subtitle (string ass, ContentTime from)
{
	/* We have no styles and no Format: line, so I'm assuming that FFmpeg
	   produces a single format of Dialogue: lines...
	*/
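
	/* Everything after the ninth comma on the Dialogue: line is the subtitle text itself;
	   commas within the text are preserved because counting stops at nine.
	*/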
	int commas = 0;
	string text;
	for (size_t i = 0; i < ass.length(); ++i) {
		if (commas < 9 && ass[i] == ',') {
			++commas;
		} else if (commas == 9) {
			text += ass[i];
		}
	}

	if (text.empty ()) {
		return;
	}

	sub::RawSubtitle base;
	auto raw = sub::SSAReader::parse_line (
		base,
		text,
		_ffmpeg_content->video->size().width,
		_ffmpeg_content->video->size().height
		);

	for (auto const& i: sub::collect<vector<sub::Subtitle>>(raw)) {
		only_text()->emit_plain_start (from, i);
	}
}