2 Copyright (C) 2012-2018 Carl Hetherington <cth@carlh.net>
4 This file is part of DCP-o-matic.
6 DCP-o-matic is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 DCP-o-matic is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with DCP-o-matic. If not, see <http://www.gnu.org/licenses/>.
22 /** @file src/ffmpeg_decoder.cc
23 * @brief A decoder using FFmpeg to decode content.
27 #include "audio_buffers.h"
28 #include "audio_content.h"
29 #include "audio_decoder.h"
30 #include "compose.hpp"
31 #include "dcpomatic_log.h"
32 #include "exceptions.h"
33 #include "ffmpeg_audio_stream.h"
34 #include "ffmpeg_content.h"
35 #include "ffmpeg_decoder.h"
36 #include "ffmpeg_subtitle_stream.h"
39 #include "frame_interval_checker.h"
42 #include "raw_image_proxy.h"
43 #include "text_content.h"
44 #include "text_decoder.h"
46 #include "video_decoder.h"
47 #include "video_filter_graph.h"
48 #include <dcp/subtitle_string.h>
49 #include <sub/ssa_reader.h>
50 #include <sub/subtitle.h>
51 #include <sub/collect.h>
53 #include <libavcodec/avcodec.h>
54 #include <libavformat/avformat.h>
56 #include <boost/algorithm/string.hpp>
66 using std::dynamic_pointer_cast;
67 using std::make_shared;
69 using std::shared_ptr;
72 using boost::optional;
74 using namespace dcpomatic;
/** Construct an FFmpegDecoder for some FFmpeg content.
 *  @param film Film that the content is part of.
 *  @param c Content to decode.
 *  @param fast true to decode as quickly as possible at some quality cost.
 *
 *  NOTE(review): this excerpt is missing lines (initialiser list, braces and
 *  some conditionals) — comments describe only the visible statements.
 */
77 FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> film, shared_ptr<const FFmpegContent> c, bool fast)
/* Only set up video decoding if the content has video and it is enabled */
81 if (c->video && c->video->use()) {
82 video = make_shared<VideoDecoder>(this, c);
/* Offset applied to FFmpeg PTS values to get ContentTime, derived from the
   first video/audio timestamps and the active frame rate */
83 _pts_offset = pts_offset (c->ffmpeg_audio_streams(), c->first_video(), c->active_video_frame_rate(film));
84 /* It doesn't matter what size or pixel format this is, it just needs to be black */
85 _black_image = make_shared<Image>(AV_PIX_FMT_RGB24, dcp::Size (128, 128), Image::Alignment::PADDED);
86 _black_image->make_black ();
/* Audio decoder for the content's audio streams (presumably guarded by a
   c->audio check on a missing line — confirm against upstream) */
92 audio = make_shared<AudioDecoder>(this, c->audio, fast);
/* One TextDecoder for the content's subtitle/caption text */
96 text.push_back (make_shared<TextDecoder>(this, c->only_text()));
97 /* XXX: we should be calling maybe_set_position() on this TextDecoder, but we can't easily find
98 * the time of the first subtitle at this point.
/* Initialise the per-stream "next expected timestamp" map to empty; it is
   filled in as frames with valid PTS arrive (see process_audio_frame) */
102 for (auto i: c->ffmpeg_audio_streams()) {
103 _next_time[i] = boost::optional<dcpomatic::ContentTime>();
/** Flush the decoders at end-of-stream, then pad video with black frames and
 *  audio with silence so that every stream reaches the content's full length.
 *  Appears to return a flag indicating whether more calls are wanted (return
 *  statements are missing from this excerpt — confirm against upstream).
 */
109 FFmpegDecoder::flush ()
111 /* Flush video and audio once */
113 bool did_something = false;
/* A null packet asks the video codec to drain any buffered frames */
115 if (decode_and_process_video_packet(nullptr)) {
116 did_something = true;
/* Drain each audio codec context in the same way */
120 for (auto i: ffmpeg_content()->ffmpeg_audio_streams()) {
121 auto context = _codec_context[i->index(_format_context)];
122 int r = avcodec_send_packet (context, nullptr);
123 if (r < 0 && r != AVERROR_EOF) {
124 /* EOF can happen if we've already sent a flush packet */
125 throw DecodeError (N_("avcodec_send_packet"), N_("FFmpegDecoder::flush"), r);
127 r = avcodec_receive_frame (context, audio_frame(i));
/* Presumably only reached when r == 0, i.e. a frame was produced (the guard
   is on a missing line) */
129 process_audio_frame (i);
130 did_something = true;
135 /* We want to be called again */
139 /* Make sure all streams are the same length and round up to the next video frame */
141 auto const frc = film()->active_frame_rate_change(_ffmpeg_content->position());
142 ContentTime full_length (_ffmpeg_content->full_length(film()), frc);
143 full_length = full_length.ceil (frc.source);
145 double const vfr = _ffmpeg_content->video_frame_rate().get();
/* Target length in video frames, and the next frame index to emit */
146 auto const f = full_length.frames_round (vfr);
147 auto v = video->position(film()).get_value_or(ContentTime()).frames_round(vfr) + 1;
/* Emit black frames until the video reaches full length (loop header is on a
   missing line) */
149 video->emit (film(), make_shared<const RawImageProxy>(_black_image), v);
/* Pad each audio stream with silence up to full length */
154 for (auto i: _ffmpeg_content->ffmpeg_audio_streams ()) {
155 auto a = audio->stream_position(film(), i);
156 /* Unfortunately if a is 0 that really means that we don't know the stream position since
157 there has been no data on it since the last seek. In this case we'll just do nothing
158 here. I'm not sure if that's the right idea.
160 if (a > ContentTime()) {
161 while (a < full_length) {
/* Emit silence in chunks of at most 0.1s to bound buffer sizes */
162 auto to_do = min (full_length - a, ContentTime::from_seconds (0.1));
163 auto silence = make_shared<AudioBuffers>(i->channels(), to_do.frames_ceil (i->frame_rate()));
164 silence->make_silent ();
165 audio->emit (film(), i, silence, a, true);
/** Read and process one packet from the container, dispatching it to the
 *  video, subtitle or audio path as appropriate.  On EOF-like errors the
 *  packet is freed and (presumably, on missing lines) flush() takes over.
 */
180 FFmpegDecoder::pass ()
182 auto packet = av_packet_alloc();
183 DCPOMATIC_ASSERT (packet);
185 int r = av_read_frame (_format_context, packet);
187 /* AVERROR_INVALIDDATA can apparently be returned sometimes even when av_read_frame
188 has pretty-much succeeded (and hence generated data which should be processed).
189 Hence it makes sense to continue here in that case.
191 if (r < 0 && r != AVERROR_INVALIDDATA) {
192 if (r != AVERROR_EOF) {
193 /* Maybe we should fail here, but for now we'll just finish off instead */
/* buf is declared on a missing line (a local char array for av_strerror) */
195 av_strerror (r, buf, sizeof(buf));
196 LOG_ERROR (N_("error on av_read_frame (%1) (%2)"), &buf[0], r);
/* Free the packet before leaving this error path */
199 av_packet_free (&packet);
203 int const si = packet->stream_index;
204 auto fc = _ffmpeg_content;
/* Dispatch by stream index: video first, then subtitles, else audio */
206 if (_video_stream && si == _video_stream.get() && video && !video->ignore()) {
207 decode_and_process_video_packet (packet);
208 } else if (fc->subtitle_stream() && fc->subtitle_stream()->uses_index(_format_context, si) && !only_text()->ignore()) {
209 decode_and_process_subtitle_packet (packet);
/* Fallthrough (else branch — brace on a missing line): treat as audio */
211 decode_and_process_audio_packet (packet);
214 av_packet_free (&packet);
219 /** @param data pointer to array of pointers to buffers.
220 * Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
/* Convert one decoded AVFrame of audio into a float AudioBuffers, handling
 * both interleaved (U8/S16/S32/FLT) and planar (S16P/S32P/FLTP) layouts.
 * Interleaved cases walk a single buffer sample-by-sample, de-interleaving
 * into per-channel arrays (the `sample`/`channel` counters are declared on
 * lines missing from this excerpt); planar cases copy per-channel planes.
 */
223 shared_ptr<AudioBuffers>
224 deinterleave_audio(shared_ptr<FFmpegAudioStream> stream, AVFrame* frame)
226 auto format = static_cast<AVSampleFormat>(frame->format);
228 /* XXX: can't we use swr_convert() to do the format conversion? */
230 int const channels = frame->channels;
231 int const frames = frame->nb_samples;
232 int const total_samples = frames * channels;
233 auto audio = make_shared<AudioBuffers>(channels, frames);
234 auto data = audio->data();
/* Interleaved unsigned 8-bit */
237 case AV_SAMPLE_FMT_U8:
239 auto p = reinterpret_cast<uint8_t *> (frame->data[0]);
242 for (int i = 0; i < total_samples; ++i) {
/* NOTE(review): dividing an 8-bit sample by (1 << 23) looks wrong — the
   result is vanishingly small, and U8 samples are biased at 0x80 so one
   would expect something like (float(*p++) - 0x80) / 0x80.  Confirm
   against upstream before changing. */
243 data[channel][sample] = float(*p++) / (1 << 23);
/* Advance channel then wrap to the next sample frame (wrap body is on
   missing lines; same pattern in the other interleaved cases below) */
246 if (channel == channels) {
/* Interleaved signed 16-bit: scale to [-1, 1) by 2^15 */
254 case AV_SAMPLE_FMT_S16:
256 auto p = reinterpret_cast<int16_t *> (frame->data[0]);
259 for (int i = 0; i < total_samples; ++i) {
260 data[channel][sample] = float(*p++) / (1 << 15);
263 if (channel == channels) {
/* Planar signed 16-bit: one plane per channel, direct copy with scaling */
271 case AV_SAMPLE_FMT_S16P:
273 auto p = reinterpret_cast<int16_t **> (frame->data);
274 for (int i = 0; i < channels; ++i) {
275 for (int j = 0; j < frames; ++j) {
276 data[i][j] = static_cast<float>(p[i][j]) / (1 << 15);
/* Interleaved signed 32-bit: scale by 2^31 (2147483648) */
282 case AV_SAMPLE_FMT_S32:
284 auto p = reinterpret_cast<int32_t *> (frame->data[0]);
287 for (int i = 0; i < total_samples; ++i) {
288 data[channel][sample] = static_cast<float>(*p++) / 2147483648;
291 if (channel == channels) {
/* Planar signed 32-bit */
299 case AV_SAMPLE_FMT_S32P:
301 auto p = reinterpret_cast<int32_t **> (frame->data);
302 for (int i = 0; i < channels; ++i) {
303 for (int j = 0; j < frames; ++j) {
304 data[i][j] = static_cast<float>(p[i][j]) / 2147483648;
/* Interleaved float: already normalised, copy as-is */
310 case AV_SAMPLE_FMT_FLT:
312 auto p = reinterpret_cast<float*> (frame->data[0]);
315 for (int i = 0; i < total_samples; ++i) {
316 data[channel][sample] = *p++;
319 if (channel == channels) {
/* Planar float: memcpy each plane; silence any stream channels that this
   particular frame does not carry */
327 case AV_SAMPLE_FMT_FLTP:
329 auto p = reinterpret_cast<float**> (frame->data);
330 DCPOMATIC_ASSERT(channels <= stream->channels());
331 /* Sometimes there aren't as many channels in the frame as in the stream */
332 for (int i = 0; i < channels; ++i) {
333 memcpy (data[i], p[i], frames * sizeof(float));
335 for (int i = channels; i < stream->channels(); ++i) {
336 audio->make_silent (i);
/* default case (label on a missing line): unknown sample format is fatal */
342 throw DecodeError (String::compose(_("Unrecognised audio sample format (%1)"), static_cast<int>(format)));
/** @return the AVSampleFormat of the given audio stream, read from the
 *  stream's codec parameters in the open format context.
 */
350 FFmpegDecoder::audio_sample_format (shared_ptr<FFmpegAudioStream> stream) const
352 return static_cast<AVSampleFormat>(stream->stream(_format_context)->codecpar->format);
/** @return the size in bytes of one (single-channel) sample of the given
 *  audio stream, as reported by av_get_bytes_per_sample for its format.
 */
357 FFmpegDecoder::bytes_per_audio_sample (shared_ptr<FFmpegAudioStream> stream) const
359 return av_get_bytes_per_sample (audio_sample_format (stream));
/** Seek the content to a given time.
 *  @param time Target ContentTime.
 *  @param accurate true to seek with 2s of pre-roll so that exact frames can
 *  be produced; false for a fast, approximate seek.
 *
 *  After the container-level seek all codec contexts and filter graphs are
 *  flushed so that no pre-seek data leaks into the post-seek output.
 */
364 FFmpegDecoder::seek (ContentTime time, bool accurate)
366 Decoder::seek (time, accurate);
368 /* If we are doing an `accurate' seek, we need to use pre-roll, as
369 we don't really know what the seek will give us.
372 auto pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
375 /* XXX: it seems debatable whether PTS should be used here...
376 http://www.mjbshaw.com/2012/04/seeking-in-ffmpeg-know-your-timestamp.html
/* Pick the stream whose time base we will seek in: video if present,
   otherwise the selected audio stream */
379 optional<int> stream;
382 stream = _video_stream;
384 DCPOMATIC_ASSERT (_ffmpeg_content->audio);
385 auto s = dynamic_pointer_cast<FFmpegAudioStream>(_ffmpeg_content->audio->stream());
387 stream = s->index (_format_context);
391 DCPOMATIC_ASSERT (stream);
/* Convert the target from ContentTime to the stream's own timestamp units,
   first undoing the PTS offset; clamping of negative values happens on
   missing lines */
393 auto u = time - _pts_offset;
394 if (u < ContentTime ()) {
400 u.seconds() / av_q2d (_format_context->streams[stream.get()]->time_base),
405 /* Force re-creation of filter graphs to reset them and hence to make sure
406 they don't have any pre-seek frames knocking about.
408 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
409 _filter_graphs.clear ();
/* Flush every codec context so buffered pre-seek frames are discarded */
412 if (video_codec_context ()) {
413 avcodec_flush_buffers (video_codec_context());
416 for (auto i: ffmpeg_content()->ffmpeg_audio_streams()) {
417 avcodec_flush_buffers (_codec_context[i->index(_format_context)]);
420 if (subtitle_codec_context ()) {
421 avcodec_flush_buffers (subtitle_codec_context ());
/* Reset subtitle and audio-timestamp state; _next_time entries become
   unknown until new frames with valid PTS arrive */
424 _have_current_subtitle = false;
426 for (auto& i: _next_time) {
427 i.second = boost::optional<dcpomatic::ContentTime>();
/** Find the FFmpegAudioStream that uses a given format-context stream index.
 *  @param index Stream index within _format_context.
 *  @return the matching stream, or (presumably, on a missing line) a null
 *  pointer if no audio stream uses that index.
 */
432 shared_ptr<FFmpegAudioStream>
433 FFmpegDecoder::audio_stream_from_index (int index) const
435 /* XXX: inefficient */
/* Linear scan over the content's audio streams for one matching `index` */
436 auto streams = ffmpeg_content()->ffmpeg_audio_streams();
437 auto stream = streams.begin();
438 while (stream != streams.end() && !(*stream)->uses_index(_format_context, index)) {
442 if (stream == streams.end ()) {
/** Take the decoded audio frame for a stream, work out its ContentTime,
 *  trim anything before time 0, and emit the remaining data.
 *  @param stream Stream the frame belongs to.
 */
451 FFmpegDecoder::process_audio_frame (shared_ptr<FFmpegAudioStream> stream)
453 auto frame = audio_frame (stream);
454 auto data = deinterleave_audio(stream, frame);
456 auto const time_base = stream->stream(_format_context)->time_base;
/* ct is declared on a missing line; it is the frame's ContentTime */
459 if (frame->pts == AV_NOPTS_VALUE) {
460 /* In some streams we see not every frame coming through with a timestamp; for those
461 that have AV_NOPTS_VALUE we need to work out the timestamp ourselves. This is
462 particularly noticeable with TrueHD streams (see #1111).
/* Use the running per-stream estimate when we have one */
464 if (_next_time[stream]) {
465 ct = *_next_time[stream];
/* Otherwise derive the time from FFmpeg's best-effort timestamp (the
   full expression continues on missing lines — time base and PTS offset
   are presumably applied there) */
468 ct = ContentTime::from_seconds (
469 frame->best_effort_timestamp *
473 "Process audio with timestamp %1 (BET %2, timebase %3/%4, (PTS offset %5)",
475 frame->best_effort_timestamp,
478 to_string(_pts_offset)
/* Predict the next frame's time so AV_NOPTS_VALUE frames can be timed */
482 _next_time[stream] = ct + ContentTime::from_frames(data->frames(), stream->frame_rate());
484 if (ct < ContentTime()) {
485 /* Discard audio data that comes before time 0 */
486 auto const remove = min (int64_t(data->frames()), (-ct).frames_ceil(double(stream->frame_rate())));
487 data->move (data->frames() - remove, remove, 0);
488 data->set_frames (data->frames() - remove);
489 ct += ContentTime::from_frames (remove, stream->frame_rate());
/* Still negative after trimming: log it as a suspicious timestamp */
492 if (ct < ContentTime()) {
494 "Crazy timestamp %1 for %2 samples in stream %3 (ts=%4 tb=%5, off=%6)",
498 frame->best_effort_timestamp,
500 to_string(_pts_offset)
504 /* Give this data provided there is some, and its time is sane */
505 if (ct >= ContentTime() && data->frames() > 0) {
506 audio->emit (film(), stream, data, ct);
/** Send one audio packet to the right codec context and process any decoded
 *  frames it yields.  (A null-stream guard and the receive loop structure are
 *  on lines missing from this excerpt.)
 *  @param packet Packet to decode.
 */
512 FFmpegDecoder::decode_and_process_audio_packet (AVPacket* packet)
514 auto stream = audio_stream_from_index (packet->stream_index);
519 auto context = _codec_context[stream->index(_format_context)];
520 auto frame = audio_frame (stream);
522 LOG_DEBUG_PLAYER("Send audio packet on stream %1", stream->index(_format_context));
523 int r = avcodec_send_packet (context, packet);
/* Errors from send are only logged (guard on a missing line), not fatal */
525 LOG_WARNING("avcodec_send_packet returned %1 for an audio packet", r);
528 r = avcodec_receive_frame (context, frame);
529 if (r == AVERROR(EAGAIN)) {
530 /* More input is required */
531 LOG_DEBUG_PLAYER_NC("EAGAIN after trying to receive audio frame");
535 /* We choose to be relaxed here about other errors; it seems that there may be valid
536 * data to decode even if an error occurred. #352 may be related (though this was
537 * when we were using an old version of the FFmpeg API).
539 process_audio_frame (stream);
/** Send one video packet (or nullptr to flush) to the video codec and
 *  process any frames produced.  Appears to return whether a frame was
 *  handled (return statements are on missing lines — confirm upstream).
 *  @param packet Packet to decode, or nullptr to drain the codec.
 */
545 FFmpegDecoder::decode_and_process_video_packet (AVPacket* packet)
547 DCPOMATIC_ASSERT (_video_stream);
549 auto context = video_codec_context();
551 bool pending = false;
553 int r = avcodec_send_packet (context, packet);
/* Non-fatal send failures are just logged (guard on a missing line) */
555 LOG_WARNING("avcodec_send_packet returned %1 for a video packet", r);
558 /* EAGAIN means we should call avcodec_receive_frame and then re-send the same packet */
559 pending = r == AVERROR(EAGAIN);
561 r = avcodec_receive_frame (context, _video_frame);
563 if (r == AVERROR(EAGAIN) || r == AVERROR_EOF || (r < 0 && !packet)) {
564 /* More input is required, no more frames are coming, or we are flushing and there was
565 * some error which we just want to ignore.
/* Any other receive error while decoding real data is fatal */
569 throw DecodeError (N_("avcodec_receive_frame"), N_("FFmpeg::decode_and_process_video_packet"), r);
572 process_video_frame ();
/** Push the current decoded video frame through a (cached) filter graph and
 *  emit the resulting image(s) at times derived from their PTS.
 */
581 FFmpegDecoder::process_video_frame ()
583 boost::mutex::scoped_lock lm (_filter_graphs_mutex);
585 shared_ptr<VideoFilterGraph> graph;
/* Look for an existing graph that can handle this frame's size/format */
587 auto i = _filter_graphs.begin();
588 while (i != _filter_graphs.end() && !(*i)->can_process(dcp::Size(_video_frame->width, _video_frame->height), (AVPixelFormat) _video_frame->format)) {
/* None found: build a new graph for this size/format and cache it */
592 if (i == _filter_graphs.end ()) {
593 dcp::Fraction vfr (lrint(_ffmpeg_content->video_frame_rate().get() * 1000), 1000);
594 graph = make_shared<VideoFilterGraph>(dcp::Size(_video_frame->width, _video_frame->height), (AVPixelFormat) _video_frame->format, vfr);
595 graph->setup (_ffmpeg_content->filters ());
596 _filter_graphs.push_back (graph);
597 LOG_GENERAL (N_("New graph for %1x%2, pixel format %3"), _video_frame->width, _video_frame->height, _video_frame->format);
/* The graph may emit zero or more (image, PTS) pairs for one input frame */
602 auto images = graph->process (_video_frame);
604 for (auto const& i: images) {
606 auto image = i.first;
608 if (i.second != AV_NOPTS_VALUE) {
/* Convert stream PTS to seconds and apply the decoder's PTS offset */
609 double const pts = i.second * av_q2d(_format_context->streams[_video_stream.get()]->time_base) + _pts_offset.seconds();
/* Emit at the frame index implied by the PTS (the video->emit call
   opens on a missing line) */
613 make_shared<RawImageProxy>(image),
614 llrint(pts * _ffmpeg_content->active_video_frame_rate(film()))
/* Frames without a PTS cannot be placed in time, so they are dropped */
617 LOG_WARNING_NC ("Dropping frame without PTS");
/** Decode one subtitle packet, stop any subtitle currently being shown, and
 *  emit the new subtitle's rects (bitmap or ASS) with its start time.
 *  @param packet Packet to decode.
 */
624 FFmpegDecoder::decode_and_process_subtitle_packet (AVPacket* packet)
/* sub and got_subtitle are declared on missing lines */
628 if (avcodec_decode_subtitle2 (subtitle_codec_context(), &sub, &got_subtitle, packet) < 0 || !got_subtitle) {
/* The period the subtitle covers, in the stream's own timing */
632 auto sub_period = subtitle_period (packet, ffmpeg_content()->subtitle_stream()->stream(_format_context), sub);
634 /* Stop any current subtitle, either at the time it was supposed to stop, or now if now is sooner */
635 if (_have_current_subtitle) {
636 if (_current_subtitle_to) {
637 only_text()->emit_stop (min(*_current_subtitle_to, sub_period.from + _pts_offset));
639 only_text()->emit_stop (sub_period.from + _pts_offset);
641 _have_current_subtitle = false;
644 if (sub.num_rects <= 0) {
645 /* Nothing new in this subtitle */
646 avsubtitle_free (&sub);
650 /* Subtitle PTS (within the source, not taking into account any of the
651 source that we may have chopped off for the DCP).
/* Record the new subtitle's start/stop (stop may be open-ended) */
654 from = sub_period.from + _pts_offset;
656 _current_subtitle_to = *sub_period.to + _pts_offset;
658 _current_subtitle_to = optional<ContentTime>();
659 _have_current_subtitle = true;
/* Collect bitmap rects into one ContentBitmapText; ASS rects are emitted
   directly via process_ass_subtitle */
662 ContentBitmapText bitmap_text(from);
663 for (unsigned int i = 0; i < sub.num_rects; ++i) {
664 auto const rect = sub.rects[i];
666 switch (rect->type) {
669 case SUBTITLE_BITMAP:
670 bitmap_text.subs.push_back(process_bitmap_subtitle(rect));
/* SUBTITLE_TEXT is unhandled beyond this debug print (case label on a
   missing line) */
673 cout << "XXX: SUBTITLE_TEXT " << rect->text << "\n";
676 process_ass_subtitle (rect->ass, from);
681 if (!bitmap_text.subs.empty()) {
682 only_text()->emit_bitmap_start(bitmap_text);
/* If the subtitle had a known end time, schedule its stop now */
685 if (_current_subtitle_to) {
686 only_text()->emit_stop (*_current_subtitle_to);
689 avsubtitle_free (&sub);
/** Convert a bitmap AVSubtitleRect into a BGRA image plus its position as a
 *  rectangle in proportion of the target (subtitle or video) frame size.
 *  @param rect Bitmap subtitle rect (palettised image plus BGRA palette).
 *  @return image and its proportional placement (return type is outside this
 *  excerpt).
 */
694 FFmpegDecoder::process_bitmap_subtitle (AVSubtitleRect const * rect)
696 /* Note BGRA is expressed little-endian, so the first byte in the word is B, second
697 G, third R, fourth A.
699 auto image = make_shared<Image>(AV_PIX_FMT_BGRA, dcp::Size (rect->w, rect->h), Image::Alignment::PADDED);
/* Older FFmpeg exposes the bitmap via rect->pict; newer via rect->data */
701 #ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
702 /* Start of the first line in the subtitle */
703 auto sub_p = rect->pict.data[0];
704 /* sub_p looks up into a BGRA palette which is at rect->pict.data[1];
705 (i.e. first byte B, second G, third R, fourth A)
707 auto const palette = rect->pict.data[1];
709 /* Start of the first line in the subtitle */
710 auto sub_p = rect->data[0];
711 /* sub_p looks up into a BGRA palette which is at rect->data[1].
712 (first byte B, second G, third R, fourth A)
714 auto const* palette = rect->data[1];
716 /* And the stream has a map of those palette colours to colours
717 chosen by the user; created a `mapped' palette from those settings.
719 auto colour_map = ffmpeg_content()->subtitle_stream()->colours();
720 vector<RGBA> mapped_palette (rect->nb_colors);
721 for (int i = 0; i < rect->nb_colors; ++i) {
/* palette entries are BGRA in memory, so reorder bytes to build an RGBA;
   the palette pointer advance (palette += 4) is on a missing line */
722 RGBA c (palette[2], palette[1], palette[0], palette[3]);
723 auto j = colour_map.find (c);
724 if (j != colour_map.end ()) {
725 mapped_palette[i] = j->second;
727 /* This colour was not found in the FFmpegSubtitleStream's colour map; probably because
728 it is from a project that was created before this stuff was added. Just use the
729 colour straight from the original palette.
731 mapped_palette[i] = c;
736 /* Start of the output data */
737 auto out_p = image->data()[0];
/* Expand the palettised bitmap into BGRA, one row at a time (per-pixel
   writes of p's components are on missing lines) */
739 for (int y = 0; y < rect->h; ++y) {
740 auto sub_line_p = sub_p;
741 auto out_line_p = out_p;
742 for (int x = 0; x < rect->w; ++x) {
743 auto const p = mapped_palette[*sub_line_p++];
749 #ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
750 sub_p += rect->pict.linesize[0];
752 sub_p += rect->linesize[0];
754 out_p += image->stride()[0];
/* Work out the frame size to express the rect proportionally against;
   fall back to the video size when the subtitle context reports 0 */
757 int target_width = subtitle_codec_context()->width;
758 if (target_width == 0 && video_codec_context()) {
759 /* subtitle_codec_context()->width == 0 has been seen in the wild but I don't
760 know if it's supposed to mean something from FFmpeg's point of view.
762 target_width = video_codec_context()->width;
764 int target_height = subtitle_codec_context()->height;
765 if (target_height == 0 && video_codec_context()) {
766 target_height = video_codec_context()->height;
768 DCPOMATIC_ASSERT (target_width);
769 DCPOMATIC_ASSERT (target_height);
770 dcpomatic::Rect<double> const scaled_rect (
771 static_cast<double>(rect->x) / target_width,
772 static_cast<double>(rect->y) / target_height,
773 static_cast<double>(rect->w) / target_width,
774 static_cast<double>(rect->h) / target_height
777 return { image, scaled_rect };
/** Parse one FFmpeg-produced ASS "Dialogue:" line and emit the resulting
 *  subtitles starting at the given time.  (This function is truncated at the
 *  end of this excerpt; comments cover only the visible part.)
 *  @param ass Raw ASS line from AVSubtitleRect::ass.
 *  @param from Start time of the subtitle in ContentTime.
 */
782 FFmpegDecoder::process_ass_subtitle (string ass, ContentTime from)
784 /* We have no styles and no Format: line, so I'm assuming that FFmpeg
785 produces a single format of Dialogue: lines...
/* Skip the first nine comma-separated fields to reach the subtitle text
   (the `commas` counter and text accumulation are on missing lines) */
790 for (size_t i = 0; i < ass.length(); ++i) {
791 if (commas < 9 && ass[i] == ',') {
793 } else if (commas == 9) {
/* Parse the extracted text with libsub's SSA reader, using the video
   size so positions can be expressed proportionally */
802 sub::RawSubtitle base;
803 auto raw = sub::SSAReader::parse_line (
806 _ffmpeg_content->video->size().width,
807 _ffmpeg_content->video->size().height,
/* Group raw results into Subtitles and emit each with its start time */
811 for (auto const& i: sub::collect<vector<sub::Subtitle>>(raw)) {
812 only_text()->emit_plain_start (from, i);