/*
    Copyright (C) 2012-2018 Carl Hetherington <cth@carlh.net>

    This file is part of DCP-o-matic.

    DCP-o-matic is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    DCP-o-matic is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.

*/

/** @file  src/ffmpeg_decoder.cc
 *  @brief A decoder using FFmpeg to decode content.
 */

#include "audio_buffers.h"
#include "audio_content.h"
#include "audio_decoder.h"
#include "compose.hpp"
#include "dcpomatic_log.h"
#include "exceptions.h"
#include "ffmpeg_audio_stream.h"
#include "ffmpeg_content.h"
#include "ffmpeg_decoder.h"
#include "ffmpeg_subtitle_stream.h"
#include "film.h"
#include "filter.h"
#include "frame_interval_checker.h"
#include "image.h"
#include "log.h"
#include "raw_image_proxy.h"
#include "text_content.h"
#include "text_decoder.h"
#include "util.h"
#include "video_decoder.h"
#include "video_filter_graph.h"
#include <dcp/subtitle_string.h>
#include <sub/ssa_reader.h>
#include <sub/subtitle.h>
#include <sub/collect.h>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
}
#include <boost/algorithm/string.hpp>
#include <cstring>
#include <iostream>

#include "i18n.h"

using std::cout;
using std::dynamic_pointer_cast;
using std::make_shared;
using std::min;
using std::shared_ptr;
using std::string;
using std::vector;
using boost::optional;
using namespace dcpomatic;

FFmpegDecoder::FFmpegDecoder (shared_ptr<const Film> film, shared_ptr<const FFmpegContent> c, bool fast)
	: FFmpeg (c)
	, Decoder (film)
{
	if (c->video && c->video->use()) {
		video = make_shared<VideoDecoder>(this, c);
		_pts_offset = pts_offset (c->ffmpeg_audio_streams(), c->first_video(), c->active_video_frame_rate(film));
		/* It doesn't matter what size or pixel format this is, it just needs to be black */
		_black_image = make_shared<Image>(AV_PIX_FMT_RGB24, dcp::Size (128, 128), Image::Alignment::PADDED);
		_black_image->make_black ();
	} else {
		_pts_offset = {};
	}

	if (c->audio) {
		audio = make_shared<AudioDecoder>(this, c->audio, fast);
	}

	if (c->only_text()) {
		text.push_back (make_shared<TextDecoder>(this, c->only_text()));
		/* XXX: we should be calling maybe_set_position() on this TextDecoder, but we can't easily find
		 * the time of the first subtitle at this point.
		 */
	}

	for (auto i: c->ffmpeg_audio_streams()) {
		_next_time[i] = boost::optional<dcpomatic::ContentTime>();
	}
}


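/** Drain the video and audio codecs at end-of-stream, then pad every stream out to
 *  the (rounded-up) full length with black frames and silence.  As the comments below
 *  suggest, a return of false means "call me again"; true means everything has been
 *  emitted and the decode is finished.
 */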
bool
FFmpegDecoder::flush ()
{
	/* Flush video and audio once */

	bool did_something = false;
	if (video) {
		if (decode_and_process_video_packet(nullptr)) {
			did_something = true;
		}
	}

	for (auto i: ffmpeg_content()->ffmpeg_audio_streams()) {
		auto context = _codec_context[i->index(_format_context)];
		int r = avcodec_send_packet (context, nullptr);
		if (r < 0 && r != AVERROR_EOF) {
			/* EOF can happen if we've already sent a flush packet */
			throw DecodeError (N_("avcodec_send_packet"), N_("FFmpegDecoder::flush"), r);
		}
		r = avcodec_receive_frame (context, audio_frame(i));
		if (r >= 0) {
			process_audio_frame (i);
			did_something = true;
		}
	}

	if (did_something) {
		/* We want to be called again */
		return false;
	}

	/* Make sure all streams are the same length and round up to the next video frame */

	auto const frc = film()->active_frame_rate_change(_ffmpeg_content->position());
	ContentTime full_length (_ffmpeg_content->full_length(film()), frc);
	full_length = full_length.ceil (frc.source);
	if (video) {
		double const vfr = _ffmpeg_content->video_frame_rate().get();
		auto const f = full_length.frames_round (vfr);
		auto v = video->position(film()).get_value_or(ContentTime()).frames_round(vfr) + 1;
		while (v < f) {
			video->emit (film(), make_shared<const RawImageProxy>(_black_image), v);
			++v;
		}
	}

	for (auto i: _ffmpeg_content->ffmpeg_audio_streams ()) {
		auto a = audio->stream_position(film(), i);
		/* Unfortunately if a is 0 that really means that we don't know the stream position since
		   there has been no data on it since the last seek.  In this case we'll just do nothing
		   here.  I'm not sure if that's the right idea.
		*/
		if (a > ContentTime()) {
			while (a < full_length) {
				auto to_do = min (full_length - a, ContentTime::from_seconds (0.1));
				auto silence = make_shared<AudioBuffers>(i->channels(), to_do.frames_ceil (i->frame_rate()));
				silence->make_silent ();
				audio->emit (film(), i, silence, a, true);
				a += to_do;
			}
		}
	}

	return true;
}


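/** Read one packet from the content and dispatch it to the video, subtitle or audio
 *  decoder as appropriate.  @return true once there is nothing left to do, i.e. when
 *  we hit end-of-file (or an unrecoverable read error) and flush() reports that it
 *  has emitted everything; false otherwise.
 */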
bool
FFmpegDecoder::pass ()
{
	auto packet = av_packet_alloc();
	DCPOMATIC_ASSERT (packet);

	int r = av_read_frame (_format_context, packet);

	/* AVERROR_INVALIDDATA can apparently be returned sometimes even when av_read_frame
	   has pretty-much succeeded (and hence generated data which should be processed).
	   Hence it makes sense to continue here in that case.
	*/
	if (r < 0 && r != AVERROR_INVALIDDATA) {
		if (r != AVERROR_EOF) {
			/* Maybe we should fail here, but for now we'll just finish off instead */
			char buf[256];
			av_strerror (r, buf, sizeof(buf));
			LOG_ERROR (N_("error on av_read_frame (%1) (%2)"), &buf[0], r);
		}

		av_packet_free (&packet);
		return flush ();
	}

	int const si = packet->stream_index;
	auto fc = _ffmpeg_content;

	if (_video_stream && si == _video_stream.get() && video && !video->ignore()) {
		decode_and_process_video_packet (packet);
	} else if (fc->subtitle_stream() && fc->subtitle_stream()->uses_index(_format_context, si) && !only_text()->ignore()) {
		decode_and_process_subtitle_packet (packet);
	} else {
		decode_and_process_audio_packet (packet);
	}

	av_packet_free (&packet);
	return false;
}


/** @param data pointer to array of pointers to buffers.
 *  Only the first buffer will be used for non-planar data, otherwise there will be one per channel.
 */
shared_ptr<AudioBuffers>
FFmpegDecoder::deinterleave_audio (AVFrame* frame)
{
	auto format = static_cast<AVSampleFormat>(frame->format);

	/* XXX: can't we use swr_convert() to do the format conversion? */

	int const channels = frame->channels;
	int const frames = frame->nb_samples;
	int const total_samples = frames * channels;
	auto audio = make_shared<AudioBuffers>(channels, frames);
	auto data = audio->data();

	switch (format) {
	case AV_SAMPLE_FMT_U8:
	{
		auto p = reinterpret_cast<uint8_t *> (frame->data[0]);
		int sample = 0;
		int channel = 0;
		for (int i = 0; i < total_samples; ++i) {
			/* U8 samples are unsigned and centred on 0x80, so shift and scale to [-1, 1) */
			data[channel][sample] = (float(*p++) - 0x80) / 0x80;
			++channel;
			if (channel == channels) {
				channel = 0;
				++sample;
			}
		}
	}
	break;

	case AV_SAMPLE_FMT_S16:
	{
		auto p = reinterpret_cast<int16_t *> (frame->data[0]);
		int sample = 0;
		int channel = 0;
		for (int i = 0; i < total_samples; ++i) {
			data[channel][sample] = float(*p++) / (1 << 15);
			++channel;
			if (channel == channels) {
				channel = 0;
				++sample;
			}
		}
	}
	break;

	case AV_SAMPLE_FMT_S16P:
	{
		auto p = reinterpret_cast<int16_t **> (frame->data);
		for (int i = 0; i < channels; ++i) {
			for (int j = 0; j < frames; ++j) {
				data[i][j] = static_cast<float>(p[i][j]) / (1 << 15);
			}
		}
	}
	break;

	case AV_SAMPLE_FMT_S32:
	{
		auto p = reinterpret_cast<int32_t *> (frame->data[0]);
		int sample = 0;
		int channel = 0;
		for (int i = 0; i < total_samples; ++i) {
			data[channel][sample] = static_cast<float>(*p++) / 2147483648;
			++channel;
			if (channel == channels) {
				channel = 0;
				++sample;
			}
		}
	}
	break;

	case AV_SAMPLE_FMT_S32P:
	{
		auto p = reinterpret_cast<int32_t **> (frame->data);
		for (int i = 0; i < channels; ++i) {
			for (int j = 0; j < frames; ++j) {
				data[i][j] = static_cast<float>(p[i][j]) / 2147483648;
			}
		}
	}
	break;

	case AV_SAMPLE_FMT_FLT:
	{
		auto p = reinterpret_cast<float*> (frame->data[0]);
		int sample = 0;
		int channel = 0;
		for (int i = 0; i < total_samples; ++i) {
			data[channel][sample] = *p++;
			++channel;
			if (channel == channels) {
				channel = 0;
				++sample;
			}
		}
	}
	break;

	case AV_SAMPLE_FMT_FLTP:
	{
		auto p = reinterpret_cast<float**> (frame->data);
		DCPOMATIC_ASSERT (frame->channels <= channels);
		/* Sometimes there aren't as many channels in the frame as in the stream */
		for (int i = 0; i < frame->channels; ++i) {
			memcpy (data[i], p[i], frames * sizeof(float));
		}
		for (int i = frame->channels; i < channels; ++i) {
			audio->make_silent (i);
		}
	}
	break;

	default:
		throw DecodeError (String::compose(_("Unrecognised audio sample format (%1)"), static_cast<int>(format)));
	}

	return audio;
}


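/* A note on the two loop shapes above: for interleaved formats (e.g. AV_SAMPLE_FMT_S16)
 * frame->data[0] holds the samples as L0 R0 L1 R1 ... for stereo, hence the single loop
 * with a wrapping channel counter; for planar formats (the _P variants) frame->data[c]
 * holds the whole of channel c (L0 L1 ... in data[0], R0 R1 ... in data[1]), hence the
 * per-channel loops or a straight memcpy.
 */

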
AVSampleFormat
FFmpegDecoder::audio_sample_format (shared_ptr<FFmpegAudioStream> stream) const
{
	return static_cast<AVSampleFormat>(stream->stream(_format_context)->codecpar->format);
}


int
FFmpegDecoder::bytes_per_audio_sample (shared_ptr<FFmpegAudioStream> stream) const
{
	return av_get_bytes_per_sample (audio_sample_format (stream));
}


void
FFmpegDecoder::seek (ContentTime time, bool accurate)
{
	Decoder::seek (time, accurate);

	/* If we are doing an `accurate' seek, we need to use pre-roll, as
	   we don't really know what the seek will give us.
	*/
	auto pre_roll = accurate ? ContentTime::from_seconds (2) : ContentTime (0);
	time -= pre_roll;

	/* XXX: it seems debatable whether PTS should be used here...
	   http://www.mjbshaw.com/2012/04/seeking-in-ffmpeg-know-your-timestamp.html
	*/

	optional<int> stream;

	if (_video_stream) {
		stream = _video_stream;
	} else {
		DCPOMATIC_ASSERT (_ffmpeg_content->audio);
		auto s = dynamic_pointer_cast<FFmpegAudioStream>(_ffmpeg_content->audio->stream());
		if (s) {
			stream = s->index (_format_context);
		}
	}

	DCPOMATIC_ASSERT (stream);

	auto u = time - _pts_offset;
	if (u < ContentTime ()) {
		u = ContentTime ();
	}
	av_seek_frame (
		_format_context,
		stream.get(),
		u.seconds() / av_q2d (_format_context->streams[stream.get()]->time_base),
		AVSEEK_FLAG_BACKWARD
		);

	{
		/* Force re-creation of filter graphs to reset them and hence to make sure
		   they don't have any pre-seek frames knocking about.
		*/
		boost::mutex::scoped_lock lm (_filter_graphs_mutex);
		_filter_graphs.clear ();
	}

	if (video_codec_context ()) {
		avcodec_flush_buffers (video_codec_context());
	}

	for (auto i: ffmpeg_content()->ffmpeg_audio_streams()) {
		avcodec_flush_buffers (_codec_context[i->index(_format_context)]);
	}

	if (subtitle_codec_context ()) {
		avcodec_flush_buffers (subtitle_codec_context ());
	}

	_have_current_subtitle = false;

	for (auto& i: _next_time) {
		i.second = boost::optional<dcpomatic::ContentTime>();
	}
}


shared_ptr<FFmpegAudioStream>
FFmpegDecoder::audio_stream_from_index (int index) const
{
	/* XXX: inefficient */
	auto streams = ffmpeg_content()->ffmpeg_audio_streams();
	auto stream = streams.begin();
	while (stream != streams.end() && !(*stream)->uses_index(_format_context, index)) {
		++stream;
	}

	if (stream == streams.end ()) {
		return {};
	}

	return *stream;
}


void
FFmpegDecoder::process_audio_frame (shared_ptr<FFmpegAudioStream> stream)
{
	auto frame = audio_frame (stream);
	auto data = deinterleave_audio (frame);

	auto const time_base = stream->stream(_format_context)->time_base;

	ContentTime ct;
	if (frame->pts == AV_NOPTS_VALUE) {
		/* In some streams we see not every frame coming through with a timestamp; for those
		   that have AV_NOPTS_VALUE we need to work out the timestamp ourselves.  This is
		   particularly noticeable with TrueHD streams (see #1111).
		*/
		if (_next_time[stream]) {
			ct = *_next_time[stream];
		}
	} else {
		ct = ContentTime::from_seconds (
			frame->best_effort_timestamp *
			av_q2d (time_base))
			+ _pts_offset;
		LOG_DEBUG_PLAYER(
			"Process audio with timestamp %1 (BET %2, timebase %3/%4, PTS offset %5)",
			to_string(ct),
			frame->best_effort_timestamp,
			time_base.num,
			time_base.den,
			to_string(_pts_offset)
			);
	}

	_next_time[stream] = ct + ContentTime::from_frames(data->frames(), stream->frame_rate());

	if (ct < ContentTime()) {
		/* Discard audio data that comes before time 0 */
		auto const remove = min (int64_t(data->frames()), (-ct).frames_ceil(double(stream->frame_rate())));
		data->move (data->frames() - remove, remove, 0);
		data->set_frames (data->frames() - remove);
		ct += ContentTime::from_frames (remove, stream->frame_rate());
	}

	if (ct < ContentTime()) {
		LOG_WARNING (
			"Crazy timestamp %1 for %2 samples in stream %3 (ts=%4 tb=%5, off=%6)",
			to_string(ct),
			data->frames(),
			stream->name,
			frame->best_effort_timestamp,
			av_q2d(time_base),
			to_string(_pts_offset)
			);
	}

	/* Give this data provided there is some, and its time is sane */
	if (ct >= ContentTime() && data->frames() > 0) {
		audio->emit (film(), stream, data, ct);
	}
}


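/* A worked example of the timestamp handling above: with time_base 1/48000 and
 * best_effort_timestamp 96000, the frame is at 96000 * (1/48000) = 2s, and adding
 * _pts_offset shifts it onto the content's own timeline.  A 1024-sample frame at
 * 48kHz then advances _next_time by 1024/48000 ≈ 21.3ms, which stands in for the
 * timestamp of any following frame that arrives with AV_NOPTS_VALUE.
 */

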
void
FFmpegDecoder::decode_and_process_audio_packet (AVPacket* packet)
{
	auto stream = audio_stream_from_index (packet->stream_index);
	if (!stream) {
		return;
	}

	auto context = _codec_context[stream->index(_format_context)];
	auto frame = audio_frame (stream);

	LOG_DEBUG_PLAYER("Send audio packet on stream %1", stream->index(_format_context));
	int r = avcodec_send_packet (context, packet);
	if (r < 0) {
		LOG_WARNING("avcodec_send_packet returned %1 for an audio packet", r);
	}
	while (r >= 0) {
		r = avcodec_receive_frame (context, frame);
		if (r == AVERROR(EAGAIN)) {
			/* More input is required */
			LOG_DEBUG_PLAYER_NC("EAGAIN after trying to receive audio frame");
			return;
		}

		/* We choose to be relaxed here about other errors; it seems that there may be valid
		 * data to decode even if an error occurred.  #352 may be related (though this was
		 * when we were using an old version of the FFmpeg API).
		 */
		process_audio_frame (stream);
	}
}


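/* A note on the send/receive pattern used above and below: with the current FFmpeg
 * API a single avcodec_send_packet() can yield zero or more frames, so we keep
 * calling avcodec_receive_frame() until it returns AVERROR(EAGAIN) (more input
 * needed) or AVERROR_EOF (codec fully drained).  Sending a null packet puts the
 * codec into draining mode, which is how flush() empties it at end-of-stream.
 */

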
bool
FFmpegDecoder::decode_and_process_video_packet (AVPacket* packet)
{
	DCPOMATIC_ASSERT (_video_stream);

	auto context = video_codec_context();

	bool pending = false;
	do {
		int r = avcodec_send_packet (context, packet);
		if (r < 0) {
			LOG_WARNING("avcodec_send_packet returned %1 for a video packet", r);
		}

		/* EAGAIN means we should call avcodec_receive_frame and then re-send the same packet */
		pending = r == AVERROR(EAGAIN);

		while (true) {
			r = avcodec_receive_frame (context, _video_frame);
			if (r == AVERROR(EAGAIN) || r == AVERROR_EOF || (r < 0 && !packet)) {
				/* More input is required, no more frames are coming, or we are flushing and there was
				 * some error which we just want to ignore.
				 */
				return false;
			} else if (r < 0) {
				throw DecodeError (N_("avcodec_receive_frame"), N_("FFmpeg::decode_and_process_video_packet"), r);
			}

			process_video_frame ();
		}
	} while (pending);

	return true;
}


void
FFmpegDecoder::process_video_frame ()
{
	boost::mutex::scoped_lock lm (_filter_graphs_mutex);

	shared_ptr<VideoFilterGraph> graph;

	auto i = _filter_graphs.begin();
	while (i != _filter_graphs.end() && !(*i)->can_process(dcp::Size(_video_frame->width, _video_frame->height), (AVPixelFormat) _video_frame->format)) {
		++i;
	}

	if (i == _filter_graphs.end ()) {
		dcp::Fraction vfr (lrint(_ffmpeg_content->video_frame_rate().get() * 1000), 1000);
		graph = make_shared<VideoFilterGraph>(dcp::Size(_video_frame->width, _video_frame->height), (AVPixelFormat) _video_frame->format, vfr);
		graph->setup (_ffmpeg_content->filters ());
		_filter_graphs.push_back (graph);
		LOG_GENERAL (N_("New graph for %1x%2, pixel format %3"), _video_frame->width, _video_frame->height, _video_frame->format);
	} else {
		graph = *i;
	}

	auto images = graph->process (_video_frame);

	for (auto const& i: images) {
		auto image = i.first;

		if (i.second != AV_NOPTS_VALUE) {
			double const pts = i.second * av_q2d(_format_context->streams[_video_stream.get()]->time_base) + _pts_offset.seconds();

			video->emit (
				film(),
				make_shared<RawImageProxy>(image),
				llrint(pts * _ffmpeg_content->active_video_frame_rate(film()))
				);
		} else {
			LOG_WARNING_NC ("Dropping frame without PTS");
		}
	}
}


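/* The filter graphs above are cached per (size, pixel format) pair: a graph built
 * for one geometry cannot process frames with another, and those properties can
 * change mid-stream, hence the can_process() check before a graph is reused.
 */

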
void
FFmpegDecoder::decode_and_process_subtitle_packet (AVPacket* packet)
{
	int got_subtitle;
	AVSubtitle sub;
	if (avcodec_decode_subtitle2 (subtitle_codec_context(), &sub, &got_subtitle, packet) < 0 || !got_subtitle) {
		return;
	}

	auto sub_period = subtitle_period (packet, ffmpeg_content()->subtitle_stream()->stream(_format_context), sub);

	/* Stop any current subtitle, either at the time it was supposed to stop, or now if now is sooner */
	if (_have_current_subtitle) {
		if (_current_subtitle_to) {
			only_text()->emit_stop (min(*_current_subtitle_to, sub_period.from + _pts_offset));
		} else {
			only_text()->emit_stop (sub_period.from + _pts_offset);
		}
		_have_current_subtitle = false;
	}

	if (sub.num_rects <= 0) {
		/* Nothing new in this subtitle */
		avsubtitle_free (&sub);
		return;
	}

	/* Subtitle PTS (within the source, not taking into account any of the
	   source that we may have chopped off for the DCP).
	*/
	ContentTime from;
	from = sub_period.from + _pts_offset;
	if (sub_period.to) {
		_current_subtitle_to = *sub_period.to + _pts_offset;
	} else {
		_current_subtitle_to = optional<ContentTime>();
		_have_current_subtitle = true;
	}

	ContentBitmapText bitmap_text(from);
	for (unsigned int i = 0; i < sub.num_rects; ++i) {
		auto const rect = sub.rects[i];

		switch (rect->type) {
		case SUBTITLE_NONE:
			break;
		case SUBTITLE_BITMAP:
			bitmap_text.subs.push_back(process_bitmap_subtitle(rect));
			break;
		case SUBTITLE_TEXT:
			cout << "XXX: SUBTITLE_TEXT " << rect->text << "\n";
			break;
		case SUBTITLE_ASS:
			process_ass_subtitle (rect->ass, from);
			break;
		}
	}

	if (!bitmap_text.subs.empty()) {
		only_text()->emit_bitmap_start(bitmap_text);
	}

	if (_current_subtitle_to) {
		only_text()->emit_stop (*_current_subtitle_to);
	}

	avsubtitle_free (&sub);
}


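/* Bitmap subtitles (DVD/DVB-style) arrive as palettised images: each pixel is an
 * index into a small BGRA palette carried with the rect.  The function below expands
 * that into a full BGRA image, applying any palette-colour overrides the user has set
 * on the FFmpegSubtitleStream, and expresses the rect's position as proportions of
 * the frame so that later stages are independent of the source resolution.
 */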
BitmapText
FFmpegDecoder::process_bitmap_subtitle (AVSubtitleRect const * rect)
{
	/* Note BGRA is expressed little-endian, so the first byte in the word is B, second
	   G, third R, fourth A.
	*/
	auto image = make_shared<Image>(AV_PIX_FMT_BGRA, dcp::Size (rect->w, rect->h), Image::Alignment::PADDED);

#ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
	/* Start of the first line in the subtitle */
	auto sub_p = rect->pict.data[0];
	/* sub_p looks up into a BGRA palette which is at rect->pict.data[1];
	   (i.e. first byte B, second G, third R, fourth A)
	*/
	auto const palette = rect->pict.data[1];
#else
	/* Start of the first line in the subtitle */
	auto sub_p = rect->data[0];
	/* sub_p looks up into a BGRA palette which is at rect->data[1].
	   (first byte B, second G, third R, fourth A)
	*/
	auto const* palette = rect->data[1];
#endif
	/* And the stream has a map of those palette colours to colours
	   chosen by the user; create a `mapped' palette from those settings.
	*/
	auto colour_map = ffmpeg_content()->subtitle_stream()->colours();
	vector<RGBA> mapped_palette (rect->nb_colors);
	for (int i = 0; i < rect->nb_colors; ++i) {
		RGBA c (palette[2], palette[1], palette[0], palette[3]);
		auto j = colour_map.find (c);
		if (j != colour_map.end ()) {
			mapped_palette[i] = j->second;
		} else {
			/* This colour was not found in the FFmpegSubtitleStream's colour map; probably because
			   it is from a project that was created before this stuff was added.  Just use the
			   colour straight from the original palette.
			*/
			mapped_palette[i] = c;
		}
		palette += 4;
	}

	/* Start of the output data */
	auto out_p = image->data()[0];

	for (int y = 0; y < rect->h; ++y) {
		auto sub_line_p = sub_p;
		auto out_line_p = out_p;
		for (int x = 0; x < rect->w; ++x) {
			auto const p = mapped_palette[*sub_line_p++];
			*out_line_p++ = p.b;
			*out_line_p++ = p.g;
			*out_line_p++ = p.r;
			*out_line_p++ = p.a;
		}
#ifdef DCPOMATIC_HAVE_AVSUBTITLERECT_PICT
		sub_p += rect->pict.linesize[0];
#else
		sub_p += rect->linesize[0];
#endif
		out_p += image->stride()[0];
	}

	int target_width = subtitle_codec_context()->width;
	if (target_width == 0 && video_codec_context()) {
		/* subtitle_codec_context()->width == 0 has been seen in the wild but I don't
		   know if it's supposed to mean something from FFmpeg's point of view.
		*/
		target_width = video_codec_context()->width;
	}
	int target_height = subtitle_codec_context()->height;
	if (target_height == 0 && video_codec_context()) {
		target_height = video_codec_context()->height;
	}
	DCPOMATIC_ASSERT (target_width);
	DCPOMATIC_ASSERT (target_height);
	dcpomatic::Rect<double> const scaled_rect (
		static_cast<double>(rect->x) / target_width,
		static_cast<double>(rect->y) / target_height,
		static_cast<double>(rect->w) / target_width,
		static_cast<double>(rect->h) / target_height
		);

	return { image, scaled_rect };
}


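/* FFmpeg hands ASS subtitles to us as raw Dialogue: lines.  For example (the exact
 * field layout here is an assumption, based on the single fixed format that FFmpeg
 * appears to produce, as the comment below notes):
 *
 *   Dialogue: 0,0:00:01.00,0:00:04.00,Default,,0,0,0,,Hello\Nworld
 *
 * The text payload is everything after the ninth comma, which is what the
 * comma-counting loop below extracts before handing it to libsub's SSAReader.
 */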
void
FFmpegDecoder::process_ass_subtitle (string ass, ContentTime from)
{
	/* We have no styles and no Format: line, so I'm assuming that FFmpeg
	   produces a single format of Dialogue: lines...
	*/

	int commas = 0;
	string text;
	for (size_t i = 0; i < ass.length(); ++i) {
		if (commas < 9 && ass[i] == ',') {
			++commas;
		} else if (commas == 9) {
			text += ass[i];
		}
	}

	if (text.empty ()) {
		return;
	}

	sub::RawSubtitle base;
	auto raw = sub::SSAReader::parse_line (
		base,
		text,
		_ffmpeg_content->video->size().width,
		_ffmpeg_content->video->size().height
		);

	for (auto const& i: sub::collect<vector<sub::Subtitle>>(raw)) {
		only_text()->emit_plain_start (from, i);
	}
}