X-Git-Url: https://git.carlh.net/gitweb/?p=dcpomatic.git;a=blobdiff_plain;f=src%2Flib%2Fvideo_decoder.cc;h=cf21f885a3fbc6af45ba2efde1cfc18ad645eb9b;hp=0e9ee0c1a874d1e9ffe1f7e0531b15369f431d83;hb=c103d8c1306e5fb3937b3a6c430a3fff32653fa3;hpb=936ff6927ad24daf0ed20776b19b6faa3df9bf83 diff --git a/src/lib/video_decoder.cc b/src/lib/video_decoder.cc index 0e9ee0c1a..cf21f885a 100644 --- a/src/lib/video_decoder.cc +++ b/src/lib/video_decoder.cc @@ -1,5 +1,5 @@ /* - Copyright (C) 2012-2016 Carl Hetherington + Copyright (C) 2012-2021 Carl Hetherington This file is part of DCP-o-matic. @@ -18,377 +18,174 @@ */ -#include "video_decoder.h" -#include "image.h" -#include "raw_image_proxy.h" + +#include "compose.hpp" #include "film.h" +#include "frame_interval_checker.h" +#include "image.h" +#include "j2k_image_proxy.h" #include "log.h" -#include "compose.hpp" -#include +#include "raw_image_proxy.h" +#include "video_decoder.h" #include #include "i18n.h" + using std::cout; -using std::list; -using std::max; -using std::back_inserter; -using boost::shared_ptr; -using boost::optional; +using std::dynamic_pointer_cast; +using std::shared_ptr; +using namespace dcpomatic; -VideoDecoder::VideoDecoder (Decoder* parent, shared_ptr c, shared_ptr log) - : DecoderPart (parent, log) -#ifdef DCPOMATIC_DEBUG - , test_gaps (0) -#endif - , _content (c) - , _last_seek_accurate (true) -{ - _black_image.reset (new Image (AV_PIX_FMT_RGB24, _content->video->size(), true)); - _black_image->make_black (); -} -list -VideoDecoder::decoded (Frame frame) +VideoDecoder::VideoDecoder (Decoder* parent, shared_ptr c) + : DecoderPart (parent) + , _content (c) + , _frame_interval_checker (new FrameIntervalChecker()) { - list output; - BOOST_FOREACH (ContentVideo const & i, _decoded) { - if (i.frame.index() == frame) { - output.push_back (i); - } - } - - return output; } -/** Get all frames which exist in the content at a given frame index. - * @param frame Frame index. - * @param accurate true to try hard to return frames at the precise time that was requested, otherwise frames nearby may be returned. - * @return Frames; there may be none (if there is no video there), 1 for 2D or 2 for 3D. + +/** Called by decoder classes when they have a video frame ready. + * @param frame Frame index within the content; this does not take into account 3D + * so for 3D_ALTERNATE this value goes: + * 0: frame 0 left + * 1: frame 0 right + * 2: frame 1 left + * 3: frame 1 right + * and so on. */ -list -VideoDecoder::get (Frame frame, bool accurate) +void +VideoDecoder::emit (shared_ptr film, shared_ptr image, Frame decoder_frame) { - if (_no_data_frame && frame >= _no_data_frame.get()) { - return list (); + if (ignore ()) { + return; } - _log->log (String::compose ("VD has request for %1", frame), LogEntry::TYPE_DEBUG_DECODE); + auto const afr = _content->active_video_frame_rate(film); + auto const vft = _content->video->frame_type(); - /* See if we have frame, and suggest a seek if not */ + auto frame_time = ContentTime::from_frames (decoder_frame, afr); - list::const_iterator i = _decoded.begin (); - while (i != _decoded.end() && i->frame.index() != frame) { - _log->log (String::compose ("VD has stored %1 which is no good", i->frame.index()), LogEntry::TYPE_DEBUG_DECODE); - ++i; - } - - if (i == _decoded.end()) { - Frame seek_frame = frame; - if (_content->video->frame_type() == VIDEO_FRAME_TYPE_3D_ALTERNATE) { - /* 3D alternate is a special case as the frame index in the content is not the same - as the frame index we are talking about here. 
- */ - seek_frame *= 2; + /* Do some heuristics to try and spot the case where the user sets content to 3D + * when it is not. We try to tell this by looking at the differences in time between + * the first few frames. Real 3D content should have two frames for each timestamp. + */ + if (_frame_interval_checker) { + _frame_interval_checker->feed (frame_time, afr); + if (_frame_interval_checker->guess() == FrameIntervalChecker::PROBABLY_NOT_3D && vft == VideoFrameType::THREE_D) { + boost::throw_exception ( + DecodeError( + String::compose( + _("The content file %1 is set as 3D but does not appear to contain 3D images. Please set it to 2D. " + "You can still make a 3D DCP from this content by ticking the 3D option in the DCP video tab."), + _content->path(0) + ) + ) + ); } - _log->log (String::compose ("VD suggests seek to %1", seek_frame), LogEntry::TYPE_DEBUG_DECODE); - maybe_seek (ContentTime::from_frames (seek_frame, _content->active_video_frame_rate()), accurate); - } - /* Work out the number of frames that we should return; we - must return all frames in our content at the requested `time' - (i.e. frame) - */ - unsigned int frames_wanted = 0; - switch (_content->video->frame_type()) { - case VIDEO_FRAME_TYPE_2D: - case VIDEO_FRAME_TYPE_3D_LEFT: - case VIDEO_FRAME_TYPE_3D_RIGHT: - frames_wanted = 1; - break; - case VIDEO_FRAME_TYPE_3D: - case VIDEO_FRAME_TYPE_3D_ALTERNATE: - case VIDEO_FRAME_TYPE_3D_LEFT_RIGHT: - case VIDEO_FRAME_TYPE_3D_TOP_BOTTOM: - frames_wanted = 2; - break; - default: - DCPOMATIC_ASSERT (false); + if (_frame_interval_checker->guess() != FrameIntervalChecker::AGAIN) { + _frame_interval_checker.reset (); + } } - list dec; - - /* Now enough pass() calls should either: - * (a) give us what we want, or - * (b) give us something after what we want, indicating that we will never get what we want, or - * (c) hit the end of the decoder. - */ - if (accurate) { - /* We are being accurate, so we want the right frame. - * This could all be one statement but it's split up for clarity. - */ - bool no_data = false; - - while (true) { - if (decoded(frame).size() == frames_wanted) { - /* We got what we want */ - break; - } - - if (_parent->pass (Decoder::PASS_REASON_VIDEO, accurate)) { - /* The decoder has nothing more for us */ - no_data = true; - break; - } + Frame frame; + Eyes eyes = Eyes::BOTH; + if (!_position) { + /* This is the first data we have received since initialisation or seek. Set + the position based on the frame that was given. After this first time + we just count frames, since (as with audio) it seems that ContentTimes + are unreliable from FFmpegDecoder. They are much better than audio times + but still we get the occasional one which is duplicated. In this case + ffmpeg seems to carry on regardless, processing the video frame as normal. + If we drop the frame with the duplicated timestamp we obviously lose sync. + */ - if (!_decoded.empty() && _decoded.front().frame.index() > frame) { - /* We're never going to get the frame we want. Perhaps the caller is asking - * for a video frame before the content's video starts (if its audio - * begins before its video, for example). + if (vft == VideoFrameType::THREE_D_ALTERNATE) { + frame = decoder_frame / 2; + eyes = (decoder_frame % 2) ? Eyes::RIGHT : Eyes::LEFT; + } else { + frame = decoder_frame; + if (vft == VideoFrameType::THREE_D) { + auto j2k = dynamic_pointer_cast(image); + /* At the moment only DCP decoders producers VideoFrameType::THREE_D, so only the J2KImageProxy + * knows which eye it is. 
*/ - break; + if (j2k && j2k->eye()) { + eyes = j2k->eye().get() == dcp::Eye::LEFT ? Eyes::LEFT : Eyes::RIGHT; + } } } - dec = decoded (frame); - - if (no_data && dec.empty()) { - _no_data_frame = frame; - } - + _position = ContentTime::from_frames (frame, afr); } else { - /* Any frame(s) will do: use the first one(s) that comes out of pass() */ - while (_decoded.size() < frames_wanted && !_parent->pass (Decoder::PASS_REASON_VIDEO, accurate)) {} - list::const_iterator i = _decoded.begin(); - unsigned int j = 0; - while (i != _decoded.end() && j < frames_wanted) { - dec.push_back (*i); - ++i; - ++j; - } - } - - /* Clean up _decoded; keep the frame we are returning, if any (which may have two images - for 3D), but nothing before that - */ - while (!_decoded.empty() && !dec.empty() && _decoded.front().frame.index() < dec.front().frame.index()) { - _log->log (String::compose ("VD discards %1", _decoded.front().frame.index()), LogEntry::TYPE_DEBUG_DECODE); - _decoded.pop_front (); - } - - return dec; -} - -/** Fill _decoded from `from' up to, but not including, `to' with - * a frame for one particular Eyes value (which could be EYES_BOTH, - * EYES_LEFT or EYES_RIGHT) - */ -void -VideoDecoder::fill_one_eye (Frame from, Frame to, Eyes eye) -{ - if (to == 0) { - /* Already OK */ - return; - } - - /* Fill with black... */ - shared_ptr filler_image (new RawImageProxy (_black_image)); - Part filler_part = PART_WHOLE; - - /* ...unless there's some video we can fill with */ - if (!_decoded.empty ()) { - filler_image = _decoded.back().image; - filler_part = _decoded.back().part; - } - - for (Frame i = from; i < to; ++i) { -#ifdef DCPOMATIC_DEBUG - test_gaps++; -#endif - _decoded.push_back ( - ContentVideo (filler_image, VideoFrame (i, eye), filler_part) - ); - } -} - -/** Fill _decoded from `from' up to, but not including, `to' - * adding both left and right eye frames. - */ -void -VideoDecoder::fill_both_eyes (VideoFrame from, VideoFrame to) -{ - /* Fill with black... */ - shared_ptr filler_left_image (new RawImageProxy (_black_image)); - shared_ptr filler_right_image (new RawImageProxy (_black_image)); - Part filler_left_part = PART_WHOLE; - Part filler_right_part = PART_WHOLE; - - /* ...unless there's some video we can fill with */ - for (list::const_reverse_iterator i = _decoded.rbegin(); i != _decoded.rend(); ++i) { - if (i->frame.eyes() == EYES_LEFT && !filler_left_image) { - filler_left_image = i->image; - filler_left_part = i->part; - } else if (i->frame.eyes() == EYES_RIGHT && !filler_right_image) { - filler_right_image = i->image; - filler_right_part = i->part; - } - - if (filler_left_image && filler_right_image) { - break; + if (vft == VideoFrameType::THREE_D) { + auto j2k = dynamic_pointer_cast(image); + if (j2k && j2k->eye()) { + if (j2k->eye() == dcp::Eye::LEFT) { + frame = _position->frames_round(afr) + 1; + eyes = Eyes::LEFT; + } else { + frame = _position->frames_round(afr); + eyes = Eyes::RIGHT; + } + } else { + /* This should not happen; see above */ + frame = _position->frames_round(afr) + 1; + } + } else if (vft == VideoFrameType::THREE_D_ALTERNATE) { + DCPOMATIC_ASSERT (_last_emitted_eyes); + if (_last_emitted_eyes.get() == Eyes::RIGHT) { + frame = _position->frames_round(afr) + 1; + eyes = Eyes::LEFT; + } else { + frame = _position->frames_round(afr); + eyes = Eyes::RIGHT; + } + } else { + frame = _position->frames_round(afr) + 1; } } - while (from != to) { - -#ifdef DCPOMATIC_DEBUG - test_gaps++; -#endif - - _decoded.push_back ( - ContentVideo ( - from.eyes() == EYES_LEFT ? 
filler_left_image : filler_right_image, - from, - from.eyes() == EYES_LEFT ? filler_left_part : filler_right_part - ) - ); - - ++from; - } -} - -/** Called by decoder classes when they have a video frame ready. - * @param frame Frame index within the content; this does not take into account 3D - * so for 3D_ALTERNATE this value goes: - * 0: frame 0 left - * 1: frame 0 right - * 2: frame 1 left - * 3: frame 1 right - * and so on. - */ -void -VideoDecoder::give (shared_ptr image, Frame frame) -{ - if (ignore ()) { - return; - } - - _log->log (String::compose ("VD receives %1", frame), LogEntry::TYPE_DEBUG_DECODE); - _position = ContentTime::from_frames (frame, _content->active_video_frame_rate()); - - /* Work out what we are going to push into _decoded next */ - list to_push; - switch (_content->video->frame_type ()) { - case VIDEO_FRAME_TYPE_2D: - to_push.push_back (ContentVideo (image, VideoFrame (frame, EYES_BOTH), PART_WHOLE)); + switch (vft) { + case VideoFrameType::TWO_D: + case VideoFrameType::THREE_D: + Data (ContentVideo (image, frame, eyes, Part::WHOLE)); break; - case VIDEO_FRAME_TYPE_3D: + case VideoFrameType::THREE_D_ALTERNATE: { - /* We receive the same frame index twice for 3D; hence we know which - frame this one is. - */ - bool const same = (!_decoded.empty() && frame == _decoded.back().frame.index()); - to_push.push_back (ContentVideo (image, VideoFrame (frame, same ? EYES_RIGHT : EYES_LEFT), PART_WHOLE)); + Data (ContentVideo (image, frame, eyes, Part::WHOLE)); + _last_emitted_eyes = eyes; break; } - case VIDEO_FRAME_TYPE_3D_ALTERNATE: - to_push.push_back (ContentVideo (image, VideoFrame (frame / 2, (frame % 2) ? EYES_RIGHT : EYES_LEFT), PART_WHOLE)); + case VideoFrameType::THREE_D_LEFT_RIGHT: + Data (ContentVideo (image, frame, Eyes::LEFT, Part::LEFT_HALF)); + Data (ContentVideo (image, frame, Eyes::RIGHT, Part::RIGHT_HALF)); break; - case VIDEO_FRAME_TYPE_3D_LEFT_RIGHT: - to_push.push_back (ContentVideo (image, VideoFrame (frame, EYES_LEFT), PART_LEFT_HALF)); - to_push.push_back (ContentVideo (image, VideoFrame (frame, EYES_RIGHT), PART_RIGHT_HALF)); + case VideoFrameType::THREE_D_TOP_BOTTOM: + Data (ContentVideo (image, frame, Eyes::LEFT, Part::TOP_HALF)); + Data (ContentVideo (image, frame, Eyes::RIGHT, Part::BOTTOM_HALF)); break; - case VIDEO_FRAME_TYPE_3D_TOP_BOTTOM: - to_push.push_back (ContentVideo (image, VideoFrame (frame, EYES_LEFT), PART_TOP_HALF)); - to_push.push_back (ContentVideo (image, VideoFrame (frame, EYES_RIGHT), PART_BOTTOM_HALF)); + case VideoFrameType::THREE_D_LEFT: + Data (ContentVideo (image, frame, Eyes::LEFT, Part::WHOLE)); break; - case VIDEO_FRAME_TYPE_3D_LEFT: - to_push.push_back (ContentVideo (image, VideoFrame (frame, EYES_LEFT), PART_WHOLE)); - break; - case VIDEO_FRAME_TYPE_3D_RIGHT: - to_push.push_back (ContentVideo (image, VideoFrame (frame, EYES_RIGHT), PART_WHOLE)); + case VideoFrameType::THREE_D_RIGHT: + Data (ContentVideo (image, frame, Eyes::RIGHT, Part::WHOLE)); break; default: DCPOMATIC_ASSERT (false); } - /* Now VideoDecoder is required never to have gaps in the frames that it presents - via get_video(). Hence we need to fill in any gap between the last thing in _decoded - and the things we are about to push. - */ - - optional from; - - if (_decoded.empty() && _last_seek_time && _last_seek_accurate) { - from = VideoFrame ( - _last_seek_time->frames_round (_content->active_video_frame_rate ()), - _content->video->frame_type() == VIDEO_FRAME_TYPE_2D ? 
EYES_BOTH : EYES_LEFT - ); - } else if (!_decoded.empty ()) { - /* Get the last frame we have */ - from = _decoded.back().frame; - /* And move onto the first frame we need */ - ++(*from); - if (_content->video->frame_type() == VIDEO_FRAME_TYPE_3D_LEFT || _content->video->frame_type() == VIDEO_FRAME_TYPE_3D_RIGHT) { - /* The previous ++ will increment a 3D-left-eye to the same index right-eye. If we are dealing with - a single-eye source we need an extra ++ to move back to the same eye. - */ - ++(*from); - } - } - - /* If we've pre-rolled on a seek we may now receive out-of-order frames - (frames before the last seek time) which we can just ignore. - */ - if (from && (*from) > to_push.front().frame) { - return; - } - - unsigned int const max_decoded_size = 96; - - /* If _decoded is already `full' there is no point in adding anything more to it, - as the new stuff will just be removed again. - */ - if (_decoded.size() < max_decoded_size) { - if (from) { - switch (_content->video->frame_type ()) { - case VIDEO_FRAME_TYPE_2D: - fill_one_eye (from->index(), to_push.front().frame.index(), EYES_BOTH); - break; - case VIDEO_FRAME_TYPE_3D: - case VIDEO_FRAME_TYPE_3D_LEFT_RIGHT: - case VIDEO_FRAME_TYPE_3D_TOP_BOTTOM: - case VIDEO_FRAME_TYPE_3D_ALTERNATE: - fill_both_eyes (from.get(), to_push.front().frame); - break; - case VIDEO_FRAME_TYPE_3D_LEFT: - fill_one_eye (from->index(), to_push.front().frame.index(), EYES_LEFT); - break; - case VIDEO_FRAME_TYPE_3D_RIGHT: - fill_one_eye (from->index(), to_push.front().frame.index(), EYES_RIGHT); - break; - } - } - - copy (to_push.begin(), to_push.end(), back_inserter (_decoded)); - } - - /* We can't let this build up too much or we will run out of memory. There is a - `best' value for the allowed size of _decoded which balances memory use - with decoding efficiency (lack of seeks). Throwing away video frames here - is not a problem for correctness, so do it. - */ - while (_decoded.size() > max_decoded_size) { - _decoded.pop_back (); - } + _position = ContentTime::from_frames (frame, afr); } + void -VideoDecoder::seek (ContentTime s, bool accurate) +VideoDecoder::seek () { - _log->log (String::compose ("VD seek to %1", to_string(s)), LogEntry::TYPE_DEBUG_DECODE); - _decoded.clear (); - _last_seek_time = s; - _last_seek_accurate = accurate; - _position.reset (); + _position = boost::none; + _last_emitted_eyes.reset (); + _frame_interval_checker.reset (new FrameIntervalChecker()); }
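
The doc comment on VideoDecoder::emit() above describes how the decoder's raw frame index is numbered for 3D-alternate content (0: frame 0 left, 1: frame 0 right, 2: frame 1 left, and so on). The stand-alone sketch below (not part of the patch; SketchFrameType, SketchEyes and map_first_decoder_frame() are hypothetical stand-ins, not DCP-o-matic types) shows that mapping as it applies to the first frame received after initialisation or a seek:

#include <cstdint>
#include <utility>

enum class SketchFrameType { TWO_D, THREE_D_ALTERNATE };
enum class SketchEyes { BOTH, LEFT, RIGHT };

std::pair<int64_t, SketchEyes>
map_first_decoder_frame (SketchFrameType type, int64_t decoder_frame)
{
	switch (type) {
	case SketchFrameType::THREE_D_ALTERNATE:
		/* Decoder frame 2n is the left eye of content frame n; 2n+1 is the right eye */
		return { decoder_frame / 2, (decoder_frame % 2) ? SketchEyes::RIGHT : SketchEyes::LEFT };
	case SketchFrameType::TWO_D:
	default:
		/* 2D content: one image per frame, shown to both eyes */
		return { decoder_frame, SketchEyes::BOTH };
	}
}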
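
Once that first frame has fixed _position, emit() stops trusting the decoder's timestamps and simply counts frames, using _last_emitted_eyes to toggle the eye for 3D-alternate content. A compact sketch of that bookkeeping, again with hypothetical stand-in names rather than the class's real members:

#include <cstdint>
#include <utility>

enum class SketchEye { LEFT, RIGHT };

/* Counts (frame, eye) pairs for 3D-alternate content once the starting frame is known:
 * finishing a RIGHT eye moves on to the LEFT eye of the next frame.
 */
struct AlternateEyeCounter
{
	int64_t last_frame;
	SketchEye last_eye = SketchEye::RIGHT;

	explicit AlternateEyeCounter (int64_t first_frame)
		: last_frame (first_frame - 1)
	{}

	std::pair<int64_t, SketchEye> next ()
	{
		if (last_eye == SketchEye::RIGHT) {
			++last_frame;
			last_eye = SketchEye::LEFT;
		} else {
			last_eye = SketchEye::RIGHT;
		}
		return { last_frame, last_eye };
	}
};

For example, AlternateEyeCounter c(7) yields (7, LEFT), then (7, RIGHT), then (8, LEFT), mirroring the frame arithmetic in the THREE_D_ALTERNATE branch of emit() above.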
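
The heuristics comment in emit() notes that genuine 3D content should deliver two frames for each timestamp, which is what FrameIntervalChecker relies on to catch content wrongly flagged as 3D. Its real implementation lives in frame_interval_checker.{cc,h}; the simplified checker below only illustrates the idea and is not that class's actual logic:

#include <cmath>
#include <cstddef>
#include <vector>

enum class SketchGuess { AGAIN, PROBABLY_3D, PROBABLY_NOT_3D };

class SketchIntervalChecker
{
public:
	void feed (double frame_time_seconds, double video_frame_rate)
	{
		_times.push_back (frame_time_seconds);
		_period = 1.0 / video_frame_rate;
	}

	SketchGuess guess () const
	{
		if (_times.size() < 8) {
			/* Not enough samples yet to decide */
			return SketchGuess::AGAIN;
		}
		/* Count consecutive frames whose timestamps (nearly) coincide */
		int repeats = 0;
		for (std::size_t i = 1; i < _times.size(); ++i) {
			if (std::abs(_times[i] - _times[i - 1]) < _period / 2) {
				++repeats;
			}
		}
		/* Genuine 3D repeats every timestamp once; 2D mistakenly set to 3D does not */
		return repeats >= static_cast<int>(_times.size() / 2) ? SketchGuess::PROBABLY_3D : SketchGuess::PROBABLY_NOT_3D;
	}

private:
	std::vector<double> _times;
	double _period = 0.0;
};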