Change how video timing is done. [v2.17.1]

This commit changes the approach to video timing. Previously, we would (more or less) try to use every video frame from the content in the output, hoping that they arrived at a constant frame rate. This is not always the case, however. Here we preserve the PTS (presentation timestamp) of each video frame, and when one arrives we output whatever DCP video frames we can (at the regular DCP frame rate). Hopefully this will solve a range of sync problems, but it could also introduce new ones.
author: Carl Hetherington <cth@carlh.net> 2022-09-29 10:17:50 +0200
committer: Carl Hetherington <cth@carlh.net> 2022-10-04 01:10:48 +0200
commit: fb2e821b36c4b50709f3a085131325b668d4ed57 (patch)
tree: 009c675884993be8cd77c20080d3fc5b67f16956 /src/lib/video_decoder.cc
parent: 95469c2bb15f490f7d3fbeaed9903bd0cbc13677 (diff)
1 files changed, 30 insertions, 81 deletions
diff --git a/src/lib/video_decoder.cc b/src/lib/video_decoder.cc
index cf21f885a..c628fddd9 100644
--- a/src/lib/video_decoder.cc
+++ b/src/lib/video_decoder.cc
@@ -20,7 +20,6 @@
 
 
 #include "compose.hpp"
-#include "film.h"
 #include "frame_interval_checker.h"
 #include "image.h"
 #include "j2k_image_proxy.h"
@@ -47,17 +46,9 @@ VideoDecoder::VideoDecoder (Decoder* parent, shared_ptr<const Content> c)
 }
 
 
-/** Called by decoder classes when they have a video frame ready.
- *  @param frame Frame index within the content; this does not take into account 3D
- *  so for 3D_ALTERNATE this value goes:
- *     0: frame 0 left
- *     1: frame 0 right
- *     2: frame 1 left
- *     3: frame 1 right
- *  and so on.
- */
+/** Called by decoder classes when they have a video frame ready */
 void
-VideoDecoder::emit (shared_ptr<const Film> film, shared_ptr<const ImageProxy> image, Frame decoder_frame)
+VideoDecoder::emit(shared_ptr<const Film> film, shared_ptr<const ImageProxy> image, ContentTime time)
 {
 	if (ignore ()) {
 		return;
@@ -66,14 +57,12 @@ VideoDecoder::emit (shared_ptr<const Film> film, shared_ptr<const ImageProxy> im
 	auto const afr = _content->active_video_frame_rate(film);
 	auto const vft = _content->video->frame_type();
 
-	auto frame_time = ContentTime::from_frames (decoder_frame, afr);
-
 	/* Do some heuristics to try and spot the case where the user sets content to 3D
 	 * when it is not.  We try to tell this by looking at the differences in time between
 	 * the first few frames.  Real 3D content should have two frames for each timestamp.
 	 */
 	if (_frame_interval_checker) {
-		_frame_interval_checker->feed (frame_time, afr);
+		_frame_interval_checker->feed(time, afr);
 		if (_frame_interval_checker->guess() == FrameIntervalChecker::PROBABLY_NOT_3D && vft == VideoFrameType::THREE_D) {
 			boost::throw_exception (
 				DecodeError(
@@ -91,94 +80,54 @@ VideoDecoder::emit (shared_ptr<const Film> film, shared_ptr<const ImageProxy> im
 		}
 	}
 
-	Frame frame;
-	Eyes eyes = Eyes::BOTH;
-	if (!_position) {
-		/* This is the first data we have received since initialisation or seek.  Set
-		   the position based on the frame that was given.  After this first time
-		   we just count frames, since (as with audio) it seems that ContentTimes
-		   are unreliable from FFmpegDecoder.  They are much better than audio times
-		   but still we get the occasional one which is duplicated.  In this case
-		   ffmpeg seems to carry on regardless, processing the video frame as normal.
-		   If we drop the frame with the duplicated timestamp we obviously lose sync.
-		*/
-
-		if (vft == VideoFrameType::THREE_D_ALTERNATE) {
-			frame = decoder_frame / 2;
-			eyes = (decoder_frame % 2) ? Eyes::RIGHT : Eyes::LEFT;
-		} else {
-			frame = decoder_frame;
-			if (vft == VideoFrameType::THREE_D) {
-				auto j2k = dynamic_pointer_cast<const J2KImageProxy>(image);
-				/* At the moment only DCP decoders producers VideoFrameType::THREE_D, so only the J2KImageProxy
-				 * knows which eye it is.
-				 */
-				if (j2k && j2k->eye()) {
-					eyes = j2k->eye().get() == dcp::Eye::LEFT ? Eyes::LEFT : Eyes::RIGHT;
-				}
-			}
-		}
-
-		_position = ContentTime::from_frames (frame, afr);
-	} else {
-		if (vft == VideoFrameType::THREE_D) {
-			auto j2k = dynamic_pointer_cast<const J2KImageProxy>(image);
-			if (j2k && j2k->eye()) {
-				if (j2k->eye() == dcp::Eye::LEFT) {
-					frame = _position->frames_round(afr) + 1;
-					eyes = Eyes::LEFT;
-				} else {
-					frame = _position->frames_round(afr);
-					eyes = Eyes::RIGHT;
-				}
-			} else {
-				/* This should not happen; see above */
-				frame = _position->frames_round(afr) + 1;
-			}
-		} else if (vft == VideoFrameType::THREE_D_ALTERNATE) {
-			DCPOMATIC_ASSERT (_last_emitted_eyes);
-			if (_last_emitted_eyes.get() == Eyes::RIGHT) {
-				frame = _position->frames_round(afr) + 1;
-				eyes = Eyes::LEFT;
-			} else {
-				frame = _position->frames_round(afr);
-				eyes = Eyes::RIGHT;
-			}
-		} else {
-			frame = _position->frames_round(afr) + 1;
-		}
-	}
-
 	switch (vft) {
 	case VideoFrameType::TWO_D:
+		Data(ContentVideo(image, time, Eyes::BOTH, Part::WHOLE));
+		break;
 	case VideoFrameType::THREE_D:
-		Data (ContentVideo (image, frame, eyes, Part::WHOLE));
+	{
+		auto eyes = Eyes::LEFT;
+		auto j2k = dynamic_pointer_cast<const J2KImageProxy>(image);
+		if (j2k && j2k->eye()) {
+			eyes = *j2k->eye() == dcp::Eye::LEFT ? Eyes::LEFT : Eyes::RIGHT;
+		}
+
+		Data(ContentVideo(image, time, eyes, Part::WHOLE));
 		break;
+	}
 	case VideoFrameType::THREE_D_ALTERNATE:
 	{
-		Data (ContentVideo (image, frame, eyes, Part::WHOLE));
+		Eyes eyes;
+		if (_last_emitted_eyes) {
+			eyes = _last_emitted_eyes.get() == Eyes::LEFT ? Eyes::RIGHT : Eyes::LEFT;
+		} else {
+			/* We don't know what eye this frame is, so just guess */
+			auto frame = time.frames_round(_content->video_frame_rate().get_value_or(24));
+			eyes = (frame % 2) ? Eyes::RIGHT : Eyes::LEFT;
+		}
+		Data(ContentVideo(image, time, eyes, Part::WHOLE));
 		_last_emitted_eyes = eyes;
 		break;
 	}
 	case VideoFrameType::THREE_D_LEFT_RIGHT:
-		Data (ContentVideo (image, frame, Eyes::LEFT, Part::LEFT_HALF));
-		Data (ContentVideo (image, frame, Eyes::RIGHT, Part::RIGHT_HALF));
+		Data(ContentVideo(image, time, Eyes::LEFT, Part::LEFT_HALF));
+		Data(ContentVideo(image, time, Eyes::RIGHT, Part::RIGHT_HALF));
 		break;
 	case VideoFrameType::THREE_D_TOP_BOTTOM:
-		Data (ContentVideo (image, frame, Eyes::LEFT, Part::TOP_HALF));
-		Data (ContentVideo (image, frame, Eyes::RIGHT, Part::BOTTOM_HALF));
+		Data(ContentVideo(image, time, Eyes::LEFT, Part::TOP_HALF));
+		Data(ContentVideo(image, time, Eyes::RIGHT, Part::BOTTOM_HALF));
 		break;
 	case VideoFrameType::THREE_D_LEFT:
-		Data (ContentVideo (image, frame, Eyes::LEFT, Part::WHOLE));
+		Data(ContentVideo(image, time, Eyes::LEFT, Part::WHOLE));
 		break;
 	case VideoFrameType::THREE_D_RIGHT:
-		Data (ContentVideo (image, frame, Eyes::RIGHT, Part::WHOLE));
+		Data(ContentVideo(image, time, Eyes::RIGHT, Part::WHOLE));
 		break;
 	default:
 		DCPOMATIC_ASSERT (false);
 	}
 
-	_position = ContentTime::from_frames (frame, afr);
+	_position = time;
 }
author	Carl Hetherington <cth@carlh.net>	2022-09-29 10:17:50 +0200
committer	Carl Hetherington <cth@carlh.net>	2022-10-04 01:10:48 +0200
commit	fb2e821b36c4b50709f3a085131325b668d4ed57 (patch)
tree	009c675884993be8cd77c20080d3fc5b67f16956 /src/lib/video_decoder.cc
parent	95469c2bb15f490f7d3fbeaed9903bd0cbc13677 (diff)