From da93921d843b8eb9c3d0d77a7a7d629beb5d1f86 Mon Sep 17 00:00:00 2001
From: Carl Hetherington
Date: Wed, 12 Aug 2020 21:42:37 +0200
Subject: Add memory buffer allocated by cuda and XYZ->RGB conversion; slower than previous.

---
 src/lib/fastvideo_player_video_preparer.cc | 31 +++++++++++++++++++++++++++---
 src/lib/fastvideo_player_video_preparer.h | 2 ++
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/src/lib/fastvideo_player_video_preparer.cc b/src/lib/fastvideo_player_video_preparer.cc
index 4e0f267ab..6e092c060 100644
--- a/src/lib/fastvideo_player_video_preparer.cc
+++ b/src/lib/fastvideo_player_video_preparer.cc
@@ -6,11 +6,13 @@
 #include "j2k_image_proxy.h"
 #include "player_video.h"
 #include "timer.h"
+#include
 #include
 #include
 #include
+using std::bad_alloc;
 using boost::bind;
 using boost::const_pointer_cast;
 using boost::dynamic_pointer_cast;
@@ -22,6 +24,7 @@ FastvideoPlayerVideoPreparer::FastvideoPlayerVideoPreparer (boost::function proxy = const_pointer_cast(dynamic_pointer_cast(pv->image_proxy()));
 	DCPOMATIC_ASSERT (proxy);
-	dcp::Size const size = proxy->size();
-	shared_ptr image(new dcpomatic::Image(AV_PIX_FMT_RGB24, size, true));
 	fastExportParameters_t export_parameters;
 	export_parameters.convert = FAST_CONVERT_NONE;
-	fastStatus_t r = fastExportToHostCopy(_adapter, image->data()[0], size.width, image->stride()[0], size.height, &export_parameters);
+	dcp::Size const size = proxy->size();
+	fastStatus_t r = fastExportToHostCopy(_adapter, _decoded, size.width, _decoded_stride, size.height, &export_parameters);
 	if (r != FAST_OK) {
 		throw FastvideoError ("ExportToHostCopy", r);
 	}
+	shared_ptr image(new dcpomatic::Image(AV_PIX_FMT_XYZ12LE, size, true));
+	uint8_t* from = reinterpret_cast<uint8_t*>(_decoded);
+	uint16_t* to = reinterpret_cast<uint16_t*>(image->data()[0]);
+	for (int y = 0; y < size.height; ++y) {
+		uint8_t* from_p = from;
+		uint16_t* to_p = to;
+		for (int x = 0; x < size.width; ++x) {
+			*to_p++ = uint16_t(*from_p++) << 8;
+			*to_p++ = uint16_t(*from_p++) << 8;
+			*to_p++ = uint16_t(*from_p++) << 8;
+		}
+		from += _decoded_stride;
+		to += image->stride()[0] / 2;
+	}
+	timestamped_printf("fv sets image for %d\n", pv->time.frames_round(24));
 	proxy->set_image (image);
@@ -222,4 +240,11 @@ FastvideoPlayerVideoPreparer::setup (dcp::Data sample)
 	if (r != FAST_OK) {
 		throw FastvideoError ("ExportToHostCreate");
 	}
+
+	_decoded_stride = info.width * 3;
+	_decoded_stride += 32 - (_decoded_stride % 32);
+	cudaError e = cudaMallocHost (&_decoded, _decoded_stride * info.height);
+	if (e != cudaSuccess) {
+		throw bad_alloc ();
+	}
 }

diff --git a/src/lib/fastvideo_player_video_preparer.h b/src/lib/fastvideo_player_video_preparer.h
index 384d8edeb..753af6cbd 100644
--- a/src/lib/fastvideo_player_video_preparer.h
+++ b/src/lib/fastvideo_player_video_preparer.h
@@ -40,6 +40,8 @@ private:
 	bool _setup_done;
 	std::vector > _batch;
+	void* _decoded;
+	int _decoded_stride;
 	CPUPlayerVideoPreparer _cpu;
--
cgit v1.2.3