summary refs log tree commit diff
diff options
context:
space:
mode:
author Carl Hetherington <cth@carlh.net> 2020-08-12 21:42:37 +0200
committer Carl Hetherington <cth@carlh.net> 2020-09-13 20:22:44 +0200
commit da93921d843b8eb9c3d0d77a7a7d629beb5d1f86 (patch)
tree 4c23a1ed8de5e0c51054443a310570cd77625657
parent f842d02dba8873e66b6f4fe1cd5d96d5c78a62fd (diff)
Add memory buffer allocated by cuda and XYZ->RGB conversion;
slower than previous.
-rw-r--r-- src/lib/fastvideo_player_video_preparer.cc 31
-rw-r--r-- src/lib/fastvideo_player_video_preparer.h 2
2 files changed, 30 insertions, 3 deletions
diff --git a/src/lib/fastvideo_player_video_preparer.cc b/src/lib/fastvideo_player_video_preparer.cc
index 4e0f267ab..6e092c060 100644
--- a/src/lib/fastvideo_player_video_preparer.cc
+++ b/src/lib/fastvideo_player_video_preparer.cc
@@ -6,11 +6,13 @@
#include "j2k_image_proxy.h"
#include "player_video.h"
#include "timer.h"
+#include <cuda_runtime.h>
#include <fastvideo_decoder_j2k.h>
#include <fastvideo_sdk.h>
#include <boost/bind.hpp>
+using std::bad_alloc;
using boost::bind;
using boost::const_pointer_cast;
using boost::dynamic_pointer_cast;
@@ -22,6 +24,7 @@ FastvideoPlayerVideoPreparer::FastvideoPlayerVideoPreparer (boost::function<AVPi
: _stop_thread (false)
, _decoder (0)
, _setup_done (false)
+ , _decoded (0)
, _cpu (pixel_format, aligned, fast)
{
fastSdkParametersHandle_t sdk_parameters;
@@ -49,6 +52,7 @@ FastvideoPlayerVideoPreparer::~FastvideoPlayerVideoPreparer ()
if (_setup_done) {
fastDecoderJ2kDestroy(_decoder);
fastExportToHostDestroy(_adapter);
+ cudaFreeHost (_decoded);
}
}
@@ -142,15 +146,29 @@ FastvideoPlayerVideoPreparer::transform_and_extract ()
/* XXX: this should be memlocked or whatever fastMalloc does */
shared_ptr<J2KImageProxy> proxy = const_pointer_cast<J2KImageProxy>(dynamic_pointer_cast<const J2KImageProxy>(pv->image_proxy()));
DCPOMATIC_ASSERT (proxy);
- dcp::Size const size = proxy->size();
- shared_ptr<dcpomatic::Image> image(new dcpomatic::Image(AV_PIX_FMT_RGB24, size, true));
fastExportParameters_t export_parameters;
export_parameters.convert = FAST_CONVERT_NONE;
- fastStatus_t r = fastExportToHostCopy(_adapter, image->data()[0], size.width, image->stride()[0], size.height, &export_parameters);
+ dcp::Size const size = proxy->size();
+ fastStatus_t r = fastExportToHostCopy(_adapter, _decoded, size.width, _decoded_stride, size.height, &export_parameters);
if (r != FAST_OK) {
throw FastvideoError ("ExportToHostCopy", r);
}
+ shared_ptr<dcpomatic::Image> image(new dcpomatic::Image(AV_PIX_FMT_XYZ12LE, size, true));
+ uint8_t* from = reinterpret_cast<uint8_t*>(_decoded);
+ uint16_t* to = reinterpret_cast<uint16_t*>(image->data()[0]);
+ for (int y = 0; y < size.height; ++y) {
+ uint8_t* from_p = from;
+ uint16_t* to_p = to;
+ for (int x = 0; x < size.width; ++x) {
+ *to_p++ = uint16_t(*from_p++) << 8;
+ *to_p++ = uint16_t(*from_p++) << 8;
+ *to_p++ = uint16_t(*from_p++) << 8;
+ }
+ from += _decoded_stride;
+ to += image->stride()[0] / 2;
+ }
+
timestamped_printf("fv sets image for %d\n", pv->time.frames_round(24));
proxy->set_image (image);
@@ -222,4 +240,11 @@ FastvideoPlayerVideoPreparer::setup (dcp::Data sample)
if (r != FAST_OK) {
throw FastvideoError ("ExportToHostCreate");
}
+
+ _decoded_stride = info.width * 3;
+ _decoded_stride += 32 - (_decoded_stride % 32);
+ cudaError e = cudaMallocHost (&_decoded, _decoded_stride * info.height);
+ if (e != cudaSuccess) {
+ throw bad_alloc ();
+ }
}
diff --git a/src/lib/fastvideo_player_video_preparer.h b/src/lib/fastvideo_player_video_preparer.h
index 384d8edeb..753af6cbd 100644
--- a/src/lib/fastvideo_player_video_preparer.h
+++ b/src/lib/fastvideo_player_video_preparer.h
@@ -40,6 +40,8 @@ private:
bool _setup_done;
std::vector<boost::weak_ptr<PlayerVideo> > _batch;
+ void* _decoded;
+ int _decoded_stride;
CPUPlayerVideoPreparer _cpu;