wip: hacks towards batch processing of JPEG2000.
author Carl Hetherington <cth@carlh.net>
Tue, 11 Aug 2020 17:03:00 +0000 (19:03 +0200)
committer Carl Hetherington <cth@carlh.net>
Sun, 13 Sep 2020 18:22:44 +0000 (20:22 +0200)
src/lib/fastvideo.cc
src/lib/player_video_preparer.h [new file with mode: 0644]

index bd3d480c618142c0f4da0d4aa6a1125c050d099d..2b236bd9bf999aab861e635238ff35f985a1c447 100644 (file)
@@ -8,9 +8,53 @@
 using boost::shared_ptr;
 
 
+void
+handle (fastExportToHostHandle_t& adapter, fastGpuTimerHandle_t& device_to_host_timer)
+{
+       fastExportParameters_t exportParameters;
+       exportParameters.convert = options.ConvertToBGR ? FAST_CONVERT_BGR : FAST_CONVERT_NONE;
+
+       int aligned_width = info.width * 3;
+       aligned_width += 4 - (aligned_width % FAST_ALIGNMENT);
+       size_t decoded_size = info.height * aligned_width;
+       uint8_t* decoded = 0;
+       fastStatus_t r = fastMalloc(reinterpret_cast<void**>(&decoded), decoded_size);
+       if (r != FAST_OK) {
+               throw FastvideoError ("fastMalloc");
+       }
+
+       fastGpuTimerStart(device_to_host_timer  );
+
+       CHECK_FAST(fastExportToHostCopy(
+                       adapter,
+
+                       img.data.get(),
+                       img.w,
+                       img.wPitch,
+                       img.h,
+
+                       &exportParameters
+                       ));
+
+       float elapsedTimeGpu = 0.;
+       fastGpuTimerStop(deviceToHostTimer);
+       fastGpuTimerGetTime(deviceToHostTimer, &elapsedTimeGpu);
+
+       totalInternalTime += elapsedTimeGpu / 1000.0;
+
+       outputImgs->push_back(img);
+       if (imagesLeft == 0)
+               break;
+       CHECK_FAST(fastDecoderJ2kGetNextDecodedImage(decoder, &report, &imagesLeft));
+}
+
+
 shared_ptr<OpenJPEGImage>
 fastvideo_decompress_j2k (dcp::Data data, int reduce)
 {
+       const int max_batch_size = 16;
+       const int images_to_convert = 64;
+
        fastTraceCreate("/home/carl/trace.log");
 
        /*
@@ -85,7 +129,7 @@ fastvideo_decompress_j2k (dcp::Data data, int reduce)
                        &decoder,
                        &parameters,
                        FAST_RGB8, info.width, info.height,
-                       1,
+                       max_batch_size,
                        &buffer
                        );
        if (r != FAST_OK) {
@@ -143,9 +187,20 @@ fastvideo_decompress_j2k (dcp::Data data, int reduce)
                throw FastvideoError ("fastMalloc");
        }
 
-       r = fastDecoderJ2kTransform (decoder, data.data().get(), data.size(), &report);
-       if (r != FAST_OK) {
-               throw FastvideoError ("J2kTransform");
+       for (int i = 0; i < images_to_convert; ++i) {
+               fastDecoderJ2kAddImageToBatch(decoder, data.data().get(), data.size());
+               int free_slots = 0;
+               fastDecoderJ2kFreeSlotsInBatch(decoder, &free_slots);
+               if (free_slots == 0) {
+                       CHECK_FAST(TransformAndExtractBatch(img, &outputImgs));
+               }
+       }
+
+       int unprocessed_images_count = 0;
+       r = fastDecoderJ2kUnprocessedImagesCount(decoder, &unprocessed_images_count);
+       if (unprocessed_images_count > 0) { // Process the last non-complete batch
+
+               CHECK_FAST(TransformAndExtractBatch(img, &outputImgs));
        }
 
        total_time += report.elapsedTime;
diff --git a/src/lib/player_video_preparer.h b/src/lib/player_video_preparer.h
new file mode 100644 (file)
index 0000000..c164634
--- /dev/null
@@ -0,0 +1,14 @@
+class PlayerVideoPreparer
+{
+public:
+       virtual void request (boost::shared_ptr<PlayerVideo> pv) = 0;
+};
+
+
+class CPUPlayerVideoPreparer : public PlayerVideoPreparer
+{
+public:
+       void request (boost::shared_ptr<PlayerVideo> pv);
+};
+
+