Remove batching (seems pointless).
author Carl Hetherington <cth@carlh.net>
Mon, 23 May 2022 14:39:41 +0000 (16:39 +0200)
committer Carl Hetherington <cth@carlh.net>
Mon, 23 May 2022 14:55:28 +0000 (16:55 +0200)
src/lib/cuda_j2k_frame_encoder.cc
src/lib/cuda_j2k_frame_encoder.h
src/lib/j2k_encoder.cc

index 3726f63a7c5577be1f976053166b094351d5c809..7fba7b58ed0a285bc770cfa4570fd9921f366f52 100644 (file)
@@ -35,20 +35,13 @@ using std::vector;
 using boost::optional;
 
 
-boost::mutex CUDAJ2KFrameEncoder::_mutex;
-boost::condition CUDAJ2KFrameEncoder::_condition;
-std::vector<CUDAJ2KFrameEncoder::Input> CUDAJ2KFrameEncoder::_queue;
-std::map<std::pair<int, Eyes>, dcp::ArrayData> CUDAJ2KFrameEncoder::_output;
-
-boost::optional<dcp::Size> CUDAJ2KFrameEncoder::_size;
-boost::optional<Resolution> CUDAJ2KFrameEncoder::_resolution;
-
-
 CUDAJ2KFrameEncoder::CUDAJ2KFrameEncoder()
 {
        nvjpeg2kEncoderCreateSimple(&_encoder_handle);
        nvjpeg2kEncodeStateCreate(_encoder_handle, &_encoder_state);
        nvjpeg2kEncodeParamsCreate(&_encoder_params);
+
+       cudaStreamCreateWithFlags(&_stream, cudaStreamNonBlocking);
 }
 
 
@@ -92,8 +85,6 @@ CUDAJ2KFrameEncoder::Input::Input(DCPVideo const& vf)
                if (status != cudaSuccess) {
                        throw CUDAError("cudaMemcpy2D", status);
                }
-
-               cudaDeviceSynchronize();
        }
 
        _device_image.num_components = 3;
@@ -133,8 +124,6 @@ CUDAJ2KFrameEncoder::encode(DCPVideo const& vf)
 {
        auto input = Input(vf);
 
-       boost::mutex::scoped_lock lm(_mutex);
-
        auto const size = vf.frame()->out_size();
        DCPOMATIC_ASSERT(!_size || size == *_size);
        _size = size;
@@ -142,27 +131,6 @@ CUDAJ2KFrameEncoder::encode(DCPVideo const& vf)
        DCPOMATIC_ASSERT(!_resolution || vf.resolution() == *_resolution);
        _resolution = vf.resolution();
 
-       _queue.push_back(std::move(input));
-       if (_queue.size() < batch_size) {
-               std::cout << "queue is " << _queue.size() << " - waiting\n";
-               _condition.wait(lm);
-       } else {
-               encode_queue();
-               _condition.notify_all();
-       }
-
-       auto output = _output.find(make_pair(vf.index(), vf.eyes()));
-       if (output == _output.end()) {
-               return {};
-       }
-
-       return output->second;
-}
-
-
-void
-CUDAJ2KFrameEncoder::encode_queue()
-{
        nvjpeg2kImageComponentInfo_t info[3];
        for (int i = 0; i < 3; ++i) {
                info[i].component_width = _size->width;
@@ -192,33 +160,26 @@ CUDAJ2KFrameEncoder::encode_queue()
        }
 
        // XXX: quality
-       status = nvjpeg2kEncodeParamsSetQuality(_encoder_params, 25);
+       status = nvjpeg2kEncodeParamsSetQuality(_encoder_params, 30);
        if (status != NVJPEG2K_STATUS_SUCCESS) {
                throw CUDAError("nvjpeg2kEncodeParamsSetQuality", status);
        }
 
-       std::cout << "encoding queue of " << _queue.size() << "\n";
-       for (auto const& frame: _queue) {
-
-               status = nvjpeg2kEncode(_encoder_handle, _encoder_state, _encoder_params, frame.device_image(), 0);
-               if (status != NVJPEG2K_STATUS_SUCCESS) {
-                       throw CUDAError("nvjpeg2kEncode", status);
-               }
-
-               size_t compressed_size;
-               status = nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encoder_state, nullptr, &compressed_size, 0);
+       status = nvjpeg2kEncode(_encoder_handle, _encoder_state, _encoder_params, input.device_image(), _stream);
+       if (status != NVJPEG2K_STATUS_SUCCESS) {
+               throw CUDAError("nvjpeg2kEncode", status);
+       }
 
-               dcp::ArrayData this_output(compressed_size);
-               status = nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encoder_state, this_output.data(), &compressed_size, 0);
-               if (status != NVJPEG2K_STATUS_SUCCESS) {
-                       throw CUDAError("nvjpeg2kEncodeRetrieveBitstream", status);
-               }
+       size_t compressed_size;
+       status = nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encoder_state, nullptr, &compressed_size, _stream);
 
-               _output[make_pair(frame.index(), frame.eyes())] = this_output;
-               cudaStreamSynchronize(0);
+       dcp::ArrayData output(compressed_size);
+       status = nvjpeg2kEncodeRetrieveBitstream(_encoder_handle, _encoder_state, output.data(), &compressed_size, _stream);
+       if (status != NVJPEG2K_STATUS_SUCCESS) {
+               throw CUDAError("nvjpeg2kEncodeRetrieveBitstream", status);
        }
 
-       _queue.clear();
+       return output;
 }
 
 
@@ -232,7 +193,5 @@ CUDAJ2KFrameEncoder::log_thread_start ()
 void
 CUDAJ2KFrameEncoder::flush()
 {
-       boost::mutex::scoped_lock lm(_mutex);
-       encode_queue();
-       _condition.notify_all();
+
 }
index 9a8666cf6c5b37f7f2014909a68af11bb3990088..b17ad69d9fb2ce9b6367311e30bf864b3c06838f 100644 (file)
@@ -42,8 +42,6 @@ public:
        void flush() override;
        void log_thread_start() override;
 
-       static int constexpr batch_size = 1;
-
 private:
        void encode_queue();
 
@@ -81,13 +79,9 @@ private:
                Eyes _eyes;
        };
 
-       static boost::mutex _mutex;
-       static boost::condition _condition;
-       static std::vector<Input> _queue;
-       static std::map<std::pair<int, Eyes>, dcp::ArrayData> _output;
-
-       static boost::optional<dcp::Size> _size;
-       static boost::optional<Resolution> _resolution;
+       boost::optional<dcp::Size> _size;
+       boost::optional<Resolution> _resolution;
+       cudaStream_t _stream;
 };
 
 
index 49f11377cdff0f267f65c0c9e52b8e7cb3e4a4f7..00814dcb0fb07bdc2fdaf0e0be5d34684f2d1cd3 100644 (file)
@@ -382,7 +382,7 @@ J2KEncoder::servers_list_changed ()
                }
        }
 #endif
-       for (int i = 0; i < CUDAJ2KFrameEncoder::batch_size; ++i) {
+       for (int i = 0; i < 16; ++i) {
                auto worker = make_shared<CUDAJ2KFrameEncoder>();
                _workers.push_back(worker);
                _threads->create_thread(boost::bind(&J2KEncoder::encoder_thread, this, worker));