#include <vector>
+using std::make_pair;
using std::vector;
using boost::optional;
+// Definitions of the batching state shared by *every* CUDAJ2KFrameEncoder
+// instance: one global queue of frames waiting to be encoded and one global
+// map of finished encodes keyed by (frame index, eye).  All of it is
+// protected by _mutex and signalled via _condition.
+boost::mutex CUDAJ2KFrameEncoder::_mutex;
+boost::condition CUDAJ2KFrameEncoder::_condition;
+std::vector<CUDAJ2KFrameEncoder::Input> CUDAJ2KFrameEncoder::_queue;
+std::map<std::pair<int, Eyes>, dcp::ArrayData> CUDAJ2KFrameEncoder::_output;
+
+// Size/resolution of the frames seen so far; set on first use and asserted
+// constant thereafter, since one nvjpeg2k parameter set serves the whole batch.
+boost::optional<dcp::Size> CUDAJ2KFrameEncoder::_size;
+boost::optional<Resolution> CUDAJ2KFrameEncoder::_resolution;
+
+
/** Create the nvjpeg2k encoder handle.
 *
 *  Throws CUDAError if the handle cannot be created, matching the error
 *  handling used for every other nvjpeg2k call in this class (previously the
 *  returned status was silently ignored).
 */
CUDAJ2KFrameEncoder::CUDAJ2KFrameEncoder()
{
	auto const status = nvjpeg2kEncoderCreateSimple(&_encoder_handle);
	if (status != NVJPEG2K_STATUS_SUCCESS) {
		throw CUDAError("nvjpeg2kEncoderCreateSimple", status);
	}
}
-CUDAJ2KFrameEncoder::Frame::Frame(DCPVideo const& vf)
+// Build a device-side input frame from a DCPVideo, remembering the frame's
+// index and eye so the encoded output can later be keyed by them.
+CUDAJ2KFrameEncoder::Input::Input(DCPVideo const& vf)
+	: _index(vf.index())
+	, _eyes(vf.eyes())
{
// NOTE(review): the XYZ conversion result is computed but, as far as this
// chunk shows, never uploaded into _pixel_data_d / _device_image — confirm
// that the upload code exists elsewhere or is still to be written.
auto xyz = convert_to_xyz(vf.frame(), boost::bind(&Log::dcp_log, dcpomatic_log.get(), _1, _2));
}
-CUDAJ2KFrameEncoder::Frame::Frame(Frame&& other)
+// Move constructor: takes over the device pixel-plane pointers.
+CUDAJ2KFrameEncoder::Input::Input(Input&& other)
+	: _index(other._index)
+	, _eyes(other._eyes)
{
// NOTE(review): this copies the device pointers but does not null them out
// in `other`, so both destructors will cudaFree the same pointers (double
// free).  Consider a std::exchange-style ownership transfer — confirm.
for (int i = 0; i < 3; ++i) {
_pixel_data_d[i] = other._pixel_data_d[i];
}
-CUDAJ2KFrameEncoder::Frame::~Frame()
+CUDAJ2KFrameEncoder::Input::~Input()
{
	/* Free every component plane: _pixel_data_d has three elements (and the
	 * move constructor copies all three) but the old code freed only planes
	 * 0 and 1, leaking plane 2.  cudaFree(nullptr) is a no-op, so this is
	 * safe even for a plane that was never allocated.
	 */
	for (int i = 0; i < 3; ++i) {
		cudaFree(_pixel_data_d[i]);
	}
}
-vector<dcp::ArrayData>
+/** Queue one frame for batched GPU encoding.
+ *
+ *  Blocks until a full batch has been encoded, then returns this frame's
+ *  JPEG2000 data, or an empty optional if the result is not (yet) available.
+ */
+optional<dcp::ArrayData>
CUDAJ2KFrameEncoder::encode(DCPVideo const& vf)
{
-	int constexpr BATCH_SIZE = 128;
+	/* Build the device-side input before taking the lock so the expensive
+	 * per-frame conversion does not serialise the encoder threads.
+	 */
+	auto input = Input(vf);
+
+	boost::mutex::scoped_lock lm(_mutex);
	auto const size = vf.frame()->out_size();
	DCPOMATIC_ASSERT(!_size || size == *_size);
	DCPOMATIC_ASSERT(!_resolution || vf.resolution() == *_resolution);
+	/* Record the size as well as the resolution: encode_queue() reads
+	 * *_size, which would otherwise still be empty when the first batch is
+	 * encoded.
+	 */
+	_size = size;
	_resolution = vf.resolution();
-	if (_batch.size() < BATCH_SIZE) {
-		_batch.push_back(Frame(vf));
+	_queue.push_back(std::move(input));
+	if (_queue.size() < batch_size) {
+		/* Not enough frames queued to fill a batch; wait for another
+		 * thread to complete and encode it.  NOTE(review): a spurious
+		 * wakeup makes this frame return an empty optional — confirm
+		 * callers treat that as "re-submit later".
+		 */
+		_condition.wait(lm);
+	} else {
+		encode_queue();
+		_condition.notify_all();
+	}
+
+	auto output = _output.find(make_pair(vf.index(), vf.eyes()));
+	if (output == _output.end()) {
		return {};
	}
-	return flush();
+
+	/* Take the entry out of the map so _output does not grow without bound. */
+	auto data = output->second;
+	_output.erase(output);
+	return data;
}
-vector<dcp::ArrayData>
-CUDAJ2KFrameEncoder::flush()
+// Encode everything currently in _queue on the GPU and file the results in
+// _output keyed by (index, eyes).  Caller must hold _mutex.
+// NOTE(review): reads *_size below; the old empty-queue early-return was
+// removed, so calling this before any frame has been queued would
+// dereference an empty optional — confirm callers guard against that.
+void
+CUDAJ2KFrameEncoder::encode_queue()
{
-	if (_batch.empty()) {
-		return {};
-	}
-
	nvjpeg2kImageComponentInfo_t info[3];
	for (int i = 0; i < 3; ++i) {
		info[i].component_width = _size->width;
	throw CUDAError("nvjpeg2kEncodeParamsSetQuality", status);
}
-	vector<dcp::ArrayData> output;
-
-	for (auto const& frame: _batch) {
-
-		auto x = frame.device_image();
+	// NOTE(review): debug output left in — remove or route through the log.
+	std::cout << "encoding queue of " << _queue.size() << "\n";
	for (auto const& frame: _queue) {
	status = nvjpeg2kEncode(_encoder_handle, _encoder_state, _encoder_params, frame.device_image(), 0);
	if (status != NVJPEG2K_STATUS_SUCCESS) {
		// NOTE(review): label mismatch — this guards nvjpeg2kEncode but
		// reports "nvjpeg2kEncodeRetrieveBitstream"; confirm which call failed.
		throw CUDAError("nvjpeg2kEncodeRetrieveBitstream", status);
	}
-		output.push_back(this_output);
+	_output[make_pair(frame.index(), frame.eyes())] = this_output;
	// Wait for the async encode/copy on the default stream to finish before
	// the ArrayData is considered complete.
	cudaStreamSynchronize(0);
	}
-	_batch.clear();
-
-	return output;
+	_queue.clear();
}
{
LOG_TIMING("start-encoder-thread thread=%1", thread_id());
}
+
+
+/** Encode whatever is left in the queue without waiting for a full batch,
+ *  then wake any threads blocked in encode() so they can collect their
+ *  results.
+ */
+void
+CUDAJ2KFrameEncoder::flush()
+{
+	boost::mutex::scoped_lock lm(_mutex);
+	/* Guard against an empty queue: encode_queue() dereferences *_size,
+	 * which is never set if no frame was ever queued.  No thread can be
+	 * waiting when the queue is empty (threads wait only after queueing a
+	 * frame), so skipping the encode loses nothing.
+	 */
+	if (!_queue.empty()) {
+		encode_queue();
+	}
+	_condition.notify_all();
+}
#include "types.h"
#include <dcp/types.h>
#include <nvjpeg2k.h>
+#include <boost/thread/condition.hpp>
+#include <boost/thread/mutex.hpp>
+#include <map>
#include <vector>
public:
	CUDAJ2KFrameEncoder();
-	std::vector<dcp::ArrayData> encode(DCPVideo const &) override;
-	std::vector<dcp::ArrayData> flush() override;
+	/* Encode one frame; blocks until a batch has been encoded and returns
+	 * this frame's JPEG2000 data, or an empty optional if it is not yet
+	 * available. */
+	boost::optional<dcp::ArrayData> encode(DCPVideo const &) override;
+	/* Encode any partially-filled batch and wake waiting threads. */
+	void flush() override;
	void log_thread_start() override;
+	/* Number of frames gathered before a batch is encoded on the GPU.
+	 * NOTE(review): 1 effectively disables batching — looks like a debug
+	 * value (an earlier revision used 128); confirm the production size. */
+	static int constexpr batch_size = 1;
+
private:
+	/* Encode everything in _queue; caller must hold _mutex. */
+	void encode_queue();
+
	nvjpeg2kEncoder_t _encoder_handle;
	nvjpeg2kEncodeState_t _encoder_state;
	nvjpeg2kEncodeParams_t _encoder_params;
-	class Frame
+	/* One input frame held on the GPU, ready to be encoded: three component
+	 * planes in device memory plus the frame's index and eye (used to key
+	 * the shared output map).  Move-only; frees the device memory on
+	 * destruction. */
+	class Input
	{
	public:
-		Frame(DCPVideo const& vf);
-		Frame(Frame const& other) = delete;
-		Frame(Frame&& other);
-		~Frame();
+		Input(DCPVideo const& vf);
+		Input(Input const& other) = delete;
+		Input(Input&& other);
+		~Input();
-		Frame& operator=(Frame const& other) = delete;
+		Input& operator=(Input const& other) = delete;
		/* Device-memory image descriptor to hand to nvjpeg2kEncode(). */
		nvjpeg2kImage_t const* const device_image() const {
			return &_device_image;
		}
+		int index() const {
+			return _index;
+		}
+
+		Eyes eyes() const {
+			return _eyes;
+		}
+
	private:
		uint8_t* _pixel_data_d[3];
		size_t _pitch_in_bytes[3];
		nvjpeg2kImage_t _device_image;
+		int _index;
+		Eyes _eyes;
	};
-	std::vector<Frame> _batch;
-	boost::optional<dcp::Size> _size;
-	boost::optional<Resolution> _resolution;
+	/* Shared batching state: every CUDAJ2KFrameEncoder instance feeds one
+	 * static queue and reads results from one static map, synchronised by
+	 * _mutex/_condition.  NOTE(review): being static, two encoders handling
+	 * different frame sizes cannot coexist in one process — confirm that is
+	 * acceptable. */
+	static boost::mutex _mutex;
+	static boost::condition _condition;
+	static std::vector<Input> _queue;
+	static std::map<std::pair<int, Eyes>, dcp::ArrayData> _output;
+
+	static boost::optional<dcp::Size> _size;
+	static boost::optional<Resolution> _resolution;
};
_full_condition.wait (lock);
}
+ for (auto& worker: _workers) {
+ worker->flush();
+ }
+
lock.unlock ();
LOG_GENERAL_NC (N_("Terminating encoder threads"));
}
}
#endif
- auto worker = make_shared<CUDAJ2KFrameEncoder>();
- _workers.push_back(worker);
- _threads->create_thread(boost::bind(&J2KEncoder::encoder_thread, this, worker));
+ for (int i = 0; i < CUDAJ2KFrameEncoder::batch_size; ++i) {
+ auto worker = make_shared<CUDAJ2KFrameEncoder>();
+ _workers.push_back(worker);
+ _threads->create_thread(boost::bind(&J2KEncoder::encoder_thread, this, worker));
+ }
_writer->set_encoder_threads (_threads->size());
}