6e092c0608610335142185c3ec98e9358d44368b
[dcpomatic.git] / src / lib / fastvideo_player_video_preparer.cc
1 #include "dcpomatic_assert.h"
2 #include "exceptions.h"
3 #include "image.h"
4 #include "image_proxy.h"
5 #include "fastvideo_player_video_preparer.h"
6 #include "j2k_image_proxy.h"
7 #include "player_video.h"
8 #include "timer.h"
9 #include <cuda_runtime.h>
10 #include <fastvideo_decoder_j2k.h>
11 #include <fastvideo_sdk.h>
12 #include <boost/bind.hpp>
13
14
15 using std::bad_alloc;
16 using boost::bind;
17 using boost::const_pointer_cast;
18 using boost::dynamic_pointer_cast;
19 using boost::shared_ptr;
20 using boost::weak_ptr;
21
22
23 FastvideoPlayerVideoPreparer::FastvideoPlayerVideoPreparer (boost::function<AVPixelFormat (AVPixelFormat)> pixel_format, bool aligned, bool fast)
24         : _stop_thread (false)
25         , _decoder (0)
26         , _setup_done (false)
27         , _decoded (0)
28         , _cpu (pixel_format, aligned, fast)
29 {
30         fastSdkParametersHandle_t sdk_parameters;
31         fastStatus_t r = fastGetSdkParametersHandle(&sdk_parameters);
32         if (r != FAST_OK) {
33                 throw FastvideoError ("GetSdkParametersHandle", r);
34         }
35         r = fastDecoderJ2kLibraryInit(sdk_parameters);
36         if (r != FAST_OK) {
37                 throw FastvideoError ("DecoderJ2kLibraryInit", r);
38         }
39
40         _thread = boost::thread (bind(&FastvideoPlayerVideoPreparer::thread, this));
41 }
42
43
44 FastvideoPlayerVideoPreparer::~FastvideoPlayerVideoPreparer ()
45 {
46         _stop_thread = true;
47         _work.notify_all ();
48         try {
49                 _thread.join ();
50         } catch (...) {}
51
52         if (_setup_done) {
53                 fastDecoderJ2kDestroy(_decoder);
54                 fastExportToHostDestroy(_adapter);
55                 cudaFreeHost (_decoded);
56         }
57 }
58
59
60 void
61 FastvideoPlayerVideoPreparer::request (shared_ptr<PlayerVideo> pv)
62 {
63         boost::mutex::scoped_lock lm (_mutex);
64         _queue.push (weak_ptr<PlayerVideo>(pv));
65         _work.notify_all ();
66 }
67
68
69 void
70 FastvideoPlayerVideoPreparer::thread ()
71 {
72         while (true) {
73                 boost::mutex::scoped_lock lm (_mutex);
74                 while (!_stop_thread && _queue.empty()) {
75                         _work.wait (lm);
76                 }
77
78                 if (_stop_thread) {
79                         return;
80                 }
81
82                 weak_ptr<PlayerVideo> weak = _queue.front ();
83                 _queue.pop ();
84
85                 lm.unlock ();
86                 shared_ptr<PlayerVideo> pv = weak.lock ();
87                 if (pv) {
88                         /* We're going to "prepare" just the image proxy here (if it interests us i.e. if it's J2K,
89                          * then hand over the rest of the work to a CPUPlayerVideoPreparer.
90                          */
91                         shared_ptr<const ImageProxy> ip = pv->image_proxy ();
92                         shared_ptr<const J2KImageProxy> jp = dynamic_pointer_cast<const J2KImageProxy> (ip);
93                         if (jp) {
94                                 send_to_gpu (pv, jp);
95                         } else {
96                                 _cpu.request (pv);
97                         }
98                 }
99         }
100 }
101
102
103 void
104 FastvideoPlayerVideoPreparer::send_to_gpu (shared_ptr<PlayerVideo> pv, shared_ptr<const J2KImageProxy> proxy)
105 {
106         if (!_setup_done) {
107                 setup (proxy->j2k());
108                 _setup_done = true;
109         }
110
111         //std::cout << "add to batch.\n";
112         timestamped_printf("fv sends %d to gpu\n", pv->time.frames_round(24));
113         fastDecoderJ2kAddImageToBatch(_decoder, proxy->j2k().data().get(), proxy->j2k().size());
114         _batch.push_back (pv);
115
116         int free_slots = 0;
117         fastDecoderJ2kFreeSlotsInBatch(_decoder, &free_slots);
118         timestamped_printf("now %d slots left.\n", free_slots);
119         //std::cout << free_slots << " left in batch.\n";
120         if (free_slots == 0) {
121                 /* Do some decoding */
122                 timestamped_printf("tx & extract\n");
123                 transform_and_extract ();
124         }
125
126         /* XXX: maybe should hoover up any left-overs at some point */
127 }
128
129
130 void
131 FastvideoPlayerVideoPreparer::transform_and_extract ()
132 {
133         fastDecoderJ2kReport_t report;
134         fastStatus_t r = fastDecoderJ2kTransformBatch(_decoder, &report);
135         if (r != FAST_OK) {
136                 throw FastvideoError ("DecoderJ2kTransformBatch", r);
137         }
138
139         size_t index = 0;
140         int images_left = 1;
141         while (true) {
142                 DCPOMATIC_ASSERT (index <= _batch.size());
143                 shared_ptr<PlayerVideo> pv = _batch[index].lock();
144                 if (pv) {
145                         //std::cout << "got a pv for batch number " << index << "\n";
146                         /* XXX: this should be memlocked or whatever fastMalloc does */
147                         shared_ptr<J2KImageProxy> proxy = const_pointer_cast<J2KImageProxy>(dynamic_pointer_cast<const J2KImageProxy>(pv->image_proxy()));
148                         DCPOMATIC_ASSERT (proxy);
149                         fastExportParameters_t export_parameters;
150                         export_parameters.convert = FAST_CONVERT_NONE;
151                         dcp::Size const size = proxy->size();
152                         fastStatus_t r = fastExportToHostCopy(_adapter, _decoded, size.width, _decoded_stride, size.height, &export_parameters);
153                         if (r != FAST_OK) {
154                                 throw FastvideoError ("ExportToHostCopy", r);
155                         }
156
157                         shared_ptr<dcpomatic::Image> image(new dcpomatic::Image(AV_PIX_FMT_XYZ12LE, size, true));
158                         uint8_t* from = reinterpret_cast<uint8_t*>(_decoded);
159                         uint16_t* to = reinterpret_cast<uint16_t*>(image->data()[0]);
160                         for (int y = 0; y < size.height; ++y) {
161                                 uint8_t* from_p = from;
162                                 uint16_t* to_p = to;
163                                 for (int x = 0; x < size.width; ++x) {
164                                         *to_p++ = uint16_t(*from_p++) << 8;
165                                         *to_p++ = uint16_t(*from_p++) << 8;
166                                         *to_p++ = uint16_t(*from_p++) << 8;
167                                 }
168                                 from += _decoded_stride;
169                                 to += image->stride()[0] / 2;
170                         }
171
172                         timestamped_printf("fv sets image for %d\n", pv->time.frames_round(24));
173                         proxy->set_image (image);
174
175                         _cpu.request (pv);
176                 }
177
178                 if (images_left == 0) {
179                         break;
180                 }
181                 fastStatus_t r = fastDecoderJ2kGetNextDecodedImage(_decoder, &report, &images_left);
182                 if (r != FAST_OK) {
183                         throw FastvideoError ("DecoderJ2kGetNextDecodedImage", r);
184                 }
185
186                 ++index;
187         };
188
189         _batch.clear ();
190 }
191
192
193 void
194 FastvideoPlayerVideoPreparer::setup (dcp::Data sample)
195 {
196         fastJ2kImageInfo_t info;
197         fastStatus_t r = fastDecoderJ2kPredecode(&info, sample.data().get(), sample.size());
198         if (r != FAST_OK) {
199                 throw FastvideoError ("DecoderJ2kPredecode", r);
200         }
201
202         fastDecoderJ2kStaticParameters_t parameters;
203         memset(&parameters, 0, sizeof(fastDecoderJ2kStaticParameters_t));
204
205         parameters.ResolutionLevels = 0;
206         parameters.verboseLevel = 1;
207         parameters.enableROI = 0;
208
209         parameters.maxTileWidth = info.width;
210         parameters.maxTileHeight = info.height;
211
212         parameters.windowX0 = 0;
213         parameters.windowY0 = 0;
214         parameters.windowWidth = info.width;
215         parameters.windowHeight = info.height;
216
217         parameters.truncationLength = 0;
218         parameters.truncationMode = 0;
219         parameters.truncationRate = 0;
220
221         parameters.DecodePasses = 0;
222         parameters.imageInfo = &info;
223         parameters.maxStreamSize = max_stream_size;
224
225         r = fastDecoderJ2kCreate(
226                         &_decoder,
227                         &parameters,
228                         FAST_RGB8, info.width, info.height,
229                         max_batch_size,
230                         &_buffer
231                         );
232
233         if (r != FAST_OK) {
234                 fastTraceClose ();
235                 throw FastvideoError ("J2kCreate", r);
236         }
237
238         fastSurfaceFormat_t surface_format = FAST_RGB8;
239         r = fastExportToHostCreate(&_adapter, &surface_format, _buffer);
240         if (r != FAST_OK) {
241                 throw FastvideoError ("ExportToHostCreate");
242         }
243
244         _decoded_stride = info.width * 3;
245         _decoded_stride += 32 - (_decoded_stride % 32);
246         cudaError e = cudaMallocHost (&_decoded, _decoded_stride * info.height);
247         if (e != cudaSuccess) {
248                 throw bad_alloc ();
249         }
250 }