Use thread pool for DWT decoding
authorEven Rouault <even.rouault@spatialys.com>
Wed, 25 May 2016 16:07:15 +0000 (18:07 +0200)
committerEven Rouault <even.rouault@spatialys.com>
Wed, 25 May 2016 19:02:07 +0000 (21:02 +0200)
src/lib/openjp2/dwt.c
src/lib/openjp2/dwt.h
src/lib/openjp2/tcd.c

index a4ff01ba8f9883f06fd99f00300a39c5a63fd1a0..e21cc16fcfa74c7ba7d8437fa8c1e8e0d53b9daf 100644 (file)
@@ -124,7 +124,7 @@ static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, opj_st
 /**
 Inverse wavelet transform in 2-D.
 */
-static OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 i, DWT1DFN fn);
+static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* tilec, OPJ_UINT32 i, DWT1DFN fn);
 
 static OPJ_BOOL opj_dwt_encode_procedure(      opj_tcd_tilecomp_t * tilec,
                                                                                    void (*p_function)(OPJ_INT32 *, OPJ_INT32,OPJ_INT32,OPJ_INT32) );
@@ -473,8 +473,8 @@ OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec)
 /* <summary>                            */
 /* Inverse 5-3 wavelet transform in 2-D. */
 /* </summary>                           */
-OPJ_BOOL opj_dwt_decode(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres) {
-       return opj_dwt_decode_tile(tilec, numres, &opj_dwt_decode_1);
+OPJ_BOOL opj_dwt_decode(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres) {
+       return opj_dwt_decode_tile(tp, tilec, numres, &opj_dwt_decode_1);
 }
 
 
@@ -556,10 +556,72 @@ static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* restrict r, OPJ_U
        return mr ;
 }
 
+typedef struct
+{
+    opj_dwt_t h;
+    DWT1DFN dwt_1D;
+    OPJ_UINT32 rw;
+    OPJ_UINT32 w;
+    OPJ_INT32 * restrict tiledp;
+    int min_j;
+    int max_j;
+} opj_dwd_decode_h_job_t;
+
+static void opj_dwt_decode_h_func(void* user_data, opj_tls_t* tls)
+{
+    int j;
+    opj_dwd_decode_h_job_t* job;
+    (void)tls;
+
+    job = (opj_dwd_decode_h_job_t*)user_data;
+    for( j = job->min_j; j < job->max_j; j++ )
+    {
+          opj_dwt_interleave_h(&job->h, &job->tiledp[j*job->w]);
+          (job->dwt_1D)(&job->h);
+          memcpy(&job->tiledp[j*job->w], job->h.mem, job->rw * sizeof(OPJ_INT32));
+    }
+
+    opj_aligned_free(job->h.mem);
+    opj_free(job);
+}
+
+typedef struct
+{
+    opj_dwt_t v;
+    DWT1DFN dwt_1D;
+    OPJ_UINT32 rh;
+    OPJ_UINT32 w;
+    OPJ_INT32 * restrict tiledp;
+    int min_j;
+    int max_j;
+} opj_dwd_decode_v_job_t;
+
+static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls)
+{
+    int j;
+    opj_dwd_decode_v_job_t* job;
+    (void)tls;
+
+    job = (opj_dwd_decode_v_job_t*)user_data;
+    for( j = job->min_j; j < job->max_j; j++ )
+    {
+        OPJ_UINT32 k;
+        opj_dwt_interleave_v(&job->v, &job->tiledp[j], (OPJ_INT32)job->w);
+        (job->dwt_1D)(&job->v);
+        for(k = 0; k < job->rh; ++k) {
+            job->tiledp[k * job->w + j] = job->v.mem[k];
+        }
+    }
+
+    opj_aligned_free(job->v.mem);
+    opj_free(job);
+}
+
+
 /* <summary>                            */
 /* Inverse wavelet transform in 2-D.     */
 /* </summary>                           */
-static OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres, DWT1DFN dwt_1D) {
+static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres, DWT1DFN dwt_1D) {
        opj_dwt_t h;
        opj_dwt_t v;
 
@@ -569,11 +631,15 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres
        OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 - tr->y0);  /* height of the resolution level computed */
 
        OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
+    size_t h_mem_size;
+    int num_threads;
        
        if (numres == 1U) {
                return OPJ_TRUE;
        }
-       h.mem = (OPJ_INT32*)opj_aligned_malloc(opj_dwt_max_resolution(tr, numres) * sizeof(OPJ_INT32));
+       num_threads = opj_thread_pool_get_thread_count(tp);
+       h_mem_size = opj_dwt_max_resolution(tr, numres) * sizeof(OPJ_INT32);
+       h.mem = (OPJ_INT32*)opj_aligned_malloc(h_mem_size);
        if (! h.mem){
                /* FIXME event manager error callback */
                return OPJ_FALSE;
@@ -595,23 +661,93 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres
                h.dn = (OPJ_INT32)(rw - (OPJ_UINT32)h.sn);
                h.cas = tr->x0 % 2;
 
-               for(j = 0; j < rh; ++j) {
-                       opj_dwt_interleave_h(&h, &tiledp[j*w]);
-                       (dwt_1D)(&h);
-                       memcpy(&tiledp[j*w], h.mem, rw * sizeof(OPJ_INT32));
-               }
+        if( num_threads <= 1 || rh == 1 )
+        {
+            for(j = 0; j < rh; ++j) {
+                opj_dwt_interleave_h(&h, &tiledp[j*w]);
+                (dwt_1D)(&h);
+                memcpy(&tiledp[j*w], h.mem, rw * sizeof(OPJ_INT32));
+            }
+        }
+        else
+        {
+            int num_jobs = num_threads;
+            if( rh < num_jobs )
+                num_jobs = rh;
+            for( j = 0; j < num_jobs; j++ )
+            {
+                opj_dwd_decode_h_job_t* job;
+
+                job = (opj_dwd_decode_h_job_t*) opj_malloc(sizeof(opj_dwd_decode_h_job_t));
+                job->h = h;
+                job->dwt_1D = dwt_1D;
+                job->rw = rw;
+                job->w = w;
+                job->tiledp = tiledp;
+                job->min_j = j * (rh / num_jobs);
+                job->max_j = (j+1) * (rh / num_jobs);
+                if( job->max_j > rh || j == num_jobs - 1 )
+                    job->max_j = rh;
+                job->h.mem = (OPJ_INT32*)opj_aligned_malloc(h_mem_size);
+                if (!job->h.mem)
+                {
+                    /* FIXME event manager error callback */
+                    opj_thread_pool_wait_completion(tp, 0);
+                    opj_free(job);
+                    opj_aligned_free(h.mem);
+                    return OPJ_FALSE;
+                }
+                opj_thread_pool_submit_job( tp, opj_dwt_decode_h_func, job );
+            }
+            opj_thread_pool_wait_completion(tp, 0);
+        }
 
                v.dn = (OPJ_INT32)(rh - (OPJ_UINT32)v.sn);
                v.cas = tr->y0 % 2;
 
-               for(j = 0; j < rw; ++j){
-                       OPJ_UINT32 k;
-                       opj_dwt_interleave_v(&v, &tiledp[j], (OPJ_INT32)w);
-                       (dwt_1D)(&v);
-                       for(k = 0; k < rh; ++k) {
-                               tiledp[k * w + j] = v.mem[k];
-                       }
-               }
+        if( num_threads <= 1 || rw == 1 )
+        {
+            for(j = 0; j < rw; ++j){
+                OPJ_UINT32 k;
+                opj_dwt_interleave_v(&v, &tiledp[j], (OPJ_INT32)w);
+                (dwt_1D)(&v);
+                for(k = 0; k < rh; ++k) {
+                    tiledp[k * w + j] = v.mem[k];
+                }
+            }
+        }
+        else
+        {
+            int num_jobs = num_threads;
+            if( rw < num_jobs )
+                num_jobs = rw;
+            for( j = 0; j < num_jobs; j++ )
+            {
+                opj_dwd_decode_v_job_t* job;
+
+                job = (opj_dwd_decode_v_job_t*) opj_malloc(sizeof(opj_dwd_decode_v_job_t));
+                job->v = v;
+                job->dwt_1D = dwt_1D;
+                job->rh = rh;
+                job->w = w;
+                job->tiledp = tiledp;
+                job->min_j = j * (rw / num_jobs);
+                job->max_j = (j+1) * (rw / num_jobs);
+                if( job->max_j > rw || j == num_jobs - 1 )
+                    job->max_j = rw;
+                job->v.mem = (OPJ_INT32*)opj_aligned_malloc(h_mem_size);
+                if (!job->v.mem)
+                {
+                    /* FIXME event manager error callback */
+                    opj_thread_pool_wait_completion(tp, 0);
+                    opj_free(job);
+                    opj_aligned_free(v.mem);
+                    return OPJ_FALSE;
+                }
+                opj_thread_pool_submit_job( tp, opj_dwt_decode_v_func, job );
+            }
+            opj_thread_pool_wait_completion(tp, 0);
+        }
        }
        opj_aligned_free(h.mem);
        return OPJ_TRUE;
index 21fe942a984e5c3b6568025fa1051f3bec677757..9385002602112bcc017eefb5e886e47eafd49f5d 100644 (file)
@@ -63,10 +63,11 @@ OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec);
 /**
 Inverse 5-3 wavelet transform in 2-D.
 Apply a reversible inverse DWT transform to a component of an image.
+@param tp Thread pool
 @param tilec Tile component information (current tile)
 @param numres Number of resolution levels to decode
 */
-OPJ_BOOL opj_dwt_decode(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres);
+OPJ_BOOL opj_dwt_decode(opj_thread_pool_t* tp, opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres);
 
 /**
 Get the gain of a subband for the reversible 5-3 DWT.
index a34fa18c75a459970b4e69c262ec7856924c5ccc..2980f723601c2718031c597bfc5a9d8626df9ff8 100644 (file)
@@ -1610,7 +1610,7 @@ static OPJ_BOOL opj_tcd_dwt_decode ( opj_tcd_t *p_tcd )
                 */
 
                 if (l_tccp->qmfbid == 1) {
-                        if (! opj_dwt_decode(l_tile_comp, l_img_comp->resno_decoded+1)) {
+                        if (! opj_dwt_decode(p_tcd->thread_pool, l_tile_comp, l_img_comp->resno_decoded+1)) {
                                 return OPJ_FALSE;
                         }
                 }