use tolerance to bail out early from rate control : much faster
[openjpeg.git] / src / lib / openjp2 / tcd.c
index 2189dcc37fde540e691c083dd78e038ded317543..b3e789e3353602f8347c76e323b400356c5f69b1 100644 (file)
@@ -14,6 +14,7 @@
  * Copyright (c) 2006-2007, Parvatha Elangovan
  * Copyright (c) 2008, 2011-2012, Centre National d'Etudes Spatiales (CNES), FR
  * Copyright (c) 2012, CS Systemes d'Information, France
+ * Copyright (c) 2017, IntoPIX SA <support@intopix.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -180,12 +181,14 @@ static OPJ_BOOL opj_tcd_t2_encode(opj_tcd_t *p_tcd,
                                   OPJ_BYTE * p_dest_data,
                                   OPJ_UINT32 * p_data_written,
                                   OPJ_UINT32 p_max_dest_size,
-                                  opj_codestream_info_t *p_cstr_info);
+                                  opj_codestream_info_t *p_cstr_info,
+                                  opj_event_mgr_t *p_manager);
 
 static OPJ_BOOL opj_tcd_rate_allocate_encode(opj_tcd_t *p_tcd,
         OPJ_BYTE * p_dest_data,
         OPJ_UINT32 p_max_dest_size,
-        opj_codestream_info_t *p_cstr_info);
+        opj_codestream_info_t *p_cstr_info,
+        opj_event_mgr_t *p_manager);
 
 /* ----------------------------------------------------------------------- */
 
@@ -266,28 +269,33 @@ void opj_tcd_makelayer(opj_tcd_t *tcd,
 
                         n = cblk->numpassesinlayers;
 
-                        for (passno = cblk->numpassesinlayers; passno < cblk->totalpasses; passno++) {
-                            OPJ_UINT32 dr;
-                            OPJ_FLOAT64 dd;
-                            opj_tcd_pass_t *pass = &cblk->passes[passno];
-
-                            if (n == 0) {
-                                dr = pass->rate;
-                                dd = pass->distortiondec;
-                            } else {
-                                dr = pass->rate - cblk->passes[n - 1].rate;
-                                dd = pass->distortiondec - cblk->passes[n - 1].distortiondec;
-                            }
+                        if (thresh < 0) {
+                            /* Special value to indicate to use all passes */
+                            n = cblk->totalpasses;
+                        } else {
+                            for (passno = cblk->numpassesinlayers; passno < cblk->totalpasses; passno++) {
+                                OPJ_UINT32 dr;
+                                OPJ_FLOAT64 dd;
+                                opj_tcd_pass_t *pass = &cblk->passes[passno];
+
+                                if (n == 0) {
+                                    dr = pass->rate;
+                                    dd = pass->distortiondec;
+                                } else {
+                                    dr = pass->rate - cblk->passes[n - 1].rate;
+                                    dd = pass->distortiondec - cblk->passes[n - 1].distortiondec;
+                                }
 
-                            if (!dr) {
-                                if (dd != 0) {
+                                if (!dr) {
+                                    if (dd != 0) {
+                                        n = passno + 1;
+                                    }
+                                    continue;
+                                }
+                                if (thresh - (dd / dr) <
+                                        DBL_EPSILON) { /* do not rely on float equality, check with DBL_EPSILON margin */
                                     n = passno + 1;
                                 }
-                                continue;
-                            }
-                            if (thresh - (dd / dr) <
-                                    DBL_EPSILON) { /* do not rely on float equality, check with DBL_EPSILON margin */
-                                n = passno + 1;
                             }
                         }
 
@@ -431,7 +439,8 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
                               OPJ_BYTE *dest,
                               OPJ_UINT32 * p_data_written,
                               OPJ_UINT32 len,
-                              opj_codestream_info_t *cstr_info)
+                              opj_codestream_info_t *cstr_info,
+                              opj_event_mgr_t *p_manager)
 {
     OPJ_UINT32 compno, resno, bandno, precno, cblkno, layno;
     OPJ_UINT32 passno;
@@ -530,6 +539,8 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
                                 OPJ_UINT32) ceil(tcd_tcp->rates[layno])), len) : len;
         OPJ_FLOAT64 goodthresh = 0;
         OPJ_FLOAT64 stable_thresh = 0;
+               OPJ_FLOAT64 old_thresh = -1;
+               const OPJ_FLOAT64 tolerance = 0.01;
         OPJ_UINT32 i;
         OPJ_FLOAT64 distotarget;                /* fixed_quality */
 
@@ -558,12 +569,15 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
                 thresh = (lo + hi) / 2;
 
                 opj_tcd_makelayer(tcd, layno, thresh, 0);
+                               if ((fabs(old_thresh - thresh)) < tolerance)
+                                       break;
+                               old_thresh = thresh;
 
                 if (cp->m_specific_param.m_enc.m_fixed_quality) {       /* fixed_quality */
                     if (OPJ_IS_CINEMA(cp->rsiz)) {
                         if (! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest,
                                                     p_data_written, maxlen, cstr_info, tcd->cur_tp_num, tcd->tp_pos, tcd->cur_pino,
-                                                    THRESH_CALC)) {
+                                                    THRESH_CALC, p_manager)) {
 
                             lo = thresh;
                             continue;
@@ -593,7 +607,7 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
                 } else {
                     if (! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest,
                                                 p_data_written, maxlen, cstr_info, tcd->cur_tp_num, tcd->tp_pos, tcd->cur_pino,
-                                                THRESH_CALC)) {
+                                                THRESH_CALC, p_manager)) {
                         /* TODO: what to do with l ??? seek / tell ??? */
                         /* opj_event_msg(tcd->cinfo, EVT_INFO, "rate alloc: len=%d, max=%d\n", l, maxlen); */
                         lo = thresh;
@@ -609,7 +623,8 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
 
             opj_t2_destroy(t2);
         } else {
-            goodthresh = min;
+            /* Special value to indicate to use all passes */
+            goodthresh = -1;
         }
 
         if (cstr_info) { /* Threshold for Marcela Index */
@@ -787,6 +802,7 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
         l_tilec->y0 = opj_int_ceildiv(l_tile->y0, (OPJ_INT32)l_image_comp->dy);
         l_tilec->x1 = opj_int_ceildiv(l_tile->x1, (OPJ_INT32)l_image_comp->dx);
         l_tilec->y1 = opj_int_ceildiv(l_tile->y1, (OPJ_INT32)l_image_comp->dy);
+        l_tilec->compno = compno;
         /*fprintf(stderr, "\tTile compo border = %d,%d,%d,%d\n", l_tilec->x0, l_tilec->y0,l_tilec->x1,l_tilec->y1);*/
 
         /* compute l_data_size with overflow check */
@@ -965,8 +981,10 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
                 numbps = (OPJ_INT32)(l_image_comp->prec + l_gain);
                 l_band->stepsize = (OPJ_FLOAT32)(((1.0 + l_step_size->mant / 2048.0) * pow(2.0,
                                                   (OPJ_INT32)(numbps - l_step_size->expn)))) * fraction;
+                /* Mb value of Equation E-2 in "E.1 Inverse quantization
+                 * procedure" of the standard */
                 l_band->numbps = l_step_size->expn + (OPJ_INT32)l_tccp->numgbits -
-                                 1;      /* WHY -1 ? */
+                                 1;
 
                 if (!l_band->precincts && (l_nb_precincts > 0U)) {
                     l_band->precincts = (opj_tcd_precinct_t *) opj_malloc(/*3 * */
@@ -1182,8 +1200,11 @@ static OPJ_BOOL opj_tcd_code_block_enc_allocate_data(opj_tcd_cblk_enc_t *
 {
     OPJ_UINT32 l_data_size;
 
-    /* The +1 is needed for https://github.com/uclouvain/openjpeg/issues/835 */
-    l_data_size = 1 + (OPJ_UINT32)((p_code_block->x1 - p_code_block->x0) *
+    /* +1 is needed for https://github.com/uclouvain/openjpeg/issues/835 */
+    /* and actually +2 required for https://github.com/uclouvain/openjpeg/issues/982 */
+    /* TODO: is there a theoretical upper-bound for the compressed code */
+    /* block size ? */
+    l_data_size = 2 + (OPJ_UINT32)((p_code_block->x1 - p_code_block->x0) *
                                    (p_code_block->y1 - p_code_block->y0) * (OPJ_INT32)sizeof(OPJ_UINT32));
 
     if (l_data_size > p_code_block->data_size) {
@@ -1208,20 +1229,19 @@ static OPJ_BOOL opj_tcd_code_block_enc_allocate_data(opj_tcd_cblk_enc_t *
     return OPJ_TRUE;
 }
 
+
+void opj_tcd_reinit_segment(opj_tcd_seg_t* seg)
+{
+    memset(seg, 0, sizeof(opj_tcd_seg_t));
+}
+
 /**
  * Allocates memory for a decoding code block.
  */
 static OPJ_BOOL opj_tcd_code_block_dec_allocate(opj_tcd_cblk_dec_t *
         p_code_block)
 {
-    if (! p_code_block->data) {
-
-        p_code_block->data = (OPJ_BYTE*) opj_malloc(OPJ_COMMON_DEFAULT_CBLK_DATA_SIZE);
-        if (! p_code_block->data) {
-            return OPJ_FALSE;
-        }
-        p_code_block->data_max_size = OPJ_COMMON_DEFAULT_CBLK_DATA_SIZE;
-        /*fprintf(stderr, "Allocate 8192 elements of code_block->data\n");*/
+    if (! p_code_block->segs) {
 
         p_code_block->segs = (opj_tcd_seg_t *) opj_calloc(OPJ_J2K_DEFAULT_NB_SEGS,
                              sizeof(opj_tcd_seg_t));
@@ -1234,16 +1254,20 @@ static OPJ_BOOL opj_tcd_code_block_dec_allocate(opj_tcd_cblk_dec_t *
         /*fprintf(stderr, "m_current_max_segs of code_block->data = %d\n", p_code_block->m_current_max_segs);*/
     } else {
         /* sanitize */
-        OPJ_BYTE* l_data = p_code_block->data;
-        OPJ_UINT32 l_data_max_size = p_code_block->data_max_size;
         opj_tcd_seg_t * l_segs = p_code_block->segs;
         OPJ_UINT32 l_current_max_segs = p_code_block->m_current_max_segs;
+        opj_tcd_seg_data_chunk_t* l_chunks = p_code_block->chunks;
+        OPJ_UINT32 l_numchunksalloc = p_code_block->numchunksalloc;
+        OPJ_UINT32 i;
 
         memset(p_code_block, 0, sizeof(opj_tcd_cblk_dec_t));
-        p_code_block->data = l_data;
-        p_code_block->data_max_size = l_data_max_size;
         p_code_block->segs = l_segs;
         p_code_block->m_current_max_segs = l_current_max_segs;
+        for (i = 0; i < l_current_max_segs; ++i) {
+            opj_tcd_reinit_segment(&l_segs[i]);
+        }
+        p_code_block->chunks = l_chunks;
+        p_code_block->numchunksalloc = l_numchunksalloc;
     }
 
     return OPJ_TRUE;
@@ -1298,7 +1322,8 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd,
                              OPJ_BYTE *p_dest,
                              OPJ_UINT32 * p_data_written,
                              OPJ_UINT32 p_max_length,
-                             opj_codestream_info_t *p_cstr_info)
+                             opj_codestream_info_t *p_cstr_info,
+                             opj_event_mgr_t *p_manager)
 {
 
     if (p_tcd->cur_tp_num == 0) {
@@ -1360,7 +1385,8 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd,
         /* FIXME _ProfStop(PGROUP_T1); */
 
         /* FIXME _ProfStart(PGROUP_RATE); */
-        if (! opj_tcd_rate_allocate_encode(p_tcd, p_dest, p_max_length, p_cstr_info)) {
+        if (! opj_tcd_rate_allocate_encode(p_tcd, p_dest, p_max_length,
+                                           p_cstr_info, p_manager)) {
             return OPJ_FALSE;
         }
         /* FIXME _ProfStop(PGROUP_RATE); */
@@ -1375,7 +1401,7 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd,
     /* FIXME _ProfStart(PGROUP_T2); */
 
     if (! opj_tcd_t2_encode(p_tcd, p_dest, p_data_written, p_max_length,
-                            p_cstr_info)) {
+                            p_cstr_info, p_manager)) {
         return OPJ_FALSE;
     }
     /* FIXME _ProfStop(PGROUP_T2); */
@@ -1386,6 +1412,10 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd,
 }
 
 OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd,
+                             OPJ_UINT32 decoded_x0,
+                             OPJ_UINT32 decoded_y0,
+                             OPJ_UINT32 decoded_x1,
+                             OPJ_UINT32 decoded_y1,
                              OPJ_BYTE *p_src,
                              OPJ_UINT32 p_max_length,
                              OPJ_UINT32 p_tile_no,
@@ -1396,6 +1426,10 @@ OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd,
     OPJ_UINT32 l_data_read;
     p_tcd->tcd_tileno = p_tile_no;
     p_tcd->tcp = &(p_tcd->cp->tcps[p_tile_no]);
+    p_tcd->decoded_x0 = decoded_x0;
+    p_tcd->decoded_y0 = decoded_y0;
+    p_tcd->decoded_x1 = decoded_x1;
+    p_tcd->decoded_y1 = decoded_y1;
 
 #ifdef TODO_MSD /* FIXME */
     /* INDEX >>  */
@@ -1677,6 +1711,7 @@ static OPJ_BOOL opj_tcd_t2_decode(opj_tcd_t *p_tcd,
     }
 
     if (! opj_t2_decode_packets(
+                p_tcd,
                 l_t2,
                 p_tcd->tcd_tileno,
                 p_tcd->tcd_image->tiles,
@@ -1714,7 +1749,7 @@ static OPJ_BOOL opj_tcd_t1_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager)
     }
 
     for (compno = 0; compno < l_tile->numcomps; ++compno) {
-        opj_t1_decode_cblks(p_tcd->thread_pool, &ret, l_tile_comp, l_tccp,
+        opj_t1_decode_cblks(p_tcd, &ret, l_tile_comp, l_tccp,
                             p_manager, p_manager_mutex, check_pterm);
         if (!ret) {
             break;
@@ -1754,12 +1789,13 @@ static OPJ_BOOL opj_tcd_dwt_decode(opj_tcd_t *p_tcd)
         */
 
         if (l_tccp->qmfbid == 1) {
-            if (! opj_dwt_decode(p_tcd->thread_pool, l_tile_comp,
+            if (! opj_dwt_decode(p_tcd, l_tile_comp,
                                  l_img_comp->resno_decoded + 1)) {
                 return OPJ_FALSE;
             }
         } else {
-            if (! opj_dwt_decode_real(l_tile_comp, l_img_comp->resno_decoded + 1)) {
+            if (! opj_dwt_decode_real(p_tcd, l_tile_comp,
+                                      l_img_comp->resno_decoded + 1)) {
                 return OPJ_FALSE;
             }
         }
@@ -1946,16 +1982,16 @@ static void opj_tcd_code_block_dec_deallocate(opj_tcd_precinct_t * p_precinct)
 
         for (cblkno = 0; cblkno < l_nb_code_blocks; ++cblkno) {
 
-            if (l_code_block->data) {
-                opj_free(l_code_block->data);
-                l_code_block->data = 00;
-            }
-
             if (l_code_block->segs) {
                 opj_free(l_code_block->segs);
                 l_code_block->segs = 00;
             }
 
+            if (l_code_block->chunks) {
+                opj_free(l_code_block->chunks);
+                l_code_block->chunks = 00;
+            }
+
             ++l_code_block;
         }
 
@@ -2191,7 +2227,8 @@ static OPJ_BOOL opj_tcd_t2_encode(opj_tcd_t *p_tcd,
                                   OPJ_BYTE * p_dest_data,
                                   OPJ_UINT32 * p_data_written,
                                   OPJ_UINT32 p_max_dest_size,
-                                  opj_codestream_info_t *p_cstr_info)
+                                  opj_codestream_info_t *p_cstr_info,
+                                  opj_event_mgr_t *p_manager)
 {
     opj_t2_t * l_t2;
 
@@ -2212,7 +2249,8 @@ static OPJ_BOOL opj_tcd_t2_encode(opj_tcd_t *p_tcd,
                 p_tcd->tp_num,
                 p_tcd->tp_pos,
                 p_tcd->cur_pino,
-                FINAL_PASS)) {
+                FINAL_PASS,
+                p_manager)) {
         opj_t2_destroy(l_t2);
         return OPJ_FALSE;
     }
@@ -2227,7 +2265,8 @@ static OPJ_BOOL opj_tcd_t2_encode(opj_tcd_t *p_tcd,
 static OPJ_BOOL opj_tcd_rate_allocate_encode(opj_tcd_t *p_tcd,
         OPJ_BYTE * p_dest_data,
         OPJ_UINT32 p_max_dest_size,
-        opj_codestream_info_t *p_cstr_info)
+        opj_codestream_info_t *p_cstr_info,
+        opj_event_mgr_t *p_manager)
 {
     opj_cp_t * l_cp = p_tcd->cp;
     OPJ_UINT32 l_nb_written = 0;
@@ -2241,7 +2280,7 @@ static OPJ_BOOL opj_tcd_rate_allocate_encode(opj_tcd_t *p_tcd,
         /* fixed_quality */
         /* Normal Rate/distortion allocation */
         if (! opj_tcd_rateallocate(p_tcd, p_dest_data, &l_nb_written, p_max_dest_size,
-                                   p_cstr_info)) {
+                                   p_cstr_info, p_manager)) {
             return OPJ_FALSE;
         }
     } else {
@@ -2343,3 +2382,84 @@ OPJ_BOOL opj_tcd_is_band_empty(opj_tcd_band_t* band)
 {
     return (band->x1 - band->x0 == 0) || (band->y1 - band->y0 == 0);
 }
+
+OPJ_BOOL opj_tcd_is_subband_area_of_interest(opj_tcd_t *tcd,
+        OPJ_UINT32 compno,
+        OPJ_UINT32 resno,
+        OPJ_UINT32 bandno,
+        OPJ_UINT32 band_x0,
+        OPJ_UINT32 band_y0,
+        OPJ_UINT32 band_x1,
+        OPJ_UINT32 band_y1)
+{
+    /* Note: those values for filter_margin are in part the result of */
+    /* experimentation. The value 2 for QMFBID=1 (5x3 filter) can be linked */
+    /* to the maximum left/right extension given in tables F.2 and F.3 of the */
+    /* standard. The value 3 for QMFBID=0 (9x7 filter) is more suspicious, */
+    /* since F.2 and F.3 would lead to 4 instead, so the current 3 might be */
+    /* needed to be bumped to 4, in case inconsistencies are found while */
+    /* decoding parts of irreversible coded images. */
+    /* See opj_dwt_decode_partial_53 and opj_dwt_decode_partial_97 as well */
+    OPJ_UINT32 filter_margin = (tcd->tcp->tccps[compno].qmfbid == 1) ? 2 : 3;
+    opj_tcd_tilecomp_t *tilec = &(tcd->tcd_image->tiles->comps[compno]);
+    opj_image_comp_t* image_comp = &(tcd->image->comps[compno]);
+    /* Compute the intersection of the area of interest, expressed in tile coordinates */
+    /* with the tile coordinates */
+    OPJ_UINT32 tcx0 = opj_uint_max(
+                          (OPJ_UINT32)tilec->x0,
+                          opj_uint_ceildiv(tcd->decoded_x0, image_comp->dx));
+    OPJ_UINT32 tcy0 = opj_uint_max(
+                          (OPJ_UINT32)tilec->y0,
+                          opj_uint_ceildiv(tcd->decoded_y0, image_comp->dy));
+    OPJ_UINT32 tcx1 = opj_uint_min(
+                          (OPJ_UINT32)tilec->x1,
+                          opj_uint_ceildiv(tcd->decoded_x1, image_comp->dx));
+    OPJ_UINT32 tcy1 = opj_uint_min(
+                          (OPJ_UINT32)tilec->y1,
+                          opj_uint_ceildiv(tcd->decoded_y1, image_comp->dy));
+    /* Compute number of decomposition for this band. See table F-1 */
+    OPJ_UINT32 nb = (resno == 0) ?
+                    tilec->numresolutions - 1 :
+                    tilec->numresolutions - resno;
+    /* Map above tile-based coordinates to sub-band-based coordinates per */
+    /* equation B-15 of the standard */
+    OPJ_UINT32 x0b = bandno & 1;
+    OPJ_UINT32 y0b = bandno >> 1;
+    OPJ_UINT32 tbx0 = (nb == 0) ? tcx0 :
+                      (tcx0 <= (1U << (nb - 1)) * x0b) ? 0 :
+                      opj_uint_ceildivpow2(tcx0 - (1U << (nb - 1)) * x0b, nb);
+    OPJ_UINT32 tby0 = (nb == 0) ? tcy0 :
+                      (tcy0 <= (1U << (nb - 1)) * y0b) ? 0 :
+                      opj_uint_ceildivpow2(tcy0 - (1U << (nb - 1)) * y0b, nb);
+    OPJ_UINT32 tbx1 = (nb == 0) ? tcx1 :
+                      (tcx1 <= (1U << (nb - 1)) * x0b) ? 0 :
+                      opj_uint_ceildivpow2(tcx1 - (1U << (nb - 1)) * x0b, nb);
+    OPJ_UINT32 tby1 = (nb == 0) ? tcy1 :
+                      (tcy1 <= (1U << (nb - 1)) * y0b) ? 0 :
+                      opj_uint_ceildivpow2(tcy1 - (1U << (nb - 1)) * y0b, nb);
+    OPJ_BOOL intersects;
+
+    if (tbx0 < filter_margin) {
+        tbx0 = 0;
+    } else {
+        tbx0 -= filter_margin;
+    }
+    if (tby0 < filter_margin) {
+        tby0 = 0;
+    } else {
+        tby0 -= filter_margin;
+    }
+    tbx1 = opj_uint_adds(tbx1, filter_margin);
+    tby1 = opj_uint_adds(tby1, filter_margin);
+
+    intersects = band_x0 < tbx1 && band_y0 < tby1 && band_x1 > tbx0 &&
+                 band_y1 > tby0;
+
+#ifdef DEBUG_VERBOSE
+    printf("compno=%u resno=%u nb=%u bandno=%u x0b=%u y0b=%u band=%u,%u,%u,%u tb=%u,%u,%u,%u -> %u\n",
+           compno, resno, nb, bandno, x0b, y0b,
+           band_x0, band_y0, band_x1, band_y1,
+           tbx0, tby0, tbx1, tby1, intersects);
+#endif
+    return intersects;
+}