+ OPJ_BOOL ret;
+ ret = opj_sparse_array_int32_read(sa,
+ win_l_x0, sa_line,
+ win_l_x1, sa_line + 1,
+ dest + cas + 2 * win_l_x0,
+ 2, 0, OPJ_TRUE);
+ assert(ret);
+ ret = opj_sparse_array_int32_read(sa,
+ sn + win_h_x0, sa_line,
+ sn + win_h_x1, sa_line + 1,
+ dest + 1 - cas + 2 * win_h_x0,
+ 2, 0, OPJ_TRUE);
+ assert(ret);
+ OPJ_UNUSED(ret);
+}
+
+
+/**
+ * Load a group of columns from the sparse array into the vertical work
+ * buffer.
+ *
+ * Two rectangular reads are issued over columns [sa_col, sa_col + nb_cols):
+ *  - rows [win_l_y0, win_l_y1), stored starting at
+ *    dest + cas * 4 + 8 * win_l_y0;
+ *  - rows [sn + win_h_y0, sn + win_h_y1), stored starting at
+ *    dest + (1 - cas) * 4 + 8 * win_h_y0.
+ * Both reads pass 1 and 2 * 4 as the two destination strides (presumably
+ * column stride then line stride -- confirm against
+ * opj_sparse_array_int32_read()).
+ *
+ * A failed read is only caught by assert().
+ */
+static void opj_dwt_interleave_partial_v(OPJ_INT32 *dest,
+        OPJ_INT32 cas,
+        opj_sparse_array_int32_t* sa,
+        OPJ_UINT32 sa_col,
+        OPJ_UINT32 nb_cols,
+        OPJ_UINT32 sn,
+        OPJ_UINT32 win_l_y0,
+        OPJ_UINT32 win_l_y1,
+        OPJ_UINT32 win_h_y0,
+        OPJ_UINT32 win_h_y1)
+{
+    const OPJ_UINT32 end_col = sa_col + nb_cols;
+    OPJ_INT32* low_dest = dest + cas * 4 + 2 * 4 * win_l_y0;
+    OPJ_INT32* high_dest = dest + (1 - cas) * 4 + 2 * 4 * win_h_y0;
+    OPJ_BOOL low_ok;
+    OPJ_BOOL high_ok;
+
+    /* Rows of the first window */
+    low_ok = opj_sparse_array_int32_read(sa,
+                                         sa_col, win_l_y0,
+                                         end_col, win_l_y1,
+                                         low_dest,
+                                         1, 2 * 4, OPJ_TRUE);
+    assert(low_ok);
+
+    /* Rows of the second window, located sn rows further in the array */
+    high_ok = opj_sparse_array_int32_read(sa,
+                                          sa_col, sn + win_h_y0,
+                                          end_col, sn + win_h_y1,
+                                          high_dest,
+                                          1, 2 * 4, OPJ_TRUE);
+    assert(high_ok);
+
+    OPJ_UNUSED(low_ok);
+    OPJ_UNUSED(high_ok);
+}
+/**
+ * In-place inverse integer lifting of one interleaved line, restricted to
+ * a window.
+ *
+ * The samples of a are accessed through the OPJ_S / OPJ_D macros and their
+ * bound-checking variants OPJ_S_, OPJ_D_, OPJ_SS_, OPJ_DD_, all defined
+ * outside this function (presumably OPJ_S(i) is a[2 * i] and OPJ_D(i) is
+ * a[2 * i + 1], by analogy with the OPJ_S_off / OPJ_D_off macros -- confirm).
+ *
+ * @param a        interleaved samples, modified in place
+ * @param dn       bound used by the OPJ_D_ checks
+ * @param sn       bound used by the OPJ_S_ checks
+ * @param cas      0: S samples are updated first, then D samples;
+ *                 non-zero: D samples are updated first, then S samples
+ * @param win_l_x0 first index (inclusive) of the first lifting step
+ * @param win_l_x1 last index (exclusive) of the first lifting step
+ * @param win_h_x0 first index (inclusive) of the second lifting step
+ * @param win_h_x1 last index (exclusive) of the second lifting step
+ */
+static void opj_dwt_decode_partial_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
+ OPJ_INT32 cas,
+ OPJ_INT32 win_l_x0,
+ OPJ_INT32 win_l_x1,
+ OPJ_INT32 win_h_x0,
+ OPJ_INT32 win_h_x1)
+{
+ OPJ_INT32 i;
+
+ if (!cas) {
+ if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */
+
+ /* Naive version is :
+ for (i = win_l_x0; i < win_l_x1; i++) {
+ OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2;
+ }
+ for (i = win_h_x0; i < win_h_x1; i++) {
+ OPJ_D(i) += (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1;
+ }
+ but the compiler doesn't manage to unroll it to avoid bound
+ checking in OPJ_S_ and OPJ_D_ macros
+ */
+
+ i = win_l_x0;
+ if (i < win_l_x1) {
+ OPJ_INT32 i_max;
+
+ /* Left-most case */
+ OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2;
+ i ++;
+
+ i_max = win_l_x1;
+ if (i_max > dn) { /* from dn onwards D must be read through the checked OPJ_D_ */
+ i_max = dn;
+ }
+ for (; i < i_max; i++) {
+ /* No bound checking */
+ OPJ_S(i) -= (OPJ_D(i - 1) + OPJ_D(i) + 2) >> 2;
+ }
+ for (; i < win_l_x1; i++) {
+ /* Right-most case */
+ OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2;
+ }
+ }
+
+ i = win_h_x0;
+ if (i < win_h_x1) {
+ OPJ_INT32 i_max = win_h_x1;
+ if (i_max >= sn) { /* keep i + 1 < sn so that OPJ_S(i + 1) needs no check */
+ i_max = sn - 1;
+ }
+ for (; i < i_max; i++) {
+ /* No bound checking */
+ OPJ_D(i) += (OPJ_S(i) + OPJ_S(i + 1)) >> 1;
+ }
+ for (; i < win_h_x1; i++) {
+ /* Right-most case */
+ OPJ_D(i) += (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1;
+ }
+ }
+ }
+ } else { /* cas != 0: the win_l range drives the D update, the win_h range the S update */
+ if (!sn && dn == 1) { /* NEW : CASE ONE ELEMENT */
+ OPJ_S(0) /= 2;
+ } else {
+ for (i = win_l_x0; i < win_l_x1; i++) {
+ OPJ_D(i) = opj_int_sub_no_overflow(OPJ_D(i),
+ opj_int_add_no_overflow(opj_int_add_no_overflow(OPJ_SS_(i), OPJ_SS_(i + 1)),
+ 2) >> 2);
+ }
+ for (i = win_h_x0; i < win_h_x1; i++) {
+ OPJ_S(i) = opj_int_add_no_overflow(OPJ_S(i),
+ opj_int_add_no_overflow(OPJ_DD_(i), OPJ_DD_(i - 1)) >> 1);
+ }
+ }
+ }
+}
+
+/* Accessors for a work buffer named `a` that interleaves 4 columns:
+ * sample i of column off is at a[8 * i + off] for S and at
+ * a[8 * i + 4 + off] for D.
+ * The double-underscore variants clamp i: a negative index maps to sample 0
+ * and an index at or past the bound maps to the last sample. The bound is
+ * sn for OPJ_S__off and OPJ_DD__off, and dn for OPJ_D__off and OPJ_SS__off.
+ * `a`, `sn` and `dn` are taken from the scope of the caller.
+ * Note that i may be evaluated more than once: pass side-effect free
+ * expressions only. */
+#define OPJ_S_off(i,off) a[(OPJ_UINT32)(i)*2*4+(off)]
+#define OPJ_D_off(i,off) a[(1+(OPJ_UINT32)(i)*2)*4+(off)]
+#define OPJ_S__off(i,off) ((i)<0?OPJ_S_off(0,off):((i)>=sn?OPJ_S_off(sn-1,off):OPJ_S_off(i,off)))
+#define OPJ_D__off(i,off) ((i)<0?OPJ_D_off(0,off):((i)>=dn?OPJ_D_off(dn-1,off):OPJ_D_off(i,off)))
+#define OPJ_SS__off(i,off) ((i)<0?OPJ_S_off(0,off):((i)>=dn?OPJ_S_off(dn-1,off):OPJ_S_off(i,off)))
+#define OPJ_DD__off(i,off) ((i)<0?OPJ_D_off(0,off):((i)>=sn?OPJ_D_off(sn-1,off):OPJ_D_off(i,off)))
+/**
+ * In-place inverse integer lifting applied to 4 columns at once, restricted
+ * to a window. The update formulas are the same as in
+ * opj_dwt_decode_partial_1().
+ *
+ * Samples are addressed with the OPJ_S_off / OPJ_D_off macros: sample i of
+ * column off is at a[8 * i + off] for S and a[8 * i + 4 + off] for D.
+ * When __SSE2__ is defined, aligned 128-bit loads and stores are used on
+ * a + 8 * i and a + 4 + 8 * i, so a has to be 16-byte aligned.
+ *
+ * @param a        interleaved samples of 4 columns, modified in place
+ * @param nb_cols  ignored (explicitly cast to void); all 4 columns of the
+ *                 buffer are always processed
+ * @param dn       bound used by the OPJ_D__off / OPJ_SS__off checks
+ * @param sn       bound used by the OPJ_S__off / OPJ_DD__off checks
+ * @param cas      0: S samples are updated first, then D samples;
+ *                 non-zero: D samples are updated first, then S samples
+ * @param win_l_x0 first index (inclusive) of the first lifting step
+ * @param win_l_x1 last index (exclusive) of the first lifting step
+ * @param win_h_x0 first index (inclusive) of the second lifting step
+ * @param win_h_x1 last index (exclusive) of the second lifting step
+ */
+static void opj_dwt_decode_partial_1_parallel(OPJ_INT32 *a,
+ OPJ_UINT32 nb_cols,
+ OPJ_INT32 dn, OPJ_INT32 sn,
+ OPJ_INT32 cas,
+ OPJ_INT32 win_l_x0,
+ OPJ_INT32 win_l_x1,
+ OPJ_INT32 win_h_x0,
+ OPJ_INT32 win_h_x1)
+{
+ OPJ_INT32 i;
+ OPJ_UINT32 off;
+
+ (void)nb_cols;
+
+ if (!cas) {
+ if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */
+
+ /* Naive version is :
+ for (i = win_l_x0; i < win_l_x1; i++) {
+ OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2;
+ }
+ for (i = win_h_x0; i < win_h_x1; i++) {
+ OPJ_D(i) += (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1;
+ }
+ but the compiler doesn't manage to unroll it to avoid bound
+ checking in OPJ_S_ and OPJ_D_ macros
+ */
+
+ i = win_l_x0;
+ if (i < win_l_x1) {
+ OPJ_INT32 i_max;
+
+ /* Left-most case */
+ for (off = 0; off < 4; off++) {
+ OPJ_S_off(i, off) -= (OPJ_D__off(i - 1, off) + OPJ_D__off(i, off) + 2) >> 2;
+ }
+ i ++;
+
+ i_max = win_l_x1;
+ if (i_max > dn) { /* from dn onwards D must be read through the checked OPJ_D__off */
+ i_max = dn;
+ }
+
+#ifdef __SSE2__
+ if (i + 1 < i_max) { /* two indices per iteration; each vector holds the 4 columns of one sample */
+ const __m128i two = _mm_set1_epi32(2);
+ __m128i Dm1 = _mm_load_si128((__m128i * const)(a + 4 + (i - 1) * 8));
+ for (; i + 1 < i_max; i += 2) {
+ /* No bound checking */
+ __m128i S = _mm_load_si128((__m128i * const)(a + i * 8));
+ __m128i D = _mm_load_si128((__m128i * const)(a + 4 + i * 8));
+ __m128i S1 = _mm_load_si128((__m128i * const)(a + (i + 1) * 8));
+ __m128i D1 = _mm_load_si128((__m128i * const)(a + 4 + (i + 1) * 8));
+ S = _mm_sub_epi32(S,
+ _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(Dm1, D), two), 2));
+ S1 = _mm_sub_epi32(S1,
+ _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(D, D1), two), 2));
+ _mm_store_si128((__m128i*)(a + i * 8), S);
+ _mm_store_si128((__m128i*)(a + (i + 1) * 8), S1);
+ Dm1 = D1; /* D(i + 1) becomes D(i - 1) of the next iteration */
+ }
+ }
+#endif
+
+ for (; i < i_max; i++) { /* scalar remainder, or the whole range without SSE2 */
+ /* No bound checking */
+ for (off = 0; off < 4; off++) {
+ OPJ_S_off(i, off) -= (OPJ_D_off(i - 1, off) + OPJ_D_off(i, off) + 2) >> 2;
+ }
+ }
+ for (; i < win_l_x1; i++) {
+ /* Right-most case */
+ for (off = 0; off < 4; off++) {
+ OPJ_S_off(i, off) -= (OPJ_D__off(i - 1, off) + OPJ_D__off(i, off) + 2) >> 2;
+ }
+ }
+ }
+
+ i = win_h_x0;
+ if (i < win_h_x1) {
+ OPJ_INT32 i_max = win_h_x1;
+ if (i_max >= sn) { /* keep i + 1 < sn so that S(i + 1) needs no check */
+ i_max = sn - 1;
+ }
+
+#ifdef __SSE2__
+ if (i + 1 < i_max) { /* two indices per iteration; reads S(i), S(i + 1) and S(i + 2) */
+ __m128i S = _mm_load_si128((__m128i * const)(a + i * 8));
+ for (; i + 1 < i_max; i += 2) {
+ /* No bound checking */
+ __m128i D = _mm_load_si128((__m128i * const)(a + 4 + i * 8));
+ __m128i S1 = _mm_load_si128((__m128i * const)(a + (i + 1) * 8));
+ __m128i D1 = _mm_load_si128((__m128i * const)(a + 4 + (i + 1) * 8));
+ __m128i S2 = _mm_load_si128((__m128i * const)(a + (i + 2) * 8));
+ D = _mm_add_epi32(D, _mm_srai_epi32(_mm_add_epi32(S, S1), 1));
+ D1 = _mm_add_epi32(D1, _mm_srai_epi32(_mm_add_epi32(S1, S2), 1));
+ _mm_store_si128((__m128i*)(a + 4 + i * 8), D);
+ _mm_store_si128((__m128i*)(a + 4 + (i + 1) * 8), D1);
+ S = S2; /* S(i + 2) becomes S(i) of the next iteration */
+ }
+ }
+#endif
+
+ for (; i < i_max; i++) { /* scalar remainder, or the whole range without SSE2 */
+ /* No bound checking */
+ for (off = 0; off < 4; off++) {
+ OPJ_D_off(i, off) += (OPJ_S_off(i, off) + OPJ_S_off(i + 1, off)) >> 1;
+ }
+ }
+ for (; i < win_h_x1; i++) {
+ /* Right-most case */
+ for (off = 0; off < 4; off++) {
+ OPJ_D_off(i, off) += (OPJ_S__off(i, off) + OPJ_S__off(i + 1, off)) >> 1;
+ }
+ }
+ }
+ }
+ } else { /* cas != 0: the win_l range drives the D update, the win_h range the S update */
+ if (!sn && dn == 1) { /* NEW : CASE ONE ELEMENT */
+ for (off = 0; off < 4; off++) {
+ OPJ_S_off(0, off) /= 2;
+ }
+ } else {
+ for (i = win_l_x0; i < win_l_x1; i++) {
+ for (off = 0; off < 4; off++) {
+ OPJ_D_off(i, off) = opj_int_sub_no_overflow(
+ OPJ_D_off(i, off),
+ opj_int_add_no_overflow(
+ opj_int_add_no_overflow(OPJ_SS__off(i, off), OPJ_SS__off(i + 1, off)), 2) >> 2);
+ }
+ }
+ for (i = win_h_x0; i < win_h_x1; i++) {
+ for (off = 0; off < 4; off++) {
+ OPJ_S_off(i, off) = opj_int_add_no_overflow(
+ OPJ_S_off(i, off),
+ opj_int_add_no_overflow(OPJ_DD__off(i, off), OPJ_DD__off(i - 1, off)) >> 1);
+ }
+ }
+ }
+ }
+}
+
+/**
+ * Convert a rectangle given in tile-component coordinates into the
+ * coordinates of one sub-band (equation B-15 of the standard).
+ *
+ * @param tilec   tile component; only numresolutions is read
+ * @param resno   resolution level of the band
+ * @param bandno  band index: bit 0 selects the horizontal offset, the
+ *                remaining bits (bandno >> 1) the vertical one
+ * @param tcx0    left coordinate in tile-component space
+ * @param tcy0    top coordinate in tile-component space
+ * @param tcx1    right coordinate in tile-component space
+ * @param tcy1    bottom coordinate in tile-component space
+ * @param tbx0    receives the band-space left coordinate, may be NULL
+ * @param tby0    receives the band-space top coordinate, may be NULL
+ * @param tbx1    receives the band-space right coordinate, may be NULL
+ * @param tby1    receives the band-space bottom coordinate, may be NULL
+ */
+static void opj_dwt_get_band_coordinates(opj_tcd_tilecomp_t* tilec,
+        OPJ_UINT32 resno,
+        OPJ_UINT32 bandno,
+        OPJ_UINT32 tcx0,
+        OPJ_UINT32 tcy0,
+        OPJ_UINT32 tcx1,
+        OPJ_UINT32 tcy1,
+        OPJ_UINT32* tbx0,
+        OPJ_UINT32* tby0,
+        OPJ_UINT32* tbx1,
+        OPJ_UINT32* tby1)
+{
+    OPJ_UINT32 nb;
+    OPJ_UINT32 x_shift;
+    OPJ_UINT32 y_shift;
+
+    /* Number of decompositions for this band. See table F-1 */
+    if (resno == 0) {
+        nb = tilec->numresolutions - 1;
+    } else {
+        nb = tilec->numresolutions - resno;
+    }
+
+    /* Without any decomposition, band and tile coordinates coincide */
+    if (nb == 0) {
+        if (tbx0) {
+            *tbx0 = tcx0;
+        }
+        if (tby0) {
+            *tby0 = tcy0;
+        }
+        if (tbx1) {
+            *tbx1 = tcx1;
+        }
+        if (tby1) {
+            *tby1 = tcy1;
+        }
+        return;
+    }
+
+    /* Offset removed before the division by 2^nb */
+    x_shift = (1U << (nb - 1)) * (bandno & 1);
+    y_shift = (1U << (nb - 1)) * (bandno >> 1);
+
+    /* A coordinate not greater than the offset maps to 0 */
+    if (tbx0) {
+        if (tcx0 <= x_shift) {
+            *tbx0 = 0;
+        } else {
+            *tbx0 = opj_uint_ceildivpow2(tcx0 - x_shift, nb);
+        }
+    }
+    if (tby0) {
+        if (tcy0 <= y_shift) {
+            *tby0 = 0;
+        } else {
+            *tby0 = opj_uint_ceildivpow2(tcy0 - y_shift, nb);
+        }
+    }
+    if (tbx1) {
+        if (tcx1 <= x_shift) {
+            *tbx1 = 0;
+        } else {
+            *tbx1 = opj_uint_ceildivpow2(tcx1 - x_shift, nb);
+        }
+    }
+    if (tby1) {
+        if (tcy1 <= y_shift) {
+            *tby1 = 0;
+        } else {
+            *tby1 = opj_uint_ceildivpow2(tcy1 - y_shift, nb);
+        }
+    }
+}
+
+/**
+ * Widen the segment [*start, *end) by filter_width on each side.
+ *
+ * The lower bound goes through opj_uint_subs() and the upper bound through
+ * opj_uint_adds() (both defined elsewhere; presumably saturating -- confirm).
+ * The upper bound is then limited to max_size.
+ *
+ * @param filter_width amount added on each side
+ * @param max_size     largest value allowed for *end
+ * @param start        in/out: lower bound of the segment
+ * @param end          in/out: upper bound of the segment
+ */
+static void opj_dwt_segment_grow(OPJ_UINT32 filter_width,
+                                 OPJ_UINT32 max_size,
+                                 OPJ_UINT32* start,
+                                 OPJ_UINT32* end)
+{
+    OPJ_UINT32 grown_end;
+
+    *start = opj_uint_subs(*start, filter_width);
+
+    grown_end = opj_uint_adds(*end, filter_width);
+    *end = opj_uint_min(grown_end, max_size);
+}
+
+
+/**
+ * Create a sparse array sized like resolution numres - 1 and copy into it
+ * every code-block of resolutions [0, numres) whose decoded_data is set.
+ *
+ * A code-block is placed at its position relative to the band origin.
+ * When bit 0 of band->bandno is set it is shifted right by the width of the
+ * previous resolution; when bit 1 is set it is shifted down by the height
+ * of the previous resolution.
+ *
+ * @param tilec  tile component providing the resolutions
+ * @param numres number of resolutions to load
+ * @return the new sparse array, or NULL if its creation or one of the
+ *         writes failed (the array is freed in the latter case)
+ */
+static opj_sparse_array_int32_t* opj_dwt_init_sparse_array(
+    opj_tcd_tilecomp_t* tilec,
+    OPJ_UINT32 numres)
+{
+    opj_tcd_resolution_t* top_res = &(tilec->resolutions[numres - 1]);
+    OPJ_UINT32 full_w = (OPJ_UINT32)(top_res->x1 - top_res->x0);
+    OPJ_UINT32 full_h = (OPJ_UINT32)(top_res->y1 - top_res->y0);
+    OPJ_UINT32 resno, bandno, precno, cblkno;
+    opj_sparse_array_int32_t* sa;
+
+    /* Blocks of the sparse array are at most 64x64 */
+    sa = opj_sparse_array_int32_create(full_w, full_h,
+                                       opj_uint_min(full_w, 64),
+                                       opj_uint_min(full_h, 64));
+    if (sa == NULL) {
+        return NULL;
+    }
+
+    for (resno = 0; resno < numres; ++resno) {
+        opj_tcd_resolution_t* res = &tilec->resolutions[resno];
+
+        for (bandno = 0; bandno < res->numbands; ++bandno) {
+            opj_tcd_band_t* band = &res->bands[bandno];
+
+            for (precno = 0; precno < res->pw * res->ph; ++precno) {
+                opj_tcd_precinct_t* precinct = &band->precincts[precno];
+
+                for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
+                    opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
+                    OPJ_UINT32 dst_x, dst_y, cblk_w, cblk_h;
+
+                    /* Nothing to copy for this code-block */
+                    if (cblk->decoded_data == NULL) {
+                        continue;
+                    }
+
+                    dst_x = (OPJ_UINT32)(cblk->x0 - band->x0);
+                    dst_y = (OPJ_UINT32)(cblk->y0 - band->y0);
+                    cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
+                    cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
+
+                    /* Shift by the size of the previous resolution */
+                    if (band->bandno & 3) {
+                        opj_tcd_resolution_t* lower = &tilec->resolutions[resno - 1];
+
+                        if (band->bandno & 1) {
+                            dst_x += (OPJ_UINT32)(lower->x1 - lower->x0);
+                        }
+                        if (band->bandno & 2) {
+                            dst_y += (OPJ_UINT32)(lower->y1 - lower->y0);
+                        }
+                    }
+
+                    if (opj_sparse_array_int32_write(sa, dst_x, dst_y,
+                                                     dst_x + cblk_w, dst_y + cblk_h,
+                                                     cblk->decoded_data,
+                                                     1, cblk_w, OPJ_TRUE)) {
+                        continue;
+                    }
+
+                    /* Write failed: give up and release the array */
+                    opj_sparse_array_int32_free(sa);
+                    return NULL;
+                }
+            }
+        }
+    }
+
+    return sa;
+}
+
+/**
+ * Inverse wavelet transform of a tile component limited to its window of
+ * interest (tilec->win_x0, win_y0, win_x1, win_y1).
+ *
+ * Steps:
+ *  - return OPJ_TRUE at once if resolution numres - 1 has a null width or
+ *    height;
+ *  - load the decoded code-blocks into a sparse array with
+ *    opj_dwt_init_sparse_array();
+ *  - if numres is 1, copy the window of that resolution to tilec->data_win
+ *    and return;
+ *  - otherwise allocate one scratch buffer of
+ *    opj_dwt_max_resolution(tr, numres) * 4 * sizeof(OPJ_INT32) bytes, shared
+ *    by the horizontal and vertical passes, and for each resolution from 1
+ *    to numres - 1 run a horizontal pass on the needed rows followed by a
+ *    vertical pass on groups of up to 4 columns, writing the results back
+ *    into the sparse array;
+ *  - finally copy the window of resolution numres - 1 to tilec->data_win.
+ *
+ * NOTE(review): the results of the opj_sparse_array_int32_read() calls made
+ * here are only checked with assert(), so a failure would go unnoticed
+ * when assertions are disabled.
+ *
+ * @param tilec  tile component to decode
+ * @param numres number of resolutions to reconstruct
+ * @return OPJ_TRUE on success, OPJ_FALSE if the sparse array could not be
+ *         created, the scratch buffer could not be sized or allocated, or a
+ *         write to the sparse array failed
+ */
+static OPJ_BOOL opj_dwt_decode_partial_tile(
+ opj_tcd_tilecomp_t* tilec,
+ OPJ_UINT32 numres)
+{
+ opj_sparse_array_int32_t* sa;
+ opj_dwt_t h;
+ opj_dwt_t v;
+ OPJ_UINT32 resno;
+ /* This value matches the maximum left/right extension given in tables */
+ /* F.2 and F.3 of the standard. */
+ const OPJ_UINT32 filter_width = 2U;
+
+ opj_tcd_resolution_t* tr = tilec->resolutions;
+ opj_tcd_resolution_t* tr_max = &(tilec->resolutions[numres - 1]);
+
+ OPJ_UINT32 rw = (OPJ_UINT32)(tr->x1 -
+ tr->x0); /* width of the resolution level computed */
+ OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 -
+ tr->y0); /* height of the resolution level computed */
+
+ OPJ_SIZE_T h_mem_size;
+
+ /* Compute the intersection of the area of interest, expressed in tile coordinates */
+ /* with the tile coordinates */
+ OPJ_UINT32 win_tcx0 = tilec->win_x0;
+ OPJ_UINT32 win_tcy0 = tilec->win_y0;
+ OPJ_UINT32 win_tcx1 = tilec->win_x1;
+ OPJ_UINT32 win_tcy1 = tilec->win_y1;
+
+ if (tr_max->x0 == tr_max->x1 || tr_max->y0 == tr_max->y1) { /* empty resolution: nothing to do */
+ return OPJ_TRUE;
+ }
+
+ sa = opj_dwt_init_sparse_array(tilec, numres);
+ if (sa == NULL) {
+ return OPJ_FALSE;
+ }
+
+ if (numres == 1U) { /* single resolution: no lifting, only extract the window */
+ OPJ_BOOL ret = opj_sparse_array_int32_read(sa,
+ tr_max->win_x0 - (OPJ_UINT32)tr_max->x0,
+ tr_max->win_y0 - (OPJ_UINT32)tr_max->y0,
+ tr_max->win_x1 - (OPJ_UINT32)tr_max->x0,
+ tr_max->win_y1 - (OPJ_UINT32)tr_max->y0,
+ tilec->data_win,
+ 1, tr_max->win_x1 - tr_max->win_x0,
+ OPJ_TRUE);
+ assert(ret);
+ OPJ_UNUSED(ret);
+ opj_sparse_array_int32_free(sa);
+ return OPJ_TRUE;
+ }
+ h_mem_size = opj_dwt_max_resolution(tr, numres);
+ /* overflow check */
+ /* in vertical pass, we process 4 columns at a time */
+ if (h_mem_size > (SIZE_MAX / (4 * sizeof(OPJ_INT32)))) {
+ /* FIXME event manager error callback */
+ opj_sparse_array_int32_free(sa);
+ return OPJ_FALSE;
+ }
+
+ h_mem_size *= 4 * sizeof(OPJ_INT32);
+ h.mem = (OPJ_INT32*)opj_aligned_32_malloc(h_mem_size);
+ if (! h.mem) {
+ /* FIXME event manager error callback */
+ opj_sparse_array_int32_free(sa);
+ return OPJ_FALSE;
+ }
+
+ v.mem = h.mem; /* both passes share the same scratch buffer */
+
+ for (resno = 1; resno < numres; resno ++) {
+ OPJ_UINT32 i, j;
+ /* Window of interest subband-based coordinates */
+ OPJ_UINT32 win_ll_x0, win_ll_y0, win_ll_x1, win_ll_y1;
+ OPJ_UINT32 win_hl_x0, win_hl_x1;
+ OPJ_UINT32 win_lh_y0, win_lh_y1;
+ /* Window of interest tile-resolution-based coordinates */
+ OPJ_UINT32 win_tr_x0, win_tr_x1, win_tr_y0, win_tr_y1;
+ /* Tile-resolution subband-based coordinates */
+ OPJ_UINT32 tr_ll_x0, tr_ll_y0, tr_hl_x0, tr_lh_y0;
+
+ ++tr; /* tr now designates the resolution being reconstructed */
+
+ h.sn = (OPJ_INT32)rw; /* rw and rh still hold the size of the previous resolution */
+ v.sn = (OPJ_INT32)rh;
+
+ rw = (OPJ_UINT32)(tr->x1 - tr->x0);
+ rh = (OPJ_UINT32)(tr->y1 - tr->y0);
+
+ h.dn = (OPJ_INT32)(rw - (OPJ_UINT32)h.sn);
+ h.cas = tr->x0 % 2;
+
+ v.dn = (OPJ_INT32)(rh - (OPJ_UINT32)v.sn);
+ v.cas = tr->y0 % 2;
+
+ /* Get the subband coordinates for the window of interest */
+ /* LL band */
+ opj_dwt_get_band_coordinates(tilec, resno, 0,
+ win_tcx0, win_tcy0, win_tcx1, win_tcy1,
+ &win_ll_x0, &win_ll_y0,
+ &win_ll_x1, &win_ll_y1);
+
+ /* HL band */
+ opj_dwt_get_band_coordinates(tilec, resno, 1,
+ win_tcx0, win_tcy0, win_tcx1, win_tcy1,
+ &win_hl_x0, NULL, &win_hl_x1, NULL);
+
+ /* LH band */
+ opj_dwt_get_band_coordinates(tilec, resno, 2,
+ win_tcx0, win_tcy0, win_tcx1, win_tcy1,
+ NULL, &win_lh_y0, NULL, &win_lh_y1);
+
+ /* Beware: band index for non-LL0 resolution are 0=HL, 1=LH and 2=HH */
+ tr_ll_x0 = (OPJ_UINT32)tr->bands[1].x0;
+ tr_ll_y0 = (OPJ_UINT32)tr->bands[0].y0;
+ tr_hl_x0 = (OPJ_UINT32)tr->bands[0].x0;
+ tr_lh_y0 = (OPJ_UINT32)tr->bands[1].y0;
+
+ /* Subtract the origin of the bands for this tile, to the subwindow */
+ /* of interest band coordinates, so as to get them relative to the */
+ /* tile */
+ win_ll_x0 = opj_uint_subs(win_ll_x0, tr_ll_x0);
+ win_ll_y0 = opj_uint_subs(win_ll_y0, tr_ll_y0);
+ win_ll_x1 = opj_uint_subs(win_ll_x1, tr_ll_x0);
+ win_ll_y1 = opj_uint_subs(win_ll_y1, tr_ll_y0);
+ win_hl_x0 = opj_uint_subs(win_hl_x0, tr_hl_x0);
+ win_hl_x1 = opj_uint_subs(win_hl_x1, tr_hl_x0);
+ win_lh_y0 = opj_uint_subs(win_lh_y0, tr_lh_y0);
+ win_lh_y1 = opj_uint_subs(win_lh_y1, tr_lh_y0);
+
+ opj_dwt_segment_grow(filter_width, (OPJ_UINT32)h.sn, &win_ll_x0, &win_ll_x1);
+ opj_dwt_segment_grow(filter_width, (OPJ_UINT32)h.dn, &win_hl_x0, &win_hl_x1);
+
+ opj_dwt_segment_grow(filter_width, (OPJ_UINT32)v.sn, &win_ll_y0, &win_ll_y1);
+ opj_dwt_segment_grow(filter_width, (OPJ_UINT32)v.dn, &win_lh_y0, &win_lh_y1);
+
+ /* Compute the tile-resolution-based coordinates for the window of interest */
+ if (h.cas == 0) {
+ win_tr_x0 = opj_uint_min(2 * win_ll_x0, 2 * win_hl_x0 + 1);
+ win_tr_x1 = opj_uint_min(opj_uint_max(2 * win_ll_x1, 2 * win_hl_x1 + 1), rw);
+ } else {
+ win_tr_x0 = opj_uint_min(2 * win_hl_x0, 2 * win_ll_x0 + 1);
+ win_tr_x1 = opj_uint_min(opj_uint_max(2 * win_hl_x1, 2 * win_ll_x1 + 1), rw);
+ }
+
+ if (v.cas == 0) {
+ win_tr_y0 = opj_uint_min(2 * win_ll_y0, 2 * win_lh_y0 + 1);
+ win_tr_y1 = opj_uint_min(opj_uint_max(2 * win_ll_y1, 2 * win_lh_y1 + 1), rh);
+ } else {
+ win_tr_y0 = opj_uint_min(2 * win_lh_y0, 2 * win_ll_y0 + 1);
+ win_tr_y1 = opj_uint_min(opj_uint_max(2 * win_lh_y1, 2 * win_ll_y1 + 1), rh);
+ }
+
+ for (j = 0; j < rh; ++j) { /* horizontal pass, one row at a time */
+ if ((j >= win_ll_y0 && j < win_ll_y1) ||
+ (j >= win_lh_y0 + (OPJ_UINT32)v.sn && j < win_lh_y1 + (OPJ_UINT32)v.sn)) { /* only rows that the vertical pass will read */
+
+ /* Avoids dwt.c:1584:44 (in opj_dwt_decode_partial_1): runtime error: */
+ /* signed integer overflow: -1094795586 + -1094795586 cannot be represented in type 'int' */
+ /* on opj_decompress -i ../../openjpeg/MAPA.jp2 -o out.tif -d 0,0,256,256 */
+ /* This is less extreme than memsetting the whole buffer to 0 */
+ /* although we could potentially do better with better handling of edge conditions */
+ if (win_tr_x1 >= 1 && win_tr_x1 < rw) {
+ h.mem[win_tr_x1 - 1] = 0;
+ }
+ if (win_tr_x1 < rw) {
+ h.mem[win_tr_x1] = 0;
+ }
+
+ opj_dwt_interleave_partial_h(h.mem,
+ h.cas,
+ sa,
+ j,
+ (OPJ_UINT32)h.sn,
+ win_ll_x0,
+ win_ll_x1,
+ win_hl_x0,
+ win_hl_x1);
+ opj_dwt_decode_partial_1(h.mem, h.dn, h.sn, h.cas,
+ (OPJ_INT32)win_ll_x0,
+ (OPJ_INT32)win_ll_x1,
+ (OPJ_INT32)win_hl_x0,
+ (OPJ_INT32)win_hl_x1);
+ if (!opj_sparse_array_int32_write(sa,
+ win_tr_x0, j,
+ win_tr_x1, j + 1,
+ h.mem + win_tr_x0,
+ 1, 0, OPJ_TRUE)) {
+ /* FIXME event manager error callback */
+ opj_sparse_array_int32_free(sa);
+ opj_aligned_free(h.mem);
+ return OPJ_FALSE;
+ }
+ }
+ }
+
+ for (i = win_tr_x0; i < win_tr_x1;) { /* vertical pass, up to 4 columns per iteration */
+ OPJ_UINT32 nb_cols = opj_uint_min(4U, win_tr_x1 - i);
+ opj_dwt_interleave_partial_v(v.mem,
+ v.cas,
+ sa,
+ i,
+ nb_cols,
+ (OPJ_UINT32)v.sn,
+ win_ll_y0,
+ win_ll_y1,
+ win_lh_y0,
+ win_lh_y1);
+ opj_dwt_decode_partial_1_parallel(v.mem, nb_cols, v.dn, v.sn, v.cas,
+ (OPJ_INT32)win_ll_y0,
+ (OPJ_INT32)win_ll_y1,
+ (OPJ_INT32)win_lh_y0,
+ (OPJ_INT32)win_lh_y1);
+ if (!opj_sparse_array_int32_write(sa,
+ i, win_tr_y0,
+ i + nb_cols, win_tr_y1,
+ v.mem + 4 * win_tr_y0,
+ 1, 4, OPJ_TRUE)) {
+ /* FIXME event manager error callback */
+ opj_sparse_array_int32_free(sa);
+ opj_aligned_free(h.mem);
+ return OPJ_FALSE;
+ }
+
+ i += nb_cols;
+ }
+ }
+ opj_aligned_free(h.mem); /* also releases v.mem, which aliases it */
+
+ {
+ OPJ_BOOL ret = opj_sparse_array_int32_read(sa,
+ tr_max->win_x0 - (OPJ_UINT32)tr_max->x0,
+ tr_max->win_y0 - (OPJ_UINT32)tr_max->y0,
+ tr_max->win_x1 - (OPJ_UINT32)tr_max->x0,
+ tr_max->win_y1 - (OPJ_UINT32)tr_max->y0,
+ tilec->data_win,
+ 1, tr_max->win_x1 - tr_max->win_x0,
+ OPJ_TRUE);
+ assert(ret);
+ OPJ_UNUSED(ret);
+ }
+ opj_sparse_array_int32_free(sa);
+ return OPJ_TRUE;
+}
+
+static void opj_v8dwt_interleave_h(opj_v8dwt_t* OPJ_RESTRICT dwt,
+ OPJ_FLOAT32* OPJ_RESTRICT a,
+ OPJ_UINT32 width,
+ OPJ_UINT32 remaining_height)
+{
+ OPJ_FLOAT32* OPJ_RESTRICT bi = (OPJ_FLOAT32*)(dwt->wavelet + dwt->cas);
+ OPJ_UINT32 i, k;
+ OPJ_UINT32 x0 = dwt->win_l_x0;
+ OPJ_UINT32 x1 = dwt->win_l_x1;