From f9e9942330f476b66ac4a35d0ae521200878f343 Mon Sep 17 00:00:00 2001
From: Even Rouault <even.rouault@spatialys.com>
Date: Fri, 1 Sep 2017 16:30:29 +0200
Subject: Sub-tile decoding: only allocate tile component buffer of the needed
 dimension

Instead of being the full tile size.

* Use a sparse array mechanism to store code-blocks and intermediate stages of
  IDWT.
* IDWT, DC level shift and MCT stages are done just on that smaller array.
* Improve copy of tile component array to final image, by saving an intermediate
  buffer.
* For full-tile decoding at reduced resolution, only allocate the tile buffer to
  the reduced size, instead of the full-resolution size.
---
 src/lib/openjp2/sparse_array.c | 233 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 233 insertions(+)
 create mode 100644 src/lib/openjp2/sparse_array.c

(limited to 'src/lib/openjp2/sparse_array.c')

diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c
new file mode 100644
index 00000000..fb552f8b
--- /dev/null
+++ b/src/lib/openjp2/sparse_array.c
@@ -0,0 +1,233 @@
+/*
+ * The copyright in this software is being made available under the 2-clauses
+ * BSD License, included below. This software may be subject to other third
+ * party and contributor rights, including patent rights, and no such rights
+ * are granted under this license.
+ *
+ * Copyright (c) 2017, IntoPix SA <contact@intopix.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "opj_includes.h"
+
+
+struct opj_sparse_array_int32 {
+    OPJ_UINT32 width;
+    OPJ_UINT32 height;
+    OPJ_UINT32 block_width;
+    OPJ_UINT32 block_height;
+    OPJ_UINT32 block_count_hor;
+    OPJ_UINT32 block_count_ver;
+    OPJ_INT32** data_blocks;
+};
+
+opj_sparse_array_int32_t* opj_sparse_array_int32_create(OPJ_UINT32 width,
+        OPJ_UINT32 height,
+        OPJ_UINT32 block_width,
+        OPJ_UINT32 block_height)
+{
+    opj_sparse_array_int32_t* sa;
+
+    if (width == 0 || height == 0 || block_width == 0 || block_height == 0) {
+        return NULL;
+    }
+    if (block_width > ((OPJ_UINT32)~0U) / block_height / sizeof(OPJ_INT32)) {
+        return NULL;
+    }
+
+    sa = opj_calloc(1, sizeof(opj_sparse_array_int32_t));
+    sa->width = width;
+    sa->height = height;
+    sa->block_width = block_width;
+    sa->block_height = block_height;
+    sa->block_count_hor = opj_uint_ceildiv(width, block_width);
+    sa->block_count_ver = opj_uint_ceildiv(height, block_height);
+    if (sa->block_count_hor > ((OPJ_UINT32)~0U) / sa->block_count_ver) {
+        opj_free(sa);
+        return NULL;
+    }
+    sa->data_blocks = opj_calloc(sizeof(OPJ_INT32*),
+                                 sa->block_count_hor * sa->block_count_ver);
+    if (sa->data_blocks == NULL) {
+        opj_free(sa);
+        return NULL;
+    }
+
+    return sa;
+}
+
+void opj_sparse_array_int32_free(opj_sparse_array_int32_t* sa)
+{
+    if (sa) {
+        OPJ_UINT32 i;
+        for (i = 0; i < sa->block_count_hor * sa->block_count_ver; i++) {
+            if (sa->data_blocks[i]) {
+                opj_free(sa->data_blocks[i]);
+            }
+        }
+        opj_free(sa->data_blocks);
+        opj_free(sa);
+    }
+}
+
+OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa,
+        OPJ_UINT32 x0,
+        OPJ_UINT32 y0,
+        OPJ_UINT32 x1,
+        OPJ_UINT32 y1)
+{
+    return !(x0 >= sa->width || x1 <= x0 || x1 > sa->width ||
+             y0 >= sa->height || y1 <= y0 || y1 > sa->height);
+}
+
+static OPJ_BOOL opj_sparse_array_int32_read_or_write(
+    opj_sparse_array_int32_t* sa,
+    OPJ_UINT32 x0,
+    OPJ_UINT32 y0,
+    OPJ_UINT32 x1,
+    OPJ_UINT32 y1,
+    OPJ_INT32* buf,
+    OPJ_UINT32 buf_col_stride,
+    OPJ_UINT32 buf_line_stride,
+    OPJ_BOOL forgiving,
+    OPJ_BOOL is_read_op)
+{
+    OPJ_UINT32 y, block_y;
+    OPJ_UINT32 y_incr = 0;
+    if (!opj_sparse_array_is_region_valid(sa, x0, y0, x1, y1)) {
+        return forgiving;
+    }
+
+    block_y = y0 / sa->block_height;
+    for (y = y0; y < y1; block_y ++, y += y_incr) {
+        OPJ_UINT32 x, block_x;
+        OPJ_UINT32 x_incr = 0;
+        OPJ_UINT32 block_y_offset;
+        y_incr = (y == y0) ? sa->block_height - (y0 % sa->block_height) :
+                 sa->block_height;
+        block_y_offset = sa->block_height - y_incr;
+        y_incr = opj_uint_min(y_incr, y1 - y);
+        block_x = x0 / sa->block_width;
+        for (x = x0; x < x1; block_x ++, x += x_incr) {
+            OPJ_UINT32 j;
+            OPJ_UINT32 block_x_offset;
+            OPJ_INT32* src_block;
+            x_incr = (x == x0) ? sa->block_width - (x0 % sa->block_width) : sa->block_width;
+            block_x_offset = sa->block_width - x_incr;
+            x_incr = opj_uint_min(x_incr, x1 - x);
+            src_block = sa->data_blocks[block_y * sa->block_count_hor + block_x];
+            if (is_read_op) {
+                if (src_block == NULL) {
+                    for (j = 0; j < y_incr; j++) {
+                        if (buf_col_stride == 1) {
+                            memset(buf + (y - y0 + j) * buf_line_stride + (x - x0) * buf_col_stride,
+                                   0,
+                                   sizeof(OPJ_INT32) * x_incr);
+                        } else {
+                            OPJ_UINT32 k;
+                            for (k = 0; k < x_incr; k++) {
+                                *(buf + (y - y0 + j) * buf_line_stride + (x - x0 + k) * buf_col_stride) = 0;
+                            }
+                        }
+                    }
+                } else {
+                    for (j = 0; j < y_incr; j++) {
+                        if (buf_col_stride == 1) {
+                            memcpy(buf + (y - y0 + j) * buf_line_stride + (x - x0) * buf_col_stride,
+                                   src_block + (block_y_offset + j) * sa->block_width + block_x_offset,
+                                   sizeof(OPJ_INT32) * x_incr);
+                        } else {
+                            OPJ_UINT32 k;
+                            for (k = 0; k < x_incr; k++) {
+                                *(buf + (y - y0 + j) * buf_line_stride + (x - x0 + k) * buf_col_stride) =
+                                    *(src_block + (block_y_offset + j) * sa->block_width + block_x_offset + k);
+                            }
+                        }
+                    }
+                }
+            } else {
+                if (src_block == NULL) {
+                    src_block = opj_calloc(1,
+                                           sa->block_width * sa->block_height * sizeof(OPJ_INT32));
+                    if (src_block == NULL) {
+                        return OPJ_FALSE;
+                    }
+                    sa->data_blocks[block_y * sa->block_count_hor + block_x] = src_block;
+                }
+
+                for (j = 0; j < y_incr; j++) {
+                    if (buf_col_stride == 1) {
+                        memcpy(src_block + (block_y_offset + j) * sa->block_width + block_x_offset,
+                               buf + (y - y0 + j) * buf_line_stride + (x - x0) * buf_col_stride,
+                               sizeof(OPJ_INT32) * x_incr);
+                    } else {
+                        OPJ_UINT32 k;
+                        for (k = 0; k < x_incr; k++) {
+                            *(src_block + (block_y_offset + j) * sa->block_width + block_x_offset + k) =
+                                *(buf + (y - y0 + j) * buf_line_stride + (x - x0 + k) * buf_col_stride);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    return OPJ_TRUE;
+}
+
+OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa,
+                                     OPJ_UINT32 x0,
+                                     OPJ_UINT32 y0,
+                                     OPJ_UINT32 x1,
+                                     OPJ_UINT32 y1,
+                                     OPJ_INT32* dest,
+                                     OPJ_UINT32 dest_col_stride,
+                                     OPJ_UINT32 dest_line_stride,
+                                     OPJ_BOOL forgiving)
+{
+    return opj_sparse_array_int32_read_or_write(sa, x0, y0, x1, y1,
+            dest,
+            dest_col_stride,
+            dest_line_stride,
+            forgiving,
+            OPJ_TRUE);
+}
+
+OPJ_BOOL opj_sparse_array_int32_write(opj_sparse_array_int32_t* sa,
+                                      OPJ_UINT32 x0,
+                                      OPJ_UINT32 y0,
+                                      OPJ_UINT32 x1,
+                                      OPJ_UINT32 y1,
+                                      const OPJ_INT32* src,
+                                      OPJ_UINT32 src_col_stride,
+                                      OPJ_UINT32 src_line_stride,
+                                      OPJ_BOOL forgiving)
+{
+    return opj_sparse_array_int32_read_or_write(sa, x0, y0, x1, y1,
+            (OPJ_INT32*)src,
+            src_col_stride,
+            src_line_stride,
+            forgiving,
+            OPJ_FALSE);
+}
-- 
cgit v1.2.3


From 98b93103613c90753fb6c57696f9403f8ea0b1d6 Mon Sep 17 00:00:00 2001
From: Even Rouault <even.rouault@spatialys.com>
Date: Fri, 1 Sep 2017 16:30:44 +0200
Subject: Various changes to allow tile buffers of more than 4giga pixels

Untested though, since that means a tile buffer of at least 16 GB. So
there might be places where uint32 overflow on multiplication still occur...
---
 src/lib/openjp2/dwt.c          | 94 ++++++++++++++++++++++--------------------
 src/lib/openjp2/j2k.c          |  2 +-
 src/lib/openjp2/mct.c          | 22 +++++-----
 src/lib/openjp2/mct.h          | 12 +++---
 src/lib/openjp2/sparse_array.c | 25 ++++++-----
 src/lib/openjp2/t1.c           | 23 ++++++-----
 src/lib/openjp2/tcd.c          | 43 ++++++++++---------
 src/lib/openjp2/tcd.h          |  4 +-
 8 files changed, 119 insertions(+), 106 deletions(-)

(limited to 'src/lib/openjp2/sparse_array.c')

diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c
index 81a8150a..d0cd3b4e 100644
--- a/src/lib/openjp2/dwt.c
+++ b/src/lib/openjp2/dwt.c
@@ -279,7 +279,7 @@ static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn,
         l_src += 2;
     } /* b[i*x]=a[2*i+cas]; */
 
-    l_dest = b + sn * x;
+    l_dest = b + (size_t)sn * (size_t)x;
     l_src = a + 1 - cas;
 
     i = dn;
@@ -325,7 +325,7 @@ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x)
         bi += 2;
         ai += x;
     }
-    ai = a + (v->sn * x);
+    ai = a + (v->sn * (size_t)x);
     bi = v->mem + 1 - v->cas;
     i = v->dn ;
     while (i--) {
@@ -616,7 +616,7 @@ static
 void opj_idwt53_v_final_memcpy(OPJ_INT32* tiledp_col,
                                const OPJ_INT32* tmp,
                                OPJ_INT32 len,
-                               OPJ_INT32 stride)
+                               size_t stride)
 {
     OPJ_INT32 i;
     for (i = 0; i < len; ++i) {
@@ -625,9 +625,9 @@ void opj_idwt53_v_final_memcpy(OPJ_INT32* tiledp_col,
                     PARALLEL_COLS_53 * sizeof(OPJ_INT32))
            would do but would be a tiny bit slower.
            We can take here advantage of our knowledge of alignment */
-        STOREU(&tiledp_col[i * stride + 0],
+        STOREU(&tiledp_col[(size_t)i * stride + 0],
                LOAD(&tmp[PARALLEL_COLS_53 * i + 0]));
-        STOREU(&tiledp_col[i * stride + VREG_INT_COUNT],
+        STOREU(&tiledp_col[(size_t)i * stride + VREG_INT_COUNT],
                LOAD(&tmp[PARALLEL_COLS_53 * i + VREG_INT_COUNT]));
     }
 }
@@ -639,12 +639,13 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2(
     const OPJ_INT32 sn,
     const OPJ_INT32 len,
     OPJ_INT32* tiledp_col,
-    const OPJ_INT32 stride)
+    const size_t stride)
 {
     const OPJ_INT32* in_even = &tiledp_col[0];
-    const OPJ_INT32* in_odd = &tiledp_col[sn * stride];
+    const OPJ_INT32* in_odd = &tiledp_col[(size_t)sn * stride];
 
-    OPJ_INT32 i, j;
+    OPJ_INT32 i;
+    size_t j;
     VREG d1c_0, d1n_0, s1n_0, s0c_0, s0n_0;
     VREG d1c_1, d1n_1, s1n_1, s0c_1, s0n_1;
     const VREG two = LOAD_CST(2);
@@ -703,7 +704,7 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2(
 
     if (len & 1) {
         VREG tmp_len_minus_1;
-        s1n_0 = LOADU(in_even + ((len - 1) / 2) * stride);
+        s1n_0 = LOADU(in_even + (size_t)((len - 1) / 2) * stride);
         /* tmp_len_minus_1 = s1n - ((d1n + 1) >> 1); */
         tmp_len_minus_1 = SUB(s1n_0, SAR(ADD3(d1n_0, d1n_0, two), 2));
         STORE(tmp + PARALLEL_COLS_53 * (len - 1), tmp_len_minus_1);
@@ -711,7 +712,7 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2(
         STORE(tmp + PARALLEL_COLS_53 * (len - 2),
               ADD(d1n_0, SAR(ADD(s0n_0, tmp_len_minus_1), 1)));
 
-        s1n_1 = LOADU(in_even + ((len - 1) / 2) * stride + VREG_INT_COUNT);
+        s1n_1 = LOADU(in_even + (size_t)((len - 1) / 2) * stride + VREG_INT_COUNT);
         /* tmp_len_minus_1 = s1n - ((d1n + 1) >> 1); */
         tmp_len_minus_1 = SUB(s1n_1, SAR(ADD3(d1n_1, d1n_1, two), 2));
         STORE(tmp + PARALLEL_COLS_53 * (len - 1) + VREG_INT_COUNT,
@@ -739,15 +740,16 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2(
     const OPJ_INT32 sn,
     const OPJ_INT32 len,
     OPJ_INT32* tiledp_col,
-    const OPJ_INT32 stride)
+    const size_t stride)
 {
-    OPJ_INT32 i, j;
+    OPJ_INT32 i;
+    size_t j;
 
     VREG s1_0, s2_0, dc_0, dn_0;
     VREG s1_1, s2_1, dc_1, dn_1;
     const VREG two = LOAD_CST(2);
 
-    const OPJ_INT32* in_even = &tiledp_col[sn * stride];
+    const OPJ_INT32* in_even = &tiledp_col[(size_t)sn * stride];
     const OPJ_INT32* in_odd = &tiledp_col[0];
 
     assert(len > 2);
@@ -807,9 +809,9 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2(
 
     if (!(len & 1)) {
         /*dn = in_odd[(len / 2 - 1) * stride] - ((s1 + 1) >> 1); */
-        dn_0 = SUB(LOADU(in_odd + (len / 2 - 1) * stride),
+        dn_0 = SUB(LOADU(in_odd + (size_t)(len / 2 - 1) * stride),
                    SAR(ADD3(s1_0, s1_0, two), 2));
-        dn_1 = SUB(LOADU(in_odd + (len / 2 - 1) * stride + VREG_INT_COUNT),
+        dn_1 = SUB(LOADU(in_odd + (size_t)(len / 2 - 1) * stride + VREG_INT_COUNT),
                    SAR(ADD3(s1_1, s1_1, two), 2));
 
         /* tmp[len - 2] = s1 + ((dn + dc) >> 1); */
@@ -849,7 +851,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp,
                              const OPJ_INT32 sn,
                              const OPJ_INT32 len,
                              OPJ_INT32* tiledp_col,
-                             const OPJ_INT32 stride)
+                             const size_t stride)
 {
     OPJ_INT32 i, j;
     OPJ_INT32 d1c, d1n, s1n, s0c, s0n;
@@ -860,15 +862,15 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp,
     /* accesses and explicit interleaving. */
 
     s1n = tiledp_col[0];
-    d1n = tiledp_col[sn * stride];
+    d1n = tiledp_col[(size_t)sn * stride];
     s0n = s1n - ((d1n + 1) >> 1);
 
     for (i = 0, j = 0; i < (len - 3); i += 2, j++) {
         d1c = d1n;
         s0c = s0n;
 
-        s1n = tiledp_col[(j + 1) * stride];
-        d1n = tiledp_col[(sn + j + 1) * stride];
+        s1n = tiledp_col[(size_t)(j + 1) * stride];
+        d1n = tiledp_col[(size_t)(sn + j + 1) * stride];
 
         s0n = s1n - ((d1c + d1n + 2) >> 2);
 
@@ -880,7 +882,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp,
 
     if (len & 1) {
         tmp[len - 1] =
-            tiledp_col[((len - 1) / 2) * stride] -
+            tiledp_col[(size_t)((len - 1) / 2) * stride] -
             ((d1n + 1) >> 1);
         tmp[len - 2] = d1n + ((s0n + tmp[len - 1]) >> 1);
     } else {
@@ -888,7 +890,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp,
     }
 
     for (i = 0; i < len; ++i) {
-        tiledp_col[i * stride] = tmp[i];
+        tiledp_col[(size_t)i * stride] = tmp[i];
     }
 }
 
@@ -899,11 +901,11 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp,
                              const OPJ_INT32 sn,
                              const OPJ_INT32 len,
                              OPJ_INT32* tiledp_col,
-                             const OPJ_INT32 stride)
+                             const size_t stride)
 {
     OPJ_INT32 i, j;
     OPJ_INT32 s1, s2, dc, dn;
-    const OPJ_INT32* in_even = &tiledp_col[sn * stride];
+    const OPJ_INT32* in_even = &tiledp_col[(size_t)sn * stride];
     const OPJ_INT32* in_odd = &tiledp_col[0];
 
     assert(len > 2);
@@ -916,9 +918,9 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp,
     tmp[0] = in_even[0] + dc;
     for (i = 1, j = 1; i < (len - 2 - !(len & 1)); i += 2, j++) {
 
-        s2 = in_even[(j + 1) * stride];
+        s2 = in_even[(size_t)(j + 1) * stride];
 
-        dn = in_odd[j * stride] - ((s1 + s2 + 2) >> 2);
+        dn = in_odd[(size_t)j * stride] - ((s1 + s2 + 2) >> 2);
         tmp[i  ] = dc;
         tmp[i + 1] = s1 + ((dn + dc) >> 1);
 
@@ -927,7 +929,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp,
     }
     tmp[i] = dc;
     if (!(len & 1)) {
-        dn = in_odd[(len / 2 - 1) * stride] - ((s1 + 1) >> 1);
+        dn = in_odd[(size_t)(len / 2 - 1) * stride] - ((s1 + 1) >> 1);
         tmp[len - 2] = s1 + ((dn + dc) >> 1);
         tmp[len - 1] = dn;
     } else {
@@ -935,7 +937,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp,
     }
 
     for (i = 0; i < len; ++i) {
-        tiledp_col[i * stride] = tmp[i];
+        tiledp_col[(size_t)i * stride] = tmp[i];
     }
 }
 #endif /* !defined(STANDARD_SLOW_VERSION) */
@@ -946,7 +948,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp,
 /* Performs interleave, inverse wavelet transform and copy back to buffer */
 static void opj_idwt53_v(const opj_dwt_t *dwt,
                          OPJ_INT32* tiledp_col,
-                         OPJ_INT32 stride,
+                         size_t stride,
                          OPJ_INT32 nb_cols)
 {
 #ifdef STANDARD_SLOW_VERSION
@@ -994,14 +996,14 @@ static void opj_idwt53_v(const opj_dwt_t *dwt,
             OPJ_INT32* out = dwt->mem;
             for (c = 0; c < nb_cols; c++, tiledp_col++) {
                 OPJ_INT32 i;
-                const OPJ_INT32* in_even = &tiledp_col[sn * stride];
+                const OPJ_INT32* in_even = &tiledp_col[(size_t)sn * stride];
                 const OPJ_INT32* in_odd = &tiledp_col[0];
 
                 out[1] = in_odd[0] - ((in_even[0] + 1) >> 1);
                 out[0] = in_even[0] + out[1];
 
                 for (i = 0; i < len; ++i) {
-                    tiledp_col[i * stride] = out[i];
+                    tiledp_col[(size_t)i * stride] = out[i];
                 }
             }
 
@@ -1341,11 +1343,11 @@ static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls)
     job = (opj_dwd_decode_v_job_t*)user_data;
     for (j = job->min_j; j + PARALLEL_COLS_53 <= job->max_j;
             j += PARALLEL_COLS_53) {
-        opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_INT32)job->w,
+        opj_idwt53_v(&job->v, &job->tiledp[j], (size_t)job->w,
                      PARALLEL_COLS_53);
     }
     if (j < job->max_j)
-        opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_INT32)job->w,
+        opj_idwt53_v(&job->v, &job->tiledp[j], (size_t)job->w,
                      (OPJ_INT32)(job->max_j - j));
 
     opj_aligned_free(job->v.mem);
@@ -1413,7 +1415,7 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
 
         if (num_threads <= 1 || rh <= 1) {
             for (j = 0; j < rh; ++j) {
-                opj_idwt53_h(&h, &tiledp[j * w]);
+                opj_idwt53_h(&h, &tiledp[(size_t)j * w]);
             }
         } else {
             OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads;
@@ -1465,10 +1467,10 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
         if (num_threads <= 1 || rw <= 1) {
             for (j = 0; j + PARALLEL_COLS_53 <= rw;
                     j += PARALLEL_COLS_53) {
-                opj_idwt53_v(&v, &tiledp[j], (OPJ_INT32)w, PARALLEL_COLS_53);
+                opj_idwt53_v(&v, &tiledp[j], (size_t)w, PARALLEL_COLS_53);
             }
             if (j < rw) {
-                opj_idwt53_v(&v, &tiledp[j], (OPJ_INT32)w, (OPJ_INT32)(rw - j));
+                opj_idwt53_v(&v, &tiledp[j], (size_t)w, (OPJ_INT32)(rw - j));
             }
         } else {
             OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads;
@@ -2035,14 +2037,16 @@ static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt,
     OPJ_UINT32 i;
 
     for (i = dwt->win_l_x0; i < dwt->win_l_x1; ++i) {
-        memcpy(&bi[i * 2], &a[i * width], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32));
+        memcpy(&bi[i * 2], &a[i * (size_t)width],
+               (size_t)nb_elts_read * sizeof(OPJ_FLOAT32));
     }
 
-    a += (OPJ_UINT32)dwt->sn * width;
+    a += (OPJ_UINT32)dwt->sn * (size_t)width;
     bi = dwt->wavelet + 1 - dwt->cas;
 
     for (i = dwt->win_h_x0; i < dwt->win_h_x1; ++i) {
-        memcpy(&bi[i * 2], &a[i * width], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32));
+        memcpy(&bi[i * 2], &a[i * (size_t)width],
+               (size_t)nb_elts_read * sizeof(OPJ_FLOAT32));
     }
 }
 
@@ -2325,9 +2329,9 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
 
             for (k = 0; k < rw; k++) {
                 aj[k      ] = h.wavelet[k].f[0];
-                aj[k + w  ] = h.wavelet[k].f[1];
-                aj[k + w * 2] = h.wavelet[k].f[2];
-                aj[k + w * 3] = h.wavelet[k].f[3];
+                aj[k + (size_t)w  ] = h.wavelet[k].f[1];
+                aj[k + (size_t)w * 2] = h.wavelet[k].f[2];
+                aj[k + (size_t)w * 3] = h.wavelet[k].f[3];
             }
 
             aj += w * 4;
@@ -2340,10 +2344,10 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
             for (k = 0; k < rw; k++) {
                 switch (rh - j) {
                 case 3:
-                    aj[k + w * 2] = h.wavelet[k].f[2];
+                    aj[k + (size_t)w * 2] = h.wavelet[k].f[2];
                 /* FALLTHRU */
                 case 2:
-                    aj[k + w  ] = h.wavelet[k].f[1];
+                    aj[k + (size_t)w  ] = h.wavelet[k].f[1];
                 /* FALLTHRU */
                 case 1:
                     aj[k] = h.wavelet[k].f[0];
@@ -2366,7 +2370,7 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
             opj_v4dwt_decode(&v);
 
             for (k = 0; k < rh; ++k) {
-                memcpy(&aj[k * w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32));
+                memcpy(&aj[k * (size_t)w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32));
             }
             aj += 4;
         }
@@ -2380,7 +2384,7 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
             opj_v4dwt_decode(&v);
 
             for (k = 0; k < rh; ++k) {
-                memcpy(&aj[k * w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32));
+                memcpy(&aj[k * (size_t)w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32));
             }
         }
     }
diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c
index 400fcaa4..aef0c1c0 100644
--- a/src/lib/openjp2/j2k.c
+++ b/src/lib/openjp2/j2k.c
@@ -10923,7 +10923,7 @@ OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k,
 {
     OPJ_UINT32 i, j;
     OPJ_UINT32 l_nb_tiles;
-    OPJ_UINT32 l_max_tile_size = 0, l_current_tile_size;
+    OPJ_SIZE_T l_max_tile_size = 0, l_current_tile_size;
     OPJ_BYTE * l_current_data = 00;
     OPJ_BOOL l_reuse_data = OPJ_FALSE;
     opj_tcd_t* p_tcd = 00;
diff --git a/src/lib/openjp2/mct.c b/src/lib/openjp2/mct.c
index 8c82ee20..b79d4b87 100644
--- a/src/lib/openjp2/mct.c
+++ b/src/lib/openjp2/mct.c
@@ -77,7 +77,7 @@ void opj_mct_encode(
     OPJ_INT32* OPJ_RESTRICT c0,
     OPJ_INT32* OPJ_RESTRICT c1,
     OPJ_INT32* OPJ_RESTRICT c2,
-    OPJ_UINT32 n)
+    OPJ_SIZE_T n)
 {
     OPJ_SIZE_T i;
     const OPJ_SIZE_T len = n;
@@ -119,7 +119,7 @@ void opj_mct_encode(
     OPJ_INT32* OPJ_RESTRICT c0,
     OPJ_INT32* OPJ_RESTRICT c1,
     OPJ_INT32* OPJ_RESTRICT c2,
-    OPJ_UINT32 n)
+    OPJ_SIZE_T n)
 {
     OPJ_SIZE_T i;
     const OPJ_SIZE_T len = n;
@@ -146,7 +146,7 @@ void opj_mct_decode(
     OPJ_INT32* OPJ_RESTRICT c0,
     OPJ_INT32* OPJ_RESTRICT c1,
     OPJ_INT32* OPJ_RESTRICT c2,
-    OPJ_UINT32 n)
+    OPJ_SIZE_T n)
 {
     OPJ_SIZE_T i;
     const OPJ_SIZE_T len = n;
@@ -181,7 +181,7 @@ void opj_mct_decode(
     OPJ_INT32* OPJ_RESTRICT c0,
     OPJ_INT32* OPJ_RESTRICT c1,
     OPJ_INT32* OPJ_RESTRICT c2,
-    OPJ_UINT32 n)
+    OPJ_SIZE_T n)
 {
     OPJ_UINT32 i;
     for (i = 0; i < n; ++i) {
@@ -214,7 +214,7 @@ void opj_mct_encode_real(
     OPJ_INT32* OPJ_RESTRICT c0,
     OPJ_INT32* OPJ_RESTRICT c1,
     OPJ_INT32* OPJ_RESTRICT c2,
-    OPJ_UINT32 n)
+    OPJ_SIZE_T n)
 {
     OPJ_SIZE_T i;
     const OPJ_SIZE_T len = n;
@@ -359,7 +359,7 @@ void opj_mct_encode_real(
     OPJ_INT32* OPJ_RESTRICT c0,
     OPJ_INT32* OPJ_RESTRICT c1,
     OPJ_INT32* OPJ_RESTRICT c2,
-    OPJ_UINT32 n)
+    OPJ_SIZE_T n)
 {
     OPJ_UINT32 i;
     for (i = 0; i < n; ++i) {
@@ -386,7 +386,7 @@ void opj_mct_decode_real(
     OPJ_FLOAT32* OPJ_RESTRICT c0,
     OPJ_FLOAT32* OPJ_RESTRICT c1,
     OPJ_FLOAT32* OPJ_RESTRICT c2,
-    OPJ_UINT32 n)
+    OPJ_SIZE_T n)
 {
     OPJ_UINT32 i;
 #ifdef __SSE__
@@ -451,13 +451,13 @@ OPJ_FLOAT64 opj_mct_getnorm_real(OPJ_UINT32 compno)
 
 OPJ_BOOL opj_mct_encode_custom(
     OPJ_BYTE * pCodingdata,
-    OPJ_UINT32 n,
+    OPJ_SIZE_T n,
     OPJ_BYTE ** pData,
     OPJ_UINT32 pNbComp,
     OPJ_UINT32 isSigned)
 {
     OPJ_FLOAT32 * lMct = (OPJ_FLOAT32 *) pCodingdata;
-    OPJ_UINT32 i;
+    OPJ_SIZE_T i;
     OPJ_UINT32 j;
     OPJ_UINT32 k;
     OPJ_UINT32 lNbMatCoeff = pNbComp * pNbComp;
@@ -505,13 +505,13 @@ OPJ_BOOL opj_mct_encode_custom(
 
 OPJ_BOOL opj_mct_decode_custom(
     OPJ_BYTE * pDecodingData,
-    OPJ_UINT32 n,
+    OPJ_SIZE_T n,
     OPJ_BYTE ** pData,
     OPJ_UINT32 pNbComp,
     OPJ_UINT32 isSigned)
 {
     OPJ_FLOAT32 * lMct;
-    OPJ_UINT32 i;
+    OPJ_SIZE_T i;
     OPJ_UINT32 j;
     OPJ_UINT32 k;
 
diff --git a/src/lib/openjp2/mct.h b/src/lib/openjp2/mct.h
index 0ed980e8..2e37ce73 100644
--- a/src/lib/openjp2/mct.h
+++ b/src/lib/openjp2/mct.h
@@ -61,7 +61,7 @@ Apply a reversible multi-component transform to an image
 @param n Number of samples for each component
 */
 void opj_mct_encode(OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1,
-                    OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n);
+                    OPJ_INT32* OPJ_RESTRICT c2, OPJ_SIZE_T n);
 /**
 Apply a reversible multi-component inverse transform to an image
 @param c0 Samples for luminance component
@@ -70,7 +70,7 @@ Apply a reversible multi-component inverse transform to an image
 @param n Number of samples for each component
 */
 void opj_mct_decode(OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1,
-                    OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n);
+                    OPJ_INT32* OPJ_RESTRICT c2, OPJ_SIZE_T n);
 /**
 Get norm of the basis function used for the reversible multi-component transform
 @param compno Number of the component (0->Y, 1->U, 2->V)
@@ -86,7 +86,7 @@ Apply an irreversible multi-component transform to an image
 @param n Number of samples for each component
 */
 void opj_mct_encode_real(OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1,
-                         OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n);
+                         OPJ_INT32* OPJ_RESTRICT c2, OPJ_SIZE_T n);
 /**
 Apply an irreversible multi-component inverse transform to an image
 @param c0 Samples for luminance component
@@ -95,7 +95,7 @@ Apply an irreversible multi-component inverse transform to an image
 @param n Number of samples for each component
 */
 void opj_mct_decode_real(OPJ_FLOAT32* OPJ_RESTRICT c0,
-                         OPJ_FLOAT32* OPJ_RESTRICT c1, OPJ_FLOAT32* OPJ_RESTRICT c2, OPJ_UINT32 n);
+                         OPJ_FLOAT32* OPJ_RESTRICT c1, OPJ_FLOAT32* OPJ_RESTRICT c2, OPJ_SIZE_T n);
 /**
 Get norm of the basis function used for the irreversible multi-component transform
 @param compno Number of the component (0->Y, 1->U, 2->V)
@@ -114,7 +114,7 @@ FIXME DOC
 */
 OPJ_BOOL opj_mct_encode_custom(
     OPJ_BYTE * p_coding_data,
-    OPJ_UINT32 n,
+    OPJ_SIZE_T n,
     OPJ_BYTE ** p_data,
     OPJ_UINT32 p_nb_comp,
     OPJ_UINT32 is_signed);
@@ -129,7 +129,7 @@ FIXME DOC
 */
 OPJ_BOOL opj_mct_decode_custom(
     OPJ_BYTE * pDecodingData,
-    OPJ_UINT32 n,
+    OPJ_SIZE_T n,
     OPJ_BYTE ** pData,
     OPJ_UINT32 pNbComp,
     OPJ_UINT32 isSigned);
diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c
index fb552f8b..3402dca2 100644
--- a/src/lib/openjp2/sparse_array.c
+++ b/src/lib/openjp2/sparse_array.c
@@ -141,27 +141,30 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
                 if (src_block == NULL) {
                     for (j = 0; j < y_incr; j++) {
                         if (buf_col_stride == 1) {
-                            memset(buf + (y - y0 + j) * buf_line_stride + (x - x0) * buf_col_stride,
+                            memset(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride,
                                    0,
                                    sizeof(OPJ_INT32) * x_incr);
                         } else {
                             OPJ_UINT32 k;
                             for (k = 0; k < x_incr; k++) {
-                                *(buf + (y - y0 + j) * buf_line_stride + (x - x0 + k) * buf_col_stride) = 0;
+                                *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride)
+                                    = 0;
                             }
                         }
                     }
                 } else {
                     for (j = 0; j < y_incr; j++) {
                         if (buf_col_stride == 1) {
-                            memcpy(buf + (y - y0 + j) * buf_line_stride + (x - x0) * buf_col_stride,
-                                   src_block + (block_y_offset + j) * sa->block_width + block_x_offset,
+                            memcpy(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride,
+                                   src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset,
                                    sizeof(OPJ_INT32) * x_incr);
                         } else {
                             OPJ_UINT32 k;
                             for (k = 0; k < x_incr; k++) {
-                                *(buf + (y - y0 + j) * buf_line_stride + (x - x0 + k) * buf_col_stride) =
-                                    *(src_block + (block_y_offset + j) * sa->block_width + block_x_offset + k);
+                                *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride)
+                                    =
+                                        *(src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset +
+                                          k);
                             }
                         }
                     }
@@ -178,14 +181,16 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
 
                 for (j = 0; j < y_incr; j++) {
                     if (buf_col_stride == 1) {
-                        memcpy(src_block + (block_y_offset + j) * sa->block_width + block_x_offset,
-                               buf + (y - y0 + j) * buf_line_stride + (x - x0) * buf_col_stride,
+                        memcpy(src_block + (block_y_offset + j) * (size_t)sa->block_width +
+                               block_x_offset,
+                               buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride,
                                sizeof(OPJ_INT32) * x_incr);
                     } else {
                         OPJ_UINT32 k;
                         for (k = 0; k < x_incr; k++) {
-                            *(src_block + (block_y_offset + j) * sa->block_width + block_x_offset + k) =
-                                *(buf + (y - y0 + j) * buf_line_stride + (x - x0 + k) * buf_col_stride);
+                            *(src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset +
+                              k) =
+                                  *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride);
                         }
                     }
                 }
diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c
index 953c7ab1..44a2f243 100644
--- a/src/lib/openjp2/t1.c
+++ b/src/lib/openjp2/t1.c
@@ -1696,8 +1696,8 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
             }
         }
     } else if (tccp->qmfbid == 1) {
-        OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_UINT32)y * tile_w +
-                                                       (OPJ_UINT32)x];
+        OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(size_t)y * tile_w +
+                                                   (size_t)x];
         for (j = 0; j < cblk_h; ++j) {
             i = 0;
             for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
@@ -1705,19 +1705,19 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
                 OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
                 OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
                 OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 0U] = tmp0 / 2;
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 1U] = tmp1 / 2;
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 2U] = tmp2 / 2;
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 3U] = tmp3 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 0U] = tmp0 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 1U] = tmp1 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 2U] = tmp2 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 3U] = tmp3 / 2;
             }
             for (; i < cblk_w; ++i) {
                 OPJ_INT32 tmp = datap[(j * cblk_w) + i];
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp / 2;
+                ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i] = tmp / 2;
             }
         }
     } else {        /* if (tccp->qmfbid == 0) */
-        OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_UINT32)y *
-                                                         tile_w + (OPJ_UINT32)x];
+        OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(size_t)y *
+                                                     tile_w + (size_t)x];
         for (j = 0; j < cblk_h; ++j) {
             OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
             for (i = 0; i < cblk_w; ++i) {
@@ -2052,7 +2052,8 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
                         OPJ_INT32* OPJ_RESTRICT tiledp;
                         OPJ_UINT32 cblk_w;
                         OPJ_UINT32 cblk_h;
-                        OPJ_UINT32 i, j, tileIndex = 0, tileLineAdvance;
+                        OPJ_UINT32 i, j, tileLineAdvance;
+                        size_t tileIndex = 0;
 
                         OPJ_INT32 x = cblk->x0 - band->x0;
                         OPJ_INT32 y = cblk->y0 - band->y0;
@@ -2076,7 +2077,7 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
                         cblk_h = t1->h;
                         tileLineAdvance = tile_w - cblk_w;
 
-                        tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x];
+                        tiledp = &tilec->data[(size_t)y * tile_w + (size_t)x];
                         t1->data = tiledp;
                         t1->data_stride = tile_w;
                         if (tccp->qmfbid == 1) {
diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c
index 1213f757..485eb9e2 100644
--- a/src/lib/openjp2/tcd.c
+++ b/src/lib/openjp2/tcd.c
@@ -1974,17 +1974,17 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager)
                                           p_tcd->image->comps[1].resno_decoded;
         opj_tcd_resolution_t* res_comp2 = l_tile->comps[2].resolutions +
                                           p_tcd->image->comps[2].resno_decoded;
-        OPJ_INT32 l_res_samples = (OPJ_INT32)(res_comp0->x1 - res_comp0->x0) *
-                                  (res_comp0->y1 - res_comp0->y0);
+        size_t l_res_samples = (size_t)(res_comp0->x1 - res_comp0->x0) *
+                               (size_t)(res_comp0->y1 - res_comp0->y0);
         /* testcase 1336.pdf.asan.47.376 */
         if (p_tcd->image->comps[0].resno_decoded !=
                 p_tcd->image->comps[1].resno_decoded ||
                 p_tcd->image->comps[0].resno_decoded !=
                 p_tcd->image->comps[2].resno_decoded ||
-                (res_comp1->x1 - res_comp1->x0) * (res_comp1->y1 -
-                        res_comp1->y0) != l_res_samples ||
-                (res_comp2->x1 - res_comp2->x0) * (res_comp2->y1 -
-                        res_comp2->y0) != l_res_samples) {
+                (size_t)(res_comp1->x1 - res_comp1->x0) *
+                (size_t)(res_comp1->y1 - res_comp1->y0) != l_res_samples ||
+                (size_t)(res_comp2->x1 - res_comp2->x0) *
+                (size_t)(res_comp2->y1 - res_comp2->y0) != l_res_samples) {
             opj_event_msg(p_manager, EVT_ERROR,
                           "Tiles don't all have the same dimension. Skip the MCT step.\n");
             return OPJ_FALSE;
@@ -2225,9 +2225,10 @@ static void opj_tcd_code_block_enc_deallocate(opj_tcd_precinct_t * p_precinct)
     }
 }
 
-OPJ_UINT32 opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd)
+OPJ_SIZE_T opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd)
 {
-    OPJ_UINT32 i, l_data_size = 0;
+    OPJ_UINT32 i;
+    OPJ_SIZE_T l_data_size = 0;
     opj_image_comp_t * l_img_comp = 00;
     opj_tcd_tilecomp_t * l_tilec = 00;
     OPJ_UINT32 l_size_comp, l_remaining;
@@ -2246,8 +2247,8 @@ OPJ_UINT32 opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd)
             l_size_comp = 4;
         }
 
-        l_data_size += l_size_comp * (OPJ_UINT32)((l_tilec->x1 - l_tilec->x0) *
-                       (l_tilec->y1 - l_tilec->y0));
+        l_data_size += l_size_comp * ((OPJ_SIZE_T)(l_tilec->x1 - l_tilec->x0) *
+                                      (OPJ_SIZE_T)(l_tilec->y1 - l_tilec->y0));
         ++l_img_comp;
         ++l_tilec;
     }
@@ -2262,7 +2263,7 @@ static OPJ_BOOL opj_tcd_dc_level_shift_encode(opj_tcd_t *p_tcd)
     opj_tccp_t * l_tccp = 00;
     opj_image_comp_t * l_img_comp = 00;
     opj_tcd_tile_t * l_tile;
-    OPJ_UINT32 l_nb_elem, i;
+    size_t l_nb_elem, i;
     OPJ_INT32 * l_current_ptr;
 
     l_tile = p_tcd->tcd_image->tiles;
@@ -2272,8 +2273,8 @@ static OPJ_BOOL opj_tcd_dc_level_shift_encode(opj_tcd_t *p_tcd)
 
     for (compno = 0; compno < l_tile->numcomps; compno++) {
         l_current_ptr = l_tile_comp->data;
-        l_nb_elem = (OPJ_UINT32)((l_tile_comp->x1 - l_tile_comp->x0) *
-                                 (l_tile_comp->y1 - l_tile_comp->y0));
+        l_nb_elem = (size_t)(l_tile_comp->x1 - l_tile_comp->x0) *
+                    (size_t)(l_tile_comp->y1 - l_tile_comp->y0);
 
         if (l_tccp->qmfbid == 1) {
             for (i = 0; i < l_nb_elem; ++i) {
@@ -2299,8 +2300,8 @@ static OPJ_BOOL opj_tcd_mct_encode(opj_tcd_t *p_tcd)
 {
     opj_tcd_tile_t * l_tile = p_tcd->tcd_image->tiles;
     opj_tcd_tilecomp_t * l_tile_comp = p_tcd->tcd_image->tiles->comps;
-    OPJ_UINT32 samples = (OPJ_UINT32)((l_tile_comp->x1 - l_tile_comp->x0) *
-                                      (l_tile_comp->y1 - l_tile_comp->y0));
+    size_t samples = (size_t)(l_tile_comp->x1 - l_tile_comp->x0) *
+                     (size_t)(l_tile_comp->y1 - l_tile_comp->y0);
     OPJ_UINT32 i;
     OPJ_BYTE ** l_data = 00;
     opj_tcp_t * l_tcp = p_tcd->tcp;
@@ -2482,13 +2483,15 @@ static OPJ_BOOL opj_tcd_rate_allocate_encode(opj_tcd_t *p_tcd,
 
 OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd,
                                 OPJ_BYTE * p_src,
-                                OPJ_UINT32 p_src_length)
+                                OPJ_SIZE_T p_src_length)
 {
-    OPJ_UINT32 i, j, l_data_size = 0;
+    OPJ_UINT32 i;
+    OPJ_SIZE_T j;
+    OPJ_SIZE_T l_data_size = 0;
     opj_image_comp_t * l_img_comp = 00;
     opj_tcd_tilecomp_t * l_tilec = 00;
     OPJ_UINT32 l_size_comp, l_remaining;
-    OPJ_UINT32 l_nb_elem;
+    OPJ_SIZE_T l_nb_elem;
 
     l_data_size = opj_tcd_get_encoded_tile_size(p_tcd);
     if (l_data_size != p_src_length) {
@@ -2500,8 +2503,8 @@ OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd,
     for (i = 0; i < p_tcd->image->numcomps; ++i) {
         l_size_comp = l_img_comp->prec >> 3; /*(/ 8)*/
         l_remaining = l_img_comp->prec & 7;  /* (%8) */
-        l_nb_elem = (OPJ_UINT32)((l_tilec->x1 - l_tilec->x0) * (l_tilec->y1 -
-                                 l_tilec->y0));
+        l_nb_elem = (size_t)(l_tilec->x1 - l_tilec->x0) *
+                    (size_t)(l_tilec->y1 - l_tilec->y0);
 
         if (l_remaining) {
             ++l_size_comp;
diff --git a/src/lib/openjp2/tcd.h b/src/lib/openjp2/tcd.h
index 7c974c53..63c22c45 100644
--- a/src/lib/openjp2/tcd.h
+++ b/src/lib/openjp2/tcd.h
@@ -409,7 +409,7 @@ OPJ_BOOL opj_tcd_update_tile_data(opj_tcd_t *p_tcd,
 /**
  *
  */
-OPJ_UINT32 opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd);
+OPJ_SIZE_T opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd);
 
 /**
  * Initialize the tile coder and may reuse some meory.
@@ -428,7 +428,7 @@ OPJ_BOOL opj_tcd_init_encode_tile(opj_tcd_t *p_tcd,
  */
 OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd,
                                 OPJ_BYTE * p_src,
-                                OPJ_UINT32 p_src_length);
+                                OPJ_SIZE_T p_src_length);
 
 /**
  * Allocates tile component data
-- 
cgit v1.2.3


From b2cc8f7f81242f967b65e76de043e5e31663d793 Mon Sep 17 00:00:00 2001
From: Even Rouault <even.rouault@spatialys.com>
Date: Fri, 1 Sep 2017 16:30:50 +0200
Subject: Optimize reading/write into sparse array

---
 src/lib/openjp2/sparse_array.c      | 126 ++++++++++++++++++++++++------------
 src/lib/openjp2/sparse_array.h      |   4 +-
 src/lib/openjp2/test_sparse_array.c |  26 +++++++-
 3 files changed, 111 insertions(+), 45 deletions(-)

(limited to 'src/lib/openjp2/sparse_array.c')

diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c
index 3402dca2..b0634f67 100644
--- a/src/lib/openjp2/sparse_array.c
+++ b/src/lib/openjp2/sparse_array.c
@@ -91,7 +91,7 @@ void opj_sparse_array_int32_free(opj_sparse_array_int32_t* sa)
     }
 }
 
-OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa,
+OPJ_BOOL opj_sparse_array_is_region_valid(const opj_sparse_array_int32_t* sa,
         OPJ_UINT32 x0,
         OPJ_UINT32 y0,
         OPJ_UINT32 x1,
@@ -102,7 +102,7 @@ OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa,
 }
 
 static OPJ_BOOL opj_sparse_array_int32_read_or_write(
-    opj_sparse_array_int32_t* sa,
+    const opj_sparse_array_int32_t* sa,
     OPJ_UINT32 x0,
     OPJ_UINT32 y0,
     OPJ_UINT32 x1,
@@ -115,6 +115,8 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
 {
     OPJ_UINT32 y, block_y;
     OPJ_UINT32 y_incr = 0;
+    const OPJ_UINT32 block_width = sa->block_width;
+
     if (!opj_sparse_array_is_region_valid(sa, x0, y0, x1, y1)) {
         return forgiving;
     }
@@ -128,43 +130,64 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
                  sa->block_height;
         block_y_offset = sa->block_height - y_incr;
         y_incr = opj_uint_min(y_incr, y1 - y);
-        block_x = x0 / sa->block_width;
+        block_x = x0 / block_width;
         for (x = x0; x < x1; block_x ++, x += x_incr) {
             OPJ_UINT32 j;
             OPJ_UINT32 block_x_offset;
             OPJ_INT32* src_block;
-            x_incr = (x == x0) ? sa->block_width - (x0 % sa->block_width) : sa->block_width;
-            block_x_offset = sa->block_width - x_incr;
+            x_incr = (x == x0) ? block_width - (x0 % block_width) : block_width;
+            block_x_offset = block_width - x_incr;
             x_incr = opj_uint_min(x_incr, x1 - x);
             src_block = sa->data_blocks[block_y * sa->block_count_hor + block_x];
             if (is_read_op) {
                 if (src_block == NULL) {
-                    for (j = 0; j < y_incr; j++) {
-                        if (buf_col_stride == 1) {
-                            memset(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride,
-                                   0,
-                                   sizeof(OPJ_INT32) * x_incr);
-                        } else {
+                    if (buf_col_stride == 1) {
+                        OPJ_INT32* dest_ptr = buf + (y - y0) * (size_t)buf_line_stride +
+                                              (x - x0) * buf_col_stride;
+                        for (j = 0; j < y_incr; j++) {
+                            memset(dest_ptr, 0, sizeof(OPJ_INT32) * x_incr);
+                            dest_ptr += buf_line_stride;
+                        }
+                    } else {
+                        OPJ_INT32* dest_ptr = buf + (y - y0) * (size_t)buf_line_stride +
+                                              (x - x0) * buf_col_stride;
+                        for (j = 0; j < y_incr; j++) {
                             OPJ_UINT32 k;
                             for (k = 0; k < x_incr; k++) {
-                                *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride)
-                                    = 0;
+                                dest_ptr[k * buf_col_stride] = 0;
                             }
+                            dest_ptr += buf_line_stride;
                         }
                     }
                 } else {
-                    for (j = 0; j < y_incr; j++) {
-                        if (buf_col_stride == 1) {
-                            memcpy(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride,
-                                   src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset,
-                                   sizeof(OPJ_INT32) * x_incr);
+                    const OPJ_INT32* OPJ_RESTRICT src_ptr = src_block + block_y_offset *
+                                                            (size_t)block_width + block_x_offset;
+                    if (buf_col_stride == 1) {
+                        OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride +
+                                                           (x - x0) * buf_col_stride;
+                        for (j = 0; j < y_incr; j++) {
+                            memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+                            dest_ptr += buf_line_stride;
+                            src_ptr += block_width;
+                        }
+                    } else {
+                        OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride +
+                                                           (x - x0) * buf_col_stride;
+                        if (x_incr == 1) {
+                            for (j = 0; j < y_incr; j++) {
+                                *dest_ptr = *src_ptr;
+                                dest_ptr += buf_line_stride;
+                                src_ptr += block_width;
+                            }
                         } else {
-                            OPJ_UINT32 k;
-                            for (k = 0; k < x_incr; k++) {
-                                *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride)
-                                    =
-                                        *(src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset +
-                                          k);
+                            /* General case */
+                            for (j = 0; j < y_incr; j++) {
+                                OPJ_UINT32 k;
+                                for (k = 0; k < x_incr; k++) {
+                                    dest_ptr[k * buf_col_stride] = src_ptr[k];
+                                }
+                                dest_ptr += buf_line_stride;
+                                src_ptr += block_width;
                             }
                         }
                     }
@@ -179,18 +202,36 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
                     sa->data_blocks[block_y * sa->block_count_hor + block_x] = src_block;
                 }
 
-                for (j = 0; j < y_incr; j++) {
-                    if (buf_col_stride == 1) {
-                        memcpy(src_block + (block_y_offset + j) * (size_t)sa->block_width +
-                               block_x_offset,
-                               buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride,
-                               sizeof(OPJ_INT32) * x_incr);
+                if (buf_col_stride == 1) {
+                    OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset *
+                                                       (size_t)block_width + block_x_offset;
+                    const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) *
+                                                            (size_t)buf_line_stride + (x - x0) * buf_col_stride;
+                    for (j = 0; j < y_incr; j++) {
+                        memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+                        dest_ptr += block_width;
+                        src_ptr += buf_line_stride;
+                    }
+                } else {
+                    OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset *
+                                                       (size_t)block_width + block_x_offset;
+                    const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) *
+                                                            (size_t)buf_line_stride + (x - x0) * buf_col_stride;
+                    if (x_incr == 1) {
+                        for (j = 0; j < y_incr; j++) {
+                            *dest_ptr = *src_ptr;
+                            src_ptr += buf_line_stride;
+                            dest_ptr += block_width;
+                        }
                     } else {
-                        OPJ_UINT32 k;
-                        for (k = 0; k < x_incr; k++) {
-                            *(src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset +
-                              k) =
-                                  *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride);
+                        /* General case */
+                        for (j = 0; j < y_incr; j++) {
+                            OPJ_UINT32 k;
+                            for (k = 0; k < x_incr; k++) {
+                                dest_ptr[k] = src_ptr[k * buf_col_stride];
+                            }
+                            src_ptr += buf_line_stride;
+                            dest_ptr += block_width;
                         }
                     }
                 }
@@ -201,7 +242,7 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
     return OPJ_TRUE;
 }
 
-OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa,
+OPJ_BOOL opj_sparse_array_int32_read(const opj_sparse_array_int32_t* sa,
                                      OPJ_UINT32 x0,
                                      OPJ_UINT32 y0,
                                      OPJ_UINT32 x1,
@@ -211,12 +252,13 @@ OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa,
                                      OPJ_UINT32 dest_line_stride,
                                      OPJ_BOOL forgiving)
 {
-    return opj_sparse_array_int32_read_or_write(sa, x0, y0, x1, y1,
-            dest,
-            dest_col_stride,
-            dest_line_stride,
-            forgiving,
-            OPJ_TRUE);
+    return opj_sparse_array_int32_read_or_write(
+               (opj_sparse_array_int32_t*)sa, x0, y0, x1, y1,
+               dest,
+               dest_col_stride,
+               dest_line_stride,
+               forgiving,
+               OPJ_TRUE);
 }
 
 OPJ_BOOL opj_sparse_array_int32_write(opj_sparse_array_int32_t* sa,
diff --git a/src/lib/openjp2/sparse_array.h b/src/lib/openjp2/sparse_array.h
index 485cafea..130fe13e 100644
--- a/src/lib/openjp2/sparse_array.h
+++ b/src/lib/openjp2/sparse_array.h
@@ -77,7 +77,7 @@ void opj_sparse_array_int32_free(opj_sparse_array_int32_t* sa);
  * @param y1 bottom y coordinate (not included) of the region. Must be greater than y0.
  * @return OPJ_TRUE or OPJ_FALSE.
  */
-OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa,
+OPJ_BOOL opj_sparse_array_is_region_valid(const opj_sparse_array_int32_t* sa,
         OPJ_UINT32 x0,
         OPJ_UINT32 y0,
         OPJ_UINT32 x1,
@@ -99,7 +99,7 @@ OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa,
  * @param forgiving if set to TRUE and the region is invalid, OPJ_TRUE will still be returned.
  * @return OPJ_TRUE in case of success.
  */
-OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa,
+OPJ_BOOL opj_sparse_array_int32_read(const opj_sparse_array_int32_t* sa,
                                      OPJ_UINT32 x0,
                                      OPJ_UINT32 y0,
                                      OPJ_UINT32 x1,
diff --git a/src/lib/openjp2/test_sparse_array.c b/src/lib/openjp2/test_sparse_array.c
index 0b49110f..8e136451 100644
--- a/src/lib/openjp2/test_sparse_array.c
+++ b/src/lib/openjp2/test_sparse_array.c
@@ -92,6 +92,7 @@ int main()
     ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1,
                                        OPJ_FALSE);
     assert(ret);
+
     buffer[0] = 2;
     ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1,
                                        OPJ_FALSE);
@@ -105,6 +106,29 @@ int main()
     assert(buffer[0] == 2);
     assert(buffer[1] == 0xFF);
 
+    buffer[0] = 0xFF;
+    buffer[1] = 0xFF;
+    buffer[2] = 0xFF;
+    ret = opj_sparse_array_int32_read(sa, 4, 5, 4 + 1, 5 + 2, buffer, 0, 1,
+                                      OPJ_FALSE);
+    assert(ret);
+    assert(buffer[0] == 2);
+    assert(buffer[1] == 0);
+    assert(buffer[2] == 0xFF);
+
+    buffer[0] = 3;
+    ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 0, 1,
+                                       OPJ_FALSE);
+    assert(ret);
+
+    buffer[0] = 0;
+    buffer[1] = 0xFF;
+    ret = opj_sparse_array_int32_read(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1,
+                                      OPJ_FALSE);
+    assert(ret);
+    assert(buffer[0] == 3);
+    assert(buffer[1] == 0xFF);
+
     w = 15 + 1;
     h = 17 + 1;
     memset(buffer, 0xFF, sizeof(buffer));
@@ -114,7 +138,7 @@ int main()
     for (j = 0; j < h; j++) {
         for (i = 0; i < w; i++) {
             if (i == 4 - 2 && j == 5 - 1) {
-                assert(buffer[ j * w + i ] == 2);
+                assert(buffer[ j * w + i ] == 3);
             } else {
                 assert(buffer[ j * w + i ] == 0);
             }
-- 
cgit v1.2.3


From 873004c615b1bed3ce780e869288602af86fdee5 Mon Sep 17 00:00:00 2001
From: Even Rouault <even.rouault@spatialys.com>
Date: Fri, 1 Sep 2017 16:31:00 +0200
Subject: Sub-tile decoding: speed up vertical pass in IDWT5x3 by processing 4
 cols at a time

---
 src/lib/openjp2/dwt.c          | 143 ++++++++++++++++++++++++++++++++++++-----
 src/lib/openjp2/sparse_array.c |  47 +++++++++++---
 2 files changed, 165 insertions(+), 25 deletions(-)

(limited to 'src/lib/openjp2/sparse_array.c')

diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c
index 153bfa40..ae1cbd50 100644
--- a/src/lib/openjp2/dwt.c
+++ b/src/lib/openjp2/dwt.c
@@ -1551,6 +1551,7 @@ static void opj_dwt_interleave_partial_v(OPJ_INT32 *dest,
         OPJ_INT32 cas,
         opj_sparse_array_int32_t* sa,
         OPJ_UINT32 sa_col,
+        OPJ_UINT32 nb_cols,
         OPJ_UINT32 sn,
         OPJ_UINT32 win_l_y0,
         OPJ_UINT32 win_l_y1,
@@ -1560,15 +1561,15 @@ static void opj_dwt_interleave_partial_v(OPJ_INT32 *dest,
     OPJ_BOOL ret;
     ret  = opj_sparse_array_int32_read(sa,
                                        sa_col, win_l_y0,
-                                       sa_col + 1, win_l_y1,
-                                       dest + cas + 2 * win_l_y0,
-                                       0, 2, OPJ_TRUE);
+                                       sa_col + nb_cols, win_l_y1,
+                                       dest + cas * 4 + 2 * 4 * win_l_y0,
+                                       1, 2 * 4, OPJ_TRUE);
     assert(ret);
     ret = opj_sparse_array_int32_read(sa,
                                       sa_col, sn + win_h_y0,
-                                      sa_col + 1, sn + win_h_y1,
-                                      dest + 1 - cas + 2 * win_h_y0,
-                                      0, 2, OPJ_TRUE);
+                                      sa_col + nb_cols, sn + win_h_y1,
+                                      dest + (1 - cas) * 4 + 2 * 4 * win_h_y0,
+                                      1, 2 * 4, OPJ_TRUE);
     assert(ret);
     OPJ_UNUSED(ret);
 }
@@ -1648,6 +1649,109 @@ static void opj_dwt_decode_partial_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
     }
 }
 
+#define OPJ_S_off(i,off) a[(OPJ_UINT32)(i)*2*4+off]
+#define OPJ_D_off(i,off) a[(1+(OPJ_UINT32)(i)*2)*4+off]
+#define OPJ_S__off(i,off) ((i)<0?OPJ_S_off(0,off):((i)>=sn?OPJ_S_off(sn-1,off):OPJ_S_off(i,off)))
+#define OPJ_D__off(i,off) ((i)<0?OPJ_D_off(0,off):((i)>=dn?OPJ_D_off(dn-1,off):OPJ_D_off(i,off)))
+#define OPJ_SS__off(i,off) ((i)<0?OPJ_S_off(0,off):((i)>=dn?OPJ_S_off(dn-1,off):OPJ_S_off(i,off)))
+#define OPJ_DD__off(i,off) ((i)<0?OPJ_D_off(0,off):((i)>=sn?OPJ_D_off(sn-1,off):OPJ_D_off(i,off)))
+
+static void opj_dwt_decode_partial_1_parallel(OPJ_INT32 *a,
+        OPJ_UINT32 nb_cols,
+        OPJ_INT32 dn, OPJ_INT32 sn,
+        OPJ_INT32 cas,
+        OPJ_INT32 win_l_x0,
+        OPJ_INT32 win_l_x1,
+        OPJ_INT32 win_h_x0,
+        OPJ_INT32 win_h_x1)
+{
+    OPJ_INT32 i;
+    OPJ_UINT32 off;
+
+    (void)nb_cols;
+
+    if (!cas) {
+        if ((dn > 0) || (sn > 1)) { /* NEW :  CASE ONE ELEMENT */
+
+            /* Naive version is :
+            for (i = win_l_x0; i < i_max; i++) {
+                OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2;
+            }
+            for (i = win_h_x0; i < win_h_x1; i++) {
+                OPJ_D(i) += (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1;
+            }
+            but the compiler doesn't manage to unroll it to avoid bound
+            checking in OPJ_S_ and OPJ_D_ macros
+            */
+
+            i = win_l_x0;
+            if (i < win_l_x1) {
+                OPJ_INT32 i_max;
+
+                /* Left-most case */
+                for (off = 0; off < 4; off++) {
+                    OPJ_S_off(i, off) -= (OPJ_D__off(i - 1, off) + OPJ_D__off(i, off) + 2) >> 2;
+                }
+                i ++;
+
+                i_max = win_l_x1;
+                if (i_max > dn) {
+                    i_max = dn;
+                }
+                for (; i < i_max; i++) {
+                    /* No bound checking */
+                    for (off = 0; off < 4; off++) {
+                        OPJ_S_off(i, off) -= (OPJ_D_off(i - 1, off) + OPJ_D_off(i, off) + 2) >> 2;
+                    }
+                }
+                for (; i < win_l_x1; i++) {
+                    /* Right-most case */
+                    for (off = 0; off < 4; off++) {
+                        OPJ_S_off(i, off) -= (OPJ_D__off(i - 1, off) + OPJ_D__off(i, off) + 2) >> 2;
+                    }
+                }
+            }
+
+            i = win_h_x0;
+            if (i < win_h_x1) {
+                OPJ_INT32 i_max = win_h_x1;
+                if (i_max >= sn) {
+                    i_max = sn - 1;
+                }
+                for (; i < i_max; i++) {
+                    /* No bound checking */
+                    for (off = 0; off < 4; off++) {
+                        OPJ_D_off(i, off) += (OPJ_S_off(i, off) + OPJ_S_off(i + 1, off)) >> 1;
+                    }
+                }
+                for (; i < win_h_x1; i++) {
+                    /* Right-most case */
+                    for (off = 0; off < 4; off++) {
+                        OPJ_D_off(i, off) += (OPJ_S__off(i, off) + OPJ_S__off(i + 1, off)) >> 1;
+                    }
+                }
+            }
+        }
+    } else {
+        if (!sn  && dn == 1) {        /* NEW :  CASE ONE ELEMENT */
+            for (off = 0; off < 4; off++) {
+                OPJ_S_off(0, off) /= 2;
+            }
+        } else {
+            for (i = win_l_x0; i < win_l_x1; i++) {
+                for (off = 0; off < 4; off++) {
+                    OPJ_D_off(i, off) -= (OPJ_SS__off(i, off) + OPJ_SS__off(i + 1, off) + 2) >> 2;
+                }
+            }
+            for (i = win_h_x0; i < win_h_x1; i++) {
+                for (off = 0; off < 4; off++) {
+                    OPJ_S_off(i, off) += (OPJ_DD__off(i, off) + OPJ_DD__off(i - 1, off)) >> 1;
+                }
+            }
+        }
+    }
+}
+
 static void opj_dwt_get_band_coordinates(opj_tcd_tilecomp_t* tilec,
         OPJ_UINT32 resno,
         OPJ_UINT32 bandno,
@@ -1804,13 +1908,14 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
     }
     h_mem_size = opj_dwt_max_resolution(tr, numres);
     /* overflow check */
-    if (h_mem_size > (SIZE_MAX / sizeof(OPJ_INT32))) {
+    /* in vertical pass, we process 4 columns at a time */
+    if (h_mem_size > (SIZE_MAX / (4 * sizeof(OPJ_INT32)))) {
         /* FIXME event manager error callback */
         opj_sparse_array_int32_free(sa);
         return OPJ_FALSE;
     }
 
-    h_mem_size *= sizeof(OPJ_INT32);
+    h_mem_size *= 4 * sizeof(OPJ_INT32);
     h.mem = (OPJ_INT32*)opj_aligned_32_malloc(h_mem_size);
     if (! h.mem) {
         /* FIXME event manager error callback */
@@ -1946,31 +2051,35 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
             }
         }
 
-        for (i = win_tr_x0; i < win_tr_x1; ++i) {
+        for (i = win_tr_x0; i < win_tr_x1;) {
+            OPJ_UINT32 nb_cols = opj_uint_min(4U, win_tr_x1 - i);
             opj_dwt_interleave_partial_v(v.mem,
                                          v.cas,
                                          sa,
                                          i,
+                                         nb_cols,
                                          (OPJ_UINT32)v.sn,
                                          win_ll_y0,
                                          win_ll_y1,
                                          win_lh_y0,
                                          win_lh_y1);
-            opj_dwt_decode_partial_1(v.mem, v.dn, v.sn, v.cas,
-                                     (OPJ_INT32)win_ll_y0,
-                                     (OPJ_INT32)win_ll_y1,
-                                     (OPJ_INT32)win_lh_y0,
-                                     (OPJ_INT32)win_lh_y1);
+            opj_dwt_decode_partial_1_parallel(v.mem, nb_cols, v.dn, v.sn, v.cas,
+                                              (OPJ_INT32)win_ll_y0,
+                                              (OPJ_INT32)win_ll_y1,
+                                              (OPJ_INT32)win_lh_y0,
+                                              (OPJ_INT32)win_lh_y1);
             if (!opj_sparse_array_int32_write(sa,
                                               i, win_tr_y0,
-                                              i + 1, win_tr_y1,
-                                              v.mem + win_tr_y0,
-                                              0, 1, OPJ_TRUE)) {
+                                              i + nb_cols, win_tr_y1,
+                                              v.mem + 4 * win_tr_y0,
+                                              1, 4, OPJ_TRUE)) {
                 /* FIXME event manager error callback */
                 opj_sparse_array_int32_free(sa);
                 opj_aligned_free(h.mem);
                 return OPJ_FALSE;
             }
+
+            i += nb_cols;
         }
     }
     opj_aligned_free(h.mem);
diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c
index b0634f67..48c4b23b 100644
--- a/src/lib/openjp2/sparse_array.c
+++ b/src/lib/openjp2/sparse_array.c
@@ -165,10 +165,20 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
                     if (buf_col_stride == 1) {
                         OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride +
                                                            (x - x0) * buf_col_stride;
-                        for (j = 0; j < y_incr; j++) {
-                            memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
-                            dest_ptr += buf_line_stride;
-                            src_ptr += block_width;
+                        if (x_incr == 4) {
+                            // Same code as general branch, but the compiler
+                            // can have an efficient memcpy()
+                            for (j = 0; j < y_incr; j++) {
+                                memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+                                dest_ptr += buf_line_stride;
+                                src_ptr += block_width;
+                            }
+                        } else {
+                            for (j = 0; j < y_incr; j++) {
+                                memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+                                dest_ptr += buf_line_stride;
+                                src_ptr += block_width;
+                            }
                         }
                     } else {
                         OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride +
@@ -179,6 +189,17 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
                                 dest_ptr += buf_line_stride;
                                 src_ptr += block_width;
                             }
+                        } else if (y_incr == 1 && buf_col_stride == 2) {
+                            OPJ_UINT32 k;
+                            for (k = 0; k < (x_incr & ~3U); k += 4) {
+                                dest_ptr[k * buf_col_stride] = src_ptr[k];
+                                dest_ptr[(k + 1) * buf_col_stride] = src_ptr[k + 1];
+                                dest_ptr[(k + 2) * buf_col_stride] = src_ptr[k + 2];
+                                dest_ptr[(k + 3) * buf_col_stride] = src_ptr[k + 3];
+                            }
+                            for (; k < x_incr; k++) {
+                                dest_ptr[k * buf_col_stride] = src_ptr[k];
+                            }
                         } else {
                             /* General case */
                             for (j = 0; j < y_incr; j++) {
@@ -207,10 +228,20 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
                                                        (size_t)block_width + block_x_offset;
                     const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) *
                                                             (size_t)buf_line_stride + (x - x0) * buf_col_stride;
-                    for (j = 0; j < y_incr; j++) {
-                        memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
-                        dest_ptr += block_width;
-                        src_ptr += buf_line_stride;
+                    if (x_incr == 4) {
+                        // Same code as general branch, but the compiler
+                        // can have an efficient memcpy()
+                        for (j = 0; j < y_incr; j++) {
+                            memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+                            dest_ptr += block_width;
+                            src_ptr += buf_line_stride;
+                        }
+                    } else {
+                        for (j = 0; j < y_incr; j++) {
+                            memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+                            dest_ptr += block_width;
+                            src_ptr += buf_line_stride;
+                        }
                     }
                 } else {
                     OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset *
-- 
cgit v1.2.3


From 7017e67a01c378a7a1ee5e34dd544c793b5c23e4 Mon Sep 17 00:00:00 2001
From: Even Rouault <even.rouault@spatialys.com>
Date: Fri, 1 Sep 2017 16:31:10 +0200
Subject: sparse_array: optimizations for lossy case

---
 src/lib/openjp2/sparse_array.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'src/lib/openjp2/sparse_array.c')

diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c
index 48c4b23b..40459bdb 100644
--- a/src/lib/openjp2/sparse_array.c
+++ b/src/lib/openjp2/sparse_array.c
@@ -200,6 +200,21 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
                             for (; k < x_incr; k++) {
                                 dest_ptr[k * buf_col_stride] = src_ptr[k];
                             }
+                        } else if (x_incr >= 8 && buf_col_stride == 8) {
+                            for (j = 0; j < y_incr; j++) {
+                                OPJ_UINT32 k;
+                                for (k = 0; k < (x_incr & ~3U); k += 4) {
+                                    dest_ptr[k * buf_col_stride] = src_ptr[k];
+                                    dest_ptr[(k + 1) * buf_col_stride] = src_ptr[k + 1];
+                                    dest_ptr[(k + 2) * buf_col_stride] = src_ptr[k + 2];
+                                    dest_ptr[(k + 3) * buf_col_stride] = src_ptr[k + 3];
+                                }
+                                for (; k < x_incr; k++) {
+                                    dest_ptr[k * buf_col_stride] = src_ptr[k];
+                                }
+                                dest_ptr += buf_line_stride;
+                                src_ptr += block_width;
+                            }
                         } else {
                             /* General case */
                             for (j = 0; j < y_incr; j++) {
@@ -254,6 +269,21 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
                             src_ptr += buf_line_stride;
                             dest_ptr += block_width;
                         }
+                    } else if (x_incr >= 8 && buf_col_stride == 8) {
+                        for (j = 0; j < y_incr; j++) {
+                            OPJ_UINT32 k;
+                            for (k = 0; k < (x_incr & ~3U); k += 4) {
+                                dest_ptr[k] = src_ptr[k * buf_col_stride];
+                                dest_ptr[k + 1] = src_ptr[(k + 1) * buf_col_stride];
+                                dest_ptr[k + 2] = src_ptr[(k + 2) * buf_col_stride];
+                                dest_ptr[k + 3] = src_ptr[(k + 3) * buf_col_stride];
+                            }
+                            for (; k < x_incr; k++) {
+                                dest_ptr[k] = src_ptr[k * buf_col_stride];
+                            }
+                            src_ptr += buf_line_stride;
+                            dest_ptr += block_width;
+                        }
                     } else {
                         /* General case */
                         for (j = 0; j < y_incr; j++) {
-- 
cgit v1.2.3


From 579b8937eae7e6b6868b8b5c6286a742c10a5130 Mon Sep 17 00:00:00 2001
From: Even Rouault <even.rouault@spatialys.com>
Date: Mon, 4 Sep 2017 17:35:52 +0200
Subject: Replace uses of size_t by OPJ_SIZE_T

---
 src/lib/openjp2/dwt.c          | 113 +++++++++++++++++++++--------------------
 src/lib/openjp2/sparse_array.c |  20 ++++----
 src/lib/openjp2/t1.c           |  24 ++++-----
 src/lib/openjp2/tcd.c          |  47 ++++++++---------
 4 files changed, 104 insertions(+), 100 deletions(-)

(limited to 'src/lib/openjp2/sparse_array.c')

diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c
index 719c7330..18270045 100644
--- a/src/lib/openjp2/dwt.c
+++ b/src/lib/openjp2/dwt.c
@@ -279,7 +279,7 @@ static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn,
         l_src += 2;
     } /* b[i*x]=a[2*i+cas]; */
 
-    l_dest = b + (size_t)sn * (size_t)x;
+    l_dest = b + (OPJ_SIZE_T)sn * (OPJ_SIZE_T)x;
     l_src = a + 1 - cas;
 
     i = dn;
@@ -325,7 +325,7 @@ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x)
         bi += 2;
         ai += x;
     }
-    ai = a + (v->sn * (size_t)x);
+    ai = a + (v->sn * (OPJ_SIZE_T)x);
     bi = v->mem + 1 - v->cas;
     i = v->dn ;
     while (i--) {
@@ -616,7 +616,7 @@ static
 void opj_idwt53_v_final_memcpy(OPJ_INT32* tiledp_col,
                                const OPJ_INT32* tmp,
                                OPJ_INT32 len,
-                               size_t stride)
+                               OPJ_SIZE_T stride)
 {
     OPJ_INT32 i;
     for (i = 0; i < len; ++i) {
@@ -625,9 +625,9 @@ void opj_idwt53_v_final_memcpy(OPJ_INT32* tiledp_col,
                     PARALLEL_COLS_53 * sizeof(OPJ_INT32))
            would do but would be a tiny bit slower.
            We can take here advantage of our knowledge of alignment */
-        STOREU(&tiledp_col[(size_t)i * stride + 0],
+        STOREU(&tiledp_col[(OPJ_SIZE_T)i * stride + 0],
                LOAD(&tmp[PARALLEL_COLS_53 * i + 0]));
-        STOREU(&tiledp_col[(size_t)i * stride + VREG_INT_COUNT],
+        STOREU(&tiledp_col[(OPJ_SIZE_T)i * stride + VREG_INT_COUNT],
                LOAD(&tmp[PARALLEL_COLS_53 * i + VREG_INT_COUNT]));
     }
 }
@@ -639,13 +639,13 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2(
     const OPJ_INT32 sn,
     const OPJ_INT32 len,
     OPJ_INT32* tiledp_col,
-    const size_t stride)
+    const OPJ_SIZE_T stride)
 {
     const OPJ_INT32* in_even = &tiledp_col[0];
-    const OPJ_INT32* in_odd = &tiledp_col[(size_t)sn * stride];
+    const OPJ_INT32* in_odd = &tiledp_col[(OPJ_SIZE_T)sn * stride];
 
     OPJ_INT32 i;
-    size_t j;
+    OPJ_SIZE_T j;
     VREG d1c_0, d1n_0, s1n_0, s0c_0, s0n_0;
     VREG d1c_1, d1n_1, s1n_1, s0c_1, s0n_1;
     const VREG two = LOAD_CST(2);
@@ -662,7 +662,7 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2(
     /* Note: loads of input even/odd values must be done in a unaligned */
     /* fashion. But stores in tmp can be done with aligned store, since */
     /* the temporary buffer is properly aligned */
-    assert((size_t)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0);
+    assert((OPJ_SIZE_T)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0);
 
     s1n_0 = LOADU(in_even + 0);
     s1n_1 = LOADU(in_even + VREG_INT_COUNT);
@@ -704,7 +704,7 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2(
 
     if (len & 1) {
         VREG tmp_len_minus_1;
-        s1n_0 = LOADU(in_even + (size_t)((len - 1) / 2) * stride);
+        s1n_0 = LOADU(in_even + (OPJ_SIZE_T)((len - 1) / 2) * stride);
         /* tmp_len_minus_1 = s1n - ((d1n + 1) >> 1); */
         tmp_len_minus_1 = SUB(s1n_0, SAR(ADD3(d1n_0, d1n_0, two), 2));
         STORE(tmp + PARALLEL_COLS_53 * (len - 1), tmp_len_minus_1);
@@ -712,7 +712,7 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2(
         STORE(tmp + PARALLEL_COLS_53 * (len - 2),
               ADD(d1n_0, SAR(ADD(s0n_0, tmp_len_minus_1), 1)));
 
-        s1n_1 = LOADU(in_even + (size_t)((len - 1) / 2) * stride + VREG_INT_COUNT);
+        s1n_1 = LOADU(in_even + (OPJ_SIZE_T)((len - 1) / 2) * stride + VREG_INT_COUNT);
         /* tmp_len_minus_1 = s1n - ((d1n + 1) >> 1); */
         tmp_len_minus_1 = SUB(s1n_1, SAR(ADD3(d1n_1, d1n_1, two), 2));
         STORE(tmp + PARALLEL_COLS_53 * (len - 1) + VREG_INT_COUNT,
@@ -740,16 +740,16 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2(
     const OPJ_INT32 sn,
     const OPJ_INT32 len,
     OPJ_INT32* tiledp_col,
-    const size_t stride)
+    const OPJ_SIZE_T stride)
 {
     OPJ_INT32 i;
-    size_t j;
+    OPJ_SIZE_T j;
 
     VREG s1_0, s2_0, dc_0, dn_0;
     VREG s1_1, s2_1, dc_1, dn_1;
     const VREG two = LOAD_CST(2);
 
-    const OPJ_INT32* in_even = &tiledp_col[(size_t)sn * stride];
+    const OPJ_INT32* in_even = &tiledp_col[(OPJ_SIZE_T)sn * stride];
     const OPJ_INT32* in_odd = &tiledp_col[0];
 
     assert(len > 2);
@@ -764,7 +764,7 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2(
     /* Note: loads of input even/odd values must be done in a unaligned */
     /* fashion. But stores in tmp can be done with aligned store, since */
     /* the temporary buffer is properly aligned */
-    assert((size_t)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0);
+    assert((OPJ_SIZE_T)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0);
 
     s1_0 = LOADU(in_even + stride);
     /* in_odd[0] - ((in_even[0] + s1 + 2) >> 2); */
@@ -809,9 +809,9 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2(
 
     if (!(len & 1)) {
         /*dn = in_odd[(len / 2 - 1) * stride] - ((s1 + 1) >> 1); */
-        dn_0 = SUB(LOADU(in_odd + (size_t)(len / 2 - 1) * stride),
+        dn_0 = SUB(LOADU(in_odd + (OPJ_SIZE_T)(len / 2 - 1) * stride),
                    SAR(ADD3(s1_0, s1_0, two), 2));
-        dn_1 = SUB(LOADU(in_odd + (size_t)(len / 2 - 1) * stride + VREG_INT_COUNT),
+        dn_1 = SUB(LOADU(in_odd + (OPJ_SIZE_T)(len / 2 - 1) * stride + VREG_INT_COUNT),
                    SAR(ADD3(s1_1, s1_1, two), 2));
 
         /* tmp[len - 2] = s1 + ((dn + dc) >> 1); */
@@ -851,7 +851,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp,
                              const OPJ_INT32 sn,
                              const OPJ_INT32 len,
                              OPJ_INT32* tiledp_col,
-                             const size_t stride)
+                             const OPJ_SIZE_T stride)
 {
     OPJ_INT32 i, j;
     OPJ_INT32 d1c, d1n, s1n, s0c, s0n;
@@ -862,15 +862,15 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp,
     /* accesses and explicit interleaving. */
 
     s1n = tiledp_col[0];
-    d1n = tiledp_col[(size_t)sn * stride];
+    d1n = tiledp_col[(OPJ_SIZE_T)sn * stride];
     s0n = s1n - ((d1n + 1) >> 1);
 
     for (i = 0, j = 0; i < (len - 3); i += 2, j++) {
         d1c = d1n;
         s0c = s0n;
 
-        s1n = tiledp_col[(size_t)(j + 1) * stride];
-        d1n = tiledp_col[(size_t)(sn + j + 1) * stride];
+        s1n = tiledp_col[(OPJ_SIZE_T)(j + 1) * stride];
+        d1n = tiledp_col[(OPJ_SIZE_T)(sn + j + 1) * stride];
 
         s0n = s1n - ((d1c + d1n + 2) >> 2);
 
@@ -882,7 +882,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp,
 
     if (len & 1) {
         tmp[len - 1] =
-            tiledp_col[(size_t)((len - 1) / 2) * stride] -
+            tiledp_col[(OPJ_SIZE_T)((len - 1) / 2) * stride] -
             ((d1n + 1) >> 1);
         tmp[len - 2] = d1n + ((s0n + tmp[len - 1]) >> 1);
     } else {
@@ -890,7 +890,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp,
     }
 
     for (i = 0; i < len; ++i) {
-        tiledp_col[(size_t)i * stride] = tmp[i];
+        tiledp_col[(OPJ_SIZE_T)i * stride] = tmp[i];
     }
 }
 
@@ -901,11 +901,11 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp,
                              const OPJ_INT32 sn,
                              const OPJ_INT32 len,
                              OPJ_INT32* tiledp_col,
-                             const size_t stride)
+                             const OPJ_SIZE_T stride)
 {
     OPJ_INT32 i, j;
     OPJ_INT32 s1, s2, dc, dn;
-    const OPJ_INT32* in_even = &tiledp_col[(size_t)sn * stride];
+    const OPJ_INT32* in_even = &tiledp_col[(OPJ_SIZE_T)sn * stride];
     const OPJ_INT32* in_odd = &tiledp_col[0];
 
     assert(len > 2);
@@ -918,9 +918,9 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp,
     tmp[0] = in_even[0] + dc;
     for (i = 1, j = 1; i < (len - 2 - !(len & 1)); i += 2, j++) {
 
-        s2 = in_even[(size_t)(j + 1) * stride];
+        s2 = in_even[(OPJ_SIZE_T)(j + 1) * stride];
 
-        dn = in_odd[(size_t)j * stride] - ((s1 + s2 + 2) >> 2);
+        dn = in_odd[(OPJ_SIZE_T)j * stride] - ((s1 + s2 + 2) >> 2);
         tmp[i  ] = dc;
         tmp[i + 1] = s1 + ((dn + dc) >> 1);
 
@@ -929,7 +929,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp,
     }
     tmp[i] = dc;
     if (!(len & 1)) {
-        dn = in_odd[(size_t)(len / 2 - 1) * stride] - ((s1 + 1) >> 1);
+        dn = in_odd[(OPJ_SIZE_T)(len / 2 - 1) * stride] - ((s1 + 1) >> 1);
         tmp[len - 2] = s1 + ((dn + dc) >> 1);
         tmp[len - 1] = dn;
     } else {
@@ -937,7 +937,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp,
     }
 
     for (i = 0; i < len; ++i) {
-        tiledp_col[(size_t)i * stride] = tmp[i];
+        tiledp_col[(OPJ_SIZE_T)i * stride] = tmp[i];
     }
 }
 #endif /* !defined(STANDARD_SLOW_VERSION) */
@@ -948,7 +948,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp,
 /* Performs interleave, inverse wavelet transform and copy back to buffer */
 static void opj_idwt53_v(const opj_dwt_t *dwt,
                          OPJ_INT32* tiledp_col,
-                         size_t stride,
+                         OPJ_SIZE_T stride,
                          OPJ_INT32 nb_cols)
 {
 #ifdef STANDARD_SLOW_VERSION
@@ -996,14 +996,14 @@ static void opj_idwt53_v(const opj_dwt_t *dwt,
             OPJ_INT32* out = dwt->mem;
             for (c = 0; c < nb_cols; c++, tiledp_col++) {
                 OPJ_INT32 i;
-                const OPJ_INT32* in_even = &tiledp_col[(size_t)sn * stride];
+                const OPJ_INT32* in_even = &tiledp_col[(OPJ_SIZE_T)sn * stride];
                 const OPJ_INT32* in_odd = &tiledp_col[0];
 
                 out[1] = in_odd[0] - ((in_even[0] + 1) >> 1);
                 out[0] = in_even[0] + out[1];
 
                 for (i = 0; i < len; ++i) {
-                    tiledp_col[(size_t)i * stride] = out[i];
+                    tiledp_col[(OPJ_SIZE_T)i * stride] = out[i];
                 }
             }
 
@@ -1113,7 +1113,7 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec,
 
     OPJ_INT32 rw;           /* width of the resolution level computed   */
     OPJ_INT32 rh;           /* height of the resolution level computed  */
-    size_t l_data_size;
+    OPJ_SIZE_T l_data_size;
 
     opj_tcd_resolution_t * l_cur_res = 0;
     opj_tcd_resolution_t * l_last_res = 0;
@@ -1343,11 +1343,11 @@ static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls)
     job = (opj_dwd_decode_v_job_t*)user_data;
     for (j = job->min_j; j + PARALLEL_COLS_53 <= job->max_j;
             j += PARALLEL_COLS_53) {
-        opj_idwt53_v(&job->v, &job->tiledp[j], (size_t)job->w,
+        opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_SIZE_T)job->w,
                      PARALLEL_COLS_53);
     }
     if (j < job->max_j)
-        opj_idwt53_v(&job->v, &job->tiledp[j], (size_t)job->w,
+        opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_SIZE_T)job->w,
                      (OPJ_INT32)(job->max_j - j));
 
     opj_aligned_free(job->v.mem);
@@ -1374,7 +1374,7 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
     OPJ_UINT32 w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions -
                                                                1].x1 -
                                 tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
-    size_t h_mem_size;
+    OPJ_SIZE_T h_mem_size;
     int num_threads;
 
     if (numres == 1U) {
@@ -1415,7 +1415,7 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
 
         if (num_threads <= 1 || rh <= 1) {
             for (j = 0; j < rh; ++j) {
-                opj_idwt53_h(&h, &tiledp[(size_t)j * w]);
+                opj_idwt53_h(&h, &tiledp[(OPJ_SIZE_T)j * w]);
             }
         } else {
             OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads;
@@ -1467,10 +1467,10 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
         if (num_threads <= 1 || rw <= 1) {
             for (j = 0; j + PARALLEL_COLS_53 <= rw;
                     j += PARALLEL_COLS_53) {
-                opj_idwt53_v(&v, &tiledp[j], (size_t)w, PARALLEL_COLS_53);
+                opj_idwt53_v(&v, &tiledp[j], (OPJ_SIZE_T)w, PARALLEL_COLS_53);
             }
             if (j < rw) {
-                opj_idwt53_v(&v, &tiledp[j], (size_t)w, (OPJ_INT32)(rw - j));
+                opj_idwt53_v(&v, &tiledp[j], (OPJ_SIZE_T)w, (OPJ_INT32)(rw - j));
             }
         } else {
             OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads;
@@ -1922,7 +1922,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
     OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 -
                                  tr->y0);  /* height of the resolution level computed */
 
-    size_t h_mem_size;
+    OPJ_SIZE_T h_mem_size;
 
     /* Compute the intersection of the area of interest, expressed in tile coordinates */
     /* with the tile coordinates */
@@ -2152,8 +2152,8 @@ static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt,
     OPJ_UINT32 x1 = dwt->win_l_x1;
 
     for (k = 0; k < 2; ++k) {
-        if (remaining_height >= 4 && ((size_t) a & 0x0f) == 0 &&
-                ((size_t) bi & 0x0f) == 0 && (width & 0x0f) == 0) {
+        if (remaining_height >= 4 && ((OPJ_SIZE_T) a & 0x0f) == 0 &&
+                ((OPJ_SIZE_T) bi & 0x0f) == 0 && (width & 0x0f) == 0) {
             /* Fast code path */
             for (i = x0; i < x1; ++i) {
                 OPJ_UINT32 j = i;
@@ -2230,16 +2230,16 @@ static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt,
     OPJ_UINT32 i;
 
     for (i = dwt->win_l_x0; i < dwt->win_l_x1; ++i) {
-        memcpy(&bi[i * 2], &a[i * (size_t)width],
-               (size_t)nb_elts_read * sizeof(OPJ_FLOAT32));
+        memcpy(&bi[i * 2], &a[i * (OPJ_SIZE_T)width],
+               (OPJ_SIZE_T)nb_elts_read * sizeof(OPJ_FLOAT32));
     }
 
-    a += (OPJ_UINT32)dwt->sn * (size_t)width;
+    a += (OPJ_UINT32)dwt->sn * (OPJ_SIZE_T)width;
     bi = dwt->wavelet + 1 - dwt->cas;
 
     for (i = dwt->win_h_x0; i < dwt->win_h_x1; ++i) {
-        memcpy(&bi[i * 2], &a[i * (size_t)width],
-               (size_t)nb_elts_read * sizeof(OPJ_FLOAT32));
+        memcpy(&bi[i * 2], &a[i * (OPJ_SIZE_T)width],
+               (OPJ_SIZE_T)nb_elts_read * sizeof(OPJ_FLOAT32));
     }
 }
 
@@ -2497,7 +2497,7 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
                                                                1].x1 -
                                 tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
 
-    size_t l_data_size;
+    OPJ_SIZE_T l_data_size;
 
     l_data_size = opj_dwt_max_resolution(res, numres);
     /* overflow check */
@@ -2546,9 +2546,9 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
 
             for (k = 0; k < rw; k++) {
                 aj[k      ] = h.wavelet[k].f[0];
-                aj[k + (size_t)w  ] = h.wavelet[k].f[1];
-                aj[k + (size_t)w * 2] = h.wavelet[k].f[2];
-                aj[k + (size_t)w * 3] = h.wavelet[k].f[3];
+                aj[k + (OPJ_SIZE_T)w  ] = h.wavelet[k].f[1];
+                aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2];
+                aj[k + (OPJ_SIZE_T)w * 3] = h.wavelet[k].f[3];
             }
 
             aj += w * 4;
@@ -2561,10 +2561,10 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
             for (k = 0; k < rw; k++) {
                 switch (rh - j) {
                 case 3:
-                    aj[k + (size_t)w * 2] = h.wavelet[k].f[2];
+                    aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2];
                 /* FALLTHRU */
                 case 2:
-                    aj[k + (size_t)w  ] = h.wavelet[k].f[1];
+                    aj[k + (OPJ_SIZE_T)w  ] = h.wavelet[k].f[1];
                 /* FALLTHRU */
                 case 1:
                     aj[k] = h.wavelet[k].f[0];
@@ -2587,7 +2587,7 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
             opj_v4dwt_decode(&v);
 
             for (k = 0; k < rh; ++k) {
-                memcpy(&aj[k * (size_t)w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32));
+                memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32));
             }
             aj += 4;
         }
@@ -2601,7 +2601,8 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
             opj_v4dwt_decode(&v);
 
             for (k = 0; k < rh; ++k) {
-                memcpy(&aj[k * (size_t)w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32));
+                memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k],
+                       (OPJ_SIZE_T)j * sizeof(OPJ_FLOAT32));
             }
         }
     }
@@ -2631,7 +2632,7 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
     OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 -
                                  tr->y0);    /* height of the resolution level computed */
 
-    size_t l_data_size;
+    OPJ_SIZE_T l_data_size;
 
     /* Compute the intersection of the area of interest, expressed in tile coordinates */
     /* with the tile coordinates */
diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c
index 40459bdb..6a2d8d43 100644
--- a/src/lib/openjp2/sparse_array.c
+++ b/src/lib/openjp2/sparse_array.c
@@ -142,14 +142,14 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
             if (is_read_op) {
                 if (src_block == NULL) {
                     if (buf_col_stride == 1) {
-                        OPJ_INT32* dest_ptr = buf + (y - y0) * (size_t)buf_line_stride +
+                        OPJ_INT32* dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride +
                                               (x - x0) * buf_col_stride;
                         for (j = 0; j < y_incr; j++) {
                             memset(dest_ptr, 0, sizeof(OPJ_INT32) * x_incr);
                             dest_ptr += buf_line_stride;
                         }
                     } else {
-                        OPJ_INT32* dest_ptr = buf + (y - y0) * (size_t)buf_line_stride +
+                        OPJ_INT32* dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride +
                                               (x - x0) * buf_col_stride;
                         for (j = 0; j < y_incr; j++) {
                             OPJ_UINT32 k;
@@ -161,9 +161,10 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
                     }
                 } else {
                     const OPJ_INT32* OPJ_RESTRICT src_ptr = src_block + block_y_offset *
-                                                            (size_t)block_width + block_x_offset;
+                                                            (OPJ_SIZE_T)block_width + block_x_offset;
                     if (buf_col_stride == 1) {
-                        OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride +
+                        OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride
+                                                           +
                                                            (x - x0) * buf_col_stride;
                         if (x_incr == 4) {
                             // Same code as general branch, but the compiler
@@ -181,7 +182,8 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
                             }
                         }
                     } else {
-                        OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride +
+                        OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride
+                                                           +
                                                            (x - x0) * buf_col_stride;
                         if (x_incr == 1) {
                             for (j = 0; j < y_incr; j++) {
@@ -240,9 +242,9 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
 
                 if (buf_col_stride == 1) {
                     OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset *
-                                                       (size_t)block_width + block_x_offset;
+                                                       (OPJ_SIZE_T)block_width + block_x_offset;
                     const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) *
-                                                            (size_t)buf_line_stride + (x - x0) * buf_col_stride;
+                                                            (OPJ_SIZE_T)buf_line_stride + (x - x0) * buf_col_stride;
                     if (x_incr == 4) {
                         // Same code as general branch, but the compiler
                         // can have an efficient memcpy()
@@ -260,9 +262,9 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
                     }
                 } else {
                     OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset *
-                                                       (size_t)block_width + block_x_offset;
+                                                       (OPJ_SIZE_T)block_width + block_x_offset;
                     const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) *
-                                                            (size_t)buf_line_stride + (x - x0) * buf_col_stride;
+                                                            (OPJ_SIZE_T)buf_line_stride + (x - x0) * buf_col_stride;
                     if (x_incr == 1) {
                         for (j = 0; j < y_incr; j++) {
                             *dest_ptr = *src_ptr;
diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c
index 0cc6f250..a583e692 100644
--- a/src/lib/openjp2/t1.c
+++ b/src/lib/openjp2/t1.c
@@ -1452,7 +1452,7 @@ static OPJ_BOOL opj_t1_allocate_buffers(
     if (!t1->encoder) {
         OPJ_UINT32 datasize = w * h;
 
-        if (datasize > (size_t)t1->datasize) {
+        if (datasize > t1->datasize) {
             opj_aligned_free(t1->data);
             t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
             if (!t1->data) {
@@ -1751,8 +1751,8 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
             }
         }
     } else if (tccp->qmfbid == 1) {
-        OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(size_t)y * tile_w +
-                                                   (size_t)x];
+        OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w +
+                                                       (OPJ_SIZE_T)x];
         for (j = 0; j < cblk_h; ++j) {
             i = 0;
             for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
@@ -1760,19 +1760,19 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
                 OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
                 OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
                 OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
-                ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 0U] = tmp0 / 2;
-                ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 1U] = tmp1 / 2;
-                ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 2U] = tmp2 / 2;
-                ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 3U] = tmp3 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2;
             }
             for (; i < cblk_w; ++i) {
                 OPJ_INT32 tmp = datap[(j * cblk_w) + i];
-                ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i] = tmp / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2;
             }
         }
     } else {        /* if (tccp->qmfbid == 0) */
-        OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(size_t)y *
-                                                     tile_w + (size_t)x];
+        OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y *
+                                                         tile_w + (OPJ_SIZE_T)x];
         for (j = 0; j < cblk_h; ++j) {
             OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
             for (i = 0; i < cblk_w; ++i) {
@@ -2134,7 +2134,7 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
                         OPJ_UINT32 cblk_w;
                         OPJ_UINT32 cblk_h;
                         OPJ_UINT32 i, j, tileLineAdvance;
-                        size_t tileIndex = 0;
+                        OPJ_SIZE_T tileIndex = 0;
 
                         OPJ_INT32 x = cblk->x0 - band->x0;
                         OPJ_INT32 y = cblk->y0 - band->y0;
@@ -2158,7 +2158,7 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
                         cblk_h = t1->h;
                         tileLineAdvance = tile_w - cblk_w;
 
-                        tiledp = &tilec->data[(size_t)y * tile_w + (size_t)x];
+                        tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
                         t1->data = tiledp;
                         t1->data_stride = tile_w;
                         if (tccp->qmfbid == 1) {
diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c
index bd5d4960..c652f8ba 100644
--- a/src/lib/openjp2/tcd.c
+++ b/src/lib/openjp2/tcd.c
@@ -807,11 +807,11 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
         }
 
         if (isEncoder) {
-            size_t l_tile_data_size;
+            OPJ_SIZE_T l_tile_data_size;
 
             /* compute l_data_size with overflow check */
-            size_t w = (size_t)(l_tilec->x1 - l_tilec->x0);
-            size_t h = (size_t)(l_tilec->y1 - l_tilec->y0);
+            OPJ_SIZE_T w = (OPJ_SIZE_T)(l_tilec->x1 - l_tilec->x0);
+            OPJ_SIZE_T h = (OPJ_SIZE_T)(l_tilec->y1 - l_tilec->y0);
 
             /* issue 733, l_data_size == 0U, probably something wrong should be checked before getting here */
             if (h > 0 && w > SIZE_MAX / h) {
@@ -1366,7 +1366,8 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd,
                 p_cstr_info->tile[p_tile_no].pdy[i] = (int)l_tccp->prch[i];
             }
             p_cstr_info->tile[p_tile_no].packet = (opj_packet_info_t*) opj_calloc((
-                    size_t)p_cstr_info->numcomps * (size_t)p_cstr_info->numlayers * l_num_packs,
+                    OPJ_SIZE_T)p_cstr_info->numcomps * (OPJ_SIZE_T)p_cstr_info->numlayers *
+                                                  l_num_packs,
                                                   sizeof(opj_packet_info_t));
             if (!p_cstr_info->tile[p_tile_no].packet) {
                 /* FIXME event manager error callback */
@@ -1462,11 +1463,11 @@ OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd,
             opj_tcd_tilecomp_t* tilec = &(p_tcd->tcd_image->tiles->comps[compno]);
             opj_tcd_resolution_t *l_res = &
                                           (tilec->resolutions[tilec->minimum_num_resolutions - 1]);
-            size_t l_data_size;
+            OPJ_SIZE_T l_data_size;
 
             /* compute l_data_size with overflow check */
-            size_t res_w = (size_t)(l_res->x1 - l_res->x0);
-            size_t res_h = (size_t)(l_res->y1 - l_res->y0);
+            OPJ_SIZE_T res_w = (OPJ_SIZE_T)(l_res->x1 - l_res->x0);
+            OPJ_SIZE_T res_h = (OPJ_SIZE_T)(l_res->y1 - l_res->y0);
 
             /* issue 733, l_data_size == 0U, probably something wrong should be checked before getting here */
             if (res_h > 0 && res_w > SIZE_MAX / res_h) {
@@ -1577,9 +1578,9 @@ OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd,
             opj_tcd_tilecomp_t* tilec = &(p_tcd->tcd_image->tiles->comps[compno]);
             opj_image_comp_t* image_comp = &(p_tcd->image->comps[compno]);
             opj_tcd_resolution_t *res = tilec->resolutions + image_comp->resno_decoded;
-            size_t w = res->win_x1 - res->win_x0;
-            size_t h = res->win_y1 - res->win_y0;
-            size_t l_data_size;
+            OPJ_SIZE_T w = res->win_x1 - res->win_x0;
+            OPJ_SIZE_T h = res->win_y1 - res->win_y0;
+            OPJ_SIZE_T l_data_size;
 
             opj_image_data_free(tilec->data_win);
             tilec->data_win = NULL;
@@ -1980,17 +1981,17 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager)
                                           p_tcd->image->comps[1].resno_decoded;
         opj_tcd_resolution_t* res_comp2 = l_tile->comps[2].resolutions +
                                           p_tcd->image->comps[2].resno_decoded;
-        size_t l_res_samples = (size_t)(res_comp0->x1 - res_comp0->x0) *
-                               (size_t)(res_comp0->y1 - res_comp0->y0);
+        OPJ_SIZE_T l_res_samples = (OPJ_SIZE_T)(res_comp0->x1 - res_comp0->x0) *
+                                   (OPJ_SIZE_T)(res_comp0->y1 - res_comp0->y0);
         /* testcase 1336.pdf.asan.47.376 */
         if (p_tcd->image->comps[0].resno_decoded !=
                 p_tcd->image->comps[1].resno_decoded ||
                 p_tcd->image->comps[0].resno_decoded !=
                 p_tcd->image->comps[2].resno_decoded ||
-                (size_t)(res_comp1->x1 - res_comp1->x0) *
-                (size_t)(res_comp1->y1 - res_comp1->y0) != l_res_samples ||
-                (size_t)(res_comp2->x1 - res_comp2->x0) *
-                (size_t)(res_comp2->y1 - res_comp2->y0) != l_res_samples) {
+                (OPJ_SIZE_T)(res_comp1->x1 - res_comp1->x0) *
+                (OPJ_SIZE_T)(res_comp1->y1 - res_comp1->y0) != l_res_samples ||
+                (OPJ_SIZE_T)(res_comp2->x1 - res_comp2->x0) *
+                (OPJ_SIZE_T)(res_comp2->y1 - res_comp2->y0) != l_res_samples) {
             opj_event_msg(p_manager, EVT_ERROR,
                           "Tiles don't all have the same dimension. Skip the MCT step.\n");
             return OPJ_FALSE;
@@ -2271,7 +2272,7 @@ static OPJ_BOOL opj_tcd_dc_level_shift_encode(opj_tcd_t *p_tcd)
     opj_tccp_t * l_tccp = 00;
     opj_image_comp_t * l_img_comp = 00;
     opj_tcd_tile_t * l_tile;
-    size_t l_nb_elem, i;
+    OPJ_SIZE_T l_nb_elem, i;
     OPJ_INT32 * l_current_ptr;
 
     l_tile = p_tcd->tcd_image->tiles;
@@ -2281,8 +2282,8 @@ static OPJ_BOOL opj_tcd_dc_level_shift_encode(opj_tcd_t *p_tcd)
 
     for (compno = 0; compno < l_tile->numcomps; compno++) {
         l_current_ptr = l_tile_comp->data;
-        l_nb_elem = (size_t)(l_tile_comp->x1 - l_tile_comp->x0) *
-                    (size_t)(l_tile_comp->y1 - l_tile_comp->y0);
+        l_nb_elem = (OPJ_SIZE_T)(l_tile_comp->x1 - l_tile_comp->x0) *
+                    (OPJ_SIZE_T)(l_tile_comp->y1 - l_tile_comp->y0);
 
         if (l_tccp->qmfbid == 1) {
             for (i = 0; i < l_nb_elem; ++i) {
@@ -2308,8 +2309,8 @@ static OPJ_BOOL opj_tcd_mct_encode(opj_tcd_t *p_tcd)
 {
     opj_tcd_tile_t * l_tile = p_tcd->tcd_image->tiles;
     opj_tcd_tilecomp_t * l_tile_comp = p_tcd->tcd_image->tiles->comps;
-    size_t samples = (size_t)(l_tile_comp->x1 - l_tile_comp->x0) *
-                     (size_t)(l_tile_comp->y1 - l_tile_comp->y0);
+    OPJ_SIZE_T samples = (OPJ_SIZE_T)(l_tile_comp->x1 - l_tile_comp->x0) *
+                         (OPJ_SIZE_T)(l_tile_comp->y1 - l_tile_comp->y0);
     OPJ_UINT32 i;
     OPJ_BYTE ** l_data = 00;
     opj_tcp_t * l_tcp = p_tcd->tcp;
@@ -2511,8 +2512,8 @@ OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd,
     for (i = 0; i < p_tcd->image->numcomps; ++i) {
         l_size_comp = l_img_comp->prec >> 3; /*(/ 8)*/
         l_remaining = l_img_comp->prec & 7;  /* (%8) */
-        l_nb_elem = (size_t)(l_tilec->x1 - l_tilec->x0) *
-                    (size_t)(l_tilec->y1 - l_tilec->y0);
+        l_nb_elem = (OPJ_SIZE_T)(l_tilec->x1 - l_tilec->x0) *
+                    (OPJ_SIZE_T)(l_tilec->y1 - l_tilec->y0);
 
         if (l_remaining) {
             ++l_size_comp;
-- 
cgit v1.2.3