From f9e9942330f476b66ac4a35d0ae521200878f343 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:30:29 +0200 Subject: Sub-tile decoding: only allocate tile component buffer of the needed dimension Instead of being the full tile size. * Use a sparse array mechanism to store code-blocks and intermediate stages of IDWT. * IDWT, DC level shift and MCT stages are done just on that smaller array. * Improve copy of tile component array to final image, by saving an intermediate buffer. * For full-tile decoding at reduced resolution, only allocate the tile buffer to the reduced size, instead of the full-resolution size. --- src/lib/openjp2/sparse_array.c | 233 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 233 insertions(+) create mode 100644 src/lib/openjp2/sparse_array.c (limited to 'src/lib/openjp2/sparse_array.c') diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c new file mode 100644 index 00000000..fb552f8b --- /dev/null +++ b/src/lib/openjp2/sparse_array.c @@ -0,0 +1,233 @@ +/* + * The copyright in this software is being made available under the 2-clauses + * BSD License, included below. This software may be subject to other third + * party and contributor rights, including patent rights, and no such rights + * are granted under this license. + * + * Copyright (c) 2017, IntoPix SA + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "opj_includes.h" + + +struct opj_sparse_array_int32 { + OPJ_UINT32 width; + OPJ_UINT32 height; + OPJ_UINT32 block_width; + OPJ_UINT32 block_height; + OPJ_UINT32 block_count_hor; + OPJ_UINT32 block_count_ver; + OPJ_INT32** data_blocks; +}; + +opj_sparse_array_int32_t* opj_sparse_array_int32_create(OPJ_UINT32 width, + OPJ_UINT32 height, + OPJ_UINT32 block_width, + OPJ_UINT32 block_height) +{ + opj_sparse_array_int32_t* sa; + + if (width == 0 || height == 0 || block_width == 0 || block_height == 0) { + return NULL; + } + if (block_width > ((OPJ_UINT32)~0U) / block_height / sizeof(OPJ_INT32)) { + return NULL; + } + + sa = opj_calloc(1, sizeof(opj_sparse_array_int32_t)); + sa->width = width; + sa->height = height; + sa->block_width = block_width; + sa->block_height = block_height; + sa->block_count_hor = opj_uint_ceildiv(width, block_width); + sa->block_count_ver = opj_uint_ceildiv(height, block_height); + if (sa->block_count_hor > ((OPJ_UINT32)~0U) / sa->block_count_ver) { + opj_free(sa); + return NULL; + } + sa->data_blocks = opj_calloc(sizeof(OPJ_INT32*), + sa->block_count_hor * sa->block_count_ver); + if (sa->data_blocks == NULL) { + opj_free(sa); + return NULL; + } + + return sa; +} + +void opj_sparse_array_int32_free(opj_sparse_array_int32_t* sa) +{ + if (sa) { + OPJ_UINT32 i; + for (i = 0; i < sa->block_count_hor * sa->block_count_ver; i++) { + if (sa->data_blocks[i]) { + opj_free(sa->data_blocks[i]); + } + } + opj_free(sa->data_blocks); + opj_free(sa); + } +} + +OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa, + OPJ_UINT32 x0, + OPJ_UINT32 y0, + OPJ_UINT32 x1, + OPJ_UINT32 y1) +{ + return !(x0 >= sa->width || x1 <= x0 || x1 > sa->width || + y0 >= sa->height || y1 <= y0 || y1 > sa->height); +} + +static OPJ_BOOL opj_sparse_array_int32_read_or_write( + opj_sparse_array_int32_t* sa, + OPJ_UINT32 x0, + OPJ_UINT32 y0, + OPJ_UINT32 x1, + OPJ_UINT32 y1, + OPJ_INT32* buf, + OPJ_UINT32 buf_col_stride, + OPJ_UINT32 buf_line_stride, + OPJ_BOOL forgiving, + OPJ_BOOL is_read_op) +{ + OPJ_UINT32 y, block_y; + OPJ_UINT32 y_incr = 0; + if (!opj_sparse_array_is_region_valid(sa, x0, y0, x1, y1)) { + return forgiving; + } + + block_y = y0 / sa->block_height; + for (y = y0; y < y1; block_y ++, y += y_incr) { + OPJ_UINT32 x, block_x; + OPJ_UINT32 x_incr = 0; + OPJ_UINT32 block_y_offset; + y_incr = (y == y0) ? sa->block_height - (y0 % sa->block_height) : + sa->block_height; + block_y_offset = sa->block_height - y_incr; + y_incr = opj_uint_min(y_incr, y1 - y); + block_x = x0 / sa->block_width; + for (x = x0; x < x1; block_x ++, x += x_incr) { + OPJ_UINT32 j; + OPJ_UINT32 block_x_offset; + OPJ_INT32* src_block; + x_incr = (x == x0) ? sa->block_width - (x0 % sa->block_width) : sa->block_width; + block_x_offset = sa->block_width - x_incr; + x_incr = opj_uint_min(x_incr, x1 - x); + src_block = sa->data_blocks[block_y * sa->block_count_hor + block_x]; + if (is_read_op) { + if (src_block == NULL) { + for (j = 0; j < y_incr; j++) { + if (buf_col_stride == 1) { + memset(buf + (y - y0 + j) * buf_line_stride + (x - x0) * buf_col_stride, + 0, + sizeof(OPJ_INT32) * x_incr); + } else { + OPJ_UINT32 k; + for (k = 0; k < x_incr; k++) { + *(buf + (y - y0 + j) * buf_line_stride + (x - x0 + k) * buf_col_stride) = 0; + } + } + } + } else { + for (j = 0; j < y_incr; j++) { + if (buf_col_stride == 1) { + memcpy(buf + (y - y0 + j) * buf_line_stride + (x - x0) * buf_col_stride, + src_block + (block_y_offset + j) * sa->block_width + block_x_offset, + sizeof(OPJ_INT32) * x_incr); + } else { + OPJ_UINT32 k; + for (k = 0; k < x_incr; k++) { + *(buf + (y - y0 + j) * buf_line_stride + (x - x0 + k) * buf_col_stride) = + *(src_block + (block_y_offset + j) * sa->block_width + block_x_offset + k); + } + } + } + } + } else { + if (src_block == NULL) { + src_block = opj_calloc(1, + sa->block_width * sa->block_height * sizeof(OPJ_INT32)); + if (src_block == NULL) { + return OPJ_FALSE; + } + sa->data_blocks[block_y * sa->block_count_hor + block_x] = src_block; + } + + for (j = 0; j < y_incr; j++) { + if (buf_col_stride == 1) { + memcpy(src_block + (block_y_offset + j) * sa->block_width + block_x_offset, + buf + (y - y0 + j) * buf_line_stride + (x - x0) * buf_col_stride, + sizeof(OPJ_INT32) * x_incr); + } else { + OPJ_UINT32 k; + for (k = 0; k < x_incr; k++) { + *(src_block + (block_y_offset + j) * sa->block_width + block_x_offset + k) = + *(buf + (y - y0 + j) * buf_line_stride + (x - x0 + k) * buf_col_stride); + } + } + } + } + } + } + + return OPJ_TRUE; +} + +OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa, + OPJ_UINT32 x0, + OPJ_UINT32 y0, + OPJ_UINT32 x1, + OPJ_UINT32 y1, + OPJ_INT32* dest, + OPJ_UINT32 dest_col_stride, + OPJ_UINT32 dest_line_stride, + OPJ_BOOL forgiving) +{ + return opj_sparse_array_int32_read_or_write(sa, x0, y0, x1, y1, + dest, + dest_col_stride, + dest_line_stride, + forgiving, + OPJ_TRUE); +} + +OPJ_BOOL opj_sparse_array_int32_write(opj_sparse_array_int32_t* sa, + OPJ_UINT32 x0, + OPJ_UINT32 y0, + OPJ_UINT32 x1, + OPJ_UINT32 y1, + const OPJ_INT32* src, + OPJ_UINT32 src_col_stride, + OPJ_UINT32 src_line_stride, + OPJ_BOOL forgiving) +{ + return opj_sparse_array_int32_read_or_write(sa, x0, y0, x1, y1, + (OPJ_INT32*)src, + src_col_stride, + src_line_stride, + forgiving, + OPJ_FALSE); +} -- cgit v1.2.3 From 98b93103613c90753fb6c57696f9403f8ea0b1d6 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:30:44 +0200 Subject: Various changes to allow tile buffers of more than 4giga pixels Untested though, since that means a tile buffer of at least 16 GB. So there might be places where uint32 overflow on multiplication still occur... --- src/lib/openjp2/dwt.c | 94 ++++++++++++++++++++++-------------------- src/lib/openjp2/j2k.c | 2 +- src/lib/openjp2/mct.c | 22 +++++----- src/lib/openjp2/mct.h | 12 +++--- src/lib/openjp2/sparse_array.c | 25 ++++++----- src/lib/openjp2/t1.c | 23 ++++++----- src/lib/openjp2/tcd.c | 43 ++++++++++--------- src/lib/openjp2/tcd.h | 4 +- 8 files changed, 119 insertions(+), 106 deletions(-) (limited to 'src/lib/openjp2/sparse_array.c') diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index 81a8150a..d0cd3b4e 100644 --- a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -279,7 +279,7 @@ static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, l_src += 2; } /* b[i*x]=a[2*i+cas]; */ - l_dest = b + sn * x; + l_dest = b + (size_t)sn * (size_t)x; l_src = a + 1 - cas; i = dn; @@ -325,7 +325,7 @@ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x) bi += 2; ai += x; } - ai = a + (v->sn * x); + ai = a + (v->sn * (size_t)x); bi = v->mem + 1 - v->cas; i = v->dn ; while (i--) { @@ -616,7 +616,7 @@ static void opj_idwt53_v_final_memcpy(OPJ_INT32* tiledp_col, const OPJ_INT32* tmp, OPJ_INT32 len, - OPJ_INT32 stride) + size_t stride) { OPJ_INT32 i; for (i = 0; i < len; ++i) { @@ -625,9 +625,9 @@ void opj_idwt53_v_final_memcpy(OPJ_INT32* tiledp_col, PARALLEL_COLS_53 * sizeof(OPJ_INT32)) would do but would be a tiny bit slower. We can take here advantage of our knowledge of alignment */ - STOREU(&tiledp_col[i * stride + 0], + STOREU(&tiledp_col[(size_t)i * stride + 0], LOAD(&tmp[PARALLEL_COLS_53 * i + 0])); - STOREU(&tiledp_col[i * stride + VREG_INT_COUNT], + STOREU(&tiledp_col[(size_t)i * stride + VREG_INT_COUNT], LOAD(&tmp[PARALLEL_COLS_53 * i + VREG_INT_COUNT])); } } @@ -639,12 +639,13 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2( const OPJ_INT32 sn, const OPJ_INT32 len, OPJ_INT32* tiledp_col, - const OPJ_INT32 stride) + const size_t stride) { const OPJ_INT32* in_even = &tiledp_col[0]; - const OPJ_INT32* in_odd = &tiledp_col[sn * stride]; + const OPJ_INT32* in_odd = &tiledp_col[(size_t)sn * stride]; - OPJ_INT32 i, j; + OPJ_INT32 i; + size_t j; VREG d1c_0, d1n_0, s1n_0, s0c_0, s0n_0; VREG d1c_1, d1n_1, s1n_1, s0c_1, s0n_1; const VREG two = LOAD_CST(2); @@ -703,7 +704,7 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2( if (len & 1) { VREG tmp_len_minus_1; - s1n_0 = LOADU(in_even + ((len - 1) / 2) * stride); + s1n_0 = LOADU(in_even + (size_t)((len - 1) / 2) * stride); /* tmp_len_minus_1 = s1n - ((d1n + 1) >> 1); */ tmp_len_minus_1 = SUB(s1n_0, SAR(ADD3(d1n_0, d1n_0, two), 2)); STORE(tmp + PARALLEL_COLS_53 * (len - 1), tmp_len_minus_1); @@ -711,7 +712,7 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2( STORE(tmp + PARALLEL_COLS_53 * (len - 2), ADD(d1n_0, SAR(ADD(s0n_0, tmp_len_minus_1), 1))); - s1n_1 = LOADU(in_even + ((len - 1) / 2) * stride + VREG_INT_COUNT); + s1n_1 = LOADU(in_even + (size_t)((len - 1) / 2) * stride + VREG_INT_COUNT); /* tmp_len_minus_1 = s1n - ((d1n + 1) >> 1); */ tmp_len_minus_1 = SUB(s1n_1, SAR(ADD3(d1n_1, d1n_1, two), 2)); STORE(tmp + PARALLEL_COLS_53 * (len - 1) + VREG_INT_COUNT, @@ -739,15 +740,16 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2( const OPJ_INT32 sn, const OPJ_INT32 len, OPJ_INT32* tiledp_col, - const OPJ_INT32 stride) + const size_t stride) { - OPJ_INT32 i, j; + OPJ_INT32 i; + size_t j; VREG s1_0, s2_0, dc_0, dn_0; VREG s1_1, s2_1, dc_1, dn_1; const VREG two = LOAD_CST(2); - const OPJ_INT32* in_even = &tiledp_col[sn * stride]; + const OPJ_INT32* in_even = &tiledp_col[(size_t)sn * stride]; const OPJ_INT32* in_odd = &tiledp_col[0]; assert(len > 2); @@ -807,9 +809,9 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2( if (!(len & 1)) { /*dn = in_odd[(len / 2 - 1) * stride] - ((s1 + 1) >> 1); */ - dn_0 = SUB(LOADU(in_odd + (len / 2 - 1) * stride), + dn_0 = SUB(LOADU(in_odd + (size_t)(len / 2 - 1) * stride), SAR(ADD3(s1_0, s1_0, two), 2)); - dn_1 = SUB(LOADU(in_odd + (len / 2 - 1) * stride + VREG_INT_COUNT), + dn_1 = SUB(LOADU(in_odd + (size_t)(len / 2 - 1) * stride + VREG_INT_COUNT), SAR(ADD3(s1_1, s1_1, two), 2)); /* tmp[len - 2] = s1 + ((dn + dc) >> 1); */ @@ -849,7 +851,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp, const OPJ_INT32 sn, const OPJ_INT32 len, OPJ_INT32* tiledp_col, - const OPJ_INT32 stride) + const size_t stride) { OPJ_INT32 i, j; OPJ_INT32 d1c, d1n, s1n, s0c, s0n; @@ -860,15 +862,15 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp, /* accesses and explicit interleaving. */ s1n = tiledp_col[0]; - d1n = tiledp_col[sn * stride]; + d1n = tiledp_col[(size_t)sn * stride]; s0n = s1n - ((d1n + 1) >> 1); for (i = 0, j = 0; i < (len - 3); i += 2, j++) { d1c = d1n; s0c = s0n; - s1n = tiledp_col[(j + 1) * stride]; - d1n = tiledp_col[(sn + j + 1) * stride]; + s1n = tiledp_col[(size_t)(j + 1) * stride]; + d1n = tiledp_col[(size_t)(sn + j + 1) * stride]; s0n = s1n - ((d1c + d1n + 2) >> 2); @@ -880,7 +882,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp, if (len & 1) { tmp[len - 1] = - tiledp_col[((len - 1) / 2) * stride] - + tiledp_col[(size_t)((len - 1) / 2) * stride] - ((d1n + 1) >> 1); tmp[len - 2] = d1n + ((s0n + tmp[len - 1]) >> 1); } else { @@ -888,7 +890,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp, } for (i = 0; i < len; ++i) { - tiledp_col[i * stride] = tmp[i]; + tiledp_col[(size_t)i * stride] = tmp[i]; } } @@ -899,11 +901,11 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, const OPJ_INT32 sn, const OPJ_INT32 len, OPJ_INT32* tiledp_col, - const OPJ_INT32 stride) + const size_t stride) { OPJ_INT32 i, j; OPJ_INT32 s1, s2, dc, dn; - const OPJ_INT32* in_even = &tiledp_col[sn * stride]; + const OPJ_INT32* in_even = &tiledp_col[(size_t)sn * stride]; const OPJ_INT32* in_odd = &tiledp_col[0]; assert(len > 2); @@ -916,9 +918,9 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, tmp[0] = in_even[0] + dc; for (i = 1, j = 1; i < (len - 2 - !(len & 1)); i += 2, j++) { - s2 = in_even[(j + 1) * stride]; + s2 = in_even[(size_t)(j + 1) * stride]; - dn = in_odd[j * stride] - ((s1 + s2 + 2) >> 2); + dn = in_odd[(size_t)j * stride] - ((s1 + s2 + 2) >> 2); tmp[i ] = dc; tmp[i + 1] = s1 + ((dn + dc) >> 1); @@ -927,7 +929,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, } tmp[i] = dc; if (!(len & 1)) { - dn = in_odd[(len / 2 - 1) * stride] - ((s1 + 1) >> 1); + dn = in_odd[(size_t)(len / 2 - 1) * stride] - ((s1 + 1) >> 1); tmp[len - 2] = s1 + ((dn + dc) >> 1); tmp[len - 1] = dn; } else { @@ -935,7 +937,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, } for (i = 0; i < len; ++i) { - tiledp_col[i * stride] = tmp[i]; + tiledp_col[(size_t)i * stride] = tmp[i]; } } #endif /* !defined(STANDARD_SLOW_VERSION) */ @@ -946,7 +948,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, /* Performs interleave, inverse wavelet transform and copy back to buffer */ static void opj_idwt53_v(const opj_dwt_t *dwt, OPJ_INT32* tiledp_col, - OPJ_INT32 stride, + size_t stride, OPJ_INT32 nb_cols) { #ifdef STANDARD_SLOW_VERSION @@ -994,14 +996,14 @@ static void opj_idwt53_v(const opj_dwt_t *dwt, OPJ_INT32* out = dwt->mem; for (c = 0; c < nb_cols; c++, tiledp_col++) { OPJ_INT32 i; - const OPJ_INT32* in_even = &tiledp_col[sn * stride]; + const OPJ_INT32* in_even = &tiledp_col[(size_t)sn * stride]; const OPJ_INT32* in_odd = &tiledp_col[0]; out[1] = in_odd[0] - ((in_even[0] + 1) >> 1); out[0] = in_even[0] + out[1]; for (i = 0; i < len; ++i) { - tiledp_col[i * stride] = out[i]; + tiledp_col[(size_t)i * stride] = out[i]; } } @@ -1341,11 +1343,11 @@ static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls) job = (opj_dwd_decode_v_job_t*)user_data; for (j = job->min_j; j + PARALLEL_COLS_53 <= job->max_j; j += PARALLEL_COLS_53) { - opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_INT32)job->w, + opj_idwt53_v(&job->v, &job->tiledp[j], (size_t)job->w, PARALLEL_COLS_53); } if (j < job->max_j) - opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_INT32)job->w, + opj_idwt53_v(&job->v, &job->tiledp[j], (size_t)job->w, (OPJ_INT32)(job->max_j - j)); opj_aligned_free(job->v.mem); @@ -1413,7 +1415,7 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, if (num_threads <= 1 || rh <= 1) { for (j = 0; j < rh; ++j) { - opj_idwt53_h(&h, &tiledp[j * w]); + opj_idwt53_h(&h, &tiledp[(size_t)j * w]); } } else { OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; @@ -1465,10 +1467,10 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, if (num_threads <= 1 || rw <= 1) { for (j = 0; j + PARALLEL_COLS_53 <= rw; j += PARALLEL_COLS_53) { - opj_idwt53_v(&v, &tiledp[j], (OPJ_INT32)w, PARALLEL_COLS_53); + opj_idwt53_v(&v, &tiledp[j], (size_t)w, PARALLEL_COLS_53); } if (j < rw) { - opj_idwt53_v(&v, &tiledp[j], (OPJ_INT32)w, (OPJ_INT32)(rw - j)); + opj_idwt53_v(&v, &tiledp[j], (size_t)w, (OPJ_INT32)(rw - j)); } } else { OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; @@ -2035,14 +2037,16 @@ static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt, OPJ_UINT32 i; for (i = dwt->win_l_x0; i < dwt->win_l_x1; ++i) { - memcpy(&bi[i * 2], &a[i * width], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); + memcpy(&bi[i * 2], &a[i * (size_t)width], + (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); } - a += (OPJ_UINT32)dwt->sn * width; + a += (OPJ_UINT32)dwt->sn * (size_t)width; bi = dwt->wavelet + 1 - dwt->cas; for (i = dwt->win_h_x0; i < dwt->win_h_x1; ++i) { - memcpy(&bi[i * 2], &a[i * width], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); + memcpy(&bi[i * 2], &a[i * (size_t)width], + (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); } } @@ -2325,9 +2329,9 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, for (k = 0; k < rw; k++) { aj[k ] = h.wavelet[k].f[0]; - aj[k + w ] = h.wavelet[k].f[1]; - aj[k + w * 2] = h.wavelet[k].f[2]; - aj[k + w * 3] = h.wavelet[k].f[3]; + aj[k + (size_t)w ] = h.wavelet[k].f[1]; + aj[k + (size_t)w * 2] = h.wavelet[k].f[2]; + aj[k + (size_t)w * 3] = h.wavelet[k].f[3]; } aj += w * 4; @@ -2340,10 +2344,10 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, for (k = 0; k < rw; k++) { switch (rh - j) { case 3: - aj[k + w * 2] = h.wavelet[k].f[2]; + aj[k + (size_t)w * 2] = h.wavelet[k].f[2]; /* FALLTHRU */ case 2: - aj[k + w ] = h.wavelet[k].f[1]; + aj[k + (size_t)w ] = h.wavelet[k].f[1]; /* FALLTHRU */ case 1: aj[k] = h.wavelet[k].f[0]; @@ -2366,7 +2370,7 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, opj_v4dwt_decode(&v); for (k = 0; k < rh; ++k) { - memcpy(&aj[k * w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32)); + memcpy(&aj[k * (size_t)w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32)); } aj += 4; } @@ -2380,7 +2384,7 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, opj_v4dwt_decode(&v); for (k = 0; k < rh; ++k) { - memcpy(&aj[k * w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32)); + memcpy(&aj[k * (size_t)w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32)); } } } diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c index 400fcaa4..aef0c1c0 100644 --- a/src/lib/openjp2/j2k.c +++ b/src/lib/openjp2/j2k.c @@ -10923,7 +10923,7 @@ OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k, { OPJ_UINT32 i, j; OPJ_UINT32 l_nb_tiles; - OPJ_UINT32 l_max_tile_size = 0, l_current_tile_size; + OPJ_SIZE_T l_max_tile_size = 0, l_current_tile_size; OPJ_BYTE * l_current_data = 00; OPJ_BOOL l_reuse_data = OPJ_FALSE; opj_tcd_t* p_tcd = 00; diff --git a/src/lib/openjp2/mct.c b/src/lib/openjp2/mct.c index 8c82ee20..b79d4b87 100644 --- a/src/lib/openjp2/mct.c +++ b/src/lib/openjp2/mct.c @@ -77,7 +77,7 @@ void opj_mct_encode( OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, OPJ_INT32* OPJ_RESTRICT c2, - OPJ_UINT32 n) + OPJ_SIZE_T n) { OPJ_SIZE_T i; const OPJ_SIZE_T len = n; @@ -119,7 +119,7 @@ void opj_mct_encode( OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, OPJ_INT32* OPJ_RESTRICT c2, - OPJ_UINT32 n) + OPJ_SIZE_T n) { OPJ_SIZE_T i; const OPJ_SIZE_T len = n; @@ -146,7 +146,7 @@ void opj_mct_decode( OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, OPJ_INT32* OPJ_RESTRICT c2, - OPJ_UINT32 n) + OPJ_SIZE_T n) { OPJ_SIZE_T i; const OPJ_SIZE_T len = n; @@ -181,7 +181,7 @@ void opj_mct_decode( OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, OPJ_INT32* OPJ_RESTRICT c2, - OPJ_UINT32 n) + OPJ_SIZE_T n) { OPJ_UINT32 i; for (i = 0; i < n; ++i) { @@ -214,7 +214,7 @@ void opj_mct_encode_real( OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, OPJ_INT32* OPJ_RESTRICT c2, - OPJ_UINT32 n) + OPJ_SIZE_T n) { OPJ_SIZE_T i; const OPJ_SIZE_T len = n; @@ -359,7 +359,7 @@ void opj_mct_encode_real( OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, OPJ_INT32* OPJ_RESTRICT c2, - OPJ_UINT32 n) + OPJ_SIZE_T n) { OPJ_UINT32 i; for (i = 0; i < n; ++i) { @@ -386,7 +386,7 @@ void opj_mct_decode_real( OPJ_FLOAT32* OPJ_RESTRICT c0, OPJ_FLOAT32* OPJ_RESTRICT c1, OPJ_FLOAT32* OPJ_RESTRICT c2, - OPJ_UINT32 n) + OPJ_SIZE_T n) { OPJ_UINT32 i; #ifdef __SSE__ @@ -451,13 +451,13 @@ OPJ_FLOAT64 opj_mct_getnorm_real(OPJ_UINT32 compno) OPJ_BOOL opj_mct_encode_custom( OPJ_BYTE * pCodingdata, - OPJ_UINT32 n, + OPJ_SIZE_T n, OPJ_BYTE ** pData, OPJ_UINT32 pNbComp, OPJ_UINT32 isSigned) { OPJ_FLOAT32 * lMct = (OPJ_FLOAT32 *) pCodingdata; - OPJ_UINT32 i; + OPJ_SIZE_T i; OPJ_UINT32 j; OPJ_UINT32 k; OPJ_UINT32 lNbMatCoeff = pNbComp * pNbComp; @@ -505,13 +505,13 @@ OPJ_BOOL opj_mct_encode_custom( OPJ_BOOL opj_mct_decode_custom( OPJ_BYTE * pDecodingData, - OPJ_UINT32 n, + OPJ_SIZE_T n, OPJ_BYTE ** pData, OPJ_UINT32 pNbComp, OPJ_UINT32 isSigned) { OPJ_FLOAT32 * lMct; - OPJ_UINT32 i; + OPJ_SIZE_T i; OPJ_UINT32 j; OPJ_UINT32 k; diff --git a/src/lib/openjp2/mct.h b/src/lib/openjp2/mct.h index 0ed980e8..2e37ce73 100644 --- a/src/lib/openjp2/mct.h +++ b/src/lib/openjp2/mct.h @@ -61,7 +61,7 @@ Apply a reversible multi-component transform to an image @param n Number of samples for each component */ void opj_mct_encode(OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, - OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n); + OPJ_INT32* OPJ_RESTRICT c2, OPJ_SIZE_T n); /** Apply a reversible multi-component inverse transform to an image @param c0 Samples for luminance component @@ -70,7 +70,7 @@ Apply a reversible multi-component inverse transform to an image @param n Number of samples for each component */ void opj_mct_decode(OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, - OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n); + OPJ_INT32* OPJ_RESTRICT c2, OPJ_SIZE_T n); /** Get norm of the basis function used for the reversible multi-component transform @param compno Number of the component (0->Y, 1->U, 2->V) @@ -86,7 +86,7 @@ Apply an irreversible multi-component transform to an image @param n Number of samples for each component */ void opj_mct_encode_real(OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, - OPJ_INT32* OPJ_RESTRICT c2, OPJ_UINT32 n); + OPJ_INT32* OPJ_RESTRICT c2, OPJ_SIZE_T n); /** Apply an irreversible multi-component inverse transform to an image @param c0 Samples for luminance component @@ -95,7 +95,7 @@ Apply an irreversible multi-component inverse transform to an image @param n Number of samples for each component */ void opj_mct_decode_real(OPJ_FLOAT32* OPJ_RESTRICT c0, - OPJ_FLOAT32* OPJ_RESTRICT c1, OPJ_FLOAT32* OPJ_RESTRICT c2, OPJ_UINT32 n); + OPJ_FLOAT32* OPJ_RESTRICT c1, OPJ_FLOAT32* OPJ_RESTRICT c2, OPJ_SIZE_T n); /** Get norm of the basis function used for the irreversible multi-component transform @param compno Number of the component (0->Y, 1->U, 2->V) @@ -114,7 +114,7 @@ FIXME DOC */ OPJ_BOOL opj_mct_encode_custom( OPJ_BYTE * p_coding_data, - OPJ_UINT32 n, + OPJ_SIZE_T n, OPJ_BYTE ** p_data, OPJ_UINT32 p_nb_comp, OPJ_UINT32 is_signed); @@ -129,7 +129,7 @@ FIXME DOC */ OPJ_BOOL opj_mct_decode_custom( OPJ_BYTE * pDecodingData, - OPJ_UINT32 n, + OPJ_SIZE_T n, OPJ_BYTE ** pData, OPJ_UINT32 pNbComp, OPJ_UINT32 isSigned); diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c index fb552f8b..3402dca2 100644 --- a/src/lib/openjp2/sparse_array.c +++ b/src/lib/openjp2/sparse_array.c @@ -141,27 +141,30 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( if (src_block == NULL) { for (j = 0; j < y_incr; j++) { if (buf_col_stride == 1) { - memset(buf + (y - y0 + j) * buf_line_stride + (x - x0) * buf_col_stride, + memset(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride, 0, sizeof(OPJ_INT32) * x_incr); } else { OPJ_UINT32 k; for (k = 0; k < x_incr; k++) { - *(buf + (y - y0 + j) * buf_line_stride + (x - x0 + k) * buf_col_stride) = 0; + *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride) + = 0; } } } } else { for (j = 0; j < y_incr; j++) { if (buf_col_stride == 1) { - memcpy(buf + (y - y0 + j) * buf_line_stride + (x - x0) * buf_col_stride, - src_block + (block_y_offset + j) * sa->block_width + block_x_offset, + memcpy(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride, + src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset, sizeof(OPJ_INT32) * x_incr); } else { OPJ_UINT32 k; for (k = 0; k < x_incr; k++) { - *(buf + (y - y0 + j) * buf_line_stride + (x - x0 + k) * buf_col_stride) = - *(src_block + (block_y_offset + j) * sa->block_width + block_x_offset + k); + *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride) + = + *(src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset + + k); } } } @@ -178,14 +181,16 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( for (j = 0; j < y_incr; j++) { if (buf_col_stride == 1) { - memcpy(src_block + (block_y_offset + j) * sa->block_width + block_x_offset, - buf + (y - y0 + j) * buf_line_stride + (x - x0) * buf_col_stride, + memcpy(src_block + (block_y_offset + j) * (size_t)sa->block_width + + block_x_offset, + buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride, sizeof(OPJ_INT32) * x_incr); } else { OPJ_UINT32 k; for (k = 0; k < x_incr; k++) { - *(src_block + (block_y_offset + j) * sa->block_width + block_x_offset + k) = - *(buf + (y - y0 + j) * buf_line_stride + (x - x0 + k) * buf_col_stride); + *(src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset + + k) = + *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride); } } } diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 953c7ab1..44a2f243 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -1696,8 +1696,8 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) } } } else if (tccp->qmfbid == 1) { - OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + - (OPJ_UINT32)x]; + OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(size_t)y * tile_w + + (size_t)x]; for (j = 0; j < cblk_h; ++j) { i = 0; for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) { @@ -1705,19 +1705,19 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U]; OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U]; OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U]; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 0U] = tmp0 / 2; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 1U] = tmp1 / 2; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 2U] = tmp2 / 2; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 3U] = tmp3 / 2; + ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 0U] = tmp0 / 2; + ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 1U] = tmp1 / 2; + ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 2U] = tmp2 / 2; + ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 3U] = tmp3 / 2; } for (; i < cblk_w; ++i) { OPJ_INT32 tmp = datap[(j * cblk_w) + i]; - ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp / 2; + ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i] = tmp / 2; } } } else { /* if (tccp->qmfbid == 0) */ - OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_UINT32)y * - tile_w + (OPJ_UINT32)x]; + OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(size_t)y * + tile_w + (size_t)x]; for (j = 0; j < cblk_h; ++j) { OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp; for (i = 0; i < cblk_w; ++i) { @@ -2052,7 +2052,8 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1, OPJ_INT32* OPJ_RESTRICT tiledp; OPJ_UINT32 cblk_w; OPJ_UINT32 cblk_h; - OPJ_UINT32 i, j, tileIndex = 0, tileLineAdvance; + OPJ_UINT32 i, j, tileLineAdvance; + size_t tileIndex = 0; OPJ_INT32 x = cblk->x0 - band->x0; OPJ_INT32 y = cblk->y0 - band->y0; @@ -2076,7 +2077,7 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1, cblk_h = t1->h; tileLineAdvance = tile_w - cblk_w; - tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x]; + tiledp = &tilec->data[(size_t)y * tile_w + (size_t)x]; t1->data = tiledp; t1->data_stride = tile_w; if (tccp->qmfbid == 1) { diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index 1213f757..485eb9e2 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -1974,17 +1974,17 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager) p_tcd->image->comps[1].resno_decoded; opj_tcd_resolution_t* res_comp2 = l_tile->comps[2].resolutions + p_tcd->image->comps[2].resno_decoded; - OPJ_INT32 l_res_samples = (OPJ_INT32)(res_comp0->x1 - res_comp0->x0) * - (res_comp0->y1 - res_comp0->y0); + size_t l_res_samples = (size_t)(res_comp0->x1 - res_comp0->x0) * + (size_t)(res_comp0->y1 - res_comp0->y0); /* testcase 1336.pdf.asan.47.376 */ if (p_tcd->image->comps[0].resno_decoded != p_tcd->image->comps[1].resno_decoded || p_tcd->image->comps[0].resno_decoded != p_tcd->image->comps[2].resno_decoded || - (res_comp1->x1 - res_comp1->x0) * (res_comp1->y1 - - res_comp1->y0) != l_res_samples || - (res_comp2->x1 - res_comp2->x0) * (res_comp2->y1 - - res_comp2->y0) != l_res_samples) { + (size_t)(res_comp1->x1 - res_comp1->x0) * + (size_t)(res_comp1->y1 - res_comp1->y0) != l_res_samples || + (size_t)(res_comp2->x1 - res_comp2->x0) * + (size_t)(res_comp2->y1 - res_comp2->y0) != l_res_samples) { opj_event_msg(p_manager, EVT_ERROR, "Tiles don't all have the same dimension. Skip the MCT step.\n"); return OPJ_FALSE; @@ -2225,9 +2225,10 @@ static void opj_tcd_code_block_enc_deallocate(opj_tcd_precinct_t * p_precinct) } } -OPJ_UINT32 opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd) +OPJ_SIZE_T opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd) { - OPJ_UINT32 i, l_data_size = 0; + OPJ_UINT32 i; + OPJ_SIZE_T l_data_size = 0; opj_image_comp_t * l_img_comp = 00; opj_tcd_tilecomp_t * l_tilec = 00; OPJ_UINT32 l_size_comp, l_remaining; @@ -2246,8 +2247,8 @@ OPJ_UINT32 opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd) l_size_comp = 4; } - l_data_size += l_size_comp * (OPJ_UINT32)((l_tilec->x1 - l_tilec->x0) * - (l_tilec->y1 - l_tilec->y0)); + l_data_size += l_size_comp * ((OPJ_SIZE_T)(l_tilec->x1 - l_tilec->x0) * + (OPJ_SIZE_T)(l_tilec->y1 - l_tilec->y0)); ++l_img_comp; ++l_tilec; } @@ -2262,7 +2263,7 @@ static OPJ_BOOL opj_tcd_dc_level_shift_encode(opj_tcd_t *p_tcd) opj_tccp_t * l_tccp = 00; opj_image_comp_t * l_img_comp = 00; opj_tcd_tile_t * l_tile; - OPJ_UINT32 l_nb_elem, i; + size_t l_nb_elem, i; OPJ_INT32 * l_current_ptr; l_tile = p_tcd->tcd_image->tiles; @@ -2272,8 +2273,8 @@ static OPJ_BOOL opj_tcd_dc_level_shift_encode(opj_tcd_t *p_tcd) for (compno = 0; compno < l_tile->numcomps; compno++) { l_current_ptr = l_tile_comp->data; - l_nb_elem = (OPJ_UINT32)((l_tile_comp->x1 - l_tile_comp->x0) * - (l_tile_comp->y1 - l_tile_comp->y0)); + l_nb_elem = (size_t)(l_tile_comp->x1 - l_tile_comp->x0) * + (size_t)(l_tile_comp->y1 - l_tile_comp->y0); if (l_tccp->qmfbid == 1) { for (i = 0; i < l_nb_elem; ++i) { @@ -2299,8 +2300,8 @@ static OPJ_BOOL opj_tcd_mct_encode(opj_tcd_t *p_tcd) { opj_tcd_tile_t * l_tile = p_tcd->tcd_image->tiles; opj_tcd_tilecomp_t * l_tile_comp = p_tcd->tcd_image->tiles->comps; - OPJ_UINT32 samples = (OPJ_UINT32)((l_tile_comp->x1 - l_tile_comp->x0) * - (l_tile_comp->y1 - l_tile_comp->y0)); + size_t samples = (size_t)(l_tile_comp->x1 - l_tile_comp->x0) * + (size_t)(l_tile_comp->y1 - l_tile_comp->y0); OPJ_UINT32 i; OPJ_BYTE ** l_data = 00; opj_tcp_t * l_tcp = p_tcd->tcp; @@ -2482,13 +2483,15 @@ static OPJ_BOOL opj_tcd_rate_allocate_encode(opj_tcd_t *p_tcd, OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd, OPJ_BYTE * p_src, - OPJ_UINT32 p_src_length) + OPJ_SIZE_T p_src_length) { - OPJ_UINT32 i, j, l_data_size = 0; + OPJ_UINT32 i; + OPJ_SIZE_T j; + OPJ_SIZE_T l_data_size = 0; opj_image_comp_t * l_img_comp = 00; opj_tcd_tilecomp_t * l_tilec = 00; OPJ_UINT32 l_size_comp, l_remaining; - OPJ_UINT32 l_nb_elem; + OPJ_SIZE_T l_nb_elem; l_data_size = opj_tcd_get_encoded_tile_size(p_tcd); if (l_data_size != p_src_length) { @@ -2500,8 +2503,8 @@ OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd, for (i = 0; i < p_tcd->image->numcomps; ++i) { l_size_comp = l_img_comp->prec >> 3; /*(/ 8)*/ l_remaining = l_img_comp->prec & 7; /* (%8) */ - l_nb_elem = (OPJ_UINT32)((l_tilec->x1 - l_tilec->x0) * (l_tilec->y1 - - l_tilec->y0)); + l_nb_elem = (size_t)(l_tilec->x1 - l_tilec->x0) * + (size_t)(l_tilec->y1 - l_tilec->y0); if (l_remaining) { ++l_size_comp; diff --git a/src/lib/openjp2/tcd.h b/src/lib/openjp2/tcd.h index 7c974c53..63c22c45 100644 --- a/src/lib/openjp2/tcd.h +++ b/src/lib/openjp2/tcd.h @@ -409,7 +409,7 @@ OPJ_BOOL opj_tcd_update_tile_data(opj_tcd_t *p_tcd, /** * */ -OPJ_UINT32 opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd); +OPJ_SIZE_T opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd); /** * Initialize the tile coder and may reuse some meory. @@ -428,7 +428,7 @@ OPJ_BOOL opj_tcd_init_encode_tile(opj_tcd_t *p_tcd, */ OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd, OPJ_BYTE * p_src, - OPJ_UINT32 p_src_length); + OPJ_SIZE_T p_src_length); /** * Allocates tile component data -- cgit v1.2.3 From b2cc8f7f81242f967b65e76de043e5e31663d793 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:30:50 +0200 Subject: Optimize reading/write into sparse array --- src/lib/openjp2/sparse_array.c | 126 ++++++++++++++++++++++++------------ src/lib/openjp2/sparse_array.h | 4 +- src/lib/openjp2/test_sparse_array.c | 26 +++++++- 3 files changed, 111 insertions(+), 45 deletions(-) (limited to 'src/lib/openjp2/sparse_array.c') diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c index 3402dca2..b0634f67 100644 --- a/src/lib/openjp2/sparse_array.c +++ b/src/lib/openjp2/sparse_array.c @@ -91,7 +91,7 @@ void opj_sparse_array_int32_free(opj_sparse_array_int32_t* sa) } } -OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa, +OPJ_BOOL opj_sparse_array_is_region_valid(const opj_sparse_array_int32_t* sa, OPJ_UINT32 x0, OPJ_UINT32 y0, OPJ_UINT32 x1, @@ -102,7 +102,7 @@ OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa, } static OPJ_BOOL opj_sparse_array_int32_read_or_write( - opj_sparse_array_int32_t* sa, + const opj_sparse_array_int32_t* sa, OPJ_UINT32 x0, OPJ_UINT32 y0, OPJ_UINT32 x1, @@ -115,6 +115,8 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( { OPJ_UINT32 y, block_y; OPJ_UINT32 y_incr = 0; + const OPJ_UINT32 block_width = sa->block_width; + if (!opj_sparse_array_is_region_valid(sa, x0, y0, x1, y1)) { return forgiving; } @@ -128,43 +130,64 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( sa->block_height; block_y_offset = sa->block_height - y_incr; y_incr = opj_uint_min(y_incr, y1 - y); - block_x = x0 / sa->block_width; + block_x = x0 / block_width; for (x = x0; x < x1; block_x ++, x += x_incr) { OPJ_UINT32 j; OPJ_UINT32 block_x_offset; OPJ_INT32* src_block; - x_incr = (x == x0) ? sa->block_width - (x0 % sa->block_width) : sa->block_width; - block_x_offset = sa->block_width - x_incr; + x_incr = (x == x0) ? block_width - (x0 % block_width) : block_width; + block_x_offset = block_width - x_incr; x_incr = opj_uint_min(x_incr, x1 - x); src_block = sa->data_blocks[block_y * sa->block_count_hor + block_x]; if (is_read_op) { if (src_block == NULL) { - for (j = 0; j < y_incr; j++) { - if (buf_col_stride == 1) { - memset(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride, - 0, - sizeof(OPJ_INT32) * x_incr); - } else { + if (buf_col_stride == 1) { + OPJ_INT32* dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + (x - x0) * buf_col_stride; + for (j = 0; j < y_incr; j++) { + memset(dest_ptr, 0, sizeof(OPJ_INT32) * x_incr); + dest_ptr += buf_line_stride; + } + } else { + OPJ_INT32* dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + (x - x0) * buf_col_stride; + for (j = 0; j < y_incr; j++) { OPJ_UINT32 k; for (k = 0; k < x_incr; k++) { - *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride) - = 0; + dest_ptr[k * buf_col_stride] = 0; } + dest_ptr += buf_line_stride; } } } else { - for (j = 0; j < y_incr; j++) { - if (buf_col_stride == 1) { - memcpy(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride, - src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset, - sizeof(OPJ_INT32) * x_incr); + const OPJ_INT32* OPJ_RESTRICT src_ptr = src_block + block_y_offset * + (size_t)block_width + block_x_offset; + if (buf_col_stride == 1) { + OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + (x - x0) * buf_col_stride; + for (j = 0; j < y_incr; j++) { + memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); + dest_ptr += buf_line_stride; + src_ptr += block_width; + } + } else { + OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + (x - x0) * buf_col_stride; + if (x_incr == 1) { + for (j = 0; j < y_incr; j++) { + *dest_ptr = *src_ptr; + dest_ptr += buf_line_stride; + src_ptr += block_width; + } } else { - OPJ_UINT32 k; - for (k = 0; k < x_incr; k++) { - *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride) - = - *(src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset + - k); + /* General case */ + for (j = 0; j < y_incr; j++) { + OPJ_UINT32 k; + for (k = 0; k < x_incr; k++) { + dest_ptr[k * buf_col_stride] = src_ptr[k]; + } + dest_ptr += buf_line_stride; + src_ptr += block_width; } } } @@ -179,18 +202,36 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( sa->data_blocks[block_y * sa->block_count_hor + block_x] = src_block; } - for (j = 0; j < y_incr; j++) { - if (buf_col_stride == 1) { - memcpy(src_block + (block_y_offset + j) * (size_t)sa->block_width + - block_x_offset, - buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride, - sizeof(OPJ_INT32) * x_incr); + if (buf_col_stride == 1) { + OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset * + (size_t)block_width + block_x_offset; + const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) * + (size_t)buf_line_stride + (x - x0) * buf_col_stride; + for (j = 0; j < y_incr; j++) { + memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); + dest_ptr += block_width; + src_ptr += buf_line_stride; + } + } else { + OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset * + (size_t)block_width + block_x_offset; + const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) * + (size_t)buf_line_stride + (x - x0) * buf_col_stride; + if (x_incr == 1) { + for (j = 0; j < y_incr; j++) { + *dest_ptr = *src_ptr; + src_ptr += buf_line_stride; + dest_ptr += block_width; + } } else { - OPJ_UINT32 k; - for (k = 0; k < x_incr; k++) { - *(src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset + - k) = - *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride); + /* General case */ + for (j = 0; j < y_incr; j++) { + OPJ_UINT32 k; + for (k = 0; k < x_incr; k++) { + dest_ptr[k] = src_ptr[k * buf_col_stride]; + } + src_ptr += buf_line_stride; + dest_ptr += block_width; } } } @@ -201,7 +242,7 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( return OPJ_TRUE; } -OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa, +OPJ_BOOL opj_sparse_array_int32_read(const opj_sparse_array_int32_t* sa, OPJ_UINT32 x0, OPJ_UINT32 y0, OPJ_UINT32 x1, @@ -211,12 +252,13 @@ OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa, OPJ_UINT32 dest_line_stride, OPJ_BOOL forgiving) { - return opj_sparse_array_int32_read_or_write(sa, x0, y0, x1, y1, - dest, - dest_col_stride, - dest_line_stride, - forgiving, - OPJ_TRUE); + return opj_sparse_array_int32_read_or_write( + (opj_sparse_array_int32_t*)sa, x0, y0, x1, y1, + dest, + dest_col_stride, + dest_line_stride, + forgiving, + OPJ_TRUE); } OPJ_BOOL opj_sparse_array_int32_write(opj_sparse_array_int32_t* sa, diff --git a/src/lib/openjp2/sparse_array.h b/src/lib/openjp2/sparse_array.h index 485cafea..130fe13e 100644 --- a/src/lib/openjp2/sparse_array.h +++ b/src/lib/openjp2/sparse_array.h @@ -77,7 +77,7 @@ void opj_sparse_array_int32_free(opj_sparse_array_int32_t* sa); * @param y1 bottom y coordinate (not included) of the region. Must be greater than y0. * @return OPJ_TRUE or OPJ_FALSE. */ -OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa, +OPJ_BOOL opj_sparse_array_is_region_valid(const opj_sparse_array_int32_t* sa, OPJ_UINT32 x0, OPJ_UINT32 y0, OPJ_UINT32 x1, @@ -99,7 +99,7 @@ OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa, * @param forgiving if set to TRUE and the region is invalid, OPJ_TRUE will still be returned. * @return OPJ_TRUE in case of success. */ -OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa, +OPJ_BOOL opj_sparse_array_int32_read(const opj_sparse_array_int32_t* sa, OPJ_UINT32 x0, OPJ_UINT32 y0, OPJ_UINT32 x1, diff --git a/src/lib/openjp2/test_sparse_array.c b/src/lib/openjp2/test_sparse_array.c index 0b49110f..8e136451 100644 --- a/src/lib/openjp2/test_sparse_array.c +++ b/src/lib/openjp2/test_sparse_array.c @@ -92,6 +92,7 @@ int main() ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1, OPJ_FALSE); assert(ret); + buffer[0] = 2; ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1, OPJ_FALSE); @@ -105,6 +106,29 @@ int main() assert(buffer[0] == 2); assert(buffer[1] == 0xFF); + buffer[0] = 0xFF; + buffer[1] = 0xFF; + buffer[2] = 0xFF; + ret = opj_sparse_array_int32_read(sa, 4, 5, 4 + 1, 5 + 2, buffer, 0, 1, + OPJ_FALSE); + assert(ret); + assert(buffer[0] == 2); + assert(buffer[1] == 0); + assert(buffer[2] == 0xFF); + + buffer[0] = 3; + ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 0, 1, + OPJ_FALSE); + assert(ret); + + buffer[0] = 0; + buffer[1] = 0xFF; + ret = opj_sparse_array_int32_read(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1, + OPJ_FALSE); + assert(ret); + assert(buffer[0] == 3); + assert(buffer[1] == 0xFF); + w = 15 + 1; h = 17 + 1; memset(buffer, 0xFF, sizeof(buffer)); @@ -114,7 +138,7 @@ int main() for (j = 0; j < h; j++) { for (i = 0; i < w; i++) { if (i == 4 - 2 && j == 5 - 1) { - assert(buffer[ j * w + i ] == 2); + assert(buffer[ j * w + i ] == 3); } else { assert(buffer[ j * w + i ] == 0); } -- cgit v1.2.3 From 873004c615b1bed3ce780e869288602af86fdee5 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:31:00 +0200 Subject: Sub-tile decoding: speed up vertical pass in IDWT5x3 by processing 4 cols at a time --- src/lib/openjp2/dwt.c | 143 ++++++++++++++++++++++++++++++++++++----- src/lib/openjp2/sparse_array.c | 47 +++++++++++--- 2 files changed, 165 insertions(+), 25 deletions(-) (limited to 'src/lib/openjp2/sparse_array.c') diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index 153bfa40..ae1cbd50 100644 --- a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -1551,6 +1551,7 @@ static void opj_dwt_interleave_partial_v(OPJ_INT32 *dest, OPJ_INT32 cas, opj_sparse_array_int32_t* sa, OPJ_UINT32 sa_col, + OPJ_UINT32 nb_cols, OPJ_UINT32 sn, OPJ_UINT32 win_l_y0, OPJ_UINT32 win_l_y1, @@ -1560,15 +1561,15 @@ static void opj_dwt_interleave_partial_v(OPJ_INT32 *dest, OPJ_BOOL ret; ret = opj_sparse_array_int32_read(sa, sa_col, win_l_y0, - sa_col + 1, win_l_y1, - dest + cas + 2 * win_l_y0, - 0, 2, OPJ_TRUE); + sa_col + nb_cols, win_l_y1, + dest + cas * 4 + 2 * 4 * win_l_y0, + 1, 2 * 4, OPJ_TRUE); assert(ret); ret = opj_sparse_array_int32_read(sa, sa_col, sn + win_h_y0, - sa_col + 1, sn + win_h_y1, - dest + 1 - cas + 2 * win_h_y0, - 0, 2, OPJ_TRUE); + sa_col + nb_cols, sn + win_h_y1, + dest + (1 - cas) * 4 + 2 * 4 * win_h_y0, + 1, 2 * 4, OPJ_TRUE); assert(ret); OPJ_UNUSED(ret); } @@ -1648,6 +1649,109 @@ static void opj_dwt_decode_partial_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, } } +#define OPJ_S_off(i,off) a[(OPJ_UINT32)(i)*2*4+off] +#define OPJ_D_off(i,off) a[(1+(OPJ_UINT32)(i)*2)*4+off] +#define OPJ_S__off(i,off) ((i)<0?OPJ_S_off(0,off):((i)>=sn?OPJ_S_off(sn-1,off):OPJ_S_off(i,off))) +#define OPJ_D__off(i,off) ((i)<0?OPJ_D_off(0,off):((i)>=dn?OPJ_D_off(dn-1,off):OPJ_D_off(i,off))) +#define OPJ_SS__off(i,off) ((i)<0?OPJ_S_off(0,off):((i)>=dn?OPJ_S_off(dn-1,off):OPJ_S_off(i,off))) +#define OPJ_DD__off(i,off) ((i)<0?OPJ_D_off(0,off):((i)>=sn?OPJ_D_off(sn-1,off):OPJ_D_off(i,off))) + +static void opj_dwt_decode_partial_1_parallel(OPJ_INT32 *a, + OPJ_UINT32 nb_cols, + OPJ_INT32 dn, OPJ_INT32 sn, + OPJ_INT32 cas, + OPJ_INT32 win_l_x0, + OPJ_INT32 win_l_x1, + OPJ_INT32 win_h_x0, + OPJ_INT32 win_h_x1) +{ + OPJ_INT32 i; + OPJ_UINT32 off; + + (void)nb_cols; + + if (!cas) { + if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */ + + /* Naive version is : + for (i = win_l_x0; i < i_max; i++) { + OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2; + } + for (i = win_h_x0; i < win_h_x1; i++) { + OPJ_D(i) += (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1; + } + but the compiler doesn't manage to unroll it to avoid bound + checking in OPJ_S_ and OPJ_D_ macros + */ + + i = win_l_x0; + if (i < win_l_x1) { + OPJ_INT32 i_max; + + /* Left-most case */ + for (off = 0; off < 4; off++) { + OPJ_S_off(i, off) -= (OPJ_D__off(i - 1, off) + OPJ_D__off(i, off) + 2) >> 2; + } + i ++; + + i_max = win_l_x1; + if (i_max > dn) { + i_max = dn; + } + for (; i < i_max; i++) { + /* No bound checking */ + for (off = 0; off < 4; off++) { + OPJ_S_off(i, off) -= (OPJ_D_off(i - 1, off) + OPJ_D_off(i, off) + 2) >> 2; + } + } + for (; i < win_l_x1; i++) { + /* Right-most case */ + for (off = 0; off < 4; off++) { + OPJ_S_off(i, off) -= (OPJ_D__off(i - 1, off) + OPJ_D__off(i, off) + 2) >> 2; + } + } + } + + i = win_h_x0; + if (i < win_h_x1) { + OPJ_INT32 i_max = win_h_x1; + if (i_max >= sn) { + i_max = sn - 1; + } + for (; i < i_max; i++) { + /* No bound checking */ + for (off = 0; off < 4; off++) { + OPJ_D_off(i, off) += (OPJ_S_off(i, off) + OPJ_S_off(i + 1, off)) >> 1; + } + } + for (; i < win_h_x1; i++) { + /* Right-most case */ + for (off = 0; off < 4; off++) { + OPJ_D_off(i, off) += (OPJ_S__off(i, off) + OPJ_S__off(i + 1, off)) >> 1; + } + } + } + } + } else { + if (!sn && dn == 1) { /* NEW : CASE ONE ELEMENT */ + for (off = 0; off < 4; off++) { + OPJ_S_off(0, off) /= 2; + } + } else { + for (i = win_l_x0; i < win_l_x1; i++) { + for (off = 0; off < 4; off++) { + OPJ_D_off(i, off) -= (OPJ_SS__off(i, off) + OPJ_SS__off(i + 1, off) + 2) >> 2; + } + } + for (i = win_h_x0; i < win_h_x1; i++) { + for (off = 0; off < 4; off++) { + OPJ_S_off(i, off) += (OPJ_DD__off(i, off) + OPJ_DD__off(i - 1, off)) >> 1; + } + } + } + } +} + static void opj_dwt_get_band_coordinates(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 resno, OPJ_UINT32 bandno, @@ -1804,13 +1908,14 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( } h_mem_size = opj_dwt_max_resolution(tr, numres); /* overflow check */ - if (h_mem_size > (SIZE_MAX / sizeof(OPJ_INT32))) { + /* in vertical pass, we process 4 columns at a time */ + if (h_mem_size > (SIZE_MAX / (4 * sizeof(OPJ_INT32)))) { /* FIXME event manager error callback */ opj_sparse_array_int32_free(sa); return OPJ_FALSE; } - h_mem_size *= sizeof(OPJ_INT32); + h_mem_size *= 4 * sizeof(OPJ_INT32); h.mem = (OPJ_INT32*)opj_aligned_32_malloc(h_mem_size); if (! h.mem) { /* FIXME event manager error callback */ @@ -1946,31 +2051,35 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( } } - for (i = win_tr_x0; i < win_tr_x1; ++i) { + for (i = win_tr_x0; i < win_tr_x1;) { + OPJ_UINT32 nb_cols = opj_uint_min(4U, win_tr_x1 - i); opj_dwt_interleave_partial_v(v.mem, v.cas, sa, i, + nb_cols, (OPJ_UINT32)v.sn, win_ll_y0, win_ll_y1, win_lh_y0, win_lh_y1); - opj_dwt_decode_partial_1(v.mem, v.dn, v.sn, v.cas, - (OPJ_INT32)win_ll_y0, - (OPJ_INT32)win_ll_y1, - (OPJ_INT32)win_lh_y0, - (OPJ_INT32)win_lh_y1); + opj_dwt_decode_partial_1_parallel(v.mem, nb_cols, v.dn, v.sn, v.cas, + (OPJ_INT32)win_ll_y0, + (OPJ_INT32)win_ll_y1, + (OPJ_INT32)win_lh_y0, + (OPJ_INT32)win_lh_y1); if (!opj_sparse_array_int32_write(sa, i, win_tr_y0, - i + 1, win_tr_y1, - v.mem + win_tr_y0, - 0, 1, OPJ_TRUE)) { + i + nb_cols, win_tr_y1, + v.mem + 4 * win_tr_y0, + 1, 4, OPJ_TRUE)) { /* FIXME event manager error callback */ opj_sparse_array_int32_free(sa); opj_aligned_free(h.mem); return OPJ_FALSE; } + + i += nb_cols; } } opj_aligned_free(h.mem); diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c index b0634f67..48c4b23b 100644 --- a/src/lib/openjp2/sparse_array.c +++ b/src/lib/openjp2/sparse_array.c @@ -165,10 +165,20 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( if (buf_col_stride == 1) { OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + (x - x0) * buf_col_stride; - for (j = 0; j < y_incr; j++) { - memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); - dest_ptr += buf_line_stride; - src_ptr += block_width; + if (x_incr == 4) { + // Same code as general branch, but the compiler + // can have an efficient memcpy() + for (j = 0; j < y_incr; j++) { + memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); + dest_ptr += buf_line_stride; + src_ptr += block_width; + } + } else { + for (j = 0; j < y_incr; j++) { + memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); + dest_ptr += buf_line_stride; + src_ptr += block_width; + } } } else { OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + @@ -179,6 +189,17 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( dest_ptr += buf_line_stride; src_ptr += block_width; } + } else if (y_incr == 1 && buf_col_stride == 2) { + OPJ_UINT32 k; + for (k = 0; k < (x_incr & ~3U); k += 4) { + dest_ptr[k * buf_col_stride] = src_ptr[k]; + dest_ptr[(k + 1) * buf_col_stride] = src_ptr[k + 1]; + dest_ptr[(k + 2) * buf_col_stride] = src_ptr[k + 2]; + dest_ptr[(k + 3) * buf_col_stride] = src_ptr[k + 3]; + } + for (; k < x_incr; k++) { + dest_ptr[k * buf_col_stride] = src_ptr[k]; + } } else { /* General case */ for (j = 0; j < y_incr; j++) { @@ -207,10 +228,20 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( (size_t)block_width + block_x_offset; const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) * (size_t)buf_line_stride + (x - x0) * buf_col_stride; - for (j = 0; j < y_incr; j++) { - memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); - dest_ptr += block_width; - src_ptr += buf_line_stride; + if (x_incr == 4) { + // Same code as general branch, but the compiler + // can have an efficient memcpy() + for (j = 0; j < y_incr; j++) { + memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); + dest_ptr += block_width; + src_ptr += buf_line_stride; + } + } else { + for (j = 0; j < y_incr; j++) { + memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); + dest_ptr += block_width; + src_ptr += buf_line_stride; + } } } else { OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset * -- cgit v1.2.3 From 7017e67a01c378a7a1ee5e34dd544c793b5c23e4 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:31:10 +0200 Subject: sparse_array: optimizations for lossy case --- src/lib/openjp2/sparse_array.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'src/lib/openjp2/sparse_array.c') diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c index 48c4b23b..40459bdb 100644 --- a/src/lib/openjp2/sparse_array.c +++ b/src/lib/openjp2/sparse_array.c @@ -200,6 +200,21 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( for (; k < x_incr; k++) { dest_ptr[k * buf_col_stride] = src_ptr[k]; } + } else if (x_incr >= 8 && buf_col_stride == 8) { + for (j = 0; j < y_incr; j++) { + OPJ_UINT32 k; + for (k = 0; k < (x_incr & ~3U); k += 4) { + dest_ptr[k * buf_col_stride] = src_ptr[k]; + dest_ptr[(k + 1) * buf_col_stride] = src_ptr[k + 1]; + dest_ptr[(k + 2) * buf_col_stride] = src_ptr[k + 2]; + dest_ptr[(k + 3) * buf_col_stride] = src_ptr[k + 3]; + } + for (; k < x_incr; k++) { + dest_ptr[k * buf_col_stride] = src_ptr[k]; + } + dest_ptr += buf_line_stride; + src_ptr += block_width; + } } else { /* General case */ for (j = 0; j < y_incr; j++) { @@ -254,6 +269,21 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( src_ptr += buf_line_stride; dest_ptr += block_width; } + } else if (x_incr >= 8 && buf_col_stride == 8) { + for (j = 0; j < y_incr; j++) { + OPJ_UINT32 k; + for (k = 0; k < (x_incr & ~3U); k += 4) { + dest_ptr[k] = src_ptr[k * buf_col_stride]; + dest_ptr[k + 1] = src_ptr[(k + 1) * buf_col_stride]; + dest_ptr[k + 2] = src_ptr[(k + 2) * buf_col_stride]; + dest_ptr[k + 3] = src_ptr[(k + 3) * buf_col_stride]; + } + for (; k < x_incr; k++) { + dest_ptr[k] = src_ptr[k * buf_col_stride]; + } + src_ptr += buf_line_stride; + dest_ptr += block_width; + } } else { /* General case */ for (j = 0; j < y_incr; j++) { -- cgit v1.2.3 From 579b8937eae7e6b6868b8b5c6286a742c10a5130 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 4 Sep 2017 17:35:52 +0200 Subject: Replace uses of size_t by OPJ_SIZE_T --- src/lib/openjp2/dwt.c | 113 +++++++++++++++++++++-------------------- src/lib/openjp2/sparse_array.c | 20 ++++---- src/lib/openjp2/t1.c | 24 ++++----- src/lib/openjp2/tcd.c | 47 ++++++++--------- 4 files changed, 104 insertions(+), 100 deletions(-) (limited to 'src/lib/openjp2/sparse_array.c') diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c index 719c7330..18270045 100644 --- a/src/lib/openjp2/dwt.c +++ b/src/lib/openjp2/dwt.c @@ -279,7 +279,7 @@ static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, l_src += 2; } /* b[i*x]=a[2*i+cas]; */ - l_dest = b + (size_t)sn * (size_t)x; + l_dest = b + (OPJ_SIZE_T)sn * (OPJ_SIZE_T)x; l_src = a + 1 - cas; i = dn; @@ -325,7 +325,7 @@ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x) bi += 2; ai += x; } - ai = a + (v->sn * (size_t)x); + ai = a + (v->sn * (OPJ_SIZE_T)x); bi = v->mem + 1 - v->cas; i = v->dn ; while (i--) { @@ -616,7 +616,7 @@ static void opj_idwt53_v_final_memcpy(OPJ_INT32* tiledp_col, const OPJ_INT32* tmp, OPJ_INT32 len, - size_t stride) + OPJ_SIZE_T stride) { OPJ_INT32 i; for (i = 0; i < len; ++i) { @@ -625,9 +625,9 @@ void opj_idwt53_v_final_memcpy(OPJ_INT32* tiledp_col, PARALLEL_COLS_53 * sizeof(OPJ_INT32)) would do but would be a tiny bit slower. We can take here advantage of our knowledge of alignment */ - STOREU(&tiledp_col[(size_t)i * stride + 0], + STOREU(&tiledp_col[(OPJ_SIZE_T)i * stride + 0], LOAD(&tmp[PARALLEL_COLS_53 * i + 0])); - STOREU(&tiledp_col[(size_t)i * stride + VREG_INT_COUNT], + STOREU(&tiledp_col[(OPJ_SIZE_T)i * stride + VREG_INT_COUNT], LOAD(&tmp[PARALLEL_COLS_53 * i + VREG_INT_COUNT])); } } @@ -639,13 +639,13 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2( const OPJ_INT32 sn, const OPJ_INT32 len, OPJ_INT32* tiledp_col, - const size_t stride) + const OPJ_SIZE_T stride) { const OPJ_INT32* in_even = &tiledp_col[0]; - const OPJ_INT32* in_odd = &tiledp_col[(size_t)sn * stride]; + const OPJ_INT32* in_odd = &tiledp_col[(OPJ_SIZE_T)sn * stride]; OPJ_INT32 i; - size_t j; + OPJ_SIZE_T j; VREG d1c_0, d1n_0, s1n_0, s0c_0, s0n_0; VREG d1c_1, d1n_1, s1n_1, s0c_1, s0n_1; const VREG two = LOAD_CST(2); @@ -662,7 +662,7 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2( /* Note: loads of input even/odd values must be done in a unaligned */ /* fashion. But stores in tmp can be done with aligned store, since */ /* the temporary buffer is properly aligned */ - assert((size_t)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0); + assert((OPJ_SIZE_T)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0); s1n_0 = LOADU(in_even + 0); s1n_1 = LOADU(in_even + VREG_INT_COUNT); @@ -704,7 +704,7 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2( if (len & 1) { VREG tmp_len_minus_1; - s1n_0 = LOADU(in_even + (size_t)((len - 1) / 2) * stride); + s1n_0 = LOADU(in_even + (OPJ_SIZE_T)((len - 1) / 2) * stride); /* tmp_len_minus_1 = s1n - ((d1n + 1) >> 1); */ tmp_len_minus_1 = SUB(s1n_0, SAR(ADD3(d1n_0, d1n_0, two), 2)); STORE(tmp + PARALLEL_COLS_53 * (len - 1), tmp_len_minus_1); @@ -712,7 +712,7 @@ static void opj_idwt53_v_cas0_mcols_SSE2_OR_AVX2( STORE(tmp + PARALLEL_COLS_53 * (len - 2), ADD(d1n_0, SAR(ADD(s0n_0, tmp_len_minus_1), 1))); - s1n_1 = LOADU(in_even + (size_t)((len - 1) / 2) * stride + VREG_INT_COUNT); + s1n_1 = LOADU(in_even + (OPJ_SIZE_T)((len - 1) / 2) * stride + VREG_INT_COUNT); /* tmp_len_minus_1 = s1n - ((d1n + 1) >> 1); */ tmp_len_minus_1 = SUB(s1n_1, SAR(ADD3(d1n_1, d1n_1, two), 2)); STORE(tmp + PARALLEL_COLS_53 * (len - 1) + VREG_INT_COUNT, @@ -740,16 +740,16 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2( const OPJ_INT32 sn, const OPJ_INT32 len, OPJ_INT32* tiledp_col, - const size_t stride) + const OPJ_SIZE_T stride) { OPJ_INT32 i; - size_t j; + OPJ_SIZE_T j; VREG s1_0, s2_0, dc_0, dn_0; VREG s1_1, s2_1, dc_1, dn_1; const VREG two = LOAD_CST(2); - const OPJ_INT32* in_even = &tiledp_col[(size_t)sn * stride]; + const OPJ_INT32* in_even = &tiledp_col[(OPJ_SIZE_T)sn * stride]; const OPJ_INT32* in_odd = &tiledp_col[0]; assert(len > 2); @@ -764,7 +764,7 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2( /* Note: loads of input even/odd values must be done in a unaligned */ /* fashion. But stores in tmp can be done with aligned store, since */ /* the temporary buffer is properly aligned */ - assert((size_t)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0); + assert((OPJ_SIZE_T)tmp % (sizeof(OPJ_INT32) * VREG_INT_COUNT) == 0); s1_0 = LOADU(in_even + stride); /* in_odd[0] - ((in_even[0] + s1 + 2) >> 2); */ @@ -809,9 +809,9 @@ static void opj_idwt53_v_cas1_mcols_SSE2_OR_AVX2( if (!(len & 1)) { /*dn = in_odd[(len / 2 - 1) * stride] - ((s1 + 1) >> 1); */ - dn_0 = SUB(LOADU(in_odd + (size_t)(len / 2 - 1) * stride), + dn_0 = SUB(LOADU(in_odd + (OPJ_SIZE_T)(len / 2 - 1) * stride), SAR(ADD3(s1_0, s1_0, two), 2)); - dn_1 = SUB(LOADU(in_odd + (size_t)(len / 2 - 1) * stride + VREG_INT_COUNT), + dn_1 = SUB(LOADU(in_odd + (OPJ_SIZE_T)(len / 2 - 1) * stride + VREG_INT_COUNT), SAR(ADD3(s1_1, s1_1, two), 2)); /* tmp[len - 2] = s1 + ((dn + dc) >> 1); */ @@ -851,7 +851,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp, const OPJ_INT32 sn, const OPJ_INT32 len, OPJ_INT32* tiledp_col, - const size_t stride) + const OPJ_SIZE_T stride) { OPJ_INT32 i, j; OPJ_INT32 d1c, d1n, s1n, s0c, s0n; @@ -862,15 +862,15 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp, /* accesses and explicit interleaving. */ s1n = tiledp_col[0]; - d1n = tiledp_col[(size_t)sn * stride]; + d1n = tiledp_col[(OPJ_SIZE_T)sn * stride]; s0n = s1n - ((d1n + 1) >> 1); for (i = 0, j = 0; i < (len - 3); i += 2, j++) { d1c = d1n; s0c = s0n; - s1n = tiledp_col[(size_t)(j + 1) * stride]; - d1n = tiledp_col[(size_t)(sn + j + 1) * stride]; + s1n = tiledp_col[(OPJ_SIZE_T)(j + 1) * stride]; + d1n = tiledp_col[(OPJ_SIZE_T)(sn + j + 1) * stride]; s0n = s1n - ((d1c + d1n + 2) >> 2); @@ -882,7 +882,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp, if (len & 1) { tmp[len - 1] = - tiledp_col[(size_t)((len - 1) / 2) * stride] - + tiledp_col[(OPJ_SIZE_T)((len - 1) / 2) * stride] - ((d1n + 1) >> 1); tmp[len - 2] = d1n + ((s0n + tmp[len - 1]) >> 1); } else { @@ -890,7 +890,7 @@ static void opj_idwt3_v_cas0(OPJ_INT32* tmp, } for (i = 0; i < len; ++i) { - tiledp_col[(size_t)i * stride] = tmp[i]; + tiledp_col[(OPJ_SIZE_T)i * stride] = tmp[i]; } } @@ -901,11 +901,11 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, const OPJ_INT32 sn, const OPJ_INT32 len, OPJ_INT32* tiledp_col, - const size_t stride) + const OPJ_SIZE_T stride) { OPJ_INT32 i, j; OPJ_INT32 s1, s2, dc, dn; - const OPJ_INT32* in_even = &tiledp_col[(size_t)sn * stride]; + const OPJ_INT32* in_even = &tiledp_col[(OPJ_SIZE_T)sn * stride]; const OPJ_INT32* in_odd = &tiledp_col[0]; assert(len > 2); @@ -918,9 +918,9 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, tmp[0] = in_even[0] + dc; for (i = 1, j = 1; i < (len - 2 - !(len & 1)); i += 2, j++) { - s2 = in_even[(size_t)(j + 1) * stride]; + s2 = in_even[(OPJ_SIZE_T)(j + 1) * stride]; - dn = in_odd[(size_t)j * stride] - ((s1 + s2 + 2) >> 2); + dn = in_odd[(OPJ_SIZE_T)j * stride] - ((s1 + s2 + 2) >> 2); tmp[i ] = dc; tmp[i + 1] = s1 + ((dn + dc) >> 1); @@ -929,7 +929,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, } tmp[i] = dc; if (!(len & 1)) { - dn = in_odd[(size_t)(len / 2 - 1) * stride] - ((s1 + 1) >> 1); + dn = in_odd[(OPJ_SIZE_T)(len / 2 - 1) * stride] - ((s1 + 1) >> 1); tmp[len - 2] = s1 + ((dn + dc) >> 1); tmp[len - 1] = dn; } else { @@ -937,7 +937,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, } for (i = 0; i < len; ++i) { - tiledp_col[(size_t)i * stride] = tmp[i]; + tiledp_col[(OPJ_SIZE_T)i * stride] = tmp[i]; } } #endif /* !defined(STANDARD_SLOW_VERSION) */ @@ -948,7 +948,7 @@ static void opj_idwt3_v_cas1(OPJ_INT32* tmp, /* Performs interleave, inverse wavelet transform and copy back to buffer */ static void opj_idwt53_v(const opj_dwt_t *dwt, OPJ_INT32* tiledp_col, - size_t stride, + OPJ_SIZE_T stride, OPJ_INT32 nb_cols) { #ifdef STANDARD_SLOW_VERSION @@ -996,14 +996,14 @@ static void opj_idwt53_v(const opj_dwt_t *dwt, OPJ_INT32* out = dwt->mem; for (c = 0; c < nb_cols; c++, tiledp_col++) { OPJ_INT32 i; - const OPJ_INT32* in_even = &tiledp_col[(size_t)sn * stride]; + const OPJ_INT32* in_even = &tiledp_col[(OPJ_SIZE_T)sn * stride]; const OPJ_INT32* in_odd = &tiledp_col[0]; out[1] = in_odd[0] - ((in_even[0] + 1) >> 1); out[0] = in_even[0] + out[1]; for (i = 0; i < len; ++i) { - tiledp_col[(size_t)i * stride] = out[i]; + tiledp_col[(OPJ_SIZE_T)i * stride] = out[i]; } } @@ -1113,7 +1113,7 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec, OPJ_INT32 rw; /* width of the resolution level computed */ OPJ_INT32 rh; /* height of the resolution level computed */ - size_t l_data_size; + OPJ_SIZE_T l_data_size; opj_tcd_resolution_t * l_cur_res = 0; opj_tcd_resolution_t * l_last_res = 0; @@ -1343,11 +1343,11 @@ static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls) job = (opj_dwd_decode_v_job_t*)user_data; for (j = job->min_j; j + PARALLEL_COLS_53 <= job->max_j; j += PARALLEL_COLS_53) { - opj_idwt53_v(&job->v, &job->tiledp[j], (size_t)job->w, + opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_SIZE_T)job->w, PARALLEL_COLS_53); } if (j < job->max_j) - opj_idwt53_v(&job->v, &job->tiledp[j], (size_t)job->w, + opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_SIZE_T)job->w, (OPJ_INT32)(job->max_j - j)); opj_aligned_free(job->v.mem); @@ -1374,7 +1374,7 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, OPJ_UINT32 w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1 - tilec->resolutions[tilec->minimum_num_resolutions - 1].x0); - size_t h_mem_size; + OPJ_SIZE_T h_mem_size; int num_threads; if (numres == 1U) { @@ -1415,7 +1415,7 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, if (num_threads <= 1 || rh <= 1) { for (j = 0; j < rh; ++j) { - opj_idwt53_h(&h, &tiledp[(size_t)j * w]); + opj_idwt53_h(&h, &tiledp[(OPJ_SIZE_T)j * w]); } } else { OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; @@ -1467,10 +1467,10 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, if (num_threads <= 1 || rw <= 1) { for (j = 0; j + PARALLEL_COLS_53 <= rw; j += PARALLEL_COLS_53) { - opj_idwt53_v(&v, &tiledp[j], (size_t)w, PARALLEL_COLS_53); + opj_idwt53_v(&v, &tiledp[j], (OPJ_SIZE_T)w, PARALLEL_COLS_53); } if (j < rw) { - opj_idwt53_v(&v, &tiledp[j], (size_t)w, (OPJ_INT32)(rw - j)); + opj_idwt53_v(&v, &tiledp[j], (OPJ_SIZE_T)w, (OPJ_INT32)(rw - j)); } } else { OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; @@ -1922,7 +1922,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 - tr->y0); /* height of the resolution level computed */ - size_t h_mem_size; + OPJ_SIZE_T h_mem_size; /* Compute the intersection of the area of interest, expressed in tile coordinates */ /* with the tile coordinates */ @@ -2152,8 +2152,8 @@ static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt, OPJ_UINT32 x1 = dwt->win_l_x1; for (k = 0; k < 2; ++k) { - if (remaining_height >= 4 && ((size_t) a & 0x0f) == 0 && - ((size_t) bi & 0x0f) == 0 && (width & 0x0f) == 0) { + if (remaining_height >= 4 && ((OPJ_SIZE_T) a & 0x0f) == 0 && + ((OPJ_SIZE_T) bi & 0x0f) == 0 && (width & 0x0f) == 0) { /* Fast code path */ for (i = x0; i < x1; ++i) { OPJ_UINT32 j = i; @@ -2230,16 +2230,16 @@ static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt, OPJ_UINT32 i; for (i = dwt->win_l_x0; i < dwt->win_l_x1; ++i) { - memcpy(&bi[i * 2], &a[i * (size_t)width], - (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); + memcpy(&bi[i * 2], &a[i * (OPJ_SIZE_T)width], + (OPJ_SIZE_T)nb_elts_read * sizeof(OPJ_FLOAT32)); } - a += (OPJ_UINT32)dwt->sn * (size_t)width; + a += (OPJ_UINT32)dwt->sn * (OPJ_SIZE_T)width; bi = dwt->wavelet + 1 - dwt->cas; for (i = dwt->win_h_x0; i < dwt->win_h_x1; ++i) { - memcpy(&bi[i * 2], &a[i * (size_t)width], - (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); + memcpy(&bi[i * 2], &a[i * (OPJ_SIZE_T)width], + (OPJ_SIZE_T)nb_elts_read * sizeof(OPJ_FLOAT32)); } } @@ -2497,7 +2497,7 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, 1].x1 - tilec->resolutions[tilec->minimum_num_resolutions - 1].x0); - size_t l_data_size; + OPJ_SIZE_T l_data_size; l_data_size = opj_dwt_max_resolution(res, numres); /* overflow check */ @@ -2546,9 +2546,9 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, for (k = 0; k < rw; k++) { aj[k ] = h.wavelet[k].f[0]; - aj[k + (size_t)w ] = h.wavelet[k].f[1]; - aj[k + (size_t)w * 2] = h.wavelet[k].f[2]; - aj[k + (size_t)w * 3] = h.wavelet[k].f[3]; + aj[k + (OPJ_SIZE_T)w ] = h.wavelet[k].f[1]; + aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2]; + aj[k + (OPJ_SIZE_T)w * 3] = h.wavelet[k].f[3]; } aj += w * 4; @@ -2561,10 +2561,10 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, for (k = 0; k < rw; k++) { switch (rh - j) { case 3: - aj[k + (size_t)w * 2] = h.wavelet[k].f[2]; + aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2]; /* FALLTHRU */ case 2: - aj[k + (size_t)w ] = h.wavelet[k].f[1]; + aj[k + (OPJ_SIZE_T)w ] = h.wavelet[k].f[1]; /* FALLTHRU */ case 1: aj[k] = h.wavelet[k].f[0]; @@ -2587,7 +2587,7 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, opj_v4dwt_decode(&v); for (k = 0; k < rh; ++k) { - memcpy(&aj[k * (size_t)w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32)); + memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32)); } aj += 4; } @@ -2601,7 +2601,8 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, opj_v4dwt_decode(&v); for (k = 0; k < rh; ++k) { - memcpy(&aj[k * (size_t)w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32)); + memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], + (OPJ_SIZE_T)j * sizeof(OPJ_FLOAT32)); } } } @@ -2631,7 +2632,7 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 - tr->y0); /* height of the resolution level computed */ - size_t l_data_size; + OPJ_SIZE_T l_data_size; /* Compute the intersection of the area of interest, expressed in tile coordinates */ /* with the tile coordinates */ diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c index 40459bdb..6a2d8d43 100644 --- a/src/lib/openjp2/sparse_array.c +++ b/src/lib/openjp2/sparse_array.c @@ -142,14 +142,14 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( if (is_read_op) { if (src_block == NULL) { if (buf_col_stride == 1) { - OPJ_INT32* dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + OPJ_INT32* dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride + (x - x0) * buf_col_stride; for (j = 0; j < y_incr; j++) { memset(dest_ptr, 0, sizeof(OPJ_INT32) * x_incr); dest_ptr += buf_line_stride; } } else { - OPJ_INT32* dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + OPJ_INT32* dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride + (x - x0) * buf_col_stride; for (j = 0; j < y_incr; j++) { OPJ_UINT32 k; @@ -161,9 +161,10 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( } } else { const OPJ_INT32* OPJ_RESTRICT src_ptr = src_block + block_y_offset * - (size_t)block_width + block_x_offset; + (OPJ_SIZE_T)block_width + block_x_offset; if (buf_col_stride == 1) { - OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride + + (x - x0) * buf_col_stride; if (x_incr == 4) { // Same code as general branch, but the compiler @@ -181,7 +182,8 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( } } } else { - OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride + + (x - x0) * buf_col_stride; if (x_incr == 1) { for (j = 0; j < y_incr; j++) { @@ -240,9 +242,9 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( if (buf_col_stride == 1) { OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset * - (size_t)block_width + block_x_offset; + (OPJ_SIZE_T)block_width + block_x_offset; const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) * - (size_t)buf_line_stride + (x - x0) * buf_col_stride; + (OPJ_SIZE_T)buf_line_stride + (x - x0) * buf_col_stride; if (x_incr == 4) { // Same code as general branch, but the compiler // can have an efficient memcpy() @@ -260,9 +262,9 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( } } else { OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset * - (size_t)block_width + block_x_offset; + (OPJ_SIZE_T)block_width + block_x_offset; const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) * - (size_t)buf_line_stride + (x - x0) * buf_col_stride; + (OPJ_SIZE_T)buf_line_stride + (x - x0) * buf_col_stride; if (x_incr == 1) { for (j = 0; j < y_incr; j++) { *dest_ptr = *src_ptr; diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 0cc6f250..a583e692 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -1452,7 +1452,7 @@ static OPJ_BOOL opj_t1_allocate_buffers( if (!t1->encoder) { OPJ_UINT32 datasize = w * h; - if (datasize > (size_t)t1->datasize) { + if (datasize > t1->datasize) { opj_aligned_free(t1->data); t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32)); if (!t1->data) { @@ -1751,8 +1751,8 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) } } } else if (tccp->qmfbid == 1) { - OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(size_t)y * tile_w + - (size_t)x]; + OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + + (OPJ_SIZE_T)x]; for (j = 0; j < cblk_h; ++j) { i = 0; for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) { @@ -1760,19 +1760,19 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U]; OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U]; OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U]; - ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 0U] = tmp0 / 2; - ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 1U] = tmp1 / 2; - ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 2U] = tmp2 / 2; - ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 3U] = tmp3 / 2; + ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2; + ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2; + ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2; + ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2; } for (; i < cblk_w; ++i) { OPJ_INT32 tmp = datap[(j * cblk_w) + i]; - ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i] = tmp / 2; + ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2; } } } else { /* if (tccp->qmfbid == 0) */ - OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(size_t)y * - tile_w + (size_t)x]; + OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y * + tile_w + (OPJ_SIZE_T)x]; for (j = 0; j < cblk_h; ++j) { OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp; for (i = 0; i < cblk_w; ++i) { @@ -2134,7 +2134,7 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1, OPJ_UINT32 cblk_w; OPJ_UINT32 cblk_h; OPJ_UINT32 i, j, tileLineAdvance; - size_t tileIndex = 0; + OPJ_SIZE_T tileIndex = 0; OPJ_INT32 x = cblk->x0 - band->x0; OPJ_INT32 y = cblk->y0 - band->y0; @@ -2158,7 +2158,7 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1, cblk_h = t1->h; tileLineAdvance = tile_w - cblk_w; - tiledp = &tilec->data[(size_t)y * tile_w + (size_t)x]; + tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x]; t1->data = tiledp; t1->data_stride = tile_w; if (tccp->qmfbid == 1) { diff --git a/src/lib/openjp2/tcd.c b/src/lib/openjp2/tcd.c index bd5d4960..c652f8ba 100644 --- a/src/lib/openjp2/tcd.c +++ b/src/lib/openjp2/tcd.c @@ -807,11 +807,11 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, } if (isEncoder) { - size_t l_tile_data_size; + OPJ_SIZE_T l_tile_data_size; /* compute l_data_size with overflow check */ - size_t w = (size_t)(l_tilec->x1 - l_tilec->x0); - size_t h = (size_t)(l_tilec->y1 - l_tilec->y0); + OPJ_SIZE_T w = (OPJ_SIZE_T)(l_tilec->x1 - l_tilec->x0); + OPJ_SIZE_T h = (OPJ_SIZE_T)(l_tilec->y1 - l_tilec->y0); /* issue 733, l_data_size == 0U, probably something wrong should be checked before getting here */ if (h > 0 && w > SIZE_MAX / h) { @@ -1366,7 +1366,8 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd, p_cstr_info->tile[p_tile_no].pdy[i] = (int)l_tccp->prch[i]; } p_cstr_info->tile[p_tile_no].packet = (opj_packet_info_t*) opj_calloc(( - size_t)p_cstr_info->numcomps * (size_t)p_cstr_info->numlayers * l_num_packs, + OPJ_SIZE_T)p_cstr_info->numcomps * (OPJ_SIZE_T)p_cstr_info->numlayers * + l_num_packs, sizeof(opj_packet_info_t)); if (!p_cstr_info->tile[p_tile_no].packet) { /* FIXME event manager error callback */ @@ -1462,11 +1463,11 @@ OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd, opj_tcd_tilecomp_t* tilec = &(p_tcd->tcd_image->tiles->comps[compno]); opj_tcd_resolution_t *l_res = & (tilec->resolutions[tilec->minimum_num_resolutions - 1]); - size_t l_data_size; + OPJ_SIZE_T l_data_size; /* compute l_data_size with overflow check */ - size_t res_w = (size_t)(l_res->x1 - l_res->x0); - size_t res_h = (size_t)(l_res->y1 - l_res->y0); + OPJ_SIZE_T res_w = (OPJ_SIZE_T)(l_res->x1 - l_res->x0); + OPJ_SIZE_T res_h = (OPJ_SIZE_T)(l_res->y1 - l_res->y0); /* issue 733, l_data_size == 0U, probably something wrong should be checked before getting here */ if (res_h > 0 && res_w > SIZE_MAX / res_h) { @@ -1577,9 +1578,9 @@ OPJ_BOOL opj_tcd_decode_tile(opj_tcd_t *p_tcd, opj_tcd_tilecomp_t* tilec = &(p_tcd->tcd_image->tiles->comps[compno]); opj_image_comp_t* image_comp = &(p_tcd->image->comps[compno]); opj_tcd_resolution_t *res = tilec->resolutions + image_comp->resno_decoded; - size_t w = res->win_x1 - res->win_x0; - size_t h = res->win_y1 - res->win_y0; - size_t l_data_size; + OPJ_SIZE_T w = res->win_x1 - res->win_x0; + OPJ_SIZE_T h = res->win_y1 - res->win_y0; + OPJ_SIZE_T l_data_size; opj_image_data_free(tilec->data_win); tilec->data_win = NULL; @@ -1980,17 +1981,17 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager) p_tcd->image->comps[1].resno_decoded; opj_tcd_resolution_t* res_comp2 = l_tile->comps[2].resolutions + p_tcd->image->comps[2].resno_decoded; - size_t l_res_samples = (size_t)(res_comp0->x1 - res_comp0->x0) * - (size_t)(res_comp0->y1 - res_comp0->y0); + OPJ_SIZE_T l_res_samples = (OPJ_SIZE_T)(res_comp0->x1 - res_comp0->x0) * + (OPJ_SIZE_T)(res_comp0->y1 - res_comp0->y0); /* testcase 1336.pdf.asan.47.376 */ if (p_tcd->image->comps[0].resno_decoded != p_tcd->image->comps[1].resno_decoded || p_tcd->image->comps[0].resno_decoded != p_tcd->image->comps[2].resno_decoded || - (size_t)(res_comp1->x1 - res_comp1->x0) * - (size_t)(res_comp1->y1 - res_comp1->y0) != l_res_samples || - (size_t)(res_comp2->x1 - res_comp2->x0) * - (size_t)(res_comp2->y1 - res_comp2->y0) != l_res_samples) { + (OPJ_SIZE_T)(res_comp1->x1 - res_comp1->x0) * + (OPJ_SIZE_T)(res_comp1->y1 - res_comp1->y0) != l_res_samples || + (OPJ_SIZE_T)(res_comp2->x1 - res_comp2->x0) * + (OPJ_SIZE_T)(res_comp2->y1 - res_comp2->y0) != l_res_samples) { opj_event_msg(p_manager, EVT_ERROR, "Tiles don't all have the same dimension. Skip the MCT step.\n"); return OPJ_FALSE; @@ -2271,7 +2272,7 @@ static OPJ_BOOL opj_tcd_dc_level_shift_encode(opj_tcd_t *p_tcd) opj_tccp_t * l_tccp = 00; opj_image_comp_t * l_img_comp = 00; opj_tcd_tile_t * l_tile; - size_t l_nb_elem, i; + OPJ_SIZE_T l_nb_elem, i; OPJ_INT32 * l_current_ptr; l_tile = p_tcd->tcd_image->tiles; @@ -2281,8 +2282,8 @@ static OPJ_BOOL opj_tcd_dc_level_shift_encode(opj_tcd_t *p_tcd) for (compno = 0; compno < l_tile->numcomps; compno++) { l_current_ptr = l_tile_comp->data; - l_nb_elem = (size_t)(l_tile_comp->x1 - l_tile_comp->x0) * - (size_t)(l_tile_comp->y1 - l_tile_comp->y0); + l_nb_elem = (OPJ_SIZE_T)(l_tile_comp->x1 - l_tile_comp->x0) * + (OPJ_SIZE_T)(l_tile_comp->y1 - l_tile_comp->y0); if (l_tccp->qmfbid == 1) { for (i = 0; i < l_nb_elem; ++i) { @@ -2308,8 +2309,8 @@ static OPJ_BOOL opj_tcd_mct_encode(opj_tcd_t *p_tcd) { opj_tcd_tile_t * l_tile = p_tcd->tcd_image->tiles; opj_tcd_tilecomp_t * l_tile_comp = p_tcd->tcd_image->tiles->comps; - size_t samples = (size_t)(l_tile_comp->x1 - l_tile_comp->x0) * - (size_t)(l_tile_comp->y1 - l_tile_comp->y0); + OPJ_SIZE_T samples = (OPJ_SIZE_T)(l_tile_comp->x1 - l_tile_comp->x0) * + (OPJ_SIZE_T)(l_tile_comp->y1 - l_tile_comp->y0); OPJ_UINT32 i; OPJ_BYTE ** l_data = 00; opj_tcp_t * l_tcp = p_tcd->tcp; @@ -2511,8 +2512,8 @@ OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd, for (i = 0; i < p_tcd->image->numcomps; ++i) { l_size_comp = l_img_comp->prec >> 3; /*(/ 8)*/ l_remaining = l_img_comp->prec & 7; /* (%8) */ - l_nb_elem = (size_t)(l_tilec->x1 - l_tilec->x0) * - (size_t)(l_tilec->y1 - l_tilec->y0); + l_nb_elem = (OPJ_SIZE_T)(l_tilec->x1 - l_tilec->x0) * + (OPJ_SIZE_T)(l_tilec->y1 - l_tilec->y0); if (l_remaining) { ++l_size_comp; -- cgit v1.2.3