/*@}*/
-/**
-Virtual function type for wavelet transform in 1-D
-*/
-typedef void (*DWT1DFN)(const opj_dwt_t* v);
-
/** @name Local static functions */
/*@{*/
/**
Forward lazy transform (horizontal)
*/
-static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn,
+static void opj_dwt_deinterleave_h(const OPJ_INT32 * OPJ_RESTRICT a,
+ OPJ_INT32 * OPJ_RESTRICT b,
+ OPJ_INT32 dn,
OPJ_INT32 sn, OPJ_INT32 cas);
/**
Forward lazy transform (vertical)
*/
-static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn,
+static void opj_dwt_deinterleave_v(const OPJ_INT32 * OPJ_RESTRICT a,
+ OPJ_INT32 * OPJ_RESTRICT b,
+ OPJ_INT32 dn,
OPJ_INT32 sn, OPJ_UINT32 x, OPJ_INT32 cas);
-/**
-Forward 5-3 wavelet transform in 1-D
-*/
-static void opj_dwt_encode_1(void *a, OPJ_INT32 dn, OPJ_INT32 sn,
- OPJ_INT32 cas);
+
/**
Forward 9-7 wavelet transform in 1-D
*/
opj_tcd_tilecomp_t* tilec,
OPJ_UINT32 numres);
+/* Forward transform, for the vertical pass, processing cols columns */
+/* where cols <= NB_ELTS_V8 */
+/* Where void* is a OPJ_INT32* for 5x3 and OPJ_FLOAT32* for 9x7 */
+typedef void (*opj_encode_and_deinterleave_v_fnptr_type)(
+ void *array,
+ void *tmp,
+ OPJ_UINT32 height,
+ OPJ_BOOL even,
+ OPJ_UINT32 stride_width,
+ OPJ_UINT32 cols);
+
/* Where void* is a OPJ_INT32* for 5x3 and OPJ_FLOAT32* for 9x7 */
-typedef void (*opj_encode_one_row_fnptr_type)(void *, OPJ_INT32, OPJ_INT32,
- OPJ_INT32);
+typedef void (*opj_encode_and_deinterleave_h_one_row_fnptr_type)(
+ void *row,
+ void *tmp,
+ OPJ_UINT32 width,
+ OPJ_BOOL even);
static OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp,
opj_tcd_tilecomp_t * tilec,
- opj_encode_one_row_fnptr_type p_function);
+ opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v,
+ opj_encode_and_deinterleave_h_one_row_fnptr_type
+ p_encode_and_deinterleave_h_one_row);
static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r,
OPJ_UINT32 i);
/* <summary> */
/* Forward lazy transform (horizontal). */
/* </summary> */
-static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn,
+static void opj_dwt_deinterleave_h(const OPJ_INT32 * OPJ_RESTRICT a,
+ OPJ_INT32 * OPJ_RESTRICT b,
+ OPJ_INT32 dn,
OPJ_INT32 sn, OPJ_INT32 cas)
{
OPJ_INT32 i;
- OPJ_INT32 * l_dest = b;
- OPJ_INT32 * l_src = a + cas;
+ OPJ_INT32 * OPJ_RESTRICT l_dest = b;
+ const OPJ_INT32 * OPJ_RESTRICT l_src = a + cas;
for (i = 0; i < sn; ++i) {
*l_dest++ = *l_src;
/* <summary> */
/* Forward lazy transform (vertical). */
/* </summary> */
-static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn,
+static void opj_dwt_deinterleave_v(const OPJ_INT32 * OPJ_RESTRICT a,
+ OPJ_INT32 * OPJ_RESTRICT b,
+ OPJ_INT32 dn,
OPJ_INT32 sn, OPJ_UINT32 x, OPJ_INT32 cas)
{
OPJ_INT32 i = sn;
- OPJ_INT32 * l_dest = b;
- OPJ_INT32 * l_src = a + cas;
+ OPJ_INT32 * OPJ_RESTRICT l_dest = b;
+ const OPJ_INT32 * OPJ_RESTRICT l_src = a + cas;
while (i--) {
*l_dest = *l_src;
/* </summary> */
static void opj_dwt_interleave_h(const opj_dwt_t* h, OPJ_INT32 *a)
{
- OPJ_INT32 *ai = a;
+ const OPJ_INT32 *ai = a;
OPJ_INT32 *bi = h->mem + h->cas;
OPJ_INT32 i = h->sn;
while (i--) {
/* </summary> */
static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x)
{
- OPJ_INT32 *ai = a;
+ const OPJ_INT32 *ai = a;
OPJ_INT32 *bi = v->mem + v->cas;
OPJ_INT32 i = v->sn;
while (i--) {
#endif /* STANDARD_SLOW_VERSION */
-/* <summary> */
-/* Forward 5-3 wavelet transform in 1-D. */
-/* </summary> */
-static void opj_dwt_encode_1(void *aIn, OPJ_INT32 dn, OPJ_INT32 sn,
- OPJ_INT32 cas)
-{
- OPJ_INT32 i;
- OPJ_INT32* a = (OPJ_INT32*)aIn;
-
- if (!cas) {
- if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */
- for (i = 0; i < dn; i++) {
- OPJ_D(i) -= (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1;
- }
- for (i = 0; i < sn; i++) {
- OPJ_S(i) += (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2;
- }
- }
- } else {
- if (!sn && dn == 1) { /* NEW : CASE ONE ELEMENT */
- OPJ_S(0) *= 2;
- } else {
- for (i = 0; i < dn; i++) {
- OPJ_S(i) -= (OPJ_DD_(i) + OPJ_DD_(i - 1)) >> 1;
- }
- for (i = 0; i < sn; i++) {
- OPJ_D(i) += (OPJ_SS_(i) + OPJ_SS_(i + 1) + 2) >> 2;
- }
- }
- }
-}
-
#ifdef STANDARD_SLOW_VERSION
/* <summary> */
/* Inverse 5-3 wavelet transform in 1-D. */
==========================================================
*/
+/** Process one line for the horizontal pass of the 5x3 forward transform */
+static
+void opj_dwt_encode_and_deinterleave_h_one_row(void* rowIn,
+ void* tmpIn,
+ OPJ_UINT32 width,
+ OPJ_BOOL even)
+{
+ OPJ_INT32* OPJ_RESTRICT row = (OPJ_INT32*)rowIn;
+ OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32*)tmpIn;
+ const OPJ_INT32 sn = (OPJ_INT32)((width + (even ? 1 : 0)) >> 1);
+ const OPJ_INT32 dn = (OPJ_INT32)(width - (OPJ_UINT32)sn);
+
+ if (even) {
+ if (width > 1) {
+ OPJ_INT32 i;
+ for (i = 0; i < sn - 1; i++) {
+ tmp[sn + i] = row[2 * i + 1] - ((row[(i) * 2] + row[(i + 1) * 2]) >> 1);
+ }
+ if ((width % 2) == 0) {
+ tmp[sn + i] = row[2 * i + 1] - row[(i) * 2];
+ }
+ row[0] += (tmp[sn] + tmp[sn] + 2) >> 2;
+ for (i = 1; i < dn; i++) {
+ row[i] = row[2 * i] + ((tmp[sn + (i - 1)] + tmp[sn + i] + 2) >> 2);
+ }
+ if ((width % 2) == 1) {
+ row[i] = row[2 * i] + ((tmp[sn + (i - 1)] + tmp[sn + (i - 1)] + 2) >> 2);
+ }
+ memcpy(row + sn, tmp + sn, (OPJ_SIZE_T)dn * sizeof(OPJ_INT32));
+ }
+ } else {
+ if (width == 1) {
+ row[0] *= 2;
+ } else {
+ OPJ_INT32 i;
+ tmp[sn + 0] = row[0] - row[1];
+ for (i = 1; i < sn; i++) {
+ tmp[sn + i] = row[2 * i] - ((row[2 * i + 1] + row[2 * (i - 1) + 1]) >> 1);
+ }
+ if ((width % 2) == 1) {
+ tmp[sn + i] = row[2 * i] - row[2 * (i - 1) + 1];
+ }
+
+ for (i = 0; i < dn - 1; i++) {
+ row[i] = row[2 * i + 1] + ((tmp[sn + i] + tmp[sn + i + 1] + 2) >> 2);
+ }
+ if ((width % 2) == 0) {
+ row[i] = row[2 * i + 1] + ((tmp[sn + i] + tmp[sn + i] + 2) >> 2);
+ }
+ memcpy(row + sn, tmp + sn, (OPJ_SIZE_T)dn * sizeof(OPJ_INT32));
+ }
+ }
+}
+
+/** Process one line for the horizontal pass of the 9x7 forward transform */
+static
+void opj_dwt_encode_and_deinterleave_h_one_row_real(void* rowIn,
+ void* tmpIn,
+ OPJ_UINT32 width,
+ OPJ_BOOL even)
+{
+ OPJ_FLOAT32* OPJ_RESTRICT row = (OPJ_FLOAT32*)rowIn;
+ OPJ_FLOAT32* OPJ_RESTRICT tmp = (OPJ_FLOAT32*)tmpIn;
+ const OPJ_INT32 sn = (OPJ_INT32)((width + (even ? 1 : 0)) >> 1);
+ const OPJ_INT32 dn = (OPJ_INT32)(width - (OPJ_UINT32)sn);
+ memcpy(tmp, row, width * sizeof(OPJ_FLOAT32));
+ opj_dwt_encode_1_real(tmp, dn, sn, even ? 0 : 1);
+ opj_dwt_deinterleave_h((OPJ_INT32 * OPJ_RESTRICT)tmp,
+ (OPJ_INT32 * OPJ_RESTRICT)row,
+ dn, sn, even ? 0 : 1);
+}
typedef struct {
opj_dwt_t h;
- OPJ_UINT32 rw;
- OPJ_UINT32 w;
+ OPJ_UINT32 rw; /* Width of the resolution to process */
+ OPJ_UINT32 w; /* Width of tiledp */
OPJ_INT32 * OPJ_RESTRICT tiledp;
OPJ_UINT32 min_j;
OPJ_UINT32 max_j;
- opj_encode_one_row_fnptr_type p_function;
+ opj_encode_and_deinterleave_h_one_row_fnptr_type p_function;
} opj_dwt_encode_h_job_t;
static void opj_dwt_encode_h_func(void* user_data, opj_tls_t* tls)
job = (opj_dwt_encode_h_job_t*)user_data;
for (j = job->min_j; j < job->max_j; j++) {
OPJ_INT32* OPJ_RESTRICT aj = job->tiledp + j * job->w;
- OPJ_UINT32 k;
- for (k = 0; k < job->rw; k++) {
- job->h.mem[k] = aj[k];
- }
- (*job->p_function)(job->h.mem, job->h.dn, job->h.sn, job->h.cas);
- opj_dwt_deinterleave_h(job->h.mem, aj, job->h.dn, job->h.sn, job->h.cas);
+ (*job->p_function)(aj, job->h.mem, job->rw,
+ job->h.cas == 0 ? OPJ_TRUE : OPJ_FALSE);
}
opj_aligned_free(job->h.mem);
OPJ_INT32 * OPJ_RESTRICT tiledp;
OPJ_UINT32 min_j;
OPJ_UINT32 max_j;
- opj_encode_one_row_fnptr_type p_function;
+ opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v;
} opj_dwt_encode_v_job_t;
static void opj_dwt_encode_v_func(void* user_data, opj_tls_t* tls)
(void)tls;
job = (opj_dwt_encode_v_job_t*)user_data;
- for (j = job->min_j; j < job->max_j; j++) {
- OPJ_INT32* OPJ_RESTRICT aj = job->tiledp + j;
+ for (j = job->min_j; j + NB_ELTS_V8 - 1 < job->max_j; j += NB_ELTS_V8) {
+ (*job->p_encode_and_deinterleave_v)(job->tiledp + j,
+ job->v.mem,
+ job->rh,
+ job->v.cas == 0,
+ job->w,
+ NB_ELTS_V8);
+ }
+ if (j < job->max_j) {
+ (*job->p_encode_and_deinterleave_v)(job->tiledp + j,
+ job->v.mem,
+ job->rh,
+ job->v.cas == 0,
+ job->w,
+ job->max_j - j);
+ }
+
+ opj_aligned_free(job->v.mem);
+ opj_free(job);
+}
+
+/** Fetch up to cols <= NB_ELTS_V8 for each line, and put them in tmpOut */
+/* that has a NB_ELTS_V8 interleave factor. */
+static void opj_dwt_fetch_cols_vertical_pass(const void *arrayIn,
+ void *tmpOut,
+ OPJ_UINT32 height,
+ OPJ_UINT32 stride_width,
+ OPJ_UINT32 cols)
+{
+ const OPJ_INT32* OPJ_RESTRICT array = (const OPJ_INT32 * OPJ_RESTRICT)arrayIn;
+ OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32 * OPJ_RESTRICT)tmpOut;
+ if (cols == NB_ELTS_V8) {
OPJ_UINT32 k;
- for (k = 0; k < job->rh; ++k) {
- job->v.mem[k] = aj[k * job->w];
+ for (k = 0; k < height; ++k) {
+ memcpy(tmp + NB_ELTS_V8 * k,
+ array + k * stride_width,
+ NB_ELTS_V8 * sizeof(OPJ_INT32));
+ }
+ } else {
+ OPJ_UINT32 k;
+ for (k = 0; k < height; ++k) {
+ OPJ_UINT32 c;
+ for (c = 0; c < cols; c++) {
+ tmp[NB_ELTS_V8 * k + c] = array[c + k * stride_width];
+ }
+ for (; c < NB_ELTS_V8; c++) {
+ tmp[NB_ELTS_V8 * k + c] = 0;
+ }
}
+ }
+}
- (*job->p_function)(job->v.mem, job->v.dn, job->v.sn, job->v.cas);
+/* Deinterleave result of forward transform, where cols <= NB_ELTS_V8 */
+/* and src contains NB_ELTS_V8 consecutive values for up to NB_ELTS_V8 */
+/* columns. */
+static INLINE void opj_dwt_deinterleave_v_cols(
+ const OPJ_INT32 * OPJ_RESTRICT src,
+ OPJ_INT32 * OPJ_RESTRICT dst,
+ OPJ_INT32 dn,
+ OPJ_INT32 sn,
+ OPJ_UINT32 stride_width,
+ OPJ_INT32 cas,
+ OPJ_UINT32 cols)
+{
+ OPJ_INT32 i = sn;
+ OPJ_INT32 * OPJ_RESTRICT l_dest = dst;
+ const OPJ_INT32 * OPJ_RESTRICT l_src = src + cas * NB_ELTS_V8;
+ OPJ_UINT32 c;
- opj_dwt_deinterleave_v(job->v.mem, aj, job->v.dn, job->v.sn, job->w,
- job->v.cas);
+ while (i--) {
+ for (c = 0; c < cols; c++) {
+ l_dest[c] = l_src[c];
+ }
+ l_dest += stride_width;
+ l_src += 2 * NB_ELTS_V8;
}
- opj_aligned_free(job->v.mem);
- opj_free(job);
+ l_dest = dst + (OPJ_SIZE_T)sn * (OPJ_SIZE_T)stride_width;
+ l_src = src + (1 - cas) * NB_ELTS_V8;
+
+ i = dn;
+ while (i--) {
+ for (c = 0; c < cols; c++) {
+ l_dest[c] = l_src[c];
+ }
+ l_dest += stride_width;
+ l_src += 2 * NB_ELTS_V8;
+ }
+}
+
+
+/* Forward 5-3 transform, for the vertical pass, processing cols columns */
+/* where cols <= NB_ELTS_V8 */
+static void opj_dwt_encode_and_deinterleave_v(
+ void *arrayIn,
+ void *tmpIn,
+ OPJ_UINT32 height,
+ OPJ_BOOL even,
+ OPJ_UINT32 stride_width,
+ OPJ_UINT32 cols)
+{
+ OPJ_INT32* OPJ_RESTRICT array = (OPJ_INT32 * OPJ_RESTRICT)arrayIn;
+ OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32 * OPJ_RESTRICT)tmpIn;
+ const OPJ_UINT32 sn = (height + (even ? 1 : 0)) >> 1;
+ const OPJ_UINT32 dn = height - sn;
+
+ opj_dwt_fetch_cols_vertical_pass(arrayIn, tmpIn, height, stride_width, cols);
+
+#define OPJ_Sc(i) tmp[(i)*2* NB_ELTS_V8 + c]
+#define OPJ_Dc(i) tmp[((1+(i)*2))* NB_ELTS_V8 + c]
+
+#ifdef __SSE2__
+ if (height == 1) {
+ if (!even) {
+ OPJ_UINT32 c;
+ for (c = 0; c < NB_ELTS_V8; c++) {
+ tmp[c] *= 2;
+ }
+ }
+ } else if (even) {
+ OPJ_UINT32 c;
+ OPJ_UINT32 i;
+ i = 0;
+ if (i + 1 < sn) {
+ __m128i xmm_Si_0 = *(const __m128i*)(tmp + 4 * 0);
+ __m128i xmm_Si_1 = *(const __m128i*)(tmp + 4 * 1);
+ for (; i + 1 < sn; i++) {
+ __m128i xmm_Sip1_0 = *(const __m128i*)(tmp +
+ (i + 1) * 2 * NB_ELTS_V8 + 4 * 0);
+ __m128i xmm_Sip1_1 = *(const __m128i*)(tmp +
+ (i + 1) * 2 * NB_ELTS_V8 + 4 * 1);
+ __m128i xmm_Di_0 = *(const __m128i*)(tmp +
+ (1 + i * 2) * NB_ELTS_V8 + 4 * 0);
+ __m128i xmm_Di_1 = *(const __m128i*)(tmp +
+ (1 + i * 2) * NB_ELTS_V8 + 4 * 1);
+ xmm_Di_0 = _mm_sub_epi32(xmm_Di_0,
+ _mm_srai_epi32(_mm_add_epi32(xmm_Si_0, xmm_Sip1_0), 1));
+ xmm_Di_1 = _mm_sub_epi32(xmm_Di_1,
+ _mm_srai_epi32(_mm_add_epi32(xmm_Si_1, xmm_Sip1_1), 1));
+ *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Di_0;
+ *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Di_1;
+ xmm_Si_0 = xmm_Sip1_0;
+ xmm_Si_1 = xmm_Sip1_1;
+ }
+ }
+ if (((height) % 2) == 0) {
+ for (c = 0; c < NB_ELTS_V8; c++) {
+ OPJ_Dc(i) -= OPJ_Sc(i);
+ }
+ }
+ for (c = 0; c < NB_ELTS_V8; c++) {
+ OPJ_Sc(0) += (OPJ_Dc(0) + OPJ_Dc(0) + 2) >> 2;
+ }
+ i = 1;
+ if (i < dn) {
+ __m128i xmm_Dim1_0 = *(const __m128i*)(tmp + (1 +
+ (i - 1) * 2) * NB_ELTS_V8 + 4 * 0);
+ __m128i xmm_Dim1_1 = *(const __m128i*)(tmp + (1 +
+ (i - 1) * 2) * NB_ELTS_V8 + 4 * 1);
+ const __m128i xmm_two = _mm_set1_epi32(2);
+ for (; i < dn; i++) {
+ __m128i xmm_Di_0 = *(const __m128i*)(tmp +
+ (1 + i * 2) * NB_ELTS_V8 + 4 * 0);
+ __m128i xmm_Di_1 = *(const __m128i*)(tmp +
+ (1 + i * 2) * NB_ELTS_V8 + 4 * 1);
+ __m128i xmm_Si_0 = *(const __m128i*)(tmp +
+ (i * 2) * NB_ELTS_V8 + 4 * 0);
+ __m128i xmm_Si_1 = *(const __m128i*)(tmp +
+ (i * 2) * NB_ELTS_V8 + 4 * 1);
+ xmm_Si_0 = _mm_add_epi32(xmm_Si_0,
+ _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Dim1_0, xmm_Di_0), xmm_two), 2));
+ xmm_Si_1 = _mm_add_epi32(xmm_Si_1,
+ _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Dim1_1, xmm_Di_1), xmm_two), 2));
+ *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Si_0;
+ *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Si_1;
+ xmm_Dim1_0 = xmm_Di_0;
+ xmm_Dim1_1 = xmm_Di_1;
+ }
+ }
+ if (((height) % 2) == 1) {
+ for (c = 0; c < NB_ELTS_V8; c++) {
+ OPJ_Sc(i) += (OPJ_Dc(i - 1) + OPJ_Dc(i - 1) + 2) >> 2;
+ }
+ }
+ } else {
+ OPJ_UINT32 c;
+ OPJ_UINT32 i;
+ for (c = 0; c < NB_ELTS_V8; c++) {
+ OPJ_Sc(0) -= OPJ_Dc(0);
+ }
+ i = 1;
+ if (i < sn) {
+ __m128i xmm_Dim1_0 = *(const __m128i*)(tmp + (1 +
+ (i - 1) * 2) * NB_ELTS_V8 + 4 * 0);
+ __m128i xmm_Dim1_1 = *(const __m128i*)(tmp + (1 +
+ (i - 1) * 2) * NB_ELTS_V8 + 4 * 1);
+ for (; i < sn; i++) {
+ __m128i xmm_Di_0 = *(const __m128i*)(tmp +
+ (1 + i * 2) * NB_ELTS_V8 + 4 * 0);
+ __m128i xmm_Di_1 = *(const __m128i*)(tmp +
+ (1 + i * 2) * NB_ELTS_V8 + 4 * 1);
+ __m128i xmm_Si_0 = *(const __m128i*)(tmp +
+ (i * 2) * NB_ELTS_V8 + 4 * 0);
+ __m128i xmm_Si_1 = *(const __m128i*)(tmp +
+ (i * 2) * NB_ELTS_V8 + 4 * 1);
+ xmm_Si_0 = _mm_sub_epi32(xmm_Si_0,
+ _mm_srai_epi32(_mm_add_epi32(xmm_Di_0, xmm_Dim1_0), 1));
+ xmm_Si_1 = _mm_sub_epi32(xmm_Si_1,
+ _mm_srai_epi32(_mm_add_epi32(xmm_Di_1, xmm_Dim1_1), 1));
+ *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Si_0;
+ *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Si_1;
+ xmm_Dim1_0 = xmm_Di_0;
+ xmm_Dim1_1 = xmm_Di_1;
+ }
+ }
+ if (((height) % 2) == 1) {
+ for (c = 0; c < NB_ELTS_V8; c++) {
+ OPJ_Sc(i) -= OPJ_Dc(i - 1);
+ }
+ }
+ i = 0;
+ if (i + 1 < dn) {
+ __m128i xmm_Si_0 = *((const __m128i*)(tmp + 4 * 0));
+ __m128i xmm_Si_1 = *((const __m128i*)(tmp + 4 * 1));
+ const __m128i xmm_two = _mm_set1_epi32(2);
+ for (; i + 1 < dn; i++) {
+ __m128i xmm_Sip1_0 = *(const __m128i*)(tmp +
+ (i + 1) * 2 * NB_ELTS_V8 + 4 * 0);
+ __m128i xmm_Sip1_1 = *(const __m128i*)(tmp +
+ (i + 1) * 2 * NB_ELTS_V8 + 4 * 1);
+ __m128i xmm_Di_0 = *(const __m128i*)(tmp +
+ (1 + i * 2) * NB_ELTS_V8 + 4 * 0);
+ __m128i xmm_Di_1 = *(const __m128i*)(tmp +
+ (1 + i * 2) * NB_ELTS_V8 + 4 * 1);
+ xmm_Di_0 = _mm_add_epi32(xmm_Di_0,
+ _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Si_0, xmm_Sip1_0), xmm_two), 2));
+ xmm_Di_1 = _mm_add_epi32(xmm_Di_1,
+ _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Si_1, xmm_Sip1_1), xmm_two), 2));
+ *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Di_0;
+ *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Di_1;
+ xmm_Si_0 = xmm_Sip1_0;
+ xmm_Si_1 = xmm_Sip1_1;
+ }
+ }
+ if (((height) % 2) == 0) {
+ for (c = 0; c < NB_ELTS_V8; c++) {
+ OPJ_Dc(i) += (OPJ_Sc(i) + OPJ_Sc(i) + 2) >> 2;
+ }
+ }
+ }
+#else
+ if (even) {
+ OPJ_UINT32 c;
+ if (height > 1) {
+ OPJ_UINT32 i;
+ for (i = 0; i + 1 < sn; i++) {
+ for (c = 0; c < NB_ELTS_V8; c++) {
+ OPJ_Dc(i) -= (OPJ_Sc(i) + OPJ_Sc(i + 1)) >> 1;
+ }
+ }
+ if (((height) % 2) == 0) {
+ for (c = 0; c < NB_ELTS_V8; c++) {
+ OPJ_Dc(i) -= OPJ_Sc(i);
+ }
+ }
+ for (c = 0; c < NB_ELTS_V8; c++) {
+ OPJ_Sc(0) += (OPJ_Dc(0) + OPJ_Dc(0) + 2) >> 2;
+ }
+ for (i = 1; i < dn; i++) {
+ for (c = 0; c < NB_ELTS_V8; c++) {
+ OPJ_Sc(i) += (OPJ_Dc(i - 1) + OPJ_Dc(i) + 2) >> 2;
+ }
+ }
+ if (((height) % 2) == 1) {
+ for (c = 0; c < NB_ELTS_V8; c++) {
+ OPJ_Sc(i) += (OPJ_Dc(i - 1) + OPJ_Dc(i - 1) + 2) >> 2;
+ }
+ }
+ }
+ } else {
+ OPJ_UINT32 c;
+ if (height == 1) {
+ for (c = 0; c < NB_ELTS_V8; c++) {
+ OPJ_Sc(0) *= 2;
+ }
+ } else {
+ OPJ_UINT32 i;
+ for (c = 0; c < NB_ELTS_V8; c++) {
+ OPJ_Sc(0) -= OPJ_Dc(0);
+ }
+ for (i = 1; i < sn; i++) {
+ for (c = 0; c < NB_ELTS_V8; c++) {
+ OPJ_Sc(i) -= (OPJ_Dc(i) + OPJ_Dc(i - 1)) >> 1;
+ }
+ }
+ if (((height) % 2) == 1) {
+ for (c = 0; c < NB_ELTS_V8; c++) {
+ OPJ_Sc(i) -= OPJ_Dc(i - 1);
+ }
+ }
+ for (i = 0; i + 1 < dn; i++) {
+ for (c = 0; c < NB_ELTS_V8; c++) {
+ OPJ_Dc(i) += (OPJ_Sc(i) + OPJ_Sc(i + 1) + 2) >> 2;
+ }
+ }
+ if (((height) % 2) == 0) {
+ for (c = 0; c < NB_ELTS_V8; c++) {
+ OPJ_Dc(i) += (OPJ_Sc(i) + OPJ_Sc(i) + 2) >> 2;
+ }
+ }
+ }
+ }
+#endif
+
+ if (cols == NB_ELTS_V8) {
+ opj_dwt_deinterleave_v_cols(tmp, array, (OPJ_INT32)dn, (OPJ_INT32)sn,
+ stride_width, even ? 0 : 1, NB_ELTS_V8);
+ } else {
+ opj_dwt_deinterleave_v_cols(tmp, array, (OPJ_INT32)dn, (OPJ_INT32)sn,
+ stride_width, even ? 0 : 1, cols);
+ }
}
+/* Forward 9-7 transform, for the vertical pass, processing cols columns */
+/* where cols <= NB_ELTS_V8 */
+static void opj_dwt_encode_and_deinterleave_v_real(
+ void *arrayIn,
+ void *tmpIn,
+ OPJ_UINT32 height,
+ OPJ_BOOL even,
+ OPJ_UINT32 stride_width,
+ OPJ_UINT32 cols)
+{
+ OPJ_FLOAT32* OPJ_RESTRICT array = (OPJ_FLOAT32 * OPJ_RESTRICT)arrayIn;
+ OPJ_FLOAT32* OPJ_RESTRICT tmp = (OPJ_FLOAT32 * OPJ_RESTRICT)tmpIn;
+ OPJ_UINT32 c;
+ const OPJ_INT32 sn = (OPJ_INT32)((height + (even ? 1 : 0)) >> 1);
+ const OPJ_INT32 dn = (OPJ_INT32)(height - (OPJ_UINT32)sn);
+ for (c = 0; c < cols; c++) {
+ OPJ_UINT32 k;
+ for (k = 0; k < height; ++k) {
+ tmp[k] = array[c + k * stride_width];
+ }
+
+ opj_dwt_encode_1_real(tmp, dn, sn, even ? 0 : 1);
+
+ opj_dwt_deinterleave_v((OPJ_INT32*)tmpIn,
+ ((OPJ_INT32*)(arrayIn)) + c,
+ dn, sn, stride_width, even ? 0 : 1);
+ }
+}
+
+
/* <summary> */
/* Forward 5-3 wavelet transform in 2-D. */
/* </summary> */
static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp,
opj_tcd_tilecomp_t * tilec,
- opj_encode_one_row_fnptr_type p_function)
+ opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v,
+ opj_encode_and_deinterleave_h_one_row_fnptr_type
+ p_encode_and_deinterleave_h_one_row)
{
OPJ_INT32 i;
OPJ_INT32 *bj = 00;
l_data_size = opj_dwt_max_resolution(tilec->resolutions, tilec->numresolutions);
/* overflow check */
- if (l_data_size > (SIZE_MAX / sizeof(OPJ_INT32))) {
+ if (l_data_size > (SIZE_MAX / (NB_ELTS_V8 * sizeof(OPJ_INT32)))) {
/* FIXME event manager error callback */
return OPJ_FALSE;
}
- l_data_size *= sizeof(OPJ_INT32);
+ l_data_size *= NB_ELTS_V8 * sizeof(OPJ_INT32);
bj = (OPJ_INT32*)opj_aligned_32_malloc(l_data_size);
/* l_data_size is equal to 0 when numresolutions == 1 but bj is not used */
/* in that case, so do not error out */
dn = (OPJ_INT32)(rh - rh1);
/* Perform vertical pass */
- if (num_threads <= 1 || rw <= 1) {
- for (j = 0; j < rw; ++j) {
- OPJ_INT32* OPJ_RESTRICT aj = tiledp + j;
- OPJ_UINT32 k;
- for (k = 0; k < rh; ++k) {
- bj[k] = aj[k * w];
- }
-
- (*p_function)(bj, dn, sn, cas_col);
-
- opj_dwt_deinterleave_v(bj, aj, dn, sn, w, cas_col);
+ if (num_threads <= 1 || rw < 2 * NB_ELTS_V8) {
+ for (j = 0; j + NB_ELTS_V8 - 1 < rw; j += NB_ELTS_V8) {
+ p_encode_and_deinterleave_v(tiledp + j,
+ bj,
+ rh,
+ cas_col == 0,
+ w,
+ NB_ELTS_V8);
+ }
+ if (j < rw) {
+ p_encode_and_deinterleave_v(tiledp + j,
+ bj,
+ rh,
+ cas_col == 0,
+ w,
+ rw - j);
}
} else {
OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads;
if (rw < num_jobs) {
num_jobs = rw;
}
- step_j = (rw / num_jobs);
+ step_j = ((rw / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8;
for (j = 0; j < num_jobs; j++) {
opj_dwt_encode_v_job_t* job;
job->w = w;
job->tiledp = tiledp;
job->min_j = j * step_j;
- job->max_j = (j + 1U) * step_j; /* this can overflow */
- if (j == (num_jobs - 1U)) { /* this will take care of the overflow */
- job->max_j = rw;
- }
- job->p_function = p_function;
+ job->max_j = (j + 1 == num_jobs) ? rw : (j + 1) * step_j;
+ job->p_encode_and_deinterleave_v = p_encode_and_deinterleave_v;
opj_thread_pool_submit_job(tp, opj_dwt_encode_v_func, job);
}
opj_thread_pool_wait_completion(tp, 0);
if (num_threads <= 1 || rh <= 1) {
for (j = 0; j < rh; j++) {
OPJ_INT32* OPJ_RESTRICT aj = tiledp + j * w;
- OPJ_UINT32 k;
- for (k = 0; k < rw; k++) {
- bj[k] = aj[k];
- }
- (*p_function)(bj, dn, sn, cas_row);
- opj_dwt_deinterleave_h(bj, aj, dn, sn, cas_row);
+ (*p_encode_and_deinterleave_h_one_row)(aj, bj, rw,
+ cas_row == 0 ? OPJ_TRUE : OPJ_FALSE);
}
} else {
OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads;
if (j == (num_jobs - 1U)) { /* this will take care of the overflow */
job->max_j = rh;
}
- job->p_function = p_function;
+ job->p_function = p_encode_and_deinterleave_h_one_row;
opj_thread_pool_submit_job(tp, opj_dwt_encode_h_func, job);
}
opj_thread_pool_wait_completion(tp, 0);
OPJ_BOOL opj_dwt_encode(opj_tcd_t *p_tcd,
opj_tcd_tilecomp_t * tilec)
{
- return opj_dwt_encode_procedure(p_tcd->thread_pool, tilec, opj_dwt_encode_1);
+ return opj_dwt_encode_procedure(p_tcd->thread_pool, tilec,
+ opj_dwt_encode_and_deinterleave_v,
+ opj_dwt_encode_and_deinterleave_h_one_row);
}
/* <summary> */
opj_tcd_tilecomp_t * tilec)
{
return opj_dwt_encode_procedure(p_tcd->thread_pool, tilec,
- opj_dwt_encode_1_real);
+ opj_dwt_encode_and_deinterleave_v_real,
+ opj_dwt_encode_and_deinterleave_h_one_row_real);
}
/* <summary> */
#endif
}
+typedef struct {
+ opj_v8dwt_t h;
+ OPJ_UINT32 rw;
+ OPJ_UINT32 w;
+ OPJ_FLOAT32 * OPJ_RESTRICT aj;
+ OPJ_UINT32 nb_rows;
+} opj_dwt97_decode_h_job_t;
+
+static void opj_dwt97_decode_h_func(void* user_data, opj_tls_t* tls)
+{
+ OPJ_UINT32 j;
+ opj_dwt97_decode_h_job_t* job;
+ OPJ_FLOAT32 * OPJ_RESTRICT aj;
+ OPJ_UINT32 w;
+ (void)tls;
+
+ job = (opj_dwt97_decode_h_job_t*)user_data;
+ w = job->w;
+
+ assert((job->nb_rows % NB_ELTS_V8) == 0);
+
+ aj = job->aj;
+ for (j = 0; j + NB_ELTS_V8 <= job->nb_rows; j += NB_ELTS_V8) {
+ OPJ_UINT32 k;
+ opj_v8dwt_interleave_h(&job->h, aj, job->w, NB_ELTS_V8);
+ opj_v8dwt_decode(&job->h);
+
+ /* To be adapted if NB_ELTS_V8 changes */
+ for (k = 0; k < job->rw; k++) {
+ aj[k ] = job->h.wavelet[k].f[0];
+ aj[k + (OPJ_SIZE_T)w ] = job->h.wavelet[k].f[1];
+ aj[k + (OPJ_SIZE_T)w * 2] = job->h.wavelet[k].f[2];
+ aj[k + (OPJ_SIZE_T)w * 3] = job->h.wavelet[k].f[3];
+ }
+ for (k = 0; k < job->rw; k++) {
+ aj[k + (OPJ_SIZE_T)w * 4] = job->h.wavelet[k].f[4];
+ aj[k + (OPJ_SIZE_T)w * 5] = job->h.wavelet[k].f[5];
+ aj[k + (OPJ_SIZE_T)w * 6] = job->h.wavelet[k].f[6];
+ aj[k + (OPJ_SIZE_T)w * 7] = job->h.wavelet[k].f[7];
+ }
+
+ aj += w * NB_ELTS_V8;
+ }
+
+ opj_aligned_free(job->h.wavelet);
+ opj_free(job);
+}
+
+
+typedef struct {
+ opj_v8dwt_t v;
+ OPJ_UINT32 rh;
+ OPJ_UINT32 w;
+ OPJ_FLOAT32 * OPJ_RESTRICT aj;
+ OPJ_UINT32 nb_columns;
+} opj_dwt97_decode_v_job_t;
+
+static void opj_dwt97_decode_v_func(void* user_data, opj_tls_t* tls)
+{
+ OPJ_UINT32 j;
+ opj_dwt97_decode_v_job_t* job;
+ OPJ_FLOAT32 * OPJ_RESTRICT aj;
+ (void)tls;
+
+ job = (opj_dwt97_decode_v_job_t*)user_data;
+
+ assert((job->nb_columns % NB_ELTS_V8) == 0);
+
+ aj = job->aj;
+ for (j = 0; j + NB_ELTS_V8 <= job->nb_columns; j += NB_ELTS_V8) {
+ OPJ_UINT32 k;
+
+ opj_v8dwt_interleave_v(&job->v, aj, job->w, NB_ELTS_V8);
+ opj_v8dwt_decode(&job->v);
+
+ for (k = 0; k < job->rh; ++k) {
+ memcpy(&aj[k * (OPJ_SIZE_T)job->w], &job->v.wavelet[k],
+ NB_ELTS_V8 * sizeof(OPJ_FLOAT32));
+ }
+ aj += NB_ELTS_V8;
+ }
+
+ opj_aligned_free(job->v.wavelet);
+ opj_free(job);
+}
+
/* <summary> */
/* Inverse 9-7 wavelet transform in 2-D. */
/* </summary> */
static
-OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
+OPJ_BOOL opj_dwt_decode_tile_97(opj_thread_pool_t* tp,
+ opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
OPJ_UINT32 numres)
{
opj_v8dwt_t h;
tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
OPJ_SIZE_T l_data_size;
+ const int num_threads = opj_thread_pool_get_thread_count(tp);
- l_data_size = opj_dwt_max_resolution(res, numres);
- /* overflow check */
- if (l_data_size > (SIZE_MAX - 5U)) {
- /* FIXME event manager error callback */
- return OPJ_FALSE;
+ if (numres == 1) {
+ return OPJ_TRUE;
}
- l_data_size += 5U;
+
+ l_data_size = opj_dwt_max_resolution(res, numres);
/* overflow check */
if (l_data_size > (SIZE_MAX / sizeof(opj_v8_t))) {
/* FIXME event manager error callback */
h.win_l_x1 = (OPJ_UINT32)h.sn;
h.win_h_x0 = 0;
h.win_h_x1 = (OPJ_UINT32)h.dn;
- for (j = 0; j + (NB_ELTS_V8 - 1) < rh; j += NB_ELTS_V8) {
- OPJ_UINT32 k;
- opj_v8dwt_interleave_h(&h, aj, w, rh - j);
- opj_v8dwt_decode(&h);
- /* To be adapted if NB_ELTS_V8 changes */
- for (k = 0; k < rw; k++) {
- aj[k ] = h.wavelet[k].f[0];
- aj[k + (OPJ_SIZE_T)w ] = h.wavelet[k].f[1];
- aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2];
- aj[k + (OPJ_SIZE_T)w * 3] = h.wavelet[k].f[3];
+ if (num_threads <= 1 || rh < 2 * NB_ELTS_V8) {
+ for (j = 0; j + (NB_ELTS_V8 - 1) < rh; j += NB_ELTS_V8) {
+ OPJ_UINT32 k;
+ opj_v8dwt_interleave_h(&h, aj, w, NB_ELTS_V8);
+ opj_v8dwt_decode(&h);
+
+ /* To be adapted if NB_ELTS_V8 changes */
+ for (k = 0; k < rw; k++) {
+ aj[k ] = h.wavelet[k].f[0];
+ aj[k + (OPJ_SIZE_T)w ] = h.wavelet[k].f[1];
+ aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2];
+ aj[k + (OPJ_SIZE_T)w * 3] = h.wavelet[k].f[3];
+ }
+ for (k = 0; k < rw; k++) {
+ aj[k + (OPJ_SIZE_T)w * 4] = h.wavelet[k].f[4];
+ aj[k + (OPJ_SIZE_T)w * 5] = h.wavelet[k].f[5];
+ aj[k + (OPJ_SIZE_T)w * 6] = h.wavelet[k].f[6];
+ aj[k + (OPJ_SIZE_T)w * 7] = h.wavelet[k].f[7];
+ }
+
+ aj += w * NB_ELTS_V8;
}
- for (k = 0; k < rw; k++) {
- aj[k + (OPJ_SIZE_T)w * 4] = h.wavelet[k].f[4];
- aj[k + (OPJ_SIZE_T)w * 5] = h.wavelet[k].f[5];
- aj[k + (OPJ_SIZE_T)w * 6] = h.wavelet[k].f[6];
- aj[k + (OPJ_SIZE_T)w * 7] = h.wavelet[k].f[7];
+ } else {
+ OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads;
+ OPJ_UINT32 step_j;
+
+ if ((rh / NB_ELTS_V8) < num_jobs) {
+ num_jobs = rh / NB_ELTS_V8;
}
+ step_j = ((rh / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8;
+ for (j = 0; j < num_jobs; j++) {
+ opj_dwt97_decode_h_job_t* job;
- aj += w * NB_ELTS_V8;
+ job = (opj_dwt97_decode_h_job_t*) opj_malloc(sizeof(opj_dwt97_decode_h_job_t));
+ if (!job) {
+ opj_thread_pool_wait_completion(tp, 0);
+ opj_aligned_free(h.wavelet);
+ return OPJ_FALSE;
+ }
+ job->h.wavelet = (opj_v8_t*)opj_aligned_malloc(l_data_size * sizeof(opj_v8_t));
+ if (!job->h.wavelet) {
+ opj_thread_pool_wait_completion(tp, 0);
+ opj_free(job);
+ opj_aligned_free(h.wavelet);
+ return OPJ_FALSE;
+ }
+ job->h.dn = h.dn;
+ job->h.sn = h.sn;
+ job->h.cas = h.cas;
+ job->h.win_l_x0 = h.win_l_x0;
+ job->h.win_l_x1 = h.win_l_x1;
+ job->h.win_h_x0 = h.win_h_x0;
+ job->h.win_h_x1 = h.win_h_x1;
+ job->rw = rw;
+ job->w = w;
+ job->aj = aj;
+ job->nb_rows = (j + 1 == num_jobs) ? (rh & (OPJ_UINT32)~
+ (NB_ELTS_V8 - 1)) - j * step_j : step_j;
+ aj += w * job->nb_rows;
+ opj_thread_pool_submit_job(tp, opj_dwt97_decode_h_func, job);
+ }
+ opj_thread_pool_wait_completion(tp, 0);
+ j = rh & (OPJ_UINT32)~(NB_ELTS_V8 - 1);
}
if (j < rh) {
v.win_h_x1 = (OPJ_UINT32)v.dn;
aj = (OPJ_FLOAT32*) tilec->data;
- for (j = rw; j > (NB_ELTS_V8 - 1); j -= NB_ELTS_V8) {
- OPJ_UINT32 k;
+ if (num_threads <= 1 || rw < 2 * NB_ELTS_V8) {
+ for (j = rw; j > (NB_ELTS_V8 - 1); j -= NB_ELTS_V8) {
+ OPJ_UINT32 k;
- opj_v8dwt_interleave_v(&v, aj, w, NB_ELTS_V8);
- opj_v8dwt_decode(&v);
+ opj_v8dwt_interleave_v(&v, aj, w, NB_ELTS_V8);
+ opj_v8dwt_decode(&v);
- for (k = 0; k < rh; ++k) {
- memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], NB_ELTS_V8 * sizeof(OPJ_FLOAT32));
+ for (k = 0; k < rh; ++k) {
+ memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], NB_ELTS_V8 * sizeof(OPJ_FLOAT32));
+ }
+ aj += NB_ELTS_V8;
+ }
+ } else {
+ /* "bench_dwt -I" shows that scaling is poor, likely due to RAM
+ transfer being the limiting factor. So limit the number of
+ threads.
+ */
+ OPJ_UINT32 num_jobs = opj_uint_max((OPJ_UINT32)num_threads / 2, 2U);
+ OPJ_UINT32 step_j;
+
+ if ((rw / NB_ELTS_V8) < num_jobs) {
+ num_jobs = rw / NB_ELTS_V8;
+ }
+ step_j = ((rw / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8;
+ for (j = 0; j < num_jobs; j++) {
+ opj_dwt97_decode_v_job_t* job;
+
+ job = (opj_dwt97_decode_v_job_t*) opj_malloc(sizeof(opj_dwt97_decode_v_job_t));
+ if (!job) {
+ opj_thread_pool_wait_completion(tp, 0);
+ opj_aligned_free(h.wavelet);
+ return OPJ_FALSE;
+ }
+ job->v.wavelet = (opj_v8_t*)opj_aligned_malloc(l_data_size * sizeof(opj_v8_t));
+ if (!job->v.wavelet) {
+ opj_thread_pool_wait_completion(tp, 0);
+ opj_free(job);
+ opj_aligned_free(h.wavelet);
+ return OPJ_FALSE;
+ }
+ job->v.dn = v.dn;
+ job->v.sn = v.sn;
+ job->v.cas = v.cas;
+ job->v.win_l_x0 = v.win_l_x0;
+ job->v.win_l_x1 = v.win_l_x1;
+ job->v.win_h_x0 = v.win_h_x0;
+ job->v.win_h_x1 = v.win_h_x1;
+ job->rh = rh;
+ job->w = w;
+ job->aj = aj;
+ job->nb_columns = (j + 1 == num_jobs) ? (rw & (OPJ_UINT32)~
+ (NB_ELTS_V8 - 1)) - j * step_j : step_j;
+ aj += job->nb_columns;
+ opj_thread_pool_submit_job(tp, opj_dwt97_decode_v_func, job);
}
- aj += NB_ELTS_V8;
+ opj_thread_pool_wait_completion(tp, 0);
}
if (rw & (NB_ELTS_V8 - 1)) {
l_data_size = opj_dwt_max_resolution(tr, numres);
/* overflow check */
- if (l_data_size > (SIZE_MAX - 5U)) {
- /* FIXME event manager error callback */
- opj_sparse_array_int32_free(sa);
- return OPJ_FALSE;
- }
- l_data_size += 5U;
- /* overflow check */
if (l_data_size > (SIZE_MAX / sizeof(opj_v8_t))) {
/* FIXME event manager error callback */
opj_sparse_array_int32_free(sa);
OPJ_UINT32 numres)
{
if (p_tcd->whole_tile_decoding) {
- return opj_dwt_decode_tile_97(tilec, numres);
+ return opj_dwt_decode_tile_97(p_tcd->thread_pool, tilec, numres);
} else {
return opj_dwt_decode_partial_97(tilec, numres);
}