summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorEven Rouault <even.rouault@spatialys.com>2017-09-01 21:17:26 +0200
committerEven Rouault <even.rouault@spatialys.com>2017-09-01 22:23:29 +0200
commit4c7effa6bc37beb2a8e2f29ecf5845cde93f6b88 (patch)
treebedc0dc6b1670c5fc06af8540ffb9a3e4ff1a87c /src
parent2c365fe0ecc8c6597db491a953a91308b1d0d4b1 (diff)
opj_t1_clbl_decode_processor(): use SSE2 in subtile decoding code path, for irreversible
Diffstat (limited to 'src')
-rw-r--r--src/lib/openjp2/t1.c36
1 files changed, 35 insertions, 1 deletions
diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c
index 54fb814a..0cc6f250 100644
--- a/src/lib/openjp2/t1.c
+++ b/src/lib/openjp2/t1.c
@@ -38,7 +38,20 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#define OPJ_SKIP_POISON
#include "opj_includes.h"
+
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+#if defined(__GNUC__)
+#pragma GCC poison malloc calloc realloc free
+#endif
+
#include "t1_luts.h"
/** @defgroup T1 T1 - Implementation of the tier-1 coding */
@@ -1710,7 +1723,28 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
datap[i] /= 2;
}
} else { /* if (tccp->qmfbid == 0) */
- for (i = 0; i < cblk_size; ++i) {
+ i = 0;
+#ifdef __SSE2__
+ {
+ const __m128 xmm_stepsize = _mm_set1_ps(band->stepsize);
+ for (; i < (cblk_size & ~15U); i += 16) {
+ __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+ datap + 0)));
+ __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+ datap + 4)));
+ __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+ datap + 8)));
+ __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+ datap + 12)));
+ _mm_store_ps((float*)(datap + 0), _mm_mul_ps(xmm0_data, xmm_stepsize));
+ _mm_store_ps((float*)(datap + 4), _mm_mul_ps(xmm1_data, xmm_stepsize));
+ _mm_store_ps((float*)(datap + 8), _mm_mul_ps(xmm2_data, xmm_stepsize));
+ _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize));
+ datap += 16;
+ }
+ }
+#endif
+ for (; i < cblk_size; ++i) {
OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize;
memcpy(datap, &tmp, sizeof(tmp));
datap++;