summary | refs | log | tree | commit | diff
path: root/src/lib
diff options
context:
space:
mode:
author Even Rouault <even.rouault@spatialys.com> 2017-09-01 22:09:58 +0200
committer Even Rouault <even.rouault@spatialys.com> 2017-09-01 22:23:29 +0200
commit c1e0fba0c46cb528a08b89b986e86ff0f4792558 (patch)
tree 8984bb53ab56de225d3420fdcb83cbf59e08e427 /src/lib
parent 676d4c807f0162877f9f32e4cdda2e242788d410 (diff)
opj_v4dwt_decode_step1_sse(): rework a bit to improve code generation
Diffstat (limited to 'src/lib')
-rw-r--r-- src/lib/openjp2/dwt.c 21
1 files changed, 13 insertions, 8 deletions
diff --git a/src/lib/openjp2/dwt.c b/src/lib/openjp2/dwt.c
index 71597f81..719c7330 100644
--- a/src/lib/openjp2/dwt.c
+++ b/src/lib/openjp2/dwt.c
@@ -2274,14 +2274,19 @@ static void opj_v4dwt_decode_step1_sse(opj_v4_t* w,
__m128* OPJ_RESTRICT vw = (__m128*) w;
OPJ_UINT32 i;
/* 4x unrolled loop */
- for (i = start; i + 3 < end; i += 4) {
- vw[2 * i] = _mm_mul_ps(vw[2 * i], c);
- vw[2 * i + 2] = _mm_mul_ps(vw[2 * i + 2], c);
- vw[2 * i + 4] = _mm_mul_ps(vw[2 * i + 4], c);
- vw[2 * i + 6] = _mm_mul_ps(vw[2 * i + 6], c);
- }
- for (; i < end; ++i) {
- vw[2 * i] = _mm_mul_ps(vw[2 * i], c);
+ vw += 2 * start;
+ for (i = start; i + 3 < end; i += 4, vw += 8) {
+ __m128 xmm0 = _mm_mul_ps(vw[0], c);
+ __m128 xmm2 = _mm_mul_ps(vw[2], c);
+ __m128 xmm4 = _mm_mul_ps(vw[4], c);
+ __m128 xmm6 = _mm_mul_ps(vw[6], c);
+ vw[0] = xmm0;
+ vw[2] = xmm2;
+ vw[4] = xmm4;
+ vw[6] = xmm6;
+ }
+ for (; i < end; ++i, vw += 2) {
+ vw[0] = _mm_mul_ps(vw[0], c);
}
}