summary | refs | log | tree | commit | diff
path: root/src/lib
diff options
context:
space:
mode:
author Even Rouault <even.rouault@spatialys.com> 2017-05-31 17:15:27 +0200
committer Even Rouault <even.rouault@spatialys.com> 2017-06-02 18:20:58 +0200
commit aa7a8a4398b3792b4a52fb443d49d317a9a4984f (patch)
tree e54adf4aceb28cc1037771f9ee9b4b696295b5f9 /src/lib
parent 68557ff5036e68112bcbf194d2a7f48b1f58b752 (diff)
T1: loop unrolling in dec_sigpass_raw and dec_refpass_raw
Diffstat (limited to 'src/lib')
-rw-r--r-- src/lib/openjp2/t1.c 136
1 files changed, 119 insertions, 17 deletions
diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c
index bfff8503..44855f22 100644
--- a/src/lib/openjp2/t1.c
+++ b/src/lib/openjp2/t1.c
@@ -630,28 +630,91 @@ static void opj_t1_dec_sigpass_raw(
OPJ_INT32 bpno,
OPJ_INT32 cblksty)
{
- OPJ_INT32 one, half, oneplushalf, vsc;
+ OPJ_INT32 one, half, oneplushalf;
OPJ_UINT32 i, j, k;
opj_flag_t *flagsp = &T1_FLAGS(0, 0);
one = 1 << bpno;
half = one >> 1;
oneplushalf = one | half;
- for (k = 0; k < t1->h; k += 4) {
- for (i = 0; i < t1->w; ++i) {
- opj_flag_t *flagsp2 = flagsp + i;
- for (j = k; j < k + 4 && j < t1->h; ++j) {
- vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 ||
- j == t1->h - 1)) ? 1 : 0;
+ if ((cblksty & J2K_CCP_CBLKSTY_VSC)) {
+ OPJ_INT32 *data1 = t1->data;
+ for (k = 0; k < t1->h; k += 4) {
+ for (i = 0; i < t1->w; ++i) {
+ OPJ_INT32* data2 = data1 + i;
+ opj_flag_t *flagsp2 = flagsp + i;
+ for (j = k; j < k + 4 && j < t1->h; ++j) {
+ OPJ_INT32 vsc = (j == k + 3 || j == t1->h - 1) ? 1 : 0;
+ opj_t1_dec_sigpass_step_raw(
+ t1,
+ flagsp2,
+ data2,
+ oneplushalf,
+ vsc,
+ j - k);
+ data2 += t1->w;
+ }
+ }
+ data1 += t1->w << 2;
+ flagsp += t1->flags_stride;
+ }
+ } else {
+ OPJ_INT32 *data1 = t1->data;
+ for (k = 0; k < (t1->h & ~3U); k += 4) {
+ for (i = 0; i < t1->w; ++i) {
+ OPJ_INT32* data2 = data1 + i;
+ opj_flag_t *flagsp2 = flagsp + i;
+ opj_t1_dec_sigpass_step_raw(
+ t1,
+ flagsp2,
+ data2,
+ oneplushalf,
+ 0, /* vsc */
+ 0U);
+ data2 += t1->w;
+ opj_t1_dec_sigpass_step_raw(
+ t1,
+ flagsp2,
+ data2,
+ oneplushalf,
+ 0, /* vsc */
+ 1U);
+ data2 += t1->w;
+ opj_t1_dec_sigpass_step_raw(
+ t1,
+ flagsp2,
+ data2,
+ oneplushalf,
+ 0, /* vsc */
+ 2U);
+ data2 += t1->w;
opj_t1_dec_sigpass_step_raw(
t1,
flagsp2,
- &t1->data[(j * t1->w) + i],
+ data2,
oneplushalf,
- vsc,
- j - k);
+ 0, /* vsc */
+ 3U);
+ data2 += t1->w;
+ }
+ data1 += t1->w << 2;
+ flagsp += t1->flags_stride;
+ }
+ if (k < t1->h) {
+ for (i = 0; i < t1->w; ++i) {
+ OPJ_INT32* data2 = data1 + i;
+ opj_flag_t *flagsp2 = flagsp + i;
+ for (j = 0; j < t1->h - k; ++j) {
+ opj_t1_dec_sigpass_step_raw(
+ t1,
+ flagsp2,
+ data2,
+ oneplushalf,
+ 0, /* vsc */
+ j);
+ data2 += t1->w;
+ }
}
}
- flagsp += t1->flags_stride;
}
}
@@ -798,13 +861,14 @@ static INLINE void opj_t1_dec_refpass_step_raw(
OPJ_INT32 neghalf,
OPJ_UINT32 ci)
{
- OPJ_INT32 v, t;
+ OPJ_UINT32 v;
+ OPJ_INT32 t;
opj_raw_t *raw = t1->raw; /* RAW component */
if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) ==
(T1_SIGMA_THIS << (ci * 3U))) {
- v = (OPJ_INT32)opj_raw_decode(raw);
+ v = opj_raw_decode(raw);
t = v ? poshalf : neghalf;
*datap += *datap < 0 ? -t : t;
*flagsp |= T1_MU_THIS << (ci * 3U);
@@ -981,23 +1045,61 @@ static void opj_t1_dec_refpass_raw(
{
OPJ_INT32 one, poshalf, neghalf;
OPJ_UINT32 i, j, k;
+ OPJ_INT32 *data1 = t1->data;
opj_flag_t *flagsp1 = &T1_FLAGS(0, 0);
one = 1 << bpno;
poshalf = one >> 1;
neghalf = bpno > 0 ? -poshalf : -1;
- for (k = 0; k < t1->h; k += 4) {
+ for (k = 0; k < (t1->h & ~3U); k += 4) {
+ for (i = 0; i < t1->w; ++i) {
+ OPJ_INT32 *data2 = data1 + i;
+ opj_flag_t *flagsp2 = flagsp1 + i;
+ opj_t1_dec_refpass_step_raw(
+ t1,
+ flagsp2,
+ data2,
+ poshalf,
+ neghalf, 0U);
+ data2 += t1->w;
+ opj_t1_dec_refpass_step_raw(
+ t1,
+ flagsp2,
+ data2,
+ poshalf,
+ neghalf, 1U);
+ data2 += t1->w;
+ opj_t1_dec_refpass_step_raw(
+ t1,
+ flagsp2,
+ data2,
+ poshalf,
+ neghalf, 2U);
+ data2 += t1->w;
+ opj_t1_dec_refpass_step_raw(
+ t1,
+ flagsp2,
+ data2,
+ poshalf,
+ neghalf, 3U);
+ data2 += t1->w;
+ }
+ data1 += t1->w << 2;
+ flagsp1 += t1->flags_stride;
+ }
+ if (k < t1->h) {
for (i = 0; i < t1->w; ++i) {
+ OPJ_INT32 *data2 = data1 + i;
opj_flag_t *flagsp2 = flagsp1 + i;
- for (j = k; j < k + 4 && j < t1->h; ++j) {
+ for (j = k; j < t1->h; ++j) {
opj_t1_dec_refpass_step_raw(
t1,
flagsp2,
- &t1->data[(j * t1->w) + i],
+ data2,
poshalf,
neghalf, j - k);
+ data2 += t1->w;
}
}
- flagsp1 += t1->flags_stride;
}
}