summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Even Rouault <even.rouault@spatialys.com> 2016-05-23 13:45:15 +0200
committer Even Rouault <even.rouault@spatialys.com> 2016-05-23 13:45:15 +0200
commit 107eb31531ca688e2799406e69e9383efc13448f (patch)
tree ea630b3364734ee4d1bf9652897aa97d4309d335 /src
parent 8371491a9968a31ce16d6ce37b775ef3c7d090c8 (diff)
Improve perf of opj_t1_dec_sigpass_mqc_vsc() and opj_t1_dec_refpass_mqc_vsc() with loop unrolling
Diffstat (limited to 'src')
-rw-r--r-- src/lib/openjp2/t1.c 92
1 file changed, 67 insertions, 25 deletions
diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c
index 0023ad74..277261d7 100644
--- a/src/lib/openjp2/t1.c
+++ b/src/lib/openjp2/t1.c
@@ -660,27 +660,48 @@ static void opj_t1_dec_sigpass_mqc_vsc(
{
OPJ_INT32 one, half, oneplushalf, vsc;
OPJ_UINT32 i, j, k;
- opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1];
+ OPJ_INT32 *data1 = t1->data;
+ opj_flag_t *flags1 = &t1->flags[1];
+ opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1];
one = 1 << bpno;
half = one >> 1;
oneplushalf = one | half;
- for (k = 0; k < t1->h; k += 4) {
+ for (k = 0; k < (t1->h & ~3); k += 4) {
for (i = 0; i < t1->w; ++i) {
+ OPJ_INT32 *data2 = data1 + i;
+ opj_flag_t *flags2 = flags1 + i;
opj_colflag_t *colflags2 = colflags1 + i;
- for (j = k; j < k + 4 && j < t1->h; ++j) {
- vsc = (j == k + 3 || j == t1->h - 1) ? 1 : 0;
- opj_t1_dec_sigpass_step_mqc_vsc(
- t1,
- &t1->flags[((j+1) * t1->flags_stride) + i + 1],
- colflags2,
- &t1->data[(j * t1->w) + i],
- oneplushalf,
- vsc,
- j - k);
- }
+ flags2 += t1->flags_stride;
+ opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 0, 0);
+ data2 += t1->w;
+ flags2 += t1->flags_stride;
+ opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 0, 1);
+ data2 += t1->w;
+ flags2 += t1->flags_stride;
+ opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 0, 2);
+ data2 += t1->w;
+ flags2 += t1->flags_stride;
+ opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 1, 3);
+ data2 += t1->w;
}
+ data1 += t1->w << 2;
+ flags1 += t1->flags_stride << 2;
colflags1 += t1->flags_stride;
}
+ for (i = 0; i < t1->w; ++i) {
+ opj_colflag_t *colflags2 = colflags1 + i;
+ for (j = k; j < t1->h; ++j) {
+ vsc = (j == t1->h - 1) ? 1 : 0;
+ opj_t1_dec_sigpass_step_mqc_vsc(
+ t1,
+ &t1->flags[((j+1) * t1->flags_stride) + i + 1],
+ colflags2,
+ &t1->data[(j * t1->w) + i],
+ oneplushalf,
+ vsc,
+ j - k);
+ }
+ }
} /* VSC and BYPASS by Antonin */
@@ -921,27 +942,48 @@ static void opj_t1_dec_refpass_mqc_vsc(
OPJ_INT32 one, poshalf, neghalf;
OPJ_UINT32 i, j, k;
OPJ_INT32 vsc;
+ OPJ_INT32 *data1 = t1->data;
+ opj_flag_t *flags1 = &t1->flags[1];
opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1];
one = 1 << bpno;
poshalf = one >> 1;
neghalf = bpno > 0 ? -poshalf : -1;
- for (k = 0; k < t1->h; k += 4) {
+ for (k = 0; k < (t1->h & ~3); k += 4) {
for (i = 0; i < t1->w; ++i) {
+ OPJ_INT32 *data2 = data1 + i;
+ opj_flag_t *flags2 = flags1 + i;
opj_colflag_t *colflags2 = colflags1 + i;
- for (j = k; j < k + 4 && j < t1->h; ++j) {
- vsc = ((j == k + 3 || j == t1->h - 1)) ? 1 : 0;
- opj_t1_dec_refpass_step_mqc_vsc(
- t1,
- &t1->flags[((j+1) * t1->flags_stride) + i + 1],
- colflags2,
- &t1->data[(j * t1->w) + i],
- poshalf,
- neghalf,
- vsc, j - k);
- }
+ flags2 += t1->flags_stride;
+ opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, 0, 0);
+ data2 += t1->w;
+ flags2 += t1->flags_stride;
+ opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, 0, 1);
+ data2 += t1->w;
+ flags2 += t1->flags_stride;
+ opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, 0, 2);
+ data2 += t1->w;
+ flags2 += t1->flags_stride;
+ opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf, 1, 3);
+ data2 += t1->w;
}
+ data1 += t1->w << 2;
+ flags1 += t1->flags_stride << 2;
colflags1 += t1->flags_stride;
}
+ for (i = 0; i < t1->w; ++i) {
+ opj_colflag_t *colflags2 = colflags1 + i;
+ for (j = k; j < t1->h; ++j) {
+ vsc = (j == t1->h - 1) ? 1 : 0;
+ opj_t1_dec_refpass_step_mqc_vsc(
+ t1,
+ &t1->flags[((j+1) * t1->flags_stride) + i + 1],
+ colflags2,
+ &t1->data[(j * t1->w) + i],
+ poshalf, neghalf,
+ vsc,
+ j - k);
+ }
+ }
} /* VSC and BYPASS by Antonin */