summary | refs | log | tree | commit | diff
path: root/src/lib/openjp2/sparse_array.c
diff options
context:
space:
mode:
author Even Rouault <even.rouault@spatialys.com> 2017-09-01 16:31:00 +0200
committer Even Rouault <even.rouault@spatialys.com> 2017-09-01 16:31:00 +0200
commit 873004c615b1bed3ce780e869288602af86fdee5 (patch)
tree e99b1c7a83b2b2bddef8b987a4fc1f2290968e48 /src/lib/openjp2/sparse_array.c
parent ccac773556070ede24ea3dfbdec47c2b3c5be5c4 (diff)
Sub-tile decoding: speed up vertical pass in IDWT5x3 by processing 4 cols at a time
Diffstat (limited to 'src/lib/openjp2/sparse_array.c')
-rw-r--r-- src/lib/openjp2/sparse_array.c 47
1 files changed, 39 insertions, 8 deletions
diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c
index b0634f67..48c4b23b 100644
--- a/src/lib/openjp2/sparse_array.c
+++ b/src/lib/openjp2/sparse_array.c
@@ -165,10 +165,20 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
if (buf_col_stride == 1) {
OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride +
(x - x0) * buf_col_stride;
- for (j = 0; j < y_incr; j++) {
- memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
- dest_ptr += buf_line_stride;
- src_ptr += block_width;
+ if (x_incr == 4) {
+ // Same code as general branch, but the compiler
+ // can have an efficient memcpy()
+ for (j = 0; j < y_incr; j++) {
+ memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+ dest_ptr += buf_line_stride;
+ src_ptr += block_width;
+ }
+ } else {
+ for (j = 0; j < y_incr; j++) {
+ memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+ dest_ptr += buf_line_stride;
+ src_ptr += block_width;
+ }
}
} else {
OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride +
@@ -179,6 +189,17 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
dest_ptr += buf_line_stride;
src_ptr += block_width;
}
+ } else if (y_incr == 1 && buf_col_stride == 2) {
+ OPJ_UINT32 k;
+ for (k = 0; k < (x_incr & ~3U); k += 4) {
+ dest_ptr[k * buf_col_stride] = src_ptr[k];
+ dest_ptr[(k + 1) * buf_col_stride] = src_ptr[k + 1];
+ dest_ptr[(k + 2) * buf_col_stride] = src_ptr[k + 2];
+ dest_ptr[(k + 3) * buf_col_stride] = src_ptr[k + 3];
+ }
+ for (; k < x_incr; k++) {
+ dest_ptr[k * buf_col_stride] = src_ptr[k];
+ }
} else {
/* General case */
for (j = 0; j < y_incr; j++) {
@@ -207,10 +228,20 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
(size_t)block_width + block_x_offset;
const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) *
(size_t)buf_line_stride + (x - x0) * buf_col_stride;
- for (j = 0; j < y_incr; j++) {
- memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
- dest_ptr += block_width;
- src_ptr += buf_line_stride;
+ if (x_incr == 4) {
+ // Same code as general branch, but the compiler
+ // can have an efficient memcpy()
+ for (j = 0; j < y_incr; j++) {
+ memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+ dest_ptr += block_width;
+ src_ptr += buf_line_stride;
+ }
+ } else {
+ for (j = 0; j < y_incr; j++) {
+ memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+ dest_ptr += block_width;
+ src_ptr += buf_line_stride;
+ }
}
} else {
OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset *