diff options
| author | Even Rouault <even.rouault@spatialys.com> | 2017-09-01 16:31:00 +0200 |
|---|---|---|
| committer | Even Rouault <even.rouault@spatialys.com> | 2017-09-01 16:31:00 +0200 |
| commit | 873004c615b1bed3ce780e869288602af86fdee5 (patch) | |
| tree | e99b1c7a83b2b2bddef8b987a4fc1f2290968e48 /src/lib/openjp2/sparse_array.c | |
| parent | ccac773556070ede24ea3dfbdec47c2b3c5be5c4 (diff) | |
Sub-tile decoding: speed up vertical pass in IDWT5x3 by processing 4 cols at a time
Diffstat (limited to 'src/lib/openjp2/sparse_array.c')
| -rw-r--r-- | src/lib/openjp2/sparse_array.c | 47 |
1 file changed, 39 insertions, 8 deletions
diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c index b0634f67..48c4b23b 100644 --- a/src/lib/openjp2/sparse_array.c +++ b/src/lib/openjp2/sparse_array.c @@ -165,10 +165,20 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( if (buf_col_stride == 1) { OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + (x - x0) * buf_col_stride; - for (j = 0; j < y_incr; j++) { - memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); - dest_ptr += buf_line_stride; - src_ptr += block_width; + if (x_incr == 4) { + // Same code as general branch, but the compiler + // can have an efficient memcpy() + for (j = 0; j < y_incr; j++) { + memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); + dest_ptr += buf_line_stride; + src_ptr += block_width; + } + } else { + for (j = 0; j < y_incr; j++) { + memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); + dest_ptr += buf_line_stride; + src_ptr += block_width; + } } } else { OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + @@ -179,6 +189,17 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( dest_ptr += buf_line_stride; src_ptr += block_width; } + } else if (y_incr == 1 && buf_col_stride == 2) { + OPJ_UINT32 k; + for (k = 0; k < (x_incr & ~3U); k += 4) { + dest_ptr[k * buf_col_stride] = src_ptr[k]; + dest_ptr[(k + 1) * buf_col_stride] = src_ptr[k + 1]; + dest_ptr[(k + 2) * buf_col_stride] = src_ptr[k + 2]; + dest_ptr[(k + 3) * buf_col_stride] = src_ptr[k + 3]; + } + for (; k < x_incr; k++) { + dest_ptr[k * buf_col_stride] = src_ptr[k]; + } } else { /* General case */ for (j = 0; j < y_incr; j++) { @@ -207,10 +228,20 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( (size_t)block_width + block_x_offset; const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) * (size_t)buf_line_stride + (x - x0) * buf_col_stride; - for (j = 0; j < y_incr; j++) { - memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); - dest_ptr += block_width; - src_ptr += 
buf_line_stride; + if (x_incr == 4) { + // Same code as general branch, but the compiler + // can have an efficient memcpy() + for (j = 0; j < y_incr; j++) { + memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); + dest_ptr += block_width; + src_ptr += buf_line_stride; + } + } else { + for (j = 0; j < y_incr; j++) { + memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); + dest_ptr += block_width; + src_ptr += buf_line_stride; + } } } else { OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset * |
