if (buf_col_stride == 1) {
OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride +
(x - x0) * buf_col_stride;
- for (j = 0; j < y_incr; j++) {
- memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
- dest_ptr += buf_line_stride;
- src_ptr += block_width;
+ if (x_incr == 4) {
+ // Same code as the general branch, but because x_incr is known to be 4
+ // here, the compiler can emit an efficient fixed-size memcpy()
+ for (j = 0; j < y_incr; j++) {
+ memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+ dest_ptr += buf_line_stride;
+ src_ptr += block_width;
+ }
+ } else {
+ for (j = 0; j < y_incr; j++) {
+ memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+ dest_ptr += buf_line_stride;
+ src_ptr += block_width;
+ }
}
} else {
OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride +
dest_ptr += buf_line_stride;
src_ptr += block_width;
}
+ } else if (y_incr == 1 && buf_col_stride == 2) {
+ OPJ_UINT32 k;
+ for (k = 0; k < (x_incr & ~3U); k += 4) {
+ dest_ptr[k * buf_col_stride] = src_ptr[k];
+ dest_ptr[(k + 1) * buf_col_stride] = src_ptr[k + 1];
+ dest_ptr[(k + 2) * buf_col_stride] = src_ptr[k + 2];
+ dest_ptr[(k + 3) * buf_col_stride] = src_ptr[k + 3];
+ }
+ for (; k < x_incr; k++) {
+ dest_ptr[k * buf_col_stride] = src_ptr[k];
+ }
} else {
/* General case */
for (j = 0; j < y_incr; j++) {
(size_t)block_width + block_x_offset;
const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) *
(size_t)buf_line_stride + (x - x0) * buf_col_stride;
- for (j = 0; j < y_incr; j++) {
- memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
- dest_ptr += block_width;
- src_ptr += buf_line_stride;
+ if (x_incr == 4) {
+ // Same code as the general branch, but because x_incr is known to be 4
+ // here, the compiler can emit an efficient fixed-size memcpy()
+ for (j = 0; j < y_incr; j++) {
+ memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+ dest_ptr += block_width;
+ src_ptr += buf_line_stride;
+ }
+ } else {
+ for (j = 0; j < y_incr; j++) {
+ memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+ dest_ptr += block_width;
+ src_ptr += buf_line_stride;
+ }
}
} else {
OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset *