From f9e9942330f476b66ac4a35d0ae521200878f343 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:30:29 +0200 Subject: Sub-tile decoding: only allocate tile component buffer of the needed dimension Instead of being the full tile size. * Use a sparse array mechanism to store code-blocks and intermediate stages of IDWT. * IDWT, DC level shift and MCT stages are done just on that smaller array. * Improve copy of tile component array to final image, by saving an intermediate buffer. * For full-tile decoding at reduced resolution, only allocate the tile buffer to the reduced size, instead of the full-resolution size. --- src/lib/openjp2/sparse_array.h | 141 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 src/lib/openjp2/sparse_array.h (limited to 'src/lib/openjp2/sparse_array.h') diff --git a/src/lib/openjp2/sparse_array.h b/src/lib/openjp2/sparse_array.h new file mode 100644 index 00000000..485cafea --- /dev/null +++ b/src/lib/openjp2/sparse_array.h @@ -0,0 +1,141 @@ +/* + * The copyright in this software is being made available under the 2-clauses + * BSD License, included below. This software may be subject to other third + * party and contributor rights, including patent rights, and no such rights + * are granted under this license. + * + * Copyright (c) 2017, IntoPix SA + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "opj_includes.h" + +#ifndef OPJ_SPARSE_ARRAY_H +#define OPJ_SPARSE_ARRAY_H +/** +@file sparse_array.h +@brief Sparse array management + +The functions in this file manage sparse arrays. Sparse arrays are arrays with +potential big dimensions, but with very few samples actually set. Such sparse +arrays require allocating a low amount of memory, by just allocating memory +for blocks of the array that are set. The minimum memory allocation unit is a +a block. There is a trade-off to pick up an appropriate dimension for blocks. +If it is too big, and pixels set are far from each other, too much memory will +be used. If blocks are too small, the book-keeping costs of blocks will raise. +*/ + +/** @defgroup SPARSE_ARRAY SPARSE ARRAYS - Sparse arrays */ +/*@{*/ + +/** Opaque type for sparse arrays that contain int32 values */ +typedef struct opj_sparse_array_int32 opj_sparse_array_int32_t; + +/** Creates a new sparse array. + * @param width total width of the array. + * @param height total height of the array + * @param block_width width of a block. + * @param block_height height of a block. + * @return a new sparse array instance, or NULL in case of failure. + */ +opj_sparse_array_int32_t* opj_sparse_array_int32_create(OPJ_UINT32 width, + OPJ_UINT32 height, + OPJ_UINT32 block_width, + OPJ_UINT32 block_height); + +/** Frees a sparse array. + * @param sa sparse array instance. + */ +void opj_sparse_array_int32_free(opj_sparse_array_int32_t* sa); + +/** Returns whether region bounds are valid (non empty and within array bounds) + * @param sa sparse array instance. + * @param x0 left x coordinate of the region. + * @param y0 top x coordinate of the region. + * @param x1 right x coordinate (not included) of the region. Must be greater than x0. + * @param y1 bottom y coordinate (not included) of the region. Must be greater than y0. + * @return OPJ_TRUE or OPJ_FALSE. + */ +OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa, + OPJ_UINT32 x0, + OPJ_UINT32 y0, + OPJ_UINT32 x1, + OPJ_UINT32 y1); + +/** Read the content of a rectangular region of the sparse array into a + * user buffer. + * + * Regions not written with opj_sparse_array_int32_write() are read as 0. + * + * @param sa sparse array instance. + * @param x0 left x coordinate of the region to read in the sparse array. + * @param y0 top x coordinate of the region to read in the sparse array. + * @param x1 right x coordinate (not included) of the region to read in the sparse array. Must be greater than x0. + * @param y1 bottom y coordinate (not included) of the region to read in the sparse array. Must be greater than y0. + * @param dest user buffer to fill. Must be at least sizeof(int32) * ( (y1 - y0 - 1) * dest_line_stride + (x1 - x0 - 1) * dest_col_stride + 1) bytes large. + * @param dest_col_stride spacing (in elements, not in bytes) in x dimension between consecutive elements of the user buffer. + * @param dest_line_stride spacing (in elements, not in bytes) in y dimension between consecutive elements of the user buffer. + * @param forgiving if set to TRUE and the region is invalid, OPJ_TRUE will still be returned. + * @return OPJ_TRUE in case of success. + */ +OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa, + OPJ_UINT32 x0, + OPJ_UINT32 y0, + OPJ_UINT32 x1, + OPJ_UINT32 y1, + OPJ_INT32* dest, + OPJ_UINT32 dest_col_stride, + OPJ_UINT32 dest_line_stride, + OPJ_BOOL forgiving); + + +/** Write the content of a rectangular region into the sparse array from a + * user buffer. + * + * Blocks intersecting the region are allocated, if not already done. + * + * @param sa sparse array instance. + * @param x0 left x coordinate of the region to write into the sparse array. + * @param y0 top x coordinate of the region to write into the sparse array. + * @param x1 right x coordinate (not included) of the region to write into the sparse array. Must be greater than x0. + * @param y1 bottom y coordinate (not included) of the region to write into the sparse array. Must be greater than y0. + * @param src user buffer to fill. Must be at least sizeof(int32) * ( (y1 - y0 - 1) * src_line_stride + (x1 - x0 - 1) * src_col_stride + 1) bytes large. + * @param src_col_stride spacing (in elements, not in bytes) in x dimension between consecutive elements of the user buffer. + * @param src_line_stride spacing (in elements, not in bytes) in y dimension between consecutive elements of the user buffer. + * @param forgiving if set to TRUE and the region is invalid, OPJ_TRUE will still be returned. + * @return OPJ_TRUE in case of success. + */ +OPJ_BOOL opj_sparse_array_int32_write(opj_sparse_array_int32_t* sa, + OPJ_UINT32 x0, + OPJ_UINT32 y0, + OPJ_UINT32 x1, + OPJ_UINT32 y1, + const OPJ_INT32* src, + OPJ_UINT32 src_col_stride, + OPJ_UINT32 src_line_stride, + OPJ_BOOL forgiving); + +/*@}*/ + +#endif /* OPJ_SPARSE_ARRAY_H */ \ No newline at end of file -- cgit v1.2.3 From b2cc8f7f81242f967b65e76de043e5e31663d793 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Fri, 1 Sep 2017 16:30:50 +0200 Subject: Optimize reading/write into sparse array --- src/lib/openjp2/sparse_array.c | 126 ++++++++++++++++++++++++------------ src/lib/openjp2/sparse_array.h | 4 +- src/lib/openjp2/test_sparse_array.c | 26 +++++++- 3 files changed, 111 insertions(+), 45 deletions(-) (limited to 'src/lib/openjp2/sparse_array.h') diff --git a/src/lib/openjp2/sparse_array.c b/src/lib/openjp2/sparse_array.c index 3402dca2..b0634f67 100644 --- a/src/lib/openjp2/sparse_array.c +++ b/src/lib/openjp2/sparse_array.c @@ -91,7 +91,7 @@ void opj_sparse_array_int32_free(opj_sparse_array_int32_t* sa) } } -OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa, +OPJ_BOOL opj_sparse_array_is_region_valid(const opj_sparse_array_int32_t* sa, OPJ_UINT32 x0, OPJ_UINT32 y0, OPJ_UINT32 x1, @@ -102,7 +102,7 @@ OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa, } static OPJ_BOOL opj_sparse_array_int32_read_or_write( - opj_sparse_array_int32_t* sa, + const opj_sparse_array_int32_t* sa, OPJ_UINT32 x0, OPJ_UINT32 y0, OPJ_UINT32 x1, @@ -115,6 +115,8 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( { OPJ_UINT32 y, block_y; OPJ_UINT32 y_incr = 0; + const OPJ_UINT32 block_width = sa->block_width; + if (!opj_sparse_array_is_region_valid(sa, x0, y0, x1, y1)) { return forgiving; } @@ -128,43 +130,64 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( sa->block_height; block_y_offset = sa->block_height - y_incr; y_incr = opj_uint_min(y_incr, y1 - y); - block_x = x0 / sa->block_width; + block_x = x0 / block_width; for (x = x0; x < x1; block_x ++, x += x_incr) { OPJ_UINT32 j; OPJ_UINT32 block_x_offset; OPJ_INT32* src_block; - x_incr = (x == x0) ? sa->block_width - (x0 % sa->block_width) : sa->block_width; - block_x_offset = sa->block_width - x_incr; + x_incr = (x == x0) ? block_width - (x0 % block_width) : block_width; + block_x_offset = block_width - x_incr; x_incr = opj_uint_min(x_incr, x1 - x); src_block = sa->data_blocks[block_y * sa->block_count_hor + block_x]; if (is_read_op) { if (src_block == NULL) { - for (j = 0; j < y_incr; j++) { - if (buf_col_stride == 1) { - memset(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride, - 0, - sizeof(OPJ_INT32) * x_incr); - } else { + if (buf_col_stride == 1) { + OPJ_INT32* dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + (x - x0) * buf_col_stride; + for (j = 0; j < y_incr; j++) { + memset(dest_ptr, 0, sizeof(OPJ_INT32) * x_incr); + dest_ptr += buf_line_stride; + } + } else { + OPJ_INT32* dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + (x - x0) * buf_col_stride; + for (j = 0; j < y_incr; j++) { OPJ_UINT32 k; for (k = 0; k < x_incr; k++) { - *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride) - = 0; + dest_ptr[k * buf_col_stride] = 0; } + dest_ptr += buf_line_stride; } } } else { - for (j = 0; j < y_incr; j++) { - if (buf_col_stride == 1) { - memcpy(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride, - src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset, - sizeof(OPJ_INT32) * x_incr); + const OPJ_INT32* OPJ_RESTRICT src_ptr = src_block + block_y_offset * + (size_t)block_width + block_x_offset; + if (buf_col_stride == 1) { + OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + (x - x0) * buf_col_stride; + for (j = 0; j < y_incr; j++) { + memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); + dest_ptr += buf_line_stride; + src_ptr += block_width; + } + } else { + OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride + + (x - x0) * buf_col_stride; + if (x_incr == 1) { + for (j = 0; j < y_incr; j++) { + *dest_ptr = *src_ptr; + dest_ptr += buf_line_stride; + src_ptr += block_width; + } } else { - OPJ_UINT32 k; - for (k = 0; k < x_incr; k++) { - *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride) - = - *(src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset + - k); + /* General case */ + for (j = 0; j < y_incr; j++) { + OPJ_UINT32 k; + for (k = 0; k < x_incr; k++) { + dest_ptr[k * buf_col_stride] = src_ptr[k]; + } + dest_ptr += buf_line_stride; + src_ptr += block_width; } } } @@ -179,18 +202,36 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( sa->data_blocks[block_y * sa->block_count_hor + block_x] = src_block; } - for (j = 0; j < y_incr; j++) { - if (buf_col_stride == 1) { - memcpy(src_block + (block_y_offset + j) * (size_t)sa->block_width + - block_x_offset, - buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride, - sizeof(OPJ_INT32) * x_incr); + if (buf_col_stride == 1) { + OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset * + (size_t)block_width + block_x_offset; + const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) * + (size_t)buf_line_stride + (x - x0) * buf_col_stride; + for (j = 0; j < y_incr; j++) { + memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); + dest_ptr += block_width; + src_ptr += buf_line_stride; + } + } else { + OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset * + (size_t)block_width + block_x_offset; + const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) * + (size_t)buf_line_stride + (x - x0) * buf_col_stride; + if (x_incr == 1) { + for (j = 0; j < y_incr; j++) { + *dest_ptr = *src_ptr; + src_ptr += buf_line_stride; + dest_ptr += block_width; + } } else { - OPJ_UINT32 k; - for (k = 0; k < x_incr; k++) { - *(src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset + - k) = - *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride); + /* General case */ + for (j = 0; j < y_incr; j++) { + OPJ_UINT32 k; + for (k = 0; k < x_incr; k++) { + dest_ptr[k] = src_ptr[k * buf_col_stride]; + } + src_ptr += buf_line_stride; + dest_ptr += block_width; } } } @@ -201,7 +242,7 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write( return OPJ_TRUE; } -OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa, +OPJ_BOOL opj_sparse_array_int32_read(const opj_sparse_array_int32_t* sa, OPJ_UINT32 x0, OPJ_UINT32 y0, OPJ_UINT32 x1, @@ -211,12 +252,13 @@ OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa, OPJ_UINT32 dest_line_stride, OPJ_BOOL forgiving) { - return opj_sparse_array_int32_read_or_write(sa, x0, y0, x1, y1, - dest, - dest_col_stride, - dest_line_stride, - forgiving, - OPJ_TRUE); + return opj_sparse_array_int32_read_or_write( + (opj_sparse_array_int32_t*)sa, x0, y0, x1, y1, + dest, + dest_col_stride, + dest_line_stride, + forgiving, + OPJ_TRUE); } OPJ_BOOL opj_sparse_array_int32_write(opj_sparse_array_int32_t* sa, diff --git a/src/lib/openjp2/sparse_array.h b/src/lib/openjp2/sparse_array.h index 485cafea..130fe13e 100644 --- a/src/lib/openjp2/sparse_array.h +++ b/src/lib/openjp2/sparse_array.h @@ -77,7 +77,7 @@ void opj_sparse_array_int32_free(opj_sparse_array_int32_t* sa); * @param y1 bottom y coordinate (not included) of the region. Must be greater than y0. * @return OPJ_TRUE or OPJ_FALSE. */ -OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa, +OPJ_BOOL opj_sparse_array_is_region_valid(const opj_sparse_array_int32_t* sa, OPJ_UINT32 x0, OPJ_UINT32 y0, OPJ_UINT32 x1, @@ -99,7 +99,7 @@ OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa, * @param forgiving if set to TRUE and the region is invalid, OPJ_TRUE will still be returned. * @return OPJ_TRUE in case of success. */ -OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa, +OPJ_BOOL opj_sparse_array_int32_read(const opj_sparse_array_int32_t* sa, OPJ_UINT32 x0, OPJ_UINT32 y0, OPJ_UINT32 x1, diff --git a/src/lib/openjp2/test_sparse_array.c b/src/lib/openjp2/test_sparse_array.c index 0b49110f..8e136451 100644 --- a/src/lib/openjp2/test_sparse_array.c +++ b/src/lib/openjp2/test_sparse_array.c @@ -92,6 +92,7 @@ int main() ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1, OPJ_FALSE); assert(ret); + buffer[0] = 2; ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1, OPJ_FALSE); @@ -105,6 +106,29 @@ int main() assert(buffer[0] == 2); assert(buffer[1] == 0xFF); + buffer[0] = 0xFF; + buffer[1] = 0xFF; + buffer[2] = 0xFF; + ret = opj_sparse_array_int32_read(sa, 4, 5, 4 + 1, 5 + 2, buffer, 0, 1, + OPJ_FALSE); + assert(ret); + assert(buffer[0] == 2); + assert(buffer[1] == 0); + assert(buffer[2] == 0xFF); + + buffer[0] = 3; + ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 0, 1, + OPJ_FALSE); + assert(ret); + + buffer[0] = 0; + buffer[1] = 0xFF; + ret = opj_sparse_array_int32_read(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1, + OPJ_FALSE); + assert(ret); + assert(buffer[0] == 3); + assert(buffer[1] == 0xFF); + w = 15 + 1; h = 17 + 1; memset(buffer, 0xFF, sizeof(buffer)); @@ -114,7 +138,7 @@ int main() for (j = 0; j < h; j++) { for (i = 0; i < w; i++) { if (i == 4 - 2 && j == 5 - 1) { - assert(buffer[ j * w + i ] == 2); + assert(buffer[ j * w + i ] == 3); } else { assert(buffer[ j * w + i ] == 0); } -- cgit v1.2.3