include(TestLargeFiles)
OPJ_TEST_LARGE_FILES(OPJ_HAVE_LARGEFILES)
+# Detect the aligned memory allocation functions offered by the platform
+include(CheckIncludeFiles)
+check_include_files(malloc.h HAVE_MALLOC_H)
+include(CheckSymbolExists)
+# _aligned_malloc (looked up in malloc.h) https://msdn.microsoft.com/en-us/library/8z34s9c6.aspx
+check_symbol_exists(_aligned_malloc malloc.h HAVE__ALIGNED_MALLOC)
+# posix_memalign (looked up in stdlib.h)
+check_symbol_exists(posix_memalign stdlib.h HAVE_POSIX_MEMALIGN)
+# memalign (obsolete, looked up in malloc.h)
+check_symbol_exists(memalign malloc.h HAVE_MEMALIGN)
#-----------------------------------------------------------------------------
# Build Library
if(BUILD_JPIP_SERVER)
${CMAKE_CURRENT_SOURCE_DIR}/opj_codec.h
${CMAKE_CURRENT_SOURCE_DIR}/opj_includes.h
${CMAKE_CURRENT_SOURCE_DIR}/opj_intmath.h
+ ${CMAKE_CURRENT_SOURCE_DIR}/opj_malloc.c
${CMAKE_CURRENT_SOURCE_DIR}/opj_malloc.h
${CMAKE_CURRENT_SOURCE_DIR}/opj_stdint.h
)
OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 - tr->y0); /* height of the resolution level computed */
OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
-
- h.mem = (OPJ_INT32*)
- opj_aligned_malloc(opj_dwt_max_resolution(tr, numres) * sizeof(OPJ_INT32));
+
+ if (numres == 1U) {
+ return OPJ_TRUE;
+ }
+ h.mem = (OPJ_INT32*)opj_aligned_malloc(opj_dwt_max_resolution(tr, numres) * sizeof(OPJ_INT32));
if (! h.mem){
/* FIXME event manager error callback */
return OPJ_FALSE;
OPJ_UINT32 l_nb_tiles;
OPJ_UINT32 l_max_tile_size = 0, l_current_tile_size;
OPJ_BYTE * l_current_data = 00;
+ OPJ_BOOL l_reuse_data = OPJ_FALSE;
opj_tcd_t* p_tcd = 00;
/* preconditions */
p_tcd = p_j2k->m_tcd;
l_nb_tiles = p_j2k->m_cp.th * p_j2k->m_cp.tw;
+ if (l_nb_tiles == 1) {
+ l_reuse_data = OPJ_TRUE;
+#ifdef __SSE__
+ for (j=0;j<p_j2k->m_tcd->image->numcomps;++j) {
+ opj_image_comp_t * l_img_comp = p_tcd->image->comps + j;
+ if (((size_t)l_img_comp->data & 0xFU) != 0U) { /* tile data shall be aligned on 16 bytes */
+ l_reuse_data = OPJ_FALSE;
+ }
+ }
+#endif
+ }
for (i=0;i<l_nb_tiles;++i) {
if (! opj_j2k_pre_write_tile(p_j2k,i,p_stream,p_manager)) {
if (l_current_data) {
/* otherwise, allocate the data */
for (j=0;j<p_j2k->m_tcd->image->numcomps;++j) {
opj_tcd_tilecomp_t* l_tilec = p_tcd->tcd_image->tiles->comps + j;
- if (l_nb_tiles == 1) {
+ if (l_reuse_data) {
opj_image_comp_t * l_img_comp = p_tcd->image->comps + j;
l_tilec->data = l_img_comp->data;
l_tilec->ownsData = OPJ_FALSE;
}
}
l_current_tile_size = opj_tcd_get_encoded_tile_size(p_j2k->m_tcd);
- if (l_nb_tiles > 1) {
+ if (!l_reuse_data) {
if (l_current_tile_size > l_max_tile_size) {
OPJ_BYTE *l_new_current_data = (OPJ_BYTE *) opj_realloc(l_current_data, l_current_tile_size);
if (! l_new_current_data) {
{
OPJ_SIZE_T i;
const OPJ_SIZE_T len = n;
+ /* buffers are aligned on 16 bytes */
+ assert( ((size_t)c0 & 0xf) == 0 );
+ assert( ((size_t)c1 & 0xf) == 0 );
+ assert( ((size_t)c2 & 0xf) == 0 );
for(i = 0; i < (len & ~3U); i += 4) {
__m128i y, u, v;
#cmakedefine _FILE_OFFSET_BITS @_FILE_OFFSET_BITS@
#cmakedefine OPJ_HAVE_FSEEKO @OPJ_HAVE_FSEEKO@
+/* find whether or not have <malloc.h> */
+#cmakedefine HAVE_MALLOC_H
+/* check if function `aligned_alloc` exists */
+#cmakedefine HAVE_ALIGNED_ALLOC
+/* check if function `_aligned_malloc` exists */
+#cmakedefine HAVE__ALIGNED_MALLOC
+/* check if function `memalign` exists */
+#cmakedefine HAVE_MEMALIGN
+/* check if function `posix_memalign` exists */
+#cmakedefine HAVE_POSIX_MEMALIGN
+
/* Byte order. */
/* All compilers that support Mac OS X define either __BIG_ENDIAN__ or
__LITTLE_ENDIAN__ to match the endianness of the architecture being
#cmakedefine OPJ_BIG_ENDIAN
#elif defined(__BIG_ENDIAN__)
# define OPJ_BIG_ENDIAN
-#endif
\ No newline at end of file
+#endif
--- /dev/null
+/*
+ * The copyright in this software is being made available under the 2-clauses
+ * BSD License, included below. This software may be subject to other third
+ * party and contributor rights, including patent rights, and no such rights
+ * are granted under this license.
+ *
+ * Copyright (c) 2015, Mathieu Malaterre <mathieu.malaterre@gmail.com>
+ * Copyright (c) 2015, Matthieu Darbois
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#define OPJ_SKIP_POISON
+#include "opj_includes.h"
+
+/**
+ * Allocate `size` bytes at an address that is a multiple of `alignment`.
+ *
+ * The backend is chosen at compile time, in this order of preference:
+ * posix_memalign, memalign, _aligned_malloc. Compilation fails when none of
+ * them was detected.
+ *
+ * @param alignment requested alignment in bytes (asserted to be a non-zero power of 2)
+ * @param size      number of bytes to allocate
+ * @return the aligned block, or NULL when `size` is 0 or the backend reports failure
+ */
+static INLINE void *opj_aligned_alloc_n(size_t alignment, size_t size)
+{
+    void *block = NULL;
+
+    /* the alignment must be a non-zero power of 2 */
+    assert((alignment != 0U) && ((alignment & (alignment - 1U)) == 0U));
+
+    /* a zero-sized request is answered with NULL without touching the backend */
+    if (size == 0U) {
+        return NULL;
+    }
+
+#if defined(HAVE_POSIX_MEMALIGN)
+    /* aligned_alloc requires C11, so stick to posix_memalign for now: it is
+     * available on older POSIX systems that do not support ISO C11.
+     * posix_memalign reports failure through a non-zero return value, in
+     * which case the output pointer is forced to NULL. */
+    if (posix_memalign(&block, alignment, size) != 0) {
+        block = NULL;
+    }
+#elif defined(HAVE_MEMALIGN)
+    /* older linux */
+    block = memalign(alignment, size);
+#elif defined(HAVE__ALIGNED_MALLOC)
+    /* _MSC_VER: the argument order is (size, alignment) */
+    block = _aligned_malloc(size, alignment);
+#else
+/* TODO: _mm_malloc(x,y) */
+#error missing aligned alloc function
+#endif
+    return block;
+}
+/**
+ * Resize a block so that the result is aligned on `alignment` bytes.
+ *
+ * With posix_memalign/memalign there is no portable aligned realloc, so the
+ * block is first resized with plain realloc(); if the result is not suitably
+ * aligned, its content is copied into a freshly allocated aligned block.
+ * With _aligned_malloc the native _aligned_realloc() is used.
+ *
+ * NOTE(review): when `new_size` is 0 this returns NULL and leaves `ptr`
+ * untouched. When realloc() succeeds with a misaligned result and the
+ * aligned fallback allocation then fails, the resized block is released and
+ * NULL is returned; in that case the original `ptr` may already have been
+ * invalidated by realloc() - confirm callers do not reuse it.
+ *
+ * @param ptr       block to resize, as passed to realloc()
+ * @param alignment requested alignment in bytes (asserted to be a non-zero power of 2)
+ * @param new_size  new size in bytes
+ * @return the resized aligned block, or NULL when `new_size` is 0 or on failure
+ */
+static INLINE void *opj_aligned_realloc_n(void *ptr, size_t alignment, size_t new_size)
+{
+    void *resized;
+
+    /* the alignment must be a non-zero power of 2 */
+    assert((alignment != 0U) && ((alignment & (alignment - 1U)) == 0U));
+
+    /* avoid the implementation-defined behaviour of realloc(ptr, 0) */
+    if (new_size == 0U) {
+        return NULL;
+    }
+
+/* no portable aligned realloc */
+#if defined(HAVE_POSIX_MEMALIGN) || defined(HAVE_MEMALIGN)
+    /* fast path: the glibc documentation states that aligned allocations may
+     * be mixed with realloc */
+    resized = realloc(ptr, new_size);
+    /* the pointer is cast to `size_t` only to apply the binary AND operator */
+    if (((size_t)resized & (alignment - 1U)) != 0U) {
+        /* A portable aligned realloc is non-trivial, so use a simple approach
+         * that does not need a function returning the size of an allocated
+         * array (e.g. _msize on Windows, malloc_size on MacOS,
+         * malloc_usable_size on systems with glibc): allocate an aligned
+         * block of the new size and copy the resized content into it. */
+        void *unaligned = resized;
+
+        resized = opj_aligned_alloc_n(alignment, new_size);
+        if (resized != NULL) {
+            memcpy(resized, unaligned, new_size);
+        }
+        free(unaligned);
+    }
+#elif defined(HAVE__ALIGNED_MALLOC)
+    /* _MSC_VER */
+    resized = _aligned_realloc(ptr, new_size, alignment);
+#else
+/* TODO: _mm_malloc(x,y) */
+#error missing aligned realloc function
+#endif
+    return resized;
+}
+/**
+ * Allocate an uninitialized memory block.
+ *
+ * @param size number of bytes to allocate
+ * @return the block, or NULL when `size` is 0 or the allocation fails
+ */
+void * opj_malloc(size_t size)
+{
+    /* a zero-sized request always yields NULL, which avoids the
+     * implementation-defined result of malloc(0) */
+    return (size != 0U) ? malloc(size) : NULL;
+}
+/**
+ * Allocate a zero-initialized array of `num` elements of `size` bytes each.
+ *
+ * @param num  number of elements
+ * @param size size of one element in bytes
+ * @return the block, or NULL when `size` is 0 or the allocation fails
+ */
+void * opj_calloc(size_t num, size_t size)
+{
+    /* Only a zero element size is rejected here; a zero element count is
+     * forwarded to calloc() unchanged.
+     * NOTE(review): the C standard leaves the result of a zero-sized request
+     * implementation-defined (NULL or a unique pointer), so callers should
+     * not rely on either outcome for num == 0. */
+    return (size == 0U) ? NULL : calloc(num, size);
+}
+
+/**
+ * Allocate a memory block aligned on a 16 byte boundary.
+ *
+ * @param size number of bytes to allocate
+ * @return whatever opj_aligned_alloc_n() returns for a 16 byte alignment
+ */
+void *opj_aligned_malloc(size_t size)
+{
+    const size_t alignment = 16U;
+
+    return opj_aligned_alloc_n(alignment, size);
+}
+/**
+ * Resize a memory block, keeping the result aligned on a 16 byte boundary.
+ *
+ * @param ptr  block to resize
+ * @param size new size in bytes
+ * @return whatever opj_aligned_realloc_n() returns for a 16 byte alignment
+ */
+void * opj_aligned_realloc(void *ptr, size_t size)
+{
+    const size_t alignment = 16U;
+
+    return opj_aligned_realloc_n(ptr, alignment, size);
+}
+
+/**
+ * Release a block obtained from the aligned allocation functions.
+ *
+ * The release function has to match the allocator that produced the block.
+ * The aligned allocation helpers prefer posix_memalign, then memalign, and
+ * only use _aligned_malloc when neither of those is available. Blocks from
+ * posix_memalign/memalign must be released with free(), so _aligned_free()
+ * is selected only when _aligned_malloc is the backend actually in use, not
+ * merely because it was detected.
+ *
+ * @param ptr block to release
+ */
+void opj_aligned_free(void* ptr)
+{
+#if defined(HAVE__ALIGNED_MALLOC) && !defined(HAVE_POSIX_MEMALIGN) && !defined(HAVE_MEMALIGN)
+    _aligned_free( ptr );
+#else
+    free( ptr );
+#endif
+}
+
+/**
+ * Resize a memory block.
+ *
+ * @param ptr      block to resize, as passed to realloc()
+ * @param new_size new size in bytes
+ * @return the resized (and possibly moved) block, or NULL when `new_size`
+ *         is 0 or the reallocation fails; `ptr` is not released when
+ *         `new_size` is 0
+ */
+void * opj_realloc(void *ptr, size_t new_size)
+{
+    /* a zero size always yields NULL, which avoids the
+     * implementation-defined result of realloc(ptr, 0) */
+    return (new_size != 0U) ? realloc(ptr, new_size) : NULL;
+}
+/**
+ * Release a memory block.
+ *
+ * @param ptr block to release; a NULL pointer is ignored
+ */
+void opj_free(void *ptr)
+{
+    /* free(NULL) is a no-op, so skipping the call for NULL changes nothing */
+    if (ptr != NULL) {
+        free(ptr);
+    }
+}
*/
#ifndef __OPJ_MALLOC_H
#define __OPJ_MALLOC_H
+
+#include <stddef.h>
/**
@file opj_malloc.h
@brief Internal functions
@param size Bytes to allocate
@return Returns a void pointer to the allocated space, or NULL if there is insufficient memory available
*/
-#ifdef ALLOC_PERF_OPT
-void * OPJ_CALLCONV opj_malloc(size_t size);
-#else
-/* prevent assertion on overflow for MSVC */
-#ifdef _MSC_VER
-#define opj_malloc(size) ((size_t)(size) >= (size_t)-0x100 ? NULL : malloc(size))
-#else
-#define opj_malloc(size) malloc(size)
-#endif
-#endif
+void * opj_malloc(size_t size);
/**
Allocate a memory block with elements initialized to 0
@param size Bytes per block to allocate
@return Returns a void pointer to the allocated space, or NULL if there is insufficient memory available
*/
-#ifdef ALLOC_PERF_OPT
-void * OPJ_CALLCONV opj_calloc(size_t _NumOfElements, size_t _SizeOfElements);
-#else
-/* prevent assertion on overflow for MSVC */
-#ifdef _MSC_VER
-#define opj_calloc(num, size) ((size_t)(num) != 0 && (size_t)(num) >= (size_t)-0x100 / (size_t)(size) ? NULL : calloc(num, size))
-#else
-#define opj_calloc(num, size) calloc(num, size)
-#endif
-#endif
+void * opj_calloc(size_t numOfElements, size_t sizeOfElements);
/**
Allocate memory aligned to a 16 byte boundary
@param size Bytes to allocate
@return Returns a void pointer to the allocated space, or NULL if there is insufficient memory available
*/
-/* FIXME: These should be set with cmake tests, but we're currently not requiring use of cmake */
-#ifdef _WIN32
- /* Someone should tell the mingw people that their malloc.h ought to provide _mm_malloc() */
- #ifdef __GNUC__
- #include <mm_malloc.h>
- #define HAVE_MM_MALLOC
- #else /* MSVC, Intel C++ */
- #include <malloc.h>
- #ifdef _mm_malloc
- #define HAVE_MM_MALLOC
- #endif
- #endif
-#else /* Not _WIN32 */
- #if defined(__sun)
- #define HAVE_MEMALIGN
- #elif defined(__FreeBSD__)
- #define HAVE_POSIX_MEMALIGN
- /* Linux x86_64 and OSX always align allocations to 16 bytes */
- #elif !defined(__amd64__) && !defined(__APPLE__) && !defined(_AIX)
- #define HAVE_MEMALIGN
- #include <malloc.h>
- #endif
-#endif
-
-#define opj_aligned_malloc(size) malloc(size)
-#define opj_aligned_free(m) free(m)
-
-#ifdef HAVE_MM_MALLOC
- #undef opj_aligned_malloc
- #define opj_aligned_malloc(size) _mm_malloc(size, 16)
- #undef opj_aligned_free
- #define opj_aligned_free(m) _mm_free(m)
-#endif
-
-#ifdef HAVE_MEMALIGN
- extern void* memalign(size_t, size_t);
- #undef opj_aligned_malloc
- #define opj_aligned_malloc(size) memalign(16, (size))
- #undef opj_aligned_free
- #define opj_aligned_free(m) free(m)
-#endif
-
-#ifdef HAVE_POSIX_MEMALIGN
- #undef opj_aligned_malloc
- extern int posix_memalign(void**, size_t, size_t);
-
- static INLINE void* __attribute__ ((malloc)) opj_aligned_malloc(size_t size){
- void* mem = NULL;
- posix_memalign(&mem, 16, size);
- return mem;
- }
- #undef opj_aligned_free
- #define opj_aligned_free(m) free(m)
-#endif
-
-#ifdef ALLOC_PERF_OPT
- #undef opj_aligned_malloc
- #define opj_aligned_malloc(size) opj_malloc(size)
- #undef opj_aligned_free
- #define opj_aligned_free(m) opj_free(m)
-#endif
+void * opj_aligned_malloc(size_t size);
+void * opj_aligned_realloc(void *ptr, size_t size);
+void opj_aligned_free(void* ptr);
/**
Reallocate memory blocks.
@param s New size in bytes
@return Returns a void pointer to the reallocated (and possibly moved) memory block
*/
-#ifdef ALLOC_PERF_OPT
-void * OPJ_CALLCONV opj_realloc(void * m, size_t s);
-#else
-/* prevent assertion on overflow for MSVC */
-#ifdef _MSC_VER
-#define opj_realloc(m, s) ((size_t)(s) >= (size_t)-0x100 ? NULL : realloc(m, s))
-#else
-#define opj_realloc(m, s) realloc(m, s)
-#endif
-#endif
+void * opj_realloc(void * m, size_t s);
/**
Deallocates or frees a memory block.
@param m Previously allocated memory block to be freed
*/
-#ifdef ALLOC_PERF_OPT
-void OPJ_CALLCONV opj_free(void * m);
-#else
-#define opj_free(m) free(m)
-#endif
+void opj_free(void * m);
-#ifdef __GNUC__
+#if defined(__GNUC__) && !defined(OPJ_SKIP_POISON)
#pragma GCC poison malloc calloc realloc free
#endif
OPJ_BOOL opj_alloc_tile_component_data(opj_tcd_tilecomp_t *l_tilec)
{
if ((l_tilec->data == 00) || ((l_tilec->data_size_needed > l_tilec->data_size) && (l_tilec->ownsData == OPJ_FALSE))) {
- l_tilec->data = (OPJ_INT32 *) opj_malloc(l_tilec->data_size_needed);
+ l_tilec->data = (OPJ_INT32 *) opj_aligned_malloc(l_tilec->data_size_needed);
if (! l_tilec->data ) {
return OPJ_FALSE;
}
l_tilec->ownsData = OPJ_TRUE;
}
else if (l_tilec->data_size_needed > l_tilec->data_size) {
- OPJ_INT32 * new_data = (OPJ_INT32 *) opj_realloc(l_tilec->data, l_tilec->data_size_needed);
- /* opj_event_msg(p_manager, EVT_ERROR, "Not enough memory to handle tile datan"); */
- /* fprintf(stderr, "Not enough memory to handle tile data"); */
- if (! new_data) {
- opj_free(l_tilec->data);
- l_tilec->data = NULL;
+ /* We don't need to keep old data */
+ opj_aligned_free(l_tilec->data);
+ l_tilec->data = (OPJ_INT32 *) opj_aligned_malloc(l_tilec->data_size_needed);
+ if (! l_tilec->data ) {
l_tilec->data_size = 0;
l_tilec->data_size_needed = 0;
l_tilec->ownsData = OPJ_FALSE;
return OPJ_FALSE;
}
- l_tilec->data = new_data;
/*fprintf(stderr, "tReallocate data of tilec (int): from %d to %d x OPJ_UINT32n", l_tilec->data_size, l_data_size);*/
l_tilec->data_size = l_tilec->data_size_needed;
l_tilec->ownsData = OPJ_TRUE;
l_band->stepsize = (OPJ_FLOAT32)(((1.0 + l_step_size->mant / 2048.0) * pow(2.0, (OPJ_INT32) (numbps - l_step_size->expn)))) * fraction;
l_band->numbps = l_step_size->expn + (OPJ_INT32)l_tccp->numgbits - 1; /* WHY -1 ? */
- if (! l_band->precincts) {
+ if (!l_band->precincts && (l_nb_precincts > 0U)) {
l_band->precincts = (opj_tcd_precinct_t *) opj_malloc( /*3 * */ l_nb_precinct_size);
if (! l_band->precincts) {
return OPJ_FALSE;
/*fprintf(stderr, "\t\t\t\t precinct_cw = %d x recinct_ch = %d\n",l_current_precinct->cw, l_current_precinct->ch); */
l_nb_code_blocks_size = l_nb_code_blocks * (OPJ_UINT32)sizeof_block;
- if (! l_current_precinct->cblks.blocks) {
+ if (!l_current_precinct->cblks.blocks && (l_nb_code_blocks > 0U)) {
l_current_precinct->cblks.blocks = opj_malloc(l_nb_code_blocks_size);
if (! l_current_precinct->cblks.blocks ) {
return OPJ_FALSE;
}
if (l_tile_comp->ownsData && l_tile_comp->data) {
- opj_free(l_tile_comp->data);
+ opj_aligned_free(l_tile_comp->data);
l_tile_comp->data = 00;
l_tile_comp->ownsData = 0;
l_tile_comp->data_size = 0;