Merge branch 'master' of https://github.com/uclouvain/openjpeg into tier1_optimizatio...
[openjpeg.git] / src / lib / openjp2 / mct.c
index 8b0276f32d847c52f95c8650bff5432e80c3d289..e1f2f50dd487d97b8efa25ef9608fcf690426f1c 100644 (file)
@@ -74,13 +74,17 @@ const OPJ_FLOAT64 * opj_mct_get_mct_norms_real ()
 /* </summary> */
 #ifdef __SSE2__
 void opj_mct_encode(
-               OPJ_INT32* restrict c0,
-               OPJ_INT32* restrict c1,
-               OPJ_INT32* restrict c2,
+               OPJ_INT32* OPJ_RESTRICT c0,
+               OPJ_INT32* OPJ_RESTRICT c1,
+               OPJ_INT32* OPJ_RESTRICT c2,
                OPJ_UINT32 n)
 {
        OPJ_SIZE_T i;
        const OPJ_SIZE_T len = n;
+       /* buffers are aligned on 16 bytes */
+       assert( ((size_t)c0 & 0xf) == 0 );
+       assert( ((size_t)c1 & 0xf) == 0 );
+       assert( ((size_t)c2 & 0xf) == 0 );
        
        for(i = 0; i < (len & ~3U); i += 4) {
                __m128i y, u, v;
@@ -112,9 +116,9 @@ void opj_mct_encode(
 }
 #else
 void opj_mct_encode(
-               OPJ_INT32* restrict c0,
-               OPJ_INT32* restrict c1,
-               OPJ_INT32* restrict c2,
+               OPJ_INT32* OPJ_RESTRICT c0,
+               OPJ_INT32* OPJ_RESTRICT c1,
+               OPJ_INT32* OPJ_RESTRICT c2,
                OPJ_UINT32 n)
 {
        OPJ_SIZE_T i;
@@ -139,9 +143,9 @@ void opj_mct_encode(
 /* </summary> */
 #ifdef __SSE2__
 void opj_mct_decode(
-               OPJ_INT32* restrict c0,
-               OPJ_INT32* restrict c1,
-               OPJ_INT32* restrict c2,
+               OPJ_INT32* OPJ_RESTRICT c0,
+               OPJ_INT32* OPJ_RESTRICT c1,
+               OPJ_INT32* OPJ_RESTRICT c2,
                OPJ_UINT32 n)
 {
        OPJ_SIZE_T i;
@@ -174,9 +178,9 @@ void opj_mct_decode(
 }
 #else
 void opj_mct_decode(
-               OPJ_INT32* restrict c0,
-               OPJ_INT32* restrict c1, 
-               OPJ_INT32* restrict c2, 
+               OPJ_INT32* OPJ_RESTRICT c0,
+               OPJ_INT32* OPJ_RESTRICT c1, 
+               OPJ_INT32* OPJ_RESTRICT c2, 
                OPJ_UINT32 n)
 {
        OPJ_UINT32 i;
@@ -206,9 +210,9 @@ OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno) {
 /* </summary> */
 #ifdef __SSE4_1__
 void opj_mct_encode_real(
-                                                                                                OPJ_INT32* restrict c0,
-                                                                                                OPJ_INT32* restrict c1,
-                                                                                                OPJ_INT32* restrict c2,
+                                                                                                OPJ_INT32* OPJ_RESTRICT c0,
+                                                                                                OPJ_INT32* OPJ_RESTRICT c1,
+                                                                                                OPJ_INT32* OPJ_RESTRICT c2,
                                                                                                 OPJ_UINT32 n)
 {
        OPJ_SIZE_T i;
@@ -347,9 +351,9 @@ void opj_mct_encode_real(
 }
 #else
 void opj_mct_encode_real(
-               OPJ_INT32* restrict c0,
-               OPJ_INT32* restrict c1,
-               OPJ_INT32* restrict c2,
+               OPJ_INT32* OPJ_RESTRICT c0,
+               OPJ_INT32* OPJ_RESTRICT c1,
+               OPJ_INT32* OPJ_RESTRICT c2,
                OPJ_UINT32 n)
 {
        OPJ_UINT32 i;
@@ -371,9 +375,9 @@ void opj_mct_encode_real(
 /* Inverse irreversible MCT. */
 /* </summary> */
 void opj_mct_decode_real(
-               OPJ_FLOAT32* restrict c0,
-               OPJ_FLOAT32* restrict c1,
-               OPJ_FLOAT32* restrict c2,
+               OPJ_FLOAT32* OPJ_RESTRICT c0,
+               OPJ_FLOAT32* OPJ_RESTRICT c1,
+               OPJ_FLOAT32* OPJ_RESTRICT c2,
                OPJ_UINT32 n)
 {
        OPJ_UINT32 i;