Fix bad call to fclose with NULL pointer
[openjpeg.git] / src / bin / jp2 / converttif.c
index 471f01fff3583b90d0d13c503c15404cd0d277b5..143d3be6ff635c3adc21c2b9ca8321fff7ef0802 100644 (file)
@@ -12,6 +12,7 @@
  * Copyright (c) 2003-2014, Antonin Descampe
  * Copyright (c) 2005, Herve Drolon, FreeImage Team
  * Copyright (c) 2006-2007, Parvatha Elangovan
+ * Copyright (c) 2015, Matthieu Darbois
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  TIFF IMAGE FORMAT
  
  <<-- <<-- <<-- <<-- */
-typedef void (* tif_32stoX)(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T length);
+#define PUTBITS2(s, nb) /* append the nb-bit value s when nb >= remaining; uses trailing, remaining and pDst from the enclosing scope */ \
+       trailing <<= remaining; /* make room in the pending byte for the high bits of s */ \
+       trailing |= (unsigned int)((s) >> (nb - remaining)); /* high bits of s complete the pending byte */ \
+       *pDst++ = (OPJ_BYTE)trailing; /* store the completed byte */ \
+       trailing = (unsigned int)((s) & ((1U << (nb - remaining)) - 1U)); /* keep the low bits of s that were not stored */ \
+       if (nb >= (remaining + 8)) { /* the leftover bits fill one more whole byte */ \
+               *pDst++ = (OPJ_BYTE)(trailing >> (nb - (remaining + 8))); \
+               trailing &= (unsigned int)((1U << (nb - (remaining + 8))) - 1U); \
+               remaining += 16 - nb; /* two bytes stored for nb bits consumed */ \
+       } else { \
+               remaining += 8 - nb; /* one byte stored for nb bits consumed */ \
+       } /* NOTE: expands to several statements, so only use it inside a braced block */
+
+#define PUTBITS(s, nb) /* append the nb-bit value s to the bit stream kept in trailing/remaining */ \
+  if (nb >= remaining) { /* s fills the pending byte: let PUTBITS2 store it */ \
+               PUTBITS2(s, nb) \
+       } else { /* s leaves the pending byte incomplete: only accumulate it */ \
+               trailing <<= nb; \
+               trailing |= (unsigned int)(s); /* s is not masked here; assumed to already fit in nb bits - TODO confirm */ \
+               remaining -= nb; \
+       }
+#define FLUSHBITS() /* store the last, partially filled byte if there is one */ \
+       if (remaining != 8) { /* remaining == 8 means no bits are pending */ \
+               trailing <<= remaining; /* left-align the pending bits; the low bits become zero padding */ \
+               *pDst++ = (OPJ_BYTE)trailing; \
+       }
 
-static void tif_32sto1u(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T length)
+static void tif_32sto3u(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T length)
 {
        OPJ_SIZE_T i;
-       for (i = 0; i < (length & -(OPJ_SIZE_T)8U); i+=8U) {
+       
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)7U); i+=8U) {
                OPJ_UINT32 src0 = (OPJ_UINT32)pSrc[i+0];
                OPJ_UINT32 src1 = (OPJ_UINT32)pSrc[i+1];
                OPJ_UINT32 src2 = (OPJ_UINT32)pSrc[i+2];
@@ -69,98 +96,204 @@ static void tif_32sto1u(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T length
                OPJ_UINT32 src5 = (OPJ_UINT32)pSrc[i+5];
                OPJ_UINT32 src6 = (OPJ_UINT32)pSrc[i+6];
                OPJ_UINT32 src7 = (OPJ_UINT32)pSrc[i+7];
+                       
+               *pDst++ = (OPJ_BYTE)((src0 << 5) | (src1 << 2) | (src2 >> 1));
+               *pDst++ = (OPJ_BYTE)((src2 << 7) | (src3 << 4) | (src4 << 1) | (src5 >> 2));
+               *pDst++ = (OPJ_BYTE)((src5 << 6) | (src6 << 3) | (src7));
+       }
                
-               *pDst++ = (src0 << 7) | (src1 << 6) | (src2 << 5) | (src3 << 4) | (src4 << 3) | (src5 << 2) | (src6 << 1) | src7;
+       if (length & 7U) {
+               unsigned int trailing = 0U;
+               int remaining = 8U;
+               length &= 7U;
+               PUTBITS((OPJ_UINT32)pSrc[i+0], 3)
+               if (length > 1U) {
+                       PUTBITS((OPJ_UINT32)pSrc[i+1], 3)
+                       if (length > 2U) {
+                               PUTBITS((OPJ_UINT32)pSrc[i+2], 3)
+                               if (length > 3U) {
+                                       PUTBITS((OPJ_UINT32)pSrc[i+3], 3)
+                                       if (length > 4U) {
+                                               PUTBITS((OPJ_UINT32)pSrc[i+4], 3)
+                                               if (length > 5U) {
+                                                       PUTBITS((OPJ_UINT32)pSrc[i+5], 3)
+                                                       if (length > 6U) {
+                                                               PUTBITS((OPJ_UINT32)pSrc[i+6], 3)
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+               }
+               FLUSHBITS()
        }
+}
+
+static void tif_32sto5u(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T length) /* packs length samples from pSrc into pDst, 5 bits each, most significant bit first; samples are not masked (assumed to already fit in 5 bits - TODO confirm) */
+{
+       OPJ_SIZE_T i;
        
-       if (length & 7U) {
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)7U); i+=8U) { /* whole groups: 8 samples -> 5 bytes */
                OPJ_UINT32 src0 = (OPJ_UINT32)pSrc[i+0];
-               OPJ_UINT32 src1 = 0U;
-               OPJ_UINT32 src2 = 0U;
-               OPJ_UINT32 src3 = 0U;
-               OPJ_UINT32 src4 = 0U;
-               OPJ_UINT32 src5 = 0U;
-               OPJ_UINT32 src6 = 0U;
-               length = length & 7U;
+               OPJ_UINT32 src1 = (OPJ_UINT32)pSrc[i+1];
+               OPJ_UINT32 src2 = (OPJ_UINT32)pSrc[i+2];
+               OPJ_UINT32 src3 = (OPJ_UINT32)pSrc[i+3];
+               OPJ_UINT32 src4 = (OPJ_UINT32)pSrc[i+4];
+               OPJ_UINT32 src5 = (OPJ_UINT32)pSrc[i+5];
+               OPJ_UINT32 src6 = (OPJ_UINT32)pSrc[i+6];
+               OPJ_UINT32 src7 = (OPJ_UINT32)pSrc[i+7];
                
+               *pDst++ = (OPJ_BYTE)((src0 << 3) | (src1 >> 2)); /* src0, then the top 3 bits of src1 */
+               *pDst++ = (OPJ_BYTE)((src1 << 6) | (src2 << 1) | (src3 >> 4)); /* low 2 bits of src1, src2, top bit of src3 */
+               *pDst++ = (OPJ_BYTE)((src3 << 4) | (src4 >> 1)); /* low 4 bits of src3, top 4 bits of src4 */
+               *pDst++ = (OPJ_BYTE)((src4 << 7) | (src5 << 2) | (src6 >> 3)); /* low bit of src4, src5, top 2 bits of src6 */
+               *pDst++ = (OPJ_BYTE)((src6 << 5) | (src7)); /* low 3 bits of src6, then src7 */
+
+       }
+       
+       if (length & 7U) { /* 1 to 7 leftover samples go through the bit writer macros */
+               unsigned int trailing = 0U; /* bits gathered but not yet stored */
+               int remaining = 8U; /* free bit positions in the pending byte */
+               length &= 7U;
+               PUTBITS((OPJ_UINT32)pSrc[i+0], 5)
                if (length > 1U) {
-                       src1 = (OPJ_UINT32)pSrc[i+1];
+                       PUTBITS((OPJ_UINT32)pSrc[i+1], 5)
                        if (length > 2U) {
-                               src2 = (OPJ_UINT32)pSrc[i+2];
+                               PUTBITS((OPJ_UINT32)pSrc[i+2], 5)
                                if (length > 3U) {
-                                       src3 = (OPJ_UINT32)pSrc[i+3];
+                                       PUTBITS((OPJ_UINT32)pSrc[i+3], 5)
                                        if (length > 4U) {
-                                               src4 = (OPJ_UINT32)pSrc[i+4];
+                                               PUTBITS((OPJ_UINT32)pSrc[i+4], 5)
                                                if (length > 5U) {
-                                                       src5 = (OPJ_UINT32)pSrc[i+5];
+                                                       PUTBITS((OPJ_UINT32)pSrc[i+5], 5)
                                                        if (length > 6U) {
-                                                               src6 = (OPJ_UINT32)pSrc[i+6];
+                                                               PUTBITS((OPJ_UINT32)pSrc[i+6], 5)
                                                        }
                                                }
                                        }
                                }
                        }
                }
-               *pDst++ = (src0 << 7) | (src1 << 6) | (src2 << 5) | (src3 << 4) | (src4 << 3) | (src5 << 2) | (src6 << 1);
+               FLUSHBITS() /* store the final partial byte, zero padded in its low bits */
        }
 }
 
-static void tif_32sto2u(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T length)
+static void tif_32sto7u(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T length) /* packs length samples from pSrc into pDst, 7 bits each, most significant bit first; samples are not masked (assumed to already fit in 7 bits - TODO confirm) */
 {
        OPJ_SIZE_T i;
-       for (i = 0; i < (length & -(OPJ_SIZE_T)4U); i+=4U) {
+       
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)7U); i+=8U) { /* whole groups: 8 samples -> 7 bytes */
                OPJ_UINT32 src0 = (OPJ_UINT32)pSrc[i+0];
                OPJ_UINT32 src1 = (OPJ_UINT32)pSrc[i+1];
                OPJ_UINT32 src2 = (OPJ_UINT32)pSrc[i+2];
                OPJ_UINT32 src3 = (OPJ_UINT32)pSrc[i+3];
+               OPJ_UINT32 src4 = (OPJ_UINT32)pSrc[i+4];
+               OPJ_UINT32 src5 = (OPJ_UINT32)pSrc[i+5];
+               OPJ_UINT32 src6 = (OPJ_UINT32)pSrc[i+6];
+               OPJ_UINT32 src7 = (OPJ_UINT32)pSrc[i+7];
                
-               *pDst++ = (src0 << 6) | (src1 << 4) | (src2 << 2) | src3;
+               *pDst++ = (OPJ_BYTE)((src0 << 1) | (src1 >> 6)); /* src0, then the top bit of src1 */
+               *pDst++ = (OPJ_BYTE)((src1 << 2) | (src2 >> 5)); /* each following byte takes one bit less from the left sample and one more from the right */
+               *pDst++ = (OPJ_BYTE)((src2 << 3) | (src3 >> 4));
+               *pDst++ = (OPJ_BYTE)((src3 << 4) | (src4 >> 3));
+               *pDst++ = (OPJ_BYTE)((src4 << 5) | (src5 >> 2));
+               *pDst++ = (OPJ_BYTE)((src5 << 6) | (src6 >> 1));
+               *pDst++ = (OPJ_BYTE)((src6 << 7) | (src7)); /* low bit of src6, then all of src7 */
        }
        
-       if (length & 3U) {
-               OPJ_UINT32 src0 = (OPJ_UINT32)pSrc[i+0];
-               OPJ_UINT32 src1 = 0U;
-               OPJ_UINT32 src2 = 0U;
-               length = length & 3U;
-               
+       if (length & 7U) { /* 1 to 7 leftover samples go through the bit writer macros */
+               unsigned int trailing = 0U; /* bits gathered but not yet stored */
+               int remaining = 8U; /* free bit positions in the pending byte */
+               length &= 7U;
+               PUTBITS((OPJ_UINT32)pSrc[i+0], 7)
                if (length > 1U) {
-                       src1 = (OPJ_UINT32)pSrc[i+1];
+                       PUTBITS((OPJ_UINT32)pSrc[i+1], 7)
                        if (length > 2U) {
-                               src2 = (OPJ_UINT32)pSrc[i+2];
+                               PUTBITS((OPJ_UINT32)pSrc[i+2], 7)
+                               if (length > 3U) {
+                                       PUTBITS((OPJ_UINT32)pSrc[i+3], 7)
+                                       if (length > 4U) {
+                                               PUTBITS((OPJ_UINT32)pSrc[i+4], 7)
+                                               if (length > 5U) {
+                                                       PUTBITS((OPJ_UINT32)pSrc[i+5], 7)
+                                                       if (length > 6U) {
+                                                               PUTBITS((OPJ_UINT32)pSrc[i+6], 7)
+                                                       }
+                                               }
+                                       }
+                               }
                        }
                }
-               *pDst++ = (src0 << 6) | (src1 << 4) | (src2 << 2);
+               FLUSHBITS() /* store the final partial byte, zero padded in its low bits */
        }
 }
 
-static void tif_32sto4u(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T length)
+static void tif_32sto9u(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T length) /* packs length samples from pSrc into pDst, 9 bits each, most significant bit first; samples are not masked (assumed to already fit in 9 bits - TODO confirm) */
 {
        OPJ_SIZE_T i;
-       for (i = 0; i < (length & -(OPJ_SIZE_T)2U); i+=2U) {
+       
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)7U); i+=8U) { /* whole groups: 8 samples -> 9 bytes */
                OPJ_UINT32 src0 = (OPJ_UINT32)pSrc[i+0];
                OPJ_UINT32 src1 = (OPJ_UINT32)pSrc[i+1];
+               OPJ_UINT32 src2 = (OPJ_UINT32)pSrc[i+2];
+               OPJ_UINT32 src3 = (OPJ_UINT32)pSrc[i+3];
+               OPJ_UINT32 src4 = (OPJ_UINT32)pSrc[i+4];
+               OPJ_UINT32 src5 = (OPJ_UINT32)pSrc[i+5];
+               OPJ_UINT32 src6 = (OPJ_UINT32)pSrc[i+6];
+               OPJ_UINT32 src7 = (OPJ_UINT32)pSrc[i+7];
                
-               *pDst++ = (src0 << 4) | src1;
+               *pDst++ = (OPJ_BYTE)((src0 >> 1)); /* top 8 bits of src0 */
+               *pDst++ = (OPJ_BYTE)((src0 << 7) | (src1 >> 2)); /* low bit of src0, then the top 7 bits of src1 */
+               *pDst++ = (OPJ_BYTE)((src1 << 6) | (src2 >> 3));
+               *pDst++ = (OPJ_BYTE)((src2 << 5) | (src3 >> 4));
+               *pDst++ = (OPJ_BYTE)((src3 << 4) | (src4 >> 5));
+               *pDst++ = (OPJ_BYTE)((src4 << 3) | (src5 >> 6));
+               *pDst++ = (OPJ_BYTE)((src5 << 2) | (src6 >> 7));
+               *pDst++ = (OPJ_BYTE)((src6 << 1) | (src7 >> 8)); /* low 7 bits of src6, then the top bit of src7 */
+               *pDst++ = (OPJ_BYTE)(src7); /* low 8 bits of src7 */
        }
        
-       if (length & 1U) {
-               OPJ_UINT32 src0 = (OPJ_UINT32)pSrc[i+0];
-               *pDst++ = (src0 << 4);
+       if (length & 7U) { /* 1 to 7 leftover samples go through the bit writer macros */
+               unsigned int trailing = 0U; /* bits gathered but not yet stored */
+               int remaining = 8U; /* free bit positions in the pending byte */
+               length &= 7U;
+               PUTBITS2((OPJ_UINT32)pSrc[i+0], 9) /* 9 bits always fill the pending byte, so PUTBITS2 is used directly */
+               if (length > 1U) {
+                       PUTBITS2((OPJ_UINT32)pSrc[i+1], 9)
+                       if (length > 2U) {
+                               PUTBITS2((OPJ_UINT32)pSrc[i+2], 9)
+                               if (length > 3U) {
+                                       PUTBITS2((OPJ_UINT32)pSrc[i+3], 9)
+                                       if (length > 4U) {
+                                               PUTBITS2((OPJ_UINT32)pSrc[i+4], 9)
+                                               if (length > 5U) {
+                                                       PUTBITS2((OPJ_UINT32)pSrc[i+5], 9)
+                                                       if (length > 6U) {
+                                                               PUTBITS2((OPJ_UINT32)pSrc[i+6], 9)
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+               }
+               FLUSHBITS() /* store the final partial byte, zero padded in its low bits */
        }
 }
 
-static void tif_32sto6u(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T length)
+static void tif_32sto10u(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T length)
 {
        OPJ_SIZE_T i;
-       for (i = 0; i < (length & -(OPJ_SIZE_T)4U); i+=4U) {
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)3U); i+=4U) {
                OPJ_UINT32 src0 = (OPJ_UINT32)pSrc[i+0];
                OPJ_UINT32 src1 = (OPJ_UINT32)pSrc[i+1];
                OPJ_UINT32 src2 = (OPJ_UINT32)pSrc[i+2];
                OPJ_UINT32 src3 = (OPJ_UINT32)pSrc[i+3];
                
-               *pDst++ = (src0 << 2) | (src1 >> 4);
-               *pDst++ = ((src1 & 0xFU) << 4) | (src2 >> 2);
-               *pDst++ = ((src2 & 0x3U) << 6) | src3;
+               *pDst++ = (OPJ_BYTE)(src0 >> 2);
+               *pDst++ = (OPJ_BYTE)(((src0 & 0x3U) << 6) | (src1 >> 4));
+               *pDst++ = (OPJ_BYTE)(((src1 & 0xFU) << 4) | (src2 >> 6));
+               *pDst++ = (OPJ_BYTE)(((src2 & 0x3FU) << 2) | (src3 >> 8));
+               *pDst++ = (OPJ_BYTE)(src3);
        }
        
        if (length & 3U) {
@@ -175,94 +308,158 @@ static void tif_32sto6u(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T length
                                src2 = (OPJ_UINT32)pSrc[i+2];
                        }
                }
-               *pDst++ = (src0 << 2) | (src1 >> 4);
+               *pDst++ = (OPJ_BYTE)(src0 >> 2);
+               *pDst++ = (OPJ_BYTE)(((src0 & 0x3U) << 6) | (src1 >> 4));
                if (length > 1U) {
-                       *pDst++ = ((src1 & 0xFU) << 4) | (src2 >> 2);
+                       *pDst++ = (OPJ_BYTE)(((src1 & 0xFU) << 4) | (src2 >> 6));
                        if (length > 2U) {
-                               *pDst++ = ((src2 & 0x3U) << 6);
+                               *pDst++ = (OPJ_BYTE)(((src2 & 0x3FU) << 2));
                        }
                }
        }
 }
-static void tif_32sto8u(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T length)
+static void tif_32sto11u(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T length) /* packs length samples from pSrc into pDst, 11 bits each, most significant bit first; samples are not masked (assumed to already fit in 11 bits - TODO confirm) */
 {
        OPJ_SIZE_T i;
-       for (i = 0; i < length; ++i) {
-               pDst[i] = (OPJ_BYTE)pSrc[i];
-       }
-}
-static void tif_32sto10u(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T length)
-{
-       OPJ_SIZE_T i;
-       for (i = 0; i < (length & -(OPJ_SIZE_T)4U); i+=4U) {
+       
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)7U); i+=8U) { /* whole groups: 8 samples -> 11 bytes */
                OPJ_UINT32 src0 = (OPJ_UINT32)pSrc[i+0];
                OPJ_UINT32 src1 = (OPJ_UINT32)pSrc[i+1];
                OPJ_UINT32 src2 = (OPJ_UINT32)pSrc[i+2];
                OPJ_UINT32 src3 = (OPJ_UINT32)pSrc[i+3];
+               OPJ_UINT32 src4 = (OPJ_UINT32)pSrc[i+4];
+               OPJ_UINT32 src5 = (OPJ_UINT32)pSrc[i+5];
+               OPJ_UINT32 src6 = (OPJ_UINT32)pSrc[i+6];
+               OPJ_UINT32 src7 = (OPJ_UINT32)pSrc[i+7];
                
-               *pDst++ = src0 >> 2;
-               *pDst++ = ((src0 & 0x3U) << 6) | (src1 >> 4);
-               *pDst++ = ((src1 & 0xFU) << 4) | (src2 >> 6);
-               *pDst++ = ((src2 & 0x3FU) << 2) | (src3 >> 8);
-               *pDst++ = src3;
+               *pDst++ = (OPJ_BYTE)((src0 >> 3)); /* top 8 bits of src0 */
+               *pDst++ = (OPJ_BYTE)((src0 << 5) | (src1 >> 6)); /* low 3 bits of src0, then the top 5 bits of src1 */
+               *pDst++ = (OPJ_BYTE)((src1 << 2) | (src2 >> 9)); /* low 6 bits of src1, then the top 2 bits of src2 */
+               *pDst++ = (OPJ_BYTE)((src2 >> 1)); /* middle 8 bits of src2; the byte cast drops the bits already stored */
+               *pDst++ = (OPJ_BYTE)((src2 << 7) | (src3 >> 4));
+               *pDst++ = (OPJ_BYTE)((src3 << 4) | (src4 >> 7));
+               *pDst++ = (OPJ_BYTE)((src4 << 1) | (src5 >> 10));
+               *pDst++ = (OPJ_BYTE)((src5 >> 2));
+               *pDst++ = (OPJ_BYTE)((src5 << 6) | (src6 >> 5));
+               *pDst++ = (OPJ_BYTE)((src6 << 3) | (src7 >> 8));
+               *pDst++ = (OPJ_BYTE)(src7); /* low 8 bits of src7 */
        }
        
-       if (length & 3U) {
-               OPJ_UINT32 src0 = (OPJ_UINT32)pSrc[i+0];
-               OPJ_UINT32 src1 = 0U;
-               OPJ_UINT32 src2 = 0U;
-               length = length & 3U;
-               
-               if (length > 1U) {
-                       src1 = (OPJ_UINT32)pSrc[i+1];
-                       if (length > 2U) {
-                               src2 = (OPJ_UINT32)pSrc[i+2];
-                       }
-               }
-               *pDst++ = src0 >> 2;
-               *pDst++ = ((src0 & 0x3U) << 6) | (src1 >> 4);
+       if (length & 7U) { /* 1 to 7 leftover samples go through the bit writer macros */
+               unsigned int trailing = 0U; /* bits gathered but not yet stored */
+               int remaining = 8U; /* free bit positions in the pending byte */
+               length &= 7U;
+               PUTBITS2((OPJ_UINT32)pSrc[i+0], 11) /* 11 bits always fill the pending byte, so PUTBITS2 is used directly */
                if (length > 1U) {
-                       *pDst++ = ((src1 & 0xFU) << 4) | (src2 >> 6);
+                       PUTBITS2((OPJ_UINT32)pSrc[i+1], 11)
                        if (length > 2U) {
-                               *pDst++ = ((src2 & 0x3FU) << 2);
+                               PUTBITS2((OPJ_UINT32)pSrc[i+2], 11)
+                               if (length > 3U) {
+                                       PUTBITS2((OPJ_UINT32)pSrc[i+3], 11)
+                                       if (length > 4U) {
+                                               PUTBITS2((OPJ_UINT32)pSrc[i+4], 11)
+                                               if (length > 5U) {
+                                                       PUTBITS2((OPJ_UINT32)pSrc[i+5], 11)
+                                                       if (length > 6U) {
+                                                               PUTBITS2((OPJ_UINT32)pSrc[i+6], 11)
+                                                       }
+                                               }
+                                       }
+                               }
                        }
                }
+               FLUSHBITS() /* store the final partial byte, zero padded in its low bits */
        }
 }
 static void tif_32sto12u(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T length)
 {
        OPJ_SIZE_T i;
-       for (i = 0; i < (length & -(OPJ_SIZE_T)2U); i+=2U) {
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)1U); i+=2U) { /* whole pairs: two 12-bit samples -> 3 bytes, most significant bit first */
                OPJ_UINT32 src0 = (OPJ_UINT32)pSrc[i+0];
                OPJ_UINT32 src1 = (OPJ_UINT32)pSrc[i+1];
                
-               *pDst++ = src0 >> 4;
-               *pDst++ = ((src0 & 0xFU) << 4) | (src1 >> 8);
-               *pDst++ = src1;
+               *pDst++ = (OPJ_BYTE)(src0 >> 4); /* top 8 bits of src0 */
+               *pDst++ = (OPJ_BYTE)(((src0 & 0xFU) << 4) | (src1 >> 8)); /* low 4 bits of src0, then the top bits of src1 (src1 is not masked; assumed to fit in 12 bits - TODO confirm) */
+               *pDst++ = (OPJ_BYTE)(src1); /* low 8 bits of src1 */
        }
        
        if (length & 1U) {
                OPJ_UINT32 src0 = (OPJ_UINT32)pSrc[i+0];
-               *pDst++ = src0 >> 4;
-               *pDst++ = ((src0 & 0xFU) << 4);
+               *pDst++ = (OPJ_BYTE)(src0 >> 4); /* odd trailing sample: top 8 bits */
+               *pDst++ = (OPJ_BYTE)(((src0 & 0xFU) << 4)); /* low 4 bits in the high nibble; the low nibble is zero padding */
+       }
+}
+static void tif_32sto13u(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T length) /* packs length samples from pSrc into pDst, 13 bits each, most significant bit first; samples are not masked (assumed to already fit in 13 bits - TODO confirm) */
+{
+       OPJ_SIZE_T i;
+       
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)7U); i+=8U) { /* whole groups: 8 samples -> 13 bytes */
+               OPJ_UINT32 src0 = (OPJ_UINT32)pSrc[i+0];
+               OPJ_UINT32 src1 = (OPJ_UINT32)pSrc[i+1];
+               OPJ_UINT32 src2 = (OPJ_UINT32)pSrc[i+2];
+               OPJ_UINT32 src3 = (OPJ_UINT32)pSrc[i+3];
+               OPJ_UINT32 src4 = (OPJ_UINT32)pSrc[i+4];
+               OPJ_UINT32 src5 = (OPJ_UINT32)pSrc[i+5];
+               OPJ_UINT32 src6 = (OPJ_UINT32)pSrc[i+6];
+               OPJ_UINT32 src7 = (OPJ_UINT32)pSrc[i+7];
+               
+               *pDst++ = (OPJ_BYTE)((src0 >> 5)); /* top 8 bits of src0 */
+               *pDst++ = (OPJ_BYTE)((src0 << 3) | (src1 >> 10)); /* low 5 bits of src0, then the top 3 bits of src1 */
+               *pDst++ = (OPJ_BYTE)((src1 >> 2)); /* middle 8 bits of src1; the byte cast drops the bits already stored */
+               *pDst++ = (OPJ_BYTE)((src1 << 6) | (src2 >> 7));
+               *pDst++ = (OPJ_BYTE)((src2 << 1) | (src3 >> 12));
+               *pDst++ = (OPJ_BYTE)((src3 >> 4));
+               *pDst++ = (OPJ_BYTE)((src3 << 4) | (src4 >> 9));
+               *pDst++ = (OPJ_BYTE)((src4 >> 1));
+               *pDst++ = (OPJ_BYTE)((src4 << 7) | (src5 >> 6));
+               *pDst++ = (OPJ_BYTE)((src5 << 2) | (src6 >> 11));
+               *pDst++ = (OPJ_BYTE)((src6 >> 3));
+               *pDst++ = (OPJ_BYTE)((src6 << 5) | (src7 >> 8));
+               *pDst++ = (OPJ_BYTE)(src7); /* low 8 bits of src7 */
+       }
+       
+       if (length & 7U) { /* 1 to 7 leftover samples go through the bit writer macros */
+               unsigned int trailing = 0U; /* bits gathered but not yet stored */
+               int remaining = 8U; /* free bit positions in the pending byte */
+               length &= 7U;
+               PUTBITS2((OPJ_UINT32)pSrc[i+0], 13) /* 13 bits always fill the pending byte, so PUTBITS2 is used directly */
+               if (length > 1U) {
+                       PUTBITS2((OPJ_UINT32)pSrc[i+1], 13)
+                       if (length > 2U) {
+                               PUTBITS2((OPJ_UINT32)pSrc[i+2], 13)
+                               if (length > 3U) {
+                                       PUTBITS2((OPJ_UINT32)pSrc[i+3], 13)
+                                       if (length > 4U) {
+                                               PUTBITS2((OPJ_UINT32)pSrc[i+4], 13)
+                                               if (length > 5U) {
+                                                       PUTBITS2((OPJ_UINT32)pSrc[i+5], 13)
+                                                       if (length > 6U) {
+                                                               PUTBITS2((OPJ_UINT32)pSrc[i+6], 13)
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+               }
+               FLUSHBITS() /* store the final partial byte, zero padded in its low bits */
        }
 }
 static void tif_32sto14u(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T length)
 {
        OPJ_SIZE_T i;
-       for (i = 0; i < (length & -(OPJ_SIZE_T)4U); i+=4U) {
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)3U); i+=4U) {
                OPJ_UINT32 src0 = (OPJ_UINT32)pSrc[i+0];
                OPJ_UINT32 src1 = (OPJ_UINT32)pSrc[i+1];
                OPJ_UINT32 src2 = (OPJ_UINT32)pSrc[i+2];
                OPJ_UINT32 src3 = (OPJ_UINT32)pSrc[i+3];
                
-               *pDst++ = src0 >> 6;
-               *pDst++ = ((src0 & 0x3FU) << 2) | (src1 >> 12);
-               *pDst++ = src1 >> 4;
-               *pDst++ = ((src1 & 0xFU) << 4) | (src2 >> 10);
-               *pDst++ = src2 >> 2;
-               *pDst++ = ((src2 & 0x3U) << 6) | (src3 >> 8);
-               *pDst++ = src3;
+               *pDst++ = (OPJ_BYTE)(src0 >> 6);
+               *pDst++ = (OPJ_BYTE)(((src0 & 0x3FU) << 2) | (src1 >> 12));
+               *pDst++ = (OPJ_BYTE)(src1 >> 4);
+               *pDst++ = (OPJ_BYTE)(((src1 & 0xFU) << 4) | (src2 >> 10));
+               *pDst++ = (OPJ_BYTE)(src2 >> 2);
+               *pDst++ = (OPJ_BYTE)(((src2 & 0x3U) << 6) | (src3 >> 8));
+               *pDst++ = (OPJ_BYTE)(src3);
        }
        
        if (length & 3U) {
@@ -277,73 +474,80 @@ static void tif_32sto14u(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T lengt
                                src2 = (OPJ_UINT32)pSrc[i+2];
                        }
                }
-               *pDst++ = src0 >> 6;
-               *pDst++ = ((src0 & 0x3FU) << 2) | (src1 >> 12);
+               *pDst++ = (OPJ_BYTE)(src0 >> 6);
+               *pDst++ = (OPJ_BYTE)(((src0 & 0x3FU) << 2) | (src1 >> 12));
                if (length > 1U) {
-                       *pDst++ = src1 >> 4;
-                       *pDst++ = ((src1 & 0xFU) << 4) | (src2 >> 10);
+                       *pDst++ = (OPJ_BYTE)(src1 >> 4);
+                       *pDst++ = (OPJ_BYTE)(((src1 & 0xFU) << 4) | (src2 >> 10));
                        if (length > 2U) {
-                               *pDst++ = src2 >> 2;
-                               *pDst++ = ((src2 & 0x3U) << 6);
+                               *pDst++ = (OPJ_BYTE)(src2 >> 2);
+                               *pDst++ = (OPJ_BYTE)(((src2 & 0x3U) << 6));
                        }
                }
        }
 }
-static void tif_32sto16u(const OPJ_INT32* pSrc, OPJ_UINT16* pDst, OPJ_SIZE_T length)
+static void tif_32sto15u(const OPJ_INT32* pSrc, OPJ_BYTE* pDst, OPJ_SIZE_T length) /* packs length samples from pSrc into pDst, 15 bits each, most significant bit first; samples are not masked (assumed to already fit in 15 bits - TODO confirm) */
 {
        OPJ_SIZE_T i;
-       for (i = 0; i < length; ++i) {
-               pDst[i] = (OPJ_UINT16)pSrc[i];
-       }
-}
-
-typedef void (* convert_32s_PXCX)(OPJ_INT32 const* const* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length, OPJ_INT32 adjust);
-static void convert_32s_P1C1(OPJ_INT32 const* const* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length, OPJ_INT32 adjust)
-{
-       OPJ_SIZE_T i;
-       const OPJ_INT32* pSrc0 = pSrc[0];
-       
-       for (i = 0; i < length; i++) {
-               pDst[i] = pSrc0[i] + adjust;
-       }
-}
-static void convert_32s_P2C2(OPJ_INT32 const* const* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length, OPJ_INT32 adjust)
-{
-       OPJ_SIZE_T i;
-       const OPJ_INT32* pSrc0 = pSrc[0];
-       const OPJ_INT32* pSrc1 = pSrc[1];
        
-       for (i = 0; i < length; i++) {
-               pDst[2*i+0] = pSrc0[i] + adjust;
-               pDst[2*i+1] = pSrc1[i] + adjust;
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)7U); i+=8U) { /* whole groups: 8 samples -> 15 bytes */
+               OPJ_UINT32 src0 = (OPJ_UINT32)pSrc[i+0];
+               OPJ_UINT32 src1 = (OPJ_UINT32)pSrc[i+1];
+               OPJ_UINT32 src2 = (OPJ_UINT32)pSrc[i+2];
+               OPJ_UINT32 src3 = (OPJ_UINT32)pSrc[i+3];
+               OPJ_UINT32 src4 = (OPJ_UINT32)pSrc[i+4];
+               OPJ_UINT32 src5 = (OPJ_UINT32)pSrc[i+5];
+               OPJ_UINT32 src6 = (OPJ_UINT32)pSrc[i+6];
+               OPJ_UINT32 src7 = (OPJ_UINT32)pSrc[i+7];
+               
+               *pDst++ = (OPJ_BYTE)((src0 >> 7)); /* top 8 bits of src0 */
+               *pDst++ = (OPJ_BYTE)((src0 << 1) | (src1 >> 14)); /* low 7 bits of src0, then the top bit of src1 */
+               *pDst++ = (OPJ_BYTE)((src1 >> 6)); /* middle 8 bits of src1; the byte cast drops the bit already stored */
+               *pDst++ = (OPJ_BYTE)((src1 << 2) | (src2 >> 13));
+               *pDst++ = (OPJ_BYTE)((src2 >> 5));
+               *pDst++ = (OPJ_BYTE)((src2 << 3) | (src3 >> 12));
+               *pDst++ = (OPJ_BYTE)((src3 >> 4));
+               *pDst++ = (OPJ_BYTE)((src3 << 4) | (src4 >> 11));
+               *pDst++ = (OPJ_BYTE)((src4 >> 3));
+               *pDst++ = (OPJ_BYTE)((src4 << 5) | (src5 >> 10));
+               *pDst++ = (OPJ_BYTE)((src5 >> 2));
+               *pDst++ = (OPJ_BYTE)((src5 << 6) | (src6 >> 9));
+               *pDst++ = (OPJ_BYTE)((src6 >> 1));
+               *pDst++ = (OPJ_BYTE)((src6 << 7) | (src7 >> 8));
+               *pDst++ = (OPJ_BYTE)(src7); /* low 8 bits of src7 */
        }
-}
-static void convert_32s_P3C3(OPJ_INT32 const* const* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length, OPJ_INT32 adjust)
-{
-       OPJ_SIZE_T i;
-       const OPJ_INT32* pSrc0 = pSrc[0];
-       const OPJ_INT32* pSrc1 = pSrc[1];
-       const OPJ_INT32* pSrc2 = pSrc[2];
        
-       for (i = 0; i < length; i++) {
-               pDst[3*i+0] = pSrc0[i] + adjust;
-               pDst[3*i+1] = pSrc1[i] + adjust;
-               pDst[3*i+2] = pSrc2[i] + adjust;
+       if (length & 7U) { /* 1 to 7 leftover samples go through the bit writer macros */
+               unsigned int trailing = 0U; /* bits gathered but not yet stored */
+               int remaining = 8U; /* free bit positions in the pending byte */
+               length &= 7U;
+               PUTBITS2((OPJ_UINT32)pSrc[i+0], 15) /* 15 bits always fill the pending byte, so PUTBITS2 is used directly */
+               if (length > 1U) {
+                       PUTBITS2((OPJ_UINT32)pSrc[i+1], 15)
+                       if (length > 2U) {
+                               PUTBITS2((OPJ_UINT32)pSrc[i+2], 15)
+                               if (length > 3U) {
+                                       PUTBITS2((OPJ_UINT32)pSrc[i+3], 15)
+                                       if (length > 4U) {
+                                               PUTBITS2((OPJ_UINT32)pSrc[i+4], 15)
+                                               if (length > 5U) {
+                                                       PUTBITS2((OPJ_UINT32)pSrc[i+5], 15)
+                                                       if (length > 6U) {
+                                                               PUTBITS2((OPJ_UINT32)pSrc[i+6], 15)
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+               }
+               FLUSHBITS() /* store the final partial byte, zero padded in its low bits */
        }
 }
-static void convert_32s_P4C4(OPJ_INT32 const* const* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length, OPJ_INT32 adjust)
+static void tif_32sto16u(const OPJ_INT32* pSrc, OPJ_UINT16* pDst, OPJ_SIZE_T length) /* copies length samples from pSrc to pDst, one OPJ_UINT16 element per sample */
 {
        OPJ_SIZE_T i;
-       const OPJ_INT32* pSrc0 = pSrc[0];
-       const OPJ_INT32* pSrc1 = pSrc[1];
-       const OPJ_INT32* pSrc2 = pSrc[2];
-       const OPJ_INT32* pSrc3 = pSrc[3];
-       
-       for (i = 0; i < length; i++) {
-               pDst[4*i+0] = pSrc0[i] + adjust;
-               pDst[4*i+1] = pSrc1[i] + adjust;
-               pDst[4*i+2] = pSrc2[i] + adjust;
-               pDst[4*i+3] = pSrc3[i] + adjust;
+       for (i = 0; i < length; ++i) { /* no bit packing here: pDst is indexed per sample */
+               pDst[i] = (OPJ_UINT16)pSrc[i]; /* narrowing cast; values outside the OPJ_UINT16 range are not checked */
        }
 }
 
@@ -351,7 +555,6 @@ int imagetotif(opj_image_t * image, const char *outfile)
 {
        int width, height;
        int bps,adjust, sgnd;
-       int has_alpha;
        int tiPhoto;
        TIFF *tif;
        tdata_t buf;
@@ -361,15 +564,25 @@ int imagetotif(opj_image_t * image, const char *outfile)
        OPJ_INT32* buffer32s = NULL;
        OPJ_INT32 const* planes[4];
        convert_32s_PXCX cvtPxToCx = NULL;
-       tif_32stoX cvt32sToTif = NULL;
-       
-       has_alpha = 0;
+       convert_32sXXx_C1R cvt32sToTif = NULL;
+
        bps = (int)image->comps[0].prec;
        planes[0] = image->comps[0].data;
        
        numcomps = image->numcomps;
        
-       if (numcomps > 2U) {
+       if (image->color_space == OPJ_CLRSPC_CMYK) {
+               if (numcomps < 4U) {
+                       fprintf(stderr,"imagetotif: CMYK images shall be composed of at least 4 planes.\n");
+                       fprintf(stderr,"\tAborting\n");
+                       return 1;
+               }
+               tiPhoto = PHOTOMETRIC_SEPARATED;
+               if (numcomps > 4U) {
+                       numcomps = 4U; /* Alpha not supported */
+               }
+       }
+       else if (numcomps > 2U) {
                tiPhoto = PHOTOMETRIC_RGB;
                if (numcomps > 4U) {
                        numcomps = 4U;
@@ -398,10 +611,10 @@ int imagetotif(opj_image_t * image, const char *outfile)
                return 1;
        }
        
-       if((bps > 16) || ((bps != 1) && (bps & 1))) bps = 0;
+       if(bps > 16) bps = 0;
        if(bps == 0)
        {
-               fprintf(stderr,"imagetotif: Bits=%d, Only 1, 2, 4, 6, 8, 10, 12, 14 and 16 bits implemented\n",bps);
+               fprintf(stderr,"imagetotif: Bits=%d, Only 1 to 16 bits implemented\n",bps);
                fprintf(stderr,"\tAborting\n");
                return 1;
        }
@@ -414,50 +627,47 @@ int imagetotif(opj_image_t * image, const char *outfile)
        for (i = 0U; i < numcomps; ++i) {
                clip_component(&(image->comps[i]), image->comps[0].prec);
        }
-       switch (numcomps) {
+       cvtPxToCx = convert_32s_PXCX_LUT[numcomps];
+       switch (bps) {
                case 1:
-                       cvtPxToCx = convert_32s_P1C1;
-                       break;
                case 2:
-                       cvtPxToCx = convert_32s_P2C2;
-                       break;
-               case 3:
-                       cvtPxToCx = convert_32s_P3C3;
-                       break;
                case 4:
-                       cvtPxToCx = convert_32s_P4C4;
-                       break;
-               default:
-                       /* never here */
-                       break;
-       }
-       switch (bps) {
-               case 1:
-                       cvt32sToTif = tif_32sto1u;
+               case 6:
+               case 8:
+                       cvt32sToTif = convert_32sXXu_C1R_LUT[bps];
                        break;
-               case 2:
-                       cvt32sToTif = tif_32sto2u;
+               case 3:
+                       cvt32sToTif = tif_32sto3u;
                        break;
-               case 4:
-                       cvt32sToTif = tif_32sto4u;
+               case 5:
+                       cvt32sToTif = tif_32sto5u;
                        break;
-               case 6:
-                       cvt32sToTif = tif_32sto6u;
+               case 7:
+                       cvt32sToTif = tif_32sto7u;
                        break;
-               case 8:
-                       cvt32sToTif = tif_32sto8u;
+               case 9:
+                       cvt32sToTif = tif_32sto9u;
                        break;
                case 10:
                        cvt32sToTif = tif_32sto10u;
                        break;
+               case 11:
+                       cvt32sToTif = tif_32sto11u;
+                       break;
                case 12:
                        cvt32sToTif = tif_32sto12u;
                        break;
+               case 13:
+                       cvt32sToTif = tif_32sto13u;
+                       break;
                case 14:
                        cvt32sToTif = tif_32sto14u;
                        break;
+               case 15:
+                       cvt32sToTif = tif_32sto15u;
+                       break;
                case 16:
-                       cvt32sToTif = (tif_32stoX)tif_32sto16u;
+                       cvt32sToTif = (convert_32sXXx_C1R)tif_32sto16u;
                        break;
                default:
                        /* never here */
@@ -476,8 +686,9 @@ int imagetotif(opj_image_t * image, const char *outfile)
        TIFFSetField(tif, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG);
        TIFFSetField(tif, TIFFTAG_PHOTOMETRIC, tiPhoto);
        TIFFSetField(tif, TIFFTAG_ROWSPERSTRIP, 1);
+       
        strip_size = TIFFStripSize(tif);
-       rowStride = ((OPJ_SIZE_T)width * numcomps * bps + 7U) / 8U;
+       rowStride = ((OPJ_SIZE_T)width * numcomps * (OPJ_SIZE_T)bps + 7U) / 8U;
        if (rowStride != (OPJ_SIZE_T)strip_size) {
                fprintf(stderr, "Invalid TIFF strip size\n");
                TIFFClose(tif);
@@ -488,7 +699,7 @@ int imagetotif(opj_image_t * image, const char *outfile)
                TIFFClose(tif);
                return 1;
        }
-       buffer32s = malloc((OPJ_SIZE_T)width * numcomps * sizeof(OPJ_INT32));
+       buffer32s = (OPJ_INT32 *)malloc((OPJ_SIZE_T)width * numcomps * sizeof(OPJ_INT32));
        if (buffer32s == NULL) {
                _TIFFfree(buf);
                TIFFClose(tif);
@@ -496,8 +707,8 @@ int imagetotif(opj_image_t * image, const char *outfile)
        }
        
        for (i = 0; i < image->comps[0].h; ++i) {
-               cvtPxToCx(planes, buffer32s, width, adjust);
-               cvt32sToTif(buffer32s, buf, width * numcomps);
+               cvtPxToCx(planes, buffer32s, (OPJ_SIZE_T)width, adjust);
+               cvt32sToTif(buffer32s, (OPJ_BYTE *)buf, (OPJ_SIZE_T)width * numcomps);
                (void)TIFFWriteEncodedStrip(tif, i, (void*)buf, strip_size);
                planes[0] += width;
                planes[1] += width;
@@ -511,39 +722,63 @@ int imagetotif(opj_image_t * image, const char *outfile)
        return 0;
 }/* imagetotif() */
 
-typedef void (* tif_Xto32s)(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length);
+#define GETBITS(dest, nb) { \
+       int needed = (nb); \
+       unsigned int dst = 0U; \
+       if (available == 0) { \
+               val = *pSrc++; \
+               available = 8; \
+       } \
+       while (needed > available) { \
+               dst |= val & ((1U << available) - 1U); \
+               needed -= available; \
+               dst <<= needed; \
+               val = *pSrc++; \
+               available = 8; \
+       } \
+       dst |= (val >> (available - needed)) & ((1U << needed) - 1U); \
+       available -= needed; \
+       dest = (OPJ_INT32)dst; \
+}
 
-static void tif_1uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
+static void tif_3uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
 {
        OPJ_SIZE_T i;
-       for (i = 0; i < (length & -(OPJ_SIZE_T)8U); i+=8U) {
-               OPJ_UINT8 val = *pSrc++;
-               pDst[i+0] =  val >> 7;
-               pDst[i+1] = (val >> 6) & 0x1U;
-               pDst[i+2] = (val >> 5) & 0x1U;
-               pDst[i+3] = (val >> 4) & 0x1U;
-               pDst[i+4] = (val >> 3) & 0x1U;
-               pDst[i+5] = (val >> 2) & 0x1U;
-               pDst[i+6] = (val >> 1) & 0x1U;
-               pDst[i+7] = val & 0x1U;
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)7U); i+=8U) {
+               OPJ_UINT32 val0 = *pSrc++;
+               OPJ_UINT32 val1 = *pSrc++;
+               OPJ_UINT32 val2 = *pSrc++;
+               
+               pDst[i+0] = (OPJ_INT32)((val0 >> 5));
+               pDst[i+1] = (OPJ_INT32)(((val0 & 0x1FU) >> 2));
+               pDst[i+2] = (OPJ_INT32)(((val0 & 0x3U) << 1) | (val1 >> 7));
+               pDst[i+3] = (OPJ_INT32)(((val1 & 0x7FU) >> 4));
+               pDst[i+4] = (OPJ_INT32)(((val1 & 0xFU) >> 1));
+               pDst[i+5] = (OPJ_INT32)(((val1 & 0x1U) << 2) | (val2 >> 6));
+               pDst[i+6] = (OPJ_INT32)(((val2 & 0x3FU) >> 3));
+               pDst[i+7] = (OPJ_INT32)(((val2 & 0x7U)));
+               
        }
        if (length & 7U) {
-               OPJ_UINT8 val = *pSrc++;
+               unsigned int val;
+               int available = 0;
+               
                length = length & 7U;
-               pDst[i+0] =  val >> 7;
+               
+               GETBITS(pDst[i+0], 3)
                
                if (length > 1U) {
-                       pDst[i+1] = (val >> 6) & 0x1U;
+                       GETBITS(pDst[i+1], 3)
                        if (length > 2U) {
-                               pDst[i+2] = (val >> 5) & 0x1U;
+                               GETBITS(pDst[i+2], 3)
                                if (length > 3U) {
-                                       pDst[i+3] = (val >> 4) & 0x1U;
+                                       GETBITS(pDst[i+3], 3)
                                        if (length > 4U) {
-                                               pDst[i+4] = (val >> 3) & 0x1U;
+                                               GETBITS(pDst[i+4], 3)
                                                if (length > 5U) {
-                                                       pDst[i+5] = (val >> 2) & 0x1U;
+                                                       GETBITS(pDst[i+5], 3)
                                                        if (length > 6U) {
-                                                               pDst[i+6] = (val >> 1) & 0x1U;
+                                                               GETBITS(pDst[i+6], 3)
                                                        }
                                                }
                                        }
@@ -552,106 +787,238 @@ static void tif_1uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length
                }
        }
 }
-static void tif_2uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
+static void tif_5uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
 {
        OPJ_SIZE_T i;
-       for (i = 0; i < (length & -(OPJ_SIZE_T)4U); i+=4U) {
-               OPJ_UINT8 val = *pSrc++;
-               pDst[i+0] =  val >> 6;
-               pDst[i+1] = (val >> 4) & 0x3U;
-               pDst[i+2] = (val >> 2) & 0x3U;
-               pDst[i+3] = val & 0x3U;
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)7U); i+=8U) {
+               OPJ_UINT32 val0 = *pSrc++;
+               OPJ_UINT32 val1 = *pSrc++;
+               OPJ_UINT32 val2 = *pSrc++;
+               OPJ_UINT32 val3 = *pSrc++;
+               OPJ_UINT32 val4 = *pSrc++;
+               
+               pDst[i+0] = (OPJ_INT32)((val0 >> 3));
+               pDst[i+1] = (OPJ_INT32)(((val0 & 0x7U) << 2) | (val1 >> 6));
+               pDst[i+2] = (OPJ_INT32)(((val1 & 0x3FU) >> 1));
+               pDst[i+3] = (OPJ_INT32)(((val1 & 0x1U) << 4) | (val2 >> 4));
+               pDst[i+4] = (OPJ_INT32)(((val2 & 0xFU) << 1) | (val3 >> 7));
+               pDst[i+5] = (OPJ_INT32)(((val3 & 0x7FU) >> 2));
+               pDst[i+6] = (OPJ_INT32)(((val3 & 0x3U) << 3) | (val4 >> 5));
+               pDst[i+7] = (OPJ_INT32)(((val4 & 0x1FU)));
+               
        }
-       if (length & 3U) {
-               OPJ_UINT8 val = *pSrc++;
-               length = length & 3U;
-               pDst[i+0] =  val >> 6;
+       if (length & 7U) {
+               unsigned int val;
+               int available = 0;
+               
+               length = length & 7U;
+               
+               GETBITS(pDst[i+0], 5)
                
                if (length > 1U) {
-                       pDst[i+1] = (val >> 4) & 0x3U;
+                       GETBITS(pDst[i+1], 5)
                        if (length > 2U) {
-                               pDst[i+2] = (val >> 2) & 0x3U;
-                               
+                               GETBITS(pDst[i+2], 5)
+                               if (length > 3U) {
+                                       GETBITS(pDst[i+3], 5)
+                                       if (length > 4U) {
+                                               GETBITS(pDst[i+4], 5)
+                                               if (length > 5U) {
+                                                       GETBITS(pDst[i+5], 5)
+                                                       if (length > 6U) {
+                                                               GETBITS(pDst[i+6], 5)
+                                                       }
+                                               }
+                                       }
+                               }
                        }
                }
        }
 }
-static void tif_4uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
+static void tif_7uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
 {
        OPJ_SIZE_T i;
-       for (i = 0; i < (length & -(OPJ_SIZE_T)2U); i+=2U) {
-               OPJ_UINT8 val = *pSrc++;
-               pDst[i+0] = val >> 4;
-               pDst[i+1] = val & 0xFU;
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)7U); i+=8U) {
+               OPJ_UINT32 val0 = *pSrc++;
+               OPJ_UINT32 val1 = *pSrc++;
+               OPJ_UINT32 val2 = *pSrc++;
+               OPJ_UINT32 val3 = *pSrc++;
+               OPJ_UINT32 val4 = *pSrc++;
+               OPJ_UINT32 val5 = *pSrc++;
+               OPJ_UINT32 val6 = *pSrc++;
+               
+               pDst[i+0] = (OPJ_INT32)((val0 >> 1));
+               pDst[i+1] = (OPJ_INT32)(((val0 & 0x1U) << 6) | (val1 >> 2));
+               pDst[i+2] = (OPJ_INT32)(((val1 & 0x3U) << 5) | (val2 >> 3));
+               pDst[i+3] = (OPJ_INT32)(((val2 & 0x7U) << 4) | (val3 >> 4));
+               pDst[i+4] = (OPJ_INT32)(((val3 & 0xFU) << 3) | (val4 >> 5));
+               pDst[i+5] = (OPJ_INT32)(((val4 & 0x1FU) << 2) | (val5 >> 6));
+               pDst[i+6] = (OPJ_INT32)(((val5 & 0x3FU) << 1) | (val6 >> 7));
+               pDst[i+7] = (OPJ_INT32)(((val6 & 0x7FU)));
+               
        }
-       if (length & 1U) {
-               OPJ_UINT8 val = *pSrc++;
-               pDst[i+0] = val >> 4;
+       if (length & 7U) {
+               unsigned int val;
+               int available = 0;
+               
+               length = length & 7U;
+               
+               GETBITS(pDst[i+0], 7)
+               
+               if (length > 1U) {
+                       GETBITS(pDst[i+1], 7)
+                       if (length > 2U) {
+                               GETBITS(pDst[i+2], 7)
+                               if (length > 3U) {
+                                       GETBITS(pDst[i+3], 7)
+                                       if (length > 4U) {
+                                               GETBITS(pDst[i+4], 7)
+                                               if (length > 5U) {
+                                                       GETBITS(pDst[i+5], 7)
+                                                       if (length > 6U) {
+                                                               GETBITS(pDst[i+6], 7)
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+               }
        }
 }
-static void tif_6uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
+static void tif_9uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
 {
        OPJ_SIZE_T i;
-       for (i = 0; i < (length & -(OPJ_SIZE_T)4U); i+=4U) {
-               OPJ_UINT8 val0 = *pSrc++;
-               OPJ_UINT8 val1 = *pSrc++;
-               OPJ_UINT8 val2 = *pSrc++;
-               pDst[i+0] = val0 >> 2;
-               pDst[i+1] = ((val0 & 0x3U) << 4) | (val1 >> 4);
-               pDst[i+2] = ((val1 & 0xFU) << 2) | (val2 >> 6);
-               pDst[i+3] = val2 & 0x3FU;
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)7U); i+=8U) {
+               OPJ_UINT32 val0 = *pSrc++;
+               OPJ_UINT32 val1 = *pSrc++;
+               OPJ_UINT32 val2 = *pSrc++;
+               OPJ_UINT32 val3 = *pSrc++;
+               OPJ_UINT32 val4 = *pSrc++;
+               OPJ_UINT32 val5 = *pSrc++;
+               OPJ_UINT32 val6 = *pSrc++;
+               OPJ_UINT32 val7 = *pSrc++;
+               OPJ_UINT32 val8 = *pSrc++;
+               
+               pDst[i+0] = (OPJ_INT32)((val0 << 1) | (val1 >> 7));
+               pDst[i+1] = (OPJ_INT32)(((val1 & 0x7FU) << 2) | (val2 >> 6));
+               pDst[i+2] = (OPJ_INT32)(((val2 & 0x3FU) << 3) | (val3 >> 5));
+               pDst[i+3] = (OPJ_INT32)(((val3 & 0x1FU) << 4) | (val4 >> 4));
+               pDst[i+4] = (OPJ_INT32)(((val4 & 0xFU) << 5) | (val5 >> 3));
+               pDst[i+5] = (OPJ_INT32)(((val5 & 0x7U) << 6) | (val6 >> 2));
+               pDst[i+6] = (OPJ_INT32)(((val6 & 0x3U) << 7) | (val7 >> 1));
+               pDst[i+7] = (OPJ_INT32)(((val7 & 0x1U) << 8) | (val8));
                
        }
-       if (length & 3U) {
-               OPJ_UINT8 val0 = *pSrc++;
-               length = length & 3U;
-               pDst[i+0] =  val0 >> 2;
+       if (length & 7U) {
+               unsigned int val;
+               int available = 0;
+               
+               length = length & 7U;
+               
+               GETBITS(pDst[i+0], 9)
                
                if (length > 1U) {
-                       OPJ_UINT8 val1 = *pSrc++;
-                       pDst[i+1] = ((val0 & 0x3U) << 4) | (val1 >> 4);
+                       GETBITS(pDst[i+1], 9)
                        if (length > 2U) {
-                               OPJ_UINT8 val2 = *pSrc++;
-                               pDst[i+2] = ((val1 & 0xFU) << 2) | (val2 >> 6);
+                               GETBITS(pDst[i+2], 9)
+                               if (length > 3U) {
+                                       GETBITS(pDst[i+3], 9)
+                                       if (length > 4U) {
+                                               GETBITS(pDst[i+4], 9)
+                                               if (length > 5U) {
+                                                       GETBITS(pDst[i+5], 9)
+                                                       if (length > 6U) {
+                                                               GETBITS(pDst[i+6], 9)
+                                                       }
+                                               }
+                                       }
+                               }
                        }
                }
        }
 }
-static void tif_8uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
+static void tif_10uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
 {
        OPJ_SIZE_T i;
-       for (i = 0; i < length; ++i) {
-               pDst[i] = pSrc[i];
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)3U); i+=4U) {
+               OPJ_UINT32 val0 = *pSrc++;
+               OPJ_UINT32 val1 = *pSrc++;
+               OPJ_UINT32 val2 = *pSrc++;
+               OPJ_UINT32 val3 = *pSrc++;
+               OPJ_UINT32 val4 = *pSrc++;
+               
+               pDst[i+0] = (OPJ_INT32)((val0 << 2) | (val1 >> 6));
+               pDst[i+1] = (OPJ_INT32)(((val1 & 0x3FU) << 4) | (val2 >> 4));
+               pDst[i+2] = (OPJ_INT32)(((val2 & 0xFU) << 6) | (val3 >> 2));
+               pDst[i+3] = (OPJ_INT32)(((val3 & 0x3U) << 8) | val4);
+               
+       }
+       if (length & 3U) {
+               OPJ_UINT32 val0 = *pSrc++;
+               OPJ_UINT32 val1 = *pSrc++;
+               length = length & 3U;
+               pDst[i+0] = (OPJ_INT32)((val0 << 2) | (val1 >> 6));
+               
+               if (length > 1U) {
+                       OPJ_UINT32 val2 = *pSrc++;
+                       pDst[i+1] = (OPJ_INT32)(((val1 & 0x3FU) << 4) | (val2 >> 4));
+                       if (length > 2U) {
+                               OPJ_UINT32 val3 = *pSrc++;
+                               pDst[i+2] = (OPJ_INT32)(((val2 & 0xFU) << 6) | (val3 >> 2));
+                       }
+               }
        }
 }
-static void tif_10uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
+static void tif_11uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
 {
        OPJ_SIZE_T i;
-       for (i = 0; i < (length & -(OPJ_SIZE_T)4U); i+=4U) {
-               OPJ_INT32 val0 = *pSrc++;
-               OPJ_INT32 val1 = *pSrc++;
-               OPJ_INT32 val2 = *pSrc++;
-               OPJ_INT32 val3 = *pSrc++;
-               OPJ_INT32 val4 = *pSrc++;
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)7U); i+=8U) {
+               OPJ_UINT32 val0 = *pSrc++;
+               OPJ_UINT32 val1 = *pSrc++;
+               OPJ_UINT32 val2 = *pSrc++;
+               OPJ_UINT32 val3 = *pSrc++;
+               OPJ_UINT32 val4 = *pSrc++;
+               OPJ_UINT32 val5 = *pSrc++;
+               OPJ_UINT32 val6 = *pSrc++;
+               OPJ_UINT32 val7 = *pSrc++;
+               OPJ_UINT32 val8 = *pSrc++;
+               OPJ_UINT32 val9 = *pSrc++;
+               OPJ_UINT32 val10 = *pSrc++;
                
-               pDst[i+0] = (val0 << 2) | (val1 >> 6);
-               pDst[i+1] = ((val1 & 0x3FU) << 4) | (val2 >> 4);
-               pDst[i+2] = ((val2 & 0xFU) << 6) | (val3 >> 2);
-               pDst[i+3] = ((val3 & 0x3U) << 8) | val4;
+               pDst[i+0] = (OPJ_INT32)((val0 << 3) | (val1 >> 5));
+               pDst[i+1] = (OPJ_INT32)(((val1 & 0x1FU) << 6) | (val2 >> 2));
+               pDst[i+2] = (OPJ_INT32)(((val2 & 0x3U) << 9) | (val3 << 1) | (val4 >> 7));
+               pDst[i+3] = (OPJ_INT32)(((val4 & 0x7FU) << 4) | (val5 >> 4));
+               pDst[i+4] = (OPJ_INT32)(((val5 & 0xFU) << 7) | (val6 >> 1));
+               pDst[i+5] = (OPJ_INT32)(((val6 & 0x1U) << 10) | (val7 << 2) | (val8 >> 6));
+               pDst[i+6] = (OPJ_INT32)(((val8 & 0x3FU) << 5) | (val9 >> 3));
+               pDst[i+7] = (OPJ_INT32)(((val9 & 0x7U) << 8) | (val10));
                
        }
-       if (length & 3U) {
-               OPJ_INT32 val0 = *pSrc++;
-               OPJ_INT32 val1 = *pSrc++;
-               length = length & 3U;
-               pDst[i+0] = (val0 << 2) | (val1 >> 6);
+       if (length & 7U) {
+               unsigned int val;
+               int available = 0;
+               
+               length = length & 7U;
+               
+               GETBITS(pDst[i+0], 11)
                
                if (length > 1U) {
-                       OPJ_INT32 val2 = *pSrc++;
-                       pDst[i+1] = ((val1 & 0x3FU) << 4) | (val2 >> 4);
+                       GETBITS(pDst[i+1], 11)
                        if (length > 2U) {
-                               OPJ_INT32 val3 = *pSrc++;
-                               pDst[i+2] = ((val2 & 0xFU) << 6) | (val3 >> 2);
+                               GETBITS(pDst[i+2], 11)
+                               if (length > 3U) {
+                                       GETBITS(pDst[i+3], 11)
+                                       if (length > 4U) {
+                                               GETBITS(pDst[i+4], 11)
+                                               if (length > 5U) {
+                                                       GETBITS(pDst[i+5], 11)
+                                                       if (length > 6U) {
+                                                               GETBITS(pDst[i+6], 11)
+                                                       }
+                                               }
+                                       }
+                               }
                        }
                }
        }
@@ -659,127 +1026,180 @@ static void tif_10uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T lengt
 static void tif_12uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
 {
        OPJ_SIZE_T i;
-       for (i = 0; i < (length & -(OPJ_SIZE_T)2U); i+=2U) {
-               OPJ_INT32 val0 = *pSrc++;
-               OPJ_INT32 val1 = *pSrc++;
-               OPJ_INT32 val2 = *pSrc++;
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)1U); i+=2U) {
+               OPJ_UINT32 val0 = *pSrc++;
+               OPJ_UINT32 val1 = *pSrc++;
+               OPJ_UINT32 val2 = *pSrc++;
 
-               pDst[i+0] = (val0 << 4) | (val1 >> 4);
-               pDst[i+1] = ((val1 & 0xFU) << 8) | val2;
+               pDst[i+0] = (OPJ_INT32)((val0 << 4) | (val1 >> 4));
+               pDst[i+1] = (OPJ_INT32)(((val1 & 0xFU) << 8) | val2);
        }
        if (length & 1U) {
-               OPJ_INT32 val0 = *pSrc++;
-               OPJ_INT32 val1 = *pSrc++;
-               pDst[i+0] = (val0 << 4) | (val1 >> 4);
+               OPJ_UINT32 val0 = *pSrc++;
+               OPJ_UINT32 val1 = *pSrc++;
+               pDst[i+0] = (OPJ_INT32)((val0 << 4) | (val1 >> 4));
        }
 }
-static void tif_14uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
+static void tif_13uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
 {
        OPJ_SIZE_T i;
-       for (i = 0; i < (length & -(OPJ_SIZE_T)4U); i+=4U) {
-               OPJ_INT32 val0 = *pSrc++;
-               OPJ_INT32 val1 = *pSrc++;
-               OPJ_INT32 val2 = *pSrc++;
-               OPJ_INT32 val3 = *pSrc++;
-               OPJ_INT32 val4 = *pSrc++;
-               OPJ_INT32 val5 = *pSrc++;
-               OPJ_INT32 val6 = *pSrc++;
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)7U); i+=8U) {
+               OPJ_UINT32 val0 = *pSrc++;
+               OPJ_UINT32 val1 = *pSrc++;
+               OPJ_UINT32 val2 = *pSrc++;
+               OPJ_UINT32 val3 = *pSrc++;
+               OPJ_UINT32 val4 = *pSrc++;
+               OPJ_UINT32 val5 = *pSrc++;
+               OPJ_UINT32 val6 = *pSrc++;
+               OPJ_UINT32 val7 = *pSrc++;
+               OPJ_UINT32 val8 = *pSrc++;
+               OPJ_UINT32 val9 = *pSrc++;
+               OPJ_UINT32 val10 = *pSrc++;
+               OPJ_UINT32 val11 = *pSrc++;
+               OPJ_UINT32 val12 = *pSrc++;
                
-               pDst[i+0] = (val0 << 6) | (val1 >> 2);
-               pDst[i+1] = ((val1 & 0x3U) << 12) | (val2 << 4) | (val3 >> 4);
-               pDst[i+2] = ((val3 & 0xFU) << 10) | (val4 << 2) | (val5 >> 6);
-               pDst[i+3] = ((val5 & 0x3FU) << 8) | val6;
+               pDst[i+0] = (OPJ_INT32)((val0 << 5) | (val1 >> 3));
+               pDst[i+1] = (OPJ_INT32)(((val1 & 0x7U) << 10) | (val2 << 2) | (val3 >> 6));
+               pDst[i+2] = (OPJ_INT32)(((val3 & 0x3FU) << 7) | (val4 >> 1));
+               pDst[i+3] = (OPJ_INT32)(((val4 & 0x1U) << 12) | (val5 << 4) | (val6 >> 4));
+               pDst[i+4] = (OPJ_INT32)(((val6 & 0xFU) << 9) | (val7 << 1) | (val8 >> 7));
+               pDst[i+5] = (OPJ_INT32)(((val8 & 0x7FU) << 6) | (val9 >> 2));
+               pDst[i+6] = (OPJ_INT32)(((val9 & 0x3U) << 11) | (val10 << 3) | (val11 >> 5));
+               pDst[i+7] = (OPJ_INT32)(((val11 & 0x1FU) << 8) | (val12));
                
        }
-       if (length & 3U) {
-               OPJ_INT32 val0 = *pSrc++;
-               OPJ_INT32 val1 = *pSrc++;
-               length = length & 3U;
-               pDst[i+0] = (val0 << 6) | (val1 >> 2);
+       if (length & 7U) {
+               unsigned int val;
+               int available = 0;
+               
+               length = length & 7U;
+               
+               GETBITS(pDst[i+0], 13)
                
                if (length > 1U) {
-                       OPJ_INT32 val2 = *pSrc++;
-                       OPJ_INT32 val3 = *pSrc++;
-                       pDst[i+1] = ((val1 & 0x3U) << 12) | (val2 << 4) | (val3 >> 4);
+                       GETBITS(pDst[i+1], 13)
                        if (length > 2U) {
-                               OPJ_INT32 val4 = *pSrc++;
-                               OPJ_INT32 val5 = *pSrc++;
-                               pDst[i+2] = ((val3 & 0xFU) << 10) | (val4 << 2) | (val5 >> 6);
+                               GETBITS(pDst[i+2], 13)
+                               if (length > 3U) {
+                                       GETBITS(pDst[i+3], 13)
+                                       if (length > 4U) {
+                                               GETBITS(pDst[i+4], 13)
+                                               if (length > 5U) {
+                                                       GETBITS(pDst[i+5], 13)
+                                                       if (length > 6U) {
+                                                               GETBITS(pDst[i+6], 13)
+                                                       }
+                                               }
+                                       }
+                               }
                        }
                }
        }
 }
-#if 0
-static void tif_16uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
+static void tif_14uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
 {
        OPJ_SIZE_T i;
-       for (i = 0; i < length; i++) {
-               OPJ_INT32 val0 = *pSrc++;
-               OPJ_INT32 val1 = *pSrc++;
-#ifdef OPJ_BIG_ENDIAN
-               pDst[i] = (val0 << 8) | val1;
-#else
-               pDst[i] = (val1 << 8) | val0;
-#endif
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)3U); i+=4U) {
+               OPJ_UINT32 val0 = *pSrc++;
+               OPJ_UINT32 val1 = *pSrc++;
+               OPJ_UINT32 val2 = *pSrc++;
+               OPJ_UINT32 val3 = *pSrc++;
+               OPJ_UINT32 val4 = *pSrc++;
+               OPJ_UINT32 val5 = *pSrc++;
+               OPJ_UINT32 val6 = *pSrc++;
+               
+               pDst[i+0] = (OPJ_INT32)((val0 << 6) | (val1 >> 2));
+               pDst[i+1] = (OPJ_INT32)(((val1 & 0x3U) << 12) | (val2 << 4) | (val3 >> 4));
+               pDst[i+2] = (OPJ_INT32)(((val3 & 0xFU) << 10) | (val4 << 2) | (val5 >> 6));
+               pDst[i+3] = (OPJ_INT32)(((val5 & 0x3FU) << 8) | val6);
+               
        }
-}
-#else
-/* seems that libtiff decodes this to machine endianness */
-static void tif_16uto32s(const OPJ_UINT16* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
-{
-       OPJ_SIZE_T i;
-       for (i = 0; i < length; i++) {
-               pDst[i] = pSrc[i];
+       if (length & 3U) {
+               OPJ_UINT32 val0 = *pSrc++;
+               OPJ_UINT32 val1 = *pSrc++;
+               length = length & 3U;
+               pDst[i+0] = (OPJ_INT32)((val0 << 6) | (val1 >> 2));
+               
+               if (length > 1U) {
+                       OPJ_UINT32 val2 = *pSrc++;
+                       OPJ_UINT32 val3 = *pSrc++;
+                       pDst[i+1] = (OPJ_INT32)(((val1 & 0x3U) << 12) | (val2 << 4) | (val3 >> 4));
+                       if (length > 2U) {
+                               OPJ_UINT32 val4 = *pSrc++;
+                               OPJ_UINT32 val5 = *pSrc++;
+                               pDst[i+2] = (OPJ_INT32)(((val3 & 0xFU) << 10) | (val4 << 2) | (val5 >> 6));
+                       }
+               }
        }
 }
-#endif
-
-typedef void (* convert_32s_CXPX)(const OPJ_INT32* pSrc, OPJ_INT32* const* pDst, OPJ_SIZE_T length);
-static void convert_32s_C1P1(const OPJ_INT32* pSrc, OPJ_INT32* const* pDst, OPJ_SIZE_T length)
-{
-       memcpy(pDst[0], pSrc, length * sizeof(OPJ_INT32));
-}
-static void convert_32s_C2P2(const OPJ_INT32* pSrc, OPJ_INT32* const* pDst, OPJ_SIZE_T length)
+static void tif_15uto32s(const OPJ_BYTE* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
 {
        OPJ_SIZE_T i;
-       OPJ_INT32* pDst0 = pDst[0];
-       OPJ_INT32* pDst1 = pDst[1];
-       
-       for (i = 0; i < length; i++) {
-               pDst0[i] = pSrc[2*i+0];
-               pDst1[i] = pSrc[2*i+1];
+       for (i = 0; i < (length & ~(OPJ_SIZE_T)7U); i+=8U) {
+               OPJ_UINT32 val0 = *pSrc++;
+               OPJ_UINT32 val1 = *pSrc++;
+               OPJ_UINT32 val2 = *pSrc++;
+               OPJ_UINT32 val3 = *pSrc++;
+               OPJ_UINT32 val4 = *pSrc++;
+               OPJ_UINT32 val5 = *pSrc++;
+               OPJ_UINT32 val6 = *pSrc++;
+               OPJ_UINT32 val7 = *pSrc++;
+               OPJ_UINT32 val8 = *pSrc++;
+               OPJ_UINT32 val9 = *pSrc++;
+               OPJ_UINT32 val10 = *pSrc++;
+               OPJ_UINT32 val11 = *pSrc++;
+               OPJ_UINT32 val12 = *pSrc++;
+               OPJ_UINT32 val13 = *pSrc++;
+               OPJ_UINT32 val14 = *pSrc++;
+               
+               pDst[i+0] = (OPJ_INT32)((val0 << 7) | (val1 >> 1));
+               pDst[i+1] = (OPJ_INT32)(((val1 & 0x1U) << 14) | (val2 << 6) | (val3 >> 2));
+               pDst[i+2] = (OPJ_INT32)(((val3 & 0x3U) << 13) | (val4 << 5) | (val5 >> 3));
+               pDst[i+3] = (OPJ_INT32)(((val5 & 0x7U) << 12) | (val6 << 4) | (val7 >> 4));
+               pDst[i+4] = (OPJ_INT32)(((val7 & 0xFU) << 11) | (val8 << 3) | (val9 >> 5));
+               pDst[i+5] = (OPJ_INT32)(((val9 & 0x1FU) << 10) | (val10 << 2) | (val11 >> 6));
+               pDst[i+6] = (OPJ_INT32)(((val11 & 0x3FU) << 9) | (val12 << 1) | (val13 >> 7));
+               pDst[i+7] = (OPJ_INT32)(((val13 & 0x7FU) << 8) | (val14));
+               
        }
-}
-static void convert_32s_C3P3(const OPJ_INT32* pSrc, OPJ_INT32* const* pDst, OPJ_SIZE_T length)
-{
-       OPJ_SIZE_T i;
-       OPJ_INT32* pDst0 = pDst[0];
-       OPJ_INT32* pDst1 = pDst[1];
-       OPJ_INT32* pDst2 = pDst[2];
-       
-       for (i = 0; i < length; i++) {
-               pDst0[i] = pSrc[3*i+0];
-               pDst1[i] = pSrc[3*i+1];
-               pDst2[i] = pSrc[3*i+2];
+       if (length & 7U) {
+               unsigned int val;
+               int available = 0;
+               
+               length = length & 7U;
+               
+               GETBITS(pDst[i+0], 15)
+
+               if (length > 1U) {
+                       GETBITS(pDst[i+1], 15)
+                       if (length > 2U) {
+                               GETBITS(pDst[i+2], 15)
+                               if (length > 3U) {
+                                       GETBITS(pDst[i+3], 15)
+                                       if (length > 4U) {
+                                               GETBITS(pDst[i+4], 15)
+                                               if (length > 5U) {
+                                                       GETBITS(pDst[i+5], 15)
+                                                       if (length > 6U) {
+                                                               GETBITS(pDst[i+6], 15)
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+               }
        }
 }
-static void convert_32s_C4P4(const OPJ_INT32* pSrc, OPJ_INT32* const* pDst, OPJ_SIZE_T length)
+
+/* libtiff appears to return 16-bit samples already in host (machine) byte order, so they are copied without byte swapping */
+static void tif_16uto32s(const OPJ_UINT16* pSrc, OPJ_INT32* pDst, OPJ_SIZE_T length)
 {
        OPJ_SIZE_T i;
-       OPJ_INT32* pDst0 = pDst[0];
-       OPJ_INT32* pDst1 = pDst[1];
-       OPJ_INT32* pDst2 = pDst[2];
-       OPJ_INT32* pDst3 = pDst[3];
-       
        for (i = 0; i < length; i++) {
-               pDst0[i] = pSrc[4*i+0];
-               pDst1[i] = pSrc[4*i+1];
-               pDst2[i] = pSrc[4*i+2];
-               pDst3[i] = pSrc[4*i+3];
+               pDst[i] = pSrc[i];
        }
 }
 
-
 /*
  * libtiff/tif_getimage.c : 1,2,4,8,16 bitspersample accepted
  * CINEMA                 : 12 bit precision
@@ -793,14 +1213,14 @@ opj_image_t* tiftoimage(const char *filename, opj_cparameters_t *parameters)
        tstrip_t strip;
        tsize_t strip_size;
        int j, currentPlane, numcomps = 0, w, h;
-       OPJ_COLOR_SPACE color_space;
+       OPJ_COLOR_SPACE color_space = OPJ_CLRSPC_UNKNOWN;
        opj_image_cmptparm_t cmptparm[4]; /* RGBA */
        opj_image_t *image = NULL;
        int has_alpha = 0;
        unsigned short tiBps, tiPhoto, tiSf, tiSpp, tiPC;
        unsigned int tiWidth, tiHeight;
        OPJ_BOOL is_cinema = OPJ_IS_CINEMA(parameters->rsiz);
-       tif_Xto32s cvtTifTo32s = NULL;
+       convert_XXx32s_C1R cvtTifTo32s = NULL;
        convert_32s_CXPX cvtCxToPx = NULL;
        OPJ_INT32* buffer32s = NULL;
        OPJ_INT32* planes[4];
@@ -826,18 +1246,14 @@ opj_image_t* tiftoimage(const char *filename, opj_cparameters_t *parameters)
        w= (int)tiWidth;
        h= (int)tiHeight;
        
-       if((tiBps > 16U) || ((tiBps != 1U) && (tiBps & 1U))) tiBps = 0U;
-       if(tiPhoto != PHOTOMETRIC_MINISBLACK && tiPhoto != PHOTOMETRIC_RGB) tiPhoto = 0;
-       
-       if( !tiBps || !tiPhoto)
-       {
-               if( !tiBps)
-                       fprintf(stderr,"tiftoimage: Bits=%d, Only 1, 2, 4, 6, 8, 10, 12, 14 and 16 bits implemented\n",tiBps);
-               else
-                       if( !tiPhoto)
-                               fprintf(stderr,"tiftoimage: Bad color format %d.\n\tOnly RGB(A)"
-                                                               " and GRAY(A) has been implemented\n",(int) tiPhoto);
-               
+       if(tiBps > 16U) {
+               fprintf(stderr,"tiftoimage: Bits=%d, Only 1 to 16 bits implemented\n",tiBps);
+               fprintf(stderr,"\tAborting\n");
+               TIFFClose(tif);
+               return NULL;
+       }
+       if(tiPhoto != PHOTOMETRIC_MINISBLACK && tiPhoto != PHOTOMETRIC_RGB) {
+               fprintf(stderr,"tiftoimage: Bad color format %d.\n\tOnly RGB(A) and GRAY(A) has been implemented\n",(int) tiPhoto);
                fprintf(stderr,"\tAborting\n");
                TIFFClose(tif);
                return NULL;
@@ -845,31 +1261,45 @@ opj_image_t* tiftoimage(const char *filename, opj_cparameters_t *parameters)
        
        switch (tiBps) {
                case 1:
-                       cvtTifTo32s = tif_1uto32s;
-                       break;
                case 2:
-                       cvtTifTo32s = tif_2uto32s;
-                       break;
                case 4:
-                       cvtTifTo32s = tif_4uto32s;
-                       break;
                case 6:
-                       cvtTifTo32s = tif_6uto32s;
-                       break;
                case 8:
-                       cvtTifTo32s = tif_8uto32s;
+                       cvtTifTo32s = convert_XXu32s_C1R_LUT[tiBps];
+                       break;
+               /* others are specific to TIFF */
+               case 3:
+                       cvtTifTo32s = tif_3uto32s;
+                       break;
+               case 5:
+                       cvtTifTo32s = tif_5uto32s;
+                       break;
+               case 7:
+                       cvtTifTo32s = tif_7uto32s;
+                       break;
+               case 9:
+                       cvtTifTo32s = tif_9uto32s;
                        break;
                case 10:
                        cvtTifTo32s = tif_10uto32s;
                        break;
+               case 11:
+                       cvtTifTo32s = tif_11uto32s;
+                       break;
                case 12:
                        cvtTifTo32s = tif_12uto32s;
                        break;
+               case 13:
+                       cvtTifTo32s = tif_13uto32s;
+                       break;
                case 14:
                        cvtTifTo32s = tif_14uto32s;
                        break;
+               case 15:
+                       cvtTifTo32s = tif_15uto32s;
+                       break;
                case 16:
-                       cvtTifTo32s = (tif_Xto32s)tif_16uto32s;
+                       cvtTifTo32s = (convert_XXx32s_C1R)tif_16uto32s;
                        break;
                default:
                        /* never here */
@@ -928,25 +1358,9 @@ opj_image_t* tiftoimage(const char *filename, opj_cparameters_t *parameters)
                color_space = OPJ_CLRSPC_GRAY;
        }
        
-       switch (numcomps) {
-               case 1:
-                       cvtCxToPx = convert_32s_C1P1;
-                       break;
-               case 2:
-                       cvtCxToPx = convert_32s_C2P2;
-                       break;
-               case 3:
-                       cvtCxToPx = convert_32s_C3P3;
-                       break;
-               case 4:
-                       cvtCxToPx = convert_32s_C4P4;
-                       break;
-               default:
-                       /* never here */
-                       break;
-       }
+       cvtCxToPx = convert_32s_CXPX_LUT[numcomps];
        if (tiPC == PLANARCONFIG_SEPARATE) {
-               cvtCxToPx = convert_32s_C1P1; /* override */
+               cvtCxToPx = convert_32s_CXPX_LUT[1]; /* override */
                tiSpp = 1U; /* consider only one sample per plane */
        }
 
@@ -978,6 +1392,7 @@ opj_image_t* tiftoimage(const char *filename, opj_cparameters_t *parameters)
        {
                planes[j] = image->comps[j].data;
        }
+       image->comps[numcomps - 1].alpha = (OPJ_UINT16)(1 - (numcomps & 1));
                
        strip_size = TIFFStripSize(tif);
        
@@ -988,7 +1403,7 @@ opj_image_t* tiftoimage(const char *filename, opj_cparameters_t *parameters)
                return NULL;
        }
        rowStride = ((OPJ_SIZE_T)w * tiSpp * tiBps + 7U) / 8U;
-       buffer32s = malloc((OPJ_SIZE_T)w * tiSpp * sizeof(OPJ_INT32));
+       buffer32s = (OPJ_INT32 *)malloc((OPJ_SIZE_T)w * tiSpp * sizeof(OPJ_INT32));
        if (buffer32s == NULL) {
                _TIFFfree(buf);
                TIFFClose(tif);
@@ -1006,14 +1421,14 @@ opj_image_t* tiftoimage(const char *filename, opj_cparameters_t *parameters)
                for(; (h > 0) && (strip < TIFFNumberOfStrips(tif)); strip++)
                {
                                const OPJ_UINT8 *dat8;
-                               tsize_t ssize;
+                               OPJ_SIZE_T ssize;
                                
-                               ssize = TIFFReadEncodedStrip(tif, strip, buf, strip_size);
+                               ssize = (OPJ_SIZE_T)TIFFReadEncodedStrip(tif, strip, buf, strip_size);
                                dat8 = (const OPJ_UINT8*)buf;
                                
                                while (ssize >= rowStride) {
-                                       cvtTifTo32s(dat8, buffer32s, w * tiSpp);
-                                       cvtCxToPx(buffer32s, planes, w);
+                                       cvtTifTo32s(dat8, buffer32s, (OPJ_SIZE_T)w * tiSpp);
+                                       cvtCxToPx(buffer32s, planes, (OPJ_SIZE_T)w);
                                        planes[0] += w;
                                        planes[1] += w;
                                        planes[2] += w;