summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Even Rouault <even.rouault@spatialys.com> 2020-05-24 15:38:21 +0200
committer: Even Rouault <even.rouault@spatialys.com> 2020-05-24 15:38:48 +0200
commit: 1c5627ee7406f84cfb40809b7ac31c63342427df (patch)
tree: af950f3432f4983fc45316d35240d3bc0e5fe5bb /src
parent: 1e931fdb3655c64ab60ea5657f79309331a86485 (diff)
T1 encoder: speed-up by aggressive inlining and more cache friendly data organization
~ 9% speed improvement seen on 10980x10980 uint16 image, T36JTT_20160914T074612_B02.tif opj_compress time from 17.2s to 15.8s
Diffstat (limited to 'src')
-rw-r--r-- src/lib/openjp2/mqc.c 176
-rw-r--r-- src/lib/openjp2/mqc.h 9
-rw-r--r-- src/lib/openjp2/mqc_inl.h 90
-rw-r--r-- src/lib/openjp2/t1.c 576
-rw-r--r-- src/lib/openjp2/t1.h 1
5 files changed, 460 insertions, 392 deletions
diff --git a/src/lib/openjp2/mqc.c b/src/lib/openjp2/mqc.c
index 6299b171..4cbfabd0 100644
--- a/src/lib/openjp2/mqc.c
+++ b/src/lib/openjp2/mqc.c
@@ -47,27 +47,6 @@
/*@{*/
/**
-Output a byte, doing bit-stuffing if necessary.
-After a 0xff byte, the next byte must be smaller than 0x90.
-@param mqc MQC handle
-*/
-static void opj_mqc_byteout(opj_mqc_t *mqc);
-/**
-Renormalize mqc->a and mqc->c while encoding, so that mqc->a stays between 0x8000 and 0x10000
-@param mqc MQC handle
-*/
-static void opj_mqc_renorme(opj_mqc_t *mqc);
-/**
-Encode the most probable symbol
-@param mqc MQC handle
-*/
-static void opj_mqc_codemps(opj_mqc_t *mqc);
-/**
-Encode the most least symbol
-@param mqc MQC handle
-*/
-static void opj_mqc_codelps(opj_mqc_t *mqc);
-/**
Fill mqc->c with 1's for flushing
@param mqc MQC handle
*/
@@ -182,80 +161,6 @@ static const opj_mqc_state_t mqc_states[47 * 2] = {
==========================================================
*/
-static void opj_mqc_byteout(opj_mqc_t *mqc)
-{
- /* bp is initialized to start - 1 in opj_mqc_init_enc() */
- /* but this is safe, see opj_tcd_code_block_enc_allocate_data() */
- assert(mqc->bp >= mqc->start - 1);
- if (*mqc->bp == 0xff) {
- mqc->bp++;
- *mqc->bp = (OPJ_BYTE)(mqc->c >> 20);
- mqc->c &= 0xfffff;
- mqc->ct = 7;
- } else {
- if ((mqc->c & 0x8000000) == 0) {
- mqc->bp++;
- *mqc->bp = (OPJ_BYTE)(mqc->c >> 19);
- mqc->c &= 0x7ffff;
- mqc->ct = 8;
- } else {
- (*mqc->bp)++;
- if (*mqc->bp == 0xff) {
- mqc->c &= 0x7ffffff;
- mqc->bp++;
- *mqc->bp = (OPJ_BYTE)(mqc->c >> 20);
- mqc->c &= 0xfffff;
- mqc->ct = 7;
- } else {
- mqc->bp++;
- *mqc->bp = (OPJ_BYTE)(mqc->c >> 19);
- mqc->c &= 0x7ffff;
- mqc->ct = 8;
- }
- }
- }
-}
-
-static void opj_mqc_renorme(opj_mqc_t *mqc)
-{
- do {
- mqc->a <<= 1;
- mqc->c <<= 1;
- mqc->ct--;
- if (mqc->ct == 0) {
- opj_mqc_byteout(mqc);
- }
- } while ((mqc->a & 0x8000) == 0);
-}
-
-static void opj_mqc_codemps(opj_mqc_t *mqc)
-{
- mqc->a -= (*mqc->curctx)->qeval;
- if ((mqc->a & 0x8000) == 0) {
- if (mqc->a < (*mqc->curctx)->qeval) {
- mqc->a = (*mqc->curctx)->qeval;
- } else {
- mqc->c += (*mqc->curctx)->qeval;
- }
- *mqc->curctx = (*mqc->curctx)->nmps;
- opj_mqc_renorme(mqc);
- } else {
- mqc->c += (*mqc->curctx)->qeval;
- }
-}
-
-static void opj_mqc_codelps(opj_mqc_t *mqc)
-{
- mqc->a -= (*mqc->curctx)->qeval;
- if (mqc->a < (*mqc->curctx)->qeval) {
- mqc->c += (*mqc->curctx)->qeval;
- } else {
- mqc->a = (*mqc->curctx)->qeval;
- }
- *mqc->curctx = (*mqc->curctx)->nlps;
- opj_mqc_renorme(mqc);
-}
-
static void opj_mqc_setbits(opj_mqc_t *mqc)
{
OPJ_UINT32 tempc = mqc->c + mqc->a;
@@ -303,14 +208,6 @@ void opj_mqc_init_enc(opj_mqc_t *mqc, OPJ_BYTE *bp)
mqc->end_of_byte_stream_counter = 0;
}
-void opj_mqc_encode(opj_mqc_t *mqc, OPJ_UINT32 d)
-{
- if ((*mqc->curctx)->mps == d) {
- opj_mqc_codemps(mqc);
- } else {
- opj_mqc_codelps(mqc);
- }
-}
void opj_mqc_flush(opj_mqc_t *mqc)
{
@@ -329,8 +226,6 @@ void opj_mqc_flush(opj_mqc_t *mqc)
}
}
-#define BYPASS_CT_INIT 0xDEADBEEF
-
void opj_mqc_bypass_init_enc(opj_mqc_t *mqc)
{
/* This function is normally called after at least one opj_mqc_flush() */
@@ -475,6 +370,43 @@ void opj_mqc_erterm_enc(opj_mqc_t *mqc)
}
}
+static INLINE void opj_mqc_renorme(opj_mqc_t *mqc)
+{
+ opj_mqc_renorme_macro(mqc, mqc->a, mqc->c, mqc->ct);
+}
+
+/**
+Encode the most probable symbol
+@param mqc MQC handle
+*/
+static INLINE void opj_mqc_codemps(opj_mqc_t *mqc)
+{
+ opj_mqc_codemps_macro(mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct);
+}
+
+/**
+Encode the least probable symbol
+@param mqc MQC handle
+*/
+static INLINE void opj_mqc_codelps(opj_mqc_t *mqc)
+{
+ opj_mqc_codelps_macro(mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct);
+}
+
+/**
+Encode a symbol using the MQ-coder
+@param mqc MQC handle
+@param d The symbol to be encoded (0 or 1)
+*/
+static INLINE void opj_mqc_encode(opj_mqc_t *mqc, OPJ_UINT32 d)
+{
+ if ((*mqc->curctx)->mps == d) {
+ opj_mqc_codemps(mqc);
+ } else {
+ opj_mqc_codelps(mqc);
+ }
+}
+
void opj_mqc_segmark_enc(opj_mqc_t *mqc)
{
OPJ_UINT32 i;
@@ -557,4 +489,36 @@ void opj_mqc_setstate(opj_mqc_t *mqc, OPJ_UINT32 ctxno, OPJ_UINT32 msb,
mqc->ctxs[ctxno] = &mqc_states[msb + (OPJ_UINT32)(prob << 1)];
}
-
+void opj_mqc_byteout(opj_mqc_t *mqc)
+{
+ /* bp is initialized to start - 1 in opj_mqc_init_enc() */
+ /* but this is safe, see opj_tcd_code_block_enc_allocate_data() */
+ assert(mqc->bp >= mqc->start - 1);
+ if (*mqc->bp == 0xff) {
+ mqc->bp++;
+ *mqc->bp = (OPJ_BYTE)(mqc->c >> 20);
+ mqc->c &= 0xfffff;
+ mqc->ct = 7;
+ } else {
+ if ((mqc->c & 0x8000000) == 0) {
+ mqc->bp++;
+ *mqc->bp = (OPJ_BYTE)(mqc->c >> 19);
+ mqc->c &= 0x7ffff;
+ mqc->ct = 8;
+ } else {
+ (*mqc->bp)++;
+ if (*mqc->bp == 0xff) {
+ mqc->c &= 0x7ffffff;
+ mqc->bp++;
+ *mqc->bp = (OPJ_BYTE)(mqc->c >> 20);
+ mqc->c &= 0xfffff;
+ mqc->ct = 7;
+ } else {
+ mqc->bp++;
+ *mqc->bp = (OPJ_BYTE)(mqc->c >> 19);
+ mqc->c &= 0x7ffff;
+ mqc->ct = 8;
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/lib/openjp2/mqc.h b/src/lib/openjp2/mqc.h
index 69a2a79d..9850fed0 100644
--- a/src/lib/openjp2/mqc.h
+++ b/src/lib/openjp2/mqc.h
@@ -96,6 +96,8 @@ typedef struct opj_mqc {
OPJ_BYTE backup[OPJ_COMMON_CBLK_DATA_EXTRA];
} opj_mqc_t;
+#define BYPASS_CT_INIT 0xDEADBEEF
+
#include "mqc_inl.h"
/** @name Exported functions */
@@ -135,12 +137,7 @@ Set the current context used for coding/decoding
@param ctxno Number that identifies the context
*/
#define opj_mqc_setcurctx(mqc, ctxno) (mqc)->curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)]
-/**
-Encode a symbol using the MQ-coder
-@param mqc MQC handle
-@param d The symbol to be encoded (0 or 1)
-*/
-void opj_mqc_encode(opj_mqc_t *mqc, OPJ_UINT32 d);
+
/**
Flush the encoder, so that all remaining data is written
@param mqc MQC handle
diff --git a/src/lib/openjp2/mqc_inl.h b/src/lib/openjp2/mqc_inl.h
index 310a3287..0031b94b 100644
--- a/src/lib/openjp2/mqc_inl.h
+++ b/src/lib/openjp2/mqc_inl.h
@@ -156,13 +156,13 @@ static INLINE OPJ_UINT32 opj_mqc_raw_decode(opj_mqc_t *mqc)
} \
}
-#define DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct) \
+#define DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct) \
register const opj_mqc_state_t **curctx = mqc->curctx; \
register OPJ_UINT32 c = mqc->c; \
register OPJ_UINT32 a = mqc->a; \
register OPJ_UINT32 ct = mqc->ct
-#define UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct) \
+#define UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct) \
mqc->curctx = curctx; \
mqc->c = c; \
mqc->a = a; \
@@ -193,4 +193,90 @@ Decode a symbol
#define opj_mqc_decode(d, mqc) \
opj_mqc_decode_macro(d, mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct)
+/**
+Output a byte, doing bit-stuffing if necessary.
+After a 0xff byte, the next byte must be smaller than 0x90.
+@param mqc MQC handle
+*/
+void opj_mqc_byteout(opj_mqc_t *mqc);
+
+/**
+Renormalize mqc->a and mqc->c while encoding, so that mqc->a stays between 0x8000 and 0x10000
+@param mqc MQC handle
+@param a_ value of mqc->a
+@param c_ value of mqc->c
+@param ct_ value of mqc->ct
+*/
+#define opj_mqc_renorme_macro(mqc, a_, c_, ct_) \
+{ \
+ do { \
+ a_ <<= 1; \
+ c_ <<= 1; \
+ ct_--; \
+ if (ct_ == 0) { \
+ mqc->c = c_; \
+ opj_mqc_byteout(mqc); \
+ c_ = mqc->c; \
+ ct_ = mqc->ct; \
+ } \
+ } while( (a_ & 0x8000) == 0); \
+}
+
+#define opj_mqc_codemps_macro(mqc, curctx, a, c, ct) \
+{ \
+ a -= (*curctx)->qeval; \
+ if ((a & 0x8000) == 0) { \
+ if (a < (*curctx)->qeval) { \
+ a = (*curctx)->qeval; \
+ } else { \
+ c += (*curctx)->qeval; \
+ } \
+ *curctx = (*curctx)->nmps; \
+ opj_mqc_renorme_macro(mqc, a, c, ct); \
+ } else { \
+ c += (*curctx)->qeval; \
+ } \
+}
+
+#define opj_mqc_codelps_macro(mqc, curctx, a, c, ct) \
+{ \
+ a -= (*curctx)->qeval; \
+ if (a < (*curctx)->qeval) { \
+ c += (*curctx)->qeval; \
+ } else { \
+ a = (*curctx)->qeval; \
+ } \
+ *curctx = (*curctx)->nlps; \
+ opj_mqc_renorme_macro(mqc, a, c, ct); \
+}
+
+#define opj_mqc_encode_macro(mqc, curctx, a, c, ct, d) \
+{ \
+ if ((*curctx)->mps == (d)) { \
+ opj_mqc_codemps_macro(mqc, curctx, a, c, ct); \
+ } else { \
+ opj_mqc_codelps_macro(mqc, curctx, a, c, ct); \
+ } \
+}
+
+
+#define opj_mqc_bypass_enc_macro(mqc, c, ct, d) \
+{\
+ if (ct == BYPASS_CT_INIT) {\
+ ct = 8;\
+ }\
+ ct--;\
+ c = c + ((d) << ct);\
+ if (ct == 0) {\
+ *mqc->bp = (OPJ_BYTE)c;\
+ ct = 8;\
+ /* If the previous byte was 0xff, make sure that the next msb is 0 */ \
+ if (*mqc->bp == 0xff) {\
+ ct = 7;\
+ }\
+ mqc->bp++;\
+ c = 0;\
+ }\
+}
+
#endif /* OPJ_MQC_INL_H */
diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c
index 937f420a..92030b21 100644
--- a/src/lib/openjp2/t1.c
+++ b/src/lib/openjp2/t1.c
@@ -61,6 +61,13 @@
#define opj_t1_setcurctx(curctx, ctxno) curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)]
+/* Macros to deal with signed integers stored in sign-magnitude form: only the
+ * MSB is set for negative values (smr = signed magnitude representation) */
+#define opj_smr_abs(x) (((OPJ_UINT32)(x)) & 0x7FFFFFFFU)
+#define opj_smr_sign(x) (((OPJ_UINT32)(x)) >> 31)
+#define opj_to_smr(x) ((x) >= 0 ? (OPJ_UINT32)(x) : ((OPJ_UINT32)(-x) | 0x80000000U))
+
+
/** @name Local static functions */
/*@{*/
@@ -329,61 +336,53 @@ static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
/**
Encode significant pass
*/
-static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1,
- opj_flag_t *flagsp,
- OPJ_INT32 *datap,
- OPJ_INT32 bpno,
- OPJ_INT32 one,
- OPJ_INT32 *nmsedec,
- OPJ_BYTE type,
- OPJ_UINT32 ci,
- OPJ_UINT32 vsc)
-{
- OPJ_UINT32 v;
-
- opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
-
- OPJ_UINT32 const flags = *flagsp;
-
- if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
- (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
- OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
- v = (opj_int_abs(*datap) & one) ? 1 : 0;
-#ifdef DEBUG_ENC_SIG
- fprintf(stderr, " ctxt1=%d\n", ctxt1);
-#endif
- opj_mqc_setcurctx(mqc, ctxt1);
- if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */
- opj_mqc_bypass_enc(mqc, v);
- } else {
- opj_mqc_encode(mqc, v);
- }
- if (v) {
- OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index(
- *flagsp,
- flagsp[-1], flagsp[1],
- ci);
- OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu);
- v = *datap < 0 ? 1U : 0U;
- *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
- (OPJ_UINT32)bpno);
-#ifdef DEBUG_ENC_SIG
- fprintf(stderr, " ctxt2=%d\n", ctxt2);
-#endif
- opj_mqc_setcurctx(mqc, ctxt2);
- if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */
- opj_mqc_bypass_enc(mqc, v);
- } else {
- OPJ_UINT32 spb = opj_t1_getspb(lu);
-#ifdef DEBUG_ENC_SIG
- fprintf(stderr, " spb=%d\n", spb);
-#endif
- opj_mqc_encode(mqc, v ^ spb);
- }
- opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
- }
- *flagsp |= T1_PI_THIS << (ci * 3U);
- }
+#define opj_t1_enc_sigpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, type, ciIn, vscIn) \
+{ \
+ OPJ_UINT32 v; \
+ const OPJ_UINT32 ci = (ciIn); \
+ const OPJ_UINT32 vsc = (vscIn); \
+ const OPJ_INT32* l_datap = (datapIn); \
+ opj_flag_t* flagsp = (flagspIn); \
+ OPJ_UINT32 const flags = *flagsp; \
+ if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
+ (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
+ OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
+ v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \
+/* #ifdef DEBUG_ENC_SIG */ \
+/* fprintf(stderr, " ctxt1=%d\n", ctxt1); */ \
+/* #endif */ \
+ opj_t1_setcurctx(curctx, ctxt1); \
+ if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \
+ opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
+ } else { \
+ opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
+ } \
+ if (v) { \
+ OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
+ *flagsp, \
+ flagsp[-1], flagsp[1], \
+ ci); \
+ OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
+ v = opj_smr_sign(*l_datap); \
+ *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
+ (OPJ_UINT32)bpno); \
+/* #ifdef DEBUG_ENC_SIG */ \
+/* fprintf(stderr, " ctxt2=%d\n", ctxt2); */ \
+/* #endif */ \
+ opj_t1_setcurctx(curctx, ctxt2); \
+ if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \
+ opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
+ } else { \
+ OPJ_UINT32 spb = opj_t1_getspb(lu); \
+/* #ifdef DEBUG_ENC_SIG */ \
+/* fprintf(stderr, " spb=%d\n", spb); */ \
+/* #endif */ \
+ opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
+ } \
+ opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); \
+ } \
+ *flagsp |= T1_PI_THIS << (ci * 3U); \
+ } \
}
static INLINE void opj_t1_dec_sigpass_step_raw(
@@ -464,63 +463,64 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1,
OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
opj_flag_t* f = &T1_FLAGS(0, 0);
OPJ_UINT32 const extra = 2;
+ opj_mqc_t* mqc = &(t1->mqc);
+ DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
+ const OPJ_INT32* datap = t1->data;
*nmsedec = 0;
#ifdef DEBUG_ENC_SIG
fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno);
#endif
- for (k = 0; k < (t1->h & ~3U); k += 4) {
+ for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
+ const OPJ_UINT32 w = t1->w;
#ifdef DEBUG_ENC_SIG
fprintf(stderr, " k=%d\n", k);
#endif
- for (i = 0; i < t1->w; ++i) {
+ for (i = 0; i < w; ++i, ++f, datap += 4) {
#ifdef DEBUG_ENC_SIG
fprintf(stderr, " i=%d\n", i);
#endif
if (*f == 0U) {
/* Nothing to do for any of the 4 data points */
- f++;
continue;
}
- opj_t1_enc_sigpass_step(
- t1,
+ opj_t1_enc_sigpass_step_macro(
+ mqc, curctx, a, c, ct,
f,
- &t1->data[((k + 0) * t1->data_stride) + i],
+ &datap[0],
bpno,
one,
nmsedec,
type,
0, cblksty & J2K_CCP_CBLKSTY_VSC);
- opj_t1_enc_sigpass_step(
- t1,
+ opj_t1_enc_sigpass_step_macro(
+ mqc, curctx, a, c, ct,
f,
- &t1->data[((k + 1) * t1->data_stride) + i],
+ &datap[1],
bpno,
one,
nmsedec,
type,
1, 0);
- opj_t1_enc_sigpass_step(
- t1,
+ opj_t1_enc_sigpass_step_macro(
+ mqc, curctx, a, c, ct,
f,
- &t1->data[((k + 2) * t1->data_stride) + i],
+ &datap[2],
bpno,
one,
nmsedec,
type,
2, 0);
- opj_t1_enc_sigpass_step(
- t1,
+ opj_t1_enc_sigpass_step_macro(
+ mqc, curctx, a, c, ct,
f,
- &t1->data[((k + 3) * t1->data_stride) + i],
+ &datap[3],
bpno,
one,
nmsedec,
type,
3, 0);
- ++f;
}
- f += extra;
}
if (k < t1->h) {
@@ -528,20 +528,20 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1,
#ifdef DEBUG_ENC_SIG
fprintf(stderr, " k=%d\n", k);
#endif
- for (i = 0; i < t1->w; ++i) {
+ for (i = 0; i < t1->w; ++i, ++f) {
#ifdef DEBUG_ENC_SIG
fprintf(stderr, " i=%d\n", i);
#endif
if (*f == 0U) {
/* Nothing to do for any of the 4 data points */
- f++;
+ datap += (t1->h - k);
continue;
}
- for (j = k; j < t1->h; ++j) {
- opj_t1_enc_sigpass_step(
- t1,
+ for (j = k; j < t1->h; ++j, ++datap) {
+ opj_t1_enc_sigpass_step_macro(
+ mqc, curctx, a, c, ct,
f,
- &t1->data[(j * t1->data_stride) + i],
+ &datap[0],
bpno,
one,
nmsedec,
@@ -549,9 +549,10 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1,
j - k,
(j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0));
}
- ++f;
}
}
+
+ UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
}
static void opj_t1_dec_sigpass_raw(
@@ -626,7 +627,7 @@ static void opj_t1_dec_sigpass_raw(
register opj_flag_t *flagsp = &t1->flags[(flags_stride) + 1]; \
const OPJ_UINT32 l_w = w; \
opj_mqc_t* mqc = &(t1->mqc); \
- DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
+ DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
register OPJ_UINT32 v; \
one = 1 << bpno; \
half = one >> 1; \
@@ -651,7 +652,7 @@ static void opj_t1_dec_sigpass_raw(
} \
} \
} \
- UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
+ UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
if( k < h ) { \
for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
for (j = 0; j < h - k; ++j) { \
@@ -715,38 +716,27 @@ static void opj_t1_dec_sigpass_mqc(
/**
Encode refinement pass step
*/
-static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1,
- opj_flag_t *flagsp,
- OPJ_INT32 *datap,
- OPJ_INT32 bpno,
- OPJ_INT32 one,
- OPJ_INT32 *nmsedec,
- OPJ_BYTE type,
- OPJ_UINT32 ci)
-{
- OPJ_UINT32 v;
-
- opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
-
- OPJ_UINT32 const shift_flags =
- (*flagsp >> (ci * 3U));
-
- if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) {
- OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags);
- *nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap),
- (OPJ_UINT32)bpno);
- v = (opj_int_abs(*datap) & one) ? 1 : 0;
-#ifdef DEBUG_ENC_REF
- fprintf(stderr, " ctxt=%d\n", ctxt);
-#endif
- opj_mqc_setcurctx(mqc, ctxt);
- if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */
- opj_mqc_bypass_enc(mqc, v);
- } else {
- opj_mqc_encode(mqc, v);
- }
- *flagsp |= T1_MU_THIS << (ci * 3U);
- }
+#define opj_t1_enc_refpass_step_macro(mqc, curctx, a, c, ct, flags, flagsUpdated, datap, bpno, one, nmsedec, type, ci) \
+{\
+ OPJ_UINT32 v; \
+ if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == (T1_SIGMA_THIS << ((ci) * 3U))) { \
+ const OPJ_UINT32 shift_flags = (flags >> ((ci) * 3U)); \
+ OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); \
+ OPJ_UINT32 abs_data = opj_smr_abs(*datap); \
+ *nmsedec += opj_t1_getnmsedec_ref(abs_data, \
+ (OPJ_UINT32)bpno); \
+ v = ((OPJ_INT32)abs_data & one) ? 1 : 0; \
+/* #ifdef DEBUG_ENC_REF */ \
+/* fprintf(stderr, " ctxt=%d\n", ctxt); */ \
+/* #endif */ \
+ opj_t1_setcurctx(curctx, ctxt); \
+ if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \
+ opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
+ } else { \
+ opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
+ } \
+ flagsUpdated |= T1_MU_THIS << ((ci) * 3U); \
+ } \
}
@@ -807,100 +797,104 @@ static void opj_t1_enc_refpass(
const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
opj_flag_t* f = &T1_FLAGS(0, 0);
const OPJ_UINT32 extra = 2U;
+ opj_mqc_t* mqc = &(t1->mqc);
+ DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
+ const OPJ_INT32* datap = t1->data;
*nmsedec = 0;
#ifdef DEBUG_ENC_REF
fprintf(stderr, "enc_refpass: bpno=%d\n", bpno);
#endif
- for (k = 0; k < (t1->h & ~3U); k += 4) {
+ for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
#ifdef DEBUG_ENC_REF
fprintf(stderr, " k=%d\n", k);
#endif
- for (i = 0; i < t1->w; ++i) {
+ for (i = 0; i < t1->w; ++i, f++, datap += 4) {
+ const OPJ_UINT32 flags = *f;
+ OPJ_UINT32 flagsUpdated = flags;
#ifdef DEBUG_ENC_REF
fprintf(stderr, " i=%d\n", i);
#endif
- if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
+ if ((flags & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
/* none significant */
- f++;
continue;
}
- if ((*f & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
+ if ((flags & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) {
/* all processed by sigpass */
- f++;
continue;
}
- opj_t1_enc_refpass_step(
- t1,
- f,
- &t1->data[((k + 0) * t1->data_stride) + i],
+ opj_t1_enc_refpass_step_macro(
+ mqc, curctx, a, c, ct,
+ flags, flagsUpdated,
+ &datap[0],
bpno,
one,
nmsedec,
type,
0);
- opj_t1_enc_refpass_step(
- t1,
- f,
- &t1->data[((k + 1) * t1->data_stride) + i],
+ opj_t1_enc_refpass_step_macro(
+ mqc, curctx, a, c, ct,
+ flags, flagsUpdated,
+ &datap[1],
bpno,
one,
nmsedec,
type,
1);
- opj_t1_enc_refpass_step(
- t1,
- f,
- &t1->data[((k + 2) * t1->data_stride) + i],
+ opj_t1_enc_refpass_step_macro(
+ mqc, curctx, a, c, ct,
+ flags, flagsUpdated,
+ &datap[2],
bpno,
one,
nmsedec,
type,
2);
- opj_t1_enc_refpass_step(
- t1,
- f,
- &t1->data[((k + 3) * t1->data_stride) + i],
+ opj_t1_enc_refpass_step_macro(
+ mqc, curctx, a, c, ct,
+ flags, flagsUpdated,
+ &datap[3],
bpno,
one,
nmsedec,
type,
3);
- ++f;
+ *f = flagsUpdated;
}
- f += extra;
}
if (k < t1->h) {
OPJ_UINT32 j;
+ const OPJ_UINT32 remaining_lines = t1->h - k;
#ifdef DEBUG_ENC_REF
fprintf(stderr, " k=%d\n", k);
#endif
- for (i = 0; i < t1->w; ++i) {
+ for (i = 0; i < t1->w; ++i, ++f) {
#ifdef DEBUG_ENC_REF
fprintf(stderr, " i=%d\n", i);
#endif
if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
/* none significant */
- f++;
+ datap += remaining_lines;
continue;
}
- for (j = k; j < t1->h; ++j) {
- opj_t1_enc_refpass_step(
- t1,
- f,
- &t1->data[(j * t1->data_stride) + i],
+ for (j = 0; j < remaining_lines; ++j, datap ++) {
+ opj_t1_enc_refpass_step_macro(
+ mqc, curctx, a, c, ct,
+ *f, *f,
+ &datap[0],
bpno,
one,
nmsedec,
type,
- j - k);
+ j);
}
- ++f;
}
}
+
+ UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
}
@@ -968,7 +962,7 @@ static void opj_t1_dec_refpass_raw(
register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
const OPJ_UINT32 l_w = w; \
opj_mqc_t* mqc = &(t1->mqc); \
- DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
+ DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
register OPJ_UINT32 v; \
one = 1 << bpno; \
poshalf = one >> 1; \
@@ -992,7 +986,7 @@ static void opj_t1_dec_refpass_raw(
} \
} \
} \
- UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
+ UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
if( k < h ) { \
for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
for (j = 0; j < h - k; ++j) { \
@@ -1030,86 +1024,71 @@ static void opj_t1_dec_refpass_mqc(
/**
Encode clean-up pass step
*/
-static void opj_t1_enc_clnpass_step(
- opj_t1_t *t1,
- opj_flag_t *flagsp,
- OPJ_INT32 *datap,
- OPJ_INT32 bpno,
- OPJ_INT32 one,
- OPJ_INT32 *nmsedec,
- OPJ_UINT32 agg,
- OPJ_UINT32 runlen,
- OPJ_UINT32 lim,
- OPJ_UINT32 cblksty)
-{
- OPJ_UINT32 v;
- OPJ_UINT32 ci;
- opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
-
- const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 |
- T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
-
- if ((*flagsp & check) == check) {
- if (runlen == 0) {
- *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
- } else if (runlen == 1) {
- *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3);
- } else if (runlen == 2) {
- *flagsp &= ~(T1_PI_2 | T1_PI_3);
- } else if (runlen == 3) {
- *flagsp &= ~(T1_PI_3);
- }
- return;
- }
-
- for (ci = runlen; ci < lim; ++ci) {
- OPJ_UINT32 vsc;
- opj_flag_t flags;
- OPJ_UINT32 ctxt1;
-
- flags = *flagsp;
-
- if ((agg != 0) && (ci == runlen)) {
- goto LABEL_PARTIAL;
- }
-
- if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {
- ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
-#ifdef DEBUG_ENC_CLN
- printf(" ctxt1=%d\n", ctxt1);
-#endif
- opj_mqc_setcurctx(mqc, ctxt1);
- v = (opj_int_abs(*datap) & one) ? 1 : 0;
- opj_mqc_encode(mqc, v);
- if (v) {
- OPJ_UINT32 ctxt2, spb;
- OPJ_UINT32 lu;
-LABEL_PARTIAL:
- lu = opj_t1_getctxtno_sc_or_spb_index(
- *flagsp,
- flagsp[-1], flagsp[1],
- ci);
- *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
- (OPJ_UINT32)bpno);
- ctxt2 = opj_t1_getctxno_sc(lu);
-#ifdef DEBUG_ENC_CLN
- printf(" ctxt2=%d\n", ctxt2);
-#endif
- opj_mqc_setcurctx(mqc, ctxt2);
-
- v = *datap < 0 ? 1U : 0U;
- spb = opj_t1_getspb(lu);
-#ifdef DEBUG_ENC_CLN
- printf(" spb=%d\n", spb);
-#endif
- opj_mqc_encode(mqc, v ^ spb);
- vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0;
- opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc);
- }
- }
- *flagsp &= ~(T1_PI_THIS << (3U * ci));
- datap += t1->data_stride;
- }
+#define opj_t1_enc_clnpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, agg, runlen, lim, cblksty) \
+{ \
+ OPJ_UINT32 v; \
+ OPJ_UINT32 ci; \
+ opj_flag_t* const flagsp = (flagspIn); \
+ const OPJ_INT32* l_datap = (datapIn); \
+ const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | \
+ T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
+ \
+ if ((*flagsp & check) == check) { \
+ if (runlen == 0) { \
+ *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
+ } else if (runlen == 1) { \
+ *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); \
+ } else if (runlen == 2) { \
+ *flagsp &= ~(T1_PI_2 | T1_PI_3); \
+ } else if (runlen == 3) { \
+ *flagsp &= ~(T1_PI_3); \
+ } \
+ } \
+ else \
+ for (ci = runlen; ci < lim; ++ci) { \
+ OPJ_BOOL goto_PARTIAL = OPJ_FALSE; \
+ if ((agg != 0) && (ci == runlen)) { \
+ goto_PARTIAL = OPJ_TRUE; \
+ } \
+ else if (!(*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { \
+ OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U)); \
+/* #ifdef DEBUG_ENC_CLN */ \
+/* printf(" ctxt1=%d\n", ctxt1); */ \
+/* #endif */ \
+ opj_t1_setcurctx(curctx, ctxt1); \
+ v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \
+ opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
+ if (v) { \
+ goto_PARTIAL = OPJ_TRUE; \
+ } \
+ } \
+ if( goto_PARTIAL ) { \
+ OPJ_UINT32 vsc; \
+ OPJ_UINT32 ctxt2, spb; \
+ OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
+ *flagsp, \
+ flagsp[-1], flagsp[1], \
+ ci); \
+ *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
+ (OPJ_UINT32)bpno); \
+ ctxt2 = opj_t1_getctxno_sc(lu); \
+/* #ifdef DEBUG_ENC_CLN */ \
+/* printf(" ctxt2=%d\n", ctxt2); */ \
+/* #endif */ \
+ opj_t1_setcurctx(curctx, ctxt2); \
+ \
+ v = opj_smr_sign(*l_datap); \
+ spb = opj_t1_getspb(lu); \
+/* #ifdef DEBUG_ENC_CLN */ \
+/* printf(" spb=%d\n", spb); */\
+/* #endif */ \
+ opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
+ vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0; \
+ opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc); \
+ } \
+ *flagsp &= ~(T1_PI_THIS << (3U * ci)); \
+ l_datap ++; \
+ } \
}
#define opj_t1_dec_clnpass_step_macro(check_flags, partial, \
@@ -1165,47 +1144,50 @@ static void opj_t1_enc_clnpass(
{
OPJ_UINT32 i, k;
const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
- OPJ_UINT32 agg, runlen;
-
- opj_mqc_t *mqc = &(t1->mqc); /* MQC component */
+ opj_mqc_t* mqc = &(t1->mqc);
+ DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
+ const OPJ_INT32* datap = t1->data;
+ opj_flag_t *f = &T1_FLAGS(0, 0);
+ const OPJ_UINT32 extra = 2U;
*nmsedec = 0;
#ifdef DEBUG_ENC_CLN
printf("enc_clnpass: bpno=%d\n", bpno);
#endif
- for (k = 0; k < (t1->h & ~3U); k += 4) {
+ for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
#ifdef DEBUG_ENC_CLN
printf(" k=%d\n", k);
#endif
- for (i = 0; i < t1->w; ++i) {
+ for (i = 0; i < t1->w; ++i, f++) {
+ OPJ_UINT32 agg, runlen;
#ifdef DEBUG_ENC_CLN
printf(" i=%d\n", i);
#endif
- agg = !(T1_FLAGS(i, k));
+ agg = !*f;
#ifdef DEBUG_ENC_CLN
printf(" agg=%d\n", agg);
#endif
if (agg) {
- for (runlen = 0; runlen < 4; ++runlen) {
- if (opj_int_abs(t1->data[((k + runlen)*t1->data_stride) + i]) & one) {
+ for (runlen = 0; runlen < 4; ++runlen, ++datap) {
+ if (opj_smr_abs(*datap) & (OPJ_UINT32)one) {
break;
}
}
- opj_mqc_setcurctx(mqc, T1_CTXNO_AGG);
- opj_mqc_encode(mqc, runlen != 4);
+ opj_t1_setcurctx(curctx, T1_CTXNO_AGG);
+ opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen != 4);
if (runlen == 4) {
continue;
}
- opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
- opj_mqc_encode(mqc, runlen >> 1);
- opj_mqc_encode(mqc, runlen & 1);
+ opj_t1_setcurctx(curctx, T1_CTXNO_UNI);
+ opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen >> 1);
+ opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen & 1);
} else {
runlen = 0;
}
- opj_t1_enc_clnpass_step(
- t1,
- &T1_FLAGS(i, k),
- &t1->data[((k + runlen) * t1->data_stride) + i],
+ opj_t1_enc_clnpass_step_macro(
+ mqc, curctx, a, c, ct,
+ f,
+ datap,
bpno,
one,
nmsedec,
@@ -1213,23 +1195,24 @@ static void opj_t1_enc_clnpass(
runlen,
4U,
cblksty);
+ datap += 4 - runlen;
}
}
if (k < t1->h) {
- agg = 0;
- runlen = 0;
+ const OPJ_UINT32 agg = 0;
+ const OPJ_UINT32 runlen = 0;
#ifdef DEBUG_ENC_CLN
printf(" k=%d\n", k);
#endif
- for (i = 0; i < t1->w; ++i) {
+ for (i = 0; i < t1->w; ++i, f++) {
#ifdef DEBUG_ENC_CLN
printf(" i=%d\n", i);
printf(" agg=%d\n", agg);
#endif
- opj_t1_enc_clnpass_step(
- t1,
- &T1_FLAGS(i, k),
- &t1->data[((k + runlen) * t1->data_stride) + i],
+ opj_t1_enc_clnpass_step_macro(
+ mqc, curctx, a, c, ct,
+ f,
+ datap,
bpno,
one,
nmsedec,
@@ -1237,8 +1220,11 @@ static void opj_t1_enc_clnpass(
runlen,
t1->h - k,
cblksty);
+ datap += t1->h - k;
}
}
+
+ UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
}
#define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \
@@ -1250,7 +1236,7 @@ static void opj_t1_enc_clnpass(
opj_mqc_t* mqc = &(t1->mqc); \
register OPJ_INT32 *data = t1->data; \
register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
- DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
+ DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
register OPJ_UINT32 v; \
one = 1 << bpno; \
half = one >> 1; \
@@ -1319,7 +1305,7 @@ static void opj_t1_enc_clnpass(
*flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
} \
} \
- UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
+ UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
if( k < h ) { \
for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \
for (j = 0; j < h - k; ++j) { \
@@ -1427,7 +1413,7 @@ static OPJ_FLOAT64 opj_t1_getwmsedec(
w2 = opj_dwt_getnorm(level, orient);
} else { /* if (qmfbid == 0) */
const OPJ_INT32 log2_gain = (orient == 0) ? 0 :
- (orient == 3) ? 2 : 1;
+ (orient == 3) ? 2 : 1;
w2 = opj_dwt_getnorm_real(level, orient);
/* Not sure this is right. But preserves past behaviour */
stepsize /= (1 << log2_gain);
@@ -1454,7 +1440,7 @@ static OPJ_BOOL opj_t1_allocate_buffers(
assert(w * h <= 4096);
/* encoder uses tile buffer, so no need to allocate */
- if (!t1->encoder) {
+ {
OPJ_UINT32 datasize = w * h;
if (datasize > t1->datasize) {
@@ -1564,8 +1550,7 @@ void opj_t1_destroy(opj_t1_t *p_t1)
return;
}
- /* encoder uses tile buffer, so no need to free */
- if (!p_t1->encoder && p_t1->data) {
+ if (p_t1->data) {
opj_aligned_free(p_t1->data);
p_t1->data = 00;
}
@@ -2140,8 +2125,7 @@ static void opj_t1_clbl_encode_processor(void* user_data, opj_tls_t* tls)
OPJ_INT32* OPJ_RESTRICT tiledp;
OPJ_UINT32 cblk_w;
OPJ_UINT32 cblk_h;
- OPJ_UINT32 i, j, tileLineAdvance;
- OPJ_SIZE_T tileIndex = 0;
+ OPJ_UINT32 i, j;
OPJ_INT32 x = cblk->x0 - band->x0;
OPJ_INT32 y = cblk->y0 - band->y0;
@@ -2177,11 +2161,9 @@ static void opj_t1_clbl_encode_processor(void* user_data, opj_tls_t* tls)
cblk_w = t1->w;
cblk_h = t1->h;
- tileLineAdvance = tile_w - cblk_w;
tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
- t1->data = tiledp;
- t1->data_stride = tile_w;
+
if (tccp->qmfbid == 1) {
/* Do multiplication on unsigned type, even if the
* underlying type is signed, to avoid potential
@@ -2192,22 +2174,52 @@ static void opj_t1_clbl_encode_processor(void* user_data, opj_tls_t* tls)
* Fixes https://github.com/uclouvain/openjpeg/issues/1053
*/
OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp;
- for (j = 0; j < cblk_h; ++j) {
+ OPJ_UINT32* OPJ_RESTRICT t1data = (OPJ_UINT32*) t1->data;
+ /* Change from "natural" order to "zigzag" order of T1 passes */
+ for (j = 0; j < (cblk_h & ~3U); j += 4) {
for (i = 0; i < cblk_w; ++i) {
- tiledp_u[tileIndex] <<= T1_NMSEDEC_FRACBITS;
- tileIndex++;
+ t1data[0] = tiledp_u[(j + 0) * tile_w + i] << T1_NMSEDEC_FRACBITS;
+ t1data[1] = tiledp_u[(j + 1) * tile_w + i] << T1_NMSEDEC_FRACBITS;
+ t1data[2] = tiledp_u[(j + 2) * tile_w + i] << T1_NMSEDEC_FRACBITS;
+ t1data[3] = tiledp_u[(j + 3) * tile_w + i] << T1_NMSEDEC_FRACBITS;
+ t1data += 4;
+ }
+ }
+ if (j < cblk_h) {
+ for (i = 0; i < cblk_w; ++i) {
+ OPJ_UINT32 k;
+ for (k = j; k < cblk_h; k++) {
+ t1data[0] = tiledp_u[k * tile_w + i] << T1_NMSEDEC_FRACBITS;
+ t1data ++;
+ }
}
- tileIndex += tileLineAdvance;
}
} else { /* if (tccp->qmfbid == 0) */
- for (j = 0; j < cblk_h; ++j) {
+ OPJ_FLOAT32* OPJ_RESTRICT tiledp_f = (OPJ_FLOAT32*) tiledp;
+ OPJ_INT32* OPJ_RESTRICT t1data = t1->data;
+ /* Change from "natural" order to "zigzag" order of T1 passes */
+ for (j = 0; j < (cblk_h & ~3U); j += 4) {
+ for (i = 0; i < cblk_w; ++i) {
+ t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 0) * tile_w + i] /
+ band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
+ t1data[1] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 1) * tile_w + i] /
+ band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
+ t1data[2] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 2) * tile_w + i] /
+ band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
+ t1data[3] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 3) * tile_w + i] /
+ band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
+ t1data += 4;
+ }
+ }
+ if (j < cblk_h) {
for (i = 0; i < cblk_w; ++i) {
- OPJ_FLOAT32 tmp = ((OPJ_FLOAT32*)tiledp)[tileIndex];
- tiledp[tileIndex] = (OPJ_INT32)opj_lrintf((tmp / band->stepsize) *
- (1 << T1_NMSEDEC_FRACBITS));
- tileIndex++;
+ OPJ_UINT32 k;
+ for (k = j; k < cblk_h; k++) {
+ t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[k * tile_w + i] / band->stepsize)
+ * (1 << T1_NMSEDEC_FRACBITS));
+ t1data ++;
+ }
}
- tileIndex += tileLineAdvance;
}
}
@@ -2363,6 +2375,7 @@ static OPJ_FLOAT64 opj_t1_encode_cblk(opj_t1_t *t1,
OPJ_UINT32 i, j;
OPJ_BYTE type = T1_TYPE_MQ;
OPJ_FLOAT64 tempwmsedec;
+ OPJ_INT32* datap;
#ifdef EXTRA_DEBUG
printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n",
@@ -2372,10 +2385,19 @@ static OPJ_FLOAT64 opj_t1_encode_cblk(opj_t1_t *t1,
mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
max = 0;
- for (i = 0; i < t1->w; ++i) {
- for (j = 0; j < t1->h; ++j) {
- OPJ_INT32 tmp = abs(t1->data[i + j * t1->data_stride]);
- max = opj_int_max(max, tmp);
+ datap = t1->data;
+ for (j = 0; j < t1->h; ++j) {
+ const OPJ_UINT32 w = t1->w;
+ for (i = 0; i < w; ++i, ++datap) {
+ OPJ_INT32 tmp = *datap;
+ if (tmp < 0) {
+ OPJ_UINT32 tmp_unsigned;
+ max = opj_int_max(max, -tmp);
+ tmp_unsigned = opj_to_smr(tmp);
+ memcpy(datap, &tmp_unsigned, sizeof(OPJ_INT32));
+ } else {
+ max = opj_int_max(max, tmp);
+ }
}
}
diff --git a/src/lib/openjp2/t1.h b/src/lib/openjp2/t1.h
index bc8a8111..81ad0d00 100644
--- a/src/lib/openjp2/t1.h
+++ b/src/lib/openjp2/t1.h
@@ -198,7 +198,6 @@ typedef struct opj_t1 {
OPJ_UINT32 h;
OPJ_UINT32 datasize;
OPJ_UINT32 flagssize;
- OPJ_UINT32 data_stride;
OPJ_BOOL encoder;
/* Thre 3 variables below are only used by the decoder */