Cleanup code related to quality layer allocation, and add a few safety checks
[openjpeg.git] / src / lib / openjp2 / t1.c
1 /*
2  * The copyright in this software is being made available under the 2-clauses
3  * BSD License, included below. This software may be subject to other third
4  * party and contributor rights, including patent rights, and no such rights
5  * are granted under this license.
6  *
7  * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
8  * Copyright (c) 2002-2014, Professor Benoit Macq
9  * Copyright (c) 2001-2003, David Janssens
10  * Copyright (c) 2002-2003, Yannick Verschueren
11  * Copyright (c) 2003-2007, Francois-Olivier Devaux
12  * Copyright (c) 2003-2014, Antonin Descampe
13  * Copyright (c) 2005, Herve Drolon, FreeImage Team
14  * Copyright (c) 2007, Callum Lerwick <seg@haxxed.com>
15  * Copyright (c) 2012, Carl Hetherington
16  * Copyright (c) 2017, IntoPIX SA <support@intopix.com>
17  * All rights reserved.
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions
21  * are met:
22  * 1. Redistributions of source code must retain the above copyright
23  *    notice, this list of conditions and the following disclaimer.
24  * 2. Redistributions in binary form must reproduce the above copyright
25  *    notice, this list of conditions and the following disclaimer in the
26  *    documentation and/or other materials provided with the distribution.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
29  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38  * POSSIBILITY OF SUCH DAMAGE.
39  */
40
41 #define OPJ_SKIP_POISON
42 #include "opj_includes.h"
43
44 #ifdef __SSE__
45 #include <xmmintrin.h>
46 #endif
47 #ifdef __SSE2__
48 #include <emmintrin.h>
49 #endif
50
51 #if defined(__GNUC__)
52 #pragma GCC poison malloc calloc realloc free
53 #endif
54
55 #include "t1_luts.h"
56
57 /** @defgroup T1 T1 - Implementation of the tier-1 coding */
58 /*@{*/
59
/**
 * Flags word covering sample column x, row y of the current code-block.
 * One flags word is shared by 4 consecutive rows (hence y / 4). Consecutive
 * flag rows are (t1->w + 2) words apart, and both indices are offset by one,
 * which presumably skips a one-word border around the block.
 * Relies on a variable named t1 being in scope at the point of use.
 * Arguments are parenthesized so that expressions such as T1_FLAGS(i, k + 4)
 * index the intended word.
 */
#define T1_FLAGS(x, y) (t1->flags[(x) + 1 + (((y) / 4) + 1) * (t1->w + 2)])
61
/* Point curctx at entry ctxno of the MQ coder's context table.
 * Note: the coder is picked up by name, so a variable called "mqc" must be
 * in scope wherever this macro is used. */
#define opj_t1_setcurctx(curctx, ctxno) \
    (curctx) = &((mqc)->ctxs[(OPJ_UINT32)(ctxno)])
63
/* Macros to deal with signed integers stored in signed magnitude
 * representation (smr): bit 31 holds the sign, bits 0..30 the magnitude.
 *
 * opj_smr_abs(x)  : magnitude of an smr value
 * opj_smr_sign(x) : 1 if the smr value is negative, 0 otherwise
 * opj_to_smr(x)   : convert a two's complement integer to smr. The argument
 *                   is evaluated more than once, so it must not have side
 *                   effects. The negation is done in unsigned arithmetic so
 *                   that the most negative 32-bit value does not trigger
 *                   signed overflow, and the argument is parenthesized so
 *                   that expressions such as opj_to_smr(a - b) are negated
 *                   as a whole. */
#define opj_smr_abs(x)  (((OPJ_UINT32)(x)) & 0x7FFFFFFFU)
#define opj_smr_sign(x) (((OPJ_UINT32)(x)) >> 31)
#define opj_to_smr(x)   ((x) >= 0 ? (OPJ_UINT32)(x) : ((0U - (OPJ_UINT32)(x)) | 0x80000000U))
69
70
71 /** @name Local static functions */
72 /*@{*/
73
74 static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f);
75 static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f);
76 static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos);
77 static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos);
78 static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
79                                        OPJ_UINT32 s, OPJ_UINT32 stride,
80                                        OPJ_UINT32 vsc);
81
82
83 /**
84 Decode significant pass
85 */
86
87 static INLINE void opj_t1_dec_sigpass_step_raw(
88     opj_t1_t *t1,
89     opj_flag_t *flagsp,
90     OPJ_INT32 *datap,
91     OPJ_INT32 oneplushalf,
92     OPJ_UINT32 vsc,
93     OPJ_UINT32 row);
94 static INLINE void opj_t1_dec_sigpass_step_mqc(
95     opj_t1_t *t1,
96     opj_flag_t *flagsp,
97     OPJ_INT32 *datap,
98     OPJ_INT32 oneplushalf,
99     OPJ_UINT32 row,
100     OPJ_UINT32 flags_stride,
101     OPJ_UINT32 vsc);
102
103 /**
104 Encode significant pass
105 */
106 static void opj_t1_enc_sigpass(opj_t1_t *t1,
107                                OPJ_INT32 bpno,
108                                OPJ_INT32 *nmsedec,
109                                OPJ_BYTE type,
110                                OPJ_UINT32 cblksty);
111
112 /**
113 Decode significant pass
114 */
115 static void opj_t1_dec_sigpass_raw(
116     opj_t1_t *t1,
117     OPJ_INT32 bpno,
118     OPJ_INT32 cblksty);
119
120 /**
121 Encode refinement pass
122 */
123 static void opj_t1_enc_refpass(opj_t1_t *t1,
124                                OPJ_INT32 bpno,
125                                OPJ_INT32 *nmsedec,
126                                OPJ_BYTE type);
127
128 /**
129 Decode refinement pass
130 */
131 static void opj_t1_dec_refpass_raw(
132     opj_t1_t *t1,
133     OPJ_INT32 bpno);
134
135
136 /**
137 Decode refinement pass
138 */
139
140 static INLINE void  opj_t1_dec_refpass_step_raw(
141     opj_t1_t *t1,
142     opj_flag_t *flagsp,
143     OPJ_INT32 *datap,
144     OPJ_INT32 poshalf,
145     OPJ_UINT32 row);
146 static INLINE void opj_t1_dec_refpass_step_mqc(
147     opj_t1_t *t1,
148     opj_flag_t *flagsp,
149     OPJ_INT32 *datap,
150     OPJ_INT32 poshalf,
151     OPJ_UINT32 row);
152
153
154 /**
155 Decode clean-up pass
156 */
157
158 static void opj_t1_dec_clnpass_step(
159     opj_t1_t *t1,
160     opj_flag_t *flagsp,
161     OPJ_INT32 *datap,
162     OPJ_INT32 oneplushalf,
163     OPJ_UINT32 row,
164     OPJ_UINT32 vsc);
165
166 /**
167 Encode clean-up pass
168 */
169 static void opj_t1_enc_clnpass(
170     opj_t1_t *t1,
171     OPJ_INT32 bpno,
172     OPJ_INT32 *nmsedec,
173     OPJ_UINT32 cblksty);
174
175 static OPJ_FLOAT64 opj_t1_getwmsedec(
176     OPJ_INT32 nmsedec,
177     OPJ_UINT32 compno,
178     OPJ_UINT32 level,
179     OPJ_UINT32 orient,
180     OPJ_INT32 bpno,
181     OPJ_UINT32 qmfbid,
182     OPJ_FLOAT64 stepsize,
183     OPJ_UINT32 numcomps,
184     const OPJ_FLOAT64 * mct_norms,
185     OPJ_UINT32 mct_numcomps);
186
187 /** Return "cumwmsedec" that should be used to increase tile->distotile */
188 static double opj_t1_encode_cblk(opj_t1_t *t1,
189                                  opj_tcd_cblk_enc_t* cblk,
190                                  OPJ_UINT32 orient,
191                                  OPJ_UINT32 compno,
192                                  OPJ_UINT32 level,
193                                  OPJ_UINT32 qmfbid,
194                                  OPJ_FLOAT64 stepsize,
195                                  OPJ_UINT32 cblksty,
196                                  OPJ_UINT32 numcomps,
197                                  const OPJ_FLOAT64 * mct_norms,
198                                  OPJ_UINT32 mct_numcomps);
199
200 /**
201 Decode 1 code-block
202 @param t1 T1 handle
203 @param cblk Code-block coding parameters
204 @param orient
205 @param roishift Region of interest shifting value
206 @param cblksty Code-block style
207 @param p_manager the event manager
208 @param p_manager_mutex mutex for the event manager
209 @param check_pterm whether PTERM correct termination should be checked
210 */
211 static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
212                                    opj_tcd_cblk_dec_t* cblk,
213                                    OPJ_UINT32 orient,
214                                    OPJ_UINT32 roishift,
215                                    OPJ_UINT32 cblksty,
216                                    opj_event_mgr_t *p_manager,
217                                    opj_mutex_t* p_manager_mutex,
218                                    OPJ_BOOL check_pterm);
219
220 /**
221 Decode 1 HT code-block
222 @param t1 T1 handle
223 @param cblk Code-block coding parameters
224 @param orient
225 @param roishift Region of interest shifting value
226 @param cblksty Code-block style
227 @param p_manager the event manager
228 @param p_manager_mutex mutex for the event manager
229 @param check_pterm whether PTERM correct termination should be checked
230 */
231 OPJ_BOOL opj_t1_ht_decode_cblk(opj_t1_t *t1,
232                                opj_tcd_cblk_dec_t* cblk,
233                                OPJ_UINT32 orient,
234                                OPJ_UINT32 roishift,
235                                OPJ_UINT32 cblksty,
236                                opj_event_mgr_t *p_manager,
237                                opj_mutex_t* p_manager_mutex,
238                                OPJ_BOOL check_pterm);
239
240
241 static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1,
242                                         OPJ_UINT32 w,
243                                         OPJ_UINT32 h);
244
245 /*@}*/
246
247 /*@}*/
248
249 /* ----------------------------------------------------------------------- */
250
251 static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f)
252 {
253     return mqc->lut_ctxno_zc_orient[(f & T1_SIGMA_NEIGHBOURS)];
254 }
255
256 static INLINE OPJ_UINT32 opj_t1_getctxtno_sc_or_spb_index(OPJ_UINT32 fX,
257         OPJ_UINT32 pfX,
258         OPJ_UINT32 nfX,
259         OPJ_UINT32 ci)
260 {
261     /*
262       0 pfX T1_CHI_THIS           T1_LUT_SGN_W
263       1 tfX T1_SIGMA_1            T1_LUT_SIG_N
264       2 nfX T1_CHI_THIS           T1_LUT_SGN_E
265       3 tfX T1_SIGMA_3            T1_LUT_SIG_W
266       4  fX T1_CHI_(THIS - 1)     T1_LUT_SGN_N
267       5 tfX T1_SIGMA_5            T1_LUT_SIG_E
268       6  fX T1_CHI_(THIS + 1)     T1_LUT_SGN_S
269       7 tfX T1_SIGMA_7            T1_LUT_SIG_S
270     */
271
272     OPJ_UINT32 lu = (fX >> (ci * 3U)) & (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 |
273                                          T1_SIGMA_7);
274
275     lu |= (pfX >> (T1_CHI_THIS_I      + (ci * 3U))) & (1U << 0);
276     lu |= (nfX >> (T1_CHI_THIS_I - 2U + (ci * 3U))) & (1U << 2);
277     if (ci == 0U) {
278         lu |= (fX >> (T1_CHI_0_I - 4U)) & (1U << 4);
279     } else {
280         lu |= (fX >> (T1_CHI_1_I - 4U + ((ci - 1U) * 3U))) & (1U << 4);
281     }
282     lu |= (fX >> (T1_CHI_2_I - 6U + (ci * 3U))) & (1U << 6);
283     return lu;
284 }
285
286 static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 lu)
287 {
288     return lut_ctxno_sc[lu];
289 }
290
291 static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f)
292 {
293     OPJ_UINT32 tmp = (f & T1_SIGMA_NEIGHBOURS) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG;
294     OPJ_UINT32 tmp2 = (f & T1_MU_0) ? T1_CTXNO_MAG + 2 : tmp;
295     return tmp2;
296 }
297
298 static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 lu)
299 {
300     return lut_spb[lu];
301 }
302
303 static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos)
304 {
305     if (bitpos > 0) {
306         return lut_nmsedec_sig[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
307     }
308
309     return lut_nmsedec_sig0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
310 }
311
312 static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos)
313 {
314     if (bitpos > 0) {
315         return lut_nmsedec_ref[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
316     }
317
318     return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
319 }
320
/**
 * Record that sample ci of the flags word at flagsp has become significant
 * with sign bit s, and inform the surrounding flags words.
 *   flags  : lvalue holding the current flags word (either *flagsp itself or
 *            a local copy that the caller writes back later)
 *   flagsp : pointer to that word inside the flags array
 *   ci     : index (0..3) of the sample inside the word
 *   s      : sign bit (0 or 1)
 *   stride : distance, in flags words, between two consecutive flag rows
 *   vsc    : when non-zero, the row of flags above is left untouched
 */
#define opj_t1_update_flags_macro(flags, flagsp, ci, s, stride, vsc) \
{ \
    const OPJ_UINT32 l_ci_shift = 3U * ci; \
 \
    /* previous column: its east neighbour is now significant */ \
    flagsp[-1] |= T1_SIGMA_5 << l_ci_shift; \
 \
    /* the sample itself: significance bit and sign bit */ \
    flags |= ((s << T1_CHI_1_I) | T1_SIGMA_4) << l_ci_shift; \
 \
    /* next column: its west neighbour is now significant */ \
    flagsp[1] |= T1_SIGMA_3 << l_ci_shift; \
 \
    /* first sample of the word: update the three words of the row above */ \
    /* (north-west, north, north-east), unless vsc forbids it */ \
    if (ci == 0U && !(vsc)) { \
        opj_flag_t* l_row_above = flagsp - (stride); \
        l_row_above[0] |= (s << T1_CHI_5_I) | T1_SIGMA_16; \
        l_row_above[-1] |= T1_SIGMA_17; \
        l_row_above[1] |= T1_SIGMA_15; \
    } \
 \
    /* last sample of the word: update the three words of the row below */ \
    /* (south-west, south, south-east) */ \
    if (ci == 3U) { \
        opj_flag_t* l_row_below = flagsp + (stride); \
        l_row_below[0] |= (s << T1_CHI_0_I) | T1_SIGMA_1; \
        l_row_below[-1] |= T1_SIGMA_2; \
        l_row_below[1] |= T1_SIGMA_0; \
    } \
}
348
349
350 static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
351                                        OPJ_UINT32 s, OPJ_UINT32 stride,
352                                        OPJ_UINT32 vsc)
353 {
354     opj_t1_update_flags_macro(*flagsp, flagsp, ci, s, stride, vsc);
355 }
356
/**
Encode significance pass: step for one sample.
The sample is coded only if it is neither significant nor already visited
in this pass, and has at least one significant neighbour. A variable named
t1 must be in scope (its width gives the flags stride), as well as the
variable "mqc" required by opj_t1_setcurctx.
*/
#define opj_t1_enc_sigpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, type, ciIn, vscIn) \
{ \
    OPJ_UINT32 v; \
    const OPJ_UINT32 ci = (ciIn); \
    const OPJ_UINT32 vsc = (vscIn); \
    const OPJ_INT32* l_datap = (datapIn); \
    opj_flag_t* flagsp = (flagspIn); \
    OPJ_UINT32 const flags = *flagsp; \
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
            (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
        const OPJ_UINT32 l_zc_ctxno = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
        /* v = 1 when the bit of the current bit-plane is set */ \
        v = ((opj_smr_abs(*l_datap) & (OPJ_UINT32)one) != 0U); \
        /* DEBUG_ENC_SIG: fprintf(stderr, "   ctxt1=%d\n", l_zc_ctxno); */ \
        opj_t1_setcurctx(curctx, l_zc_ctxno); \
        if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */ \
            opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
        } else { \
            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
        } \
        if (v) { \
            /* the sample becomes significant: code its sign */ \
            const OPJ_UINT32 l_sc_index = opj_t1_getctxtno_sc_or_spb_index( \
                                              *flagsp, \
                                              flagsp[-1], flagsp[1], \
                                              ci); \
            const OPJ_UINT32 l_sc_ctxno = opj_t1_getctxno_sc(l_sc_index); \
            *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
                                              (OPJ_UINT32)bpno); \
            v = opj_smr_sign(*l_datap); \
            /* DEBUG_ENC_SIG: fprintf(stderr, "   ctxt2=%d\n", l_sc_ctxno); */ \
            opj_t1_setcurctx(curctx, l_sc_ctxno); \
            if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */ \
                opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
            } else { \
                const OPJ_UINT32 l_spb = opj_t1_getspb(l_sc_index); \
                /* DEBUG_ENC_SIG: fprintf(stderr, "   spb=%d\n", l_spb); */ \
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ l_spb); \
            } \
            opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); \
        } \
        /* remember that the sample was visited in this pass */ \
        *flagsp |= T1_PI_THIS << (ci * 3U); \
    } \
}
408
409 static INLINE void opj_t1_dec_sigpass_step_raw(
410     opj_t1_t *t1,
411     opj_flag_t *flagsp,
412     OPJ_INT32 *datap,
413     OPJ_INT32 oneplushalf,
414     OPJ_UINT32 vsc,
415     OPJ_UINT32 ci)
416 {
417     OPJ_UINT32 v;
418     opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
419
420     OPJ_UINT32 const flags = *flagsp;
421
422     if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
423             (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
424         if (opj_mqc_raw_decode(mqc)) {
425             v = opj_mqc_raw_decode(mqc);
426             *datap = v ? -oneplushalf : oneplushalf;
427             opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
428         }
429         *flagsp |= T1_PI_THIS << (ci * 3U);
430     }
431 }
432
/**
 * Decode significance pass, MQ coded: step for one sample.
 *   flags       : lvalue holding the flags word of the sample
 *   flagsp      : pointer to that word inside the flags array
 *   data        : coefficient pointer; the sample is data[ci*data_stride]
 *   v           : lvalue receiving the decoded decisions
 *   oneplushalf : value stored (negated for a negative sign) on significance
 * A variable named "mqc" must be in scope (required by opj_t1_setcurctx).
 */
#define opj_t1_dec_sigpass_step_mqc_macro(flags, flagsp, flags_stride, data, \
                                          data_stride, ci, mqc, curctx, \
                                          v, a, c, ct, oneplushalf, vsc) \
{ \
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
        (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
        const OPJ_UINT32 l_zc_ctxno = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
        /* significance decision */ \
        opj_t1_setcurctx(curctx, l_zc_ctxno); \
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
        if (v) { \
            const OPJ_UINT32 l_sc_index = opj_t1_getctxtno_sc_or_spb_index( \
                                              flags, \
                                              flagsp[-1], flagsp[1], \
                                              ci); \
            const OPJ_UINT32 l_sc_ctxno = opj_t1_getctxno_sc(l_sc_index); \
            const OPJ_UINT32 l_spb = opj_t1_getspb(l_sc_index); \
            /* sign decision, coded relative to the predicted sign */ \
            opj_t1_setcurctx(curctx, l_sc_ctxno); \
            opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
            v ^= l_spb; \
            data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
            opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
        } \
        /* remember that the sample was visited in this pass */ \
        flags |= T1_PI_THIS << (ci * 3U); \
    } \
}
458
459 static INLINE void opj_t1_dec_sigpass_step_mqc(
460     opj_t1_t *t1,
461     opj_flag_t *flagsp,
462     OPJ_INT32 *datap,
463     OPJ_INT32 oneplushalf,
464     OPJ_UINT32 ci,
465     OPJ_UINT32 flags_stride,
466     OPJ_UINT32 vsc)
467 {
468     OPJ_UINT32 v;
469
470     opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
471     opj_t1_dec_sigpass_step_mqc_macro(*flagsp, flagsp, flags_stride, datap,
472                                       0, ci, mqc, mqc->curctx,
473                                       v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
474 }
475
476 static void opj_t1_enc_sigpass(opj_t1_t *t1,
477                                OPJ_INT32 bpno,
478                                OPJ_INT32 *nmsedec,
479                                OPJ_BYTE type,
480                                OPJ_UINT32 cblksty
481                               )
482 {
483     OPJ_UINT32 i, k;
484     OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
485     opj_flag_t* f = &T1_FLAGS(0, 0);
486     OPJ_UINT32 const extra = 2;
487     opj_mqc_t* mqc = &(t1->mqc);
488     DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
489     const OPJ_INT32* datap = t1->data;
490
491     *nmsedec = 0;
492 #ifdef DEBUG_ENC_SIG
493     fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno);
494 #endif
495     for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
496         const OPJ_UINT32 w = t1->w;
497 #ifdef DEBUG_ENC_SIG
498         fprintf(stderr, " k=%d\n", k);
499 #endif
500         for (i = 0; i < w; ++i, ++f, datap += 4) {
501 #ifdef DEBUG_ENC_SIG
502             fprintf(stderr, " i=%d\n", i);
503 #endif
504             if (*f == 0U) {
505                 /* Nothing to do for any of the 4 data points */
506                 continue;
507             }
508             opj_t1_enc_sigpass_step_macro(
509                 mqc, curctx, a, c, ct,
510                 f,
511                 &datap[0],
512                 bpno,
513                 one,
514                 nmsedec,
515                 type,
516                 0, cblksty & J2K_CCP_CBLKSTY_VSC);
517             opj_t1_enc_sigpass_step_macro(
518                 mqc, curctx, a, c, ct,
519                 f,
520                 &datap[1],
521                 bpno,
522                 one,
523                 nmsedec,
524                 type,
525                 1, 0);
526             opj_t1_enc_sigpass_step_macro(
527                 mqc, curctx, a, c, ct,
528                 f,
529                 &datap[2],
530                 bpno,
531                 one,
532                 nmsedec,
533                 type,
534                 2, 0);
535             opj_t1_enc_sigpass_step_macro(
536                 mqc, curctx, a, c, ct,
537                 f,
538                 &datap[3],
539                 bpno,
540                 one,
541                 nmsedec,
542                 type,
543                 3, 0);
544         }
545     }
546
547     if (k < t1->h) {
548         OPJ_UINT32 j;
549 #ifdef DEBUG_ENC_SIG
550         fprintf(stderr, " k=%d\n", k);
551 #endif
552         for (i = 0; i < t1->w; ++i, ++f) {
553 #ifdef DEBUG_ENC_SIG
554             fprintf(stderr, " i=%d\n", i);
555 #endif
556             if (*f == 0U) {
557                 /* Nothing to do for any of the 4 data points */
558                 datap += (t1->h - k);
559                 continue;
560             }
561             for (j = k; j < t1->h; ++j, ++datap) {
562                 opj_t1_enc_sigpass_step_macro(
563                     mqc, curctx, a, c, ct,
564                     f,
565                     &datap[0],
566                     bpno,
567                     one,
568                     nmsedec,
569                     type,
570                     j - k,
571                     (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0));
572             }
573         }
574     }
575
576     UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
577 }
578
579 static void opj_t1_dec_sigpass_raw(
580     opj_t1_t *t1,
581     OPJ_INT32 bpno,
582     OPJ_INT32 cblksty)
583 {
584     OPJ_INT32 one, half, oneplushalf;
585     OPJ_UINT32 i, j, k;
586     OPJ_INT32 *data = t1->data;
587     opj_flag_t *flagsp = &T1_FLAGS(0, 0);
588     const OPJ_UINT32 l_w = t1->w;
589     one = 1 << bpno;
590     half = one >> 1;
591     oneplushalf = one | half;
592
593     for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
594         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
595             opj_flag_t flags = *flagsp;
596             if (flags != 0) {
597                 opj_t1_dec_sigpass_step_raw(
598                     t1,
599                     flagsp,
600                     data,
601                     oneplushalf,
602                     cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
603                     0U);
604                 opj_t1_dec_sigpass_step_raw(
605                     t1,
606                     flagsp,
607                     data + l_w,
608                     oneplushalf,
609                     OPJ_FALSE, /* vsc */
610                     1U);
611                 opj_t1_dec_sigpass_step_raw(
612                     t1,
613                     flagsp,
614                     data + 2 * l_w,
615                     oneplushalf,
616                     OPJ_FALSE, /* vsc */
617                     2U);
618                 opj_t1_dec_sigpass_step_raw(
619                     t1,
620                     flagsp,
621                     data + 3 * l_w,
622                     oneplushalf,
623                     OPJ_FALSE, /* vsc */
624                     3U);
625             }
626         }
627     }
628     if (k < t1->h) {
629         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
630             for (j = 0; j < t1->h - k; ++j) {
631                 opj_t1_dec_sigpass_step_raw(
632                     t1,
633                     flagsp,
634                     data + j * l_w,
635                     oneplushalf,
636                     cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
637                     j);
638             }
639         }
640     }
641 }
642
/**
 * Body of the MQ-coded significance decoding pass, parameterized so that
 * callers can pass compile-time constants for the block dimensions.
 *   t1           : T1 handle
 *   bpno         : bit-plane number being decoded
 *   vsc          : non-zero when vertically causal context is in use
 *   w, h         : width and height of the code-block
 *   flags_stride : distance, in flags words, between two flag rows
 * Complete stripes of 4 rows work on local copies of the MQ coder state and
 * of each flags word; the state is written back before the last, incomplete
 * stripe, which goes through opj_t1_dec_sigpass_step_mqc().
 */
#define opj_t1_dec_sigpass_mqc_internal(t1, bpno, vsc, w, h, flags_stride) \
{ \
        OPJ_INT32 one, half, oneplushalf; \
        OPJ_UINT32 l_col, l_row, l_stripe; \
        register OPJ_INT32 *data = t1->data; \
        register opj_flag_t *flagsp = &t1->flags[(flags_stride) + 1]; \
        const OPJ_UINT32 l_w = w; \
        opj_mqc_t* mqc = &(t1->mqc); \
        DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
        register OPJ_UINT32 v; \
        one = 1 << bpno; \
        half = one >> 1; \
        oneplushalf = one | half; \
        /* complete stripes of 4 rows */ \
        for (l_stripe = 0; l_stripe < (h & ~3u); l_stripe += 4) { \
                for (l_col = 0; l_col < l_w; ++l_col, ++data, ++flagsp) { \
                        opj_flag_t flags = *flagsp; \
                        if( flags == 0 ) { \
                            continue; \
                        } \
                        /* only the first row of a stripe is subject to vsc */ \
                        opj_t1_dec_sigpass_step_mqc_macro( \
                            flags, flagsp, flags_stride, data, \
                            l_w, 0, mqc, curctx, v, a, c, ct, oneplushalf, vsc); \
                        opj_t1_dec_sigpass_step_mqc_macro( \
                            flags, flagsp, flags_stride, data, \
                            l_w, 1, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        opj_t1_dec_sigpass_step_mqc_macro( \
                            flags, flagsp, flags_stride, data, \
                            l_w, 2, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        opj_t1_dec_sigpass_step_mqc_macro( \
                            flags, flagsp, flags_stride, data, \
                            l_w, 3, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        /* write the local copy of the flags word back */ \
                        *flagsp = flags; \
                } \
                data += 3*l_w; \
                flagsp += 2; \
        } \
        /* the function-based steps below read the coder state from mqc */ \
        UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
        /* last, incomplete stripe (1 to 3 rows) */ \
        if( l_stripe < h ) { \
            for (l_col = 0; l_col < l_w; ++l_col, ++data, ++flagsp) { \
                for (l_row = 0; l_row < h - l_stripe; ++l_row) { \
                        opj_t1_dec_sigpass_step_mqc(t1, flagsp, \
                            data + l_row * l_w, oneplushalf, l_row, flags_stride, vsc); \
                } \
            } \
        } \
}
686
687 static void opj_t1_dec_sigpass_mqc_64x64_novsc(
688     opj_t1_t *t1,
689     OPJ_INT32 bpno)
690 {
691     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
692 }
693
694 static void opj_t1_dec_sigpass_mqc_64x64_vsc(
695     opj_t1_t *t1,
696     OPJ_INT32 bpno)
697 {
698     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
699 }
700
701 static void opj_t1_dec_sigpass_mqc_generic_novsc(
702     opj_t1_t *t1,
703     OPJ_INT32 bpno)
704 {
705     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
706                                     t1->w + 2U);
707 }
708
709 static void opj_t1_dec_sigpass_mqc_generic_vsc(
710     opj_t1_t *t1,
711     OPJ_INT32 bpno)
712 {
713     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
714                                     t1->w + 2U);
715 }
716
717 static void opj_t1_dec_sigpass_mqc(
718     opj_t1_t *t1,
719     OPJ_INT32 bpno,
720     OPJ_INT32 cblksty)
721 {
722     if (t1->w == 64 && t1->h == 64) {
723         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
724             opj_t1_dec_sigpass_mqc_64x64_vsc(t1, bpno);
725         } else {
726             opj_t1_dec_sigpass_mqc_64x64_novsc(t1, bpno);
727         }
728     } else {
729         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
730             opj_t1_dec_sigpass_mqc_generic_vsc(t1, bpno);
731         } else {
732             opj_t1_dec_sigpass_mqc_generic_novsc(t1, bpno);
733         }
734     }
735 }
736
/**
Encode refinement pass: step for one sample.
The sample is refined only if it is significant and was not visited by the
significance pass of this bit-plane.
  flags        : flags word to test (read only)
  flagsUpdated : lvalue in which the refinement marker is set
  datap        : pointer to the coefficient, in signed magnitude form
A variable named "mqc" must be in scope (required by opj_t1_setcurctx).
*/
#define opj_t1_enc_refpass_step_macro(mqc, curctx, a, c, ct, flags, flagsUpdated, datap, bpno, one, nmsedec, type, ci) \
{\
    OPJ_UINT32 v; \
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == (T1_SIGMA_THIS << ((ci) * 3U))) { \
        const OPJ_UINT32 l_shifted_flags = (flags >> ((ci) * 3U)); \
        const OPJ_UINT32 l_mag_ctxno = opj_t1_getctxno_mag(l_shifted_flags); \
        const OPJ_UINT32 l_magnitude = opj_smr_abs(*(datap)); \
        *nmsedec += opj_t1_getnmsedec_ref(l_magnitude, \
                                          (OPJ_UINT32)bpno); \
        /* v = 1 when the bit of the current bit-plane is set */ \
        v = (((OPJ_INT32)l_magnitude & one) != 0); \
        /* DEBUG_ENC_REF: fprintf(stderr, "  ctxt=%d\n", l_mag_ctxno); */ \
        opj_t1_setcurctx(curctx, l_mag_ctxno); \
        if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */ \
            opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
        } else { \
            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
        } \
        /* remember that the sample has been refined */ \
        flagsUpdated |= T1_MU_THIS << ((ci) * 3U); \
    } \
}
762
763
764 static INLINE void opj_t1_dec_refpass_step_raw(
765     opj_t1_t *t1,
766     opj_flag_t *flagsp,
767     OPJ_INT32 *datap,
768     OPJ_INT32 poshalf,
769     OPJ_UINT32 ci)
770 {
771     OPJ_UINT32 v;
772
773     opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
774
775     if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) ==
776             (T1_SIGMA_THIS << (ci * 3U))) {
777         v = opj_mqc_raw_decode(mqc);
778         *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf;
779         *flagsp |= T1_MU_THIS << (ci * 3U);
780     }
781 }
782
/**
 * Decode refinement pass, MQ coded: step for one sample.
 *   flags   : lvalue holding the flags word of the sample
 *   data    : coefficient pointer; the sample is data[ci*data_stride]
 *   v       : lvalue receiving the decoded decision
 *   poshalf : refinement amount, added or subtracted
 * A variable named "mqc" must be in scope (required by opj_t1_setcurctx).
 */
#define opj_t1_dec_refpass_step_mqc_macro(flags, data, data_stride, ci, \
                                          mqc, curctx, v, a, c, ct, poshalf) \
{ \
    /* only significant samples not visited by the significance pass */ \
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == \
            (T1_SIGMA_THIS << (ci * 3U))) { \
        const OPJ_UINT32 l_mag_ctxno = opj_t1_getctxno_mag(flags >> (ci * 3U)); \
        opj_t1_setcurctx(curctx, l_mag_ctxno); \
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
        data[ci*data_stride] += (v ^ (data[ci*data_stride] < 0)) ? poshalf : -poshalf; \
        /* remember that the sample has been refined */ \
        flags |= T1_MU_THIS << (ci * 3U); \
    } \
}
795
796 static INLINE void opj_t1_dec_refpass_step_mqc(
797     opj_t1_t *t1,
798     opj_flag_t *flagsp,
799     OPJ_INT32 *datap,
800     OPJ_INT32 poshalf,
801     OPJ_UINT32 ci)
802 {
803     OPJ_UINT32 v;
804
805     opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
806     opj_t1_dec_refpass_step_mqc_macro(*flagsp, datap, 0, ci,
807                                       mqc, mqc->curctx, v, mqc->a, mqc->c,
808                                       mqc->ct, poshalf);
809 }
810
811 static void opj_t1_enc_refpass(
812     opj_t1_t *t1,
813     OPJ_INT32 bpno,
814     OPJ_INT32 *nmsedec,
815     OPJ_BYTE type)
816 {
817     OPJ_UINT32 i, k;
818     const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
819     opj_flag_t* f = &T1_FLAGS(0, 0);
820     const OPJ_UINT32 extra = 2U;
821     opj_mqc_t* mqc = &(t1->mqc);
822     DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
823     const OPJ_INT32* datap = t1->data;
824
825     *nmsedec = 0;
826 #ifdef DEBUG_ENC_REF
827     fprintf(stderr, "enc_refpass: bpno=%d\n", bpno);
828 #endif
829     for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
830 #ifdef DEBUG_ENC_REF
831         fprintf(stderr, " k=%d\n", k);
832 #endif
833         for (i = 0; i < t1->w; ++i, f++, datap += 4) {
834             const OPJ_UINT32 flags = *f;
835             OPJ_UINT32 flagsUpdated = flags;
836 #ifdef DEBUG_ENC_REF
837             fprintf(stderr, " i=%d\n", i);
838 #endif
839             if ((flags & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
840                 /* none significant */
841                 continue;
842             }
843             if ((flags & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
844                     (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) {
845                 /* all processed by sigpass */
846                 continue;
847             }
848
849             opj_t1_enc_refpass_step_macro(
850                 mqc, curctx, a, c, ct,
851                 flags, flagsUpdated,
852                 &datap[0],
853                 bpno,
854                 one,
855                 nmsedec,
856                 type,
857                 0);
858             opj_t1_enc_refpass_step_macro(
859                 mqc, curctx, a, c, ct,
860                 flags, flagsUpdated,
861                 &datap[1],
862                 bpno,
863                 one,
864                 nmsedec,
865                 type,
866                 1);
867             opj_t1_enc_refpass_step_macro(
868                 mqc, curctx, a, c, ct,
869                 flags, flagsUpdated,
870                 &datap[2],
871                 bpno,
872                 one,
873                 nmsedec,
874                 type,
875                 2);
876             opj_t1_enc_refpass_step_macro(
877                 mqc, curctx, a, c, ct,
878                 flags, flagsUpdated,
879                 &datap[3],
880                 bpno,
881                 one,
882                 nmsedec,
883                 type,
884                 3);
885             *f = flagsUpdated;
886         }
887     }
888
889     if (k < t1->h) {
890         OPJ_UINT32 j;
891         const OPJ_UINT32 remaining_lines = t1->h - k;
892 #ifdef DEBUG_ENC_REF
893         fprintf(stderr, " k=%d\n", k);
894 #endif
895         for (i = 0; i < t1->w; ++i, ++f) {
896 #ifdef DEBUG_ENC_REF
897             fprintf(stderr, " i=%d\n", i);
898 #endif
899             if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
900                 /* none significant */
901                 datap += remaining_lines;
902                 continue;
903             }
904             for (j = 0; j < remaining_lines; ++j, datap ++) {
905                 opj_t1_enc_refpass_step_macro(
906                     mqc, curctx, a, c, ct,
907                     *f, *f,
908                     &datap[0],
909                     bpno,
910                     one,
911                     nmsedec,
912                     type,
913                     j);
914             }
915         }
916     }
917
918     UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
919 }
920
921
922 static void opj_t1_dec_refpass_raw(
923     opj_t1_t *t1,
924     OPJ_INT32 bpno)
925 {
926     OPJ_INT32 one, poshalf;
927     OPJ_UINT32 i, j, k;
928     OPJ_INT32 *data = t1->data;
929     opj_flag_t *flagsp = &T1_FLAGS(0, 0);
930     const OPJ_UINT32 l_w = t1->w;
931     one = 1 << bpno;
932     poshalf = one >> 1;
933     for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
934         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
935             opj_flag_t flags = *flagsp;
936             if (flags != 0) {
937                 opj_t1_dec_refpass_step_raw(
938                     t1,
939                     flagsp,
940                     data,
941                     poshalf,
942                     0U);
943                 opj_t1_dec_refpass_step_raw(
944                     t1,
945                     flagsp,
946                     data + l_w,
947                     poshalf,
948                     1U);
949                 opj_t1_dec_refpass_step_raw(
950                     t1,
951                     flagsp,
952                     data + 2 * l_w,
953                     poshalf,
954                     2U);
955                 opj_t1_dec_refpass_step_raw(
956                     t1,
957                     flagsp,
958                     data + 3 * l_w,
959                     poshalf,
960                     3U);
961             }
962         }
963     }
964     if (k < t1->h) {
965         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
966             for (j = 0; j < t1->h - k; ++j) {
967                 opj_t1_dec_refpass_step_raw(
968                     t1,
969                     flagsp,
970                     data + j * l_w,
971                     poshalf,
972                     j);
973             }
974         }
975     }
976 }
977
/**
 * Body of the decoder refinement pass (MQC-coded variant), written as a macro
 * so that it can be expanded either with literal dimensions or with the
 * run-time dimensions of the code-block.
 *
 * t1            Tier-1 handle; its data, flags and mqc members are used
 * bpno          bit-plane number (poshalf is (1 << bpno) >> 1)
 * w, h          width and height of the code-block
 * flags_stride  number of entries in one row of the flags array; the first
 *               flag word used is t1->flags[flags_stride + 1]
 *
 * The code-block is walked in stripes of 4 rows. In a full stripe, a column
 * is processed only when its flag word is non-zero: the step macro is expanded
 * for rows 0 to 3 on a local copy of the flag word, which is then stored back.
 * This part runs between DOWNLOAD_MQC_VARIABLES and UPLOAD_MQC_VARIABLES.
 * The remaining (h % 4) rows are processed after UPLOAD_MQC_VARIABLES, through
 * the function opj_t1_dec_refpass_step_mqc().
 *
 * The arguments are pasted without parentheses and some of them several
 * times, so only simple, side-effect free expressions should be passed.
 */
978 #define opj_t1_dec_refpass_mqc_internal(t1, bpno, w, h, flags_stride) \
979 { \
980         OPJ_INT32 one, poshalf; \
981         OPJ_UINT32 i, j, k; \
982         register OPJ_INT32 *data = t1->data; \
983         register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
984         const OPJ_UINT32 l_w = w; \
985         opj_mqc_t* mqc = &(t1->mqc); \
986         DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
987         register OPJ_UINT32 v; \
988         one = 1 << bpno; \
989         poshalf = one >> 1; \
990         for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
991                 for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
992                         opj_flag_t flags = *flagsp; \
993                         if( flags != 0 ) { \
994                             opj_t1_dec_refpass_step_mqc_macro( \
995                                 flags, data, l_w, 0, \
996                                 mqc, curctx, v, a, c, ct, poshalf); \
997                             opj_t1_dec_refpass_step_mqc_macro( \
998                                 flags, data, l_w, 1, \
999                                 mqc, curctx, v, a, c, ct, poshalf); \
1000                             opj_t1_dec_refpass_step_mqc_macro( \
1001                                 flags, data, l_w, 2, \
1002                                 mqc, curctx, v, a, c, ct, poshalf); \
1003                             opj_t1_dec_refpass_step_mqc_macro( \
1004                                 flags, data, l_w, 3, \
1005                                 mqc, curctx, v, a, c, ct, poshalf); \
1006                             *flagsp = flags; \
1007                         } \
1008                 } \
1009         } \
1010         UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
1011         if( k < h ) { \
1012             for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
1013                 for (j = 0; j < h - k; ++j) { \
1014                         opj_t1_dec_refpass_step_mqc(t1, flagsp, data + j * l_w, poshalf, j); \
1015                 } \
1016             } \
1017         } \
1018 }
1019
/**
 * MQC refinement pass expanded with literal dimensions: width 64, height 64
 * and a flags stride of 66 (64 + 2). Selected by opj_t1_dec_refpass_mqc()
 * when the code-block is exactly 64x64.
 *
 * @param t1    Tier-1 handle
 * @param bpno  bit-plane number
 */
1020 static void opj_t1_dec_refpass_mqc_64x64(
1021     opj_t1_t *t1,
1022     OPJ_INT32 bpno)
1023 {
1024     opj_t1_dec_refpass_mqc_internal(t1, bpno, 64, 64, 66);
1025 }
1026
/**
 * MQC refinement pass for any code-block size: the dimensions are read from
 * t1->w and t1->h and the flags stride is t1->w + 2.
 *
 * @param t1    Tier-1 handle
 * @param bpno  bit-plane number
 */
1027 static void opj_t1_dec_refpass_mqc_generic(
1028     opj_t1_t *t1,
1029     OPJ_INT32 bpno)
1030 {
1031     opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->w + 2U);
1032 }
1033
1034 static void opj_t1_dec_refpass_mqc(
1035     opj_t1_t *t1,
1036     OPJ_INT32 bpno)
1037 {
1038     if (t1->w == 64 && t1->h == 64) {
1039         opj_t1_dec_refpass_mqc_64x64(t1, bpno);
1040     } else {
1041         opj_t1_dec_refpass_mqc_generic(t1, bpno);
1042     }
1043 }
1044
1045 /**
1046 Encode clean-up pass step

Codes the coefficients of one column of a stripe, from row `runlen` up to
(but not including) row `lim`.

 mqc, curctx, a, c, ct  MQC encoder state, forwarded to opj_mqc_encode_macro
 flagspIn   pointer to the flag word of the column; the entries at [-1] and
            [1] are read as well
 datapIn    pointer to the coefficient of row `runlen`; a private copy is
            advanced by one coefficient per coded row
 bpno       bit-plane number, forwarded to opj_t1_getnmsedec_sig
 one        bit mask tested against the magnitude of the coefficient
 nmsedec    pointer to the distortion accumulator, incremented for every
            coefficient that gets its sign coded
 agg        non-zero when the caller coded a run-length for this column: the
            zero-coding decision of row `runlen` is then skipped and its sign
            is coded directly
 runlen     first row to code
 lim        number of rows in the stripe (4, or fewer for the last stripe)
 cblksty    code-block style; only J2K_CCP_CBLKSTY_VSC is tested

When the four SIGMA bits and the four PI bits of the flag word are all set,
nothing is coded and only the PI bits of rows >= runlen are cleared.
Otherwise each row is zero-coded unless its SIGMA or PI bit is set, its sign
is coded when it becomes significant, and its PI bit is cleared.

The expansion also uses a variable named `t1` that is NOT a macro parameter
(t1->w gives the flags stride): it must exist in the scope of the caller.
1047 */
1048 #define opj_t1_enc_clnpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, agg, runlen, lim, cblksty) \
1049 { \
1050     OPJ_UINT32 v; \
1051     OPJ_UINT32 ci; \
1052     opj_flag_t* const flagsp = (flagspIn); \
1053     const OPJ_INT32* l_datap = (datapIn); \
1054     const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | \
1055                               T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1056  \
1057     if ((*flagsp & check) == check) { \
1058         if (runlen == 0) { \
1059             *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1060         } else if (runlen == 1) { \
1061             *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); \
1062         } else if (runlen == 2) { \
1063             *flagsp &= ~(T1_PI_2 | T1_PI_3); \
1064         } else if (runlen == 3) { \
1065             *flagsp &= ~(T1_PI_3); \
1066         } \
1067     } \
1068     else \
1069     for (ci = runlen; ci < lim; ++ci) { \
1070         OPJ_BOOL goto_PARTIAL = OPJ_FALSE; \
1071         if ((agg != 0) && (ci == runlen)) { \
1072             goto_PARTIAL = OPJ_TRUE; \
1073         } \
1074         else if (!(*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { \
1075             OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U)); \
1076 /* #ifdef DEBUG_ENC_CLN */ \
1077 /*            printf("   ctxt1=%d\n", ctxt1); */ \
1078 /* #endif */ \
1079             opj_t1_setcurctx(curctx, ctxt1); \
1080             v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \
1081             opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
1082             if (v) { \
1083                 goto_PARTIAL = OPJ_TRUE; \
1084             } \
1085         } \
1086         if( goto_PARTIAL ) { \
1087             OPJ_UINT32 vsc; \
1088             OPJ_UINT32 ctxt2, spb; \
1089             OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
1090                         *flagsp, \
1091                         flagsp[-1], flagsp[1], \
1092                         ci); \
1093             *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
1094                                                 (OPJ_UINT32)bpno); \
1095             ctxt2 = opj_t1_getctxno_sc(lu); \
1096 /* #ifdef DEBUG_ENC_CLN */ \
1097 /*           printf("   ctxt2=%d\n", ctxt2); */ \
1098 /* #endif */ \
1099             opj_t1_setcurctx(curctx, ctxt2); \
1100  \
1101             v = opj_smr_sign(*l_datap); \
1102             spb = opj_t1_getspb(lu); \
1103 /* #ifdef DEBUG_ENC_CLN */ \
1104 /*           printf("   spb=%d\n", spb); */\
1105 /* #endif */ \
1106             opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
1107             vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0; \
1108             opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc); \
1109         } \
1110         *flagsp &= ~(T1_PI_THIS << (3U * ci)); \
1111         l_datap ++; \
1112     } \
1113 }
1114
/**
 * Decode clean-up pass step for the coefficient of row `ci` of a column.
 *
 * check_flags   when true, the coefficient is skipped if its SIGMA or PI bit
 *               is set in `flags`
 * partial       when true, the zero-coding decision is not decoded and the
 *               sign is decoded directly
 * flags         lvalue holding the flag word of the column (read, and updated
 *               through opj_t1_update_flags_macro)
 * flagsp        pointer to that flag word; flagsp[-1] and flagsp[1] are read
 * flags_stride  number of entries in one row of the flags array
 * data          pointer to the row-0 coefficient of the column
 * data_stride   distance between two rows, the coefficient written is
 *               data[ci * data_stride]
 * ci            row inside the stripe
 * mqc, curctx, v, a, c, ct  MQC decoder state and scratch variable, forwarded
 *               to opj_mqc_decode_macro
 * oneplushalf   magnitude stored for a coefficient that becomes significant
 *               (stored negated when the decoded sign is set)
 * vsc           forwarded to opj_t1_update_flags_macro
 */
1115 #define opj_t1_dec_clnpass_step_macro(check_flags, partial, \
1116                                       flags, flagsp, flags_stride, data, \
1117                                       data_stride, ci, mqc, curctx, \
1118                                       v, a, c, ct, oneplushalf, vsc) \
1119 { \
1120     if ( !check_flags || !(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {\
1121         do { \
1122             if( !partial ) { \
1123                 OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
1124                 opj_t1_setcurctx(curctx, ctxt1); \
1125                 opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1126                 if( !v ) \
1127                     break; \
1128             } \
1129             { \
1130                 OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
1131                                     flags, flagsp[-1], flagsp[1], \
1132                                     ci); \
1133                 opj_t1_setcurctx(curctx, opj_t1_getctxno_sc(lu)); \
1134                 opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1135                 v = v ^ opj_t1_getspb(lu); \
1136                 data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
1137                 opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
1138             } \
1139         } while(0); \
1140     } \
1141 }
1142
/**
 * Function form of the decoder clean-up step, operating directly on the MQC
 * state stored in t1->mqc and on the flag word pointed to by flagsp.
 *
 * The step macro is expanded with check_flags = OPJ_TRUE, partial = OPJ_FALSE
 * and a data stride of 0, so the decoded value is written to *datap whatever
 * the value of ci.
 *
 * @param t1           Tier-1 handle
 * @param flagsp       flag word of the column
 * @param datap        coefficient to decode
 * @param oneplushalf  magnitude stored when the coefficient becomes significant
 * @param ci           row inside the stripe
 * @param vsc          forwarded to the flags update
 */
1143 static void opj_t1_dec_clnpass_step(
1144     opj_t1_t *t1,
1145     opj_flag_t *flagsp,
1146     OPJ_INT32 *datap,
1147     OPJ_INT32 oneplushalf,
1148     OPJ_UINT32 ci,
1149     OPJ_UINT32 vsc)
1150 {
1151     OPJ_UINT32 v;
1152
1153     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1154     opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE,
1155                                   *flagsp, flagsp, t1->w + 2U, datap,
1156                                   0, ci, mqc, mqc->curctx,
1157                                   v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
1158 }
1159
/**
 * Encoder clean-up pass for one bit-plane of the code-block held in t1.
 *
 * t1->data is consumed sequentially: 4 coefficients per column in every full
 * stripe of 4 rows, (t1->h % 4) coefficients per column in the last stripe.
 * NOTE(review): this implies t1->data is laid out column by column inside
 * each stripe - confirm against the code that fills it.
 *
 * In a full stripe, a column whose flag word is entirely zero is coded in
 * run-length mode: one symbol in the T1_CTXNO_AGG context tells whether any
 * of its 4 coefficients has the `one` bit set and, if so, two symbols in the
 * T1_CTXNO_UNI context give the row of the first one. The rest of the column
 * is coded by opj_t1_enc_clnpass_step_macro. The last, incomplete stripe never
 * uses run-length mode.
 *
 * @param t1       Tier-1 handle
 * @param bpno     bit-plane number
 * @param nmsedec  reset to 0 on entry, then incremented by the step macro
 * @param cblksty  code-block style, forwarded to the step macro
 */
1160 static void opj_t1_enc_clnpass(
1161     opj_t1_t *t1,
1162     OPJ_INT32 bpno,
1163     OPJ_INT32 *nmsedec,
1164     OPJ_UINT32 cblksty)
1165 {
1166     OPJ_UINT32 i, k;
1167     const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
1168     opj_mqc_t* mqc = &(t1->mqc);
1169     DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
1170     const OPJ_INT32* datap = t1->data;
1171     opj_flag_t *f = &T1_FLAGS(0, 0);
1172     const OPJ_UINT32 extra = 2U;
1173
1174     *nmsedec = 0;
1175 #ifdef DEBUG_ENC_CLN
1176     printf("enc_clnpass: bpno=%d\n", bpno);
1177 #endif
1178     for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
1179 #ifdef DEBUG_ENC_CLN
1180         printf(" k=%d\n", k);
1181 #endif
1182         for (i = 0; i < t1->w; ++i, f++) {
1183             OPJ_UINT32 agg, runlen;
1184 #ifdef DEBUG_ENC_CLN
1185             printf("  i=%d\n", i);
1186 #endif
                 /* run-length mode only when the whole flag word is zero */
1187             agg = !*f;
1188 #ifdef DEBUG_ENC_CLN
1189             printf("   agg=%d\n", agg);
1190 #endif
1191             if (agg) {
                     /* datap is advanced by runlen here ... */
1192                 for (runlen = 0; runlen < 4; ++runlen, ++datap) {
1193                     if (opj_smr_abs(*datap) & (OPJ_UINT32)one) {
1194                         break;
1195                     }
1196                 }
1197                 opj_t1_setcurctx(curctx, T1_CTXNO_AGG);
1198                 opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen != 4);
1199                 if (runlen == 4) {
1200                     continue;
1201                 }
1202                 opj_t1_setcurctx(curctx, T1_CTXNO_UNI);
1203                 opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen >> 1);
1204                 opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen & 1);
1205             } else {
1206                 runlen = 0;
1207             }
1208             opj_t1_enc_clnpass_step_macro(
1209                 mqc, curctx, a, c, ct,
1210                 f,
1211                 datap,
1212                 bpno,
1213                 one,
1214                 nmsedec,
1215                 agg,
1216                 runlen,
1217                 4U,
1218                 cblksty);
                 /* ... and by the rest of the column here: 4 in total */
1219             datap += 4 - runlen;
1220         }
1221     }
1222     if (k < t1->h) {
1223         const OPJ_UINT32 agg = 0;
1224         const OPJ_UINT32 runlen = 0;
1225 #ifdef DEBUG_ENC_CLN
1226         printf(" k=%d\n", k);
1227 #endif
1228         for (i = 0; i < t1->w; ++i, f++) {
1229 #ifdef DEBUG_ENC_CLN
1230             printf("  i=%d\n", i);
1231             printf("   agg=%d\n", agg);
1232 #endif
1233             opj_t1_enc_clnpass_step_macro(
1234                 mqc, curctx, a, c, ct,
1235                 f,
1236                 datap,
1237                 bpno,
1238                 one,
1239                 nmsedec,
1240                 agg,
1241                 runlen,
1242                 t1->h - k,
1243                 cblksty);
1244             datap += t1->h - k;
1245         }
1246     }
1247
1248     UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
1249 }
1250
/**
 * Body of the decoder clean-up pass, written as a macro so that it can be
 * expanded with literal or run-time dimensions, and with or without vsc.
 *
 * t1            Tier-1 handle; its data, flags and mqc members are used
 * bpno          bit-plane number; a coefficient that becomes significant is
 *               set to +/- ((1 << bpno) | ((1 << bpno) >> 1))
 * vsc           passed to the step of row 0 of every full stripe (the steps
 *               of rows 1 to 3 always receive OPJ_FALSE) and to every step
 *               of the last, incomplete stripe
 * w, h          width and height of the code-block
 * flags_stride  number of entries in one row of the flags array
 *
 * For each column of a full stripe of 4 rows:
 * - if the flag word is zero, one symbol is decoded in the T1_CTXNO_AGG
 *   context; when it is 0 the column is left untouched. Otherwise two symbols
 *   decoded in the T1_CTXNO_UNI context give the run-length, i.e. the first
 *   row to process: that row only has its sign decoded (partial step), the
 *   following rows are fully decoded through the fall-through cases.
 * - otherwise the four rows are decoded with the SIGMA/PI check enabled.
 * The flag word is then stored back with its four PI bits cleared.
 *
 * The remaining (h % 4) rows are processed after UPLOAD_MQC_VARIABLES,
 * through the function opj_t1_dec_clnpass_step().
 *
 * The arguments are pasted without parentheses and some of them several
 * times, so only simple, side-effect free expressions should be passed.
 */
1251 #define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \
1252 { \
1253     OPJ_INT32 one, half, oneplushalf; \
1254     OPJ_UINT32 runlen; \
1255     OPJ_UINT32 i, j, k; \
1256     const OPJ_UINT32 l_w = w; \
1257     opj_mqc_t* mqc = &(t1->mqc); \
1258     register OPJ_INT32 *data = t1->data; \
1259     register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
1260     DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
1261     register OPJ_UINT32 v; \
1262     one = 1 << bpno; \
1263     half = one >> 1; \
1264     oneplushalf = one | half; \
1265     for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
1266         for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
1267             opj_flag_t flags = *flagsp; \
1268             if (flags == 0) { \
1269                 OPJ_UINT32 partial = OPJ_TRUE; \
1270                 opj_t1_setcurctx(curctx, T1_CTXNO_AGG); \
1271                 opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1272                 if (!v) { \
1273                     continue; \
1274                 } \
1275                 opj_t1_setcurctx(curctx, T1_CTXNO_UNI); \
1276                 opj_mqc_decode_macro(runlen, mqc, curctx, a, c, ct); \
1277                 opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1278                 runlen = (runlen << 1) | v; \
1279                 switch(runlen) { \
1280                     case 0: \
1281                         opj_t1_dec_clnpass_step_macro(OPJ_FALSE, OPJ_TRUE,\
1282                                             flags, flagsp, flags_stride, data, \
1283                                             l_w, 0, mqc, curctx, \
1284                                             v, a, c, ct, oneplushalf, vsc); \
1285                         partial = OPJ_FALSE; \
1286                         /* FALLTHRU */ \
1287                     case 1: \
1288                         opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1289                                             flags, flagsp, flags_stride, data, \
1290                                             l_w, 1, mqc, curctx, \
1291                                             v, a, c, ct, oneplushalf, OPJ_FALSE); \
1292                         partial = OPJ_FALSE; \
1293                         /* FALLTHRU */ \
1294                     case 2: \
1295                         opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1296                                             flags, flagsp, flags_stride, data, \
1297                                             l_w, 2, mqc, curctx, \
1298                                             v, a, c, ct, oneplushalf, OPJ_FALSE); \
1299                         partial = OPJ_FALSE; \
1300                         /* FALLTHRU */ \
1301                     case 3: \
1302                         opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1303                                             flags, flagsp, flags_stride, data, \
1304                                             l_w, 3, mqc, curctx, \
1305                                             v, a, c, ct, oneplushalf, OPJ_FALSE); \
1306                         break; \
1307                 } \
1308             } else { \
1309                 opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1310                                     flags, flagsp, flags_stride, data, \
1311                                     l_w, 0, mqc, curctx, \
1312                                     v, a, c, ct, oneplushalf, vsc); \
1313                 opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1314                                     flags, flagsp, flags_stride, data, \
1315                                     l_w, 1, mqc, curctx, \
1316                                     v, a, c, ct, oneplushalf, OPJ_FALSE); \
1317                 opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1318                                     flags, flagsp, flags_stride, data, \
1319                                     l_w, 2, mqc, curctx, \
1320                                     v, a, c, ct, oneplushalf, OPJ_FALSE); \
1321                 opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1322                                     flags, flagsp, flags_stride, data, \
1323                                     l_w, 3, mqc, curctx, \
1324                                     v, a, c, ct, oneplushalf, OPJ_FALSE); \
1325             } \
1326             *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1327         } \
1328     } \
1329     UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
1330     if( k < h ) { \
1331         for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \
1332             for (j = 0; j < h - k; ++j) { \
1333                 opj_t1_dec_clnpass_step(t1, flagsp, data + j * l_w, oneplushalf, j, vsc); \
1334             } \
1335             *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1336         } \
1337     } \
1338 }
1339
1340 static void opj_t1_dec_clnpass_check_segsym(opj_t1_t *t1, OPJ_INT32 cblksty)
1341 {
1342     if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
1343         opj_mqc_t* mqc = &(t1->mqc);
1344         OPJ_UINT32 v, v2;
1345         opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
1346         opj_mqc_decode(v, mqc);
1347         opj_mqc_decode(v2, mqc);
1348         v = (v << 1) | v2;
1349         opj_mqc_decode(v2, mqc);
1350         v = (v << 1) | v2;
1351         opj_mqc_decode(v2, mqc);
1352         v = (v << 1) | v2;
1353         /*
1354         if (v!=0xa) {
1355             opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v);
1356         }
1357         */
1358     }
1359 }
1360
/**
 * Decoder clean-up pass expanded for a 64x64 code-block (flags stride 66),
 * with the vsc argument fixed to OPJ_FALSE.
 *
 * @param t1    Tier-1 handle
 * @param bpno  bit-plane number
 */
1361 static void opj_t1_dec_clnpass_64x64_novsc(
1362     opj_t1_t *t1,
1363     OPJ_INT32 bpno)
1364 {
1365     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
1366 }
1367
/**
 * Decoder clean-up pass expanded for a 64x64 code-block (flags stride 66),
 * with the vsc argument fixed to OPJ_TRUE.
 *
 * @param t1    Tier-1 handle
 * @param bpno  bit-plane number
 */
1368 static void opj_t1_dec_clnpass_64x64_vsc(
1369     opj_t1_t *t1,
1370     OPJ_INT32 bpno)
1371 {
1372     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
1373 }
1374
/**
 * Decoder clean-up pass for any code-block size (dimensions read from t1->w
 * and t1->h, flags stride t1->w + 2), with the vsc argument fixed to
 * OPJ_FALSE.
 *
 * @param t1    Tier-1 handle
 * @param bpno  bit-plane number
 */
1375 static void opj_t1_dec_clnpass_generic_novsc(
1376     opj_t1_t *t1,
1377     OPJ_INT32 bpno)
1378 {
1379     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
1380                                 t1->w + 2U);
1381 }
1382
/**
 * Decoder clean-up pass for any code-block size (dimensions read from t1->w
 * and t1->h, flags stride t1->w + 2), with the vsc argument fixed to
 * OPJ_TRUE.
 *
 * @param t1    Tier-1 handle
 * @param bpno  bit-plane number
 */
1383 static void opj_t1_dec_clnpass_generic_vsc(
1384     opj_t1_t *t1,
1385     OPJ_INT32 bpno)
1386 {
1387     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
1388                                 t1->w + 2U);
1389 }
1390
1391 static void opj_t1_dec_clnpass(
1392     opj_t1_t *t1,
1393     OPJ_INT32 bpno,
1394     OPJ_INT32 cblksty)
1395 {
1396     if (t1->w == 64 && t1->h == 64) {
1397         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1398             opj_t1_dec_clnpass_64x64_vsc(t1, bpno);
1399         } else {
1400             opj_t1_dec_clnpass_64x64_novsc(t1, bpno);
1401         }
1402     } else {
1403         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1404             opj_t1_dec_clnpass_generic_vsc(t1, bpno);
1405         } else {
1406             opj_t1_dec_clnpass_generic_novsc(t1, bpno);
1407         }
1408     }
1409     opj_t1_dec_clnpass_check_segsym(t1, cblksty);
1410 }
1411
1412
1413 static OPJ_FLOAT64 opj_t1_getwmsedec(
1414     OPJ_INT32 nmsedec,
1415     OPJ_UINT32 compno,
1416     OPJ_UINT32 level,
1417     OPJ_UINT32 orient,
1418     OPJ_INT32 bpno,
1419     OPJ_UINT32 qmfbid,
1420     OPJ_FLOAT64 stepsize,
1421     OPJ_UINT32 numcomps,
1422     const OPJ_FLOAT64 * mct_norms,
1423     OPJ_UINT32 mct_numcomps)
1424 {
1425     OPJ_FLOAT64 w1 = 1, w2, wmsedec;
1426     OPJ_ARG_NOT_USED(numcomps);
1427
1428     if (mct_norms && (compno < mct_numcomps)) {
1429         w1 = mct_norms[compno];
1430     }
1431
1432     if (qmfbid == 1) {
1433         w2 = opj_dwt_getnorm(level, orient);
1434     } else {    /* if (qmfbid == 0) */
1435         const OPJ_INT32 log2_gain = (orient == 0) ? 0 :
1436                                     (orient == 3) ? 2 : 1;
1437         w2 = opj_dwt_getnorm_real(level, orient);
1438         /* Not sure this is right. But preserves past behaviour */
1439         stepsize /= (1 << log2_gain);
1440     }
1441
1442     wmsedec = w1 * w2 * stepsize * (1 << bpno);
1443     wmsedec *= wmsedec * nmsedec / 8192.0;
1444
1445     return wmsedec;
1446 }
1447
1448 static OPJ_BOOL opj_t1_allocate_buffers(
1449     opj_t1_t *t1,
1450     OPJ_UINT32 w,
1451     OPJ_UINT32 h)
1452 {
1453     OPJ_UINT32 flagssize;
1454     OPJ_UINT32 flags_stride;
1455
1456     /* No risk of overflow. Prior checks ensure those assert are met */
1457     /* They are per the specification */
1458     assert(w <= 1024);
1459     assert(h <= 1024);
1460     assert(w * h <= 4096);
1461
1462     /* encoder uses tile buffer, so no need to allocate */
1463     {
1464         OPJ_UINT32 datasize = w * h;
1465
1466         if (datasize > t1->datasize) {
1467             opj_aligned_free(t1->data);
1468             t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
1469             if (!t1->data) {
1470                 /* FIXME event manager error callback */
1471                 return OPJ_FALSE;
1472             }
1473             t1->datasize = datasize;
1474         }
1475         /* memset first arg is declared to never be null by gcc */
1476         if (t1->data != NULL) {
1477             memset(t1->data, 0, datasize * sizeof(OPJ_INT32));
1478         }
1479     }
1480
1481     flags_stride = w + 2U; /* can't be 0U */
1482
1483     flagssize = (h + 3U) / 4U + 2U;
1484
1485     flagssize *= flags_stride;
1486     {
1487         opj_flag_t* p;
1488         OPJ_UINT32 x;
1489         OPJ_UINT32 flags_height = (h + 3U) / 4U;
1490
1491         if (flagssize > t1->flagssize) {
1492
1493             opj_aligned_free(t1->flags);
1494             t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(
1495                             opj_flag_t));
1496             if (!t1->flags) {
1497                 /* FIXME event manager error callback */
1498                 return OPJ_FALSE;
1499             }
1500         }
1501         t1->flagssize = flagssize;
1502
1503         memset(t1->flags, 0, flagssize * sizeof(opj_flag_t));
1504
1505         p = &t1->flags[0];
1506         for (x = 0; x < flags_stride; ++x) {
1507             /* magic value to hopefully stop any passes being interested in this entry */
1508             *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1509         }
1510
1511         p = &t1->flags[((flags_height + 1) * flags_stride)];
1512         for (x = 0; x < flags_stride; ++x) {
1513             /* magic value to hopefully stop any passes being interested in this entry */
1514             *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1515         }
1516
1517         if (h % 4) {
1518             OPJ_UINT32 v = 0;
1519             p = &t1->flags[((flags_height) * flags_stride)];
1520             if (h % 4 == 1) {
1521                 v |= T1_PI_1 | T1_PI_2 | T1_PI_3;
1522             } else if (h % 4 == 2) {
1523                 v |= T1_PI_2 | T1_PI_3;
1524             } else if (h % 4 == 3) {
1525                 v |= T1_PI_3;
1526             }
1527             for (x = 0; x < flags_stride; ++x) {
1528                 *p++ = v;
1529             }
1530         }
1531     }
1532
1533     t1->w = w;
1534     t1->h = h;
1535
1536     return OPJ_TRUE;
1537 }
1538
1539 /* ----------------------------------------------------------------------- */
1540
1541 /* ----------------------------------------------------------------------- */
1542 /**
1543  * Creates a new Tier 1 handle
1544  * and initializes the look-up tables of the Tier-1 coder/decoder
1545  * @return a new T1 handle if successful, returns NULL otherwise
1546 */
1547 opj_t1_t* opj_t1_create(OPJ_BOOL isEncoder)
1548 {
1549     opj_t1_t *l_t1 = 00;
1550
1551     l_t1 = (opj_t1_t*) opj_calloc(1, sizeof(opj_t1_t));
1552     if (!l_t1) {
1553         return 00;
1554     }
1555
1556     l_t1->encoder = isEncoder;
1557
1558     return l_t1;
1559 }
1560
1561
1562 /**
1563  * Destroys a previously created T1 handle
1564  *
1565  * @param p_t1 Tier 1 handle to destroy
1566 */
1567 void opj_t1_destroy(opj_t1_t *p_t1)
1568 {
1569     if (! p_t1) {
1570         return;
1571     }
1572
1573     if (p_t1->data) {
1574         opj_aligned_free(p_t1->data);
1575         p_t1->data = 00;
1576     }
1577
1578     if (p_t1->flags) {
1579         opj_aligned_free(p_t1->flags);
1580         p_t1->flags = 00;
1581     }
1582
1583     opj_free(p_t1->cblkdatabuffer);
1584
1585     opj_free(p_t1);
1586 }
1587
/**
 * Description of one code-block decoding job, as consumed by
 * opj_t1_clbl_decode_processor(). The processor releases the job with
 * opj_free() on the exit paths visible in this file.
 */
1588 typedef struct {
         /* when false, the processor allocates cblk->decoded_data and */
         /* decodes into it */
1589     OPJ_BOOL whole_tile_decoding;
         /* resolution number; resolutions[resno - 1] is used to offset */
         /* the code-block position for bands with bandno & 1 or & 2 */
1590     OPJ_UINT32 resno;
         /* code-block to decode */
1591     opj_tcd_cblk_dec_t* cblk;
         /* band of the code-block (bandno, x0, y0 and stepsize are read) */
1592     opj_tcd_band_t* band;
         /* tile-component of the code-block */
1593     opj_tcd_tilecomp_t* tilec;
         /* coding parameters (cblksty, roishift and qmfbid are read) */
1594     opj_tccp_t* tccp;
         /* copied into the mustuse_cblkdatabuffer member of the T1 handle */
1595     OPJ_BOOL mustuse_cblkdatabuffer;
         /* shared status: set to OPJ_FALSE on failure; the job is */
         /* abandoned before decoding when it is already OPJ_FALSE */
1596     volatile OPJ_BOOL* pret;
         /* event manager receiving the error messages */
1597     opj_event_mgr_t *p_manager;
         /* when not NULL, locked around the "Cannot allocate */
         /* cblk->decoded_data" message and forwarded to the decoders */
1598     opj_mutex_t* p_manager_mutex;
         /* forwarded to opj_t1_decode_cblk() / opj_t1_ht_decode_cblk() */
1599     OPJ_BOOL check_pterm;
1600 } opj_t1_cblk_decode_processing_job_t;
1601
1602 static void opj_t1_destroy_wrapper(void* t1)
1603 {
1604     opj_t1_destroy((opj_t1_t*) t1);
1605 }
1606
1607 static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
1608 {
1609     opj_tcd_cblk_dec_t* cblk;
1610     opj_tcd_band_t* band;
1611     opj_tcd_tilecomp_t* tilec;
1612     opj_tccp_t* tccp;
1613     OPJ_INT32* OPJ_RESTRICT datap;
1614     OPJ_UINT32 cblk_w, cblk_h;
1615     OPJ_INT32 x, y;
1616     OPJ_UINT32 i, j;
1617     opj_t1_cblk_decode_processing_job_t* job;
1618     opj_t1_t* t1;
1619     OPJ_UINT32 resno;
1620     OPJ_UINT32 tile_w;
1621
1622     job = (opj_t1_cblk_decode_processing_job_t*) user_data;
1623
1624     cblk = job->cblk;
1625
1626     if (!job->whole_tile_decoding) {
1627         cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
1628         cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
1629
1630         cblk->decoded_data = (OPJ_INT32*)opj_aligned_malloc(sizeof(OPJ_INT32) *
1631                              cblk_w * cblk_h);
1632         if (cblk->decoded_data == NULL) {
1633             if (job->p_manager_mutex) {
1634                 opj_mutex_lock(job->p_manager_mutex);
1635             }
1636             opj_event_msg(job->p_manager, EVT_ERROR,
1637                           "Cannot allocate cblk->decoded_data\n");
1638             if (job->p_manager_mutex) {
1639                 opj_mutex_unlock(job->p_manager_mutex);
1640             }
1641             *(job->pret) = OPJ_FALSE;
1642             opj_free(job);
1643             return;
1644         }
1645         /* Zero-init required */
1646         memset(cblk->decoded_data, 0, sizeof(OPJ_INT32) * cblk_w * cblk_h);
1647     } else if (cblk->decoded_data) {
1648         /* Not sure if that code path can happen, but better be */
1649         /* safe than sorry */
1650         opj_aligned_free(cblk->decoded_data);
1651         cblk->decoded_data = NULL;
1652     }
1653
1654     resno = job->resno;
1655     band = job->band;
1656     tilec = job->tilec;
1657     tccp = job->tccp;
1658     tile_w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1
1659                           -
1660                           tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
1661
1662     if (!*(job->pret)) {
1663         opj_free(job);
1664         return;
1665     }
1666
1667     t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
1668     if (t1 == NULL) {
1669         t1 = opj_t1_create(OPJ_FALSE);
1670         if (t1 == NULL) {
1671             opj_event_msg(job->p_manager, EVT_ERROR,
1672                           "Cannot allocate Tier 1 handle\n");
1673             *(job->pret) = OPJ_FALSE;
1674             opj_free(job);
1675             return;
1676         }
1677         if (!opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper)) {
1678             opj_event_msg(job->p_manager, EVT_ERROR,
1679                           "Unable to set t1 handle as TLS\n");
1680             opj_t1_destroy(t1);
1681             *(job->pret) = OPJ_FALSE;
1682             opj_free(job);
1683             return;
1684         }
1685     }
1686     t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;
1687
1688     if ((tccp->cblksty & J2K_CCP_CBLKSTY_HT) != 0) {
1689         if (OPJ_FALSE == opj_t1_ht_decode_cblk(
1690                     t1,
1691                     cblk,
1692                     band->bandno,
1693                     (OPJ_UINT32)tccp->roishift,
1694                     tccp->cblksty,
1695                     job->p_manager,
1696                     job->p_manager_mutex,
1697                     job->check_pterm)) {
1698             *(job->pret) = OPJ_FALSE;
1699             opj_free(job);
1700             return;
1701         }
1702     } else {
1703         if (OPJ_FALSE == opj_t1_decode_cblk(
1704                     t1,
1705                     cblk,
1706                     band->bandno,
1707                     (OPJ_UINT32)tccp->roishift,
1708                     tccp->cblksty,
1709                     job->p_manager,
1710                     job->p_manager_mutex,
1711                     job->check_pterm)) {
1712             *(job->pret) = OPJ_FALSE;
1713             opj_free(job);
1714             return;
1715         }
1716     }
1717
1718     x = cblk->x0 - band->x0;
1719     y = cblk->y0 - band->y0;
1720     if (band->bandno & 1) {
1721         opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
1722         x += pres->x1 - pres->x0;
1723     }
1724     if (band->bandno & 2) {
1725         opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
1726         y += pres->y1 - pres->y0;
1727     }
1728
1729     datap = cblk->decoded_data ? cblk->decoded_data : t1->data;
1730     cblk_w = t1->w;
1731     cblk_h = t1->h;
1732
1733     if (tccp->roishift) {
1734         if (tccp->roishift >= 31) {
1735             for (j = 0; j < cblk_h; ++j) {
1736                 for (i = 0; i < cblk_w; ++i) {
1737                     datap[(j * cblk_w) + i] = 0;
1738                 }
1739             }
1740         } else {
1741             OPJ_INT32 thresh = 1 << tccp->roishift;
1742             for (j = 0; j < cblk_h; ++j) {
1743                 for (i = 0; i < cblk_w; ++i) {
1744                     OPJ_INT32 val = datap[(j * cblk_w) + i];
1745                     OPJ_INT32 mag = abs(val);
1746                     if (mag >= thresh) {
1747                         mag >>= tccp->roishift;
1748                         datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
1749                     }
1750                 }
1751             }
1752         }
1753     }
1754
1755     /* Both can be non NULL if for example decoding a full tile and then */
1756     /* partially a tile. In which case partial decoding should be the */
1757     /* priority */
1758     assert((cblk->decoded_data != NULL) || (tilec->data != NULL));
1759
1760     if (cblk->decoded_data) {
1761         OPJ_UINT32 cblk_size = cblk_w * cblk_h;
1762         if (tccp->qmfbid == 1) {
1763             for (i = 0; i < cblk_size; ++i) {
1764                 datap[i] /= 2;
1765             }
1766         } else {        /* if (tccp->qmfbid == 0) */
1767             const float stepsize = 0.5f * band->stepsize;
1768             i = 0;
1769 #ifdef __SSE2__
1770             {
1771                 const __m128 xmm_stepsize = _mm_set1_ps(stepsize);
1772                 for (; i < (cblk_size & ~15U); i += 16) {
1773                     __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1774                                                            datap + 0)));
1775                     __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1776                                                            datap + 4)));
1777                     __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1778                                                            datap + 8)));
1779                     __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1780                                                            datap + 12)));
1781                     _mm_store_ps((float*)(datap +  0), _mm_mul_ps(xmm0_data, xmm_stepsize));
1782                     _mm_store_ps((float*)(datap +  4), _mm_mul_ps(xmm1_data, xmm_stepsize));
1783                     _mm_store_ps((float*)(datap +  8), _mm_mul_ps(xmm2_data, xmm_stepsize));
1784                     _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize));
1785                     datap += 16;
1786                 }
1787             }
1788 #endif
1789             for (; i < cblk_size; ++i) {
1790                 OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * stepsize;
1791                 memcpy(datap, &tmp, sizeof(tmp));
1792                 datap++;
1793             }
1794         }
1795     } else if (tccp->qmfbid == 1) {
1796         OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w +
1797                                                        (OPJ_SIZE_T)x];
1798         for (j = 0; j < cblk_h; ++j) {
1799             i = 0;
1800             for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
1801                 OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U];
1802                 OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
1803                 OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
1804                 OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
1805                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2;
1806                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2;
1807                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2;
1808                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2;
1809             }
1810             for (; i < cblk_w; ++i) {
1811                 OPJ_INT32 tmp = datap[(j * cblk_w) + i];
1812                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2;
1813             }
1814         }
1815     } else {        /* if (tccp->qmfbid == 0) */
1816         const float stepsize = 0.5f * band->stepsize;
1817         OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y *
1818                                                          tile_w + (OPJ_SIZE_T)x];
1819         for (j = 0; j < cblk_h; ++j) {
1820             OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
1821             for (i = 0; i < cblk_w; ++i) {
1822                 OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * stepsize;
1823                 *tiledp2 = tmp;
1824                 datap++;
1825                 tiledp2++;
1826             }
1827             tiledp += tile_w;
1828         }
1829     }
1830
1831     opj_free(job);
1832 }
1833
1834
1835 void opj_t1_decode_cblks(opj_tcd_t* tcd,
1836                          volatile OPJ_BOOL* pret,
1837                          opj_tcd_tilecomp_t* tilec,
1838                          opj_tccp_t* tccp,
1839                          opj_event_mgr_t *p_manager,
1840                          opj_mutex_t* p_manager_mutex,
1841                          OPJ_BOOL check_pterm
1842                         )
1843 {
1844     opj_thread_pool_t* tp = tcd->thread_pool;
1845     OPJ_UINT32 resno, bandno, precno, cblkno;
1846
1847 #ifdef DEBUG_VERBOSE
1848     OPJ_UINT32 codeblocks_decoded = 0;
1849     printf("Enter opj_t1_decode_cblks()\n");
1850 #endif
1851
1852     for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
1853         opj_tcd_resolution_t* res = &tilec->resolutions[resno];
1854
1855         for (bandno = 0; bandno < res->numbands; ++bandno) {
1856             opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
1857
1858             for (precno = 0; precno < res->pw * res->ph; ++precno) {
1859                 opj_tcd_precinct_t* precinct = &band->precincts[precno];
1860
1861                 if (!opj_tcd_is_subband_area_of_interest(tcd,
1862                         tilec->compno,
1863                         resno,
1864                         band->bandno,
1865                         (OPJ_UINT32)precinct->x0,
1866                         (OPJ_UINT32)precinct->y0,
1867                         (OPJ_UINT32)precinct->x1,
1868                         (OPJ_UINT32)precinct->y1)) {
1869                     for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1870                         opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1871                         if (cblk->decoded_data) {
1872 #ifdef DEBUG_VERBOSE
1873                             printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1874                                    cblk->x0, cblk->y0, resno, bandno);
1875 #endif
1876                             opj_aligned_free(cblk->decoded_data);
1877                             cblk->decoded_data = NULL;
1878                         }
1879                     }
1880                     continue;
1881                 }
1882
1883                 for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1884                     opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1885                     opj_t1_cblk_decode_processing_job_t* job;
1886
1887                     if (!opj_tcd_is_subband_area_of_interest(tcd,
1888                             tilec->compno,
1889                             resno,
1890                             band->bandno,
1891                             (OPJ_UINT32)cblk->x0,
1892                             (OPJ_UINT32)cblk->y0,
1893                             (OPJ_UINT32)cblk->x1,
1894                             (OPJ_UINT32)cblk->y1)) {
1895                         if (cblk->decoded_data) {
1896 #ifdef DEBUG_VERBOSE
1897                             printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1898                                    cblk->x0, cblk->y0, resno, bandno);
1899 #endif
1900                             opj_aligned_free(cblk->decoded_data);
1901                             cblk->decoded_data = NULL;
1902                         }
1903                         continue;
1904                     }
1905
1906                     if (!tcd->whole_tile_decoding) {
1907                         OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
1908                         OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
1909                         if (cblk->decoded_data != NULL) {
1910 #ifdef DEBUG_VERBOSE
1911                             printf("Reusing codeblock %d,%d at resno=%d, bandno=%d\n",
1912                                    cblk->x0, cblk->y0, resno, bandno);
1913 #endif
1914                             continue;
1915                         }
1916                         if (cblk_w == 0 || cblk_h == 0) {
1917                             continue;
1918                         }
1919 #ifdef DEBUG_VERBOSE
1920                         printf("Decoding codeblock %d,%d at resno=%d, bandno=%d\n",
1921                                cblk->x0, cblk->y0, resno, bandno);
1922 #endif
1923                     }
1924
1925                     job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1,
1926                             sizeof(opj_t1_cblk_decode_processing_job_t));
1927                     if (!job) {
1928                         *pret = OPJ_FALSE;
1929                         return;
1930                     }
1931                     job->whole_tile_decoding = tcd->whole_tile_decoding;
1932                     job->resno = resno;
1933                     job->cblk = cblk;
1934                     job->band = band;
1935                     job->tilec = tilec;
1936                     job->tccp = tccp;
1937                     job->pret = pret;
1938                     job->p_manager_mutex = p_manager_mutex;
1939                     job->p_manager = p_manager;
1940                     job->check_pterm = check_pterm;
1941                     job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
1942                     opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job);
1943 #ifdef DEBUG_VERBOSE
1944                     codeblocks_decoded ++;
1945 #endif
1946                     if (!(*pret)) {
1947                         return;
1948                     }
1949                 } /* cblkno */
1950             } /* precno */
1951         } /* bandno */
1952     } /* resno */
1953
1954 #ifdef DEBUG_VERBOSE
1955     printf("Leave opj_t1_decode_cblks(). Number decoded: %d\n", codeblocks_decoded);
1956 #endif
1957     return;
1958 }
1959
1960
1961 static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
1962                                    opj_tcd_cblk_dec_t* cblk,
1963                                    OPJ_UINT32 orient,
1964                                    OPJ_UINT32 roishift,
1965                                    OPJ_UINT32 cblksty,
1966                                    opj_event_mgr_t *p_manager,
1967                                    opj_mutex_t* p_manager_mutex,
1968                                    OPJ_BOOL check_pterm)
1969 {
1970     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1971
1972     OPJ_INT32 bpno_plus_one;
1973     OPJ_UINT32 passtype;
1974     OPJ_UINT32 segno, passno;
1975     OPJ_BYTE* cblkdata = NULL;
1976     OPJ_UINT32 cblkdataindex = 0;
1977     OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */
1978     OPJ_INT32* original_t1_data = NULL;
1979
1980     mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
1981
1982     if (!opj_t1_allocate_buffers(
1983                 t1,
1984                 (OPJ_UINT32)(cblk->x1 - cblk->x0),
1985                 (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
1986         return OPJ_FALSE;
1987     }
1988
1989     bpno_plus_one = (OPJ_INT32)(roishift + cblk->numbps);
1990     if (bpno_plus_one >= 31) {
1991         if (p_manager_mutex) {
1992             opj_mutex_lock(p_manager_mutex);
1993         }
1994         opj_event_msg(p_manager, EVT_WARNING,
1995                       "opj_t1_decode_cblk(): unsupported bpno_plus_one = %d >= 31\n",
1996                       bpno_plus_one);
1997         if (p_manager_mutex) {
1998             opj_mutex_unlock(p_manager_mutex);
1999         }
2000         return OPJ_FALSE;
2001     }
2002     passtype = 2;
2003
2004     opj_mqc_resetstates(mqc);
2005     opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2006     opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2007     opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2008
2009     /* Even if we have a single chunk, in multi-threaded decoding */
2010     /* the insertion of our synthetic marker might potentially override */
2011     /* valid codestream of other codeblocks decoded in parallel. */
2012     if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) {
2013         OPJ_UINT32 i;
2014         OPJ_UINT32 cblk_len;
2015
2016         /* Compute whole codeblock length from chunk lengths */
2017         cblk_len = 0;
2018         for (i = 0; i < cblk->numchunks; i++) {
2019             cblk_len += cblk->chunks[i].len;
2020         }
2021
2022         /* Allocate temporary memory if needed */
2023         if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
2024             cblkdata = (OPJ_BYTE*)opj_realloc(t1->cblkdatabuffer,
2025                                               cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
2026             if (cblkdata == NULL) {
2027                 return OPJ_FALSE;
2028             }
2029             t1->cblkdatabuffer = cblkdata;
2030             memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
2031             t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
2032         }
2033
2034         /* Concatenate all chunks */
2035         cblkdata = t1->cblkdatabuffer;
2036         cblk_len = 0;
2037         for (i = 0; i < cblk->numchunks; i++) {
2038             memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
2039             cblk_len += cblk->chunks[i].len;
2040         }
2041     } else if (cblk->numchunks == 1) {
2042         cblkdata = cblk->chunks[0].data;
2043     } else {
2044         /* Not sure if that can happen in practice, but avoid Coverity to */
2045         /* think we will dereference a null cblkdta pointer */
2046         return OPJ_TRUE;
2047     }
2048
2049     /* For subtile decoding, directly decode in the decoded_data buffer of */
2050     /* the code-block. Hack t1->data to point to it, and restore it later */
2051     if (cblk->decoded_data) {
2052         original_t1_data = t1->data;
2053         t1->data = cblk->decoded_data;
2054     }
2055
2056     for (segno = 0; segno < cblk->real_num_segs; ++segno) {
2057         opj_tcd_seg_t *seg = &cblk->segs[segno];
2058
2059         /* BYPASS mode */
2060         type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) &&
2061                 (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2062
2063         if (type == T1_TYPE_RAW) {
2064             opj_mqc_raw_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2065                                  OPJ_COMMON_CBLK_DATA_EXTRA);
2066         } else {
2067             opj_mqc_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2068                              OPJ_COMMON_CBLK_DATA_EXTRA);
2069         }
2070         cblkdataindex += seg->len;
2071
2072         for (passno = 0; (passno < seg->real_num_passes) &&
2073                 (bpno_plus_one >= 1); ++passno) {
2074             switch (passtype) {
2075             case 0:
2076                 if (type == T1_TYPE_RAW) {
2077                     opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2078                 } else {
2079                     opj_t1_dec_sigpass_mqc(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2080                 }
2081                 break;
2082             case 1:
2083                 if (type == T1_TYPE_RAW) {
2084                     opj_t1_dec_refpass_raw(t1, bpno_plus_one);
2085                 } else {
2086                     opj_t1_dec_refpass_mqc(t1, bpno_plus_one);
2087                 }
2088                 break;
2089             case 2:
2090                 opj_t1_dec_clnpass(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2091                 break;
2092             }
2093
2094             if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) {
2095                 opj_mqc_resetstates(mqc);
2096                 opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2097                 opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2098                 opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2099             }
2100             if (++passtype == 3) {
2101                 passtype = 0;
2102                 bpno_plus_one--;
2103             }
2104         }
2105
2106         opq_mqc_finish_dec(mqc);
2107     }
2108
2109     if (check_pterm) {
2110         if (mqc->bp + 2 < mqc->end) {
2111             if (p_manager_mutex) {
2112                 opj_mutex_lock(p_manager_mutex);
2113             }
2114             opj_event_msg(p_manager, EVT_WARNING,
2115                           "PTERM check failure: %d remaining bytes in code block (%d used / %d)\n",
2116                           (int)(mqc->end - mqc->bp) - 2,
2117                           (int)(mqc->bp - mqc->start),
2118                           (int)(mqc->end - mqc->start));
2119             if (p_manager_mutex) {
2120                 opj_mutex_unlock(p_manager_mutex);
2121             }
2122         } else if (mqc->end_of_byte_stream_counter > 2) {
2123             if (p_manager_mutex) {
2124                 opj_mutex_lock(p_manager_mutex);
2125             }
2126             opj_event_msg(p_manager, EVT_WARNING,
2127                           "PTERM check failure: %d synthetized 0xFF markers read\n",
2128                           mqc->end_of_byte_stream_counter);
2129             if (p_manager_mutex) {
2130                 opj_mutex_unlock(p_manager_mutex);
2131             }
2132         }
2133     }
2134
2135     /* Restore original t1->data is needed */
2136     if (cblk->decoded_data) {
2137         t1->data = original_t1_data;
2138     }
2139
2140     return OPJ_TRUE;
2141 }
2142
2143
/* Description of one code-block encoding job. opj_t1_encode_cblks() */
/* allocates and fills one instance per code-block, and */
/* opj_t1_cblk_encode_processor() consumes and frees it. */
2144 typedef struct {
2145     OPJ_UINT32 compno;              /* component index, forwarded to opj_t1_encode_cblk() */
2146     OPJ_UINT32 resno;               /* index of the resolution in tilec->resolutions */
2147     opj_tcd_cblk_enc_t* cblk;       /* code-block to encode */
2148     opj_tcd_tile_t *tile;           /* tile: numcomps is read, distotile is accumulated */
2149     opj_tcd_band_t* band;           /* band holding cblk: bandno, stepsize and origin are read */
2150     opj_tcd_tilecomp_t* tilec;      /* tile component: provides the samples and resolutions */
2151     opj_tccp_t* tccp;               /* coding parameters: qmfbid and cblksty are read */
2152     const OPJ_FLOAT64 * mct_norms;  /* forwarded unchanged to opj_t1_encode_cblk() */
2153     OPJ_UINT32 mct_numcomps;        /* forwarded unchanged to opj_t1_encode_cblk() */
2154     volatile OPJ_BOOL* pret;        /* shared status: set to OPJ_FALSE on failure; jobs bail out once it is false */
2155     opj_mutex_t* mutex;             /* guards the update of tile->distotile; no locking is done when NULL */
2156 } opj_t1_cblk_encode_processing_job_t;
2157
2158 /** Procedure to deal with a asynchronous code-block encoding job.
2159  *
2160  * @param user_data Pointer to a opj_t1_cblk_encode_processing_job_t* structure
2161  * @param tls       TLS handle.
2162  */
2163 static void opj_t1_cblk_encode_processor(void* user_data, opj_tls_t* tls)
2164 {
2165     opj_t1_cblk_encode_processing_job_t* job =
2166         (opj_t1_cblk_encode_processing_job_t*)user_data;
2167     opj_tcd_cblk_enc_t* cblk = job->cblk;
2168     const opj_tcd_band_t* band = job->band;
2169     const opj_tcd_tilecomp_t* tilec = job->tilec;
2170     const opj_tccp_t* tccp = job->tccp;
2171     const OPJ_UINT32 resno = job->resno;
2172     opj_t1_t* t1;
2173     const OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
2174
2175     OPJ_INT32* OPJ_RESTRICT tiledp;
2176     OPJ_UINT32 cblk_w;
2177     OPJ_UINT32 cblk_h;
2178     OPJ_UINT32 i, j;
2179
2180     OPJ_INT32 x = cblk->x0 - band->x0;
2181     OPJ_INT32 y = cblk->y0 - band->y0;
2182
2183     if (!*(job->pret)) {
2184         opj_free(job);
2185         return;
2186     }
2187
2188     t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
2189     if (t1 == NULL) {
2190         t1 = opj_t1_create(OPJ_TRUE); /* OPJ_TRUE == T1 for encoding */
2191         opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
2192     }
2193
2194     if (band->bandno & 1) {
2195         opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2196         x += pres->x1 - pres->x0;
2197     }
2198     if (band->bandno & 2) {
2199         opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2200         y += pres->y1 - pres->y0;
2201     }
2202
2203     if (!opj_t1_allocate_buffers(
2204                 t1,
2205                 (OPJ_UINT32)(cblk->x1 - cblk->x0),
2206                 (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
2207         *(job->pret) = OPJ_FALSE;
2208         opj_free(job);
2209         return;
2210     }
2211
2212     cblk_w = t1->w;
2213     cblk_h = t1->h;
2214
2215     tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
2216
2217     if (tccp->qmfbid == 1) {
2218         /* Do multiplication on unsigned type, even if the
2219             * underlying type is signed, to avoid potential
2220             * int overflow on large value (the output will be
2221             * incorrect in such situation, but whatever...)
2222             * This assumes complement-to-2 signed integer
2223             * representation
2224             * Fixes https://github.com/uclouvain/openjpeg/issues/1053
2225             */
2226         OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp;
2227         OPJ_UINT32* OPJ_RESTRICT t1data = (OPJ_UINT32*) t1->data;
2228         /* Change from "natural" order to "zigzag" order of T1 passes */
2229         for (j = 0; j < (cblk_h & ~3U); j += 4) {
2230             for (i = 0; i < cblk_w; ++i) {
2231                 t1data[0] = tiledp_u[(j + 0) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2232                 t1data[1] = tiledp_u[(j + 1) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2233                 t1data[2] = tiledp_u[(j + 2) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2234                 t1data[3] = tiledp_u[(j + 3) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2235                 t1data += 4;
2236             }
2237         }
2238         if (j < cblk_h) {
2239             for (i = 0; i < cblk_w; ++i) {
2240                 OPJ_UINT32 k;
2241                 for (k = j; k < cblk_h; k++) {
2242                     t1data[0] = tiledp_u[k * tile_w + i] << T1_NMSEDEC_FRACBITS;
2243                     t1data ++;
2244                 }
2245             }
2246         }
2247     } else {        /* if (tccp->qmfbid == 0) */
2248         OPJ_FLOAT32* OPJ_RESTRICT tiledp_f = (OPJ_FLOAT32*) tiledp;
2249         OPJ_INT32* OPJ_RESTRICT t1data = t1->data;
2250         /* Change from "natural" order to "zigzag" order of T1 passes */
2251         for (j = 0; j < (cblk_h & ~3U); j += 4) {
2252             for (i = 0; i < cblk_w; ++i) {
2253                 t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 0) * tile_w + i] /
2254                                                    band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2255                 t1data[1] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 1) * tile_w + i] /
2256                                                    band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2257                 t1data[2] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 2) * tile_w + i] /
2258                                                    band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2259                 t1data[3] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 3) * tile_w + i] /
2260                                                    band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2261                 t1data += 4;
2262             }
2263         }
2264         if (j < cblk_h) {
2265             for (i = 0; i < cblk_w; ++i) {
2266                 OPJ_UINT32 k;
2267                 for (k = j; k < cblk_h; k++) {
2268                     t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[k * tile_w + i] / band->stepsize)
2269                                                       * (1 << T1_NMSEDEC_FRACBITS));
2270                     t1data ++;
2271                 }
2272             }
2273         }
2274     }
2275
2276     {
2277         OPJ_FLOAT64 cumwmsedec =
2278             opj_t1_encode_cblk(
2279                 t1,
2280                 cblk,
2281                 band->bandno,
2282                 job->compno,
2283                 tilec->numresolutions - 1 - resno,
2284                 tccp->qmfbid,
2285                 band->stepsize,
2286                 tccp->cblksty,
2287                 job->tile->numcomps,
2288                 job->mct_norms,
2289                 job->mct_numcomps);
2290         if (job->mutex) {
2291             opj_mutex_lock(job->mutex);
2292         }
2293         job->tile->distotile += cumwmsedec;
2294         if (job->mutex) {
2295             opj_mutex_unlock(job->mutex);
2296         }
2297     }
2298
2299     opj_free(job);
2300 }
2301
2302
2303 OPJ_BOOL opj_t1_encode_cblks(opj_tcd_t* tcd,
2304                              opj_tcd_tile_t *tile,
2305                              opj_tcp_t *tcp,
2306                              const OPJ_FLOAT64 * mct_norms,
2307                              OPJ_UINT32 mct_numcomps
2308                             )
2309 {
2310     volatile OPJ_BOOL ret = OPJ_TRUE;
2311     opj_thread_pool_t* tp = tcd->thread_pool;
2312     OPJ_UINT32 compno, resno, bandno, precno, cblkno;
2313     opj_mutex_t* mutex = opj_mutex_create();
2314
2315     tile->distotile = 0;
2316
2317     for (compno = 0; compno < tile->numcomps; ++compno) {
2318         opj_tcd_tilecomp_t* tilec = &tile->comps[compno];
2319         opj_tccp_t* tccp = &tcp->tccps[compno];
2320
2321         for (resno = 0; resno < tilec->numresolutions; ++resno) {
2322             opj_tcd_resolution_t *res = &tilec->resolutions[resno];
2323
2324             for (bandno = 0; bandno < res->numbands; ++bandno) {
2325                 opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
2326
2327                 /* Skip empty bands */
2328                 if (opj_tcd_is_band_empty(band)) {
2329                     continue;
2330                 }
2331                 for (precno = 0; precno < res->pw * res->ph; ++precno) {
2332                     opj_tcd_precinct_t *prc = &band->precincts[precno];
2333
2334                     for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
2335                         opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
2336
2337                         opj_t1_cblk_encode_processing_job_t* job =
2338                             (opj_t1_cblk_encode_processing_job_t*) opj_calloc(1,
2339                                     sizeof(opj_t1_cblk_encode_processing_job_t));
2340                         if (!job) {
2341                             ret = OPJ_FALSE;
2342                             goto end;
2343                         }
2344                         job->compno = compno;
2345                         job->tile = tile;
2346                         job->resno = resno;
2347                         job->cblk = cblk;
2348                         job->band = band;
2349                         job->tilec = tilec;
2350                         job->tccp = tccp;
2351                         job->mct_norms = mct_norms;
2352                         job->mct_numcomps = mct_numcomps;
2353                         job->pret = &ret;
2354                         job->mutex = mutex;
2355                         opj_thread_pool_submit_job(tp, opj_t1_cblk_encode_processor, job);
2356
2357                     } /* cblkno */
2358                 } /* precno */
2359             } /* bandno */
2360         } /* resno  */
2361     } /* compno  */
2362
2363 end:
2364     opj_thread_pool_wait_completion(tcd->thread_pool, 0);
2365     if (mutex) {
2366         opj_mutex_destroy(mutex);
2367     }
2368
2369     return ret;
2370 }
2371
2372 /* Returns whether the pass (bpno, passtype) is terminated */
2373 static int opj_t1_enc_is_term_pass(opj_tcd_cblk_enc_t* cblk,
2374                                    OPJ_UINT32 cblksty,
2375                                    OPJ_INT32 bpno,
2376                                    OPJ_UINT32 passtype)
2377 {
2378     /* Is it the last cleanup pass ? */
2379     if (passtype == 2 && bpno == 0) {
2380         return OPJ_TRUE;
2381     }
2382
2383     if (cblksty & J2K_CCP_CBLKSTY_TERMALL) {
2384         return OPJ_TRUE;
2385     }
2386
2387     if ((cblksty & J2K_CCP_CBLKSTY_LAZY)) {
2388         /* For bypass arithmetic bypass, terminate the 4th cleanup pass */
2389         if ((bpno == ((OPJ_INT32)cblk->numbps - 4)) && (passtype == 2)) {
2390             return OPJ_TRUE;
2391         }
2392         /* and beyond terminate all the magnitude refinement passes (in raw) */
2393         /* and cleanup passes (in MQC) */
2394         if ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype > 0)) {
2395             return OPJ_TRUE;
2396         }
2397     }
2398
2399     return OPJ_FALSE;
2400 }
2401
2402
2403 static OPJ_FLOAT64 opj_t1_encode_cblk(opj_t1_t *t1,
2404                                       opj_tcd_cblk_enc_t* cblk,
2405                                       OPJ_UINT32 orient,
2406                                       OPJ_UINT32 compno,
2407                                       OPJ_UINT32 level,
2408                                       OPJ_UINT32 qmfbid,
2409                                       OPJ_FLOAT64 stepsize,
2410                                       OPJ_UINT32 cblksty,
2411                                       OPJ_UINT32 numcomps,
2412                                       const OPJ_FLOAT64 * mct_norms,
2413                                       OPJ_UINT32 mct_numcomps)
2414 {
2415     OPJ_FLOAT64 cumwmsedec = 0.0;
2416
2417     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
2418
2419     OPJ_UINT32 passno;
2420     OPJ_INT32 bpno;
2421     OPJ_UINT32 passtype;
2422     OPJ_INT32 nmsedec = 0;
2423     OPJ_INT32 max;
2424     OPJ_UINT32 i, j;
2425     OPJ_BYTE type = T1_TYPE_MQ;
2426     OPJ_FLOAT64 tempwmsedec;
2427     OPJ_INT32* datap;
2428
2429 #ifdef EXTRA_DEBUG
2430     printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n",
2431            cblk->x0, cblk->y0, cblk->x1, cblk->y1, orient, compno, level);
2432 #endif
2433
2434     mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
2435
2436     max = 0;
2437     datap = t1->data;
2438     for (j = 0; j < t1->h; ++j) {
2439         const OPJ_UINT32 w = t1->w;
2440         for (i = 0; i < w; ++i, ++datap) {
2441             OPJ_INT32 tmp = *datap;
2442             if (tmp < 0) {
2443                 OPJ_UINT32 tmp_unsigned;
2444                 if (tmp == INT_MIN) {
2445                     /* To avoid undefined behaviour when negating INT_MIN */
2446                     /* but if we go here, it means we have supplied an input */
2447                     /* with more bit depth than we we can really support. */
2448                     /* Cf https://github.com/uclouvain/openjpeg/issues/1432 */
2449                     tmp = INT_MIN + 1;
2450                 }
2451                 max = opj_int_max(max, -tmp);
2452                 tmp_unsigned = opj_to_smr(tmp);
2453                 memcpy(datap, &tmp_unsigned, sizeof(OPJ_INT32));
2454             } else {
2455                 max = opj_int_max(max, tmp);
2456             }
2457         }
2458     }
2459
2460     cblk->numbps = max ? (OPJ_UINT32)((opj_int_floorlog2(max) + 1) -
2461                                       T1_NMSEDEC_FRACBITS) : 0;
2462     if (cblk->numbps == 0) {
2463         cblk->totalpasses = 0;
2464         return cumwmsedec;
2465     }
2466
2467     bpno = (OPJ_INT32)(cblk->numbps - 1);
2468     passtype = 2;
2469
2470     opj_mqc_resetstates(mqc);
2471     opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2472     opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2473     opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2474     opj_mqc_init_enc(mqc, cblk->data);
2475
2476     for (passno = 0; bpno >= 0; ++passno) {
2477         opj_tcd_pass_t *pass = &cblk->passes[passno];
2478         type = ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype < 2) &&
2479                 (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2480
2481         /* If the previous pass was terminating, we need to reset the encoder */
2482         if (passno > 0 && cblk->passes[passno - 1].term) {
2483             if (type == T1_TYPE_RAW) {
2484                 opj_mqc_bypass_init_enc(mqc);
2485             } else {
2486                 opj_mqc_restart_init_enc(mqc);
2487             }
2488         }
2489
2490         switch (passtype) {
2491         case 0:
2492             opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty);
2493             break;
2494         case 1:
2495             opj_t1_enc_refpass(t1, bpno, &nmsedec, type);
2496             break;
2497         case 2:
2498             opj_t1_enc_clnpass(t1, bpno, &nmsedec, cblksty);
2499             /* code switch SEGMARK (i.e. SEGSYM) */
2500             if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
2501                 opj_mqc_segmark_enc(mqc);
2502             }
2503             break;
2504         }
2505
2506         tempwmsedec = opj_t1_getwmsedec(nmsedec, compno, level, orient, bpno, qmfbid,
2507                                         stepsize, numcomps, mct_norms, mct_numcomps) ;
2508         cumwmsedec += tempwmsedec;
2509         pass->distortiondec = cumwmsedec;
2510
2511         if (opj_t1_enc_is_term_pass(cblk, cblksty, bpno, passtype)) {
2512             /* If it is a terminated pass, terminate it */
2513             if (type == T1_TYPE_RAW) {
2514                 opj_mqc_bypass_flush_enc(mqc, cblksty & J2K_CCP_CBLKSTY_PTERM);
2515             } else {
2516                 if (cblksty & J2K_CCP_CBLKSTY_PTERM) {
2517                     opj_mqc_erterm_enc(mqc);
2518                 } else {
2519                     opj_mqc_flush(mqc);
2520                 }
2521             }
2522             pass->term = 1;
2523             pass->rate = opj_mqc_numbytes(mqc);
2524         } else {
2525             /* Non terminated pass */
2526             OPJ_UINT32 rate_extra_bytes;
2527             if (type == T1_TYPE_RAW) {
2528                 rate_extra_bytes = opj_mqc_bypass_get_extra_bytes(
2529                                        mqc, (cblksty & J2K_CCP_CBLKSTY_PTERM));
2530             } else {
2531                 rate_extra_bytes = 3;
2532             }
2533             pass->term = 0;
2534             pass->rate = opj_mqc_numbytes(mqc) + rate_extra_bytes;
2535         }
2536
2537         if (++passtype == 3) {
2538             passtype = 0;
2539             bpno--;
2540         }
2541
2542         /* Code-switch "RESET" */
2543         if (cblksty & J2K_CCP_CBLKSTY_RESET) {
2544             opj_mqc_reset_enc(mqc);
2545         }
2546     }
2547
2548     cblk->totalpasses = passno;
2549
2550     if (cblk->totalpasses) {
2551         /* Make sure that pass rates are increasing */
2552         OPJ_UINT32 last_pass_rate = opj_mqc_numbytes(mqc);
2553         for (passno = cblk->totalpasses; passno > 0;) {
2554             opj_tcd_pass_t *pass = &cblk->passes[--passno];
2555             if (pass->rate > last_pass_rate) {
2556                 pass->rate = last_pass_rate;
2557             } else {
2558                 last_pass_rate = pass->rate;
2559             }
2560         }
2561     }
2562
2563     for (passno = 0; passno < cblk->totalpasses; passno++) {
2564         opj_tcd_pass_t *pass = &cblk->passes[passno];
2565
2566         /* Prevent generation of FF as last data byte of a pass*/
2567         /* For terminating passes, the flushing procedure ensured this already */
2568         assert(pass->rate > 0);
2569         if (cblk->data[pass->rate - 1] == 0xFF) {
2570             pass->rate--;
2571         }
2572         pass->len = pass->rate - (passno == 0 ? 0 : cblk->passes[passno - 1].rate);
2573     }
2574
2575 #ifdef EXTRA_DEBUG
2576     printf(" len=%d\n", (cblk->totalpasses) ? opj_mqc_numbytes(mqc) : 0);
2577
2578     /* Check that there are no 0xff >=0x90 sequences */
2579     if (cblk->totalpasses) {
2580         OPJ_UINT32 i;
2581         OPJ_UINT32 len = opj_mqc_numbytes(mqc);
2582         for (i = 1; i < len; ++i) {
2583             if (cblk->data[i - 1] == 0xff && cblk->data[i] >= 0x90) {
2584                 printf("0xff %02x at offset %d\n", cblk->data[i], i - 1);
2585                 abort();
2586             }
2587         }
2588     }
2589 #endif
2590
2591     return cumwmsedec;
2592 }