T1 encoder: speed-up by aggressive inlining and more cache friendly data organization
[openjpeg.git] / src / lib / openjp2 / t1.c
1 /*
2  * The copyright in this software is being made available under the 2-clauses
3  * BSD License, included below. This software may be subject to other third
4  * party and contributor rights, including patent rights, and no such rights
5  * are granted under this license.
6  *
7  * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
8  * Copyright (c) 2002-2014, Professor Benoit Macq
9  * Copyright (c) 2001-2003, David Janssens
10  * Copyright (c) 2002-2003, Yannick Verschueren
11  * Copyright (c) 2003-2007, Francois-Olivier Devaux
12  * Copyright (c) 2003-2014, Antonin Descampe
13  * Copyright (c) 2005, Herve Drolon, FreeImage Team
14  * Copyright (c) 2007, Callum Lerwick <seg@haxxed.com>
15  * Copyright (c) 2012, Carl Hetherington
16  * Copyright (c) 2017, IntoPIX SA <support@intopix.com>
17  * All rights reserved.
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions
21  * are met:
22  * 1. Redistributions of source code must retain the above copyright
23  *    notice, this list of conditions and the following disclaimer.
24  * 2. Redistributions in binary form must reproduce the above copyright
25  *    notice, this list of conditions and the following disclaimer in the
26  *    documentation and/or other materials provided with the distribution.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
29  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38  * POSSIBILITY OF SUCH DAMAGE.
39  */
40
41 #define OPJ_SKIP_POISON
42 #include "opj_includes.h"
43
44 #ifdef __SSE__
45 #include <xmmintrin.h>
46 #endif
47 #ifdef __SSE2__
48 #include <emmintrin.h>
49 #endif
50
51 #if defined(__GNUC__)
52 #pragma GCC poison malloc calloc realloc free
53 #endif
54
55 #include "t1_luts.h"
56
57 /** @defgroup T1 T1 - Implementation of the tier-1 coding */
58 /*@{*/
59
/* Flag word for sample column x in the 4-row stripe that contains row y.
 * The index skips one leading row and one leading column of an array whose
 * rows are (t1->w + 2) entries wide. Uses the variable `t1` from the
 * enclosing scope. Arguments are parenthesized so that expressions such as
 * T1_FLAGS(i, k + 4) index the intended stripe. */
#define T1_FLAGS(x, y) (t1->flags[(x) + 1 + (((y) / 4) + 1) * (t1->w + 2)])
61
/* Make `curctx` point at entry `ctxno` of the coder's context table.
 * NOTE: the coder is NOT a macro argument; it is taken from a variable named
 * `mqc` that must be visible where the macro is expanded.
 * The expansion is a fully parenthesized assignment expression so that it
 * stays a single expression in any context. */
#define opj_t1_setcurctx(curctx, ctxno)  ((curctx) = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)])
63
/* Macros to deal with signed integer with just MSB bit set for
 * negative values (smr = signed magnitude representation).
 *
 * opj_smr_abs(x)  : magnitude, i.e. the low 31 bits.
 * opj_smr_sign(x) : 1 when the MSB (sign) is set, 0 otherwise.
 * opj_to_smr(x)   : convert a two's complement value to smr. The argument is
 *                   evaluated more than once, so it must have no side effects.
 *                   The magnitude is computed in unsigned arithmetic, which
 *                   keeps compound arguments (a - b) correct and avoids
 *                   negating the most negative value as a signed integer. */
#define opj_smr_abs(x)  (((OPJ_UINT32)(x)) & 0x7FFFFFFFU)
#define opj_smr_sign(x) (((OPJ_UINT32)(x)) >> 31)
#define opj_to_smr(x)   ((x) >= 0 ? (OPJ_UINT32)(x) : ((0U - (OPJ_UINT32)(x)) | 0x80000000U))
69
70
71 /** @name Local static functions */
72 /*@{*/
73
74 static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f);
75 static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f);
76 static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos);
77 static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos);
78 static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
79                                        OPJ_UINT32 s, OPJ_UINT32 stride,
80                                        OPJ_UINT32 vsc);
81
82
83 /**
84 Decode significant pass
85 */
86
87 static INLINE void opj_t1_dec_sigpass_step_raw(
88     opj_t1_t *t1,
89     opj_flag_t *flagsp,
90     OPJ_INT32 *datap,
91     OPJ_INT32 oneplushalf,
92     OPJ_UINT32 vsc,
93     OPJ_UINT32 row);
94 static INLINE void opj_t1_dec_sigpass_step_mqc(
95     opj_t1_t *t1,
96     opj_flag_t *flagsp,
97     OPJ_INT32 *datap,
98     OPJ_INT32 oneplushalf,
99     OPJ_UINT32 row,
100     OPJ_UINT32 flags_stride,
101     OPJ_UINT32 vsc);
102
103 /**
104 Encode significant pass
105 */
106 static void opj_t1_enc_sigpass(opj_t1_t *t1,
107                                OPJ_INT32 bpno,
108                                OPJ_INT32 *nmsedec,
109                                OPJ_BYTE type,
110                                OPJ_UINT32 cblksty);
111
112 /**
113 Decode significant pass
114 */
115 static void opj_t1_dec_sigpass_raw(
116     opj_t1_t *t1,
117     OPJ_INT32 bpno,
118     OPJ_INT32 cblksty);
119
120 /**
121 Encode refinement pass
122 */
123 static void opj_t1_enc_refpass(opj_t1_t *t1,
124                                OPJ_INT32 bpno,
125                                OPJ_INT32 *nmsedec,
126                                OPJ_BYTE type);
127
128 /**
129 Decode refinement pass
130 */
131 static void opj_t1_dec_refpass_raw(
132     opj_t1_t *t1,
133     OPJ_INT32 bpno);
134
135
136 /**
137 Decode refinement pass
138 */
139
140 static INLINE void  opj_t1_dec_refpass_step_raw(
141     opj_t1_t *t1,
142     opj_flag_t *flagsp,
143     OPJ_INT32 *datap,
144     OPJ_INT32 poshalf,
145     OPJ_UINT32 row);
146 static INLINE void opj_t1_dec_refpass_step_mqc(
147     opj_t1_t *t1,
148     opj_flag_t *flagsp,
149     OPJ_INT32 *datap,
150     OPJ_INT32 poshalf,
151     OPJ_UINT32 row);
152
153
154 /**
155 Decode clean-up pass
156 */
157
158 static void opj_t1_dec_clnpass_step(
159     opj_t1_t *t1,
160     opj_flag_t *flagsp,
161     OPJ_INT32 *datap,
162     OPJ_INT32 oneplushalf,
163     OPJ_UINT32 row,
164     OPJ_UINT32 vsc);
165
166 /**
167 Encode clean-up pass
168 */
169 static void opj_t1_enc_clnpass(
170     opj_t1_t *t1,
171     OPJ_INT32 bpno,
172     OPJ_INT32 *nmsedec,
173     OPJ_UINT32 cblksty);
174
175 static OPJ_FLOAT64 opj_t1_getwmsedec(
176     OPJ_INT32 nmsedec,
177     OPJ_UINT32 compno,
178     OPJ_UINT32 level,
179     OPJ_UINT32 orient,
180     OPJ_INT32 bpno,
181     OPJ_UINT32 qmfbid,
182     OPJ_FLOAT64 stepsize,
183     OPJ_UINT32 numcomps,
184     const OPJ_FLOAT64 * mct_norms,
185     OPJ_UINT32 mct_numcomps);
186
187 /** Return "cumwmsedec" that should be used to increase tile->distotile */
188 static double opj_t1_encode_cblk(opj_t1_t *t1,
189                                  opj_tcd_cblk_enc_t* cblk,
190                                  OPJ_UINT32 orient,
191                                  OPJ_UINT32 compno,
192                                  OPJ_UINT32 level,
193                                  OPJ_UINT32 qmfbid,
194                                  OPJ_FLOAT64 stepsize,
195                                  OPJ_UINT32 cblksty,
196                                  OPJ_UINT32 numcomps,
197                                  const OPJ_FLOAT64 * mct_norms,
198                                  OPJ_UINT32 mct_numcomps);
199
200 /**
201 Decode 1 code-block
202 @param t1 T1 handle
203 @param cblk Code-block coding parameters
204 @param orient
205 @param roishift Region of interest shifting value
206 @param cblksty Code-block style
207 @param p_manager the event manager
208 @param p_manager_mutex mutex for the event manager
209 @param check_pterm whether PTERM correct termination should be checked
210 */
211 static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
212                                    opj_tcd_cblk_dec_t* cblk,
213                                    OPJ_UINT32 orient,
214                                    OPJ_UINT32 roishift,
215                                    OPJ_UINT32 cblksty,
216                                    opj_event_mgr_t *p_manager,
217                                    opj_mutex_t* p_manager_mutex,
218                                    OPJ_BOOL check_pterm);
219
220 static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1,
221                                         OPJ_UINT32 w,
222                                         OPJ_UINT32 h);
223
224 /*@}*/
225
226 /*@}*/
227
228 /* ----------------------------------------------------------------------- */
229
230 static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f)
231 {
232     return mqc->lut_ctxno_zc_orient[(f & T1_SIGMA_NEIGHBOURS)];
233 }
234
235 static INLINE OPJ_UINT32 opj_t1_getctxtno_sc_or_spb_index(OPJ_UINT32 fX,
236         OPJ_UINT32 pfX,
237         OPJ_UINT32 nfX,
238         OPJ_UINT32 ci)
239 {
240     /*
241       0 pfX T1_CHI_THIS           T1_LUT_SGN_W
242       1 tfX T1_SIGMA_1            T1_LUT_SIG_N
243       2 nfX T1_CHI_THIS           T1_LUT_SGN_E
244       3 tfX T1_SIGMA_3            T1_LUT_SIG_W
245       4  fX T1_CHI_(THIS - 1)     T1_LUT_SGN_N
246       5 tfX T1_SIGMA_5            T1_LUT_SIG_E
247       6  fX T1_CHI_(THIS + 1)     T1_LUT_SGN_S
248       7 tfX T1_SIGMA_7            T1_LUT_SIG_S
249     */
250
251     OPJ_UINT32 lu = (fX >> (ci * 3U)) & (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 |
252                                          T1_SIGMA_7);
253
254     lu |= (pfX >> (T1_CHI_THIS_I      + (ci * 3U))) & (1U << 0);
255     lu |= (nfX >> (T1_CHI_THIS_I - 2U + (ci * 3U))) & (1U << 2);
256     if (ci == 0U) {
257         lu |= (fX >> (T1_CHI_0_I - 4U)) & (1U << 4);
258     } else {
259         lu |= (fX >> (T1_CHI_1_I - 4U + ((ci - 1U) * 3U))) & (1U << 4);
260     }
261     lu |= (fX >> (T1_CHI_2_I - 6U + (ci * 3U))) & (1U << 6);
262     return lu;
263 }
264
265 static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 lu)
266 {
267     return lut_ctxno_sc[lu];
268 }
269
270 static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f)
271 {
272     OPJ_UINT32 tmp = (f & T1_SIGMA_NEIGHBOURS) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG;
273     OPJ_UINT32 tmp2 = (f & T1_MU_0) ? T1_CTXNO_MAG + 2 : tmp;
274     return tmp2;
275 }
276
277 static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 lu)
278 {
279     return lut_spb[lu];
280 }
281
282 static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos)
283 {
284     if (bitpos > 0) {
285         return lut_nmsedec_sig[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
286     }
287
288     return lut_nmsedec_sig0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
289 }
290
291 static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos)
292 {
293     if (bitpos > 0) {
294         return lut_nmsedec_ref[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
295     }
296
297     return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
298 }
299
/* Record in the flag words that sample `ci` of the column at `flagsp` has
 * become significant with sign `s`.
 *
 *  flags  : lvalue holding the flag word of the current column (either
 *           *flagsp itself or a cached copy the caller writes back later)
 *  flagsp : pointer to the flag word of the current column
 *  ci     : sub-row inside the flag word (0..3)
 *  s      : sign symbol, stored in the T1_CHI_* bits
 *  stride : number of flag words per stripe row
 *  vsc    : when non-zero, the stripe above is left untouched
 *
 * The previous/next columns are always updated; the stripe above only for
 * ci == 0 (and !vsc), the stripe below only for ci == 3.
 * Arguments are evaluated several times and must have no side effects; they
 * are parenthesized so that compound expressions bind as expected. */
#define opj_t1_update_flags_macro(flags, flagsp, ci, s, stride, vsc) \
{ \
    /* east */ \
    (flagsp)[-1] |= T1_SIGMA_5 << (3U * (ci)); \
 \
    /* mark target as significant */ \
    (flags) |= (((s) << T1_CHI_1_I) | T1_SIGMA_4) << (3U * (ci)); \
 \
    /* west */ \
    (flagsp)[1] |= T1_SIGMA_3 << (3U * (ci)); \
 \
    /* north-west, north, north-east */ \
    if ((ci) == 0U && !(vsc)) { \
        opj_flag_t* north = (flagsp) - (stride); \
        *north |= ((s) << T1_CHI_5_I) | T1_SIGMA_16; \
        north[-1] |= T1_SIGMA_17; \
        north[1] |= T1_SIGMA_15; \
    } \
 \
    /* south-west, south, south-east */ \
    if ((ci) == 3U) { \
        opj_flag_t* south = (flagsp) + (stride); \
        *south |= ((s) << T1_CHI_0_I) | T1_SIGMA_1; \
        south[-1] |= T1_SIGMA_2; \
        south[1] |= T1_SIGMA_0; \
    } \
}
327
328
329 static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
330                                        OPJ_UINT32 s, OPJ_UINT32 stride,
331                                        OPJ_UINT32 vsc)
332 {
333     opj_t1_update_flags_macro(*flagsp, flagsp, ci, s, stride, vsc);
334 }
335
/**
Encode significant pass: one sample.

Expands to a block handling sub-row ciIn of the flag word at flagspIn.
The sample is coded only when its T1_SIGMA_THIS and T1_PI_THIS bits are both
clear and at least one T1_SIGMA_NEIGHBOURS bit is set. The bit of *datapIn
selected by `one` is coded; when that bit is set the sign is coded as well,
*nmsedec is increased and the surrounding flag words are updated. In every
coded case the sample is finally marked with T1_PI_THIS.

type == T1_TYPE_RAW selects the bypass coder, anything else the MQ coder.

Hidden dependencies: the expansion reads `t1->w` from the enclosing scope
and opj_t1_setcurctx uses the enclosing variable `mqc`. It declares locals
named v, ci, vsc, l_datap, flagsp, flags, ctxt1, ctxt2, lu and spb, so
arguments must not be spelled with those names.
*/
#define opj_t1_enc_sigpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, type, ciIn, vscIn) \
{ \
    OPJ_UINT32 v; \
    const OPJ_UINT32 ci = (ciIn); \
    const OPJ_UINT32 vsc = (vscIn); \
    const OPJ_INT32* l_datap = (datapIn); \
    opj_flag_t* flagsp = (flagspIn); \
    OPJ_UINT32 const flags = *flagsp; \
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
            (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
        OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
        v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)(one)) ? 1 : 0; \
        opj_t1_setcurctx(curctx, ctxt1); \
        if ((type) == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */ \
            opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
        } else { \
            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
        } \
        if (v) { \
            /* sample becomes significant: code its sign */ \
            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
                                *flagsp, \
                                flagsp[-1], flagsp[1], \
                                ci); \
            OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
            v = opj_smr_sign(*l_datap); \
            *(nmsedec) += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
                                                (OPJ_UINT32)(bpno)); \
            opj_t1_setcurctx(curctx, ctxt2); \
            if ((type) == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */ \
                opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
            } else { \
                OPJ_UINT32 spb = opj_t1_getspb(lu); \
                opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
            } \
            opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); \
        } \
        *flagsp |= T1_PI_THIS << (ci * 3U); \
    } \
}
387
388 static INLINE void opj_t1_dec_sigpass_step_raw(
389     opj_t1_t *t1,
390     opj_flag_t *flagsp,
391     OPJ_INT32 *datap,
392     OPJ_INT32 oneplushalf,
393     OPJ_UINT32 vsc,
394     OPJ_UINT32 ci)
395 {
396     OPJ_UINT32 v;
397     opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
398
399     OPJ_UINT32 const flags = *flagsp;
400
401     if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
402             (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
403         if (opj_mqc_raw_decode(mqc)) {
404             v = opj_mqc_raw_decode(mqc);
405             *datap = v ? -oneplushalf : oneplushalf;
406             opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
407         }
408         *flagsp |= T1_PI_THIS << (ci * 3U);
409     }
410 }
411
/* Significance pass, MQ decoding of one sample.
 *
 *  flags        : lvalue with the flag word of the current column (may be a
 *                 cached copy of *flagsp that the caller stores back)
 *  flagsp       : pointer to that flag word; neighbours are read and written
 *                 through it
 *  flags_stride : number of flag words per stripe row
 *  data         : base pointer; the sample is data[ci * data_stride]
 *  ci           : sub-row inside the flag word (0..3)
 *  mqc, curctx, a, c, ct : coder and its (possibly cached) state
 *  v            : scratch lvalue receiving the decoded symbols
 *  oneplushalf  : magnitude stored when the sample becomes significant
 *  vsc          : forwarded to opj_t1_update_flags_macro
 *
 * opj_t1_setcurctx additionally requires a variable named `mqc` in scope.
 * Arguments are evaluated several times and must have no side effects. */
#define opj_t1_dec_sigpass_step_mqc_macro(flags, flagsp, flags_stride, data, \
                                          data_stride, ci, mqc, curctx, \
                                          v, a, c, ct, oneplushalf, vsc) \
{ \
    if (((flags) & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == 0U && \
        ((flags) & (T1_SIGMA_NEIGHBOURS << ((ci) * 3U))) != 0U) { \
        OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, (flags) >> ((ci) * 3U)); \
        opj_t1_setcurctx(curctx, ctxt1); \
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
        if (v) { \
            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
                                (flags), \
                                (flagsp)[-1], (flagsp)[1], \
                                (ci)); \
            OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
            OPJ_UINT32 spb = opj_t1_getspb(lu); \
            opj_t1_setcurctx(curctx, ctxt2); \
            opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
            v = v ^ spb; \
            (data)[(ci) * (data_stride)] = v ? -(oneplushalf) : (oneplushalf); \
            opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
        } \
        (flags) |= T1_PI_THIS << ((ci) * 3U); \
    } \
}
437
438 static INLINE void opj_t1_dec_sigpass_step_mqc(
439     opj_t1_t *t1,
440     opj_flag_t *flagsp,
441     OPJ_INT32 *datap,
442     OPJ_INT32 oneplushalf,
443     OPJ_UINT32 ci,
444     OPJ_UINT32 flags_stride,
445     OPJ_UINT32 vsc)
446 {
447     OPJ_UINT32 v;
448
449     opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
450     opj_t1_dec_sigpass_step_mqc_macro(*flagsp, flagsp, flags_stride, datap,
451                                       0, ci, mqc, mqc->curctx,
452                                       v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
453 }
454
455 static void opj_t1_enc_sigpass(opj_t1_t *t1,
456                                OPJ_INT32 bpno,
457                                OPJ_INT32 *nmsedec,
458                                OPJ_BYTE type,
459                                OPJ_UINT32 cblksty
460                               )
461 {
462     OPJ_UINT32 i, k;
463     OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
464     opj_flag_t* f = &T1_FLAGS(0, 0);
465     OPJ_UINT32 const extra = 2;
466     opj_mqc_t* mqc = &(t1->mqc);
467     DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
468     const OPJ_INT32* datap = t1->data;
469
470     *nmsedec = 0;
471 #ifdef DEBUG_ENC_SIG
472     fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno);
473 #endif
474     for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
475         const OPJ_UINT32 w = t1->w;
476 #ifdef DEBUG_ENC_SIG
477         fprintf(stderr, " k=%d\n", k);
478 #endif
479         for (i = 0; i < w; ++i, ++f, datap += 4) {
480 #ifdef DEBUG_ENC_SIG
481             fprintf(stderr, " i=%d\n", i);
482 #endif
483             if (*f == 0U) {
484                 /* Nothing to do for any of the 4 data points */
485                 continue;
486             }
487             opj_t1_enc_sigpass_step_macro(
488                 mqc, curctx, a, c, ct,
489                 f,
490                 &datap[0],
491                 bpno,
492                 one,
493                 nmsedec,
494                 type,
495                 0, cblksty & J2K_CCP_CBLKSTY_VSC);
496             opj_t1_enc_sigpass_step_macro(
497                 mqc, curctx, a, c, ct,
498                 f,
499                 &datap[1],
500                 bpno,
501                 one,
502                 nmsedec,
503                 type,
504                 1, 0);
505             opj_t1_enc_sigpass_step_macro(
506                 mqc, curctx, a, c, ct,
507                 f,
508                 &datap[2],
509                 bpno,
510                 one,
511                 nmsedec,
512                 type,
513                 2, 0);
514             opj_t1_enc_sigpass_step_macro(
515                 mqc, curctx, a, c, ct,
516                 f,
517                 &datap[3],
518                 bpno,
519                 one,
520                 nmsedec,
521                 type,
522                 3, 0);
523         }
524     }
525
526     if (k < t1->h) {
527         OPJ_UINT32 j;
528 #ifdef DEBUG_ENC_SIG
529         fprintf(stderr, " k=%d\n", k);
530 #endif
531         for (i = 0; i < t1->w; ++i, ++f) {
532 #ifdef DEBUG_ENC_SIG
533             fprintf(stderr, " i=%d\n", i);
534 #endif
535             if (*f == 0U) {
536                 /* Nothing to do for any of the 4 data points */
537                 datap += (t1->h - k);
538                 continue;
539             }
540             for (j = k; j < t1->h; ++j, ++datap) {
541                 opj_t1_enc_sigpass_step_macro(
542                     mqc, curctx, a, c, ct,
543                     f,
544                     &datap[0],
545                     bpno,
546                     one,
547                     nmsedec,
548                     type,
549                     j - k,
550                     (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0));
551             }
552         }
553     }
554
555     UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
556 }
557
558 static void opj_t1_dec_sigpass_raw(
559     opj_t1_t *t1,
560     OPJ_INT32 bpno,
561     OPJ_INT32 cblksty)
562 {
563     OPJ_INT32 one, half, oneplushalf;
564     OPJ_UINT32 i, j, k;
565     OPJ_INT32 *data = t1->data;
566     opj_flag_t *flagsp = &T1_FLAGS(0, 0);
567     const OPJ_UINT32 l_w = t1->w;
568     one = 1 << bpno;
569     half = one >> 1;
570     oneplushalf = one | half;
571
572     for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
573         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
574             opj_flag_t flags = *flagsp;
575             if (flags != 0) {
576                 opj_t1_dec_sigpass_step_raw(
577                     t1,
578                     flagsp,
579                     data,
580                     oneplushalf,
581                     cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
582                     0U);
583                 opj_t1_dec_sigpass_step_raw(
584                     t1,
585                     flagsp,
586                     data + l_w,
587                     oneplushalf,
588                     OPJ_FALSE, /* vsc */
589                     1U);
590                 opj_t1_dec_sigpass_step_raw(
591                     t1,
592                     flagsp,
593                     data + 2 * l_w,
594                     oneplushalf,
595                     OPJ_FALSE, /* vsc */
596                     2U);
597                 opj_t1_dec_sigpass_step_raw(
598                     t1,
599                     flagsp,
600                     data + 3 * l_w,
601                     oneplushalf,
602                     OPJ_FALSE, /* vsc */
603                     3U);
604             }
605         }
606     }
607     if (k < t1->h) {
608         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
609             for (j = 0; j < t1->h - k; ++j) {
610                 opj_t1_dec_sigpass_step_raw(
611                     t1,
612                     flagsp,
613                     data + j * l_w,
614                     oneplushalf,
615                     cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
616                     j);
617             }
618         }
619     }
620 }
621
/* Body shared by the opj_t1_dec_sigpass_mqc_* specializations.
 *
 *  t1           : T1 handle
 *  bpno         : bit-plane being decoded
 *  vsc          : non-zero when the first sub-row of each stripe must not
 *                 update the stripe above
 *  w, h         : code-block dimensions (constants in the 64x64 variants)
 *  flags_stride : number of flag words per stripe row
 *
 * Full stripes are decoded with the coder state cached in locals and with a
 * cached copy of each flag word that is stored back once its four samples
 * are done. The cached coder state is stored back BEFORE the last, partial
 * stripe because that part calls opj_t1_dec_sigpass_step_mqc, which works on
 * the state kept in t1->mqc.
 * Arguments are evaluated several times and must have no side effects. */
#define opj_t1_dec_sigpass_mqc_internal(t1, bpno, vsc, w, h, flags_stride) \
{ \
        OPJ_INT32 one, half, oneplushalf; \
        OPJ_UINT32 i, j, k; \
        register OPJ_INT32 *data = (t1)->data; \
        register opj_flag_t *flagsp = &(t1)->flags[(flags_stride) + 1]; \
        const OPJ_UINT32 l_w = (w); \
        opj_mqc_t* mqc = &((t1)->mqc); \
        DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
        register OPJ_UINT32 v; \
        one = 1 << (bpno); \
        half = one >> 1; \
        oneplushalf = one | half; \
        for (k = 0; k < ((h) & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
                for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                        opj_flag_t flags = *flagsp; \
                        if( flags != 0 ) { \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 0, mqc, curctx, v, a, c, ct, oneplushalf, vsc); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 1, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 2, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 3, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            *flagsp = flags; \
                        } \
                } \
        } \
        /* store the cached coder state back before using the function form */ \
        UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
        if( k < (h) ) { \
            for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                for (j = 0; j < (h) - k; ++j) { \
                        opj_t1_dec_sigpass_step_mqc(t1, flagsp, \
                            data + j * l_w, oneplushalf, j, flags_stride, vsc); \
                } \
            } \
        } \
}
665
666 static void opj_t1_dec_sigpass_mqc_64x64_novsc(
667     opj_t1_t *t1,
668     OPJ_INT32 bpno)
669 {
670     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
671 }
672
673 static void opj_t1_dec_sigpass_mqc_64x64_vsc(
674     opj_t1_t *t1,
675     OPJ_INT32 bpno)
676 {
677     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
678 }
679
680 static void opj_t1_dec_sigpass_mqc_generic_novsc(
681     opj_t1_t *t1,
682     OPJ_INT32 bpno)
683 {
684     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
685                                     t1->w + 2U);
686 }
687
688 static void opj_t1_dec_sigpass_mqc_generic_vsc(
689     opj_t1_t *t1,
690     OPJ_INT32 bpno)
691 {
692     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
693                                     t1->w + 2U);
694 }
695
696 static void opj_t1_dec_sigpass_mqc(
697     opj_t1_t *t1,
698     OPJ_INT32 bpno,
699     OPJ_INT32 cblksty)
700 {
701     if (t1->w == 64 && t1->h == 64) {
702         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
703             opj_t1_dec_sigpass_mqc_64x64_vsc(t1, bpno);
704         } else {
705             opj_t1_dec_sigpass_mqc_64x64_novsc(t1, bpno);
706         }
707     } else {
708         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
709             opj_t1_dec_sigpass_mqc_generic_vsc(t1, bpno);
710         } else {
711             opj_t1_dec_sigpass_mqc_generic_novsc(t1, bpno);
712         }
713     }
714 }
715
/**
Encode refinement pass step: one sample.

Expands to a block handling sub-row `ci` of a flag word. The sample is coded
only when T1_SIGMA_THIS is set and T1_PI_THIS is clear in `flags`. In that
case *nmsedec is increased, the bit of *datap selected by `one` is coded
(bypass coder when type == T1_TYPE_RAW, MQ coder otherwise) and T1_MU_THIS
is set in `flagsUpdated`.

`flags` is only read; `flagsUpdated` must be an lvalue (it may designate the
same object as `flags`). opj_t1_setcurctx requires a variable named `mqc` in
scope. The expansion declares locals named v, shift_flags, ctxt and abs_data,
so arguments must not be spelled with those names. Arguments are evaluated
several times and must have no side effects.
*/
#define opj_t1_enc_refpass_step_macro(mqc, curctx, a, c, ct, flags, flagsUpdated, datap, bpno, one, nmsedec, type, ci) \
{\
    OPJ_UINT32 v; \
    if (((flags) & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == (T1_SIGMA_THIS << ((ci) * 3U))) { \
        const OPJ_UINT32 shift_flags = ((flags) >> ((ci) * 3U)); \
        OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); \
        OPJ_UINT32 abs_data = opj_smr_abs(*(datap)); \
        *(nmsedec) += opj_t1_getnmsedec_ref(abs_data, \
                                            (OPJ_UINT32)(bpno)); \
        v = ((OPJ_INT32)abs_data & (one)) ? 1 : 0; \
        opj_t1_setcurctx(curctx, ctxt); \
        if ((type) == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */ \
            opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
        } else { \
            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
        } \
        (flagsUpdated) |= T1_MU_THIS << ((ci) * 3U); \
    } \
}
741
742
743 static INLINE void opj_t1_dec_refpass_step_raw(
744     opj_t1_t *t1,
745     opj_flag_t *flagsp,
746     OPJ_INT32 *datap,
747     OPJ_INT32 poshalf,
748     OPJ_UINT32 ci)
749 {
750     OPJ_UINT32 v;
751
752     opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
753
754     if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) ==
755             (T1_SIGMA_THIS << (ci * 3U))) {
756         v = opj_mqc_raw_decode(mqc);
757         *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf;
758         *flagsp |= T1_MU_THIS << (ci * 3U);
759     }
760 }
761
/* Refinement pass, MQ decoding of one sample.
 *
 *  flags       : lvalue with the flag word of the current column
 *  data        : base pointer; the sample is data[ci * data_stride]
 *  ci          : sub-row inside the flag word (0..3)
 *  mqc, curctx, a, c, ct : coder and its (possibly cached) state
 *  v           : scratch lvalue receiving the decoded symbol
 *  poshalf     : amount added to or subtracted from the sample
 *
 * Only samples with T1_SIGMA_THIS set and T1_PI_THIS clear are refined; they
 * are then marked with T1_MU_THIS. opj_t1_setcurctx requires a variable named
 * `mqc` in scope. Arguments are evaluated several times and must have no
 * side effects. */
#define opj_t1_dec_refpass_step_mqc_macro(flags, data, data_stride, ci, \
                                          mqc, curctx, v, a, c, ct, poshalf) \
{ \
    if (((flags) & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == \
            (T1_SIGMA_THIS << ((ci) * 3U))) { \
        OPJ_UINT32 ctxt = opj_t1_getctxno_mag((flags) >> ((ci) * 3U)); \
        opj_t1_setcurctx(curctx, ctxt); \
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
        (data)[(ci) * (data_stride)] += \
            (v ^ ((data)[(ci) * (data_stride)] < 0)) ? (poshalf) : -(poshalf); \
        (flags) |= T1_MU_THIS << ((ci) * 3U); \
    } \
}
774
775 static INLINE void opj_t1_dec_refpass_step_mqc(
776     opj_t1_t *t1,
777     opj_flag_t *flagsp,
778     OPJ_INT32 *datap,
779     OPJ_INT32 poshalf,
780     OPJ_UINT32 ci)
781 {
782     OPJ_UINT32 v;
783
784     opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
785     opj_t1_dec_refpass_step_mqc_macro(*flagsp, datap, 0, ci,
786                                       mqc, mqc->curctx, v, mqc->a, mqc->c,
787                                       mqc->ct, poshalf);
788 }
789
790 static void opj_t1_enc_refpass(
791     opj_t1_t *t1,
792     OPJ_INT32 bpno,
793     OPJ_INT32 *nmsedec,
794     OPJ_BYTE type)
795 {
796     OPJ_UINT32 i, k;
797     const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
798     opj_flag_t* f = &T1_FLAGS(0, 0);
799     const OPJ_UINT32 extra = 2U;
800     opj_mqc_t* mqc = &(t1->mqc);
801     DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
802     const OPJ_INT32* datap = t1->data;
803
804     *nmsedec = 0;
805 #ifdef DEBUG_ENC_REF
806     fprintf(stderr, "enc_refpass: bpno=%d\n", bpno);
807 #endif
808     for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
809 #ifdef DEBUG_ENC_REF
810         fprintf(stderr, " k=%d\n", k);
811 #endif
812         for (i = 0; i < t1->w; ++i, f++, datap += 4) {
813             const OPJ_UINT32 flags = *f;
814             OPJ_UINT32 flagsUpdated = flags;
815 #ifdef DEBUG_ENC_REF
816             fprintf(stderr, " i=%d\n", i);
817 #endif
818             if ((flags & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
819                 /* none significant */
820                 continue;
821             }
822             if ((flags & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
823                     (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) {
824                 /* all processed by sigpass */
825                 continue;
826             }
827
828             opj_t1_enc_refpass_step_macro(
829                 mqc, curctx, a, c, ct,
830                 flags, flagsUpdated,
831                 &datap[0],
832                 bpno,
833                 one,
834                 nmsedec,
835                 type,
836                 0);
837             opj_t1_enc_refpass_step_macro(
838                 mqc, curctx, a, c, ct,
839                 flags, flagsUpdated,
840                 &datap[1],
841                 bpno,
842                 one,
843                 nmsedec,
844                 type,
845                 1);
846             opj_t1_enc_refpass_step_macro(
847                 mqc, curctx, a, c, ct,
848                 flags, flagsUpdated,
849                 &datap[2],
850                 bpno,
851                 one,
852                 nmsedec,
853                 type,
854                 2);
855             opj_t1_enc_refpass_step_macro(
856                 mqc, curctx, a, c, ct,
857                 flags, flagsUpdated,
858                 &datap[3],
859                 bpno,
860                 one,
861                 nmsedec,
862                 type,
863                 3);
864             *f = flagsUpdated;
865         }
866     }
867
868     if (k < t1->h) {
869         OPJ_UINT32 j;
870         const OPJ_UINT32 remaining_lines = t1->h - k;
871 #ifdef DEBUG_ENC_REF
872         fprintf(stderr, " k=%d\n", k);
873 #endif
874         for (i = 0; i < t1->w; ++i, ++f) {
875 #ifdef DEBUG_ENC_REF
876             fprintf(stderr, " i=%d\n", i);
877 #endif
878             if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
879                 /* none significant */
880                 datap += remaining_lines;
881                 continue;
882             }
883             for (j = 0; j < remaining_lines; ++j, datap ++) {
884                 opj_t1_enc_refpass_step_macro(
885                     mqc, curctx, a, c, ct,
886                     *f, *f,
887                     &datap[0],
888                     bpno,
889                     one,
890                     nmsedec,
891                     type,
892                     j);
893             }
894         }
895     }
896
897     UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
898 }
899
900
901 static void opj_t1_dec_refpass_raw(
902     opj_t1_t *t1,
903     OPJ_INT32 bpno)
904 {
905     OPJ_INT32 one, poshalf;
906     OPJ_UINT32 i, j, k;
907     OPJ_INT32 *data = t1->data;
908     opj_flag_t *flagsp = &T1_FLAGS(0, 0);
909     const OPJ_UINT32 l_w = t1->w;
910     one = 1 << bpno;
911     poshalf = one >> 1;
912     for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
913         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
914             opj_flag_t flags = *flagsp;
915             if (flags != 0) {
916                 opj_t1_dec_refpass_step_raw(
917                     t1,
918                     flagsp,
919                     data,
920                     poshalf,
921                     0U);
922                 opj_t1_dec_refpass_step_raw(
923                     t1,
924                     flagsp,
925                     data + l_w,
926                     poshalf,
927                     1U);
928                 opj_t1_dec_refpass_step_raw(
929                     t1,
930                     flagsp,
931                     data + 2 * l_w,
932                     poshalf,
933                     2U);
934                 opj_t1_dec_refpass_step_raw(
935                     t1,
936                     flagsp,
937                     data + 3 * l_w,
938                     poshalf,
939                     3U);
940             }
941         }
942     }
943     if (k < t1->h) {
944         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
945             for (j = 0; j < t1->h - k; ++j) {
946                 opj_t1_dec_refpass_step_raw(
947                     t1,
948                     flagsp,
949                     data + j * l_w,
950                     poshalf,
951                     j);
952             }
953         }
954     }
955 }
956
/*
 * Body of the MQ-coded decoder refinement pass, written as a macro so that
 * it can be expanded with either literal or run-time values for w, h and
 * flags_stride (see the _64x64 and _generic functions that expand it).
 *
 * - data starts at t1->data, flagsp at &t1->flags[flags_stride + 1].
 * - Complete stripes of 4 rows are walked column by column; for every flag
 *   word that is not 0, opj_t1_dec_refpass_step_mqc_macro is expanded for
 *   rows 0..3 on a local copy of the flag word, which is then stored back
 *   to *flagsp.
 * - DOWNLOAD_MQC_VARIABLES / UPLOAD_MQC_VARIABLES bracket the stripe loop;
 *   presumably they copy the MQ decoder state into locals (curctx, a, c,
 *   ct) and back - NOTE(review): confirm against their definitions.
 * - Rows left over when h is not a multiple of 4 are handled after the
 *   upload through opj_t1_dec_refpass_step_mqc(), which receives t1 itself.
 *
 * Macro arguments are substituted without additional parentheses.
 */
957 #define opj_t1_dec_refpass_mqc_internal(t1, bpno, w, h, flags_stride) \
958 { \
959         OPJ_INT32 one, poshalf; \
960         OPJ_UINT32 i, j, k; \
961         register OPJ_INT32 *data = t1->data; \
962         register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
963         const OPJ_UINT32 l_w = w; \
964         opj_mqc_t* mqc = &(t1->mqc); \
965         DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
966         register OPJ_UINT32 v; \
967         one = 1 << bpno; \
968         poshalf = one >> 1; \
969         for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
970                 for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
971                         opj_flag_t flags = *flagsp; \
972                         if( flags != 0 ) { \
973                             opj_t1_dec_refpass_step_mqc_macro( \
974                                 flags, data, l_w, 0, \
975                                 mqc, curctx, v, a, c, ct, poshalf); \
976                             opj_t1_dec_refpass_step_mqc_macro( \
977                                 flags, data, l_w, 1, \
978                                 mqc, curctx, v, a, c, ct, poshalf); \
979                             opj_t1_dec_refpass_step_mqc_macro( \
980                                 flags, data, l_w, 2, \
981                                 mqc, curctx, v, a, c, ct, poshalf); \
982                             opj_t1_dec_refpass_step_mqc_macro( \
983                                 flags, data, l_w, 3, \
984                                 mqc, curctx, v, a, c, ct, poshalf); \
985                             *flagsp = flags; \
986                         } \
987                 } \
988         } \
989         UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
990         if( k < h ) { \
991             for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
992                 for (j = 0; j < h - k; ++j) { \
993                         opj_t1_dec_refpass_step_mqc(t1, flagsp, data + j * l_w, poshalf, j); \
994                 } \
995             } \
996         } \
997 }
998
/*
 * MQ-coded decoder refinement pass expanded with literal dimensions:
 * width 64, height 64, flags stride 66. Selected by
 * opj_t1_dec_refpass_mqc() when t1->w == 64 and t1->h == 64.
 */
999 static void opj_t1_dec_refpass_mqc_64x64(
1000     opj_t1_t *t1,
1001     OPJ_INT32 bpno)
1002 {
1003     opj_t1_dec_refpass_mqc_internal(t1, bpno, 64, 64, 66);
1004 }
1005
/*
 * MQ-coded decoder refinement pass expanded with run-time dimensions taken
 * from the handle: width t1->w, height t1->h, flags stride t1->w + 2.
 * Selected by opj_t1_dec_refpass_mqc() for every size other than 64x64.
 */
1006 static void opj_t1_dec_refpass_mqc_generic(
1007     opj_t1_t *t1,
1008     OPJ_INT32 bpno)
1009 {
1010     opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->w + 2U);
1011 }
1012
1013 static void opj_t1_dec_refpass_mqc(
1014     opj_t1_t *t1,
1015     OPJ_INT32 bpno)
1016 {
1017     if (t1->w == 64 && t1->h == 64) {
1018         opj_t1_dec_refpass_mqc_64x64(t1, bpno);
1019     } else {
1020         opj_t1_dec_refpass_mqc_generic(t1, bpno);
1021     }
1022 }
1023
1024 /**
 * Encode clean-up pass step for one flag word, i.e. for rows
 * ci = runlen .. lim - 1 of one column of a stripe.
 *
 * flagspIn points to the flag word, datapIn to the coefficient of row
 * runlen (the local copy l_datap is advanced by one per row).
 *
 * - If all of T1_SIGMA_4/7/10/13 and T1_PI_0..3 are set in the flag word,
 *   nothing is coded; only the T1_PI bits of rows runlen..3 are cleared.
 * - Otherwise, for each row:
 *     - when agg is non-zero and ci == runlen, the zero-coding decision is
 *       skipped and the sign is coded directly;
 *     - else, when neither the T1_SIGMA_THIS nor the T1_PI_THIS bit of the
 *       row is set, the bit selected by `one` in the coefficient magnitude
 *       is coded in the opj_t1_getctxno_zc() context, and the sign is coded
 *       only if that bit is 1.
 *   Coding the sign adds opj_t1_getnmsedec_sig() to *nmsedec, encodes
 *   (sign ^ spb) in the opj_t1_getctxno_sc() context and calls
 *   opj_t1_update_flags(), with vsc set only when cblksty has
 *   J2K_CCP_CBLKSTY_VSC and ci == 0.
 *   The T1_PI bit of the row is cleared at the end of each iteration.
 *
 * Besides its parameters, the expansion refers to a variable named t1
 * (t1->w), which must exist at the expansion site.
 */
1027 #define opj_t1_enc_clnpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, agg, runlen, lim, cblksty) \
1028 { \
1029     OPJ_UINT32 v; \
1030     OPJ_UINT32 ci; \
1031     opj_flag_t* const flagsp = (flagspIn); \
1032     const OPJ_INT32* l_datap = (datapIn); \
1033     const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | \
1034                               T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1035  \
1036     if ((*flagsp & check) == check) { \
1037         if (runlen == 0) { \
1038             *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1039         } else if (runlen == 1) { \
1040             *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); \
1041         } else if (runlen == 2) { \
1042             *flagsp &= ~(T1_PI_2 | T1_PI_3); \
1043         } else if (runlen == 3) { \
1044             *flagsp &= ~(T1_PI_3); \
1045         } \
1046     } \
1047     else \
1048     for (ci = runlen; ci < lim; ++ci) { \
1049         OPJ_BOOL goto_PARTIAL = OPJ_FALSE; \
1050         if ((agg != 0) && (ci == runlen)) { \
1051             goto_PARTIAL = OPJ_TRUE; \
1052         } \
1053         else if (!(*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { \
1054             OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U)); \
1055 /* #ifdef DEBUG_ENC_CLN */ \
1056 /*            printf("   ctxt1=%d\n", ctxt1); */ \
1057 /* #endif */ \
1058             opj_t1_setcurctx(curctx, ctxt1); \
1059             v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \
1060             opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
1061             if (v) { \
1062                 goto_PARTIAL = OPJ_TRUE; \
1063             } \
1064         } \
1065         if( goto_PARTIAL ) { \
1066             OPJ_UINT32 vsc; \
1067             OPJ_UINT32 ctxt2, spb; \
1068             OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
1069                         *flagsp, \
1070                         flagsp[-1], flagsp[1], \
1071                         ci); \
1072             *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
1073                                                 (OPJ_UINT32)bpno); \
1074             ctxt2 = opj_t1_getctxno_sc(lu); \
1075 /* #ifdef DEBUG_ENC_CLN */ \
1076 /*           printf("   ctxt2=%d\n", ctxt2); */ \
1077 /* #endif */ \
1078             opj_t1_setcurctx(curctx, ctxt2); \
1079  \
1080             v = opj_smr_sign(*l_datap); \
1081             spb = opj_t1_getspb(lu); \
1082 /* #ifdef DEBUG_ENC_CLN */ \
1083 /*           printf("   spb=%d\n", spb); */\
1084 /* #endif */ \
1085             opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
1086             vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0; \
1087             opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc); \
1088         } \
1089         *flagsp &= ~(T1_PI_THIS << (3U * ci)); \
1090         l_datap ++; \
1091     } \
1092 }
1093
/*
 * Decode clean-up pass step for row ci of one flag word.
 *
 * The step does something only when check_flags is false, or when neither
 * the T1_SIGMA_THIS nor the T1_PI_THIS bit (shifted left by 3 * ci) is set
 * in flags.
 * - Unless partial is true, a decision is first decoded in the
 *   opj_t1_getctxno_zc() context; the step ends there when it is 0.
 * - Then a symbol is decoded in the opj_t1_getctxno_sc() context and XORed
 *   with opj_t1_getspb(); data[ci * data_stride] is set to -oneplushalf
 *   when the result is non-zero and to oneplushalf otherwise, and the flags
 *   are updated through opj_t1_update_flags_macro().
 *
 * v is the variable that receives the decoded symbols.
 */
1094 #define opj_t1_dec_clnpass_step_macro(check_flags, partial, \
1095                                       flags, flagsp, flags_stride, data, \
1096                                       data_stride, ci, mqc, curctx, \
1097                                       v, a, c, ct, oneplushalf, vsc) \
1098 { \
1099     if ( !check_flags || !(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {\
1100         do { \
1101             if( !partial ) { \
1102                 OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
1103                 opj_t1_setcurctx(curctx, ctxt1); \
1104                 opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1105                 if( !v ) \
1106                     break; \
1107             } \
1108             { \
1109                 OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
1110                                     flags, flagsp[-1], flagsp[1], \
1111                                     ci); \
1112                 opj_t1_setcurctx(curctx, opj_t1_getctxno_sc(lu)); \
1113                 opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1114                 v = v ^ opj_t1_getspb(lu); \
1115                 data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
1116                 opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
1117             } \
1118         } while(0); \
1119     } \
1120 }
1121
/*
 * Function form of the decoder clean-up step for row ci.
 *
 * Expands opj_t1_dec_clnpass_step_macro with check_flags = OPJ_TRUE and
 * partial = OPJ_FALSE, working directly on *flagsp and on the MQ state
 * stored in t1->mqc. The data stride is 0, so the coefficient written is
 * datap[0]: the caller passes a pointer to the coefficient of that row.
 */
1122 static void opj_t1_dec_clnpass_step(
1123     opj_t1_t *t1,
1124     opj_flag_t *flagsp,
1125     OPJ_INT32 *datap,
1126     OPJ_INT32 oneplushalf,
1127     OPJ_UINT32 ci,
1128     OPJ_UINT32 vsc)
1129 {
1130     OPJ_UINT32 v;
1131
1132     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1133     opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE,
1134                                   *flagsp, flagsp, t1->w + 2U, datap,
1135                                   0, ci, mqc, mqc->curctx,
1136                                   v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
1137 }
1138
1139 static void opj_t1_enc_clnpass(
1140     opj_t1_t *t1,
1141     OPJ_INT32 bpno,
1142     OPJ_INT32 *nmsedec,
1143     OPJ_UINT32 cblksty)
1144 {
1145     OPJ_UINT32 i, k;
1146     const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
1147     opj_mqc_t* mqc = &(t1->mqc);
1148     DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
1149     const OPJ_INT32* datap = t1->data;
1150     opj_flag_t *f = &T1_FLAGS(0, 0);
1151     const OPJ_UINT32 extra = 2U;
1152
1153     *nmsedec = 0;
1154 #ifdef DEBUG_ENC_CLN
1155     printf("enc_clnpass: bpno=%d\n", bpno);
1156 #endif
1157     for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
1158 #ifdef DEBUG_ENC_CLN
1159         printf(" k=%d\n", k);
1160 #endif
1161         for (i = 0; i < t1->w; ++i, f++) {
1162             OPJ_UINT32 agg, runlen;
1163 #ifdef DEBUG_ENC_CLN
1164             printf("  i=%d\n", i);
1165 #endif
1166             agg = !*f;
1167 #ifdef DEBUG_ENC_CLN
1168             printf("   agg=%d\n", agg);
1169 #endif
1170             if (agg) {
1171                 for (runlen = 0; runlen < 4; ++runlen, ++datap) {
1172                     if (opj_smr_abs(*datap) & (OPJ_UINT32)one) {
1173                         break;
1174                     }
1175                 }
1176                 opj_t1_setcurctx(curctx, T1_CTXNO_AGG);
1177                 opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen != 4);
1178                 if (runlen == 4) {
1179                     continue;
1180                 }
1181                 opj_t1_setcurctx(curctx, T1_CTXNO_UNI);
1182                 opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen >> 1);
1183                 opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen & 1);
1184             } else {
1185                 runlen = 0;
1186             }
1187             opj_t1_enc_clnpass_step_macro(
1188                 mqc, curctx, a, c, ct,
1189                 f,
1190                 datap,
1191                 bpno,
1192                 one,
1193                 nmsedec,
1194                 agg,
1195                 runlen,
1196                 4U,
1197                 cblksty);
1198             datap += 4 - runlen;
1199         }
1200     }
1201     if (k < t1->h) {
1202         const OPJ_UINT32 agg = 0;
1203         const OPJ_UINT32 runlen = 0;
1204 #ifdef DEBUG_ENC_CLN
1205         printf(" k=%d\n", k);
1206 #endif
1207         for (i = 0; i < t1->w; ++i, f++) {
1208 #ifdef DEBUG_ENC_CLN
1209             printf("  i=%d\n", i);
1210             printf("   agg=%d\n", agg);
1211 #endif
1212             opj_t1_enc_clnpass_step_macro(
1213                 mqc, curctx, a, c, ct,
1214                 f,
1215                 datap,
1216                 bpno,
1217                 one,
1218                 nmsedec,
1219                 agg,
1220                 runlen,
1221                 t1->h - k,
1222                 cblksty);
1223             datap += t1->h - k;
1224         }
1225     }
1226
1227     UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
1228 }
1229
/*
 * Body of the decoder clean-up pass, written as a macro so that it can be
 * expanded with literal or run-time values for vsc, w, h and flags_stride.
 *
 * Complete stripes of 4 rows are processed column by column on a local
 * copy (flags) of the flag word:
 * - flags == 0: one symbol is decoded in the T1_CTXNO_AGG context. If it
 *   is 0 the column is left untouched (continue). Otherwise two symbols
 *   decoded in the T1_CTXNO_UNI context form runlen (first symbol = high
 *   bit), and the switch enters the sequence of steps at row runlen: that
 *   first row is stepped with partial true, the following rows with
 *   partial false, all of them with check_flags false.
 * - flags != 0: rows 0..3 are stepped with check_flags true and partial
 *   false.
 * vsc is forwarded to the step of row 0 only; rows 1..3 get OPJ_FALSE.
 * The flag word is stored back with T1_PI_0..T1_PI_3 cleared.
 *
 * Rows left over when h is not a multiple of 4 are handled after
 * UPLOAD_MQC_VARIABLES through opj_t1_dec_clnpass_step(), which works on
 * t1->mqc; the T1_PI bits of those flag words are cleared as well.
 *
 * Macro arguments are substituted without additional parentheses.
 */
1230 #define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \
1231 { \
1232     OPJ_INT32 one, half, oneplushalf; \
1233     OPJ_UINT32 runlen; \
1234     OPJ_UINT32 i, j, k; \
1235     const OPJ_UINT32 l_w = w; \
1236     opj_mqc_t* mqc = &(t1->mqc); \
1237     register OPJ_INT32 *data = t1->data; \
1238     register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
1239     DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
1240     register OPJ_UINT32 v; \
1241     one = 1 << bpno; \
1242     half = one >> 1; \
1243     oneplushalf = one | half; \
1244     for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
1245         for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
1246             opj_flag_t flags = *flagsp; \
1247             if (flags == 0) { \
1248                 OPJ_UINT32 partial = OPJ_TRUE; \
1249                 opj_t1_setcurctx(curctx, T1_CTXNO_AGG); \
1250                 opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1251                 if (!v) { \
1252                     continue; \
1253                 } \
1254                 opj_t1_setcurctx(curctx, T1_CTXNO_UNI); \
1255                 opj_mqc_decode_macro(runlen, mqc, curctx, a, c, ct); \
1256                 opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1257                 runlen = (runlen << 1) | v; \
1258                 switch(runlen) { \
1259                     case 0: \
1260                         opj_t1_dec_clnpass_step_macro(OPJ_FALSE, OPJ_TRUE,\
1261                                             flags, flagsp, flags_stride, data, \
1262                                             l_w, 0, mqc, curctx, \
1263                                             v, a, c, ct, oneplushalf, vsc); \
1264                         partial = OPJ_FALSE; \
1265                         /* FALLTHRU */ \
1266                     case 1: \
1267                         opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1268                                             flags, flagsp, flags_stride, data, \
1269                                             l_w, 1, mqc, curctx, \
1270                                             v, a, c, ct, oneplushalf, OPJ_FALSE); \
1271                         partial = OPJ_FALSE; \
1272                         /* FALLTHRU */ \
1273                     case 2: \
1274                         opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1275                                             flags, flagsp, flags_stride, data, \
1276                                             l_w, 2, mqc, curctx, \
1277                                             v, a, c, ct, oneplushalf, OPJ_FALSE); \
1278                         partial = OPJ_FALSE; \
1279                         /* FALLTHRU */ \
1280                     case 3: \
1281                         opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1282                                             flags, flagsp, flags_stride, data, \
1283                                             l_w, 3, mqc, curctx, \
1284                                             v, a, c, ct, oneplushalf, OPJ_FALSE); \
1285                         break; \
1286                 } \
1287             } else { \
1288                 opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1289                                     flags, flagsp, flags_stride, data, \
1290                                     l_w, 0, mqc, curctx, \
1291                                     v, a, c, ct, oneplushalf, vsc); \
1292                 opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1293                                     flags, flagsp, flags_stride, data, \
1294                                     l_w, 1, mqc, curctx, \
1295                                     v, a, c, ct, oneplushalf, OPJ_FALSE); \
1296                 opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1297                                     flags, flagsp, flags_stride, data, \
1298                                     l_w, 2, mqc, curctx, \
1299                                     v, a, c, ct, oneplushalf, OPJ_FALSE); \
1300                 opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1301                                     flags, flagsp, flags_stride, data, \
1302                                     l_w, 3, mqc, curctx, \
1303                                     v, a, c, ct, oneplushalf, OPJ_FALSE); \
1304             } \
1305             *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1306         } \
1307     } \
1308     UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
1309     if( k < h ) { \
1310         for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \
1311             for (j = 0; j < h - k; ++j) { \
1312                 opj_t1_dec_clnpass_step(t1, flagsp, data + j * l_w, oneplushalf, j, vsc); \
1313             } \
1314             *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1315         } \
1316     } \
1317 }
1318
1319 static void opj_t1_dec_clnpass_check_segsym(opj_t1_t *t1, OPJ_INT32 cblksty)
1320 {
1321     if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
1322         opj_mqc_t* mqc = &(t1->mqc);
1323         OPJ_UINT32 v, v2;
1324         opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
1325         opj_mqc_decode(v, mqc);
1326         opj_mqc_decode(v2, mqc);
1327         v = (v << 1) | v2;
1328         opj_mqc_decode(v2, mqc);
1329         v = (v << 1) | v2;
1330         opj_mqc_decode(v2, mqc);
1331         v = (v << 1) | v2;
1332         /*
1333         if (v!=0xa) {
1334             opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v);
1335         }
1336         */
1337     }
1338 }
1339
/*
 * Decoder clean-up pass expanded with literal dimensions (width 64,
 * height 64, flags stride 66) and vsc = OPJ_FALSE.
 */
1340 static void opj_t1_dec_clnpass_64x64_novsc(
1341     opj_t1_t *t1,
1342     OPJ_INT32 bpno)
1343 {
1344     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
1345 }
1346
/*
 * Decoder clean-up pass expanded with literal dimensions (width 64,
 * height 64, flags stride 66) and vsc = OPJ_TRUE.
 */
1347 static void opj_t1_dec_clnpass_64x64_vsc(
1348     opj_t1_t *t1,
1349     OPJ_INT32 bpno)
1350 {
1351     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
1352 }
1353
/*
 * Decoder clean-up pass expanded with run-time dimensions taken from the
 * handle (t1->w, t1->h, flags stride t1->w + 2) and vsc = OPJ_FALSE.
 */
1354 static void opj_t1_dec_clnpass_generic_novsc(
1355     opj_t1_t *t1,
1356     OPJ_INT32 bpno)
1357 {
1358     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
1359                                 t1->w + 2U);
1360 }
1361
/*
 * Decoder clean-up pass expanded with run-time dimensions taken from the
 * handle (t1->w, t1->h, flags stride t1->w + 2) and vsc = OPJ_TRUE.
 */
1362 static void opj_t1_dec_clnpass_generic_vsc(
1363     opj_t1_t *t1,
1364     OPJ_INT32 bpno)
1365 {
1366     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
1367                                 t1->w + 2U);
1368 }
1369
1370 static void opj_t1_dec_clnpass(
1371     opj_t1_t *t1,
1372     OPJ_INT32 bpno,
1373     OPJ_INT32 cblksty)
1374 {
1375     if (t1->w == 64 && t1->h == 64) {
1376         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1377             opj_t1_dec_clnpass_64x64_vsc(t1, bpno);
1378         } else {
1379             opj_t1_dec_clnpass_64x64_novsc(t1, bpno);
1380         }
1381     } else {
1382         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1383             opj_t1_dec_clnpass_generic_vsc(t1, bpno);
1384         } else {
1385             opj_t1_dec_clnpass_generic_novsc(t1, bpno);
1386         }
1387     }
1388     opj_t1_dec_clnpass_check_segsym(t1, cblksty);
1389 }
1390
1391
1392 /** mod fixed_quality */
1393 static OPJ_FLOAT64 opj_t1_getwmsedec(
1394     OPJ_INT32 nmsedec,
1395     OPJ_UINT32 compno,
1396     OPJ_UINT32 level,
1397     OPJ_UINT32 orient,
1398     OPJ_INT32 bpno,
1399     OPJ_UINT32 qmfbid,
1400     OPJ_FLOAT64 stepsize,
1401     OPJ_UINT32 numcomps,
1402     const OPJ_FLOAT64 * mct_norms,
1403     OPJ_UINT32 mct_numcomps)
1404 {
1405     OPJ_FLOAT64 w1 = 1, w2, wmsedec;
1406     OPJ_ARG_NOT_USED(numcomps);
1407
1408     if (mct_norms && (compno < mct_numcomps)) {
1409         w1 = mct_norms[compno];
1410     }
1411
1412     if (qmfbid == 1) {
1413         w2 = opj_dwt_getnorm(level, orient);
1414     } else {    /* if (qmfbid == 0) */
1415         const OPJ_INT32 log2_gain = (orient == 0) ? 0 :
1416                                     (orient == 3) ? 2 : 1;
1417         w2 = opj_dwt_getnorm_real(level, orient);
1418         /* Not sure this is right. But preserves past behaviour */
1419         stepsize /= (1 << log2_gain);
1420     }
1421
1422     wmsedec = w1 * w2 * stepsize * (1 << bpno);
1423     wmsedec *= wmsedec * nmsedec / 8192.0;
1424
1425     return wmsedec;
1426 }
1427
1428 static OPJ_BOOL opj_t1_allocate_buffers(
1429     opj_t1_t *t1,
1430     OPJ_UINT32 w,
1431     OPJ_UINT32 h)
1432 {
1433     OPJ_UINT32 flagssize;
1434     OPJ_UINT32 flags_stride;
1435
1436     /* No risk of overflow. Prior checks ensure those assert are met */
1437     /* They are per the specification */
1438     assert(w <= 1024);
1439     assert(h <= 1024);
1440     assert(w * h <= 4096);
1441
1442     /* encoder uses tile buffer, so no need to allocate */
1443     {
1444         OPJ_UINT32 datasize = w * h;
1445
1446         if (datasize > t1->datasize) {
1447             opj_aligned_free(t1->data);
1448             t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
1449             if (!t1->data) {
1450                 /* FIXME event manager error callback */
1451                 return OPJ_FALSE;
1452             }
1453             t1->datasize = datasize;
1454         }
1455         /* memset first arg is declared to never be null by gcc */
1456         if (t1->data != NULL) {
1457             memset(t1->data, 0, datasize * sizeof(OPJ_INT32));
1458         }
1459     }
1460
1461     flags_stride = w + 2U; /* can't be 0U */
1462
1463     flagssize = (h + 3U) / 4U + 2U;
1464
1465     flagssize *= flags_stride;
1466     {
1467         opj_flag_t* p;
1468         OPJ_UINT32 x;
1469         OPJ_UINT32 flags_height = (h + 3U) / 4U;
1470
1471         if (flagssize > t1->flagssize) {
1472
1473             opj_aligned_free(t1->flags);
1474             t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(
1475                             opj_flag_t));
1476             if (!t1->flags) {
1477                 /* FIXME event manager error callback */
1478                 return OPJ_FALSE;
1479             }
1480         }
1481         t1->flagssize = flagssize;
1482
1483         memset(t1->flags, 0, flagssize * sizeof(opj_flag_t));
1484
1485         p = &t1->flags[0];
1486         for (x = 0; x < flags_stride; ++x) {
1487             /* magic value to hopefully stop any passes being interested in this entry */
1488             *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1489         }
1490
1491         p = &t1->flags[((flags_height + 1) * flags_stride)];
1492         for (x = 0; x < flags_stride; ++x) {
1493             /* magic value to hopefully stop any passes being interested in this entry */
1494             *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1495         }
1496
1497         if (h % 4) {
1498             OPJ_UINT32 v = 0;
1499             p = &t1->flags[((flags_height) * flags_stride)];
1500             if (h % 4 == 1) {
1501                 v |= T1_PI_1 | T1_PI_2 | T1_PI_3;
1502             } else if (h % 4 == 2) {
1503                 v |= T1_PI_2 | T1_PI_3;
1504             } else if (h % 4 == 3) {
1505                 v |= T1_PI_3;
1506             }
1507             for (x = 0; x < flags_stride; ++x) {
1508                 *p++ = v;
1509             }
1510         }
1511     }
1512
1513     t1->w = w;
1514     t1->h = h;
1515
1516     return OPJ_TRUE;
1517 }
1518
1519 /* ----------------------------------------------------------------------- */
1520
1521 /* ----------------------------------------------------------------------- */
1522 /**
1523  * Creates a new Tier 1 handle
1524  * and initializes the look-up tables of the Tier-1 coder/decoder
1525  * @return a new T1 handle if successful, returns NULL otherwise
1526 */
1527 opj_t1_t* opj_t1_create(OPJ_BOOL isEncoder)
1528 {
1529     opj_t1_t *l_t1 = 00;
1530
1531     l_t1 = (opj_t1_t*) opj_calloc(1, sizeof(opj_t1_t));
1532     if (!l_t1) {
1533         return 00;
1534     }
1535
1536     l_t1->encoder = isEncoder;
1537
1538     return l_t1;
1539 }
1540
1541
1542 /**
1543  * Destroys a previously created T1 handle
1544  *
1545  * @param p_t1 Tier 1 handle to destroy
1546 */
1547 void opj_t1_destroy(opj_t1_t *p_t1)
1548 {
1549     if (! p_t1) {
1550         return;
1551     }
1552
1553     if (p_t1->data) {
1554         opj_aligned_free(p_t1->data);
1555         p_t1->data = 00;
1556     }
1557
1558     if (p_t1->flags) {
1559         opj_aligned_free(p_t1->flags);
1560         p_t1->flags = 00;
1561     }
1562
1563     opj_free(p_t1->cblkdatabuffer);
1564
1565     opj_free(p_t1);
1566 }
1567
/*
 * Arguments of one code-block decoding job, consumed by
 * opj_t1_clbl_decode_processor(), which releases the job with opj_free()
 * on its early-exit paths.
 */
1568 typedef struct {
    /* when false, the processor allocates and zeroes cblk->decoded_data */
1569     OPJ_BOOL whole_tile_decoding;
    /* resolution number; resno - 1 indexes tilec->resolutions */
1570     OPJ_UINT32 resno;
    /* code-block to decode */
1571     opj_tcd_cblk_dec_t* cblk;
    /* band of the code-block (bandno, x0, y0, stepsize are read) */
1572     opj_tcd_band_t* band;
    /* tile component (resolutions and data are read) */
1573     opj_tcd_tilecomp_t* tilec;
    /* coding parameters (roishift, cblksty, qmfbid are read) */
1574     opj_tccp_t* tccp;
    /* copied into t1->mustuse_cblkdatabuffer before decoding */
1575     OPJ_BOOL mustuse_cblkdatabuffer;
    /* result flag: set to OPJ_FALSE on failure; the job is dropped */
    /* without decoding when it is already false */
1576     volatile OPJ_BOOL* pret;
    /* event manager used for error messages */
1577     opj_event_mgr_t *p_manager;
    /* when non-NULL, locked around opj_event_msg() */
1578     opj_mutex_t* p_manager_mutex;
    /* forwarded to opj_t1_decode_cblk() */
1579     OPJ_BOOL check_pterm;
1580 } opj_t1_cblk_decode_processing_job_t;
1581
/*
 * Adapter giving opj_t1_destroy() a void* parameter; passed to
 * opj_tls_set() in opj_t1_clbl_decode_processor() together with the
 * handle it is meant to destroy.
 */
1582 static void opj_t1_destroy_wrapper(void* t1)
1583 {
1584     opj_t1_destroy((opj_t1_t*) t1);
1585 }
1586
1587 static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
1588 {
1589     opj_tcd_cblk_dec_t* cblk;
1590     opj_tcd_band_t* band;
1591     opj_tcd_tilecomp_t* tilec;
1592     opj_tccp_t* tccp;
1593     OPJ_INT32* OPJ_RESTRICT datap;
1594     OPJ_UINT32 cblk_w, cblk_h;
1595     OPJ_INT32 x, y;
1596     OPJ_UINT32 i, j;
1597     opj_t1_cblk_decode_processing_job_t* job;
1598     opj_t1_t* t1;
1599     OPJ_UINT32 resno;
1600     OPJ_UINT32 tile_w;
1601
1602     job = (opj_t1_cblk_decode_processing_job_t*) user_data;
1603
1604     cblk = job->cblk;
1605
1606     if (!job->whole_tile_decoding) {
1607         cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
1608         cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
1609
1610         cblk->decoded_data = (OPJ_INT32*)opj_aligned_malloc(sizeof(OPJ_INT32) *
1611                              cblk_w * cblk_h);
1612         if (cblk->decoded_data == NULL) {
1613             if (job->p_manager_mutex) {
1614                 opj_mutex_lock(job->p_manager_mutex);
1615             }
1616             opj_event_msg(job->p_manager, EVT_ERROR,
1617                           "Cannot allocate cblk->decoded_data\n");
1618             if (job->p_manager_mutex) {
1619                 opj_mutex_unlock(job->p_manager_mutex);
1620             }
1621             *(job->pret) = OPJ_FALSE;
1622             opj_free(job);
1623             return;
1624         }
1625         /* Zero-init required */
1626         memset(cblk->decoded_data, 0, sizeof(OPJ_INT32) * cblk_w * cblk_h);
1627     } else if (cblk->decoded_data) {
1628         /* Not sure if that code path can happen, but better be */
1629         /* safe than sorry */
1630         opj_aligned_free(cblk->decoded_data);
1631         cblk->decoded_data = NULL;
1632     }
1633
1634     resno = job->resno;
1635     band = job->band;
1636     tilec = job->tilec;
1637     tccp = job->tccp;
1638     tile_w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1
1639                           -
1640                           tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
1641
1642     if (!*(job->pret)) {
1643         opj_free(job);
1644         return;
1645     }
1646
1647     t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
1648     if (t1 == NULL) {
1649         t1 = opj_t1_create(OPJ_FALSE);
1650         opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
1651     }
1652     t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;
1653
1654     if (OPJ_FALSE == opj_t1_decode_cblk(
1655                 t1,
1656                 cblk,
1657                 band->bandno,
1658                 (OPJ_UINT32)tccp->roishift,
1659                 tccp->cblksty,
1660                 job->p_manager,
1661                 job->p_manager_mutex,
1662                 job->check_pterm)) {
1663         *(job->pret) = OPJ_FALSE;
1664         opj_free(job);
1665         return;
1666     }
1667
1668     x = cblk->x0 - band->x0;
1669     y = cblk->y0 - band->y0;
1670     if (band->bandno & 1) {
1671         opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
1672         x += pres->x1 - pres->x0;
1673     }
1674     if (band->bandno & 2) {
1675         opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
1676         y += pres->y1 - pres->y0;
1677     }
1678
1679     datap = cblk->decoded_data ? cblk->decoded_data : t1->data;
1680     cblk_w = t1->w;
1681     cblk_h = t1->h;
1682
1683     if (tccp->roishift) {
1684         if (tccp->roishift >= 31) {
1685             for (j = 0; j < cblk_h; ++j) {
1686                 for (i = 0; i < cblk_w; ++i) {
1687                     datap[(j * cblk_w) + i] = 0;
1688                 }
1689             }
1690         } else {
1691             OPJ_INT32 thresh = 1 << tccp->roishift;
1692             for (j = 0; j < cblk_h; ++j) {
1693                 for (i = 0; i < cblk_w; ++i) {
1694                     OPJ_INT32 val = datap[(j * cblk_w) + i];
1695                     OPJ_INT32 mag = abs(val);
1696                     if (mag >= thresh) {
1697                         mag >>= tccp->roishift;
1698                         datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
1699                     }
1700                 }
1701             }
1702         }
1703     }
1704
1705     /* Both can be non NULL if for example decoding a full tile and then */
1706     /* partially a tile. In which case partial decoding should be the */
1707     /* priority */
1708     assert((cblk->decoded_data != NULL) || (tilec->data != NULL));
1709
1710     if (cblk->decoded_data) {
1711         OPJ_UINT32 cblk_size = cblk_w * cblk_h;
1712         if (tccp->qmfbid == 1) {
1713             for (i = 0; i < cblk_size; ++i) {
1714                 datap[i] /= 2;
1715             }
1716         } else {        /* if (tccp->qmfbid == 0) */
1717             const float stepsize = 0.5f * band->stepsize;
1718             i = 0;
1719 #ifdef __SSE2__
1720             {
1721                 const __m128 xmm_stepsize = _mm_set1_ps(stepsize);
1722                 for (; i < (cblk_size & ~15U); i += 16) {
1723                     __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1724                                                            datap + 0)));
1725                     __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1726                                                            datap + 4)));
1727                     __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1728                                                            datap + 8)));
1729                     __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1730                                                            datap + 12)));
1731                     _mm_store_ps((float*)(datap +  0), _mm_mul_ps(xmm0_data, xmm_stepsize));
1732                     _mm_store_ps((float*)(datap +  4), _mm_mul_ps(xmm1_data, xmm_stepsize));
1733                     _mm_store_ps((float*)(datap +  8), _mm_mul_ps(xmm2_data, xmm_stepsize));
1734                     _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize));
1735                     datap += 16;
1736                 }
1737             }
1738 #endif
1739             for (; i < cblk_size; ++i) {
1740                 OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * stepsize;
1741                 memcpy(datap, &tmp, sizeof(tmp));
1742                 datap++;
1743             }
1744         }
1745     } else if (tccp->qmfbid == 1) {
1746         OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w +
1747                                                        (OPJ_SIZE_T)x];
1748         for (j = 0; j < cblk_h; ++j) {
1749             i = 0;
1750             for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
1751                 OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U];
1752                 OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
1753                 OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
1754                 OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
1755                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2;
1756                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2;
1757                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2;
1758                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2;
1759             }
1760             for (; i < cblk_w; ++i) {
1761                 OPJ_INT32 tmp = datap[(j * cblk_w) + i];
1762                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2;
1763             }
1764         }
1765     } else {        /* if (tccp->qmfbid == 0) */
1766         const float stepsize = 0.5f * band->stepsize;
1767         OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y *
1768                                                          tile_w + (OPJ_SIZE_T)x];
1769         for (j = 0; j < cblk_h; ++j) {
1770             OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
1771             for (i = 0; i < cblk_w; ++i) {
1772                 OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * stepsize;
1773                 *tiledp2 = tmp;
1774                 datap++;
1775                 tiledp2++;
1776             }
1777             tiledp += tile_w;
1778         }
1779     }
1780
1781     opj_free(job);
1782 }
1783
1784
1785 void opj_t1_decode_cblks(opj_tcd_t* tcd,
1786                          volatile OPJ_BOOL* pret,
1787                          opj_tcd_tilecomp_t* tilec,
1788                          opj_tccp_t* tccp,
1789                          opj_event_mgr_t *p_manager,
1790                          opj_mutex_t* p_manager_mutex,
1791                          OPJ_BOOL check_pterm
1792                         )
1793 {
1794     opj_thread_pool_t* tp = tcd->thread_pool;
1795     OPJ_UINT32 resno, bandno, precno, cblkno;
1796
1797 #ifdef DEBUG_VERBOSE
1798     OPJ_UINT32 codeblocks_decoded = 0;
1799     printf("Enter opj_t1_decode_cblks()\n");
1800 #endif
1801
1802     for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
1803         opj_tcd_resolution_t* res = &tilec->resolutions[resno];
1804
1805         for (bandno = 0; bandno < res->numbands; ++bandno) {
1806             opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
1807
1808             for (precno = 0; precno < res->pw * res->ph; ++precno) {
1809                 opj_tcd_precinct_t* precinct = &band->precincts[precno];
1810
1811                 if (!opj_tcd_is_subband_area_of_interest(tcd,
1812                         tilec->compno,
1813                         resno,
1814                         band->bandno,
1815                         (OPJ_UINT32)precinct->x0,
1816                         (OPJ_UINT32)precinct->y0,
1817                         (OPJ_UINT32)precinct->x1,
1818                         (OPJ_UINT32)precinct->y1)) {
1819                     for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1820                         opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1821                         if (cblk->decoded_data) {
1822 #ifdef DEBUG_VERBOSE
1823                             printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1824                                    cblk->x0, cblk->y0, resno, bandno);
1825 #endif
1826                             opj_aligned_free(cblk->decoded_data);
1827                             cblk->decoded_data = NULL;
1828                         }
1829                     }
1830                     continue;
1831                 }
1832
1833                 for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1834                     opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1835                     opj_t1_cblk_decode_processing_job_t* job;
1836
1837                     if (!opj_tcd_is_subband_area_of_interest(tcd,
1838                             tilec->compno,
1839                             resno,
1840                             band->bandno,
1841                             (OPJ_UINT32)cblk->x0,
1842                             (OPJ_UINT32)cblk->y0,
1843                             (OPJ_UINT32)cblk->x1,
1844                             (OPJ_UINT32)cblk->y1)) {
1845                         if (cblk->decoded_data) {
1846 #ifdef DEBUG_VERBOSE
1847                             printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1848                                    cblk->x0, cblk->y0, resno, bandno);
1849 #endif
1850                             opj_aligned_free(cblk->decoded_data);
1851                             cblk->decoded_data = NULL;
1852                         }
1853                         continue;
1854                     }
1855
1856                     if (!tcd->whole_tile_decoding) {
1857                         OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
1858                         OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
1859                         if (cblk->decoded_data != NULL) {
1860 #ifdef DEBUG_VERBOSE
1861                             printf("Reusing codeblock %d,%d at resno=%d, bandno=%d\n",
1862                                    cblk->x0, cblk->y0, resno, bandno);
1863 #endif
1864                             continue;
1865                         }
1866                         if (cblk_w == 0 || cblk_h == 0) {
1867                             continue;
1868                         }
1869 #ifdef DEBUG_VERBOSE
1870                         printf("Decoding codeblock %d,%d at resno=%d, bandno=%d\n",
1871                                cblk->x0, cblk->y0, resno, bandno);
1872 #endif
1873                     }
1874
1875                     job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1,
1876                             sizeof(opj_t1_cblk_decode_processing_job_t));
1877                     if (!job) {
1878                         *pret = OPJ_FALSE;
1879                         return;
1880                     }
1881                     job->whole_tile_decoding = tcd->whole_tile_decoding;
1882                     job->resno = resno;
1883                     job->cblk = cblk;
1884                     job->band = band;
1885                     job->tilec = tilec;
1886                     job->tccp = tccp;
1887                     job->pret = pret;
1888                     job->p_manager_mutex = p_manager_mutex;
1889                     job->p_manager = p_manager;
1890                     job->check_pterm = check_pterm;
1891                     job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
1892                     opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job);
1893 #ifdef DEBUG_VERBOSE
1894                     codeblocks_decoded ++;
1895 #endif
1896                     if (!(*pret)) {
1897                         return;
1898                     }
1899                 } /* cblkno */
1900             } /* precno */
1901         } /* bandno */
1902     } /* resno */
1903
1904 #ifdef DEBUG_VERBOSE
1905     printf("Leave opj_t1_decode_cblks(). Number decoded: %d\n", codeblocks_decoded);
1906 #endif
1907     return;
1908 }
1909
1910
1911 static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
1912                                    opj_tcd_cblk_dec_t* cblk,
1913                                    OPJ_UINT32 orient,
1914                                    OPJ_UINT32 roishift,
1915                                    OPJ_UINT32 cblksty,
1916                                    opj_event_mgr_t *p_manager,
1917                                    opj_mutex_t* p_manager_mutex,
1918                                    OPJ_BOOL check_pterm)
1919 {
1920     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1921
1922     OPJ_INT32 bpno_plus_one;
1923     OPJ_UINT32 passtype;
1924     OPJ_UINT32 segno, passno;
1925     OPJ_BYTE* cblkdata = NULL;
1926     OPJ_UINT32 cblkdataindex = 0;
1927     OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */
1928     OPJ_INT32* original_t1_data = NULL;
1929
1930     mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
1931
1932     if (!opj_t1_allocate_buffers(
1933                 t1,
1934                 (OPJ_UINT32)(cblk->x1 - cblk->x0),
1935                 (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
1936         return OPJ_FALSE;
1937     }
1938
1939     bpno_plus_one = (OPJ_INT32)(roishift + cblk->numbps);
1940     if (bpno_plus_one >= 31) {
1941         if (p_manager_mutex) {
1942             opj_mutex_lock(p_manager_mutex);
1943         }
1944         opj_event_msg(p_manager, EVT_WARNING,
1945                       "opj_t1_decode_cblk(): unsupported bpno_plus_one = %d >= 31\n",
1946                       bpno_plus_one);
1947         if (p_manager_mutex) {
1948             opj_mutex_unlock(p_manager_mutex);
1949         }
1950         return OPJ_FALSE;
1951     }
1952     passtype = 2;
1953
1954     opj_mqc_resetstates(mqc);
1955     opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
1956     opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
1957     opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
1958
1959     /* Even if we have a single chunk, in multi-threaded decoding */
1960     /* the insertion of our synthetic marker might potentially override */
1961     /* valid codestream of other codeblocks decoded in parallel. */
1962     if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) {
1963         OPJ_UINT32 i;
1964         OPJ_UINT32 cblk_len;
1965
1966         /* Compute whole codeblock length from chunk lengths */
1967         cblk_len = 0;
1968         for (i = 0; i < cblk->numchunks; i++) {
1969             cblk_len += cblk->chunks[i].len;
1970         }
1971
1972         /* Allocate temporary memory if needed */
1973         if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
1974             cblkdata = (OPJ_BYTE*)opj_realloc(t1->cblkdatabuffer,
1975                                               cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
1976             if (cblkdata == NULL) {
1977                 return OPJ_FALSE;
1978             }
1979             t1->cblkdatabuffer = cblkdata;
1980             memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
1981             t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
1982         }
1983
1984         /* Concatenate all chunks */
1985         cblkdata = t1->cblkdatabuffer;
1986         cblk_len = 0;
1987         for (i = 0; i < cblk->numchunks; i++) {
1988             memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
1989             cblk_len += cblk->chunks[i].len;
1990         }
1991     } else if (cblk->numchunks == 1) {
1992         cblkdata = cblk->chunks[0].data;
1993     } else {
1994         /* Not sure if that can happen in practice, but avoid Coverity to */
1995         /* think we will dereference a null cblkdta pointer */
1996         return OPJ_TRUE;
1997     }
1998
1999     /* For subtile decoding, directly decode in the decoded_data buffer of */
2000     /* the code-block. Hack t1->data to point to it, and restore it later */
2001     if (cblk->decoded_data) {
2002         original_t1_data = t1->data;
2003         t1->data = cblk->decoded_data;
2004     }
2005
2006     for (segno = 0; segno < cblk->real_num_segs; ++segno) {
2007         opj_tcd_seg_t *seg = &cblk->segs[segno];
2008
2009         /* BYPASS mode */
2010         type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) &&
2011                 (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2012
2013         if (type == T1_TYPE_RAW) {
2014             opj_mqc_raw_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2015                                  OPJ_COMMON_CBLK_DATA_EXTRA);
2016         } else {
2017             opj_mqc_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2018                              OPJ_COMMON_CBLK_DATA_EXTRA);
2019         }
2020         cblkdataindex += seg->len;
2021
2022         for (passno = 0; (passno < seg->real_num_passes) &&
2023                 (bpno_plus_one >= 1); ++passno) {
2024             switch (passtype) {
2025             case 0:
2026                 if (type == T1_TYPE_RAW) {
2027                     opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2028                 } else {
2029                     opj_t1_dec_sigpass_mqc(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2030                 }
2031                 break;
2032             case 1:
2033                 if (type == T1_TYPE_RAW) {
2034                     opj_t1_dec_refpass_raw(t1, bpno_plus_one);
2035                 } else {
2036                     opj_t1_dec_refpass_mqc(t1, bpno_plus_one);
2037                 }
2038                 break;
2039             case 2:
2040                 opj_t1_dec_clnpass(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2041                 break;
2042             }
2043
2044             if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) {
2045                 opj_mqc_resetstates(mqc);
2046                 opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2047                 opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2048                 opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2049             }
2050             if (++passtype == 3) {
2051                 passtype = 0;
2052                 bpno_plus_one--;
2053             }
2054         }
2055
2056         opq_mqc_finish_dec(mqc);
2057     }
2058
2059     if (check_pterm) {
2060         if (mqc->bp + 2 < mqc->end) {
2061             if (p_manager_mutex) {
2062                 opj_mutex_lock(p_manager_mutex);
2063             }
2064             opj_event_msg(p_manager, EVT_WARNING,
2065                           "PTERM check failure: %d remaining bytes in code block (%d used / %d)\n",
2066                           (int)(mqc->end - mqc->bp) - 2,
2067                           (int)(mqc->bp - mqc->start),
2068                           (int)(mqc->end - mqc->start));
2069             if (p_manager_mutex) {
2070                 opj_mutex_unlock(p_manager_mutex);
2071             }
2072         } else if (mqc->end_of_byte_stream_counter > 2) {
2073             if (p_manager_mutex) {
2074                 opj_mutex_lock(p_manager_mutex);
2075             }
2076             opj_event_msg(p_manager, EVT_WARNING,
2077                           "PTERM check failure: %d synthetized 0xFF markers read\n",
2078                           mqc->end_of_byte_stream_counter);
2079             if (p_manager_mutex) {
2080                 opj_mutex_unlock(p_manager_mutex);
2081             }
2082         }
2083     }
2084
2085     /* Restore original t1->data is needed */
2086     if (cblk->decoded_data) {
2087         t1->data = original_t1_data;
2088     }
2089
2090     return OPJ_TRUE;
2091 }
2092
2093
/** Description of one asynchronous code-block encoding job, as consumed by
 * opj_t1_clbl_encode_processor (which also releases it).
 */
typedef struct {
    /* Component index, forwarded to opj_t1_encode_cblk */
    OPJ_UINT32 compno;
    /* Resolution index of the band within the tile component */
    OPJ_UINT32 resno;
    /* Code-block to encode */
    opj_tcd_cblk_enc_t* cblk;
    /* Tile the code-block belongs to; its distotile field accumulates the */
    /* distortion returned by each job */
    opj_tcd_tile_t *tile;
    /* Band the code-block belongs to */
    opj_tcd_band_t* band;
    /* Tile component holding the coefficients to encode */
    opj_tcd_tilecomp_t* tilec;
    /* Coding parameters of the tile component (qmfbid, cblksty) */
    opj_tccp_t* tccp;
    /* MCT norms and their count, forwarded to opj_t1_encode_cblk */
    const OPJ_FLOAT64 * mct_norms;
    OPJ_UINT32 mct_numcomps;
    /* Status flag shared by all jobs: a job is skipped when it is already */
    /* OPJ_FALSE, and sets it to OPJ_FALSE on failure */
    volatile OPJ_BOOL* pret;
    /* Mutex taken around the update of tile->distotile. May be NULL. */
    opj_mutex_t* mutex;
} opj_t1_cblk_encode_processing_job_t;
2107
2108 /** Procedure to deal with a asynchronous code-block encoding job.
2109  *
2110  * @param user_data Pointer to a opj_t1_cblk_encode_processing_job_t* structure
2111  * @param tls       TLS handle.
2112  */
2113 static void opj_t1_clbl_encode_processor(void* user_data, opj_tls_t* tls)
2114 {
2115     opj_t1_cblk_encode_processing_job_t* job =
2116         (opj_t1_cblk_encode_processing_job_t*)user_data;
2117     opj_tcd_cblk_enc_t* cblk = job->cblk;
2118     const opj_tcd_band_t* band = job->band;
2119     const opj_tcd_tilecomp_t* tilec = job->tilec;
2120     const opj_tccp_t* tccp = job->tccp;
2121     const OPJ_UINT32 resno = job->resno;
2122     opj_t1_t* t1;
2123     const OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
2124
2125     OPJ_INT32* OPJ_RESTRICT tiledp;
2126     OPJ_UINT32 cblk_w;
2127     OPJ_UINT32 cblk_h;
2128     OPJ_UINT32 i, j;
2129
2130     OPJ_INT32 x = cblk->x0 - band->x0;
2131     OPJ_INT32 y = cblk->y0 - band->y0;
2132
2133     if (!*(job->pret)) {
2134         opj_free(job);
2135         return;
2136     }
2137
2138     t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
2139     if (t1 == NULL) {
2140         t1 = opj_t1_create(OPJ_TRUE); /* OPJ_TRUE == T1 for encoding */
2141         opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
2142     }
2143
2144     if (band->bandno & 1) {
2145         opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2146         x += pres->x1 - pres->x0;
2147     }
2148     if (band->bandno & 2) {
2149         opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2150         y += pres->y1 - pres->y0;
2151     }
2152
2153     if (!opj_t1_allocate_buffers(
2154                 t1,
2155                 (OPJ_UINT32)(cblk->x1 - cblk->x0),
2156                 (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
2157         *(job->pret) = OPJ_FALSE;
2158         opj_free(job);
2159         return;
2160     }
2161
2162     cblk_w = t1->w;
2163     cblk_h = t1->h;
2164
2165     tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
2166
2167     if (tccp->qmfbid == 1) {
2168         /* Do multiplication on unsigned type, even if the
2169             * underlying type is signed, to avoid potential
2170             * int overflow on large value (the output will be
2171             * incorrect in such situation, but whatever...)
2172             * This assumes complement-to-2 signed integer
2173             * representation
2174             * Fixes https://github.com/uclouvain/openjpeg/issues/1053
2175             */
2176         OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp;
2177         OPJ_UINT32* OPJ_RESTRICT t1data = (OPJ_UINT32*) t1->data;
2178         /* Change from "natural" order to "zigzag" order of T1 passes */
2179         for (j = 0; j < (cblk_h & ~3U); j += 4) {
2180             for (i = 0; i < cblk_w; ++i) {
2181                 t1data[0] = tiledp_u[(j + 0) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2182                 t1data[1] = tiledp_u[(j + 1) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2183                 t1data[2] = tiledp_u[(j + 2) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2184                 t1data[3] = tiledp_u[(j + 3) * tile_w + i] << T1_NMSEDEC_FRACBITS;
2185                 t1data += 4;
2186             }
2187         }
2188         if (j < cblk_h) {
2189             for (i = 0; i < cblk_w; ++i) {
2190                 OPJ_UINT32 k;
2191                 for (k = j; k < cblk_h; k++) {
2192                     t1data[0] = tiledp_u[k * tile_w + i] << T1_NMSEDEC_FRACBITS;
2193                     t1data ++;
2194                 }
2195             }
2196         }
2197     } else {        /* if (tccp->qmfbid == 0) */
2198         OPJ_FLOAT32* OPJ_RESTRICT tiledp_f = (OPJ_FLOAT32*) tiledp;
2199         OPJ_INT32* OPJ_RESTRICT t1data = t1->data;
2200         /* Change from "natural" order to "zigzag" order of T1 passes */
2201         for (j = 0; j < (cblk_h & ~3U); j += 4) {
2202             for (i = 0; i < cblk_w; ++i) {
2203                 t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 0) * tile_w + i] /
2204                                                    band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2205                 t1data[1] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 1) * tile_w + i] /
2206                                                    band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2207                 t1data[2] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 2) * tile_w + i] /
2208                                                    band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2209                 t1data[3] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 3) * tile_w + i] /
2210                                                    band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
2211                 t1data += 4;
2212             }
2213         }
2214         if (j < cblk_h) {
2215             for (i = 0; i < cblk_w; ++i) {
2216                 OPJ_UINT32 k;
2217                 for (k = j; k < cblk_h; k++) {
2218                     t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[k * tile_w + i] / band->stepsize)
2219                                                       * (1 << T1_NMSEDEC_FRACBITS));
2220                     t1data ++;
2221                 }
2222             }
2223         }
2224     }
2225
2226     {
2227         OPJ_FLOAT64 cumwmsedec =
2228             opj_t1_encode_cblk(
2229                 t1,
2230                 cblk,
2231                 band->bandno,
2232                 job->compno,
2233                 tilec->numresolutions - 1 - resno,
2234                 tccp->qmfbid,
2235                 band->stepsize,
2236                 tccp->cblksty,
2237                 job->tile->numcomps,
2238                 job->mct_norms,
2239                 job->mct_numcomps);
2240         if (job->mutex) {
2241             opj_mutex_lock(job->mutex);
2242         }
2243         job->tile->distotile += cumwmsedec;
2244         if (job->mutex) {
2245             opj_mutex_unlock(job->mutex);
2246         }
2247     }
2248
2249     opj_free(job);
2250 }
2251
2252
2253 OPJ_BOOL opj_t1_encode_cblks(opj_tcd_t* tcd,
2254                              opj_tcd_tile_t *tile,
2255                              opj_tcp_t *tcp,
2256                              const OPJ_FLOAT64 * mct_norms,
2257                              OPJ_UINT32 mct_numcomps
2258                             )
2259 {
2260     volatile OPJ_BOOL ret = OPJ_TRUE;
2261     opj_thread_pool_t* tp = tcd->thread_pool;
2262     OPJ_UINT32 compno, resno, bandno, precno, cblkno;
2263     opj_mutex_t* mutex = opj_mutex_create();
2264
2265     tile->distotile = 0;        /* fixed_quality */
2266
2267     for (compno = 0; compno < tile->numcomps; ++compno) {
2268         opj_tcd_tilecomp_t* tilec = &tile->comps[compno];
2269         opj_tccp_t* tccp = &tcp->tccps[compno];
2270
2271         for (resno = 0; resno < tilec->numresolutions; ++resno) {
2272             opj_tcd_resolution_t *res = &tilec->resolutions[resno];
2273
2274             for (bandno = 0; bandno < res->numbands; ++bandno) {
2275                 opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
2276
2277                 /* Skip empty bands */
2278                 if (opj_tcd_is_band_empty(band)) {
2279                     continue;
2280                 }
2281                 for (precno = 0; precno < res->pw * res->ph; ++precno) {
2282                     opj_tcd_precinct_t *prc = &band->precincts[precno];
2283
2284                     for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
2285                         opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
2286
2287                         opj_t1_cblk_encode_processing_job_t* job =
2288                             (opj_t1_cblk_encode_processing_job_t*) opj_calloc(1,
2289                                     sizeof(opj_t1_cblk_encode_processing_job_t));
2290                         if (!job) {
2291                             ret = OPJ_FALSE;
2292                             goto end;
2293                         }
2294                         job->compno = compno;
2295                         job->tile = tile;
2296                         job->resno = resno;
2297                         job->cblk = cblk;
2298                         job->band = band;
2299                         job->tilec = tilec;
2300                         job->tccp = tccp;
2301                         job->mct_norms = mct_norms;
2302                         job->mct_numcomps = mct_numcomps;
2303                         job->pret = &ret;
2304                         job->mutex = mutex;
2305                         opj_thread_pool_submit_job(tp, opj_t1_clbl_encode_processor, job);
2306
2307                     } /* cblkno */
2308                 } /* precno */
2309             } /* bandno */
2310         } /* resno  */
2311     } /* compno  */
2312
2313 end:
2314     opj_thread_pool_wait_completion(tcd->thread_pool, 0);
2315     if (mutex) {
2316         opj_mutex_destroy(mutex);
2317     }
2318
2319     return ret;
2320 }
2321
2322 /* Returns whether the pass (bpno, passtype) is terminated */
2323 static int opj_t1_enc_is_term_pass(opj_tcd_cblk_enc_t* cblk,
2324                                    OPJ_UINT32 cblksty,
2325                                    OPJ_INT32 bpno,
2326                                    OPJ_UINT32 passtype)
2327 {
2328     /* Is it the last cleanup pass ? */
2329     if (passtype == 2 && bpno == 0) {
2330         return OPJ_TRUE;
2331     }
2332
2333     if (cblksty & J2K_CCP_CBLKSTY_TERMALL) {
2334         return OPJ_TRUE;
2335     }
2336
2337     if ((cblksty & J2K_CCP_CBLKSTY_LAZY)) {
2338         /* For bypass arithmetic bypass, terminate the 4th cleanup pass */
2339         if ((bpno == ((OPJ_INT32)cblk->numbps - 4)) && (passtype == 2)) {
2340             return OPJ_TRUE;
2341         }
2342         /* and beyond terminate all the magnitude refinement passes (in raw) */
2343         /* and cleanup passes (in MQC) */
2344         if ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype > 0)) {
2345             return OPJ_TRUE;
2346         }
2347     }
2348
2349     return OPJ_FALSE;
2350 }
2351
2352
2353 /** mod fixed_quality */
2354 static OPJ_FLOAT64 opj_t1_encode_cblk(opj_t1_t *t1,
2355                                       opj_tcd_cblk_enc_t* cblk,
2356                                       OPJ_UINT32 orient,
2357                                       OPJ_UINT32 compno,
2358                                       OPJ_UINT32 level,
2359                                       OPJ_UINT32 qmfbid,
2360                                       OPJ_FLOAT64 stepsize,
2361                                       OPJ_UINT32 cblksty,
2362                                       OPJ_UINT32 numcomps,
2363                                       const OPJ_FLOAT64 * mct_norms,
2364                                       OPJ_UINT32 mct_numcomps)
2365 {
2366     OPJ_FLOAT64 cumwmsedec = 0.0;
2367
2368     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
2369
2370     OPJ_UINT32 passno;
2371     OPJ_INT32 bpno;
2372     OPJ_UINT32 passtype;
2373     OPJ_INT32 nmsedec = 0;
2374     OPJ_INT32 max;
2375     OPJ_UINT32 i, j;
2376     OPJ_BYTE type = T1_TYPE_MQ;
2377     OPJ_FLOAT64 tempwmsedec;
2378     OPJ_INT32* datap;
2379
2380 #ifdef EXTRA_DEBUG
2381     printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n",
2382            cblk->x0, cblk->y0, cblk->x1, cblk->y1, orient, compno, level);
2383 #endif
2384
2385     mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
2386
2387     max = 0;
2388     datap = t1->data;
2389     for (j = 0; j < t1->h; ++j) {
2390         const OPJ_UINT32 w = t1->w;
2391         for (i = 0; i < w; ++i, ++datap) {
2392             OPJ_INT32 tmp = *datap;
2393             if (tmp < 0) {
2394                 OPJ_UINT32 tmp_unsigned;
2395                 max = opj_int_max(max, -tmp);
2396                 tmp_unsigned = opj_to_smr(tmp);
2397                 memcpy(datap, &tmp_unsigned, sizeof(OPJ_INT32));
2398             } else {
2399                 max = opj_int_max(max, tmp);
2400             }
2401         }
2402     }
2403
2404     cblk->numbps = max ? (OPJ_UINT32)((opj_int_floorlog2(max) + 1) -
2405                                       T1_NMSEDEC_FRACBITS) : 0;
2406     if (cblk->numbps == 0) {
2407         cblk->totalpasses = 0;
2408         return cumwmsedec;
2409     }
2410
2411     bpno = (OPJ_INT32)(cblk->numbps - 1);
2412     passtype = 2;
2413
2414     opj_mqc_resetstates(mqc);
2415     opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2416     opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2417     opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2418     opj_mqc_init_enc(mqc, cblk->data);
2419
2420     for (passno = 0; bpno >= 0; ++passno) {
2421         opj_tcd_pass_t *pass = &cblk->passes[passno];
2422         type = ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype < 2) &&
2423                 (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2424
2425         /* If the previous pass was terminating, we need to reset the encoder */
2426         if (passno > 0 && cblk->passes[passno - 1].term) {
2427             if (type == T1_TYPE_RAW) {
2428                 opj_mqc_bypass_init_enc(mqc);
2429             } else {
2430                 opj_mqc_restart_init_enc(mqc);
2431             }
2432         }
2433
2434         switch (passtype) {
2435         case 0:
2436             opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty);
2437             break;
2438         case 1:
2439             opj_t1_enc_refpass(t1, bpno, &nmsedec, type);
2440             break;
2441         case 2:
2442             opj_t1_enc_clnpass(t1, bpno, &nmsedec, cblksty);
2443             /* code switch SEGMARK (i.e. SEGSYM) */
2444             if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
2445                 opj_mqc_segmark_enc(mqc);
2446             }
2447             break;
2448         }
2449
2450         /* fixed_quality */
2451         tempwmsedec = opj_t1_getwmsedec(nmsedec, compno, level, orient, bpno, qmfbid,
2452                                         stepsize, numcomps, mct_norms, mct_numcomps) ;
2453         cumwmsedec += tempwmsedec;
2454         pass->distortiondec = cumwmsedec;
2455
2456         if (opj_t1_enc_is_term_pass(cblk, cblksty, bpno, passtype)) {
2457             /* If it is a terminated pass, terminate it */
2458             if (type == T1_TYPE_RAW) {
2459                 opj_mqc_bypass_flush_enc(mqc, cblksty & J2K_CCP_CBLKSTY_PTERM);
2460             } else {
2461                 if (cblksty & J2K_CCP_CBLKSTY_PTERM) {
2462                     opj_mqc_erterm_enc(mqc);
2463                 } else {
2464                     opj_mqc_flush(mqc);
2465                 }
2466             }
2467             pass->term = 1;
2468             pass->rate = opj_mqc_numbytes(mqc);
2469         } else {
2470             /* Non terminated pass */
2471             OPJ_UINT32 rate_extra_bytes;
2472             if (type == T1_TYPE_RAW) {
2473                 rate_extra_bytes = opj_mqc_bypass_get_extra_bytes(
2474                                        mqc, (cblksty & J2K_CCP_CBLKSTY_PTERM));
2475             } else {
2476                 rate_extra_bytes = 3;
2477             }
2478             pass->term = 0;
2479             pass->rate = opj_mqc_numbytes(mqc) + rate_extra_bytes;
2480         }
2481
2482         if (++passtype == 3) {
2483             passtype = 0;
2484             bpno--;
2485         }
2486
2487         /* Code-switch "RESET" */
2488         if (cblksty & J2K_CCP_CBLKSTY_RESET) {
2489             opj_mqc_reset_enc(mqc);
2490         }
2491     }
2492
2493     cblk->totalpasses = passno;
2494
2495     if (cblk->totalpasses) {
2496         /* Make sure that pass rates are increasing */
2497         OPJ_UINT32 last_pass_rate = opj_mqc_numbytes(mqc);
2498         for (passno = cblk->totalpasses; passno > 0;) {
2499             opj_tcd_pass_t *pass = &cblk->passes[--passno];
2500             if (pass->rate > last_pass_rate) {
2501                 pass->rate = last_pass_rate;
2502             } else {
2503                 last_pass_rate = pass->rate;
2504             }
2505         }
2506     }
2507
2508     for (passno = 0; passno < cblk->totalpasses; passno++) {
2509         opj_tcd_pass_t *pass = &cblk->passes[passno];
2510
2511         /* Prevent generation of FF as last data byte of a pass*/
2512         /* For terminating passes, the flushing procedure ensured this already */
2513         assert(pass->rate > 0);
2514         if (cblk->data[pass->rate - 1] == 0xFF) {
2515             pass->rate--;
2516         }
2517         pass->len = pass->rate - (passno == 0 ? 0 : cblk->passes[passno - 1].rate);
2518     }
2519
2520 #ifdef EXTRA_DEBUG
2521     printf(" len=%d\n", (cblk->totalpasses) ? opj_mqc_numbytes(mqc) : 0);
2522
2523     /* Check that there not 0xff >=0x90 sequences */
2524     if (cblk->totalpasses) {
2525         OPJ_UINT32 i;
2526         OPJ_UINT32 len = opj_mqc_numbytes(mqc);
2527         for (i = 1; i < len; ++i) {
2528             if (cblk->data[i - 1] == 0xff && cblk->data[i] >= 0x90) {
2529                 printf("0xff %02x at offset %d\n", cblk->data[i], i - 1);
2530                 abort();
2531             }
2532         }
2533     }
2534 #endif
2535
2536     return cumwmsedec;
2537 }