Irreversible decoding: align code more closely to the standard by avoid messing up...
[openjpeg.git] / src / lib / openjp2 / t1.c
1 /*
2  * The copyright in this software is being made available under the 2-clauses
3  * BSD License, included below. This software may be subject to other third
4  * party and contributor rights, including patent rights, and no such rights
5  * are granted under this license.
6  *
7  * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
8  * Copyright (c) 2002-2014, Professor Benoit Macq
9  * Copyright (c) 2001-2003, David Janssens
10  * Copyright (c) 2002-2003, Yannick Verschueren
11  * Copyright (c) 2003-2007, Francois-Olivier Devaux
12  * Copyright (c) 2003-2014, Antonin Descampe
13  * Copyright (c) 2005, Herve Drolon, FreeImage Team
14  * Copyright (c) 2007, Callum Lerwick <seg@haxxed.com>
15  * Copyright (c) 2012, Carl Hetherington
16  * Copyright (c) 2017, IntoPIX SA <support@intopix.com>
17  * All rights reserved.
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions
21  * are met:
22  * 1. Redistributions of source code must retain the above copyright
23  *    notice, this list of conditions and the following disclaimer.
24  * 2. Redistributions in binary form must reproduce the above copyright
25  *    notice, this list of conditions and the following disclaimer in the
26  *    documentation and/or other materials provided with the distribution.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
29  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38  * POSSIBILITY OF SUCH DAMAGE.
39  */
40
41 #define OPJ_SKIP_POISON
42 #include "opj_includes.h"
43
44 #ifdef __SSE__
45 #include <xmmintrin.h>
46 #endif
47 #ifdef __SSE2__
48 #include <emmintrin.h>
49 #endif
50
51 #if defined(__GNUC__)
52 #pragma GCC poison malloc calloc realloc free
53 #endif
54
55 #include "t1_luts.h"
56
57 /** @defgroup T1 T1 - Implementation of the tier-1 coding */
58 /*@{*/
59
/**
 * Access the flags entry for sample column x, sample row y.
 *
 * Relies on a variable named `t1` being in scope (exposing `flags` and `w`).
 * One flags entry is shared by four consecutive rows (hence y / 4). The
 * "+ 1" offsets and the "w + 2" row stride skip one extra entry at the start
 * of each flags row and one extra flags row at the top of the array.
 *
 * Both arguments are parenthesised so that compound expressions such as
 * T1_FLAGS(i, k + 4) index the intended entry.
 */
#define T1_FLAGS(x, y) (t1->flags[(x) + 1 + (((y) / 4) + 1) * (t1->w + 2)])
61
/**
 * Make `curctx` point at entry `ctxno` of the MQ coder's context table.
 *
 * Note: `mqc` is NOT a macro parameter; the expansion refers to whatever
 * variable named `mqc` is in scope at the point of use.
 */
#define opj_t1_setcurctx(curctx, ctxno) \
    (curctx) = &((mqc)->ctxs[(OPJ_UINT32)(ctxno)])
63
64 /** @name Local static functions */
65 /*@{*/
66
67 static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f);
68 static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f);
69 static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos);
70 static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos);
71 static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
72                                        OPJ_UINT32 s, OPJ_UINT32 stride,
73                                        OPJ_UINT32 vsc);
74
75
76 /**
77 Decode significant pass
78 */
79
80 static INLINE void opj_t1_dec_sigpass_step_raw(
81     opj_t1_t *t1,
82     opj_flag_t *flagsp,
83     OPJ_INT32 *datap,
84     OPJ_INT32 oneplushalf,
85     OPJ_UINT32 vsc,
86     OPJ_UINT32 row);
87 static INLINE void opj_t1_dec_sigpass_step_mqc(
88     opj_t1_t *t1,
89     opj_flag_t *flagsp,
90     OPJ_INT32 *datap,
91     OPJ_INT32 oneplushalf,
92     OPJ_UINT32 row,
93     OPJ_UINT32 flags_stride,
94     OPJ_UINT32 vsc);
95
96 /**
97 Encode significant pass
98 */
99 static void opj_t1_enc_sigpass(opj_t1_t *t1,
100                                OPJ_INT32 bpno,
101                                OPJ_INT32 *nmsedec,
102                                OPJ_BYTE type,
103                                OPJ_UINT32 cblksty);
104
105 /**
106 Decode significant pass
107 */
108 static void opj_t1_dec_sigpass_raw(
109     opj_t1_t *t1,
110     OPJ_INT32 bpno,
111     OPJ_INT32 cblksty);
112
113 /**
114 Encode refinement pass
115 */
116 static void opj_t1_enc_refpass(opj_t1_t *t1,
117                                OPJ_INT32 bpno,
118                                OPJ_INT32 *nmsedec,
119                                OPJ_BYTE type);
120
121 /**
122 Decode refinement pass
123 */
124 static void opj_t1_dec_refpass_raw(
125     opj_t1_t *t1,
126     OPJ_INT32 bpno);
127
128
129 /**
130 Decode refinement pass
131 */
132
133 static INLINE void  opj_t1_dec_refpass_step_raw(
134     opj_t1_t *t1,
135     opj_flag_t *flagsp,
136     OPJ_INT32 *datap,
137     OPJ_INT32 poshalf,
138     OPJ_UINT32 row);
139 static INLINE void opj_t1_dec_refpass_step_mqc(
140     opj_t1_t *t1,
141     opj_flag_t *flagsp,
142     OPJ_INT32 *datap,
143     OPJ_INT32 poshalf,
144     OPJ_UINT32 row);
145
146
147 /**
148 Decode clean-up pass
149 */
150
151 static void opj_t1_dec_clnpass_step(
152     opj_t1_t *t1,
153     opj_flag_t *flagsp,
154     OPJ_INT32 *datap,
155     OPJ_INT32 oneplushalf,
156     OPJ_UINT32 row,
157     OPJ_UINT32 vsc);
158
159 /**
160 Encode clean-up pass
161 */
162 static void opj_t1_enc_clnpass(
163     opj_t1_t *t1,
164     OPJ_INT32 bpno,
165     OPJ_INT32 *nmsedec,
166     OPJ_UINT32 cblksty);
167
168 static OPJ_FLOAT64 opj_t1_getwmsedec(
169     OPJ_INT32 nmsedec,
170     OPJ_UINT32 compno,
171     OPJ_UINT32 level,
172     OPJ_UINT32 orient,
173     OPJ_INT32 bpno,
174     OPJ_UINT32 qmfbid,
175     OPJ_FLOAT64 stepsize,
176     OPJ_UINT32 numcomps,
177     const OPJ_FLOAT64 * mct_norms,
178     OPJ_UINT32 mct_numcomps);
179
180 /** Return "cumwmsedec" that should be used to increase tile->distotile */
181 static double opj_t1_encode_cblk(opj_t1_t *t1,
182                                  opj_tcd_cblk_enc_t* cblk,
183                                  OPJ_UINT32 orient,
184                                  OPJ_UINT32 compno,
185                                  OPJ_UINT32 level,
186                                  OPJ_UINT32 qmfbid,
187                                  OPJ_FLOAT64 stepsize,
188                                  OPJ_UINT32 cblksty,
189                                  OPJ_UINT32 numcomps,
190                                  const OPJ_FLOAT64 * mct_norms,
191                                  OPJ_UINT32 mct_numcomps);
192
193 /**
194 Decode 1 code-block
195 @param t1 T1 handle
196 @param cblk Code-block coding parameters
197 @param orient
198 @param roishift Region of interest shifting value
199 @param cblksty Code-block style
200 @param p_manager the event manager
201 @param p_manager_mutex mutex for the event manager
202 @param check_pterm whether PTERM correct termination should be checked
203 */
204 static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
205                                    opj_tcd_cblk_dec_t* cblk,
206                                    OPJ_UINT32 orient,
207                                    OPJ_UINT32 roishift,
208                                    OPJ_UINT32 cblksty,
209                                    opj_event_mgr_t *p_manager,
210                                    opj_mutex_t* p_manager_mutex,
211                                    OPJ_BOOL check_pterm);
212
213 static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1,
214                                         OPJ_UINT32 w,
215                                         OPJ_UINT32 h);
216
217 /*@}*/
218
219 /*@}*/
220
221 /* ----------------------------------------------------------------------- */
222
223 static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f)
224 {
225     return mqc->lut_ctxno_zc_orient[(f & T1_SIGMA_NEIGHBOURS)];
226 }
227
228 static INLINE OPJ_UINT32 opj_t1_getctxtno_sc_or_spb_index(OPJ_UINT32 fX,
229         OPJ_UINT32 pfX,
230         OPJ_UINT32 nfX,
231         OPJ_UINT32 ci)
232 {
233     /*
234       0 pfX T1_CHI_THIS           T1_LUT_SGN_W
235       1 tfX T1_SIGMA_1            T1_LUT_SIG_N
236       2 nfX T1_CHI_THIS           T1_LUT_SGN_E
237       3 tfX T1_SIGMA_3            T1_LUT_SIG_W
238       4  fX T1_CHI_(THIS - 1)     T1_LUT_SGN_N
239       5 tfX T1_SIGMA_5            T1_LUT_SIG_E
240       6  fX T1_CHI_(THIS + 1)     T1_LUT_SGN_S
241       7 tfX T1_SIGMA_7            T1_LUT_SIG_S
242     */
243
244     OPJ_UINT32 lu = (fX >> (ci * 3U)) & (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 |
245                                          T1_SIGMA_7);
246
247     lu |= (pfX >> (T1_CHI_THIS_I      + (ci * 3U))) & (1U << 0);
248     lu |= (nfX >> (T1_CHI_THIS_I - 2U + (ci * 3U))) & (1U << 2);
249     if (ci == 0U) {
250         lu |= (fX >> (T1_CHI_0_I - 4U)) & (1U << 4);
251     } else {
252         lu |= (fX >> (T1_CHI_1_I - 4U + ((ci - 1U) * 3U))) & (1U << 4);
253     }
254     lu |= (fX >> (T1_CHI_2_I - 6U + (ci * 3U))) & (1U << 6);
255     return lu;
256 }
257
258 static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 lu)
259 {
260     return lut_ctxno_sc[lu];
261 }
262
263 static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f)
264 {
265     OPJ_UINT32 tmp = (f & T1_SIGMA_NEIGHBOURS) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG;
266     OPJ_UINT32 tmp2 = (f & T1_MU_0) ? T1_CTXNO_MAG + 2 : tmp;
267     return tmp2;
268 }
269
270 static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 lu)
271 {
272     return lut_spb[lu];
273 }
274
275 static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos)
276 {
277     if (bitpos > 0) {
278         return lut_nmsedec_sig[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
279     }
280
281     return lut_nmsedec_sig0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
282 }
283
284 static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos)
285 {
286     if (bitpos > 0) {
287         return lut_nmsedec_ref[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
288     }
289
290     return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
291 }
292
/**
 * Record that the coefficient at row `ci` of the flags entry `*flagsp` has
 * just become significant with sign bit `s`, and propagate that fact to the
 * neighbouring flags entries.
 *
 * @param flags  lvalue holding the current entry (either *flagsp itself or a
 *               local copy that the caller writes back later)
 * @param flagsp pointer to the current entry inside the flags array
 * @param ci     row index inside the entry (0..3)
 * @param s      sign bit (0 or 1), must have an unsigned type
 * @param stride number of flags entries per flags row
 * @param vsc    when non-zero, the entry above is left untouched for ci == 0
 *
 * Every parameter is parenthesised in the expansion so that compound
 * arguments (e.g. `j - k`) are shifted and compared as a whole.
 */
#define opj_t1_update_flags_macro(flags, flagsp, ci, s, stride, vsc) \
{ \
    /* previous column: it now has a significant east neighbour */ \
    (flagsp)[-1] |= T1_SIGMA_5 << (3U * (ci)); \
 \
    /* mark target as significant and store its sign */ \
    (flags) |= (((s) << T1_CHI_1_I) | T1_SIGMA_4) << (3U * (ci)); \
 \
    /* next column: it now has a significant west neighbour */ \
    (flagsp)[1] |= T1_SIGMA_3 << (3U * (ci)); \
 \
    /* first row of the entry: update the three entries of the row above \
       (skipped when vsc is set) */ \
    if ((ci) == 0U && !(vsc)) { \
        opj_flag_t* north = (flagsp) - (stride); \
        *north |= ((s) << T1_CHI_5_I) | T1_SIGMA_16; \
        north[-1] |= T1_SIGMA_17; \
        north[1] |= T1_SIGMA_15; \
    } \
 \
    /* last row of the entry: update the three entries of the row below */ \
    if ((ci) == 3U) { \
        opj_flag_t* south = (flagsp) + (stride); \
        *south |= ((s) << T1_CHI_0_I) | T1_SIGMA_1; \
        south[-1] |= T1_SIGMA_2; \
        south[1] |= T1_SIGMA_0; \
    } \
}
320
321
322 static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
323                                        OPJ_UINT32 s, OPJ_UINT32 stride,
324                                        OPJ_UINT32 vsc)
325 {
326     opj_t1_update_flags_macro(*flagsp, flagsp, ci, s, stride, vsc);
327 }
328
329 /**
330 Encode significant pass
331 */
332 static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1,
333         opj_flag_t *flagsp,
334         OPJ_INT32 *datap,
335         OPJ_INT32 bpno,
336         OPJ_INT32 one,
337         OPJ_INT32 *nmsedec,
338         OPJ_BYTE type,
339         OPJ_UINT32 ci,
340         OPJ_UINT32 vsc)
341 {
342     OPJ_UINT32 v;
343
344     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
345
346     OPJ_UINT32 const flags = *flagsp;
347
348     if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
349             (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
350         OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
351         v = (opj_int_abs(*datap) & one) ? 1 : 0;
352 #ifdef DEBUG_ENC_SIG
353         fprintf(stderr, "   ctxt1=%d\n", ctxt1);
354 #endif
355         opj_mqc_setcurctx(mqc, ctxt1);
356         if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
357             opj_mqc_bypass_enc(mqc, v);
358         } else {
359             opj_mqc_encode(mqc, v);
360         }
361         if (v) {
362             OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index(
363                                 *flagsp,
364                                 flagsp[-1], flagsp[1],
365                                 ci);
366             OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu);
367             v = *datap < 0 ? 1U : 0U;
368             *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
369                                               (OPJ_UINT32)bpno);
370 #ifdef DEBUG_ENC_SIG
371             fprintf(stderr, "   ctxt2=%d\n", ctxt2);
372 #endif
373             opj_mqc_setcurctx(mqc, ctxt2);
374             if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
375                 opj_mqc_bypass_enc(mqc, v);
376             } else {
377                 OPJ_UINT32 spb = opj_t1_getspb(lu);
378 #ifdef DEBUG_ENC_SIG
379                 fprintf(stderr, "   spb=%d\n", spb);
380 #endif
381                 opj_mqc_encode(mqc, v ^ spb);
382             }
383             opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
384         }
385         *flagsp |= T1_PI_THIS << (ci * 3U);
386     }
387 }
388
389 static INLINE void opj_t1_dec_sigpass_step_raw(
390     opj_t1_t *t1,
391     opj_flag_t *flagsp,
392     OPJ_INT32 *datap,
393     OPJ_INT32 oneplushalf,
394     OPJ_UINT32 vsc,
395     OPJ_UINT32 ci)
396 {
397     OPJ_UINT32 v;
398     opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
399
400     OPJ_UINT32 const flags = *flagsp;
401
402     if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
403             (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
404         if (opj_mqc_raw_decode(mqc)) {
405             v = opj_mqc_raw_decode(mqc);
406             *datap = v ? -oneplushalf : oneplushalf;
407             opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
408         }
409         *flagsp |= T1_PI_THIS << (ci * 3U);
410     }
411 }
412
/**
 * MQ-coded significance-pass step for one coefficient, in macro form so that
 * the MQ decoder state can be kept in caller-provided variables.
 *
 * @param flags        lvalue holding the current flags entry
 * @param flagsp       pointer to that entry inside the flags array
 * @param flags_stride number of flags entries per flags row
 * @param data         pointer to the row-0 coefficient of the column
 * @param data_stride  distance between two rows of coefficients
 * @param ci           row index (0..3) inside the flags entry
 * @param mqc          MQ coder; a variable literally named `mqc` must also be
 *                     in scope because opj_t1_setcurctx refers to it
 * @param curctx,a,c,ct MQ decoder state lvalues, passed through unchanged to
 *                     opj_mqc_decode_macro
 * @param v            scratch lvalue receiving the decoded bits
 * @param oneplushalf  magnitude assigned to a newly significant coefficient
 * @param vsc          forwarded to opj_t1_update_flags_macro
 *
 * Value parameters are parenthesised in the expansion so that compound
 * arguments are evaluated as a whole.
 */
#define opj_t1_dec_sigpass_step_mqc_macro(flags, flagsp, flags_stride, data, \
                                          data_stride, ci, mqc, curctx, \
                                          v, a, c, ct, oneplushalf, vsc) \
{ \
    /* only for not-yet-significant, not-yet-visited coefficients that have \
       at least one significant neighbour */ \
    if (((flags) & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == 0U && \
        ((flags) & (T1_SIGMA_NEIGHBOURS << ((ci) * 3U))) != 0U) { \
        OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, (flags) >> ((ci) * 3U)); \
        opj_t1_setcurctx(curctx, ctxt1); \
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
        if (v) { \
            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
                                (flags), \
                                (flagsp)[-1], (flagsp)[1], \
                                (ci)); \
            OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
            OPJ_UINT32 spb = opj_t1_getspb(lu); \
            opj_t1_setcurctx(curctx, ctxt2); \
            opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
            /* decoded bit XOR spb gives the sign */ \
            v = v ^ spb; \
            (data)[(ci) * (data_stride)] = v ? -(oneplushalf) : (oneplushalf); \
            opj_t1_update_flags_macro(flags, flagsp, (ci), v, flags_stride, vsc); \
        } \
        (flags) |= T1_PI_THIS << ((ci) * 3U); \
    } \
}
438
439 static INLINE void opj_t1_dec_sigpass_step_mqc(
440     opj_t1_t *t1,
441     opj_flag_t *flagsp,
442     OPJ_INT32 *datap,
443     OPJ_INT32 oneplushalf,
444     OPJ_UINT32 ci,
445     OPJ_UINT32 flags_stride,
446     OPJ_UINT32 vsc)
447 {
448     OPJ_UINT32 v;
449
450     opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
451     opj_t1_dec_sigpass_step_mqc_macro(*flagsp, flagsp, flags_stride, datap,
452                                       0, ci, mqc, mqc->curctx,
453                                       v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
454 }
455
456 static void opj_t1_enc_sigpass(opj_t1_t *t1,
457                                OPJ_INT32 bpno,
458                                OPJ_INT32 *nmsedec,
459                                OPJ_BYTE type,
460                                OPJ_UINT32 cblksty
461                               )
462 {
463     OPJ_UINT32 i, k;
464     OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
465     opj_flag_t* f = &T1_FLAGS(0, 0);
466     OPJ_UINT32 const extra = 2;
467
468     *nmsedec = 0;
469 #ifdef DEBUG_ENC_SIG
470     fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno);
471 #endif
472     for (k = 0; k < (t1->h & ~3U); k += 4) {
473 #ifdef DEBUG_ENC_SIG
474         fprintf(stderr, " k=%d\n", k);
475 #endif
476         for (i = 0; i < t1->w; ++i) {
477 #ifdef DEBUG_ENC_SIG
478             fprintf(stderr, " i=%d\n", i);
479 #endif
480             if (*f == 0U) {
481                 /* Nothing to do for any of the 4 data points */
482                 f++;
483                 continue;
484             }
485             opj_t1_enc_sigpass_step(
486                 t1,
487                 f,
488                 &t1->data[((k + 0) * t1->data_stride) + i],
489                 bpno,
490                 one,
491                 nmsedec,
492                 type,
493                 0, cblksty & J2K_CCP_CBLKSTY_VSC);
494             opj_t1_enc_sigpass_step(
495                 t1,
496                 f,
497                 &t1->data[((k + 1) * t1->data_stride) + i],
498                 bpno,
499                 one,
500                 nmsedec,
501                 type,
502                 1, 0);
503             opj_t1_enc_sigpass_step(
504                 t1,
505                 f,
506                 &t1->data[((k + 2) * t1->data_stride) + i],
507                 bpno,
508                 one,
509                 nmsedec,
510                 type,
511                 2, 0);
512             opj_t1_enc_sigpass_step(
513                 t1,
514                 f,
515                 &t1->data[((k + 3) * t1->data_stride) + i],
516                 bpno,
517                 one,
518                 nmsedec,
519                 type,
520                 3, 0);
521             ++f;
522         }
523         f += extra;
524     }
525
526     if (k < t1->h) {
527         OPJ_UINT32 j;
528 #ifdef DEBUG_ENC_SIG
529         fprintf(stderr, " k=%d\n", k);
530 #endif
531         for (i = 0; i < t1->w; ++i) {
532 #ifdef DEBUG_ENC_SIG
533             fprintf(stderr, " i=%d\n", i);
534 #endif
535             if (*f == 0U) {
536                 /* Nothing to do for any of the 4 data points */
537                 f++;
538                 continue;
539             }
540             for (j = k; j < t1->h; ++j) {
541                 opj_t1_enc_sigpass_step(
542                     t1,
543                     f,
544                     &t1->data[(j * t1->data_stride) + i],
545                     bpno,
546                     one,
547                     nmsedec,
548                     type,
549                     j - k,
550                     (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0));
551             }
552             ++f;
553         }
554     }
555 }
556
557 static void opj_t1_dec_sigpass_raw(
558     opj_t1_t *t1,
559     OPJ_INT32 bpno,
560     OPJ_INT32 cblksty)
561 {
562     OPJ_INT32 one, half, oneplushalf;
563     OPJ_UINT32 i, j, k;
564     OPJ_INT32 *data = t1->data;
565     opj_flag_t *flagsp = &T1_FLAGS(0, 0);
566     const OPJ_UINT32 l_w = t1->w;
567     one = 1 << bpno;
568     half = one >> 1;
569     oneplushalf = one | half;
570
571     for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
572         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
573             opj_flag_t flags = *flagsp;
574             if (flags != 0) {
575                 opj_t1_dec_sigpass_step_raw(
576                     t1,
577                     flagsp,
578                     data,
579                     oneplushalf,
580                     cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
581                     0U);
582                 opj_t1_dec_sigpass_step_raw(
583                     t1,
584                     flagsp,
585                     data + l_w,
586                     oneplushalf,
587                     OPJ_FALSE, /* vsc */
588                     1U);
589                 opj_t1_dec_sigpass_step_raw(
590                     t1,
591                     flagsp,
592                     data + 2 * l_w,
593                     oneplushalf,
594                     OPJ_FALSE, /* vsc */
595                     2U);
596                 opj_t1_dec_sigpass_step_raw(
597                     t1,
598                     flagsp,
599                     data + 3 * l_w,
600                     oneplushalf,
601                     OPJ_FALSE, /* vsc */
602                     3U);
603             }
604         }
605     }
606     if (k < t1->h) {
607         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
608             for (j = 0; j < t1->h - k; ++j) {
609                 opj_t1_dec_sigpass_step_raw(
610                     t1,
611                     flagsp,
612                     data + j * l_w,
613                     oneplushalf,
614                     cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
615                     j);
616             }
617         }
618     }
619 }
620
/**
 * Body of the MQ-coded significance-pass decoders. Expanded once per
 * (vsc, w, h, flags_stride) combination so that each specialised function
 * sees those values as written at the expansion site.
 *
 * Complete stripes of four rows are decoded with the MQ decoder state held
 * in local variables (DOWNLOAD_MQC_VARIABLES / UPLOAD_MQC_VARIABLES); the
 * trailing partial stripe goes through opj_t1_dec_sigpass_step_mqc(), which
 * works on the state stored in the structure, so the upload must come first.
 *
 * @param t1           T1 handle
 * @param bpno         bit-plane number
 * @param vsc          vsc value applied to row 0 of each stripe
 * @param w            block width (also used as the data row stride)
 * @param h            block height
 * @param flags_stride number of flags entries per flags row
 *
 * Parameters are parenthesised in the expansion so that compound arguments
 * are evaluated as a whole.
 */
#define opj_t1_dec_sigpass_mqc_internal(t1, bpno, vsc, w, h, flags_stride) \
{ \
        OPJ_INT32 one, half, oneplushalf; \
        OPJ_UINT32 i, j, k; \
        register OPJ_INT32 *data = (t1)->data; \
        register opj_flag_t *flagsp = &(t1)->flags[(flags_stride) + 1]; \
        const OPJ_UINT32 l_w = (w); \
        opj_mqc_t* mqc = &((t1)->mqc); \
        DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
        register OPJ_UINT32 v; \
        one = 1 << (bpno); \
        half = one >> 1; \
        oneplushalf = one | half; \
        /* complete stripes of four rows */ \
        for (k = 0; k < ((h) & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
                for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                        /* work on a local copy, written back once per column */ \
                        opj_flag_t flags = *flagsp; \
                        if( flags != 0 ) { \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 0, mqc, curctx, v, a, c, ct, oneplushalf, vsc); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 1, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 2, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 3, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            *flagsp = flags; \
                        } \
                } \
        } \
        UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
        /* trailing partial stripe (1 to 3 rows), if any */ \
        if( k < (h) ) { \
            for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                for (j = 0; j < (h) - k; ++j) { \
                        opj_t1_dec_sigpass_step_mqc(t1, flagsp, \
                            data + j * l_w, oneplushalf, j, flags_stride, vsc); \
                } \
            } \
        } \
}
664
665 static void opj_t1_dec_sigpass_mqc_64x64_novsc(
666     opj_t1_t *t1,
667     OPJ_INT32 bpno)
668 {
669     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
670 }
671
672 static void opj_t1_dec_sigpass_mqc_64x64_vsc(
673     opj_t1_t *t1,
674     OPJ_INT32 bpno)
675 {
676     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
677 }
678
679 static void opj_t1_dec_sigpass_mqc_generic_novsc(
680     opj_t1_t *t1,
681     OPJ_INT32 bpno)
682 {
683     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
684                                     t1->w + 2U);
685 }
686
687 static void opj_t1_dec_sigpass_mqc_generic_vsc(
688     opj_t1_t *t1,
689     OPJ_INT32 bpno)
690 {
691     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
692                                     t1->w + 2U);
693 }
694
695 static void opj_t1_dec_sigpass_mqc(
696     opj_t1_t *t1,
697     OPJ_INT32 bpno,
698     OPJ_INT32 cblksty)
699 {
700     if (t1->w == 64 && t1->h == 64) {
701         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
702             opj_t1_dec_sigpass_mqc_64x64_vsc(t1, bpno);
703         } else {
704             opj_t1_dec_sigpass_mqc_64x64_novsc(t1, bpno);
705         }
706     } else {
707         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
708             opj_t1_dec_sigpass_mqc_generic_vsc(t1, bpno);
709         } else {
710             opj_t1_dec_sigpass_mqc_generic_novsc(t1, bpno);
711         }
712     }
713 }
714
715 /**
716 Encode refinement pass step
717 */
718 static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1,
719         opj_flag_t *flagsp,
720         OPJ_INT32 *datap,
721         OPJ_INT32 bpno,
722         OPJ_INT32 one,
723         OPJ_INT32 *nmsedec,
724         OPJ_BYTE type,
725         OPJ_UINT32 ci)
726 {
727     OPJ_UINT32 v;
728
729     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
730
731     OPJ_UINT32 const shift_flags =
732         (*flagsp >> (ci * 3U));
733
734     if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) {
735         OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags);
736         *nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap),
737                                           (OPJ_UINT32)bpno);
738         v = (opj_int_abs(*datap) & one) ? 1 : 0;
739 #ifdef DEBUG_ENC_REF
740         fprintf(stderr, "  ctxt=%d\n", ctxt);
741 #endif
742         opj_mqc_setcurctx(mqc, ctxt);
743         if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
744             opj_mqc_bypass_enc(mqc, v);
745         } else {
746             opj_mqc_encode(mqc, v);
747         }
748         *flagsp |= T1_MU_THIS << (ci * 3U);
749     }
750 }
751
752
753 static INLINE void opj_t1_dec_refpass_step_raw(
754     opj_t1_t *t1,
755     opj_flag_t *flagsp,
756     OPJ_INT32 *datap,
757     OPJ_INT32 poshalf,
758     OPJ_UINT32 ci)
759 {
760     OPJ_UINT32 v;
761
762     opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
763
764     if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) ==
765             (T1_SIGMA_THIS << (ci * 3U))) {
766         v = opj_mqc_raw_decode(mqc);
767         *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf;
768         *flagsp |= T1_MU_THIS << (ci * 3U);
769     }
770 }
771
/**
 * MQ-coded refinement-pass step for one coefficient, in macro form so that
 * the MQ decoder state can be kept in caller-provided variables.
 *
 * @param flags        lvalue holding the current flags entry
 * @param data         pointer to the row-0 coefficient of the column
 * @param data_stride  distance between two rows of coefficients
 * @param ci           row index (0..3) inside the flags entry
 * @param mqc          MQ coder; a variable literally named `mqc` must also be
 *                     in scope because opj_t1_setcurctx refers to it
 * @param curctx,a,c,ct MQ decoder state lvalues, passed through unchanged to
 *                     opj_mqc_decode_macro
 * @param v            scratch lvalue receiving the decoded bit
 * @param poshalf      refinement step
 *
 * Value parameters are parenthesised in the expansion so that compound
 * arguments are evaluated as a whole.
 */
#define opj_t1_dec_refpass_step_mqc_macro(flags, data, data_stride, ci, \
                                          mqc, curctx, v, a, c, ct, poshalf) \
{ \
    /* refine only significant coefficients not visited by the sigpass */ \
    if (((flags) & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == \
            (T1_SIGMA_THIS << ((ci) * 3U))) { \
        OPJ_UINT32 ctxt = opj_t1_getctxno_mag((flags) >> ((ci) * 3U)); \
        opj_t1_setcurctx(curctx, ctxt); \
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
        /* bit 1 moves the value away from zero, bit 0 towards zero */ \
        (data)[(ci) * (data_stride)] += \
            (v ^ ((data)[(ci) * (data_stride)] < 0)) ? (poshalf) : -(poshalf); \
        (flags) |= T1_MU_THIS << ((ci) * 3U); \
    } \
}
784
785 static INLINE void opj_t1_dec_refpass_step_mqc(
786     opj_t1_t *t1,
787     opj_flag_t *flagsp,
788     OPJ_INT32 *datap,
789     OPJ_INT32 poshalf,
790     OPJ_UINT32 ci)
791 {
792     OPJ_UINT32 v;
793
794     opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
795     opj_t1_dec_refpass_step_mqc_macro(*flagsp, datap, 0, ci,
796                                       mqc, mqc->curctx, v, mqc->a, mqc->c,
797                                       mqc->ct, poshalf);
798 }
799
800 static void opj_t1_enc_refpass(
801     opj_t1_t *t1,
802     OPJ_INT32 bpno,
803     OPJ_INT32 *nmsedec,
804     OPJ_BYTE type)
805 {
806     OPJ_UINT32 i, k;
807     const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
808     opj_flag_t* f = &T1_FLAGS(0, 0);
809     const OPJ_UINT32 extra = 2U;
810
811     *nmsedec = 0;
812 #ifdef DEBUG_ENC_REF
813     fprintf(stderr, "enc_refpass: bpno=%d\n", bpno);
814 #endif
815     for (k = 0; k < (t1->h & ~3U); k += 4) {
816 #ifdef DEBUG_ENC_REF
817         fprintf(stderr, " k=%d\n", k);
818 #endif
819         for (i = 0; i < t1->w; ++i) {
820 #ifdef DEBUG_ENC_REF
821             fprintf(stderr, " i=%d\n", i);
822 #endif
823             if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
824                 /* none significant */
825                 f++;
826                 continue;
827             }
828             if ((*f & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
829                     (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) {
830                 /* all processed by sigpass */
831                 f++;
832                 continue;
833             }
834
835             opj_t1_enc_refpass_step(
836                 t1,
837                 f,
838                 &t1->data[((k + 0) * t1->data_stride) + i],
839                 bpno,
840                 one,
841                 nmsedec,
842                 type,
843                 0);
844             opj_t1_enc_refpass_step(
845                 t1,
846                 f,
847                 &t1->data[((k + 1) * t1->data_stride) + i],
848                 bpno,
849                 one,
850                 nmsedec,
851                 type,
852                 1);
853             opj_t1_enc_refpass_step(
854                 t1,
855                 f,
856                 &t1->data[((k + 2) * t1->data_stride) + i],
857                 bpno,
858                 one,
859                 nmsedec,
860                 type,
861                 2);
862             opj_t1_enc_refpass_step(
863                 t1,
864                 f,
865                 &t1->data[((k + 3) * t1->data_stride) + i],
866                 bpno,
867                 one,
868                 nmsedec,
869                 type,
870                 3);
871             ++f;
872         }
873         f += extra;
874     }
875
876     if (k < t1->h) {
877         OPJ_UINT32 j;
878 #ifdef DEBUG_ENC_REF
879         fprintf(stderr, " k=%d\n", k);
880 #endif
881         for (i = 0; i < t1->w; ++i) {
882 #ifdef DEBUG_ENC_REF
883             fprintf(stderr, " i=%d\n", i);
884 #endif
885             if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
886                 /* none significant */
887                 f++;
888                 continue;
889             }
890             for (j = k; j < t1->h; ++j) {
891                 opj_t1_enc_refpass_step(
892                     t1,
893                     f,
894                     &t1->data[(j * t1->data_stride) + i],
895                     bpno,
896                     one,
897                     nmsedec,
898                     type,
899                     j - k);
900             }
901             ++f;
902         }
903     }
904 }
905
906
907 static void opj_t1_dec_refpass_raw(
908     opj_t1_t *t1,
909     OPJ_INT32 bpno)
910 {
911     OPJ_INT32 one, poshalf;
912     OPJ_UINT32 i, j, k;
913     OPJ_INT32 *data = t1->data;
914     opj_flag_t *flagsp = &T1_FLAGS(0, 0);
915     const OPJ_UINT32 l_w = t1->w;
916     one = 1 << bpno;
917     poshalf = one >> 1;
918     for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
919         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
920             opj_flag_t flags = *flagsp;
921             if (flags != 0) {
922                 opj_t1_dec_refpass_step_raw(
923                     t1,
924                     flagsp,
925                     data,
926                     poshalf,
927                     0U);
928                 opj_t1_dec_refpass_step_raw(
929                     t1,
930                     flagsp,
931                     data + l_w,
932                     poshalf,
933                     1U);
934                 opj_t1_dec_refpass_step_raw(
935                     t1,
936                     flagsp,
937                     data + 2 * l_w,
938                     poshalf,
939                     2U);
940                 opj_t1_dec_refpass_step_raw(
941                     t1,
942                     flagsp,
943                     data + 3 * l_w,
944                     poshalf,
945                     3U);
946             }
947         }
948     }
949     if (k < t1->h) {
950         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
951             for (j = 0; j < t1->h - k; ++j) {
952                 opj_t1_dec_refpass_step_raw(
953                     t1,
954                     flagsp,
955                     data + j * l_w,
956                     poshalf,
957                     j);
958             }
959         }
960     }
961 }
962
/*
 * Body shared by the MQ-coded magnitude refinement pass decoders.
 *
 * t1           Tier-1 handle (provides data, flags and the MQ coder)
 * bpno         bit-plane number; the refinement amount is (1 << bpno) >> 1
 * w, h         code block width and height
 * flags_stride number of entries in one row of the flags array
 *
 * Full stripes of four rows are decoded with the MQ coder state held in
 * local variables (DOWNLOAD_MQC_VARIABLES / UPLOAD_MQC_VARIABLES). The
 * trailing partial stripe, if any, goes through opj_t1_dec_refpass_step_mqc()
 * once that state has been written back.
 */
#define opj_t1_dec_refpass_mqc_internal(t1, bpno, w, h, flags_stride) \
{ \
    OPJ_INT32 poshalf = (1 << (bpno)) >> 1; \
    OPJ_UINT32 i, j, k; \
    const OPJ_UINT32 l_w = (w); \
    opj_mqc_t* mqc = &((t1)->mqc); \
    register OPJ_INT32 *data = (t1)->data; \
    register opj_flag_t *flagsp = &(t1)->flags[(flags_stride) + 1]; \
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
    register OPJ_UINT32 v; \
    for (k = 0; k < ((h) & ~3u); k += 4, data += 3 * l_w, flagsp += 2) { \
        for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
            opj_flag_t flags = *flagsp; \
            if (flags == 0) { \
                /* nothing to refine in this column of the stripe */ \
                continue; \
            } \
            opj_t1_dec_refpass_step_mqc_macro( \
                flags, data, l_w, 0, \
                mqc, curctx, v, a, c, ct, poshalf); \
            opj_t1_dec_refpass_step_mqc_macro( \
                flags, data, l_w, 1, \
                mqc, curctx, v, a, c, ct, poshalf); \
            opj_t1_dec_refpass_step_mqc_macro( \
                flags, data, l_w, 2, \
                mqc, curctx, v, a, c, ct, poshalf); \
            opj_t1_dec_refpass_step_mqc_macro( \
                flags, data, l_w, 3, \
                mqc, curctx, v, a, c, ct, poshalf); \
            *flagsp = flags; \
        } \
    } \
    UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
    if (k < (h)) { \
        for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
            for (j = 0; j < (h) - k; ++j) { \
                opj_t1_dec_refpass_step_mqc(t1, flagsp, data + j * l_w, poshalf, j); \
            } \
        } \
    } \
}
1004
1005 static void opj_t1_dec_refpass_mqc_64x64(
1006     opj_t1_t *t1,
1007     OPJ_INT32 bpno)
1008 {
1009     opj_t1_dec_refpass_mqc_internal(t1, bpno, 64, 64, 66);
1010 }
1011
1012 static void opj_t1_dec_refpass_mqc_generic(
1013     opj_t1_t *t1,
1014     OPJ_INT32 bpno)
1015 {
1016     opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->w + 2U);
1017 }
1018
1019 static void opj_t1_dec_refpass_mqc(
1020     opj_t1_t *t1,
1021     OPJ_INT32 bpno)
1022 {
1023     if (t1->w == 64 && t1->h == 64) {
1024         opj_t1_dec_refpass_mqc_64x64(t1, bpno);
1025     } else {
1026         opj_t1_dec_refpass_mqc_generic(t1, bpno);
1027     }
1028 }
1029
1030 /**
1031 Encode clean-up pass step
1032 */
1033 static void opj_t1_enc_clnpass_step(
1034     opj_t1_t *t1,
1035     opj_flag_t *flagsp,
1036     OPJ_INT32 *datap,
1037     OPJ_INT32 bpno,
1038     OPJ_INT32 one,
1039     OPJ_INT32 *nmsedec,
1040     OPJ_UINT32 agg,
1041     OPJ_UINT32 runlen,
1042     OPJ_UINT32 lim,
1043     OPJ_UINT32 cblksty)
1044 {
1045     OPJ_UINT32 v;
1046     OPJ_UINT32 ci;
1047     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1048
1049     const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 |
1050                               T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1051
1052     if ((*flagsp & check) == check) {
1053         if (runlen == 0) {
1054             *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1055         } else if (runlen == 1) {
1056             *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3);
1057         } else if (runlen == 2) {
1058             *flagsp &= ~(T1_PI_2 | T1_PI_3);
1059         } else if (runlen == 3) {
1060             *flagsp &= ~(T1_PI_3);
1061         }
1062         return;
1063     }
1064
1065     for (ci = runlen; ci < lim; ++ci) {
1066         OPJ_UINT32 vsc;
1067         opj_flag_t flags;
1068         OPJ_UINT32 ctxt1;
1069
1070         flags = *flagsp;
1071
1072         if ((agg != 0) && (ci == runlen)) {
1073             goto LABEL_PARTIAL;
1074         }
1075
1076         if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {
1077             ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
1078 #ifdef DEBUG_ENC_CLN
1079             printf("   ctxt1=%d\n", ctxt1);
1080 #endif
1081             opj_mqc_setcurctx(mqc, ctxt1);
1082             v = (opj_int_abs(*datap) & one) ? 1 : 0;
1083             opj_mqc_encode(mqc, v);
1084             if (v) {
1085                 OPJ_UINT32 ctxt2, spb;
1086                 OPJ_UINT32 lu;
1087 LABEL_PARTIAL:
1088                 lu = opj_t1_getctxtno_sc_or_spb_index(
1089                          *flagsp,
1090                          flagsp[-1], flagsp[1],
1091                          ci);
1092                 *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
1093                                                   (OPJ_UINT32)bpno);
1094                 ctxt2 = opj_t1_getctxno_sc(lu);
1095 #ifdef DEBUG_ENC_CLN
1096                 printf("   ctxt2=%d\n", ctxt2);
1097 #endif
1098                 opj_mqc_setcurctx(mqc, ctxt2);
1099
1100                 v = *datap < 0 ? 1U : 0U;
1101                 spb = opj_t1_getspb(lu);
1102 #ifdef DEBUG_ENC_CLN
1103                 printf("   spb=%d\n", spb);
1104 #endif
1105                 opj_mqc_encode(mqc, v ^ spb);
1106                 vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0;
1107                 opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc);
1108             }
1109         }
1110         *flagsp &= ~(T1_PI_THIS << (3U * ci));
1111         datap += t1->data_stride;
1112     }
1113 }
1114
/*
 * Decode clean-up pass step for row `ci` of one column.
 *
 * check_flags  when true, the row is skipped if its sigma or pi bit is set
 * partial      when true, the significance bit is not decoded (the row is
 *              taken as significant) and only the sign is decoded
 * flags        lvalue holding the flags word of the column (updated)
 * flagsp       pointer to that flags word in the flags array
 * data         coefficient pointer; the row is at data[ci * data_stride]
 * v            scratch lvalue receiving the decoded bits
 * oneplushalf  magnitude stored for a newly significant coefficient
 *
 * The remaining arguments are forwarded to the MQ decoding macros and to
 * opj_t1_update_flags_macro().
 */
#define opj_t1_dec_clnpass_step_macro(check_flags, partial, \
                                      flags, flagsp, flags_stride, data, \
                                      data_stride, ci, mqc, curctx, \
                                      v, a, c, ct, oneplushalf, vsc) \
{ \
    if (!(check_flags) || \
            !((flags) & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U)))) { \
        if (!(partial)) { \
            /* significance bit, zero-coding context */ \
            OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, (flags) >> ((ci) * 3U)); \
            opj_t1_setcurctx(curctx, ctxt1); \
            opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
        } \
        if ((partial) || v) { \
            /* sign bit, sign-coding context */ \
            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
                                flags, flagsp[-1], flagsp[1], \
                                ci); \
            opj_t1_setcurctx(curctx, opj_t1_getctxno_sc(lu)); \
            opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
            v = v ^ opj_t1_getspb(lu); \
            data[(ci) * (data_stride)] = v ? -(oneplushalf) : (oneplushalf); \
            opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
        } \
    } \
}
1142
1143 static void opj_t1_dec_clnpass_step(
1144     opj_t1_t *t1,
1145     opj_flag_t *flagsp,
1146     OPJ_INT32 *datap,
1147     OPJ_INT32 oneplushalf,
1148     OPJ_UINT32 ci,
1149     OPJ_UINT32 vsc)
1150 {
1151     OPJ_UINT32 v;
1152
1153     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1154     opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE,
1155                                   *flagsp, flagsp, t1->w + 2U, datap,
1156                                   0, ci, mqc, mqc->curctx,
1157                                   v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
1158 }
1159
1160 static void opj_t1_enc_clnpass(
1161     opj_t1_t *t1,
1162     OPJ_INT32 bpno,
1163     OPJ_INT32 *nmsedec,
1164     OPJ_UINT32 cblksty)
1165 {
1166     OPJ_UINT32 i, k;
1167     const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
1168     OPJ_UINT32 agg, runlen;
1169
1170     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1171
1172     *nmsedec = 0;
1173 #ifdef DEBUG_ENC_CLN
1174     printf("enc_clnpass: bpno=%d\n", bpno);
1175 #endif
1176     for (k = 0; k < (t1->h & ~3U); k += 4) {
1177 #ifdef DEBUG_ENC_CLN
1178         printf(" k=%d\n", k);
1179 #endif
1180         for (i = 0; i < t1->w; ++i) {
1181 #ifdef DEBUG_ENC_CLN
1182             printf("  i=%d\n", i);
1183 #endif
1184             agg = !(T1_FLAGS(i, k));
1185 #ifdef DEBUG_ENC_CLN
1186             printf("   agg=%d\n", agg);
1187 #endif
1188             if (agg) {
1189                 for (runlen = 0; runlen < 4; ++runlen) {
1190                     if (opj_int_abs(t1->data[((k + runlen)*t1->data_stride) + i]) & one) {
1191                         break;
1192                     }
1193                 }
1194                 opj_mqc_setcurctx(mqc, T1_CTXNO_AGG);
1195                 opj_mqc_encode(mqc, runlen != 4);
1196                 if (runlen == 4) {
1197                     continue;
1198                 }
1199                 opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
1200                 opj_mqc_encode(mqc, runlen >> 1);
1201                 opj_mqc_encode(mqc, runlen & 1);
1202             } else {
1203                 runlen = 0;
1204             }
1205             opj_t1_enc_clnpass_step(
1206                 t1,
1207                 &T1_FLAGS(i, k),
1208                 &t1->data[((k + runlen) * t1->data_stride) + i],
1209                 bpno,
1210                 one,
1211                 nmsedec,
1212                 agg,
1213                 runlen,
1214                 4U,
1215                 cblksty);
1216         }
1217     }
1218     if (k < t1->h) {
1219         agg = 0;
1220         runlen = 0;
1221 #ifdef DEBUG_ENC_CLN
1222         printf(" k=%d\n", k);
1223 #endif
1224         for (i = 0; i < t1->w; ++i) {
1225 #ifdef DEBUG_ENC_CLN
1226             printf("  i=%d\n", i);
1227             printf("   agg=%d\n", agg);
1228 #endif
1229             opj_t1_enc_clnpass_step(
1230                 t1,
1231                 &T1_FLAGS(i, k),
1232                 &t1->data[((k + runlen) * t1->data_stride) + i],
1233                 bpno,
1234                 one,
1235                 nmsedec,
1236                 agg,
1237                 runlen,
1238                 t1->h - k,
1239                 cblksty);
1240         }
1241     }
1242 }
1243
/*
 * Body shared by the clean-up pass decoders.
 *
 * t1           Tier-1 handle (provides data, flags and the MQ coder)
 * bpno         bit-plane number
 * vsc          forwarded to the step for row 0 of each stripe; the other
 *              rows always get OPJ_FALSE
 * w, h         code block width and height
 * flags_stride number of entries in one row of the flags array
 *
 * Full stripes of four rows are decoded with the MQ coder state held in
 * local variables. A column whose flags word is zero is decoded in
 * aggregation mode: one symbol says whether anything becomes significant,
 * then two symbols give the first significant row (runlen). The trailing
 * partial stripe goes through opj_t1_dec_clnpass_step() once the MQ coder
 * state has been written back.
 */
#define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \
{ \
    OPJ_INT32 one = 1 << (bpno); \
    OPJ_INT32 half = one >> 1; \
    OPJ_INT32 oneplushalf = one | half; \
    OPJ_UINT32 runlen; \
    OPJ_UINT32 i, j, k; \
    const OPJ_UINT32 l_w = (w); \
    opj_mqc_t* mqc = &((t1)->mqc); \
    register OPJ_INT32 *data = (t1)->data; \
    register opj_flag_t *flagsp = &(t1)->flags[(flags_stride) + 1]; \
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
    register OPJ_UINT32 v; \
    for (k = 0; k < ((h) & ~3u); k += 4, data += 3 * l_w, flagsp += 2) { \
        for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
            opj_flag_t flags = *flagsp; \
            if (flags != 0) { \
                /* regular mode: every row checks its own sigma/pi bits */ \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 0, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, vsc); \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 1, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 2, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 3, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
            } else { \
                /* aggregation mode */ \
                OPJ_UINT32 partial = OPJ_TRUE; \
                opj_t1_setcurctx(curctx, T1_CTXNO_AGG); \
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                if (!v) { \
                    /* the whole column stays insignificant */ \
                    continue; \
                } \
                opj_t1_setcurctx(curctx, T1_CTXNO_UNI); \
                opj_mqc_decode_macro(runlen, mqc, curctx, a, c, ct); \
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                runlen = (runlen << 1) | v; \
                /* Row `runlen` only needs its sign; the rows below it */ \
                /* are decoded in full, hence the fall-through chain */ \
                switch(runlen) { \
                    case 0: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, OPJ_TRUE,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 0, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, vsc); \
                        partial = OPJ_FALSE; \
                        /* FALLTHRU */ \
                    case 1: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 1, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        partial = OPJ_FALSE; \
                        /* FALLTHRU */ \
                    case 2: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 2, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        partial = OPJ_FALSE; \
                        /* FALLTHRU */ \
                    case 3: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 3, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        break; \
                } \
            } \
            *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
        } \
    } \
    UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
    if (k < (h)) { \
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \
            for (j = 0; j < (h) - k; ++j) { \
                opj_t1_dec_clnpass_step(t1, flagsp, data + j * l_w, oneplushalf, j, vsc); \
            } \
            *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
        } \
    } \
}
1332
1333 static void opj_t1_dec_clnpass_check_segsym(opj_t1_t *t1, OPJ_INT32 cblksty)
1334 {
1335     if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
1336         opj_mqc_t* mqc = &(t1->mqc);
1337         OPJ_UINT32 v, v2;
1338         opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
1339         opj_mqc_decode(v, mqc);
1340         opj_mqc_decode(v2, mqc);
1341         v = (v << 1) | v2;
1342         opj_mqc_decode(v2, mqc);
1343         v = (v << 1) | v2;
1344         opj_mqc_decode(v2, mqc);
1345         v = (v << 1) | v2;
1346         /*
1347         if (v!=0xa) {
1348             opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v);
1349         }
1350         */
1351     }
1352 }
1353
1354 static void opj_t1_dec_clnpass_64x64_novsc(
1355     opj_t1_t *t1,
1356     OPJ_INT32 bpno)
1357 {
1358     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
1359 }
1360
1361 static void opj_t1_dec_clnpass_64x64_vsc(
1362     opj_t1_t *t1,
1363     OPJ_INT32 bpno)
1364 {
1365     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
1366 }
1367
1368 static void opj_t1_dec_clnpass_generic_novsc(
1369     opj_t1_t *t1,
1370     OPJ_INT32 bpno)
1371 {
1372     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
1373                                 t1->w + 2U);
1374 }
1375
1376 static void opj_t1_dec_clnpass_generic_vsc(
1377     opj_t1_t *t1,
1378     OPJ_INT32 bpno)
1379 {
1380     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
1381                                 t1->w + 2U);
1382 }
1383
1384 static void opj_t1_dec_clnpass(
1385     opj_t1_t *t1,
1386     OPJ_INT32 bpno,
1387     OPJ_INT32 cblksty)
1388 {
1389     if (t1->w == 64 && t1->h == 64) {
1390         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1391             opj_t1_dec_clnpass_64x64_vsc(t1, bpno);
1392         } else {
1393             opj_t1_dec_clnpass_64x64_novsc(t1, bpno);
1394         }
1395     } else {
1396         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1397             opj_t1_dec_clnpass_generic_vsc(t1, bpno);
1398         } else {
1399             opj_t1_dec_clnpass_generic_novsc(t1, bpno);
1400         }
1401     }
1402     opj_t1_dec_clnpass_check_segsym(t1, cblksty);
1403 }
1404
1405
1406 /** mod fixed_quality */
1407 static OPJ_FLOAT64 opj_t1_getwmsedec(
1408     OPJ_INT32 nmsedec,
1409     OPJ_UINT32 compno,
1410     OPJ_UINT32 level,
1411     OPJ_UINT32 orient,
1412     OPJ_INT32 bpno,
1413     OPJ_UINT32 qmfbid,
1414     OPJ_FLOAT64 stepsize,
1415     OPJ_UINT32 numcomps,
1416     const OPJ_FLOAT64 * mct_norms,
1417     OPJ_UINT32 mct_numcomps)
1418 {
1419     OPJ_FLOAT64 w1 = 1, w2, wmsedec;
1420     OPJ_ARG_NOT_USED(numcomps);
1421
1422     if (mct_norms && (compno < mct_numcomps)) {
1423         w1 = mct_norms[compno];
1424     }
1425
1426     if (qmfbid == 1) {
1427         w2 = opj_dwt_getnorm(level, orient);
1428     } else {    /* if (qmfbid == 0) */
1429         w2 = opj_dwt_getnorm_real(level, orient);
1430     }
1431
1432     wmsedec = w1 * w2 * stepsize * (1 << bpno);
1433     wmsedec *= wmsedec * nmsedec / 8192.0;
1434
1435     return wmsedec;
1436 }
1437
1438 static OPJ_BOOL opj_t1_allocate_buffers(
1439     opj_t1_t *t1,
1440     OPJ_UINT32 w,
1441     OPJ_UINT32 h)
1442 {
1443     OPJ_UINT32 flagssize;
1444     OPJ_UINT32 flags_stride;
1445
1446     /* No risk of overflow. Prior checks ensure those assert are met */
1447     /* They are per the specification */
1448     assert(w <= 1024);
1449     assert(h <= 1024);
1450     assert(w * h <= 4096);
1451
1452     /* encoder uses tile buffer, so no need to allocate */
1453     if (!t1->encoder) {
1454         OPJ_UINT32 datasize = w * h;
1455
1456         if (datasize > t1->datasize) {
1457             opj_aligned_free(t1->data);
1458             t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
1459             if (!t1->data) {
1460                 /* FIXME event manager error callback */
1461                 return OPJ_FALSE;
1462             }
1463             t1->datasize = datasize;
1464         }
1465         /* memset first arg is declared to never be null by gcc */
1466         if (t1->data != NULL) {
1467             memset(t1->data, 0, datasize * sizeof(OPJ_INT32));
1468         }
1469     }
1470
1471     flags_stride = w + 2U; /* can't be 0U */
1472
1473     flagssize = (h + 3U) / 4U + 2U;
1474
1475     flagssize *= flags_stride;
1476     {
1477         opj_flag_t* p;
1478         OPJ_UINT32 x;
1479         OPJ_UINT32 flags_height = (h + 3U) / 4U;
1480
1481         if (flagssize > t1->flagssize) {
1482
1483             opj_aligned_free(t1->flags);
1484             t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(
1485                             opj_flag_t));
1486             if (!t1->flags) {
1487                 /* FIXME event manager error callback */
1488                 return OPJ_FALSE;
1489             }
1490         }
1491         t1->flagssize = flagssize;
1492
1493         memset(t1->flags, 0, flagssize * sizeof(opj_flag_t));
1494
1495         p = &t1->flags[0];
1496         for (x = 0; x < flags_stride; ++x) {
1497             /* magic value to hopefully stop any passes being interested in this entry */
1498             *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1499         }
1500
1501         p = &t1->flags[((flags_height + 1) * flags_stride)];
1502         for (x = 0; x < flags_stride; ++x) {
1503             /* magic value to hopefully stop any passes being interested in this entry */
1504             *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1505         }
1506
1507         if (h % 4) {
1508             OPJ_UINT32 v = 0;
1509             p = &t1->flags[((flags_height) * flags_stride)];
1510             if (h % 4 == 1) {
1511                 v |= T1_PI_1 | T1_PI_2 | T1_PI_3;
1512             } else if (h % 4 == 2) {
1513                 v |= T1_PI_2 | T1_PI_3;
1514             } else if (h % 4 == 3) {
1515                 v |= T1_PI_3;
1516             }
1517             for (x = 0; x < flags_stride; ++x) {
1518                 *p++ = v;
1519             }
1520         }
1521     }
1522
1523     t1->w = w;
1524     t1->h = h;
1525
1526     return OPJ_TRUE;
1527 }
1528
1529 /* ----------------------------------------------------------------------- */
1530
1531 /* ----------------------------------------------------------------------- */
1532 /**
1533  * Creates a new Tier 1 handle
1534  * and initializes the look-up tables of the Tier-1 coder/decoder
1535  * @return a new T1 handle if successful, returns NULL otherwise
1536 */
1537 opj_t1_t* opj_t1_create(OPJ_BOOL isEncoder)
1538 {
1539     opj_t1_t *l_t1 = 00;
1540
1541     l_t1 = (opj_t1_t*) opj_calloc(1, sizeof(opj_t1_t));
1542     if (!l_t1) {
1543         return 00;
1544     }
1545
1546     l_t1->encoder = isEncoder;
1547
1548     return l_t1;
1549 }
1550
1551
1552 /**
1553  * Destroys a previously created T1 handle
1554  *
1555  * @param p_t1 Tier 1 handle to destroy
1556 */
1557 void opj_t1_destroy(opj_t1_t *p_t1)
1558 {
1559     if (! p_t1) {
1560         return;
1561     }
1562
1563     /* encoder uses tile buffer, so no need to free */
1564     if (!p_t1->encoder && p_t1->data) {
1565         opj_aligned_free(p_t1->data);
1566         p_t1->data = 00;
1567     }
1568
1569     if (p_t1->flags) {
1570         opj_aligned_free(p_t1->flags);
1571         p_t1->flags = 00;
1572     }
1573
1574     opj_free(p_t1->cblkdatabuffer);
1575
1576     opj_free(p_t1);
1577 }
1578
/* Description of one code block decoding job, consumed (and freed) by */
/* opj_t1_clbl_decode_processor(). */
typedef struct {
    /* When false, the result goes to a per-block cblk->decoded_data buffer */
    /* allocated by the job; when true it is written into tilec->data */
    OPJ_BOOL whole_tile_decoding;
    /* Resolution index of the band; resolutions[resno - 1] gives the */
    /* offset of the bands whose bandno has bit 0 or bit 1 set */
    OPJ_UINT32 resno;
    /* Code block to decode */
    opj_tcd_cblk_dec_t* cblk;
    /* Band containing the code block (bandno, origin, stepsize) */
    opj_tcd_band_t* band;
    /* Tile component containing the band (resolutions, data) */
    opj_tcd_tilecomp_t* tilec;
    /* Coding parameters of the component (roishift, cblksty, qmfbid) */
    opj_tccp_t* tccp;
    /* Copied into the Tier-1 handle before decoding */
    OPJ_BOOL mustuse_cblkdatabuffer;
    /* Shared success flag: set to OPJ_FALSE on failure, and a job that */
    /* finds it already false does not decode its code block */
    volatile OPJ_BOOL* pret;
    /* Event manager used to report errors */
    opj_event_mgr_t *p_manager;
    /* Mutex taken around uses of p_manager when not NULL */
    opj_mutex_t* p_manager_mutex;
    /* Forwarded to opj_t1_decode_cblk() */
    OPJ_BOOL check_pterm;
} opj_t1_cblk_decode_processing_job_t;
1592
1593 static void opj_t1_destroy_wrapper(void* t1)
1594 {
1595     opj_t1_destroy((opj_t1_t*) t1);
1596 }
1597
1598 static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
1599 {
1600     opj_tcd_cblk_dec_t* cblk;
1601     opj_tcd_band_t* band;
1602     opj_tcd_tilecomp_t* tilec;
1603     opj_tccp_t* tccp;
1604     OPJ_INT32* OPJ_RESTRICT datap;
1605     OPJ_UINT32 cblk_w, cblk_h;
1606     OPJ_INT32 x, y;
1607     OPJ_UINT32 i, j;
1608     opj_t1_cblk_decode_processing_job_t* job;
1609     opj_t1_t* t1;
1610     OPJ_UINT32 resno;
1611     OPJ_UINT32 tile_w;
1612
1613     job = (opj_t1_cblk_decode_processing_job_t*) user_data;
1614
1615     cblk = job->cblk;
1616
1617     if (!job->whole_tile_decoding) {
1618         cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
1619         cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
1620
1621         cblk->decoded_data = (OPJ_INT32*)opj_aligned_malloc(sizeof(OPJ_INT32) *
1622                              cblk_w * cblk_h);
1623         if (cblk->decoded_data == NULL) {
1624             if (job->p_manager_mutex) {
1625                 opj_mutex_lock(job->p_manager_mutex);
1626             }
1627             opj_event_msg(job->p_manager, EVT_ERROR,
1628                           "Cannot allocate cblk->decoded_data\n");
1629             if (job->p_manager_mutex) {
1630                 opj_mutex_unlock(job->p_manager_mutex);
1631             }
1632             *(job->pret) = OPJ_FALSE;
1633             opj_free(job);
1634             return;
1635         }
1636         /* Zero-init required */
1637         memset(cblk->decoded_data, 0, sizeof(OPJ_INT32) * cblk_w * cblk_h);
1638     } else if (cblk->decoded_data) {
1639         /* Not sure if that code path can happen, but better be */
1640         /* safe than sorry */
1641         opj_aligned_free(cblk->decoded_data);
1642         cblk->decoded_data = NULL;
1643     }
1644
1645     resno = job->resno;
1646     band = job->band;
1647     tilec = job->tilec;
1648     tccp = job->tccp;
1649     tile_w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1
1650                           -
1651                           tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
1652
1653     if (!*(job->pret)) {
1654         opj_free(job);
1655         return;
1656     }
1657
1658     t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
1659     if (t1 == NULL) {
1660         t1 = opj_t1_create(OPJ_FALSE);
1661         opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
1662     }
1663     t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;
1664
1665     if (OPJ_FALSE == opj_t1_decode_cblk(
1666                 t1,
1667                 cblk,
1668                 band->bandno,
1669                 (OPJ_UINT32)tccp->roishift,
1670                 tccp->cblksty,
1671                 job->p_manager,
1672                 job->p_manager_mutex,
1673                 job->check_pterm)) {
1674         *(job->pret) = OPJ_FALSE;
1675         opj_free(job);
1676         return;
1677     }
1678
1679     x = cblk->x0 - band->x0;
1680     y = cblk->y0 - band->y0;
1681     if (band->bandno & 1) {
1682         opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
1683         x += pres->x1 - pres->x0;
1684     }
1685     if (band->bandno & 2) {
1686         opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
1687         y += pres->y1 - pres->y0;
1688     }
1689
1690     datap = cblk->decoded_data ? cblk->decoded_data : t1->data;
1691     cblk_w = t1->w;
1692     cblk_h = t1->h;
1693
1694     if (tccp->roishift) {
1695         if (tccp->roishift >= 31) {
1696             for (j = 0; j < cblk_h; ++j) {
1697                 for (i = 0; i < cblk_w; ++i) {
1698                     datap[(j * cblk_w) + i] = 0;
1699                 }
1700             }
1701         } else {
1702             OPJ_INT32 thresh = 1 << tccp->roishift;
1703             for (j = 0; j < cblk_h; ++j) {
1704                 for (i = 0; i < cblk_w; ++i) {
1705                     OPJ_INT32 val = datap[(j * cblk_w) + i];
1706                     OPJ_INT32 mag = abs(val);
1707                     if (mag >= thresh) {
1708                         mag >>= tccp->roishift;
1709                         datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
1710                     }
1711                 }
1712             }
1713         }
1714     }
1715
1716     /* Both can be non NULL if for example decoding a full tile and then */
1717     /* partially a tile. In which case partial decoding should be the */
1718     /* priority */
1719     assert((cblk->decoded_data != NULL) || (tilec->data != NULL));
1720
1721     if (cblk->decoded_data) {
1722         OPJ_UINT32 cblk_size = cblk_w * cblk_h;
1723         if (tccp->qmfbid == 1) {
1724             for (i = 0; i < cblk_size; ++i) {
1725                 datap[i] /= 2;
1726             }
1727         } else {        /* if (tccp->qmfbid == 0) */
1728             const float stepsize = 0.5f * band->stepsize;
1729             i = 0;
1730 #ifdef __SSE2__
1731             {
1732                 const __m128 xmm_stepsize = _mm_set1_ps(stepsize);
1733                 for (; i < (cblk_size & ~15U); i += 16) {
1734                     __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1735                                                            datap + 0)));
1736                     __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1737                                                            datap + 4)));
1738                     __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1739                                                            datap + 8)));
1740                     __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1741                                                            datap + 12)));
1742                     _mm_store_ps((float*)(datap +  0), _mm_mul_ps(xmm0_data, xmm_stepsize));
1743                     _mm_store_ps((float*)(datap +  4), _mm_mul_ps(xmm1_data, xmm_stepsize));
1744                     _mm_store_ps((float*)(datap +  8), _mm_mul_ps(xmm2_data, xmm_stepsize));
1745                     _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize));
1746                     datap += 16;
1747                 }
1748             }
1749 #endif
1750             for (; i < cblk_size; ++i) {
1751                 OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * stepsize;
1752                 memcpy(datap, &tmp, sizeof(tmp));
1753                 datap++;
1754             }
1755         }
1756     } else if (tccp->qmfbid == 1) {
1757         OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w +
1758                                                        (OPJ_SIZE_T)x];
1759         for (j = 0; j < cblk_h; ++j) {
1760             i = 0;
1761             for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
1762                 OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U];
1763                 OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
1764                 OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
1765                 OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
1766                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2;
1767                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2;
1768                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2;
1769                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2;
1770             }
1771             for (; i < cblk_w; ++i) {
1772                 OPJ_INT32 tmp = datap[(j * cblk_w) + i];
1773                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2;
1774             }
1775         }
1776     } else {        /* if (tccp->qmfbid == 0) */
1777         const float stepsize = 0.5f * band->stepsize;
1778         OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y *
1779                                                          tile_w + (OPJ_SIZE_T)x];
1780         for (j = 0; j < cblk_h; ++j) {
1781             OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
1782             for (i = 0; i < cblk_w; ++i) {
1783                 OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * stepsize;
1784                 *tiledp2 = tmp;
1785                 datap++;
1786                 tiledp2++;
1787             }
1788             tiledp += tile_w;
1789         }
1790     }
1791
1792     opj_free(job);
1793 }
1794
1795
1796 void opj_t1_decode_cblks(opj_tcd_t* tcd,
1797                          volatile OPJ_BOOL* pret,
1798                          opj_tcd_tilecomp_t* tilec,
1799                          opj_tccp_t* tccp,
1800                          opj_event_mgr_t *p_manager,
1801                          opj_mutex_t* p_manager_mutex,
1802                          OPJ_BOOL check_pterm
1803                         )
1804 {
1805     opj_thread_pool_t* tp = tcd->thread_pool;
1806     OPJ_UINT32 resno, bandno, precno, cblkno;
1807
1808 #ifdef DEBUG_VERBOSE
1809     OPJ_UINT32 codeblocks_decoded = 0;
1810     printf("Enter opj_t1_decode_cblks()\n");
1811 #endif
1812
1813     for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
1814         opj_tcd_resolution_t* res = &tilec->resolutions[resno];
1815
1816         for (bandno = 0; bandno < res->numbands; ++bandno) {
1817             opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
1818
1819             for (precno = 0; precno < res->pw * res->ph; ++precno) {
1820                 opj_tcd_precinct_t* precinct = &band->precincts[precno];
1821
1822                 if (!opj_tcd_is_subband_area_of_interest(tcd,
1823                         tilec->compno,
1824                         resno,
1825                         band->bandno,
1826                         (OPJ_UINT32)precinct->x0,
1827                         (OPJ_UINT32)precinct->y0,
1828                         (OPJ_UINT32)precinct->x1,
1829                         (OPJ_UINT32)precinct->y1)) {
1830                     for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1831                         opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1832                         if (cblk->decoded_data) {
1833 #ifdef DEBUG_VERBOSE
1834                             printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1835                                    cblk->x0, cblk->y0, resno, bandno);
1836 #endif
1837                             opj_aligned_free(cblk->decoded_data);
1838                             cblk->decoded_data = NULL;
1839                         }
1840                     }
1841                     continue;
1842                 }
1843
1844                 for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1845                     opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1846                     opj_t1_cblk_decode_processing_job_t* job;
1847
1848                     if (!opj_tcd_is_subband_area_of_interest(tcd,
1849                             tilec->compno,
1850                             resno,
1851                             band->bandno,
1852                             (OPJ_UINT32)cblk->x0,
1853                             (OPJ_UINT32)cblk->y0,
1854                             (OPJ_UINT32)cblk->x1,
1855                             (OPJ_UINT32)cblk->y1)) {
1856                         if (cblk->decoded_data) {
1857 #ifdef DEBUG_VERBOSE
1858                             printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1859                                    cblk->x0, cblk->y0, resno, bandno);
1860 #endif
1861                             opj_aligned_free(cblk->decoded_data);
1862                             cblk->decoded_data = NULL;
1863                         }
1864                         continue;
1865                     }
1866
1867                     if (!tcd->whole_tile_decoding) {
1868                         OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
1869                         OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
1870                         if (cblk->decoded_data != NULL) {
1871 #ifdef DEBUG_VERBOSE
1872                             printf("Reusing codeblock %d,%d at resno=%d, bandno=%d\n",
1873                                    cblk->x0, cblk->y0, resno, bandno);
1874 #endif
1875                             continue;
1876                         }
1877                         if (cblk_w == 0 || cblk_h == 0) {
1878                             continue;
1879                         }
1880 #ifdef DEBUG_VERBOSE
1881                         printf("Decoding codeblock %d,%d at resno=%d, bandno=%d\n",
1882                                cblk->x0, cblk->y0, resno, bandno);
1883 #endif
1884                     }
1885
1886                     job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1,
1887                             sizeof(opj_t1_cblk_decode_processing_job_t));
1888                     if (!job) {
1889                         *pret = OPJ_FALSE;
1890                         return;
1891                     }
1892                     job->whole_tile_decoding = tcd->whole_tile_decoding;
1893                     job->resno = resno;
1894                     job->cblk = cblk;
1895                     job->band = band;
1896                     job->tilec = tilec;
1897                     job->tccp = tccp;
1898                     job->pret = pret;
1899                     job->p_manager_mutex = p_manager_mutex;
1900                     job->p_manager = p_manager;
1901                     job->check_pterm = check_pterm;
1902                     job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
1903                     opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job);
1904 #ifdef DEBUG_VERBOSE
1905                     codeblocks_decoded ++;
1906 #endif
1907                     if (!(*pret)) {
1908                         return;
1909                     }
1910                 } /* cblkno */
1911             } /* precno */
1912         } /* bandno */
1913     } /* resno */
1914
1915 #ifdef DEBUG_VERBOSE
1916     printf("Leave opj_t1_decode_cblks(). Number decoded: %d\n", codeblocks_decoded);
1917 #endif
1918     return;
1919 }
1920
1921
1922 static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
1923                                    opj_tcd_cblk_dec_t* cblk,
1924                                    OPJ_UINT32 orient,
1925                                    OPJ_UINT32 roishift,
1926                                    OPJ_UINT32 cblksty,
1927                                    opj_event_mgr_t *p_manager,
1928                                    opj_mutex_t* p_manager_mutex,
1929                                    OPJ_BOOL check_pterm)
1930 {
1931     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1932
1933     OPJ_INT32 bpno_plus_one;
1934     OPJ_UINT32 passtype;
1935     OPJ_UINT32 segno, passno;
1936     OPJ_BYTE* cblkdata = NULL;
1937     OPJ_UINT32 cblkdataindex = 0;
1938     OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */
1939     OPJ_INT32* original_t1_data = NULL;
1940
1941     mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
1942
1943     if (!opj_t1_allocate_buffers(
1944                 t1,
1945                 (OPJ_UINT32)(cblk->x1 - cblk->x0),
1946                 (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
1947         return OPJ_FALSE;
1948     }
1949
1950     bpno_plus_one = (OPJ_INT32)(roishift + cblk->numbps);
1951     if (bpno_plus_one >= 31) {
1952         if (p_manager_mutex) {
1953             opj_mutex_lock(p_manager_mutex);
1954         }
1955         opj_event_msg(p_manager, EVT_WARNING,
1956                       "opj_t1_decode_cblk(): unsupported bpno_plus_one = %d >= 31\n",
1957                       bpno_plus_one);
1958         if (p_manager_mutex) {
1959             opj_mutex_unlock(p_manager_mutex);
1960         }
1961         return OPJ_FALSE;
1962     }
1963     passtype = 2;
1964
1965     opj_mqc_resetstates(mqc);
1966     opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
1967     opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
1968     opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
1969
1970     /* Even if we have a single chunk, in multi-threaded decoding */
1971     /* the insertion of our synthetic marker might potentially override */
1972     /* valid codestream of other codeblocks decoded in parallel. */
1973     if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) {
1974         OPJ_UINT32 i;
1975         OPJ_UINT32 cblk_len;
1976
1977         /* Compute whole codeblock length from chunk lengths */
1978         cblk_len = 0;
1979         for (i = 0; i < cblk->numchunks; i++) {
1980             cblk_len += cblk->chunks[i].len;
1981         }
1982
1983         /* Allocate temporary memory if needed */
1984         if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
1985             cblkdata = (OPJ_BYTE*)opj_realloc(t1->cblkdatabuffer,
1986                                               cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
1987             if (cblkdata == NULL) {
1988                 return OPJ_FALSE;
1989             }
1990             t1->cblkdatabuffer = cblkdata;
1991             memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
1992             t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
1993         }
1994
1995         /* Concatenate all chunks */
1996         cblkdata = t1->cblkdatabuffer;
1997         cblk_len = 0;
1998         for (i = 0; i < cblk->numchunks; i++) {
1999             memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
2000             cblk_len += cblk->chunks[i].len;
2001         }
2002     } else if (cblk->numchunks == 1) {
2003         cblkdata = cblk->chunks[0].data;
2004     } else {
2005         /* Not sure if that can happen in practice, but avoid Coverity to */
2006         /* think we will dereference a null cblkdta pointer */
2007         return OPJ_TRUE;
2008     }
2009
2010     /* For subtile decoding, directly decode in the decoded_data buffer of */
2011     /* the code-block. Hack t1->data to point to it, and restore it later */
2012     if (cblk->decoded_data) {
2013         original_t1_data = t1->data;
2014         t1->data = cblk->decoded_data;
2015     }
2016
2017     for (segno = 0; segno < cblk->real_num_segs; ++segno) {
2018         opj_tcd_seg_t *seg = &cblk->segs[segno];
2019
2020         /* BYPASS mode */
2021         type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) &&
2022                 (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2023
2024         if (type == T1_TYPE_RAW) {
2025             opj_mqc_raw_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2026                                  OPJ_COMMON_CBLK_DATA_EXTRA);
2027         } else {
2028             opj_mqc_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2029                              OPJ_COMMON_CBLK_DATA_EXTRA);
2030         }
2031         cblkdataindex += seg->len;
2032
2033         for (passno = 0; (passno < seg->real_num_passes) &&
2034                 (bpno_plus_one >= 1); ++passno) {
2035             switch (passtype) {
2036             case 0:
2037                 if (type == T1_TYPE_RAW) {
2038                     opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2039                 } else {
2040                     opj_t1_dec_sigpass_mqc(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2041                 }
2042                 break;
2043             case 1:
2044                 if (type == T1_TYPE_RAW) {
2045                     opj_t1_dec_refpass_raw(t1, bpno_plus_one);
2046                 } else {
2047                     opj_t1_dec_refpass_mqc(t1, bpno_plus_one);
2048                 }
2049                 break;
2050             case 2:
2051                 opj_t1_dec_clnpass(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2052                 break;
2053             }
2054
2055             if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) {
2056                 opj_mqc_resetstates(mqc);
2057                 opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2058                 opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2059                 opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2060             }
2061             if (++passtype == 3) {
2062                 passtype = 0;
2063                 bpno_plus_one--;
2064             }
2065         }
2066
2067         opq_mqc_finish_dec(mqc);
2068     }
2069
2070     if (check_pterm) {
2071         if (mqc->bp + 2 < mqc->end) {
2072             if (p_manager_mutex) {
2073                 opj_mutex_lock(p_manager_mutex);
2074             }
2075             opj_event_msg(p_manager, EVT_WARNING,
2076                           "PTERM check failure: %d remaining bytes in code block (%d used / %d)\n",
2077                           (int)(mqc->end - mqc->bp) - 2,
2078                           (int)(mqc->bp - mqc->start),
2079                           (int)(mqc->end - mqc->start));
2080             if (p_manager_mutex) {
2081                 opj_mutex_unlock(p_manager_mutex);
2082             }
2083         } else if (mqc->end_of_byte_stream_counter > 2) {
2084             if (p_manager_mutex) {
2085                 opj_mutex_lock(p_manager_mutex);
2086             }
2087             opj_event_msg(p_manager, EVT_WARNING,
2088                           "PTERM check failure: %d synthetized 0xFF markers read\n",
2089                           mqc->end_of_byte_stream_counter);
2090             if (p_manager_mutex) {
2091                 opj_mutex_unlock(p_manager_mutex);
2092             }
2093         }
2094     }
2095
2096     /* Restore original t1->data is needed */
2097     if (cblk->decoded_data) {
2098         t1->data = original_t1_data;
2099     }
2100
2101     return OPJ_TRUE;
2102 }
2103
2104
2105 typedef struct {
2106     OPJ_UINT32 compno;
2107     OPJ_UINT32 resno;
2108     opj_tcd_cblk_enc_t* cblk;
2109     opj_tcd_tile_t *tile;
2110     opj_tcd_band_t* band;
2111     opj_tcd_tilecomp_t* tilec;
2112     opj_tccp_t* tccp;
2113     const OPJ_FLOAT64 * mct_norms;
2114     OPJ_UINT32 mct_numcomps;
2115     volatile OPJ_BOOL* pret;
2116     opj_mutex_t* mutex;
2117 } opj_t1_cblk_encode_processing_job_t;
2118
2119 /** Procedure to deal with a asynchronous code-block encoding job.
2120  *
2121  * @param user_data Pointer to a opj_t1_cblk_encode_processing_job_t* structure
2122  * @param tls       TLS handle.
2123  */
2124 static void opj_t1_clbl_encode_processor(void* user_data, opj_tls_t* tls)
2125 {
2126     opj_t1_cblk_encode_processing_job_t* job =
2127         (opj_t1_cblk_encode_processing_job_t*)user_data;
2128     opj_tcd_cblk_enc_t* cblk = job->cblk;
2129     const opj_tcd_band_t* band = job->band;
2130     const opj_tcd_tilecomp_t* tilec = job->tilec;
2131     const opj_tccp_t* tccp = job->tccp;
2132     const OPJ_UINT32 resno = job->resno;
2133     opj_t1_t* t1;
2134     const OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
2135
2136     OPJ_INT32* OPJ_RESTRICT tiledp;
2137     OPJ_UINT32 cblk_w;
2138     OPJ_UINT32 cblk_h;
2139     OPJ_UINT32 i, j, tileLineAdvance;
2140     OPJ_SIZE_T tileIndex = 0;
2141
2142     OPJ_INT32 x = cblk->x0 - band->x0;
2143     OPJ_INT32 y = cblk->y0 - band->y0;
2144
2145     if (!*(job->pret)) {
2146         opj_free(job);
2147         return;
2148     }
2149
2150     t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
2151     if (t1 == NULL) {
2152         t1 = opj_t1_create(OPJ_TRUE); /* OPJ_TRUE == T1 for encoding */
2153         opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
2154     }
2155
2156     if (band->bandno & 1) {
2157         opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2158         x += pres->x1 - pres->x0;
2159     }
2160     if (band->bandno & 2) {
2161         opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2162         y += pres->y1 - pres->y0;
2163     }
2164
2165     if (!opj_t1_allocate_buffers(
2166                 t1,
2167                 (OPJ_UINT32)(cblk->x1 - cblk->x0),
2168                 (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
2169         *(job->pret) = OPJ_FALSE;
2170         opj_free(job);
2171         return;
2172     }
2173
2174     cblk_w = t1->w;
2175     cblk_h = t1->h;
2176     tileLineAdvance = tile_w - cblk_w;
2177
2178     tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
2179     t1->data = tiledp;
2180     t1->data_stride = tile_w;
2181     if (tccp->qmfbid == 1) {
2182         /* Do multiplication on unsigned type, even if the
2183             * underlying type is signed, to avoid potential
2184             * int overflow on large value (the output will be
2185             * incorrect in such situation, but whatever...)
2186             * This assumes complement-to-2 signed integer
2187             * representation
2188             * Fixes https://github.com/uclouvain/openjpeg/issues/1053
2189             */
2190         OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp;
2191         for (j = 0; j < cblk_h; ++j) {
2192             for (i = 0; i < cblk_w; ++i) {
2193                 tiledp_u[tileIndex] <<= T1_NMSEDEC_FRACBITS;
2194                 tileIndex++;
2195             }
2196             tileIndex += tileLineAdvance;
2197         }
2198     } else {        /* if (tccp->qmfbid == 0) */
2199         for (j = 0; j < cblk_h; ++j) {
2200             for (i = 0; i < cblk_w; ++i) {
2201                 OPJ_FLOAT32 tmp = ((OPJ_FLOAT32*)tiledp)[tileIndex];
2202                 tiledp[tileIndex] = (OPJ_INT32)opj_lrintf((tmp / band->stepsize) *
2203                                     (1 << T1_NMSEDEC_FRACBITS));
2204                 tileIndex++;
2205             }
2206             tileIndex += tileLineAdvance;
2207         }
2208     }
2209
2210     {
2211         OPJ_FLOAT64 cumwmsedec =
2212             opj_t1_encode_cblk(
2213                 t1,
2214                 cblk,
2215                 band->bandno,
2216                 job->compno,
2217                 tilec->numresolutions - 1 - resno,
2218                 tccp->qmfbid,
2219                 band->stepsize,
2220                 tccp->cblksty,
2221                 job->tile->numcomps,
2222                 job->mct_norms,
2223                 job->mct_numcomps);
2224         if (job->mutex) {
2225             opj_mutex_lock(job->mutex);
2226         }
2227         job->tile->distotile += cumwmsedec;
2228         if (job->mutex) {
2229             opj_mutex_unlock(job->mutex);
2230         }
2231     }
2232
2233     opj_free(job);
2234 }
2235
2236
2237 OPJ_BOOL opj_t1_encode_cblks(opj_tcd_t* tcd,
2238                              opj_tcd_tile_t *tile,
2239                              opj_tcp_t *tcp,
2240                              const OPJ_FLOAT64 * mct_norms,
2241                              OPJ_UINT32 mct_numcomps
2242                             )
2243 {
2244     volatile OPJ_BOOL ret = OPJ_TRUE;
2245     opj_thread_pool_t* tp = tcd->thread_pool;
2246     OPJ_UINT32 compno, resno, bandno, precno, cblkno;
2247     opj_mutex_t* mutex = opj_mutex_create();
2248
2249     tile->distotile = 0;        /* fixed_quality */
2250
2251     for (compno = 0; compno < tile->numcomps; ++compno) {
2252         opj_tcd_tilecomp_t* tilec = &tile->comps[compno];
2253         opj_tccp_t* tccp = &tcp->tccps[compno];
2254
2255         for (resno = 0; resno < tilec->numresolutions; ++resno) {
2256             opj_tcd_resolution_t *res = &tilec->resolutions[resno];
2257
2258             for (bandno = 0; bandno < res->numbands; ++bandno) {
2259                 opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
2260
2261                 /* Skip empty bands */
2262                 if (opj_tcd_is_band_empty(band)) {
2263                     continue;
2264                 }
2265                 for (precno = 0; precno < res->pw * res->ph; ++precno) {
2266                     opj_tcd_precinct_t *prc = &band->precincts[precno];
2267
2268                     for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
2269                         opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
2270
2271                         opj_t1_cblk_encode_processing_job_t* job =
2272                             (opj_t1_cblk_encode_processing_job_t*) opj_calloc(1,
2273                                     sizeof(opj_t1_cblk_encode_processing_job_t));
2274                         if (!job) {
2275                             ret = OPJ_FALSE;
2276                             goto end;
2277                         }
2278                         job->compno = compno;
2279                         job->tile = tile;
2280                         job->resno = resno;
2281                         job->cblk = cblk;
2282                         job->band = band;
2283                         job->tilec = tilec;
2284                         job->tccp = tccp;
2285                         job->mct_norms = mct_norms;
2286                         job->mct_numcomps = mct_numcomps;
2287                         job->pret = &ret;
2288                         job->mutex = mutex;
2289                         opj_thread_pool_submit_job(tp, opj_t1_clbl_encode_processor, job);
2290
2291                     } /* cblkno */
2292                 } /* precno */
2293             } /* bandno */
2294         } /* resno  */
2295     } /* compno  */
2296
2297 end:
2298     opj_thread_pool_wait_completion(tcd->thread_pool, 0);
2299     if (mutex) {
2300         opj_mutex_destroy(mutex);
2301     }
2302
2303     return ret;
2304 }
2305
2306 /* Returns whether the pass (bpno, passtype) is terminated */
2307 static int opj_t1_enc_is_term_pass(opj_tcd_cblk_enc_t* cblk,
2308                                    OPJ_UINT32 cblksty,
2309                                    OPJ_INT32 bpno,
2310                                    OPJ_UINT32 passtype)
2311 {
2312     /* Is it the last cleanup pass ? */
2313     if (passtype == 2 && bpno == 0) {
2314         return OPJ_TRUE;
2315     }
2316
2317     if (cblksty & J2K_CCP_CBLKSTY_TERMALL) {
2318         return OPJ_TRUE;
2319     }
2320
2321     if ((cblksty & J2K_CCP_CBLKSTY_LAZY)) {
2322         /* For bypass arithmetic bypass, terminate the 4th cleanup pass */
2323         if ((bpno == ((OPJ_INT32)cblk->numbps - 4)) && (passtype == 2)) {
2324             return OPJ_TRUE;
2325         }
2326         /* and beyond terminate all the magnitude refinement passes (in raw) */
2327         /* and cleanup passes (in MQC) */
2328         if ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype > 0)) {
2329             return OPJ_TRUE;
2330         }
2331     }
2332
2333     return OPJ_FALSE;
2334 }
2335
2336
2337 /** mod fixed_quality */
2338 static OPJ_FLOAT64 opj_t1_encode_cblk(opj_t1_t *t1,
2339                                       opj_tcd_cblk_enc_t* cblk,
2340                                       OPJ_UINT32 orient,
2341                                       OPJ_UINT32 compno,
2342                                       OPJ_UINT32 level,
2343                                       OPJ_UINT32 qmfbid,
2344                                       OPJ_FLOAT64 stepsize,
2345                                       OPJ_UINT32 cblksty,
2346                                       OPJ_UINT32 numcomps,
2347                                       const OPJ_FLOAT64 * mct_norms,
2348                                       OPJ_UINT32 mct_numcomps)
2349 {
2350     OPJ_FLOAT64 cumwmsedec = 0.0;
2351
2352     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
2353
2354     OPJ_UINT32 passno;
2355     OPJ_INT32 bpno;
2356     OPJ_UINT32 passtype;
2357     OPJ_INT32 nmsedec = 0;
2358     OPJ_INT32 max;
2359     OPJ_UINT32 i, j;
2360     OPJ_BYTE type = T1_TYPE_MQ;
2361     OPJ_FLOAT64 tempwmsedec;
2362
2363 #ifdef EXTRA_DEBUG
2364     printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n",
2365            cblk->x0, cblk->y0, cblk->x1, cblk->y1, orient, compno, level);
2366 #endif
2367
2368     mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
2369
2370     max = 0;
2371     for (i = 0; i < t1->w; ++i) {
2372         for (j = 0; j < t1->h; ++j) {
2373             OPJ_INT32 tmp = abs(t1->data[i + j * t1->data_stride]);
2374             max = opj_int_max(max, tmp);
2375         }
2376     }
2377
2378     cblk->numbps = max ? (OPJ_UINT32)((opj_int_floorlog2(max) + 1) -
2379                                       T1_NMSEDEC_FRACBITS) : 0;
2380     if (cblk->numbps == 0) {
2381         cblk->totalpasses = 0;
2382         return cumwmsedec;
2383     }
2384
2385     bpno = (OPJ_INT32)(cblk->numbps - 1);
2386     passtype = 2;
2387
2388     opj_mqc_resetstates(mqc);
2389     opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2390     opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2391     opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2392     opj_mqc_init_enc(mqc, cblk->data);
2393
2394     for (passno = 0; bpno >= 0; ++passno) {
2395         opj_tcd_pass_t *pass = &cblk->passes[passno];
2396         type = ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype < 2) &&
2397                 (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2398
2399         /* If the previous pass was terminating, we need to reset the encoder */
2400         if (passno > 0 && cblk->passes[passno - 1].term) {
2401             if (type == T1_TYPE_RAW) {
2402                 opj_mqc_bypass_init_enc(mqc);
2403             } else {
2404                 opj_mqc_restart_init_enc(mqc);
2405             }
2406         }
2407
2408         switch (passtype) {
2409         case 0:
2410             opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty);
2411             break;
2412         case 1:
2413             opj_t1_enc_refpass(t1, bpno, &nmsedec, type);
2414             break;
2415         case 2:
2416             opj_t1_enc_clnpass(t1, bpno, &nmsedec, cblksty);
2417             /* code switch SEGMARK (i.e. SEGSYM) */
2418             if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
2419                 opj_mqc_segmark_enc(mqc);
2420             }
2421             break;
2422         }
2423
2424         /* fixed_quality */
2425         tempwmsedec = opj_t1_getwmsedec(nmsedec, compno, level, orient, bpno, qmfbid,
2426                                         stepsize, numcomps, mct_norms, mct_numcomps) ;
2427         cumwmsedec += tempwmsedec;
2428         pass->distortiondec = cumwmsedec;
2429
2430         if (opj_t1_enc_is_term_pass(cblk, cblksty, bpno, passtype)) {
2431             /* If it is a terminated pass, terminate it */
2432             if (type == T1_TYPE_RAW) {
2433                 opj_mqc_bypass_flush_enc(mqc, cblksty & J2K_CCP_CBLKSTY_PTERM);
2434             } else {
2435                 if (cblksty & J2K_CCP_CBLKSTY_PTERM) {
2436                     opj_mqc_erterm_enc(mqc);
2437                 } else {
2438                     opj_mqc_flush(mqc);
2439                 }
2440             }
2441             pass->term = 1;
2442             pass->rate = opj_mqc_numbytes(mqc);
2443         } else {
2444             /* Non terminated pass */
2445             OPJ_UINT32 rate_extra_bytes;
2446             if (type == T1_TYPE_RAW) {
2447                 rate_extra_bytes = opj_mqc_bypass_get_extra_bytes(
2448                                        mqc, (cblksty & J2K_CCP_CBLKSTY_PTERM));
2449             } else {
2450                 rate_extra_bytes = 3;
2451             }
2452             pass->term = 0;
2453             pass->rate = opj_mqc_numbytes(mqc) + rate_extra_bytes;
2454         }
2455
2456         if (++passtype == 3) {
2457             passtype = 0;
2458             bpno--;
2459         }
2460
2461         /* Code-switch "RESET" */
2462         if (cblksty & J2K_CCP_CBLKSTY_RESET) {
2463             opj_mqc_reset_enc(mqc);
2464         }
2465     }
2466
2467     cblk->totalpasses = passno;
2468
2469     if (cblk->totalpasses) {
2470         /* Make sure that pass rates are increasing */
2471         OPJ_UINT32 last_pass_rate = opj_mqc_numbytes(mqc);
2472         for (passno = cblk->totalpasses; passno > 0;) {
2473             opj_tcd_pass_t *pass = &cblk->passes[--passno];
2474             if (pass->rate > last_pass_rate) {
2475                 pass->rate = last_pass_rate;
2476             } else {
2477                 last_pass_rate = pass->rate;
2478             }
2479         }
2480     }
2481
2482     for (passno = 0; passno < cblk->totalpasses; passno++) {
2483         opj_tcd_pass_t *pass = &cblk->passes[passno];
2484
2485         /* Prevent generation of FF as last data byte of a pass*/
2486         /* For terminating passes, the flushing procedure ensured this already */
2487         assert(pass->rate > 0);
2488         if (cblk->data[pass->rate - 1] == 0xFF) {
2489             pass->rate--;
2490         }
2491         pass->len = pass->rate - (passno == 0 ? 0 : cblk->passes[passno - 1].rate);
2492     }
2493
2494 #ifdef EXTRA_DEBUG
2495     printf(" len=%d\n", (cblk->totalpasses) ? opj_mqc_numbytes(mqc) : 0);
2496
2497     /* Check that there not 0xff >=0x90 sequences */
2498     if (cblk->totalpasses) {
2499         OPJ_UINT32 i;
2500         OPJ_UINT32 len = opj_mqc_numbytes(mqc);
2501         for (i = 1; i < len; ++i) {
2502             if (cblk->data[i - 1] == 0xff && cblk->data[i] >= 0x90) {
2503                 printf("0xff %02x at offset %d\n", cblk->data[i], i - 1);
2504                 abort();
2505             }
2506         }
2507     }
2508 #endif
2509
2510     return cumwmsedec;
2511 }