Merge pull request #1258 from sebras/fix-issue-1257
[openjpeg.git] / src / lib / openjp2 / t1.c
1 /*
2  * The copyright in this software is being made available under the 2-clauses
3  * BSD License, included below. This software may be subject to other third
4  * party and contributor rights, including patent rights, and no such rights
5  * are granted under this license.
6  *
7  * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
8  * Copyright (c) 2002-2014, Professor Benoit Macq
9  * Copyright (c) 2001-2003, David Janssens
10  * Copyright (c) 2002-2003, Yannick Verschueren
11  * Copyright (c) 2003-2007, Francois-Olivier Devaux
12  * Copyright (c) 2003-2014, Antonin Descampe
13  * Copyright (c) 2005, Herve Drolon, FreeImage Team
14  * Copyright (c) 2007, Callum Lerwick <seg@haxxed.com>
15  * Copyright (c) 2012, Carl Hetherington
16  * Copyright (c) 2017, IntoPIX SA <support@intopix.com>
17  * All rights reserved.
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions
21  * are met:
22  * 1. Redistributions of source code must retain the above copyright
23  *    notice, this list of conditions and the following disclaimer.
24  * 2. Redistributions in binary form must reproduce the above copyright
25  *    notice, this list of conditions and the following disclaimer in the
26  *    documentation and/or other materials provided with the distribution.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
29  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38  * POSSIBILITY OF SUCH DAMAGE.
39  */
40
41 #define OPJ_SKIP_POISON
42 #include "opj_includes.h"
43
44 #ifdef __SSE__
45 #include <xmmintrin.h>
46 #endif
47 #ifdef __SSE2__
48 #include <emmintrin.h>
49 #endif
50
51 #if defined(__GNUC__)
52 #pragma GCC poison malloc calloc realloc free
53 #endif
54
55 #include "t1_luts.h"
56
57 /** @defgroup T1 T1 - Implementation of the tier-1 coding */
58 /*@{*/
59
/*
 * Flag word for column x in the 4-row stripe that contains row y.
 *
 * Relies on a variable named `t1` being in scope where the macro is expanded.
 * The index is offset by one column and by one stripe, and every stripe is
 * (t1->w + 2) flag words wide.  Both arguments are parenthesized so that
 * expression arguments (e.g. T1_FLAGS(i + 1, j + 2)) expand as intended.
 */
#define T1_FLAGS(x, y) (t1->flags[(x) + 1 + (((y) / 4) + 1) * (t1->w + 2)])
61
/*
 * Make `curctx` point at context number `ctxno` of the MQ coder.
 *
 * Relies on a variable named `mqc` being in scope where the macro is expanded.
 * The target and the whole expansion are parenthesized so the macro behaves
 * as a single assignment expression wherever it is used.
 */
#define opj_t1_setcurctx(curctx, ctxno) \
    ((curctx) = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)])
63
64 /** @name Local static functions */
65 /*@{*/
66
/* Zero-coding context number: lookup in mqc->lut_ctxno_zc_orient. */
static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f);
/* Magnitude-refinement context number (T1_CTXNO_MAG, +1 or +2). */
static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f);
/* Lookup in lut_nmsedec_sig / lut_nmsedec_sig0 (the latter when bitpos is 0). */
static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos);
/* Lookup in lut_nmsedec_ref / lut_nmsedec_ref0 (the latter when bitpos is 0). */
static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos);
/* Mark sample `ci` of *flagsp significant (sign s) and update the */
/* neighbouring flag words; function form of opj_t1_update_flags_macro. */
static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
                                       OPJ_UINT32 s, OPJ_UINT32 stride,
                                       OPJ_UINT32 vsc);
74
75
76 /**
77 Decode significant pass
78 */
79
80 static INLINE void opj_t1_dec_sigpass_step_raw(
81     opj_t1_t *t1,
82     opj_flag_t *flagsp,
83     OPJ_INT32 *datap,
84     OPJ_INT32 oneplushalf,
85     OPJ_UINT32 vsc,
86     OPJ_UINT32 row);
87 static INLINE void opj_t1_dec_sigpass_step_mqc(
88     opj_t1_t *t1,
89     opj_flag_t *flagsp,
90     OPJ_INT32 *datap,
91     OPJ_INT32 oneplushalf,
92     OPJ_UINT32 row,
93     OPJ_UINT32 flags_stride,
94     OPJ_UINT32 vsc);
95
/**
Encode significant pass
@param t1      T1 handle
@param bpno    Bit-plane number being coded
@param nmsedec Output: reset to 0, then accumulated by the per-sample steps
@param type    T1_TYPE_RAW selects bypass coding, anything else MQ coding
@param cblksty Code-block style; only J2K_CCP_CBLKSTY_VSC is tested here
*/
static void opj_t1_enc_sigpass(opj_t1_t *t1,
                               OPJ_INT32 bpno,
                               OPJ_INT32 *nmsedec,
                               OPJ_BYTE type,
                               OPJ_UINT32 cblksty);

/**
Decode significant pass (RAW variant)
@param t1      T1 handle
@param bpno    Bit-plane number being decoded
@param cblksty Code-block style; only J2K_CCP_CBLKSTY_VSC is tested here
*/
static void opj_t1_dec_sigpass_raw(
    opj_t1_t *t1,
    OPJ_INT32 bpno,
    OPJ_INT32 cblksty);

/**
Encode refinement pass
@param t1      T1 handle
@param bpno    Bit-plane number being coded
@param nmsedec Output: reset to 0, then accumulated by the per-sample steps
@param type    T1_TYPE_RAW selects bypass coding, anything else MQ coding
*/
static void opj_t1_enc_refpass(opj_t1_t *t1,
                               OPJ_INT32 bpno,
                               OPJ_INT32 *nmsedec,
                               OPJ_BYTE type);

/**
Decode refinement pass (RAW variant)
@param t1   T1 handle
@param bpno Bit-plane number being decoded
*/
static void opj_t1_dec_refpass_raw(
    opj_t1_t *t1,
    OPJ_INT32 bpno);
127
128
129 /**
130 Decode refinement pass
131 */
132
133 static INLINE void  opj_t1_dec_refpass_step_raw(
134     opj_t1_t *t1,
135     opj_flag_t *flagsp,
136     OPJ_INT32 *datap,
137     OPJ_INT32 poshalf,
138     OPJ_UINT32 row);
139 static INLINE void opj_t1_dec_refpass_step_mqc(
140     opj_t1_t *t1,
141     opj_flag_t *flagsp,
142     OPJ_INT32 *datap,
143     OPJ_INT32 poshalf,
144     OPJ_UINT32 row);
145
146
/**
Decode clean-up pass: per-sample step.
NOTE(review): the parameters look parallel to the significance-pass step
functions (flag word, coefficient, magnitude, sample index, vsc) - confirm
against the definition.
*/

static void opj_t1_dec_clnpass_step(
    opj_t1_t *t1,
    opj_flag_t *flagsp,
    OPJ_INT32 *datap,
    OPJ_INT32 oneplushalf,
    OPJ_UINT32 row,
    OPJ_UINT32 vsc);

/**
Encode clean-up pass
NOTE(review): signature mirrors opj_t1_enc_sigpass without the `type`
argument - confirm semantics against the definition.
*/
static void opj_t1_enc_clnpass(
    opj_t1_t *t1,
    OPJ_INT32 bpno,
    OPJ_INT32 *nmsedec,
    OPJ_UINT32 cblksty);

/*
 * NOTE(review): judging by the name and arguments this weights an nmsedec
 * value for a given component/level/orientation - confirm against the
 * definition.
 */
static OPJ_FLOAT64 opj_t1_getwmsedec(
    OPJ_INT32 nmsedec,
    OPJ_UINT32 compno,
    OPJ_UINT32 level,
    OPJ_UINT32 orient,
    OPJ_INT32 bpno,
    OPJ_UINT32 qmfbid,
    OPJ_FLOAT64 stepsize,
    OPJ_UINT32 numcomps,
    const OPJ_FLOAT64 * mct_norms,
    OPJ_UINT32 mct_numcomps);

/*
 * Encode 1 code-block.
 * NOTE(review): parameter meanings are not documented here - see the
 * definition.
 */
static void opj_t1_encode_cblk(opj_t1_t *t1,
                               opj_tcd_cblk_enc_t* cblk,
                               OPJ_UINT32 orient,
                               OPJ_UINT32 compno,
                               OPJ_UINT32 level,
                               OPJ_UINT32 qmfbid,
                               OPJ_FLOAT64 stepsize,
                               OPJ_UINT32 cblksty,
                               OPJ_UINT32 numcomps,
                               opj_tcd_tile_t * tile,
                               const OPJ_FLOAT64 * mct_norms,
                               OPJ_UINT32 mct_numcomps);

/**
Decode 1 code-block
@param t1 T1 handle
@param cblk Code-block coding parameters
@param orient NOTE(review): undocumented; presumably the sub-band orientation - confirm
@param roishift Region of interest shifting value
@param cblksty Code-block style
@param p_manager the event manager
@param p_manager_mutex mutex for the event manager
@param check_pterm whether PTERM correct termination should be checked
*/
static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
                                   opj_tcd_cblk_dec_t* cblk,
                                   OPJ_UINT32 orient,
                                   OPJ_UINT32 roishift,
                                   OPJ_UINT32 cblksty,
                                   opj_event_mgr_t *p_manager,
                                   opj_mutex_t* p_manager_mutex,
                                   OPJ_BOOL check_pterm);

/*
 * NOTE(review): presumably (re)sizes the data/flags buffers of t1 for a
 * w x h code-block - confirm against the definition.
 */
static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1,
                                        OPJ_UINT32 w,
                                        OPJ_UINT32 h);
216
217 /*@}*/
218
219 /*@}*/
220
221 /* ----------------------------------------------------------------------- */
222
223 static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f)
224 {
225     return mqc->lut_ctxno_zc_orient[(f & T1_SIGMA_NEIGHBOURS)];
226 }
227
228 static INLINE OPJ_UINT32 opj_t1_getctxtno_sc_or_spb_index(OPJ_UINT32 fX,
229         OPJ_UINT32 pfX,
230         OPJ_UINT32 nfX,
231         OPJ_UINT32 ci)
232 {
233     /*
234       0 pfX T1_CHI_THIS           T1_LUT_SGN_W
235       1 tfX T1_SIGMA_1            T1_LUT_SIG_N
236       2 nfX T1_CHI_THIS           T1_LUT_SGN_E
237       3 tfX T1_SIGMA_3            T1_LUT_SIG_W
238       4  fX T1_CHI_(THIS - 1)     T1_LUT_SGN_N
239       5 tfX T1_SIGMA_5            T1_LUT_SIG_E
240       6  fX T1_CHI_(THIS + 1)     T1_LUT_SGN_S
241       7 tfX T1_SIGMA_7            T1_LUT_SIG_S
242     */
243
244     OPJ_UINT32 lu = (fX >> (ci * 3U)) & (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 |
245                                          T1_SIGMA_7);
246
247     lu |= (pfX >> (T1_CHI_THIS_I      + (ci * 3U))) & (1U << 0);
248     lu |= (nfX >> (T1_CHI_THIS_I - 2U + (ci * 3U))) & (1U << 2);
249     if (ci == 0U) {
250         lu |= (fX >> (T1_CHI_0_I - 4U)) & (1U << 4);
251     } else {
252         lu |= (fX >> (T1_CHI_1_I - 4U + ((ci - 1U) * 3U))) & (1U << 4);
253     }
254     lu |= (fX >> (T1_CHI_2_I - 6U + (ci * 3U))) & (1U << 6);
255     return lu;
256 }
257
258 static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 lu)
259 {
260     return lut_ctxno_sc[lu];
261 }
262
263 static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f)
264 {
265     OPJ_UINT32 tmp = (f & T1_SIGMA_NEIGHBOURS) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG;
266     OPJ_UINT32 tmp2 = (f & T1_MU_0) ? T1_CTXNO_MAG + 2 : tmp;
267     return tmp2;
268 }
269
270 static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 lu)
271 {
272     return lut_spb[lu];
273 }
274
275 static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos)
276 {
277     if (bitpos > 0) {
278         return lut_nmsedec_sig[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
279     }
280
281     return lut_nmsedec_sig0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
282 }
283
284 static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos)
285 {
286     if (bitpos > 0) {
287         return lut_nmsedec_ref[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
288     }
289
290     return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
291 }
292
/*
 * Record that sample `ci` of the flag word has become significant with sign
 * `s`, and propagate that information to the surrounding flag words.
 *
 *   flags   lvalue holding the flag word of the current column
 *   flagsp  pointer to that flag word inside the flag array
 *   ci      index of the sample inside the flag word
 *   s       sign bit of the sample (callers pass 0 or 1)
 *   stride  distance, in flag words, between two consecutive stripes
 *   vsc     when non-zero, the stripe above is left untouched
 *
 * Every parameter use is parenthesized so that expression arguments expand
 * with the intended precedence.
 */
#define opj_t1_update_flags_macro(flags, flagsp, ci, s, stride, vsc) \
{ \
    /* previous column: its east neighbour (T1_SIGMA_5) is now significant */ \
    (flagsp)[-1] |= T1_SIGMA_5 << (3U * (ci)); \
 \
    /* mark target as significant and store its sign */ \
    (flags) |= (((s) << T1_CHI_1_I) | T1_SIGMA_4) << (3U * (ci)); \
 \
    /* next column: its west neighbour (T1_SIGMA_3) is now significant */ \
    (flagsp)[1] |= T1_SIGMA_3 << (3U * (ci)); \
 \
    /* first sample of the word: update the three words of the stripe above, */ \
    /* unless vsc is set */ \
    if ((ci) == 0U && !(vsc)) { \
        opj_flag_t* north = (flagsp) - (stride); \
        *north |= ((s) << T1_CHI_5_I) | T1_SIGMA_16; \
        north[-1] |= T1_SIGMA_17; \
        north[1] |= T1_SIGMA_15; \
    } \
 \
    /* last sample of the word: update the three words of the stripe below */ \
    if ((ci) == 3U) { \
        opj_flag_t* south = (flagsp) + (stride); \
        *south |= ((s) << T1_CHI_0_I) | T1_SIGMA_1; \
        south[-1] |= T1_SIGMA_2; \
        south[1] |= T1_SIGMA_0; \
    } \
}
320
321
/**
Function form of opj_t1_update_flags_macro, operating directly on *flagsp.
@param flagsp Flag word of the sample that became significant
@param ci     Index of the sample inside the flag word
@param s      Sign bit of the sample
@param stride Distance, in flag words, between two consecutive stripes
@param vsc    When non-zero, the stripe above is not updated
*/
static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
                                       OPJ_UINT32 s, OPJ_UINT32 stride,
                                       OPJ_UINT32 vsc)
{
    opj_t1_update_flags_macro(*flagsp, flagsp, ci, s, stride, vsc);
}
328
329 /**
330 Encode significant pass
331 */
332 static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1,
333         opj_flag_t *flagsp,
334         OPJ_INT32 *datap,
335         OPJ_INT32 bpno,
336         OPJ_INT32 one,
337         OPJ_INT32 *nmsedec,
338         OPJ_BYTE type,
339         OPJ_UINT32 ci,
340         OPJ_UINT32 vsc)
341 {
342     OPJ_UINT32 v;
343
344     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
345
346     OPJ_UINT32 const flags = *flagsp;
347
348     if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
349             (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
350         OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
351         v = (opj_int_abs(*datap) & one) ? 1 : 0;
352 #ifdef DEBUG_ENC_SIG
353         fprintf(stderr, "   ctxt1=%d\n", ctxt1);
354 #endif
355         opj_mqc_setcurctx(mqc, ctxt1);
356         if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
357             opj_mqc_bypass_enc(mqc, v);
358         } else {
359             opj_mqc_encode(mqc, v);
360         }
361         if (v) {
362             OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index(
363                                 *flagsp,
364                                 flagsp[-1], flagsp[1],
365                                 ci);
366             OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu);
367             v = *datap < 0 ? 1U : 0U;
368             *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
369                                               (OPJ_UINT32)bpno);
370 #ifdef DEBUG_ENC_SIG
371             fprintf(stderr, "   ctxt2=%d\n", ctxt2);
372 #endif
373             opj_mqc_setcurctx(mqc, ctxt2);
374             if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
375                 opj_mqc_bypass_enc(mqc, v);
376             } else {
377                 OPJ_UINT32 spb = opj_t1_getspb(lu);
378 #ifdef DEBUG_ENC_SIG
379                 fprintf(stderr, "   spb=%d\n", spb);
380 #endif
381                 opj_mqc_encode(mqc, v ^ spb);
382             }
383             opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
384         }
385         *flagsp |= T1_PI_THIS << (ci * 3U);
386     }
387 }
388
389 static INLINE void opj_t1_dec_sigpass_step_raw(
390     opj_t1_t *t1,
391     opj_flag_t *flagsp,
392     OPJ_INT32 *datap,
393     OPJ_INT32 oneplushalf,
394     OPJ_UINT32 vsc,
395     OPJ_UINT32 ci)
396 {
397     OPJ_UINT32 v;
398     opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
399
400     OPJ_UINT32 const flags = *flagsp;
401
402     if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
403             (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
404         if (opj_mqc_raw_decode(mqc)) {
405             v = opj_mqc_raw_decode(mqc);
406             *datap = v ? -oneplushalf : oneplushalf;
407             opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
408         }
409         *flagsp |= T1_PI_THIS << (ci * 3U);
410     }
411 }
412
/*
 * Decode significant pass, MQC variant: one sample.
 *
 *   flags         lvalue holding the flag word of the current column
 *   flagsp        pointer to that flag word inside the flag array
 *   flags_stride  distance, in flag words, between two consecutive stripes
 *   data          pointer to the first coefficient of the column in the stripe
 *   data_stride   distance, in coefficients, between two consecutive rows
 *   ci            index of the sample inside the flag word
 *   mqc, curctx, v, a, c, ct
 *                 MQ decoder state and scratch lvalues, handed unchanged to
 *                 opj_t1_setcurctx() and opj_mqc_decode_macro()
 *   oneplushalf   magnitude stored when the sample becomes significant
 *   vsc           forwarded to opj_t1_update_flags_macro()
 *
 * Value-like parameters are parenthesized at each use so that expression
 * arguments expand with the intended precedence.  The macro declares the
 * locals ctxt1, lu, ctxt2 and spb; arguments must not use those names.
 */
#define opj_t1_dec_sigpass_step_mqc_macro(flags, flagsp, flags_stride, data, \
                                          data_stride, ci, mqc, curctx, \
                                          v, a, c, ct, oneplushalf, vsc) \
{ \
    if (((flags) & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == 0U && \
        ((flags) & (T1_SIGMA_NEIGHBOURS << ((ci) * 3U))) != 0U) { \
        OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, (flags) >> ((ci) * 3U)); \
        opj_t1_setcurctx(curctx, ctxt1); \
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
        if (v) { \
            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
                                (flags), \
                                (flagsp)[-1], (flagsp)[1], \
                                (ci)); \
            OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
            OPJ_UINT32 spb = opj_t1_getspb(lu); \
            opj_t1_setcurctx(curctx, ctxt2); \
            opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
            /* the decoded bit is the sign XOR its prediction */ \
            v = v ^ spb; \
            (data)[(ci) * (data_stride)] = v ? -(oneplushalf) : (oneplushalf); \
            opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
        } \
        /* remember that the sample was visited in this pass */ \
        (flags) |= T1_PI_THIS << ((ci) * 3U); \
    } \
}
438
/**
Function form of opj_t1_dec_sigpass_step_mqc_macro.

Works directly on *flagsp and on the decoder fields stored in t1->mqc.
The data stride handed to the macro is 0, so the coefficient written is
always *datap whatever the value of ci.

@param t1           T1 handle
@param flagsp       Flag word holding the state of the sample
@param datap        Coefficient written when the sample becomes significant
@param oneplushalf  Magnitude stored in *datap (negated for a negative sign)
@param ci           Index of the sample inside the flag word
@param flags_stride Distance, in flag words, between two consecutive stripes
@param vsc          Forwarded to the flag-update macro
*/
static INLINE void opj_t1_dec_sigpass_step_mqc(
    opj_t1_t *t1,
    opj_flag_t *flagsp,
    OPJ_INT32 *datap,
    OPJ_INT32 oneplushalf,
    OPJ_UINT32 ci,
    OPJ_UINT32 flags_stride,
    OPJ_UINT32 vsc)
{
    OPJ_UINT32 v;

    opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
    opj_t1_dec_sigpass_step_mqc_macro(*flagsp, flagsp, flags_stride, datap,
                                      0, ci, mqc, mqc->curctx,
                                      v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
}
455
456 static void opj_t1_enc_sigpass(opj_t1_t *t1,
457                                OPJ_INT32 bpno,
458                                OPJ_INT32 *nmsedec,
459                                OPJ_BYTE type,
460                                OPJ_UINT32 cblksty
461                               )
462 {
463     OPJ_UINT32 i, k;
464     OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
465     opj_flag_t* f = &T1_FLAGS(0, 0);
466     OPJ_UINT32 const extra = 2;
467
468     *nmsedec = 0;
469 #ifdef DEBUG_ENC_SIG
470     fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno);
471 #endif
472     for (k = 0; k < (t1->h & ~3U); k += 4) {
473 #ifdef DEBUG_ENC_SIG
474         fprintf(stderr, " k=%d\n", k);
475 #endif
476         for (i = 0; i < t1->w; ++i) {
477 #ifdef DEBUG_ENC_SIG
478             fprintf(stderr, " i=%d\n", i);
479 #endif
480             if (*f == 0U) {
481                 /* Nothing to do for any of the 4 data points */
482                 f++;
483                 continue;
484             }
485             opj_t1_enc_sigpass_step(
486                 t1,
487                 f,
488                 &t1->data[((k + 0) * t1->data_stride) + i],
489                 bpno,
490                 one,
491                 nmsedec,
492                 type,
493                 0, cblksty & J2K_CCP_CBLKSTY_VSC);
494             opj_t1_enc_sigpass_step(
495                 t1,
496                 f,
497                 &t1->data[((k + 1) * t1->data_stride) + i],
498                 bpno,
499                 one,
500                 nmsedec,
501                 type,
502                 1, 0);
503             opj_t1_enc_sigpass_step(
504                 t1,
505                 f,
506                 &t1->data[((k + 2) * t1->data_stride) + i],
507                 bpno,
508                 one,
509                 nmsedec,
510                 type,
511                 2, 0);
512             opj_t1_enc_sigpass_step(
513                 t1,
514                 f,
515                 &t1->data[((k + 3) * t1->data_stride) + i],
516                 bpno,
517                 one,
518                 nmsedec,
519                 type,
520                 3, 0);
521             ++f;
522         }
523         f += extra;
524     }
525
526     if (k < t1->h) {
527         OPJ_UINT32 j;
528 #ifdef DEBUG_ENC_SIG
529         fprintf(stderr, " k=%d\n", k);
530 #endif
531         for (i = 0; i < t1->w; ++i) {
532 #ifdef DEBUG_ENC_SIG
533             fprintf(stderr, " i=%d\n", i);
534 #endif
535             if (*f == 0U) {
536                 /* Nothing to do for any of the 4 data points */
537                 f++;
538                 continue;
539             }
540             for (j = k; j < t1->h; ++j) {
541                 opj_t1_enc_sigpass_step(
542                     t1,
543                     f,
544                     &t1->data[(j * t1->data_stride) + i],
545                     bpno,
546                     one,
547                     nmsedec,
548                     type,
549                     j - k,
550                     (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0));
551             }
552             ++f;
553         }
554     }
555 }
556
557 static void opj_t1_dec_sigpass_raw(
558     opj_t1_t *t1,
559     OPJ_INT32 bpno,
560     OPJ_INT32 cblksty)
561 {
562     OPJ_INT32 one, half, oneplushalf;
563     OPJ_UINT32 i, j, k;
564     OPJ_INT32 *data = t1->data;
565     opj_flag_t *flagsp = &T1_FLAGS(0, 0);
566     const OPJ_UINT32 l_w = t1->w;
567     one = 1 << bpno;
568     half = one >> 1;
569     oneplushalf = one | half;
570
571     for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
572         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
573             opj_flag_t flags = *flagsp;
574             if (flags != 0) {
575                 opj_t1_dec_sigpass_step_raw(
576                     t1,
577                     flagsp,
578                     data,
579                     oneplushalf,
580                     cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
581                     0U);
582                 opj_t1_dec_sigpass_step_raw(
583                     t1,
584                     flagsp,
585                     data + l_w,
586                     oneplushalf,
587                     OPJ_FALSE, /* vsc */
588                     1U);
589                 opj_t1_dec_sigpass_step_raw(
590                     t1,
591                     flagsp,
592                     data + 2 * l_w,
593                     oneplushalf,
594                     OPJ_FALSE, /* vsc */
595                     2U);
596                 opj_t1_dec_sigpass_step_raw(
597                     t1,
598                     flagsp,
599                     data + 3 * l_w,
600                     oneplushalf,
601                     OPJ_FALSE, /* vsc */
602                     3U);
603             }
604         }
605     }
606     if (k < t1->h) {
607         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
608             for (j = 0; j < t1->h - k; ++j) {
609                 opj_t1_dec_sigpass_step_raw(
610                     t1,
611                     flagsp,
612                     data + j * l_w,
613                     oneplushalf,
614                     cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
615                     j);
616             }
617         }
618     }
619 }
620
/*
 * Body shared by the specialised MQC significance-pass decoders.
 *
 *   t1, bpno      T1 handle and bit-plane number
 *   vsc           handed to the per-sample step for the first row of a stripe
 *   w, h          code-block width and height (constants in the 64x64 variants)
 *   flags_stride  distance, in flag words, between two consecutive stripes
 *
 * Full stripes of four rows work on a local copy of each flag word, written
 * back once per column, using the decoder variables introduced by
 * DOWNLOAD_MQC_VARIABLES.  UPLOAD_MQC_VARIABLES runs before the rows of the
 * last partial stripe because those are decoded with the function
 * opj_t1_dec_sigpass_step_mqc(), which reads the decoder fields of t1->mqc.
 * NOTE(review): DOWNLOAD/UPLOAD_MQC_VARIABLES are defined elsewhere;
 * presumably they copy the decoder state into locals and back - confirm.
 */
#define opj_t1_dec_sigpass_mqc_internal(t1, bpno, vsc, w, h, flags_stride) \
{ \
        OPJ_INT32 one, half, oneplushalf; \
        OPJ_UINT32 i, j, k; \
        register OPJ_INT32 *data = t1->data; \
        register opj_flag_t *flagsp = &t1->flags[(flags_stride) + 1]; \
        const OPJ_UINT32 l_w = w; \
        opj_mqc_t* mqc = &(t1->mqc); \
        DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
        register OPJ_UINT32 v; \
        one = 1 << bpno; \
        half = one >> 1; \
        oneplushalf = one | half; \
        /* full stripes: skip three rows and two flag words after each one */ \
        for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
                for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                        opj_flag_t flags = *flagsp; \
                        if( flags != 0 ) { \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 0, mqc, curctx, v, a, c, ct, oneplushalf, vsc); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 1, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 2, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 3, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            /* write the locally updated flag word back */ \
                            *flagsp = flags; \
                        } \
                } \
        } \
        UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
        /* last, partial stripe */ \
        if( k < h ) { \
            for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                for (j = 0; j < h - k; ++j) { \
                        opj_t1_dec_sigpass_step_mqc(t1, flagsp, \
                            data + j * l_w, oneplushalf, j, flags_stride, vsc); \
                } \
            } \
        } \
}
664
/**
MQC significance-pass decoder specialised for 64x64 code-blocks, vsc off.
Width, height and flag stride (66 = 64 + 2) are compile-time constants.
@param t1   T1 handle
@param bpno Bit-plane number being decoded
*/
static void opj_t1_dec_sigpass_mqc_64x64_novsc(
    opj_t1_t *t1,
    OPJ_INT32 bpno)
{
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
}
671
/**
MQC significance-pass decoder specialised for 64x64 code-blocks, vsc on.
Width, height and flag stride (66 = 64 + 2) are compile-time constants.
@param t1   T1 handle
@param bpno Bit-plane number being decoded
*/
static void opj_t1_dec_sigpass_mqc_64x64_vsc(
    opj_t1_t *t1,
    OPJ_INT32 bpno)
{
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
}
678
/**
MQC significance-pass decoder for any code-block size, vsc off.
Dimensions come from t1->w and t1->h; the flag stride is t1->w + 2.
@param t1   T1 handle
@param bpno Bit-plane number being decoded
*/
static void opj_t1_dec_sigpass_mqc_generic_novsc(
    opj_t1_t *t1,
    OPJ_INT32 bpno)
{
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
                                    t1->w + 2U);
}
686
/**
MQC significance-pass decoder for any code-block size, vsc on.
Dimensions come from t1->w and t1->h; the flag stride is t1->w + 2.
@param t1   T1 handle
@param bpno Bit-plane number being decoded
*/
static void opj_t1_dec_sigpass_mqc_generic_vsc(
    opj_t1_t *t1,
    OPJ_INT32 bpno)
{
    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
                                    t1->w + 2U);
}
694
695 static void opj_t1_dec_sigpass_mqc(
696     opj_t1_t *t1,
697     OPJ_INT32 bpno,
698     OPJ_INT32 cblksty)
699 {
700     if (t1->w == 64 && t1->h == 64) {
701         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
702             opj_t1_dec_sigpass_mqc_64x64_vsc(t1, bpno);
703         } else {
704             opj_t1_dec_sigpass_mqc_64x64_novsc(t1, bpno);
705         }
706     } else {
707         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
708             opj_t1_dec_sigpass_mqc_generic_vsc(t1, bpno);
709         } else {
710             opj_t1_dec_sigpass_mqc_generic_novsc(t1, bpno);
711         }
712     }
713 }
714
715 /**
716 Encode refinement pass step
717 */
718 static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1,
719         opj_flag_t *flagsp,
720         OPJ_INT32 *datap,
721         OPJ_INT32 bpno,
722         OPJ_INT32 one,
723         OPJ_INT32 *nmsedec,
724         OPJ_BYTE type,
725         OPJ_UINT32 ci)
726 {
727     OPJ_UINT32 v;
728
729     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
730
731     OPJ_UINT32 const shift_flags =
732         (*flagsp >> (ci * 3U));
733
734     if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) {
735         OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags);
736         *nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap),
737                                           (OPJ_UINT32)bpno);
738         v = (opj_int_abs(*datap) & one) ? 1 : 0;
739 #ifdef DEBUG_ENC_REF
740         fprintf(stderr, "  ctxt=%d\n", ctxt);
741 #endif
742         opj_mqc_setcurctx(mqc, ctxt);
743         if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
744             opj_mqc_bypass_enc(mqc, v);
745         } else {
746             opj_mqc_encode(mqc, v);
747         }
748         *flagsp |= T1_MU_THIS << (ci * 3U);
749     }
750 }
751
752
753 static INLINE void opj_t1_dec_refpass_step_raw(
754     opj_t1_t *t1,
755     opj_flag_t *flagsp,
756     OPJ_INT32 *datap,
757     OPJ_INT32 poshalf,
758     OPJ_UINT32 ci)
759 {
760     OPJ_UINT32 v;
761
762     opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
763
764     if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) ==
765             (T1_SIGMA_THIS << (ci * 3U))) {
766         v = opj_mqc_raw_decode(mqc);
767         *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf;
768         *flagsp |= T1_MU_THIS << (ci * 3U);
769     }
770 }
771
/*
 * Decode refinement pass, MQC variant: one sample.
 *
 *   flags        lvalue holding the flag word of the current column
 *   data         pointer to the first coefficient of the column in the stripe
 *   data_stride  distance, in coefficients, between two consecutive rows
 *   ci           index of the sample inside the flag word
 *   mqc, curctx, v, a, c, ct
 *                MQ decoder state and scratch lvalues, handed unchanged to
 *                opj_t1_setcurctx() and opj_mqc_decode_macro()
 *   poshalf      amount added to or subtracted from the coefficient
 *
 * Value-like parameters are parenthesized at each use so that expression
 * arguments expand with the intended precedence.  The macro declares the
 * local ctxt; arguments must not use that name.
 */
#define opj_t1_dec_refpass_step_mqc_macro(flags, data, data_stride, ci, \
                                          mqc, curctx, v, a, c, ct, poshalf) \
{ \
    if (((flags) & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == \
            (T1_SIGMA_THIS << ((ci) * 3U))) { \
        OPJ_UINT32 ctxt = opj_t1_getctxno_mag((flags) >> ((ci) * 3U)); \
        opj_t1_setcurctx(curctx, ctxt); \
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
        (data)[(ci) * (data_stride)] += \
            (v ^ ((data)[(ci) * (data_stride)] < 0)) ? (poshalf) : -(poshalf); \
        /* remember that the sample has been refined */ \
        (flags) |= T1_MU_THIS << ((ci) * 3U); \
    } \
}
784
/**
Function form of opj_t1_dec_refpass_step_mqc_macro.

Works directly on *flagsp and on the decoder fields stored in t1->mqc.
The data stride handed to the macro is 0, so the coefficient updated is
always *datap whatever the value of ci.

@param t1      T1 handle
@param flagsp  Flag word holding the state of the sample
@param datap   Coefficient to refine
@param poshalf Amount added to or subtracted from *datap
@param ci      Index of the sample inside the flag word
*/
static INLINE void opj_t1_dec_refpass_step_mqc(
    opj_t1_t *t1,
    opj_flag_t *flagsp,
    OPJ_INT32 *datap,
    OPJ_INT32 poshalf,
    OPJ_UINT32 ci)
{
    OPJ_UINT32 v;

    opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
    opj_t1_dec_refpass_step_mqc_macro(*flagsp, datap, 0, ci,
                                      mqc, mqc->curctx, v, mqc->a, mqc->c,
                                      mqc->ct, poshalf);
}
799
800 static void opj_t1_enc_refpass(
801     opj_t1_t *t1,
802     OPJ_INT32 bpno,
803     OPJ_INT32 *nmsedec,
804     OPJ_BYTE type)
805 {
806     OPJ_UINT32 i, k;
807     const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
808     opj_flag_t* f = &T1_FLAGS(0, 0);
809     const OPJ_UINT32 extra = 2U;
810
811     *nmsedec = 0;
812 #ifdef DEBUG_ENC_REF
813     fprintf(stderr, "enc_refpass: bpno=%d\n", bpno);
814 #endif
815     for (k = 0; k < (t1->h & ~3U); k += 4) {
816 #ifdef DEBUG_ENC_REF
817         fprintf(stderr, " k=%d\n", k);
818 #endif
819         for (i = 0; i < t1->w; ++i) {
820 #ifdef DEBUG_ENC_REF
821             fprintf(stderr, " i=%d\n", i);
822 #endif
823             if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
824                 /* none significant */
825                 f++;
826                 continue;
827             }
828             if ((*f & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
829                     (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) {
830                 /* all processed by sigpass */
831                 f++;
832                 continue;
833             }
834
835             opj_t1_enc_refpass_step(
836                 t1,
837                 f,
838                 &t1->data[((k + 0) * t1->data_stride) + i],
839                 bpno,
840                 one,
841                 nmsedec,
842                 type,
843                 0);
844             opj_t1_enc_refpass_step(
845                 t1,
846                 f,
847                 &t1->data[((k + 1) * t1->data_stride) + i],
848                 bpno,
849                 one,
850                 nmsedec,
851                 type,
852                 1);
853             opj_t1_enc_refpass_step(
854                 t1,
855                 f,
856                 &t1->data[((k + 2) * t1->data_stride) + i],
857                 bpno,
858                 one,
859                 nmsedec,
860                 type,
861                 2);
862             opj_t1_enc_refpass_step(
863                 t1,
864                 f,
865                 &t1->data[((k + 3) * t1->data_stride) + i],
866                 bpno,
867                 one,
868                 nmsedec,
869                 type,
870                 3);
871             ++f;
872         }
873         f += extra;
874     }
875
876     if (k < t1->h) {
877         OPJ_UINT32 j;
878 #ifdef DEBUG_ENC_REF
879         fprintf(stderr, " k=%d\n", k);
880 #endif
881         for (i = 0; i < t1->w; ++i) {
882 #ifdef DEBUG_ENC_REF
883             fprintf(stderr, " i=%d\n", i);
884 #endif
885             if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
886                 /* none significant */
887                 f++;
888                 continue;
889             }
890             for (j = k; j < t1->h; ++j) {
891                 opj_t1_enc_refpass_step(
892                     t1,
893                     f,
894                     &t1->data[(j * t1->data_stride) + i],
895                     bpno,
896                     one,
897                     nmsedec,
898                     type,
899                     j - k);
900             }
901             ++f;
902         }
903     }
904 }
905
906
907 static void opj_t1_dec_refpass_raw(
908     opj_t1_t *t1,
909     OPJ_INT32 bpno)
910 {
911     OPJ_INT32 one, poshalf;
912     OPJ_UINT32 i, j, k;
913     OPJ_INT32 *data = t1->data;
914     opj_flag_t *flagsp = &T1_FLAGS(0, 0);
915     const OPJ_UINT32 l_w = t1->w;
916     one = 1 << bpno;
917     poshalf = one >> 1;
918     for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
919         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
920             opj_flag_t flags = *flagsp;
921             if (flags != 0) {
922                 opj_t1_dec_refpass_step_raw(
923                     t1,
924                     flagsp,
925                     data,
926                     poshalf,
927                     0U);
928                 opj_t1_dec_refpass_step_raw(
929                     t1,
930                     flagsp,
931                     data + l_w,
932                     poshalf,
933                     1U);
934                 opj_t1_dec_refpass_step_raw(
935                     t1,
936                     flagsp,
937                     data + 2 * l_w,
938                     poshalf,
939                     2U);
940                 opj_t1_dec_refpass_step_raw(
941                     t1,
942                     flagsp,
943                     data + 3 * l_w,
944                     poshalf,
945                     3U);
946             }
947         }
948     }
949     if (k < t1->h) {
950         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
951             for (j = 0; j < t1->h - k; ++j) {
952                 opj_t1_dec_refpass_step_raw(
953                     t1,
954                     flagsp,
955                     data + j * l_w,
956                     poshalf,
957                     j);
958             }
959         }
960     }
961 }
962
/**
 * Body shared by the MQ-coded magnitude refinement pass decoders.
 *
 * Expands to a block that walks the code-block in stripes of 4 rows. The
 * MQ decoder state is copied into local variables
 * (DOWNLOAD_MQC_VARIABLES) for the full stripes and written back
 * (UPLOAD_MQC_VARIABLES) before the remaining (h % 4) rows are handled
 * through opj_t1_dec_refpass_step_mqc().
 *
 * All macro parameters are parenthesized where they take part in an
 * expression so that compound arguments (e.g. "t1->w + 2U") expand safely.
 *
 * @param t1           Tier-1 handle (opj_t1_t*)
 * @param bpno         bit-plane number
 * @param w            code-block width, also used as data stride
 * @param h            code-block height
 * @param flags_stride number of flag entries per stripe row (w + 2)
 */
#define opj_t1_dec_refpass_mqc_internal(t1, bpno, w, h, flags_stride) \
{ \
        OPJ_INT32 one, poshalf; \
        OPJ_UINT32 i, j, k; \
        register OPJ_INT32 *data = (t1)->data; \
        /* first flag entry after the top border row and left border column */ \
        register opj_flag_t *flagsp = &(t1)->flags[(flags_stride) + 1]; \
        const OPJ_UINT32 l_w = (w); \
        opj_mqc_t* mqc = &((t1)->mqc); \
        DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
        register OPJ_UINT32 v; \
        one = 1 << (bpno); \
        poshalf = one >> 1; \
        /* full stripes of 4 rows */ \
        for (k = 0; k < ((h) & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
                for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                        opj_flag_t flags = *flagsp; \
                        if( flags != 0 ) { \
                            opj_t1_dec_refpass_step_mqc_macro( \
                                flags, data, l_w, 0, \
                                mqc, curctx, v, a, c, ct, poshalf); \
                            opj_t1_dec_refpass_step_mqc_macro( \
                                flags, data, l_w, 1, \
                                mqc, curctx, v, a, c, ct, poshalf); \
                            opj_t1_dec_refpass_step_mqc_macro( \
                                flags, data, l_w, 2, \
                                mqc, curctx, v, a, c, ct, poshalf); \
                            opj_t1_dec_refpass_step_mqc_macro( \
                                flags, data, l_w, 3, \
                                mqc, curctx, v, a, c, ct, poshalf); \
                            /* write the locally updated flag word back */ \
                            *flagsp = flags; \
                        } \
                } \
        } \
        UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
        /* remaining rows of the last, incomplete stripe */ \
        if( k < (h) ) { \
            for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                for (j = 0; j < (h) - k; ++j) { \
                        opj_t1_dec_refpass_step_mqc(t1, flagsp, data + j * l_w, poshalf, j); \
                } \
            } \
        } \
}
1004
1005 static void opj_t1_dec_refpass_mqc_64x64(
1006     opj_t1_t *t1,
1007     OPJ_INT32 bpno)
1008 {
1009     opj_t1_dec_refpass_mqc_internal(t1, bpno, 64, 64, 66);
1010 }
1011
1012 static void opj_t1_dec_refpass_mqc_generic(
1013     opj_t1_t *t1,
1014     OPJ_INT32 bpno)
1015 {
1016     opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->w + 2U);
1017 }
1018
1019 static void opj_t1_dec_refpass_mqc(
1020     opj_t1_t *t1,
1021     OPJ_INT32 bpno)
1022 {
1023     if (t1->w == 64 && t1->h == 64) {
1024         opj_t1_dec_refpass_mqc_64x64(t1, bpno);
1025     } else {
1026         opj_t1_dec_refpass_mqc_generic(t1, bpno);
1027     }
1028 }
1029
1030 /**
1031 Encode clean-up pass step
1032 */
1033 static void opj_t1_enc_clnpass_step(
1034     opj_t1_t *t1,
1035     opj_flag_t *flagsp,
1036     OPJ_INT32 *datap,
1037     OPJ_INT32 bpno,
1038     OPJ_INT32 one,
1039     OPJ_INT32 *nmsedec,
1040     OPJ_UINT32 agg,
1041     OPJ_UINT32 runlen,
1042     OPJ_UINT32 lim,
1043     OPJ_UINT32 cblksty)
1044 {
1045     OPJ_UINT32 v;
1046     OPJ_UINT32 ci;
1047     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1048
1049     const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 |
1050                               T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1051
1052     if ((*flagsp & check) == check) {
1053         if (runlen == 0) {
1054             *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1055         } else if (runlen == 1) {
1056             *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3);
1057         } else if (runlen == 2) {
1058             *flagsp &= ~(T1_PI_2 | T1_PI_3);
1059         } else if (runlen == 3) {
1060             *flagsp &= ~(T1_PI_3);
1061         }
1062         return;
1063     }
1064
1065     for (ci = runlen; ci < lim; ++ci) {
1066         OPJ_UINT32 vsc;
1067         opj_flag_t flags;
1068         OPJ_UINT32 ctxt1;
1069
1070         flags = *flagsp;
1071
1072         if ((agg != 0) && (ci == runlen)) {
1073             goto LABEL_PARTIAL;
1074         }
1075
1076         if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {
1077             ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
1078 #ifdef DEBUG_ENC_CLN
1079             printf("   ctxt1=%d\n", ctxt1);
1080 #endif
1081             opj_mqc_setcurctx(mqc, ctxt1);
1082             v = (opj_int_abs(*datap) & one) ? 1 : 0;
1083             opj_mqc_encode(mqc, v);
1084             if (v) {
1085                 OPJ_UINT32 ctxt2, spb;
1086                 OPJ_UINT32 lu;
1087 LABEL_PARTIAL:
1088                 lu = opj_t1_getctxtno_sc_or_spb_index(
1089                          *flagsp,
1090                          flagsp[-1], flagsp[1],
1091                          ci);
1092                 *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
1093                                                   (OPJ_UINT32)bpno);
1094                 ctxt2 = opj_t1_getctxno_sc(lu);
1095 #ifdef DEBUG_ENC_CLN
1096                 printf("   ctxt2=%d\n", ctxt2);
1097 #endif
1098                 opj_mqc_setcurctx(mqc, ctxt2);
1099
1100                 v = *datap < 0 ? 1U : 0U;
1101                 spb = opj_t1_getspb(lu);
1102 #ifdef DEBUG_ENC_CLN
1103                 printf("   spb=%d\n", spb);
1104 #endif
1105                 opj_mqc_encode(mqc, v ^ spb);
1106                 vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0;
1107                 opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc);
1108             }
1109         }
1110         *flagsp &= ~(T1_PI_THIS << (3U * ci));
1111         datap += t1->data_stride;
1112     }
1113 }
1114
/**
 * Decode one clean-up pass step for row 'ci' of a stripe column.
 *
 * @param check_flags  when true, the step is skipped if row 'ci' is already
 *                     significant or visited (SIGMA/PI bit set in flags)
 * @param partial      when true, the zero-coding decision is skipped and the
 *                     sign is decoded directly
 * @param flags        flag word (lvalue, updated in place)
 * @param flagsp       pointer to the flag word; its neighbours [-1] and [1]
 *                     are read for sign coding
 * @param flags_stride number of flag entries per stripe row
 * @param data         coefficient pointer
 * @param data_stride  element distance between consecutive rows of 'data'
 * @param ci           row index inside the stripe
 * @param mqc,curctx,a,c,ct  MQ decoder and its state variables
 * @param v            scratch variable receiving the decoded bits
 * @param oneplushalf  magnitude stored for a newly significant coefficient
 * @param vsc          forwarded to opj_t1_update_flags_macro
 *
 * Parameters that take part in expressions are parenthesized so that
 * compound arguments expand with the intended precedence.
 */
#define opj_t1_dec_clnpass_step_macro(check_flags, partial, \
                                      flags, flagsp, flags_stride, data, \
                                      data_stride, ci, mqc, curctx, \
                                      v, a, c, ct, oneplushalf, vsc) \
{ \
    if ( !(check_flags) || !((flags) & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U)))) {\
        do { \
            if( !(partial) ) { \
                /* zero coding: is the coefficient becoming significant? */ \
                OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, (flags) >> ((ci) * 3U)); \
                opj_t1_setcurctx(curctx, ctxt1); \
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                if( !v ) \
                    break; \
            } \
            { \
                /* sign coding and flag propagation */ \
                OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
                                    flags, (flagsp)[-1], (flagsp)[1], \
                                    ci); \
                opj_t1_setcurctx(curctx, opj_t1_getctxno_sc(lu)); \
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                v = v ^ opj_t1_getspb(lu); \
                (data)[(ci) * (data_stride)] = v ? -(oneplushalf) : (oneplushalf); \
                opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
            } \
        } while(0); \
    } \
}
1142
1143 static void opj_t1_dec_clnpass_step(
1144     opj_t1_t *t1,
1145     opj_flag_t *flagsp,
1146     OPJ_INT32 *datap,
1147     OPJ_INT32 oneplushalf,
1148     OPJ_UINT32 ci,
1149     OPJ_UINT32 vsc)
1150 {
1151     OPJ_UINT32 v;
1152
1153     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1154     opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE,
1155                                   *flagsp, flagsp, t1->w + 2U, datap,
1156                                   0, ci, mqc, mqc->curctx,
1157                                   v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
1158 }
1159
1160 static void opj_t1_enc_clnpass(
1161     opj_t1_t *t1,
1162     OPJ_INT32 bpno,
1163     OPJ_INT32 *nmsedec,
1164     OPJ_UINT32 cblksty)
1165 {
1166     OPJ_UINT32 i, k;
1167     const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
1168     OPJ_UINT32 agg, runlen;
1169
1170     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1171
1172     *nmsedec = 0;
1173 #ifdef DEBUG_ENC_CLN
1174     printf("enc_clnpass: bpno=%d\n", bpno);
1175 #endif
1176     for (k = 0; k < (t1->h & ~3U); k += 4) {
1177 #ifdef DEBUG_ENC_CLN
1178         printf(" k=%d\n", k);
1179 #endif
1180         for (i = 0; i < t1->w; ++i) {
1181 #ifdef DEBUG_ENC_CLN
1182             printf("  i=%d\n", i);
1183 #endif
1184             agg = !(T1_FLAGS(i, k));
1185 #ifdef DEBUG_ENC_CLN
1186             printf("   agg=%d\n", agg);
1187 #endif
1188             if (agg) {
1189                 for (runlen = 0; runlen < 4; ++runlen) {
1190                     if (opj_int_abs(t1->data[((k + runlen)*t1->data_stride) + i]) & one) {
1191                         break;
1192                     }
1193                 }
1194                 opj_mqc_setcurctx(mqc, T1_CTXNO_AGG);
1195                 opj_mqc_encode(mqc, runlen != 4);
1196                 if (runlen == 4) {
1197                     continue;
1198                 }
1199                 opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
1200                 opj_mqc_encode(mqc, runlen >> 1);
1201                 opj_mqc_encode(mqc, runlen & 1);
1202             } else {
1203                 runlen = 0;
1204             }
1205             opj_t1_enc_clnpass_step(
1206                 t1,
1207                 &T1_FLAGS(i, k),
1208                 &t1->data[((k + runlen) * t1->data_stride) + i],
1209                 bpno,
1210                 one,
1211                 nmsedec,
1212                 agg,
1213                 runlen,
1214                 4U,
1215                 cblksty);
1216         }
1217     }
1218     if (k < t1->h) {
1219         agg = 0;
1220         runlen = 0;
1221 #ifdef DEBUG_ENC_CLN
1222         printf(" k=%d\n", k);
1223 #endif
1224         for (i = 0; i < t1->w; ++i) {
1225 #ifdef DEBUG_ENC_CLN
1226             printf("  i=%d\n", i);
1227             printf("   agg=%d\n", agg);
1228 #endif
1229             opj_t1_enc_clnpass_step(
1230                 t1,
1231                 &T1_FLAGS(i, k),
1232                 &t1->data[((k + runlen) * t1->data_stride) + i],
1233                 bpno,
1234                 one,
1235                 nmsedec,
1236                 agg,
1237                 runlen,
1238                 t1->h - k,
1239                 cblksty);
1240         }
1241     }
1242 }
1243
/**
 * Body shared by the clean-up pass decoders.
 *
 * Expands to a block that walks the code-block in stripes of 4 rows with
 * the MQ decoder state held in local variables. For a column whose flag
 * word is zero, one decision in the AGG context tells whether anything in
 * the column becomes significant; if so a 2-bit run-length is decoded in
 * the UNI context and decoding resumes at that row (first row in "partial"
 * mode, i.e. sign only). Other columns decode each of their 4 rows
 * normally. The PI bits of every processed flag word are cleared. The
 * remaining (h % 4) rows are handled through opj_t1_dec_clnpass_step()
 * after the MQ state has been written back.
 *
 * Macro parameters are parenthesized where they take part in an expression
 * so that compound arguments (e.g. "t1->w + 2U") expand safely.
 *
 * @param t1           Tier-1 handle (opj_t1_t*)
 * @param bpno         bit-plane number
 * @param vsc          passed as the vsc argument of the row 0 steps and of
 *                     the partial-stripe steps; rows 1 to 3 of full stripes
 *                     always get OPJ_FALSE
 * @param w            code-block width, also used as data stride
 * @param h            code-block height
 * @param flags_stride number of flag entries per stripe row (w + 2)
 */
#define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \
{ \
    OPJ_INT32 one, half, oneplushalf; \
    OPJ_UINT32 runlen; \
    OPJ_UINT32 i, j, k; \
    const OPJ_UINT32 l_w = (w); \
    opj_mqc_t* mqc = &((t1)->mqc); \
    register OPJ_INT32 *data = (t1)->data; \
    /* first flag entry after the top border row and left border column */ \
    register opj_flag_t *flagsp = &(t1)->flags[(flags_stride) + 1]; \
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
    register OPJ_UINT32 v; \
    one = 1 << (bpno); \
    half = one >> 1; \
    oneplushalf = one | half; \
    /* full stripes of 4 rows */ \
    for (k = 0; k < ((h) & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
        for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
            opj_flag_t flags = *flagsp; \
            if (flags == 0) { \
                OPJ_UINT32 partial = OPJ_TRUE; \
                opj_t1_setcurctx(curctx, T1_CTXNO_AGG); \
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                if (!v) { \
                    /* nothing becomes significant in this column */ \
                    continue; \
                } \
                opj_t1_setcurctx(curctx, T1_CTXNO_UNI); \
                opj_mqc_decode_macro(runlen, mqc, curctx, a, c, ct); \
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                runlen = (runlen << 1) | v; \
                /* enter at row 'runlen' and fall through the rows below */ \
                switch(runlen) { \
                    case 0: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, OPJ_TRUE,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 0, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, vsc); \
                        partial = OPJ_FALSE; \
                        /* FALLTHRU */ \
                    case 1: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 1, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        partial = OPJ_FALSE; \
                        /* FALLTHRU */ \
                    case 2: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 2, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        partial = OPJ_FALSE; \
                        /* FALLTHRU */ \
                    case 3: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 3, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        break; \
                } \
            } else { \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 0, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, vsc); \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 1, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 2, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 3, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
            } \
            /* store the updated flag word with its PI bits cleared */ \
            *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
        } \
    } \
    UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
    /* remaining rows of the last, incomplete stripe */ \
    if( k < (h) ) { \
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \
            for (j = 0; j < (h) - k; ++j) { \
                opj_t1_dec_clnpass_step(t1, flagsp, data + j * l_w, oneplushalf, j, vsc); \
            } \
            *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
        } \
    } \
}
1332
1333 static void opj_t1_dec_clnpass_check_segsym(opj_t1_t *t1, OPJ_INT32 cblksty)
1334 {
1335     if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
1336         opj_mqc_t* mqc = &(t1->mqc);
1337         OPJ_UINT32 v, v2;
1338         opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
1339         opj_mqc_decode(v, mqc);
1340         opj_mqc_decode(v2, mqc);
1341         v = (v << 1) | v2;
1342         opj_mqc_decode(v2, mqc);
1343         v = (v << 1) | v2;
1344         opj_mqc_decode(v2, mqc);
1345         v = (v << 1) | v2;
1346         /*
1347         if (v!=0xa) {
1348             opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v);
1349         }
1350         */
1351     }
1352 }
1353
1354 static void opj_t1_dec_clnpass_64x64_novsc(
1355     opj_t1_t *t1,
1356     OPJ_INT32 bpno)
1357 {
1358     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
1359 }
1360
1361 static void opj_t1_dec_clnpass_64x64_vsc(
1362     opj_t1_t *t1,
1363     OPJ_INT32 bpno)
1364 {
1365     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
1366 }
1367
1368 static void opj_t1_dec_clnpass_generic_novsc(
1369     opj_t1_t *t1,
1370     OPJ_INT32 bpno)
1371 {
1372     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
1373                                 t1->w + 2U);
1374 }
1375
1376 static void opj_t1_dec_clnpass_generic_vsc(
1377     opj_t1_t *t1,
1378     OPJ_INT32 bpno)
1379 {
1380     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
1381                                 t1->w + 2U);
1382 }
1383
1384 static void opj_t1_dec_clnpass(
1385     opj_t1_t *t1,
1386     OPJ_INT32 bpno,
1387     OPJ_INT32 cblksty)
1388 {
1389     if (t1->w == 64 && t1->h == 64) {
1390         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1391             opj_t1_dec_clnpass_64x64_vsc(t1, bpno);
1392         } else {
1393             opj_t1_dec_clnpass_64x64_novsc(t1, bpno);
1394         }
1395     } else {
1396         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1397             opj_t1_dec_clnpass_generic_vsc(t1, bpno);
1398         } else {
1399             opj_t1_dec_clnpass_generic_novsc(t1, bpno);
1400         }
1401     }
1402     opj_t1_dec_clnpass_check_segsym(t1, cblksty);
1403 }
1404
1405
1406 /** mod fixed_quality */
1407 static OPJ_FLOAT64 opj_t1_getwmsedec(
1408     OPJ_INT32 nmsedec,
1409     OPJ_UINT32 compno,
1410     OPJ_UINT32 level,
1411     OPJ_UINT32 orient,
1412     OPJ_INT32 bpno,
1413     OPJ_UINT32 qmfbid,
1414     OPJ_FLOAT64 stepsize,
1415     OPJ_UINT32 numcomps,
1416     const OPJ_FLOAT64 * mct_norms,
1417     OPJ_UINT32 mct_numcomps)
1418 {
1419     OPJ_FLOAT64 w1 = 1, w2, wmsedec;
1420     OPJ_ARG_NOT_USED(numcomps);
1421
1422     if (mct_norms && (compno < mct_numcomps)) {
1423         w1 = mct_norms[compno];
1424     }
1425
1426     if (qmfbid == 1) {
1427         w2 = opj_dwt_getnorm(level, orient);
1428     } else {    /* if (qmfbid == 0) */
1429         w2 = opj_dwt_getnorm_real(level, orient);
1430     }
1431
1432     wmsedec = w1 * w2 * stepsize * (1 << bpno);
1433     wmsedec *= wmsedec * nmsedec / 8192.0;
1434
1435     return wmsedec;
1436 }
1437
1438 static OPJ_BOOL opj_t1_allocate_buffers(
1439     opj_t1_t *t1,
1440     OPJ_UINT32 w,
1441     OPJ_UINT32 h)
1442 {
1443     OPJ_UINT32 flagssize;
1444     OPJ_UINT32 flags_stride;
1445
1446     /* No risk of overflow. Prior checks ensure those assert are met */
1447     /* They are per the specification */
1448     assert(w <= 1024);
1449     assert(h <= 1024);
1450     assert(w * h <= 4096);
1451
1452     /* encoder uses tile buffer, so no need to allocate */
1453     if (!t1->encoder) {
1454         OPJ_UINT32 datasize = w * h;
1455
1456         if (datasize > t1->datasize) {
1457             opj_aligned_free(t1->data);
1458             t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
1459             if (!t1->data) {
1460                 /* FIXME event manager error callback */
1461                 return OPJ_FALSE;
1462             }
1463             t1->datasize = datasize;
1464         }
1465         /* memset first arg is declared to never be null by gcc */
1466         if (t1->data != NULL) {
1467             memset(t1->data, 0, datasize * sizeof(OPJ_INT32));
1468         }
1469     }
1470
1471     flags_stride = w + 2U; /* can't be 0U */
1472
1473     flagssize = (h + 3U) / 4U + 2U;
1474
1475     flagssize *= flags_stride;
1476     {
1477         opj_flag_t* p;
1478         OPJ_UINT32 x;
1479         OPJ_UINT32 flags_height = (h + 3U) / 4U;
1480
1481         if (flagssize > t1->flagssize) {
1482
1483             opj_aligned_free(t1->flags);
1484             t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(
1485                             opj_flag_t));
1486             if (!t1->flags) {
1487                 /* FIXME event manager error callback */
1488                 return OPJ_FALSE;
1489             }
1490         }
1491         t1->flagssize = flagssize;
1492
1493         memset(t1->flags, 0, flagssize * sizeof(opj_flag_t));
1494
1495         p = &t1->flags[0];
1496         for (x = 0; x < flags_stride; ++x) {
1497             /* magic value to hopefully stop any passes being interested in this entry */
1498             *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1499         }
1500
1501         p = &t1->flags[((flags_height + 1) * flags_stride)];
1502         for (x = 0; x < flags_stride; ++x) {
1503             /* magic value to hopefully stop any passes being interested in this entry */
1504             *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1505         }
1506
1507         if (h % 4) {
1508             OPJ_UINT32 v = 0;
1509             p = &t1->flags[((flags_height) * flags_stride)];
1510             if (h % 4 == 1) {
1511                 v |= T1_PI_1 | T1_PI_2 | T1_PI_3;
1512             } else if (h % 4 == 2) {
1513                 v |= T1_PI_2 | T1_PI_3;
1514             } else if (h % 4 == 3) {
1515                 v |= T1_PI_3;
1516             }
1517             for (x = 0; x < flags_stride; ++x) {
1518                 *p++ = v;
1519             }
1520         }
1521     }
1522
1523     t1->w = w;
1524     t1->h = h;
1525
1526     return OPJ_TRUE;
1527 }
1528
1529 /* ----------------------------------------------------------------------- */
1530
1531 /* ----------------------------------------------------------------------- */
1532 /**
1533  * Creates a new Tier 1 handle
1534  * and initializes the look-up tables of the Tier-1 coder/decoder
1535  * @return a new T1 handle if successful, returns NULL otherwise
1536 */
1537 opj_t1_t* opj_t1_create(OPJ_BOOL isEncoder)
1538 {
1539     opj_t1_t *l_t1 = 00;
1540
1541     l_t1 = (opj_t1_t*) opj_calloc(1, sizeof(opj_t1_t));
1542     if (!l_t1) {
1543         return 00;
1544     }
1545
1546     l_t1->encoder = isEncoder;
1547
1548     return l_t1;
1549 }
1550
1551
1552 /**
1553  * Destroys a previously created T1 handle
1554  *
1555  * @param p_t1 Tier 1 handle to destroy
1556 */
1557 void opj_t1_destroy(opj_t1_t *p_t1)
1558 {
1559     if (! p_t1) {
1560         return;
1561     }
1562
1563     /* encoder uses tile buffer, so no need to free */
1564     if (!p_t1->encoder && p_t1->data) {
1565         opj_aligned_free(p_t1->data);
1566         p_t1->data = 00;
1567     }
1568
1569     if (p_t1->flags) {
1570         opj_aligned_free(p_t1->flags);
1571         p_t1->flags = 00;
1572     }
1573
1574     opj_free(p_t1->cblkdatabuffer);
1575
1576     opj_free(p_t1);
1577 }
1578
1579 typedef struct {
1580     OPJ_BOOL whole_tile_decoding;
1581     OPJ_UINT32 resno;
1582     opj_tcd_cblk_dec_t* cblk;
1583     opj_tcd_band_t* band;
1584     opj_tcd_tilecomp_t* tilec;
1585     opj_tccp_t* tccp;
1586     OPJ_BOOL mustuse_cblkdatabuffer;
1587     volatile OPJ_BOOL* pret;
1588     opj_event_mgr_t *p_manager;
1589     opj_mutex_t* p_manager_mutex;
1590     OPJ_BOOL check_pterm;
1591 } opj_t1_cblk_decode_processing_job_t;
1592
1593 static void opj_t1_destroy_wrapper(void* t1)
1594 {
1595     opj_t1_destroy((opj_t1_t*) t1);
1596 }
1597
1598 static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
1599 {
1600     opj_tcd_cblk_dec_t* cblk;
1601     opj_tcd_band_t* band;
1602     opj_tcd_tilecomp_t* tilec;
1603     opj_tccp_t* tccp;
1604     OPJ_INT32* OPJ_RESTRICT datap;
1605     OPJ_UINT32 cblk_w, cblk_h;
1606     OPJ_INT32 x, y;
1607     OPJ_UINT32 i, j;
1608     opj_t1_cblk_decode_processing_job_t* job;
1609     opj_t1_t* t1;
1610     OPJ_UINT32 resno;
1611     OPJ_UINT32 tile_w;
1612
1613     job = (opj_t1_cblk_decode_processing_job_t*) user_data;
1614
1615     cblk = job->cblk;
1616
1617     if (!job->whole_tile_decoding) {
1618         cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
1619         cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
1620
1621         cblk->decoded_data = (OPJ_INT32*)opj_aligned_malloc(sizeof(OPJ_INT32) *
1622                              cblk_w * cblk_h);
1623         if (cblk->decoded_data == NULL) {
1624             if (job->p_manager_mutex) {
1625                 opj_mutex_lock(job->p_manager_mutex);
1626             }
1627             opj_event_msg(job->p_manager, EVT_ERROR,
1628                           "Cannot allocate cblk->decoded_data\n");
1629             if (job->p_manager_mutex) {
1630                 opj_mutex_unlock(job->p_manager_mutex);
1631             }
1632             *(job->pret) = OPJ_FALSE;
1633             opj_free(job);
1634             return;
1635         }
1636         /* Zero-init required */
1637         memset(cblk->decoded_data, 0, sizeof(OPJ_INT32) * cblk_w * cblk_h);
1638     } else if (cblk->decoded_data) {
1639         /* Not sure if that code path can happen, but better be */
1640         /* safe than sorry */
1641         opj_aligned_free(cblk->decoded_data);
1642         cblk->decoded_data = NULL;
1643     }
1644
1645     resno = job->resno;
1646     band = job->band;
1647     tilec = job->tilec;
1648     tccp = job->tccp;
1649     tile_w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1
1650                           -
1651                           tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
1652
1653     if (!*(job->pret)) {
1654         opj_free(job);
1655         return;
1656     }
1657
1658     t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
1659     if (t1 == NULL) {
1660         t1 = opj_t1_create(OPJ_FALSE);
1661         if (t1 == NULL) {
1662             opj_event_msg(job->p_manager, EVT_ERROR,
1663                           "Cannot allocate Tier 1 handle\n");
1664             *(job->pret) = OPJ_FALSE;
1665             opj_free(job);
1666             return;
1667         }
1668         if (!opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper)) {
1669             opj_event_msg(job->p_manager, EVT_ERROR,
1670                           "Unable to set t1 handle as TLS\n");
1671             opj_t1_destroy(t1);
1672             *(job->pret) = OPJ_FALSE;
1673             opj_free(job);
1674             return;
1675         }
1676     }
1677     t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;
1678
1679     if (OPJ_FALSE == opj_t1_decode_cblk(
1680                 t1,
1681                 cblk,
1682                 band->bandno,
1683                 (OPJ_UINT32)tccp->roishift,
1684                 tccp->cblksty,
1685                 job->p_manager,
1686                 job->p_manager_mutex,
1687                 job->check_pterm)) {
1688         *(job->pret) = OPJ_FALSE;
1689         opj_free(job);
1690         return;
1691     }
1692
1693     x = cblk->x0 - band->x0;
1694     y = cblk->y0 - band->y0;
1695     if (band->bandno & 1) {
1696         opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
1697         x += pres->x1 - pres->x0;
1698     }
1699     if (band->bandno & 2) {
1700         opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
1701         y += pres->y1 - pres->y0;
1702     }
1703
1704     datap = cblk->decoded_data ? cblk->decoded_data : t1->data;
1705     cblk_w = t1->w;
1706     cblk_h = t1->h;
1707
1708     if (tccp->roishift) {
1709         if (tccp->roishift >= 31) {
1710             for (j = 0; j < cblk_h; ++j) {
1711                 for (i = 0; i < cblk_w; ++i) {
1712                     datap[(j * cblk_w) + i] = 0;
1713                 }
1714             }
1715         } else {
1716             OPJ_INT32 thresh = 1 << tccp->roishift;
1717             for (j = 0; j < cblk_h; ++j) {
1718                 for (i = 0; i < cblk_w; ++i) {
1719                     OPJ_INT32 val = datap[(j * cblk_w) + i];
1720                     OPJ_INT32 mag = abs(val);
1721                     if (mag >= thresh) {
1722                         mag >>= tccp->roishift;
1723                         datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
1724                     }
1725                 }
1726             }
1727         }
1728     }
1729
1730     /* Both can be non NULL if for example decoding a full tile and then */
1731     /* partially a tile. In which case partial decoding should be the */
1732     /* priority */
1733     assert((cblk->decoded_data != NULL) || (tilec->data != NULL));
1734
1735     if (cblk->decoded_data) {
1736         OPJ_UINT32 cblk_size = cblk_w * cblk_h;
1737         if (tccp->qmfbid == 1) {
1738             for (i = 0; i < cblk_size; ++i) {
1739                 datap[i] /= 2;
1740             }
1741         } else {        /* if (tccp->qmfbid == 0) */
1742             i = 0;
1743 #ifdef __SSE2__
1744             {
1745                 const __m128 xmm_stepsize = _mm_set1_ps(band->stepsize);
1746                 for (; i < (cblk_size & ~15U); i += 16) {
1747                     __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1748                                                            datap + 0)));
1749                     __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1750                                                            datap + 4)));
1751                     __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1752                                                            datap + 8)));
1753                     __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1754                                                            datap + 12)));
1755                     _mm_store_ps((float*)(datap +  0), _mm_mul_ps(xmm0_data, xmm_stepsize));
1756                     _mm_store_ps((float*)(datap +  4), _mm_mul_ps(xmm1_data, xmm_stepsize));
1757                     _mm_store_ps((float*)(datap +  8), _mm_mul_ps(xmm2_data, xmm_stepsize));
1758                     _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize));
1759                     datap += 16;
1760                 }
1761             }
1762 #endif
1763             for (; i < cblk_size; ++i) {
1764                 OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize;
1765                 memcpy(datap, &tmp, sizeof(tmp));
1766                 datap++;
1767             }
1768         }
1769     } else if (tccp->qmfbid == 1) {
1770         OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w +
1771                                                        (OPJ_SIZE_T)x];
1772         for (j = 0; j < cblk_h; ++j) {
1773             i = 0;
1774             for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
1775                 OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U];
1776                 OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
1777                 OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
1778                 OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
1779                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2;
1780                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2;
1781                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2;
1782                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2;
1783             }
1784             for (; i < cblk_w; ++i) {
1785                 OPJ_INT32 tmp = datap[(j * cblk_w) + i];
1786                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2;
1787             }
1788         }
1789     } else {        /* if (tccp->qmfbid == 0) */
1790         OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y *
1791                                                          tile_w + (OPJ_SIZE_T)x];
1792         for (j = 0; j < cblk_h; ++j) {
1793             OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
1794             for (i = 0; i < cblk_w; ++i) {
1795                 OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * band->stepsize;
1796                 *tiledp2 = tmp;
1797                 datap++;
1798                 tiledp2++;
1799             }
1800             tiledp += tile_w;
1801         }
1802     }
1803
1804     opj_free(job);
1805 }
1806
1807
1808 void opj_t1_decode_cblks(opj_tcd_t* tcd,
1809                          volatile OPJ_BOOL* pret,
1810                          opj_tcd_tilecomp_t* tilec,
1811                          opj_tccp_t* tccp,
1812                          opj_event_mgr_t *p_manager,
1813                          opj_mutex_t* p_manager_mutex,
1814                          OPJ_BOOL check_pterm
1815                         )
1816 {
1817     opj_thread_pool_t* tp = tcd->thread_pool;
1818     OPJ_UINT32 resno, bandno, precno, cblkno;
1819
1820 #ifdef DEBUG_VERBOSE
1821     OPJ_UINT32 codeblocks_decoded = 0;
1822     printf("Enter opj_t1_decode_cblks()\n");
1823 #endif
1824
1825     for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
1826         opj_tcd_resolution_t* res = &tilec->resolutions[resno];
1827
1828         for (bandno = 0; bandno < res->numbands; ++bandno) {
1829             opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
1830
1831             for (precno = 0; precno < res->pw * res->ph; ++precno) {
1832                 opj_tcd_precinct_t* precinct = &band->precincts[precno];
1833
1834                 if (!opj_tcd_is_subband_area_of_interest(tcd,
1835                         tilec->compno,
1836                         resno,
1837                         band->bandno,
1838                         (OPJ_UINT32)precinct->x0,
1839                         (OPJ_UINT32)precinct->y0,
1840                         (OPJ_UINT32)precinct->x1,
1841                         (OPJ_UINT32)precinct->y1)) {
1842                     for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1843                         opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1844                         if (cblk->decoded_data) {
1845 #ifdef DEBUG_VERBOSE
1846                             printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1847                                    cblk->x0, cblk->y0, resno, bandno);
1848 #endif
1849                             opj_aligned_free(cblk->decoded_data);
1850                             cblk->decoded_data = NULL;
1851                         }
1852                     }
1853                     continue;
1854                 }
1855
1856                 for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1857                     opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1858                     opj_t1_cblk_decode_processing_job_t* job;
1859
1860                     if (!opj_tcd_is_subband_area_of_interest(tcd,
1861                             tilec->compno,
1862                             resno,
1863                             band->bandno,
1864                             (OPJ_UINT32)cblk->x0,
1865                             (OPJ_UINT32)cblk->y0,
1866                             (OPJ_UINT32)cblk->x1,
1867                             (OPJ_UINT32)cblk->y1)) {
1868                         if (cblk->decoded_data) {
1869 #ifdef DEBUG_VERBOSE
1870                             printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1871                                    cblk->x0, cblk->y0, resno, bandno);
1872 #endif
1873                             opj_aligned_free(cblk->decoded_data);
1874                             cblk->decoded_data = NULL;
1875                         }
1876                         continue;
1877                     }
1878
1879                     if (!tcd->whole_tile_decoding) {
1880                         OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
1881                         OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
1882                         if (cblk->decoded_data != NULL) {
1883 #ifdef DEBUG_VERBOSE
1884                             printf("Reusing codeblock %d,%d at resno=%d, bandno=%d\n",
1885                                    cblk->x0, cblk->y0, resno, bandno);
1886 #endif
1887                             continue;
1888                         }
1889                         if (cblk_w == 0 || cblk_h == 0) {
1890                             continue;
1891                         }
1892 #ifdef DEBUG_VERBOSE
1893                         printf("Decoding codeblock %d,%d at resno=%d, bandno=%d\n",
1894                                cblk->x0, cblk->y0, resno, bandno);
1895 #endif
1896                     }
1897
1898                     job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1,
1899                             sizeof(opj_t1_cblk_decode_processing_job_t));
1900                     if (!job) {
1901                         *pret = OPJ_FALSE;
1902                         return;
1903                     }
1904                     job->whole_tile_decoding = tcd->whole_tile_decoding;
1905                     job->resno = resno;
1906                     job->cblk = cblk;
1907                     job->band = band;
1908                     job->tilec = tilec;
1909                     job->tccp = tccp;
1910                     job->pret = pret;
1911                     job->p_manager_mutex = p_manager_mutex;
1912                     job->p_manager = p_manager;
1913                     job->check_pterm = check_pterm;
1914                     job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
1915                     opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job);
1916 #ifdef DEBUG_VERBOSE
1917                     codeblocks_decoded ++;
1918 #endif
1919                     if (!(*pret)) {
1920                         return;
1921                     }
1922                 } /* cblkno */
1923             } /* precno */
1924         } /* bandno */
1925     } /* resno */
1926
1927 #ifdef DEBUG_VERBOSE
1928     printf("Leave opj_t1_decode_cblks(). Number decoded: %d\n", codeblocks_decoded);
1929 #endif
1930     return;
1931 }
1932
1933
1934 static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
1935                                    opj_tcd_cblk_dec_t* cblk,
1936                                    OPJ_UINT32 orient,
1937                                    OPJ_UINT32 roishift,
1938                                    OPJ_UINT32 cblksty,
1939                                    opj_event_mgr_t *p_manager,
1940                                    opj_mutex_t* p_manager_mutex,
1941                                    OPJ_BOOL check_pterm)
1942 {
1943     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1944
1945     OPJ_INT32 bpno_plus_one;
1946     OPJ_UINT32 passtype;
1947     OPJ_UINT32 segno, passno;
1948     OPJ_BYTE* cblkdata = NULL;
1949     OPJ_UINT32 cblkdataindex = 0;
1950     OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */
1951     OPJ_INT32* original_t1_data = NULL;
1952
1953     mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
1954
1955     if (!opj_t1_allocate_buffers(
1956                 t1,
1957                 (OPJ_UINT32)(cblk->x1 - cblk->x0),
1958                 (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
1959         return OPJ_FALSE;
1960     }
1961
1962     bpno_plus_one = (OPJ_INT32)(roishift + cblk->numbps);
1963     if (bpno_plus_one >= 31) {
1964         if (p_manager_mutex) {
1965             opj_mutex_lock(p_manager_mutex);
1966         }
1967         opj_event_msg(p_manager, EVT_WARNING,
1968                       "opj_t1_decode_cblk(): unsupported bpno_plus_one = %d >= 31\n",
1969                       bpno_plus_one);
1970         if (p_manager_mutex) {
1971             opj_mutex_unlock(p_manager_mutex);
1972         }
1973         return OPJ_FALSE;
1974     }
1975     passtype = 2;
1976
1977     opj_mqc_resetstates(mqc);
1978     opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
1979     opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
1980     opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
1981
1982     /* Even if we have a single chunk, in multi-threaded decoding */
1983     /* the insertion of our synthetic marker might potentially override */
1984     /* valid codestream of other codeblocks decoded in parallel. */
1985     if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) {
1986         OPJ_UINT32 i;
1987         OPJ_UINT32 cblk_len;
1988
1989         /* Compute whole codeblock length from chunk lengths */
1990         cblk_len = 0;
1991         for (i = 0; i < cblk->numchunks; i++) {
1992             cblk_len += cblk->chunks[i].len;
1993         }
1994
1995         /* Allocate temporary memory if needed */
1996         if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
1997             cblkdata = (OPJ_BYTE*)opj_realloc(t1->cblkdatabuffer,
1998                                               cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
1999             if (cblkdata == NULL) {
2000                 return OPJ_FALSE;
2001             }
2002             t1->cblkdatabuffer = cblkdata;
2003             memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
2004             t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
2005         }
2006
2007         /* Concatenate all chunks */
2008         cblkdata = t1->cblkdatabuffer;
2009         cblk_len = 0;
2010         for (i = 0; i < cblk->numchunks; i++) {
2011             memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
2012             cblk_len += cblk->chunks[i].len;
2013         }
2014     } else if (cblk->numchunks == 1) {
2015         cblkdata = cblk->chunks[0].data;
2016     } else {
2017         /* Not sure if that can happen in practice, but avoid Coverity to */
2018         /* think we will dereference a null cblkdta pointer */
2019         return OPJ_TRUE;
2020     }
2021
2022     /* For subtile decoding, directly decode in the decoded_data buffer of */
2023     /* the code-block. Hack t1->data to point to it, and restore it later */
2024     if (cblk->decoded_data) {
2025         original_t1_data = t1->data;
2026         t1->data = cblk->decoded_data;
2027     }
2028
2029     for (segno = 0; segno < cblk->real_num_segs; ++segno) {
2030         opj_tcd_seg_t *seg = &cblk->segs[segno];
2031
2032         /* BYPASS mode */
2033         type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) &&
2034                 (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2035
2036         if (type == T1_TYPE_RAW) {
2037             opj_mqc_raw_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2038                                  OPJ_COMMON_CBLK_DATA_EXTRA);
2039         } else {
2040             opj_mqc_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2041                              OPJ_COMMON_CBLK_DATA_EXTRA);
2042         }
2043         cblkdataindex += seg->len;
2044
2045         for (passno = 0; (passno < seg->real_num_passes) &&
2046                 (bpno_plus_one >= 1); ++passno) {
2047             switch (passtype) {
2048             case 0:
2049                 if (type == T1_TYPE_RAW) {
2050                     opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2051                 } else {
2052                     opj_t1_dec_sigpass_mqc(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2053                 }
2054                 break;
2055             case 1:
2056                 if (type == T1_TYPE_RAW) {
2057                     opj_t1_dec_refpass_raw(t1, bpno_plus_one);
2058                 } else {
2059                     opj_t1_dec_refpass_mqc(t1, bpno_plus_one);
2060                 }
2061                 break;
2062             case 2:
2063                 opj_t1_dec_clnpass(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2064                 break;
2065             }
2066
2067             if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) {
2068                 opj_mqc_resetstates(mqc);
2069                 opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2070                 opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2071                 opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2072             }
2073             if (++passtype == 3) {
2074                 passtype = 0;
2075                 bpno_plus_one--;
2076             }
2077         }
2078
2079         opq_mqc_finish_dec(mqc);
2080     }
2081
2082     if (check_pterm) {
2083         if (mqc->bp + 2 < mqc->end) {
2084             if (p_manager_mutex) {
2085                 opj_mutex_lock(p_manager_mutex);
2086             }
2087             opj_event_msg(p_manager, EVT_WARNING,
2088                           "PTERM check failure: %d remaining bytes in code block (%d used / %d)\n",
2089                           (int)(mqc->end - mqc->bp) - 2,
2090                           (int)(mqc->bp - mqc->start),
2091                           (int)(mqc->end - mqc->start));
2092             if (p_manager_mutex) {
2093                 opj_mutex_unlock(p_manager_mutex);
2094             }
2095         } else if (mqc->end_of_byte_stream_counter > 2) {
2096             if (p_manager_mutex) {
2097                 opj_mutex_lock(p_manager_mutex);
2098             }
2099             opj_event_msg(p_manager, EVT_WARNING,
2100                           "PTERM check failure: %d synthetized 0xFF markers read\n",
2101                           mqc->end_of_byte_stream_counter);
2102             if (p_manager_mutex) {
2103                 opj_mutex_unlock(p_manager_mutex);
2104             }
2105         }
2106     }
2107
2108     /* Restore original t1->data is needed */
2109     if (cblk->decoded_data) {
2110         t1->data = original_t1_data;
2111     }
2112
2113     return OPJ_TRUE;
2114 }
2115
2116
2117
2118
2119 OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
2120                              opj_tcd_tile_t *tile,
2121                              opj_tcp_t *tcp,
2122                              const OPJ_FLOAT64 * mct_norms,
2123                              OPJ_UINT32 mct_numcomps
2124                             )
2125 {
2126     OPJ_UINT32 compno, resno, bandno, precno, cblkno;
2127
2128     tile->distotile = 0;        /* fixed_quality */
2129
2130     for (compno = 0; compno < tile->numcomps; ++compno) {
2131         opj_tcd_tilecomp_t* tilec = &tile->comps[compno];
2132         opj_tccp_t* tccp = &tcp->tccps[compno];
2133         OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
2134
2135         for (resno = 0; resno < tilec->numresolutions; ++resno) {
2136             opj_tcd_resolution_t *res = &tilec->resolutions[resno];
2137
2138             for (bandno = 0; bandno < res->numbands; ++bandno) {
2139                 opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
2140                 OPJ_INT32 bandconst;
2141
2142                 /* Skip empty bands */
2143                 if (opj_tcd_is_band_empty(band)) {
2144                     continue;
2145                 }
2146
2147                 bandconst = 8192 * 8192 / ((OPJ_INT32) floor(band->stepsize * 8192));
2148                 for (precno = 0; precno < res->pw * res->ph; ++precno) {
2149                     opj_tcd_precinct_t *prc = &band->precincts[precno];
2150
2151                     for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
2152                         opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
2153                         OPJ_INT32* OPJ_RESTRICT tiledp;
2154                         OPJ_UINT32 cblk_w;
2155                         OPJ_UINT32 cblk_h;
2156                         OPJ_UINT32 i, j, tileLineAdvance;
2157                         OPJ_SIZE_T tileIndex = 0;
2158
2159                         OPJ_INT32 x = cblk->x0 - band->x0;
2160                         OPJ_INT32 y = cblk->y0 - band->y0;
2161                         if (band->bandno & 1) {
2162                             opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2163                             x += pres->x1 - pres->x0;
2164                         }
2165                         if (band->bandno & 2) {
2166                             opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2167                             y += pres->y1 - pres->y0;
2168                         }
2169
2170                         if (!opj_t1_allocate_buffers(
2171                                     t1,
2172                                     (OPJ_UINT32)(cblk->x1 - cblk->x0),
2173                                     (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
2174                             return OPJ_FALSE;
2175                         }
2176
2177                         cblk_w = t1->w;
2178                         cblk_h = t1->h;
2179                         tileLineAdvance = tile_w - cblk_w;
2180
2181                         tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
2182                         t1->data = tiledp;
2183                         t1->data_stride = tile_w;
2184                         if (tccp->qmfbid == 1) {
2185                             /* Do multiplication on unsigned type, even if the
2186                              * underlying type is signed, to avoid potential
2187                              * int overflow on large value (the output will be
2188                              * incorrect in such situation, but whatever...)
2189                              * This assumes complement-to-2 signed integer
2190                              * representation
2191                              * Fixes https://github.com/uclouvain/openjpeg/issues/1053
2192                              */
2193                             OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp;
2194                             for (j = 0; j < cblk_h; ++j) {
2195                                 for (i = 0; i < cblk_w; ++i) {
2196                                     tiledp_u[tileIndex] <<= T1_NMSEDEC_FRACBITS;
2197                                     tileIndex++;
2198                                 }
2199                                 tileIndex += tileLineAdvance;
2200                             }
2201                         } else {        /* if (tccp->qmfbid == 0) */
2202                             for (j = 0; j < cblk_h; ++j) {
2203                                 for (i = 0; i < cblk_w; ++i) {
2204                                     OPJ_INT32 tmp = tiledp[tileIndex];
2205                                     tiledp[tileIndex] =
2206                                         opj_int_fix_mul_t1(
2207                                             tmp,
2208                                             bandconst);
2209                                     tileIndex++;
2210                                 }
2211                                 tileIndex += tileLineAdvance;
2212                             }
2213                         }
2214
2215                         opj_t1_encode_cblk(
2216                             t1,
2217                             cblk,
2218                             band->bandno,
2219                             compno,
2220                             tilec->numresolutions - 1 - resno,
2221                             tccp->qmfbid,
2222                             band->stepsize,
2223                             tccp->cblksty,
2224                             tile->numcomps,
2225                             tile,
2226                             mct_norms,
2227                             mct_numcomps);
2228
2229                     } /* cblkno */
2230                 } /* precno */
2231             } /* bandno */
2232         } /* resno  */
2233     } /* compno  */
2234     return OPJ_TRUE;
2235 }
2236
2237 /* Returns whether the pass (bpno, passtype) is terminated */
2238 static int opj_t1_enc_is_term_pass(opj_tcd_cblk_enc_t* cblk,
2239                                    OPJ_UINT32 cblksty,
2240                                    OPJ_INT32 bpno,
2241                                    OPJ_UINT32 passtype)
2242 {
2243     /* Is it the last cleanup pass ? */
2244     if (passtype == 2 && bpno == 0) {
2245         return OPJ_TRUE;
2246     }
2247
2248     if (cblksty & J2K_CCP_CBLKSTY_TERMALL) {
2249         return OPJ_TRUE;
2250     }
2251
2252     if ((cblksty & J2K_CCP_CBLKSTY_LAZY)) {
2253         /* For bypass arithmetic bypass, terminate the 4th cleanup pass */
2254         if ((bpno == ((OPJ_INT32)cblk->numbps - 4)) && (passtype == 2)) {
2255             return OPJ_TRUE;
2256         }
2257         /* and beyond terminate all the magnitude refinement passes (in raw) */
2258         /* and cleanup passes (in MQC) */
2259         if ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype > 0)) {
2260             return OPJ_TRUE;
2261         }
2262     }
2263
2264     return OPJ_FALSE;
2265 }
2266
2267
2268 /** mod fixed_quality */
2269 static void opj_t1_encode_cblk(opj_t1_t *t1,
2270                                opj_tcd_cblk_enc_t* cblk,
2271                                OPJ_UINT32 orient,
2272                                OPJ_UINT32 compno,
2273                                OPJ_UINT32 level,
2274                                OPJ_UINT32 qmfbid,
2275                                OPJ_FLOAT64 stepsize,
2276                                OPJ_UINT32 cblksty,
2277                                OPJ_UINT32 numcomps,
2278                                opj_tcd_tile_t * tile,
2279                                const OPJ_FLOAT64 * mct_norms,
2280                                OPJ_UINT32 mct_numcomps)
2281 {
2282     OPJ_FLOAT64 cumwmsedec = 0.0;
2283
2284     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
2285
2286     OPJ_UINT32 passno;
2287     OPJ_INT32 bpno;
2288     OPJ_UINT32 passtype;
2289     OPJ_INT32 nmsedec = 0;
2290     OPJ_INT32 max;
2291     OPJ_UINT32 i, j;
2292     OPJ_BYTE type = T1_TYPE_MQ;
2293     OPJ_FLOAT64 tempwmsedec;
2294
2295 #ifdef EXTRA_DEBUG
2296     printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n",
2297            cblk->x0, cblk->y0, cblk->x1, cblk->y1, orient, compno, level);
2298 #endif
2299
2300     mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
2301
2302     max = 0;
2303     for (i = 0; i < t1->w; ++i) {
2304         for (j = 0; j < t1->h; ++j) {
2305             OPJ_INT32 tmp = abs(t1->data[i + j * t1->data_stride]);
2306             max = opj_int_max(max, tmp);
2307         }
2308     }
2309
2310     cblk->numbps = max ? (OPJ_UINT32)((opj_int_floorlog2(max) + 1) -
2311                                       T1_NMSEDEC_FRACBITS) : 0;
2312     if (cblk->numbps == 0) {
2313         cblk->totalpasses = 0;
2314         return;
2315     }
2316
2317     bpno = (OPJ_INT32)(cblk->numbps - 1);
2318     passtype = 2;
2319
2320     opj_mqc_resetstates(mqc);
2321     opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2322     opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2323     opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2324     opj_mqc_init_enc(mqc, cblk->data);
2325
2326     for (passno = 0; bpno >= 0; ++passno) {
2327         opj_tcd_pass_t *pass = &cblk->passes[passno];
2328         type = ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype < 2) &&
2329                 (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2330
2331         /* If the previous pass was terminating, we need to reset the encoder */
2332         if (passno > 0 && cblk->passes[passno - 1].term) {
2333             if (type == T1_TYPE_RAW) {
2334                 opj_mqc_bypass_init_enc(mqc);
2335             } else {
2336                 opj_mqc_restart_init_enc(mqc);
2337             }
2338         }
2339
2340         switch (passtype) {
2341         case 0:
2342             opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty);
2343             break;
2344         case 1:
2345             opj_t1_enc_refpass(t1, bpno, &nmsedec, type);
2346             break;
2347         case 2:
2348             opj_t1_enc_clnpass(t1, bpno, &nmsedec, cblksty);
2349             /* code switch SEGMARK (i.e. SEGSYM) */
2350             if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
2351                 opj_mqc_segmark_enc(mqc);
2352             }
2353             break;
2354         }
2355
2356         /* fixed_quality */
2357         tempwmsedec = opj_t1_getwmsedec(nmsedec, compno, level, orient, bpno, qmfbid,
2358                                         stepsize, numcomps, mct_norms, mct_numcomps) ;
2359         cumwmsedec += tempwmsedec;
2360         tile->distotile += tempwmsedec;
2361         pass->distortiondec = cumwmsedec;
2362
2363         if (opj_t1_enc_is_term_pass(cblk, cblksty, bpno, passtype)) {
2364             /* If it is a terminated pass, terminate it */
2365             if (type == T1_TYPE_RAW) {
2366                 opj_mqc_bypass_flush_enc(mqc, cblksty & J2K_CCP_CBLKSTY_PTERM);
2367             } else {
2368                 if (cblksty & J2K_CCP_CBLKSTY_PTERM) {
2369                     opj_mqc_erterm_enc(mqc);
2370                 } else {
2371                     opj_mqc_flush(mqc);
2372                 }
2373             }
2374             pass->term = 1;
2375             pass->rate = opj_mqc_numbytes(mqc);
2376         } else {
2377             /* Non terminated pass */
2378             OPJ_UINT32 rate_extra_bytes;
2379             if (type == T1_TYPE_RAW) {
2380                 rate_extra_bytes = opj_mqc_bypass_get_extra_bytes(
2381                                        mqc, (cblksty & J2K_CCP_CBLKSTY_PTERM));
2382             } else {
2383                 rate_extra_bytes = 3;
2384             }
2385             pass->term = 0;
2386             pass->rate = opj_mqc_numbytes(mqc) + rate_extra_bytes;
2387         }
2388
2389         if (++passtype == 3) {
2390             passtype = 0;
2391             bpno--;
2392         }
2393
2394         /* Code-switch "RESET" */
2395         if (cblksty & J2K_CCP_CBLKSTY_RESET) {
2396             opj_mqc_reset_enc(mqc);
2397         }
2398     }
2399
2400     cblk->totalpasses = passno;
2401
2402     if (cblk->totalpasses) {
2403         /* Make sure that pass rates are increasing */
2404         OPJ_UINT32 last_pass_rate = opj_mqc_numbytes(mqc);
2405         for (passno = cblk->totalpasses; passno > 0;) {
2406             opj_tcd_pass_t *pass = &cblk->passes[--passno];
2407             if (pass->rate > last_pass_rate) {
2408                 pass->rate = last_pass_rate;
2409             } else {
2410                 last_pass_rate = pass->rate;
2411             }
2412         }
2413     }
2414
2415     for (passno = 0; passno < cblk->totalpasses; passno++) {
2416         opj_tcd_pass_t *pass = &cblk->passes[passno];
2417
2418         /* Prevent generation of FF as last data byte of a pass*/
2419         /* For terminating passes, the flushing procedure ensured this already */
2420         assert(pass->rate > 0);
2421         if (cblk->data[pass->rate - 1] == 0xFF) {
2422             pass->rate--;
2423         }
2424         pass->len = pass->rate - (passno == 0 ? 0 : cblk->passes[passno - 1].rate);
2425     }
2426
2427 #ifdef EXTRA_DEBUG
2428     printf(" len=%d\n", (cblk->totalpasses) ? opj_mqc_numbytes(mqc) : 0);
2429
2430     /* Check that there not 0xff >=0x90 sequences */
2431     if (cblk->totalpasses) {
2432         OPJ_UINT32 i;
2433         OPJ_UINT32 len = opj_mqc_numbytes(mqc);
2434         for (i = 1; i < len; ++i) {
2435             if (cblk->data[i - 1] == 0xff && cblk->data[i] >= 0x90) {
2436                 printf("0xff %02x at offset %d\n", cblk->data[i], i - 1);
2437                 abort();
2438             }
2439         }
2440     }
2441 #endif
2442 }