diff options
| author | Even Rouault <even.rouault@spatialys.com> | 2017-05-20 14:05:07 +0200 |
|---|---|---|
| committer | Even Rouault <even.rouault@spatialys.com> | 2017-05-23 16:16:32 +0200 |
| commit | cd12414c6b11295ae8540df2ef77eb9c624cd264 (patch) | |
| tree | aba668cf5ef928072ea4f4565da0c8be8c50d064 /src/lib/openjp2/t1.h | |
| parent | 53d46fc7330ed652db66aa37b498fbfa27be625c (diff) | |
T1: use more compact flags to optimize cache usage in encoder passes. (#172)
Ported from Carl Hetherington work (actually through Matthieu Darbois's port
on top of OpenJPEG 2.1.0)
Can reduce total encoding time by 10-15%
WARNING: VSC mode is not implemented, and so is a temporary regression
that must be fixed.
Diffstat (limited to 'src/lib/openjp2/t1.h')
| -rw-r--r-- | src/lib/openjp2/t1.h | 101 |
1 files changed, 101 insertions, 0 deletions
diff --git a/src/lib/openjp2/t1.h b/src/lib/openjp2/t1.h index 31a2d9f3..f5ce9108 100644 --- a/src/lib/openjp2/t1.h +++ b/src/lib/openjp2/t1.h @@ -11,6 +11,7 @@ * Copyright (c) 2003-2007, Francois-Olivier Devaux * Copyright (c) 2003-2014, Antonin Descampe * Copyright (c) 2005, Herve Drolon, FreeImage Team + * Copyright (c) 2012, Carl Hetherington * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -112,6 +113,101 @@ in T1.C are used by some function in TCD.C. #define T1_COLFLAG_VISIT_ROW_3 (T1_COLFLAG_VISIT_ROW_0 << (3U * T1_COLFLAG_RBS)) #define T1_COLFLAG_REFINE_ROW_3 (T1_COLFLAG_REFINE_ROW_0 << (3U * T1_COLFLAG_RBS)) + +/* BEGINNING of flags that apply to opj_flag_enc_t */ +/** We hold the state of individual data points for the T1 encoder using + * a single 32-bit flags word to hold the state of 4 data points. This corresponds + * to the 4-point-high columns that the data is processed in. + * + * These #defines declare the layout of a 32-bit flags word. + * + * This is currently done for encoding only. + */ + +#define T1_SIGMA_0 (1U << 0) +#define T1_SIGMA_1 (1U << 1) +#define T1_SIGMA_2 (1U << 2) +#define T1_SIGMA_3 (1U << 3) +#define T1_SIGMA_4 (1U << 4) +#define T1_SIGMA_5 (1U << 5) +#define T1_SIGMA_6 (1U << 6) +#define T1_SIGMA_7 (1U << 7) +#define T1_SIGMA_8 (1U << 8) +#define T1_SIGMA_9 (1U << 9) +#define T1_SIGMA_10 (1U << 10) +#define T1_SIGMA_11 (1U << 11) +#define T1_SIGMA_12 (1U << 12) +#define T1_SIGMA_13 (1U << 13) +#define T1_SIGMA_14 (1U << 14) +#define T1_SIGMA_15 (1U << 15) +#define T1_SIGMA_16 (1U << 16) +#define T1_SIGMA_17 (1U << 17) + +#define T1_CHI_0 (1U << 18) +#define T1_CHI_0_I 18 +#define T1_CHI_1 (1U << 19) +#define T1_CHI_1_I 19 +#define T1_MU_0 (1U << 20) +#define T1_PI_0 (1U << 21) +#define T1_CHI_2 (1U << 22) +#define T1_CHI_2_I 22 +#define T1_MU_1 (1U << 23) +#define T1_PI_1 (1U << 24) +#define T1_CHI_3 (1U << 25) +#define T1_MU_2 (1U << 26) +#define T1_PI_2 (1U << 27) +#define T1_CHI_4 (1U << 28) +#define T1_MU_3 (1U << 29) +#define T1_PI_3 (1U << 30) +#define T1_CHI_5 (1U << 31) + + +/** As an example, the bits T1_SIGMA_3, T1_SIGMA_4 and T1_SIGMA_5 + * indicate the significance state of the west neighbour of data point zero + * of our four, the point itself, and its east neighbour respectively. + * Many of the bits are arranged so that given a flags word, you can + * look at the values for the data point 0, then shift the flags + * word right by 3 bits and look at the same bit positions to see the + * values for data point 1. + * + * The #defines below help a bit with this; say you have a flags word + * f, you can do things like + * + * (f & T1_SIGMA_THIS) + * + * to see the significance bit of data point 0, then do + * + * ((f >> 3) & T1_SIGMA_THIS) + * + * to see the significance bit of data point 1. + */ + +#define T1_SIGMA_NW T1_SIGMA_0 +#define T1_SIGMA_N T1_SIGMA_1 +#define T1_SIGMA_NE T1_SIGMA_2 +#define T1_SIGMA_W T1_SIGMA_3 +#define T1_SIGMA_THIS T1_SIGMA_4 +#define T1_SIGMA_E T1_SIGMA_5 +#define T1_SIGMA_SW T1_SIGMA_6 +#define T1_SIGMA_S T1_SIGMA_7 +#define T1_SIGMA_SE T1_SIGMA_8 +#define T1_SIGMA_NEIGHBOURS (T1_SIGMA_NW | T1_SIGMA_N | T1_SIGMA_NE | T1_SIGMA_W | T1_SIGMA_E | T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE) + +#define T1_CHI_THIS T1_CHI_1 +#define T1_CHI_THIS_I T1_CHI_1_I +#define T1_MU_THIS T1_MU_0 +#define T1_PI_THIS T1_PI_0 + +#define T1_LUT_SGN_W (1U << 0) +#define T1_LUT_SIG_N (1U << 1) +#define T1_LUT_SGN_E (1U << 2) +#define T1_LUT_SIG_W (1U << 3) +#define T1_LUT_SGN_N (1U << 4) +#define T1_LUT_SIG_E (1U << 5) +#define T1_LUT_SGN_S (1U << 6) +#define T1_LUT_SIG_S (1U << 7) +/* END of flags that apply to opj_flag_enc_t */ + /* ----------------------------------------------------------------------- */ typedef OPJ_UINT16 opj_flag_t; @@ -119,6 +215,8 @@ typedef OPJ_UINT16 opj_flag_t; /** Flags for 4 consecutive rows of a column */ typedef OPJ_UINT16 opj_colflag_t; +typedef OPJ_UINT32 opj_flag_enc_t; + /** Tier-1 coding (coding of code-block coefficients) */ @@ -130,12 +228,15 @@ typedef struct opj_t1 { opj_raw_t *raw; OPJ_INT32 *data; + /** Flags used by decoder */ opj_flag_t *flags; /** Addition flag array such that colflags[1+0] is for state of col=0,row=0..3, colflags[1+1] for col=1, row=0..3, colflags[1+flags_stride] for col=0,row=4..7, ... This array avoids too much cache trashing when processing by 4 vertical samples as done in the various decoding steps. */ opj_colflag_t* colflags; + /** Flags used by encoder */ + opj_flag_enc_t *enc_flags; OPJ_UINT32 w; OPJ_UINT32 h; OPJ_UINT32 datasize; |
