summaryrefslogtreecommitdiff
path: root/src/lib/openjp2/t1.h
diff options
context:
space:
mode:
authorEven Rouault <even.rouault@spatialys.com>2017-05-20 14:05:07 +0200
committerEven Rouault <even.rouault@spatialys.com>2017-05-23 16:16:32 +0200
commitcd12414c6b11295ae8540df2ef77eb9c624cd264 (patch)
treeaba668cf5ef928072ea4f4565da0c8be8c50d064 /src/lib/openjp2/t1.h
parent53d46fc7330ed652db66aa37b498fbfa27be625c (diff)
T1: use more compact flags to optimize cache usage in encoder passes. (#172)
Ported from Carl Hetherington's work (actually through Matthieu Darbois's port on top of OpenJPEG 2.1.0). Can reduce total encoding time by 10-15%. WARNING: VSC mode is not implemented, so this is a temporary regression that must be fixed.
Diffstat (limited to 'src/lib/openjp2/t1.h')
-rw-r--r--src/lib/openjp2/t1.h101
1 files changed, 101 insertions, 0 deletions
diff --git a/src/lib/openjp2/t1.h b/src/lib/openjp2/t1.h
index 31a2d9f3..f5ce9108 100644
--- a/src/lib/openjp2/t1.h
+++ b/src/lib/openjp2/t1.h
@@ -11,6 +11,7 @@
* Copyright (c) 2003-2007, Francois-Olivier Devaux
* Copyright (c) 2003-2014, Antonin Descampe
* Copyright (c) 2005, Herve Drolon, FreeImage Team
+ * Copyright (c) 2012, Carl Hetherington
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -112,6 +113,101 @@ in T1.C are used by some function in TCD.C.
#define T1_COLFLAG_VISIT_ROW_3 (T1_COLFLAG_VISIT_ROW_0 << (3U * T1_COLFLAG_RBS))
#define T1_COLFLAG_REFINE_ROW_3 (T1_COLFLAG_REFINE_ROW_0 << (3U * T1_COLFLAG_RBS))
+
+/* BEGINNING of flags that apply to opj_flag_enc_t */
+/** The T1 encoder holds the state of 4 vertically adjacent data points in
+ * a single 32-bit flags word. This corresponds to the 4-point-high columns
+ * that the data is processed in.
+ *
+ * These #defines declare the layout of that 32-bit flags word. T1_SIGMA_0 to
+ * T1_SIGMA_17 (bits 0-17) are significance bits, 3 per row for 6 rows.
+ * This is currently done for encoding only.
+ */
+
+#define T1_SIGMA_0 (1U << 0) /* bits 0-2: row above data point 0 (NW, N, NE) */
+#define T1_SIGMA_1 (1U << 1)
+#define T1_SIGMA_2 (1U << 2)
+#define T1_SIGMA_3 (1U << 3) /* bits 3-5: row of data point 0 (W, THIS, E) */
+#define T1_SIGMA_4 (1U << 4)
+#define T1_SIGMA_5 (1U << 5)
+#define T1_SIGMA_6 (1U << 6) /* bits 6-8: row of data point 1 (SW, S, SE of point 0) */
+#define T1_SIGMA_7 (1U << 7)
+#define T1_SIGMA_8 (1U << 8)
+#define T1_SIGMA_9 (1U << 9) /* bits 9-11: row of data point 2 */
+#define T1_SIGMA_10 (1U << 10)
+#define T1_SIGMA_11 (1U << 11)
+#define T1_SIGMA_12 (1U << 12) /* bits 12-14: row of data point 3 */
+#define T1_SIGMA_13 (1U << 13)
+#define T1_SIGMA_14 (1U << 14)
+#define T1_SIGMA_15 (1U << 15) /* bits 15-17: row below data point 3 */
+#define T1_SIGMA_16 (1U << 16)
+#define T1_SIGMA_17 (1U << 17)
+
+#define T1_CHI_0 (1U << 18) /* CHI bit below point 0's; presumably belongs to the row above -- TODO confirm in t1.c */
+#define T1_CHI_0_I 18 /* T1_CHI_n_I is the bit index of T1_CHI_n */
+#define T1_CHI_1 (1U << 19) /* data point 0 (aliased as T1_CHI_THIS); CHI is presumably the sign state -- TODO confirm */
+#define T1_CHI_1_I 19
+#define T1_MU_0 (1U << 20) /* data point 0 (aliased as T1_MU_THIS); MU is presumably the refinement state -- TODO confirm */
+#define T1_PI_0 (1U << 21) /* data point 0 (aliased as T1_PI_THIS); PI is presumably the visited state -- TODO confirm */
+#define T1_CHI_2 (1U << 22) /* data point 1: the CHI/MU/PI triplet repeats 3 bits higher per point */
+#define T1_CHI_2_I 22
+#define T1_MU_1 (1U << 23)
+#define T1_PI_1 (1U << 24)
+#define T1_CHI_3 (1U << 25) /* data point 2 */
+#define T1_MU_2 (1U << 26)
+#define T1_PI_2 (1U << 27)
+#define T1_CHI_4 (1U << 28) /* data point 3 */
+#define T1_MU_3 (1U << 29)
+#define T1_PI_3 (1U << 30)
+#define T1_CHI_5 (1U << 31) /* top bit of the word; presumably belongs to the row below point 3 -- TODO confirm in t1.c */
+
+
+/** As an example, the bits T1_SIGMA_3, T1_SIGMA_4 and T1_SIGMA_5
+ * indicate the significance state of the west neighbour of data point zero
+ * of our four, the point itself, and its east neighbour respectively.
+ * Many of the bits are arranged so that, given a flags word, you can
+ * look at the values for data point 0, then shift the flags
+ * word right by 3 bits and look at the same bit positions to see the
+ * values for data point 1 (and by 6 and 9 bits for data points 2 and 3).
+ *
+ * The #defines below name those positions relative to data point 0; say
+ * you have a flags word f, you can do things like
+ *
+ * (f & T1_SIGMA_THIS)
+ *
+ * to see the significance bit of data point 0, then do
+ *
+ * ((f >> 3) & T1_SIGMA_THIS)
+ *
+ * to see the significance bit of data point 1.
+ */
+
+#define T1_SIGMA_NW T1_SIGMA_0 /* row above data point 0 */
+#define T1_SIGMA_N T1_SIGMA_1
+#define T1_SIGMA_NE T1_SIGMA_2
+#define T1_SIGMA_W T1_SIGMA_3 /* row of data point 0 */
+#define T1_SIGMA_THIS T1_SIGMA_4
+#define T1_SIGMA_E T1_SIGMA_5
+#define T1_SIGMA_SW T1_SIGMA_6 /* row below data point 0 */
+#define T1_SIGMA_S T1_SIGMA_7
+#define T1_SIGMA_SE T1_SIGMA_8
+#define T1_SIGMA_NEIGHBOURS (T1_SIGMA_NW | T1_SIGMA_N | T1_SIGMA_NE | T1_SIGMA_W | T1_SIGMA_E | T1_SIGMA_SW | T1_SIGMA_S | T1_SIGMA_SE) /* all eight neighbours; T1_SIGMA_THIS is deliberately not included */
+
+#define T1_CHI_THIS T1_CHI_1 /* CHI bit of data point 0; shift the flags word right by 3 for each further point */
+#define T1_CHI_THIS_I T1_CHI_1_I /* bit index of T1_CHI_THIS */
+#define T1_MU_THIS T1_MU_0 /* MU bit of data point 0 */
+#define T1_PI_THIS T1_PI_0 /* PI bit of data point 0 */
+
+#define T1_LUT_SGN_W (1U << 0) /* T1_LUT_* occupy bits 0-7; presumably they form a lookup-table index -- TODO confirm in t1.c */
+#define T1_LUT_SIG_N (1U << 1) /* SIG_x / SGN_x: presumably significance / sign of neighbour x -- TODO confirm */
+#define T1_LUT_SGN_E (1U << 2)
+#define T1_LUT_SIG_W (1U << 3)
+#define T1_LUT_SGN_N (1U << 4)
+#define T1_LUT_SIG_E (1U << 5)
+#define T1_LUT_SGN_S (1U << 6)
+#define T1_LUT_SIG_S (1U << 7)
+/* END of flags that apply to opj_flag_enc_t */
+
/* ----------------------------------------------------------------------- */
typedef OPJ_UINT16 opj_flag_t;
@@ -119,6 +215,8 @@ typedef OPJ_UINT16 opj_flag_t;
/** Flags for 4 consecutive rows of a column */
typedef OPJ_UINT16 opj_colflag_t;
+typedef OPJ_UINT32 opj_flag_enc_t; /* encoder flags word for 4 data points; layout given by the T1_SIGMA_*, T1_CHI_*, T1_MU_* and T1_PI_* defines */
+
/**
Tier-1 coding (coding of code-block coefficients)
*/
@@ -130,12 +228,15 @@ typedef struct opj_t1 {
opj_raw_t *raw;
OPJ_INT32 *data;
+ /** Flags used by decoder */
opj_flag_t *flags;
/** Addition flag array such that colflags[1+0] is for state of col=0,row=0..3,
colflags[1+1] for col=1, row=0..3, colflags[1+flags_stride] for col=0,row=4..7, ...
This array avoids too much cache trashing when processing by 4 vertical samples
as done in the various decoding steps. */
opj_colflag_t* colflags;
+ /** Flags used by encoder */
+ opj_flag_enc_t *enc_flags;
OPJ_UINT32 w;
OPJ_UINT32 h;
OPJ_UINT32 datasize;