summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCarl Hetherington <cth@carlh.net>2020-11-16 23:56:11 +0100
committerCarl Hetherington <cth@carlh.net>2020-11-17 00:17:52 +0100
commitcdf9ebe4a827d8d2d6d89e34bad0c976b04834b7 (patch)
treed7a877cd99319f213d1c192f0e3737b7ad7e2514
parentf235ed07f969e1b8b9d3d05082dcdd68ceae771e (diff)
Use 64-byte alignment for aligned image memory.
FFmpeg uses this value if AVX512 is available, and with only 32-byte alignment I am seeing strange scaling effects whereby crop_scale_window_test7 gives black bars down the right side of cropped images (when run on an i7 7700).
-rw-r--r--src/lib/image.cc15
-rw-r--r--test/image_test.cc8
2 files changed, 14 insertions, 9 deletions
diff --git a/src/lib/image.cc b/src/lib/image.cc
index 1439d9f7f..61945e1c9 100644
--- a/src/lib/image.cc
+++ b/src/lib/image.cc
@@ -55,6 +55,11 @@ using std::runtime_error;
using boost::shared_ptr;
using dcp::Size;
+
+/** The memory alignment, in bytes, used for each row of an image if alignment is requested */
+#define ALIGNMENT 64
+
+
int
Image::vertical_factor (int n) const
{
@@ -830,7 +835,7 @@ Image::bytes_per_pixel (int c) const
*
* @param p Pixel format.
* @param s Size in pixels.
- * @param aligned true to make each row of this image aligned to a 32-byte boundary.
+ * @param aligned true to make each row of this image aligned to an ALIGNMENT-byte boundary.
*/
Image::Image (AVPixelFormat p, dcp::Size s, bool aligned)
: _size (s)
@@ -854,7 +859,7 @@ Image::allocate ()
for (int i = 0; i < planes(); ++i) {
_line_size[i] = ceil (_size.width * bytes_per_pixel(i));
- _stride[i] = stride_round_up (i, _line_size, _aligned ? 32 : 1);
+ _stride[i] = stride_round_up (i, _line_size, _aligned ? ALIGNMENT : 1);
/* The assembler function ff_rgb24ToY_avx (in libswscale/x86/input.asm)
uses a 16-byte fetch to read three bytes (R/G/B) of image data.
@@ -867,7 +872,7 @@ Image::allocate ()
Further to the above, valgrind is now telling me that ff_rgb24ToY_ssse3
over-reads by more then _avx. I can't follow the code to work out how much,
- so I'll just over-allocate by 32 bytes and have done with it. Empirical
+ so I'll just over-allocate by ALIGNMENT bytes and have done with it. Empirical
testing suggests that it works.
In addition to these concerns, we may read/write as much as a whole extra line
@@ -893,12 +898,12 @@ Image::allocate ()
|XXXwrittenXXX|<------line-size------------->|XXXwrittenXXXXXXwrittenXXX
^^^^ out of bounds
*/
- _data[i] = (uint8_t *) wrapped_av_malloc (_stride[i] * (sample_size(i).height + 1) + 32);
+ _data[i] = (uint8_t *) wrapped_av_malloc (_stride[i] * (sample_size(i).height + 1) + ALIGNMENT);
#if HAVE_VALGRIND_MEMCHECK_H
/* The data between the end of the line size and the stride is undefined but processed by
libswscale, causing lots of valgrind errors. Mark it all defined to quell these errors.
*/
- VALGRIND_MAKE_MEM_DEFINED (_data[i], _stride[i] * (sample_size(i).height + 1) + 32);
+ VALGRIND_MAKE_MEM_DEFINED (_data[i], _stride[i] * (sample_size(i).height + 1) + ALIGNMENT);
#endif
}
}
diff --git a/test/image_test.cc b/test/image_test.cc
index 8579785a7..3c89d5da7 100644
--- a/test/image_test.cc
+++ b/test/image_test.cc
@@ -39,8 +39,8 @@ BOOST_AUTO_TEST_CASE (aligned_image_test)
{
Image* s = new Image (AV_PIX_FMT_RGB24, dcp::Size (50, 50), true);
BOOST_CHECK_EQUAL (s->planes(), 1);
- /* 160 is 150 aligned to the nearest 32 bytes */
- BOOST_CHECK_EQUAL (s->stride()[0], 160);
+ /* 192 is 150 rounded up to the next multiple of 64 bytes */
+ BOOST_CHECK_EQUAL (s->stride()[0], 192);
BOOST_CHECK_EQUAL (s->line_size()[0], 150);
BOOST_CHECK (s->data()[0]);
BOOST_CHECK (!s->data()[1]);
@@ -50,7 +50,7 @@ BOOST_AUTO_TEST_CASE (aligned_image_test)
/* copy constructor */
Image* t = new Image (*s);
BOOST_CHECK_EQUAL (t->planes(), 1);
- BOOST_CHECK_EQUAL (t->stride()[0], 160);
+ BOOST_CHECK_EQUAL (t->stride()[0], 192);
BOOST_CHECK_EQUAL (t->line_size()[0], 150);
BOOST_CHECK (t->data()[0]);
BOOST_CHECK (!t->data()[1]);
@@ -67,7 +67,7 @@ BOOST_AUTO_TEST_CASE (aligned_image_test)
Image* u = new Image (AV_PIX_FMT_YUV422P, dcp::Size (150, 150), false);
*u = *s;
BOOST_CHECK_EQUAL (u->planes(), 1);
- BOOST_CHECK_EQUAL (u->stride()[0], 160);
+ BOOST_CHECK_EQUAL (u->stride()[0], 192);
BOOST_CHECK_EQUAL (u->line_size()[0], 150);
BOOST_CHECK (u->data()[0]);
BOOST_CHECK (!u->data()[1]);