summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Carl Hetherington <cth@carlh.net> 2020-06-02 02:25:40 +0200
committer: Carl Hetherington <cth@carlh.net> 2020-06-02 02:25:40 +0200
commit: 6016e4b24205e11b1f006e43a6ed4b7f2bde732a (patch)
tree: 7703b6eaa36c74a1d1e595d1fff2606fac861536 /src
parent: 9895449dc9656507305940f387beb1e3998b0fe3 (diff)
Inline some stuff to no great effect.
Diffstat (limited to 'src')
-rw-r--r-- src/rgb_xyz.cc 23
1 files changed, 9 insertions, 14 deletions
diff --git a/src/rgb_xyz.cc b/src/rgb_xyz.cc
index 5a6a909d..219dc675 100644
--- a/src/rgb_xyz.cc
+++ b/src/rgb_xyz.cc
@@ -385,11 +385,6 @@ dcp::rgb_to_xyz_avx2 (
0, fast_matrix[8], fast_matrix[7], fast_matrix[6], 0, fast_matrix[8], fast_matrix[7], fast_matrix[6]
);
- /* XXX: in-line these, or keep them const? */
- __m256 const_65535 = _mm256_set1_ps (65535.0);
- __m256 const_0 = _mm256_set1_ps (0.0);
- __m256i shuffle = _mm256_set_epi32 (7, 3, 6, 2, 5, 1, 4, 0);
-
for (int y = 0; y < size.height; ++y) {
__m128i const * p = reinterpret_cast<__m128i const *> (rgba + y * stride);
DCP_ASSERT (!(reinterpret_cast<uintptr_t>(p) % 16));
@@ -453,12 +448,12 @@ dcp::rgb_to_xyz_avx2 (
z_A = _mm256_hadd_ps (z_A, z_C);
// clamp
- x_A = _mm256_min_ps(x_A, const_65535);
- x_A = _mm256_max_ps(x_A, const_0);
- y_A = _mm256_min_ps(y_A, const_65535);
- y_A = _mm256_max_ps(y_A, const_0);
- z_A = _mm256_min_ps(z_A, const_65535);
- z_A = _mm256_max_ps(z_A, const_0);
+ x_A = _mm256_min_ps(x_A, _mm256_set1_ps(65535.0));
+ x_A = _mm256_max_ps(x_A, _mm256_set1_ps(0.0));
+ y_A = _mm256_min_ps(y_A, _mm256_set1_ps(65535.0));
+ y_A = _mm256_max_ps(y_A, _mm256_set1_ps(0.0));
+ z_A = _mm256_min_ps(z_A, _mm256_set1_ps(65535.0));
+ z_A = _mm256_max_ps(z_A, _mm256_set1_ps(0));
// round to int
__m256i lut_x = _mm256_cvtps_epi32(_mm256_floor_ps(x_A));
@@ -471,9 +466,9 @@ dcp::rgb_to_xyz_avx2 (
__m256i lut_z_out = _mm256_i32gather_epi32 (lut_out, lut_z, 4);
// shuffle
- lut_x_out = _mm256_permutevar8x32_epi32 (lut_x_out, shuffle);
- lut_y_out = _mm256_permutevar8x32_epi32 (lut_y_out, shuffle);
- lut_z_out = _mm256_permutevar8x32_epi32 (lut_z_out, shuffle);
+ lut_x_out = _mm256_permutevar8x32_epi32 (lut_x_out, _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0));
+ lut_y_out = _mm256_permutevar8x32_epi32 (lut_y_out, _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0));
+ lut_z_out = _mm256_permutevar8x32_epi32 (lut_z_out, _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0));
// write to memory
_mm256_store_si256 (xyz_x, lut_x_out);