diff options
| author | Carl Hetherington <cth@carlh.net> | 2020-06-02 02:25:40 +0200 |
|---|---|---|
| committer | Carl Hetherington <cth@carlh.net> | 2020-06-02 02:25:40 +0200 |
| commit | 6016e4b24205e11b1f006e43a6ed4b7f2bde732a (patch) | |
| tree | 7703b6eaa36c74a1d1e595d1fff2606fac861536 /src | |
| parent | 9895449dc9656507305940f387beb1e3998b0fe3 (diff) | |
Inline some stuff to no great effect.
Diffstat (limited to 'src')
| -rw-r--r-- | src/rgb_xyz.cc | 23 |
1 file changed, 9 insertions, 14 deletions
```diff
diff --git a/src/rgb_xyz.cc b/src/rgb_xyz.cc
index 5a6a909d..219dc675 100644
--- a/src/rgb_xyz.cc
+++ b/src/rgb_xyz.cc
@@ -385,11 +385,6 @@ dcp::rgb_to_xyz_avx2 (
 		0, fast_matrix[8], fast_matrix[7], fast_matrix[6], 0, fast_matrix[8], fast_matrix[7], fast_matrix[6]
 		);
 
-	/* XXX: in-line these, or keep them const? */
-	__m256 const_65535 = _mm256_set1_ps (65535.0);
-	__m256 const_0 = _mm256_set1_ps (0.0);
-	__m256i shuffle = _mm256_set_epi32 (7, 3, 6, 2, 5, 1, 4, 0);
-
 	for (int y = 0; y < size.height; ++y) {
 		__m128i const * p = reinterpret_cast<__m128i const *> (rgba + y * stride);
 		DCP_ASSERT (!(reinterpret_cast<uintptr_t>(p) % 16));
@@ -453,12 +448,12 @@ dcp::rgb_to_xyz_avx2 (
 			z_A = _mm256_hadd_ps (z_A, z_C);
 
 			// clamp
-			x_A = _mm256_min_ps(x_A, const_65535);
-			x_A = _mm256_max_ps(x_A, const_0);
-			y_A = _mm256_min_ps(y_A, const_65535);
-			y_A = _mm256_max_ps(y_A, const_0);
-			z_A = _mm256_min_ps(z_A, const_65535);
-			z_A = _mm256_max_ps(z_A, const_0);
+			x_A = _mm256_min_ps(x_A, _mm256_set1_ps(65535.0));
+			x_A = _mm256_max_ps(x_A, _mm256_set1_ps(0.0));
+			y_A = _mm256_min_ps(y_A, _mm256_set1_ps(65535.0));
+			y_A = _mm256_max_ps(y_A, _mm256_set1_ps(0.0));
+			z_A = _mm256_min_ps(z_A, _mm256_set1_ps(65535.0));
+			z_A = _mm256_max_ps(z_A, _mm256_set1_ps(0));
 
 			// round to int
 			__m256i lut_x = _mm256_cvtps_epi32(_mm256_floor_ps(x_A));
@@ -471,9 +466,9 @@ dcp::rgb_to_xyz_avx2 (
 			__m256i lut_z_out = _mm256_i32gather_epi32 (lut_out, lut_z, 4);
 
 			// shuffle
-			lut_x_out = _mm256_permutevar8x32_epi32 (lut_x_out, shuffle);
-			lut_y_out = _mm256_permutevar8x32_epi32 (lut_y_out, shuffle);
-			lut_z_out = _mm256_permutevar8x32_epi32 (lut_z_out, shuffle);
+			lut_x_out = _mm256_permutevar8x32_epi32 (lut_x_out, _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0));
+			lut_y_out = _mm256_permutevar8x32_epi32 (lut_y_out, _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0));
+			lut_z_out = _mm256_permutevar8x32_epi32 (lut_z_out, _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0));
 
 			// write to memory
 			_mm256_store_si256 (xyz_x, lut_x_out);
```
