diff options
| author | Peter Johnson <johnson.peter@gmail.com> | 2023-12-09 13:02:11 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-12-09 22:02:11 +0100 |
| commit | 41c25e3827c68a39b9e20c1625a0b96e49955445 (patch) | |
| tree | 2490e32ed7ed1537b564adcdf55e30547578f1cf /src | |
| parent | dfdedea48f0b1f0d7f7d28c5e98be6a64f8febc8 (diff) | |
ht_dec.c: Improve MSVC arm64 popcount performance (#1479)
Use NEON instructions for ARM64 (implementation based on microsoft/STL#2127).
Godbolt output here: https://godbolt.org/z/q7GPTqT14
Diffstat (limited to 'src')
| -rw-r--r-- | src/lib/openjp2/ht_dec.c | 13 |
1 file changed, 13 insertions, 0 deletions
```diff
diff --git a/src/lib/openjp2/ht_dec.c b/src/lib/openjp2/ht_dec.c
index 9ea061f3..a554b24a 100644
--- a/src/lib/openjp2/ht_dec.c
+++ b/src/lib/openjp2/ht_dec.c
@@ -55,6 +55,16 @@
 #define OPJ_COMPILER_GNUC
 #endif
 
+#if defined(OPJ_COMPILER_MSVC) && defined(_M_ARM64) \
+    && !defined(_M_ARM64EC) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \
+    && !defined(__INTEL_COMPILER) && !defined(__clang__)
+#define MSVC_NEON_INTRINSICS
+#endif
+
+#ifdef MSVC_NEON_INTRINSICS
+#include <arm64_neon.h>
+#endif
+
 //************************************************************************/
 /** @brief Displays the error message for disabling the decoding of SPP and
   * MRP passes
@@ -71,6 +81,9 @@ OPJ_UINT32 population_count(OPJ_UINT32 val)
 {
 #if defined(OPJ_COMPILER_MSVC) && (defined(_M_IX86) || defined(_M_AMD64))
     return (OPJ_UINT32)__popcnt(val);
+#elif defined(OPJ_COMPILER_MSVC) && defined(MSVC_NEON_INTRINSICS)
+    const __n64 temp = neon_cnt(__uint64ToN64_v(val));
+    return neon_addv8(temp).n8_i8[0];
 #elif (defined OPJ_COMPILER_GNUC)
     return (OPJ_UINT32)__builtin_popcount(val);
 #else
```
