summaryrefslogtreecommitdiff
path: root/src/lib
diff options
context:
space:
mode:
author Peter Johnson <johnson.peter@gmail.com> 2023-12-09 13:02:11 -0800
committer GitHub <noreply@github.com> 2023-12-09 22:02:11 +0100
commit 41c25e3827c68a39b9e20c1625a0b96e49955445 (patch)
tree 2490e32ed7ed1537b564adcdf55e30547578f1cf /src/lib
parent dfdedea48f0b1f0d7f7d28c5e98be6a64f8febc8 (diff)
ht_dec.c: Improve MSVC arm64 popcount performance (#1479)
Use NEON instructions for ARM64 (implementation based on microsoft/STL#2127). Godbolt output here: https://godbolt.org/z/q7GPTqT14
Diffstat (limited to 'src/lib')
-rw-r--r-- src/lib/openjp2/ht_dec.c 13
1 files changed, 13 insertions, 0 deletions
diff --git a/src/lib/openjp2/ht_dec.c b/src/lib/openjp2/ht_dec.c
index 9ea061f3..a554b24a 100644
--- a/src/lib/openjp2/ht_dec.c
+++ b/src/lib/openjp2/ht_dec.c
@@ -55,6 +55,16 @@
#define OPJ_COMPILER_GNUC
#endif
+#if defined(OPJ_COMPILER_MSVC) && defined(_M_ARM64) \
+ && !defined(_M_ARM64EC) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \
+ && !defined(__INTEL_COMPILER) && !defined(__clang__)
+#define MSVC_NEON_INTRINSICS
+#endif
+
+#ifdef MSVC_NEON_INTRINSICS
+#include <arm64_neon.h>
+#endif
+
//************************************************************************/
/** @brief Displays the error message for disabling the decoding of SPP and
* MRP passes
@@ -71,6 +81,9 @@ OPJ_UINT32 population_count(OPJ_UINT32 val)
{
#if defined(OPJ_COMPILER_MSVC) && (defined(_M_IX86) || defined(_M_AMD64))
return (OPJ_UINT32)__popcnt(val);
+#elif defined(OPJ_COMPILER_MSVC) && defined(MSVC_NEON_INTRINSICS)
+ const __n64 temp = neon_cnt(__uint64ToN64_v(val));
+ return neon_addv8(temp).n8_i8[0];
#elif (defined OPJ_COMPILER_GNUC)
return (OPJ_UINT32)__builtin_popcount(val);
#else