Use __emul under msvc x86 for fast 64 = 32 * 32

author mayeut <mayeut@users.noreply.github.com>

Mon, 27 Jul 2015 18:12:48 +0000 (20:12 +0200)

committer mayeut <mayeut@users.noreply.github.com>

Mon, 27 Jul 2015 18:12:48 +0000 (20:12 +0200)
author mayeut <mayeut@users.noreply.github.com>
Mon, 27 Jul 2015 18:12:48 +0000 (20:12 +0200)
committer mayeut <mayeut@users.noreply.github.com>
Mon, 27 Jul 2015 18:12:48 +0000 (20:12 +0200)
diff --git a/src/lib/openjp2/opj_includes.h b/src/lib/openjp2/opj_includes.h

index bd9ae09a0eb78d1a85efd38a1b83709bdb938ef1..22b2f62a75083923c7cc0ca63ffddd06a18f1a8f 100644 (file)
--- a/src/lib/openjp2/opj_includes.h
+++ b/src/lib/openjp2/opj_includes.h
@@ -162,12 +162,16 @@ static INLINE long opj_lrintf(float f) {
  }
  #endif
  
-
-
  #if defined(_MSC_VER) && (_MSC_VER < 1400)
         #define vsnprintf _vsnprintf
  #endif
  
+/* MSVC x86 is really bad at doing int64 = int32 * int32 on its own. Use intrinsic. */
+#if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(__INTEL_COMPILER) && defined(_M_IX86)
+#      include <intrin.h>
+#      pragma intrinsic(__emul)
+#endif
+
  #include "opj_inttypes.h"
  #include "opj_clock.h"
  #include "opj_malloc.h"
diff --git a/src/lib/openjp2/opj_intmath.h b/src/lib/openjp2/opj_intmath.h

index 188a09a78b6c6daca3bfb1d055312fdf13f14573..f39f684317574d75ab91b6810fad9be7997d4477 100644 (file)
--- a/src/lib/openjp2/opj_intmath.h
+++ b/src/lib/openjp2/opj_intmath.h
@@ -184,7 +184,11 @@ Multiply two fixed-precision rational numbers.
  @return Returns a * b
  */
  static INLINE OPJ_INT32 opj_int_fix_mul(OPJ_INT32 a, OPJ_INT32 b) {
+#if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(__INTEL_COMPILER) && defined(_M_IX86)
+       OPJ_INT64 temp = __emul(a, b);
+#else
         OPJ_INT64 temp = (OPJ_INT64) a * (OPJ_INT64) b ;
+#endif
         temp += 4096;
         assert((temp >> 13) <= (OPJ_INT64)0x7FFFFFFF);
         assert((temp >> 13) >= (-(OPJ_INT64)0x7FFFFFFF - (OPJ_INT64)1));
@@ -192,7 +196,11 @@ static INLINE OPJ_INT32 opj_int_fix_mul(OPJ_INT32 a, OPJ_INT32 b) {
  }
  
  static INLINE OPJ_INT32 opj_int_fix_mul_t1(OPJ_INT32 a, OPJ_INT32 b) {
+#if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(__INTEL_COMPILER) && defined(_M_IX86)
+       OPJ_INT64 temp = __emul(a, b);
+#else
         OPJ_INT64 temp = (OPJ_INT64) a * (OPJ_INT64) b ;
+#endif
         temp += 4096;
         assert((temp >> (13 + 11 - T1_NMSEDEC_FRACBITS)) <= (OPJ_INT64)0x7FFFFFFF);
         assert((temp >> (13 + 11 - T1_NMSEDEC_FRACBITS)) >= (-(OPJ_INT64)0x7FFFFFFF - (OPJ_INT64)1));
author	mayeut <mayeut@users.noreply.github.com>
	Mon, 27 Jul 2015 18:12:48 +0000 (20:12 +0200)
committer	mayeut <mayeut@users.noreply.github.com>
	Mon, 27 Jul 2015 18:12:48 +0000 (20:12 +0200)
src/lib/openjp2/opj_includes.h		patch | blob | history
src/lib/openjp2/opj_intmath.h		patch | blob | history