summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: mayeut <mayeut@users.noreply.github.com> 2015-07-27 20:12:48 +0200
committer: mayeut <mayeut@users.noreply.github.com> 2015-07-27 20:12:48 +0200
commit: b0035538af1209abbe405bf2ab45921169620513 (patch)
tree: ee4e366ca30cc4fa36a8758fc25c880447bb3ffc /src
parent: 45ccf501f04b062287fd416b35451ce5ed55cb93 (diff)
Use the __emul intrinsic under MSVC x86 for a fast 64-bit = 32-bit * 32-bit multiply
Update uclouvain/openjpeg#220
Diffstat (limited to 'src')
-rw-r--r-- src/lib/openjp2/opj_includes.h | 8
-rw-r--r-- src/lib/openjp2/opj_intmath.h | 8
2 files changed, 14 insertions, 2 deletions
diff --git a/src/lib/openjp2/opj_includes.h b/src/lib/openjp2/opj_includes.h
index bd9ae09a..22b2f62a 100644
--- a/src/lib/openjp2/opj_includes.h
+++ b/src/lib/openjp2/opj_includes.h
@@ -162,12 +162,16 @@ static INLINE long opj_lrintf(float f) {
}
#endif
-
-
#if defined(_MSC_VER) && (_MSC_VER < 1400)
#define vsnprintf _vsnprintf
#endif
+/* MSVC x86 is really bad at doing int64 = int32 * int32 on its own. Use intrinsic. */
+#if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(__INTEL_COMPILER) && defined(_M_IX86)
+# include <intrin.h>
+# pragma intrinsic(__emul)
+#endif
+
#include "opj_inttypes.h"
#include "opj_clock.h"
#include "opj_malloc.h"
diff --git a/src/lib/openjp2/opj_intmath.h b/src/lib/openjp2/opj_intmath.h
index 188a09a7..f39f6843 100644
--- a/src/lib/openjp2/opj_intmath.h
+++ b/src/lib/openjp2/opj_intmath.h
@@ -184,7 +184,11 @@ Multiply two fixed-precision rational numbers.
@return Returns a * b
*/
static INLINE OPJ_INT32 opj_int_fix_mul(OPJ_INT32 a, OPJ_INT32 b) {
+#if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(__INTEL_COMPILER) && defined(_M_IX86)
+ OPJ_INT64 temp = __emul(a, b);
+#else
OPJ_INT64 temp = (OPJ_INT64) a * (OPJ_INT64) b ;
+#endif
temp += 4096;
assert((temp >> 13) <= (OPJ_INT64)0x7FFFFFFF);
assert((temp >> 13) >= (-(OPJ_INT64)0x7FFFFFFF - (OPJ_INT64)1));
@@ -192,7 +196,11 @@ static INLINE OPJ_INT32 opj_int_fix_mul(OPJ_INT32 a, OPJ_INT32 b) {
}
static INLINE OPJ_INT32 opj_int_fix_mul_t1(OPJ_INT32 a, OPJ_INT32 b) {
+#if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(__INTEL_COMPILER) && defined(_M_IX86)
+ OPJ_INT64 temp = __emul(a, b);
+#else
OPJ_INT64 temp = (OPJ_INT64) a * (OPJ_INT64) b ;
+#endif
temp += 4096;
assert((temp >> (13 + 11 - T1_NMSEDEC_FRACBITS)) <= (OPJ_INT64)0x7FFFFFFF);
assert((temp >> (13 + 11 - T1_NMSEDEC_FRACBITS)) >= (-(OPJ_INT64)0x7FFFFFFF - (OPJ_INT64)1));