/*
- Copyright (C) 2012 Paul Davis
+ Copyright (C) 2012 Paul Davis
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
FPU* FPU::_instance (0);
-#ifndef COMPILER_MSVC
+#if ( (defined __x86_64__) || (defined __i386__) || (defined _M_X64) || (defined _M_IX86) ) // ARCH_X86
+#ifndef PLATFORM_WINDOWS
-/* use __cpuid() as the name to match the MSVC intrinsic */
+/* use __cpuid() as the name to match the MSVC/mingw intrinsic */
static void
__cpuid(int regs[4], int cpuid_leaf)
"%ebx",
#endif
"%ecx", "%edx");
-
+
regs[0] = eax;
regs[1] = ebx;
regs[2] = ecx;
regs[3] = edx;
}
+#endif /* !PLATFORM_WINDOWS */
+
+#ifndef COMPILER_MSVC
+
static uint64_t
_xgetbv (uint32_t xcr)
{
+#ifdef __APPLE__
+ /* it would be nice to make this work on OS X but as long we use veclib,
+ we don't really need to know about SSE/AVX on that platform.
+ */
+ return 0;
+#else
uint32_t eax, edx;
__asm__ volatile ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (xcr));
return (static_cast<uint64_t>(edx) << 32) | eax;
+#endif
}
-#define _XCR_XFEATURE_ENABLED_MASK 0
+#elif _MSC_VER < 1600
+
+// '_xgetbv()' was only available from VC10 onwards
+__declspec(noinline) static uint64_t
+_xgetbv (uint32_t xcr)
+{
+ return 0;
+
+ // N.B. The following would probably work for a pre-VC10 build,
+ // although it might suffer from optimization issues. We'd need
+ // to place this function into its own (unoptimized) source file.
+ __asm {
+ mov ecx, [xcr]
+ __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 /*xgetbv*/
+ }
+}
#endif /* !COMPILER_MSVC */
+#endif /* ARCH_X86 */
+
+#ifndef _XCR_XFEATURE_ENABLED_MASK
+#define _XCR_XFEATURE_ENABLED_MASK 0
+#endif
FPU*
FPU::instance()
return _instance;
}
+void
+FPU::destroy ()
+{
+ delete _instance;
+ _instance = 0;
+}
+
FPU::FPU ()
: _flags ((Flags) 0)
{
info << string_compose (_("CPU vendor: %1"), cpu_vendor) << endmsg;
if (num_ids > 0) {
-
+
/* Now get CPU/FPU flags */
-
+
__cpuid (cpu_info, 1);
- if ((cpu_info[2] & (1<<27)) /* AVX */ &&
- (cpu_info[2] & (1<<28) /* (OS)XSAVE */) &&
- (_xgetbv (_XCR_XFEATURE_ENABLED_MASK) & 0x6)) { /* OS really supports XSAVE */
+ if ((cpu_info[2] & (1<<27)) /* OSXSAVE */ &&
+ (cpu_info[2] & (1<<28) /* AVX */) &&
+ ((_xgetbv (_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6)) { /* OS really supports XSAVE */
info << _("AVX-capable processor") << endmsg;
_flags = Flags (_flags | (HasAVX) );
}
}
/* Figure out CPU/FPU denormal handling capabilities */
-
+
if (cpu_info[3] & (1 << 24)) {
-
+
char** fxbuf = 0;
-
+
/* DAZ wasn't available in the first version of SSE. Since
setting a reserved bit in MXCSR causes a general protection
fault, we need to be able to check the availability of this
assert (fxbuf);
(void) posix_memalign ((void **) fxbuf, 16, 512);
assert (*fxbuf);
-#endif
-
+#endif
+
memset (*fxbuf, 0, 512);
-
+
#ifdef COMPILER_MSVC
char *buf = *fxbuf;
__asm {
: "memory"
);
#endif
-
+
uint32_t mxcsr_mask = *((uint32_t*) &((*fxbuf)[28]));
-
+
/* if the mask is zero, set its default value (from intel specs) */
-
+
if (mxcsr_mask == 0) {
mxcsr_mask = 0xffbf;
}
-
+
if (mxcsr_mask & (1<<6)) {
_flags = Flags (_flags | HasDenormalsAreZero);
- }
-
+ }
+
#if !defined HAVE_POSIX_MEMALIGN && defined PLATFORM_WINDOWS
_aligned_free (*fxbuf);
_aligned_free (fxbuf);
free (fxbuf);
#endif
}
-#endif
/* finally get the CPU brand */
const int parameter_end = 0x80000004;
string cpu_brand;
-
+
if (cpu_info[0] >= parameter_end) {
char* cpu_string_ptr = cpu_string;
-
+
for (int parameter = 0x80000002; parameter <= parameter_end &&
cpu_string_ptr < &cpu_string[sizeof(cpu_string)]; parameter++) {
__cpuid(cpu_info, parameter);
}
cpu_brand.assign(cpu_string, cpu_string_ptr - cpu_string);
info << string_compose (_("CPU brand: %1"), cpu_brand) << endmsg;
- }
+ }
}
-}
+#endif /* !ARCH_X86 */
+}
FPU::~FPU ()
{