/*
- Copyright (C) 2012 Paul Davis
+ Copyright (C) 2012 Paul Davis
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "pbd/fpu.h"
#include "pbd/error.h"
-#include "i18n.h"
+#include "pbd/i18n.h"
using namespace PBD;
using namespace std;
static void
__cpuid(int regs[4], int cpuid_leaf)
{
- int eax, ebx, ecx, edx;
asm volatile (
#if defined(__i386__)
"pushl %%ebx;\n\t"
#endif
- "movl %4, %%eax;\n\t"
"cpuid;\n\t"
- "movl %%eax, %0;\n\t"
- "movl %%ebx, %1;\n\t"
- "movl %%ecx, %2;\n\t"
- "movl %%edx, %3;\n\t"
+ "movl %%eax, (%1);\n\t" /* %1 is the regs pointer (held in the si register): regs[0] = eax */
+ "movl %%ebx, 4(%1);\n\t" /* regs[1] = ebx (byte offsets 4/8/12 assume a 4-byte int) */
+ "movl %%ecx, 8(%1);\n\t" /* regs[2] = ecx */
+ "movl %%edx, 12(%1);\n\t" /* regs[3] = edx */
#if defined(__i386__)
"popl %%ebx;\n\t"
#endif
- :"=m" (eax), "=m" (ebx), "=m" (ecx), "=m" (edx)
- :"r" (cpuid_leaf)
- :"%eax",
+ :"=a" (cpuid_leaf) /* operand %0: %eax is overwritten by CPUID, so it is declared as an output rather than a clobber */
+ :"S" (regs), "a" (cpuid_leaf) /* operand %1: destination array; operand %2: requested leaf, preloaded into %eax */
+ :
#if !defined(__i386__)
"%ebx",
#endif
- "%ecx", "%edx");
-
- regs[0] = eax;
- regs[1] = ebx;
- regs[2] = ecx;
- regs[3] = edx;
+ "%ecx", "%edx", "memory"); /* "memory" because the asm itself stores the four results through regs */
}
#endif /* !PLATFORM_WINDOWS */
-#ifndef COMPILER_MSVC
+#ifndef HAVE_XGETBV // Allow definition by build system; otherwise auto-detect from the compiler version macros below
+ #if defined(__MINGW32__) && defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR >= 5
+ #define HAVE_XGETBV // presumably mingw-w64 v5+ headers already supply '_xgetbv()' -- TODO confirm
+ #elif defined(_MSC_VER) && _MSC_VER >= 1600
+ // '_xgetbv()' was only available from VC10 (_MSC_VER 1600) onwards
+ #define HAVE_XGETBV
+ #endif
+#endif /* HAVE_XGETBV auto-detection */
+
+#ifndef HAVE_XGETBV
+
+#ifdef COMPILER_MSVC
+
+// Stand-in for MSVC builds without HAVE_XGETBV: '_xgetbv()' was only available from VC10 onwards
+__declspec(noinline) static uint64_t
+_xgetbv (uint32_t xcr)
+{
+ return 0; // stub: reports no feature bits, so the constructor's AVX test (which requires bits 0x6) cannot pass; the code below is deliberately unreachable
+
+ // N.B. The following would probably work for a pre-VC10 build,
+ // although it might suffer from optimization issues. We'd need
+ // to place this function into its own (unoptimized) source file.
+ __asm {
+ mov ecx, [xcr]
+ __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 /* raw opcode bytes for xgetbv */
+ }
+}
+
+#else
static uint64_t
_xgetbv (uint32_t xcr)
{
#ifdef __APPLE__
- /* it would be nice to make this work on OS X but as long we use veclib,
- we don't really need to know about SSE/AVX on that platform.
- */
- return 0;
+ /* it would be nice to make this work on OS X but as long as we use veclib,
+ we don't really need to know about SSE/AVX on that platform.
+ */
+ return 0;
#else
uint32_t eax, edx;
__asm__ volatile ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (xcr));
}
#endif /* !COMPILER_MSVC */
+#endif /* !HAVE_XGETBV */
#endif /* ARCH_X86 */
#ifndef _XCR_XFEATURE_ENABLED_MASK
return _instance;
}
+void
+FPU::destroy () /* Deletes the object that _instance points to and resets the pointer. */
+{
+ delete _instance; /* delete on a null pointer is a no-op, so calling destroy() again after the reset below is harmless */
+ _instance = 0; /* clear the pointer so it does not dangle after the delete */
+}
+
FPU::FPU ()
: _flags ((Flags) 0)
{
error << _("FPU object instantiated more than once") << endmsg;
}
+ if (getenv("ARDOUR_FPU_FLAGS")) {
+ _flags = Flags (atoi (getenv("ARDOUR_FPU_FLAGS")));
+ return;
+ }
+
#if !( (defined __x86_64__) || (defined __i386__) || (defined _M_X64) || (defined _M_IX86) ) // !ARCH_X86
/* Non-Intel architecture, nothing to do here */
return;
info << string_compose (_("CPU vendor: %1"), cpu_vendor) << endmsg;
if (num_ids > 0) {
-
+
/* Now get CPU/FPU flags */
-
+
__cpuid (cpu_info, 1);
- if ((cpu_info[2] & (1<<27)) /* AVX */ &&
- (cpu_info[2] & (1<<28) /* (OS)XSAVE */) &&
- (_xgetbv (_XCR_XFEATURE_ENABLED_MASK) & 0x6)) { /* OS really supports XSAVE */
+ if ((cpu_info[2] & (1<<27)) /* OSXSAVE */ &&
+ (cpu_info[2] & (1<<28) /* AVX */) &&
+ ((_xgetbv (_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6)) { /* OS really supports XSAVE */
info << _("AVX-capable processor") << endmsg;
_flags = Flags (_flags | (HasAVX) );
}
}
/* Figure out CPU/FPU denormal handling capabilities */
-
+
if (cpu_info[3] & (1 << 24)) {
-
+
char** fxbuf = 0;
-
+
/* DAZ wasn't available in the first version of SSE. Since
setting a reserved bit in MXCSR causes a general protection
fault, we need to be able to check the availability of this
assert (fxbuf);
(void) posix_memalign ((void **) fxbuf, 16, 512);
assert (*fxbuf);
-#endif
-
+#endif
+
memset (*fxbuf, 0, 512);
-
+
#ifdef COMPILER_MSVC
char *buf = *fxbuf;
__asm {
: "memory"
);
#endif
-
+
uint32_t mxcsr_mask = *((uint32_t*) &((*fxbuf)[28]));
-
+
/* if the mask is zero, set its default value (from intel specs) */
-
+
if (mxcsr_mask == 0) {
mxcsr_mask = 0xffbf;
}
-
+
if (mxcsr_mask & (1<<6)) {
_flags = Flags (_flags | HasDenormalsAreZero);
- }
-
+ }
+
#if !defined HAVE_POSIX_MEMALIGN && defined PLATFORM_WINDOWS
_aligned_free (*fxbuf);
_aligned_free (fxbuf);
const int parameter_end = 0x80000004;
string cpu_brand;
-
+
if (cpu_info[0] >= parameter_end) {
char* cpu_string_ptr = cpu_string;
-
+
for (int parameter = 0x80000002; parameter <= parameter_end &&
cpu_string_ptr < &cpu_string[sizeof(cpu_string)]; parameter++) {
__cpuid(cpu_info, parameter);
}
cpu_brand.assign(cpu_string, cpu_string_ptr - cpu_string);
info << string_compose (_("CPU brand: %1"), cpu_brand) << endmsg;
- }
+ }
}
#endif /* !ARCH_X86 */
-}
+}
FPU::~FPU ()
{