/*
- Copyright (C) 2012 Paul Davis
+ Copyright (C) 2012 Paul Davis
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "pbd/fpu.h"
#include "pbd/error.h"
-#include "i18n.h"
+#include "pbd/i18n.h"
using namespace PBD;
using namespace std;
static void
__cpuid(int regs[4], int cpuid_leaf)
{
- int eax, ebx, ecx, edx;
asm volatile (
#if defined(__i386__)
"pushl %%ebx;\n\t"
#endif
- "movl %4, %%eax;\n\t"
"cpuid;\n\t"
- "movl %%eax, (%1);\n\t" /* Run CPUID for cpuid_leaf and write the results straight into the
+    caller's array: operand %1 is the regs pointer, and EAX/EBX/ECX/EDX
+    are stored at byte offsets 0/4/8/12, i.e. regs[0..3] (assumes a
+    4-byte int). */
+ "movl %%ebx, 4(%1);\n\t"
+ "movl %%ecx, 8(%1);\n\t"
+ "movl %%edx, 12(%1);\n\t"
#if defined(__i386__)
"popl %%ebx;\n\t"
#endif
- :"=m" (eax), "=m" (ebx), "=m" (ecx), "=m" (edx)
- :"r" (cpuid_leaf)
- :"%eax",
+ :"=a" (cpuid_leaf) /* %eax clobbered by CPUID, so it is listed as an output as well as an input */
+ :"S" (regs), "a" (cpuid_leaf) /* "S": regs pointer held in the SI register; "a": requested leaf preloaded into %eax */
+ :
#if !defined(__i386__)
"%ebx",
#endif
- "%ecx", "%edx");
-
- regs[0] = eax;
- regs[1] = ebx;
- regs[2] = ecx;
- regs[3] = edx;
+ "%ecx", "%edx", "memory"); /* "memory": the asm stores through regs, which is not declared as an output operand */
}
#endif /* !PLATFORM_WINDOWS */
-#ifndef COMPILER_MSVC
-
-static uint64_t
-_xgetbv (uint32_t xcr)
-{
-#ifdef __APPLE__
- /* it would be nice to make this work on OS X but as long we use veclib,
- we don't really need to know about SSE/AVX on that platform.
- */
- return 0;
-#else
- uint32_t eax, edx;
- __asm__ volatile ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (xcr));
- return (static_cast<uint64_t>(edx) << 32) | eax;
+#ifndef HAVE_XGETBV // Allow definition by build system
+ #if defined(__MINGW32__) && defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR >= 5
+ #define HAVE_XGETBV
+ #elif defined(_MSC_VER) && _MSC_VER >= 1600
+ // '_xgetbv()' was only available from VC10 onwards
+ #define HAVE_XGETBV
+ #endif
#endif
-}
-#elif _MSC_VER < 1600
+#ifndef HAVE_XGETBV
+
+#ifdef COMPILER_MSVC
// '_xgetbv()' was only available from VC10 onwards
__declspec(noinline) static uint64_t
// N.B. The following would probably work for a pre-VC10 build,
// although it might suffer from optimization issues. We'd need
// to place this function into its own (unoptimized) source file.
- __asm {
+ __asm {
mov ecx, [xcr]
- __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 /*xgetbv*/
- }
+ __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 /*xgetbv*/
+ }
+}
+
+#else
+
+static uint64_t
+_xgetbv (uint32_t xcr)
+{ /* Fallback _xgetbv() for non-MSVC builds where HAVE_XGETBV is not defined: returns the 64-bit value read by the XGETBV instruction for register index xcr. */
+#ifdef __APPLE__
+ /* it would be nice to make this work on OS X but as long as we use veclib,
+ we don't really need to know about SSE/AVX on that platform.
+ */
+ return 0; /* no feature bits are reported on this platform */
+#else
+ uint32_t eax, edx;
+ __asm__ volatile ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (xcr)); /* xcr is passed in %ecx; the result comes back in %edx:%eax */
+ return (static_cast<uint64_t>(edx) << 32) | eax; /* edx supplies the high 32 bits, eax the low 32 bits */
+#endif
}
#endif /* !COMPILER_MSVC */
+#endif /* !HAVE_XGETBV */
#endif /* ARCH_X86 */
#ifndef _XCR_XFEATURE_ENABLED_MASK
return _instance;
}
+void
+FPU::destroy ()
+{ /* Delete the FPU object held in _instance and reset the pointer. */
+ delete _instance;
+ _instance = 0; /* cleared so that no dangling pointer is left behind after the delete */
+}
+
FPU::FPU ()
: _flags ((Flags) 0)
{
error << _("FPU object instantiated more than once") << endmsg;
}
+ if (getenv("ARDOUR_FPU_FLAGS")) {
+ _flags = Flags (atoi (getenv("ARDOUR_FPU_FLAGS")));
+ return;
+ }
+
#if !( (defined __x86_64__) || (defined __i386__) || (defined _M_X64) || (defined _M_IX86) ) // !ARCH_X86
/* Non-Intel architecture, nothing to do here */
return;
info << string_compose (_("CPU vendor: %1"), cpu_vendor) << endmsg;
if (num_ids > 0) {
-
+
/* Now get CPU/FPU flags */
-
+
__cpuid (cpu_info, 1);
if ((cpu_info[2] & (1<<27)) /* OSXSAVE */ &&
}
/* Figure out CPU/FPU denormal handling capabilities */
-
+
if (cpu_info[3] & (1 << 24)) {
-
+
char** fxbuf = 0;
-
+
/* DAZ wasn't available in the first version of SSE. Since
setting a reserved bit in MXCSR causes a general protection
fault, we need to be able to check the availability of this
assert (fxbuf);
(void) posix_memalign ((void **) fxbuf, 16, 512);
assert (*fxbuf);
-#endif
-
+#endif
+
memset (*fxbuf, 0, 512);
-
+
#ifdef COMPILER_MSVC
char *buf = *fxbuf;
__asm {
: "memory"
);
#endif
-
+
uint32_t mxcsr_mask = *((uint32_t*) &((*fxbuf)[28]));
-
+
/* if the mask is zero, set its default value (from intel specs) */
-
+
if (mxcsr_mask == 0) {
mxcsr_mask = 0xffbf;
}
-
+
if (mxcsr_mask & (1<<6)) {
_flags = Flags (_flags | HasDenormalsAreZero);
- }
-
+ }
+
#if !defined HAVE_POSIX_MEMALIGN && defined PLATFORM_WINDOWS
_aligned_free (*fxbuf);
_aligned_free (fxbuf);
const int parameter_end = 0x80000004;
string cpu_brand;
-
+
if (cpu_info[0] >= parameter_end) {
char* cpu_string_ptr = cpu_string;
-
+
for (int parameter = 0x80000002; parameter <= parameter_end &&
cpu_string_ptr < &cpu_string[sizeof(cpu_string)]; parameter++) {
__cpuid(cpu_info, parameter);
}
cpu_brand.assign(cpu_string, cpu_string_ptr - cpu_string);
info << string_compose (_("CPU brand: %1"), cpu_brand) << endmsg;
- }
+ }
}
#endif /* !ARCH_X86 */
-}
+}
FPU::~FPU ()
{