forked from OSchip/llvm-project
X86: enable AVX2 under Haswell native compilation
Patch by Adam Strzelecki llvm-svn: 195632
This commit is contained in:
parent
ce42084402
commit
89ccb616bd
|
@ -95,6 +95,75 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
|
|||
#endif
|
||||
}
|
||||
|
||||
/// GetX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return the
|
||||
/// 4 values in the specified arguments. If we can't run cpuid on the host,
|
||||
/// return true.
|
||||
bool GetX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX,
|
||||
unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
|
||||
#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
|
||||
#if defined(__GNUC__)
|
||||
// gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually.
|
||||
asm ("movq\t%%rbx, %%rsi\n\t"
|
||||
"cpuid\n\t"
|
||||
"xchgq\t%%rbx, %%rsi\n\t"
|
||||
: "=a" (*rEAX),
|
||||
"=S" (*rEBX),
|
||||
"=c" (*rECX),
|
||||
"=d" (*rEDX)
|
||||
: "a" (value),
|
||||
"c" (subleaf));
|
||||
return false;
|
||||
#elif defined(_MSC_VER)
|
||||
// __cpuidex was added in MSVC++ 9.0 SP1
|
||||
#if (_MSC_VER > 1500) || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729)
|
||||
int registers[4];
|
||||
__cpuidex(registers, value, subleaf);
|
||||
*rEAX = registers[0];
|
||||
*rEBX = registers[1];
|
||||
*rECX = registers[2];
|
||||
*rEDX = registers[3];
|
||||
return false;
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
|
||||
#if defined(__GNUC__)
|
||||
asm ("movl\t%%ebx, %%esi\n\t"
|
||||
"cpuid\n\t"
|
||||
"xchgl\t%%ebx, %%esi\n\t"
|
||||
: "=a" (*rEAX),
|
||||
"=S" (*rEBX),
|
||||
"=c" (*rECX),
|
||||
"=d" (*rEDX)
|
||||
: "a" (value),
|
||||
"c" (subleaf));
|
||||
return false;
|
||||
#elif defined(_MSC_VER)
|
||||
__asm {
|
||||
mov eax,value
|
||||
mov ecx,subleaf
|
||||
cpuid
|
||||
mov esi,rEAX
|
||||
mov dword ptr [esi],eax
|
||||
mov esi,rEBX
|
||||
mov dword ptr [esi],ebx
|
||||
mov esi,rECX
|
||||
mov dword ptr [esi],ecx
|
||||
mov esi,rEDX
|
||||
mov dword ptr [esi],edx
|
||||
}
|
||||
return false;
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
static bool OSHasAVXSupport() {
|
||||
#if defined(__GNUC__)
|
||||
// Check xgetbv; this uses a .byte sequence instead of the instruction
|
||||
|
@ -131,6 +200,14 @@ std::string sys::getHostCPUName() {
|
|||
unsigned Model = 0;
|
||||
DetectX86FamilyModel(EAX, Family, Model);
|
||||
|
||||
union {
|
||||
unsigned u[3];
|
||||
char c[12];
|
||||
} text;
|
||||
|
||||
GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
|
||||
|
||||
unsigned MaxLeaf = EAX;
|
||||
bool HasSSE3 = (ECX & 0x1);
|
||||
bool HasSSE41 = (ECX & 0x80000);
|
||||
// If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
|
||||
|
@ -138,15 +215,12 @@ std::string sys::getHostCPUName() {
|
|||
// switch, then we have full AVX support.
|
||||
const unsigned AVXBits = (1 << 27) | (1 << 28);
|
||||
bool HasAVX = ((ECX & AVXBits) == AVXBits) && OSHasAVXSupport();
|
||||
bool HasAVX2 = HasAVX && MaxLeaf >= 0x7 &&
|
||||
!GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX) &&
|
||||
(EBX & 0x20);
|
||||
GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
|
||||
bool Em64T = (EDX >> 29) & 0x1;
|
||||
|
||||
union {
|
||||
unsigned u[3];
|
||||
char c[12];
|
||||
} text;
|
||||
|
||||
GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
|
||||
if (memcmp(text.c, "GenuineIntel", 12) == 0) {
|
||||
switch (Family) {
|
||||
case 3:
|
||||
|
@ -254,10 +328,20 @@ std::string sys::getHostCPUName() {
|
|||
|
||||
// Ivy Bridge:
|
||||
case 58:
|
||||
case 62: // Ivy Bridge EP
|
||||
// Not all Ivy Bridge processors support AVX (such as the Pentium
|
||||
// versions instead of the i7 versions).
|
||||
return HasAVX ? "core-avx-i" : "corei7";
|
||||
|
||||
// Haswell:
|
||||
case 60:
|
||||
case 63:
|
||||
case 69:
|
||||
case 70:
|
||||
// Not all Haswell processors support AVX too (such as the Pentium
|
||||
// versions instead of the i7 versions).
|
||||
return HasAVX2 ? "core-avx2" : "corei7";
|
||||
|
||||
case 28: // Most 45 nm Intel Atom processors
|
||||
case 38: // 45 nm Atom Lincroft
|
||||
case 39: // 32 nm Atom Medfield
|
||||
|
|
|
@ -285,7 +285,12 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
|
|||
(Family == 6 && Model == 0x2F) || // Westmere: Westmere-EX
|
||||
(Family == 6 && Model == 0x2A) || // SandyBridge
|
||||
(Family == 6 && Model == 0x2D) || // SandyBridge: SandyBridge-E*
|
||||
(Family == 6 && Model == 0x3A))) {// IvyBridge
|
||||
(Family == 6 && Model == 0x3A) || // IvyBridge
|
||||
(Family == 6 && Model == 0x3E) || // IvyBridge EP
|
||||
(Family == 6 && Model == 0x3C) || // Haswell
|
||||
(Family == 6 && Model == 0x3F) || // ...
|
||||
(Family == 6 && Model == 0x45) || // ...
|
||||
(Family == 6 && Model == 0x46))) { // ...
|
||||
IsUAMemFast = true;
|
||||
ToggleFeature(X86::FeatureFastUAMem);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue