forked from OSchip/llvm-project
Add feature flags for AVX and FMA and fix some SSE4A feature flag
initialization problems. llvm-svn: 74350
This commit is contained in:
parent
3aaa751712
commit
8f6f72cc99
|
@ -55,6 +55,13 @@ def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
|
|||
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
|
||||
"Support SSE 4a instructions">;
|
||||
|
||||
def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true",
|
||||
"Enable AVX instructions">;
|
||||
// FMA3 - three-operand fused multiply-add. Selected with the "fma3" feature
// string; drives the HasFMA3 subtarget field.
def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true",
                                   "Enable three-operand fused multiply-add">;
|
||||
// FMA4 - four-operand fused multiply-add. Selected with the "fma4" feature
// string; drives the HasFMA4 subtarget field.
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
                                   "Enable four-operand fused multiply-add">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// X86 processors supported.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -82,6 +89,9 @@ def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
|
|||
def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
|
||||
def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
|
||||
def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>;
|
||||
def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>;
|
||||
// Sandy Bridge does not have FMA
|
||||
def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>;
|
||||
|
||||
def : Proc<"k6", [FeatureMMX]>;
|
||||
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
|
||||
|
|
|
@ -236,6 +236,10 @@ def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
|
|||
def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
|
||||
def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
|
||||
def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
|
||||
def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
|
||||
def HasAVX : Predicate<"Subtarget->hasAVX()">;
|
||||
def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
|
||||
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
|
||||
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
|
||||
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
|
||||
def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
|
||||
|
|
|
@ -207,6 +207,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
|
|||
|
||||
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
|
||||
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
|
||||
|
||||
HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
|
||||
HasAVX = ((ECX >> 28) & 0x1);
|
||||
|
||||
if (IsIntel || IsAMD) {
|
||||
// Determine if bit test memory instructions are slow.
|
||||
unsigned Family = 0;
|
||||
|
@ -217,6 +221,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
|
|||
X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
|
||||
HasX86_64 = (EDX >> 29) & 0x1;
|
||||
HasSSE4A = IsAMD && ((ECX >> 6) & 0x1);
|
||||
HasFMA4 = IsAMD && ((ECX >> 16) & 0x1);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -342,6 +347,10 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
|
|||
, X86SSELevel(NoMMXSSE)
|
||||
, X863DNowLevel(NoThreeDNow)
|
||||
, HasX86_64(false)
|
||||
, HasSSE4A(false)
|
||||
, HasAVX(false)
|
||||
, HasFMA3(false)
|
||||
, HasFMA4(false)
|
||||
, IsBTMemSlow(false)
|
||||
, DarwinVers(0)
|
||||
, IsLinux(false)
|
||||
|
|
|
@ -64,12 +64,21 @@ protected:
|
|||
///
|
||||
bool HasX86_64;
|
||||
|
||||
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
|
||||
bool IsBTMemSlow;
|
||||
|
||||
/// HasSSE4A - True if the processor supports SSE4A instructions.
|
||||
bool HasSSE4A;
|
||||
|
||||
/// HasAVX - Target has AVX instructions
|
||||
bool HasAVX;
|
||||
|
||||
/// HasFMA3 - Target has 3-operand fused multiply-add
|
||||
bool HasFMA3;
|
||||
|
||||
/// HasFMA4 - Target has 4-operand fused multiply-add
|
||||
bool HasFMA4;
|
||||
|
||||
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
|
||||
bool IsBTMemSlow;
|
||||
|
||||
/// DarwinVers - Nonzero if this is a darwin platform: the numeric
|
||||
/// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
|
||||
unsigned char DarwinVers; // Is any darwin-x86 platform.
|
||||
|
@ -133,6 +142,9 @@ public:
|
|||
bool hasSSE4A() const { return HasSSE4A; }
|
||||
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
|
||||
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
|
||||
bool hasAVX() const { return hasAVX(); }
|
||||
/// hasFMA3 - Return the HasFMA3 flag (3-operand fused multiply-add).
bool hasFMA3() const {
  return HasFMA3;
}
|
||||
/// hasFMA4 - Return the HasFMA4 flag (4-operand fused multiply-add).
bool hasFMA4() const {
  return HasFMA4;
}
|
||||
|
||||
bool isBTMemSlow() const { return IsBTMemSlow; }
|
||||
|
||||
|
|
Loading…
Reference in New Issue