forked from OSchip/llvm-project
Do not select load-folding BT (bit test) instructions during instruction selection for Pentium M, Core, Core 2, and AMD processors. On these processors a BT with a memory operand is significantly slower than a load followed by a BT of a register.
llvm-svn: 61557
This commit is contained in:
parent
13f3a33f44
commit
4c91aa3418
|
@ -48,6 +48,8 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
|
|||
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
|
||||
"Support 64-bit instructions",
|
||||
[FeatureSSE2]>;
|
||||
// FeatureSlowBTMem - Subtarget feature named "slow-bt-mem". It is declared
// with the attribute "IsBTMemSlow" and the value "true", and is described as
// "Bit testing of memory is slow".
def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
|
||||
"Bit testing of memory is slow">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// X86 processors supported.
|
||||
|
@ -66,27 +68,27 @@ def : Proc<"i686", []>;
|
|||
def : Proc<"pentiumpro", []>;
|
||||
def : Proc<"pentium2", [FeatureMMX]>;
|
||||
def : Proc<"pentium3", [FeatureSSE1]>;
|
||||
def : Proc<"pentium-m", [FeatureSSE2]>;
|
||||
def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
|
||||
def : Proc<"pentium4", [FeatureSSE2]>;
|
||||
def : Proc<"x86-64", [Feature64Bit]>;
|
||||
def : Proc<"yonah", [FeatureSSE3]>;
|
||||
def : Proc<"prescott", [FeatureSSE3]>;
|
||||
def : Proc<"nocona", [FeatureSSE3, Feature64Bit]>;
|
||||
def : Proc<"core2", [FeatureSSSE3, Feature64Bit]>;
|
||||
def : Proc<"penryn", [FeatureSSE41, Feature64Bit]>;
|
||||
def : Proc<"x86-64", [Feature64Bit, FeatureSlowBTMem]>;
|
||||
def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
|
||||
def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
|
||||
def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
|
||||
def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
|
||||
def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
|
||||
|
||||
def : Proc<"k6", [FeatureMMX]>;
|
||||
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
|
||||
def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>;
|
||||
def : Proc<"athlon", [FeatureMMX, Feature3DNowA]>;
|
||||
def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA]>;
|
||||
def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA]>;
|
||||
def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA]>;
|
||||
def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA]>;
|
||||
def : Proc<"k8", [Feature3DNowA, Feature64Bit]>;
|
||||
def : Proc<"opteron", [Feature3DNowA, Feature64Bit]>;
|
||||
def : Proc<"athlon64", [Feature3DNowA, Feature64Bit]>;
|
||||
def : Proc<"athlon-fx", [Feature3DNowA, Feature64Bit]>;
|
||||
def : Proc<"athlon", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
|
||||
def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
|
||||
def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
|
||||
def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
|
||||
def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
|
||||
def : Proc<"k8", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
|
||||
def : Proc<"opteron", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
|
||||
def : Proc<"athlon64", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
|
||||
def : Proc<"athlon-fx", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
|
||||
|
||||
def : Proc<"winchip-c6", [FeatureMMX]>;
|
||||
def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>;
|
||||
|
|
|
@ -222,6 +222,7 @@ def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
|
|||
def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">;
|
||||
def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
|
||||
def OptForSpeed : Predicate<"!OptForSize">;
|
||||
// FastBTMem - Predicate that holds when Subtarget->isBTMemSlow() is false,
// i.e. the subtarget does not flag BT with a memory operand as slow.
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// X86 Instruction Format Definitions.
|
||||
|
@ -2666,11 +2667,11 @@ def BT32rr : I<0xA3, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
|
|||
def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
|
||||
"bt{w}\t{$src2, $src1|$src1, $src2}",
|
||||
[(X86bt (loadi16 addr:$src1), GR16:$src2),
|
||||
(implicit EFLAGS)]>, OpSize, TB;
|
||||
(implicit EFLAGS)]>, OpSize, TB, Requires<[FastBTMem]>;
|
||||
def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
|
||||
"bt{l}\t{$src2, $src1|$src1, $src2}",
|
||||
[(X86bt (loadi32 addr:$src1), GR32:$src2),
|
||||
(implicit EFLAGS)]>, TB;
|
||||
(implicit EFLAGS)]>, TB, Requires<[FastBTMem]>;
|
||||
} // Defs = [EFLAGS]
|
||||
|
||||
// Sign/Zero extenders
|
||||
|
|
|
@ -149,6 +149,18 @@ bool X86::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
|
|||
return true;
|
||||
}
|
||||
|
||||
/// DetectFamilyModel - Decode the processor family and model numbers from a
/// CPUID EAX value and store them in the Family and Model out-parameters.
/// The base family is taken from bits 8-11 and the base model from bits 4-7.
/// When the base family is 6 or 0xF the extended model ID (bits 16-19) is
/// folded in as the high nibble of Model; when the base family is 0xF the
/// extended family ID (bits 20-27) is additionally added to Family.
/// NOTE(review): EAX is presumably the result of CPUID query 0x1, as in
/// GetCurrentX86CPU -- confirm for other callers.
static void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) {
|
||||
Family = (EAX >> 8) & 0xf; // Bits 8 - 11
|
||||
Model = (EAX >> 4) & 0xf; // Bits 4 - 7
|
||||
// Extended IDs are only consulted for base family 6 or 0xF.
if (Family == 6 || Family == 0xf) {
|
||||
// This test still sees the base family; the extended family ID has not
// been added yet.
if (Family == 0xf)
|
||||
// Examine extended family ID if family ID is F.
|
||||
Family += (EAX >> 20) & 0xff; // Bits 20 - 27
|
||||
// Examine extended model ID if family ID is 6 or F.
|
||||
Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
|
||||
}
|
||||
}
|
||||
|
||||
void X86Subtarget::AutoDetectSubtargetFeatures() {
|
||||
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
|
||||
union {
|
||||
|
@ -169,8 +181,15 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
|
|||
if ((ECX >> 19) & 0x1) X86SSELevel = SSE41;
|
||||
if ((ECX >> 20) & 0x1) X86SSELevel = SSE42;
|
||||
|
||||
if (memcmp(text.c, "GenuineIntel", 12) == 0 ||
|
||||
memcmp(text.c, "AuthenticAMD", 12) == 0) {
|
||||
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
|
||||
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
|
||||
if (IsIntel || IsAMD) {
|
||||
// Determine if bit test memory instructions are slow.
|
||||
unsigned Family = 0;
|
||||
unsigned Model = 0;
|
||||
DetectFamilyModel(EAX, Family, Model);
|
||||
IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
|
||||
|
||||
X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
|
||||
HasX86_64 = (EDX >> 29) & 0x1;
|
||||
}
|
||||
|
@ -180,15 +199,9 @@ static const char *GetCurrentX86CPU() {
|
|||
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
|
||||
if (X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
|
||||
return "generic";
|
||||
unsigned Family = (EAX >> 8) & 0xf; // Bits 8 - 11
|
||||
unsigned Model = (EAX >> 4) & 0xf; // Bits 4 - 7
|
||||
if (Family == 6 || Family == 0xf) {
|
||||
if (Family == 0xf)
|
||||
// Examine extended family ID if family ID is F.
|
||||
Family += (EAX >> 20) & 0xff; // Bits 20 - 27
|
||||
// Examine extended model ID if family ID is 6 or F.
|
||||
Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
|
||||
}
|
||||
unsigned Family = 0;
|
||||
unsigned Model = 0;
|
||||
DetectFamilyModel(EAX, Family, Model);
|
||||
|
||||
X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
|
||||
bool Em64T = (EDX >> 29) & 0x1;
|
||||
|
@ -285,6 +298,7 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
|
|||
, X86SSELevel(NoMMXSSE)
|
||||
, X863DNowLevel(NoThreeDNow)
|
||||
, HasX86_64(false)
|
||||
, IsBTMemSlow(false)
|
||||
, DarwinVers(0)
|
||||
, IsLinux(false)
|
||||
, stackAlignment(8)
|
||||
|
|
|
@ -64,6 +64,9 @@ protected:
|
|||
/// HasX86_64 - True if the processor supports X86-64 instructions.
|
||||
///
|
||||
bool HasX86_64;
|
||||
|
||||
/// IsBTMemSlow - True if BT (bit test) instructions with a memory operand are slow.
|
||||
bool IsBTMemSlow;
|
||||
|
||||
/// DarwinVers - Nonzero if this is a darwin platform: the numeric
|
||||
/// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
|
||||
|
@ -127,6 +130,8 @@ public:
|
|||
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
|
||||
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
|
||||
|
||||
/// isBTMemSlow - Return the value of the IsBTMemSlow flag.
bool isBTMemSlow() const { return IsBTMemSlow; }
|
||||
|
||||
unsigned getAsmFlavor() const {
|
||||
return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0;
|
||||
}
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
; RUN: llvm-as < %s | llc | grep btl
|
||||
; RUN: llvm-as < %s | llc -mcpu=pentium4 | grep btl | grep esp
|
||||
; RUN: llvm-as < %s | llc -mcpu=penryn | grep btl | not grep esp
|
||||
; PR3253
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
||||
target triple = "i386-apple-darwin8"
|
||||
|
|
Loading…
Reference in New Issue