Do not isel load folding bt instructions for pentium m, core, core2, and AMD processors. These are significantly slower than a load followed by a bt of a register.

llvm-svn: 61557
This commit is contained in:
Evan Cheng 2009-01-02 05:35:45 +00:00
parent 13f3a33f44
commit 4c91aa3418
5 changed files with 53 additions and 29 deletions

View File

@ -48,6 +48,8 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
"Support 64-bit instructions",
[FeatureSSE2]>;
def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
"Bit testing of memory is slow">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@ -66,27 +68,27 @@ def : Proc<"i686", []>;
def : Proc<"pentiumpro", []>;
def : Proc<"pentium2", [FeatureMMX]>;
def : Proc<"pentium3", [FeatureSSE1]>;
def : Proc<"pentium-m", [FeatureSSE2]>;
def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"pentium4", [FeatureSSE2]>;
def : Proc<"x86-64", [Feature64Bit]>;
def : Proc<"yonah", [FeatureSSE3]>;
def : Proc<"prescott", [FeatureSSE3]>;
def : Proc<"nocona", [FeatureSSE3, Feature64Bit]>;
def : Proc<"core2", [FeatureSSSE3, Feature64Bit]>;
def : Proc<"penryn", [FeatureSSE41, Feature64Bit]>;
def : Proc<"x86-64", [Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>;
def : Proc<"athlon", [FeatureMMX, Feature3DNowA]>;
def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA]>;
def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA]>;
def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA]>;
def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA]>;
def : Proc<"k8", [Feature3DNowA, Feature64Bit]>;
def : Proc<"opteron", [Feature3DNowA, Feature64Bit]>;
def : Proc<"athlon64", [Feature3DNowA, Feature64Bit]>;
def : Proc<"athlon-fx", [Feature3DNowA, Feature64Bit]>;
def : Proc<"athlon", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"k8", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"opteron", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"athlon64", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"athlon-fx", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>;

View File

@ -222,6 +222,7 @@ def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">;
def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
@ -2666,11 +2667,11 @@ def BT32rr : I<0xA3, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(X86bt (loadi16 addr:$src1), GR16:$src2),
(implicit EFLAGS)]>, OpSize, TB;
(implicit EFLAGS)]>, OpSize, TB, Requires<[FastBTMem]>;
def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"bt{l}\t{$src2, $src1|$src1, $src2}",
[(X86bt (loadi32 addr:$src1), GR32:$src2),
(implicit EFLAGS)]>, TB;
(implicit EFLAGS)]>, TB, Requires<[FastBTMem]>;
} // Defs = [EFLAGS]
// Sign/Zero extenders

View File

@ -149,6 +149,18 @@ bool X86::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
return true;
}
static void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) {
Family = (EAX >> 8) & 0xf; // Bits 8 - 11
Model = (EAX >> 4) & 0xf; // Bits 4 - 7
if (Family == 6 || Family == 0xf) {
if (Family == 0xf)
// Examine extended family ID if family ID is F.
Family += (EAX >> 20) & 0xff; // Bits 20 - 27
// Examine extended model ID if family ID is 6 or F.
Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
}
}
void X86Subtarget::AutoDetectSubtargetFeatures() {
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
union {
@ -169,8 +181,15 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
if ((ECX >> 19) & 0x1) X86SSELevel = SSE41;
if ((ECX >> 20) & 0x1) X86SSELevel = SSE42;
if (memcmp(text.c, "GenuineIntel", 12) == 0 ||
memcmp(text.c, "AuthenticAMD", 12) == 0) {
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
if (IsIntel || IsAMD) {
// Determine if bit test memory instructions are slow.
unsigned Family = 0;
unsigned Model = 0;
DetectFamilyModel(EAX, Family, Model);
IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
HasX86_64 = (EDX >> 29) & 0x1;
}
@ -180,15 +199,9 @@ static const char *GetCurrentX86CPU() {
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
if (X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
return "generic";
unsigned Family = (EAX >> 8) & 0xf; // Bits 8 - 11
unsigned Model = (EAX >> 4) & 0xf; // Bits 4 - 7
if (Family == 6 || Family == 0xf) {
if (Family == 0xf)
// Examine extended family ID if family ID is F.
Family += (EAX >> 20) & 0xff; // Bits 20 - 27
// Examine extended model ID if family ID is 6 or F.
Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
}
unsigned Family = 0;
unsigned Model = 0;
DetectFamilyModel(EAX, Family, Model);
X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
bool Em64T = (EDX >> 29) & 0x1;
@ -285,6 +298,7 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
, X86SSELevel(NoMMXSSE)
, X863DNowLevel(NoThreeDNow)
, HasX86_64(false)
, IsBTMemSlow(false)
, DarwinVers(0)
, IsLinux(false)
, stackAlignment(8)

View File

@ -65,6 +65,9 @@ protected:
///
bool HasX86_64;
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
/// DarwinVers - Nonzero if this is a darwin platform: the numeric
/// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
unsigned char DarwinVers; // Is any darwin-x86 platform.
@ -127,6 +130,8 @@ public:
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
bool isBTMemSlow() const { return IsBTMemSlow; }
unsigned getAsmFlavor() const {
return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0;
}

View File

@ -1,4 +1,6 @@
; RUN: llvm-as < %s | llc | grep btl
; RUN: llvm-as < %s | llc -mcpu=pentium4 | grep btl | grep esp
; RUN: llvm-as < %s | llc -mcpu=penryn | grep btl | not grep esp
; PR3253
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"