Remove the target attribute break-sse-dep. Instead, do not fold loads into SSE partial-update instructions unless optimizing for size.

llvm-svn: 91910
This commit is contained in:
Evan Cheng 2009-12-22 17:47:23 +00:00
parent 613bf10470
commit 71d7eaa87e
7 changed files with 25 additions and 56 deletions

View File

@ -57,8 +57,6 @@ def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
"Support 64-bit instructions">;
def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
"Bit testing of memory is slow">;
def FeatureBreakSSEDep : SubtargetFeature<"break-sse-dep", "BreakSSEDep","true",
"Should break SSE partial update dep with load / xorps">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions">;
@ -88,27 +86,17 @@ def : Proc<"pentium2", [FeatureMMX, FeatureCMOV]>;
def : Proc<"pentium3", [FeatureSSE1]>;
def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"pentium4", [FeatureSSE2]>;
def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem,
FeatureBreakSSEDep]>;
def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem,
FeatureBreakSSEDep]>;
def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem,
FeatureBreakSSEDep]>;
def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem,
FeatureBreakSSEDep]>;
def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem,
FeatureBreakSSEDep]>;
def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem,
FeatureBreakSSEDep]>;
def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem,
FeatureBreakSSEDep]>;
def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
FeatureBreakSSEDep]>;
def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
FeatureBreakSSEDep]>;
def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>;
// Sandy Bridge does not have FMA
def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit,
FeatureBreakSSEDep]>;
def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;

View File

@ -2370,7 +2370,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Check switch flag
if (NoFusing) return NULL;
if (TM.getSubtarget<X86Subtarget>().shouldBreakSSEDep())
if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
switch (MI->getOpcode()) {
case X86::CVTSD2SSrr:
case X86::Int_CVTSD2SSrr:
@ -2422,7 +2422,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Check switch flag
if (NoFusing) return NULL;
if (TM.getSubtarget<X86Subtarget>().shouldBreakSSEDep())
if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
switch (MI->getOpcode()) {
case X86::CVTSD2SSrr:
case X86::Int_CVTSD2SSrr:

View File

@ -298,11 +298,10 @@ def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&"
def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
"TM.getCodeModel() == CodeModel::Kernel">;
def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
def OptForSize : Predicate<"OptForSize">;
def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
def SSEBreakDep : Predicate<"Subtarget->shouldBreakSSEDep() && !OptForSize">;
def NoSSEBreakDep: Predicate<"!Subtarget->shouldBreakSSEDep() || OptForSize">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.

View File

@ -827,7 +827,7 @@ multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
Requires<[HasSSE1, NoSSEBreakDep]>;
Requires<[HasSSE1, OptForSize]>;
// Vector operation, reg.
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@ -1120,7 +1120,7 @@ def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
Requires<[HasSSE2, NoSSEBreakDep]>;
Requires<[HasSSE2, OptForSize]>;
def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src),
"cvtsi2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (sint_to_fp GR32:$src))]>;
@ -1157,10 +1157,10 @@ def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
Requires<[HasSSE2, NoSSEBreakDep]>;
Requires<[HasSSE2, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
(CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[SSEBreakDep]>;
(CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>;
// Match intrinsics which expect XMM operand(s).
def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
@ -3232,7 +3232,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
[(set VR128:$dst,
(V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
TA, OpSize,
Requires<[HasSSE41, NoSSEBreakDep]>;
Requires<[HasSSE41]>;
// Vector intrinsic operation, reg
def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,

View File

@ -266,7 +266,6 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
unsigned Model = 0;
DetectFamilyModel(EAX, Family, Model);
IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
BreakSSEDep = IsIntel;
GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
HasX86_64 = (EDX >> 29) & 0x1;
@ -287,7 +286,6 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
, HasFMA3(false)
, HasFMA4(false)
, IsBTMemSlow(false)
, BreakSSEDep(false)
, DarwinVers(0)
, stackAlignment(8)
// FIXME: this is a known good value for Yonah. How about others?

View File

@ -78,14 +78,6 @@ protected:
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
/// BreakSSEDep - True if codegen should unfold load or insert xorps / pxor
/// to break register dependency for a partial register update SSE
/// instruction. This is needed for instructions such as CVTSS2SD which
/// only update the lower part of the register, and the result of the updated
/// part does not depend on the contents of the destination before the
/// instruction, and the non-updated portion of the register is not used.
bool BreakSSEDep;
/// DarwinVers - Nonzero if this is a darwin platform: the numeric
/// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
unsigned char DarwinVers; // Is any darwin-x86 platform.
@ -150,7 +142,6 @@ public:
bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool shouldBreakSSEDep() const { return BreakSSEDep; }
bool isTargetDarwin() const { return TargetType == isDarwin; }
bool isTargetELF() const { return TargetType == isELF; }

View File

@ -1,27 +1,20 @@
; RUN: llc < %s -march=x86-64 -mattr=+sse2,+break-sse-dep | FileCheck %s --check-prefix=YES
; RUN: llc < %s -march=x86-64 -mattr=+sse2,-break-sse-dep | FileCheck %s --check-prefix=NO
; RUN: llc < %s -march=x86-64 -mattr=+sse2 | FileCheck %s
define double @t1(float* nocapture %x) nounwind readonly ssp {
entry:
; YES: t1:
; YES: movss (%rdi), %xmm0
; YES; cvtss2sd %xmm0, %xmm0
; CHECK: t1:
; CHECK: movss (%rdi), %xmm0
; CHECK: cvtss2sd %xmm0, %xmm0
; NO: t1:
; NO; cvtss2sd (%rdi), %xmm0
%0 = load float* %x, align 4
%1 = fpext float %0 to double
ret double %1
}
define float @t2(double* nocapture %x) nounwind readonly ssp {
define float @t2(double* nocapture %x) nounwind readonly ssp optsize {
entry:
; YES: t2:
; YES: movsd (%rdi), %xmm0
; YES; cvtsd2ss %xmm0, %xmm0
; NO: t2:
; NO; cvtsd2ss (%rdi), %xmm0
; CHECK: t2:
; CHECK: cvtsd2ss (%rdi), %xmm0
%0 = load double* %x, align 8
%1 = fptrunc double %0 to float
ret float %1