forked from OSchip/llvm-project
[X86] Add prefetchwt1 instruction and overhaul priorities and isel enabling for prefetch instructions.
Previously prefetch was only considered legal if sse was enabled, but it should be supported with 3dnow as well. The prfchw flag now implies that at least some form of prefetch without the write hint is available, either the sse or 3dnow version. This is true even if 3dnow and sse are explicitly disabled. Similarly, the prefetchwt1 feature implies availability of prefetchw and the prefetcht0/1/2/nta instructions. This way we can support _MM_HINT_ET0 using prefetchw and _MM_HINT_ET1 with prefetchwt1. And it's assumed that if we have levels for the write hint we would have levels for the non-write hint, which is why we enable the sse prefetch instructions. I believe this behavior is consistent with gcc. I've updated the prefetch.ll to test all of these combinations. llvm-svn: 321335
This commit is contained in:
parent
9befe89367
commit
e268598dd3
|
@ -137,7 +137,7 @@ def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
|
||||||
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
|
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
|
||||||
"Enable AVX-512 PreFetch Instructions",
|
"Enable AVX-512 PreFetch Instructions",
|
||||||
[FeatureAVX512]>;
|
[FeatureAVX512]>;
|
||||||
def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPFPREFETCHWT1",
|
def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
|
||||||
"true",
|
"true",
|
||||||
"Prefetch with Intent to Write and T1 Hint">;
|
"Prefetch with Intent to Write and T1 Hint">;
|
||||||
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
|
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
|
||||||
|
|
|
@ -461,7 +461,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||||
setOperationAction(ISD::SRL_PARTS, VT, Custom);
|
setOperationAction(ISD::SRL_PARTS, VT, Custom);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Subtarget.hasSSE1())
|
if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
|
||||||
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
|
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
|
||||||
|
|
||||||
setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
|
setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
|
||||||
|
|
|
@ -116,14 +116,30 @@ defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", I3DNOW_MISC_FUNC_ITINS, 1>;
|
||||||
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
|
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
|
||||||
[(int_x86_mmx_femms)], IIC_MMX_EMMS>;
|
[(int_x86_mmx_femms)], IIC_MMX_EMMS>;
|
||||||
|
|
||||||
|
// If PREFETCHWT1 is supported we want to use it for everything but T0.
|
||||||
|
def PrefetchWLevel : PatFrag<(ops), (i32 imm), [{
|
||||||
|
return N->getSExtValue() == 3 || !Subtarget->hasPREFETCHWT1();
|
||||||
|
}]>;
|
||||||
|
|
||||||
|
// Use PREFETCHWT1 for NTA, T2, T1.
|
||||||
|
def PrefetchWT1Level : ImmLeaf<i32, [{
|
||||||
|
return Imm < 3;
|
||||||
|
}]>;
|
||||||
|
|
||||||
let SchedRW = [WriteLoad] in {
|
let SchedRW = [WriteLoad] in {
|
||||||
|
let Predicates = [Has3DNow, NoSSEPrefetch] in
|
||||||
def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr),
|
def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr),
|
||||||
"prefetch\t$addr",
|
"prefetch\t$addr",
|
||||||
[(prefetch addr:$addr, (i32 0), imm, (i32 1))],
|
[(prefetch addr:$addr, imm, imm, (i32 1))],
|
||||||
IIC_SSE_PREFETCH>;
|
IIC_SSE_PREFETCH>;
|
||||||
|
|
||||||
def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
|
def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
|
||||||
[(prefetch addr:$addr, (i32 1), (i32 3), (i32 1))],
|
[(prefetch addr:$addr, (i32 1), (i32 PrefetchWLevel), (i32 1))],
|
||||||
IIC_SSE_PREFETCH>, TB, Requires<[HasPrefetchW]>;
|
IIC_SSE_PREFETCH>, TB, Requires<[HasPrefetchW]>;
|
||||||
|
|
||||||
|
def PREFETCHWT1 : I<0x0D, MRM2m, (outs), (ins i8mem:$addr), "prefetchwt1\t$addr",
|
||||||
|
[(prefetch addr:$addr, (i32 1), (i32 PrefetchWT1Level), (i32 1))],
|
||||||
|
IIC_SSE_PREFETCH>, TB, Requires<[HasPREFETCHWT1]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// "3DNowA" instructions
|
// "3DNowA" instructions
|
||||||
|
|
|
@ -874,7 +874,10 @@ def HasADX : Predicate<"Subtarget->hasADX()">;
|
||||||
def HasSHA : Predicate<"Subtarget->hasSHA()">;
|
def HasSHA : Predicate<"Subtarget->hasSHA()">;
|
||||||
def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
|
def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
|
||||||
def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
|
def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
|
||||||
|
def HasSSEPrefetch : Predicate<"Subtarget->hasSSEPrefetch()">;
|
||||||
|
def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">;
|
||||||
def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
|
def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
|
||||||
|
def HasPREFETCHWT1 : Predicate<"Subtarget->hasPREFETCHWT1()">;
|
||||||
def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
|
def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
|
||||||
def HasMWAITX : Predicate<"Subtarget->hasMWAITX()">;
|
def HasMWAITX : Predicate<"Subtarget->hasMWAITX()">;
|
||||||
def HasCLZERO : Predicate<"Subtarget->hasCLZERO()">;
|
def HasCLZERO : Predicate<"Subtarget->hasCLZERO()">;
|
||||||
|
|
|
@ -3487,7 +3487,7 @@ let Predicates = [UseSSE2] in {
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
// Prefetch intrinsic.
|
// Prefetch intrinsic.
|
||||||
let Predicates = [HasSSE1], SchedRW = [WriteLoad] in {
|
let Predicates = [HasSSEPrefetch], SchedRW = [WriteLoad] in {
|
||||||
def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
|
def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
|
||||||
"prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))],
|
"prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))],
|
||||||
IIC_SSE_PREFETCH>, TB;
|
IIC_SSE_PREFETCH>, TB;
|
||||||
|
|
|
@ -201,7 +201,7 @@ protected:
|
||||||
bool HasCLZERO;
|
bool HasCLZERO;
|
||||||
|
|
||||||
/// Processor has Prefetch with intent to Write instruction
|
/// Processor has Prefetch with intent to Write instruction
|
||||||
bool HasPFPREFETCHWT1;
|
bool HasPREFETCHWT1;
|
||||||
|
|
||||||
/// True if SHLD instructions are slow.
|
/// True if SHLD instructions are slow.
|
||||||
bool IsSHLDSlow;
|
bool IsSHLDSlow;
|
||||||
|
@ -517,7 +517,14 @@ public:
|
||||||
bool hasRTM() const { return HasRTM; }
|
bool hasRTM() const { return HasRTM; }
|
||||||
bool hasADX() const { return HasADX; }
|
bool hasADX() const { return HasADX; }
|
||||||
bool hasSHA() const { return HasSHA; }
|
bool hasSHA() const { return HasSHA; }
|
||||||
bool hasPRFCHW() const { return HasPRFCHW; }
|
bool hasPRFCHW() const { return HasPRFCHW || HasPREFETCHWT1; }
|
||||||
|
bool hasPREFETCHWT1() const { return HasPREFETCHWT1; }
|
||||||
|
bool hasSSEPrefetch() const {
|
||||||
|
// We implicitly enable these when we have a write prefetch supporting cache
|
||||||
|
// level OR if we have prfchw, but don't already have a read prefetch from
|
||||||
|
// 3dnow.
|
||||||
|
return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1();
|
||||||
|
}
|
||||||
bool hasRDSEED() const { return HasRDSEED; }
|
bool hasRDSEED() const { return HasRDSEED; }
|
||||||
bool hasLAHFSAHF() const { return HasLAHFSAHF; }
|
bool hasLAHFSAHF() const { return HasLAHFSAHF; }
|
||||||
bool hasMWAITX() const { return HasMWAITX; }
|
bool hasMWAITX() const { return HasMWAITX; }
|
||||||
|
|
|
@ -1,26 +1,100 @@
|
||||||
; RUN: llc < %s -mtriple=i686-- -mattr=+sse | FileCheck %s
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s
|
; RUN: llc < %s -mtriple=i686-- -mattr=+sse | FileCheck %s --check-prefix=SSE
|
||||||
; RUN: llc < %s -mtriple=i686-- -mattr=+sse -mattr=+prfchw | FileCheck %s -check-prefix=PRFCHW
|
; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefix=SSE
|
||||||
; RUN: llc < %s -mtriple=i686-- -mcpu=slm | FileCheck %s -check-prefix=SLM
|
; RUN: llc < %s -mtriple=i686-- -mattr=+sse,+prfchw | FileCheck %s -check-prefix=PRFCHWSSE
|
||||||
; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 | FileCheck %s -check-prefix=PRFCHW
|
; RUN: llc < %s -mtriple=i686-- -mattr=+prfchw | FileCheck %s -check-prefix=PRFCHWSSE
|
||||||
; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 -mattr=-prfchw | FileCheck %s -check-prefix=NOPRFCHW
|
; RUN: llc < %s -mtriple=i686-- -mcpu=slm | FileCheck %s -check-prefix=PRFCHWSSE
|
||||||
|
; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 | FileCheck %s -check-prefix=PRFCHWSSE
|
||||||
|
; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 -mattr=-prfchw | FileCheck %s -check-prefix=SSE
|
||||||
|
; RUN: llc < %s -mtriple=i686-- -mattr=+sse,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
|
||||||
|
; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
|
||||||
|
; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+3dnow,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
|
||||||
|
; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow | FileCheck %s -check-prefix=3DNOW
|
||||||
|
; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow,+prfchw | FileCheck %s -check-prefix=PRFCHW3DNOW
|
||||||
|
|
||||||
|
; Rules:
|
||||||
|
; 3dnow by itself gets you just the single prefetch instruction with no hints
|
||||||
|
; sse provides prefetcht0/1/2/nta
|
||||||
|
; supporting prefetchw, but not 3dnow, implicitly provides prefetcht0/1/2/nta regardless of sse setting as we need something to fall back to for the non-write hint.
|
||||||
|
; supporting prefetchwt1 implies prefetcht0/1/2/nta and prefetchw regardless of other settings. this allows levels for non-write and gives us an instruction for write+T0
|
||||||
|
; 3dnow prefetch instruction will only get used if you have no other prefetch instructions enabled
|
||||||
|
|
||||||
; rdar://10538297
|
; rdar://10538297
|
||||||
|
|
||||||
define void @t(i8* %ptr) nounwind {
|
define void @t(i8* %ptr) nounwind {
|
||||||
|
; SSE-LABEL: t:
|
||||||
|
; SSE: # %bb.0: # %entry
|
||||||
|
; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
|
; SSE-NEXT: prefetcht2 (%eax)
|
||||||
|
; SSE-NEXT: prefetcht1 (%eax)
|
||||||
|
; SSE-NEXT: prefetcht0 (%eax)
|
||||||
|
; SSE-NEXT: prefetchnta (%eax)
|
||||||
|
; SSE-NEXT: prefetcht2 (%eax)
|
||||||
|
; SSE-NEXT: prefetcht1 (%eax)
|
||||||
|
; SSE-NEXT: prefetcht0 (%eax)
|
||||||
|
; SSE-NEXT: prefetchnta (%eax)
|
||||||
|
; SSE-NEXT: retl
|
||||||
|
;
|
||||||
|
; PRFCHWSSE-LABEL: t:
|
||||||
|
; PRFCHWSSE: # %bb.0: # %entry
|
||||||
|
; PRFCHWSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
|
; PRFCHWSSE-NEXT: prefetcht2 (%eax)
|
||||||
|
; PRFCHWSSE-NEXT: prefetcht1 (%eax)
|
||||||
|
; PRFCHWSSE-NEXT: prefetcht0 (%eax)
|
||||||
|
; PRFCHWSSE-NEXT: prefetchnta (%eax)
|
||||||
|
; PRFCHWSSE-NEXT: prefetchw (%eax)
|
||||||
|
; PRFCHWSSE-NEXT: prefetchw (%eax)
|
||||||
|
; PRFCHWSSE-NEXT: prefetchw (%eax)
|
||||||
|
; PRFCHWSSE-NEXT: prefetchw (%eax)
|
||||||
|
; PRFCHWSSE-NEXT: retl
|
||||||
|
;
|
||||||
|
; PREFETCHWT1-LABEL: t:
|
||||||
|
; PREFETCHWT1: # %bb.0: # %entry
|
||||||
|
; PREFETCHWT1-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
|
; PREFETCHWT1-NEXT: prefetcht2 (%eax)
|
||||||
|
; PREFETCHWT1-NEXT: prefetcht1 (%eax)
|
||||||
|
; PREFETCHWT1-NEXT: prefetcht0 (%eax)
|
||||||
|
; PREFETCHWT1-NEXT: prefetchnta (%eax)
|
||||||
|
; PREFETCHWT1-NEXT: prefetchwt1 (%eax)
|
||||||
|
; PREFETCHWT1-NEXT: prefetchwt1 (%eax)
|
||||||
|
; PREFETCHWT1-NEXT: prefetchw (%eax)
|
||||||
|
; PREFETCHWT1-NEXT: prefetchwt1 (%eax)
|
||||||
|
; PREFETCHWT1-NEXT: retl
|
||||||
|
;
|
||||||
|
; 3DNOW-LABEL: t:
|
||||||
|
; 3DNOW: # %bb.0: # %entry
|
||||||
|
; 3DNOW-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
|
; 3DNOW-NEXT: prefetch (%eax)
|
||||||
|
; 3DNOW-NEXT: prefetch (%eax)
|
||||||
|
; 3DNOW-NEXT: prefetch (%eax)
|
||||||
|
; 3DNOW-NEXT: prefetch (%eax)
|
||||||
|
; 3DNOW-NEXT: prefetch (%eax)
|
||||||
|
; 3DNOW-NEXT: prefetch (%eax)
|
||||||
|
; 3DNOW-NEXT: prefetch (%eax)
|
||||||
|
; 3DNOW-NEXT: prefetch (%eax)
|
||||||
|
; 3DNOW-NEXT: retl
|
||||||
|
;
|
||||||
|
; PRFCHW3DNOW-LABEL: t:
|
||||||
|
; PRFCHW3DNOW: # %bb.0: # %entry
|
||||||
|
; PRFCHW3DNOW-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
|
; PRFCHW3DNOW-NEXT: prefetch (%eax)
|
||||||
|
; PRFCHW3DNOW-NEXT: prefetch (%eax)
|
||||||
|
; PRFCHW3DNOW-NEXT: prefetch (%eax)
|
||||||
|
; PRFCHW3DNOW-NEXT: prefetch (%eax)
|
||||||
|
; PRFCHW3DNOW-NEXT: prefetchw (%eax)
|
||||||
|
; PRFCHW3DNOW-NEXT: prefetchw (%eax)
|
||||||
|
; PRFCHW3DNOW-NEXT: prefetchw (%eax)
|
||||||
|
; PRFCHW3DNOW-NEXT: prefetchw (%eax)
|
||||||
|
; PRFCHW3DNOW-NEXT: retl
|
||||||
entry:
|
entry:
|
||||||
; CHECK: prefetcht2
|
|
||||||
; CHECK: prefetcht1
|
|
||||||
; CHECK: prefetcht0
|
|
||||||
; CHECK: prefetchnta
|
|
||||||
; PRFCHW: prefetchw
|
|
||||||
; NOPRFCHW-NOT: prefetchw
|
|
||||||
; SLM: prefetchw
|
|
||||||
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 )
|
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 )
|
||||||
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 )
|
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 )
|
||||||
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
|
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
|
||||||
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0, i32 1 )
|
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0, i32 1 )
|
||||||
|
tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 1, i32 1 )
|
||||||
|
tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 2, i32 1 )
|
||||||
tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3, i32 1 )
|
tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3, i32 1 )
|
||||||
|
tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 0, i32 1 )
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -667,6 +667,9 @@
|
||||||
# CHECK: prefetchw (%eax)
|
# CHECK: prefetchw (%eax)
|
||||||
0x0f 0x0d 0x08
|
0x0f 0x0d 0x08
|
||||||
|
|
||||||
|
# CHECK: prefetchwt1 (%eax)
|
||||||
|
0x0f 0x0d 0x10
|
||||||
|
|
||||||
# CHECK: adcxl %eax, %eax
|
# CHECK: adcxl %eax, %eax
|
||||||
0x66 0x0f 0x38 0xf6 0xc0
|
0x66 0x0f 0x38 0xf6 0xc0
|
||||||
|
|
||||||
|
|
|
@ -72,8 +72,10 @@ femms
|
||||||
|
|
||||||
// CHECK: prefetch (%rax) # encoding: [0x0f,0x0d,0x00]
|
// CHECK: prefetch (%rax) # encoding: [0x0f,0x0d,0x00]
|
||||||
// CHECK: prefetchw (%rax) # encoding: [0x0f,0x0d,0x08]
|
// CHECK: prefetchw (%rax) # encoding: [0x0f,0x0d,0x08]
|
||||||
|
// CHECK: prefetchwt1 (%rax) # encoding: [0x0f,0x0d,0x10]
|
||||||
prefetch (%rax)
|
prefetch (%rax)
|
||||||
prefetchw (%rax)
|
prefetchw (%rax)
|
||||||
|
prefetchwt1 (%rax)
|
||||||
|
|
||||||
|
|
||||||
// CHECK: pf2iw %mm2, %mm1 # encoding: [0x0f,0x0f,0xca,0x1c]
|
// CHECK: pf2iw %mm2, %mm1 # encoding: [0x0f,0x0f,0xca,0x1c]
|
||||||
|
|
Loading…
Reference in New Issue