forked from OSchip/llvm-project
[CodeGenPrepare] Don't sink non-cheap addrspacecasts.
Summary: Previously, CodeGenPrepare (CGP) would unconditionally sink addrspacecast instructions, even going so far as to sink them into a loop. Now we check that the cast is "cheap", as defined by TargetLowering (TLI). We introduce a new "is-cheap" function to TLI rather than using isNoopAddrSpaceCast because some GPU platforms want the ability to ask for non-nop casts to be sunk.

Reviewers: arsenm, tra
Subscribers: jholewinski, wdng, llvm-commits
Differential Revision: https://reviews.llvm.org/D26923
llvm-svn: 287591
This commit is contained in:
parent
838c7f5a85
commit
3e50a5be8f
|
@ -1153,6 +1153,12 @@ public:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
|
||||||
|
/// are happy to sink it into basic blocks.
|
||||||
|
virtual bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
|
||||||
|
return isNoopAddrSpaceCast(SrcAS, DestAS);
|
||||||
|
}
|
||||||
|
|
||||||
/// Return true if the pointer arguments to CI should be aligned by aligning
|
/// Return true if the pointer arguments to CI should be aligned by aligning
|
||||||
/// the object whose address is being passed. If so then MinSize is set to the
|
/// the object whose address is being passed. If so then MinSize is set to the
|
||||||
/// minimum size the object must be to be aligned and PrefAlign is set to the
|
/// minimum size the object must be to be aligned and PrefAlign is set to the
|
||||||
|
|
|
@ -927,6 +927,14 @@ static bool SinkCast(CastInst *CI) {
|
||||||
///
|
///
|
||||||
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
|
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
|
||||||
const DataLayout &DL) {
|
const DataLayout &DL) {
|
||||||
|
// Sink only "cheap" (or nop) address-space casts. This is a weaker condition
|
||||||
|
// than sinking only nop casts, but is helpful on some platforms.
|
||||||
|
if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
|
||||||
|
if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(),
|
||||||
|
ASC->getDestAddressSpace()))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// If this is a noop copy,
|
// If this is a noop copy,
|
||||||
EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
|
EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
|
||||||
EVT DstVT = TLI.getValueType(DL, CI->getType());
|
EVT DstVT = TLI.getValueType(DL, CI->getType());
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
; RUN: opt -S -codegenprepare < %s | FileCheck %s
|
||||||
|
|
||||||
|
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
||||||
|
target triple = "nvptx64-nvidia-cuda"
|
||||||
|
|
||||||
|
; CHECK-LABEL: @test
|
||||||
|
define i64 @test(i1 %pred, i64* %ptr) {
|
||||||
|
; CHECK: addrspacecast
|
||||||
|
%ptr_as1 = addrspacecast i64* %ptr to i64 addrspace(1)*
|
||||||
|
br i1 %pred, label %l1, label %l2
|
||||||
|
l1:
|
||||||
|
; CHECK-LABEL: l1:
|
||||||
|
; CHECK-NOT: addrspacecast
|
||||||
|
%v1 = load i64, i64* %ptr
|
||||||
|
ret i64 %v1
|
||||||
|
l2:
|
||||||
|
; CHECK-LABEL: l2:
|
||||||
|
; CHECK-NOT: addrspacecast
|
||||||
|
%v2 = load i64, i64 addrspace(1)* %ptr_as1
|
||||||
|
ret i64 %v2
|
||||||
|
}
|
Loading…
Reference in New Issue