AMDGPU: Implement isCheapAddrSpaceCast

llvm-svn: 288523
This commit is contained in:
Matt Arsenault 2016-12-02 18:12:53 +00:00
parent a5dbdf342b
commit d4da0edd98
3 changed files with 134 additions and 2 deletions

View File

@ -600,8 +600,8 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
/// Returns true if \p AS is the global, flat, or constant address space.
/// isNoopAddrSpaceCast requires this to hold for both the source and the
/// destination address space of a cast.
static bool isFlatGlobalAddrSpace(unsigned AS) {
  return AS == AMDGPUAS::GLOBAL_ADDRESS ||
         AS == AMDGPUAS::FLAT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS;
}
bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
@ -609,6 +609,16 @@ bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
}
/// Reports whether an address space cast from \p SrcAS to \p DestAS is cheap.
bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS,
                                            unsigned DestAS) const {
  // Every cast out of the flat address space is cheap: flat -> private/local
  // is a simple truncate and flat -> global is a no-op. Any other cast is
  // cheap only when it is a no-op.
  const bool FromFlat = SrcAS == AMDGPUAS::FLAT_ADDRESS;
  return FromFlat || isNoopAddrSpaceCast(SrcAS, DestAS);
}
bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
const MemSDNode *MemNode = cast<MemSDNode>(N);
const Value *Ptr = MemNode->getMemOperand()->getValue();

View File

@ -130,6 +130,7 @@ public:
bool isMemOpUniform(const SDNode *N) const;
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;

View File

@ -0,0 +1,121 @@
; RUN: opt -S -codegenprepare -mtriple=amdgcn--amdhsa < %s | FileCheck %s
; Local (addrspace(3)) -> flat (addrspace(4)) cast whose only user is in %l2.
; The directives below require the cast to remain ahead of the branch and
; forbid any cast after it, i.e. the cast must not be sunk to its user.
; CHECK-LABEL: @no_sink_local_to_flat(
; CHECK: addrspacecast
; CHECK: br
; CHECK-NOT: addrspacecast
define i64 @no_sink_local_to_flat(i1 %pred, i64 addrspace(3)* %ptr) {
%ptr_cast = addrspacecast i64 addrspace(3)* %ptr to i64 addrspace(4)*
br i1 %pred, label %l1, label %l2
; %l1 loads through the original pointer and never touches the cast.
l1:
%v1 = load i64, i64 addrspace(3)* %ptr
ret i64 %v1
; %l2 is the only block that uses %ptr_cast.
l2:
%v2 = load i64, i64 addrspace(4)* %ptr_cast
ret i64 %v2
}
; Private -> flat cast: the source is a plain i64* (default address space,
; which the test name identifies as private) and the result is addrspace(4).
; The directives below require the cast to remain ahead of the branch and
; forbid any cast after it, i.e. the cast must not be sunk to its user.
; CHECK-LABEL: @no_sink_private_to_flat(
; CHECK: addrspacecast
; CHECK: br
; CHECK-NOT: addrspacecast
define i64 @no_sink_private_to_flat(i1 %pred, i64* %ptr) {
%ptr_cast = addrspacecast i64* %ptr to i64 addrspace(4)*
br i1 %pred, label %l1, label %l2
; %l1 loads through the original pointer and never touches the cast.
l1:
%v1 = load i64, i64* %ptr
ret i64 %v1
; %l2 is the only block that uses %ptr_cast.
l2:
%v2 = load i64, i64 addrspace(4)* %ptr_cast
ret i64 %v2
}
; Global (addrspace(1)) -> flat (addrspace(4)) cast whose only user is in %l2.
; The directives below forbid a cast ahead of the branch and require one
; after it, i.e. the cast is expected to be sunk next to its user.
; CHECK-LABEL: @sink_global_to_flat(
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
define i64 @sink_global_to_flat(i1 %pred, i64 addrspace(1)* %ptr) {
%ptr_cast = addrspacecast i64 addrspace(1)* %ptr to i64 addrspace(4)*
br i1 %pred, label %l1, label %l2
; %l1 loads through the original pointer and never touches the cast.
l1:
%v1 = load i64, i64 addrspace(1)* %ptr
ret i64 %v1
; %l2 is the only block that uses %ptr_cast.
l2:
%v2 = load i64, i64 addrspace(4)* %ptr_cast
ret i64 %v2
}
; Flat (addrspace(4)) -> global (addrspace(1)) cast whose only user is in %l2.
; The directives below forbid a cast ahead of the branch and require one
; after it, i.e. the cast is expected to be sunk next to its user.
; CHECK-LABEL: @sink_flat_to_global(
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
define i64 @sink_flat_to_global(i1 %pred, i64 addrspace(4)* %ptr) {
%ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64 addrspace(1)*
br i1 %pred, label %l1, label %l2
; %l1 loads through the original pointer and never touches the cast.
l1:
%v1 = load i64, i64 addrspace(4)* %ptr
ret i64 %v1
; %l2 is the only block that uses %ptr_cast.
l2:
%v2 = load i64, i64 addrspace(1)* %ptr_cast
ret i64 %v2
}
; Flat (addrspace(4)) -> constant (addrspace(2)) cast whose only user is in
; %l2. The directives below forbid a cast ahead of the branch and require one
; after it, i.e. the cast is expected to be sunk next to its user.
; CHECK-LABEL: @sink_flat_to_constant(
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
define i64 @sink_flat_to_constant(i1 %pred, i64 addrspace(4)* %ptr) {
%ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64 addrspace(2)*
br i1 %pred, label %l1, label %l2
; %l1 loads through the original pointer and never touches the cast.
l1:
%v1 = load i64, i64 addrspace(4)* %ptr
ret i64 %v1
; %l2 is the only block that uses %ptr_cast.
l2:
%v2 = load i64, i64 addrspace(2)* %ptr_cast
ret i64 %v2
}
; Flat (addrspace(4)) -> local (addrspace(3)) cast whose only user is in %l2.
; The directives below forbid a cast ahead of the branch and require one
; after it, i.e. the cast is expected to be sunk next to its user.
; CHECK-LABEL: @sink_flat_to_local(
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
define i64 @sink_flat_to_local(i1 %pred, i64 addrspace(4)* %ptr) {
%ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64 addrspace(3)*
br i1 %pred, label %l1, label %l2
; %l1 loads through the original pointer and never touches the cast.
l1:
%v1 = load i64, i64 addrspace(4)* %ptr
ret i64 %v1
; %l2 is the only block that uses %ptr_cast.
l2:
%v2 = load i64, i64 addrspace(3)* %ptr_cast
ret i64 %v2
}
; Flat (addrspace(4)) -> private cast: the result is a plain i64* (default
; address space, which the test name identifies as private), used only in %l2.
; The directives below forbid a cast ahead of the branch and require one
; after it, i.e. the cast is expected to be sunk next to its user.
; CHECK-LABEL: @sink_flat_to_private(
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
define i64 @sink_flat_to_private(i1 %pred, i64 addrspace(4)* %ptr) {
%ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64*
br i1 %pred, label %l1, label %l2
; %l1 loads through the original pointer and never touches the cast.
l1:
%v1 = load i64, i64 addrspace(4)* %ptr
ret i64 %v1
; %l2 is the only block that uses %ptr_cast.
l2:
%v2 = load i64, i64* %ptr_cast
ret i64 %v2
}