AMDGPU: Allow sinking of addressing modes for atomic_inc/dec

llvm-svn: 297913
This commit is contained in:
Matt Arsenault 2017-03-15 23:15:12 +00:00
parent fbf031f27b
commit 7dc01c96ae
3 changed files with 83 additions and 7 deletions

View File

@ -61,6 +61,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
@ -505,6 +506,13 @@ const SISubtarget *SITargetLowering::getSubtarget() const {
// TargetLowering queries
//===----------------------------------------------------------------------===//
bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
EVT) const {
// SI has some legal vector types, but no legal vector operations. Say no
// shuffles are legal in order to prefer scalarizing some vector operations.
return false;
}
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &CI,
unsigned IntrID) const {
@ -524,11 +532,20 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
}
}
bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
EVT) const {
// SI has some legal vector types, but no legal vector operations. Say no
// shuffles are legal in order to prefer scalarizing some vector operations.
return false;
bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
SmallVectorImpl<Value*> &Ops,
Type *&AccessTy) const {
switch (II->getIntrinsicID()) {
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec: {
Value *Ptr = II->getArgOperand(0);
AccessTy = II->getType();
Ops.push_back(Ptr);
return true;
}
default:
return false;
}
}
bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {

View File

@ -123,11 +123,15 @@ public:
const SISubtarget *getSubtarget() const;
bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
EVT /*VT*/) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
unsigned IntrinsicID) const override;
bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
EVT /*VT*/) const override;
bool getAddrModeArguments(IntrinsicInst * /*I*/,
SmallVectorImpl<Value*> &/*Ops*/,
Type *&/*AccessTy*/) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;

View File

@ -622,7 +622,62 @@ done:
ret void
}
; OPT-LABEL: @test_sink_local_small_offset_atomic_inc_i32(
; OPT: %sunkaddr = ptrtoint i32 addrspace(3)* %in to i32
; OPT: %sunkaddr1 = add i32 %sunkaddr, 28
; OPT: %sunkaddr2 = inttoptr i32 %sunkaddr1 to i32 addrspace(3)*
; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %sunkaddr2, i32 2)
define void @test_sink_local_small_offset_atomic_inc_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
%in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
%tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2)
br label %endif
endif:
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
store i32 %x, i32 addrspace(3)* %out.gep
br label %done
done:
ret void
}
; OPT-LABEL: @test_sink_local_small_offset_atomic_dec_i32(
; OPT: %sunkaddr = ptrtoint i32 addrspace(3)* %in to i32
; OPT: %sunkaddr1 = add i32 %sunkaddr, 28
; OPT: %sunkaddr2 = inttoptr i32 %sunkaddr1 to i32 addrspace(3)*
; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %sunkaddr2, i32 2)
define void @test_sink_local_small_offset_atomic_dec_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
entry:
%out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
%in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%tmp0 = icmp eq i32 %tid, 0
br i1 %tmp0, label %endif, label %if
if:
%tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2)
br label %endif
endif:
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
store i32 %x, i32 addrspace(3)* %out.gep
br label %done
done:
ret void
}
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2
declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #2 = { nounwind argmemonly }