forked from OSchip/llvm-project
AMDGPU: Allow sinking of addressing modes for atomic_inc/dec
llvm-svn: 297913
This commit is contained in:
parent
fbf031f27b
commit
7dc01c96ae
|
@ -61,6 +61,7 @@
|
|||
#include "llvm/IR/InstrTypes.h"
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/CodeGen.h"
|
||||
|
@ -505,6 +506,13 @@ const SISubtarget *SITargetLowering::getSubtarget() const {
|
|||
// TargetLowering queries
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Reports every vector shuffle mask as illegal for this target.
///
/// Both parameters (the shuffle mask and the vector type) are unnamed and
/// unused here; the result is unconditionally false.
///
/// \returns false, always.
bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
|
||||
EVT) const {
|
||||
// SI has some legal vector types, but no legal vector operations. Say no
|
||||
// shuffles are legal in order to prefer scalarizing some vector operations.
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
||||
const CallInst &CI,
|
||||
unsigned IntrID) const {
|
||||
|
@ -524,11 +532,20 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|||
}
|
||||
}
|
||||
|
||||
bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
|
||||
EVT) const {
|
||||
// SI has some legal vector types, but no legal vector operations. Say no
|
||||
// shuffles are legal in order to prefer scalarizing some vector operations.
|
||||
return false;
|
||||
/// Identifies the address operand of the AMDGPU atomic increment/decrement
/// intrinsics.
///
/// \param II        The intrinsic call to inspect.
/// \param Ops       Output list; for a recognized intrinsic, operand 0 of the
///                  call is appended to it.
/// \param AccessTy  Output; for a recognized intrinsic, set to the call's
///                  result type.
/// \returns true for amdgcn_atomic_inc and amdgcn_atomic_dec, false for every
///          other intrinsic (in which case Ops and AccessTy are left untouched).
bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
|
||||
SmallVectorImpl<Value*> &Ops,
|
||||
Type *&AccessTy) const {
|
||||
switch (II->getIntrinsicID()) {
|
||||
case Intrinsic::amdgcn_atomic_inc:
|
||||
case Intrinsic::amdgcn_atomic_dec: {
|
||||
// Operand 0 of these intrinsics is treated as the address being accessed.
Value *Ptr = II->getArgOperand(0);
|
||||
// The call's own result type is reported as the type of the access.
AccessTy = II->getType();
|
||||
Ops.push_back(Ptr);
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
|
||||
|
|
|
@ -123,11 +123,15 @@ public:
|
|||
|
||||
const SISubtarget *getSubtarget() const;
|
||||
|
||||
bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
|
||||
EVT /*VT*/) const override;
|
||||
|
||||
bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
|
||||
unsigned IntrinsicID) const override;
|
||||
|
||||
bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
|
||||
EVT /*VT*/) const override;
|
||||
bool getAddrModeArguments(IntrinsicInst * /*I*/,
|
||||
SmallVectorImpl<Value*> &/*Ops*/,
|
||||
Type *&/*AccessTy*/) const override;
|
||||
|
||||
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
|
||||
unsigned AS) const override;
|
||||
|
|
|
@ -622,7 +622,62 @@ done:
|
|||
ret void
|
||||
}
|
||||
|
||||
; Test input for the llvm.amdgcn.atomic.inc intrinsic on an addrspace(3) pointer.
; The address %in.gep (%in plus 7 i32 elements) is computed in the entry block,
; while its only use, the atomic.inc call, is in the %if block.
; The FileCheck lines below expect a ptrtoint / add 28 / inttoptr sequence
; (28 bytes = 7 x i32) whose result is the pointer argument of that call.
; OPT-LABEL: @test_sink_local_small_offset_atomic_inc_i32(
|
||||
; OPT: %sunkaddr = ptrtoint i32 addrspace(3)* %in to i32
|
||||
; OPT: %sunkaddr1 = add i32 %sunkaddr, 28
|
||||
; OPT: %sunkaddr2 = inttoptr i32 %sunkaddr1 to i32 addrspace(3)*
|
||||
; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %sunkaddr2, i32 2)
|
||||
define void @test_sink_local_small_offset_atomic_inc_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
|
||||
entry:
|
||||
%out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
|
||||
%in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%tmp0 = icmp eq i32 %tid, 0
|
||||
br i1 %tmp0, label %endif, label %if
|
||||

||||
if:
|
||||
%tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2)
|
||||
br label %endif
|
||||

||||
endif:
|
||||
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
|
||||
store i32 %x, i32 addrspace(3)* %out.gep
|
||||
br label %done
|
||||

||||
done:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test input for the llvm.amdgcn.atomic.dec intrinsic on an addrspace(3) pointer.
; The address %in.gep (%in plus 7 i32 elements) is computed in the entry block,
; while its only use, the atomic.dec call, is in the %if block.
; The FileCheck lines below expect a ptrtoint / add 28 / inttoptr sequence
; (28 bytes = 7 x i32) whose result is the pointer argument of that call.
; OPT-LABEL: @test_sink_local_small_offset_atomic_dec_i32(
|
||||
; OPT: %sunkaddr = ptrtoint i32 addrspace(3)* %in to i32
|
||||
; OPT: %sunkaddr1 = add i32 %sunkaddr, 28
|
||||
; OPT: %sunkaddr2 = inttoptr i32 %sunkaddr1 to i32 addrspace(3)*
|
||||
; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %sunkaddr2, i32 2)
|
||||
define void @test_sink_local_small_offset_atomic_dec_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
|
||||
entry:
|
||||
%out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
|
||||
%in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
|
||||
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
|
||||
%tmp0 = icmp eq i32 %tid, 0
|
||||
br i1 %tmp0, label %endif, label %if
|
||||

||||
if:
|
||||
%tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2)
|
||||
br label %endif
|
||||

||||
endif:
|
||||
%x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
|
||||
store i32 %x, i32 addrspace(3)* %out.gep
|
||||
br label %done
|
||||

||||
done:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
|
||||
declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2
|
||||
declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
attributes #2 = { nounwind argmemonly }
|
||||
|
|
Loading…
Reference in New Issue