forked from OSchip/llvm-project
AMDGPU: Remove implicit argument attributes when introducing new calls
In a future patch, a new set of amdgpu-no-* attributes will be introduced to indicate when a function does not need an implicitly passed input. This pass introduces new calls to the intrinsics that read those inputs, so it should remove the corresponding attributes from the function if they were present before.
This commit is contained in:
parent
a74278f21f
commit
04ce2de330
|
@ -200,7 +200,7 @@ bool AMDGPUPromoteAllocaImpl::run(Function &F) {
|
|||
|
||||
std::pair<Value *, Value *>
|
||||
AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
|
||||
const Function &F = *Builder.GetInsertBlock()->getParent();
|
||||
Function &F = *Builder.GetInsertBlock()->getParent();
|
||||
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
|
||||
|
||||
if (!IsAMDHSA) {
|
||||
|
@ -258,6 +258,9 @@ AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
|
|||
CallInst *DispatchPtr = Builder.CreateCall(DispatchPtrFn, {});
|
||||
DispatchPtr->addRetAttr(Attribute::NoAlias);
|
||||
DispatchPtr->addRetAttr(Attribute::NonNull);
|
||||
DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
|
||||
DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
|
||||
F.removeFnAttr("amdgpu-no-dispatch-ptr");
|
||||
|
||||
// Size of the dispatch packet struct.
|
||||
DispatchPtr->addDereferenceableRetAttr(64);
|
||||
|
@ -288,23 +291,27 @@ AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
|
|||
|
||||
Value *AMDGPUPromoteAllocaImpl::getWorkitemID(IRBuilder<> &Builder,
|
||||
unsigned N) {
|
||||
const AMDGPUSubtarget &ST =
|
||||
AMDGPUSubtarget::get(TM, *Builder.GetInsertBlock()->getParent());
|
||||
Function *F = Builder.GetInsertBlock()->getParent();
|
||||
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, *F);
|
||||
Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
|
||||
StringRef AttrName;
|
||||
|
||||
switch (N) {
|
||||
case 0:
|
||||
IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
|
||||
: (Intrinsic::ID)Intrinsic::r600_read_tidig_x;
|
||||
AttrName = "amdgpu-no-workitem-id-x";
|
||||
break;
|
||||
case 1:
|
||||
IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_y
|
||||
: (Intrinsic::ID)Intrinsic::r600_read_tidig_y;
|
||||
AttrName = "amdgpu-no-workitem-id-y";
|
||||
break;
|
||||
|
||||
case 2:
|
||||
IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_z
|
||||
: (Intrinsic::ID)Intrinsic::r600_read_tidig_z;
|
||||
AttrName = "amdgpu-no-workitem-id-z";
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("invalid dimension");
|
||||
|
@ -313,6 +320,7 @@ Value *AMDGPUPromoteAllocaImpl::getWorkitemID(IRBuilder<> &Builder,
|
|||
Function *WorkitemIdFn = Intrinsic::getDeclaration(Mod, IntrID);
|
||||
CallInst *CI = Builder.CreateCall(WorkitemIdFn);
|
||||
ST.makeLIDRangeMetadata(CI);
|
||||
F->removeFnAttr(AttrName);
|
||||
|
||||
return CI;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck %s
|
||||
|
||||
; This kernel starts with the amdgpu-no-workitem-id-* and amdgpu-no-dispatch-ptr attributes, but
|
||||
; they need to be removed when these intrinsic uses are introduced.
|
||||
|
||||
; CHECK-LABEL: define amdgpu_kernel void @promote_to_lds(i32 addrspace(1)* %out, i32 %in) #0 {
|
||||
; CHECK: call noalias nonnull dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
; CHECK: call i32 @llvm.amdgcn.workitem.id.x(), !range !2
|
||||
; CHECK: call i32 @llvm.amdgcn.workitem.id.y(), !range !2
|
||||
; CHECK: call i32 @llvm.amdgcn.workitem.id.z(), !range !2
|
||||
define amdgpu_kernel void @promote_to_lds(i32 addrspace(1)* %out, i32 %in) #0 {
|
||||
entry:
|
||||
%tmp = alloca [2 x i32], addrspace(5)
|
||||
%tmp1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
|
||||
%tmp2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
|
||||
store i32 0, i32 addrspace(5)* %tmp1
|
||||
store i32 1, i32 addrspace(5)* %tmp2
|
||||
%tmp3 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in
|
||||
%tmp4 = load i32, i32 addrspace(5)* %tmp3
|
||||
%tmp5 = load volatile i32, i32 addrspace(1)* undef
|
||||
%tmp6 = add i32 %tmp4, %tmp5
|
||||
store i32 %tmp6, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="1,5" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-dispatch-ptr" }
|
||||
|
||||
; CHECK: attributes #0 = { "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="1,5" }
|
Loading…
Reference in New Issue