AMDGPU: Remove implicit argument attributes when introducing new calls

In a future patch, a new set of amdgpu-no-* attributes will be
introduced to indicate when a function does not need an implicitly
passed input. This pass introduces new calls to the intrinsics that
read some of those inputs (the dispatch pointer and the workitem IDs),
so it should remove the corresponding attributes from the function if
they were present before.
This commit is contained in:
Matt Arsenault 2021-08-13 13:28:57 -04:00
parent a74278f21f
commit 04ce2de330
2 changed files with 39 additions and 3 deletions

View File

@ -200,7 +200,7 @@ bool AMDGPUPromoteAllocaImpl::run(Function &F) {
std::pair<Value *, Value *>
AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
const Function &F = *Builder.GetInsertBlock()->getParent();
Function &F = *Builder.GetInsertBlock()->getParent();
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
if (!IsAMDHSA) {
@ -258,6 +258,9 @@ AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
CallInst *DispatchPtr = Builder.CreateCall(DispatchPtrFn, {});
DispatchPtr->addRetAttr(Attribute::NoAlias);
DispatchPtr->addRetAttr(Attribute::NonNull);
DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
F.removeFnAttr("amdgpu-no-dispatch-ptr");
// Size of the dispatch packet struct.
DispatchPtr->addDereferenceableRetAttr(64);
@ -288,23 +291,27 @@ AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
Value *AMDGPUPromoteAllocaImpl::getWorkitemID(IRBuilder<> &Builder,
unsigned N) {
const AMDGPUSubtarget &ST =
AMDGPUSubtarget::get(TM, *Builder.GetInsertBlock()->getParent());
Function *F = Builder.GetInsertBlock()->getParent();
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, *F);
Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
StringRef AttrName;
switch (N) {
case 0:
IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
: (Intrinsic::ID)Intrinsic::r600_read_tidig_x;
AttrName = "amdgpu-no-workitem-id-x";
break;
case 1:
IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_y
: (Intrinsic::ID)Intrinsic::r600_read_tidig_y;
AttrName = "amdgpu-no-workitem-id-y";
break;
case 2:
IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_z
: (Intrinsic::ID)Intrinsic::r600_read_tidig_z;
AttrName = "amdgpu-no-workitem-id-z";
break;
default:
llvm_unreachable("invalid dimension");
@ -313,6 +320,7 @@ Value *AMDGPUPromoteAllocaImpl::getWorkitemID(IRBuilder<> &Builder,
Function *WorkitemIdFn = Intrinsic::getDeclaration(Mod, IntrID);
CallInst *CI = Builder.CreateCall(WorkitemIdFn);
ST.makeLIDRangeMetadata(CI);
F->removeFnAttr(AttrName);
return CI;
}

View File

@ -0,0 +1,28 @@
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck %s
; This kernel starts with the amdgpu-no-workitem-id-* and
; amdgpu-no-dispatch-ptr attributes, but they need to be removed when
; uses of the corresponding intrinsics are introduced.
; CHECK-LABEL: define amdgpu_kernel void @promote_to_lds(i32 addrspace(1)* %out, i32 %in) #0 {
; CHECK: call noalias nonnull dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
; CHECK: call i32 @llvm.amdgcn.workitem.id.x(), !range !2
; CHECK: call i32 @llvm.amdgcn.workitem.id.y(), !range !2
; CHECK: call i32 @llvm.amdgcn.workitem.id.z(), !range !2
; Test kernel: fills a two-element addrspace(5) array with 0 and 1, reads it
; back at the runtime index %in, adds a volatile load, and stores the sum to
; %out. The attribute group #0 initially carries the amdgpu-no-* attributes
; that the CHECK lines expect to be gone after the pass runs.
define amdgpu_kernel void @promote_to_lds(i32 addrspace(1)* %out, i32 %in) #0 {
entry:
; The alloca the pass under test is expected to rewrite (presumably into LDS,
; given the function name and -disable-promote-alloca-to-vector on the RUN line).
%tmp = alloca [2 x i32], addrspace(5)
; Pointers to element 0 and element 1 of the array.
%tmp1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
%tmp2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
store i32 0, i32 addrspace(5)* %tmp1
store i32 1, i32 addrspace(5)* %tmp2
; Index with the kernel argument %in, so the accessed element is not a
; compile-time constant.
%tmp3 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in
%tmp4 = load i32, i32 addrspace(5)* %tmp3
; Volatile load from an undef addrspace(1) pointer; its value is only used as
; the second addend below.
%tmp5 = load volatile i32, i32 addrspace(1)* undef
%tmp6 = add i32 %tmp4, %tmp5
store i32 %tmp6, i32 addrspace(1)* %out
ret void
}
attributes #0 = { "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="1,5" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-dispatch-ptr" }
; CHECK: attributes #0 = { "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="1,5" }