AMDGPU: Use attributor to propagate uniform-work-group-size

Drop the legacy version in AMDGPUAnnotateKernelFeatures. This has the
side effect of now respecting the linkage, and not changing externally
visible functions.
This commit is contained in:
Matt Arsenault 2021-08-14 14:32:35 -04:00
parent 722b8e0e5a
commit db4963d080
10 changed files with 124 additions and 322 deletions

View File

@ -29,10 +29,7 @@ namespace {
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass { class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private: private:
const TargetMachine *TM = nullptr; const TargetMachine *TM = nullptr;
SmallVector<CallGraphNode*, 8> NodeList;
bool processUniformWorkGroupAttribute();
bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);
bool addFeatureAttributes(Function &F); bool addFeatureAttributes(Function &F);
public: public:
@ -62,56 +59,6 @@ char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
"Add AMDGPU function attributes", false, false) "Add AMDGPU function attributes", false, false)
/// Walk the collected call graph nodes in reverse order and propagate the
/// "uniform-work-group-size" attribute along every caller -> callee edge.
///
/// \returns true if any call to propagateUniformWorkGroupAttribute reported a
/// change, false otherwise.
bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
  bool Changed = false;

  for (auto *Node : reverse(NodeList)) {
    Function *Caller = Node->getFunction();
    // Call graph nodes that do not represent a real function (the synthetic
    // external nodes) have no Function to annotate; skip them rather than
    // dereferencing a null pointer below.
    if (!Caller)
      continue;

    // Bind by reference: each call record owns a value handle, so copying it
    // per iteration is needless work.
    for (const auto &Record : *Node) {
      Function *Callee = std::get<1>(Record)->getFunction();
      if (!Callee)
        continue;

      // Accumulate instead of overwriting, otherwise the result only reflects
      // the last edge visited.
      Changed |= propagateUniformWorkGroupAttribute(*Caller, *Callee);
    }
  }

  return Changed;
}
/// Propagate the "uniform-work-group-size" attribute across a single
/// caller -> callee edge.
///
/// Rules, in order:
///  - Callee has no exact definition: the callee is marked "false", and the
///    caller is marked "false" if it carries no value yet.
///  - Caller lacks the attribute: both caller and callee are marked "false".
///  - Caller is not "true": the callee is marked "false".
///  - Caller is "true": the callee is marked "true" only when it does not
///    already carry the attribute.
///
/// \returns true if an attribute was added, false if nothing was touched
/// (caller is "true" and the callee already has the attribute).
bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
    Function &Caller, Function &Callee) {
  const char *const AttrName = "uniform-work-group-size";

  // The body of an inexact definition may be replaced, so nothing can be
  // assumed about it.
  if (!Callee.hasExactDefinition()) {
    Callee.addFnAttr(AttrName, "false");
    if (!Caller.hasFnAttribute(AttrName))
      Caller.addFnAttr(AttrName, "false");
    return true;
  }

  // An unannotated caller is treated as non-uniform, and so is its callee.
  if (!Caller.hasFnAttribute(AttrName)) {
    Caller.addFnAttr(AttrName, "false");
    Callee.addFnAttr(AttrName, "false");
    return true;
  }

  const bool CallerIsUniform =
      Caller.getFnAttribute(AttrName).getValueAsString().equals("true");

  // A non-uniform caller forces the callee to be non-uniform as well.
  if (!CallerIsUniform) {
    Callee.addFnAttr(AttrName, "false");
    return true;
  }

  // Uniform caller: leave an existing callee annotation untouched.
  if (Callee.hasFnAttribute(AttrName))
    return false;

  Callee.addFnAttr(AttrName, "true");
  return true;
}
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
bool HaveStackObjects = false; bool HaveStackObjects = false;
bool Changed = false; bool Changed = false;
@ -166,14 +113,6 @@ bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
bool Changed = false; bool Changed = false;
for (CallGraphNode *I : SCC) { for (CallGraphNode *I : SCC) {
// Build a list of CallGraphNodes from most number of uses to least
if (I->getNumReferences())
NodeList.push_back(I);
else {
processUniformWorkGroupAttribute();
NodeList.clear();
}
Function *F = I->getFunction(); Function *F = I->getFunction();
// Ignore functions with graphics calling conventions, these are currently // Ignore functions with graphics calling conventions, these are currently
// not allowed to have kernel arguments. // not allowed to have kernel arguments.

View File

@ -182,7 +182,7 @@ define void @use_dispatch_id() #1 {
define void @use_workgroup_id_y_workgroup_id_z() #1 { define void @use_workgroup_id_y_workgroup_id_z() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_y_workgroup_id_z ; AKF_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_y_workgroup_id_z
; AKF_HSA-SAME: () #[[ATTR2:[0-9]+]] { ; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y() ; AKF_HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
; AKF_HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z() ; AKF_HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
; AKF_HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* undef, align 4 ; AKF_HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* undef, align 4
@ -471,7 +471,7 @@ define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 { define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 {
; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_gfx9 ; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_gfx9
; AKF_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { ; AKF_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)* ; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)*
; AKF_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4 ; AKF_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4
; AKF_HSA-NEXT: ret void ; AKF_HSA-NEXT: ret void
@ -489,7 +489,7 @@ define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 {
define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #2 { define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #2 {
; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_queue_ptr_gfx9 ; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_queue_ptr_gfx9
; AKF_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR3]] { ; AKF_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR2]] {
; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)* ; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)*
; AKF_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4 ; AKF_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4
; AKF_HSA-NEXT: call void @func_indirect_use_queue_ptr() ; AKF_HSA-NEXT: call void @func_indirect_use_queue_ptr()
@ -587,7 +587,7 @@ define void @func_indirect_use_kernarg_segment_ptr() #1 {
define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 { define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr ; AKF_HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr
; AKF_HSA-SAME: () #[[ATTR2]] { ; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() ; AKF_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
; AKF_HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 ; AKF_HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
; AKF_HSA-NEXT: ret void ; AKF_HSA-NEXT: ret void
@ -641,7 +641,7 @@ declare void @external.func() #3
; This function gets deleted. ; This function gets deleted.
define internal void @defined.func() #3 { define internal void @defined.func() #3 {
; AKF_HSA-LABEL: define {{[^@]+}}@defined.func ; AKF_HSA-LABEL: define {{[^@]+}}@defined.func
; AKF_HSA-SAME: () #[[ATTR4:[0-9]+]] { ; AKF_HSA-SAME: () #[[ATTR3:[0-9]+]] {
; AKF_HSA-NEXT: ret void ; AKF_HSA-NEXT: ret void
; ;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@defined.func ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@defined.func
@ -653,7 +653,7 @@ define internal void @defined.func() #3 {
define void @func_call_external() #3 { define void @func_call_external() #3 {
; AKF_HSA-LABEL: define {{[^@]+}}@func_call_external ; AKF_HSA-LABEL: define {{[^@]+}}@func_call_external
; AKF_HSA-SAME: () #[[ATTR4]] { ; AKF_HSA-SAME: () #[[ATTR3]] {
; AKF_HSA-NEXT: call void @external.func() ; AKF_HSA-NEXT: call void @external.func()
; AKF_HSA-NEXT: ret void ; AKF_HSA-NEXT: ret void
; ;
@ -668,7 +668,7 @@ define void @func_call_external() #3 {
define void @func_call_defined() #3 { define void @func_call_defined() #3 {
; AKF_HSA-LABEL: define {{[^@]+}}@func_call_defined ; AKF_HSA-LABEL: define {{[^@]+}}@func_call_defined
; AKF_HSA-SAME: () #[[ATTR4]] { ; AKF_HSA-SAME: () #[[ATTR3]] {
; AKF_HSA-NEXT: call void @defined.func() ; AKF_HSA-NEXT: call void @defined.func()
; AKF_HSA-NEXT: ret void ; AKF_HSA-NEXT: ret void
; ;
@ -682,8 +682,8 @@ define void @func_call_defined() #3 {
} }
define void @func_call_asm() #3 { define void @func_call_asm() #3 {
; AKF_HSA-LABEL: define {{[^@]+}}@func_call_asm ; AKF_HSA-LABEL: define {{[^@]+}}@func_call_asm
; AKF_HSA-SAME: () #[[ATTR5:[0-9]+]] { ; AKF_HSA-SAME: () #[[ATTR3]] {
; AKF_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR5]] ; AKF_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR3]]
; AKF_HSA-NEXT: ret void ; AKF_HSA-NEXT: ret void
; ;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_asm ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_asm
@ -697,7 +697,7 @@ define void @func_call_asm() #3 {
define amdgpu_kernel void @kern_call_external() #3 { define amdgpu_kernel void @kern_call_external() #3 {
; AKF_HSA-LABEL: define {{[^@]+}}@kern_call_external ; AKF_HSA-LABEL: define {{[^@]+}}@kern_call_external
; AKF_HSA-SAME: () #[[ATTR6:[0-9]+]] { ; AKF_HSA-SAME: () #[[ATTR4:[0-9]+]] {
; AKF_HSA-NEXT: call void @external.func() ; AKF_HSA-NEXT: call void @external.func()
; AKF_HSA-NEXT: ret void ; AKF_HSA-NEXT: ret void
; ;
@ -712,7 +712,7 @@ define amdgpu_kernel void @kern_call_external() #3 {
define amdgpu_kernel void @func_kern_defined() #3 { define amdgpu_kernel void @func_kern_defined() #3 {
; AKF_HSA-LABEL: define {{[^@]+}}@func_kern_defined ; AKF_HSA-LABEL: define {{[^@]+}}@func_kern_defined
; AKF_HSA-SAME: () #[[ATTR6]] { ; AKF_HSA-SAME: () #[[ATTR4]] {
; AKF_HSA-NEXT: call void @defined.func() ; AKF_HSA-NEXT: call void @defined.func()
; AKF_HSA-NEXT: ret void ; AKF_HSA-NEXT: ret void
; ;
@ -727,7 +727,7 @@ define amdgpu_kernel void @func_kern_defined() #3 {
define i32 @use_dispatch_ptr_ret_type() #1 { define i32 @use_dispatch_ptr_ret_type() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr_ret_type ; AKF_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr_ret_type
; AKF_HSA-SAME: () #[[ATTR2]] { ; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() ; AKF_HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
; AKF_HSA-NEXT: store volatile i8 addrspace(4)* [[DISPATCH_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 ; AKF_HSA-NEXT: store volatile i8 addrspace(4)* [[DISPATCH_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
; AKF_HSA-NEXT: ret i32 0 ; AKF_HSA-NEXT: ret i32 0
@ -745,7 +745,7 @@ define i32 @use_dispatch_ptr_ret_type() #1 {
define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 { define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr_constexpr_cast_func ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr_constexpr_cast_func
; AKF_HSA-SAME: () #[[ATTR2]] { ; AKF_HSA-SAME: () #[[ATTR1]] {
; AKF_HSA-NEXT: [[F:%.*]] = call float bitcast (i32 ()* @use_dispatch_ptr_ret_type to float ()*)() ; AKF_HSA-NEXT: [[F:%.*]] = call float bitcast (i32 ()* @use_dispatch_ptr_ret_type to float ()*)()
; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
; AKF_HSA-NEXT: ret float [[FADD]] ; AKF_HSA-NEXT: ret float [[FADD]]
@ -763,7 +763,7 @@ define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 {
define float @func_indirect_call(float()* %fptr) #3 { define float @func_indirect_call(float()* %fptr) #3 {
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_call ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_call
; AKF_HSA-SAME: (float ()* [[FPTR:%.*]]) #[[ATTR5]] { ; AKF_HSA-SAME: (float ()* [[FPTR:%.*]]) #[[ATTR3]] {
; AKF_HSA-NEXT: [[F:%.*]] = call float [[FPTR]]() ; AKF_HSA-NEXT: [[F:%.*]] = call float [[FPTR]]()
; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
; AKF_HSA-NEXT: ret float [[FADD]] ; AKF_HSA-NEXT: ret float [[FADD]]
@ -782,7 +782,7 @@ define float @func_indirect_call(float()* %fptr) #3 {
declare float @extern() #3 declare float @extern() #3
define float @func_extern_call() #3 { define float @func_extern_call() #3 {
; AKF_HSA-LABEL: define {{[^@]+}}@func_extern_call ; AKF_HSA-LABEL: define {{[^@]+}}@func_extern_call
; AKF_HSA-SAME: () #[[ATTR4]] { ; AKF_HSA-SAME: () #[[ATTR3]] {
; AKF_HSA-NEXT: [[F:%.*]] = call float @extern() ; AKF_HSA-NEXT: [[F:%.*]] = call float @extern()
; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
; AKF_HSA-NEXT: ret float [[FADD]] ; AKF_HSA-NEXT: ret float [[FADD]]
@ -800,7 +800,7 @@ define float @func_extern_call() #3 {
define float @func_null_call(float()* %fptr) #3 { define float @func_null_call(float()* %fptr) #3 {
; AKF_HSA-LABEL: define {{[^@]+}}@func_null_call ; AKF_HSA-LABEL: define {{[^@]+}}@func_null_call
; AKF_HSA-SAME: (float ()* [[FPTR:%.*]]) #[[ATTR5]] { ; AKF_HSA-SAME: (float ()* [[FPTR:%.*]]) #[[ATTR3]] {
; AKF_HSA-NEXT: [[F:%.*]] = call float null() ; AKF_HSA-NEXT: [[F:%.*]] = call float null()
; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
; AKF_HSA-NEXT: ret float [[FADD]] ; AKF_HSA-NEXT: ret float [[FADD]]
@ -821,7 +821,7 @@ declare float @llvm.amdgcn.rcp.f32(float) #0
; Calls some other recognized intrinsic ; Calls some other recognized intrinsic
define float @func_other_intrinsic_call(float %arg) #3 { define float @func_other_intrinsic_call(float %arg) #3 {
; AKF_HSA-LABEL: define {{[^@]+}}@func_other_intrinsic_call ; AKF_HSA-LABEL: define {{[^@]+}}@func_other_intrinsic_call
; AKF_HSA-SAME: (float [[ARG:%.*]]) #[[ATTR5]] { ; AKF_HSA-SAME: (float [[ARG:%.*]]) #[[ATTR3]] {
; AKF_HSA-NEXT: [[F:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[ARG]]) ; AKF_HSA-NEXT: [[F:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[ARG]])
; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
; AKF_HSA-NEXT: ret float [[FADD]] ; AKF_HSA-NEXT: ret float [[FADD]]
@ -844,12 +844,10 @@ attributes #3 = { nounwind }
;. ;.
; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } ; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
; AKF_HSA: attributes #[[ATTR1]] = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" } ; AKF_HSA: attributes #[[ATTR1]] = { nounwind "target-cpu"="fiji" }
; AKF_HSA: attributes #[[ATTR2]] = { nounwind "target-cpu"="fiji" } ; AKF_HSA: attributes #[[ATTR2]] = { nounwind "target-cpu"="gfx900" }
; AKF_HSA: attributes #[[ATTR3]] = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" } ; AKF_HSA: attributes #[[ATTR3]] = { nounwind }
; AKF_HSA: attributes #[[ATTR4]] = { nounwind "uniform-work-group-size"="false" } ; AKF_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-calls" }
; AKF_HSA: attributes #[[ATTR5]] = { nounwind }
; AKF_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-calls" "uniform-work-group-size"="false" }
;. ;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }

View File

@ -1,34 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=GCN,AKF_GCN %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor < %s | FileCheck -check-prefixes=GCN,ATTRIBUTOR_GCN %s
define internal void @indirect() { define internal void @indirect() {
; AKF_GCN-LABEL: define {{[^@]+}}@indirect() { ; CHECK-LABEL: define {{[^@]+}}@indirect
; AKF_GCN-NEXT: ret void ; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; ; CHECK-NEXT: ret void
; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@indirect
; ATTRIBUTOR_GCN-SAME: () #[[ATTR0:[0-9]+]] {
; ATTRIBUTOR_GCN-NEXT: ret void
; ;
ret void ret void
} }
define internal void @direct() { define internal void @direct() {
; AKF_GCN-LABEL: define {{[^@]+}}@direct ; CHECK-LABEL: define {{[^@]+}}@direct
; AKF_GCN-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
; AKF_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8 ; CHECK-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8
; AKF_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8 ; CHECK-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8
; AKF_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8 ; CHECK-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8
; AKF_GCN-NEXT: call void [[FP]]() ; CHECK-NEXT: call void [[FP]]()
; AKF_GCN-NEXT: ret void ; CHECK-NEXT: ret void
;
; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@direct
; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8
; ATTRIBUTOR_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8
; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8
; ATTRIBUTOR_GCN-NEXT: call void [[FP]]()
; ATTRIBUTOR_GCN-NEXT: ret void
; ;
%fptr = alloca void()* %fptr = alloca void()*
store void()* @indirect, void()** %fptr store void()* @indirect, void()** %fptr
@ -38,18 +26,15 @@ define internal void @direct() {
} }
define amdgpu_kernel void @test_direct_indirect_call() { define amdgpu_kernel void @test_direct_indirect_call() {
; GCN-LABEL: define {{[^@]+}}@test_direct_indirect_call ; CHECK-LABEL: define {{[^@]+}}@test_direct_indirect_call
; GCN-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-SAME: () #[[ATTR1]] {
; GCN-NEXT: call void @direct() ; CHECK-NEXT: call void @direct()
; GCN-NEXT: ret void ; CHECK-NEXT: ret void
; ;
call void @direct() call void @direct()
ret void ret void
} }
;. ;.
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-stack-objects" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; AKF_GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;. ;.

View File

@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s ; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
; If the kernel does not have the uniform-work-group-attribute, set both callee and caller as false ; If the kernel does not have the uniform-work-group-attribute, set both callee and caller as false
; We write to a global so that the attributor doesn't delete the function. ; We write to a global so that the attributor doesn't delete the function.
@ -21,15 +20,10 @@ define void @foo() #0 {
} }
define amdgpu_kernel void @kernel1() #1 { define amdgpu_kernel void @kernel1() #1 {
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel1 ; CHECK-LABEL: define {{[^@]+}}@kernel1
; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-SAME: () #[[ATTR0]] {
; AKF_CHECK-NEXT: call void @foo() ; CHECK-NEXT: call void @foo()
; AKF_CHECK-NEXT: ret void ; CHECK-NEXT: ret void
;
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel1
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0]] {
; ATTRIBUTOR_CHECK-NEXT: call void @foo()
; ATTRIBUTOR_CHECK-NEXT: ret void
; ;
call void @foo() call void @foo()
ret void ret void
@ -37,8 +31,5 @@ define amdgpu_kernel void @kernel1() #1 {
attributes #0 = { "uniform-work-group-size"="true" } attributes #0 = { "uniform-work-group-size"="true" }
;. ;.
; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
;.
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;. ;.

View File

@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s ; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor < %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
;. ;.
; CHECK: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global i32* null ; CHECK: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global i32* null
@ -43,29 +42,17 @@ define amdgpu_kernel void @kernel1() #0 {
@G2 = global i32 0 @G2 = global i32 0
define internal void @internal3() { define internal void @internal3() {
; AKF_CHECK-LABEL: define {{[^@]+}}@internal3 ; CHECK-LABEL: define {{[^@]+}}@internal3
; AKF_CHECK-SAME: () #[[ATTR2:[0-9]+]] { ; CHECK-SAME: () #[[ATTR1]] {
; AKF_CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @G2, align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @G2, align 4
; AKF_CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
; AKF_CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] ; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
; AKF_CHECK: 3: ; CHECK: 3:
; AKF_CHECK-NEXT: call void @internal4() ; CHECK-NEXT: call void @internal4()
; AKF_CHECK-NEXT: call void @internal3() ; CHECK-NEXT: call void @internal3()
; AKF_CHECK-NEXT: br label [[TMP4]] ; CHECK-NEXT: br label [[TMP4]]
; AKF_CHECK: 4: ; CHECK: 4:
; AKF_CHECK-NEXT: ret void ; CHECK-NEXT: ret void
;
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@internal3
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1]] {
; ATTRIBUTOR_CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @G2, align 4
; ATTRIBUTOR_CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
; ATTRIBUTOR_CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
; ATTRIBUTOR_CHECK: 3:
; ATTRIBUTOR_CHECK-NEXT: call void @internal4()
; ATTRIBUTOR_CHECK-NEXT: call void @internal3()
; ATTRIBUTOR_CHECK-NEXT: br label [[TMP4]]
; ATTRIBUTOR_CHECK: 4:
; ATTRIBUTOR_CHECK-NEXT: ret void
; ;
%1 = load i32, i32* @G2, align 4 %1 = load i32, i32* @G2, align 4
%2 = icmp eq i32 %1, 0 %2 = icmp eq i32 %1, 0
@ -79,30 +66,20 @@ define internal void @internal3() {
} }
define internal void @internal4() { define internal void @internal4() {
; AKF_CHECK-LABEL: define {{[^@]+}}@internal4 ; CHECK-LABEL: define {{[^@]+}}@internal4
; AKF_CHECK-SAME: () #[[ATTR2]] { ; CHECK-SAME: () #[[ATTR1]] {
; AKF_CHECK-NEXT: store i32 1, i32* @G2, align 4 ; CHECK-NEXT: store i32 1, i32* @G2, align 4
; AKF_CHECK-NEXT: ret void ; CHECK-NEXT: ret void
;
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@internal4
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1]] {
; ATTRIBUTOR_CHECK-NEXT: store i32 1, i32* @G2, align 4
; ATTRIBUTOR_CHECK-NEXT: ret void
; ;
store i32 1, i32* @G2, align 4 store i32 1, i32* @G2, align 4
ret void ret void
} }
define internal void @internal2() { define internal void @internal2() {
; AKF_CHECK-LABEL: define {{[^@]+}}@internal2 ; CHECK-LABEL: define {{[^@]+}}@internal2
; AKF_CHECK-SAME: () #[[ATTR2]] { ; CHECK-SAME: () #[[ATTR1]] {
; AKF_CHECK-NEXT: call void @internal3() ; CHECK-NEXT: call void @internal3()
; AKF_CHECK-NEXT: ret void ; CHECK-NEXT: ret void
;
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@internal2
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1]] {
; ATTRIBUTOR_CHECK-NEXT: call void @internal3()
; ATTRIBUTOR_CHECK-NEXT: ret void
; ;
call void @internal3() call void @internal3()
ret void ret void
@ -119,12 +96,7 @@ define amdgpu_kernel void @kernel2() #0 {
} }
attributes #0 = { "uniform-work-group-size"="true" } attributes #0 = { "uniform-work-group-size"="true" }
;. ;.
; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
; AKF_CHECK: attributes #[[ATTR2]] = { "uniform-work-group-size"="true" }
;.
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;. ;.

View File

@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=AKF_CHECK %s ; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_CHECK %s
; Test to verify if the attribute gets propagated across nested function calls ; Test to verify if the attribute gets propagated across nested function calls
@ -8,50 +7,33 @@
@x = global i32 0 @x = global i32 0
;. ;.
; AKF_CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 ; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0
;.
; ATTRIBUTOR_CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0
;. ;.
define void @func1() #0 { define void @func1() #0 {
; AKF_CHECK-LABEL: define {{[^@]+}}@func1 ; CHECK-LABEL: define {{[^@]+}}@func1
; AKF_CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; AKF_CHECK-NEXT: store i32 0, i32* @x, align 4 ; CHECK-NEXT: store i32 0, i32* @x, align 4
; AKF_CHECK-NEXT: ret void ; CHECK-NEXT: ret void
;
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@func1
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; ATTRIBUTOR_CHECK-NEXT: store i32 0, i32* @x, align 4
; ATTRIBUTOR_CHECK-NEXT: ret void
; ;
store i32 0, i32* @x store i32 0, i32* @x
ret void ret void
} }
define void @func2() #1 { define void @func2() #1 {
; AKF_CHECK-LABEL: define {{[^@]+}}@func2 ; CHECK-LABEL: define {{[^@]+}}@func2
; AKF_CHECK-SAME: () #[[ATTR0]] { ; CHECK-SAME: () #[[ATTR0]] {
; AKF_CHECK-NEXT: call void @func1() ; CHECK-NEXT: call void @func1()
; AKF_CHECK-NEXT: ret void ; CHECK-NEXT: ret void
;
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@func2
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0]] {
; ATTRIBUTOR_CHECK-NEXT: call void @func1()
; ATTRIBUTOR_CHECK-NEXT: ret void
; ;
call void @func1() call void @func1()
ret void ret void
} }
define amdgpu_kernel void @kernel3() #2 { define amdgpu_kernel void @kernel3() #2 {
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel3 ; CHECK-LABEL: define {{[^@]+}}@kernel3
; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
; AKF_CHECK-NEXT: call void @func2() ; CHECK-NEXT: call void @func2()
; AKF_CHECK-NEXT: ret void ; CHECK-NEXT: ret void
;
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel3
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1:[0-9]+]] {
; ATTRIBUTOR_CHECK-NEXT: call void @func2()
; ATTRIBUTOR_CHECK-NEXT: ret void
; ;
call void @func2() call void @func2()
ret void ret void
@ -59,9 +41,6 @@ define amdgpu_kernel void @kernel3() #2 {
attributes #2 = { "uniform-work-group-size"="true" } attributes #2 = { "uniform-work-group-size"="true" }
;. ;.
; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="true" } ; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;. ;.

View File

@ -1,7 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor < %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
; Function added to prevent attributor from deleting call sites. ; Function added to prevent attributor from deleting call sites.
@ -32,27 +30,17 @@ define amdgpu_kernel void @kernel1() #1 {
} }
define amdgpu_kernel void @kernel2() #2 { define amdgpu_kernel void @kernel2() #2 {
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel2 ; CHECK-LABEL: define {{[^@]+}}@kernel2
; AKF_CHECK-SAME: () #[[ATTR2:[0-9]+]] { ; CHECK-SAME: () #[[ATTR0]] {
; AKF_CHECK-NEXT: call void @func() ; CHECK-NEXT: call void @func()
; AKF_CHECK-NEXT: ret void ; CHECK-NEXT: ret void
;
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel2
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0]] {
; ATTRIBUTOR_CHECK-NEXT: call void @func()
; ATTRIBUTOR_CHECK-NEXT: ret void
; ;
call void @func() call void @func()
ret void ret void
} }
attributes #1 = { "uniform-work-group-size"="true" } attributes #1 = { "uniform-work-group-size"="true" }
;. ;.
; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } ; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
; AKF_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
;.
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;. ;.

View File

@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s ; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
@x = global i32 0 @x = global i32 0
@ -52,12 +51,3 @@ define amdgpu_kernel void @kernel2() #2 {
attributes #0 = { nounwind } attributes #0 = { nounwind }
attributes #1 = { "uniform-work-group-size"="false" } attributes #1 = { "uniform-work-group-size"="false" }
attributes #2 = { "uniform-work-group-size"="true" } attributes #2 = { "uniform-work-group-size"="true" }
;.
; AKF_CHECK: attributes #[[ATTR0]] = { nounwind "uniform-work-group-size"="false" }
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
; AKF_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
;.
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.

View File

@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck --allow-unused-prefixes -check-prefixes=CHECK,AKF_CHECK %s ; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck --allow-unused-prefixes -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
; Test to ensure recursive functions exhibit proper behaviour ; Test to ensure recursive functions exhibit proper behaviour
; Test to generate fibonacci numbers ; Test to generate fibonacci numbers
@ -44,39 +43,22 @@ exit:
} }
define internal i32 @fib_internal(i32 %n) #0 { define internal i32 @fib_internal(i32 %n) #0 {
; AKF_CHECK-LABEL: define {{[^@]+}}@fib_internal ; CHECK-LABEL: define {{[^@]+}}@fib_internal
; AKF_CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR0]] { ; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
; AKF_CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0 ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0
; AKF_CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]] ; CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]]
; AKF_CHECK: cont1: ; CHECK: cont1:
; AKF_CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1
; AKF_CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]] ; CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]]
; AKF_CHECK: cont2: ; CHECK: cont2:
; AKF_CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1 ; CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1
; AKF_CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib_internal(i32 [[NM1]]) ; CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib_internal(i32 [[NM1]])
; AKF_CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2 ; CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2
; AKF_CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib_internal(i32 [[NM2]]) ; CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib_internal(i32 [[NM2]])
; AKF_CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]] ; CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]]
; AKF_CHECK-NEXT: ret i32 [[RETVAL]] ; CHECK-NEXT: ret i32 [[RETVAL]]
; AKF_CHECK: exit: ; CHECK: exit:
; AKF_CHECK-NEXT: ret i32 1 ; CHECK-NEXT: ret i32 1
;
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@fib_internal
; ATTRIBUTOR_CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
; ATTRIBUTOR_CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0
; ATTRIBUTOR_CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]]
; ATTRIBUTOR_CHECK: cont1:
; ATTRIBUTOR_CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1
; ATTRIBUTOR_CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]]
; ATTRIBUTOR_CHECK: cont2:
; ATTRIBUTOR_CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1
; ATTRIBUTOR_CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib_internal(i32 [[NM1]])
; ATTRIBUTOR_CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2
; ATTRIBUTOR_CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib_internal(i32 [[NM2]])
; ATTRIBUTOR_CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]]
; ATTRIBUTOR_CHECK-NEXT: ret i32 [[RETVAL]]
; ATTRIBUTOR_CHECK: exit:
; ATTRIBUTOR_CHECK-NEXT: ret i32 1
; ;
%cmp1 = icmp eq i32 %n, 0 %cmp1 = icmp eq i32 %n, 0
br i1 %cmp1, label %exit, label %cont1 br i1 %cmp1, label %exit, label %cont1
@ -99,21 +81,13 @@ exit:
} }
define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 { define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 {
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel ; CHECK-LABEL: define {{[^@]+}}@kernel
; AKF_CHECK-SAME: (i32 addrspace(1)* [[M:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-SAME: (i32 addrspace(1)* [[M:%.*]]) #[[ATTR2:[0-9]+]] {
; AKF_CHECK-NEXT: [[R:%.*]] = call i32 @fib(i32 5) ; CHECK-NEXT: [[R:%.*]] = call i32 @fib(i32 5)
; AKF_CHECK-NEXT: [[R2:%.*]] = call i32 @fib_internal(i32 5) ; CHECK-NEXT: [[R2:%.*]] = call i32 @fib_internal(i32 5)
; AKF_CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[M]], align 4 ; CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[M]], align 4
; AKF_CHECK-NEXT: store i32 [[R2]], i32 addrspace(1)* [[M]], align 4 ; CHECK-NEXT: store i32 [[R2]], i32 addrspace(1)* [[M]], align 4
; AKF_CHECK-NEXT: ret void ; CHECK-NEXT: ret void
;
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel
; ATTRIBUTOR_CHECK-SAME: (i32 addrspace(1)* [[M:%.*]]) #[[ATTR2:[0-9]+]] {
; ATTRIBUTOR_CHECK-NEXT: [[R:%.*]] = call i32 @fib(i32 5)
; ATTRIBUTOR_CHECK-NEXT: [[R2:%.*]] = call i32 @fib_internal(i32 5)
; ATTRIBUTOR_CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[M]], align 4
; ATTRIBUTOR_CHECK-NEXT: store i32 [[R2]], i32 addrspace(1)* [[M]], align 4
; ATTRIBUTOR_CHECK-NEXT: ret void
; ;
%r = call i32 @fib(i32 5) %r = call i32 @fib(i32 5)
%r2 = call i32 @fib_internal(i32 5) %r2 = call i32 @fib_internal(i32 5)
@ -126,12 +100,8 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 {
; nounwind and readnone are added to match attributor results. ; nounwind and readnone are added to match attributor results.
attributes #0 = { nounwind readnone } attributes #0 = { nounwind readnone }
attributes #1 = { "uniform-work-group-size"="true" } attributes #1 = { "uniform-work-group-size"="true" }
;. ;.
; AKF_CHECK: attributes #[[ATTR0]] = { nounwind readnone "uniform-work-group-size"="true" } ; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } ; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;. ; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;. ;.

View File

@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -allow-unused-prefixes -check-prefixes=CHECK,AKF_CHECK %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -allow-unused-prefixes %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -allow-unused-prefixes -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
@x = global i32 0 @x = global i32 0
;. ;.
@ -49,17 +48,11 @@ define void @func3() {
} }
define amdgpu_kernel void @kernel3() #0 { define amdgpu_kernel void @kernel3() #0 {
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel3 ; CHECK-LABEL: define {{[^@]+}}@kernel3
; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-SAME: () #[[ATTR0]] {
; AKF_CHECK-NEXT: call void @func2() ; CHECK-NEXT: call void @func2()
; AKF_CHECK-NEXT: call void @func3() ; CHECK-NEXT: call void @func3()
; AKF_CHECK-NEXT: ret void ; CHECK-NEXT: ret void
;
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel3
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0]] {
; ATTRIBUTOR_CHECK-NEXT: call void @func2()
; ATTRIBUTOR_CHECK-NEXT: call void @func3()
; ATTRIBUTOR_CHECK-NEXT: ret void
; ;
call void @func2() call void @func2()
call void @func3() call void @func3()
@ -68,8 +61,5 @@ define amdgpu_kernel void @kernel3() #0 {
attributes #0 = { "uniform-work-group-size"="false" } attributes #0 = { "uniform-work-group-size"="false" }
;. ;.
; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
;.
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;. ;.