forked from OSchip/llvm-project
AMDGPU: Use attributor to propagate uniform-work-group-size
Drop the legacy version in AMDGPUAnnotateKernelFeatures. This has the side effect of now respecting the linkage, and not changing externally visible functions.
This commit is contained in:
parent
722b8e0e5a
commit
db4963d080
|
@ -29,10 +29,7 @@ namespace {
|
|||
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
|
||||
private:
|
||||
const TargetMachine *TM = nullptr;
|
||||
SmallVector<CallGraphNode*, 8> NodeList;
|
||||
|
||||
bool processUniformWorkGroupAttribute();
|
||||
bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);
|
||||
bool addFeatureAttributes(Function &F);
|
||||
|
||||
public:
|
||||
|
@ -62,56 +59,6 @@ char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
|
|||
INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
|
||||
"Add AMDGPU function attributes", false, false)
|
||||
|
||||
bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
|
||||
bool Changed = false;
|
||||
|
||||
for (auto *Node : reverse(NodeList)) {
|
||||
Function *Caller = Node->getFunction();
|
||||
|
||||
for (auto I : *Node) {
|
||||
Function *Callee = std::get<1>(I)->getFunction();
|
||||
if (Callee)
|
||||
Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
|
||||
}
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
/// Reconcile the "uniform-work-group-size" attribute between one caller and
/// one of its callees.
///
/// Cases, checked in order:
///  - Callee has no exact definition: Callee is marked "false", and so is
///    Caller unless it already carries the attribute.
///  - Caller lacks the attribute: both Caller and Callee are marked "false".
///  - Caller's value is anything other than "true": Callee is marked "false".
///  - Caller's value is "true": Callee is marked "true" only if it does not
///    already carry the attribute.
///
/// \returns false only when Caller is "true" and Callee already had the
/// attribute; every other path returns true.
bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
    Function &Caller, Function &Callee) {
  const char *const UniformWGSize = "uniform-work-group-size";

  // Callee without an exact definition.
  if (!Callee.hasExactDefinition()) {
    Callee.addFnAttr(UniformWGSize, "false");
    if (!Caller.hasFnAttribute(UniformWGSize))
      Caller.addFnAttr(UniformWGSize, "false");
    return true;
  }

  // Caller carries no attribute at all: both ends become "false".
  if (!Caller.hasFnAttribute(UniformWGSize)) {
    Caller.addFnAttr(UniformWGSize, "false");
    Callee.addFnAttr(UniformWGSize, "false");
    return true;
  }

  // Caller carries the attribute with a value other than "true".
  const bool CallerIsUniform =
      Caller.getFnAttribute(UniformWGSize).getValueAsString().equals("true");
  if (!CallerIsUniform) {
    Callee.addFnAttr(UniformWGSize, "false");
    return true;
  }

  // Caller is "true": leave an already-annotated callee untouched.
  if (Callee.hasFnAttribute(UniformWGSize))
    return false;

  Callee.addFnAttr(UniformWGSize, "true");
  return true;
}
|
||||
|
||||
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
|
||||
bool HaveStackObjects = false;
|
||||
bool Changed = false;
|
||||
|
@ -166,14 +113,6 @@ bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
|
|||
bool Changed = false;
|
||||
|
||||
for (CallGraphNode *I : SCC) {
|
||||
// Build a list of CallGraphNodes from most number of uses to least
|
||||
if (I->getNumReferences())
|
||||
NodeList.push_back(I);
|
||||
else {
|
||||
processUniformWorkGroupAttribute();
|
||||
NodeList.clear();
|
||||
}
|
||||
|
||||
Function *F = I->getFunction();
|
||||
// Ignore functions with graphics calling conventions, these are currently
|
||||
// not allowed to have kernel arguments.
|
||||
|
|
|
@ -182,7 +182,7 @@ define void @use_dispatch_id() #1 {
|
|||
|
||||
define void @use_workgroup_id_y_workgroup_id_z() #1 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_y_workgroup_id_z
|
||||
; AKF_HSA-SAME: () #[[ATTR2:[0-9]+]] {
|
||||
; AKF_HSA-SAME: () #[[ATTR1]] {
|
||||
; AKF_HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
|
||||
; AKF_HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
|
||||
; AKF_HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* undef, align 4
|
||||
|
@ -471,7 +471,7 @@ define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
|
|||
|
||||
define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_gfx9
|
||||
; AKF_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
|
||||
; AKF_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
|
||||
; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)*
|
||||
; AKF_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4
|
||||
; AKF_HSA-NEXT: ret void
|
||||
|
@ -489,7 +489,7 @@ define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 {
|
|||
|
||||
define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #2 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_queue_ptr_gfx9
|
||||
; AKF_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR3]] {
|
||||
; AKF_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR2]] {
|
||||
; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)*
|
||||
; AKF_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4
|
||||
; AKF_HSA-NEXT: call void @func_indirect_use_queue_ptr()
|
||||
|
@ -587,7 +587,7 @@ define void @func_indirect_use_kernarg_segment_ptr() #1 {
|
|||
|
||||
define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr
|
||||
; AKF_HSA-SAME: () #[[ATTR2]] {
|
||||
; AKF_HSA-SAME: () #[[ATTR1]] {
|
||||
; AKF_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
; AKF_HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
|
||||
; AKF_HSA-NEXT: ret void
|
||||
|
@ -641,7 +641,7 @@ declare void @external.func() #3
|
|||
; This function gets deleted.
|
||||
define internal void @defined.func() #3 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@defined.func
|
||||
; AKF_HSA-SAME: () #[[ATTR4:[0-9]+]] {
|
||||
; AKF_HSA-SAME: () #[[ATTR3:[0-9]+]] {
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@defined.func
|
||||
|
@ -653,7 +653,7 @@ define internal void @defined.func() #3 {
|
|||
|
||||
define void @func_call_external() #3 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_call_external
|
||||
; AKF_HSA-SAME: () #[[ATTR4]] {
|
||||
; AKF_HSA-SAME: () #[[ATTR3]] {
|
||||
; AKF_HSA-NEXT: call void @external.func()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
|
@ -668,7 +668,7 @@ define void @func_call_external() #3 {
|
|||
|
||||
define void @func_call_defined() #3 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_call_defined
|
||||
; AKF_HSA-SAME: () #[[ATTR4]] {
|
||||
; AKF_HSA-SAME: () #[[ATTR3]] {
|
||||
; AKF_HSA-NEXT: call void @defined.func()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
|
@ -682,8 +682,8 @@ define void @func_call_defined() #3 {
|
|||
}
|
||||
define void @func_call_asm() #3 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_call_asm
|
||||
; AKF_HSA-SAME: () #[[ATTR5:[0-9]+]] {
|
||||
; AKF_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR5]]
|
||||
; AKF_HSA-SAME: () #[[ATTR3]] {
|
||||
; AKF_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR3]]
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_asm
|
||||
|
@ -697,7 +697,7 @@ define void @func_call_asm() #3 {
|
|||
|
||||
define amdgpu_kernel void @kern_call_external() #3 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@kern_call_external
|
||||
; AKF_HSA-SAME: () #[[ATTR6:[0-9]+]] {
|
||||
; AKF_HSA-SAME: () #[[ATTR4:[0-9]+]] {
|
||||
; AKF_HSA-NEXT: call void @external.func()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
|
@ -712,7 +712,7 @@ define amdgpu_kernel void @kern_call_external() #3 {
|
|||
|
||||
define amdgpu_kernel void @func_kern_defined() #3 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_kern_defined
|
||||
; AKF_HSA-SAME: () #[[ATTR6]] {
|
||||
; AKF_HSA-SAME: () #[[ATTR4]] {
|
||||
; AKF_HSA-NEXT: call void @defined.func()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
|
@ -727,7 +727,7 @@ define amdgpu_kernel void @func_kern_defined() #3 {
|
|||
|
||||
define i32 @use_dispatch_ptr_ret_type() #1 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr_ret_type
|
||||
; AKF_HSA-SAME: () #[[ATTR2]] {
|
||||
; AKF_HSA-SAME: () #[[ATTR1]] {
|
||||
; AKF_HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
; AKF_HSA-NEXT: store volatile i8 addrspace(4)* [[DISPATCH_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
|
||||
; AKF_HSA-NEXT: ret i32 0
|
||||
|
@ -745,7 +745,7 @@ define i32 @use_dispatch_ptr_ret_type() #1 {
|
|||
|
||||
define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr_constexpr_cast_func
|
||||
; AKF_HSA-SAME: () #[[ATTR2]] {
|
||||
; AKF_HSA-SAME: () #[[ATTR1]] {
|
||||
; AKF_HSA-NEXT: [[F:%.*]] = call float bitcast (i32 ()* @use_dispatch_ptr_ret_type to float ()*)()
|
||||
; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
|
||||
; AKF_HSA-NEXT: ret float [[FADD]]
|
||||
|
@ -763,7 +763,7 @@ define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 {
|
|||
|
||||
define float @func_indirect_call(float()* %fptr) #3 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_call
|
||||
; AKF_HSA-SAME: (float ()* [[FPTR:%.*]]) #[[ATTR5]] {
|
||||
; AKF_HSA-SAME: (float ()* [[FPTR:%.*]]) #[[ATTR3]] {
|
||||
; AKF_HSA-NEXT: [[F:%.*]] = call float [[FPTR]]()
|
||||
; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
|
||||
; AKF_HSA-NEXT: ret float [[FADD]]
|
||||
|
@ -782,7 +782,7 @@ define float @func_indirect_call(float()* %fptr) #3 {
|
|||
declare float @extern() #3
|
||||
define float @func_extern_call() #3 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_extern_call
|
||||
; AKF_HSA-SAME: () #[[ATTR4]] {
|
||||
; AKF_HSA-SAME: () #[[ATTR3]] {
|
||||
; AKF_HSA-NEXT: [[F:%.*]] = call float @extern()
|
||||
; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
|
||||
; AKF_HSA-NEXT: ret float [[FADD]]
|
||||
|
@ -800,7 +800,7 @@ define float @func_extern_call() #3 {
|
|||
|
||||
define float @func_null_call(float()* %fptr) #3 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_null_call
|
||||
; AKF_HSA-SAME: (float ()* [[FPTR:%.*]]) #[[ATTR5]] {
|
||||
; AKF_HSA-SAME: (float ()* [[FPTR:%.*]]) #[[ATTR3]] {
|
||||
; AKF_HSA-NEXT: [[F:%.*]] = call float null()
|
||||
; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
|
||||
; AKF_HSA-NEXT: ret float [[FADD]]
|
||||
|
@ -821,7 +821,7 @@ declare float @llvm.amdgcn.rcp.f32(float) #0
|
|||
; Calls some other recognized intrinsic
|
||||
define float @func_other_intrinsic_call(float %arg) #3 {
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_other_intrinsic_call
|
||||
; AKF_HSA-SAME: (float [[ARG:%.*]]) #[[ATTR5]] {
|
||||
; AKF_HSA-SAME: (float [[ARG:%.*]]) #[[ATTR3]] {
|
||||
; AKF_HSA-NEXT: [[F:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[ARG]])
|
||||
; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
|
||||
; AKF_HSA-NEXT: ret float [[FADD]]
|
||||
|
@ -844,12 +844,10 @@ attributes #3 = { nounwind }
|
|||
|
||||
;.
|
||||
; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
|
||||
; AKF_HSA: attributes #[[ATTR1]] = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR2]] = { nounwind "target-cpu"="fiji" }
|
||||
; AKF_HSA: attributes #[[ATTR3]] = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR4]] = { nounwind "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR5]] = { nounwind }
|
||||
; AKF_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR1]] = { nounwind "target-cpu"="fiji" }
|
||||
; AKF_HSA: attributes #[[ATTR2]] = { nounwind "target-cpu"="gfx900" }
|
||||
; AKF_HSA: attributes #[[ATTR3]] = { nounwind }
|
||||
; AKF_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-calls" }
|
||||
;.
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
|
|
|
@ -1,34 +1,22 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=GCN,AKF_GCN %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor < %s | FileCheck -check-prefixes=GCN,ATTRIBUTOR_GCN %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor < %s | FileCheck %s
|
||||
|
||||
define internal void @indirect() {
|
||||
; AKF_GCN-LABEL: define {{[^@]+}}@indirect() {
|
||||
; AKF_GCN-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@indirect
|
||||
; ATTRIBUTOR_GCN-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; ATTRIBUTOR_GCN-NEXT: ret void
|
||||
; CHECK-LABEL: define {{[^@]+}}@indirect
|
||||
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
ret void
|
||||
}
|
||||
|
||||
define internal void @direct() {
|
||||
; AKF_GCN-LABEL: define {{[^@]+}}@direct
|
||||
; AKF_GCN-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; AKF_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8
|
||||
; AKF_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8
|
||||
; AKF_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8
|
||||
; AKF_GCN-NEXT: call void [[FP]]()
|
||||
; AKF_GCN-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@direct
|
||||
; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8
|
||||
; ATTRIBUTOR_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8
|
||||
; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8
|
||||
; ATTRIBUTOR_GCN-NEXT: call void [[FP]]()
|
||||
; ATTRIBUTOR_GCN-NEXT: ret void
|
||||
; CHECK-LABEL: define {{[^@]+}}@direct
|
||||
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; CHECK-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8
|
||||
; CHECK-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8
|
||||
; CHECK-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8
|
||||
; CHECK-NEXT: call void [[FP]]()
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%fptr = alloca void()*
|
||||
store void()* @indirect, void()** %fptr
|
||||
|
@ -38,18 +26,15 @@ define internal void @direct() {
|
|||
}
|
||||
|
||||
define amdgpu_kernel void @test_direct_indirect_call() {
|
||||
; GCN-LABEL: define {{[^@]+}}@test_direct_indirect_call
|
||||
; GCN-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; GCN-NEXT: call void @direct()
|
||||
; GCN-NEXT: ret void
|
||||
; CHECK-LABEL: define {{[^@]+}}@test_direct_indirect_call
|
||||
; CHECK-SAME: () #[[ATTR1]] {
|
||||
; CHECK-NEXT: call void @direct()
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
call void @direct()
|
||||
ret void
|
||||
}
|
||||
;.
|
||||
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-stack-objects" "uniform-work-group-size"="false" }
|
||||
; AKF_GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck %s
|
||||
|
||||
; If the kernel does not have the uniform-work-group-size attribute, set both callee and caller as false
|
||||
; We write to a global so that the attributor doesn't delete the function.
|
||||
|
@ -21,15 +20,10 @@ define void @foo() #0 {
|
|||
}
|
||||
|
||||
define amdgpu_kernel void @kernel1() #1 {
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel1
|
||||
; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: call void @foo()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel1
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @foo()
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel1
|
||||
; CHECK-SAME: () #[[ATTR0]] {
|
||||
; CHECK-NEXT: call void @foo()
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
call void @foo()
|
||||
ret void
|
||||
|
@ -37,8 +31,5 @@ define amdgpu_kernel void @kernel1() #1 {
|
|||
|
||||
attributes #0 = { "uniform-work-group-size"="true" }
|
||||
;.
|
||||
; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
|
||||
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor < %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor < %s | FileCheck %s
|
||||
|
||||
;.
|
||||
; CHECK: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global i32* null
|
||||
|
@ -43,29 +42,17 @@ define amdgpu_kernel void @kernel1() #0 {
|
|||
@G2 = global i32 0
|
||||
|
||||
define internal void @internal3() {
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@internal3
|
||||
; AKF_CHECK-SAME: () #[[ATTR2:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @G2, align 4
|
||||
; AKF_CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
|
||||
; AKF_CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
|
||||
; AKF_CHECK: 3:
|
||||
; AKF_CHECK-NEXT: call void @internal4()
|
||||
; AKF_CHECK-NEXT: call void @internal3()
|
||||
; AKF_CHECK-NEXT: br label [[TMP4]]
|
||||
; AKF_CHECK: 4:
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@internal3
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @G2, align 4
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
|
||||
; ATTRIBUTOR_CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
|
||||
; ATTRIBUTOR_CHECK: 3:
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @internal4()
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @internal3()
|
||||
; ATTRIBUTOR_CHECK-NEXT: br label [[TMP4]]
|
||||
; ATTRIBUTOR_CHECK: 4:
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
; CHECK-LABEL: define {{[^@]+}}@internal3
|
||||
; CHECK-SAME: () #[[ATTR1]] {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @G2, align 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
|
||||
; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
|
||||
; CHECK: 3:
|
||||
; CHECK-NEXT: call void @internal4()
|
||||
; CHECK-NEXT: call void @internal3()
|
||||
; CHECK-NEXT: br label [[TMP4]]
|
||||
; CHECK: 4:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%1 = load i32, i32* @G2, align 4
|
||||
%2 = icmp eq i32 %1, 0
|
||||
|
@ -79,30 +66,20 @@ define internal void @internal3() {
|
|||
}
|
||||
|
||||
define internal void @internal4() {
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@internal4
|
||||
; AKF_CHECK-SAME: () #[[ATTR2]] {
|
||||
; AKF_CHECK-NEXT: store i32 1, i32* @G2, align 4
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@internal4
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: store i32 1, i32* @G2, align 4
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
; CHECK-LABEL: define {{[^@]+}}@internal4
|
||||
; CHECK-SAME: () #[[ATTR1]] {
|
||||
; CHECK-NEXT: store i32 1, i32* @G2, align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
store i32 1, i32* @G2, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define internal void @internal2() {
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@internal2
|
||||
; AKF_CHECK-SAME: () #[[ATTR2]] {
|
||||
; AKF_CHECK-NEXT: call void @internal3()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@internal2
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @internal3()
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
; CHECK-LABEL: define {{[^@]+}}@internal2
|
||||
; CHECK-SAME: () #[[ATTR1]] {
|
||||
; CHECK-NEXT: call void @internal3()
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
call void @internal3()
|
||||
ret void
|
||||
|
@ -119,12 +96,7 @@ define amdgpu_kernel void @kernel2() #0 {
|
|||
}
|
||||
|
||||
attributes #0 = { "uniform-work-group-size"="true" }
|
||||
|
||||
;.
|
||||
; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
|
||||
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
|
||||
; AKF_CHECK: attributes #[[ATTR2]] = { "uniform-work-group-size"="true" }
|
||||
;.
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
;.
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=AKF_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor < %s | FileCheck %s
|
||||
|
||||
; Test to verify if the attribute gets propagated across nested function calls
|
||||
|
||||
|
@ -8,50 +7,33 @@
|
|||
@x = global i32 0
|
||||
|
||||
;.
|
||||
; AKF_CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0
|
||||
;.
|
||||
; ATTRIBUTOR_CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0
|
||||
; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0
|
||||
;.
|
||||
define void @func1() #0 {
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@func1
|
||||
; AKF_CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: store i32 0, i32* @x, align 4
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@func1
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: store i32 0, i32* @x, align 4
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
; CHECK-LABEL: define {{[^@]+}}@func1
|
||||
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: store i32 0, i32* @x, align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
store i32 0, i32* @x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func2() #1 {
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@func2
|
||||
; AKF_CHECK-SAME: () #[[ATTR0]] {
|
||||
; AKF_CHECK-NEXT: call void @func1()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@func2
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @func1()
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
; CHECK-LABEL: define {{[^@]+}}@func2
|
||||
; CHECK-SAME: () #[[ATTR0]] {
|
||||
; CHECK-NEXT: call void @func1()
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
call void @func1()
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel3() #2 {
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel3
|
||||
; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: call void @func2()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel3
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @func2()
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel3
|
||||
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; CHECK-NEXT: call void @func2()
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
call void @func2()
|
||||
ret void
|
||||
|
@ -59,9 +41,6 @@ define amdgpu_kernel void @kernel3() #2 {
|
|||
|
||||
attributes #2 = { "uniform-work-group-size"="true" }
|
||||
;.
|
||||
; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="true" }
|
||||
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
|
||||
;.
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
;.
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor < %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor < %s | FileCheck %s
|
||||
|
||||
; Function added to prevent attributor from deleting call sites.
|
||||
|
||||
|
@ -32,27 +30,17 @@ define amdgpu_kernel void @kernel1() #1 {
|
|||
}
|
||||
|
||||
define amdgpu_kernel void @kernel2() #2 {
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel2
|
||||
; AKF_CHECK-SAME: () #[[ATTR2:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: call void @func()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel2
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @func()
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel2
|
||||
; CHECK-SAME: () #[[ATTR0]] {
|
||||
; CHECK-NEXT: call void @func()
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
call void @func()
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #1 = { "uniform-work-group-size"="true" }
|
||||
|
||||
;.
|
||||
; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
|
||||
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
|
||||
; AKF_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
;.
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck %s
|
||||
|
||||
@x = global i32 0
|
||||
|
||||
|
@ -52,12 +51,3 @@ define amdgpu_kernel void @kernel2() #2 {
|
|||
attributes #0 = { nounwind }
|
||||
attributes #1 = { "uniform-work-group-size"="false" }
|
||||
attributes #2 = { "uniform-work-group-size"="true" }
|
||||
;.
|
||||
; AKF_CHECK: attributes #[[ATTR0]] = { nounwind "uniform-work-group-size"="false" }
|
||||
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
; AKF_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
|
||||
;.
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
;.
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck --allow-unused-prefixes -check-prefixes=CHECK,AKF_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck --allow-unused-prefixes -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck %s
|
||||
|
||||
; Test to ensure recursive functions exhibit proper behaviour
|
||||
; Test to generate fibonacci numbers
|
||||
|
@ -44,39 +43,22 @@ exit:
|
|||
}
|
||||
|
||||
define internal i32 @fib_internal(i32 %n) #0 {
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@fib_internal
|
||||
; AKF_CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR0]] {
|
||||
; AKF_CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0
|
||||
; AKF_CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]]
|
||||
; AKF_CHECK: cont1:
|
||||
; AKF_CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1
|
||||
; AKF_CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]]
|
||||
; AKF_CHECK: cont2:
|
||||
; AKF_CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1
|
||||
; AKF_CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib_internal(i32 [[NM1]])
|
||||
; AKF_CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2
|
||||
; AKF_CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib_internal(i32 [[NM2]])
|
||||
; AKF_CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]]
|
||||
; AKF_CHECK-NEXT: ret i32 [[RETVAL]]
|
||||
; AKF_CHECK: exit:
|
||||
; AKF_CHECK-NEXT: ret i32 1
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@fib_internal
|
||||
; ATTRIBUTOR_CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0
|
||||
; ATTRIBUTOR_CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]]
|
||||
; ATTRIBUTOR_CHECK: cont1:
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1
|
||||
; ATTRIBUTOR_CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]]
|
||||
; ATTRIBUTOR_CHECK: cont2:
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib_internal(i32 [[NM1]])
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib_internal(i32 [[NM2]])
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret i32 [[RETVAL]]
|
||||
; ATTRIBUTOR_CHECK: exit:
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret i32 1
|
||||
; CHECK-LABEL: define {{[^@]+}}@fib_internal
|
||||
; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]]
|
||||
; CHECK: cont1:
|
||||
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1
|
||||
; CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]]
|
||||
; CHECK: cont2:
|
||||
; CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1
|
||||
; CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib_internal(i32 [[NM1]])
|
||||
; CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2
|
||||
; CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib_internal(i32 [[NM2]])
|
||||
; CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]]
|
||||
; CHECK-NEXT: ret i32 [[RETVAL]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret i32 1
|
||||
;
|
||||
%cmp1 = icmp eq i32 %n, 0
|
||||
br i1 %cmp1, label %exit, label %cont1
|
||||
|
@ -99,21 +81,13 @@ exit:
|
|||
}
|
||||
|
||||
define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 {
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel
|
||||
; AKF_CHECK-SAME: (i32 addrspace(1)* [[M:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: [[R:%.*]] = call i32 @fib(i32 5)
|
||||
; AKF_CHECK-NEXT: [[R2:%.*]] = call i32 @fib_internal(i32 5)
|
||||
; AKF_CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[M]], align 4
|
||||
; AKF_CHECK-NEXT: store i32 [[R2]], i32 addrspace(1)* [[M]], align 4
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel
|
||||
; ATTRIBUTOR_CHECK-SAME: (i32 addrspace(1)* [[M:%.*]]) #[[ATTR2:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[R:%.*]] = call i32 @fib(i32 5)
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[R2:%.*]] = call i32 @fib_internal(i32 5)
|
||||
; ATTRIBUTOR_CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[M]], align 4
|
||||
; ATTRIBUTOR_CHECK-NEXT: store i32 [[R2]], i32 addrspace(1)* [[M]], align 4
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[M:%.*]]) #[[ATTR2:[0-9]+]] {
|
||||
; CHECK-NEXT: [[R:%.*]] = call i32 @fib(i32 5)
|
||||
; CHECK-NEXT: [[R2:%.*]] = call i32 @fib_internal(i32 5)
|
||||
; CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[M]], align 4
|
||||
; CHECK-NEXT: store i32 [[R2]], i32 addrspace(1)* [[M]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%r = call i32 @fib(i32 5)
|
||||
%r2 = call i32 @fib_internal(i32 5)
|
||||
|
@ -126,12 +100,8 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 {
|
|||
; nounwind and readnone are added to match attributor results.
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { "uniform-work-group-size"="true" }
|
||||
|
||||
;.
|
||||
; AKF_CHECK: attributes #[[ATTR0]] = { nounwind readnone "uniform-work-group-size"="true" }
|
||||
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
|
||||
;.
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
|
||||
;.
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -allow-unused-prefixes -check-prefixes=CHECK,AKF_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -allow-unused-prefixes -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -allow-unused-prefixes %s
|
||||
|
||||
@x = global i32 0
|
||||
;.
|
||||
|
@ -49,17 +48,11 @@ define void @func3() {
|
|||
}
|
||||
|
||||
define amdgpu_kernel void @kernel3() #0 {
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel3
|
||||
; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: call void @func2()
|
||||
; AKF_CHECK-NEXT: call void @func3()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel3
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @func2()
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @func3()
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel3
|
||||
; CHECK-SAME: () #[[ATTR0]] {
|
||||
; CHECK-NEXT: call void @func2()
|
||||
; CHECK-NEXT: call void @func3()
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
call void @func2()
|
||||
call void @func3()
|
||||
|
@ -68,8 +61,5 @@ define amdgpu_kernel void @kernel3() #0 {
|
|||
|
||||
attributes #0 = { "uniform-work-group-size"="false" }
|
||||
;.
|
||||
; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
|
||||
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
|
Loading…
Reference in New Issue