diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp index a56758938531..0436d77a3117 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -29,10 +29,7 @@ namespace { class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass { private: const TargetMachine *TM = nullptr; - SmallVector NodeList; - bool processUniformWorkGroupAttribute(); - bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee); bool addFeatureAttributes(Function &F); public: @@ -62,56 +59,6 @@ char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID; INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, "Add AMDGPU function attributes", false, false) -bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() { - bool Changed = false; - - for (auto *Node : reverse(NodeList)) { - Function *Caller = Node->getFunction(); - - for (auto I : *Node) { - Function *Callee = std::get<1>(I)->getFunction(); - if (Callee) - Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee); - } - } - - return Changed; -} - -bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute( - Function &Caller, Function &Callee) { - - // Check for externally defined function - if (!Callee.hasExactDefinition()) { - Callee.addFnAttr("uniform-work-group-size", "false"); - if (!Caller.hasFnAttribute("uniform-work-group-size")) - Caller.addFnAttr("uniform-work-group-size", "false"); - - return true; - } - // Check if the Caller has the attribute - if (Caller.hasFnAttribute("uniform-work-group-size")) { - // Check if the value of the attribute is true - if (Caller.getFnAttribute("uniform-work-group-size") - .getValueAsString().equals("true")) { - // Propagate the attribute to the Callee, if it does not have it - if (!Callee.hasFnAttribute("uniform-work-group-size")) { - Callee.addFnAttr("uniform-work-group-size", "true"); - return true; - } - } else { - Callee.addFnAttr("uniform-work-group-size", "false"); - return true; - } - } else { - // If the attribute is absent, set it as false - Caller.addFnAttr("uniform-work-group-size", "false"); - Callee.addFnAttr("uniform-work-group-size", "false"); - return true; - } - return false; -} - bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { bool HaveStackObjects = false; bool Changed = false; @@ -166,14 +113,6 @@ bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) { bool Changed = false; for (CallGraphNode *I : SCC) { - // Build a list of CallGraphNodes from most number of uses to least - if (I->getNumReferences()) - NodeList.push_back(I); - else { - processUniformWorkGroupAttribute(); - NodeList.clear(); - } - Function *F = I->getFunction(); // Ignore functions with graphics calling conventions, these are currently // not allowed to have kernel arguments. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll index e98642d922a7..d6e87b37cf52 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -182,7 +182,7 @@ define void @use_dispatch_id() #1 { define void @use_workgroup_id_y_workgroup_id_z() #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_y_workgroup_id_z -; AKF_HSA-SAME: () #[[ATTR2:[0-9]+]] { +; AKF_HSA-SAME: () #[[ATTR1]] { ; AKF_HSA-NEXT: [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y() ; AKF_HSA-NEXT: [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z() ; AKF_HSA-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* undef, align 4 @@ -471,7 +471,7 @@ define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 { define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 { ; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_gfx9 -; AKF_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { +; AKF_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR2:[0-9]+]] { ; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)* ; AKF_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4 ; AKF_HSA-NEXT: ret void @@ -489,7 +489,7 @@ define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 { define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #2 { ; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_queue_ptr_gfx9 -; AKF_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR3]] { +; AKF_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR2]] { ; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)* ; AKF_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4 ; AKF_HSA-NEXT: call void @func_indirect_use_queue_ptr() @@ -587,7 +587,7 @@ define void @func_indirect_use_kernarg_segment_ptr() #1 { define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr -; AKF_HSA-SAME: () #[[ATTR2]] { +; AKF_HSA-SAME: () #[[ATTR1]] { ; AKF_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() ; AKF_HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 ; AKF_HSA-NEXT: ret void @@ -641,7 +641,7 @@ declare void @external.func() #3 ; This function gets deleted. define internal void @defined.func() #3 { ; AKF_HSA-LABEL: define {{[^@]+}}@defined.func -; AKF_HSA-SAME: () #[[ATTR4:[0-9]+]] { +; AKF_HSA-SAME: () #[[ATTR3:[0-9]+]] { ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@defined.func @@ -653,7 +653,7 @@ define internal void @defined.func() #3 { define void @func_call_external() #3 { ; AKF_HSA-LABEL: define {{[^@]+}}@func_call_external -; AKF_HSA-SAME: () #[[ATTR4]] { +; AKF_HSA-SAME: () #[[ATTR3]] { ; AKF_HSA-NEXT: call void @external.func() ; AKF_HSA-NEXT: ret void ; @@ -668,7 +668,7 @@ define void @func_call_external() #3 { define void @func_call_defined() #3 { ; AKF_HSA-LABEL: define {{[^@]+}}@func_call_defined -; AKF_HSA-SAME: () #[[ATTR4]] { +; AKF_HSA-SAME: () #[[ATTR3]] { ; AKF_HSA-NEXT: call void @defined.func() ; AKF_HSA-NEXT: ret void ; @@ -682,8 +682,8 @@ define void @func_call_defined() #3 { } define void @func_call_asm() #3 { ; AKF_HSA-LABEL: define {{[^@]+}}@func_call_asm -; AKF_HSA-SAME: () #[[ATTR5:[0-9]+]] { -; AKF_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR5]] +; AKF_HSA-SAME: () #[[ATTR3]] { +; AKF_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR3]] ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_asm @@ -697,7 +697,7 @@ define void @func_call_asm() #3 { define amdgpu_kernel void @kern_call_external() #3 { ; AKF_HSA-LABEL: define {{[^@]+}}@kern_call_external -; AKF_HSA-SAME: () #[[ATTR6:[0-9]+]] { +; AKF_HSA-SAME: () #[[ATTR4:[0-9]+]] { ; AKF_HSA-NEXT: call void @external.func() ; AKF_HSA-NEXT: ret void ; @@ -712,7 +712,7 @@ define amdgpu_kernel void @kern_call_external() #3 { define amdgpu_kernel void @func_kern_defined() #3 { ; AKF_HSA-LABEL: define {{[^@]+}}@func_kern_defined -; AKF_HSA-SAME: () #[[ATTR6]] { +; AKF_HSA-SAME: () #[[ATTR4]] { ; AKF_HSA-NEXT: call void @defined.func() ; AKF_HSA-NEXT: ret void ; @@ -727,7 +727,7 @@ define amdgpu_kernel void @func_kern_defined() #3 { define i32 @use_dispatch_ptr_ret_type() #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr_ret_type -; AKF_HSA-SAME: () #[[ATTR2]] { +; AKF_HSA-SAME: () #[[ATTR1]] { ; AKF_HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() ; AKF_HSA-NEXT: store volatile i8 addrspace(4)* [[DISPATCH_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 ; AKF_HSA-NEXT: ret i32 0 @@ -745,7 +745,7 @@ define i32 @use_dispatch_ptr_ret_type() #1 { define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr_constexpr_cast_func -; AKF_HSA-SAME: () #[[ATTR2]] { +; AKF_HSA-SAME: () #[[ATTR1]] { ; AKF_HSA-NEXT: [[F:%.*]] = call float bitcast (i32 ()* @use_dispatch_ptr_ret_type to float ()*)() ; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; AKF_HSA-NEXT: ret float [[FADD]] @@ -763,7 +763,7 @@ define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 { define float @func_indirect_call(float()* %fptr) #3 { ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_call -; AKF_HSA-SAME: (float ()* [[FPTR:%.*]]) #[[ATTR5]] { +; AKF_HSA-SAME: (float ()* [[FPTR:%.*]]) #[[ATTR3]] { ; AKF_HSA-NEXT: [[F:%.*]] = call float [[FPTR]]() ; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; AKF_HSA-NEXT: ret float [[FADD]] @@ -782,7 +782,7 @@ define float @func_indirect_call(float()* %fptr) #3 { declare float @extern() #3 define float @func_extern_call() #3 { ; AKF_HSA-LABEL: define {{[^@]+}}@func_extern_call -; AKF_HSA-SAME: () #[[ATTR4]] { +; AKF_HSA-SAME: () #[[ATTR3]] { ; AKF_HSA-NEXT: [[F:%.*]] = call float @extern() ; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; AKF_HSA-NEXT: ret float [[FADD]] @@ -800,7 +800,7 @@ define float @func_extern_call() #3 { define float @func_null_call(float()* %fptr) #3 { ; AKF_HSA-LABEL: define {{[^@]+}}@func_null_call -; AKF_HSA-SAME: (float ()* [[FPTR:%.*]]) #[[ATTR5]] { +; AKF_HSA-SAME: (float ()* [[FPTR:%.*]]) #[[ATTR3]] { ; AKF_HSA-NEXT: [[F:%.*]] = call float null() ; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; AKF_HSA-NEXT: ret float [[FADD]] @@ -821,7 +821,7 @@ declare float @llvm.amdgcn.rcp.f32(float) #0 ; Calls some other recognized intrinsic define float @func_other_intrinsic_call(float %arg) #3 { ; AKF_HSA-LABEL: define {{[^@]+}}@func_other_intrinsic_call -; AKF_HSA-SAME: (float [[ARG:%.*]]) #[[ATTR5]] { +; AKF_HSA-SAME: (float [[ARG:%.*]]) #[[ATTR3]] { ; AKF_HSA-NEXT: [[F:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[ARG]]) ; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 ; AKF_HSA-NEXT: ret float [[FADD]] @@ -844,12 +844,10 @@ attributes #3 = { nounwind } ;. ; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } -; AKF_HSA: attributes #[[ATTR1]] = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" } -; AKF_HSA: attributes #[[ATTR2]] = { nounwind "target-cpu"="fiji" } -; AKF_HSA: attributes #[[ATTR3]] = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; AKF_HSA: attributes #[[ATTR4]] = { nounwind "uniform-work-group-size"="false" } -; AKF_HSA: attributes #[[ATTR5]] = { nounwind } -; AKF_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-calls" "uniform-work-group-size"="false" } +; AKF_HSA: attributes #[[ATTR1]] = { nounwind "target-cpu"="fiji" } +; AKF_HSA: attributes #[[ATTR2]] = { nounwind "target-cpu"="gfx900" } +; AKF_HSA: attributes #[[ATTR3]] = { nounwind } +; AKF_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-calls" } ;. ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll index d09dbb183a2f..da8fdb8acdce 100644 --- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll @@ -1,34 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=GCN,AKF_GCN %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor < %s | FileCheck -check-prefixes=GCN,ATTRIBUTOR_GCN %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor < %s | FileCheck %s define internal void @indirect() { -; AKF_GCN-LABEL: define {{[^@]+}}@indirect() { -; AKF_GCN-NEXT: ret void -; -; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@indirect -; ATTRIBUTOR_GCN-SAME: () #[[ATTR0:[0-9]+]] { -; ATTRIBUTOR_GCN-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@indirect +; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: ret void ; ret void } define internal void @direct() { -; AKF_GCN-LABEL: define {{[^@]+}}@direct -; AKF_GCN-SAME: () #[[ATTR0:[0-9]+]] { -; AKF_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8 -; AKF_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8 -; AKF_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8 -; AKF_GCN-NEXT: call void [[FP]]() -; AKF_GCN-NEXT: ret void -; -; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@direct -; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] { -; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8 -; ATTRIBUTOR_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8 -; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8 -; ATTRIBUTOR_GCN-NEXT: call void [[FP]]() -; ATTRIBUTOR_GCN-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@direct +; CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8 +; CHECK-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8 +; CHECK-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8 +; CHECK-NEXT: call void [[FP]]() +; CHECK-NEXT: ret void ; %fptr = alloca void()* store void()* @indirect, void()** %fptr @@ -38,18 +26,15 @@ define internal void @direct() { } define amdgpu_kernel void @test_direct_indirect_call() { -; GCN-LABEL: define {{[^@]+}}@test_direct_indirect_call -; GCN-SAME: () #[[ATTR1:[0-9]+]] { -; GCN-NEXT: call void @direct() -; GCN-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@test_direct_indirect_call +; CHECK-SAME: () #[[ATTR1]] { +; CHECK-NEXT: call void @direct() +; CHECK-NEXT: ret void ; call void @direct() ret void } ;. -; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-stack-objects" "uniform-work-group-size"="false" } -; AKF_GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } -;. -; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll index 1fad09a1d336..003d4c74c0fc 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s +; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck %s ; If the kernel does not have the uniform-work-group-attribute, set both callee and caller as false ; We write to a global so that the attributor don't deletes the function. @@ -21,15 +20,10 @@ define void @foo() #0 { } define amdgpu_kernel void @kernel1() #1 { -; AKF_CHECK-LABEL: define {{[^@]+}}@kernel1 -; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] { -; AKF_CHECK-NEXT: call void @foo() -; AKF_CHECK-NEXT: ret void -; -; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel1 -; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0]] { -; ATTRIBUTOR_CHECK-NEXT: call void @foo() -; ATTRIBUTOR_CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@kernel1 +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: ret void ; call void @foo() ret void @@ -37,8 +31,5 @@ define amdgpu_kernel void @kernel1() #1 { attributes #0 = { "uniform-work-group-size"="true" } ;. -; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } -; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } -;. -; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll index 3cd24225ff47..e9179b0213b9 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s -; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor < %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s +; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor < %s | FileCheck %s ;. ; CHECK: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global i32* null @@ -43,29 +42,17 @@ define amdgpu_kernel void @kernel1() #0 { @G2 = global i32 0 define internal void @internal3() { -; AKF_CHECK-LABEL: define {{[^@]+}}@internal3 -; AKF_CHECK-SAME: () #[[ATTR2:[0-9]+]] { -; AKF_CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @G2, align 4 -; AKF_CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 -; AKF_CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; AKF_CHECK: 3: -; AKF_CHECK-NEXT: call void @internal4() -; AKF_CHECK-NEXT: call void @internal3() -; AKF_CHECK-NEXT: br label [[TMP4]] -; AKF_CHECK: 4: -; AKF_CHECK-NEXT: ret void -; -; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@internal3 -; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @G2, align 4 -; ATTRIBUTOR_CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 -; ATTRIBUTOR_CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; ATTRIBUTOR_CHECK: 3: -; ATTRIBUTOR_CHECK-NEXT: call void @internal4() -; ATTRIBUTOR_CHECK-NEXT: call void @internal3() -; ATTRIBUTOR_CHECK-NEXT: br label [[TMP4]] -; ATTRIBUTOR_CHECK: 4: -; ATTRIBUTOR_CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@internal3 +; CHECK-SAME: () #[[ATTR1]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @G2, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; CHECK: 3: +; CHECK-NEXT: call void @internal4() +; CHECK-NEXT: call void @internal3() +; CHECK-NEXT: br label [[TMP4]] +; CHECK: 4: +; CHECK-NEXT: ret void ; %1 = load i32, i32* @G2, align 4 %2 = icmp eq i32 %1, 0 @@ -79,30 +66,20 @@ define internal void @internal3() { } define internal void @internal4() { -; AKF_CHECK-LABEL: define {{[^@]+}}@internal4 -; AKF_CHECK-SAME: () #[[ATTR2]] { -; AKF_CHECK-NEXT: store i32 1, i32* @G2, align 4 -; AKF_CHECK-NEXT: ret void -; -; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@internal4 -; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_CHECK-NEXT: store i32 1, i32* @G2, align 4 -; ATTRIBUTOR_CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@internal4 +; CHECK-SAME: () #[[ATTR1]] { +; CHECK-NEXT: store i32 1, i32* @G2, align 4 +; CHECK-NEXT: ret void ; store i32 1, i32* @G2, align 4 ret void } define internal void @internal2() { -; AKF_CHECK-LABEL: define {{[^@]+}}@internal2 -; AKF_CHECK-SAME: () #[[ATTR2]] { -; AKF_CHECK-NEXT: call void @internal3() -; AKF_CHECK-NEXT: ret void -; -; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@internal2 -; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1]] { -; ATTRIBUTOR_CHECK-NEXT: call void @internal3() -; ATTRIBUTOR_CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@internal2 +; CHECK-SAME: () #[[ATTR1]] { +; CHECK-NEXT: call void @internal3() +; CHECK-NEXT: ret void ; call void @internal3() ret void @@ -119,12 +96,7 @@ define amdgpu_kernel void @kernel2() #0 { } attributes #0 = { "uniform-work-group-size"="true" } - ;. -; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } -; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } -; AKF_CHECK: attributes #[[ATTR2]] = { "uniform-work-group-size"="true" } -;. -; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll index 6b68250ff60a..292022039e5d 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=AKF_CHECK %s -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_CHECK %s +; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor < %s | FileCheck %s ; Test to verify if the attribute gets propagated across nested function calls @@ -8,50 +7,33 @@ @x = global i32 0 ;. -; AKF_CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 -;. -; ATTRIBUTOR_CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 +; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 ;. define void @func1() #0 { -; AKF_CHECK-LABEL: define {{[^@]+}}@func1 -; AKF_CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; AKF_CHECK-NEXT: store i32 0, i32* @x, align 4 -; AKF_CHECK-NEXT: ret void -; -; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@func1 -; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; ATTRIBUTOR_CHECK-NEXT: store i32 0, i32* @x, align 4 -; ATTRIBUTOR_CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@func1 +; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: store i32 0, i32* @x, align 4 +; CHECK-NEXT: ret void ; store i32 0, i32* @x ret void } define void @func2() #1 { -; AKF_CHECK-LABEL: define {{[^@]+}}@func2 -; AKF_CHECK-SAME: () #[[ATTR0]] { -; AKF_CHECK-NEXT: call void @func1() -; AKF_CHECK-NEXT: ret void -; -; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@func2 -; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0]] { -; ATTRIBUTOR_CHECK-NEXT: call void @func1() -; ATTRIBUTOR_CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@func2 +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: call void @func1() +; CHECK-NEXT: ret void ; call void @func1() ret void } define amdgpu_kernel void @kernel3() #2 { -; AKF_CHECK-LABEL: define {{[^@]+}}@kernel3 -; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] { -; AKF_CHECK-NEXT: call void @func2() -; AKF_CHECK-NEXT: ret void -; -; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel3 -; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1:[0-9]+]] { -; ATTRIBUTOR_CHECK-NEXT: call void @func2() -; ATTRIBUTOR_CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@kernel3 +; CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: call void @func2() +; CHECK-NEXT: ret void ; call void @func2() ret void @@ -59,9 +41,6 @@ define amdgpu_kernel void @kernel3() #2 { attributes #2 = { "uniform-work-group-size"="true" } ;. -; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="true" } -; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } -;. -; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll index 22bdd0df072e..cd888064cada 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll @@ -1,7 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals - -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor < %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s +; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor < %s | FileCheck %s ; Function added to prevent attributor from deleting call sites. @@ -32,27 +30,17 @@ define amdgpu_kernel void @kernel1() #1 { } define amdgpu_kernel void @kernel2() #2 { -; AKF_CHECK-LABEL: define {{[^@]+}}@kernel2 -; AKF_CHECK-SAME: () #[[ATTR2:[0-9]+]] { -; AKF_CHECK-NEXT: call void @func() -; AKF_CHECK-NEXT: ret void -; -; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel2 -; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0]] { -; ATTRIBUTOR_CHECK-NEXT: call void @func() -; ATTRIBUTOR_CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@kernel2 +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: call void @func() +; CHECK-NEXT: ret void ; call void @func() ret void } attributes #1 = { "uniform-work-group-size"="true" } - ;. -; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } -; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } -; AKF_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="false" } -;. -; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll index a25484608cff..60c42b0096b0 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s +; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck %s @x = global i32 0 @@ -52,12 +51,3 @@ define amdgpu_kernel void @kernel2() #2 { attributes #0 = { nounwind } attributes #1 = { "uniform-work-group-size"="false" } attributes #2 = { "uniform-work-group-size"="true" } -;. -; AKF_CHECK: attributes #[[ATTR0]] = { nounwind "uniform-work-group-size"="false" } -; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } -; AKF_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="true" } -;. -; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } -;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll index 123dc50d6c82..3ac9f0675bfe 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck --allow-unused-prefixes -check-prefixes=CHECK,AKF_CHECK %s -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck --allow-unused-prefixes -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s +; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck %s ; Test to ensure recursive functions exhibit proper behaviour ; Test to generate fibonacci numbers @@ -44,39 +43,22 @@ exit: } define internal i32 @fib_internal(i32 %n) #0 { -; AKF_CHECK-LABEL: define {{[^@]+}}@fib_internal -; AKF_CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR0]] { -; AKF_CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0 -; AKF_CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]] -; AKF_CHECK: cont1: -; AKF_CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1 -; AKF_CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]] -; AKF_CHECK: cont2: -; AKF_CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1 -; AKF_CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib_internal(i32 [[NM1]]) -; AKF_CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2 -; AKF_CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib_internal(i32 [[NM2]]) -; AKF_CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]] -; AKF_CHECK-NEXT: ret i32 [[RETVAL]] -; AKF_CHECK: exit: -; AKF_CHECK-NEXT: ret i32 1 -; -; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@fib_internal -; ATTRIBUTOR_CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] { -; ATTRIBUTOR_CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0 -; ATTRIBUTOR_CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]] -; ATTRIBUTOR_CHECK: cont1: -; ATTRIBUTOR_CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1 -; ATTRIBUTOR_CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]] -; ATTRIBUTOR_CHECK: cont2: -; ATTRIBUTOR_CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1 -; ATTRIBUTOR_CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib_internal(i32 [[NM1]]) -; ATTRIBUTOR_CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2 -; ATTRIBUTOR_CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib_internal(i32 [[NM2]]) -; ATTRIBUTOR_CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]] -; ATTRIBUTOR_CHECK-NEXT: ret i32 [[RETVAL]] -; ATTRIBUTOR_CHECK: exit: -; ATTRIBUTOR_CHECK-NEXT: ret i32 1 +; CHECK-LABEL: define {{[^@]+}}@fib_internal +; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]] +; CHECK: cont1: +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1 +; CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]] +; CHECK: cont2: +; CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1 +; CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib_internal(i32 [[NM1]]) +; CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2 +; CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib_internal(i32 [[NM2]]) +; CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]] +; CHECK-NEXT: ret i32 [[RETVAL]] +; CHECK: exit: +; CHECK-NEXT: ret i32 1 ; %cmp1 = icmp eq i32 %n, 0 br i1 %cmp1, label %exit, label %cont1 @@ -99,21 +81,13 @@ exit: } define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 { -; AKF_CHECK-LABEL: define {{[^@]+}}@kernel -; AKF_CHECK-SAME: (i32 addrspace(1)* [[M:%.*]]) #[[ATTR1:[0-9]+]] { -; AKF_CHECK-NEXT: [[R:%.*]] = call i32 @fib(i32 5) -; AKF_CHECK-NEXT: [[R2:%.*]] = call i32 @fib_internal(i32 5) -; AKF_CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[M]], align 4 -; AKF_CHECK-NEXT: store i32 [[R2]], i32 addrspace(1)* [[M]], align 4 -; AKF_CHECK-NEXT: ret void -; -; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel -; ATTRIBUTOR_CHECK-SAME: (i32 addrspace(1)* [[M:%.*]]) #[[ATTR2:[0-9]+]] { -; ATTRIBUTOR_CHECK-NEXT: [[R:%.*]] = call i32 @fib(i32 5) -; ATTRIBUTOR_CHECK-NEXT: [[R2:%.*]] = call i32 @fib_internal(i32 5) -; ATTRIBUTOR_CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[M]], align 4 -; ATTRIBUTOR_CHECK-NEXT: store i32 [[R2]], i32 addrspace(1)* [[M]], align 4 -; ATTRIBUTOR_CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@kernel +; CHECK-SAME: (i32 addrspace(1)* [[M:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[R:%.*]] = call i32 @fib(i32 5) +; CHECK-NEXT: [[R2:%.*]] = call i32 @fib_internal(i32 5) +; CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[M]], align 4 +; CHECK-NEXT: store i32 [[R2]], i32 addrspace(1)* [[M]], align 4 +; CHECK-NEXT: ret void ; %r = call i32 @fib(i32 5) %r2 = call i32 @fib_internal(i32 5) @@ -126,12 +100,8 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 { ; nounwind and readnone are added to match attributor results. attributes #0 = { nounwind readnone } attributes #1 = { "uniform-work-group-size"="true" } - ;. -; AKF_CHECK: attributes #[[ATTR0]] = { nounwind readnone "uniform-work-group-size"="true" } -; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } -;. -; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } -; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll index b52c90311ac4..1381f871369d 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -allow-unused-prefixes -check-prefixes=CHECK,AKF_CHECK %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -allow-unused-prefixes -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -allow-unused-prefixes %s @x = global i32 0 ;. @@ -49,17 +48,11 @@ define void @func3() { } define amdgpu_kernel void @kernel3() #0 { -; AKF_CHECK-LABEL: define {{[^@]+}}@kernel3 -; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] { -; AKF_CHECK-NEXT: call void @func2() -; AKF_CHECK-NEXT: call void @func3() -; AKF_CHECK-NEXT: ret void -; -; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel3 -; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0]] { -; ATTRIBUTOR_CHECK-NEXT: call void @func2() -; ATTRIBUTOR_CHECK-NEXT: call void @func3() -; ATTRIBUTOR_CHECK-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@kernel3 +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: call void @func2() +; CHECK-NEXT: call void @func3() +; CHECK-NEXT: ret void ; call void @func2() call void @func3() @@ -68,8 +61,5 @@ define amdgpu_kernel void @kernel3() #0 { attributes #0 = { "uniform-work-group-size"="false" } ;. -; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } -; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } -;. -; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;.