diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 0d23323c4fb0..775b35d65214 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2491,7 +2491,7 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const // given the typical code patterns. if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT || Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE || - Opcode == AMDGPU::DS_ORDERED_COUNT) + Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP) return true; if (MI.isCall() || MI.isInlineAsm()) diff --git a/llvm/test/CodeGen/AMDGPU/skip-branch-trap.ll b/llvm/test/CodeGen/AMDGPU/skip-branch-trap.ll new file mode 100644 index 000000000000..bce4023316fa --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/skip-branch-trap.ll @@ -0,0 +1,58 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=HSA-TRAP %s + +; FIXME: merge with trap.ll + +; An s_cbranch_execz skip branch is required to avoid trapping when EXEC is empty (no lanes active) +; GCN-LABEL: {{^}}trap_divergent_branch: +; GCN: s_and_saveexec_b64 +; GCN: s_cbranch_execz [[ENDPGM:BB[0-9]+_[0-9]+]] +; GCN: s_branch [[TRAP:BB[0-9]+_[0-9]+]] +; GCN: [[ENDPGM]]: +; GCN-NEXT: s_endpgm +; GCN: [[TRAP]]: +; GCN: s_trap 2 +; GCN-NEXT: s_endpgm +define amdgpu_kernel void @trap_divergent_branch(i32 addrspace(1)* nocapture readonly %arg) { + %id = call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %id + %divergent.val = load i32, i32 addrspace(1)* %gep + %cmp = icmp eq i32 %divergent.val, 0 + br i1 %cmp, label %bb, label %end + +bb: + call void @llvm.trap() + br label %end + +end: + ret void +} + +; GCN-LABEL: {{^}}debugtrap_divergent_branch: +; GCN: s_and_saveexec_b64 +; GCN: s_cbranch_execz [[ENDPGM:BB[0-9]+_[0-9]+]] +; GCN: BB{{[0-9]+}}_{{[0-9]+}}: +; GCN: s_trap 3 +; GCN-NEXT: [[ENDPGM]]: +; 
GCN-NEXT: s_endpgm +define amdgpu_kernel void @debugtrap_divergent_branch(i32 addrspace(1)* nocapture readonly %arg) { + %id = call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %id + %divergent.val = load i32, i32 addrspace(1)* %gep + %cmp = icmp eq i32 %divergent.val, 0 + br i1 %cmp, label %bb, label %end + +bb: + call void @llvm.debugtrap() + br label %end + +end: + ret void +} + +declare void @llvm.trap() #0 +declare void @llvm.debugtrap() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #2 + +attributes #0 = { nounwind noreturn } +attributes #1 = { nounwind } +attributes #2 = { nounwind readnone speculatable }