diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 7d358dfdce4e..eb0af5e4bd7b 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -3788,14 +3788,33 @@ the ``s_trap`` instruction with the following usage: ``queue_ptr`` terminated and its associated queue put into the error state. - ``llvm.debugtrap`` ``s_trap 0x03`` ``SGPR0-1``: If debugger not - ``queue_ptr`` installed handled - same as ``llvm.trap``. - debugger breakpoint ``s_trap 0x07`` Reserved for debugger + ``llvm.debugtrap`` ``s_trap 0x03`` - If the debugger is + not installed then + behaves as a + no-operation. The + trap handler is + entered and + immediately returns + to continue + execution of the + wavefront. + - If the debugger is + installed, causes + the debug trap to be + reported by the + debugger and the + wavefront is put in + the halt state until + resumed by the + debugger. + reserved ``s_trap 0x04`` Reserved. + reserved ``s_trap 0x05`` Reserved. + reserved ``s_trap 0x06`` Reserved. + debugger breakpoint ``s_trap 0x07`` Reserved for debugger breakpoints. - debugger ``s_trap 0x08`` Reserved for debugger. - debugger ``s_trap 0xfe`` Reserved for debugger. - debugger ``s_trap 0xff`` Reserved for debugger. + reserved ``s_trap 0x08`` Reserved. + reserved ``s_trap 0xfe`` Reserved. + reserved ``s_trap 0xff`` Reserved. 
=================== =============== =============== ======================= AMDPAL diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index f21dd6285eb1..918148765f28 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3349,8 +3349,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FP_ROUND: return lowerFP_ROUND(Op, DAG); case ISD::TRAP: - case ISD::DEBUGTRAP: return lowerTRAP(Op, DAG); + case ISD::DEBUGTRAP: + return lowerDEBUGTRAP(Op, DAG); } return SDValue(); } @@ -4011,40 +4012,37 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); - MachineFunction &MF = DAG.getMachineFunction(); SDValue Chain = Op.getOperand(0); - unsigned TrapID = Op.getOpcode() == ISD::DEBUGTRAP ? - SISubtarget::TrapIDLLVMDebugTrap : SISubtarget::TrapIDLLVMTrap; - - if (Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa && - Subtarget->isTrapHandlerEnabled()) { - SIMachineFunctionInfo *Info = MF.getInfo(); - unsigned UserSGPR = Info->getQueuePtrUserSGPR(); - assert(UserSGPR != AMDGPU::NoRegister); - - SDValue QueuePtr = CreateLiveInRegister( - DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64); - - SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64); - - SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01, - QueuePtr, SDValue()); - - SDValue Ops[] = { - ToReg, - DAG.getTargetConstant(TrapID, SL, MVT::i16), - SGPR01, - ToReg.getValue(1) - }; - - return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops); - } - - switch (TrapID) { - case SISubtarget::TrapIDLLVMTrap: + if (Subtarget->getTrapHandlerAbi() != SISubtarget::TrapHandlerAbiHsa || + !Subtarget->isTrapHandlerEnabled()) return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain); - case SISubtarget::TrapIDLLVMDebugTrap: { + + MachineFunction &MF = 
DAG.getMachineFunction(); + SIMachineFunctionInfo *Info = MF.getInfo(); + unsigned UserSGPR = Info->getQueuePtrUserSGPR(); + assert(UserSGPR != AMDGPU::NoRegister); + SDValue QueuePtr = CreateLiveInRegister( + DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64); + SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64); + SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01, + QueuePtr, SDValue()); + SDValue Ops[] = { + ToReg, + DAG.getTargetConstant(SISubtarget::TrapIDLLVMTrap, SL, MVT::i16), + SGPR01, + ToReg.getValue(1) + }; + return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops); +} + +SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + SDValue Chain = Op.getOperand(0); + MachineFunction &MF = DAG.getMachineFunction(); + + if (Subtarget->getTrapHandlerAbi() != SISubtarget::TrapHandlerAbiHsa || + !Subtarget->isTrapHandlerEnabled()) { DiagnosticInfoUnsupported NoTrap(MF.getFunction(), "debugtrap handler not supported", Op.getDebugLoc(), @@ -4053,11 +4051,12 @@ SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const { Ctx.diagnose(NoTrap); return Chain; } - default: - llvm_unreachable("unsupported trap handler type!"); - } - return Chain; + SDValue Ops[] = { + Chain, + DAG.getTargetConstant(SISubtarget::TrapIDLLVMDebugTrap, SL, MVT::i16) + }; + return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops); } SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 3a99994c386f..ae8b19a46fc3 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -86,6 +86,7 @@ class SITargetLowering final : public AMDGPUTargetLowering { SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const; + SDValue 
lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const; SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/AMDGPU/trap.ll b/llvm/test/CodeGen/AMDGPU/trap.ll index 04ff4c87ea77..53398d595737 100644 --- a/llvm/test/CodeGen/AMDGPU/trap.ll +++ b/llvm/test/CodeGen/AMDGPU/trap.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=HSA-TRAP %s +; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s ; RUN: llc -mtriple=amdgcn--amdhsa -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s ; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s @@ -15,15 +15,15 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s declare void @llvm.trap() #0 -declare void @llvm.debugtrap() #0 +declare void @llvm.debugtrap() #1 ; MESA-TRAP: .section .AMDGPU.config ; MESA-TRAP: .long 47180 -; MESA-TRAP-NEXT: .long 204 +; MESA-TRAP-NEXT: .long 208 ; NOMESA-TRAP: .section .AMDGPU.config ; NOMESA-TRAP: .long 47180 -; NOMESA-TRAP-NEXT: .long 140 +; NOMESA-TRAP-NEXT: .long 144 ; GCN-LABEL: {{^}}hsa_trap: ; HSA-TRAP: enable_trap_handler = 1 @@ -38,24 +38,27 @@ declare void @llvm.debugtrap() #0 ; TRAP-BIT: enable_trap_handler = 1 ; NO-TRAP-BIT: enable_trap_handler = 0 ; NO-MESA-TRAP: s_endpgm -define amdgpu_kernel void @hsa_trap() { +define amdgpu_kernel void @hsa_trap(i32 addrspace(1)* nocapture readonly %arg0) { + store volatile i32 1, i32 addrspace(1)* %arg0 call void @llvm.trap() + unreachable + store volatile i32 2, i32 addrspace(1)* %arg0 ret void } ; MESA-TRAP: .section .AMDGPU.config ; MESA-TRAP: .long 47180 -; MESA-TRAP-NEXT: .long 204 +; MESA-TRAP-NEXT: .long 208 ; NOMESA-TRAP: .section .AMDGPU.config ; NOMESA-TRAP: .long 47180 -; 
NOMESA-TRAP-NEXT: .long 140 +; NOMESA-TRAP-NEXT: .long 144 -; GCN-WARNING: warning: :0:0: in function hsa_debugtrap void (): debugtrap handler not supported +; GCN-WARNING: warning: :0:0: in function hsa_debugtrap void (i32 addrspace(1)*): debugtrap handler not supported ; GCN-LABEL: {{^}}hsa_debugtrap: ; HSA-TRAP: enable_trap_handler = 1 -; HSA-TRAP: s_mov_b64 s[0:1], s[4:5] ; HSA-TRAP: s_trap 3 +; HSA-TRAP: flat_store_dword v[0:1], v3 ; for llvm.debugtrap in non-hsa path without ABI, generate a warning and a s_endpgm instruction ; NO-HSA-TRAP: enable_trap_handler = 0 @@ -64,8 +67,10 @@ define amdgpu_kernel void @hsa_trap() { ; TRAP-BIT: enable_trap_handler = 1 ; NO-TRAP-BIT: enable_trap_handler = 0 ; NO-MESA-TRAP: s_endpgm -define amdgpu_kernel void @hsa_debugtrap() { +define amdgpu_kernel void @hsa_debugtrap(i32 addrspace(1)* nocapture readonly %arg0) { + store volatile i32 1, i32 addrspace(1)* %arg0 call void @llvm.debugtrap() + store volatile i32 2, i32 addrspace(1)* %arg0 ret void } @@ -75,8 +80,11 @@ define amdgpu_kernel void @hsa_debugtrap() { ; NO-TRAP-BIT: enable_trap_handler = 0 ; NO-HSA-TRAP: s_endpgm ; NO-MESA-TRAP: s_endpgm -define amdgpu_kernel void @trap() { +define amdgpu_kernel void @trap(i32 addrspace(1)* nocapture readonly %arg0) { + store volatile i32 1, i32 addrspace(1)* %arg0 call void @llvm.trap() + unreachable + store volatile i32 2, i32 addrspace(1)* %arg0 ret void } @@ -84,10 +92,10 @@ define amdgpu_kernel void @trap() { ; TRAP-BIT: enable_trap_handler = 1 ; NO-TRAP-BIT: enable_trap_handler = 0 -; HSA: BB{{[0-9]_[0-9]+]]: ; %trap +; HSA-TRAP: BB{{[0-9]_[0-9]+}}: ; %trap ; HSA-TRAP: s_mov_b64 s[0:1], s[4:5] ; HSA-TRAP-NEXT: s_trap 2 -define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %arg0) local_unnamed_addr #1 { +define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %arg0) local_unnamed_addr { entry: %tmp29 = load volatile i32, i32 addrspace(1)* %arg0 %cmp = icmp eq i32 %tmp29, -1 @@ 
-98,7 +106,9 @@ trap: unreachable ret: + store volatile i32 3, i32 addrspace(1)* %arg0 ret void } attributes #0 = { nounwind noreturn } +attributes #1 = { nounwind }