forked from OSchip/llvm-project
AMDGPU: Add intrinsics for DS GWS semaphore instructions
llvm-svn: 363983
This commit is contained in:
parent
d0b11698cd
commit
740322f1eb
|
@ -1371,6 +1371,43 @@ def int_amdgcn_ds_gws_barrier :
|
|||
[SDNPMemOperand]
|
||||
>;
|
||||
|
||||
// llvm.amdgcn.ds.gws.sema.v(i32 resource_id)
// Intrinsic for the DS GWS semaphore "V" instruction. Takes a single i32
// operand and returns nothing; it is marked convergent and
// inaccessible-memory-only, and carries a memory operand (SDNPMemOperand).
// Instruction selection maps it to the DS_GWS_SEMA_V opcode.
|
||||
def int_amdgcn_ds_gws_sema_v :
|
||||
GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_v">,
|
||||
Intrinsic<[],
|
||||
[llvm_i32_ty],
|
||||
[IntrConvergent, IntrInaccessibleMemOnly], "",
|
||||
[SDNPMemOperand]
|
||||
>;
|
||||
|
||||
// llvm.amdgcn.ds.gws.sema.br(i32 vsrc, i32 resource_id)
// Intrinsic for the DS GWS semaphore "BR" instruction. Takes two i32 operands
// and returns nothing; it is marked convergent and inaccessible-memory-only,
// and carries a memory operand (SDNPMemOperand). Instruction selection maps it
// to the DS_GWS_SEMA_BR opcode; when a vsrc operand is present, SelectDS_GWS
// copies it into VGPR0 before emitting the instruction.
|
||||
def int_amdgcn_ds_gws_sema_br :
|
||||
GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_br">,
|
||||
Intrinsic<[],
|
||||
[llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrConvergent, IntrInaccessibleMemOnly], "",
|
||||
[SDNPMemOperand]
|
||||
>;
|
||||
|
||||
// llvm.amdgcn.ds.gws.sema.p(i32 resource_id)
// Intrinsic for the DS GWS semaphore "P" instruction. Takes a single i32
// operand and returns nothing; it is marked convergent and
// inaccessible-memory-only, and carries a memory operand (SDNPMemOperand).
// Instruction selection maps it to the DS_GWS_SEMA_P opcode.
|
||||
def int_amdgcn_ds_gws_sema_p :
|
||||
GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_p">,
|
||||
Intrinsic<[],
|
||||
[llvm_i32_ty],
|
||||
[IntrConvergent, IntrInaccessibleMemOnly], "",
|
||||
[SDNPMemOperand]
|
||||
>;
|
||||
|
||||
// llvm.amdgcn.ds.gws.sema.release.all(i32 resource_id)
// Intrinsic for the DS GWS "semaphore release all" instruction. Takes a single
// i32 operand and returns nothing; it is marked convergent and
// inaccessible-memory-only, and carries a memory operand (SDNPMemOperand).
// Instruction selection maps it to DS_GWS_SEMA_RELEASE_ALL, but only when the
// subtarget reports hasGWSSemaReleaseAll(); otherwise SelectDS_GWS hands the
// node to the default selector so that selection fails with an error.
|
||||
def int_amdgcn_ds_gws_sema_release_all :
|
||||
GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_release_all">,
|
||||
Intrinsic<[],
|
||||
[llvm_i32_ty],
|
||||
[IntrConvergent, IntrInaccessibleMemOnly], "",
|
||||
[SDNPMemOperand]
|
||||
>;
|
||||
|
||||
|
||||
// Copies the source value to the destination value, with the guarantee that
|
||||
// the source value is computed as if the entire program were executed in WQM.
|
||||
def int_amdgcn_wqm : Intrinsic<[llvm_any_ty],
|
||||
|
|
|
@ -2089,10 +2089,39 @@ void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
|
|||
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
|
||||
}
|
||||
|
||||
/// Translate a DS GWS intrinsic ID into the opcode of the matching DS_GWS_*
/// machine instruction.
///
/// \param IntrID one of the amdgcn_ds_gws_* intrinsic IDs listed in the switch.
/// \returns the corresponding AMDGPU::DS_GWS_* opcode.
///
/// Passing any other intrinsic ID is a programming error and reaches
/// llvm_unreachable.
static unsigned gwsIntrinToOpcode(unsigned IntrID) {
|
||||
switch (IntrID) {
|
||||
case Intrinsic::amdgcn_ds_gws_init:
|
||||
return AMDGPU::DS_GWS_INIT;
|
||||
case Intrinsic::amdgcn_ds_gws_barrier:
|
||||
return AMDGPU::DS_GWS_BARRIER;
|
||||
case Intrinsic::amdgcn_ds_gws_sema_v:
|
||||
return AMDGPU::DS_GWS_SEMA_V;
|
||||
case Intrinsic::amdgcn_ds_gws_sema_br:
|
||||
return AMDGPU::DS_GWS_SEMA_BR;
|
||||
case Intrinsic::amdgcn_ds_gws_sema_p:
|
||||
return AMDGPU::DS_GWS_SEMA_P;
|
||||
case Intrinsic::amdgcn_ds_gws_sema_release_all:
|
||||
return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
|
||||
default:
|
||||
// Callers are expected to pass only the GWS intrinsics handled above.
llvm_unreachable("not a gws intrinsic");
|
||||
}
|
||||
}
|
||||
|
||||
void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
|
||||
if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
|
||||
!Subtarget->hasGWSSemaReleaseAll()) {
|
||||
// Let this error.
|
||||
SelectCode(N);
|
||||
return;
|
||||
}
|
||||
|
||||
// Chain, intrinsic ID, vsrc, offset
|
||||
const bool HasVSrc = N->getNumOperands() == 4;
|
||||
assert(HasVSrc || N->getNumOperands() == 3);
|
||||
|
||||
SDLoc SL(N);
|
||||
SDValue VSrc0 = N->getOperand(2);
|
||||
SDValue BaseOffset = N->getOperand(3);
|
||||
SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
|
||||
int ImmOffset = 0;
|
||||
MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
|
||||
MachineMemOperand *MMO = M->getMemOperand();
|
||||
|
@ -2128,28 +2157,37 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
|
|||
glueCopyToM0(N, SDValue(M0Base, 0));
|
||||
}
|
||||
|
||||
// The manual doesn't mention this, but it seems only v0 works.
|
||||
SDValue V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32);
|
||||
SDValue V0;
|
||||
SDValue Chain = N->getOperand(0);
|
||||
SDValue Glue;
|
||||
if (HasVSrc) {
|
||||
SDValue VSrc0 = N->getOperand(2);
|
||||
|
||||
SDValue CopyToV0 = CurDAG->getCopyToReg(
|
||||
N->getOperand(0), SL, V0, VSrc0,
|
||||
N->getOperand(N->getNumOperands() - 1));
|
||||
// The manual doesn't mention this, but it seems only v0 works.
|
||||
V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32);
|
||||
|
||||
SDValue CopyToV0 = CurDAG->getCopyToReg(
|
||||
N->getOperand(0), SL, V0, VSrc0,
|
||||
N->getOperand(N->getNumOperands() - 1));
|
||||
Chain = CopyToV0;
|
||||
Glue = CopyToV0.getValue(1);
|
||||
}
|
||||
|
||||
SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
|
||||
|
||||
// TODO: Can this just be removed from the instruction?
|
||||
SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1);
|
||||
|
||||
unsigned Opc = IntrID == Intrinsic::amdgcn_ds_gws_init ?
|
||||
AMDGPU::DS_GWS_INIT : AMDGPU::DS_GWS_BARRIER;
|
||||
const unsigned Opc = gwsIntrinToOpcode(IntrID);
|
||||
SmallVector<SDValue, 5> Ops;
|
||||
if (HasVSrc)
|
||||
Ops.push_back(V0);
|
||||
Ops.push_back(OffsetField);
|
||||
Ops.push_back(GDS);
|
||||
Ops.push_back(Chain);
|
||||
|
||||
SDValue Ops[] = {
|
||||
V0,
|
||||
OffsetField,
|
||||
GDS,
|
||||
CopyToV0, // Chain
|
||||
CopyToV0.getValue(1) // Glue
|
||||
};
|
||||
if (HasVSrc)
|
||||
Ops.push_back(Glue);
|
||||
|
||||
SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
|
||||
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
|
||||
|
@ -2175,6 +2213,10 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
|
|||
switch (IntrID) {
|
||||
case Intrinsic::amdgcn_ds_gws_init:
|
||||
case Intrinsic::amdgcn_ds_gws_barrier:
|
||||
case Intrinsic::amdgcn_ds_gws_sema_v:
|
||||
case Intrinsic::amdgcn_ds_gws_sema_br:
|
||||
case Intrinsic::amdgcn_ds_gws_sema_p:
|
||||
case Intrinsic::amdgcn_ds_gws_sema_release_all:
|
||||
SelectDS_GWS(N, IntrID);
|
||||
return;
|
||||
default:
|
||||
|
|
|
@ -724,6 +724,11 @@ public:
|
|||
return getGeneration() >= GFX9;
|
||||
}
|
||||
|
||||
/// \returns true if the target has the ds_gws_sema_release_all instruction.
/// The answer is taken directly from the CIInsts subtarget flag.
|
||||
bool hasGWSSemaReleaseAll() const {
|
||||
return CIInsts;
|
||||
}
|
||||
|
||||
bool hasAddNoCarry() const {
|
||||
return AddNoCarryInsts;
|
||||
}
|
||||
|
|
|
@ -557,7 +557,9 @@ let SubtargetPredicate = isGFX7Plus in {
|
|||
defm DS_WRAP_RTN_B32 : DS_1A2D_RET_mc<"ds_wrap_rtn_b32", VGPR_32>;
|
||||
defm DS_CONDXCHG32_RTN_B64 : DS_1A1D_RET_mc<"ds_condxchg32_rtn_b64", VReg_64>;
|
||||
|
||||
let isConvergent = 1, usesCustomInserter = 1 in {
|
||||
def DS_GWS_SEMA_RELEASE_ALL : DS_GWS_0D<"ds_gws_sema_release_all">;
|
||||
}
|
||||
|
||||
let mayStore = 0 in {
|
||||
defm DS_READ_B96 : DS_1A_RET_mc<"ds_read_b96", VReg_96>;
|
||||
|
|
|
@ -962,7 +962,11 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|||
return true;
|
||||
}
|
||||
case Intrinsic::amdgcn_ds_gws_init:
|
||||
case Intrinsic::amdgcn_ds_gws_barrier: {
|
||||
case Intrinsic::amdgcn_ds_gws_barrier:
|
||||
case Intrinsic::amdgcn_ds_gws_sema_v:
|
||||
case Intrinsic::amdgcn_ds_gws_sema_br:
|
||||
case Intrinsic::amdgcn_ds_gws_sema_p:
|
||||
case Intrinsic::amdgcn_ds_gws_sema_release_all: {
|
||||
Info.opc = ISD::INTRINSIC_VOID;
|
||||
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
@ -2981,9 +2985,7 @@ SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI,
|
|||
std::tie(LoopBB, RemainderBB) = splitBlockForLoop(MI, *BB, true);
|
||||
|
||||
MachineBasicBlock::iterator I = LoopBB->end();
|
||||
|
||||
MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
|
||||
assert(Src && "missing operand from GWS instruction");
|
||||
|
||||
const unsigned EncodedReg = AMDGPU::Hwreg::encodeHwreg(
|
||||
AMDGPU::Hwreg::ID_TRAPSTS, AMDGPU::Hwreg::OFFSET_MEM_VIOL, 1);
|
||||
|
@ -2995,7 +2997,7 @@ SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI,
|
|||
|
||||
// This is a pain, but we're not allowed to have physical register live-ins
|
||||
// yet. Insert a pair of copies if the VGPR0 hack is necessary.
|
||||
if (TargetRegisterInfo::isPhysicalRegister(Src->getReg())) {
|
||||
if (Src && TargetRegisterInfo::isPhysicalRegister(Src->getReg())) {
|
||||
unsigned Data0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
||||
BuildMI(*BB, std::next(Prev), DL, TII->get(AMDGPU::COPY), Data0)
|
||||
.add(*Src);
|
||||
|
@ -3722,6 +3724,7 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
|
|||
case AMDGPU::DS_GWS_SEMA_V:
|
||||
case AMDGPU::DS_GWS_SEMA_BR:
|
||||
case AMDGPU::DS_GWS_SEMA_P:
|
||||
case AMDGPU::DS_GWS_SEMA_RELEASE_ALL:
|
||||
case AMDGPU::DS_GWS_BARRIER:
|
||||
if (getSubtarget()->hasGWSAutoReplay())
|
||||
return BB;
|
||||
|
|
|
@ -542,11 +542,6 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
|
|||
// export.)
|
||||
if (AddrOpIdx != -1) {
|
||||
setExpScore(&Inst, TII, TRI, MRI, AddrOpIdx, CurrScore);
|
||||
} else {
|
||||
assert(Inst.getOpcode() == AMDGPU::DS_APPEND ||
|
||||
Inst.getOpcode() == AMDGPU::DS_CONSUME ||
|
||||
Inst.getOpcode() == AMDGPU::DS_GWS_INIT ||
|
||||
Inst.getOpcode() == AMDGPU::DS_GWS_BARRIER);
|
||||
}
|
||||
|
||||
if (Inst.mayStore()) {
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %s
|
||||
|
||||
; GCN-LABEL: {{^}}gws_sema_br_offset0:
|
||||
; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
|
||||
; NOLOOP-DAG: s_mov_b32 m0, -1{{$}}
|
||||
; NOLOOP: v_mov_b32_e32 v0, [[BAR_NUM]]
|
||||
; NOLOOP: ds_gws_sema_br v0 offset:1 gds{{$}}
|
||||
|
||||
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
|
||||
; LOOP-NEXT: ds_gws_sema_br v0 offset:1 gds
|
||||
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)
|
||||
; LOOP-NEXT: s_cmp_lg_u32 [[GETREG]], 0
|
||||
; LOOP-NEXT: s_cbranch_scc1 [[LOOP]]
|
||||
define amdgpu_kernel void @gws_sema_br_offset0(i32 %val) #0 {
|
||||
call void @llvm.amdgcn.ds.gws.sema.br(i32 %val, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.ds.gws.sema.br(i32, i32) #0
|
||||
|
||||
attributes #0 = { convergent inaccessiblememonly nounwind }
|
|
@ -0,0 +1,26 @@
|
|||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP,GFX8 %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %s
|
||||
|
||||
; GCN-LABEL: {{^}}gws_sema_p_offset0:
|
||||
; NOLOOP-DAG: s_mov_b32 m0, -1{{$}}
|
||||
; NOLOOP: ds_gws_sema_p offset:1 gds{{$}}
|
||||
|
||||
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
|
||||
; GFX8-NEXT: s_nop 0
|
||||
; LOOP-NEXT: ds_gws_sema_p offset:1 gds
|
||||
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)
|
||||
; LOOP-NEXT: s_cmp_lg_u32 [[GETREG]], 0
|
||||
; LOOP-NEXT: s_cbranch_scc1 [[LOOP]]
|
||||
define amdgpu_kernel void @gws_sema_p_offset0(i32 %val) #0 {
|
||||
call void @llvm.amdgcn.ds.gws.sema.p(i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.ds.gws.sema.p(i32) #0
|
||||
|
||||
attributes #0 = { convergent inaccessiblememonly nounwind }
|
|
@ -0,0 +1,28 @@
|
|||
; RUN: not llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - < %s 2>&1 | FileCheck -enable-var-scope -check-prefix=GFX6ERR %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP,GFX8 %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %s
|
||||
|
||||
; GFX6ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.ds.gws.sema.release.all
|
||||
|
||||
; GCN-LABEL: {{^}}gws_sema_release_all_offset0:
|
||||
; NOLOOP-DAG: s_mov_b32 m0, -1{{$}}
|
||||
; NOLOOP: ds_gws_sema_release_all offset:1 gds{{$}}
|
||||
|
||||
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
|
||||
; GFX8-NEXT: s_nop 0
|
||||
; LOOP-NEXT: ds_gws_sema_release_all offset:1 gds
|
||||
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)
|
||||
; LOOP-NEXT: s_cmp_lg_u32 [[GETREG]], 0
|
||||
; LOOP-NEXT: s_cbranch_scc1 [[LOOP]]
|
||||
define amdgpu_kernel void @gws_sema_release_all_offset0(i32 %val) #0 {
|
||||
call void @llvm.amdgcn.ds.gws.sema.release.all(i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.ds.gws.sema.release.all(i32) #0
|
||||
|
||||
attributes #0 = { convergent inaccessiblememonly nounwind }
|
|
@ -0,0 +1,26 @@
|
|||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP,GFX8 %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %s
|
||||
|
||||
; GCN-LABEL: {{^}}gws_sema_v_offset0:
|
||||
; NOLOOP-DAG: s_mov_b32 m0, -1{{$}}
|
||||
; NOLOOP: ds_gws_sema_v offset:1 gds{{$}}
|
||||
|
||||
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
|
||||
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
|
||||
; GFX8-NEXT: s_nop 0
|
||||
; LOOP-NEXT: ds_gws_sema_v offset:1 gds
|
||||
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; LOOP-NEXT: s_getreg_b32 [[GETREG:s[0-9]+]], hwreg(HW_REG_TRAPSTS, 8, 1)
|
||||
; LOOP-NEXT: s_cmp_lg_u32 [[GETREG]], 0
|
||||
; LOOP-NEXT: s_cbranch_scc1 [[LOOP]]
|
||||
define amdgpu_kernel void @gws_sema_v_offset0(i32 %val) #0 {
|
||||
call void @llvm.amdgcn.ds.gws.sema.v(i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.ds.gws.sema.v(i32) #0
|
||||
|
||||
attributes #0 = { convergent inaccessiblememonly nounwind }
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O2 -tail-dup-size=1000 -tail-dup-placement-threshold=1000 -enable-tail-merge=0 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -O2 -tail-dup-size=1000 -tail-dup-placement-threshold=1000 -enable-tail-merge=0 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
|
||||
|
||||
; Need to trigger tail duplication during
|
||||
; MachineBlockPlacement, since calls aren't tail duplicated pre-RA.
|
||||
|
@ -8,6 +8,7 @@ declare void @convergent_func() #1
|
|||
declare void @llvm.amdgcn.s.barrier() #1
|
||||
declare void @llvm.amdgcn.ds.gws.init(i32, i32) #2
|
||||
declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) #2
|
||||
declare void @llvm.amdgcn.ds.gws.sema.release.all(i32 %offset) #2
|
||||
|
||||
; barrier shouldn't be duplicated.
|
||||
|
||||
|
@ -148,6 +149,29 @@ ret:
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}taildup_gws_sema_release_all:
|
||||
; GCN: ds_gws_sema_release_all
|
||||
; GCN-NOT: ds_gws
|
||||
define amdgpu_kernel void @taildup_gws_sema_release_all(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i1 %cond, i32 %offset) #0 {
|
||||
entry:
|
||||
br i1 %cond, label %bb1, label %bb2
|
||||
|
||||
bb1:
|
||||
store i32 0, i32 addrspace(1)* %a
|
||||
br label %call
|
||||
|
||||
bb2:
|
||||
store i32 1, i32 addrspace(1)* %a
|
||||
br label %call
|
||||
|
||||
call:
|
||||
call void @llvm.amdgcn.ds.gws.sema.release.all(i32 %offset)
|
||||
br label %ret
|
||||
|
||||
ret:
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind convergent }
|
||||
attributes #2 = { convergent inaccessiblememonly nounwind }
|
||||
|
|
Loading…
Reference in New Issue