forked from OSchip/llvm-project
[AMDGPU] Fix scheduling of exp pos4
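isPositionExport only treated targets pos0 through pos3 as position exports, so exp pos4 (GFX10+) was not handled as a position export during scheduling; the range check now uses the new ET_POS_LAST bound. Also fix the same range check in SIInsertWaitcnts, but I don't think that fix has any effect in practice. Differential Revision: https://reviews.llvm.org/D91290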
This commit is contained in:
parent
d7d6ac5624
commit
6881a82e8c
|
@ -33,7 +33,7 @@ static bool isExport(const SUnit &SU) {
|
|||
static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
|
||||
const MachineInstr *MI = SU->getInstr();
|
||||
int Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
|
||||
return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS3;
|
||||
return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST;
|
||||
}
|
||||
|
||||
static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,
|
||||
|
|
|
@ -699,6 +699,7 @@ enum Target {
|
|||
ET_POS0 = 12,
|
||||
ET_POS3 = 15,
|
||||
ET_POS4 = 16, // GFX10+
|
||||
ET_POS_LAST = ET_POS4, // Highest pos used on any subtarget
|
||||
ET_PRIM = 20, // GFX10+
|
||||
ET_PARAM0 = 32,
|
||||
ET_PARAM31 = 63,
|
||||
|
|
|
@ -1327,7 +1327,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
|
|||
int Imm = TII->getNamedOperand(Inst, AMDGPU::OpName::tgt)->getImm();
|
||||
if (Imm >= AMDGPU::Exp::ET_PARAM0 && Imm <= AMDGPU::Exp::ET_PARAM31)
|
||||
ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_PARAM_ACCESS, Inst);
|
||||
else if (Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS3)
|
||||
else if (Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST)
|
||||
ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_POS_ACCESS, Inst);
|
||||
else
|
||||
ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_GPR_LOCK, Inst);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10 %s
|
||||
|
||||
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
|
||||
declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
|
||||
|
@ -546,8 +546,8 @@ end:
|
|||
; GCN-DAG: v_mov_b32_e32 [[W1:v[0-9]+]], 1.0
|
||||
; GCN-DAG: v_mov_b32_e32 [[X:v[0-9]+]], s0
|
||||
; GCN-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s1
|
||||
; GCN-DAG: v_add_f32_e32 [[Z0:v[0-9]+]]
|
||||
; GCN-DAG: v_sub_f32_e32 [[Z1:v[0-9]+]]
|
||||
; GCN-DAG: v_add_f32_e{{32|64}} [[Z0:v[0-9]+]]
|
||||
; GCN-DAG: v_sub_f32_e{{32|64}} [[Z1:v[0-9]+]]
|
||||
; GCN: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}}
|
||||
; GCN-NEXT: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}}
|
||||
define amdgpu_kernel void @test_export_clustering(float %x, float %y) #0 {
|
||||
|
@ -570,6 +570,18 @@ define amdgpu_kernel void @test_export_pos_before_param(float %x, float %y) #0 {
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_export_pos4_before_param:
|
||||
; GFX10: exp pos4
|
||||
; GFX10-NOT: s_waitcnt
|
||||
; GFX10: exp param0
|
||||
define amdgpu_kernel void @test_export_pos4_before_param(float %x, float %y) #0 {
|
||||
%z0 = fadd float %x, %y
|
||||
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
|
||||
%z1 = fsub float %y, %x
|
||||
call void @llvm.amdgcn.exp.f32(i32 16, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 true, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_export_pos_before_param_ordered:
|
||||
; GCN: exp pos0
|
||||
; GCN: exp pos1
|
||||
|
|
Loading…
Reference in New Issue