AMDGPU: treat exports to pos4 as position exports

Summary of what this patch does (as visible in the hunks below):
  * SIDefines.h: adds Exp::ET_POS_LAST as an alias of ET_POS4.
  * AMDGPUExportClustering.cpp: isPositionExport() now accepts targets in
    the range ET_POS0..ET_POS_LAST instead of ET_POS0..ET_POS3.
  * SIInsertWaitcnts.cpp: updateEventWaitcntAfter() records EXP_POS_ACCESS
    for targets in ET_POS0..ET_POS_LAST instead of ET_POS0..ET_POS3.
  * llvm.amdgcn.exp.ll: replaces the default-CPU RUN line with a gfx1010
    RUN line (GFX10 prefix), relaxes two checks to accept e32 or e64
    encodings, and adds test_export_pos4_before_param.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Line boundaries were rebuilt from the hunk
line counts; context-line indentation is reconstructed (2-space LLVM style)
and should be confirmed against the target tree, or the patch applied with
whitespace tolerance.
NOTE(review): the sortChain context line reads `SmallVector &Chain` -- the
template arguments look like they were stripped in transit; confirm against
the target tree before applying.

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
index 685308b5d128..6494f1daae46 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
@@ -33,7 +33,7 @@ static bool isExport(const SUnit &SU) {
 static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
   const MachineInstr *MI = SU->getInstr();
   int Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
-  return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS3;
+  return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST;
 }
 
 static void sortChain(const SIInstrInfo *TII, SmallVector &Chain,
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index b5087d32964d..0abd96dc4607 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -698,8 +698,9 @@ enum Target {
   ET_NULL = 9,
   ET_POS0 = 12,
   ET_POS3 = 15,
-  ET_POS4 = 16, // GFX10+
-  ET_PRIM = 20, // GFX10+
+  ET_POS4 = 16,          // GFX10+
+  ET_POS_LAST = ET_POS4, // Highest pos used on any subtarget
+  ET_PRIM = 20,          // GFX10+
   ET_PARAM0 = 32,
   ET_PARAM31 = 63,
 };
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 5b4ca5bda247..6f1dfdac2c6f 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1327,7 +1327,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
       int Imm = TII->getNamedOperand(Inst, AMDGPU::OpName::tgt)->getImm();
       if (Imm >= AMDGPU::Exp::ET_PARAM0 && Imm <= AMDGPU::Exp::ET_PARAM31)
         ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_PARAM_ACCESS, Inst);
-      else if (Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS3)
+      else if (Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST)
         ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_POS_ACCESS, Inst);
       else
         ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_GPR_LOCK, Inst);
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
index 9a62ca5db089..ede466976a3a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10 %s
 
 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
 declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
@@ -546,8 +546,8 @@ end:
 ; GCN-DAG: v_mov_b32_e32 [[W1:v[0-9]+]], 1.0
 ; GCN-DAG: v_mov_b32_e32 [[X:v[0-9]+]], s0
 ; GCN-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s1
-; GCN-DAG: v_add_f32_e32 [[Z0:v[0-9]+]]
-; GCN-DAG: v_sub_f32_e32 [[Z1:v[0-9]+]]
+; GCN-DAG: v_add_f32_e{{32|64}} [[Z0:v[0-9]+]]
+; GCN-DAG: v_sub_f32_e{{32|64}} [[Z1:v[0-9]+]]
 ; GCN: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}}
 ; GCN-NEXT: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}}
 define amdgpu_kernel void @test_export_clustering(float %x, float %y) #0 {
@@ -570,6 +570,18 @@ define amdgpu_kernel void @test_export_pos_before_param(float %x, float %y) #0 {
   ret void
 }
 
+; GCN-LABEL: {{^}}test_export_pos4_before_param:
+; GFX10: exp pos4
+; GFX10-NOT: s_waitcnt
+; GFX10: exp param0
+define amdgpu_kernel void @test_export_pos4_before_param(float %x, float %y) #0 {
+  %z0 = fadd float %x, %y
+  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
+  %z1 = fsub float %y, %x
+  call void @llvm.amdgcn.exp.f32(i32 16, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 true, i1 false)
+  ret void
+}
+
 ; GCN-LABEL: {{^}}test_export_pos_before_param_ordered:
 ; GCN: exp pos0
 ; GCN: exp pos1