forked from OSchip/llvm-project
R600/SI: Commute instructions to enable more folding opportunities
llvm-svn: 225410
This commit is contained in:
parent
e6264cf661
commit
0599297cb4
|
@ -56,10 +56,16 @@ struct FoldCandidate {
|
||||||
uint64_t ImmToFold;
|
uint64_t ImmToFold;
|
||||||
|
|
||||||
FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
|
FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
|
||||||
UseMI(MI), UseOpNo(OpNo), OpToFold(FoldOp), ImmToFold(0) { }
|
UseMI(MI), UseOpNo(OpNo) {
|
||||||
|
|
||||||
FoldCandidate(MachineInstr *MI, unsigned OpNo, uint64_t Imm) :
|
if (FoldOp->isImm()) {
|
||||||
UseMI(MI), UseOpNo(OpNo), OpToFold(nullptr), ImmToFold(Imm) { }
|
OpToFold = nullptr;
|
||||||
|
ImmToFold = FoldOp->getImm();
|
||||||
|
} else {
|
||||||
|
assert(FoldOp->isReg());
|
||||||
|
OpToFold = FoldOp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool isImm() const {
|
bool isImm() const {
|
||||||
return !OpToFold;
|
return !OpToFold;
|
||||||
|
@ -119,6 +125,35 @@ static bool updateOperand(FoldCandidate &Fold,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
|
||||||
|
MachineInstr *MI, unsigned OpNo,
|
||||||
|
MachineOperand *OpToFold,
|
||||||
|
const SIInstrInfo *TII) {
|
||||||
|
if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {
|
||||||
|
// Operand is not legal, so try to commute the instruction to
|
||||||
|
// see if this makes it possible to fold.
|
||||||
|
unsigned CommuteIdx0;
|
||||||
|
unsigned CommuteIdx1;
|
||||||
|
bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);
|
||||||
|
|
||||||
|
if (CanCommute) {
|
||||||
|
if (CommuteIdx0 == OpNo)
|
||||||
|
OpNo = CommuteIdx1;
|
||||||
|
else if (CommuteIdx1 == OpNo)
|
||||||
|
OpNo = CommuteIdx0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!CanCommute || !TII->commuteInstruction(MI))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (!TII->isOperandLegal(MI, OpNo, OpToFold))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
|
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
|
||||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||||
const SIInstrInfo *TII =
|
const SIInstrInfo *TII =
|
||||||
|
@ -140,6 +175,11 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
|
||||||
MachineOperand &OpToFold = MI.getOperand(1);
|
MachineOperand &OpToFold = MI.getOperand(1);
|
||||||
bool FoldingImm = OpToFold.isImm() || OpToFold.isFPImm();
|
bool FoldingImm = OpToFold.isImm() || OpToFold.isFPImm();
|
||||||
|
|
||||||
|
// FIXME: We could also be folding things like FrameIndexes and
|
||||||
|
// TargetIndexes.
|
||||||
|
if (!FoldingImm && !OpToFold.isReg())
|
||||||
|
continue;
|
||||||
|
|
||||||
// Folding immediates with more than one use will increase program size.
|
// Folding immediates with more than one use will increase program size.
|
||||||
// FIXME: This will also reduce register usage, which may be better
|
// FIXME: This will also reduce register usage, which may be better
|
||||||
// in some cases. A better heuristic is needed.
|
// in some cases. A better heuristic is needed.
|
||||||
|
@ -210,24 +250,13 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
|
||||||
UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
|
UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
|
||||||
if (FoldingImm) {
|
if (FoldingImm) {
|
||||||
const MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
|
MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
|
||||||
if (TII->isOperandLegal(UseMI, Use.getOperandNo(), &ImmOp)) {
|
tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
|
||||||
FoldList.push_back(FoldCandidate(UseMI, Use.getOperandNo(),
|
|
||||||
Imm.getSExtValue()));
|
|
||||||
}
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Normal substitution with registers
|
tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);
|
||||||
if (TII->isOperandLegal(UseMI, Use.getOperandNo(), &OpToFold)) {
|
|
||||||
FoldList.push_back(FoldCandidate(UseMI, Use.getOperandNo(), &OpToFold));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// FIXME: We could commute the instruction to create more opportunities
|
|
||||||
// for folding. This will only be useful if we have 32-bit instructions.
|
|
||||||
|
|
||||||
// FIXME: We could try to change the instruction from 64-bit to 32-bit
|
// FIXME: We could try to change the instruction from 64-bit to 32-bit
|
||||||
// to enable more folding opportunities. The shrink operands pass
|
// to enable more folding opportunities. The shrink operands pass
|
||||||
|
|
|
@ -709,6 +709,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
|
||||||
|
|
||||||
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
|
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
|
||||||
bool NewMI) const {
|
bool NewMI) const {
|
||||||
|
|
||||||
if (MI->getNumOperands() < 3)
|
if (MI->getNumOperands() < 3)
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
|
@ -730,8 +731,9 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
|
||||||
// Make sure it's legal to commute operands for VOP2.
|
// Make sure it's legal to commute operands for VOP2.
|
||||||
if (isVOP2(MI->getOpcode()) &&
|
if (isVOP2(MI->getOpcode()) &&
|
||||||
(!isOperandLegal(MI, Src0Idx, &Src1) ||
|
(!isOperandLegal(MI, Src0Idx, &Src1) ||
|
||||||
!isOperandLegal(MI, Src1Idx, &Src0)))
|
!isOperandLegal(MI, Src1Idx, &Src0))) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
if (!Src1.isReg()) {
|
if (!Src1.isReg()) {
|
||||||
// Allow commuting instructions with Imm or FPImm operands.
|
// Allow commuting instructions with Imm or FPImm operands.
|
||||||
|
@ -1471,6 +1473,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
|
||||||
//
|
//
|
||||||
// s_sendmsg 0, s0 ; Operand defined as m0reg
|
// s_sendmsg 0, s0 ; Operand defined as m0reg
|
||||||
// ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
|
// ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
|
||||||
|
|
||||||
return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
|
return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
|
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
|
||||||
|
|
||||||
;CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab
|
;CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab
|
||||||
;CHECK: v_mul_hi_u32 v0, {{[sv][0-9]+}}, {{v[0-9]+}}
|
;CHECK: v_mul_hi_u32 v0, {{v[0-9]+}}, {{s[0-9]+}}
|
||||||
;CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v0
|
;CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v0
|
||||||
|
|
||||||
define void @test(i32 %p) {
|
define void @test(i32 %p) {
|
||||||
|
|
|
@ -35,7 +35,7 @@ define void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
|
||||||
; FUNC-LABEL: {{^}}slow_sdiv_i32_3435:
|
; FUNC-LABEL: {{^}}slow_sdiv_i32_3435:
|
||||||
; SI: buffer_load_dword [[VAL:v[0-9]+]],
|
; SI: buffer_load_dword [[VAL:v[0-9]+]],
|
||||||
; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b
|
; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b
|
||||||
; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[VAL]], [[MAGIC]]
|
; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[MAGIC]], [[VAL]]
|
||||||
; SI: v_add_i32
|
; SI: v_add_i32
|
||||||
; SI: v_lshrrev_b32
|
; SI: v_lshrrev_b32
|
||||||
; SI: v_ashrrev_i32
|
; SI: v_ashrrev_i32
|
||||||
|
|
|
@ -41,7 +41,7 @@ define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, floa
|
||||||
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||||
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||||
; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
|
; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
|
||||||
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
|
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
|
||||||
; SI: buffer_store_dword [[RESULT]]
|
; SI: buffer_store_dword [[RESULT]]
|
||||||
define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
|
define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
|
||||||
%fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
|
%fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
|
||||||
|
@ -53,7 +53,7 @@ define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, floa
|
||||||
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||||
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||||
; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
|
; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
|
||||||
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
|
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
|
||||||
; SI: buffer_store_dword [[RESULT]]
|
; SI: buffer_store_dword [[RESULT]]
|
||||||
define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
|
define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
|
||||||
%fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
|
%fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
|
||||||
|
|
Loading…
Reference in New Issue