forked from OSchip/llvm-project
AMDGPU: Shrink insts to fold immediates
This needs to be done in the SSA fold operands pass to be effective, so there is a bit of overlap with SIShrinkInstructions, but I don't think this is practically avoidable. llvm-svn: 340859
This commit is contained in:
parent
ec71e018d6
commit
de6c421cc8
|
@ -35,13 +35,16 @@ struct FoldCandidate {
|
|||
uint64_t ImmToFold;
|
||||
int FrameIndexToFold;
|
||||
};
|
||||
int ShrinkOpcode;
|
||||
unsigned char UseOpNo;
|
||||
MachineOperand::MachineOperandType Kind;
|
||||
bool Commuted;
|
||||
|
||||
FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
|
||||
bool Commuted_ = false) :
|
||||
UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()),
|
||||
bool Commuted_ = false,
|
||||
int ShrinkOp = -1) :
|
||||
UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
|
||||
Kind(FoldOp->getType()),
|
||||
Commuted(Commuted_) {
|
||||
if (FoldOp->isImm()) {
|
||||
ImmToFold = FoldOp->getImm();
|
||||
|
@ -68,6 +71,14 @@ struct FoldCandidate {
|
|||
bool isCommuted() const {
|
||||
return Commuted;
|
||||
}
|
||||
|
||||
bool needsShrink() const {
|
||||
return ShrinkOpcode != -1;
|
||||
}
|
||||
|
||||
int getShrinkOpcode() const {
|
||||
return ShrinkOpcode;
|
||||
}
|
||||
};
|
||||
|
||||
class SIFoldOperands : public MachineFunctionPass {
|
||||
|
@ -154,6 +165,7 @@ FunctionPass *llvm::createSIFoldOperandsPass() {
|
|||
}
|
||||
|
||||
static bool updateOperand(FoldCandidate &Fold,
|
||||
const SIInstrInfo &TII,
|
||||
const TargetRegisterInfo &TRI) {
|
||||
MachineInstr *MI = Fold.UseMI;
|
||||
MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
|
||||
|
@ -189,10 +201,42 @@ static bool updateOperand(FoldCandidate &Fold,
|
|||
Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
|
||||
}
|
||||
}
|
||||
|
||||
if (Fold.needsShrink()) {
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
|
||||
if (Liveness != MachineBasicBlock::LQR_Dead)
|
||||
return false;
|
||||
|
||||
int Op32 = Fold.getShrinkOpcode();
|
||||
MachineOperand &Dst0 = MI->getOperand(0);
|
||||
MachineOperand &Dst1 = MI->getOperand(1);
|
||||
assert(Dst0.isDef() && Dst1.isDef());
|
||||
|
||||
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
|
||||
const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
|
||||
unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
|
||||
const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
|
||||
unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
|
||||
|
||||
MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
|
||||
|
||||
// Keep the old instruction around to avoid breaking iterators, but
|
||||
// replace the outputs with dummy registers.
|
||||
Dst0.setReg(NewReg0);
|
||||
Dst1.setReg(NewReg1);
|
||||
|
||||
if (Fold.isCommuted())
|
||||
TII.commuteInstruction(*Inst32, false);
|
||||
return true;
|
||||
}
|
||||
|
||||
Old.ChangeToImmediate(Fold.ImmToFold);
|
||||
return true;
|
||||
}
|
||||
|
||||
assert(!Fold.needsShrink() && "not handled");
|
||||
|
||||
if (Fold.isFI()) {
|
||||
Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
|
||||
return true;
|
||||
|
@ -261,6 +305,8 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
|
|||
if (isUseMIInFoldList(FoldList, MI))
|
||||
return false;
|
||||
|
||||
unsigned CommuteOpNo = OpNo;
|
||||
|
||||
// Operand is not legal, so try to commute the instruction to
|
||||
// see if this makes it possible to fold.
|
||||
unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
|
||||
|
@ -269,11 +315,12 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
|
|||
|
||||
if (CanCommute) {
|
||||
if (CommuteIdx0 == OpNo)
|
||||
OpNo = CommuteIdx1;
|
||||
CommuteOpNo = CommuteIdx1;
|
||||
else if (CommuteIdx1 == OpNo)
|
||||
OpNo = CommuteIdx0;
|
||||
CommuteOpNo = CommuteIdx0;
|
||||
}
|
||||
|
||||
|
||||
// One of the operands might be an Imm operand, and OpNo may refer to it after
|
||||
// the call of commuteInstruction() below. Such situations are avoided
|
||||
// here explicitly as OpNo must be a register operand to be a candidate
|
||||
|
@ -286,12 +333,39 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
|
|||
!TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
|
||||
return false;
|
||||
|
||||
if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
|
||||
if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
|
||||
if ((Opc == AMDGPU::V_ADD_I32_e64 ||
|
||||
Opc == AMDGPU::V_SUB_I32_e64 ||
|
||||
Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
|
||||
OpToFold->isImm()) {
|
||||
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
|
||||
|
||||
// Verify the other operand is a VGPR, otherwise we would violate the
|
||||
// constant bus restriction.
|
||||
unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
|
||||
MachineOperand &OtherOp = MI->getOperand(OtherIdx);
|
||||
if (!OtherOp.isReg() ||
|
||||
!TII->getRegisterInfo().isVGPR(MRI, OtherOp.getReg()))
|
||||
return false;
|
||||
|
||||
const MachineOperand &SDst = MI->getOperand(1);
|
||||
assert(SDst.isDef());
|
||||
|
||||
// TODO: Handle cases with a used carry.
|
||||
if (!MRI.use_nodbg_empty(SDst.getReg()))
|
||||
return false;
|
||||
|
||||
int Op32 = AMDGPU::getVOPe32(Opc);
|
||||
FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
|
||||
Op32));
|
||||
return true;
|
||||
}
|
||||
|
||||
TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
|
||||
return false;
|
||||
}
|
||||
|
||||
FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold, true));
|
||||
FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -757,7 +831,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
|
|||
Copy->addImplicitDefUseOperands(*MF);
|
||||
|
||||
for (FoldCandidate &Fold : FoldList) {
|
||||
if (updateOperand(Fold, *TRI)) {
|
||||
if (updateOperand(Fold, *TII, *TRI)) {
|
||||
// Clear kill flags.
|
||||
if (Fold.isReg()) {
|
||||
assert(Fold.OpToFold && Fold.OpToFold->isReg());
|
||||
|
|
|
@ -2572,7 +2572,60 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
|
|||
// Check output modifiers
|
||||
return !hasModifiersSet(MI, AMDGPU::OpName::omod) &&
|
||||
!hasModifiersSet(MI, AMDGPU::OpName::clamp);
|
||||
}
|
||||
|
||||
// Set VCC operand with all flags from \p Orig, except for setting it as
|
||||
// implicit.
|
||||
static void copyFlagsToImplicitVCC(MachineInstr &MI,
|
||||
const MachineOperand &Orig) {
|
||||
|
||||
for (MachineOperand &Use : MI.implicit_operands()) {
|
||||
if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
|
||||
Use.setIsUndef(Orig.isUndef());
|
||||
Use.setIsKill(Orig.isKill());
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a new instruction with opcode \p Op32 from the operands of \p MI.
///
/// The new instruction is created with BuildMI using \p MI as the insertion
/// point in \p MI's parent block, and it reuses \p MI's debug location. The
/// original instruction is neither modified nor erased here; that is left to
/// the caller.
///
/// Operands are transferred as follows:
///  - operand 0 of \p MI is added only if \p Op32 has a named vdst operand;
///    otherwise operand 0 is asserted to be VCC.
///  - src0 is always added; src1 is added when \p MI has one.
///  - src2 is added when both \p MI and \p Op32 have one. If only \p MI has
///    it, its undef/kill flags are copied onto the implicit VCC use of the
///    new instruction instead.
///
/// \param MI   Instruction to take operands from.
/// \param Op32 Opcode of the instruction to build.
/// \returns the newly built instruction.
MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
                                           unsigned Op32) const {
  MachineBasicBlock *MBB = MI.getParent();
  MachineInstrBuilder Inst32 =
      BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32));

  // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
  // For VOPC instructions, this is replaced by an implicit def of vcc.
  int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
  if (Op32DstIdx != -1) {
    // dst
    Inst32.add(MI.getOperand(0));
  } else {
    assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
           "Unexpected case");
  }

  Inst32.add(*getNamedOperand(MI, AMDGPU::OpName::src0));

  const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
  if (Src1)
    Inst32.add(*Src1);

  const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);

  if (Src2) {
    int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
    if (Op32Src2Idx != -1) {
      Inst32.add(*Src2);
    } else {
      // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
      // replaced with an implicit read of vcc. This was already added
      // during the initial BuildMI, so find it to preserve the flags.
      copyFlagsToImplicitVCC(*Inst32, *Src2);
    }
  }

  return Inst32;
}
|
||||
|
||||
bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
|
||||
|
|
|
@ -694,6 +694,9 @@ public:
|
|||
bool canShrink(const MachineInstr &MI,
|
||||
const MachineRegisterInfo &MRI) const;
|
||||
|
||||
MachineInstr *buildShrunkInst(MachineInstr &MI,
|
||||
unsigned NewOpcode) const;
|
||||
|
||||
bool verifyInstruction(const MachineInstr &MI,
|
||||
StringRef &ErrInfo) const override;
|
||||
|
||||
|
|
|
@ -120,19 +120,6 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
|
|||
return false;
|
||||
}
|
||||
|
||||
// Copy MachineOperand with all flags except setting it as implicit.
|
||||
static void copyFlagsToImplicitVCC(MachineInstr &MI,
|
||||
const MachineOperand &Orig) {
|
||||
|
||||
for (MachineOperand &Use : MI.implicit_operands()) {
|
||||
if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
|
||||
Use.setIsUndef(Orig.isUndef());
|
||||
Use.setIsKill(Orig.isKill());
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
|
||||
return isInt<16>(Src.getImm()) &&
|
||||
!TII->isInlineConstant(*Src.getParent(),
|
||||
|
@ -434,40 +421,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
|
|||
// We can shrink this instruction
|
||||
LLVM_DEBUG(dbgs() << "Shrinking " << MI);
|
||||
|
||||
MachineInstrBuilder Inst32 =
|
||||
BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
|
||||
|
||||
// Add the dst operand if the 32-bit encoding also has an explicit $vdst.
|
||||
// For VOPC instructions, this is replaced by an implicit def of vcc.
|
||||
int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
|
||||
if (Op32DstIdx != -1) {
|
||||
// dst
|
||||
Inst32.add(MI.getOperand(0));
|
||||
} else {
|
||||
assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
|
||||
"Unexpected case");
|
||||
}
|
||||
|
||||
|
||||
Inst32.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
|
||||
|
||||
const MachineOperand *Src1 =
|
||||
TII->getNamedOperand(MI, AMDGPU::OpName::src1);
|
||||
if (Src1)
|
||||
Inst32.add(*Src1);
|
||||
|
||||
if (Src2) {
|
||||
int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
|
||||
if (Op32Src2Idx != -1) {
|
||||
Inst32.add(*Src2);
|
||||
} else {
|
||||
// In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
|
||||
// replaced with an implicit read of vcc. This was already added
|
||||
// during the initial BuildMI, so find it to preserve the flags.
|
||||
copyFlagsToImplicitVCC(*Inst32, *Src2);
|
||||
}
|
||||
}
|
||||
|
||||
MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
|
||||
++NumInstructionsShrunk;
|
||||
|
||||
// Copy extra operands not present in the instruction definition.
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s
|
||||
|
||||
---
|
||||
|
||||
# Uses a carry out in an instruction that can't be shrunk.
|
||||
|
||||
name: shrink_scalar_imm_vgpr_v_add_i32_e64_other_carry_out_use
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_other_carry_out_use
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[DEF]], [[S_MOV_B32_]], implicit $exec
|
||||
; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_1]]
|
||||
%0:sreg_32_xm0 = S_MOV_B32 12345
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32 = IMPLICIT_DEF
|
||||
%3:vgpr_32 = IMPLICIT_DEF
|
||||
|
||||
%4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec
|
||||
S_ENDPGM implicit %5
|
||||
|
||||
...
|
||||
---
|
||||
|
||||
# TODO: Is it OK to leave the broken use around on the DBG_VALUE?
|
||||
|
||||
name: shrink_scalar_imm_vgpr_v_add_i32_e64_dbg_only_carry_out_use
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_dbg_only_carry_out_use
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
|
||||
; GCN: DBG_VALUE debug-use %5:sreg_64_xexec, debug-use $noreg
|
||||
; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
|
||||
%0:sreg_32_xm0 = S_MOV_B32 12345
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32 = IMPLICIT_DEF
|
||||
%3:vgpr_32 = IMPLICIT_DEF
|
||||
|
||||
%4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec
|
||||
DBG_VALUE debug-use %5, debug-use $noreg
|
||||
S_ENDPGM implicit %4
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
# Uses carry out in a normal pattern
|
||||
|
||||
name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[DEF]], [[S_MOV_B32_]], implicit $exec
|
||||
; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF1]], [[DEF2]], [[V_ADD_I32_e64_1]], implicit $exec
|
||||
; GCN: S_ENDPGM implicit [[V_ADDC_U32_e64_]]
|
||||
%0:sreg_32_xm0 = S_MOV_B32 12345
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32 = IMPLICIT_DEF
|
||||
%3:vgpr_32 = IMPLICIT_DEF
|
||||
|
||||
%4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec
|
||||
%6:vgpr_32, %7:sreg_64_xexec = V_ADDC_U32_e64 %2, %3, %5, implicit $exec
|
||||
S_ENDPGM implicit %6
|
||||
|
||||
...
|
|
@ -0,0 +1,347 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s
|
||||
|
||||
---
|
||||
|
||||
name: shrink_scalar_imm_vgpr_v_add_i32_e64_no_carry_out_use
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_no_carry_out_use
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
|
||||
; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
|
||||
%0:sreg_32_xm0 = S_MOV_B32 12345
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
|
||||
S_ENDPGM implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: shrink_vgpr_scalar_imm_v_add_i32_e64_no_carry_out_use
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_add_i32_e64_no_carry_out_use
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
|
||||
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
|
||||
; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
|
||||
%0:vgpr_32 = IMPLICIT_DEF
|
||||
%1:sreg_32_xm0 = S_MOV_B32 12345
|
||||
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
|
||||
S_ENDPGM implicit %2
|
||||
|
||||
...
|
||||
---
|
||||
|
||||
name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
|
||||
; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
|
||||
%0:sreg_32_xm0 = S_MOV_B32 12345
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
|
||||
S_ENDPGM implicit %2
|
||||
|
||||
...
|
||||
---
|
||||
|
||||
# This does not shrink because it would violate the constant bus
|
||||
# restriction to have an SGPR input and an immediate, so a copy would
|
||||
# be required.
|
||||
|
||||
name: shrink_vector_imm_sgpr_v_add_i32_e64_no_carry_out_use
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: shrink_vector_imm_sgpr_v_add_i32_e64_no_carry_out_use
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
|
||||
; GCN: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
|
||||
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[DEF]], [[V_MOV_B32_e32_]], implicit $exec
|
||||
; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]]
|
||||
%0:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
|
||||
%1:sreg_32_xm0 = IMPLICIT_DEF
|
||||
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
|
||||
S_ENDPGM implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: shrink_sgpr_vector_imm_v_add_i32_e64_no_carry_out_use
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: shrink_sgpr_vector_imm_v_add_i32_e64_no_carry_out_use
|
||||
; GCN: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
|
||||
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[DEF]], implicit $exec
|
||||
; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]]
|
||||
%0:sreg_32_xm0 = IMPLICIT_DEF
|
||||
%1:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
|
||||
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
|
||||
S_ENDPGM implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: shrink_scalar_imm_vgpr_v_add_i32_e64_live_vcc_use
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_live_vcc_use
|
||||
; GCN: $vcc = S_MOV_B64 -1
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
|
||||
; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc
|
||||
$vcc = S_MOV_B64 -1
|
||||
%0:sreg_32_xm0 = S_MOV_B32 12345
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
|
||||
S_ENDPGM implicit %2, implicit $vcc
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_use
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_use
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: $vcc = S_MOV_B64 -1
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
|
||||
; GCN: bb.1:
|
||||
; GCN: liveins: $vcc
|
||||
; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
$vcc = S_MOV_B64 -1
|
||||
%0:sreg_32_xm0 = S_MOV_B32 12345
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
|
||||
|
||||
bb.1:
|
||||
liveins: $vcc
|
||||
S_ENDPGM implicit %2, implicit $vcc
|
||||
|
||||
...
|
||||
---
|
||||
|
||||
name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_lo_use
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_lo_use
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
|
||||
; GCN: bb.1:
|
||||
; GCN: liveins: $vcc_lo
|
||||
; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc_lo
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
$vcc = S_MOV_B64 -1
|
||||
%0:sreg_32_xm0 = S_MOV_B32 12345
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
|
||||
|
||||
bb.1:
|
||||
liveins: $vcc_lo
|
||||
S_ENDPGM implicit %2, implicit $vcc_lo
|
||||
|
||||
...
|
||||
---
|
||||
|
||||
# This is not OK to clobber because vcc_lo has a livein use.
|
||||
|
||||
name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: $vcc = S_MOV_B64 -1
|
||||
; GCN: bb.1:
|
||||
; GCN: liveins: $vcc
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
|
||||
; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc_lo
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
$vcc = S_MOV_B64 -1
|
||||
|
||||
bb.1:
|
||||
liveins: $vcc
|
||||
%0:sreg_32_xm0 = S_MOV_B32 12345
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
|
||||
S_ENDPGM implicit %2, implicit $vcc_lo
|
||||
|
||||
...
|
||||
---
|
||||
|
||||
name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc_hi
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc_hi
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: $vcc_hi = S_MOV_B32 -1
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.2(0x80000000)
|
||||
; GCN: liveins: $vcc_hi
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
|
||||
; GCN: bb.2:
|
||||
; GCN: liveins: $vcc_hi
|
||||
; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc_hi
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
$vcc_hi = S_MOV_B32 -1
|
||||
|
||||
bb.1:
|
||||
liveins: $vcc_hi
|
||||
%0:sreg_32_xm0 = S_MOV_B32 12345
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
|
||||
|
||||
bb.2:
|
||||
liveins: $vcc_hi
|
||||
|
||||
S_ENDPGM implicit %2, implicit $vcc_hi
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: shrink_scalar_imm_vgpr_v_sub_i32_e64_no_carry_out_use
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_sub_i32_e64_no_carry_out_use
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
|
||||
; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]]
|
||||
%0:sreg_32_xm0 = S_MOV_B32 12345
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, implicit $exec
|
||||
S_ENDPGM implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: shrink_vgpr_scalar_imm_v_sub_i32_e64_no_carry_out_use
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_sub_i32_e64_no_carry_out_use
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
|
||||
; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
|
||||
; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]]
|
||||
%0:vgpr_32 = IMPLICIT_DEF
|
||||
%1:sreg_32_xm0 = S_MOV_B32 12345
|
||||
%2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, implicit $exec
|
||||
S_ENDPGM implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: shrink_scalar_imm_vgpr_v_subrev_i32_e64_no_carry_out_use
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_subrev_i32_e64_no_carry_out_use
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
|
||||
; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]]
|
||||
%0:sreg_32_xm0 = S_MOV_B32 12345
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, implicit $exec
|
||||
S_ENDPGM implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: shrink_vgpr_scalar_imm_v_subrev_i32_e64_no_carry_out_use
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_subrev_i32_e64_no_carry_out_use
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
|
||||
; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
|
||||
; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]]
|
||||
%0:vgpr_32 = IMPLICIT_DEF
|
||||
%1:sreg_32_xm0 = S_MOV_B32 12345
|
||||
%2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, implicit $exec
|
||||
S_ENDPGM implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
# We know this is OK because vcc isn't live out of the block, even
|
||||
# though it had a defined value
|
||||
|
||||
name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_dead_no_liveout
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_dead_no_liveout
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
|
||||
; GCN: bb.1:
|
||||
; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]]
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
|
||||
$vcc = S_MOV_B64 -1
|
||||
%0:sreg_32_xm0 = S_MOV_B32 12345
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
|
||||
|
||||
bb.1:
|
||||
S_ENDPGM implicit %2
|
||||
|
||||
...
|
Loading…
Reference in New Issue