AMDGPU: Shrink insts to fold immediates

This needs to be done in the SSA fold operands
pass to be effective, so there is a bit of overlap
with SIShrinkInstructions but I don't think this
is practically avoidable.

llvm-svn: 340859
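Concretely, the fold this enables (taken from the first of the new MIR tests below) turns

    %0:sreg_32_xm0 = S_MOV_B32 12345
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec

into

    %2:vgpr_32 = V_ADD_I32_e32 %0, %1, implicit-def $vcc, implicit $exec

discarding the unused carry-out %3. The e32 encoding implicitly clobbers $vcc, so the shrink is only attempted when $vcc is known dead at that point.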
Matt Arsenault 2018-08-28 18:34:24 +00:00
parent ec71e018d6
commit de6c421cc8
6 changed files with 564 additions and 54 deletions

lib/Target/AMDGPU/SIFoldOperands.cpp

@@ -35,13 +35,16 @@ struct FoldCandidate {
     uint64_t ImmToFold;
     int FrameIndexToFold;
   };
+  int ShrinkOpcode;
   unsigned char UseOpNo;
   MachineOperand::MachineOperandType Kind;
   bool Commuted;

   FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
-                bool Commuted_ = false) :
-    UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()),
+                bool Commuted_ = false,
+                int ShrinkOp = -1) :
+    UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
+    Kind(FoldOp->getType()),
     Commuted(Commuted_) {
     if (FoldOp->isImm()) {
       ImmToFold = FoldOp->getImm();

@@ -68,6 +71,14 @@ struct FoldCandidate {
   bool isCommuted() const {
     return Commuted;
   }
+
+  bool needsShrink() const {
+    return ShrinkOpcode != -1;
+  }
+
+  int getShrinkOpcode() const {
+    return ShrinkOpcode;
+  }
 };

 class SIFoldOperands : public MachineFunctionPass {
@@ -154,6 +165,7 @@ FunctionPass *llvm::createSIFoldOperandsPass() {
 }

 static bool updateOperand(FoldCandidate &Fold,
+                          const SIInstrInfo &TII,
                           const TargetRegisterInfo &TRI) {
   MachineInstr *MI = Fold.UseMI;
   MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
@@ -189,10 +201,42 @@ static bool updateOperand(FoldCandidate &Fold,
       Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
     }
   }
+
+    if (Fold.needsShrink()) {
+      MachineBasicBlock *MBB = MI->getParent();
+      auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
+      if (Liveness != MachineBasicBlock::LQR_Dead)
+        return false;
+
+      int Op32 = Fold.getShrinkOpcode();
+      MachineOperand &Dst0 = MI->getOperand(0);
+      MachineOperand &Dst1 = MI->getOperand(1);
+      assert(Dst0.isDef() && Dst1.isDef());
+
+      MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+      const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
+      unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
+      const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
+      unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
+
+      MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
+
+      // Keep the old instruction around to avoid breaking iterators, but
+      // replace the outputs with dummy registers.
+      Dst0.setReg(NewReg0);
+      Dst1.setReg(NewReg1);
+
+      if (Fold.isCommuted())
+        TII.commuteInstruction(*Inst32, false);
+      return true;
+    }
+
     Old.ChangeToImmediate(Fold.ImmToFold);
     return true;
   }
+
+  assert(!Fold.needsShrink() && "not handled");

   if (Fold.isFI()) {
     Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
     return true;
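A note on the mechanics above: buildShrunkInst inserts the e32 instruction immediately before the old e64 one, and the old instruction is deliberately left in place (with its defs rewritten to fresh dummy vregs) so the caller's instruction iterators remain valid; the now-dead e64 is swept up by dead-mi-elimination, which the new tests schedule right after si-fold-operands. Immediately after updateOperand the block looks roughly like this (vreg numbers hypothetical):

    %2:vgpr_32 = V_ADD_I32_e32 %0, %1, implicit-def $vcc, implicit $exec
    %6:vgpr_32, %7:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec   ; dead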
@@ -261,6 +305,8 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
   if (isUseMIInFoldList(FoldList, MI))
     return false;

+  unsigned CommuteOpNo = OpNo;
+
   // Operand is not legal, so try to commute the instruction to
   // see if this makes it possible to fold.
   unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
@@ -269,11 +315,12 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
   if (CanCommute) {
     if (CommuteIdx0 == OpNo)
-      OpNo = CommuteIdx1;
+      CommuteOpNo = CommuteIdx1;
     else if (CommuteIdx1 == OpNo)
-      OpNo = CommuteIdx0;
+      CommuteOpNo = CommuteIdx0;
   }
+
   // One of operands might be an Imm operand, and OpNo may refer to it after
   // the call of commuteInstruction() below. Such situations are avoided
   // here explicitly as OpNo must be a register operand to be a candidate
@@ -286,12 +333,39 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
       !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
     return false;

-  if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
+  if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
+    if ((Opc == AMDGPU::V_ADD_I32_e64 ||
+         Opc == AMDGPU::V_SUB_I32_e64 ||
+         Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
+        OpToFold->isImm()) {
+      MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
+      // Verify the other operand is a VGPR, otherwise we would violate the
+      // constant bus restriction.
+      unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
+      MachineOperand &OtherOp = MI->getOperand(OtherIdx);
+      if (!OtherOp.isReg() ||
+          !TII->getRegisterInfo().isVGPR(MRI, OtherOp.getReg()))
+        return false;
+
+      const MachineOperand &SDst = MI->getOperand(1);
+      assert(SDst.isDef());
+
+      // TODO: Handle cases with a used carry.
+      if (!MRI.use_nodbg_empty(SDst.getReg()))
+        return false;
+
+      int Op32 = AMDGPU::getVOPe32(Opc);
+      FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
+                                       Op32));
+      return true;
+    }
+
     TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
     return false;
   }

-  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold, true));
+  FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true));
   return true;
 }
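The VGPR check above matters because the shrunk instruction is meant to end up with the immediate as a literal operand, and a VALU instruction may use at most one constant-bus source (an SGPR or a literal); a literal together with an SGPR input would be two. Schematically, the rejected shape would be (hypothetical operands):

    %2:vgpr_32 = V_ADD_I32_e32 12345, %sgpr, implicit-def $vcc, implicit $exec   ; illegal: two constant bus uses

The shrink_vector_imm_sgpr / shrink_sgpr_vector_imm tests below cover this case.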
@@ -757,7 +831,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
     Copy->addImplicitDefUseOperands(*MF);

   for (FoldCandidate &Fold : FoldList) {
-    if (updateOperand(Fold, *TRI)) {
+    if (updateOperand(Fold, *TII, *TRI)) {
       // Clear kill flags.
       if (Fold.isReg()) {
         assert(Fold.OpToFold && Fold.OpToFold->isReg());

lib/Target/AMDGPU/SIInstrInfo.cpp

@@ -2572,7 +2572,60 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
   // Check output modifiers
   return !hasModifiersSet(MI, AMDGPU::OpName::omod) &&
          !hasModifiersSet(MI, AMDGPU::OpName::clamp);
+}
+
+// Set VCC operand with all flags from \p Orig, except for setting it as
+// implicit.
+static void copyFlagsToImplicitVCC(MachineInstr &MI,
+                                   const MachineOperand &Orig) {
+  for (MachineOperand &Use : MI.implicit_operands()) {
+    if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
+      Use.setIsUndef(Orig.isUndef());
+      Use.setIsKill(Orig.isKill());
+      return;
+    }
+  }
+}
+
+MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
+                                           unsigned Op32) const {
+  MachineBasicBlock *MBB = MI.getParent();
+  MachineInstrBuilder Inst32 =
+    BuildMI(*MBB, MI, MI.getDebugLoc(), get(Op32));
+
+  // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
+  // For VOPC instructions, this is replaced by an implicit def of vcc.
+  int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
+  if (Op32DstIdx != -1) {
+    // dst
+    Inst32.add(MI.getOperand(0));
+  } else {
+    assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
+           "Unexpected case");
+  }
+
+  Inst32.add(*getNamedOperand(MI, AMDGPU::OpName::src0));
+
+  const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
+  if (Src1)
+    Inst32.add(*Src1);
+
+  const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
+  if (Src2) {
+    int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
+    if (Op32Src2Idx != -1) {
+      Inst32.add(*Src2);
+    } else {
+      // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
+      // replaced with an implicit read of vcc. This was already added
+      // during the initial BuildMI, so find it to preserve the flags.
+      copyFlagsToImplicitVCC(*Inst32, *Src2);
+    }
+  }
+
+  return Inst32;
 }

 bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
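The src2 special case exists for V_CNDMASK_B32: the e64 form takes the condition as an explicit third source operand, while the e32 form reads $vcc implicitly, so the undef/kill flags from the explicit operand must be transferred to the implicit $vcc use. Schematically (operands hypothetical; the condition must already live in $vcc for the shrink to be valid):

    %d:vgpr_32 = V_CNDMASK_B32_e64 %a, %b, %cc, implicit $exec
      -->
    %d:vgpr_32 = V_CNDMASK_B32_e32 %a, %b, implicit $vcc, implicit $exec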

lib/Target/AMDGPU/SIInstrInfo.h

@@ -694,6 +694,9 @@ public:
   bool canShrink(const MachineInstr &MI,
                  const MachineRegisterInfo &MRI) const;

+  MachineInstr *buildShrunkInst(MachineInstr &MI,
+                                unsigned NewOpcode) const;
+
   bool verifyInstruction(const MachineInstr &MI,
                         StringRef &ErrInfo) const override;

lib/Target/AMDGPU/SIShrinkInstructions.cpp

@@ -120,19 +120,6 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
   return false;
 }

-// Copy MachineOperand with all flags except setting it as implicit.
-static void copyFlagsToImplicitVCC(MachineInstr &MI,
-                                   const MachineOperand &Orig) {
-  for (MachineOperand &Use : MI.implicit_operands()) {
-    if (Use.isUse() && Use.getReg() == AMDGPU::VCC) {
-      Use.setIsUndef(Orig.isUndef());
-      Use.setIsKill(Orig.isKill());
-      return;
-    }
-  }
-}
-
 static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
   return isInt<16>(Src.getImm()) &&
          !TII->isInlineConstant(*Src.getParent(),
@@ -434,40 +421,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
         // We can shrink this instruction
         LLVM_DEBUG(dbgs() << "Shrinking " << MI);

-        MachineInstrBuilder Inst32 =
-            BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
-
-        // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
-        // For VOPC instructions, this is replaced by an implicit def of vcc.
-        int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
-        if (Op32DstIdx != -1) {
-          // dst
-          Inst32.add(MI.getOperand(0));
-        } else {
-          assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
-                 "Unexpected case");
-        }
-
-        Inst32.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
-
-        const MachineOperand *Src1 =
-            TII->getNamedOperand(MI, AMDGPU::OpName::src1);
-        if (Src1)
-          Inst32.add(*Src1);
-
-        if (Src2) {
-          int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
-          if (Op32Src2Idx != -1) {
-            Inst32.add(*Src2);
-          } else {
-            // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
-            // replaced with an implicit read of vcc. This was already added
-            // during the initial BuildMI, so find it to preserve the flags.
-            copyFlagsToImplicitVCC(*Inst32, *Src2);
-          }
-        }
+        MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);

         ++NumInstructionsShrunk;

         // Copy extra operands not present in the instruction definition.

test/CodeGen/AMDGPU/fold-immediate-operand-shrink-with-carry.mir (new file)

@@ -0,0 +1,79 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s
---
# Uses a carry out in an instruction that can't be shrunk.
name: shrink_scalar_imm_vgpr_v_add_i32_e64_other_carry_out_use
tracksRegLiveness: true
body: |
  bb.0:
    ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_other_carry_out_use
    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[DEF]], [[S_MOV_B32_]], implicit $exec
    ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_1]]
    %0:sreg_32_xm0 = S_MOV_B32 12345
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = IMPLICIT_DEF
    %3:vgpr_32 = IMPLICIT_DEF
    %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec
    S_ENDPGM implicit %5
...
---
# TODO: Is it OK to leave the broken use around on the DBG_VALUE?
name: shrink_scalar_imm_vgpr_v_add_i32_e64_dbg_only_carry_out_use
tracksRegLiveness: true
body: |
  bb.0:
    ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_dbg_only_carry_out_use
    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
    ; GCN: DBG_VALUE debug-use %5:sreg_64_xexec, debug-use $noreg
    ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
    %0:sreg_32_xm0 = S_MOV_B32 12345
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = IMPLICIT_DEF
    %3:vgpr_32 = IMPLICIT_DEF
    %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec
    DBG_VALUE debug-use %5, debug-use $noreg
    S_ENDPGM implicit %4
...
---
# Uses carry out in a normal pattern
name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use
tracksRegLiveness: true
body: |
  bb.0:
    ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use
    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; GCN: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; GCN: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[DEF]], [[S_MOV_B32_]], implicit $exec
    ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[DEF1]], [[DEF2]], [[V_ADD_I32_e64_1]], implicit $exec
    ; GCN: S_ENDPGM implicit [[V_ADDC_U32_e64_]]
    %0:sreg_32_xm0 = S_MOV_B32 12345
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32 = IMPLICIT_DEF
    %3:vgpr_32 = IMPLICIT_DEF
    %4:vgpr_32, %5:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec
    %6:vgpr_32, %7:sreg_64_xexec = V_ADDC_U32_e64 %2, %3, %5, implicit $exec
    S_ENDPGM implicit %6
...

test/CodeGen/AMDGPU/fold-immediate-operand-shrink.mir (new file)

@@ -0,0 +1,347 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s
---
name: shrink_scalar_imm_vgpr_v_add_i32_e64_no_carry_out_use
tracksRegLiveness: true
body: |
  bb.0:
    ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_no_carry_out_use
    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
    ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
    %0:sreg_32_xm0 = S_MOV_B32 12345
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
    S_ENDPGM implicit %2
...
---
name: shrink_vgpr_scalar_imm_v_add_i32_e64_no_carry_out_use
tracksRegLiveness: true
body: |
  bb.0:
    ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_add_i32_e64_no_carry_out_use
    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
    ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
    %0:vgpr_32 = IMPLICIT_DEF
    %1:sreg_32_xm0 = S_MOV_B32 12345
    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
    S_ENDPGM implicit %2
...
---
name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use
tracksRegLiveness: true
body: |
  bb.0:
    ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_carry_out_use
    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
    ; GCN: S_ENDPGM implicit [[V_ADD_I32_e32_]]
    %0:sreg_32_xm0 = S_MOV_B32 12345
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
    S_ENDPGM implicit %2
...
---
# This does not shrink because it would violate the constant bus
# restriction to have an SGPR input and an immediate, so a copy would
# be required.
name: shrink_vector_imm_sgpr_v_add_i32_e64_no_carry_out_use
tracksRegLiveness: true
body: |
  bb.0:
    ; GCN-LABEL: name: shrink_vector_imm_sgpr_v_add_i32_e64_no_carry_out_use
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
    ; GCN: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[DEF]], [[V_MOV_B32_e32_]], implicit $exec
    ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]]
    %0:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
    %1:sreg_32_xm0 = IMPLICIT_DEF
    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
    S_ENDPGM implicit %2
...
---
name: shrink_sgpr_vector_imm_v_add_i32_e64_no_carry_out_use
tracksRegLiveness: true
body: |
  bb.0:
    ; GCN-LABEL: name: shrink_sgpr_vector_imm_v_add_i32_e64_no_carry_out_use
    ; GCN: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[DEF]], implicit $exec
    ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]]
    %0:sreg_32_xm0 = IMPLICIT_DEF
    %1:vgpr_32 = V_MOV_B32_e32 12345, implicit $exec
    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
    S_ENDPGM implicit %2
...
---
name: shrink_scalar_imm_vgpr_v_add_i32_e64_live_vcc_use
tracksRegLiveness: true
body: |
  bb.0:
    ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_live_vcc_use
    ; GCN: $vcc = S_MOV_B64 -1
    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
    ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc
    $vcc = S_MOV_B64 -1
    %0:sreg_32_xm0 = S_MOV_B32 12345
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
    S_ENDPGM implicit %2, implicit $vcc
...
---
name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_use
tracksRegLiveness: true
body: |
  ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_use
  ; GCN: bb.0:
  ; GCN: successors: %bb.1(0x80000000)
  ; GCN: $vcc = S_MOV_B64 -1
  ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
  ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
  ; GCN: bb.1:
  ; GCN: liveins: $vcc
  ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc
  bb.0:
    successors: %bb.1
    $vcc = S_MOV_B64 -1
    %0:sreg_32_xm0 = S_MOV_B32 12345
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec

  bb.1:
    liveins: $vcc
    S_ENDPGM implicit %2, implicit $vcc
...
---
name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_lo_use
tracksRegLiveness: true
body: |
  ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_liveout_vcc_lo_use
  ; GCN: bb.0:
  ; GCN: successors: %bb.1(0x80000000)
  ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
  ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
  ; GCN: bb.1:
  ; GCN: liveins: $vcc_lo
  ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc_lo
  bb.0:
    successors: %bb.1
    $vcc = S_MOV_B64 -1
    %0:sreg_32_xm0 = S_MOV_B32 12345
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec

  bb.1:
    liveins: $vcc_lo
    S_ENDPGM implicit %2, implicit $vcc_lo
...
---
# This is not OK to clobber because vcc_lo has a livein use.
name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc
tracksRegLiveness: true
body: |
  ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc
  ; GCN: bb.0:
  ; GCN: successors: %bb.1(0x80000000)
  ; GCN: $vcc = S_MOV_B64 -1
  ; GCN: bb.1:
  ; GCN: liveins: $vcc
  ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
  ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
  ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc_lo
  bb.0:
    successors: %bb.1
    $vcc = S_MOV_B64 -1

  bb.1:
    liveins: $vcc
    %0:sreg_32_xm0 = S_MOV_B32 12345
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec
    S_ENDPGM implicit %2, implicit $vcc_lo
...
---
name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc_hi
tracksRegLiveness: true
body: |
  ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_livein_vcc_hi
  ; GCN: bb.0:
  ; GCN: successors: %bb.1(0x80000000)
  ; GCN: $vcc_hi = S_MOV_B32 -1
  ; GCN: bb.1:
  ; GCN: successors: %bb.2(0x80000000)
  ; GCN: liveins: $vcc_hi
  ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
  ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
  ; GCN: bb.2:
  ; GCN: liveins: $vcc_hi
  ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]], implicit $vcc_hi
  bb.0:
    successors: %bb.1
    $vcc_hi = S_MOV_B32 -1

  bb.1:
    liveins: $vcc_hi
    %0:sreg_32_xm0 = S_MOV_B32 12345
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec

  bb.2:
    liveins: $vcc_hi
    S_ENDPGM implicit %2, implicit $vcc_hi
...
---
name: shrink_scalar_imm_vgpr_v_sub_i32_e64_no_carry_out_use
tracksRegLiveness: true
body: |
  bb.0:
    ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_sub_i32_e64_no_carry_out_use
    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
    ; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]]
    %0:sreg_32_xm0 = S_MOV_B32 12345
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, implicit $exec
    S_ENDPGM implicit %2
...
---
name: shrink_vgpr_scalar_imm_v_sub_i32_e64_no_carry_out_use
tracksRegLiveness: true
body: |
  bb.0:
    ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_sub_i32_e64_no_carry_out_use
    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
    ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
    ; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]]
    %0:vgpr_32 = IMPLICIT_DEF
    %1:sreg_32_xm0 = S_MOV_B32 12345
    %2:vgpr_32, %3:sreg_64 = V_SUB_I32_e64 %0, %1, implicit $exec
    S_ENDPGM implicit %2
...
---
name: shrink_scalar_imm_vgpr_v_subrev_i32_e64_no_carry_out_use
tracksRegLiveness: true
body: |
  bb.0:
    ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_subrev_i32_e64_no_carry_out_use
    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; GCN: [[V_SUB_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
    ; GCN: S_ENDPGM implicit [[V_SUB_I32_e32_]]
    %0:sreg_32_xm0 = S_MOV_B32 12345
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, implicit $exec
    S_ENDPGM implicit %2
...
---
name: shrink_vgpr_scalar_imm_v_subrev_i32_e64_no_carry_out_use
tracksRegLiveness: true
body: |
  bb.0:
    ; GCN-LABEL: name: shrink_vgpr_scalar_imm_v_subrev_i32_e64_no_carry_out_use
    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
    ; GCN: [[V_SUBREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_I32_e32 [[S_MOV_B32_]], [[DEF]], implicit-def $vcc, implicit $exec
    ; GCN: S_ENDPGM implicit [[V_SUBREV_I32_e32_]]
    %0:vgpr_32 = IMPLICIT_DEF
    %1:sreg_32_xm0 = S_MOV_B32 12345
    %2:vgpr_32, %3:sreg_64 = V_SUBREV_I32_e64 %0, %1, implicit $exec
    S_ENDPGM implicit %2
...
---
# We know this is OK because vcc isn't live out of the block, even
# though it had a defined value
name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_dead_no_liveout
tracksRegLiveness: true
body: |
  ; GCN-LABEL: name: shrink_scalar_imm_vgpr_v_add_i32_e64_known_dead_no_liveout
  ; GCN: bb.0:
  ; GCN: successors: %bb.1(0x80000000)
  ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 12345
  ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
  ; GCN: bb.1:
  ; GCN: S_ENDPGM implicit [[V_ADD_I32_e64_]]
  bb.0:
    successors: %bb.1
    $vcc = S_MOV_B64 -1
    %0:sreg_32_xm0 = S_MOV_B32 12345
    %1:vgpr_32 = IMPLICIT_DEF
    %2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, implicit $exec

  bb.1:
    S_ENDPGM implicit %2
...