forked from OSchip/llvm-project
AMDGPU: Support shrinking add with FI in SIFoldOperands
Avoids test regression in a future patch.
llvm-svn: 359898
This commit is contained in:
parent
e28ab93546
commit
cfd0ca38b0
|
@ -216,53 +216,55 @@ static bool updateOperand(FoldCandidate &Fold,
|
|||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (Fold.needsShrink()) {
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
|
||||
if (Liveness != MachineBasicBlock::LQR_Dead)
|
||||
return false;
|
||||
if ((Fold.isImm() || Fold.isFI()) && Fold.needsShrink()) {
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
|
||||
if (Liveness != MachineBasicBlock::LQR_Dead)
|
||||
return false;
|
||||
|
||||
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
|
||||
int Op32 = Fold.getShrinkOpcode();
|
||||
MachineOperand &Dst0 = MI->getOperand(0);
|
||||
MachineOperand &Dst1 = MI->getOperand(1);
|
||||
assert(Dst0.isDef() && Dst1.isDef());
|
||||
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
|
||||
int Op32 = Fold.getShrinkOpcode();
|
||||
MachineOperand &Dst0 = MI->getOperand(0);
|
||||
MachineOperand &Dst1 = MI->getOperand(1);
|
||||
assert(Dst0.isDef() && Dst1.isDef());
|
||||
|
||||
bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
|
||||
bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
|
||||
|
||||
const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
|
||||
unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
|
||||
const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
|
||||
unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
|
||||
|
||||
MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
|
||||
MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
|
||||
|
||||
if (HaveNonDbgCarryUse) {
|
||||
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
|
||||
.addReg(AMDGPU::VCC, RegState::Kill);
|
||||
}
|
||||
|
||||
// Keep the old instruction around to avoid breaking iterators, but
|
||||
// replace it with a dummy instruction to remove uses.
|
||||
//
|
||||
// FIXME: We should not invert how this pass looks at operands to avoid
|
||||
// this. Should track set of foldable movs instead of looking for uses
|
||||
// when looking at a use.
|
||||
Dst0.setReg(NewReg0);
|
||||
for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
|
||||
MI->RemoveOperand(I);
|
||||
MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
|
||||
|
||||
if (Fold.isCommuted())
|
||||
TII.commuteInstruction(*Inst32, false);
|
||||
return true;
|
||||
if (HaveNonDbgCarryUse) {
|
||||
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
|
||||
.addReg(AMDGPU::VCC, RegState::Kill);
|
||||
}
|
||||
|
||||
Old.ChangeToImmediate(Fold.ImmToFold);
|
||||
// Keep the old instruction around to avoid breaking iterators, but
|
||||
// replace it with a dummy instruction to remove uses.
|
||||
//
|
||||
// FIXME: We should not invert how this pass looks at operands to avoid
|
||||
// this. Should track set of foldable movs instead of looking for uses
|
||||
// when looking at a use.
|
||||
Dst0.setReg(NewReg0);
|
||||
for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
|
||||
MI->RemoveOperand(I);
|
||||
MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
|
||||
|
||||
if (Fold.isCommuted())
|
||||
TII.commuteInstruction(*Inst32, false);
|
||||
return true;
|
||||
}
|
||||
|
||||
assert(!Fold.needsShrink() && "not handled");
|
||||
|
||||
if (Fold.isImm()) {
|
||||
Old.ChangeToImmediate(Fold.ImmToFold);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (Fold.isFI()) {
|
||||
Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
|
||||
return true;
|
||||
|
@ -363,7 +365,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
|
|||
if ((Opc == AMDGPU::V_ADD_I32_e64 ||
|
||||
Opc == AMDGPU::V_SUB_I32_e64 ||
|
||||
Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
|
||||
OpToFold->isImm()) {
|
||||
(OpToFold->isImm() || OpToFold->isFI())) {
|
||||
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
|
||||
|
||||
// Verify the other operand is a VGPR, otherwise we would violate the
|
||||
|
|
|
@ -16,8 +16,8 @@ body: |
|
|||
; GCN: liveins: $vgpr0
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[COPY]], 0, implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]]
|
||||
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_MOV_B32_e32_]], [[COPY]], implicit-def $vcc, implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
|
||||
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
%1:vgpr_32 = COPY $vgpr0
|
||||
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec
|
||||
|
@ -40,8 +40,8 @@ body: |
|
|||
; GCN: liveins: $vgpr0
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]]
|
||||
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[COPY]], [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
|
||||
%0:vgpr_32 = COPY $vgpr0
|
||||
%1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec
|
||||
|
@ -64,7 +64,7 @@ body: |
|
|||
; GCN: liveins: $sgpr0
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
|
||||
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[COPY]], 0, implicit $exec
|
||||
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]]
|
||||
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
%1:sreg_32_xm0 = COPY $sgpr0
|
||||
|
@ -88,7 +88,7 @@ body: |
|
|||
; GCN: liveins: $sgpr0
|
||||
; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[COPY]], 0, implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]]
|
||||
%0:sreg_32_xm0 = COPY $sgpr0
|
||||
%1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
|
@ -112,8 +112,8 @@ body: |
|
|||
; GCN: liveins: $vgpr0
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[S_MOV_B32_]], [[COPY]], 0, implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]]
|
||||
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[COPY]], implicit-def $vcc, implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
|
||||
%0:sreg_32_xm0 = S_MOV_B32 %stack.0
|
||||
%1:vgpr_32 = COPY $vgpr0
|
||||
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec
|
||||
|
@ -136,8 +136,8 @@ body: |
|
|||
; GCN: liveins: $vgpr0
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 %stack.0
|
||||
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[COPY]], [[S_MOV_B32_]], 0, implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]]
|
||||
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_MOV_B32_]], [[COPY]], implicit-def $vcc, implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
|
||||
%0:vgpr_32 = COPY $vgpr0
|
||||
%1:sreg_32_xm0 = S_MOV_B32 %stack.0
|
||||
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec
|
||||
|
@ -157,8 +157,8 @@ body: |
|
|||
|
||||
; GCN-LABEL: name: shrink_vgpr_imm_fi_vgpr_v_add_i32_e64_no_carry_out_use
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_I32_e64 [[V_MOV_B32_e32_]], 16, 0, implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e64_]]
|
||||
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 16, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
|
||||
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
%1:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
|
||||
%2:vgpr_32, %3:sreg_64 = V_ADD_I32_e64 %0, %1, 0, implicit $exec
|
||||
|
@ -199,8 +199,7 @@ body: |
|
|||
|
||||
; GCN-LABEL: name: shrink_vgpr_k_fi_vgpr_v_add_i32_e64_no_carry_out_use
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec
|
||||
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], implicit-def $vcc, implicit $exec
|
||||
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 1234, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
|
||||
; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
|
||||
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
%1:vgpr_32 = V_MOV_B32_e32 1234, implicit $exec
|
||||
|
|
Loading…
Reference in New Issue