forked from OSchip/llvm-project
[AMDGPU] Fold V_CNDMASK with identical source operands
Such instructions sometimes appear after lowering and folding. Differential Revision: https://reviews.llvm.org/D31318 llvm-svn: 298723
This commit is contained in:
parent
4986d9fb45
commit
70603dcef2
|
@ -591,6 +591,32 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
|
|||
return false;
|
||||
}
|
||||
|
||||
// Try to fold an instruction into a simpler one
|
||||
static bool tryFoldInst(const SIInstrInfo *TII,
|
||||
MachineInstr *MI) {
|
||||
unsigned Opc = MI->getOpcode();
|
||||
|
||||
if (Opc == AMDGPU::V_CNDMASK_B32_e32 ||
|
||||
Opc == AMDGPU::V_CNDMASK_B32_e64 ||
|
||||
Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
|
||||
const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
|
||||
const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
|
||||
if (Src1->isIdenticalTo(*Src0)) {
|
||||
DEBUG(dbgs() << "Folded " << *MI << " into ");
|
||||
int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
|
||||
if (Src2Idx != -1)
|
||||
MI->RemoveOperand(Src2Idx);
|
||||
MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
|
||||
mutateCopyOp(*MI, TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY
|
||||
: getMovOpc(false)));
|
||||
DEBUG(dbgs() << *MI << '\n');
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void SIFoldOperands::foldInstOperand(MachineInstr &MI,
|
||||
MachineOperand &OpToFold) const {
|
||||
// We need to mutate the operands of new mov instructions to add implicit
|
||||
|
@ -692,6 +718,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
|
|||
}
|
||||
DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
|
||||
static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
|
||||
tryFoldInst(TII, Fold.UseMI);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -907,6 +934,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
|
|||
Next = std::next(I);
|
||||
MachineInstr &MI = *I;
|
||||
|
||||
tryFoldInst(TII, &MI);
|
||||
|
||||
if (!isFoldableCopy(MI)) {
|
||||
if (IsIEEEMode || !tryFoldOMod(MI))
|
||||
tryFoldClamp(MI);
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
# Test for the si-fold-operands pass: V_CNDMASK instructions whose src0 and
# src1 operands are identical.
#
# Expected output, per the FileCheck patterns below:
#   %1: both sources are the immediate 0             -> V_MOV_B32_e32 0
#   %2: both sources are %1                          -> V_MOV_B32_e32 0
#       (presumably the constant defining %1 is folded into the operands first)
#   %4: both sources are %3 (IMPLICIT_DEF), e64 form -> COPY %3
#   %5: COPY of %1                                   -> V_MOV_B32_e32 0
#   %6: sources are %5 and the immediate 0           -> V_MOV_B32_e32 0
#   %7: both sources are %3, e32 form (implicit vcc) -> COPY %3
# RUN: llc -march=amdgcn -run-pass si-fold-operands -verify-machineinstrs -o - %s | FileCheck %s
|
||||
|
||||
# CHECK: %1 = V_MOV_B32_e32 0, implicit %exec
|
||||
# CHECK: %2 = V_MOV_B32_e32 0, implicit %exec
|
||||
# CHECK: %4 = COPY %3
|
||||
# CHECK: %5 = V_MOV_B32_e32 0, implicit %exec
|
||||
# CHECK: %6 = V_MOV_B32_e32 0, implicit %exec
|
||||
# CHECK: %7 = COPY %3
|
||||
|
||||
---
|
||||
name: fold_cndmask
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: sgpr_64 }
|
||||
- { id: 1, class: vgpr_32 }
|
||||
- { id: 2, class: vgpr_32 }
|
||||
- { id: 3, class: vgpr_32 }
|
||||
- { id: 4, class: vgpr_32 }
|
||||
- { id: 5, class: vgpr_32 }
|
||||
- { id: 6, class: vgpr_32 }
|
||||
- { id: 7, class: vgpr_32 }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = V_CNDMASK_B32_e64 0, 0, %0, implicit %exec
|
||||
%2 = V_CNDMASK_B32_e64 %1, %1, %0, implicit %exec
|
||||
%3 = IMPLICIT_DEF
|
||||
%4 = V_CNDMASK_B32_e64 %3, %3, %0, implicit %exec
|
||||
%5 = COPY %1
|
||||
%6 = V_CNDMASK_B32_e64 %5, 0, %0, implicit %exec
|
||||
%vcc = IMPLICIT_DEF
|
||||
%7 = V_CNDMASK_B32_e32 %3, %3, implicit %exec, implicit %vcc
|
||||
|
||||
...
|
Loading…
Reference in New Issue