forked from OSchip/llvm-project
[AMDGPU] Fold copy (copy vgpr)
This allows reducing the number of VGPRs used in some cases. Differential Revision: https://reviews.llvm.org/D52577 llvm-svn: 343249
This commit is contained in:
parent
670c147d83
commit
b080adfc0c
|
@ -477,6 +477,20 @@ void SIFoldOperands::foldOperand(
|
|||
UseMI->setDesc(TII->get(MovOp));
|
||||
CopiesToReplace.push_back(UseMI);
|
||||
} else {
|
||||
if (UseMI->isCopy() && OpToFold.isReg() &&
|
||||
TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(0).getReg()) &&
|
||||
TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(1).getReg()) &&
|
||||
TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
|
||||
TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()) &&
|
||||
!UseMI->getOperand(1).getSubReg()) {
|
||||
UseMI->getOperand(1).setReg(OpToFold.getReg());
|
||||
UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
|
||||
UseMI->getOperand(1).setIsKill(false);
|
||||
CopiesToReplace.push_back(UseMI);
|
||||
OpToFold.setIsKill(false);
|
||||
return;
|
||||
}
|
||||
|
||||
const MCInstrDesc &UseDesc = UseMI->getDesc();
|
||||
|
||||
// Don't fold into target independent nodes. Target independent opcodes
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s
|
||||
|
||||
# GCN-LABEL: name: fold_vgpr_copy
|
||||
# GCN: %0:vreg_64 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %4:vgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: %3:vgpr_32 = IMPLICIT_DEF
|
||||
# GCN-NEXT: DS_WRITE2_B32_gfx9 %0.sub0, killed %4, killed %3, 0, 1, 0, implicit $exec
|
||||
|
||||
---
|
||||
name: fold_vgpr_copy
|
||||
registers:
|
||||
- { id: 0, class: vreg_64 }
|
||||
- { id: 1, class: vgpr_32 }
|
||||
- { id: 2, class: vgpr_32 }
|
||||
- { id: 3, class: vgpr_32 }
|
||||
- { id: 4, class: vgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
%0:vreg_64 = IMPLICIT_DEF
|
||||
%4 = IMPLICIT_DEF
|
||||
%3 = IMPLICIT_DEF
|
||||
%1:vgpr_32 = COPY %0.sub0
|
||||
%2:vgpr_32 = COPY %1
|
||||
DS_WRITE2_B32_gfx9 %2, killed %4, killed %3, 0, 1, 0, implicit $exec
|
||||
|
||||
...
|
Loading…
Reference in New Issue