From b080adfc0c597f7bddb2f6b9c9374492921a69f1 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 27 Sep 2018 18:55:20 +0000 Subject: [PATCH] [AMDGPU] Fold copy (copy vgpr) This allows reducing the number of VGPRs used in some cases. Differential Revision: https://reviews.llvm.org/D52577 llvm-svn: 343249 --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 14 +++++++++++ llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir | 27 +++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index d4b64ab203a6..bd0bc734060c 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -477,6 +477,20 @@ void SIFoldOperands::foldOperand( UseMI->setDesc(TII->get(MovOp)); CopiesToReplace.push_back(UseMI); } else { + if (UseMI->isCopy() && OpToFold.isReg() && + TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(0).getReg()) && + TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(1).getReg()) && + TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) && + TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()) && + !UseMI->getOperand(1).getSubReg()) { + UseMI->getOperand(1).setReg(OpToFold.getReg()); + UseMI->getOperand(1).setSubReg(OpToFold.getSubReg()); + UseMI->getOperand(1).setIsKill(false); + CopiesToReplace.push_back(UseMI); + OpToFold.setIsKill(false); + return; + } + const MCInstrDesc &UseDesc = UseMI->getDesc(); // Don't fold into target independent nodes. 
Target independent opcodes diff --git a/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir new file mode 100644 index 000000000000..7d1c75c3a5b4 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir @@ -0,0 +1,27 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s + +# GCN-LABEL: name: fold_vgpr_copy +# GCN: %0:vreg_64 = IMPLICIT_DEF +# GCN-NEXT: %4:vgpr_32 = IMPLICIT_DEF +# GCN-NEXT: %3:vgpr_32 = IMPLICIT_DEF +# GCN-NEXT: DS_WRITE2_B32_gfx9 %0.sub0, killed %4, killed %3, 0, 1, 0, implicit $exec + +--- +name: fold_vgpr_copy +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: vgpr_32 } +body: | + bb.0: + + %0:vreg_64 = IMPLICIT_DEF + %4 = IMPLICIT_DEF + %3 = IMPLICIT_DEF + %1:vgpr_32 = COPY %0.sub0 + %2:vgpr_32 = COPY %1 + DS_WRITE2_B32_gfx9 %2, killed %4, killed %3, 0, 1, 0, implicit $exec + +...