forked from OSchip/llvm-project
[MachineCopyPropagation] Check CrossCopyRegClass for cross-class copies
On some AMDGPU subtargets, copying to and from AGPR registers using another AGPR register is not possible. An intermediate VGPR register is needed for an AGPR to AGPR copy. This is an issue when machine copy propagation forwards a COPY $agpr, replacing a COPY $vgpr, which results in $agpr = COPY $agpr. It is removing a cross-class copy that may have been optimized by previous passes and potentially creating an unoptimized cross-class copy later on. To avoid this issue, check CrossCopyRegClass to see whether a different register class will be needed for the copy. If so, then avoid forwarding the copy when the destination does not match the desired register class and the original copy already matches the desired register class. Issue seen while attempting to optimize another AGPR to AGPR issue: Live-ins: $agpr0 $vgpr0 = COPY $agpr0 $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr0 $agpr2 = COPY $vgpr0 $agpr3 = COPY $vgpr0 $agpr4 = COPY $vgpr0 After machine-cp: $vgpr0 = COPY $agpr0 $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr0 $agpr2 = COPY $agpr0 $agpr3 = COPY $agpr0 $agpr4 = COPY $agpr0 Machine-cp propagated COPY $agpr0 to replace $vgpr0, creating 3 AGPR to AGPR copies. Later this creates a cross-register copy from AGPR->VGPR->AGPR for each copy when the prior VGPR->AGPR copy was already optimal. Reviewed By: lkail, rampitec Differential Revision: https://reviews.llvm.org/D108011
This commit is contained in:
parent
2a35d59b2f
commit
549f6a819a
|
@ -414,6 +414,31 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
|
|||
if (!UseI.isCopy())
|
||||
return false;
|
||||
|
||||
const TargetRegisterClass *CopySrcRC =
|
||||
TRI->getMinimalPhysRegClass(CopySrcReg);
|
||||
const TargetRegisterClass *UseDstRC =
|
||||
TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
|
||||
const TargetRegisterClass *CrossCopyRC = TRI->getCrossCopyRegClass(CopySrcRC);
|
||||
|
||||
// If cross copy register class is not the same as copy source register class
|
||||
// then it is not possible to copy the register directly and requires a cross
|
||||
// register class copy. Forwarding this copy without checking register class of
|
||||
// UseDst may create additional cross register copies when expanding the copy
|
||||
// instruction in later passes.
|
||||
if (CopySrcRC != CrossCopyRC) {
|
||||
const TargetRegisterClass *CopyDstRC =
|
||||
TRI->getMinimalPhysRegClass(Copy.getOperand(0).getReg());
|
||||
|
||||
// Check if UseDstRC matches the necessary register class to copy from
|
||||
// CopySrc's register class. If so then forwarding the copy will not
|
||||
// introduce any cross-class copies. Else if CopyDstRC matches then keep the
|
||||
// copy and do not forward. If neither UseDstRC nor CopyDstRC matches then
|
||||
// we may need a cross register copy later but we do not worry about it
|
||||
// here.
|
||||
if (UseDstRC != CrossCopyRC && CopyDstRC == CrossCopyRC)
|
||||
return false;
|
||||
}
|
||||
|
||||
/// COPYs don't have register class constraints, so if the user instruction
|
||||
/// is a COPY, we just try to avoid introducing additional cross-class
|
||||
/// COPYs. For example:
|
||||
|
@ -430,9 +455,6 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
|
|||
///
|
||||
/// so we have reduced the number of cross-class COPYs and potentially
|
||||
/// introduced a nop COPY that can be removed.
|
||||
const TargetRegisterClass *UseDstRC =
|
||||
TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
|
||||
|
||||
const TargetRegisterClass *SuperRC = UseDstRC;
|
||||
for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses();
|
||||
SuperRC; SuperRC = *SuperRCI++)
|
||||
|
|
|
@ -801,6 +801,14 @@ const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
|
|||
return &AMDGPU::VGPR_32RegClass;
|
||||
}
|
||||
|
||||
// Select the register class through which a value of class RC is copied.
// An AGPR class on a subtarget without GFX90A instructions is mapped to its
// equivalent VGPR class; every other class is handed back unchanged.
const TargetRegisterClass *
SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  const bool NeedsVGPRIntermediate = isAGPRClass(RC) && !ST.hasGFX90AInsts();
  return NeedsVGPRIntermediate ? getEquivalentVGPRClass(RC) : RC;
}
|
||||
|
||||
static unsigned getNumSubRegsForSpillOp(unsigned Op) {
|
||||
|
||||
switch (Op) {
|
||||
|
|
|
@ -108,6 +108,13 @@ public:
|
|||
const TargetRegisterClass *getPointerRegClass(
|
||||
const MachineFunction &MF, unsigned Kind = 0) const override;
|
||||
|
||||
/// Returns a legal register class to copy a register in the specified class
|
||||
/// to or from. If it is possible to copy the register directly without using
|
||||
/// a cross register class copy, return the specified RC. Returns NULL if it
|
||||
/// is not possible to copy between two registers of the specified class.
|
||||
const TargetRegisterClass *
|
||||
getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
|
||||
|
||||
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset,
|
||||
bool IsLoad, bool IsKill = true) const;
|
||||
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx908 %s -o - -run-pass machine-cp -verify-machineinstrs | FileCheck -check-prefix=GFX908 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx90a %s -o - -run-pass machine-cp -verify-machineinstrs | FileCheck -check-prefix=GFX90A %s
|
||||
|
||||
---
|
||||
name: do_not_propagate_agpr_to_agpr
|
||||
body: |
|
||||
bb.0:
|
||||
successors:
|
||||
liveins: $agpr0
|
||||
|
||||
; GFX908-LABEL: name: do_not_propagate_agpr_to_agpr
|
||||
; GFX908: renamable $vgpr0 = COPY renamable $agpr0, implicit $exec
|
||||
; GFX908: renamable $agpr1 = COPY renamable $vgpr0, implicit $exec
|
||||
; GFX908: renamable $agpr2 = COPY renamable $vgpr0, implicit $exec
|
||||
; GFX908: S_ENDPGM 0, implicit $vgpr0, implicit $agpr1, implicit $agpr2
|
||||
; GFX90A-LABEL: name: do_not_propagate_agpr_to_agpr
|
||||
; GFX90A: renamable $vgpr0 = COPY renamable $agpr0, implicit $exec
|
||||
; GFX90A: renamable $agpr1 = COPY $agpr0, implicit $exec
|
||||
; GFX90A: renamable $agpr2 = COPY $agpr0, implicit $exec
|
||||
; GFX90A: S_ENDPGM 0, implicit $vgpr0, implicit $agpr1, implicit $agpr2
|
||||
renamable $vgpr0 = COPY renamable $agpr0, implicit $exec
|
||||
renamable $agpr1 = COPY renamable $vgpr0, implicit $exec
|
||||
renamable $agpr2 = COPY renamable $vgpr0, implicit $exec
|
||||
S_ENDPGM 0, implicit $vgpr0, implicit $agpr1, implicit $agpr2
|
||||
...
|
||||
---
|
||||
name: propagate_vgpr_to_agpr
|
||||
body: |
|
||||
bb.0:
|
||||
successors:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX908-LABEL: name: propagate_vgpr_to_agpr
|
||||
; GFX908: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec
|
||||
; GFX908: renamable $agpr1 = COPY $vgpr0, implicit $exec
|
||||
; GFX908: renamable $agpr2 = COPY $vgpr0, implicit $exec
|
||||
; GFX908: S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2
|
||||
; GFX90A-LABEL: name: propagate_vgpr_to_agpr
|
||||
; GFX90A: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec
|
||||
; GFX90A: renamable $agpr1 = COPY $vgpr0, implicit $exec
|
||||
; GFX90A: renamable $agpr2 = COPY $vgpr0, implicit $exec
|
||||
; GFX90A: S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2
|
||||
renamable $agpr0 = COPY renamable $vgpr0, implicit $exec
|
||||
renamable $agpr1 = COPY renamable $agpr0, implicit $exec
|
||||
renamable $agpr2 = COPY renamable $agpr0, implicit $exec
|
||||
S_ENDPGM 0, implicit $agpr0, implicit $agpr1, implicit $agpr2
|
||||
...
|
||||
---
|
||||
name: propagate_agpr_to_vgpr
|
||||
body: |
|
||||
bb.0:
|
||||
successors:
|
||||
liveins: $agpr0
|
||||
|
||||
; GFX908-LABEL: name: propagate_agpr_to_vgpr
|
||||
; GFX908: renamable $vgpr0 = COPY renamable $agpr0, implicit $exec
|
||||
; GFX908: renamable $vgpr1 = COPY $agpr0, implicit $exec
|
||||
; GFX908: renamable $vgpr2 = COPY $agpr0, implicit $exec
|
||||
; GFX908: S_ENDPGM 0, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
|
||||
; GFX90A-LABEL: name: propagate_agpr_to_vgpr
|
||||
; GFX90A: renamable $vgpr0 = COPY renamable $agpr0, implicit $exec
|
||||
; GFX90A: renamable $vgpr1 = COPY $agpr0, implicit $exec
|
||||
; GFX90A: renamable $vgpr2 = COPY $agpr0, implicit $exec
|
||||
; GFX90A: S_ENDPGM 0, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
|
||||
renamable $vgpr0 = COPY renamable $agpr0, implicit $exec
|
||||
renamable $vgpr1 = COPY renamable $vgpr0, implicit $exec
|
||||
renamable $vgpr2 = COPY renamable $vgpr0, implicit $exec
|
||||
S_ENDPGM 0, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
|
||||
...
|
Loading…
Reference in New Issue