AMDGPU: Fix visiting physreg dest users when folding immediate copies
This can fold the immediate into the physical destination, but it
should not look for further users of the register. Fixes a regression
introduced by 766cb615a3.
commit 68fab44acf (parent a3036b3863)
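For context on why the guard helps: MachineRegisterInfo's use list for a
physical register covers every use of that register in the function, with no
dataflow connection to the COPY being folded (the physreg may be redefined in
between), while virtual registers are still in SSA form at this point and have
a single definition. A minimal sketch of the shape of the fix, assuming the
surrounding SIFoldOperands::foldOperand context; visitCopyUsersForFolding is a
hypothetical name standing in for the use-list loop in the hunk below:

    // Sketch only -- the hunk below is the actual change.
    if (!DestReg.isPhysical()) {
      // Safe: DestReg is virtual, so its use list contains only uses
      // reached by this one SSA definition.
      visitCopyUsersForFolding(DestReg); // hypothetical helper for the
                                         // use_iterator loop below
    }
    // A physical destination can still have the immediate folded into
    // this COPY itself, but its use list must not be walked.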
@@ -666,32 +666,34 @@ void SIFoldOperands::foldOperand(
     return;

   const TargetRegisterClass *DestRC = TRI->getRegClassForReg(*MRI, DestReg);
-  if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) {
-    MachineRegisterInfo::use_iterator NextUse;
-    SmallVector<FoldCandidate, 4> CopyUses;
-    for (MachineRegisterInfo::use_iterator Use = MRI->use_begin(DestReg),
-           E = MRI->use_end();
-         Use != E; Use = NextUse) {
-      NextUse = std::next(Use);
-      // There's no point trying to fold into an implicit operand.
-      if (Use->isImplicit())
-        continue;
+  if (!DestReg.isPhysical()) {
+    if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) {
+      MachineRegisterInfo::use_iterator NextUse;
+      SmallVector<FoldCandidate, 4> CopyUses;
+      for (MachineRegisterInfo::use_iterator Use = MRI->use_begin(DestReg),
+             E = MRI->use_end();
+           Use != E; Use = NextUse) {
+        NextUse = std::next(Use);
+        // There's no point trying to fold into an implicit operand.
+        if (Use->isImplicit())
+          continue;

-      FoldCandidate FC = FoldCandidate(Use->getParent(), Use.getOperandNo(),
-                                       &UseMI->getOperand(1));
-      CopyUses.push_back(FC);
-    }
-    for (auto &F : CopyUses) {
-      foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList, CopiesToReplace);
-    }
-  }
+        FoldCandidate FC = FoldCandidate(Use->getParent(), Use.getOperandNo(),
+                                         &UseMI->getOperand(1));
+        CopyUses.push_back(FC);
+      }
+      for (auto &F : CopyUses) {
+        foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList, CopiesToReplace);
+      }
+    }

-  if (DestRC == &AMDGPU::AGPR_32RegClass &&
-      TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
-    UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32));
-    UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
-    CopiesToReplace.push_back(UseMI);
-    return;
+    if (DestRC == &AMDGPU::AGPR_32RegClass &&
+        TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
+      UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32));
+      UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
+      CopiesToReplace.push_back(UseMI);
+      return;
+    }
   }

   // In order to fold immediates into copies, we need to change the
@@ -87,3 +87,26 @@ body: |
     S_ENDPGM 0, implicit $vgpr0

 ...
+
+# The users of $vgpr1 should not be visited for further immediate
+# folding.
+
+# GCN-LABEL: name: no_fold_physreg_users_vgpr{{$}}
+# GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+# GCN-NEXT: S_NOP 0, implicit-def $vgpr1
+# GCN-NEXT: %2:vgpr_32 = COPY $vgpr1
+# GCN-NEXT: $vgpr2 = COPY %2
+---
+name: no_fold_physreg_users_vgpr
+tracksRegLiveness: true
+body: |
+  bb.0:
+    %0:sreg_32 = S_MOV_B32 0
+    %1:vgpr_32 = COPY %0
+    $vgpr1 = COPY %0
+    S_NOP 0, implicit-def $vgpr1
+    %2:vgpr_32 = COPY $vgpr1
+    $vgpr2 = COPY %2
+    S_ENDPGM 0
+
+...
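(The RUN line of this MIR test sits above the hunk and is not part of the
diff; tests of this kind are conventionally driven through the standalone
fold pass, presumably along the lines of:

    # RUN: llc -march=amdgcn -run-pass si-fold-operands -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s

where the exact flags are an assumption, not shown by this commit.)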
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+
+; Make sure the return value of the first call is not overwritten with
+; a constant before the fadd use.
+
+; CHECK-LABEL: vgpr_multi_use_imm_fold:
+; CHECK: v_mov_b32_e32 v0, 0{{$}}
+; CHECK: v_mov_b32_e32 v1, 2.0{{$}}
+; CHECK: s_swappc_b64
+; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], 0
+; CHECK: s_swappc_b64
+define amdgpu_kernel void @vgpr_multi_use_imm_fold() {
+entry:
+  store double 0.0, double addrspace(1)* undef, align 8
+  %call0 = tail call fastcc double @__ocml_log_f64(double 2.0)
+  %op = fadd double %call0, 0.0
+  %call1 = tail call fastcc double @__ocml_sqrt_f64(double %op)
+  ret void
+}
+
+declare hidden fastcc double @__ocml_log_f64(double)
+declare hidden fastcc double @__ocml_sqrt_f64(double)