MachineCopyPropagation: Catch copies of the form A<-B;A<-B

Differential Revision: http://reviews.llvm.org/D17475

llvm-svn: 261966
This commit is contained in:
Matthias Braun 2016-02-26 03:18:55 +00:00
parent e39ff70685
commit 9dcd65f478
4 changed files with 179 additions and 63 deletions

View File

@ -52,6 +52,7 @@ namespace {
private:
void ClobberRegister(unsigned Reg);
void CopyPropagateBlock(MachineBasicBlock &MBB);
bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
/// Candidates for deletion.
SmallSetVector<MachineInstr*, 8> MaybeDeadCopies;
@ -109,29 +110,61 @@ void MachineCopyPropagation::ClobberRegister(unsigned Reg) {
}
}
/// isNopCopy - Return true if the specified copy is really a nop. That is
/// if the source of the copy is the same of the definition of the copy that
/// supplied the source. If the source of the copy is a sub-register than it
/// must check the sub-indices match. e.g.
/// ecx = mov eax
/// al = mov cl
/// But not
/// ecx = mov eax
/// al = mov ch
static bool isNopCopy(const MachineInstr *CopyMI, unsigned Def, unsigned Src,
const TargetRegisterInfo *TRI) {
unsigned SrcSrc = CopyMI->getOperand(1).getReg();
if (Def == SrcSrc)
/// Return true if \p PreviousCopy did copy register \p Src to register \p Def.
/// This fact may have been obscured by sub register usage or may not be true at
/// all even though Src and Def are subregisters of the registers used in
/// PreviousCopy. e.g.
/// isNopCopy("ecx = COPY eax", AX, CX) == true
/// isNopCopy("ecx = COPY eax", AH, CL) == false
static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src,
unsigned Def, const TargetRegisterInfo *TRI) {
unsigned PreviousSrc = PreviousCopy.getOperand(1).getReg();
unsigned PreviousDef = PreviousCopy.getOperand(0).getReg();
if (Src == PreviousSrc) {
assert(Def == PreviousDef);
return true;
if (TRI->isSubRegister(SrcSrc, Def)) {
unsigned SrcDef = CopyMI->getOperand(0).getReg();
unsigned SubIdx = TRI->getSubRegIndex(SrcSrc, Def);
if (!SubIdx)
return false;
return SubIdx == TRI->getSubRegIndex(SrcDef, Src);
}
if (!TRI->isSubRegister(PreviousSrc, Src))
return false;
unsigned SubIdx = TRI->getSubRegIndex(PreviousSrc, Src);
return SubIdx == TRI->getSubRegIndex(PreviousDef, Def);
}
return false;
/// Remove instruction \p Copy if there exists a previous copy that copies the
/// register \p Src to the register \p Def; This may happen indirectly by
/// copying the super registers.
bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
unsigned Def) {
// Avoid eliminating a copy from/to a reserved registers as we cannot predict
// the value (Example: The sparc zero register is writable but stays zero).
if (MRI->isReserved(Src) || MRI->isReserved(Def))
return false;
// Search for an existing copy.
Reg2MIMap::iterator CI = AvailCopyMap.find(Def);
if (CI == AvailCopyMap.end())
return false;
// Check that the existing copy uses the correct sub registers.
MachineInstr &PrevCopy = *CI->second;
if (!isNopCopy(PrevCopy, Src, Def, TRI))
return false;
DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump());
// Copy was redundantly redefining either Src or Def. Remove earlier kill
// flags between Copy and PrevCopy because the value will be reused now.
assert(Copy.isCopy());
unsigned CopyDef = Copy.getOperand(0).getReg();
assert(CopyDef == Src || CopyDef == Def);
for (MachineInstr &MI :
make_range(PrevCopy.getIterator(), Copy.getIterator()))
MI.clearRegisterKills(CopyDef, TRI);
Copy.eraseFromParent();
Changed = true;
++NumDeletes;
return true;
}
void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
@ -149,38 +182,23 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
!TargetRegisterInfo::isVirtualRegister(Src) &&
"MachineCopyPropagation should be run after register allocation!");
DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
if (CI != AvailCopyMap.end()) {
MachineInstr *CopyMI = CI->second;
if (!MRI->isReserved(Def) && isNopCopy(CopyMI, Def, Src, TRI)) {
// The two copies cancel out and the source of the first copy
// hasn't been overridden, eliminate the second one. e.g.
// %ECX<def> = COPY %EAX<kill>
// ... nothing clobbered EAX.
// %EAX<def> = COPY %ECX
// =>
// %ECX<def> = COPY %EAX
//
// Also avoid eliminating a copy from reserved registers unless the
// definition is proven not clobbered. e.g.
// %RSP<def> = COPY %RAX
// CALL
// %RAX<def> = COPY %RSP
DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; MI->dump());
// Clear any kills of Def between CopyMI and MI. This extends the
// live range.
for (MachineInstr &MMI
: make_range(CopyMI->getIterator(), MI->getIterator()))
MMI.clearRegisterKills(Def, TRI);
MI->eraseFromParent();
Changed = true;
++NumDeletes;
continue;
}
}
// The two copies cancel out and the source of the first copy
// hasn't been overridden, eliminate the second one. e.g.
// %ECX<def> = COPY %EAX
// ... nothing clobbered EAX.
// %EAX<def> = COPY %ECX
// =>
// %ECX<def> = COPY %EAX
//
// or
//
// %ECX<def> = COPY %EAX
// ... nothing clobbered EAX.
// %ECX<def> = COPY %EAX
// =>
// %ECX<def> = COPY %EAX
if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def))
continue;
// If Src is defined by a previous copy, the previous copy cannot be
// eliminated.
@ -293,9 +311,8 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
}
// Any previous copy definition or reading the Defs is no longer available.
for (unsigned Reg : Defs) {
for (unsigned Reg : Defs)
ClobberRegister(Reg);
}
}
// If MBB doesn't have successors, delete the copies whose defs are not used.

View File

@ -10,11 +10,10 @@ declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone
; TODO: this constant should be folded:
; VI: s_mov_b32 s[[ALLBITS:[0-9+]]], -1
; VI: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff
; VI: s_mov_b32 s[[LOW1:[0-9+]]], s[[ALLBITS]]
; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
; VI: s_mov_b32 s[[LOW:[0-9+]]], s[[ALLBITS]]
; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW]]:[[HIGH1]]]
; VI: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff
; VI: s_mov_b32 s[[LOW2:[0-9+]]], s[[ALLBITS]]
; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]]
; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW]]:[[HIGH2]]]
define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind {
%rsq_clamped = call double @llvm.AMDGPU.rsq.clamped.f64(double %src) nounwind readnone

View File

@ -28,11 +28,10 @@ define void @rsq_clamp_f32(float addrspace(1)* %out, float %src) #0 {
; TODO: this constant should be folded:
; VI: s_mov_b32 s[[ALLBITS:[0-9+]]], -1
; VI: s_mov_b32 s[[HIGH1:[0-9+]]], 0x7fefffff
; VI: s_mov_b32 s[[LOW1:[0-9+]]], s[[ALLBITS]]
; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
; VI: s_mov_b32 s[[LOW:[0-9+]]], s[[ALLBITS]]
; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW]]:[[HIGH1]]]
; VI: s_mov_b32 s[[HIGH2:[0-9+]]], 0xffefffff
; VI: s_mov_b32 s[[LOW2:[0-9+]]], s[[ALLBITS]]
; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]]
; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW]]:[[HIGH2]]]
define void @rsq_clamp_f64(double addrspace(1)* %out, double %src) #0 {
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
store double %rsq_clamp, double addrspace(1)* %out

View File

@ -1,4 +1,4 @@
# RUN: llc -march=x86 -run-pass machine-cp -o /dev/null %s 2>&1 | FileCheck %s
# RUN: llc -march=x86 -run-pass machine-cp -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
--- |
declare void @foo()
@ -6,8 +6,14 @@
define void @copyprop_remove_kill1() { ret void }
define void @copyprop_remove_kill2() { ret void }
define void @copyprop0() { ret void }
define void @copyprop1() { ret void }
define void @copyprop2() { ret void }
define void @nocopyprop0() { ret void }
define void @nocopyprop1() { ret void }
define void @nocopyprop2() { ret void }
define void @nocopyprop3() { ret void }
define void @nocopyprop4() { ret void }
define void @nocopyprop5() { ret void }
...
---
# The second copy is redundant and will be removed, check that we also remove
@ -79,6 +85,38 @@ body: |
NOOP implicit %rax, implicit %rdi
...
---
# The 2nd copy is redundant; The call preserves the source and dest register.
# CHECK-LABEL: name: copyprop1
# CHECK: bb.0:
# CHECK-NEXT: %rax = COPY %rdi
# CHECK-NEXT: NOOP implicit %rax
# CHECK-NEXT: NOOP implicit %rax, implicit %rdi
name: copyprop1
body: |
bb.0:
%rax = COPY %rdi
NOOP implicit killed %rax
%rax = COPY %rdi
NOOP implicit %rax, implicit %rdi
...
---
# CHECK-LABEL: name: copyprop2
# CHECK: bb.0:
# CHECK-NEXT: %rax = COPY %rdi
# CHECK-NEXT: NOOP implicit %ax
# CHECK-NEXT: CALL64pcrel32 @foo, csr_64_rt_mostregs
# CHECK-NOT: %rax = COPY %rdi
# CHECK-NEXT: NOOP implicit %rax, implicit %rdi
name: copyprop2
body: |
bb.0:
%rax = COPY %rdi
NOOP implicit killed %ax
CALL64pcrel32 @foo, csr_64_rt_mostregs
%rax = COPY %rdi
NOOP implicit %rax, implicit %rdi
...
---
# The second copy is not redundant if the source register (%rax) is clobbered
# even if the dest (%rbp) is not.
# CHECK-LABEL: name: nocopyprop0
@ -112,3 +150,66 @@ body: |
%rax = COPY %rbp
NOOP implicit %rax, implicit %rbp
...
---
# The second copy is not redundant if the source register (%rax) is clobbered
# even if the dest (%rbp) is not.
# CHECK-LABEL: name: nocopyprop2
# CHECK: bb.0:
# CHECK-NEXT: %rax = COPY %rbp
# CHECK-NEXT: CALL64pcrel32 @foo, csr_64, implicit %rax, implicit %rbp
# CHECK-NEXT: %rax = COPY %rbp
# CHECK-NEXT: NOOP implicit %rax, implicit %rbp
name: nocopyprop2
body: |
bb.0:
%rax = COPY %rbp
CALL64pcrel32 @foo, csr_64, implicit %rax, implicit %rbp
%rax = COPY %rbp
NOOP implicit %rax, implicit %rbp
...
---
# The second copy is not redundant if the dest register (%rax) is clobbered
# even if the source (%rbp) is not.
# CHECK-LABEL: name: nocopyprop3
# CHECK: bb.0:
# CHECK-NEXT: %rbp = COPY %rax
# CHECK-NEXT: CALL64pcrel32 @foo, csr_64, implicit %rax, implicit %rbp
# CHECK-NEXT: %rbp = COPY %rax
# CHECK-NEXT: NOOP implicit %rax, implicit %rbp
name: nocopyprop3
body: |
bb.0:
%rbp = COPY %rax
CALL64pcrel32 @foo, csr_64, implicit %rax, implicit %rbp
%rbp = COPY %rax
NOOP implicit %rax, implicit %rbp
...
---
# A reserved register may change its value so the 2nd copy is not redundant.
# CHECK-LABEL: name: nocopyprop4
# CHECK: bb.0:
# CHECK-NEXT: %rax = COPY %rip
# CHECK-NEXT: NOOP implicit %rax
# CHECK-NEXT: %rax = COPY %rip
# CHECK-NEXT: NOOP implicit %rax
name: nocopyprop4
body: |
bb.0:
%rax = COPY %rip
NOOP implicit %rax
%rax = COPY %rip
NOOP implicit %rax
...
---
# Writing to a reserved register may have additional effects (slightly illegal
# testcase because writing to %rip like this should make the instruction a jump)
# CHECK-LABEL: name: nocopyprop5
# CHECK: bb.0:
# CHECK-NEXT: %rip = COPY %rax
# CHECK-NEXT: %rip = COPY %rax
name: nocopyprop5
body: |
bb.0:
%rip = COPY %rax
%rip = COPY %rax
...