Add a peephole optimization to optimize pairs of bitcasts. e.g.

v2 = bitcast v1
...
v3 = bitcast v2
...
   = v3
=>
v2 = bitcast v1
...
   = v1
if v1 and v3 are in the same register class.

Bitcasts between i32 and fp (and others) are often not nops since the two
values are in different register classes. These bitcast instructions are
often left behind because they are in different basic blocks and cannot be
eliminated by dag combine.

rdar://9104514

llvm-svn: 127668
This commit is contained in:
Evan Cheng 2011-03-15 05:13:13 +00:00
parent 880e299dcd
commit e4b8ac9fef
2 changed files with 120 additions and 4 deletions

View File

@ -30,6 +30,15 @@
// If the "sub" instruction already sets (or could be modified to set) the
// same flag that the "cmp" instruction sets and that "bz" uses, then we can
// eliminate the "cmp" instruction.
//
// - Optimize Bitcast pairs:
//
// v1 = bitcast v0
// v2 = bitcast v1
// = v2
// =>
// v1 = bitcast v0
// = v0
//
//===----------------------------------------------------------------------===//
@ -57,7 +66,8 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
cl::desc("Disable the peephole optimizer"));
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumEliminated, "Number of compares eliminated");
STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate foled");
namespace {
@ -85,6 +95,7 @@ namespace {
}
private:
bool OptimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs);
@ -243,12 +254,85 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
return Changed;
}
/// OptimizeBitcastInstr - If the instruction is a bitcast instruction A that
/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcast
/// a value cross register classes), and the source is defined by another
/// bitcast instruction B. And if the register class of source of B matches
/// the register class of instruction A, then it is legal to replace all uses
/// of the def of A with source of B. e.g.
/// %vreg0<def> = VMOVSR %vreg1
/// %vreg3<def> = VMOVRS %vreg0
/// Replace all uses of vreg3 with vreg1.
bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
MachineBasicBlock *MBB) {
unsigned NumDefs = MI->getDesc().getNumDefs();
unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs;
if (NumDefs != 1)
return false;
unsigned Def = 0;
unsigned Src = 0;
for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
if (MO.isDef())
Def = Reg;
else if (Src)
// Multiple sources?
return false;
else
Src = Reg;
}
assert(Def && Src && "Malformed bitcast instruction!");
MachineInstr *DefMI = MRI->getVRegDef(Src);
if (!DefMI || !DefMI->getDesc().isBitcast())
return false;
unsigned SrcDef = 0;
unsigned SrcSrc = 0;
NumDefs = DefMI->getDesc().getNumDefs();
NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs;
if (NumDefs != 1)
return false;
for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
const MachineOperand &MO = DefMI->getOperand(i);
if (!MO.isReg() || MO.isDef())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
if (MO.isDef())
SrcDef = Reg;
else if (SrcSrc)
// Multiple sources?
return false;
else
SrcSrc = Reg;
}
if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def))
return false;
MRI->replaceRegWith(Def, SrcSrc);
MRI->clearKillFlags(SrcSrc);
MI->eraseFromParent();
++NumBitcasts;
return true;
}
/// OptimizeCmpInstr - If the instruction is a compare and the previous
/// instruction it's comparing against already sets (or could be modified to
/// set) the same flag as the compare, then we can remove the comparison and use
/// the flag from the previous instruction.
bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
MachineBasicBlock *MBB){
MachineBasicBlock *MBB) {
// If this instruction is a comparison against zero and isn't comparing a
// physical register, we can try to optimize it.
unsigned SrcReg;
@ -259,7 +343,7 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
// Attempt to optimize the comparison instruction.
if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) {
++NumEliminated;
++NumCmps;
return true;
}
@ -345,7 +429,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
if (MI->getDesc().isCompare()) {
const TargetInstrDesc &TID = MI->getDesc();
if (TID.isBitcast()) {
if (OptimizeBitcastInstr(MI, MBB)) {
// MI is deleted.
Changed = true;
MII = First ? I->begin() : llvm::next(PMII);
continue;
}
} else if (TID.isCompare()) {
if (OptimizeCmpInstr(MI, MBB)) {
// MI is deleted.
Changed = true;

View File

@ -0,0 +1,23 @@
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
; vmov s0, r0 + vmov r0, s0 should have been optimized away.
; rdar://9104514
; @t bitcasts its float argument to i32 for an unsigned integer compare in the
; entry block, and conditionally passes the original float to @doSomething
; from a different basic block (if.then).
define void @t(float %x) nounwind ssp {
entry:
; The directives below require that no vmov is emitted between the function
; label and the first bl.
; CHECK: t:
; CHECK-NOT: vmov
; CHECK: bl
; Reinterpret the float bits as an integer so they can be compared with icmp.
%0 = bitcast float %x to i32
%cmp = icmp ult i32 %0, 2139095039
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
; The float value itself (not the bitcast result) is used here, in a separate
; block from the bitcast.
tail call void @doSomething(float %x) nounwind
br label %if.end
if.end: ; preds = %if.then, %entry
ret void
}
declare void @doSomething(float)