forked from OSchip/llvm-project
Add a new pass on machine instructions to optimize away PHI cycles that
reduce down to a single value. InstCombine already does this transformation but DAG legalization may introduce new opportunities. This has turned out to be important for ARM where 64-bit values are split up during type legalization: InstCombine is not able to remove the PHI cycles on the 64-bit values but the separate 32-bit values can be optimized. I measured the compile time impact of this (running llc on 176.gcc) and it was not significant. llvm-svn: 95951
This commit is contained in:
parent
d7e9212e2d
commit
0827e040e0
|
@ -174,6 +174,10 @@ namespace llvm {
|
||||||
/// optimization by increasing uses of extended values.
|
/// optimization by increasing uses of extended values.
|
||||||
FunctionPass *createOptimizeExtsPass();
|
FunctionPass *createOptimizeExtsPass();
|
||||||
|
|
||||||
|
/// createOptimizePHIsPass - This pass optimizes machine instruction PHIs
|
||||||
|
/// to take advantage of opportunities created during DAG legalization.
|
||||||
|
FunctionPass *createOptimizePHIsPass();
|
||||||
|
|
||||||
/// createStackSlotColoringPass - This pass performs stack slot coloring.
|
/// createStackSlotColoringPass - This pass performs stack slot coloring.
|
||||||
FunctionPass *createStackSlotColoringPass(bool);
|
FunctionPass *createStackSlotColoringPass(bool);
|
||||||
|
|
||||||
|
|
|
@ -39,6 +39,7 @@ add_llvm_library(LLVMCodeGen
|
||||||
ObjectCodeEmitter.cpp
|
ObjectCodeEmitter.cpp
|
||||||
OcamlGC.cpp
|
OcamlGC.cpp
|
||||||
OptimizeExts.cpp
|
OptimizeExts.cpp
|
||||||
|
OptimizePHIs.cpp
|
||||||
PHIElimination.cpp
|
PHIElimination.cpp
|
||||||
Passes.cpp
|
Passes.cpp
|
||||||
PostRASchedulerList.cpp
|
PostRASchedulerList.cpp
|
||||||
|
|
|
@ -299,6 +299,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
|
||||||
|
|
||||||
if (OptLevel != CodeGenOpt::None) {
|
if (OptLevel != CodeGenOpt::None) {
|
||||||
PM.add(createOptimizeExtsPass());
|
PM.add(createOptimizeExtsPass());
|
||||||
|
PM.add(createOptimizePHIsPass());
|
||||||
if (!DisableMachineLICM)
|
if (!DisableMachineLICM)
|
||||||
PM.add(createMachineLICMPass());
|
PM.add(createMachineLICMPass());
|
||||||
if (!DisableMachineSink)
|
if (!DisableMachineSink)
|
||||||
|
|
|
@ -0,0 +1,141 @@
|
||||||
|
//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This pass optimizes machine instruction PHIs to take advantage of
|
||||||
|
// opportunities created during DAG legalization.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#define DEBUG_TYPE "phi-opt"
|
||||||
|
#include "llvm/CodeGen/Passes.h"
|
||||||
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||||
|
#include "llvm/CodeGen/MachineInstr.h"
|
||||||
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
|
#include "llvm/Target/TargetInstrInfo.h"
|
||||||
|
#include "llvm/Function.h"
|
||||||
|
#include "llvm/ADT/SmallSet.h"
|
||||||
|
#include "llvm/ADT/Statistic.h"
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
STATISTIC(NumPHICycles, "Number of PHI cycles replaced");
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
class OptimizePHIs : public MachineFunctionPass {
|
||||||
|
MachineRegisterInfo *MRI;
|
||||||
|
const TargetInstrInfo *TII;
|
||||||
|
|
||||||
|
public:
|
||||||
|
static char ID; // Pass identification
|
||||||
|
OptimizePHIs() : MachineFunctionPass(&ID) {}
|
||||||
|
|
||||||
|
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||||
|
|
||||||
|
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||||
|
AU.setPreservesCFG();
|
||||||
|
MachineFunctionPass::getAnalysisUsage(AU);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool IsSingleValuePHICycle(const MachineInstr *MI, unsigned &SingleValReg,
|
||||||
|
SmallSet<unsigned, 16> &RegsInCycle);
|
||||||
|
bool ReplacePHICycles(MachineBasicBlock &MBB);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
char OptimizePHIs::ID = 0;
|
||||||
|
static RegisterPass<OptimizePHIs>
|
||||||
|
X("opt-phis", "Optimize machine instruction PHIs");
|
||||||
|
|
||||||
|
FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); }
|
||||||
|
|
||||||
|
/// runOnMachineFunction - Run the PHI-cycle replacement over every basic block
/// of Fn. Returns true if at least one block was modified.
bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
  // Cache the per-function objects that the helper routines read.
  TII = Fn.getTarget().getInstrInfo();
  MRI = &Fn.getRegInfo();

  // InstCombine already collapses PHI cycles that boil down to one value, but
  // DAG legalization can expose additional ones, e.g., when i64 values are
  // split up for 32-bit targets.
  bool MadeChange = false;
  MachineFunction::iterator BB = Fn.begin();
  MachineFunction::iterator BBEnd = Fn.end();
  while (BB != BBEnd) {
    // Every block is visited regardless of earlier results.
    if (ReplacePHICycles(*BB))
      MadeChange = true;
    ++BB;
  }

  return MadeChange;
}
|
||||||
|
|
||||||
|
/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands
|
||||||
|
/// are copies of SingleValReg, possibly via copies through other PHIs. If
|
||||||
|
/// SingleValReg is zero on entry, it is set to the register with the single
|
||||||
|
/// non-copy value. RegsInCycle is a set used to keep track of the PHIs that
|
||||||
|
/// have been scanned.
|
||||||
|
bool OptimizePHIs::IsSingleValuePHICycle(const MachineInstr *MI,
|
||||||
|
unsigned &SingleValReg,
|
||||||
|
SmallSet<unsigned, 16> &RegsInCycle) {
|
||||||
|
assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction");
|
||||||
|
unsigned DstReg = MI->getOperand(0).getReg();
|
||||||
|
|
||||||
|
// See if we already saw this register.
|
||||||
|
if (!RegsInCycle.insert(DstReg))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
// Don't scan crazily complex things.
|
||||||
|
if (RegsInCycle.size() == 16)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Scan the PHI operands.
|
||||||
|
for (unsigned i = 1; i != MI->getNumOperands(); i += 2) {
|
||||||
|
unsigned SrcReg = MI->getOperand(i).getReg();
|
||||||
|
if (SrcReg == DstReg)
|
||||||
|
continue;
|
||||||
|
const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
|
||||||
|
|
||||||
|
// Skip over register-to-register moves.
|
||||||
|
unsigned MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx;
|
||||||
|
if (SrcMI &&
|
||||||
|
TII->isMoveInstr(*SrcMI, MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx) &&
|
||||||
|
SrcSubIdx == 0 && DstSubIdx == 0 &&
|
||||||
|
TargetRegisterInfo::isVirtualRegister(MvSrcReg))
|
||||||
|
SrcMI = MRI->getVRegDef(MvSrcReg);
|
||||||
|
if (!SrcMI)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (SrcMI->isPHI()) {
|
||||||
|
if (!IsSingleValuePHICycle(SrcMI, SingleValReg, RegsInCycle))
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
// Fail if there is more than one non-phi/non-move register.
|
||||||
|
if (SingleValReg != 0)
|
||||||
|
return false;
|
||||||
|
SingleValReg = SrcReg;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// ReplacePHICycles - Find PHI cycles that can be replaced by a single
|
||||||
|
/// value and remove them.
|
||||||
|
bool OptimizePHIs::ReplacePHICycles(MachineBasicBlock &MBB) {
|
||||||
|
bool Changed = false;
|
||||||
|
for (MachineBasicBlock::iterator
|
||||||
|
MII = MBB.begin(), E = MBB.end(); MII != E; ) {
|
||||||
|
MachineInstr *MI = &*MII++;
|
||||||
|
if (!MI->isPHI())
|
||||||
|
break;
|
||||||
|
|
||||||
|
unsigned SingleValReg = 0;
|
||||||
|
SmallSet<unsigned, 16> RegsInCycle;
|
||||||
|
if (IsSingleValuePHICycle(MI, SingleValReg, RegsInCycle) &&
|
||||||
|
SingleValReg != 0) {
|
||||||
|
MRI->replaceRegWith(MI->getOperand(0).getReg(), SingleValReg);
|
||||||
|
MI->eraseFromParent();
|
||||||
|
++NumPHICycles;
|
||||||
|
Changed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Changed;
|
||||||
|
}
|
|
@ -0,0 +1,34 @@
|
||||||
|
; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
|
||||||
|
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
|
||||||
|
|
||||||
|
; Regression test for PHI-cycle optimization on split 64-bit values.
; In the loop, only the low 32 bits of the i64 %u.05 change per iteration: the
; `and` with -4294967296 keeps the high 32 bits and the `or` merges in the
; zero-extended result of @f. The high half is therefore carried around the
; loop unchanged from its initial undef value.
; The CHECK-NOT line requires that no "mov" appears in the output between the
; "test:" and "return" matches.
define arm_apcscc i32 @test(i32 %n) nounwind {
|
||||||
|
; CHECK: test:
|
||||||
|
; CHECK-NOT: mov
|
||||||
|
; CHECK: return
|
||||||
|
entry:
|
||||||
|
%0 = icmp eq i32 %n, 1 ; <i1> [#uses=1]
|
||||||
|
br i1 %0, label %return, label %bb.nph
|
||||||
|
|
||||||
|
bb.nph: ; preds = %entry
|
||||||
|
%tmp = add i32 %n, -1 ; <i32> [#uses=1]
|
||||||
|
br label %bb
|
||||||
|
|
||||||
|
bb: ; preds = %bb.nph, %bb
|
||||||
|
%indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i32> [#uses=1]
|
||||||
|
; 64-bit loop-carried value: starts undef and is fed back from %ins.
%u.05 = phi i64 [ undef, %bb.nph ], [ %ins, %bb ] ; <i64> [#uses=1]
|
||||||
|
%1 = tail call arm_apcscc i32 @f() nounwind ; <i32> [#uses=1]
|
||||||
|
%tmp4 = zext i32 %1 to i64 ; <i64> [#uses=1]
|
||||||
|
%mask = and i64 %u.05, -4294967296 ; <i64> [#uses=1]
|
||||||
|
%ins = or i64 %tmp4, %mask ; <i64> [#uses=2]
|
||||||
|
tail call arm_apcscc void @g(i64 %ins) nounwind
|
||||||
|
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
|
||||||
|
%exitcond = icmp eq i32 %indvar.next, %tmp ; <i1> [#uses=1]
|
||||||
|
br i1 %exitcond, label %return, label %bb
|
||||||
|
|
||||||
|
return: ; preds = %bb, %entry
|
||||||
|
ret i32 undef
|
||||||
|
}
|
||||||
|
|
||||||
|
declare arm_apcscc i32 @f()
|
||||||
|
|
||||||
|
declare arm_apcscc void @g(i64)
|
Loading…
Reference in New Issue