forked from OSchip/llvm-project
parent
e3a182c052
commit
7ec38530a5
|
@ -12,8 +12,7 @@
|
|||
// it then removes.
|
||||
//
|
||||
// Note that this pass must be run after register allocation, it cannot handle
|
||||
// SSA form. It also must handle virtual registers for targets that emit virtual
|
||||
// ISA (e.g. NVPTX).
|
||||
// SSA form.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
@ -1574,17 +1573,6 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
template <class Container>
|
||||
static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI,
|
||||
Container &Set) {
|
||||
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
|
||||
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
|
||||
Set.insert(*AI);
|
||||
} else {
|
||||
Set.insert(Reg);
|
||||
}
|
||||
}
|
||||
|
||||
/// findHoistingInsertPosAndDeps - Find the location to move common instructions
|
||||
/// in successors to. The location is usually just before the terminator,
|
||||
/// however if the terminator is a conditional branch and its previous
|
||||
|
@ -1610,7 +1598,8 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
|
|||
if (!Reg)
|
||||
continue;
|
||||
if (MO.isUse()) {
|
||||
addRegAndItsAliases(Reg, TRI, Uses);
|
||||
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
|
||||
Uses.insert(*AI);
|
||||
} else {
|
||||
if (!MO.isDead())
|
||||
// Don't try to hoist code in the rare case the terminator defines a
|
||||
|
@ -1619,7 +1608,8 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
|
|||
|
||||
// If the terminator defines a register, make sure we don't hoist
|
||||
// the instruction whose def might be clobbered by the terminator.
|
||||
addRegAndItsAliases(Reg, TRI, Defs);
|
||||
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
|
||||
Defs.insert(*AI);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1675,15 +1665,15 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
|
|||
if (!Reg)
|
||||
continue;
|
||||
if (MO.isUse()) {
|
||||
addRegAndItsAliases(Reg, TRI, Uses);
|
||||
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
|
||||
Uses.insert(*AI);
|
||||
} else {
|
||||
if (Uses.erase(Reg)) {
|
||||
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
|
||||
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
|
||||
Uses.erase(*SubRegs); // Use sub-registers to be conservative
|
||||
}
|
||||
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
|
||||
Uses.erase(*SubRegs); // Use sub-registers to be conservative
|
||||
}
|
||||
addRegAndItsAliases(Reg, TRI, Defs);
|
||||
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
|
||||
Defs.insert(*AI);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1810,12 +1800,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
|
|||
unsigned Reg = MO.getReg();
|
||||
if (!Reg || !LocalDefsSet.count(Reg))
|
||||
continue;
|
||||
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
|
||||
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
|
||||
LocalDefsSet.erase(*AI);
|
||||
} else {
|
||||
LocalDefsSet.erase(Reg);
|
||||
}
|
||||
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
|
||||
LocalDefsSet.erase(*AI);
|
||||
}
|
||||
|
||||
// Track local defs so we can update liveins.
|
||||
|
@ -1827,7 +1813,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
|
|||
if (!Reg)
|
||||
continue;
|
||||
LocalDefs.push_back(Reg);
|
||||
addRegAndItsAliases(Reg, TRI, LocalDefsSet);
|
||||
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
|
||||
LocalDefsSet.insert(*AI);
|
||||
}
|
||||
|
||||
HasDups = true;
|
||||
|
|
|
@ -174,6 +174,7 @@ void NVPTXPassConfig::addIRPasses() {
|
|||
// NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
|
||||
disablePass(&PrologEpilogCodeInserterID);
|
||||
disablePass(&MachineCopyPropagationID);
|
||||
disablePass(&BranchFolderPassID);
|
||||
disablePass(&TailDuplicateID);
|
||||
|
||||
addPass(createNVPTXImageOptimizerPass());
|
||||
|
|
|
@ -1,24 +0,0 @@
|
|||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -disable-cgp | FileCheck %s
|
||||
; Disable CGP which also folds branches, so that only BranchFolding is under
|
||||
; the spotlight.
|
||||
|
||||
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
||||
target triple = "nvptx64-nvidia-cuda"
|
||||
|
||||
define void @foo(i32 %x, float* %output) {
|
||||
; CHECK-LABEL: .visible .func foo(
|
||||
; CHECK-NOT: bra.uni
|
||||
; CHECK-NOT: LBB0_
|
||||
%1 = icmp eq i32 %x, 1
|
||||
br i1 %1, label %then, label %else
|
||||
|
||||
then:
|
||||
br label %merge
|
||||
|
||||
else:
|
||||
br label %merge
|
||||
|
||||
merge:
|
||||
store float 2.0, float* %output
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue