forked from OSchip/llvm-project
AMDGPU: Remove SIFixSGPRLiveRanges pass
Summary: This pass is unnecessary and overly conservative. It was motivated by situations like def %vreg0:SGPR_32 ... if-block: .. def %vreg1:SGPR_32 ... else-block: ... use %vreg0:SGPR_32 ... and similar situations with uses after the non-uniform control flow, where we are not allowed to assign %vreg0 and %vreg1 to the same physical register, even though in the original, thread/workitem-based CFG, it looks like the live ranges of these registers do not overlap. However, by the time register allocation runs, we have moved to a wave-based CFG that accurately represents the fact that the wave may run through both the if- and the else-block. So the live ranges of %vreg0 and %vreg1 already overlap even without the SIFixSGPRLiveRanges pass. In addition to proving this change correct, I have tested it with Piglit and a small number of other tests. Reviewers: arsenm, tstellarAMD Subscribers: MatzeB, arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D19041 llvm-svn: 266345
This commit is contained in:
parent
19f0f5177d
commit
723b73b4eb
|
@ -48,7 +48,6 @@ FunctionPass *createSIWholeQuadModePass();
|
||||||
FunctionPass *createSILowerControlFlowPass();
|
FunctionPass *createSILowerControlFlowPass();
|
||||||
FunctionPass *createSIFixControlFlowLiveIntervalsPass();
|
FunctionPass *createSIFixControlFlowLiveIntervalsPass();
|
||||||
FunctionPass *createSIFixSGPRCopiesPass();
|
FunctionPass *createSIFixSGPRCopiesPass();
|
||||||
FunctionPass *createSIFixSGPRLiveRangesPass();
|
|
||||||
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
|
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
|
||||||
FunctionPass *createSIInsertNopsPass();
|
FunctionPass *createSIInsertNopsPass();
|
||||||
FunctionPass *createSIInsertWaitsPass();
|
FunctionPass *createSIInsertWaitsPass();
|
||||||
|
@ -93,9 +92,6 @@ FunctionPass *createAMDGPUAnnotateUniformValues();
|
||||||
void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
|
void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
|
||||||
extern char &SIFixControlFlowLiveIntervalsID;
|
extern char &SIFixControlFlowLiveIntervalsID;
|
||||||
|
|
||||||
void initializeSIFixSGPRLiveRangesPass(PassRegistry&);
|
|
||||||
extern char &SIFixSGPRLiveRangesID;
|
|
||||||
|
|
||||||
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
|
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
|
||||||
extern char &AMDGPUAnnotateUniformValuesPassID;
|
extern char &AMDGPUAnnotateUniformValuesPassID;
|
||||||
|
|
||||||
|
|
|
@ -48,7 +48,6 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
|
||||||
initializeSILowerI1CopiesPass(*PR);
|
initializeSILowerI1CopiesPass(*PR);
|
||||||
initializeSIFixSGPRCopiesPass(*PR);
|
initializeSIFixSGPRCopiesPass(*PR);
|
||||||
initializeSIFoldOperandsPass(*PR);
|
initializeSIFoldOperandsPass(*PR);
|
||||||
initializeSIFixSGPRLiveRangesPass(*PR);
|
|
||||||
initializeSIFixControlFlowLiveIntervalsPass(*PR);
|
initializeSIFixControlFlowLiveIntervalsPass(*PR);
|
||||||
initializeSILoadStoreOptimizerPass(*PR);
|
initializeSILoadStoreOptimizerPass(*PR);
|
||||||
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
|
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
|
||||||
|
@ -351,16 +350,10 @@ void GCNPassConfig::addPreRegAlloc() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
|
void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
|
||||||
addPass(&SIFixSGPRLiveRangesID);
|
|
||||||
TargetPassConfig::addFastRegAlloc(RegAllocPass);
|
TargetPassConfig::addFastRegAlloc(RegAllocPass);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
|
void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
|
||||||
// We want to run this after LiveVariables is computed to avoid computing them
|
|
||||||
// twice.
|
|
||||||
// FIXME: We shouldn't disable the verifier here. r249087 introduced a failure
|
|
||||||
// that needs to be fixed.
|
|
||||||
insertPass(&LiveVariablesID, &SIFixSGPRLiveRangesID, /*VerifyAfter=*/false);
|
|
||||||
TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
|
TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -48,7 +48,6 @@ add_llvm_target(AMDGPUCodeGen
|
||||||
SIAnnotateControlFlow.cpp
|
SIAnnotateControlFlow.cpp
|
||||||
SIFixControlFlowLiveIntervals.cpp
|
SIFixControlFlowLiveIntervals.cpp
|
||||||
SIFixSGPRCopies.cpp
|
SIFixSGPRCopies.cpp
|
||||||
SIFixSGPRLiveRanges.cpp
|
|
||||||
SIFoldOperands.cpp
|
SIFoldOperands.cpp
|
||||||
SIFrameLowering.cpp
|
SIFrameLowering.cpp
|
||||||
SIInsertNopsPass.cpp
|
SIInsertNopsPass.cpp
|
||||||
|
|
|
@ -1,230 +0,0 @@
|
||||||
//===-- SIFixSGPRLiveRanges.cpp - Fix SGPR live ranges ----------------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
/// \file SALU instructions ignore the execution mask, so we need to modify the
|
|
||||||
/// live ranges of the registers they define in some cases.
|
|
||||||
///
|
|
||||||
/// The main case we need to handle is when a def is used in one side of a
|
|
||||||
/// branch and not another. For example:
|
|
||||||
///
|
|
||||||
/// %def
|
|
||||||
/// IF
|
|
||||||
/// ...
|
|
||||||
/// ...
|
|
||||||
/// ELSE
|
|
||||||
/// %use
|
|
||||||
/// ...
|
|
||||||
/// ENDIF
|
|
||||||
///
|
|
||||||
/// Here we need the register allocator to avoid assigning any of the defs
|
|
||||||
/// inside of the IF to the same register as %def. In traditional live
|
|
||||||
/// interval analysis %def is not live inside the IF branch, however, since
|
|
||||||
/// SALU instructions inside of IF will be executed even if the branch is not
|
|
||||||
/// taken, there is the chance that one of the instructions will overwrite the
|
|
||||||
/// value of %def, so the use in ELSE will see the wrong value.
|
|
||||||
///
|
|
||||||
/// The strategy we use for solving this is to add an extra use after the ENDIF:
|
|
||||||
///
|
|
||||||
/// %def
|
|
||||||
/// IF
|
|
||||||
/// ...
|
|
||||||
/// ...
|
|
||||||
/// ELSE
|
|
||||||
/// %use
|
|
||||||
/// ...
|
|
||||||
/// ENDIF
|
|
||||||
/// %use
|
|
||||||
///
|
|
||||||
/// Adding this use will make the def live throughout the IF branch, which is
|
|
||||||
/// what we want.
|
|
||||||
|
|
||||||
#include "AMDGPU.h"
|
|
||||||
#include "SIInstrInfo.h"
|
|
||||||
#include "SIRegisterInfo.h"
|
|
||||||
#include "llvm/ADT/DepthFirstIterator.h"
|
|
||||||
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
|
|
||||||
#include "llvm/CodeGen/LiveVariables.h"
|
|
||||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
||||||
#include "llvm/CodeGen/MachinePostDominators.h"
|
|
||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
||||||
#include "llvm/Support/Debug.h"
|
|
||||||
#include "llvm/Support/raw_ostream.h"
|
|
||||||
#include "llvm/Target/TargetMachine.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
#define DEBUG_TYPE "si-fix-sgpr-live-ranges"
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
class SIFixSGPRLiveRanges : public MachineFunctionPass {
|
|
||||||
public:
|
|
||||||
static char ID;
|
|
||||||
|
|
||||||
public:
|
|
||||||
SIFixSGPRLiveRanges() : MachineFunctionPass(ID) {
|
|
||||||
initializeSIFixSGPRLiveRangesPass(*PassRegistry::getPassRegistry());
|
|
||||||
}
|
|
||||||
|
|
||||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
|
||||||
|
|
||||||
const char *getPassName() const override {
|
|
||||||
return "SI Fix SGPR live ranges";
|
|
||||||
}
|
|
||||||
|
|
||||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
||||||
AU.addRequired<LiveVariables>();
|
|
||||||
AU.addPreserved<LiveVariables>();
|
|
||||||
|
|
||||||
AU.addRequired<MachinePostDominatorTree>();
|
|
||||||
AU.addPreserved<MachinePostDominatorTree>();
|
|
||||||
AU.setPreservesCFG();
|
|
||||||
|
|
||||||
MachineFunctionPass::getAnalysisUsage(AU);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End anonymous namespace.
|
|
||||||
|
|
||||||
INITIALIZE_PASS_BEGIN(SIFixSGPRLiveRanges, DEBUG_TYPE,
|
|
||||||
"SI Fix SGPR Live Ranges", false, false)
|
|
||||||
INITIALIZE_PASS_DEPENDENCY(LiveVariables)
|
|
||||||
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
|
|
||||||
INITIALIZE_PASS_END(SIFixSGPRLiveRanges, DEBUG_TYPE,
|
|
||||||
"SI Fix SGPR Live Ranges", false, false)
|
|
||||||
|
|
||||||
char SIFixSGPRLiveRanges::ID = 0;
|
|
||||||
|
|
||||||
char &llvm::SIFixSGPRLiveRangesID = SIFixSGPRLiveRanges::ID;
|
|
||||||
|
|
||||||
FunctionPass *llvm::createSIFixSGPRLiveRangesPass() {
|
|
||||||
return new SIFixSGPRLiveRanges();
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool hasOnlyScalarBr(const MachineBasicBlock *MBB,
|
|
||||||
const SIInstrInfo *TII) {
|
|
||||||
for (MachineBasicBlock::const_iterator I = MBB->getFirstTerminator(),
|
|
||||||
E = MBB->end(); I != E; ++I) {
|
|
||||||
if (!TII->isSOPP(*I))
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
|
|
||||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
|
||||||
const SIInstrInfo *TII =
|
|
||||||
static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
|
|
||||||
const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
|
|
||||||
MF.getSubtarget().getRegisterInfo());
|
|
||||||
bool MadeChange = false;
|
|
||||||
|
|
||||||
MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
|
|
||||||
SmallVector<unsigned, 16> SGPRLiveRanges;
|
|
||||||
|
|
||||||
LiveVariables *LV = &getAnalysis<LiveVariables>();
|
|
||||||
MachineBasicBlock *Entry = &MF.front();
|
|
||||||
|
|
||||||
// Use a depth first order so that in SSA, we encounter all defs before
|
|
||||||
// uses. Once the defs of the block have been found, attempt to insert
|
|
||||||
// SGPR_USE instructions in successor blocks if required.
|
|
||||||
for (MachineBasicBlock *MBB : depth_first(Entry)) {
|
|
||||||
for (const MachineInstr &MI : *MBB) {
|
|
||||||
for (const MachineOperand &MO : MI.defs()) {
|
|
||||||
// We should never see a live out def of a physical register, so we also
|
|
||||||
// do not need to worry about implicit_defs().
|
|
||||||
unsigned Def = MO.getReg();
|
|
||||||
if (TargetRegisterInfo::isVirtualRegister(Def)) {
|
|
||||||
if (TRI->isSGPRClass(MRI.getRegClass(Def))) {
|
|
||||||
// Only consider defs that are live outs. We don't care about def /
|
|
||||||
// use within the same block.
|
|
||||||
|
|
||||||
// LiveVariables does not consider registers that are only used in a
|
|
||||||
// phi in a sucessor block as live out, unlike LiveIntervals.
|
|
||||||
//
|
|
||||||
// This is OK because SIFixSGPRCopies replaced any SGPR phis with
|
|
||||||
// VGPRs.
|
|
||||||
if (LV->isLiveOut(Def, *MBB))
|
|
||||||
SGPRLiveRanges.push_back(Def);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (MBB->succ_size() < 2 || hasOnlyScalarBr(MBB, TII))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// We have structured control flow, so the number of successors should be
|
|
||||||
// two.
|
|
||||||
assert(MBB->succ_size() == 2);
|
|
||||||
MachineBasicBlock *SuccA = *MBB->succ_begin();
|
|
||||||
MachineBasicBlock *SuccB = *(++MBB->succ_begin());
|
|
||||||
MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB);
|
|
||||||
|
|
||||||
if (!NCD)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
MachineBasicBlock::iterator NCDTerm = NCD->getFirstTerminator();
|
|
||||||
|
|
||||||
if (NCDTerm != NCD->end() && NCDTerm->getOpcode() == AMDGPU::SI_ELSE) {
|
|
||||||
assert(NCD->succ_size() == 2);
|
|
||||||
// We want to make sure we insert the Use after the ENDIF, not after
|
|
||||||
// the ELSE.
|
|
||||||
NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(),
|
|
||||||
*(++NCD->succ_begin()));
|
|
||||||
}
|
|
||||||
|
|
||||||
for (unsigned Reg : SGPRLiveRanges) {
|
|
||||||
// FIXME: We could be smarter here. If the register is Live-In to one
|
|
||||||
// block, but the other doesn't have any SGPR defs, then there won't be a
|
|
||||||
// conflict. Also, if the branch condition is uniform then there will be
|
|
||||||
// no conflict.
|
|
||||||
bool LiveInToA = LV->isLiveIn(Reg, *SuccA);
|
|
||||||
bool LiveInToB = LV->isLiveIn(Reg, *SuccB);
|
|
||||||
|
|
||||||
if (!LiveInToA && !LiveInToB) {
|
|
||||||
DEBUG(dbgs() << PrintReg(Reg, TRI, 0)
|
|
||||||
<< " is live into neither successor\n");
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (LiveInToA && LiveInToB) {
|
|
||||||
DEBUG(dbgs() << PrintReg(Reg, TRI, 0)
|
|
||||||
<< " is live into both successors\n");
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// This interval is live in to one successor, but not the other, so
|
|
||||||
// we need to update its range so it is live in to both.
|
|
||||||
DEBUG(dbgs() << "Possible SGPR conflict detected for "
|
|
||||||
<< PrintReg(Reg, TRI, 0)
|
|
||||||
<< " BB#" << SuccA->getNumber()
|
|
||||||
<< ", BB#" << SuccB->getNumber()
|
|
||||||
<< " with NCD = BB#" << NCD->getNumber() << '\n');
|
|
||||||
|
|
||||||
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
|
|
||||||
"Not expecting to extend live range of physreg");
|
|
||||||
|
|
||||||
// FIXME: Need to figure out how to update LiveRange here so this pass
|
|
||||||
// will be able to preserve LiveInterval analysis.
|
|
||||||
MachineInstr *NCDSGPRUse =
|
|
||||||
BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(),
|
|
||||||
TII->get(AMDGPU::SGPR_USE))
|
|
||||||
.addReg(Reg, RegState::Implicit);
|
|
||||||
|
|
||||||
MadeChange = true;
|
|
||||||
LV->HandleVirtRegUse(Reg, NCD, NCDSGPRUse);
|
|
||||||
|
|
||||||
DEBUG(NCDSGPRUse->dump());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return MadeChange;
|
|
||||||
}
|
|
|
@ -12,7 +12,7 @@ target triple="amdgcn--"
|
||||||
; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||||
; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
|
||||||
; BB0_1:
|
; BB0_1:
|
||||||
; CHECK: s_load_dword s6, s[0:1], 0xa
|
; CHECK: s_load_dword s0, s[0:1], 0xa
|
||||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||||
; BB0_2:
|
; BB0_2:
|
||||||
; CHECK: s_or_b64 exec, exec, s[2:3]
|
; CHECK: s_or_b64 exec, exec, s[2:3]
|
||||||
|
|
Loading…
Reference in New Issue