Re-submit AMDGPUMachineCFGStructurizer.
Differential Revision: https://reviews.llvm.org/D23209

llvm-svn: 303111
parent 732a6f432e
commit a06bfe054e
llvm/lib/Target/AMDGPU/AMDGPU.h

@@ -50,6 +50,10 @@ FunctionPass *createSIDebuggerInsertNopsPass();
 FunctionPass *createSIInsertWaitsPass();
 FunctionPass *createSIInsertWaitcntsPass();
 FunctionPass *createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM = nullptr);
+FunctionPass *createAMDGPUMachineCFGStructurizerPass();
+
+void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
+extern char &AMDGPUMachineCFGStructurizerID;
 
 ModulePass *createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM = nullptr);
 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp

(New file; diff suppressed because it is too large.)
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

@@ -118,6 +118,13 @@ static cl::opt<bool> EnableSIInsertWaitcntsPass(
   cl::desc("Use new waitcnt insertion pass"),
   cl::init(false));
 
+// Option to run late CFG structurizer
+static cl::opt<bool> LateCFGStructurize(
+  "amdgpu-late-structurize",
+  cl::desc("Enable late CFG structurization"),
+  cl::init(false),
+  cl::Hidden);
+
 extern "C" void LLVMInitializeAMDGPUTarget() {
   // Register the target
   RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
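Aside: this is the standard LLVM cl::opt pattern, and because the option is cl::Hidden it only appears under -help-hidden. A self-contained sketch of the same pattern in a hypothetical standalone tool (illustrative only, not part of the patch; links against LLVMSupport):

// Hypothetical standalone tool demonstrating the cl::opt pattern used above.
#include "llvm/Support/CommandLine.h"

static llvm::cl::opt<bool> LateCFGStructurize(
    "amdgpu-late-structurize",
    llvm::cl::desc("Enable late CFG structurization"),
    llvm::cl::init(false),
    llvm::cl::Hidden);

int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv);
  // In llc this flag gates the GCN pass pipeline (see the hunks below).
  return LateCFGStructurize ? 0 : 1;
}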
@@ -702,11 +709,15 @@ bool GCNPassConfig::addPreISel() {
   // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
   // regions formed by them.
   addPass(&AMDGPUUnifyDivergentExitNodesID);
-  addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
+  if (!LateCFGStructurize) {
+    addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
+  }
   addPass(createSinkingPass());
   addPass(createSITypeRewriter());
   addPass(createAMDGPUAnnotateUniformValues());
-  addPass(createSIAnnotateControlFlowPass());
+  if (!LateCFGStructurize) {
+    addPass(createSIAnnotateControlFlowPass());
+  }
 
   return false;
 }
@@ -770,6 +781,9 @@ bool GCNPassConfig::addGlobalInstructionSelect() {
 #endif
 
 void GCNPassConfig::addPreRegAlloc() {
+  if (LateCFGStructurize) {
+    addPass(createAMDGPUMachineCFGStructurizerPass());
+  }
   addPass(createSIWholeQuadModePass());
 }
 
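Net effect of the three AMDGPUTargetMachine.cpp hunks: by default nothing changes; with -amdgpu-late-structurize set, the IR-level StructurizeCFG and SIAnnotateControlFlow passes are skipped before instruction selection and the new machine-CFG structurizer runs at the top of addPreRegAlloc instead. A condensed, purely illustrative summary of that gating (plain C++, not part of the patch):

// Illustrative summary of the gating introduced above.
enum class StructurizeMode { EarlyIR, LateMachine };

inline StructurizeMode pickStructurizeMode(bool LateCFGStructurize) {
  // Early (default): StructurizeCFG + SIAnnotateControlFlow before ISel.
  // Late (hidden flag): AMDGPUMachineCFGStructurizer before regalloc.
  return LateCFGStructurize ? StructurizeMode::LateMachine
                            : StructurizeMode::EarlyIR;
}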
llvm/lib/Target/AMDGPU/CMakeLists.txt

@@ -48,6 +48,7 @@ add_llvm_target(AMDGPUCodeGen
   AMDGPUISelDAGToDAG.cpp
   AMDGPULowerIntrinsics.cpp
   AMDGPUMCInstLower.cpp
+  AMDGPUMachineCFGStructurizer.cpp
   AMDGPUMachineFunction.cpp
   AMDGPUUnifyMetadata.cpp
   AMDGPUOpenCLImageTypeLoweringPass.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

@@ -496,6 +496,188 @@ int SIInstrInfo::commuteOpcode(unsigned Opcode) const {
   return Opcode;
 }
 
+void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MI,
+                                       const DebugLoc &DL, unsigned DestReg,
+                                       int64_t Value) const {
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg);
+  if (RegClass == &AMDGPU::SReg_32RegClass ||
+      RegClass == &AMDGPU::SGPR_32RegClass ||
+      RegClass == &AMDGPU::SReg_32_XM0RegClass ||
+      RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {
+    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
+      .addImm(Value);
+    return;
+  }
+
+  if (RegClass == &AMDGPU::SReg_64RegClass ||
+      RegClass == &AMDGPU::SGPR_64RegClass ||
+      RegClass == &AMDGPU::SReg_64_XEXECRegClass) {
+    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
+      .addImm(Value);
+    return;
+  }
+
+  if (RegClass == &AMDGPU::VGPR_32RegClass) {
+    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
+      .addImm(Value);
+    return;
+  }
+  if (RegClass == &AMDGPU::VReg_64RegClass) {
+    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg)
+      .addImm(Value);
+    return;
+  }
+
+  unsigned EltSize = 4;
+  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
+  if (RI.isSGPRClass(RegClass)) {
+    if (RI.getRegSizeInBits(*RegClass) > 32) {
+      Opcode = AMDGPU::S_MOV_B64;
+      EltSize = 8;
+    } else {
+      Opcode = AMDGPU::S_MOV_B32;
+      EltSize = 4;
+    }
+  }
+
+  ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RegClass, EltSize);
+  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
+    int64_t IdxValue = Idx == 0 ? Value : 0;
+
+    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
+      get(Opcode), RI.getSubReg(DestReg, Idx));
+    Builder.addImm(IdxValue);
+  }
+}
+
+const TargetRegisterClass *
+SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const {
+  return &AMDGPU::VGPR_32RegClass;
+}
+
+void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator I,
+                                     const DebugLoc &DL, unsigned DstReg,
+                                     ArrayRef<MachineOperand> Cond,
+                                     unsigned TrueReg,
+                                     unsigned FalseReg) const {
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  const TargetRegisterClass *RegClass = MRI.getRegClass(DstReg);
+  assert(RegClass == &AMDGPU::VGPR_32RegClass && "Not a VGPR32 reg");
+
+  if (Cond.size() == 1) {
+    BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+      .addReg(FalseReg)
+      .addReg(TrueReg)
+      .add(Cond[0]);
+  } else if (Cond.size() == 2) {
+    assert(Cond[0].isImm() && "Cond[0] is not an immediate");
+    switch (Cond[0].getImm()) {
+    case SIInstrInfo::SCC_TRUE: {
+      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
+        .addImm(-1)
+        .addImm(0);
+      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addReg(FalseReg)
+        .addReg(TrueReg)
+        .addReg(SReg);
+      break;
+    }
+    case SIInstrInfo::SCC_FALSE: {
+      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
+        .addImm(0)
+        .addImm(-1);
+      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addReg(FalseReg)
+        .addReg(TrueReg)
+        .addReg(SReg);
+      break;
+    }
+    case SIInstrInfo::VCCNZ: {
+      MachineOperand RegOp = Cond[1];
+      RegOp.setImplicit(false);
+      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addReg(FalseReg)
+        .addReg(TrueReg)
+        .add(RegOp);
+      break;
+    }
+    case SIInstrInfo::VCCZ: {
+      MachineOperand RegOp = Cond[1];
+      RegOp.setImplicit(false);
+      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addReg(TrueReg)
+        .addReg(FalseReg)
+        .add(RegOp);
+      break;
+    }
+    case SIInstrInfo::EXECNZ: {
+      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+      unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+      BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
+        .addImm(0);
+      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
+        .addImm(-1)
+        .addImm(0);
+      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addReg(FalseReg)
+        .addReg(TrueReg)
+        .addReg(SReg);
+      break;
+    }
+    case SIInstrInfo::EXECZ: {
+      unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+      unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+      BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
+        .addImm(0);
+      BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
+        .addImm(0)
+        .addImm(-1);
+      BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+        .addReg(FalseReg)
+        .addReg(TrueReg)
+        .addReg(SReg);
+      llvm_unreachable("Unhandled branch predicate EXECZ");
+      break;
+    }
+    default:
+      llvm_unreachable("invalid branch predicate");
+    }
+  } else {
+    llvm_unreachable("Can only handle Cond size 1 or 2");
+  }
+}
+
+unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB,
+                               MachineBasicBlock::iterator I,
+                               const DebugLoc &DL,
+                               unsigned SrcReg, int Value) const {
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg)
+    .addImm(Value)
+    .addReg(SrcReg);
+
+  return Reg;
+}
+
+unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB,
+                               MachineBasicBlock::iterator I,
+                               const DebugLoc &DL,
+                               unsigned SrcReg, int Value) const {
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+  BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg)
+    .addImm(Value)
+    .addReg(SrcReg);
+
+  return Reg;
+}
+
 unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
 
   if (RI.getRegSizeInBits(*DstRC) == 32) {
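Usage sketch for the new helpers: materializeImmediate is what convertNonUniformLoopRegion (later in this file) uses to seed a zero mask in each non-back-edge predecessor of a loop header. Note that the generic split path above writes Value only into the lowest subregister and zeroes the rest, so wide materializations are exact only for values that fit in the first element. A minimal illustrative caller, assuming the SIInstrInfo context of this file (helper name is hypothetical):

// Sketch: materialize a 64-bit zero mask in a predecessor block, before its
// terminator -- mirroring the call in convertNonUniformLoopRegion below.
static void seedZeroMask(const SIInstrInfo *TII, MachineRegisterInfo &MRI,
                         MachineBasicBlock *Pred) {
  unsigned ZeroReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  TII->materializeImmediate(*Pred, Pred->getFirstTerminator(), DebugLoc(),
                            ZeroReg, 0);
}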
@@ -834,6 +1016,20 @@ void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
   insertWaitStates(MBB, MI, 1);
 }
 
+void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
+  auto MF = MBB.getParent();
+  SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
+
+  assert(Info->isEntryFunction());
+
+  if (MBB.succ_empty()) {
+    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
+    if (HasNoTerminator)
+      BuildMI(MBB, MBB.end(), DebugLoc(),
+              get(Info->returnsVoid() ? AMDGPU::S_ENDPGM : AMDGPU::SI_RETURN_TO_EPILOG));
+  }
+}
+
 unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const {
   switch (MI.getOpcode()) {
   default: return 1; // FIXME: Do wait states equal cycles?
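insertReturn gives an entry function's successor-less blocks a proper terminator, choosing S_ENDPGM or SI_RETURN_TO_EPILOG from the function's return kind; it asserts isEntryFunction(). A minimal usage sketch (hypothetical helper name, assumes the usual machine-function context):

// Sketch: finish a successor-less exit block of an entry function. The
// callee is a no-op if the block already has a terminator.
static void finishExitBlock(const SIInstrInfo *TII, MachineBasicBlock &MBB) {
  TII->insertReturn(MBB);
}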
@@ -1241,14 +1437,20 @@ bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB,
     return false;
   }
 
-  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
-  if (Pred == INVALID_BR)
-    return true;
+  MachineBasicBlock *CondBB = nullptr;
 
-  MachineBasicBlock *CondBB = I->getOperand(0).getMBB();
-  Cond.push_back(MachineOperand::CreateImm(Pred));
-  Cond.push_back(I->getOperand(1)); // Save the branch register.
+  if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
+    CondBB = I->getOperand(1).getMBB();
+    Cond.push_back(I->getOperand(0));
+  } else {
+    BranchPredicate Pred = getBranchPredicate(I->getOpcode());
+    if (Pred == INVALID_BR)
+      return true;
+
+    CondBB = I->getOperand(0).getMBB();
+    Cond.push_back(MachineOperand::CreateImm(Pred));
+    Cond.push_back(I->getOperand(1)); // Save the branch register.
+  }
   ++I;
 
   if (I == MBB.end()) {
@@ -1351,6 +1553,13 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
     return 1;
   }
 
+  if (Cond.size() == 1 && Cond[0].isReg()) {
+    BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO))
+      .add(Cond[0])
+      .addMBB(TBB);
+    return 1;
+  }
+
   assert(TBB && Cond[0].isImm());
 
   unsigned Opcode
@@ -1390,9 +1599,16 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
 
 bool SIInstrInfo::reverseBranchCondition(
   SmallVectorImpl<MachineOperand> &Cond) const {
-  assert(Cond.size() == 2);
-  Cond[0].setImm(-Cond[0].getImm());
-  return false;
+  if (Cond.size() != 2) {
+    return true;
+  }
+
+  if (Cond[0].isImm()) {
+    Cond[0].setImm(-Cond[0].getImm());
+    return false;
+  }
+
+  return true;
 }
 
 bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
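The three branch hunks above form one contract: analyzeBranchImpl now produces either a one-element register condition (non-uniform branch) or the old two-element { Imm(Pred), BranchReg } form; insertBranch re-emits SI_NON_UNIFORM_BRCOND_PSEUDO for the former; and reverseBranchCondition now declines (returns true) for anything it cannot negate instead of asserting. A small illustrative predicate capturing that contract (assumed local name, not part of the patch):

// Condition shapes flowing between the three functions above:
//   { CondReg }              -> non-uniform branch (pseudo re-emitted)
//   { Imm(Pred), BranchReg } -> uniform branch (opcode chosen from Pred)
static bool isReversibleCond(ArrayRef<MachineOperand> Cond) {
  // Only the immediate-predicate form can be negated, by flipping the
  // predicate's sign -- exactly what reverseBranchCondition does above.
  return Cond.size() == 2 && Cond[0].isImm();
}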
@@ -3920,6 +4136,82 @@ bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
   return false;
 }
 
+bool SIInstrInfo::isNonUniformBranchInstr(MachineInstr &Branch) const {
+  return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO;
+}
+
+void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
+                                            MachineBasicBlock *IfEnd) const {
+  MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator();
+  assert(TI != IfEntry->end());
+
+  MachineInstr *Branch = &(*TI);
+  MachineFunction *MF = IfEntry->getParent();
+  MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo();
+
+  if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
+    unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+    MachineInstr *SIIF =
+        BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg)
+            .add(Branch->getOperand(0))
+            .add(Branch->getOperand(1));
+    MachineInstr *SIEND =
+        BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_END_CF))
+            .addReg(DstReg);
+
+    IfEntry->erase(TI);
+    IfEntry->insert(IfEntry->end(), SIIF);
+    IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND);
+  }
+}
+
+void SIInstrInfo::convertNonUniformLoopRegion(
+    MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const {
+  MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator();
+  // We expect 2 terminators, one conditional and one unconditional.
+  assert(TI != LoopEnd->end());
+
+  MachineInstr *Branch = &(*TI);
+  MachineFunction *MF = LoopEnd->getParent();
+  MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo();
+
+  if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
+
+    unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+    unsigned BackEdgeReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+    MachineInstrBuilder HeaderPHIBuilder =
+        BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg);
+    for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(),
+                                          E = LoopEntry->pred_end();
+         PI != E; ++PI) {
+      if (*PI == LoopEnd) {
+        HeaderPHIBuilder.addReg(BackEdgeReg);
+      } else {
+        MachineBasicBlock *PMBB = *PI;
+        unsigned ZeroReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+        materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(),
+                             ZeroReg, 0);
+        HeaderPHIBuilder.addReg(ZeroReg);
+      }
+      HeaderPHIBuilder.addMBB(*PI);
+    }
+    MachineInstr *HeaderPhi = HeaderPHIBuilder;
+    MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(),
+                                      get(AMDGPU::SI_IF_BREAK), BackEdgeReg)
+                                  .addReg(DstReg)
+                                  .add(Branch->getOperand(0));
+    MachineInstr *SILOOP =
+        BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP))
+            .addReg(BackEdgeReg)
+            .addMBB(LoopEntry);
+
+    LoopEntry->insert(LoopEntry->begin(), HeaderPhi);
+    LoopEnd->erase(TI);
+    LoopEnd->insert(LoopEnd->end(), SIIFBREAK);
+    LoopEnd->insert(LoopEnd->end(), SILOOP);
+  }
+}
+
 ArrayRef<std::pair<int, const char *>>
 SIInstrInfo::getSerializableTargetIndices() const {
   static const std::pair<int, const char *> TargetIndices[] = {
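The two convertNonUniform* routines are the hand-off point from the (suppressed) AMDGPUMachineCFGStructurizer.cpp: once the structurizer has carved out a non-uniform region, if-regions become SI_IF/SI_END_CF and loop regions get a header PHI plus SI_IF_BREAK/SI_LOOP in the latch. A hypothetical driver sketch of how the pass would call them (the real caller lives in the suppressed file; names are assumptions):

// Hypothetical driver: rewrite one structurized region to EXEC-mask pseudos.
// Both callees bail out internally unless the relevant terminator is the
// new SI_NON_UNIFORM_BRCOND_PSEUDO.
static void rewriteRegion(const SIInstrInfo *TII, MachineBasicBlock *Entry,
                          MachineBasicBlock *End, bool IsLoop) {
  if (IsLoop)
    TII->convertNonUniformLoopRegion(Entry, End); // branch is in the latch
  else
    TII->convertNonUniformIfRegion(Entry, End);   // branch is in the entry
}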
llvm/lib/Target/AMDGPU/SIInstrInfo.h

@@ -143,6 +143,23 @@ public:
                               RegScavenger *RS, unsigned TmpReg,
                               unsigned Offset, unsigned Size) const;
 
+  void materializeImmediate(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI,
+                            const DebugLoc &DL,
+                            unsigned DestReg,
+                            int64_t Value) const;
+
+  const TargetRegisterClass *getPreferredSelectRegClass(
+                               unsigned Size) const;
+
+  unsigned insertNE(MachineBasicBlock *MBB,
+                    MachineBasicBlock::iterator I, const DebugLoc &DL,
+                    unsigned SrcReg, int Value) const;
+
+  unsigned insertEQ(MachineBasicBlock *MBB,
+                    MachineBasicBlock::iterator I, const DebugLoc &DL,
+                    unsigned SrcReg, int Value) const;
+
   void storeRegToStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, unsigned SrcReg,
                            bool isKill, int FrameIndex,
@@ -193,7 +210,7 @@ public:
   bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                      MachineBasicBlock *&FBB,
                      SmallVectorImpl<MachineOperand> &Cond,
-                     bool AllowModify) const override;
+                     bool AllowModify = false) const override;
 
   unsigned removeBranch(MachineBasicBlock &MBB,
                         int *BytesRemoved = nullptr) const override;
@@ -218,6 +235,11 @@ public:
                     unsigned DstReg, ArrayRef<MachineOperand> Cond,
                     unsigned TrueReg, unsigned FalseReg) const override;
 
+  void insertVectorSelect(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator I, const DebugLoc &DL,
+                          unsigned DstReg, ArrayRef<MachineOperand> Cond,
+                          unsigned TrueReg, unsigned FalseReg) const;
+
   bool
   areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
                                   AliasAnalysis *AA = nullptr) const override;
@@ -705,6 +727,7 @@ public:
   void insertNoop(MachineBasicBlock &MBB,
                   MachineBasicBlock::iterator MI) const override;
 
+  void insertReturn(MachineBasicBlock &MBB) const;
   /// \brief Return the number of wait states that result from executing this
   /// instruction.
   unsigned getNumWaitStates(const MachineInstr &MI) const;
@@ -750,6 +773,14 @@ public:
 
   bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;
 
+  bool isNonUniformBranchInstr(MachineInstr &Instr) const;
+
+  void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
+                                 MachineBasicBlock *IfEnd) const;
+
+  void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
+                                   MachineBasicBlock *LoopEnd) const;
+
   ArrayRef<std::pair<int, const char *>>
   getSerializableTargetIndices() const override;
 
llvm/lib/Target/AMDGPU/SIInstructions.td

@@ -174,6 +174,13 @@ def SI_MASK_BRANCH : VPseudoInstSI <
 
 let isTerminator = 1 in {
 
+def SI_NON_UNIFORM_BRCOND_PSEUDO : CFPseudoInstSI <
+  (outs),
+  (ins SReg_64:$vcc, brtarget:$target),
+  [(brcond i1:$vcc, bb:$target)]> {
+  let Size = 12;
+}
+
 def SI_IF: CFPseudoInstSI <
   (outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target),
   [(set i64:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> {
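The selection pattern [(brcond i1:$vcc, bb:$target)] lets plain conditional branches select to the new pseudo; it can also be built by hand, as insertBranch does above for a one-element register condition. An illustrative snippet, assuming the SIInstrInfo context (local names are assumptions):

// Sketch: emitting the new pseudo from C++. CondReg/Target are assumed
// inputs; this mirrors the BuildMI call in SIInstrInfo::insertBranch.
static void emitNonUniformBranch(const SIInstrInfo *TII,
                                 MachineBasicBlock &MBB, const DebugLoc &DL,
                                 unsigned CondReg, MachineBasicBlock *Target) {
  BuildMI(&MBB, DL, TII->get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO))
      .addReg(CondReg)
      .addMBB(Target);
}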