Revert "[AMDGPU] SIFixSGPRCopies refactoring"

Breaks ASan tests.

This reverts commit 3f8ae7efa8.

parent 23ace05e0a
commit 8ea1cf3111
@@ -120,10 +120,6 @@ public:
class SIFixSGPRCopies : public MachineFunctionPass {
  MachineDominatorTree *MDT;
  SmallVector<MachineInstr*, 4> SCCCopies;
  SmallVector<MachineInstr*, 4> RegSequences;
  SmallVector<MachineInstr*, 4> PHINodes;
  SmallVector<MachineInstr*, 4> S2VCopies;
  unsigned NextVGPRToSGPRCopyID;
  DenseMap<unsigned, V2SCopyInfo> V2SCopies;
  DenseMap<MachineInstr *, SetVector<unsigned>> SiblingPenalty;

@@ -138,11 +134,8 @@ public:
  SIFixSGPRCopies() : MachineFunctionPass(ID), NextVGPRToSGPRCopyID(0) {}

  bool runOnMachineFunction(MachineFunction &MF) override;
  void fixSCCCopies(bool IsWave32);
  void prepareRegSequenceAndPHIs(MachineFunction &MF);
  unsigned getNextVGPRToSGPRCopyId() { return ++NextVGPRToSGPRCopyID; }
  bool needToBeConvertedToVALU(V2SCopyInfo *I);
  void analyzeVGPRToSGPRCopy(MachineInstr *MI);
  void analyzeVGPRToSGPRCopy(V2SCopyInfo& Info);
  void lowerVGPR2SGPRCopies(MachineFunction &MF);
  // Handles copies whose source register is:
  // 1. Physical register

@@ -178,6 +171,19 @@ FunctionPass *llvm::createSIFixSGPRCopiesPass() {
  return new SIFixSGPRCopies();
}

static bool hasVectorOperands(const MachineInstr &MI,
                              const SIRegisterInfo *TRI) {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || !MO.getReg().isVirtual())
      continue;

    if (TRI->hasVectorRegisters(MRI.getRegClass(MO.getReg())))
      return true;
  }
  return false;
}

static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getCopyRegClasses(const MachineInstr &Copy,
                  const SIRegisterInfo &TRI,

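For readers without the surrounding LLVM sources, here is a minimal standalone sketch of the operand-scan pattern hasVectorOperands implements above. The Mock* types are illustrative stand-ins for MachineOperand and the register-class queries, not LLVM API:

#include <vector>

// Illustrative stand-ins; the real pass uses MachineOperand,
// MachineRegisterInfo and SIRegisterInfo instead.
struct MockOperand {
  bool IsReg;         // does this operand name a register?
  bool IsVirtual;     // virtual (SSA) register rather than physical?
  bool IsVectorClass; // does its register class hold vector registers?
};

// Same shape as hasVectorOperands: any virtual register operand whose
// class contains vector registers makes the instruction "vector-tainted".
static bool hasVectorOperands(const std::vector<MockOperand> &Ops) {
  for (const MockOperand &MO : Ops) {
    if (!MO.IsReg || !MO.IsVirtual)
      continue; // immediates and physical registers are skipped
    if (MO.IsVectorClass)
      return true;
  }
  return false;
}

int main() {
  // One scalar and one vector operand: the scan reports true.
  std::vector<MockOperand> Ops = {{true, true, false}, {true, true, true}};
  return hasVectorOperands(Ops) ? 0 : 1;
}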
@@ -610,6 +616,14 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
  TII = ST.getInstrInfo();
  MDT = &getAnalysis<MachineDominatorTree>();

  // We have to lower VGPR to SGPR copies before the main loop
  // because the REG_SEQUENCE and PHI lowering in the main loop
  // converts the def-use chains to VALU and closes the opportunities
  // for keeping them scalar.
  // TODO: REG_SEQUENCE and PHIs are semantically copies. The next patch
  // addresses their lowering and unifies the processing in one main loop.
  lowerVGPR2SGPRCopies(MF);

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {
    MachineBasicBlock *MBB = &*BI;

@@ -625,66 +639,100 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
      case AMDGPU::STRICT_WQM:
      case AMDGPU::SOFT_WQM:
      case AMDGPU::STRICT_WWM: {
        Register SrcReg = MI.getOperand(1).getReg();
        Register DstReg = MI.getOperand(0).getReg();
        const TargetRegisterClass *SrcRC, *DstRC;
        std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, *MRI);

        if (MI.isCopy() && (SrcReg == AMDGPU::SCC || DstReg == AMDGPU::SCC))
          SCCCopies.push_back(&MI);

        if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
          // Since VGPR to SGPR copies affect VGPR to SGPR copy
          // score and, hence, the lowering decision, let's try to get rid of
          // them as early as possible.
          if (tryChangeVGPRtoSGPRinCopy(MI, TRI, TII))
        if (MI.isCopy()) {
          Register SrcReg = MI.getOperand(1).getReg();
          if (SrcReg == AMDGPU::SCC) {
            Register SCCCopy = MRI->createVirtualRegister(
                TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID));
            I = BuildMI(*MI.getParent(),
                        std::next(MachineBasicBlock::iterator(MI)),
                        MI.getDebugLoc(),
                        TII->get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
                                               : AMDGPU::S_CSELECT_B64),
                        SCCCopy)
                    .addImm(-1)
                    .addImm(0);
            I = BuildMI(*MI.getParent(), std::next(I), I->getDebugLoc(),
                        TII->get(AMDGPU::COPY), DstReg)
                    .addReg(SCCCopy);
            MI.eraseFromParent();
            continue;
          } else if (DstReg == AMDGPU::SCC) {
            unsigned Opcode =
                ST.isWave64() ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
            Register Exec = ST.isWave64() ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
            Register Tmp = MRI->createVirtualRegister(TRI->getBoolRC());
            I = BuildMI(*MI.getParent(),
                        std::next(MachineBasicBlock::iterator(MI)),
                        MI.getDebugLoc(), TII->get(Opcode))
                    .addReg(Tmp, getDefRegState(true))
                    .addReg(SrcReg)
                    .addReg(Exec);
            MI.eraseFromParent();
            continue;
          // Collect those not changed to try them after VGPR to SGPR copies
          // lowering as there will be more opportunities.
          S2VCopies.push_back(&MI);
        }
        if (!isVGPRToSGPRCopy(SrcRC, DstRC, *TRI))
          continue;
        if (lowerSpecialCase(MI))
          continue;

        analyzeVGPRToSGPRCopy(&MI);

        break;
      }
      case AMDGPU::INSERT_SUBREG:
      case AMDGPU::PHI:
      case AMDGPU::REG_SEQUENCE: {
        if (TRI->isSGPRClass(TII->getOpRegClass(MI, 0))) {
          for (MachineOperand &MO : MI.operands()) {
            if (!MO.isReg() || !MO.getReg().isVirtual())
              continue;
            const TargetRegisterClass *SrcRC = MRI->getRegClass(MO.getReg());
            if (TRI->hasVectorRegisters(SrcRC)) {
              const TargetRegisterClass *DestRC =
                  TRI->getEquivalentSGPRClass(SrcRC);
              Register NewDst = MRI->createVirtualRegister(DestRC);
              MachineBasicBlock *BlockToInsertCopy =
                  MI.isPHI() ? MI.getOperand(MI.getOperandNo(&MO) + 1).getMBB()
                             : MBB;
              MachineBasicBlock::iterator PointToInsertCopy =
                  MI.isPHI() ? BlockToInsertCopy->getFirstInstrTerminator() : I;
              MachineInstr *NewCopy =
                  BuildMI(*BlockToInsertCopy, PointToInsertCopy,
                          PointToInsertCopy->getDebugLoc(),
                          TII->get(AMDGPU::COPY), NewDst)
                      .addReg(MO.getReg());
              MO.setReg(NewDst);
              analyzeVGPRToSGPRCopy(NewCopy);
            }
          }
        }

        if (MI.isPHI())
          PHINodes.push_back(&MI);
        else if (MI.isRegSequence())
          RegSequences.push_back(&MI);
        if (!DstReg.isVirtual()) {
          // If the destination register is a physical register there isn't
          // really much we can do to fix this.
          // Some special instructions use M0 as an input. Some even only use
          // the first lane. Insert a readfirstlane and hope for the best.
          if (DstReg == AMDGPU::M0 && TRI->hasVectorRegisters(SrcRC)) {
            Register TmpReg
              = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

            BuildMI(*MBB, MI, MI.getDebugLoc(),
                    TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
                .add(MI.getOperand(1));
            MI.getOperand(1).setReg(TmpReg);
          }

          continue;
        }

        if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
          tryChangeVGPRtoSGPRinCopy(MI, TRI, TII);
        }

        break;
      }
      case AMDGPU::PHI: {
        processPHINode(MI);
        break;
      }
      case AMDGPU::REG_SEQUENCE: {
        if (TRI->hasVectorRegisters(TII->getOpRegClass(MI, 0)) ||
            !hasVectorOperands(MI, TRI)) {
          foldVGPRCopyIntoRegSequence(MI, TRI, TII, *MRI);
          continue;
        }

        break;
      }
      case AMDGPU::INSERT_SUBREG: {
        const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
        DstRC = MRI->getRegClass(MI.getOperand(0).getReg());
        Src0RC = MRI->getRegClass(MI.getOperand(1).getReg());
        Src1RC = MRI->getRegClass(MI.getOperand(2).getReg());
        if (TRI->isSGPRClass(DstRC) &&
            (TRI->hasVectorRegisters(Src0RC) ||
             TRI->hasVectorRegisters(Src1RC))) {
          LLVM_DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
          MachineBasicBlock *NewBB = TII->moveToVALU(MI, MDT);
          if (NewBB && NewBB != MBB) {
            MBB = NewBB;
            E = MBB->end();
            BI = MachineFunction::iterator(MBB);
            BE = MF.end();
          }
          assert((!NewBB || NewBB == I->getParent()) &&
                 "moveToVALU did not return the right basic block");
        }
        break;
      }
      case AMDGPU::V_WRITELANE_B32: {

@@ -752,41 +800,11 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
    }
  }

  lowerVGPR2SGPRCopies(MF);

  // Postprocessing
  fixSCCCopies(ST.isWave32());

  for (auto MI : S2VCopies) {
    // Check if it is still valid
    if (MI->getParent() && MI->isCopy()) {
      const TargetRegisterClass *SrcRC, *DstRC;
      std::tie(SrcRC, DstRC) = getCopyRegClasses(*MI, *TRI, *MRI);
      if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
        tryChangeVGPRtoSGPRinCopy(*MI, TRI, TII);
    }
  }


  for (auto MI : RegSequences) {
    // Check if it is still valid
    if (MI->getParent() && MI->isRegSequence())
      foldVGPRCopyIntoRegSequence(*MI, TRI, TII, *MRI);
  }

  for (auto MI : PHINodes) {
    processPHINode(*MI);
  }

  if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
    hoistAndMergeSGPRInits(AMDGPU::M0, *MRI, TRI, *MDT, TII);

  SiblingPenalty.clear();
  V2SCopies.clear();
  SCCCopies.clear();
  RegSequences.clear();
  PHINodes.clear();
  S2VCopies.clear();

  return true;
}

@@ -843,29 +861,7 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
}

bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI) {

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();

  if (!DstReg.isVirtual()) {
    // If the destination register is a physical register there isn't
    // really much we can do to fix this.
    // Some special instructions use M0 as an input. Some even only use
    // the first lane. Insert a readfirstlane and hope for the best.
    if (DstReg == AMDGPU::M0 &&
        TRI->hasVectorRegisters(MRI->getRegClass(SrcReg))) {
      Register TmpReg =
          MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
              TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
          .add(MI.getOperand(1));
      MI.getOperand(1).setReg(TmpReg);
    }

    return true;
  }

  if (!SrcReg.isVirtual() || TRI->isAGPR(*MRI, SrcReg)) {
    TII->moveToVALU(MI, MDT);
    return true;

@@ -884,13 +880,9 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI) {
  return false;
}

void SIFixSGPRCopies::analyzeVGPRToSGPRCopy(MachineInstr* MI) {
  Register DstReg = MI->getOperand(0).getReg();
  const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);

  V2SCopyInfo Info(getNextVGPRToSGPRCopyId(), MI,
                   TRI->getRegSizeInBits(*DstRC));

void SIFixSGPRCopies::analyzeVGPRToSGPRCopy(V2SCopyInfo& Info) {
  SmallVector<MachineInstr *, 8> AnalysisWorklist;
  // Needed because the SSA is not a tree but a graph and may have
  // forks and joins. We should not then go the same way twice.

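The comment above is the reason analyzeVGPRToSGPRCopy keeps a visited set while walking def-use chains: in a graph with forks and joins, a plain worklist would enqueue a joined node once per incoming path. A minimal sketch of that discipline, with a hypothetical integer-indexed use graph standing in for MachineInstr users:

#include <unordered_set>
#include <vector>

// Hypothetical use graph: each node indexes its users, mimicking SSA
// def-use chains, which form a graph (forks and joins), not a tree.
struct Node { std::vector<int> Users; };

// Worklist walk that visits each node exactly once; without the Visited
// set, a node reachable along two paths (a join) would be processed twice.
static int countReachable(const std::vector<Node> &G, int Root) {
  std::vector<int> Worklist{Root};
  std::unordered_set<int> Visited;
  int Count = 0;
  while (!Worklist.empty()) {
    int N = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(N).second)
      continue; // already seen via another path
    ++Count;
    for (int U : G[N].Users)
      Worklist.push_back(U);
  }
  return Count;
}

int main() {
  // 0 -> 1, 0 -> 2, 1 -> 3, 2 -> 3: node 3 is a join, still counted once.
  std::vector<Node> G = {{{1, 2}}, {{3}}, {{3}}, {{}}};
  return countReachable(G, 0) == 4 ? 0 : 1;
}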
@@ -938,52 +930,143 @@ void SIFixSGPRCopies::analyzeVGPRToSGPRCopy(MachineInstr* MI) {
      AnalysisWorklist.push_back(U);
    }
  }
  V2SCopies[Info.ID] = Info;
}

// The main function that computes the VGPR to SGPR copy score
// and determines the copy's further lowering: v_readfirstlane_b32 or moveToVALU
bool SIFixSGPRCopies::needToBeConvertedToVALU(V2SCopyInfo *Info) {
  if (Info->SChain.empty()) {
    Info->Score = 0;
    return true;
  }
  Info->Siblings = SiblingPenalty[*std::max_element(
      Info->SChain.begin(), Info->SChain.end(),
      [&](MachineInstr *A, MachineInstr *B) -> bool {
        return SiblingPenalty[A].size() < SiblingPenalty[B].size();
      })];
  Info->Siblings.remove_if([&](unsigned ID) { return ID == Info->ID; });
  // The loop below computes the number of other VGPR to SGPR V2SCopies
  // which contribute to the current copy's SALU chain. We assume that all the
  // V2SCopies with the same source virtual register will be squashed to one
  // by regalloc. Also we take care of the V2SCopies of the different subregs
  // of the same register.
  SmallSet<std::pair<Register, unsigned>, 4> SrcRegs;
  for (auto J : Info->Siblings) {
    auto InfoIt = V2SCopies.find(J);
    if (InfoIt != V2SCopies.end()) {
      MachineInstr *SiblingCopy = InfoIt->getSecond().Copy;
      if (SiblingCopy->isImplicitDef())
        // the COPY has already been MoveToVALUed
        continue;

      SrcRegs.insert(std::make_pair(SiblingCopy->getOperand(1).getReg(),
                                    SiblingCopy->getOperand(1).getSubReg()));
    }
  }
  Info->SiblingPenalty = SrcRegs.size();

  unsigned Penalty =
      Info->NumSVCopies + Info->SiblingPenalty + Info->NumReadfirstlanes;
  unsigned Profit = Info->SChain.size();
  Info->Score = Penalty > Profit ? 0 : Profit - Penalty;
  Info->NeedToBeConvertedToVALU = Info->Score < 3;
  return Info->NeedToBeConvertedToVALU;
}

void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {

  // The main function that computes the VGPR to SGPR copy score
  // and determines the copy's further lowering: v_readfirstlane_b32 or moveToVALU
  auto needToBeConvertedToVALU = [&](V2SCopyInfo *I) -> bool {
    if (I->SChain.empty()) {
      I->Score = 0;
      return true;
    }
    I->Siblings = SiblingPenalty[*std::max_element(
        I->SChain.begin(), I->SChain.end(),
        [&](MachineInstr *A, MachineInstr *B) -> bool {
          return SiblingPenalty[A].size() < SiblingPenalty[B].size();
        })];
    I->Siblings.remove_if([&](unsigned ID) { return ID == I->ID; });
    // The loop below computes the number of other VGPR to SGPR V2SCopies
    // which contribute to the current copy's SALU chain. We assume that all
    // the V2SCopies with the same source virtual register will be squashed to
    // one by regalloc. Also we take care of the V2SCopies of the different
    // subregs of the same register.
    SmallSet<std::pair<Register, unsigned>, 4> SrcRegs;
    for (auto J : I->Siblings) {
      auto InfoIt = V2SCopies.find(J);
      if (InfoIt != V2SCopies.end()) {
        MachineInstr *SiblingCopy = InfoIt->getSecond().Copy;
        if (SiblingCopy->isImplicitDef())
          // the COPY has already been MoveToVALUed
          continue;

        SrcRegs.insert(std::make_pair(SiblingCopy->getOperand(1).getReg(),
                                      SiblingCopy->getOperand(1).getSubReg()));
      }
    }
    I->SiblingPenalty = SrcRegs.size();

    unsigned Penalty =
        I->NumSVCopies + I->SiblingPenalty + I->NumReadfirstlanes;
    unsigned Profit = I->SChain.size();
    I->Score = Penalty > Profit ? 0 : Profit - Penalty;
    I->NeedToBeConvertedToVALU = I->Score < 3;
    return I->NeedToBeConvertedToVALU;
  };

  auto needProcessing = [](MachineInstr &MI) -> bool {
    switch (MI.getOpcode()) {
    case AMDGPU::COPY:
    case AMDGPU::WQM:
    case AMDGPU::STRICT_WQM:
    case AMDGPU::SOFT_WQM:
    case AMDGPU::STRICT_WWM:
    case AMDGPU::REG_SEQUENCE:
    case AMDGPU::PHI:
      return true;
    default:
      return false;
    }
  };

  SmallSet<MachineInstr *, 4> OutOfOrderProcessedCopies;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
       ++BI) {
    MachineBasicBlock *MBB = &*BI;
    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
         ++I) {
      MachineInstr *MI = &*I;
      if (!needProcessing(*MI))
        continue;

      if (MI->isRegSequence() || MI->isPHI()) {
        MachineBasicBlock::iterator J = I;
        if (TRI->isSGPRClass(TII->getOpRegClass(*MI, 0))) {
          for (MachineOperand &MO : MI->operands()) {
            if (!MO.isReg() || !MO.getReg().isVirtual())
              continue;
            const TargetRegisterClass *SrcRC = MRI->getRegClass(MO.getReg());
            if (TRI->hasVectorRegisters(SrcRC)) {
              const TargetRegisterClass *DestRC =
                  TRI->getEquivalentSGPRClass(SrcRC);
              Register NewDst = MRI->createVirtualRegister(DestRC);
              MachineBasicBlock *BlockToInsertCopy = MBB;
              MachineBasicBlock::iterator PointToInsertCopy = I;
              if (MI->isPHI()) {
                BlockToInsertCopy =
                    MI->getOperand(MI->getOperandNo(&MO) + 1).getMBB();
                PointToInsertCopy =
                    BlockToInsertCopy->getFirstInstrTerminator();
              }
              MachineBasicBlock::iterator NewI =
                  BuildMI(*BlockToInsertCopy, PointToInsertCopy,
                          PointToInsertCopy->getDebugLoc(),
                          TII->get(AMDGPU::COPY), NewDst)
                      .addReg(MO.getReg());
              MO.setReg(NewDst);
              if (!MI->isPHI()) {
                I = NewI;
                MI = &*I;
              } else {
                // We insert the copy into the basic block that may have been
                // already processed. Pass it to the analysis explicitly.
                V2SCopyInfo In(getNextVGPRToSGPRCopyId(), MI,
                               TRI->getRegSizeInBits(*DestRC));
                analyzeVGPRToSGPRCopy(In);
                V2SCopies[In.ID] = In;
                OutOfOrderProcessedCopies.insert(MI);
              }
            }
          }
        }

        if (J == I)
          continue;
      }

      const TargetRegisterClass *SrcRC, *DstRC;
      std::tie(SrcRC, DstRC) = getCopyRegClasses(*MI, *TRI, *MRI);

      if (!isVGPRToSGPRCopy(SrcRC, DstRC, *TRI))
        continue;

      if (lowerSpecialCase(*MI))
        continue;

      if (OutOfOrderProcessedCopies.contains(MI))
        continue;

      V2SCopyInfo In(getNextVGPRToSGPRCopyId(), MI,
                     TRI->getRegSizeInBits(*DstRC));

      analyzeVGPRToSGPRCopy(In);

      V2SCopies[In.ID] = In;
    }
  }

  SmallVector<unsigned, 8> LoweringWorklist;
  for (auto &C : V2SCopies) {
    if (needToBeConvertedToVALU(&C.second))

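To make the scoring concrete: as both versions above read, Penalty = NumSVCopies + SiblingPenalty + NumReadfirstlanes, Profit = SChain.size(), Score = Profit - Penalty (clamped at zero), and the copy is sent to moveToVALU when Score < 3. A self-contained sketch of that arithmetic; the struct is a mock of V2SCopyInfo and the sample numbers are invented:

#include <cstdio>

// Mirror of the scoring arithmetic in needToBeConvertedToVALU; the field
// names follow V2SCopyInfo, but this struct is a standalone mock.
struct ScoreInputs {
  unsigned SChainSize;        // SALU instructions kept scalar if not converted
  unsigned NumSVCopies;       // SGPR-to-VGPR copies the chain feeds back into
  unsigned SiblingPenalty;    // distinct sources among sibling V2S copies
  unsigned NumReadfirstlanes; // v_readfirstlane_b32 cost of the copy itself
};

static bool needToBeConvertedToVALU(const ScoreInputs &I) {
  if (I.SChainSize == 0)
    return true; // nothing scalar to save: always convert
  unsigned Penalty = I.NumSVCopies + I.SiblingPenalty + I.NumReadfirstlanes;
  unsigned Profit = I.SChainSize;
  unsigned Score = Penalty > Profit ? 0 : Profit - Penalty;
  return Score < 3; // below the threshold, moveToVALU wins
}

int main() {
  // A 6-instruction scalar chain with cost 2 scores 4: keep it scalar.
  std::printf("convert? %d\n", needToBeConvertedToVALU({6, 1, 0, 1}));
  // A 3-instruction chain with cost 2 scores 1: convert to VALU.
  std::printf("convert? %d\n", needToBeConvertedToVALU({3, 1, 0, 1}));
}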
@@ -1059,46 +1142,3 @@ void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
      MI->eraseFromParent();
    }
  }
}

void SIFixSGPRCopies::fixSCCCopies(bool IsWave32) {
  for (auto MI : SCCCopies) {
    // May be lowered out
    if (!MI->getParent())
      continue;
    // May already have been lowered.
    if (!MI->isCopy())
      continue;
    Register SrcReg = MI->getOperand(1).getReg();
    Register DstReg = MI->getOperand(0).getReg();
    if (SrcReg == AMDGPU::SCC) {
      Register SCCCopy = MRI->createVirtualRegister(
          TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID));
      MachineBasicBlock::iterator I =
          BuildMI(*MI->getParent(),
                  std::next(MachineBasicBlock::iterator(MI)),
                  MI->getDebugLoc(),
                  TII->get(IsWave32 ? AMDGPU::S_CSELECT_B32
                                    : AMDGPU::S_CSELECT_B64),
                  SCCCopy)
              .addImm(-1)
              .addImm(0);
      BuildMI(*MI->getParent(), std::next(I), I->getDebugLoc(),
              TII->get(AMDGPU::COPY), DstReg)
          .addReg(SCCCopy);
      MI->eraseFromParent();
      continue;
    }

    if (DstReg == AMDGPU::SCC) {
      unsigned Opcode = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
      Register Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
      Register Tmp = MRI->createVirtualRegister(TRI->getBoolRC());
      BuildMI(*MI->getParent(), std::next(MachineBasicBlock::iterator(MI)),
              MI->getDebugLoc(), TII->get(Opcode))
          .addReg(Tmp, getDefRegState(true))
          .addReg(SrcReg)
          .addReg(Exec);
      MI->eraseFromParent();
    }
  }
}

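Both the inline handling above and fixSCCCopies pick wave-size-specific opcodes: a copy from SCC is materialized with S_CSELECT_B32/S_CSELECT_B64, and a copy to SCC is rebuilt by ANDing the source with EXEC_LO/EXEC. A small sketch of that selection, with plain strings standing in for the AMDGPU opcode and register enums:

#include <cstdio>

// Stand-ins for the AMDGPU opcode/register enums used by fixSCCCopies.
struct SCCLowering {
  const char *CselectOpc; // for COPY vreg <- SCC: tmp = CSELECT -1, 0
  const char *AndOpc;     // for COPY SCC <- vreg: tmp = AND src, exec
  const char *ExecReg;    // AND operand that masks in only active lanes
};

static SCCLowering pickSCCLowering(bool IsWave32) {
  if (IsWave32)
    return {"S_CSELECT_B32", "S_AND_B32", "EXEC_LO"};
  return {"S_CSELECT_B64", "S_AND_B64", "EXEC"};
}

int main() {
  for (bool IsWave32 : {true, false}) {
    SCCLowering L = pickSCCLowering(IsWave32);
    std::printf("wave%d: %s / %s with %s\n", IsWave32 ? 32 : 64,
                L.CselectOpc, L.AndOpc, L.ExecReg);
  }
}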
@@ -41,9 +41,9 @@ body: |
    ; W64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
    ; W64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
    ; W64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
    ; W64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
    ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
    ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
    ; W64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
    ; W64-NEXT: {{ $}}
    ; W64-NEXT: .1:

@@ -88,9 +88,9 @@ body: |
    ; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
    ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
    ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
    ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
    ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
    ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
    ; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
    ; W32-NEXT: {{ $}}
    ; W32-NEXT: .1:

@@ -160,10 +160,10 @@ body: |
    ; W64-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
    ; W64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
    ; W64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
    ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, killed [[COPY6]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
    ; W64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
    ; W64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
    ; W64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
    ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, [[COPY2]], %subreg.sub3
    ; W64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
    ; W64-NEXT: {{ $}}
    ; W64-NEXT: .1:

@@ -207,10 +207,10 @@ body: |
    ; W32-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
    ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
    ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
    ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, killed [[COPY6]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
    ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
    ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
    ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
    ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, [[COPY2]], %subreg.sub3
    ; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
    ; W32-NEXT: {{ $}}
    ; W32-NEXT: .1:

@@ -280,10 +280,10 @@ body: |
    ; W64-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
    ; W64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
    ; W64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
    ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, [[COPY2]], %subreg.sub3
    ; W64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
    ; W64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
    ; W64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
    ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, killed [[COPY6]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
    ; W64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
    ; W64-NEXT: {{ $}}
    ; W64-NEXT: .1:

@@ -327,10 +327,10 @@ body: |
    ; W32-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
    ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
    ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
    ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, [[COPY2]], %subreg.sub3
    ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
    ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
    ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
    ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, killed [[COPY6]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
    ; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
    ; W32-NEXT: {{ $}}
    ; W32-NEXT: .1:

@@ -400,9 +400,9 @@ body: |
    ; ADDR64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; ADDR64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; ADDR64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
    ; ADDR64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
    ; ADDR64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
    ; ADDR64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
    ; ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
    ; ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
    ; ADDR64-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
    ; ADDR64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
    ; ADDR64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0

@@ -429,9 +429,9 @@ body: |
    ; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
    ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
    ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
    ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
    ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
    ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
    ; W32-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
    ; W32-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
    ; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0

@@ -485,9 +485,9 @@ body: |
    ; ADDR64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; ADDR64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; ADDR64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
    ; ADDR64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
    ; ADDR64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
    ; ADDR64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
    ; ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
    ; ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
    ; ADDR64-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
    ; ADDR64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
    ; ADDR64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0

@@ -513,9 +513,9 @@ body: |
    ; W64-NO-ADDR64-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W64-NO-ADDR64-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W64-NO-ADDR64-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
    ; W64-NO-ADDR64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
    ; W64-NO-ADDR64-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
    ; W64-NO-ADDR64-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
    ; W64-NO-ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
    ; W64-NO-ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
    ; W64-NO-ADDR64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
    ; W64-NO-ADDR64-NEXT: {{ $}}
    ; W64-NO-ADDR64-NEXT: .1:

@@ -560,9 +560,9 @@ body: |
    ; W32-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W32-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
    ; W32-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]], implicit $exec
    ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]], implicit $exec
    ; W32-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY3]], implicit $exec
    ; W32-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY2]], implicit $exec
    ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY7]], %subreg.sub1, [[COPY3]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
    ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
    ; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
    ; W32-NEXT: {{ $}}
    ; W32-NEXT: .1:

@@ -433,7 +433,7 @@ define amdgpu_gfx i64 @strict_wwm_called_i64(i64 %a) noinline {
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
; GFX9-O0-NEXT: ; implicit-def: $sgpr34
; GFX9-O0-NEXT: ; implicit-def: $sgpr34
; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $exec
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr34_sgpr35

@@ -585,7 +585,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr40
; GFX9-O0-NEXT: ; implicit-def: $sgpr40
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec
; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
; GFX9-O0-NEXT: v_add_co_u32_e64 v2, s[40:41], v2, v4

@@ -722,7 +722,7 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10
; GFX9-O0-NEXT: ; implicit-def: $sgpr35
; GFX9-O0-NEXT: ; implicit-def: $sgpr35
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $exec
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $exec
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7
; GFX9-O0-NEXT: s_mov_b32 s35, 0x7fffffff

@@ -741,7 +741,7 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
; GFX9-O0-NEXT: ; implicit-def: $sgpr35
; GFX9-O0-NEXT: ; implicit-def: $sgpr35
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $exec
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $exec
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5

@@ -770,7 +770,7 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in
; GFX9-O0-NEXT: ; implicit-def: $sgpr35
; GFX9-O0-NEXT: ; implicit-def: $sgpr35
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $exec
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 killed $exec
; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 killed $exec
; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 killed $exec
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6_vgpr7_vgpr8 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11