forked from OSchip/llvm-project
AMDGPU/GlobalISel: Use waterfall loop for buffer_load
This adds support for more complex waterfall loops that need to handle operands > 32-bits, and multiple operands. llvm-svn: 361021
This commit is contained in:
parent
1400a35f71
commit
b3dc73634c
|
@ -105,6 +105,69 @@ const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
|
||||||
return getRegBank(AMDGPU::VGPRRegBankID);
|
return getRegBank(AMDGPU::VGPRRegBankID);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <unsigned NumOps>
|
||||||
|
RegisterBankInfo::InstructionMappings
|
||||||
|
AMDGPURegisterBankInfo::addMappingFromTable(
|
||||||
|
const MachineInstr &MI, const MachineRegisterInfo &MRI,
|
||||||
|
const std::array<unsigned, NumOps> RegSrcOpIdx,
|
||||||
|
ArrayRef<OpRegBankEntry<NumOps>> Table) const {
|
||||||
|
|
||||||
|
InstructionMappings AltMappings;
|
||||||
|
|
||||||
|
SmallVector<const ValueMapping *, 10> Operands(MI.getNumOperands());
|
||||||
|
|
||||||
|
unsigned Sizes[NumOps];
|
||||||
|
for (unsigned I = 0; I < NumOps; ++I) {
|
||||||
|
unsigned Reg = MI.getOperand(RegSrcOpIdx[I]).getReg();
|
||||||
|
Sizes[I] = getSizeInBits(Reg, MRI, *TRI);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned I = 0, E = MI.getNumExplicitDefs(); I != E; ++I) {
|
||||||
|
unsigned SizeI = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
|
||||||
|
Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned MappingID = 0;
|
||||||
|
for (const auto &Entry : Table) {
|
||||||
|
for (unsigned I = 0; I < NumOps; ++I) {
|
||||||
|
int OpIdx = RegSrcOpIdx[I];
|
||||||
|
Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[I], Sizes[I]);
|
||||||
|
}
|
||||||
|
|
||||||
|
AltMappings.push_back(&getInstructionMapping(MappingID++, Entry.Cost,
|
||||||
|
getOperandsMapping(Operands),
|
||||||
|
Operands.size()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return AltMappings;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Produce the alternative register-bank mappings for a
/// G_INTRINSIC_W_SIDE_EFFECTS instruction.
///
/// Only amdgcn_buffer_load has a dedicated table; every other intrinsic is
/// forwarded to RegisterBankInfo::getInstrAlternativeMappings.
RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
    const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
  // The intrinsic ID operand sits immediately after the explicit defs.
  const unsigned IntrinsicOpIdx = MI.getNumExplicitDefs();
  if (MI.getOperand(IntrinsicOpIdx).getIntrinsicID() !=
      Intrinsic::amdgcn_buffer_load)
    return RegisterBankInfo::getInstrAlternativeMappings(MI);

  // Bank choices for (rsrc, voffset, offset), paired with a cost.
  static const OpRegBankEntry<3> BufferLoadBanks[4] = {
    // Perfectly legal.
    { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
    { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

    // Waterfall loop needed for rsrc. In the worst case this will execute
    // approximately an extra 10 * wavesize + 2 instructions.
    { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
    { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1000 }
  };

  // Operand indices of rsrc, voffset, offset within MI.
  const std::array<unsigned, 3> SrcOperandIndices = { 2, 3, 4 };

  return addMappingFromTable<3>(MI, MRI, SrcOperandIndices,
                                makeArrayRef(BufferLoadBanks));
}
|
||||||
|
|
||||||
RegisterBankInfo::InstructionMappings
|
RegisterBankInfo::InstructionMappings
|
||||||
AMDGPURegisterBankInfo::getInstrAlternativeMappings(
|
AMDGPURegisterBankInfo::getInstrAlternativeMappings(
|
||||||
const MachineInstr &MI) const {
|
const MachineInstr &MI) const {
|
||||||
|
@ -283,6 +346,8 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
|
||||||
AltMappings.push_back(&VMapping);
|
AltMappings.push_back(&VMapping);
|
||||||
return AltMappings;
|
return AltMappings;
|
||||||
}
|
}
|
||||||
|
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
|
||||||
|
return getInstrAlternativeMappingsIntrinsicWSideEffects(MI, MRI);
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -333,7 +398,21 @@ static LLT getHalfSizedType(LLT Ty) {
|
||||||
/// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If
|
/// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If
|
||||||
/// any of the required SGPR operands are VGPRs, perform a waterfall loop to
|
/// any of the required SGPR operands are VGPRs, perform a waterfall loop to
|
||||||
/// execute the instruction for each unique combination of values in all lanes
|
/// execute the instruction for each unique combination of values in all lanes
|
||||||
/// in the wave. The block will be split such that new blocks
|
/// in the wave. The block will be split such that the rest of the instructions are
|
||||||
|
/// moved to a new block.
|
||||||
|
///
|
||||||
|
/// Essentially performs this loop:
|
||||||
|
///
|
||||||
|
/// Save Execution Mask
|
||||||
|
/// For (Lane : Wavefront) {
|
||||||
|
/// Enable Lane, Disable all other lanes
|
||||||
|
/// SGPR = read SGPR value for current lane from VGPR
|
||||||
|
/// VGPRResult[Lane] = use_op SGPR
|
||||||
|
/// }
|
||||||
|
/// Restore Execution Mask
|
||||||
|
///
|
||||||
|
/// There is additional complexity in comparing values to identify the
|
||||||
|
/// unique values used.
|
||||||
void AMDGPURegisterBankInfo::executeInWaterfallLoop(
|
void AMDGPURegisterBankInfo::executeInWaterfallLoop(
|
||||||
MachineInstr &MI, MachineRegisterInfo &MRI,
|
MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||||
ArrayRef<unsigned> OpIndices) const {
|
ArrayRef<unsigned> OpIndices) const {
|
||||||
|
@ -345,9 +424,6 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop(
|
||||||
MachineBasicBlock &MBB = *MI.getParent();
|
MachineBasicBlock &MBB = *MI.getParent();
|
||||||
const DebugLoc &DL = MI.getDebugLoc();
|
const DebugLoc &DL = MI.getDebugLoc();
|
||||||
|
|
||||||
assert(OpIndices.size() == 1 &&
|
|
||||||
"need to implement support for multiple operands");
|
|
||||||
|
|
||||||
// Use a set to avoid extra readfirstlanes in the case where multiple operands
|
// Use a set to avoid extra readfirstlanes in the case where multiple operands
|
||||||
// are the same register.
|
// are the same register.
|
||||||
SmallSet<unsigned, 4> SGPROperandRegs;
|
SmallSet<unsigned, 4> SGPROperandRegs;
|
||||||
|
@ -386,13 +462,8 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop(
|
||||||
B.buildInstr(TargetOpcode::IMPLICIT_DEF)
|
B.buildInstr(TargetOpcode::IMPLICIT_DEF)
|
||||||
.addDef(InitSaveExecReg);
|
.addDef(InitSaveExecReg);
|
||||||
|
|
||||||
// Save the EXEC mask
|
|
||||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64_term), SaveExecReg)
|
|
||||||
.addReg(AMDGPU::EXEC);
|
|
||||||
|
|
||||||
unsigned PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
|
unsigned PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
|
||||||
unsigned NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
|
unsigned NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
|
||||||
unsigned CondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
|
|
||||||
|
|
||||||
// To insert the loop we need to split the block. Move everything before this
|
// To insert the loop we need to split the block. Move everything before this
|
||||||
// point to a new block, and insert a new empty block before this instruction.
|
// point to a new block, and insert a new empty block before this instruction.
|
||||||
|
@ -437,37 +508,172 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop(
|
||||||
LoopBB->splice(LoopBB->end(), &MBB, I);
|
LoopBB->splice(LoopBB->end(), &MBB, I);
|
||||||
I = std::prev(LoopBB->end());
|
I = std::prev(LoopBB->end());
|
||||||
|
|
||||||
|
B.setInstr(*I);
|
||||||
|
|
||||||
|
unsigned CondReg = AMDGPU::NoRegister;
|
||||||
|
|
||||||
for (MachineOperand &Op : MI.uses()) {
|
for (MachineOperand &Op : MI.uses()) {
|
||||||
if (!Op.isReg())
|
if (!Op.isReg())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
assert(!Op.isDef());
|
assert(!Op.isDef());
|
||||||
if (SGPROperandRegs.count(Op.getReg())) {
|
if (SGPROperandRegs.count(Op.getReg())) {
|
||||||
unsigned CurrentLaneOpReg
|
LLT OpTy = MRI.getType(Op.getReg());
|
||||||
= MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
|
unsigned OpSize = OpTy.getSizeInBits();
|
||||||
MRI.setType(CurrentLaneOpReg, LLT::scalar(32)); // FIXME
|
|
||||||
|
|
||||||
assert(MRI.getType(Op.getReg())== LLT::scalar(32) &&
|
// Can only do a readlane of 32-bit pieces.
|
||||||
"need to implement support for other types");
|
if (OpSize == 32) {
|
||||||
|
// Avoid extra copies in the simple case of one 32-bit register.
|
||||||
|
unsigned CurrentLaneOpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
|
||||||
|
MRI.setType(CurrentLaneOpReg, OpTy);
|
||||||
|
|
||||||
constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
|
constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
|
||||||
|
// Read the next variant <- also loop target.
|
||||||
|
BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), CurrentLaneOpReg)
|
||||||
|
.addReg(Op.getReg());
|
||||||
|
|
||||||
|
unsigned NewCondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
|
||||||
|
bool First = CondReg == AMDGPU::NoRegister;
|
||||||
|
if (First)
|
||||||
|
CondReg = NewCondReg;
|
||||||
|
|
||||||
|
// Compare the just read M0 value to all possible Idx values.
|
||||||
|
B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
|
||||||
|
.addDef(NewCondReg)
|
||||||
|
.addReg(CurrentLaneOpReg)
|
||||||
|
.addReg(Op.getReg());
|
||||||
|
Op.setReg(CurrentLaneOpReg);
|
||||||
|
|
||||||
|
if (!First) {
|
||||||
|
unsigned AndReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
|
||||||
|
|
||||||
|
// If there are multiple operands to consider, and the conditions.
|
||||||
|
B.buildInstr(AMDGPU::S_AND_B64)
|
||||||
|
.addDef(AndReg)
|
||||||
|
.addReg(NewCondReg)
|
||||||
|
.addReg(CondReg);
|
||||||
|
CondReg = AndReg;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
LLT S32 = LLT::scalar(32);
|
||||||
|
SmallVector<unsigned, 8> ReadlanePieces;
|
||||||
|
|
||||||
|
// The compares can be done as 64-bit, but the extract needs to be done
|
||||||
|
// in 32-bit pieces.
|
||||||
|
|
||||||
|
bool Is64 = OpSize % 64 == 0;
|
||||||
|
|
||||||
|
LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32);
|
||||||
|
unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64
|
||||||
|
: AMDGPU::V_CMP_EQ_U32_e64;
|
||||||
|
|
||||||
|
// The compares can be done as 64-bit, but the extract needs to be done
|
||||||
|
// in 32-bit pieces.
|
||||||
|
|
||||||
|
// Insert the unmerge before the loop.
|
||||||
|
|
||||||
|
B.setMBB(MBB);
|
||||||
|
auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg());
|
||||||
|
B.setInstr(*I);
|
||||||
|
|
||||||
|
unsigned NumPieces = Unmerge->getNumOperands() - 1;
|
||||||
|
for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
|
||||||
|
unsigned UnmergePiece = Unmerge.getReg(PieceIdx);
|
||||||
|
|
||||||
|
unsigned CurrentLaneOpReg;
|
||||||
|
if (Is64) {
|
||||||
|
unsigned CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
|
||||||
|
unsigned CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32);
|
||||||
|
|
||||||
|
MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass);
|
||||||
|
MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass);
|
||||||
|
MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass);
|
||||||
|
|
||||||
|
// FIXME: Should be able to just use a subreg index here.
|
||||||
|
auto Unmerge32 = B.buildUnmerge(S32, UnmergePiece);
|
||||||
|
|
||||||
|
MRI.setRegClass(Unmerge32.getReg(0), &AMDGPU::VGPR_32RegClass);
|
||||||
|
MRI.setRegClass(Unmerge32.getReg(1), &AMDGPU::VGPR_32RegClass);
|
||||||
|
|
||||||
|
// Read the next variant <- also loop target.
|
||||||
|
BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
|
||||||
|
CurrentLaneOpRegLo)
|
||||||
|
.addReg(Unmerge32.getReg(0));
|
||||||
|
|
||||||
|
// Read the next variant <- also loop target.
|
||||||
|
BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
|
||||||
|
CurrentLaneOpRegHi)
|
||||||
|
.addReg(Unmerge32.getReg(1));
|
||||||
|
|
||||||
|
CurrentLaneOpReg =
|
||||||
|
B.buildMerge(LLT::scalar(64),
|
||||||
|
{CurrentLaneOpRegLo, CurrentLaneOpRegHi})
|
||||||
|
.getReg(0);
|
||||||
|
|
||||||
|
MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass);
|
||||||
|
|
||||||
|
if (OpTy.getScalarSizeInBits() == 64) {
|
||||||
|
// If we need to produce a 64-bit element vector, so use the
|
||||||
|
// merged pieces
|
||||||
|
ReadlanePieces.push_back(CurrentLaneOpReg);
|
||||||
|
} else {
|
||||||
|
// 32-bit element type.
|
||||||
|
ReadlanePieces.push_back(CurrentLaneOpRegLo);
|
||||||
|
ReadlanePieces.push_back(CurrentLaneOpRegHi);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
CurrentLaneOpReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
|
||||||
|
MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass);
|
||||||
|
MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass);
|
||||||
|
|
||||||
// Read the next variant <- also loop target.
|
// Read the next variant <- also loop target.
|
||||||
BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
|
BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
|
||||||
CurrentLaneOpReg)
|
CurrentLaneOpReg)
|
||||||
.addReg(Op.getReg());
|
.addReg(UnmergePiece);
|
||||||
|
ReadlanePieces.push_back(CurrentLaneOpReg);
|
||||||
|
}
|
||||||
|
|
||||||
// FIXME: Need to and each conditon
|
unsigned NewCondReg
|
||||||
|
= MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
|
||||||
|
bool First = CondReg == AMDGPU::NoRegister;
|
||||||
|
if (First)
|
||||||
|
CondReg = NewCondReg;
|
||||||
|
|
||||||
// Compare the just read SGPR value to all possible operand values.
|
B.buildInstr(CmpOp)
|
||||||
B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
|
.addDef(NewCondReg)
|
||||||
.addDef(CondReg)
|
|
||||||
.addReg(CurrentLaneOpReg)
|
.addReg(CurrentLaneOpReg)
|
||||||
.addReg(Op.getReg());
|
.addReg(UnmergePiece);
|
||||||
Op.setReg(CurrentLaneOpReg);
|
|
||||||
|
if (!First) {
|
||||||
|
unsigned AndReg
|
||||||
|
= MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
|
||||||
|
|
||||||
|
// If there are multiple operands to consider, and the conditions.
|
||||||
|
B.buildInstr(AMDGPU::S_AND_B64)
|
||||||
|
.addDef(AndReg)
|
||||||
|
.addReg(NewCondReg)
|
||||||
|
.addReg(CondReg);
|
||||||
|
CondReg = AndReg;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: Build merge seems to switch to CONCAT_VECTORS but not
|
||||||
|
// BUILD_VECTOR
|
||||||
|
if (OpTy.isVector()) {
|
||||||
|
auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
|
||||||
|
Op.setReg(Merge.getReg(0));
|
||||||
|
} else {
|
||||||
|
auto Merge = B.buildMerge(OpTy, ReadlanePieces);
|
||||||
|
Op.setReg(Merge.getReg(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
MRI.setRegBank(Op.getReg(), getRegBank(AMDGPU::SGPRRegBankID));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
B.setInsertPt(*LoopBB, LoopBB->end());
|
||||||
|
|
||||||
// Update EXEC, save the original EXEC value to VCC.
|
// Update EXEC, save the original EXEC value to VCC.
|
||||||
B.buildInstr(AMDGPU::S_AND_SAVEEXEC_B64)
|
B.buildInstr(AMDGPU::S_AND_SAVEEXEC_B64)
|
||||||
.addDef(NewExec)
|
.addDef(NewExec)
|
||||||
|
@ -488,7 +694,12 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop(
|
||||||
B.buildInstr(AMDGPU::S_CBRANCH_EXECNZ)
|
B.buildInstr(AMDGPU::S_CBRANCH_EXECNZ)
|
||||||
.addMBB(LoopBB);
|
.addMBB(LoopBB);
|
||||||
|
|
||||||
// Restore the EXEC mask
|
// Save the EXEC mask before the loop.
|
||||||
|
BuildMI(MBB, MBB.end(), DL, TII->get(AMDGPU::S_MOV_B64_term), SaveExecReg)
|
||||||
|
.addReg(AMDGPU::EXEC);
|
||||||
|
|
||||||
|
// Restore the EXEC mask after the loop.
|
||||||
|
B.setMBB(*RestoreExecBB);
|
||||||
B.buildInstr(AMDGPU::S_MOV_B64_term)
|
B.buildInstr(AMDGPU::S_MOV_B64_term)
|
||||||
.addDef(AMDGPU::EXEC)
|
.addDef(AMDGPU::EXEC)
|
||||||
.addReg(SaveExecReg);
|
.addReg(SaveExecReg);
|
||||||
|
@ -606,6 +817,18 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
|
||||||
applyDefaultMapping(OpdMapper);
|
applyDefaultMapping(OpdMapper);
|
||||||
executeInWaterfallLoop(MI, MRI, { 2 });
|
executeInWaterfallLoop(MI, MRI, { 2 });
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
|
||||||
|
switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
|
||||||
|
case Intrinsic::amdgcn_buffer_load: {
|
||||||
|
executeInWaterfallLoop(MI, MRI, { 2 });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1012,7 +1235,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case AMDGPU::G_INTRINSIC: {
|
case AMDGPU::G_INTRINSIC: {
|
||||||
switch (MI.getOperand(1).getIntrinsicID()) {
|
switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
|
||||||
default:
|
default:
|
||||||
return getInvalidInstructionMapping();
|
return getInvalidInstructionMapping();
|
||||||
case Intrinsic::maxnum:
|
case Intrinsic::maxnum:
|
||||||
|
@ -1034,7 +1257,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
|
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
|
||||||
switch (MI.getOperand(0).getIntrinsicID()) {
|
switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
|
||||||
default:
|
default:
|
||||||
return getInvalidInstructionMapping();
|
return getInvalidInstructionMapping();
|
||||||
case Intrinsic::amdgcn_exp_compr:
|
case Intrinsic::amdgcn_exp_compr:
|
||||||
|
@ -1063,7 +1286,33 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||||
OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
|
OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
|
||||||
OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
|
OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
|
||||||
break;
|
break;
|
||||||
|
case Intrinsic::amdgcn_buffer_load: {
|
||||||
|
unsigned RSrc = MI.getOperand(2).getReg(); // SGPR
|
||||||
|
unsigned VIndex = MI.getOperand(3).getReg(); // VGPR
|
||||||
|
unsigned Offset = MI.getOperand(4).getReg(); // SGPR/VGPR/imm
|
||||||
|
|
||||||
|
unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
|
||||||
|
unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
|
||||||
|
unsigned Size3 = MRI.getType(VIndex).getSizeInBits();
|
||||||
|
unsigned Size4 = MRI.getType(Offset).getSizeInBits();
|
||||||
|
|
||||||
|
unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
|
||||||
|
unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);
|
||||||
|
|
||||||
|
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);
|
||||||
|
OpdsMapping[1] = nullptr; // intrinsic id
|
||||||
|
|
||||||
|
// Lie and claim everything is legal, even though some need to be
|
||||||
|
// SGPRs. applyMapping will have to deal with it as a waterfall loop.
|
||||||
|
OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
|
||||||
|
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size3);
|
||||||
|
OpdsMapping[4] = AMDGPU::getValueMapping(OffsetBank, Size4);
|
||||||
|
OpdsMapping[5] = nullptr;
|
||||||
|
OpdsMapping[6] = nullptr;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case AMDGPU::G_SELECT: {
|
case AMDGPU::G_SELECT: {
|
||||||
|
@ -1121,7 +1370,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
|
return getInstructionMapping(/*ID*/1, /*Cost*/1,
|
||||||
|
getOperandsMapping(OpdsMapping),
|
||||||
MI.getNumOperands());
|
MI.getNumOperands());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -58,6 +58,22 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
|
||||||
LLT HalfTy,
|
LLT HalfTy,
|
||||||
unsigned Reg) const;
|
unsigned Reg) const;
|
||||||
|
|
||||||
|
/// One row of a register-bank alternatives table consumed by
/// addMappingFromTable.
///
/// \tparam NumOps Number of source operands the row assigns a bank to.
///
/// The field order matters: tables are written as aggregate initializers of
/// the form { { bank, bank, ... }, cost }.
template <unsigned NumOps>
|
||||||
|
struct OpRegBankEntry {
|
||||||
|
/// Register bank ID for each of the NumOps operands, in table order.
int8_t RegBanks[NumOps];
|
||||||
|
/// Cost passed to getInstructionMapping for this alternative.
int16_t Cost;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Build one alternative instruction mapping for \p MI per entry of \p Table.
///
/// \param RegSrcOpIdx Operand indices in \p MI of the NumOps source operands
///                    whose register bank is taken from each table entry.
/// \param Table       Bank assignments and cost for each alternative.
template <unsigned NumOps>
|
||||||
|
InstructionMappings
|
||||||
|
addMappingFromTable(const MachineInstr &MI, const MachineRegisterInfo &MRI,
|
||||||
|
const std::array<unsigned, NumOps> RegSrcOpIdx,
|
||||||
|
ArrayRef<OpRegBankEntry<NumOps>> Table) const;
|
||||||
|
|
||||||
|
/// Return the alternative mappings for a G_INTRINSIC_W_SIDE_EFFECTS
/// instruction, selected by the intrinsic ID operand of \p MI.
RegisterBankInfo::InstructionMappings
|
||||||
|
getInstrAlternativeMappingsIntrinsicWSideEffects(
|
||||||
|
const MachineInstr &MI, const MachineRegisterInfo &MRI) const;
|
||||||
|
|
||||||
bool isSALUMapping(const MachineInstr &MI) const;
|
bool isSALUMapping(const MachineInstr &MI) const;
|
||||||
const InstructionMapping &getDefaultMappingSOP(const MachineInstr &MI) const;
|
const InstructionMapping &getDefaultMappingSOP(const MachineInstr &MI) const;
|
||||||
const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const;
|
const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const;
|
||||||
|
|
|
@ -0,0 +1,289 @@
|
||||||
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s
|
||||||
|
|
||||||
|
---
|
||||||
|
name: buffer_load_sss
|
||||||
|
legalized: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: buffer_load_sss
|
||||||
|
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||||
|
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
|
||||||
|
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||||
|
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[COPY]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
|
||||||
|
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||||
|
%1:_(s32) = COPY $sgpr4
|
||||||
|
%2:_(s32) = COPY $sgpr5
|
||||||
|
%3:_(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), %0, %1, %2, 0, 0
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: buffer_load_ssv
|
||||||
|
legalized: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr5
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: buffer_load_ssv
|
||||||
|
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr5
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||||
|
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
|
||||||
|
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||||
|
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[COPY]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
|
||||||
|
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||||
|
%1:_(s32) = COPY $sgpr4
|
||||||
|
%2:_(s32) = COPY $vgpr5
|
||||||
|
%3:_(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), %0, %1, %2, 0, 0
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: buffer_load_svs
|
||||||
|
legalized: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: buffer_load_svs
|
||||||
|
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||||
|
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
|
||||||
|
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||||
|
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[COPY]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
|
||||||
|
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||||
|
%1:_(s32) = COPY $sgpr4
|
||||||
|
%2:_(s32) = COPY $sgpr5
|
||||||
|
%3:_(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), %0, %1, %2, 0, 0
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: buffer_load_vss
|
||||||
|
legalized: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4, $sgpr5
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: buffer_load_vss
|
||||||
|
; CHECK: successors: %bb.1(0x80000000)
|
||||||
|
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4, $sgpr5
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||||
|
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
|
||||||
|
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||||
|
; CHECK: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
|
||||||
|
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||||
|
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
|
||||||
|
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||||
|
; CHECK: .1:
|
||||||
|
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||||
|
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %10, %bb.1
|
||||||
|
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
|
||||||
|
; CHECK: [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[UV]](s64)
|
||||||
|
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec
|
||||||
|
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec
|
||||||
|
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
|
||||||
|
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
|
||||||
|
; CHECK: [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[UV1]](s64)
|
||||||
|
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec
|
||||||
|
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec
|
||||||
|
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||||
|
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
|
||||||
|
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||||
|
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||||
|
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
|
||||||
|
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||||
|
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||||
|
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||||
|
; CHECK: .2:
|
||||||
|
; CHECK: successors: %bb.3(0x80000000)
|
||||||
|
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||||
|
; CHECK: .3:
|
||||||
|
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||||
|
%1:_(s32) = COPY $sgpr4
|
||||||
|
%2:_(s32) = COPY $sgpr5
|
||||||
|
%3:_(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), %0, %1, %2, 0, 0
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: buffer_load_vvs
|
||||||
|
legalized: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $sgpr5
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: buffer_load_vvs
|
||||||
|
; CHECK: successors: %bb.1(0x80000000)
|
||||||
|
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $sgpr5
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
|
||||||
|
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
|
||||||
|
; CHECK: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
|
||||||
|
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||||
|
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
|
||||||
|
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||||
|
; CHECK: .1:
|
||||||
|
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||||
|
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %9, %bb.1
|
||||||
|
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
|
||||||
|
; CHECK: [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[UV]](s64)
|
||||||
|
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec
|
||||||
|
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec
|
||||||
|
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
|
||||||
|
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
|
||||||
|
; CHECK: [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[UV1]](s64)
|
||||||
|
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec
|
||||||
|
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec
|
||||||
|
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||||
|
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
|
||||||
|
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||||
|
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||||
|
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
|
||||||
|
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||||
|
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||||
|
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||||
|
; CHECK: .2:
|
||||||
|
; CHECK: successors: %bb.3(0x80000000)
|
||||||
|
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||||
|
; CHECK: .3:
|
||||||
|
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||||
|
%1:_(s32) = COPY $vgpr4
|
||||||
|
%2:_(s32) = COPY $sgpr5
|
||||||
|
%3:_(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), %0, %1, %2, 0, 0
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: buffer_load_svv
|
||||||
|
legalized: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr4, $vgpr5
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: buffer_load_svv
|
||||||
|
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr4, $vgpr5
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
|
||||||
|
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
|
||||||
|
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[COPY]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
|
||||||
|
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||||
|
%1:_(s32) = COPY $vgpr4
|
||||||
|
%2:_(s32) = COPY $vgpr5
|
||||||
|
%3:_(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), %0, %1, %2, 0, 0
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: buffer_load_vsv
|
||||||
|
legalized: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4, $vgpr5
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: buffer_load_vsv
|
||||||
|
; CHECK: successors: %bb.1(0x80000000)
|
||||||
|
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4, $vgpr5
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
|
||||||
|
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
|
||||||
|
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||||
|
; CHECK: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
|
||||||
|
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||||
|
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
|
||||||
|
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||||
|
; CHECK: .1:
|
||||||
|
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||||
|
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %10, %bb.1
|
||||||
|
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
|
||||||
|
; CHECK: [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[UV]](s64)
|
||||||
|
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec
|
||||||
|
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec
|
||||||
|
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
|
||||||
|
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
|
||||||
|
; CHECK: [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[UV1]](s64)
|
||||||
|
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec
|
||||||
|
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec
|
||||||
|
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||||
|
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
|
||||||
|
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||||
|
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||||
|
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
|
||||||
|
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||||
|
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||||
|
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||||
|
; CHECK: .2:
|
||||||
|
; CHECK: successors: %bb.3(0x80000000)
|
||||||
|
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||||
|
; CHECK: .3:
|
||||||
|
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||||
|
%1:_(s32) = COPY $sgpr4
|
||||||
|
%2:_(s32) = COPY $vgpr5
|
||||||
|
%3:_(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), %0, %1, %2, 0, 0
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: buffer_load_vvv
|
||||||
|
legalized: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $vgpr5
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: buffer_load_vvv
|
||||||
|
; CHECK: successors: %bb.1(0x80000000)
|
||||||
|
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $vgpr5
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
|
||||||
|
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
|
||||||
|
; CHECK: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
|
||||||
|
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||||
|
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
|
||||||
|
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||||
|
; CHECK: .1:
|
||||||
|
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||||
|
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %9, %bb.1
|
||||||
|
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %3(<4 x s32>), %bb.1
|
||||||
|
; CHECK: [[UV2:%[0-9]+]]:vgpr_32(s32), [[UV3:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[UV]](s64)
|
||||||
|
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]](s32), implicit $exec
|
||||||
|
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]](s32), implicit $exec
|
||||||
|
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
|
||||||
|
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
|
||||||
|
; CHECK: [[UV4:%[0-9]+]]:vgpr_32(s32), [[UV5:%[0-9]+]]:vgpr_32(s32) = G_UNMERGE_VALUES [[UV1]](s64)
|
||||||
|
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]](s32), implicit $exec
|
||||||
|
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]](s32), implicit $exec
|
||||||
|
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||||
|
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
|
||||||
|
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||||
|
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
|
||||||
|
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32), [[COPY2]](s32), 0, 0
|
||||||
|
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||||
|
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||||
|
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||||
|
; CHECK: .2:
|
||||||
|
; CHECK: successors: %bb.3(0x80000000)
|
||||||
|
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||||
|
; CHECK: .3:
|
||||||
|
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||||
|
%1:_(s32) = COPY $vgpr4
|
||||||
|
%2:_(s32) = COPY $vgpr5
|
||||||
|
%3:_(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.buffer.load), %0, %1, %2, 0, 0
|
||||||
|
|
||||||
|
...
|
||||||
|
|
|
@ -44,14 +44,14 @@ body: |
|
||||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %9, %bb.1
|
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %9, %bb.1
|
||||||
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1
|
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1
|
||||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
||||||
; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
|
|
||||||
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
|
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
|
||||||
|
; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
|
||||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||||
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
|
||||||
; CHECK: .2:
|
; CHECK: .2:
|
||||||
; CHECK: successors: %bb.3(0x80000000)
|
; CHECK: successors: %bb.3(0x80000000)
|
||||||
|
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||||
; CHECK: .3:
|
; CHECK: .3:
|
||||||
; CHECK: $vgpr0 = COPY [[EVEC]](s32)
|
; CHECK: $vgpr0 = COPY [[EVEC]](s32)
|
||||||
%0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
%0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||||
|
@ -101,14 +101,14 @@ body: |
|
||||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %8, %bb.1
|
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %8, %bb.1
|
||||||
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1
|
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1
|
||||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
||||||
; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
|
|
||||||
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
|
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
|
||||||
|
; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
|
||||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||||
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
|
||||||
; CHECK: .2:
|
; CHECK: .2:
|
||||||
; CHECK: successors: %bb.3(0x80000000)
|
; CHECK: successors: %bb.3(0x80000000)
|
||||||
|
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||||
; CHECK: .3:
|
; CHECK: .3:
|
||||||
; CHECK: $vgpr0 = COPY [[EVEC]](s32)
|
; CHECK: $vgpr0 = COPY [[EVEC]](s32)
|
||||||
%0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
%0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||||
|
@ -138,14 +138,14 @@ body: |
|
||||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %8, %bb.1
|
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %8, %bb.1
|
||||||
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s64) = G_PHI [[DEF]](s64), %bb.0, %2(s64), %bb.1
|
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s64) = G_PHI [[DEF]](s64), %bb.0, %2(s64), %bb.1
|
||||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
||||||
; CHECK: [[EVEC:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<8 x s64>), [[V_READFIRSTLANE_B32_]](s32)
|
|
||||||
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
|
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
|
||||||
|
; CHECK: [[EVEC:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<8 x s64>), [[V_READFIRSTLANE_B32_]](s32)
|
||||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||||
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
|
||||||
; CHECK: .2:
|
; CHECK: .2:
|
||||||
; CHECK: successors: %bb.3(0x80000000)
|
; CHECK: successors: %bb.3(0x80000000)
|
||||||
|
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||||
; CHECK: .3:
|
; CHECK: .3:
|
||||||
; CHECK: $vgpr0_vgpr1 = COPY [[EVEC]](s64)
|
; CHECK: $vgpr0_vgpr1 = COPY [[EVEC]](s64)
|
||||||
%0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
%0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||||
|
|
Loading…
Reference in New Issue