forked from OSchip/llvm-project
[AMDGPU] gfx1010 base changes for wave32
Differential Revision: https://reviews.llvm.org/D63293 llvm-svn: 363299
This commit is contained in:
parent
ed9c7ec73d
commit
8bcc9bb595
|
@ -69,9 +69,11 @@ class PredicateControl {
|
||||||
Predicate SubtargetPredicate = TruePredicate;
|
Predicate SubtargetPredicate = TruePredicate;
|
||||||
list<Predicate> AssemblerPredicates = [];
|
list<Predicate> AssemblerPredicates = [];
|
||||||
Predicate AssemblerPredicate = TruePredicate;
|
Predicate AssemblerPredicate = TruePredicate;
|
||||||
|
Predicate WaveSizePredicate = TruePredicate;
|
||||||
list<Predicate> OtherPredicates = [];
|
list<Predicate> OtherPredicates = [];
|
||||||
list<Predicate> Predicates = !listconcat([SubtargetPredicate,
|
list<Predicate> Predicates = !listconcat([SubtargetPredicate,
|
||||||
AssemblerPredicate],
|
AssemblerPredicate,
|
||||||
|
WaveSizePredicate],
|
||||||
AssemblerPredicates,
|
AssemblerPredicates,
|
||||||
OtherPredicates);
|
OtherPredicates);
|
||||||
}
|
}
|
||||||
|
|
|
@ -94,6 +94,16 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
|
||||||
|
|
||||||
FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
|
FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
|
||||||
|
|
||||||
|
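// Wavefront sizes are mutually exclusive: if FS enables any wavefront size,
// explicitly disable every size that FS does not mention.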
|
||||||
|
if (FS.find_lower("+wavefrontsize") != StringRef::npos) {
|
||||||
|
if (FS.find_lower("wavefrontsize16") == StringRef::npos)
|
||||||
|
FullFS += "-wavefrontsize16,";
|
||||||
|
if (FS.find_lower("wavefrontsize32") == StringRef::npos)
|
||||||
|
FullFS += "-wavefrontsize32,";
|
||||||
|
if (FS.find_lower("wavefrontsize64") == StringRef::npos)
|
||||||
|
FullFS += "-wavefrontsize64,";
|
||||||
|
}
|
||||||
|
|
||||||
FullFS += FS;
|
FullFS += FS;
|
||||||
|
|
||||||
ParseSubtargetFeatures(GPU, FullFS);
|
ParseSubtargetFeatures(GPU, FullFS);
|
||||||
|
|
|
@ -375,6 +375,8 @@ public:
|
||||||
return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
|
return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool isBoolReg() const;
|
||||||
|
|
||||||
bool isSCSrcF16() const {
|
bool isSCSrcF16() const {
|
||||||
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
|
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
|
||||||
}
|
}
|
||||||
|
@ -616,6 +618,10 @@ public:
|
||||||
|
|
||||||
void addRegOperands(MCInst &Inst, unsigned N) const;
|
void addRegOperands(MCInst &Inst, unsigned N) const;
|
||||||
|
|
||||||
|
// Adds a boolean-register operand to Inst. Boolean registers need no special
// handling here, so this simply forwards to addRegOperands.
void addBoolRegOperands(MCInst &Inst, unsigned N) const {
|
||||||
|
addRegOperands(Inst, N);
|
||||||
|
}
|
||||||
|
|
||||||
void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
|
void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
|
||||||
if (isRegKind())
|
if (isRegKind())
|
||||||
addRegOperands(Inst, N);
|
addRegOperands(Inst, N);
|
||||||
|
@ -881,6 +887,8 @@ private:
|
||||||
/// \param VCCUsed [in] Whether VCC special SGPR is reserved.
|
/// \param VCCUsed [in] Whether VCC special SGPR is reserved.
|
||||||
/// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
|
/// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
|
||||||
/// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
|
/// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
|
||||||
|
/// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
|
||||||
|
/// descriptor field, if valid.
|
||||||
/// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
|
/// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
|
||||||
/// \param VGPRRange [in] Token range, used for VGPR diagnostics.
|
/// \param VGPRRange [in] Token range, used for VGPR diagnostics.
|
||||||
/// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
|
/// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
|
||||||
|
@ -889,9 +897,10 @@ private:
|
||||||
/// \param SGPRBlocks [out] Result SGPR block count.
|
/// \param SGPRBlocks [out] Result SGPR block count.
|
||||||
bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
|
bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
|
||||||
bool FlatScrUsed, bool XNACKUsed,
|
bool FlatScrUsed, bool XNACKUsed,
|
||||||
unsigned NextFreeVGPR, SMRange VGPRRange,
|
Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
|
||||||
unsigned NextFreeSGPR, SMRange SGPRRange,
|
SMRange VGPRRange, unsigned NextFreeSGPR,
|
||||||
unsigned &VGPRBlocks, unsigned &SGPRBlocks);
|
SMRange SGPRRange, unsigned &VGPRBlocks,
|
||||||
|
unsigned &SGPRBlocks);
|
||||||
bool ParseDirectiveAMDGCNTarget();
|
bool ParseDirectiveAMDGCNTarget();
|
||||||
bool ParseDirectiveAMDHSAKernel();
|
bool ParseDirectiveAMDHSAKernel();
|
||||||
bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
|
bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
|
||||||
|
@ -1159,6 +1168,7 @@ private:
|
||||||
bool validateMIMGDim(const MCInst &Inst);
|
bool validateMIMGDim(const MCInst &Inst);
|
||||||
bool validateLdsDirect(const MCInst &Inst);
|
bool validateLdsDirect(const MCInst &Inst);
|
||||||
bool validateOpSel(const MCInst &Inst);
|
bool validateOpSel(const MCInst &Inst);
|
||||||
|
bool validateVccOperand(unsigned Reg) const;
|
||||||
bool validateVOP3Literal(const MCInst &Inst) const;
|
bool validateVOP3Literal(const MCInst &Inst) const;
|
||||||
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
|
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
|
||||||
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
|
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
|
||||||
|
@ -1190,6 +1200,7 @@ public:
|
||||||
OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
|
OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
|
||||||
OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
|
OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
|
||||||
OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
|
OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
|
||||||
|
OperandMatchResultTy parseBoolReg(OperandVector &Operands);
|
||||||
|
|
||||||
bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
|
bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
|
||||||
const unsigned MinVal,
|
const unsigned MinVal,
|
||||||
|
@ -1479,6 +1490,11 @@ bool AMDGPUOperand::isSDWAInt32Operand() const {
|
||||||
return isSDWAOperand(MVT::i32);
|
return isSDWAOperand(MVT::i32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns true if this operand is acceptable as a boolean register. The check
// depends on the parser's feature bits: isSCSrcB64() when
// FeatureWavefrontSize64 is set, isSCSrcB32() otherwise.
bool AMDGPUOperand::isBoolReg() const {
|
||||||
|
return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
|
||||||
|
isSCSrcB64() : isSCSrcB32();
|
||||||
|
}
|
||||||
|
|
||||||
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
|
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
|
||||||
{
|
{
|
||||||
assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
|
assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
|
||||||
|
@ -3030,6 +3046,13 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if VCC register matches wavefront size: returns true only when Reg is
// VCC and FeatureWavefrontSize64 is set, or Reg is VCC_LO and
// FeatureWavefrontSize32 is set. Any other register yields false.
|
||||||
|
bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
|
||||||
|
auto FB = getFeatureBits();
|
||||||
|
return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
|
||||||
|
(FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
|
||||||
|
}
|
||||||
|
|
||||||
// VOP3 literal is only allowed in GFX10+ and only one can be used
|
// VOP3 literal is only allowed in GFX10+ and only one can be used
|
||||||
bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
|
bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
|
||||||
unsigned Opcode = Inst.getOpcode();
|
unsigned Opcode = Inst.getOpcode();
|
||||||
|
@ -3267,9 +3290,9 @@ bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
|
||||||
|
|
||||||
bool AMDGPUAsmParser::calculateGPRBlocks(
|
bool AMDGPUAsmParser::calculateGPRBlocks(
|
||||||
const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
|
const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
|
||||||
bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
|
bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
|
||||||
unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
|
SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
|
||||||
unsigned &SGPRBlocks) {
|
unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
|
||||||
// TODO(scott.linder): These calculations are duplicated from
|
// TODO(scott.linder): These calculations are duplicated from
|
||||||
// AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
|
// AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
|
||||||
IsaVersion Version = getIsaVersion(getSTI().getCPU());
|
IsaVersion Version = getIsaVersion(getSTI().getCPU());
|
||||||
|
@ -3298,7 +3321,8 @@ bool AMDGPUAsmParser::calculateGPRBlocks(
|
||||||
NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
|
NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
|
||||||
}
|
}
|
||||||
|
|
||||||
VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
|
VGPRBlocks =
|
||||||
|
IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
|
||||||
SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
|
SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
@ -3329,6 +3353,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
|
||||||
bool ReserveVCC = true;
|
bool ReserveVCC = true;
|
||||||
bool ReserveFlatScr = true;
|
bool ReserveFlatScr = true;
|
||||||
bool ReserveXNACK = hasXNACK();
|
bool ReserveXNACK = hasXNACK();
|
||||||
|
Optional<bool> EnableWavefrontSize32;
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
while (getLexer().is(AsmToken::EndOfStatement))
|
while (getLexer().is(AsmToken::EndOfStatement))
|
||||||
|
@ -3547,8 +3572,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
|
||||||
unsigned VGPRBlocks;
|
unsigned VGPRBlocks;
|
||||||
unsigned SGPRBlocks;
|
unsigned SGPRBlocks;
|
||||||
if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
|
if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
|
||||||
ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
|
ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
|
||||||
SGPRRange, VGPRBlocks, SGPRBlocks))
|
VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
|
||||||
|
SGPRBlocks))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
|
if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
|
||||||
|
@ -5383,6 +5409,15 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Boolean holding registers
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// Parses a boolean-register operand. No extra parsing is done here; the work
// is delegated to parseReg and its result is returned unchanged.
OperandMatchResultTy
|
||||||
|
AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
|
||||||
|
return parseReg(Operands);
|
||||||
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// mubuf
|
// mubuf
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -6294,7 +6329,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I
|
||||||
}
|
}
|
||||||
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
|
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
|
||||||
// Add the register arguments
|
// Add the register arguments
|
||||||
if (Op.isReg() && Op.getReg() == AMDGPU::VCC) {
|
if (Op.isReg() && validateVccOperand(Op.getReg())) {
|
||||||
// VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
|
// VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
|
||||||
// Skip it.
|
// Skip it.
|
||||||
continue;
|
continue;
|
||||||
|
@ -6437,7 +6472,8 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
|
||||||
|
|
||||||
for (unsigned E = Operands.size(); I != E; ++I) {
|
for (unsigned E = Operands.size(); I != E; ++I) {
|
||||||
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
|
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
|
||||||
if (skipVcc && !skippedVcc && Op.isReg() && Op.getReg() == AMDGPU::VCC) {
|
if (skipVcc && !skippedVcc && Op.isReg() &&
|
||||||
|
(Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
|
||||||
// VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
|
// VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
|
||||||
// Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
|
// Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
|
||||||
// or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
|
// or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
|
||||||
|
|
|
@ -442,6 +442,7 @@ void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
|
||||||
|
|
||||||
printOperand(MI, OpNo, STI, O);
|
printOperand(MI, OpNo, STI, O);
|
||||||
|
|
||||||
|
// Print default vcc/vcc_lo operand.
|
||||||
switch (MI->getOpcode()) {
|
switch (MI->getOpcode()) {
|
||||||
default: break;
|
default: break;
|
||||||
|
|
||||||
|
@ -589,7 +590,8 @@ void AMDGPUInstPrinter::printDefaultVccOperand(unsigned OpNo,
|
||||||
raw_ostream &O) {
|
raw_ostream &O) {
|
||||||
if (OpNo > 0)
|
if (OpNo > 0)
|
||||||
O << ", ";
|
O << ", ";
|
||||||
printRegOperand(AMDGPU::VCC, O, MRI);
|
printRegOperand(STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
|
||||||
|
AMDGPU::VCC : AMDGPU::VCC_LO, O, MRI);
|
||||||
if (OpNo == 0)
|
if (OpNo == 0)
|
||||||
O << ", ";
|
O << ", ";
|
||||||
}
|
}
|
||||||
|
@ -597,6 +599,7 @@ void AMDGPUInstPrinter::printDefaultVccOperand(unsigned OpNo,
|
||||||
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
|
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
|
||||||
const MCSubtargetInfo &STI,
|
const MCSubtargetInfo &STI,
|
||||||
raw_ostream &O) {
|
raw_ostream &O) {
|
||||||
|
// Print default vcc/vcc_lo operand of VOPC.
|
||||||
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
|
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
|
||||||
if (OpNo == 0 && (Desc.TSFlags & SIInstrFlags::VOPC) &&
|
if (OpNo == 0 && (Desc.TSFlags & SIInstrFlags::VOPC) &&
|
||||||
(Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
|
(Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
|
||||||
|
@ -680,6 +683,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
|
||||||
O << "/*INV_OP*/";
|
O << "/*INV_OP*/";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Print default vcc/vcc_lo operand of v_cndmask_b32_e32.
|
||||||
switch (MI->getOpcode()) {
|
switch (MI->getOpcode()) {
|
||||||
default: break;
|
default: break;
|
||||||
|
|
||||||
|
@ -749,6 +753,7 @@ void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI,
|
||||||
if (InputModifiers & SISrcMods::SEXT)
|
if (InputModifiers & SISrcMods::SEXT)
|
||||||
O << ')';
|
O << ')';
|
||||||
|
|
||||||
|
// Print default vcc/vcc_lo operand of VOP2b.
|
||||||
switch (MI->getOpcode()) {
|
switch (MI->getOpcode()) {
|
||||||
default: break;
|
default: break;
|
||||||
|
|
||||||
|
|
|
@ -389,7 +389,7 @@ SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
|
||||||
const MCOperand &MO = MI.getOperand(OpNo);
|
const MCOperand &MO = MI.getOperand(OpNo);
|
||||||
|
|
||||||
unsigned Reg = MO.getReg();
|
unsigned Reg = MO.getReg();
|
||||||
if (Reg != AMDGPU::VCC) {
|
if (Reg != AMDGPU::VCC && Reg != AMDGPU::VCC_LO) {
|
||||||
RegEnc |= MRI.getEncodingValue(Reg);
|
RegEnc |= MRI.getEncodingValue(Reg);
|
||||||
RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
|
RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
|
||||||
RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK;
|
RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK;
|
||||||
|
|
|
@ -6,6 +6,11 @@
|
||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// Wavefront-size predicates. Each pairs a codegen-side check on
// Subtarget->getWavefrontSize() with an assembler-side check on the
// corresponding FeatureWavefrontSize32/64 feature.
def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
|
||||||
|
AssemblerPredicate <"FeatureWavefrontSize32">;
|
||||||
|
def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
|
||||||
|
AssemblerPredicate <"FeatureWavefrontSize64">;
|
||||||
|
|
||||||
def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
|
def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
|
||||||
|
|
||||||
class GCNPredicateControl : PredicateControl {
|
class GCNPredicateControl : PredicateControl {
|
||||||
|
|
|
@ -188,9 +188,18 @@ class WrapTerminatorInst<SOP_Pseudo base_inst> : SPseudoInstSI<
|
||||||
let CodeSize = base_inst.CodeSize;
|
let CodeSize = base_inst.CodeSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let WaveSizePredicate = isWave64 in {
|
||||||
def S_MOV_B64_term : WrapTerminatorInst<S_MOV_B64>;
|
def S_MOV_B64_term : WrapTerminatorInst<S_MOV_B64>;
|
||||||
def S_XOR_B64_term : WrapTerminatorInst<S_XOR_B64>;
|
def S_XOR_B64_term : WrapTerminatorInst<S_XOR_B64>;
|
||||||
def S_ANDN2_B64_term : WrapTerminatorInst<S_ANDN2_B64>;
|
def S_ANDN2_B64_term : WrapTerminatorInst<S_ANDN2_B64>;
|
||||||
|
}
|
||||||
|
|
||||||
|
let WaveSizePredicate = isWave32 in {
|
||||||
|
def S_MOV_B32_term : WrapTerminatorInst<S_MOV_B32>;
|
||||||
|
def S_XOR_B32_term : WrapTerminatorInst<S_XOR_B32>;
|
||||||
|
def S_OR_B32_term : WrapTerminatorInst<S_OR_B32>;
|
||||||
|
def S_ANDN2_B32_term : WrapTerminatorInst<S_ANDN2_B32>;
|
||||||
|
}
|
||||||
|
|
||||||
def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
|
def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
|
||||||
[(int_amdgcn_wave_barrier)]> {
|
[(int_amdgcn_wave_barrier)]> {
|
||||||
|
@ -343,6 +352,15 @@ def SI_INIT_EXEC : SPseudoInstSI <
|
||||||
let Defs = [EXEC];
|
let Defs = [EXEC];
|
||||||
let usesCustomInserter = 1;
|
let usesCustomInserter = 1;
|
||||||
let isAsCheapAsAMove = 1;
|
let isAsCheapAsAMove = 1;
|
||||||
|
let WaveSizePredicate = isWave64;
|
||||||
|
}
|
||||||
|
|
||||||
|
def SI_INIT_EXEC_LO : SPseudoInstSI <
|
||||||
|
(outs), (ins i32imm:$src), []> {
|
||||||
|
let Defs = [EXEC_LO];
|
||||||
|
let usesCustomInserter = 1;
|
||||||
|
let isAsCheapAsAMove = 1;
|
||||||
|
let WaveSizePredicate = isWave32;
|
||||||
}
|
}
|
||||||
|
|
||||||
def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI <
|
def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI <
|
||||||
|
|
|
@ -275,6 +275,21 @@ let SubtargetPredicate = isGFX9Plus in {
|
||||||
} // End SubtargetPredicate = isGFX9Plus
|
} // End SubtargetPredicate = isGFX9Plus
|
||||||
|
|
||||||
let SubtargetPredicate = isGFX10Plus in {
|
let SubtargetPredicate = isGFX10Plus in {
|
||||||
|
let hasSideEffects = 1, Defs = [EXEC, SCC], Uses = [EXEC] in {
|
||||||
|
def S_AND_SAVEEXEC_B32 : SOP1_32<"s_and_saveexec_b32">;
|
||||||
|
def S_OR_SAVEEXEC_B32 : SOP1_32<"s_or_saveexec_b32">;
|
||||||
|
def S_XOR_SAVEEXEC_B32 : SOP1_32<"s_xor_saveexec_b32">;
|
||||||
|
def S_ANDN2_SAVEEXEC_B32 : SOP1_32<"s_andn2_saveexec_b32">;
|
||||||
|
def S_ORN2_SAVEEXEC_B32 : SOP1_32<"s_orn2_saveexec_b32">;
|
||||||
|
def S_NAND_SAVEEXEC_B32 : SOP1_32<"s_nand_saveexec_b32">;
|
||||||
|
def S_NOR_SAVEEXEC_B32 : SOP1_32<"s_nor_saveexec_b32">;
|
||||||
|
def S_XNOR_SAVEEXEC_B32 : SOP1_32<"s_xnor_saveexec_b32">;
|
||||||
|
def S_ANDN1_SAVEEXEC_B32 : SOP1_32<"s_andn1_saveexec_b32">;
|
||||||
|
def S_ORN1_SAVEEXEC_B32 : SOP1_32<"s_orn1_saveexec_b32">;
|
||||||
|
def S_ANDN1_WREXEC_B32 : SOP1_32<"s_andn1_wrexec_b32">;
|
||||||
|
def S_ANDN2_WREXEC_B32 : SOP1_32<"s_andn2_wrexec_b32">;
|
||||||
|
} // End hasSideEffects = 1, Defs = [EXEC, SCC], Uses = [EXEC]
|
||||||
|
|
||||||
let Uses = [M0] in {
|
let Uses = [M0] in {
|
||||||
def S_MOVRELSD_2_B32 : SOP1_32<"s_movrelsd_2_b32">;
|
def S_MOVRELSD_2_B32 : SOP1_32<"s_movrelsd_2_b32">;
|
||||||
} // End Uses = [M0]
|
} // End Uses = [M0]
|
||||||
|
@ -782,6 +797,9 @@ let SubtargetPredicate = isGFX10Plus in {
|
||||||
let has_sdst = 0;
|
let has_sdst = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def S_SUBVECTOR_LOOP_BEGIN : SOPK_32_BR<"s_subvector_loop_begin">;
|
||||||
|
def S_SUBVECTOR_LOOP_END : SOPK_32_BR<"s_subvector_loop_end">;
|
||||||
|
|
||||||
def S_WAITCNT_VSCNT : SOPK_WAITCNT<"s_waitcnt_vscnt">;
|
def S_WAITCNT_VSCNT : SOPK_WAITCNT<"s_waitcnt_vscnt">;
|
||||||
def S_WAITCNT_VMCNT : SOPK_WAITCNT<"s_waitcnt_vmcnt">;
|
def S_WAITCNT_VMCNT : SOPK_WAITCNT<"s_waitcnt_vmcnt">;
|
||||||
def S_WAITCNT_EXPCNT : SOPK_WAITCNT<"s_waitcnt_expcnt">;
|
def S_WAITCNT_EXPCNT : SOPK_WAITCNT<"s_waitcnt_expcnt">;
|
||||||
|
@ -1215,6 +1233,18 @@ defm S_ORN1_SAVEEXEC_B64 : SOP1_Real_gfx10<0x038>;
|
||||||
defm S_ANDN1_WREXEC_B64 : SOP1_Real_gfx10<0x039>;
|
defm S_ANDN1_WREXEC_B64 : SOP1_Real_gfx10<0x039>;
|
||||||
defm S_ANDN2_WREXEC_B64 : SOP1_Real_gfx10<0x03a>;
|
defm S_ANDN2_WREXEC_B64 : SOP1_Real_gfx10<0x03a>;
|
||||||
defm S_BITREPLICATE_B64_B32 : SOP1_Real_gfx10<0x03b>;
|
defm S_BITREPLICATE_B64_B32 : SOP1_Real_gfx10<0x03b>;
|
||||||
|
defm S_AND_SAVEEXEC_B32 : SOP1_Real_gfx10<0x03c>;
|
||||||
|
defm S_OR_SAVEEXEC_B32 : SOP1_Real_gfx10<0x03d>;
|
||||||
|
defm S_XOR_SAVEEXEC_B32 : SOP1_Real_gfx10<0x03e>;
|
||||||
|
defm S_ANDN2_SAVEEXEC_B32 : SOP1_Real_gfx10<0x03f>;
|
||||||
|
defm S_ORN2_SAVEEXEC_B32 : SOP1_Real_gfx10<0x040>;
|
||||||
|
defm S_NAND_SAVEEXEC_B32 : SOP1_Real_gfx10<0x041>;
|
||||||
|
defm S_NOR_SAVEEXEC_B32 : SOP1_Real_gfx10<0x042>;
|
||||||
|
defm S_XNOR_SAVEEXEC_B32 : SOP1_Real_gfx10<0x043>;
|
||||||
|
defm S_ANDN1_SAVEEXEC_B32 : SOP1_Real_gfx10<0x044>;
|
||||||
|
defm S_ORN1_SAVEEXEC_B32 : SOP1_Real_gfx10<0x045>;
|
||||||
|
defm S_ANDN1_WREXEC_B32 : SOP1_Real_gfx10<0x046>;
|
||||||
|
defm S_ANDN2_WREXEC_B32 : SOP1_Real_gfx10<0x047>;
|
||||||
defm S_MOVRELSD_2_B32 : SOP1_Real_gfx10<0x049>;
|
defm S_MOVRELSD_2_B32 : SOP1_Real_gfx10<0x049>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -1382,6 +1412,8 @@ defm S_WAITCNT_VSCNT : SOPK_Real32_gfx10<0x017>;
|
||||||
defm S_WAITCNT_VMCNT : SOPK_Real32_gfx10<0x018>;
|
defm S_WAITCNT_VMCNT : SOPK_Real32_gfx10<0x018>;
|
||||||
defm S_WAITCNT_EXPCNT : SOPK_Real32_gfx10<0x019>;
|
defm S_WAITCNT_EXPCNT : SOPK_Real32_gfx10<0x019>;
|
||||||
defm S_WAITCNT_LGKMCNT : SOPK_Real32_gfx10<0x01a>;
|
defm S_WAITCNT_LGKMCNT : SOPK_Real32_gfx10<0x01a>;
|
||||||
|
defm S_SUBVECTOR_LOOP_BEGIN : SOPK_Real32_gfx10<0x01b>;
|
||||||
|
defm S_SUBVECTOR_LOOP_END : SOPK_Real32_gfx10<0x01c>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// SOPK - GFX6, GFX7.
|
// SOPK - GFX6, GFX7.
|
||||||
|
|
|
@ -380,12 +380,17 @@ unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
|
||||||
return NumSGPRs / getSGPREncodingGranule(STI) - 1;
|
return NumSGPRs / getSGPREncodingGranule(STI) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
|
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
|
||||||
return 4;
|
Optional<bool> EnableWavefrontSize32) {
|
||||||
|
// An explicitly supplied EnableWavefrontSize32 takes precedence; otherwise
// fall back to the subtarget's FeatureWavefrontSize32 bit. The granule is 8
// for wave32 and 4 otherwise.
bool IsWave32 = EnableWavefrontSize32 ?
|
||||||
|
*EnableWavefrontSize32 :
|
||||||
|
STI->getFeatureBits().test(FeatureWavefrontSize32);
|
||||||
|
return IsWave32 ? 8 : 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
|
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
|
||||||
return getVGPRAllocGranule(STI);
|
Optional<bool> EnableWavefrontSize32) {
|
||||||
|
return getVGPRAllocGranule(STI, EnableWavefrontSize32);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
|
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
|
||||||
|
@ -416,10 +421,12 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
|
||||||
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
|
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
|
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
|
||||||
NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
|
Optional<bool> EnableWavefrontSize32) {
|
||||||
|
NumVGPRs = alignTo(std::max(1u, NumVGPRs),
|
||||||
|
getVGPREncodingGranule(STI, EnableWavefrontSize32));
|
||||||
// VGPRBlocks is actual number of VGPR blocks minus 1.
|
// VGPRBlocks is actual number of VGPR blocks minus 1.
|
||||||
return NumVGPRs / getVGPREncodingGranule(STI) - 1;
|
return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // end namespace IsaInfo
|
} // end namespace IsaInfo
|
||||||
|
@ -437,7 +444,6 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
|
||||||
Header.amd_machine_version_minor = Version.Minor;
|
Header.amd_machine_version_minor = Version.Minor;
|
||||||
Header.amd_machine_version_stepping = Version.Stepping;
|
Header.amd_machine_version_stepping = Version.Stepping;
|
||||||
Header.kernel_code_entry_byte_offset = sizeof(Header);
|
Header.kernel_code_entry_byte_offset = sizeof(Header);
|
||||||
// wavefront_size is specified as a power of 2: 2^6 = 64 threads.
|
|
||||||
Header.wavefront_size = 6;
|
Header.wavefront_size = 6;
|
||||||
|
|
||||||
// If the code object does not support indirect functions, then the value must
|
// If the code object does not support indirect functions, then the value must
|
||||||
|
|
|
@ -150,10 +150,18 @@ unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
|
||||||
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
|
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
|
||||||
|
|
||||||
/// \returns VGPR allocation granularity for given subtarget \p STI.
|
/// \returns VGPR allocation granularity for given subtarget \p STI.
|
||||||
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
|
///
|
||||||
|
/// For subtargets which support it, \p EnableWavefrontSize32 should match
|
||||||
|
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
|
||||||
|
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
|
||||||
|
Optional<bool> EnableWavefrontSize32 = None);
|
||||||
|
|
||||||
/// \returns VGPR encoding granularity for given subtarget \p STI.
|
/// \returns VGPR encoding granularity for given subtarget \p STI.
|
||||||
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
|
///
|
||||||
|
/// For subtargets which support it, \p EnableWavefrontSize32 should match
|
||||||
|
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
|
||||||
|
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
|
||||||
|
Optional<bool> EnableWavefrontSize32 = None);
|
||||||
|
|
||||||
/// \returns Total number of VGPRs for given subtarget \p STI.
|
/// \returns Total number of VGPRs for given subtarget \p STI.
|
||||||
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
|
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
|
||||||
|
@ -171,7 +179,11 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
|
||||||
|
|
||||||
/// \returns Number of VGPR blocks needed for given subtarget \p STI when
|
/// \returns Number of VGPR blocks needed for given subtarget \p STI when
|
||||||
/// \p NumVGPRs are used.
|
/// \p NumVGPRs are used.
|
||||||
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
|
///
|
||||||
|
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
|
||||||
|
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
|
||||||
|
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
|
||||||
|
Optional<bool> EnableWavefrontSize32 = None);
|
||||||
|
|
||||||
} // end namespace IsaInfo
|
} // end namespace IsaInfo
|
||||||
|
|
||||||
|
|
|
@ -199,7 +199,12 @@ class VOP2bInstAlias <VOP2_Pseudo ps, Instruction inst,
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
|
multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
|
||||||
|
let WaveSizePredicate = isWave32 in {
|
||||||
|
def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">;
|
||||||
|
}
|
||||||
|
let WaveSizePredicate = isWave64 in {
|
||||||
def : VOP2bInstAlias<ps, inst, OpName, "vcc">;
|
def : VOP2bInstAlias<ps, inst, OpName, "vcc">;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass VOP2eInst <string opName,
|
multiclass VOP2eInst <string opName,
|
||||||
|
@ -234,7 +239,12 @@ class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd> :
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
|
multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
|
||||||
|
let WaveSizePredicate = isWave32 in {
|
||||||
|
def : VOP2eInstAlias<ps, inst, "vcc_lo">;
|
||||||
|
}
|
||||||
|
let WaveSizePredicate = isWave64 in {
|
||||||
def : VOP2eInstAlias<ps, inst, "vcc">;
|
def : VOP2eInstAlias<ps, inst, "vcc">;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
|
class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
|
||||||
|
@ -953,6 +963,30 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
|
||||||
let DecoderNamespace = "DPP8";
|
let DecoderNamespace = "DPP8";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let WaveSizePredicate = isWave32 in {
|
||||||
|
def _sdwa_w32_gfx10 :
|
||||||
|
Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
|
||||||
|
VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
|
||||||
|
VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
|
||||||
|
let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands);
|
||||||
|
let isAsmParserOnly = 1;
|
||||||
|
let DecoderNamespace = "SDWA10";
|
||||||
|
}
|
||||||
|
def _dpp_w32_gfx10 :
|
||||||
|
VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
|
||||||
|
string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
|
||||||
|
let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
|
||||||
|
let isAsmParserOnly = 1;
|
||||||
|
}
|
||||||
|
def _dpp8_w32_gfx10 :
|
||||||
|
VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
|
||||||
|
string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
|
||||||
|
let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
|
||||||
|
let isAsmParserOnly = 1;
|
||||||
|
}
|
||||||
|
} // End WaveSizePredicate = isWave32
|
||||||
|
|
||||||
|
let WaveSizePredicate = isWave64 in {
|
||||||
def _sdwa_w64_gfx10 :
|
def _sdwa_w64_gfx10 :
|
||||||
Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
|
Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
|
||||||
VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
|
VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
|
||||||
|
@ -973,6 +1007,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
|
||||||
let AsmString = asmName # AsmDPP8;
|
let AsmString = asmName # AsmDPP8;
|
||||||
let isAsmParserOnly = 1;
|
let isAsmParserOnly = 1;
|
||||||
}
|
}
|
||||||
|
} // End WaveSizePredicate = isWave64
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------- VOP3Only -----------------------------===//
|
//===----------------------------- VOP3Only -----------------------------===//
|
||||||
|
|
|
@ -165,9 +165,16 @@ class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst,
|
||||||
multiclass VOPCInstAliases <string OpName, string Arch> {
|
multiclass VOPCInstAliases <string OpName, string Arch> {
|
||||||
def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
|
def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
|
||||||
!cast<Instruction>(OpName#"_e32_"#Arch)>;
|
!cast<Instruction>(OpName#"_e32_"#Arch)>;
|
||||||
|
let WaveSizePredicate = isWave32 in {
|
||||||
|
def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
|
||||||
|
!cast<Instruction>(OpName#"_e32_"#Arch),
|
||||||
|
"vcc_lo, "#!cast<VOP3_Pseudo>(OpName#"_e64").Pfl.Asm32>;
|
||||||
|
}
|
||||||
|
let WaveSizePredicate = isWave64 in {
|
||||||
def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
|
def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
|
||||||
!cast<Instruction>(OpName#"_e32_"#Arch),
|
!cast<Instruction>(OpName#"_e32_"#Arch),
|
||||||
"vcc, "#!cast<VOP3_Pseudo>(OpName#"_e64").Pfl.Asm32>;
|
"vcc, "#!cast<VOP3_Pseudo>(OpName#"_e64").Pfl.Asm32>;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass VOPCXInstAliases <string OpName, string Arch> {
|
multiclass VOPCXInstAliases <string OpName, string Arch> {
|
||||||
|
@ -740,10 +747,17 @@ defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
|
||||||
// We need to use COPY_TO_REGCLASS to w/a the problem when ReplaceAllUsesWith()
|
// We need to use COPY_TO_REGCLASS to w/a the problem when ReplaceAllUsesWith()
|
||||||
// complaints it cannot replace i1 <-> i64/i32 if node was not morphed in place.
|
// complaints it cannot replace i1 <-> i64/i32 if node was not morphed in place.
|
||||||
multiclass ICMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> {
|
multiclass ICMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> {
|
||||||
|
let WaveSizePredicate = isWave64 in
|
||||||
def : GCNPat <
|
def : GCNPat <
|
||||||
(i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
|
(i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
|
||||||
(i64 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_64))
|
(i64 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_64))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
let WaveSizePredicate = isWave32 in
|
||||||
|
def : GCNPat <
|
||||||
|
(i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
|
||||||
|
(i32 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_32))
|
||||||
|
>;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U32_e64, i32>;
|
defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U32_e64, i32>;
|
||||||
|
@ -780,12 +794,21 @@ defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_e64, i16>;
|
||||||
defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_e64, i16>;
|
defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_e64, i16>;
|
||||||
|
|
||||||
multiclass FCMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> {
|
multiclass FCMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> {
|
||||||
|
let WaveSizePredicate = isWave64 in
|
||||||
def : GCNPat <
|
def : GCNPat <
|
||||||
(i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
|
(i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
|
||||||
(vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
|
(vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
|
||||||
(i64 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
|
(i64 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
|
||||||
DSTCLAMP.NONE), SReg_64))
|
DSTCLAMP.NONE), SReg_64))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
let WaveSizePredicate = isWave32 in
|
||||||
|
def : GCNPat <
|
||||||
|
(i32 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
|
||||||
|
(vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
|
||||||
|
(i32 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
|
||||||
|
DSTCLAMP.NONE), SReg_32))
|
||||||
|
>;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F32_e64, f32>;
|
defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F32_e64, f32>;
|
||||||
|
|
Loading…
Reference in New Issue