forked from OSchip/llvm-project
[AMDGPU] Refactor waitcnt encoding
- Refactor bit packing/unpacking - Calculate bit mask given bit shift and bit width - Introduce function for decoding bits of waitcnt - Introduce function for encoding bits of waitcnt - Introduce function for getting waitcnt mask (instead of using bare numbers) - Introduce function for getting max waitcnt(s) (instead of using bare numbers) Differential Revision: https://reviews.llvm.org/D25298 llvm-svn: 283919
This commit is contained in:
parent
e9d075233a
commit
cdd4547607
|
@ -2016,53 +2016,41 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
|
|||
if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma))
|
||||
Parser.Lex();
|
||||
|
||||
int CntShift;
|
||||
int CntMask;
|
||||
|
||||
IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
|
||||
if (CntName == "vmcnt") {
|
||||
CntMask = getVmcntMask(IV);
|
||||
CntShift = getVmcntShift(IV);
|
||||
} else if (CntName == "expcnt") {
|
||||
CntMask = getExpcntMask(IV);
|
||||
CntShift = getExpcntShift(IV);
|
||||
} else if (CntName == "lgkmcnt") {
|
||||
CntMask = getLgkmcntMask(IV);
|
||||
CntShift = getLgkmcntShift(IV);
|
||||
} else {
|
||||
if (CntName == "vmcnt")
|
||||
IntVal = encodeVmcnt(IV, IntVal, CntVal);
|
||||
else if (CntName == "expcnt")
|
||||
IntVal = encodeExpcnt(IV, IntVal, CntVal);
|
||||
else if (CntName == "lgkmcnt")
|
||||
IntVal = encodeLgkmcnt(IV, IntVal, CntVal);
|
||||
else
|
||||
return true;
|
||||
}
|
||||
|
||||
IntVal &= ~(CntMask << CntShift);
|
||||
IntVal |= (CntVal << CntShift);
|
||||
return false;
|
||||
}
|
||||
|
||||
AMDGPUAsmParser::OperandMatchResultTy
|
||||
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
|
||||
// Disable all counters by default.
|
||||
// vmcnt [3:0]
|
||||
// expcnt [6:4]
|
||||
// lgkmcnt [11:8]
|
||||
int64_t CntVal = 0xf7f;
|
||||
IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
|
||||
int64_t Waitcnt = getWaitcntBitMask(IV);
|
||||
SMLoc S = Parser.getTok().getLoc();
|
||||
|
||||
switch(getLexer().getKind()) {
|
||||
default: return MatchOperand_ParseFail;
|
||||
case AsmToken::Integer:
|
||||
// The operand can be an integer value.
|
||||
if (getParser().parseAbsoluteExpression(CntVal))
|
||||
if (getParser().parseAbsoluteExpression(Waitcnt))
|
||||
return MatchOperand_ParseFail;
|
||||
break;
|
||||
|
||||
case AsmToken::Identifier:
|
||||
do {
|
||||
if (parseCnt(CntVal))
|
||||
if (parseCnt(Waitcnt))
|
||||
return MatchOperand_ParseFail;
|
||||
} while(getLexer().isNot(AsmToken::EndOfStatement));
|
||||
break;
|
||||
}
|
||||
Operands.push_back(AMDGPUOperand::CreateImm(this, CntVal, S));
|
||||
Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
|
||||
return MatchOperand_Success;
|
||||
}
|
||||
|
||||
|
|
|
@ -874,25 +874,24 @@ void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
|
|||
IsaVersion IV = getIsaVersion(STI.getFeatureBits());
|
||||
|
||||
unsigned SImm16 = MI->getOperand(OpNo).getImm();
|
||||
unsigned Vmcnt = (SImm16 >> getVmcntShift(IV)) & getVmcntMask(IV);
|
||||
unsigned Expcnt = (SImm16 >> getExpcntShift(IV)) & getExpcntMask(IV);
|
||||
unsigned Lgkmcnt = (SImm16 >> getLgkmcntShift(IV)) & getLgkmcntMask(IV);
|
||||
unsigned Vmcnt, Expcnt, Lgkmcnt;
|
||||
decodeWaitcnt(IV, SImm16, Vmcnt, Expcnt, Lgkmcnt);
|
||||
|
||||
bool NeedSpace = false;
|
||||
|
||||
if (Vmcnt != 0xF) {
|
||||
if (Vmcnt != getVmcntBitMask(IV)) {
|
||||
O << "vmcnt(" << Vmcnt << ')';
|
||||
NeedSpace = true;
|
||||
}
|
||||
|
||||
if (Expcnt != 0x7) {
|
||||
if (Expcnt != getExpcntBitMask(IV)) {
|
||||
if (NeedSpace)
|
||||
O << ' ';
|
||||
O << "expcnt(" << Expcnt << ')';
|
||||
NeedSpace = true;
|
||||
}
|
||||
|
||||
if (Lgkmcnt != 0xF) {
|
||||
if (Lgkmcnt != getLgkmcntBitMask(IV)) {
|
||||
if (NeedSpace)
|
||||
O << ' ';
|
||||
O << "lgkmcnt(" << Lgkmcnt << ')';
|
||||
|
|
|
@ -63,12 +63,12 @@ private:
|
|||
const MachineRegisterInfo *MRI;
|
||||
IsaVersion IV;
|
||||
|
||||
/// \brief Constant hardware limits
|
||||
static const Counters WaitCounts;
|
||||
|
||||
/// \brief Constant zero value
|
||||
static const Counters ZeroCounts;
|
||||
|
||||
/// \brief Hardware limits
|
||||
Counters HardwareLimits;
|
||||
|
||||
/// \brief Counter values we have already waited on.
|
||||
Counters WaitedOn;
|
||||
|
||||
|
@ -173,7 +173,6 @@ FunctionPass *llvm::createSIInsertWaitsPass() {
|
|||
return new SIInsertWaits();
|
||||
}
|
||||
|
||||
const Counters SIInsertWaits::WaitCounts = { { 15, 7, 15 } };
|
||||
const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
|
||||
|
||||
static bool readsVCCZ(unsigned Opcode) {
|
||||
|
@ -379,7 +378,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
|
|||
Ordered[2] = false;
|
||||
|
||||
// The values we are going to put into the S_WAITCNT instruction
|
||||
Counters Counts = WaitCounts;
|
||||
Counters Counts = HardwareLimits;
|
||||
|
||||
// Do we really need to wait?
|
||||
bool NeedWait = false;
|
||||
|
@ -395,7 +394,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
|
|||
unsigned Value = LastIssued.Array[i] - Required.Array[i];
|
||||
|
||||
// Adjust the value to the real hardware possibilities.
|
||||
Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
|
||||
Counts.Array[i] = std::min(Value, HardwareLimits.Array[i]);
|
||||
|
||||
} else
|
||||
Counts.Array[i] = 0;
|
||||
|
@ -413,9 +412,10 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
|
|||
|
||||
// Build the wait instruction
|
||||
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
|
||||
.addImm(((Counts.Named.VM & getVmcntMask(IV)) << getVmcntShift(IV)) |
|
||||
((Counts.Named.EXP & getExpcntMask(IV)) << getExpcntShift(IV)) |
|
||||
((Counts.Named.LGKM & getLgkmcntMask(IV)) << getLgkmcntShift(IV)));
|
||||
.addImm(encodeWaitcnt(IV,
|
||||
Counts.Named.VM,
|
||||
Counts.Named.EXP,
|
||||
Counts.Named.LGKM));
|
||||
|
||||
LastOpcodeType = OTHER;
|
||||
LastInstWritesM0 = false;
|
||||
|
@ -443,9 +443,9 @@ void SIInsertWaits::handleExistingWait(MachineBasicBlock::iterator I) {
|
|||
unsigned Imm = I->getOperand(0).getImm();
|
||||
Counters Counts, WaitOn;
|
||||
|
||||
Counts.Named.VM = (Imm >> getVmcntShift(IV)) & getVmcntMask(IV);
|
||||
Counts.Named.EXP = (Imm >> getExpcntShift(IV)) & getExpcntMask(IV);
|
||||
Counts.Named.LGKM = (Imm >> getLgkmcntShift(IV)) & getLgkmcntMask(IV);
|
||||
Counts.Named.VM = decodeVmcnt(IV, Imm);
|
||||
Counts.Named.EXP = decodeExpcnt(IV, Imm);
|
||||
Counts.Named.LGKM = decodeLgkmcnt(IV, Imm);
|
||||
|
||||
for (unsigned i = 0; i < 3; ++i) {
|
||||
if (Counts.Array[i] <= LastIssued.Array[i])
|
||||
|
@ -523,6 +523,10 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
|
|||
MRI = &MF.getRegInfo();
|
||||
IV = getIsaVersion(ST->getFeatureBits());
|
||||
|
||||
HardwareLimits.Named.VM = getVmcntBitMask(IV);
|
||||
HardwareLimits.Named.EXP = getExpcntBitMask(IV);
|
||||
HardwareLimits.Named.LGKM = getLgkmcntBitMask(IV);
|
||||
|
||||
WaitedOn = ZeroCounts;
|
||||
DelayedWaitOn = ZeroCounts;
|
||||
LastIssued = ZeroCounts;
|
||||
|
|
|
@ -33,6 +33,49 @@
|
|||
#undef GET_INSTRINFO_NAMED_OPS
|
||||
#undef GET_INSTRINFO_ENUM
|
||||
|
||||
namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
///
/// The mask is computed in unsigned arithmetic so that wide fields never
/// shift into the sign bit of an int. \p Shift and \p Width must each be
/// smaller than the number of bits in unsigned.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1u << Width) - 1u) << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// Only the bits of \p Dst covered by the field are replaced; bits of \p Src
/// that do not fit into \p Width bits are dropped. A zero-width field leaves
/// \p Dst unchanged.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  const unsigned Mask = getBitMask(Shift, Width);
  return (Dst & ~Mask) | ((Src << Shift) & Mask);
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift.
unsigned getVmcntBitShift() { return 0; }

/// \returns Vmcnt bit width.
unsigned getVmcntBitWidth() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

} // anonymous namespace
|
||||
|
||||
namespace llvm {
|
||||
namespace AMDGPU {
|
||||
|
||||
|
@ -161,28 +204,63 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
|
|||
return Ints;
|
||||
}
|
||||
|
||||
unsigned getVmcntMask(IsaVersion Version) {
|
||||
return 0xf;
|
||||
unsigned getWaitcntBitMask(IsaVersion Version) {
|
||||
unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
|
||||
unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
|
||||
unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
|
||||
return Vmcnt | Expcnt | Lgkmcnt;
|
||||
}
|
||||
|
||||
unsigned getVmcntShift(IsaVersion Version) {
|
||||
return 0;
|
||||
unsigned getVmcntBitMask(IsaVersion Version) {
|
||||
return (1 << getVmcntBitWidth()) - 1;
|
||||
}
|
||||
|
||||
unsigned getExpcntMask(IsaVersion Version) {
|
||||
return 0x7;
|
||||
unsigned getExpcntBitMask(IsaVersion Version) {
|
||||
return (1 << getExpcntBitWidth()) - 1;
|
||||
}
|
||||
|
||||
unsigned getExpcntShift(IsaVersion Version) {
|
||||
return 4;
|
||||
unsigned getLgkmcntBitMask(IsaVersion Version) {
|
||||
return (1 << getLgkmcntBitWidth()) - 1;
|
||||
}
|
||||
|
||||
unsigned getLgkmcntMask(IsaVersion Version) {
|
||||
return 0xf;
|
||||
unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt) {
|
||||
return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
|
||||
}
|
||||
|
||||
unsigned getLgkmcntShift(IsaVersion Version) {
|
||||
return 8;
|
||||
unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt) {
|
||||
return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
|
||||
}
|
||||
|
||||
/// Extracts the Lgkmcnt field from the packed \p Waitcnt value.
/// \p Version is not consulted by this implementation.
unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt) {
  const unsigned FieldShift = getLgkmcntBitShift();
  const unsigned FieldWidth = getLgkmcntBitWidth();
  return unpackBits(Waitcnt, FieldShift, FieldWidth);
}
|
||||
|
||||
/// Splits the packed \p Waitcnt value into its three counters, storing each
/// one through the corresponding output reference.
void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt, unsigned &Vmcnt,
                   unsigned &Expcnt, unsigned &Lgkmcnt) {
  // Each counter is decoded independently from the same packed value.
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}
|
||||
|
||||
/// Writes \p Vmcnt into the Vmcnt field of \p Waitcnt and returns the result.
/// \p Version is not consulted by this implementation.
unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt) {
  const unsigned FieldShift = getVmcntBitShift();
  const unsigned FieldWidth = getVmcntBitWidth();
  return packBits(Vmcnt, Waitcnt, FieldShift, FieldWidth);
}
|
||||
|
||||
/// Writes \p Expcnt into the Expcnt field of \p Waitcnt and returns the result.
/// \p Version is not consulted by this implementation.
unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt) {
  const unsigned FieldShift = getExpcntBitShift();
  const unsigned FieldWidth = getExpcntBitWidth();
  return packBits(Expcnt, Waitcnt, FieldShift, FieldWidth);
}
|
||||
|
||||
/// Writes \p Lgkmcnt into the Lgkmcnt field of \p Waitcnt and returns the
/// result. \p Version is not consulted by this implementation.
unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) {
  const unsigned FieldShift = getLgkmcntBitShift();
  const unsigned FieldWidth = getLgkmcntBitWidth();
  return packBits(Lgkmcnt, Waitcnt, FieldShift, FieldWidth);
}
|
||||
|
||||
/// Builds a packed waitcnt value from the three individual counters.
///
/// \returns Waitcnt with \p Vmcnt, \p Expcnt and \p Lgkmcnt written into their
/// respective fields.
unsigned encodeWaitcnt(IsaVersion Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  // Start from the value returned by getWaitcntBitMask(); each encode call
  // below then overwrites its own field.
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}
|
||||
|
||||
unsigned getInitialPSInputAddr(const Function &F) {
|
||||
|
|
|
@ -76,23 +76,59 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
|
|||
std::pair<int, int> Default,
|
||||
bool OnlyFirstRequired = false);
|
||||
|
||||
/// \returns VMCNT bit mask for given isa \p Version.
|
||||
unsigned getVmcntMask(IsaVersion Version);
|
||||
/// \returns Waitcnt bit mask for given isa \p Version.
|
||||
unsigned getWaitcntBitMask(IsaVersion Version);
|
||||
|
||||
/// \returns VMCNT bit shift for given isa \p Version.
|
||||
unsigned getVmcntShift(IsaVersion Version);
|
||||
/// \returns Vmcnt bit mask for given isa \p Version.
|
||||
unsigned getVmcntBitMask(IsaVersion Version);
|
||||
|
||||
/// \returns EXPCNT bit mask for given isa \p Version.
|
||||
unsigned getExpcntMask(IsaVersion Version);
|
||||
/// \returns Expcnt bit mask for given isa \p Version.
|
||||
unsigned getExpcntBitMask(IsaVersion Version);
|
||||
|
||||
/// \returns EXPCNT bit shift for given isa \p Version.
|
||||
unsigned getExpcntShift(IsaVersion Version);
|
||||
/// \returns Lgkmcnt bit mask for given isa \p Version.
|
||||
unsigned getLgkmcntBitMask(IsaVersion Version);
|
||||
|
||||
/// \returns LGKMCNT bit mask for given isa \p Version.
|
||||
unsigned getLgkmcntMask(IsaVersion Version);
|
||||
/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
|
||||
unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt);
|
||||
|
||||
/// \returns LGKMCNT bit shift for given isa \p Version.
|
||||
unsigned getLgkmcntShift(IsaVersion Version);
|
||||
/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
|
||||
unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt);
|
||||
|
||||
/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
|
||||
unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt);
|
||||
|
||||
/// \brief Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
|
||||
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
|
||||
/// \p Lgkmcnt respectively.
|
||||
///
|
||||
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
|
||||
/// \p Vmcnt = \p Waitcnt[3:0]
|
||||
/// \p Expcnt = \p Waitcnt[6:4]
|
||||
/// \p Lgkmcnt = \p Waitcnt[11:8]
|
||||
void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
|
||||
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
|
||||
|
||||
/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
|
||||
unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt);
|
||||
|
||||
/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
|
||||
unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt);
|
||||
|
||||
/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
|
||||
unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt);
|
||||
|
||||
/// \brief Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
|
||||
/// \p Version.
|
||||
///
|
||||
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
|
||||
/// Waitcnt[3:0] = \p Vmcnt
|
||||
/// Waitcnt[6:4] = \p Expcnt
|
||||
/// Waitcnt[11:8] = \p Lgkmcnt
|
||||
///
|
||||
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
|
||||
/// isa \p Version.
|
||||
unsigned encodeWaitcnt(IsaVersion Version,
|
||||
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
|
||||
|
||||
unsigned getInitialPSInputAddr(const Function &F);
|
||||
|
||||
|
|
Loading…
Reference in New Issue