[AMDGPU] Refactor waitcnt encoding

- Refactor bit packing/unpacking
- Calculate bit mask given bit shift and bit width
- Introduce function for decoding bits of waitcnt
- Introduce function for encoding bits of waitcnt
- Introduce function for getting waitcnt mask (instead of using bare numbers)
- Introduce function for getting max waitcnt(s) (instead of using bare numbers)

Differential Revision: https://reviews.llvm.org/D25298

llvm-svn: 283919
This commit is contained in:
Konstantin Zhuravlyov 2016-10-11 18:58:22 +00:00
parent e9d075233a
commit cdd4547607
5 changed files with 171 additions and 66 deletions

View File

@ -2016,53 +2016,41 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma))
Parser.Lex();
int CntShift;
int CntMask;
IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
if (CntName == "vmcnt") {
CntMask = getVmcntMask(IV);
CntShift = getVmcntShift(IV);
} else if (CntName == "expcnt") {
CntMask = getExpcntMask(IV);
CntShift = getExpcntShift(IV);
} else if (CntName == "lgkmcnt") {
CntMask = getLgkmcntMask(IV);
CntShift = getLgkmcntShift(IV);
} else {
if (CntName == "vmcnt")
IntVal = encodeVmcnt(IV, IntVal, CntVal);
else if (CntName == "expcnt")
IntVal = encodeExpcnt(IV, IntVal, CntVal);
else if (CntName == "lgkmcnt")
IntVal = encodeLgkmcnt(IV, IntVal, CntVal);
else
return true;
}
IntVal &= ~(CntMask << CntShift);
IntVal |= (CntVal << CntShift);
return false;
}
AMDGPUAsmParser::OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
// Disable all counters by default.
// vmcnt [3:0]
// expcnt [6:4]
// lgkmcnt [11:8]
int64_t CntVal = 0xf7f;
IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
int64_t Waitcnt = getWaitcntBitMask(IV);
SMLoc S = Parser.getTok().getLoc();
switch(getLexer().getKind()) {
default: return MatchOperand_ParseFail;
case AsmToken::Integer:
// The operand can be an integer value.
if (getParser().parseAbsoluteExpression(CntVal))
if (getParser().parseAbsoluteExpression(Waitcnt))
return MatchOperand_ParseFail;
break;
case AsmToken::Identifier:
do {
if (parseCnt(CntVal))
if (parseCnt(Waitcnt))
return MatchOperand_ParseFail;
} while(getLexer().isNot(AsmToken::EndOfStatement));
break;
}
Operands.push_back(AMDGPUOperand::CreateImm(this, CntVal, S));
Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
return MatchOperand_Success;
}

View File

@ -874,25 +874,24 @@ void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
IsaVersion IV = getIsaVersion(STI.getFeatureBits());
unsigned SImm16 = MI->getOperand(OpNo).getImm();
unsigned Vmcnt = (SImm16 >> getVmcntShift(IV)) & getVmcntMask(IV);
unsigned Expcnt = (SImm16 >> getExpcntShift(IV)) & getExpcntMask(IV);
unsigned Lgkmcnt = (SImm16 >> getLgkmcntShift(IV)) & getLgkmcntMask(IV);
unsigned Vmcnt, Expcnt, Lgkmcnt;
decodeWaitcnt(IV, SImm16, Vmcnt, Expcnt, Lgkmcnt);
bool NeedSpace = false;
if (Vmcnt != 0xF) {
if (Vmcnt != getVmcntBitMask(IV)) {
O << "vmcnt(" << Vmcnt << ')';
NeedSpace = true;
}
if (Expcnt != 0x7) {
if (Expcnt != getExpcntBitMask(IV)) {
if (NeedSpace)
O << ' ';
O << "expcnt(" << Expcnt << ')';
NeedSpace = true;
}
if (Lgkmcnt != 0xF) {
if (Lgkmcnt != getLgkmcntBitMask(IV)) {
if (NeedSpace)
O << ' ';
O << "lgkmcnt(" << Lgkmcnt << ')';

View File

@ -63,12 +63,12 @@ private:
const MachineRegisterInfo *MRI;
IsaVersion IV;
/// \brief Constant hardware limits
static const Counters WaitCounts;
/// \brief Constant zero value
static const Counters ZeroCounts;
/// \brief Hardware limits
Counters HardwareLimits;
/// \brief Counter values we have already waited on.
Counters WaitedOn;
@ -173,7 +173,6 @@ FunctionPass *llvm::createSIInsertWaitsPass() {
return new SIInsertWaits();
}
const Counters SIInsertWaits::WaitCounts = { { 15, 7, 15 } };
const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
static bool readsVCCZ(unsigned Opcode) {
@ -379,7 +378,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
Ordered[2] = false;
// The values we are going to put into the S_WAITCNT instruction
Counters Counts = WaitCounts;
Counters Counts = HardwareLimits;
// Do we really need to wait?
bool NeedWait = false;
@ -395,7 +394,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
unsigned Value = LastIssued.Array[i] - Required.Array[i];
// Adjust the value to the real hardware possibilities.
Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
Counts.Array[i] = std::min(Value, HardwareLimits.Array[i]);
} else
Counts.Array[i] = 0;
@ -413,9 +412,10 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
// Build the wait instruction
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
.addImm(((Counts.Named.VM & getVmcntMask(IV)) << getVmcntShift(IV)) |
((Counts.Named.EXP & getExpcntMask(IV)) << getExpcntShift(IV)) |
((Counts.Named.LGKM & getLgkmcntMask(IV)) << getLgkmcntShift(IV)));
.addImm(encodeWaitcnt(IV,
Counts.Named.VM,
Counts.Named.EXP,
Counts.Named.LGKM));
LastOpcodeType = OTHER;
LastInstWritesM0 = false;
@ -443,9 +443,9 @@ void SIInsertWaits::handleExistingWait(MachineBasicBlock::iterator I) {
unsigned Imm = I->getOperand(0).getImm();
Counters Counts, WaitOn;
Counts.Named.VM = (Imm >> getVmcntShift(IV)) & getVmcntMask(IV);
Counts.Named.EXP = (Imm >> getExpcntShift(IV)) & getExpcntMask(IV);
Counts.Named.LGKM = (Imm >> getLgkmcntShift(IV)) & getLgkmcntMask(IV);
Counts.Named.VM = decodeVmcnt(IV, Imm);
Counts.Named.EXP = decodeExpcnt(IV, Imm);
Counts.Named.LGKM = decodeLgkmcnt(IV, Imm);
for (unsigned i = 0; i < 3; ++i) {
if (Counts.Array[i] <= LastIssued.Array[i])
@ -523,6 +523,10 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
IV = getIsaVersion(ST->getFeatureBits());
HardwareLimits.Named.VM = getVmcntBitMask(IV);
HardwareLimits.Named.EXP = getExpcntBitMask(IV);
HardwareLimits.Named.LGKM = getLgkmcntBitMask(IV);
WaitedOn = ZeroCounts;
DelayedWaitOn = ZeroCounts;
LastIssued = ZeroCounts;

View File

@ -33,6 +33,49 @@
#undef GET_INSTRINFO_NAMED_OPS
#undef GET_INSTRINFO_ENUM
namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
///
/// A \p Width of zero yields an empty mask. A \p Width that is at least the
/// number of bits in `unsigned` selects every bit from \p Shift upwards.
/// \p Shift itself must be smaller than the number of bits in `unsigned`.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  // Assumes 8-bit bytes, which holds on every host this code is built for.
  const unsigned UnsignedBits = 8 * sizeof(unsigned);
  // The literal must be unsigned: with a signed `1`, a width of 31 overflows
  // `int`. Shifting by the full type width is undefined as well, so saturate.
  const unsigned LowBits = Width >= UnsignedBits ? ~0u : (1u << Width) - 1u;
  return LowBits << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// Bits of \p Src that do not fit into \p Width bits are discarded; bits of
/// \p Dst outside of the addressed field are left untouched.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  const unsigned FieldMask = getBitMask(Shift, Width);
  // Clear exactly the destination field (nothing at all for a zero-width
  // field), then drop the truncated source value into it.
  Dst &= ~FieldMask;
  Dst |= (Src << Shift) & FieldMask;
  return Dst;
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits, right-aligned at bit 0.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift.
unsigned getVmcntBitShift() { return 0; }

/// \returns Vmcnt bit width.
unsigned getVmcntBitWidth() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

} // anonymous namespace
namespace llvm {
namespace AMDGPU {
@ -161,28 +204,63 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
return Ints;
}
unsigned getVmcntMask(IsaVersion Version) {
return 0xf;
unsigned getWaitcntBitMask(IsaVersion Version) {
unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
return Vmcnt | Expcnt | Lgkmcnt;
}
unsigned getVmcntShift(IsaVersion Version) {
return 0;
unsigned getVmcntBitMask(IsaVersion Version) {
return (1 << getVmcntBitWidth()) - 1;
}
unsigned getExpcntMask(IsaVersion Version) {
return 0x7;
unsigned getExpcntBitMask(IsaVersion Version) {
return (1 << getExpcntBitWidth()) - 1;
}
unsigned getExpcntShift(IsaVersion Version) {
return 4;
unsigned getLgkmcntBitMask(IsaVersion Version) {
return (1 << getLgkmcntBitWidth()) - 1;
}
unsigned getLgkmcntMask(IsaVersion Version) {
return 0xf;
unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
}
unsigned getLgkmcntShift(IsaVersion Version) {
return 8;
unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
/// \returns The Lgkmcnt field extracted from the packed \p Waitcnt value.
/// \p Version is accepted for interface symmetry and is not consulted here.
unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt) {
  const unsigned FieldShift = getLgkmcntBitShift();
  const unsigned FieldWidth = getLgkmcntBitWidth();
  return unpackBits(Waitcnt, FieldShift, FieldWidth);
}
/// \brief Splits the packed \p Waitcnt value into its three counters.
///
/// Each counter is obtained from the matching single-field decoder and stored
/// through the corresponding output reference.
void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  const unsigned VmField = decodeVmcnt(Version, Waitcnt);
  const unsigned ExpField = decodeExpcnt(Version, Waitcnt);
  const unsigned LgkmField = decodeLgkmcnt(Version, Waitcnt);

  Vmcnt = VmField;
  Expcnt = ExpField;
  Lgkmcnt = LgkmField;
}
/// \returns \p Waitcnt with its Vmcnt field replaced by \p Vmcnt.
/// \p Version is accepted for interface symmetry and is not consulted here.
unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt) {
  const unsigned FieldShift = getVmcntBitShift();
  const unsigned FieldWidth = getVmcntBitWidth();
  return packBits(Vmcnt, Waitcnt, FieldShift, FieldWidth);
}
/// \returns \p Waitcnt with its Expcnt field replaced by \p Expcnt.
/// \p Version is accepted for interface symmetry and is not consulted here.
unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt) {
  const unsigned FieldShift = getExpcntBitShift();
  const unsigned FieldWidth = getExpcntBitWidth();
  return packBits(Expcnt, Waitcnt, FieldShift, FieldWidth);
}
/// \returns \p Waitcnt with its Lgkmcnt field replaced by \p Lgkmcnt.
/// \p Version is accepted for interface symmetry and is not consulted here.
unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) {
  const unsigned FieldShift = getLgkmcntBitShift();
  const unsigned FieldWidth = getLgkmcntBitWidth();
  return packBits(Lgkmcnt, Waitcnt, FieldShift, FieldWidth);
}
/// \brief Builds a packed waitcnt value from the three individual counters.
///
/// The value is seeded with getWaitcntBitMask() and each counter field is
/// then overwritten in turn: Vmcnt, Expcnt, Lgkmcnt.
///
/// \returns The packed waitcnt value.
unsigned encodeWaitcnt(IsaVersion Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  const unsigned Seed = getWaitcntBitMask(Version);
  const unsigned WithVm = encodeVmcnt(Version, Seed, Vmcnt);
  const unsigned WithExp = encodeExpcnt(Version, WithVm, Expcnt);
  return encodeLgkmcnt(Version, WithExp, Lgkmcnt);
}
unsigned getInitialPSInputAddr(const Function &F) {

View File

@ -76,23 +76,59 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
std::pair<int, int> Default,
bool OnlyFirstRequired = false);
/// \returns VMCNT bit mask for given isa \p Version.
unsigned getVmcntMask(IsaVersion Version);
/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(IsaVersion Version);
/// \returns VMCNT bit shift for given isa \p Version.
unsigned getVmcntShift(IsaVersion Version);
/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(IsaVersion Version);
/// \returns EXPCNT bit mask for given isa \p Version.
unsigned getExpcntMask(IsaVersion Version);
/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(IsaVersion Version);
/// \returns EXPCNT bit shift for given isa \p Version.
unsigned getExpcntShift(IsaVersion Version);
/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(IsaVersion Version);
/// \returns LGKMCNT bit mask for given isa \p Version.
unsigned getLgkmcntMask(IsaVersion Version);
/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt);
/// \returns LGKMCNT bit shift for given isa \p Version.
unsigned getLgkmcntShift(IsaVersion Version);
/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt);
/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt);
/// \brief Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
/// \p Vmcnt = \p Waitcnt[3:0]
/// \p Expcnt = \p Waitcnt[6:4]
/// \p Lgkmcnt = \p Waitcnt[11:8]
void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt);
/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt);
/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt);
/// \brief Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
/// Waitcnt[3:0] = \p Vmcnt
/// Waitcnt[6:4] = \p Expcnt
/// Waitcnt[11:8] = \p Lgkmcnt
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
unsigned encodeWaitcnt(IsaVersion Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
unsigned getInitialPSInputAddr(const Function &F);