[AMDGPU] Refactor waitcnt encoding

- Refactor bit packing/unpacking
- Calculate bit mask given bit shift and bit width
- Introduce function for decoding bits of waitcnt
- Introduce function for encoding bits of waitcnt
- Introduce function for getting waitcnt mask (instead of using bare numbers)
- Introduce function for getting max waitcnt(s) (instead of using bare numbers)

Differential Revision: https://reviews.llvm.org/D25298

llvm-svn: 283919
This commit is contained in:
Konstantin Zhuravlyov 2016-10-11 18:58:22 +00:00
parent e9d075233a
commit cdd4547607
5 changed files with 171 additions and 66 deletions

View File

@ -2016,53 +2016,41 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma))
Parser.Lex(); Parser.Lex();
int CntShift;
int CntMask;
IsaVersion IV = getIsaVersion(getSTI().getFeatureBits()); IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
if (CntName == "vmcnt") { if (CntName == "vmcnt")
CntMask = getVmcntMask(IV); IntVal = encodeVmcnt(IV, IntVal, CntVal);
CntShift = getVmcntShift(IV); else if (CntName == "expcnt")
} else if (CntName == "expcnt") { IntVal = encodeExpcnt(IV, IntVal, CntVal);
CntMask = getExpcntMask(IV); else if (CntName == "lgkmcnt")
CntShift = getExpcntShift(IV); IntVal = encodeLgkmcnt(IV, IntVal, CntVal);
} else if (CntName == "lgkmcnt") { else
CntMask = getLgkmcntMask(IV);
CntShift = getLgkmcntShift(IV);
} else {
return true; return true;
}
IntVal &= ~(CntMask << CntShift);
IntVal |= (CntVal << CntShift);
return false; return false;
} }
AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
// Disable all counters by default. IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
// vmcnt [3:0] int64_t Waitcnt = getWaitcntBitMask(IV);
// expcnt [6:4]
// lgkmcnt [11:8]
int64_t CntVal = 0xf7f;
SMLoc S = Parser.getTok().getLoc(); SMLoc S = Parser.getTok().getLoc();
switch(getLexer().getKind()) { switch(getLexer().getKind()) {
default: return MatchOperand_ParseFail; default: return MatchOperand_ParseFail;
case AsmToken::Integer: case AsmToken::Integer:
// The operand can be an integer value. // The operand can be an integer value.
if (getParser().parseAbsoluteExpression(CntVal)) if (getParser().parseAbsoluteExpression(Waitcnt))
return MatchOperand_ParseFail; return MatchOperand_ParseFail;
break; break;
case AsmToken::Identifier: case AsmToken::Identifier:
do { do {
if (parseCnt(CntVal)) if (parseCnt(Waitcnt))
return MatchOperand_ParseFail; return MatchOperand_ParseFail;
} while(getLexer().isNot(AsmToken::EndOfStatement)); } while(getLexer().isNot(AsmToken::EndOfStatement));
break; break;
} }
Operands.push_back(AMDGPUOperand::CreateImm(this, CntVal, S)); Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
return MatchOperand_Success; return MatchOperand_Success;
} }

View File

@ -874,25 +874,24 @@ void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
IsaVersion IV = getIsaVersion(STI.getFeatureBits()); IsaVersion IV = getIsaVersion(STI.getFeatureBits());
unsigned SImm16 = MI->getOperand(OpNo).getImm(); unsigned SImm16 = MI->getOperand(OpNo).getImm();
unsigned Vmcnt = (SImm16 >> getVmcntShift(IV)) & getVmcntMask(IV); unsigned Vmcnt, Expcnt, Lgkmcnt;
unsigned Expcnt = (SImm16 >> getExpcntShift(IV)) & getExpcntMask(IV); decodeWaitcnt(IV, SImm16, Vmcnt, Expcnt, Lgkmcnt);
unsigned Lgkmcnt = (SImm16 >> getLgkmcntShift(IV)) & getLgkmcntMask(IV);
bool NeedSpace = false; bool NeedSpace = false;
if (Vmcnt != 0xF) { if (Vmcnt != getVmcntBitMask(IV)) {
O << "vmcnt(" << Vmcnt << ')'; O << "vmcnt(" << Vmcnt << ')';
NeedSpace = true; NeedSpace = true;
} }
if (Expcnt != 0x7) { if (Expcnt != getExpcntBitMask(IV)) {
if (NeedSpace) if (NeedSpace)
O << ' '; O << ' ';
O << "expcnt(" << Expcnt << ')'; O << "expcnt(" << Expcnt << ')';
NeedSpace = true; NeedSpace = true;
} }
if (Lgkmcnt != 0xF) { if (Lgkmcnt != getLgkmcntBitMask(IV)) {
if (NeedSpace) if (NeedSpace)
O << ' '; O << ' ';
O << "lgkmcnt(" << Lgkmcnt << ')'; O << "lgkmcnt(" << Lgkmcnt << ')';

View File

@ -63,12 +63,12 @@ private:
const MachineRegisterInfo *MRI; const MachineRegisterInfo *MRI;
IsaVersion IV; IsaVersion IV;
/// \brief Constant hardware limits
static const Counters WaitCounts;
/// \brief Constant zero value /// \brief Constant zero value
static const Counters ZeroCounts; static const Counters ZeroCounts;
/// \brief Hardware limits
Counters HardwareLimits;
/// \brief Counter values we have already waited on. /// \brief Counter values we have already waited on.
Counters WaitedOn; Counters WaitedOn;
@ -173,7 +173,6 @@ FunctionPass *llvm::createSIInsertWaitsPass() {
return new SIInsertWaits(); return new SIInsertWaits();
} }
const Counters SIInsertWaits::WaitCounts = { { 15, 7, 15 } };
const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } }; const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
static bool readsVCCZ(unsigned Opcode) { static bool readsVCCZ(unsigned Opcode) {
@ -379,7 +378,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
Ordered[2] = false; Ordered[2] = false;
// The values we are going to put into the S_WAITCNT instruction // The values we are going to put into the S_WAITCNT instruction
Counters Counts = WaitCounts; Counters Counts = HardwareLimits;
// Do we really need to wait? // Do we really need to wait?
bool NeedWait = false; bool NeedWait = false;
@ -395,7 +394,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
unsigned Value = LastIssued.Array[i] - Required.Array[i]; unsigned Value = LastIssued.Array[i] - Required.Array[i];
// Adjust the value to the real hardware possibilities. // Adjust the value to the real hardware possibilities.
Counts.Array[i] = std::min(Value, WaitCounts.Array[i]); Counts.Array[i] = std::min(Value, HardwareLimits.Array[i]);
} else } else
Counts.Array[i] = 0; Counts.Array[i] = 0;
@ -413,9 +412,10 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
// Build the wait instruction // Build the wait instruction
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)) BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
.addImm(((Counts.Named.VM & getVmcntMask(IV)) << getVmcntShift(IV)) | .addImm(encodeWaitcnt(IV,
((Counts.Named.EXP & getExpcntMask(IV)) << getExpcntShift(IV)) | Counts.Named.VM,
((Counts.Named.LGKM & getLgkmcntMask(IV)) << getLgkmcntShift(IV))); Counts.Named.EXP,
Counts.Named.LGKM));
LastOpcodeType = OTHER; LastOpcodeType = OTHER;
LastInstWritesM0 = false; LastInstWritesM0 = false;
@ -443,9 +443,9 @@ void SIInsertWaits::handleExistingWait(MachineBasicBlock::iterator I) {
unsigned Imm = I->getOperand(0).getImm(); unsigned Imm = I->getOperand(0).getImm();
Counters Counts, WaitOn; Counters Counts, WaitOn;
Counts.Named.VM = (Imm >> getVmcntShift(IV)) & getVmcntMask(IV); Counts.Named.VM = decodeVmcnt(IV, Imm);
Counts.Named.EXP = (Imm >> getExpcntShift(IV)) & getExpcntMask(IV); Counts.Named.EXP = decodeExpcnt(IV, Imm);
Counts.Named.LGKM = (Imm >> getLgkmcntShift(IV)) & getLgkmcntMask(IV); Counts.Named.LGKM = decodeLgkmcnt(IV, Imm);
for (unsigned i = 0; i < 3; ++i) { for (unsigned i = 0; i < 3; ++i) {
if (Counts.Array[i] <= LastIssued.Array[i]) if (Counts.Array[i] <= LastIssued.Array[i])
@ -523,6 +523,10 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo(); MRI = &MF.getRegInfo();
IV = getIsaVersion(ST->getFeatureBits()); IV = getIsaVersion(ST->getFeatureBits());
HardwareLimits.Named.VM = getVmcntBitMask(IV);
HardwareLimits.Named.EXP = getExpcntBitMask(IV);
HardwareLimits.Named.LGKM = getLgkmcntBitMask(IV);
WaitedOn = ZeroCounts; WaitedOn = ZeroCounts;
DelayedWaitOn = ZeroCounts; DelayedWaitOn = ZeroCounts;
LastIssued = ZeroCounts; LastIssued = ZeroCounts;

View File

@ -33,6 +33,49 @@
#undef GET_INSTRINFO_NAMED_OPS #undef GET_INSTRINFO_NAMED_OPS
#undef GET_INSTRINFO_ENUM #undef GET_INSTRINFO_ENUM
namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width, i.e. \p Width
/// consecutive set bits whose least significant bit sits at position \p Shift.
///
/// The mask is computed in unsigned arithmetic. A \p Width of zero, or a
/// \p Shift at or beyond the number of bits in unsigned, yields an empty mask;
/// a \p Width covering the whole word yields all ones before shifting.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  // Bit count of unsigned, assuming 8-bit bytes.
  const unsigned NumBits = sizeof(unsigned) * 8;
  // Shifting by the full word size (or more) is undefined, so bail out early.
  if (Shift >= NumBits)
    return 0;
  const unsigned Field = Width >= NumBits ? ~0u : (1u << Width) - 1u;
  return Field << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// Only the field described by \p Shift and \p Width is rewritten; bits of
/// \p Src that do not fit into the field are dropped and all other bits of
/// \p Dst are kept. An empty field leaves \p Dst untouched.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  const unsigned Mask = getBitMask(Shift, Width);
  // An empty mask means there is nothing to write (and Shift may be too large
  // to shift by safely).
  if (Mask == 0)
    return Dst;
  return (Dst & ~Mask) | ((Src << Shift) & Mask);
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits, moved down so that the field starts at bit 0. An
/// empty field unpacks to 0.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  const unsigned Mask = getBitMask(Shift, Width);
  return Mask == 0 ? 0 : (Src & Mask) >> Shift;
}

/// \returns Vmcnt bit shift.
unsigned getVmcntBitShift() { return 0; }

/// \returns Vmcnt bit width.
unsigned getVmcntBitWidth() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

} // anonymous namespace
namespace llvm { namespace llvm {
namespace AMDGPU { namespace AMDGPU {
@ -161,28 +204,63 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
return Ints; return Ints;
} }
unsigned getVmcntMask(IsaVersion Version) { unsigned getWaitcntBitMask(IsaVersion Version) {
return 0xf; unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
return Vmcnt | Expcnt | Lgkmcnt;
} }
unsigned getVmcntShift(IsaVersion Version) { unsigned getVmcntBitMask(IsaVersion Version) {
return 0; return (1 << getVmcntBitWidth()) - 1;
} }
unsigned getExpcntMask(IsaVersion Version) { unsigned getExpcntBitMask(IsaVersion Version) {
return 0x7; return (1 << getExpcntBitWidth()) - 1;
} }
unsigned getExpcntShift(IsaVersion Version) { unsigned getLgkmcntBitMask(IsaVersion Version) {
return 4; return (1 << getLgkmcntBitWidth()) - 1;
} }
unsigned getLgkmcntMask(IsaVersion Version) { unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt) {
return 0xf; return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
} }
unsigned getLgkmcntShift(IsaVersion Version) { unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt) {
return 8; return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
/// Extracts the Lgkmcnt field from the combined \p Waitcnt value. The field
/// position comes from getLgkmcntBitShift()/getLgkmcntBitWidth(); \p Version is
/// not consulted here.
unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt) {
  const unsigned FieldShift = getLgkmcntBitShift();
  const unsigned FieldWidth = getLgkmcntBitWidth();
  return unpackBits(Waitcnt, FieldShift, FieldWidth);
}
/// Splits the combined \p Waitcnt value into its three counters by delegating
/// to decodeVmcnt, decodeExpcnt and decodeLgkmcnt, and stores the results in
/// \p Vmcnt, \p Expcnt and \p Lgkmcnt respectively.
void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  const unsigned VmField = decodeVmcnt(Version, Waitcnt);
  const unsigned ExpField = decodeExpcnt(Version, Waitcnt);
  const unsigned LgkmField = decodeLgkmcnt(Version, Waitcnt);

  Vmcnt = VmField;
  Expcnt = ExpField;
  Lgkmcnt = LgkmField;
}
/// Writes \p Vmcnt into the Vmcnt field of \p Waitcnt via packBits and returns
/// the updated value. \p Version is not consulted here.
unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt) {
  const unsigned FieldShift = getVmcntBitShift();
  const unsigned FieldWidth = getVmcntBitWidth();
  return packBits(Vmcnt, Waitcnt, FieldShift, FieldWidth);
}
/// Writes \p Expcnt into the Expcnt field of \p Waitcnt via packBits and
/// returns the updated value. \p Version is not consulted here.
unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt) {
  const unsigned FieldShift = getExpcntBitShift();
  const unsigned FieldWidth = getExpcntBitWidth();
  return packBits(Expcnt, Waitcnt, FieldShift, FieldWidth);
}
/// Writes \p Lgkmcnt into the Lgkmcnt field of \p Waitcnt via packBits and
/// returns the updated value. \p Version is not consulted here.
unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) {
  const unsigned FieldShift = getLgkmcntBitShift();
  const unsigned FieldWidth = getLgkmcntBitWidth();
  return packBits(Lgkmcnt, Waitcnt, FieldShift, FieldWidth);
}
/// Builds a combined waitcnt value from the three individual counters.
///
/// Starts from getWaitcntBitMask(\p Version) and then overwrites the Vmcnt,
/// Expcnt and Lgkmcnt fields in turn with the supplied values.
unsigned encodeWaitcnt(IsaVersion Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  const unsigned AllFields = getWaitcntBitMask(Version);
  const unsigned WithVm = encodeVmcnt(Version, AllFields, Vmcnt);
  const unsigned WithVmExp = encodeExpcnt(Version, WithVm, Expcnt);
  return encodeLgkmcnt(Version, WithVmExp, Lgkmcnt);
}
unsigned getInitialPSInputAddr(const Function &F) { unsigned getInitialPSInputAddr(const Function &F) {

View File

@ -76,23 +76,59 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
std::pair<int, int> Default, std::pair<int, int> Default,
bool OnlyFirstRequired = false); bool OnlyFirstRequired = false);
/// \returns VMCNT bit mask for given isa \p Version. /// \returns Waitcnt bit mask for given isa \p Version.
unsigned getVmcntMask(IsaVersion Version); unsigned getWaitcntBitMask(IsaVersion Version);
/// \returns VMCNT bit shift for given isa \p Version. /// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntShift(IsaVersion Version); unsigned getVmcntBitMask(IsaVersion Version);
/// \returns EXPCNT bit mask for given isa \p Version. /// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntMask(IsaVersion Version); unsigned getExpcntBitMask(IsaVersion Version);
/// \returns EXPCNT bit shift for given isa \p Version. /// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getExpcntShift(IsaVersion Version); unsigned getLgkmcntBitMask(IsaVersion Version);
/// \returns LGKMCNT bit mask for given isa \p Version. /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned getLgkmcntMask(IsaVersion Version); unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt);
/// \returns LGKMCNT bit shift for given isa \p Version. /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned getLgkmcntShift(IsaVersion Version); unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt);
/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt);
/// \brief Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
/// \p Vmcnt = \p Waitcnt[3:0]
/// \p Expcnt = \p Waitcnt[6:4]
/// \p Lgkmcnt = \p Waitcnt[11:8]
void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
///
/// \details Only the Vmcnt field of \p Waitcnt is rewritten; the other bits
/// are returned unchanged. Bits of \p Vmcnt that do not fit into the field are
/// dropped.
unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt);
/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
///
/// \details Only the Expcnt field of \p Waitcnt is rewritten; the other bits
/// are returned unchanged. Bits of \p Expcnt that do not fit into the field
/// are dropped.
unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt);
/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
///
/// \details Only the Lgkmcnt field of \p Waitcnt is rewritten; the other bits
/// are returned unchanged. Bits of \p Lgkmcnt that do not fit into the field
/// are dropped.
unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt);
/// \brief Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
/// Waitcnt[3:0] = \p Vmcnt
/// Waitcnt[6:4] = \p Expcnt
/// Waitcnt[11:8] = \p Lgkmcnt
/// Bits of the result outside these three fields are zero, and each counter
/// is truncated to the width of its field.
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
unsigned encodeWaitcnt(IsaVersion Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
unsigned getInitialPSInputAddr(const Function &F); unsigned getInitialPSInputAddr(const Function &F);