From cdd4547607128d41b3f224b0790a761d34c000eb Mon Sep 17 00:00:00 2001 From: Konstantin Zhuravlyov Date: Tue, 11 Oct 2016 18:58:22 +0000 Subject: [PATCH] [AMDGPU] Refactor waitcnt encoding - Refactor bit packing/unpacking - Calculate bit mask given bit shift and bit width - Introduce function for decoding bits of waitcnt - Introduce function for encoding bits of waitcnt - Introduce function for getting waitcnt mask (instead of using bare numbers) - Introduce function for getting max waitcnt(s) (instead of using bare numbers) Differential Revision: https://reviews.llvm.org/D25298 llvm-svn: 283919 --- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 36 +++---- .../AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp | 11 +- llvm/lib/Target/AMDGPU/SIInsertWaits.cpp | 28 ++--- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 102 +++++++++++++++--- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 60 ++++++++--- 5 files changed, 171 insertions(+), 66 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 839a5559f92d..6d8a7a5d4f66 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -2016,53 +2016,41 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) Parser.Lex(); - int CntShift; - int CntMask; - IsaVersion IV = getIsaVersion(getSTI().getFeatureBits()); - if (CntName == "vmcnt") { - CntMask = getVmcntMask(IV); - CntShift = getVmcntShift(IV); - } else if (CntName == "expcnt") { - CntMask = getExpcntMask(IV); - CntShift = getExpcntShift(IV); - } else if (CntName == "lgkmcnt") { - CntMask = getLgkmcntMask(IV); - CntShift = getLgkmcntShift(IV); - } else { + if (CntName == "vmcnt") + IntVal = encodeVmcnt(IV, IntVal, CntVal); + else if (CntName == "expcnt") + IntVal = encodeExpcnt(IV, IntVal, CntVal); + else if (CntName == "lgkmcnt") + IntVal = encodeLgkmcnt(IV, IntVal, 
CntVal); + else return true; - } - IntVal &= ~(CntMask << CntShift); - IntVal |= (CntVal << CntShift); return false; } AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { - // Disable all counters by default. - // vmcnt [3:0] - // expcnt [6:4] - // lgkmcnt [11:8] - int64_t CntVal = 0xf7f; + IsaVersion IV = getIsaVersion(getSTI().getFeatureBits()); + int64_t Waitcnt = getWaitcntBitMask(IV); SMLoc S = Parser.getTok().getLoc(); switch(getLexer().getKind()) { default: return MatchOperand_ParseFail; case AsmToken::Integer: // The operand can be an integer value. - if (getParser().parseAbsoluteExpression(CntVal)) + if (getParser().parseAbsoluteExpression(Waitcnt)) return MatchOperand_ParseFail; break; case AsmToken::Identifier: do { - if (parseCnt(CntVal)) + if (parseCnt(Waitcnt)) return MatchOperand_ParseFail; } while(getLexer().isNot(AsmToken::EndOfStatement)); break; } - Operands.push_back(AMDGPUOperand::CreateImm(this, CntVal, S)); + Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); return MatchOperand_Success; } diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index 494b86714c58..56a6a0413c8a 100644 --- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -874,25 +874,24 @@ void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo, IsaVersion IV = getIsaVersion(STI.getFeatureBits()); unsigned SImm16 = MI->getOperand(OpNo).getImm(); - unsigned Vmcnt = (SImm16 >> getVmcntShift(IV)) & getVmcntMask(IV); - unsigned Expcnt = (SImm16 >> getExpcntShift(IV)) & getExpcntMask(IV); - unsigned Lgkmcnt = (SImm16 >> getLgkmcntShift(IV)) & getLgkmcntMask(IV); + unsigned Vmcnt, Expcnt, Lgkmcnt; + decodeWaitcnt(IV, SImm16, Vmcnt, Expcnt, Lgkmcnt); bool NeedSpace = false; - if (Vmcnt != 0xF) { + if (Vmcnt != getVmcntBitMask(IV)) { O << "vmcnt(" << Vmcnt << ')'; 
NeedSpace = true; } - if (Expcnt != 0x7) { + if (Expcnt != getExpcntBitMask(IV)) { if (NeedSpace) O << ' '; O << "expcnt(" << Expcnt << ')'; NeedSpace = true; } - if (Lgkmcnt != 0xF) { + if (Lgkmcnt != getLgkmcntBitMask(IV)) { if (NeedSpace) O << ' '; O << "lgkmcnt(" << Lgkmcnt << ')'; diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp index 2d9d653e7ff0..e2ae25af5612 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -63,12 +63,12 @@ private: const MachineRegisterInfo *MRI; IsaVersion IV; - /// \brief Constant hardware limits - static const Counters WaitCounts; - /// \brief Constant zero value static const Counters ZeroCounts; + /// \brief Hardware limits + Counters HardwareLimits; + /// \brief Counter values we have already waited on. Counters WaitedOn; @@ -173,7 +173,6 @@ FunctionPass *llvm::createSIInsertWaitsPass() { return new SIInsertWaits(); } -const Counters SIInsertWaits::WaitCounts = { { 15, 7, 15 } }; const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } }; static bool readsVCCZ(unsigned Opcode) { @@ -379,7 +378,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB, Ordered[2] = false; // The values we are going to put into the S_WAITCNT instruction - Counters Counts = WaitCounts; + Counters Counts = HardwareLimits; // Do we really need to wait? bool NeedWait = false; @@ -395,7 +394,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB, unsigned Value = LastIssued.Array[i] - Required.Array[i]; // Adjust the value to the real hardware possibilities. 
- Counts.Array[i] = std::min(Value, WaitCounts.Array[i]); + Counts.Array[i] = std::min(Value, HardwareLimits.Array[i]); } else Counts.Array[i] = 0; @@ -413,9 +412,10 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB, // Build the wait instruction BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)) - .addImm(((Counts.Named.VM & getVmcntMask(IV)) << getVmcntShift(IV)) | - ((Counts.Named.EXP & getExpcntMask(IV)) << getExpcntShift(IV)) | - ((Counts.Named.LGKM & getLgkmcntMask(IV)) << getLgkmcntShift(IV))); + .addImm(encodeWaitcnt(IV, + Counts.Named.VM, + Counts.Named.EXP, + Counts.Named.LGKM)); LastOpcodeType = OTHER; LastInstWritesM0 = false; @@ -443,9 +443,9 @@ void SIInsertWaits::handleExistingWait(MachineBasicBlock::iterator I) { unsigned Imm = I->getOperand(0).getImm(); Counters Counts, WaitOn; - Counts.Named.VM = (Imm >> getVmcntShift(IV)) & getVmcntMask(IV); - Counts.Named.EXP = (Imm >> getExpcntShift(IV)) & getExpcntMask(IV); - Counts.Named.LGKM = (Imm >> getLgkmcntShift(IV)) & getLgkmcntMask(IV); + Counts.Named.VM = decodeVmcnt(IV, Imm); + Counts.Named.EXP = decodeExpcnt(IV, Imm); + Counts.Named.LGKM = decodeLgkmcnt(IV, Imm); for (unsigned i = 0; i < 3; ++i) { if (Counts.Array[i] <= LastIssued.Array[i]) @@ -523,6 +523,10 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); IV = getIsaVersion(ST->getFeatureBits()); + HardwareLimits.Named.VM = getVmcntBitMask(IV); + HardwareLimits.Named.EXP = getExpcntBitMask(IV); + HardwareLimits.Named.LGKM = getLgkmcntBitMask(IV); + WaitedOn = ZeroCounts; DelayedWaitOn = ZeroCounts; LastIssued = ZeroCounts; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 68330d616d58..4698cb9a2be6 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -33,6 +33,49 @@ #undef GET_INSTRINFO_NAMED_OPS #undef GET_INSTRINFO_ENUM +namespace { + +/// \returns Bit mask for 
given bit \p Shift and bit \p Width. +unsigned getBitMask(unsigned Shift, unsigned Width) { + return ((1 << Width) - 1) << Shift; +} + +/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width. +/// +/// \returns Packed \p Dst. +unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) { + Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width); + Dst |= (Src << Shift) & getBitMask(Shift, Width); + return Dst; +} + +/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width. +/// +/// \returns Unpacked bits. +unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) { + return (Src & getBitMask(Shift, Width)) >> Shift; +} + +/// \returns Vmcnt bit shift. +unsigned getVmcntBitShift() { return 0; } + +/// \returns Vmcnt bit width. +unsigned getVmcntBitWidth() { return 4; } + +/// \returns Expcnt bit shift. +unsigned getExpcntBitShift() { return 4; } + +/// \returns Expcnt bit width. +unsigned getExpcntBitWidth() { return 3; } + +/// \returns Lgkmcnt bit shift. +unsigned getLgkmcntBitShift() { return 8; } + +/// \returns Lgkmcnt bit width. 
+unsigned getLgkmcntBitWidth() { return 4; } + +} // anonymous namespace + namespace llvm { namespace AMDGPU { @@ -161,28 +204,63 @@ std::pair getIntegerPairAttribute(const Function &F, return Ints; } -unsigned getVmcntMask(IsaVersion Version) { - return 0xf; +unsigned getWaitcntBitMask(IsaVersion Version) { + unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth()); + unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth()); + unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth()); + return Vmcnt | Expcnt | Lgkmcnt; } -unsigned getVmcntShift(IsaVersion Version) { - return 0; +unsigned getVmcntBitMask(IsaVersion Version) { + return (1 << getVmcntBitWidth()) - 1; } -unsigned getExpcntMask(IsaVersion Version) { - return 0x7; +unsigned getExpcntBitMask(IsaVersion Version) { + return (1 << getExpcntBitWidth()) - 1; } -unsigned getExpcntShift(IsaVersion Version) { - return 4; +unsigned getLgkmcntBitMask(IsaVersion Version) { + return (1 << getLgkmcntBitWidth()) - 1; } -unsigned getLgkmcntMask(IsaVersion Version) { - return 0xf; +unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt) { + return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth()); } -unsigned getLgkmcntShift(IsaVersion Version) { - return 8; +unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt) { + return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); +} + +unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt) { + return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); +} + +void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt, + unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) { + Vmcnt = decodeVmcnt(Version, Waitcnt); + Expcnt = decodeExpcnt(Version, Waitcnt); + Lgkmcnt = decodeLgkmcnt(Version, Waitcnt); +} + +unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt) { + return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth()); +} + +unsigned 
encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt) { + return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth()); +} + +unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) { + return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth()); +} + +unsigned encodeWaitcnt(IsaVersion Version, + unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) { + unsigned Waitcnt = getWaitcntBitMask(Version);; + Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt); + Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt); + Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt); + return Waitcnt; } unsigned getInitialPSInputAddr(const Function &F) { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 95788766d35e..354cb304cf32 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -76,23 +76,59 @@ std::pair getIntegerPairAttribute(const Function &F, std::pair Default, bool OnlyFirstRequired = false); -/// \returns VMCNT bit mask for given isa \p Version. -unsigned getVmcntMask(IsaVersion Version); +/// \returns Waitcnt bit mask for given isa \p Version. +unsigned getWaitcntBitMask(IsaVersion Version); -/// \returns VMCNT bit shift for given isa \p Version. -unsigned getVmcntShift(IsaVersion Version); +/// \returns Vmcnt bit mask for given isa \p Version. +unsigned getVmcntBitMask(IsaVersion Version); -/// \returns EXPCNT bit mask for given isa \p Version. -unsigned getExpcntMask(IsaVersion Version); +/// \returns Expcnt bit mask for given isa \p Version. +unsigned getExpcntBitMask(IsaVersion Version); -/// \returns EXPCNT bit shift for given isa \p Version. -unsigned getExpcntShift(IsaVersion Version); +/// \returns Lgkmcnt bit mask for given isa \p Version. +unsigned getLgkmcntBitMask(IsaVersion Version); -/// \returns LGKMCNT bit mask for given isa \p Version. 
-unsigned getLgkmcntMask(IsaVersion Version); +/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version. +unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt); -/// \returns LGKMCNT bit shift for given isa \p Version. -unsigned getLgkmcntShift(IsaVersion Version); +/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version. +unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt); + +/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version. +unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt); + +/// \brief Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa +/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and +/// \p Lgkmcnt respectively. +/// +/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows: +/// \p Vmcnt = \p Waitcnt[3:0] +/// \p Expcnt = \p Waitcnt[6:4] +/// \p Lgkmcnt = \p Waitcnt[11:8] +void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt, + unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt); + +/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version. +unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt); + +/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version. +unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt); + +/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version. +unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt); + +/// \brief Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa +/// \p Version. +/// +/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows: +/// Waitcnt[3:0] = \p Vmcnt +/// Waitcnt[6:4] = \p Expcnt +/// Waitcnt[11:8] = \p Lgkmcnt +/// +/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given +/// isa \p Version. 
+unsigned encodeWaitcnt(IsaVersion Version, + unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt); unsigned getInitialPSInputAddr(const Function &F);