diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index ac77d282fe93..1885b5958e8d 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -12,6 +12,7 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "SIDefines.h" +#include "SIInstrInfo.h" #include "Utils/AMDGPUAsmUtils.h" #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" @@ -128,6 +129,7 @@ public: enum ImmTy { ImmTyNone, ImmTyGDS, + ImmTyLDS, ImmTyOffen, ImmTyIdxen, ImmTyAddr64, @@ -303,6 +305,7 @@ public: bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); } bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); } bool isGDS() const { return isImmTy(ImmTyGDS); } + bool isLDS() const { return isImmTy(ImmTyLDS); } bool isGLC() const { return isImmTy(ImmTyGLC); } bool isSLC() const { return isImmTy(ImmTySLC); } bool isTFE() const { return isImmTy(ImmTyTFE); } @@ -649,6 +652,7 @@ public: switch (Type) { case ImmTyNone: OS << "None"; break; case ImmTyGDS: OS << "GDS"; break; + case ImmTyLDS: OS << "LDS"; break; case ImmTyOffen: OS << "Offen"; break; case ImmTyIdxen: OS << "Idxen"; break; case ImmTyAddr64: OS << "Addr64"; break; @@ -4078,6 +4082,7 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultTFE() const { void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, bool IsAtomic, bool IsAtomicReturn) { + bool HasLdsModifier = false; OptionalImmIndexMap OptionalIdx; assert(IsAtomicReturn ? IsAtomic : true); @@ -4096,6 +4101,8 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, continue; } + HasLdsModifier |= Op.isLDS(); // Sticky: other modifiers may follow 'lds'. + // Handle tokens like 'offen' which are sometimes hard-coded into the // asm string. There are no MCInst operands for these. 
if (Op.isToken()) { @@ -4107,6 +4114,20 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, OptionalIdx[Op.getImmTy()] = i; } + // This is a workaround for an llvm quirk which may result in an + // incorrect instruction selection. Lds and non-lds versions of + // MUBUF instructions are identical except that lds versions + // have mandatory 'lds' modifier. However this modifier follows + // optional modifiers and llvm asm matcher regards this 'lds' + // modifier as an optional one. As a result, an lds version + // of opcode may be selected even if it has no 'lds' modifier. + if (!HasLdsModifier) { + int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); + if (NoLdsOpcode != -1) { // Got lds version - correct it. + Inst.setOpcode(NoLdsOpcode); + } + } + // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns. if (IsAtomicReturn) { MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning. @@ -4118,7 +4139,10 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); + + if (!HasLdsModifier) { // tfe is not legal with lds opcodes + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); + } } void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { @@ -4312,6 +4336,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, + {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, {"dfmt", AMDGPUOperand::ImmTyDFMT, false, nullptr}, @@ -5022,6 +5047,8 @@ unsigned 
AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; case MCK_gds: return Operand.isGDS() ? Match_Success : Match_InvalidOperand; + case MCK_lds: + return Operand.isLDS() ? Match_Success : Match_InvalidOperand; case MCK_glc: return Operand.isGLC() ? Match_Success : Match_InvalidOperand; case MCK_d16: diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index a430ffd4615f..ebabccadbe55 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -57,6 +57,11 @@ class MUBUFAddr64Table { string OpName = NAME # suffix; } +class MUBUFLdsTable { + bit IsLds = is_lds; + string OpName = NAME # suffix; +} + class MTBUFAddr64Table { bit IsAddr64 = is_addr64; string OpName = NAME # suffix; @@ -310,6 +315,7 @@ class MUBUF_Pseudo offen = 0; bits<1> idxen = 0; bits<1> addr64 = 0; + bits<1> lds = 0; bits<1> has_vdata = 1; bits<1> has_vaddr = 1; bits<1> has_glc = 1; @@ -336,7 +342,6 @@ class MUBUF_Real op, MUBUF_Pseudo ps> : bits<12> offset; bits<1> glc; - bits<1> lds = 0; bits<8> vaddr; bits<8> vdata; bits<7> srsrc; @@ -371,31 +376,35 @@ class MUBUF_Invalidate : } class getMUBUFInsDA vdataList, - list vaddrList=[]> { + list vaddrList=[], + bit isLds = 0> { RegisterClass vdataClass = !if(!empty(vdataList), ?, !head(vdataList)); RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList)); dag InsNoData = !if(!empty(vaddrList), (ins SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, GLC:$glc, slc:$slc, tfe:$tfe), + offset:$offset, GLC:$glc, slc:$slc), (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, - offset:$offset, GLC:$glc, slc:$slc, tfe:$tfe) + offset:$offset, GLC:$glc, slc:$slc) ); dag InsData = !if(!empty(vaddrList), (ins vdataClass:$vdata, SReg_128:$srsrc, - SCSrc_b32:$soffset, offset:$offset, GLC:$glc, slc:$slc, tfe:$tfe), + SCSrc_b32:$soffset, offset:$offset, GLC:$glc, 
slc:$slc), (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc, - SCSrc_b32:$soffset, offset:$offset, GLC:$glc, slc:$slc, tfe:$tfe) + SCSrc_b32:$soffset, offset:$offset, GLC:$glc, slc:$slc) ); - dag ret = !if(!empty(vdataList), InsNoData, InsData); + dag ret = !con( + !if(!empty(vdataList), InsNoData, InsData), + !if(isLds, (ins), (ins tfe:$tfe)) + ); } -class getMUBUFIns vdataList=[]> { +class getMUBUFIns vdataList=[], bit isLds = 0> { dag ret = - !if(!eq(addrKind, BUFAddrKind.Offset), getMUBUFInsDA.ret, - !if(!eq(addrKind, BUFAddrKind.OffEn), getMUBUFInsDA.ret, - !if(!eq(addrKind, BUFAddrKind.IdxEn), getMUBUFInsDA.ret, - !if(!eq(addrKind, BUFAddrKind.BothEn), getMUBUFInsDA.ret, - !if(!eq(addrKind, BUFAddrKind.Addr64), getMUBUFInsDA.ret, + !if(!eq(addrKind, BUFAddrKind.Offset), getMUBUFInsDA.ret, + !if(!eq(addrKind, BUFAddrKind.OffEn), getMUBUFInsDA.ret, + !if(!eq(addrKind, BUFAddrKind.IdxEn), getMUBUFInsDA.ret, + !if(!eq(addrKind, BUFAddrKind.BothEn), getMUBUFInsDA.ret, + !if(!eq(addrKind, BUFAddrKind.Addr64), getMUBUFInsDA.ret, (ins)))))); } @@ -426,20 +435,28 @@ class MUBUF_Load_Pseudo pattern=[], // Workaround bug bz30254 int addrKindCopy = addrKind> : MUBUF_Pseudo.ret, !if(HasTiedDest, (ins vdataClass:$vdata_in), (ins))), - " $vdata, " # getMUBUFAsmOps.ret # "$glc$slc$tfe", + !con(getMUBUFIns.ret, + !if(HasTiedDest, (ins vdataClass:$vdata_in), (ins))), + " $vdata, " # getMUBUFAsmOps.ret # "$glc$slc" # + !if(isLds, " lds", "$tfe"), pattern>, MUBUF_SetupAddr { - let PseudoInstr = opName # "_" # getAddrName.ret; + let PseudoInstr = opName # !if(isLds, "_lds", "") # + "_" # getAddrName.ret; + let Constraints = !if(HasTiedDest, "$vdata = $vdata_in", ""); let mayLoad = 1; let mayStore = 0; let maybeAtomic = 1; + let Uses = !if(isLds, [EXEC, M0], [EXEC]); + let has_tfe = !if(isLds, 0, 1); + let lds = isLds; } // FIXME: tfe can't be an operand because it requires a separate @@ -447,32 +464,45 @@ class MUBUF_Load_Pseudo { + bit TiedDest = 0, + bit isLds = 0> { 
def _OFFSET : MUBUF_Load_Pseudo , - MUBUFAddr64Table<0>; + TiedDest, isLds, + !if(isLds, + [], + [(set load_vt:$vdata, + (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)))])>, + MUBUFAddr64Table<0, !if(isLds, "_LDS", "")>; def _ADDR64 : MUBUF_Load_Pseudo , - MUBUFAddr64Table<1>; + TiedDest, isLds, + !if(isLds, + [], + [(set load_vt:$vdata, + (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)))])>, + MUBUFAddr64Table<1, !if(isLds, "_LDS", "")>; - def _OFFEN : MUBUF_Load_Pseudo ; - def _IDXEN : MUBUF_Load_Pseudo ; - def _BOTHEN : MUBUF_Load_Pseudo ; + def _OFFEN : MUBUF_Load_Pseudo ; + def _IDXEN : MUBUF_Load_Pseudo ; + def _BOTHEN : MUBUF_Load_Pseudo ; let DisableWQM = 1 in { - def _OFFSET_exact : MUBUF_Load_Pseudo ; - def _OFFEN_exact : MUBUF_Load_Pseudo ; - def _IDXEN_exact : MUBUF_Load_Pseudo ; - def _BOTHEN_exact : MUBUF_Load_Pseudo ; + def _OFFSET_exact : MUBUF_Load_Pseudo ; + def _OFFEN_exact : MUBUF_Load_Pseudo ; + def _IDXEN_exact : MUBUF_Load_Pseudo ; + def _BOTHEN_exact : MUBUF_Load_Pseudo ; } } +multiclass MUBUF_Pseudo_Loads_Lds { + defm NAME : MUBUF_Pseudo_Loads; + defm _LDS : MUBUF_Pseudo_Loads; +} + class MUBUF_Store_Pseudo ; defm BUFFER_LOAD_FORMAT_XY : MUBUF_Pseudo_Loads < @@ -726,19 +756,19 @@ let SubtargetPredicate = HasPackedD16VMem, D16 = 1 in { >; } // End HasPackedD16VMem. 
-defm BUFFER_LOAD_UBYTE : MUBUF_Pseudo_Loads < +defm BUFFER_LOAD_UBYTE : MUBUF_Pseudo_Loads_Lds < "buffer_load_ubyte", VGPR_32, i32, mubuf_az_extloadi8 >; -defm BUFFER_LOAD_SBYTE : MUBUF_Pseudo_Loads < +defm BUFFER_LOAD_SBYTE : MUBUF_Pseudo_Loads_Lds < "buffer_load_sbyte", VGPR_32, i32, mubuf_sextloadi8 >; -defm BUFFER_LOAD_USHORT : MUBUF_Pseudo_Loads < +defm BUFFER_LOAD_USHORT : MUBUF_Pseudo_Loads_Lds < "buffer_load_ushort", VGPR_32, i32, mubuf_az_extloadi16 >; -defm BUFFER_LOAD_SSHORT : MUBUF_Pseudo_Loads < +defm BUFFER_LOAD_SSHORT : MUBUF_Pseudo_Loads_Lds < "buffer_load_sshort", VGPR_32, i32, mubuf_sextloadi16 >; -defm BUFFER_LOAD_DWORD : MUBUF_Pseudo_Loads < +defm BUFFER_LOAD_DWORD : MUBUF_Pseudo_Loads_Lds < "buffer_load_dword", VGPR_32, i32, mubuf_load >; defm BUFFER_LOAD_DWORDX2 : MUBUF_Pseudo_Loads < @@ -1582,7 +1612,7 @@ class MUBUF_Real_si op, MUBUF_Pseudo ps> : let Inst{13} = ps.idxen; let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); let Inst{15} = ps.addr64; - let Inst{16} = lds; + let Inst{16} = !if(ps.lds, 1, 0); let Inst{24-18} = op; let Inst{31-26} = 0x38; //encoding let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); @@ -1601,6 +1631,31 @@ multiclass MUBUF_Real_AllAddr_si op> { def _BOTHEN_si : MUBUF_Real_si (NAME#"_BOTHEN")>; } +multiclass MUBUF_Real_AllAddr_Lds_si op> { + + def _OFFSET_si : MUBUF_Real_si (NAME#"_OFFSET")>, + MUBUFLdsTable<0, "_OFFSET_si">; + def _ADDR64_si : MUBUF_Real_si (NAME#"_ADDR64")>, + MUBUFLdsTable<0, "_ADDR64_si">; + def _OFFEN_si : MUBUF_Real_si (NAME#"_OFFEN")>, + MUBUFLdsTable<0, "_OFFEN_si">; + def _IDXEN_si : MUBUF_Real_si (NAME#"_IDXEN")>, + MUBUFLdsTable<0, "_IDXEN_si">; + def _BOTHEN_si : MUBUF_Real_si (NAME#"_BOTHEN")>, + MUBUFLdsTable<0, "_BOTHEN_si">; + + def _LDS_OFFSET_si : MUBUF_Real_si (NAME#"_LDS_OFFSET")>, + MUBUFLdsTable<1, "_OFFSET_si">; + def _LDS_ADDR64_si : MUBUF_Real_si (NAME#"_LDS_ADDR64")>, + MUBUFLdsTable<1, "_ADDR64_si">; + def _LDS_OFFEN_si : MUBUF_Real_si (NAME#"_LDS_OFFEN")>, + MUBUFLdsTable<1, 
"_OFFEN_si">; + def _LDS_IDXEN_si : MUBUF_Real_si (NAME#"_LDS_IDXEN")>, + MUBUFLdsTable<1, "_IDXEN_si">; + def _LDS_BOTHEN_si : MUBUF_Real_si (NAME#"_LDS_BOTHEN")>, + MUBUFLdsTable<1, "_BOTHEN_si">; +} + multiclass MUBUF_Real_Atomic_si op> : MUBUF_Real_AllAddr_si { def _OFFSET_RTN_si : MUBUF_Real_si (NAME#"_OFFSET_RTN")>; def _ADDR64_RTN_si : MUBUF_Real_si (NAME#"_ADDR64_RTN")>; @@ -1609,7 +1664,7 @@ multiclass MUBUF_Real_Atomic_si op> : MUBUF_Real_AllAddr_si { def _BOTHEN_RTN_si : MUBUF_Real_si (NAME#"_BOTHEN_RTN")>; } -defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_si <0x00>; +defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_Lds_si <0x00>; defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_si <0x01>; defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_si <0x02>; defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_si <0x03>; @@ -1617,11 +1672,11 @@ defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_si <0x04>; defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_si <0x05>; defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_si <0x06>; defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_si <0x07>; -defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_si <0x08>; -defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_si <0x09>; -defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_si <0x0a>; -defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_si <0x0b>; -defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_si <0x0c>; +defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_Lds_si <0x08>; +defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_Lds_si <0x09>; +defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_Lds_si <0x0a>; +defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_Lds_si <0x0b>; +defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_Lds_si <0x0c>; defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_si <0x0d>; defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_si <0x0e>; defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_si <0x0f>; @@ -1741,7 +1796,7 @@ class MUBUF_Real_vi op, MUBUF_Pseudo ps> : let Inst{12} = ps.offen; let Inst{13} = ps.idxen; let Inst{14} = !if(ps.has_glc, glc, 
ps.glc_value); - let Inst{16} = lds; + let Inst{16} = !if(ps.lds, 1, 0); let Inst{17} = !if(ps.has_slc, slc, ?); let Inst{24-18} = op; let Inst{31-26} = 0x38; //encoding @@ -1759,6 +1814,27 @@ multiclass MUBUF_Real_AllAddr_vi op> { def _BOTHEN_vi : MUBUF_Real_vi (NAME#"_BOTHEN")>; } +multiclass MUBUF_Real_AllAddr_Lds_vi op> { + + def _OFFSET_vi : MUBUF_Real_vi (NAME#"_OFFSET")>, + MUBUFLdsTable<0, "_OFFSET_vi">; + def _OFFEN_vi : MUBUF_Real_vi (NAME#"_OFFEN")>, + MUBUFLdsTable<0, "_OFFEN_vi">; + def _IDXEN_vi : MUBUF_Real_vi (NAME#"_IDXEN")>, + MUBUFLdsTable<0, "_IDXEN_vi">; + def _BOTHEN_vi : MUBUF_Real_vi (NAME#"_BOTHEN")>, + MUBUFLdsTable<0, "_BOTHEN_vi">; + + def _LDS_OFFSET_vi : MUBUF_Real_vi (NAME#"_LDS_OFFSET")>, + MUBUFLdsTable<1, "_OFFSET_vi">; + def _LDS_OFFEN_vi : MUBUF_Real_vi (NAME#"_LDS_OFFEN")>, + MUBUFLdsTable<1, "_OFFEN_vi">; + def _LDS_IDXEN_vi : MUBUF_Real_vi (NAME#"_LDS_IDXEN")>, + MUBUFLdsTable<1, "_IDXEN_vi">; + def _LDS_BOTHEN_vi : MUBUF_Real_vi (NAME#"_LDS_BOTHEN")>, + MUBUFLdsTable<1, "_BOTHEN_vi">; +} + class MUBUF_Real_gfx80 op, MUBUF_Pseudo ps> : MUBUF_Real, Enc64, @@ -1770,7 +1846,7 @@ class MUBUF_Real_gfx80 op, MUBUF_Pseudo ps> : let Inst{12} = ps.offen; let Inst{13} = ps.idxen; let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); - let Inst{16} = lds; + let Inst{16} = !if(ps.lds, 1, 0); let Inst{17} = !if(ps.has_slc, slc, ?); let Inst{24-18} = op; let Inst{31-26} = 0x38; //encoding @@ -1796,7 +1872,7 @@ multiclass MUBUF_Real_Atomic_vi op> : def _BOTHEN_RTN_vi : MUBUF_Real_vi (NAME#"_BOTHEN_RTN")>; } -defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_vi <0x00>; +defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_Lds_vi <0x00>; defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_vi <0x01>; defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_vi <0x02>; defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_vi <0x03>; @@ -1824,11 +1900,11 @@ let SubtargetPredicate = HasPackedD16VMem in { defm BUFFER_STORE_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_vi <0x0e>; defm 
BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_vi <0x0f>; } // End HasPackedD16VMem. -defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_vi <0x10>; -defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_vi <0x11>; -defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_vi <0x12>; -defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_vi <0x13>; -defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_vi <0x14>; +defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_Lds_vi <0x10>; +defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_Lds_vi <0x11>; +defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_Lds_vi <0x12>; +defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_Lds_vi <0x13>; +defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_Lds_vi <0x14>; defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_vi <0x15>; defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_vi <0x16>; defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_vi <0x17>; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 0ae5503fcfda..61639f5cf649 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -915,6 +915,9 @@ namespace AMDGPU { LLVM_READONLY int getAddr64Inst(uint16_t Opcode); + LLVM_READONLY + int getMUBUFNoLdsInst(uint16_t Opcode); + LLVM_READONLY int getAtomicRetOp(uint16_t Opcode); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 6d889e4ad194..10f5c3bae3ff 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2109,6 +2109,14 @@ def getAddr64Inst : InstrMapping { let ValueCols = [["1"]]; } +def getMUBUFNoLdsInst : InstrMapping { + let FilterClass = "MUBUFLdsTable"; + let RowFields = ["OpName"]; + let ColFields = ["IsLds"]; + let KeyCol = ["1"]; + let ValueCols = [["0"]]; +} + // Maps an atomic opcode to its version with a return value. 
def getAtomicRetOp : InstrMapping { let FilterClass = "AtomicNoRet"; diff --git a/llvm/test/MC/AMDGPU/mubuf.s b/llvm/test/MC/AMDGPU/mubuf.s index 03cef9dbb7fd..b310075798c3 100644 --- a/llvm/test/MC/AMDGPU/mubuf.s +++ b/llvm/test/MC/AMDGPU/mubuf.s @@ -2,9 +2,9 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=SICI %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOCI %s -// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSI -check-prefix=NOSICIVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOCI -check-prefix=NOSICIVI %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI -check-prefix=NOSICIVI %s //===----------------------------------------------------------------------===// // Test for different operand combinations @@ -710,3 +710,69 @@ buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 glc buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 glc slc // SICI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 glc slc ; encoding: [0x04,0x70,0xf0,0xe0,0x02,0x01,0x42,0xb8] // VI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 glc slc ; encoding: [0x04,0x70,0x2e,0xe1,0x02,0x01,0x02,0xb8] + +//===----------------------------------------------------------------------===// +// Lds support +//===----------------------------------------------------------------------===// + +buffer_load_sbyte v5, off, s[8:11], s3 lds +// SICI: buffer_load_sbyte v5, off, s[8:11], s3 lds ; encoding: [0x00,0x00,0x25,0xe0,0x00,0x05,0x02,0x03] 
+// VI: buffer_load_sbyte v5, off, s[8:11], s3 lds ; encoding: [0x00,0x00,0x45,0xe0,0x00,0x05,0x02,0x03] + +buffer_load_sbyte v5, off, s[8:11], s3 glc slc lds +// SICI: buffer_load_sbyte v5, off, s[8:11], s3 glc slc lds ; encoding: [0x00,0x40,0x25,0xe0,0x00,0x05,0x42,0x03] +// VI: buffer_load_sbyte v5, off, s[8:11], s3 glc slc lds ; encoding: [0x00,0x40,0x47,0xe0,0x00,0x05,0x02,0x03] + +buffer_load_sbyte v5, off, s[8:11], s3 offset:4095 glc slc lds +// SICI: buffer_load_sbyte v5, off, s[8:11], s3 offset:4095 glc slc lds ; encoding: [0xff,0x4f,0x25,0xe0,0x00,0x05,0x42,0x03] +// VI: buffer_load_sbyte v5, off, s[8:11], s3 offset:4095 glc slc lds ; encoding: [0xff,0x4f,0x47,0xe0,0x00,0x05,0x02,0x03] + +buffer_load_sbyte v5, v0, s[8:11], s3 offen offset:4095 slc lds +// SICI: buffer_load_sbyte v5, v0, s[8:11], s3 offen offset:4095 slc lds ; encoding: [0xff,0x1f,0x25,0xe0,0x00,0x05,0x42,0x03] +// VI: buffer_load_sbyte v5, v0, s[8:11], s3 offen offset:4095 slc lds ; encoding: [0xff,0x1f,0x47,0xe0,0x00,0x05,0x02,0x03] + +buffer_load_sbyte v5, v0, s[8:11], s3 offen lds +// SICI: buffer_load_sbyte v5, v0, s[8:11], s3 offen lds ; encoding: [0x00,0x10,0x25,0xe0,0x00,0x05,0x02,0x03] +// VI: buffer_load_sbyte v5, v0, s[8:11], s3 offen lds ; encoding: [0x00,0x10,0x45,0xe0,0x00,0x05,0x02,0x03] + +buffer_load_sbyte v5, v0, s[8:11], s3 idxen glc slc lds +// SICI: buffer_load_sbyte v5, v0, s[8:11], s3 idxen glc slc lds ; encoding: [0x00,0x60,0x25,0xe0,0x00,0x05,0x42,0x03] +// VI: buffer_load_sbyte v5, v0, s[8:11], s3 idxen glc slc lds ; encoding: [0x00,0x60,0x47,0xe0,0x00,0x05,0x02,0x03] + +buffer_load_sbyte v5, v[0:1], s[8:11], s3 idxen offen offset:4095 lds +// SICI: buffer_load_sbyte v5, v[0:1], s[8:11], s3 idxen offen offset:4095 lds ; encoding: [0xff,0x3f,0x25,0xe0,0x00,0x05,0x02,0x03] +// VI: buffer_load_sbyte v5, v[0:1], s[8:11], s3 idxen offen offset:4095 lds ; encoding: [0xff,0x3f,0x45,0xe0,0x00,0x05,0x02,0x03] + +buffer_load_sbyte v5, v[0:1], s[8:11], s3 idxen offen 
offset:4095 glc slc lds +// SICI: buffer_load_sbyte v5, v[0:1], s[8:11], s3 idxen offen offset:4095 glc slc lds ; encoding: [0xff,0x7f,0x25,0xe0,0x00,0x05,0x42,0x03] +// VI: buffer_load_sbyte v5, v[0:1], s[8:11], s3 idxen offen offset:4095 glc slc lds ; encoding: [0xff,0x7f,0x47,0xe0,0x00,0x05,0x02,0x03] + +buffer_load_ubyte v5, off, s[8:11], s3 offset:4095 lds +// SICI: buffer_load_ubyte v5, off, s[8:11], s3 offset:4095 lds ; encoding: [0xff,0x0f,0x21,0xe0,0x00,0x05,0x02,0x03] +// VI: buffer_load_ubyte v5, off, s[8:11], s3 offset:4095 lds ; encoding: [0xff,0x0f,0x41,0xe0,0x00,0x05,0x02,0x03] + +buffer_load_sshort v5, v0, s[8:11], s3 offen offset:4095 glc slc lds +// SICI: buffer_load_sshort v5, v0, s[8:11], s3 offen offset:4095 glc slc lds ; encoding: [0xff,0x5f,0x2d,0xe0,0x00,0x05,0x42,0x03] +// VI: buffer_load_sshort v5, v0, s[8:11], s3 offen offset:4095 glc slc lds ; encoding: [0xff,0x5f,0x4f,0xe0,0x00,0x05,0x02,0x03] + +buffer_load_ushort v5, v0, s[8:11], s3 idxen offset:4095 glc slc lds +// SICI: buffer_load_ushort v5, v0, s[8:11], s3 idxen offset:4095 glc slc lds ; encoding: [0xff,0x6f,0x29,0xe0,0x00,0x05,0x42,0x03] +// VI: buffer_load_ushort v5, v0, s[8:11], s3 idxen offset:4095 glc slc lds ; encoding: [0xff,0x6f,0x4b,0xe0,0x00,0x05,0x02,0x03] + +buffer_load_dword v5, v0, s[8:11], s101 offen lds +// SICI: buffer_load_dword v5, v0, s[8:11], s101 offen lds ; encoding: [0x00,0x10,0x31,0xe0,0x00,0x05,0x02,0x65] +// VI: buffer_load_dword v5, v0, s[8:11], s101 offen lds ; encoding: [0x00,0x10,0x51,0xe0,0x00,0x05,0x02,0x65] + +buffer_load_format_x v5, v[0:1], s[8:11], s3 idxen offen offset:4095 glc slc lds +// SICI: buffer_load_format_x v5, v[0:1], s[8:11], s3 idxen offen offset:4095 glc slc lds ; encoding: [0xff,0x7f,0x01,0xe0,0x00,0x05,0x42,0x03] +// VI: buffer_load_format_x v5, v[0:1], s[8:11], s3 idxen offen offset:4095 glc slc lds ; encoding: [0xff,0x7f,0x03,0xe0,0x00,0x05,0x02,0x03] + 
+//===----------------------------------------------------------------------===// +// Errors handling +//===----------------------------------------------------------------------===// + +buffer_load_sbyte v5, off, s[8:11], s3 lds tfe +// NOSICIVI: error: invalid operand for instruction + +buffer_load_dword v5, off, s[8:11], s3 tfe lds +// NOSICIVI: error: invalid operand for instruction diff --git a/llvm/test/MC/Disassembler/AMDGPU/mubuf_vi.txt b/llvm/test/MC/Disassembler/AMDGPU/mubuf_vi.txt index 04ff0ef61b2b..ef245ce2ac99 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/mubuf_vi.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/mubuf_vi.txt @@ -359,3 +359,46 @@ # VI: buffer_atomic_inc v1, v[2:3], s[8:11], 56 idxen offen offset:4 glc slc ; encoding: [0x04,0x70,0x2e,0xe1,0x02,0x01,0x02,0xb8] 0x04 0x70 0x2e 0xe1 0x02 0x01 0x02 0xb8 + +#===------------------------------------------------------------------------===# +# Lds support +#===------------------------------------------------------------------------===# + +# VI: buffer_load_sbyte v5, off, s[8:11], s3 lds ; encoding: [0x00,0x00,0x45,0xe0,0x00,0x05,0x02,0x03] +0x00,0x00,0x45,0xe0,0x00,0x05,0x02,0x03 + +# VI: buffer_load_sbyte v5, off, s[8:11], s3 glc slc lds ; encoding: [0x00,0x40,0x47,0xe0,0x00,0x05,0x02,0x03] +0x00,0x40,0x47,0xe0,0x00,0x05,0x02,0x03 + +# VI: buffer_load_sbyte v5, off, s[8:11], s3 offset:4095 glc slc lds ; encoding: [0xff,0x4f,0x47,0xe0,0x00,0x05,0x02,0x03] +0xff,0x4f,0x47,0xe0,0x00,0x05,0x02,0x03 + +# VI: buffer_load_sbyte v5, v0, s[8:11], s3 offen offset:4095 slc lds ; encoding: [0xff,0x1f,0x47,0xe0,0x00,0x05,0x02,0x03] +0xff,0x1f,0x47,0xe0,0x00,0x05,0x02,0x03 + +# VI: buffer_load_sbyte v5, v0, s[8:11], s3 offen lds ; encoding: [0x00,0x10,0x45,0xe0,0x00,0x05,0x02,0x03] +0x00,0x10,0x45,0xe0,0x00,0x05,0x02,0x03 + +# VI: buffer_load_sbyte v5, v0, s[8:11], s3 idxen glc slc lds ; encoding: [0x00,0x60,0x47,0xe0,0x00,0x05,0x02,0x03] +0x00,0x60,0x47,0xe0,0x00,0x05,0x02,0x03 + +# VI: buffer_load_sbyte v5, 
v[0:1], s[8:11], s3 idxen offen offset:4095 lds ; encoding: [0xff,0x3f,0x45,0xe0,0x00,0x05,0x02,0x03] +0xff,0x3f,0x45,0xe0,0x00,0x05,0x02,0x03 + +# VI: buffer_load_sbyte v5, v[0:1], s[8:11], s3 idxen offen offset:4095 glc slc lds ; encoding: [0xff,0x7f,0x47,0xe0,0x00,0x05,0x02,0x03] +0xff,0x7f,0x47,0xe0,0x00,0x05,0x02,0x03 + +# VI: buffer_load_ubyte v5, off, s[8:11], s3 offset:4095 lds ; encoding: [0xff,0x0f,0x41,0xe0,0x00,0x05,0x02,0x03] +0xff,0x0f,0x41,0xe0,0x00,0x05,0x02,0x03 + +# VI: buffer_load_sshort v5, v0, s[8:11], s3 offen offset:4095 glc slc lds ; encoding: [0xff,0x5f,0x4f,0xe0,0x00,0x05,0x02,0x03] +0xff,0x5f,0x4f,0xe0,0x00,0x05,0x02,0x03 + +# VI: buffer_load_ushort v5, v0, s[8:11], s3 idxen offset:4095 glc slc lds ; encoding: [0xff,0x6f,0x4b,0xe0,0x00,0x05,0x02,0x03] +0xff,0x6f,0x4b,0xe0,0x00,0x05,0x02,0x03 + +# VI: buffer_load_dword v5, v0, s[8:11], s101 offen lds ; encoding: [0x00,0x10,0x51,0xe0,0x00,0x05,0x02,0x65] +0x00,0x10,0x51,0xe0,0x00,0x05,0x02,0x65 + +# VI: buffer_load_format_x v5, v[0:1], s[8:11], s3 idxen offen offset:4095 glc slc lds ; encoding: [0xff,0x7f,0x03,0xe0,0x00,0x05,0x02,0x03] +0xff,0x7f,0x03,0xe0,0x00,0x05,0x02,0x03