diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 6ea9367f2702..3697d5aec647 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -265,11 +265,20 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { } DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { + int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::vdst); + int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata); int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dmask); + + assert(VDataIdx != -1); + assert(DMaskIdx != -1); + + bool isAtomic = (VDstIdx != -1); + unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf; if (DMask == 0) return MCDisassembler::Success; @@ -278,12 +287,26 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { if (ChannelCount == 1) return MCDisassembler::Success; - int NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), ChannelCount); - assert(NewOpcode != -1 && "could not find matching mimg channel instruction"); + int NewOpcode = -1; + + if (isAtomic) { + if (DMask == 0x1 || DMask == 0x3 || DMask == 0xF) { + NewOpcode = AMDGPU::getMaskedMIMGAtomicOp(*MCII, MI.getOpcode(), ChannelCount); + } + if (NewOpcode == -1) return MCDisassembler::Success; + } else { + NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), ChannelCount); + assert(NewOpcode != -1 && "could not find matching mimg channel instruction"); + } + auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass; - // Widen the register to the correct number of enabled channels. + // Get first subregister of VData unsigned Vdata0 = MI.getOperand(VDataIdx).getReg(); + unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0); + Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0; + + // Widen the register to the correct number of enabled channels. auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &MRI.getRegClass(RCID)); if (NewVdata == AMDGPU::NoRegister) { @@ -297,6 +320,12 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { // how it is usually emitted because the number of register components is not // in the instruction encoding. MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata); + + if (isAtomic) { + // Atomic operations have an additional operand (a copy of data) + MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata); + } + return MCDisassembler::Success; } diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index c49691c43427..d31d33ab4de9 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -12,6 +12,11 @@ class MIMG_Mask { int Channels = channels; } +class MIMG_Atomic_Size { + string Op = op; + int AtomicSize = !if(is32Bit, 1, 2); +} + class mimg si, bits<7> vi = si> { field bits<7> SI = si; field bits<7> VI = vi; @@ -173,9 +178,13 @@ class MIMG_Atomic_Real_vi { let isPseudo = 1, isCodeGenOnly = 1 in { def "" : MIMG_Atomic_Helper, @@ -183,18 +192,35 @@ multiclass MIMG_Atomic_Helper_m ; + def _si : MIMG_Atomic_Real_si, + MIMG_Atomic_Size; - def _vi : MIMG_Atomic_Real_vi; + def _vi : MIMG_Atomic_Real_vi, + MIMG_Atomic_Size; } } -multiclass MIMG_Atomic { +multiclass MIMG_Atomic_Addr_Helper_m { // _V* variants have different address size, but the size is not encoded. // So only one variant can be disassembled. V1 looks the safest to decode. - defm _V1 : MIMG_Atomic_Helper_m ; - defm _V2 : MIMG_Atomic_Helper_m ; - defm _V4 : MIMG_Atomic_Helper_m ; + defm _V1 : MIMG_Atomic_Helper_m ; + defm _V2 : MIMG_Atomic_Helper_m ; + defm _V4 : MIMG_Atomic_Helper_m ; +} + +multiclass MIMG_Atomic { // 64-bit atomics + // _V* variants have different dst size, but the size is encoded implicitly, + // using dmask and tfe. Only 32-bit variant is registered with disassembler. + // Other variants are reconstructed by disassembler using dmask and tfe. + defm _V1 : MIMG_Atomic_Addr_Helper_m ; + defm _V2 : MIMG_Atomic_Addr_Helper_m ; } class MIMG_Sampler_Helper op, string asm, @@ -344,7 +370,7 @@ defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">; } defm IMAGE_ATOMIC_SWAP : MIMG_Atomic , "image_atomic_swap">; -defm IMAGE_ATOMIC_CMPSWAP : MIMG_Atomic , "image_atomic_cmpswap", VReg_64>; +defm IMAGE_ATOMIC_CMPSWAP : MIMG_Atomic , "image_atomic_cmpswap", VReg_64, VReg_128>; defm IMAGE_ATOMIC_ADD : MIMG_Atomic , "image_atomic_add">; defm IMAGE_ATOMIC_SUB : MIMG_Atomic , "image_atomic_sub">; //def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"image_atomic_rsub", 0x00000013>; -- not on VI @@ -590,9 +616,9 @@ class ImageAtomicPattern : GC // ImageAtomic patterns. multiclass ImageAtomicPatterns { - def : ImageAtomicPattern(opcode # _V1), i32>; - def : ImageAtomicPattern(opcode # _V2), v2i32>; - def : ImageAtomicPattern(opcode # _V4), v4i32>; + def : ImageAtomicPattern(opcode # _V1_V1), i32>; + def : ImageAtomicPattern(opcode # _V1_V2), v2i32>; + def : ImageAtomicPattern(opcode # _V1_V4), v4i32>; } // ImageAtomicCmpSwap for amdgcn. @@ -784,9 +810,9 @@ defm : ImageSamplePatterns; // Image atomics defm : ImageAtomicPatterns; -def : ImageAtomicCmpSwapPattern; -def : ImageAtomicCmpSwapPattern; -def : ImageAtomicCmpSwapPattern; +def : ImageAtomicCmpSwapPattern; +def : ImageAtomicCmpSwapPattern; +def : ImageAtomicCmpSwapPattern; defm : ImageAtomicPatterns; defm : ImageAtomicPatterns; defm : ImageAtomicPatterns; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index f4516988b198..993aaa26dafe 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2040,6 +2040,22 @@ def getMaskedMIMGOp4 : InstrMapping { let ValueCols = [["1"], ["2"], ["3"] ]; } +def getMIMGAtomicOp1 : InstrMapping { + let FilterClass = "MIMG_Atomic_Size"; + let RowFields = ["Op"]; + let ColFields = ["AtomicSize"]; + let KeyCol = ["1"]; + let ValueCols = [["2"]]; +} + +def getMIMGAtomicOp2 : InstrMapping { + let FilterClass = "MIMG_Atomic_Size"; + let RowFields = ["Op"]; + let ColFields = ["AtomicSize"]; + let KeyCol = ["2"]; + let ValueCols = [["1"]]; +} + // Maps an commuted opcode to its original version def getCommuteOrig : InstrMapping { let FilterClass = "Commutable_REV"; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 0deb66b6452f..6b4253471fc2 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -156,6 +156,28 @@ int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels) } } +int getMaskedMIMGAtomicOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels) { + assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst) != -1); + assert(NewChannels == 1 || NewChannels == 2 || NewChannels == 4); + + unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass); + assert(OrigChannels == 1 || OrigChannels == 2 || OrigChannels == 4); + + if (NewChannels == OrigChannels) return Opc; + + if (OrigChannels <= 2 && NewChannels <= 2) { + // This is an ordinary atomic (not an atomic_cmpswap) + return (OrigChannels == 1)? + AMDGPU::getMIMGAtomicOp1(Opc) : AMDGPU::getMIMGAtomicOp2(Opc); + } else if (OrigChannels >= 2 && NewChannels >= 2) { + // This is an atomic_cmpswap + return (OrigChannels == 2)? + AMDGPU::getMIMGAtomicOp1(Opc) : AMDGPU::getMIMGAtomicOp2(Opc); + } else { // invalid OrigChannels/NewChannels value + return -1; + } +} + // Wrapper for Tablegen'd function. enum Subtarget is not defined in any // header files, so we need to wrap it in a function that takes unsigned // instead. diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 0c1d69765942..7745af1c45cb 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -159,6 +159,11 @@ int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); LLVM_READONLY int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels); + +LLVM_READONLY +int getMaskedMIMGAtomicOp(const MCInstrInfo &MII, + unsigned Opc, unsigned NewChannels); + LLVM_READONLY int getMCOpcode(uint16_t Opcode, unsigned Gen); diff --git a/llvm/test/MC/AMDGPU/mimg.s b/llvm/test/MC/AMDGPU/mimg.s index 96d9a7d3a898..6e4f3d4baf58 100644 --- a/llvm/test/MC/AMDGPU/mimg.s +++ b/llvm/test/MC/AMDGPU/mimg.s @@ -30,25 +30,21 @@ image_atomic_add v4, v[192:195], s[28:35] dmask:0x1 unorm glc // SICI: image_atomic_add v4, v[192:195], s[28:35] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x44,0xf0,0xc0,0x04,0x07,0x00] // VI: image_atomic_add v4, v[192:195], s[28:35] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x48,0xf0,0xc0,0x04,0x07,0x00] -image_atomic_add v5, v1, s[8:15] -// SICI: image_atomic_add v5, v1, s[8:15] ; encoding: [0x00,0x00,0x44,0xf0,0x01,0x05,0x02,0x00] -// VI: image_atomic_add v5, v1, s[8:15] ; encoding: [0x00,0x00,0x48,0xf0,0x01,0x05,0x02,0x00] +image_atomic_add v252, v2, s[8:15] dmask:0x1 unorm +// SICI: image_atomic_add v252, v2, s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x44,0xf0,0x02,0xfc,0x02,0x00] +// VI: image_atomic_add v252, v2, s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x48,0xf0,0x02,0xfc,0x02,0x00] -image_atomic_add v252, v2, s[8:15] unorm -// SICI: image_atomic_add v252, v2, s[8:15] unorm ; encoding: [0x00,0x10,0x44,0xf0,0x02,0xfc,0x02,0x00] -// VI: image_atomic_add v252, v2, s[8:15] unorm ; encoding: [0x00,0x10,0x48,0xf0,0x02,0xfc,0x02,0x00] +image_atomic_add v[6:7], v255, s[8:15] dmask:0x3 +// SICI: image_atomic_add v[6:7], v255, s[8:15] dmask:0x3 ; encoding: [0x00,0x03,0x44,0xf0,0xff,0x06,0x02,0x00] +// VI: image_atomic_add v[6:7], v255, s[8:15] dmask:0x3 ; encoding: [0x00,0x03,0x48,0xf0,0xff,0x06,0x02,0x00] -image_atomic_add v6, v255, s[8:15] dmask:0x1 -// SICI: image_atomic_add v6, v255, s[8:15] dmask:0x1 ; encoding: [0x00,0x01,0x44,0xf0,0xff,0x06,0x02,0x00] -// VI: image_atomic_add v6, v255, s[8:15] dmask:0x1 ; encoding: [0x00,0x01,0x48,0xf0,0xff,0x06,0x02,0x00] +image_atomic_add v7, v3, s[0:7] dmask:0x1 glc +// SICI: image_atomic_add v7, v3, s[0:7] dmask:0x1 glc ; encoding: [0x00,0x21,0x44,0xf0,0x03,0x07,0x00,0x00] +// VI: image_atomic_add v7, v3, s[0:7] dmask:0x1 glc ; encoding: [0x00,0x21,0x48,0xf0,0x03,0x07,0x00,0x00] -image_atomic_add v7, v3, s[0:7] glc -// SICI: image_atomic_add v7, v3, s[0:7] glc ; encoding: [0x00,0x20,0x44,0xf0,0x03,0x07,0x00,0x00] -// VI: image_atomic_add v7, v3, s[0:7] glc ; encoding: [0x00,0x20,0x48,0xf0,0x03,0x07,0x00,0x00] - -image_atomic_add v8, v4, s[8:15] slc -// SICI: image_atomic_add v8, v4, s[8:15] slc ; encoding: [0x00,0x00,0x44,0xf2,0x04,0x08,0x02,0x00] -// VI: image_atomic_add v8, v4, s[8:15] slc ; encoding: [0x00,0x00,0x48,0xf2,0x04,0x08,0x02,0x00] +image_atomic_add v8, v4, s[8:15] dmask:0x1 slc +// SICI: image_atomic_add v8, v4, s[8:15] dmask:0x1 slc ; encoding: [0x00,0x01,0x44,0xf2,0x04,0x08,0x02,0x00] +// VI: image_atomic_add v8, v4, s[8:15] dmask:0x1 slc ; encoding: [0x00,0x01,0x48,0xf2,0x04,0x08,0x02,0x00] image_atomic_add v9, v5, s[8:15] dmask:0x1 unorm glc slc lwe da // SICI: image_atomic_add v9, v5, s[8:15] dmask:0x1 unorm glc slc lwe da ; encoding: [0x00,0x71,0x46,0xf2,0x05,0x09,0x02,0x00] @@ -66,6 +62,10 @@ image_atomic_swap v4, v[192:195], s[28:35] dmask:0x1 unorm glc // SICI: image_atomic_swap v4, v[192:195], s[28:35] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x3c,0xf0,0xc0,0x04,0x07,0x00] // VI: image_atomic_swap v4, v[192:195], s[28:35] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0xc0,0x04,0x07,0x00] -image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x1 unorm glc -// SIIC: image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0xc0,0x04,0x07,0x00] -// VI: image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x44,0xf0,0xc0,0x04,0x07,0x00] +image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x3 unorm glc +// SICI: image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x3 unorm glc ; encoding: [0x00,0x33,0x40,0xf0,0xc0,0x04,0x07,0x00] +// VI: image_atomic_cmpswap v[4:5], v[192:195], s[28:35] dmask:0x3 unorm glc ; encoding: [0x00,0x33,0x44,0xf0,0xc0,0x04,0x07,0x00] + +image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xf unorm glc +// SICI: image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xf unorm glc ; encoding: [0x00,0x3f,0x40,0xf0,0xc0,0x04,0x07,0x00] +// VI: image_atomic_cmpswap v[4:7], v[192:195], s[28:35] dmask:0xf unorm glc ; encoding: [0x00,0x3f,0x44,0xf0,0xc0,0x04,0x07,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/mimg_vi.txt b/llvm/test/MC/Disassembler/AMDGPU/mimg_vi.txt index 4e1cec4a44d9..1a4885d774a6 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/mimg_vi.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/mimg_vi.txt @@ -69,3 +69,35 @@ # VI: image_atomic_add v5, v1, s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x48,0xf0,0x01,0x05,0x02,0x00] 0x00,0x51,0x48,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_add v[5:6], v1, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x48,0xf0,0x01,0x05,0x02,0x00] +0x00,0x13,0x48,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x44,0xf0,0x01,0x05,0x02,0x00] +0x00,0x13,0x44,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_cmpswap v[5:8], v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x44,0xf0,0x01,0x05,0x02,0x00] +0x00,0x1f,0x44,0xf0,0x01,0x05,0x02,0x00 + +#===------------------------------------------------------------------------===# +# Invalid image atomics (incorrect dmask value). +# Disassembler may produce a partially incorrect instruction but should not fail. +#===------------------------------------------------------------------------===# + +# VI: image_atomic_add v5, v1, s[8:15] dmask:0x2 unorm ; encoding: [0x00,0x12,0x48,0xf0,0x01,0x05,0x02,0x00] +0x00,0x12,0x48,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_add v5, v1, s[8:15] dmask:0x7 unorm ; encoding: [0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00] +0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_add v5, v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00] +0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] unorm ; encoding: [0x00,0x10,0x44,0xf0,0x01,0x05,0x02,0x00] +0x00,0x10,0x44,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x44,0xf0,0x01,0x05,0x02,0x00] +0x00,0x11,0x44,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0xe unorm ; encoding: [0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00] +0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00