[AMDGPU][MC][GFX6][GFX7][GFX10] Added instructions buffer_atomic_[fcmpswap/fmin/fmax]*

See https://bugs.llvm.org/show_bug.cgi?id=28232

Reviewers: arsenm, rampitec

Differential Revision: https://reviews.llvm.org/D68788

llvm-svn: 374559
This commit is contained in:
Dmitry Preobrazhensky 2019-10-11 14:44:51 +00:00
parent e38c36b7b0
commit b82fae01ea
4 changed files with 152 additions and 14 deletions

View File

@ -1010,19 +1010,36 @@ def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">;
let SubtargetPredicate = isGFX6 in { // isn't on CI & VI
/*
defm BUFFER_ATOMIC_RSUB : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub">;
defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap">;
defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics <"buffer_atomic_fmin">;
defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics <"buffer_atomic_fmax">;
defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub_x2">;
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap_x2">;
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fmin_x2">;
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fmax_x2">;
*/
def BUFFER_WBINVL1_SC : MUBUF_Invalidate <"buffer_wbinvl1_sc",
int_amdgcn_buffer_wbinvl1_sc>;
}
let SubtargetPredicate = isGFX6GFX7GFX10 in {
defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <
"buffer_atomic_fcmpswap", VReg_64, v2f32, null_frag
>;
defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics <
"buffer_atomic_fmin", VGPR_32, f32, null_frag
>;
defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics <
"buffer_atomic_fmax", VGPR_32, f32, null_frag
>;
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <
"buffer_atomic_fcmpswap_x2", VReg_128, v2f64, null_frag
>;
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics <
"buffer_atomic_fmin_x2", VReg_64, f64, null_frag
>;
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics <
"buffer_atomic_fmax_x2", VReg_64, f64, null_frag
>;
}
let SubtargetPredicate = HasD16LoadStore in {
defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Pseudo_Loads <
@ -2025,10 +2042,9 @@ defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03a>;
defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03b>;
defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03c>;
defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03d>;
// FIXME-GFX6-GFX7-GFX10: Add following instructions:
//defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>;
//defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>;
//defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>;
defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>;
defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>;
defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>;
defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x050>;
defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x051>;
defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x052>;
@ -2043,10 +2059,9 @@ defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05b>;
defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05c>;
defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05d>;
// FIXME-GFX7: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on GFX7.
// FIXME-GFX6-GFX7-GFX10: Add following instructions:
//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
//defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
//defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;
defm BUFFER_WBINVL1_SC : MUBUF_Real_gfx6<0x070>;
defm BUFFER_WBINVL1_VOL : MUBUF_Real_gfx7<0x070>;

View File

@ -8,3 +8,39 @@ buffer_load_sbyte v5, off, s[8:11], s3 glc slc lds dlc
buffer_load_sbyte v5, off, s[8:11], s3 glc slc dlc
// GFX10: buffer_load_sbyte v5, off, s[8:11], s3 glc slc dlc ; encoding: [0x00,0xc0,0x24,0xe0,0x00,0x05,0x42,0x03]
buffer_atomic_fcmpswap v[0:1], off, s[0:3], s0 offset:4095
// GFX10: buffer_atomic_fcmpswap v[0:1], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0xf8,0xe0,0x00,0x00,0x00,0x00]
buffer_atomic_fcmpswap_x2 v[0:3], off, s[0:3], s0 offset:4095
// GFX10: buffer_atomic_fcmpswap_x2 v[0:3], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe1,0x00,0x00,0x00,0x00]
buffer_atomic_fcmpswap_x2 v[0:3], v0, s[0:3], s0 idxen offset:4095
// GFX10: buffer_atomic_fcmpswap_x2 v[0:3], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x78,0xe1,0x00,0x00,0x00,0x00]
buffer_atomic_fmax v1, off, s[0:3], s0 offset:4095
// GFX10: buffer_atomic_fmax v1, off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x01,0x00,0x00]
buffer_atomic_fmax v0, off, s[0:3], s0 offset:7
// GFX10: buffer_atomic_fmax v0, off, s[0:3], s0 offset:7 ; encoding: [0x07,0x00,0x00,0xe1,0x00,0x00,0x00,0x00]
buffer_atomic_fmax v0, off, s[0:3], s0 offset:4095 glc
// GFX10: buffer_atomic_fmax v0, off, s[0:3], s0 offset:4095 glc ; encoding: [0xff,0x4f,0x00,0xe1,0x00,0x00,0x00,0x00]
buffer_atomic_fmax_x2 v[5:6], off, s[0:3], s0 offset:4095
// GFX10: buffer_atomic_fmax_x2 v[5:6], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x05,0x00,0x00]
buffer_atomic_fmax_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095
// GFX10: buffer_atomic_fmax_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x80,0xe1,0x00,0x00,0x00,0x00]
buffer_atomic_fmin v0, off, s[0:3], s0
// GFX10: buffer_atomic_fmin v0, off, s[0:3], s0 ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00]
buffer_atomic_fmin v0, off, s[0:3], s0 offset:0
// GFX10: buffer_atomic_fmin v0, off, s[0:3], s0 ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00]
buffer_atomic_fmin_x2 v[0:1], off, s[0:3], s0 offset:4095 slc
// GFX10: buffer_atomic_fmin_x2 v[0:1], off, s[0:3], s0 offset:4095 slc ; encoding: [0xff,0x0f,0x7c,0xe1,0x00,0x00,0x40,0x00]
buffer_atomic_fmin_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095
// GFX10: buffer_atomic_fmin_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x7c,0xe1,0x00,0x00,0x00,0x00]

View File

@ -719,6 +719,62 @@ buffer_atomic_add v5, off, s[8:11], 0.15915494 offset:4095 glc
// NOSICI: error: invalid operand for instruction
// VI: buffer_atomic_add v5, off, s[8:11], 0.15915494 offset:4095 glc ; encoding: [0xff,0x4f,0x08,0xe1,0x00,0x05,0x02,0xf8]
buffer_atomic_fcmpswap v[0:1], off, s[0:3], s0 offset:4095
// SICI: buffer_atomic_fcmpswap v[0:1], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0xf8,0xe0,0x00,0x00,0x00,0x00]
// NOVI: error: not a valid operand.
buffer_atomic_fcmpswap v[0:1], v[0:1], s[0:3], s0 addr64 offset:4095
// SICI: buffer_atomic_fcmpswap v[0:1], v[0:1], s[0:3], s0 addr64 offset:4095 ; encoding: [0xff,0x8f,0xf8,0xe0,0x00,0x00,0x00,0x00]
// NOVI: error: not a valid operand.
buffer_atomic_fcmpswap_x2 v[0:3], off, s[0:3], s0 offset:4095
// SICI: buffer_atomic_fcmpswap_x2 v[0:3], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe1,0x00,0x00,0x00,0x00]
// NOVI: error: not a valid operand.
buffer_atomic_fcmpswap_x2 v[0:3], v0, s[0:3], s0 idxen offset:4095
// SICI: buffer_atomic_fcmpswap_x2 v[0:3], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x78,0xe1,0x00,0x00,0x00,0x00]
// NOVI: error: not a valid operand.
buffer_atomic_fmax v1, off, s[0:3], s0 offset:4095
// SICI: buffer_atomic_fmax v1, off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x01,0x00,0x00]
// NOVI: error: not a valid operand.
buffer_atomic_fmax v0, off, s[0:3], s0 offset:7
// SICI: buffer_atomic_fmax v0, off, s[0:3], s0 offset:7 ; encoding: [0x07,0x00,0x00,0xe1,0x00,0x00,0x00,0x00]
// NOVI: error: not a valid operand.
buffer_atomic_fmax v0, off, s[0:3], s0 offset:4095 glc
// SICI: buffer_atomic_fmax v0, off, s[0:3], s0 offset:4095 glc ; encoding: [0xff,0x4f,0x00,0xe1,0x00,0x00,0x00,0x00]
// NOVI: error: not a valid operand.
buffer_atomic_fmax_x2 v[5:6], off, s[0:3], s0 offset:4095
// SICI: buffer_atomic_fmax_x2 v[5:6], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x05,0x00,0x00]
// NOVI: error: not a valid operand.
buffer_atomic_fmax_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095
// SICI: buffer_atomic_fmax_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x80,0xe1,0x00,0x00,0x00,0x00]
// NOVI: error: not a valid operand.
buffer_atomic_fmin v0, v[0:1], s[0:3], s0 addr64 offset:4095
// SICI: buffer_atomic_fmin v0, v[0:1], s[0:3], s0 addr64 offset:4095 ; encoding: [0xff,0x8f,0xfc,0xe0,0x00,0x00,0x00,0x00]
// NOVI: error: not a valid operand.
buffer_atomic_fmin v0, off, s[0:3], s0
// SICI: buffer_atomic_fmin v0, off, s[0:3], s0 ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00]
// NOVI: error: instruction not supported on this GPU
buffer_atomic_fmin v0, off, s[0:3], s0 offset:0
// SICI: buffer_atomic_fmin v0, off, s[0:3], s0 ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00]
// NOVI: error: not a valid operand.
buffer_atomic_fmin_x2 v[0:1], off, s[0:3], s0 offset:4095 slc
// SICI: buffer_atomic_fmin_x2 v[0:1], off, s[0:3], s0 offset:4095 slc ; encoding: [0xff,0x0f,0x7c,0xe1,0x00,0x00,0x40,0x00]
// NOVI: error: not a valid operand.
buffer_atomic_fmin_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095
// SICI: buffer_atomic_fmin_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x7c,0xe1,0x00,0x00,0x00,0x00]
// NOVI: error: not a valid operand.
//===----------------------------------------------------------------------===//
// Lds support
//===----------------------------------------------------------------------===//

View File

@ -0,0 +1,31 @@
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -disassemble -show-encoding < %s | FileCheck %s
# CHECK: buffer_atomic_fcmpswap v[5:6], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xf8,0xe0,0x00,0x05,0x02,0x03]
0xff,0x0f,0xf8,0xe0,0x00,0x05,0x02,0x03
# CHECK: buffer_atomic_fcmpswap v[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xf8,0xe0,0x00,0xfe,0x02,0x03]
0xff,0x0f,0xf8,0xe0,0x00,0xfe,0x02,0x03
# CHECK: buffer_atomic_fcmpswap_x2 v[5:8], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x78,0xe1,0x00,0x05,0x02,0x03]
0x07,0x00,0x78,0xe1,0x00,0x05,0x02,0x03
# CHECK: buffer_atomic_fcmpswap_x2 v[5:8], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x78,0xe1,0x00,0x05,0x02,0x03]
0xff,0x4f,0x78,0xe1,0x00,0x05,0x02,0x03
# CHECK: buffer_atomic_fmax v5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x00,0xe1,0x00,0x05,0x02,0x03]
0xff,0x2f,0x00,0xe1,0x00,0x05,0x02,0x03
# CHECK: buffer_atomic_fmax_x2 v[5:6], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x80,0xe1,0x00,0x05,0x02,0x03]
0xff,0x4f,0x80,0xe1,0x00,0x05,0x02,0x03
# CHECK: buffer_atomic_fmax_x2 v[5:6], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x05,0x42,0x03]
0xff,0x0f,0x80,0xe1,0x00,0x05,0x42,0x03
# CHECK: buffer_atomic_fmin v5, off, s[8:11], s3 ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x05,0x02,0x03]
0x00,0x00,0xfc,0xe0,0x00,0x05,0x02,0x03
# CHECK: buffer_atomic_fmin v5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0xfc,0xe0,0x00,0x05,0x02,0x03]
0x07,0x00,0xfc,0xe0,0x00,0x05,0x02,0x03
# CHECK: buffer_atomic_fmin_x2 v[5:6], off, ttmp[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe1,0x00,0x05,0x1e,0x03]
0xff,0x0f,0x7c,0xe1,0x00,0x05,0x1e,0x03