forked from OSchip/llvm-project
[AMDGPU][MC][GFX6][GFX7][GFX10] Added instructions buffer_atomic_[fcmpswap/fmin/fmax]*
See https://bugs.llvm.org/show_bug.cgi?id=28232. Reviewers: arsenm, rampitec. Differential Revision: https://reviews.llvm.org/D68788. llvm-svn: 374559
This commit is contained in:
parent
e38c36b7b0
commit
b82fae01ea
|
@ -1010,19 +1010,36 @@ def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">;
|
|||
let SubtargetPredicate = isGFX6 in { // isn't on CI & VI
|
||||
/*
|
||||
defm BUFFER_ATOMIC_RSUB : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub">;
|
||||
defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap">;
|
||||
defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics <"buffer_atomic_fmin">;
|
||||
defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics <"buffer_atomic_fmax">;
|
||||
defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub_x2">;
|
||||
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap_x2">;
|
||||
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fmin_x2">;
|
||||
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fmax_x2">;
|
||||
*/
|
||||
|
||||
def BUFFER_WBINVL1_SC : MUBUF_Invalidate <"buffer_wbinvl1_sc",
|
||||
int_amdgcn_buffer_wbinvl1_sc>;
|
||||
}
|
||||
|
||||
let SubtargetPredicate = isGFX6GFX7GFX10 in {
|
||||
|
||||
defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <
|
||||
"buffer_atomic_fcmpswap", VReg_64, v2f32, null_frag
|
||||
>;
|
||||
defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics <
|
||||
"buffer_atomic_fmin", VGPR_32, f32, null_frag
|
||||
>;
|
||||
defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics <
|
||||
"buffer_atomic_fmax", VGPR_32, f32, null_frag
|
||||
>;
|
||||
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <
|
||||
"buffer_atomic_fcmpswap_x2", VReg_128, v2f64, null_frag
|
||||
>;
|
||||
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics <
|
||||
"buffer_atomic_fmin_x2", VReg_64, f64, null_frag
|
||||
>;
|
||||
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics <
|
||||
"buffer_atomic_fmax_x2", VReg_64, f64, null_frag
|
||||
>;
|
||||
|
||||
}
|
||||
|
||||
let SubtargetPredicate = HasD16LoadStore in {
|
||||
|
||||
defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Pseudo_Loads <
|
||||
|
@ -2025,10 +2042,9 @@ defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03a>;
|
|||
defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03b>;
|
||||
defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03c>;
|
||||
defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03d>;
|
||||
// FIXME-GFX6-GFX7-GFX10: Add following instructions:
|
||||
//defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>;
|
||||
//defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>;
|
||||
//defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>;
|
||||
defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>;
|
||||
defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>;
|
||||
defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>;
|
||||
defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x050>;
|
||||
defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x051>;
|
||||
defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x052>;
|
||||
|
@ -2043,10 +2059,9 @@ defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05b>;
|
|||
defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05c>;
|
||||
defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05d>;
|
||||
// FIXME-GFX7: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on GFX7.
|
||||
// FIXME-GFX6-GFX7-GFX10: Add following instructions:
|
||||
//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
|
||||
//defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
|
||||
//defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;
|
||||
defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
|
||||
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
|
||||
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;
|
||||
|
||||
defm BUFFER_WBINVL1_SC : MUBUF_Real_gfx6<0x070>;
|
||||
defm BUFFER_WBINVL1_VOL : MUBUF_Real_gfx7<0x070>;
|
||||
|
|
|
@ -8,3 +8,39 @@ buffer_load_sbyte v5, off, s[8:11], s3 glc slc lds dlc
|
|||
|
||||
buffer_load_sbyte v5, off, s[8:11], s3 glc slc dlc
|
||||
// GFX10: buffer_load_sbyte v5, off, s[8:11], s3 glc slc dlc ; encoding: [0x00,0xc0,0x24,0xe0,0x00,0x05,0x42,0x03]
|
||||
|
||||
buffer_atomic_fcmpswap v[0:1], off, s[0:3], s0 offset:4095
|
||||
// GFX10: buffer_atomic_fcmpswap v[0:1], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0xf8,0xe0,0x00,0x00,0x00,0x00]
|
||||
|
||||
buffer_atomic_fcmpswap_x2 v[0:3], off, s[0:3], s0 offset:4095
|
||||
// GFX10: buffer_atomic_fcmpswap_x2 v[0:3], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe1,0x00,0x00,0x00,0x00]
|
||||
|
||||
buffer_atomic_fcmpswap_x2 v[0:3], v0, s[0:3], s0 idxen offset:4095
|
||||
// GFX10: buffer_atomic_fcmpswap_x2 v[0:3], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x78,0xe1,0x00,0x00,0x00,0x00]
|
||||
|
||||
buffer_atomic_fmax v1, off, s[0:3], s0 offset:4095
|
||||
// GFX10: buffer_atomic_fmax v1, off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x01,0x00,0x00]
|
||||
|
||||
buffer_atomic_fmax v0, off, s[0:3], s0 offset:7
|
||||
// GFX10: buffer_atomic_fmax v0, off, s[0:3], s0 offset:7 ; encoding: [0x07,0x00,0x00,0xe1,0x00,0x00,0x00,0x00]
|
||||
|
||||
buffer_atomic_fmax v0, off, s[0:3], s0 offset:4095 glc
|
||||
// GFX10: buffer_atomic_fmax v0, off, s[0:3], s0 offset:4095 glc ; encoding: [0xff,0x4f,0x00,0xe1,0x00,0x00,0x00,0x00]
|
||||
|
||||
buffer_atomic_fmax_x2 v[5:6], off, s[0:3], s0 offset:4095
|
||||
// GFX10: buffer_atomic_fmax_x2 v[5:6], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x05,0x00,0x00]
|
||||
|
||||
buffer_atomic_fmax_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095
|
||||
// GFX10: buffer_atomic_fmax_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x80,0xe1,0x00,0x00,0x00,0x00]
|
||||
|
||||
buffer_atomic_fmin v0, off, s[0:3], s0
|
||||
// GFX10: buffer_atomic_fmin v0, off, s[0:3], s0 ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00]
|
||||
|
||||
buffer_atomic_fmin v0, off, s[0:3], s0 offset:0
|
||||
// GFX10: buffer_atomic_fmin v0, off, s[0:3], s0 ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00]
|
||||
|
||||
buffer_atomic_fmin_x2 v[0:1], off, s[0:3], s0 offset:4095 slc
|
||||
// GFX10: buffer_atomic_fmin_x2 v[0:1], off, s[0:3], s0 offset:4095 slc ; encoding: [0xff,0x0f,0x7c,0xe1,0x00,0x00,0x40,0x00]
|
||||
|
||||
buffer_atomic_fmin_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095
|
||||
// GFX10: buffer_atomic_fmin_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x7c,0xe1,0x00,0x00,0x00,0x00]
|
||||
|
|
|
@ -719,6 +719,62 @@ buffer_atomic_add v5, off, s[8:11], 0.15915494 offset:4095 glc
|
|||
// NOSICI: error: invalid operand for instruction
|
||||
// VI: buffer_atomic_add v5, off, s[8:11], 0.15915494 offset:4095 glc ; encoding: [0xff,0x4f,0x08,0xe1,0x00,0x05,0x02,0xf8]
|
||||
|
||||
buffer_atomic_fcmpswap v[0:1], off, s[0:3], s0 offset:4095
|
||||
// SICI: buffer_atomic_fcmpswap v[0:1], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0xf8,0xe0,0x00,0x00,0x00,0x00]
|
||||
// NOVI: error: not a valid operand.
|
||||
|
||||
buffer_atomic_fcmpswap v[0:1], v[0:1], s[0:3], s0 addr64 offset:4095
|
||||
// SICI: buffer_atomic_fcmpswap v[0:1], v[0:1], s[0:3], s0 addr64 offset:4095 ; encoding: [0xff,0x8f,0xf8,0xe0,0x00,0x00,0x00,0x00]
|
||||
// NOVI: error: not a valid operand.
|
||||
|
||||
buffer_atomic_fcmpswap_x2 v[0:3], off, s[0:3], s0 offset:4095
|
||||
// SICI: buffer_atomic_fcmpswap_x2 v[0:3], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe1,0x00,0x00,0x00,0x00]
|
||||
// NOVI: error: not a valid operand.
|
||||
|
||||
buffer_atomic_fcmpswap_x2 v[0:3], v0, s[0:3], s0 idxen offset:4095
|
||||
// SICI: buffer_atomic_fcmpswap_x2 v[0:3], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x78,0xe1,0x00,0x00,0x00,0x00]
|
||||
// NOVI: error: not a valid operand.
|
||||
|
||||
buffer_atomic_fmax v1, off, s[0:3], s0 offset:4095
|
||||
// SICI: buffer_atomic_fmax v1, off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x01,0x00,0x00]
|
||||
// NOVI: error: not a valid operand.
|
||||
|
||||
buffer_atomic_fmax v0, off, s[0:3], s0 offset:7
|
||||
// SICI: buffer_atomic_fmax v0, off, s[0:3], s0 offset:7 ; encoding: [0x07,0x00,0x00,0xe1,0x00,0x00,0x00,0x00]
|
||||
// NOVI: error: not a valid operand.
|
||||
|
||||
buffer_atomic_fmax v0, off, s[0:3], s0 offset:4095 glc
|
||||
// SICI: buffer_atomic_fmax v0, off, s[0:3], s0 offset:4095 glc ; encoding: [0xff,0x4f,0x00,0xe1,0x00,0x00,0x00,0x00]
|
||||
// NOVI: error: not a valid operand.
|
||||
|
||||
buffer_atomic_fmax_x2 v[5:6], off, s[0:3], s0 offset:4095
|
||||
// SICI: buffer_atomic_fmax_x2 v[5:6], off, s[0:3], s0 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x05,0x00,0x00]
|
||||
// NOVI: error: not a valid operand.
|
||||
|
||||
buffer_atomic_fmax_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095
|
||||
// SICI: buffer_atomic_fmax_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x80,0xe1,0x00,0x00,0x00,0x00]
|
||||
// NOVI: error: not a valid operand.
|
||||
|
||||
buffer_atomic_fmin v0, v[0:1], s[0:3], s0 addr64 offset:4095
|
||||
// SICI: buffer_atomic_fmin v0, v[0:1], s[0:3], s0 addr64 offset:4095 ; encoding: [0xff,0x8f,0xfc,0xe0,0x00,0x00,0x00,0x00]
|
||||
// NOVI: error: not a valid operand.
|
||||
|
||||
buffer_atomic_fmin v0, off, s[0:3], s0
|
||||
// SICI: buffer_atomic_fmin v0, off, s[0:3], s0 ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00]
|
||||
// NOVI: error: instruction not supported on this GPU
|
||||
|
||||
buffer_atomic_fmin v0, off, s[0:3], s0 offset:0
|
||||
// SICI: buffer_atomic_fmin v0, off, s[0:3], s0 ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00]
|
||||
// NOVI: error: not a valid operand.
|
||||
|
||||
buffer_atomic_fmin_x2 v[0:1], off, s[0:3], s0 offset:4095 slc
|
||||
// SICI: buffer_atomic_fmin_x2 v[0:1], off, s[0:3], s0 offset:4095 slc ; encoding: [0xff,0x0f,0x7c,0xe1,0x00,0x00,0x40,0x00]
|
||||
// NOVI: error: not a valid operand.
|
||||
|
||||
buffer_atomic_fmin_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095
|
||||
// SICI: buffer_atomic_fmin_x2 v[0:1], v0, s[0:3], s0 idxen offset:4095 ; encoding: [0xff,0x2f,0x7c,0xe1,0x00,0x00,0x00,0x00]
|
||||
// NOVI: error: not a valid operand.
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Lds support
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -disassemble -show-encoding < %s | FileCheck %s
|
||||
|
||||
# CHECK: buffer_atomic_fcmpswap v[5:6], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xf8,0xe0,0x00,0x05,0x02,0x03]
|
||||
0xff,0x0f,0xf8,0xe0,0x00,0x05,0x02,0x03
|
||||
|
||||
# CHECK: buffer_atomic_fcmpswap v[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xf8,0xe0,0x00,0xfe,0x02,0x03]
|
||||
0xff,0x0f,0xf8,0xe0,0x00,0xfe,0x02,0x03
|
||||
|
||||
# CHECK: buffer_atomic_fcmpswap_x2 v[5:8], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x78,0xe1,0x00,0x05,0x02,0x03]
|
||||
0x07,0x00,0x78,0xe1,0x00,0x05,0x02,0x03
|
||||
|
||||
# CHECK: buffer_atomic_fcmpswap_x2 v[5:8], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x78,0xe1,0x00,0x05,0x02,0x03]
|
||||
0xff,0x4f,0x78,0xe1,0x00,0x05,0x02,0x03
|
||||
|
||||
# CHECK: buffer_atomic_fmax v5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x00,0xe1,0x00,0x05,0x02,0x03]
|
||||
0xff,0x2f,0x00,0xe1,0x00,0x05,0x02,0x03
|
||||
|
||||
# CHECK: buffer_atomic_fmax_x2 v[5:6], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x80,0xe1,0x00,0x05,0x02,0x03]
|
||||
0xff,0x4f,0x80,0xe1,0x00,0x05,0x02,0x03
|
||||
|
||||
# CHECK: buffer_atomic_fmax_x2 v[5:6], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x05,0x42,0x03]
|
||||
0xff,0x0f,0x80,0xe1,0x00,0x05,0x42,0x03
|
||||
|
||||
# CHECK: buffer_atomic_fmin v5, off, s[8:11], s3 ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x05,0x02,0x03]
|
||||
0x00,0x00,0xfc,0xe0,0x00,0x05,0x02,0x03
|
||||
|
||||
# CHECK: buffer_atomic_fmin v5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0xfc,0xe0,0x00,0x05,0x02,0x03]
|
||||
0x07,0x00,0xfc,0xe0,0x00,0x05,0x02,0x03
|
||||
|
||||
# CHECK: buffer_atomic_fmin_x2 v[5:6], off, ttmp[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe1,0x00,0x05,0x1e,0x03]
|
||||
0xff,0x0f,0x7c,0xe1,0x00,0x05,0x1e,0x03
|
Loading…
Reference in New Issue