forked from OSchip/llvm-project
AMDGPU: Fix global atomic saddr operand class
This commit is contained in:
parent
625db2fe5b
commit
a7455652c0
|
@ -355,7 +355,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
|
||||||
|
|
||||||
def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
|
def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
|
||||||
(outs),
|
(outs),
|
||||||
(ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC_0:$slc),
|
(ins VGPR_32:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC_0:$slc),
|
||||||
" $vaddr, $vdata, $saddr$offset$slc">,
|
" $vaddr, $vdata, $saddr$offset$slc">,
|
||||||
GlobalSaddrTable<1, opName>,
|
GlobalSaddrTable<1, opName>,
|
||||||
AtomicNoRet <opName#"_saddr", 0> {
|
AtomicNoRet <opName#"_saddr", 0> {
|
||||||
|
@ -389,7 +389,7 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
|
||||||
|
|
||||||
def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
|
def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
|
||||||
(outs vdst_rc:$vdst),
|
(outs vdst_rc:$vdst),
|
||||||
(ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC_0:$slc),
|
(ins VGPR_32:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC_0:$slc),
|
||||||
" $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
|
" $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
|
||||||
GlobalSaddrTable<1, opName#"_rtn">,
|
GlobalSaddrTable<1, opName#"_rtn">,
|
||||||
AtomicNoRet <opName#"_saddr", 1> {
|
AtomicNoRet <opName#"_saddr", 1> {
|
||||||
|
|
|
@ -346,7 +346,7 @@ body: |
|
||||||
name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
|
name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
|
GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
|
||||||
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
|
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
|
||||||
...
|
...
|
||||||
|
|
||||||
|
@ -360,7 +360,7 @@ body: |
|
||||||
name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
|
name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
%2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
|
%2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
|
||||||
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
|
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
|
||||||
...
|
...
|
||||||
|
|
||||||
|
@ -374,7 +374,7 @@ body: |
|
||||||
name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
|
name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
%2:vgpr_32 = GLOBAL_ATOMIC_FMAX_SADDR_RTN undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
|
%2:vgpr_32 = GLOBAL_ATOMIC_FMAX_SADDR_RTN undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
|
||||||
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
|
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
|
||||||
...
|
...
|
||||||
|
|
||||||
|
@ -388,7 +388,7 @@ body: |
|
||||||
name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
|
name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
%2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
|
%2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
|
||||||
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
|
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
|
||||||
...
|
...
|
||||||
|
|
||||||
|
@ -402,7 +402,7 @@ body: |
|
||||||
name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
|
name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
%2:vgpr_32 = GLOBAL_ATOMIC_FMIN_SADDR_RTN undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
|
%2:vgpr_32 = GLOBAL_ATOMIC_FMIN_SADDR_RTN undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
|
||||||
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
|
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
|
||||||
...
|
...
|
||||||
|
|
||||||
|
@ -416,7 +416,7 @@ body: |
|
||||||
name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
|
name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
|
||||||
body: |
|
body: |
|
||||||
bb.0:
|
bb.0:
|
||||||
%2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
|
%2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
|
||||||
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
|
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
|
||||||
...
|
...
|
||||||
|
|
||||||
|
|
|
@ -19,10 +19,10 @@ global_atomic_csub v2, v[0:1], v2, off offset:100 glc slc
|
||||||
global_atomic_csub v2, v[0:1], v2, off
|
global_atomic_csub v2, v[0:1], v2, off
|
||||||
// GFX10: encoding: [0x00,0x80,0xd1,0xdc,0x00,0x02,0x7d,0x02]
|
// GFX10: encoding: [0x00,0x80,0xd1,0xdc,0x00,0x02,0x7d,0x02]
|
||||||
|
|
||||||
global_atomic_csub v2, v[0:1], v2, s[2:3]
|
global_atomic_csub v2, v0, v2, s[2:3]
|
||||||
// GFX10: encoding: [0x00,0x80,0xd1,0xdc,0x00,0x02,0x02,0x02]
|
// GFX10: encoding: [0x00,0x80,0xd1,0xdc,0x00,0x02,0x02,0x02]
|
||||||
|
|
||||||
global_atomic_csub v2, v[0:1], v2, s[2:3] offset:100 glc slc
|
global_atomic_csub v2, v0, v2, s[2:3] offset:100 glc slc
|
||||||
// GFX10: encoding: [0x64,0x80,0xd3,0xdc,0x00,0x02,0x02,0x02]
|
// GFX10: encoding: [0x64,0x80,0xd3,0xdc,0x00,0x02,0x02,0x02]
|
||||||
|
|
||||||
buffer_atomic_csub v5, off, s[8:11], s3
|
buffer_atomic_csub v5, off, s[8:11], s3
|
||||||
|
|
|
@ -42,16 +42,16 @@
|
||||||
# CHECK: flat_store_byte v[0:1], v0 offset:4095 glc ; encoding: [0xff,0x0f,0x61,0xdc,0x00,0x00,0x00,0x00]
|
# CHECK: flat_store_byte v[0:1], v0 offset:4095 glc ; encoding: [0xff,0x0f,0x61,0xdc,0x00,0x00,0x00,0x00]
|
||||||
0xff,0x0f,0x61,0xdc,0x00,0x00,0x00,0x00
|
0xff,0x0f,0x61,0xdc,0x00,0x00,0x00,0x00
|
||||||
|
|
||||||
# CHECK: global_atomic_add v[2:3], v4, s[0:1] ; encoding: [0x00,0x80,0x08,0xdd,0x02,0x04,0x00,0x00]
|
# CHECK: global_atomic_add v2, v4, s[0:1] ; encoding: [0x00,0x80,0x08,0xdd,0x02,0x04,0x00,0x00]
|
||||||
0x00,0x80,0x08,0xdd,0x02,0x04,0x00,0x00
|
0x00,0x80,0x08,0xdd,0x02,0x04,0x00,0x00
|
||||||
|
|
||||||
# CHECK: global_atomic_add v[2:3], v4, s[0:1] offset:4095 ; encoding: [0xff,0x8f,0x08,0xdd,0x02,0x04,0x00,0x00]
|
# CHECK: global_atomic_add v2, v4, s[0:1] offset:4095 ; encoding: [0xff,0x8f,0x08,0xdd,0x02,0x04,0x00,0x00]
|
||||||
0xff,0x8f,0x08,0xdd,0x02,0x04,0x00,0x00
|
0xff,0x8f,0x08,0xdd,0x02,0x04,0x00,0x00
|
||||||
|
|
||||||
# CHECK: global_atomic_add v[2:3], v4, s[0:1] offset:-4096 ; encoding: [0x00,0x90,0x08,0xdd,0x02,0x04,0x00,0x00]
|
# CHECK: global_atomic_add v2, v4, s[0:1] offset:-4096 ; encoding: [0x00,0x90,0x08,0xdd,0x02,0x04,0x00,0x00]
|
||||||
0x00,0x90,0x08,0xdd,0x02,0x04,0x00,0x00
|
0x00,0x90,0x08,0xdd,0x02,0x04,0x00,0x00
|
||||||
|
|
||||||
# CHECK: global_atomic_add v0, v[2:3], v4, s[0:1] offset:-1 glc ; encoding: [0xff,0x9f,0x09,0xdd,0x02,0x04,0x00,0x00]
|
# CHECK: global_atomic_add v0, v2, v4, s[0:1] offset:-1 glc ; encoding: [0xff,0x9f,0x09,0xdd,0x02,0x04,0x00,0x00]
|
||||||
0xff,0x9f,0x09,0xdd,0x02,0x04,0x00,0x00
|
0xff,0x9f,0x09,0xdd,0x02,0x04,0x00,0x00
|
||||||
|
|
||||||
# CHECK: global_load_sbyte v0, v2, s[0:1] ; encoding: [0x00,0x80,0x44,0xdc,0x02,0x00,0x00,0x00]
|
# CHECK: global_load_sbyte v0, v2, s[0:1] ; encoding: [0x00,0x80,0x44,0xdc,0x02,0x00,0x00,0x00]
|
||||||
|
|
|
@ -19,10 +19,10 @@
|
||||||
# GFX10: global_atomic_csub v2, v[0:1], v2, off glc
|
# GFX10: global_atomic_csub v2, v[0:1], v2, off glc
|
||||||
0x00,0x80,0xd1,0xdc,0x00,0x02,0x7d,0x02
|
0x00,0x80,0xd1,0xdc,0x00,0x02,0x7d,0x02
|
||||||
|
|
||||||
# GFX10: global_atomic_csub v2, v[0:1], v2, s[2:3] glc
|
# GFX10: global_atomic_csub v2, v0, v2, s[2:3] glc
|
||||||
0x00,0x80,0xd1,0xdc,0x00,0x02,0x02,0x02
|
0x00,0x80,0xd1,0xdc,0x00,0x02,0x02,0x02
|
||||||
|
|
||||||
# GFX10: global_atomic_csub v2, v[0:1], v2, s[2:3] offset:100 glc slc
|
# GFX10: global_atomic_csub v2, v0, v2, s[2:3] offset:100 glc slc
|
||||||
0x64,0x80,0xd3,0xdc,0x00,0x02,0x02,0x02
|
0x64,0x80,0xd3,0xdc,0x00,0x02,0x02,0x02
|
||||||
|
|
||||||
# GFX10: buffer_atomic_csub v5, off, s[8:11], s3
|
# GFX10: buffer_atomic_csub v5, off, s[8:11], s3
|
||||||
|
|
Loading…
Reference in New Issue