AMDGPU: Fix global atomic saddr operand class

This commit is contained in:
Matt Arsenault 2020-08-14 20:01:51 -04:00
parent 625db2fe5b
commit a7455652c0
5 changed files with 16 additions and 16 deletions
llvm

View File

@ -355,7 +355,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
(ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC_0:$slc),
(ins VGPR_32:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC_0:$slc),
" $vaddr, $vdata, $saddr$offset$slc">,
GlobalSaddrTable<1, opName>,
AtomicNoRet <opName#"_saddr", 0> {
@ -389,7 +389,7 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_rc:$vdst),
(ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC_0:$slc),
(ins VGPR_32:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC_0:$slc),
" $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
GlobalSaddrTable<1, opName#"_rtn">,
AtomicNoRet <opName#"_saddr", 1> {

View File

@ -346,7 +346,7 @@ body: |
name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
body: |
bb.0:
GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@ -360,7 +360,7 @@ body: |
name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
%2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@ -374,7 +374,7 @@ body: |
name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vgpr_32 = GLOBAL_ATOMIC_FMAX_SADDR_RTN undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
%2:vgpr_32 = GLOBAL_ATOMIC_FMAX_SADDR_RTN undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@ -388,7 +388,7 @@ body: |
name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
%2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@ -402,7 +402,7 @@ body: |
name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vgpr_32 = GLOBAL_ATOMIC_FMIN_SADDR_RTN undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
%2:vgpr_32 = GLOBAL_ATOMIC_FMIN_SADDR_RTN undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...
@ -416,7 +416,7 @@ body: |
name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
%2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...

View File

@ -19,10 +19,10 @@ global_atomic_csub v2, v[0:1], v2, off offset:100 glc slc
global_atomic_csub v2, v[0:1], v2, off
// GFX10: encoding: [0x00,0x80,0xd1,0xdc,0x00,0x02,0x7d,0x02]
global_atomic_csub v2, v[0:1], v2, s[2:3]
global_atomic_csub v2, v0, v2, s[2:3]
// GFX10: encoding: [0x00,0x80,0xd1,0xdc,0x00,0x02,0x02,0x02]
global_atomic_csub v2, v[0:1], v2, s[2:3] offset:100 glc slc
global_atomic_csub v2, v0, v2, s[2:3] offset:100 glc slc
// GFX10: encoding: [0x64,0x80,0xd3,0xdc,0x00,0x02,0x02,0x02]
buffer_atomic_csub v5, off, s[8:11], s3

View File

@ -42,16 +42,16 @@
# CHECK: flat_store_byte v[0:1], v0 offset:4095 glc ; encoding: [0xff,0x0f,0x61,0xdc,0x00,0x00,0x00,0x00]
0xff,0x0f,0x61,0xdc,0x00,0x00,0x00,0x00
# CHECK: global_atomic_add v[2:3], v4, s[0:1] ; encoding: [0x00,0x80,0x08,0xdd,0x02,0x04,0x00,0x00]
# CHECK: global_atomic_add v2, v4, s[0:1] ; encoding: [0x00,0x80,0x08,0xdd,0x02,0x04,0x00,0x00]
0x00,0x80,0x08,0xdd,0x02,0x04,0x00,0x00
# CHECK: global_atomic_add v[2:3], v4, s[0:1] offset:4095 ; encoding: [0xff,0x8f,0x08,0xdd,0x02,0x04,0x00,0x00]
# CHECK: global_atomic_add v2, v4, s[0:1] offset:4095 ; encoding: [0xff,0x8f,0x08,0xdd,0x02,0x04,0x00,0x00]
0xff,0x8f,0x08,0xdd,0x02,0x04,0x00,0x00
# CHECK: global_atomic_add v[2:3], v4, s[0:1] offset:-4096 ; encoding: [0x00,0x90,0x08,0xdd,0x02,0x04,0x00,0x00]
# CHECK: global_atomic_add v2, v4, s[0:1] offset:-4096 ; encoding: [0x00,0x90,0x08,0xdd,0x02,0x04,0x00,0x00]
0x00,0x90,0x08,0xdd,0x02,0x04,0x00,0x00
# CHECK: global_atomic_add v0, v[2:3], v4, s[0:1] offset:-1 glc ; encoding: [0xff,0x9f,0x09,0xdd,0x02,0x04,0x00,0x00]
# CHECK: global_atomic_add v0, v2, v4, s[0:1] offset:-1 glc ; encoding: [0xff,0x9f,0x09,0xdd,0x02,0x04,0x00,0x00]
0xff,0x9f,0x09,0xdd,0x02,0x04,0x00,0x00
# CHECK: global_load_sbyte v0, v2, s[0:1] ; encoding: [0x00,0x80,0x44,0xdc,0x02,0x00,0x00,0x00]

View File

@ -19,10 +19,10 @@
# GFX10: global_atomic_csub v2, v[0:1], v2, off glc
0x00,0x80,0xd1,0xdc,0x00,0x02,0x7d,0x02
# GFX10: global_atomic_csub v2, v[0:1], v2, s[2:3] glc
# GFX10: global_atomic_csub v2, v0, v2, s[2:3] glc
0x00,0x80,0xd1,0xdc,0x00,0x02,0x02,0x02
# GFX10: global_atomic_csub v2, v[0:1], v2, s[2:3] offset:100 glc slc
# GFX10: global_atomic_csub v2, v0, v2, s[2:3] offset:100 glc slc
0x64,0x80,0xd3,0xdc,0x00,0x02,0x02,0x02
# GFX10: buffer_atomic_csub v5, off, s[8:11], s3