forked from OSchip/llvm-project
[AMDGPU] Remove atomic pattern args in FLAT_[Global_]Atomic_Pseudo defs
We already have explicit patterns for these.

Differential Revision: https://reviews.llvm.org/D124084
This commit is contained in:
parent
f935908d7b
commit
165ae7276c
|
@ -464,7 +464,6 @@ multiclass FLAT_Atomic_Pseudo<
|
|||
string opName,
|
||||
RegisterClass vdst_rc,
|
||||
ValueType vt,
|
||||
SDPatternOperator atomic = null_frag,
|
||||
ValueType data_vt = vt,
|
||||
RegisterClass data_rc = vdst_rc,
|
||||
bit isFP = isFloatType<data_vt>.ret,
|
||||
|
@ -483,11 +482,9 @@ multiclass FLAT_Atomic_Pseudo<
|
|||
def _RTN : FLAT_AtomicRet_Pseudo <opName,
|
||||
(outs getLdStRegisterOperand<vdst_rc>.ret:$vdst),
|
||||
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
|
||||
" $vdst, $vaddr, $vdata$offset$cpol",
|
||||
[(set vt:$vdst,
|
||||
(atomic (FlatOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
|
||||
GlobalSaddrTable<0, opName#"_rtn">,
|
||||
AtomicNoRet <opName, 1>{
|
||||
" $vdst, $vaddr, $vdata$offset$cpol">,
|
||||
GlobalSaddrTable<0, opName#"_rtn">,
|
||||
AtomicNoRet <opName, 1> {
|
||||
let FPAtomic = isFP;
|
||||
let AddedComplexity = -1; // Prefer global atomics if available
|
||||
}
|
||||
|
@ -530,7 +527,6 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
|
|||
string opName,
|
||||
RegisterClass vdst_rc,
|
||||
ValueType vt,
|
||||
SDPatternOperator atomic = null_frag,
|
||||
ValueType data_vt = vt,
|
||||
RegisterClass data_rc = vdst_rc,
|
||||
bit isFP = isFloatType<data_vt>.ret,
|
||||
|
@ -540,11 +536,9 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
|
|||
def _RTN : FLAT_AtomicRet_Pseudo <opName,
|
||||
(outs vdst_op:$vdst),
|
||||
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
|
||||
" $vdst, $vaddr, $vdata, off$offset$cpol",
|
||||
[(set vt:$vdst,
|
||||
(atomic (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
|
||||
GlobalSaddrTable<0, opName#"_rtn">,
|
||||
AtomicNoRet <opName, 1> {
|
||||
" $vdst, $vaddr, $vdata, off$offset$cpol">,
|
||||
GlobalSaddrTable<0, opName#"_rtn">,
|
||||
AtomicNoRet <opName, 1> {
|
||||
let has_saddr = 1;
|
||||
let FPAtomic = isFP;
|
||||
}
|
||||
|
@ -566,12 +560,11 @@ multiclass FLAT_Global_Atomic_Pseudo<
|
|||
string opName,
|
||||
RegisterClass vdst_rc,
|
||||
ValueType vt,
|
||||
SDPatternOperator atomic_rtn = null_frag,
|
||||
ValueType data_vt = vt,
|
||||
RegisterClass data_rc = vdst_rc> {
|
||||
let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
|
||||
defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
|
||||
defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;
|
||||
defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -608,93 +601,91 @@ def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR
|
|||
}
|
||||
|
||||
defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
|
||||
VGPR_32, i32, AMDGPUatomic_cmp_swap_flat_32,
|
||||
v2i32, VReg_64>;
|
||||
VGPR_32, i32, v2i32, VReg_64>;
|
||||
|
||||
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
|
||||
VReg_64, i64, AMDGPUatomic_cmp_swap_flat_64,
|
||||
v2i64, VReg_128>;
|
||||
VReg_64, i64, v2i64, VReg_128>;
|
||||
|
||||
defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap",
|
||||
VGPR_32, i32, atomic_swap_flat_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
|
||||
VReg_64, i64, atomic_swap_flat_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add",
|
||||
VGPR_32, i32, atomic_load_add_flat_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub",
|
||||
VGPR_32, i32, atomic_load_sub_flat_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin",
|
||||
VGPR_32, i32, atomic_load_min_flat_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin",
|
||||
VGPR_32, i32, atomic_load_umin_flat_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax",
|
||||
VGPR_32, i32, atomic_load_max_flat_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax",
|
||||
VGPR_32, i32, atomic_load_umax_flat_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and",
|
||||
VGPR_32, i32, atomic_load_and_flat_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or",
|
||||
VGPR_32, i32, atomic_load_or_flat_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor",
|
||||
VGPR_32, i32, atomic_load_xor_flat_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc",
|
||||
VGPR_32, i32, atomic_inc_flat_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec",
|
||||
VGPR_32, i32, atomic_dec_flat_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
|
||||
VReg_64, i64, atomic_load_add_flat_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
|
||||
VReg_64, i64, atomic_load_sub_flat_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
|
||||
VReg_64, i64, atomic_load_min_flat_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
|
||||
VReg_64, i64, atomic_load_umin_flat_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
|
||||
VReg_64, i64, atomic_load_max_flat_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
|
||||
VReg_64, i64, atomic_load_umax_flat_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
|
||||
VReg_64, i64, atomic_load_and_flat_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
|
||||
VReg_64, i64, atomic_load_or_flat_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
|
||||
VReg_64, i64, atomic_load_xor_flat_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
|
||||
VReg_64, i64, atomic_inc_flat_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
|
||||
VReg_64, i64, atomic_dec_flat_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
// GFX7-, GFX10-only flat instructions.
|
||||
let SubtargetPredicate = isGFX7GFX10 in {
|
||||
|
||||
defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
|
||||
VGPR_32, f32, null_frag, v2f32, VReg_64>;
|
||||
VGPR_32, f32, v2f32, VReg_64>;
|
||||
|
||||
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
|
||||
VReg_64, f64, null_frag, v2f64, VReg_128>;
|
||||
VReg_64, f64, v2f64, VReg_128>;
|
||||
|
||||
defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin",
|
||||
VGPR_32, f32>;
|
||||
|
@ -758,88 +749,86 @@ defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d
|
|||
|
||||
let is_flat_global = 1 in {
|
||||
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
|
||||
VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32,
|
||||
v2i32, VReg_64>;
|
||||
VGPR_32, i32, v2i32, VReg_64>;
|
||||
|
||||
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
|
||||
VReg_64, i64, AMDGPUatomic_cmp_swap_global_64,
|
||||
v2i64, VReg_128>;
|
||||
VReg_64, i64, v2i64, VReg_128>;
|
||||
|
||||
defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
|
||||
VGPR_32, i32, atomic_swap_global_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2",
|
||||
VReg_64, i64, atomic_swap_global_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add",
|
||||
VGPR_32, i32, atomic_load_add_global_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub",
|
||||
VGPR_32, i32, atomic_load_sub_global_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin",
|
||||
VGPR_32, i32, atomic_load_min_global_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin",
|
||||
VGPR_32, i32, atomic_load_umin_global_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax",
|
||||
VGPR_32, i32, atomic_load_max_global_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax",
|
||||
VGPR_32, i32, atomic_load_umax_global_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and",
|
||||
VGPR_32, i32, atomic_load_and_global_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or",
|
||||
VGPR_32, i32, atomic_load_or_global_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor",
|
||||
VGPR_32, i32, atomic_load_xor_global_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc",
|
||||
VGPR_32, i32, atomic_inc_global_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec",
|
||||
VGPR_32, i32, atomic_dec_global_32>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2",
|
||||
VReg_64, i64, atomic_load_add_global_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2",
|
||||
VReg_64, i64, atomic_load_sub_global_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2",
|
||||
VReg_64, i64, atomic_load_min_global_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2",
|
||||
VReg_64, i64, atomic_load_umin_global_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2",
|
||||
VReg_64, i64, atomic_load_max_global_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2",
|
||||
VReg_64, i64, atomic_load_umax_global_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2",
|
||||
VReg_64, i64, atomic_load_and_global_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2",
|
||||
VReg_64, i64, atomic_load_or_global_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2",
|
||||
VReg_64, i64, atomic_load_xor_global_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2",
|
||||
VReg_64, i64, atomic_inc_global_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
|
||||
VReg_64, i64, atomic_dec_global_64>;
|
||||
VReg_64, i64>;
|
||||
|
||||
let SubtargetPredicate = HasGFX10_BEncoding in
|
||||
defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub",
|
||||
VGPR_32, i32, int_amdgcn_global_atomic_csub>;
|
||||
VGPR_32, i32>;
|
||||
|
||||
let SubtargetPredicate = isGFX940Plus in {
|
||||
|
||||
|
@ -895,13 +884,13 @@ defm SCRATCH_LOAD_LDS_DWORD : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_d
|
|||
|
||||
let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
|
||||
defm GLOBAL_ATOMIC_FCMPSWAP :
|
||||
FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, null_frag, v2f32, VReg_64>;
|
||||
FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>;
|
||||
defm GLOBAL_ATOMIC_FMIN :
|
||||
FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
|
||||
defm GLOBAL_ATOMIC_FMAX :
|
||||
FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
|
||||
defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
|
||||
FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, null_frag, v2f64, VReg_128>;
|
||||
FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>;
|
||||
defm GLOBAL_ATOMIC_FMIN_X2 :
|
||||
FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
|
||||
defm GLOBAL_ATOMIC_FMAX_X2 :
|
||||
|
|
Loading…
Reference in New Issue