forked from OSchip/llvm-project
[AMDGPU] Disable SCC bit on fp atomics
Differential Revision: https://reviews.llvm.org/D98221
This commit is contained in:
parent
574a9dabc6
commit
9931b1f7a4
|
@ -1219,6 +1219,10 @@ def isGFX90AOnly :
|
||||||
Predicate<"Subtarget->hasGFX90AInsts()">,
|
Predicate<"Subtarget->hasGFX90AInsts()">,
|
||||||
AssemblerPredicate<(all_of FeatureGFX90AInsts)>;
|
AssemblerPredicate<(all_of FeatureGFX90AInsts)>;
|
||||||
|
|
||||||
|
def isGFX908orGFX90A :
|
||||||
|
Predicate<"Subtarget->hasMAIInsts()">,
|
||||||
|
AssemblerPredicate<(all_of FeatureMAIInsts)>;
|
||||||
|
|
||||||
def isGFX8GFX9 :
|
def isGFX8GFX9 :
|
||||||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
|
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
|
||||||
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
|
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
|
||||||
|
|
|
@ -1384,18 +1384,21 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2
|
||||||
// VI
|
// VI
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps> :
|
class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
|
||||||
FLAT_Real <op, ps>,
|
FLAT_Real <op, ps>,
|
||||||
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
|
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
|
||||||
let AssemblerPredicate = isGFX8GFX9;
|
let AssemblerPredicate = isGFX8GFX9;
|
||||||
let DecoderNamespace = "GFX8";
|
let DecoderNamespace = "GFX8";
|
||||||
|
|
||||||
let Inst{25} = !if(ps.has_sccb, sccb, ps.sccbValue);
|
let Inst{25} = !if(has_sccb, sccb, ps.sccbValue);
|
||||||
|
let AsmString = ps.Mnemonic #
|
||||||
|
!subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass FLAT_Real_AllAddr_vi<bits<7> op> {
|
multiclass FLAT_Real_AllAddr_vi<bits<7> op,
|
||||||
def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)>;
|
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
|
||||||
def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
|
def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
|
||||||
|
def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
|
||||||
}
|
}
|
||||||
|
|
||||||
def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
|
def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
|
||||||
|
@ -1423,15 +1426,17 @@ def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
|
||||||
def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
|
def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
|
||||||
def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
|
def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
|
||||||
|
|
||||||
multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps> {
|
multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps,
|
||||||
def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
|
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
|
||||||
def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
|
def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
|
||||||
|
def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op> :
|
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
|
||||||
FLAT_Real_AllAddr_vi<op> {
|
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
|
||||||
def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
|
FLAT_Real_AllAddr_vi<op, has_sccb> {
|
||||||
def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
|
def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
|
||||||
|
def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1538,14 +1543,19 @@ defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
|
||||||
defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
|
defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
|
||||||
defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
|
defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
|
||||||
|
|
||||||
let SubtargetPredicate = isGFX90APlus in {
|
let SubtargetPredicate = isGFX908orGFX90A in {
|
||||||
defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_vi<0x4f, FLAT_ATOMIC_ADD_F64>;
|
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d, 0>;
|
||||||
defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_vi<0x50, FLAT_ATOMIC_MIN_F64>;
|
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>;
|
||||||
defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_vi<0x51, FLAT_ATOMIC_MAX_F64>;
|
}
|
||||||
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f>;
|
|
||||||
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50>;
|
let SubtargetPredicate = isGFX90AOnly in {
|
||||||
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51>;
|
defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_vi<0x4f, FLAT_ATOMIC_ADD_F64, 0>;
|
||||||
} // End SubtargetPredicate = isGFX90APlus
|
defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_vi<0x50, FLAT_ATOMIC_MIN_F64, 0>;
|
||||||
|
defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_vi<0x51, FLAT_ATOMIC_MAX_F64, 0>;
|
||||||
|
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f, 0>;
|
||||||
|
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50, 0>;
|
||||||
|
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>;
|
||||||
|
} // End SubtargetPredicate = isGFX90AOnly
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// GFX10.
|
// GFX10.
|
||||||
|
@ -1752,10 +1762,3 @@ defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x022>;
|
||||||
defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x023>;
|
defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x023>;
|
||||||
defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x024>;
|
defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x024>;
|
||||||
defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x025>;
|
defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x025>;
|
||||||
|
|
||||||
let SubtargetPredicate = HasAtomicFaddInsts in {
|
|
||||||
|
|
||||||
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d>;
|
|
||||||
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e>;
|
|
||||||
|
|
||||||
} // End SubtargetPredicate = HasAtomicFaddInsts
|
|
||||||
|
|
|
@ -600,10 +600,6 @@ flat_atomic_add_f64 v[0:1], v[2:3] offset:7
|
||||||
// GFX90A: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x3d,0xdd,0x00,0x02,0x00,0x00]
|
// GFX90A: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x3d,0xdd,0x00,0x02,0x00,0x00]
|
||||||
flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:4095 glc
|
flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:4095 glc
|
||||||
|
|
||||||
// NOT-GFX90A: error: instruction not supported on this GPU
|
|
||||||
// GFX90A: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:4095 glc scc ; encoding: [0xff,0x0f,0x3d,0xdf,0x00,0x02,0x00,0x00]
|
|
||||||
flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:4095 glc scc
|
|
||||||
|
|
||||||
// NOT-GFX90A: error: instruction not supported on this GPU
|
// NOT-GFX90A: error: instruction not supported on this GPU
|
||||||
// GFX90A: flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x3e,0xdd,0x00,0x02,0x00,0x00]
|
// GFX90A: flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x3e,0xdd,0x00,0x02,0x00,0x00]
|
||||||
flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 slc
|
flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 slc
|
||||||
|
@ -1023,3 +1019,30 @@ flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] glc
|
||||||
// GFX90A: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc ; encoding: [0x00,0x00,0x41,0xdd,0x00,0x02,0x00,0x00]
|
// GFX90A: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc ; encoding: [0x00,0x00,0x41,0xdd,0x00,0x02,0x00,0x00]
|
||||||
// NOT-GFX90A: error: instruction not supported on this GPU
|
// NOT-GFX90A: error: instruction not supported on this GPU
|
||||||
flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc
|
flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc
|
||||||
|
|
||||||
|
// GFX90A: global_atomic_add v[2:3], v5, off scc ; encoding: [0x00,0x80,0x08,0xdf,0x02,0x05,0x7f,0x00]
|
||||||
|
// NOT-GFX90A: error: failed parsing operand.
|
||||||
|
global_atomic_add v[2:3], v5, off scc
|
||||||
|
|
||||||
|
// GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off glc ; encoding: [0x00,0x80,0x35,0xdd,0x00,0x02,0x7f,0x00]
|
||||||
|
// GFX908: error: operands are not valid for this GPU or mode
|
||||||
|
// GFX1010: error: instruction not supported on this GPU
|
||||||
|
global_atomic_add_f32 v0, v[0:1], v2, off glc
|
||||||
|
|
||||||
|
// GFX90A: global_atomic_add_f32 v[0:1], v2, off ; encoding: [0x00,0x80,0x34,0xdd,0x00,0x02,0x7f,0x00]
|
||||||
|
// GFX1010: error: instruction not supported on this GPU
|
||||||
|
global_atomic_add_f32 v[0:1], v2, off
|
||||||
|
|
||||||
|
// GFX90A: global_atomic_add_f32 v0, v2, s[0:1] ; encoding: [0x00,0x80,0x34,0xdd,0x00,0x02,0x00,0x00]
|
||||||
|
// GFX1010: error: instruction not supported on this GPU
|
||||||
|
global_atomic_add_f32 v0, v2, s[0:1]
|
||||||
|
|
||||||
|
// GFX90A: global_atomic_add_f32 v1, v0, v2, s[0:1] glc
|
||||||
|
// GFX908: error: operands are not valid for this GPU or mode
|
||||||
|
// GFX1010: error: instruction not supported on this GPU
|
||||||
|
global_atomic_add_f32 v1, v0, v2, s[0:1] glc ; encoding: [0x00,0x80,0x35,0xdd,0x00,0x02,0x00,0x01]
|
||||||
|
|
||||||
|
// GFX908: error: operands are not valid for this GPU or mode
|
||||||
|
// GFX1010: error: instruction not supported on this GPU
|
||||||
|
// GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off glc ; encoding: [0x00,0x80,0x39,0xdd,0x00,0x02,0x7f,0x00]
|
||||||
|
global_atomic_pk_add_f16 v0, v[0:1], v2, off glc
|
||||||
|
|
|
@ -194,3 +194,39 @@ image_sample_cd v[0:3], v[0:1], s[4:11], s[16:19] dmask:0xf
|
||||||
|
|
||||||
image_sample_b v[0:3], v[0:1], s[4:11], s[16:19] dmask:0xf
|
image_sample_b v[0:3], v[0:1], s[4:11], s[16:19] dmask:0xf
|
||||||
// GFX90A: error: instruction not supported on this GPU
|
// GFX90A: error: instruction not supported on this GPU
|
||||||
|
|
||||||
|
global_atomic_add_f32 v0, v[0:1], v2, off glc scc
|
||||||
|
// GFX90A: error: invalid operand for instruction
|
||||||
|
|
||||||
|
global_atomic_add_f32 v[0:1], v2, off scc
|
||||||
|
// GFX90A: error: invalid operand for instruction
|
||||||
|
|
||||||
|
global_atomic_add_f32 v0, v2, s[0:1] scc
|
||||||
|
// GFX90A: error: invalid operand for instruction
|
||||||
|
|
||||||
|
global_atomic_add_f32 v1, v0, v2, s[0:1] glc scc
|
||||||
|
// GFX90A: error: invalid operand for instruction
|
||||||
|
|
||||||
|
global_atomic_pk_add_f16 v0, v[0:1], v2, off glc scc
|
||||||
|
// GFX90A: error: invalid operand for instruction
|
||||||
|
|
||||||
|
flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc scc
|
||||||
|
// GFX90A: error: invalid operand for instruction
|
||||||
|
|
||||||
|
flat_atomic_add_f64 v[0:1], v[2:3] scc
|
||||||
|
// GFX90A: error: invalid operand for instruction
|
||||||
|
|
||||||
|
flat_atomic_min_f64 v[0:1], v[2:3] scc
|
||||||
|
// GFX90A: error: invalid operand for instruction
|
||||||
|
|
||||||
|
flat_atomic_max_f64 v[0:1], v[2:3] scc
|
||||||
|
// GFX90A: error: invalid operand for instruction
|
||||||
|
|
||||||
|
global_atomic_add_f64 v[0:1], v[2:3], off scc
|
||||||
|
// GFX90A: error: invalid operand for instruction
|
||||||
|
|
||||||
|
global_atomic_min_f64 v[0:1], v[2:3], off scc
|
||||||
|
// GFX90A: error: invalid operand for instruction
|
||||||
|
|
||||||
|
global_atomic_max_f64 v[0:1], v[2:3], off scc
|
||||||
|
// GFX90A: error: invalid operand for instruction
|
||||||
|
|
|
@ -436,8 +436,8 @@
|
||||||
# GFX90A: flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x3c,0xdd,0x00,0x02,0x00,0x00]
|
# GFX90A: flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x3c,0xdd,0x00,0x02,0x00,0x00]
|
||||||
0xff,0x0f,0x3c,0xdd,0x00,0x02,0x00,0x00
|
0xff,0x0f,0x3c,0xdd,0x00,0x02,0x00,0x00
|
||||||
|
|
||||||
# GFX90A: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:4095 glc scc ; encoding: [0xff,0x0f,0x3d,0xdf,0x00,0x02,0x00,0x00]
|
# GFX90A: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x3d,0xdd,0x00,0x02,0x00,0x00]
|
||||||
0xff,0x0f,0x3d,0xdf,0x00,0x02,0x00,0x00
|
0xff,0x0f,0x3d,0xdd,0x00,0x02,0x00,0x00
|
||||||
|
|
||||||
# GFX90A: flat_atomic_add_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x3c,0xdd,0xfe,0x02,0x00,0x00]
|
# GFX90A: flat_atomic_add_f64 v[254:255], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x3c,0xdd,0xfe,0x02,0x00,0x00]
|
||||||
0xff,0x0f,0x3c,0xdd,0xfe,0x02,0x00,0x00
|
0xff,0x0f,0x3c,0xdd,0xfe,0x02,0x00,0x00
|
||||||
|
|
Loading…
Reference in New Issue