forked from OSchip/llvm-project
[AMDGPU][MC] Added support for several VI-specific opcodes (s_wakeup, etc)
Added support for VI: - s_endpgm_saved - s_wakeup - s_rfe_restore_b64 - v_perm_b32 Enabled for VI: - v_mov_fed_b32 - v_mov_fed_b32_e64 See bug 32593: https://bugs.llvm.org//show_bug.cgi?id=32593 Reviewers: artem.tamazov, vpykhtin Differential Revision: https://reviews.llvm.org/D31931 llvm-svn: 300076
This commit is contained in:
parent
845033a6c9
commit
14104e0d0f
|
@ -444,6 +444,17 @@ let Defs = [SCC] in {
|
|||
def S_ABSDIFF_I32 : SOP2_32 <"s_absdiff_i32">;
|
||||
} // End Defs = [SCC]
|
||||
|
||||
// s_rfe_restore_b64: SOP2 pseudo-instruction gated on the isVI subtarget
// predicate. It takes a 64-bit scalar source ($src0) and a 32-bit scalar
// source ($src1) and declares no outputs.
let SubtargetPredicate = isVI in {
|
||||
def S_RFE_RESTORE_B64 : SOP2_Pseudo <
|
||||
"s_rfe_restore_b64", (outs),
|
||||
(ins SSrc_b64:$src0, SSrc_b32:$src1),
|
||||
"$src0, $src1"
|
||||
> {
|
||||
let hasSideEffects = 1;
|
||||
let has_sdst = 0; // no scalar destination operand, matching the empty (outs)
|
||||
}
|
||||
}
|
||||
|
||||
let SubtargetPredicate = isGFX9 in {
|
||||
def S_PACK_LL_B32_B16 : SOP2_32<"s_pack_ll_b32_b16">;
|
||||
def S_PACK_LH_B32_B16 : SOP2_32<"s_pack_lh_b32_b16">;
|
||||
|
@ -762,6 +773,14 @@ def S_ENDPGM : SOPP <0x00000001, (ins), "s_endpgm",
|
|||
let isReturn = 1;
|
||||
}
|
||||
|
||||
// s_endpgm_saved: operand-less SOPP instruction (opcode 0x1B) gated on the
// isVI subtarget predicate. Flagged as both a barrier and a return.
let SubtargetPredicate = isVI in {
|
||||
def S_ENDPGM_SAVED : SOPP <0x0000001B, (ins), "s_endpgm_saved"> {
|
||||
let simm16 = 0; // no immediate operand is parsed, so the field is fixed at 0
|
||||
let isBarrier = 1;
|
||||
let isReturn = 1;
|
||||
}
|
||||
}
|
||||
|
||||
let isBranch = 1, SchedRW = [WriteBranch] in {
|
||||
def S_BRANCH : SOPP <
|
||||
0x00000002, (ins sopp_brtarget:$simm16), "s_branch $simm16",
|
||||
|
@ -836,6 +855,14 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier",
|
|||
let isConvergent = 1;
|
||||
}
|
||||
|
||||
// s_wakeup: operand-less SOPP instruction (opcode 0x03) gated on the isVI
// subtarget predicate.
// NOTE(review): mayLoad/mayStore are presumably set so the instruction is
// treated conservatively with respect to memory operations -- confirm.
let SubtargetPredicate = isVI in {
|
||||
def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> {
|
||||
let simm16 = 0; // no immediate operand is parsed, so the field is fixed at 0
|
||||
let mayLoad = 1;
|
||||
let mayStore = 1;
|
||||
}
|
||||
}
|
||||
|
||||
let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
|
||||
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16">;
|
||||
def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
|
||||
|
@ -1241,6 +1268,7 @@ def S_ABSDIFF_I32_vi : SOP2_Real_vi <0x2a, S_ABSDIFF_I32>;
|
|||
def S_PACK_LL_B32_B16_vi : SOP2_Real_vi <0x32, S_PACK_LL_B32_B16>;
|
||||
def S_PACK_LH_B32_B16_vi : SOP2_Real_vi <0x33, S_PACK_LH_B32_B16>;
|
||||
def S_PACK_HH_B32_B16_vi : SOP2_Real_vi <0x34, S_PACK_HH_B32_B16>;
|
||||
def S_RFE_RESTORE_B64_vi : SOP2_Real_vi <0x2b, S_RFE_RESTORE_B64>;
|
||||
|
||||
def S_MOVK_I32_vi : SOPK_Real_vi <0x00, S_MOVK_I32>;
|
||||
def S_CMOVK_I32_vi : SOPK_Real_vi <0x01, S_CMOVK_I32>;
|
||||
|
|
|
@ -284,11 +284,14 @@ defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_I32_VI32_NO_EXT>;
|
|||
defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
|
||||
} // End Uses = [M0, EXEC]
|
||||
|
||||
// v_mov_fed_b32: VOP1 instruction with the VOP_I32_I32 profile, scheduled
// as WriteQuarterRate32.
// NOTE(review): this sits ahead of the SI/CI-only section, presumably so the
// instruction is not restricted by the isSICI predicate -- confirm against
// any enclosing scope.
let SchedRW = [WriteQuarterRate32] in {
|
||||
defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;
|
||||
}
|
||||
|
||||
// These instructions only exist on SI and CI
|
||||
let SubtargetPredicate = isSICI in {
|
||||
|
||||
let SchedRW = [WriteQuarterRate32] in {
|
||||
defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;
|
||||
defm V_LOG_CLAMP_F32 : VOP1Inst <"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
|
||||
defm V_RCP_CLAMP_F32 : VOP1Inst <"v_rcp_clamp_f32", VOP_F32_F32>;
|
||||
defm V_RCP_LEGACY_F32 : VOP1Inst <"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
|
||||
|
@ -533,6 +536,7 @@ defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>;
|
|||
defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>;
|
||||
defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>;
|
||||
defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>;
|
||||
defm V_MOV_FED_B32 : VOP1_Real_vi <0x9>;
|
||||
defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>;
|
||||
defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>;
|
||||
defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>;
|
||||
|
|
|
@ -260,6 +260,8 @@ def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16>>;
|
|||
|
||||
} // End isCommutable = 1
|
||||
|
||||
def V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
|
||||
|
||||
} // End SubtargetPredicate = isVI
|
||||
|
||||
let Predicates = [isVI] in {
|
||||
|
@ -473,6 +475,8 @@ defm V_MAD_F16 : VOP3_Real_vi <0x1ea>;
|
|||
defm V_MAD_U16 : VOP3_Real_vi <0x1eb>;
|
||||
defm V_MAD_I16 : VOP3_Real_vi <0x1ec>;
|
||||
|
||||
defm V_PERM_B32 : VOP3_Real_vi <0x1ed>;
|
||||
|
||||
defm V_FMA_F16 : VOP3_Real_vi <0x1ee>;
|
||||
defm V_DIV_FIXUP_F16 : VOP3_Real_vi <0x1ef>;
|
||||
|
||||
|
|
|
@ -98831,3 +98831,17 @@ v_cmpx_t_u32_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:WORD_1
|
|||
v_cmpx_t_u32_sdwa vcc, v1, sext(v2) src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: [0xf9,0x04,0xbe,0x7d,0x01,0x16,0x06,0x0e]
|
||||
|
||||
s_rfe_restore_b64 s[4:5], s2
|
||||
// CHECK: [0x04,0x02,0x80,0x95]
|
||||
|
||||
v_mov_fed_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: [0xfa,0x12,0x0a,0x7e,0x01,0xe4,0x00,0x00]
|
||||
|
||||
v_mov_fed_b32_e64 v5, s1
|
||||
// CHECK: [0x05,0x00,0x49,0xd1,0x01,0x00,0x00,0x00]
|
||||
|
||||
v_mov_fed_b32_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
|
||||
// CHECK: [0xf9,0x12,0x0a,0x7e,0x01,0x06,0x06,0x06]
|
||||
|
||||
v_perm_b32 v5, s1, 0, v255
|
||||
// CHECK: [0x05,0x00,0xed,0xd1,0x01,0x00,0xfd,0x07]
|
||||
|
|
|
@ -203,3 +203,11 @@ s_set_gpr_idx_mode 0
|
|||
s_set_gpr_idx_mode 15
|
||||
// VI: s_set_gpr_idx_mode dst src0 src1 src2 ; encoding: [0x0f,0x00,0x9d,0xbf]
|
||||
// NOSICI: error: instruction not supported on this GPU
|
||||
|
||||
s_endpgm_saved
|
||||
// VI: s_endpgm_saved ; encoding: [0x00,0x00,0x9b,0xbf]
|
||||
// NOSICI: error: instruction not supported on this GPU
|
||||
|
||||
s_wakeup
|
||||
// VI: s_wakeup ; encoding: [0x00,0x00,0x83,0xbf]
|
||||
// NOSICI: error: instruction not supported on this GPU
|
||||
|
|
|
@ -56,7 +56,7 @@ v_cvt_u32_f32_e32 v1, v2
|
|||
v_cvt_i32_f32_e32 v1, v2
|
||||
|
||||
// SICI: v_mov_fed_b32_e32 v1, v2 ; encoding: [0x02,0x13,0x02,0x7e]
|
||||
// NOVI: error: instruction not supported on this GPU
|
||||
// VI: v_mov_fed_b32_e32 v1, v2 ; encoding: [0x02,0x13,0x02,0x7e]
|
||||
v_mov_fed_b32_e32 v1, v2
|
||||
|
||||
// GCN: v_cvt_f16_f32_e32 v1, v2 ; encoding: [0x02,0x15,0x02,0x7e]
|
||||
|
|
Loading…
Reference in New Issue