forked from OSchip/llvm-project
[AMDGPU][MC] Corrected v_madak/madmk to avoid printing "_e32" in disassembler output
See bug 32927: https://bugs.llvm.org/show_bug.cgi?id=32927
Reviewers: vpykhtin, artem.tamazov, arsenm
Differential Revision: https://reviews.llvm.org/D32913
llvm-svn: 302648
This commit is contained in:
parent
8e5e40862a
commit
da61a7f9ef
|
@ -164,8 +164,11 @@ multiclass VOP2eInst <string opName,
|
|||
class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
|
||||
field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
|
||||
field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm);
|
||||
field string Asm32 = "$vdst, $src0, $src1, $imm";
|
||||
field bit HasExt = 0;
|
||||
|
||||
// Hack to stop printing _e64
|
||||
let DstRC = RegisterOperand<VGPR_32>;
|
||||
field string Asm32 = " $vdst, $src0, $src1, $imm";
|
||||
}
|
||||
|
||||
def VOP_MADAK_F16 : VOP_MADAK <f16>;
|
||||
|
@ -174,8 +177,11 @@ def VOP_MADAK_F32 : VOP_MADAK <f32>;
|
|||
class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
|
||||
field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
|
||||
field dag Ins32 = (ins VCSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1);
|
||||
field string Asm32 = "$vdst, $src0, $imm, $src1";
|
||||
field bit HasExt = 0;
|
||||
|
||||
// Hack to stop printing _e64
|
||||
let DstRC = RegisterOperand<VGPR_32>;
|
||||
field string Asm32 = " $vdst, $src0, $imm, $src1";
|
||||
}
|
||||
|
||||
def VOP_MADMK_F16 : VOP_MADMK <f16>;
|
||||
|
@ -298,7 +304,7 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32]> {
|
|||
let SubtargetPredicate = isGCN in {
|
||||
|
||||
defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
|
||||
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32>;
|
||||
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, [], "">;
|
||||
|
||||
let isCommutable = 1 in {
|
||||
defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, fadd>;
|
||||
|
@ -328,7 +334,7 @@ let Constraints = "$vdst = $src2", DisableEncoding="$src2",
|
|||
defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;
|
||||
}
|
||||
|
||||
def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32>;
|
||||
def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, [], "">;
|
||||
|
||||
// No patterns so that the scalar instructions are always selected.
|
||||
// The scalar versions will be replaced with vector when needed later.
|
||||
|
@ -383,7 +389,7 @@ defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>;
|
|||
|
||||
let SubtargetPredicate = isVI in {
|
||||
|
||||
def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16>;
|
||||
def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;
|
||||
defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16>;
|
||||
defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16>;
|
||||
defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16>;
|
||||
|
@ -394,7 +400,7 @@ defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, fadd>;
|
|||
defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, fsub>;
|
||||
defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
|
||||
defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, fmul>;
|
||||
def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16>;
|
||||
def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
|
||||
defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16>;
|
||||
defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16>;
|
||||
defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">;
|
||||
|
|
|
@ -9,7 +9,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone
|
|||
; GCN-LABEL: {{^}}madak_f32:
|
||||
; GCN: buffer_load_dword [[VA:v[0-9]+]]
|
||||
; GCN: buffer_load_dword [[VB:v[0-9]+]]
|
||||
; GCN: v_madak_f32_e32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
|
||||
; GCN: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
|
||||
define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
|
||||
|
@ -63,7 +63,7 @@ define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, flo
|
|||
|
||||
; GCN-LABEL: {{^}}madak_m_inline_imm_f32:
|
||||
; GCN: buffer_load_dword [[VA:v[0-9]+]]
|
||||
; GCN: v_madak_f32_e32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
|
||||
; GCN: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
|
||||
define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind {
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
|
||||
|
@ -198,7 +198,7 @@ define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalia
|
|||
; GCN: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xa|0x28}}
|
||||
; GCN: v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]]
|
||||
; GCN: buffer_load_dword [[VGPR:v[0-9]+]]
|
||||
; GCN: v_madak_f32_e32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
|
||||
; GCN: v_madak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
|
||||
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[VGPR]], [[MADAK]]
|
||||
; GCN: buffer_store_dword [[MUL]]
|
||||
define amdgpu_kernel void @madak_constant_bus_violation(i32 %arg1, float %sgpr0, float %sgpr1) #0 {
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
; GCN-LABEL: {{^}}madak_f16
|
||||
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
|
||||
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
|
||||
; VI: v_madak_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], 0x4900{{$}}
|
||||
; VI: v_madak_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], 0x4900{{$}}
|
||||
; VI: buffer_store_short v[[R_F16]]
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @madak_f16(
|
||||
|
|
|
@ -133,16 +133,16 @@ v_add_f16 v1, 65535, v2
|
|||
|
||||
// K-constant
|
||||
v_madmk_f16 v1, v2, 0x4280, v3
|
||||
// VI: v_madmk_f16_e32 v1, v2, 0x4280, v3 ; encoding: [0x02,0x07,0x02,0x48,0x80,0x42,0x00,0x00]
|
||||
// VI: v_madmk_f16 v1, v2, 0x4280, v3 ; encoding: [0x02,0x07,0x02,0x48,0x80,0x42,0x00,0x00]
|
||||
|
||||
v_madmk_f16 v1, v2, 1.0, v3
|
||||
// VI: v_madmk_f16_e32 v1, v2, 0x3c00, v3 ; encoding: [0x02,0x07,0x02,0x48,0x00,0x3c,0x00,0x00]
|
||||
// VI: v_madmk_f16 v1, v2, 0x3c00, v3 ; encoding: [0x02,0x07,0x02,0x48,0x00,0x3c,0x00,0x00]
|
||||
|
||||
v_madmk_f16 v1, v2, 1, v3
|
||||
// VI: v_madmk_f16_e32 v1, v2, 0x1, v3 ; encoding: [0x02,0x07,0x02,0x48,0x01,0x00,0x00,0x00]
|
||||
// VI: v_madmk_f16 v1, v2, 0x1, v3 ; encoding: [0x02,0x07,0x02,0x48,0x01,0x00,0x00,0x00]
|
||||
|
||||
v_madmk_f16 v1, v2, 64.0, v3
|
||||
// VI: v_madmk_f16_e32 v1, v2, 0x5400, v3 ; encoding: [0x02,0x07,0x02,0x48,0x00,0x54,0x00,0x00]
|
||||
// VI: v_madmk_f16 v1, v2, 0x5400, v3 ; encoding: [0x02,0x07,0x02,0x48,0x00,0x54,0x00,0x00]
|
||||
|
||||
|
||||
v_add_f16_e32 v1, 64.0, v2
|
||||
|
|
|
@ -250,13 +250,13 @@ v_bfm_b32_e64 v1, v2, v3
|
|||
// VI: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c]
|
||||
v_mac_f32_e32 v1, v2, v3
|
||||
|
||||
// SICI: v_madmk_f32_e32 v1, v2, 0x42800000, v3 ; encoding: [0x02,0x07,0x02,0x40,0x00,0x00,0x80,0x42]
|
||||
// VI: v_madmk_f32_e32 v1, v2, 0x42800000, v3 ; encoding: [0x02,0x07,0x02,0x2e,0x00,0x00,0x80,0x42]
|
||||
v_madmk_f32_e32 v1, v2, 64.0, v3
|
||||
// SICI: v_madmk_f32 v1, v2, 0x42800000, v3 ; encoding: [0x02,0x07,0x02,0x40,0x00,0x00,0x80,0x42]
|
||||
// VI: v_madmk_f32 v1, v2, 0x42800000, v3 ; encoding: [0x02,0x07,0x02,0x2e,0x00,0x00,0x80,0x42]
|
||||
v_madmk_f32 v1, v2, 64.0, v3
|
||||
|
||||
// SICI: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x42,0x00,0x00,0x80,0x42]
|
||||
// VI: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x30,0x00,0x00,0x80,0x42]
|
||||
v_madak_f32_e32 v1, v2, v3, 64.0
|
||||
// SICI: v_madak_f32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x42,0x00,0x00,0x80,0x42]
|
||||
// VI: v_madak_f32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x30,0x00,0x00,0x80,0x42]
|
||||
v_madak_f32 v1, v2, v3, 64.0
|
||||
|
||||
// SICI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00]
|
||||
// VI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00]
|
||||
|
@ -430,12 +430,12 @@ v_mac_f16_e32 v1, v2, v3
|
|||
|
||||
// NOSICI: error: instruction not supported on this GPU
|
||||
// NOSICI: v_madmk_f16 v1, v2, 64.0, v3
|
||||
// VI: v_madmk_f16_e32 v1, v2, 0x5400, v3 ; encoding: [0x02,0x07,0x02,0x48,0x00,0x54,0x00,0x00]
|
||||
// VI: v_madmk_f16 v1, v2, 0x5400, v3 ; encoding: [0x02,0x07,0x02,0x48,0x00,0x54,0x00,0x00]
|
||||
v_madmk_f16 v1, v2, 64.0, v3
|
||||
|
||||
// NOSICI: error: instruction not supported on this GPU
|
||||
// NOSICI: v_madak_f16 v1, v2, v3, 64.0
|
||||
// VI: v_madak_f16_e32 v1, v2, v3, 0x5400 ; encoding: [0x02,0x07,0x02,0x4a,0x00,0x54,0x00,0x00]
|
||||
// VI: v_madak_f16 v1, v2, v3, 0x5400 ; encoding: [0x02,0x07,0x02,0x4a,0x00,0x54,0x00,0x00]
|
||||
v_madak_f16 v1, v2, v3, 64.0
|
||||
|
||||
// NOSICI: error: instruction not supported on this GPU
|
||||
|
|
|
@ -44,11 +44,11 @@
|
|||
# VI: v_add_f16_e32 v1, 0, v3 ; encoding: [0x80,0x06,0x02,0x3e]
|
||||
0xff 0x06 0x02 0x3e 0x00 0x00 0x00 0x00
|
||||
|
||||
# VI: v_madmk_f16_e32 v1, v2, 0x41, v3 ; encoding: [0x02,0x07,0x02,0x48,0x41,0x00,0x00,0x00]
|
||||
# VI: v_madmk_f16 v1, v2, 0x41, v3 ; encoding: [0x02,0x07,0x02,0x48,0x41,0x00,0x00,0x00]
|
||||
0x02 0x07 0x02 0x48 0x41 0x00 0x00 0x00
|
||||
|
||||
# VI: v_madmk_f16_e32 v1, v2, 0x10041, v3 ; encoding: [0x02,0x07,0x02,0x48,0x41,0x00,0x01,0x00]
|
||||
# VI: v_madmk_f16 v1, v2, 0x10041, v3 ; encoding: [0x02,0x07,0x02,0x48,0x41,0x00,0x01,0x00]
|
||||
0x02 0x07 0x02 0x48 0x41 0x00 0x01 0x00
|
||||
|
||||
# VI: v_madmk_f16_e32 v1, v2, 0x1000041, v3 ; encoding: [0x02,0x07,0x02,0x48,0x41,0x00,0x00,0x01]
|
||||
# VI: v_madmk_f16 v1, v2, 0x1000041, v3 ; encoding: [0x02,0x07,0x02,0x48,0x41,0x00,0x00,0x01]
|
||||
0x02 0x07 0x02 0x48 0x41 0x00 0x00 0x01
|
||||
|
|
|
@ -78,10 +78,10 @@
|
|||
# VI: v_mac_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x2c]
|
||||
0x02 0x07 0x02 0x2c
|
||||
|
||||
# VI: v_madmk_f32_e32 v1, v2, 0x42800000, v3 ; encoding: [0x02,0x07,0x02,0x2e,0x00,0x00,0x80,0x42]
|
||||
# VI: v_madmk_f32 v1, v2, 0x42800000, v3 ; encoding: [0x02,0x07,0x02,0x2e,0x00,0x00,0x80,0x42]
|
||||
0x02 0x07 0x02 0x2e 0x00 0x00 0x80 0x42
|
||||
|
||||
# VI: v_madak_f32_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x30,0x00,0x00,0x80,0x42]
|
||||
# VI: v_madak_f32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x30,0x00,0x00,0x80,0x42]
|
||||
0x02 0x07 0x02 0x30 0x00 0x00 0x80 0x42
|
||||
|
||||
# VI: v_bcnt_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8b,0xd2,0x02,0x07,0x02,0x00]
|
||||
|
@ -207,10 +207,10 @@
|
|||
# VI: v_mac_f16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x46]
|
||||
0x02 0x07 0x02 0x46
|
||||
|
||||
# VI: v_madmk_f16_e32 v1, v2, 0x42800000, v3 ; encoding: [0x02,0x07,0x02,0x48,0x00,0x00,0x80,0x42]
|
||||
# VI: v_madmk_f16 v1, v2, 0x42800000, v3 ; encoding: [0x02,0x07,0x02,0x48,0x00,0x00,0x80,0x42]
|
||||
0x02 0x07 0x02 0x48 0x00 0x00 0x80 0x42
|
||||
|
||||
# VI: v_madak_f16_e32 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x4a,0x00,0x00,0x80,0x42]
|
||||
# VI: v_madak_f16 v1, v2, v3, 0x42800000 ; encoding: [0x02,0x07,0x02,0x4a,0x00,0x00,0x80,0x42]
|
||||
0x02 0x07 0x02 0x4a 0x00 0x00 0x80 0x42
|
||||
|
||||
# VI: v_add_u16_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
|
||||
|
|
Loading…
Reference in New Issue