[AMDGPU][MC][GFX8+] Enabled clamp for v_mul_i32_i24_e64 and v_mul_u32_u24_e64

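See bug 45925: https://bugs.llvm.org/show_bug.cgi?id=45925

V_MUL_I32_I24 and V_MUL_U32_U24 switch from the VOP_I32_I32_I32 profile to VOP_I32_I32_I32_ARITH, so the selected V_MUL_U32_U24_e64 in the MIR tests now carries an extra immediate operand (0). The updated MC tests expect `clamp` to assemble on VI and GFX10 and to fail under the NOSICI check with "integer clamping is not supported on this GPU".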

Reviewers: arsenm, rampitec

Differential Revision: https://reviews.llvm.org/D80287
This commit is contained in:
Dmitry Preobrazhensky 2020-05-22 13:54:59 +03:00
parent bf897e6ea1
commit 933ebc4078
6 changed files with 35 additions and 5 deletions

View File

@ -467,9 +467,9 @@ defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>;
defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">; defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>; defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, fmul>; defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, fmul>;
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32, AMDGPUmul_i24>; defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_i24>; defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_i24>;
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32, AMDGPUmul_u24>; defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_u24>; defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_u24>;
defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>; defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>; defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;

View File

@ -14,7 +14,7 @@ body: |
; GCN: liveins: $sgpr0, $vgpr0 ; GCN: liveins: $sgpr0, $vgpr0
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]] ; GCN: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]]
%0:sgpr(s32) = COPY $sgpr0 %0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = COPY $vgpr0
@ -35,7 +35,7 @@ body: |
; GCN: liveins: $sgpr0, $vgpr0 ; GCN: liveins: $sgpr0, $vgpr0
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]] ; GCN: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]]
%0:vgpr(s32) = COPY $vgpr0 %0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr0
@ -56,7 +56,7 @@ body: |
; GCN: liveins: $vgpr0, $vgpr1 ; GCN: liveins: $vgpr0, $vgpr1
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec ; GCN: [[V_MUL_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e64 [[COPY]], [[COPY1]], 0, implicit $exec
; GCN: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]] ; GCN: S_ENDPGM 0, implicit [[V_MUL_U32_U24_e64_]]
%0:vgpr(s32) = COPY $vgpr0 %0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1 %1:vgpr(s32) = COPY $vgpr1

View File

@ -39551,6 +39551,9 @@ v_mul_i32_i24_e64 v5, v1, 0.5
v_mul_i32_i24_e64 v5, v1, -4.0 v_mul_i32_i24_e64 v5, v1, -4.0
// GFX10: encoding: [0x05,0x00,0x09,0xd5,0x01,0xef,0x01,0x00] // GFX10: encoding: [0x05,0x00,0x09,0xd5,0x01,0xef,0x01,0x00]
v_mul_i32_i24_e64 v5, v1, -4.0 clamp
// GFX10: encoding: [0x05,0x80,0x09,0xd5,0x01,0xef,0x01,0x00]
v_mul_i32_i24_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD v_mul_i32_i24_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// GFX10: encoding: [0xf9,0x04,0x0a,0x12,0x01,0x06,0x06,0x06] // GFX10: encoding: [0xf9,0x04,0x0a,0x12,0x01,0x06,0x06,0x06]
@ -40187,6 +40190,9 @@ v_mul_u32_u24_e64 v5, v1, 0.5
v_mul_u32_u24_e64 v5, v1, -4.0 v_mul_u32_u24_e64 v5, v1, -4.0
// GFX10: encoding: [0x05,0x00,0x0b,0xd5,0x01,0xef,0x01,0x00] // GFX10: encoding: [0x05,0x00,0x0b,0xd5,0x01,0xef,0x01,0x00]
v_mul_u32_u24_e64 v5, v1, -4.0 clamp
// GFX10: encoding: [0x05,0x80,0x0b,0xd5,0x01,0xef,0x01,0x00]
v_mul_u32_u24_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD v_mul_u32_u24_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// GFX10: encoding: [0xf9,0x04,0x0a,0x16,0x01,0x06,0x06,0x06] // GFX10: encoding: [0xf9,0x04,0x0a,0x16,0x01,0x06,0x06,0x06]

View File

@ -258,6 +258,18 @@ v_mul_i32_i24 v1, v3, s5
// SICI: v_mul_i32_i24_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x12,0xd2,0x03,0x0b,0x00,0x00] // SICI: v_mul_i32_i24_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x12,0xd2,0x03,0x0b,0x00,0x00]
// VI: v_mul_i32_i24_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x06,0xd1,0x03,0x0b,0x00,0x00] // VI: v_mul_i32_i24_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x06,0xd1,0x03,0x0b,0x00,0x00]
v_mul_i32_i24 v1, v3, s5 clamp
// NOSICI: error: integer clamping is not supported on this GPU
// VI: v_mul_i32_i24_e64 v1, v3, s5 clamp ; encoding: [0x01,0x80,0x06,0xd1,0x03,0x0b,0x00,0x00]
v_mul_u32_u24 v1, v3, s5
// SICI: v_mul_u32_u24_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x16,0xd2,0x03,0x0b,0x00,0x00]
// VI: v_mul_u32_u24_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x08,0xd1,0x03,0x0b,0x00,0x00]
v_mul_u32_u24 v1, v3, s5 clamp
// NOSICI: error: integer clamping is not supported on this GPU
// VI: v_mul_u32_u24_e64 v1, v3, s5 clamp ; encoding: [0x01,0x80,0x08,0xd1,0x03,0x0b,0x00,0x00]
v_mac_f32_e64 v0, v1, v2 v_mac_f32_e64 v0, v1, v2
// SICI: v_mac_f32_e64 v0, v1, v2 ; encoding: [0x00,0x00,0x3e,0xd2,0x01,0x05,0x02,0x00] // SICI: v_mac_f32_e64 v0, v1, v2 ; encoding: [0x00,0x00,0x3e,0xd2,0x01,0x05,0x02,0x00]
// VI: v_mac_f32_e64 v0, v1, v2 ; encoding: [0x00,0x00,0x16,0xd1,0x01,0x05,0x02,0x00] // VI: v_mac_f32_e64 v0, v1, v2 ; encoding: [0x00,0x00,0x16,0xd1,0x01,0x05,0x02,0x00]

View File

@ -88434,6 +88434,9 @@
# GFX10: v_mul_i32_i24_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x09,0xd5,0x6a,0x04,0x02,0x00] # GFX10: v_mul_i32_i24_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x09,0xd5,0x6a,0x04,0x02,0x00]
0x05,0x00,0x09,0xd5,0x6a,0x04,0x02,0x00 0x05,0x00,0x09,0xd5,0x6a,0x04,0x02,0x00
# GFX10: v_mul_i32_i24_e64 v5, vcc_lo, v2 clamp ; encoding: [0x05,0x80,0x09,0xd5,0x6a,0x04,0x02,0x00]
0x05,0x80,0x09,0xd5,0x6a,0x04,0x02,0x00
# GFX10: v_mul_i32_i24_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0xfe,0x13,0x01,0x06,0x06,0x06] # GFX10: v_mul_i32_i24_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0xfe,0x13,0x01,0x06,0x06,0x06]
0xf9,0x04,0xfe,0x13,0x01,0x06,0x06,0x06 0xf9,0x04,0xfe,0x13,0x01,0x06,0x06,0x06
@ -89262,6 +89265,9 @@
# GFX10: v_mul_u32_u24_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x0b,0xd5,0x6a,0x04,0x02,0x00] # GFX10: v_mul_u32_u24_e64 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x0b,0xd5,0x6a,0x04,0x02,0x00]
0x05,0x00,0x0b,0xd5,0x6a,0x04,0x02,0x00 0x05,0x00,0x0b,0xd5,0x6a,0x04,0x02,0x00
# GFX10: v_mul_u32_u24_e64 v5, vcc_lo, v2 clamp ; encoding: [0x05,0x80,0x0b,0xd5,0x6a,0x04,0x02,0x00]
0x05,0x80,0x0b,0xd5,0x6a,0x04,0x02,0x00
# GFX10: v_mul_u32_u24_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0xfe,0x17,0x01,0x06,0x06,0x06] # GFX10: v_mul_u32_u24_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0xfe,0x17,0x01,0x06,0x06,0x06]
0xf9,0x04,0xfe,0x17,0x01,0x06,0x06,0x06 0xf9,0x04,0xfe,0x17,0x01,0x06,0x06,0x06

View File

@ -27,12 +27,18 @@
# VI: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0c] # VI: v_mul_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0c]
0x02 0x07 0x02 0x0c 0x02 0x07 0x02 0x0c
# VI: v_mul_i32_i24_e64 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x06,0xd1,0x02,0x07,0x02,0x00]
0x01 0x80 0x06 0xd1 0x02 0x07 0x02 0x00
# VI: v_mul_hi_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0e] # VI: v_mul_hi_i32_i24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x0e]
0x02 0x07 0x02 0x0e 0x02 0x07 0x02 0x0e
# VI: v_mul_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x10] # VI: v_mul_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x10]
0x02 0x07 0x02 0x10 0x02 0x07 0x02 0x10
# VI: v_mul_u32_u24_e64 v1, v3, s5 clamp ; encoding: [0x01,0x80,0x08,0xd1,0x03,0x0b,0x00,0x00]
0x01 0x80 0x08 0xd1 0x03 0x0b 0x00 0x00
# VI: v_mul_hi_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12] # VI: v_mul_hi_u32_u24_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x12]
0x02 0x07 0x02 0x12 0x02 0x07 0x02 0x12