[AMDGPU] Add 24-bit mulhi intrinsics in INTRINSIC_WO_CHAIN combine.

The operands of the mul24 intrinsics are simplified by
AMDGPUTargetLowering::performIntrinsicWOChainCombine(). This change adds
the 24-bit mulhi intrinsics to that combine, since their operands can be
simplified in the same way as those of the mul24 intrinsics.

Differential Revision: https://reviews.llvm.org/D112702
This commit is contained in:
Abinav Puthan Purayil 2021-10-28 16:08:59 +05:30
parent 9f8e779b42
commit 2da6ef3664
2 changed files with 31 additions and 50 deletions

View File

@ -2897,8 +2897,22 @@ static SDValue simplifyMul24(SDNode *Node24,
unsigned NewOpcode = Node24->getOpcode();
if (IsIntrin) {
unsigned IID = cast<ConstantSDNode>(Node24->getOperand(0))->getZExtValue();
NewOpcode = IID == Intrinsic::amdgcn_mul_i24 ?
AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
switch (IID) {
case Intrinsic::amdgcn_mul_i24:
NewOpcode = AMDGPUISD::MUL_I24;
break;
case Intrinsic::amdgcn_mul_u24:
NewOpcode = AMDGPUISD::MUL_U24;
break;
case Intrinsic::amdgcn_mulhi_i24:
NewOpcode = AMDGPUISD::MULHI_I24;
break;
case Intrinsic::amdgcn_mulhi_u24:
NewOpcode = AMDGPUISD::MULHI_U24;
break;
default:
llvm_unreachable("Expected 24-bit mul intrinsic");
}
}
APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24);
@ -3107,6 +3121,8 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
switch (IID) {
case Intrinsic::amdgcn_mul_i24:
case Intrinsic::amdgcn_mul_u24:
case Intrinsic::amdgcn_mulhi_i24:
case Intrinsic::amdgcn_mulhi_u24:
return simplifyMul24(N, DCI);
case Intrinsic::amdgcn_fract:
case Intrinsic::amdgcn_rsq:

View File

@ -575,11 +575,9 @@ define i64 @test_umul48_i64(i64 %lhs, i64 %rhs) {
; GCN-LABEL: test_umul48_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s4, 0xffffff
; GCN-NEXT: v_and_b32_e32 v1, s4, v0
; GCN-NEXT: v_and_b32_e32 v3, s4, v2
; GCN-NEXT: v_mul_u32_u24_e32 v0, v0, v2
; GCN-NEXT: v_mul_hi_u32_u24_e32 v1, v1, v3
; GCN-NEXT: v_mul_u32_u24_e32 v3, v0, v2
; GCN-NEXT: v_mul_hi_u32_u24_e32 v1, v0, v2
; GCN-NEXT: v_mov_b32_e32 v0, v3
; GCN-NEXT: s_setpc_b64 s[30:31]
%lhs24 = and i64 %lhs, 16777215
%rhs24 = and i64 %rhs, 16777215
@ -588,49 +586,16 @@ define i64 @test_umul48_i64(i64 %lhs, i64 %rhs) {
}
define <2 x i64> @test_umul48_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; SI-LABEL: test_umul48_v2i64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_mov_b32 s4, 0xffffff
; SI-NEXT: v_mul_u32_u24_e32 v5, v0, v4
; SI-NEXT: v_mul_u32_u24_e32 v7, v2, v6
; SI-NEXT: v_and_b32_e32 v2, s4, v2
; SI-NEXT: v_and_b32_e32 v0, s4, v0
; SI-NEXT: v_and_b32_e32 v3, s4, v6
; SI-NEXT: v_and_b32_e32 v1, s4, v4
; SI-NEXT: v_mul_hi_u32_u24_e32 v1, v0, v1
; SI-NEXT: v_mul_hi_u32_u24_e32 v3, v2, v3
; SI-NEXT: v_mov_b32_e32 v0, v5
; SI-NEXT: v_mov_b32_e32 v2, v7
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: test_umul48_v2i64:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: s_mov_b32 s4, 0xffffff
; VI-NEXT: v_and_b32_e32 v3, s4, v2
; VI-NEXT: v_and_b32_e32 v1, s4, v0
; VI-NEXT: v_and_b32_e32 v5, s4, v6
; VI-NEXT: v_and_b32_e32 v7, s4, v4
; VI-NEXT: v_mul_u32_u24_e32 v0, v0, v4
; VI-NEXT: v_mul_hi_u32_u24_e32 v1, v1, v7
; VI-NEXT: v_mul_u32_u24_e32 v2, v2, v6
; VI-NEXT: v_mul_hi_u32_u24_e32 v3, v3, v5
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: test_umul48_v2i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, 0xffffff
; GFX9-NEXT: v_and_b32_e32 v3, s4, v2
; GFX9-NEXT: v_and_b32_e32 v1, s4, v0
; GFX9-NEXT: v_and_b32_e32 v5, s4, v6
; GFX9-NEXT: v_and_b32_e32 v7, s4, v4
; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v4
; GFX9-NEXT: v_mul_hi_u32_u24_e32 v1, v1, v7
; GFX9-NEXT: v_mul_u32_u24_e32 v2, v2, v6
; GFX9-NEXT: v_mul_hi_u32_u24_e32 v3, v3, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: test_umul48_v2i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_u32_u24_e32 v5, v0, v4
; GCN-NEXT: v_mul_hi_u32_u24_e32 v1, v0, v4
; GCN-NEXT: v_mul_u32_u24_e32 v4, v2, v6
; GCN-NEXT: v_mul_hi_u32_u24_e32 v3, v2, v6
; GCN-NEXT: v_mov_b32_e32 v0, v5
; GCN-NEXT: v_mov_b32_e32 v2, v4
; GCN-NEXT: s_setpc_b64 s[30:31]
%lhs24 = and <2 x i64> %lhs, <i64 16777215, i64 16777215>
%rhs24 = and <2 x i64> %rhs, <i64 16777215, i64 16777215>
%mul = mul <2 x i64> %lhs24, %rhs24