forked from OSchip/llvm-project
[DAGCombiner] Add support for mulhi const folding in DAGCombiner
Differential Revision: https://reviews.llvm.org/D103323
Change-Id: I4ffaaa32301795ba8a339567a68e77fe0862b869
This commit is contained in:
parent
bf17ee1950
commit
83cb9632a1
|
@@ -4462,6 +4462,10 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
|
||||||
return DAG.getConstant(0, DL, VT);
|
return DAG.getConstant(0, DL, VT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// fold (mulhs c1, c2)
|
||||||
|
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
|
||||||
|
return C;
|
||||||
|
|
||||||
// fold (mulhs x, 0) -> 0
|
// fold (mulhs x, 0) -> 0
|
||||||
if (isNullConstant(N1))
|
if (isNullConstant(N1))
|
||||||
return N1;
|
return N1;
|
||||||
|
@@ -4510,6 +4514,10 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
|
||||||
return DAG.getConstant(0, DL, VT);
|
return DAG.getConstant(0, DL, VT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// fold (mulhu c1, c2)
|
||||||
|
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
|
||||||
|
return C;
|
||||||
|
|
||||||
// fold (mulhu x, 0) -> 0
|
// fold (mulhu x, 0) -> 0
|
||||||
if (isNullConstant(N1))
|
if (isNullConstant(N1))
|
||||||
return N1;
|
return N1;
|
||||||
|
|
|
@@ -5079,6 +5079,18 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
|
||||||
if (!C2.getBoolValue())
|
if (!C2.getBoolValue())
|
||||||
break;
|
break;
|
||||||
return C1.srem(C2);
|
return C1.srem(C2);
|
||||||
|
case ISD::MULHS: {
|
||||||
|
unsigned FullWidth = C1.getBitWidth() * 2;
|
||||||
|
APInt C1Ext = C1.sext(FullWidth);
|
||||||
|
APInt C2Ext = C2.sext(FullWidth);
|
||||||
|
return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());
|
||||||
|
}
|
||||||
|
case ISD::MULHU: {
|
||||||
|
unsigned FullWidth = C1.getBitWidth() * 2;
|
||||||
|
APInt C1Ext = C1.zext(FullWidth);
|
||||||
|
APInt C2Ext = C2.zext(FullWidth);
|
||||||
|
return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return llvm::None;
|
return llvm::None;
|
||||||
}
|
}
|
||||||
|
|
|
@@ -203,15 +203,9 @@ bb:
|
||||||
|
|
||||||
define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
|
define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
|
||||||
; GCN-LABEL: v_test_udiv64_mulhi_fold
|
; GCN-LABEL: v_test_udiv64_mulhi_fold
|
||||||
; GFX1030: s_mov_b32 [[VAL1:s[0-9]+]], 0xa9000000
|
; GFX1030: s_add_u32 [[VAL:s[0-9]+]], 0x4237, s{{[0-9]+}}
|
||||||
; GFX1030: s_brev_b32 [[VAL2:s[0-9]+]], 6
|
; GFX1030-NOT: s_mul_hi_u32
|
||||||
; GFX1030: s_movk_i32 [[VAL3:s[0-9]+]], 0x500
|
; GFX1030: v_add_co_u32 v{{[0-9]+}}, [[VAL]], 0xa9000000, [[VAL]]
|
||||||
; GFX1030: s_mul_hi_u32 s7, [[VAL1]], [[VAL2]]
|
|
||||||
; GFX1030: s_mov_b32 [[VAL4:s[0-9]+]], 0xa7c5
|
|
||||||
; GFX1030: s_mul_hi_u32 s8, [[VAL1]], [[VAL3]]
|
|
||||||
; GFX1030: s_mul_hi_u32 s5, [[VAL4]], [[VAL2]]
|
|
||||||
; GFX1030: s_mul_hi_u32 s6, [[VAL4]], [[VAL3]]
|
|
||||||
; GFX1030: v_add_co_u32 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
|
|
||||||
%d = udiv i64 %arg, 100000
|
%d = udiv i64 %arg, 100000
|
||||||
ret i64 %d
|
ret i64 %d
|
||||||
}
|
}
|
||||||
|
|
|
@@ -2190,14 +2190,12 @@ define <8 x i64> @sext_mulhsw_v8i16_ashr_i64(<8 x i16> %a, <8 x i16> %b) {
|
||||||
define <8 x i16> @sse2_pmulh_w_const(<8 x i16> %a0, <8 x i16> %a1) {
|
define <8 x i16> @sse2_pmulh_w_const(<8 x i16> %a0, <8 x i16> %a1) {
|
||||||
; SSE-LABEL: sse2_pmulh_w_const:
|
; SSE-LABEL: sse2_pmulh_w_const:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65535,65534,65533,65532,65531,65530,65529,0]
|
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,65535,65535,65535,65535,65535,65535,0]
|
||||||
; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: sse2_pmulh_w_const:
|
; AVX-LABEL: sse2_pmulh_w_const:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65535,65534,65533,65532,65531,65530,65529,0]
|
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,65535,65535,65535,65535,65535,65535,0]
|
||||||
; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
%res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0>, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
|
%res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0>, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
|
||||||
ret <8 x i16> %res
|
ret <8 x i16> %res
|
||||||
|
@@ -2207,14 +2205,12 @@ declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>)
|
||||||
define <8 x i16> @sse2_pmulhu_w_const(<8 x i16> %a0, <8 x i16> %a1) {
|
define <8 x i16> @sse2_pmulhu_w_const(<8 x i16> %a0, <8 x i16> %a1) {
|
||||||
; SSE-LABEL: sse2_pmulhu_w_const:
|
; SSE-LABEL: sse2_pmulhu_w_const:
|
||||||
; SSE: # %bb.0:
|
; SSE: # %bb.0:
|
||||||
; SSE-NEXT: movdqa {{.*#+}} xmm0 = [65535,65534,65533,65532,65531,65530,65529,0]
|
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,1,2,3,4,5,0]
|
||||||
; SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: sse2_pmulhu_w_const:
|
; AVX-LABEL: sse2_pmulhu_w_const:
|
||||||
; AVX: # %bb.0:
|
; AVX: # %bb.0:
|
||||||
; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [65535,65534,65533,65532,65531,65530,65529,0]
|
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,1,2,3,4,5,0]
|
||||||
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
|
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
%res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0>, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
|
%res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0>, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
|
||||||
ret <8 x i16> %res
|
ret <8 x i16> %res
|
||||||
|
|
Loading…
Reference in New Issue