[PowerPC] Exploit the High Order Vector Multiply Instructions on Power10

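This patch exploits the following high order vector multiply instructions on Power10 by marking ISD::MULHS and ISD::MULHU as legal for v4i32 and v2i64 and adding the matching instruction selection patterns: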
vmulhsw VRT, VRA, VRB
vmulhsd VRT, VRA, VRB
vmulhuw VRT, VRA, VRB
vmulhud VRT, VRA, VRB

Differential Revision: https://reviews.llvm.org/D82584
This commit is contained in:
Amy Kwan 2020-07-24 20:57:57 -05:00
parent e937840dbd
commit 739cd2638b
3 changed files with 71 additions and 4 deletions

View File

@ -811,6 +811,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (Subtarget.isISA3_1()) {
setOperationAction(ISD::MUL, MVT::v2i64, Legal);
setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
setOperationAction(ISD::UDIV, MVT::v4i32, Legal);

View File

@ -976,13 +976,17 @@ let Predicates = [IsISA3_1] in {
"vmulld $vD, $vA, $vB", IIC_VecGeneral,
[(set v2i64:$vD, (mul v2i64:$vA, v2i64:$vB))]>;
def VMULHSW : VXForm_1<905, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmulhsw $vD, $vA, $vB", IIC_VecGeneral, []>;
"vmulhsw $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (mulhs v4i32:$vA, v4i32:$vB))]>;
def VMULHUW : VXForm_1<649, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmulhuw $vD, $vA, $vB", IIC_VecGeneral, []>;
"vmulhuw $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (mulhu v4i32:$vA, v4i32:$vB))]>;
def VMULHSD : VXForm_1<969, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmulhsd $vD, $vA, $vB", IIC_VecGeneral, []>;
"vmulhsd $vD, $vA, $vB", IIC_VecGeneral,
[(set v2i64:$vD, (mulhs v2i64:$vA, v2i64:$vB))]>;
def VMULHUD : VXForm_1<713, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmulhud $vD, $vA, $vB", IIC_VecGeneral, []>;
"vmulhud $vD, $vA, $vB", IIC_VecGeneral,
[(set v2i64:$vD, (mulhu v2i64:$vA, v2i64:$vB))]>;
def VMODSW : VXForm_1<1931, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmodsw $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (srem v4i32:$vA, v4i32:$vB))]>;

View File

@ -7,6 +7,9 @@
; RUN: FileCheck %s
; This test case aims to test the vector multiply instructions on Power10.
; This includes the low order and high order versions of vector multiply.
; The low order version operates on doublewords, whereas the high order version
; operates on signed and unsigned words and doublewords.
define <2 x i64> @test_vmulld(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulld:
@ -17,3 +20,59 @@ entry:
%mul = mul <2 x i64> %b, %a
ret <2 x i64> %mul
}
; Signed high-order doubleword multiply: both <2 x i64> inputs are sign-extended
; to i128, multiplied, and bits 64..127 of each product are returned. The
; expected output below is a single vmulhsd followed by blr.
define <2 x i64> @test_vmulhsd(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulhsd:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmulhsd v2, v3, v2
; CHECK-NEXT: blr
entry:
%0 = sext <2 x i64> %a to <2 x i128>
%1 = sext <2 x i64> %b to <2 x i128>
; The multiply takes %b's extension first, then %a's.
; NOTE(review): presumably this is why the expected operands are v3, v2
; (%a in v2, %b in v3) - confirm against the calling convention.
%mul = mul <2 x i128> %1, %0
; A logical shift is sufficient even in the signed case: the trunc below keeps
; only the low 64 bits of the shifted value, which are bits 64..127 of the
; product regardless of what is shifted in at the top.
%shr = lshr <2 x i128> %mul, <i128 64, i128 64>
%tr = trunc <2 x i128> %shr to <2 x i64>
ret <2 x i64> %tr
}
; Unsigned high-order doubleword multiply: both <2 x i64> inputs are
; zero-extended to i128, multiplied, and bits 64..127 of each product are
; returned. The expected output below is a single vmulhud followed by blr.
define <2 x i64> @test_vmulhud(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulhud:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmulhud v2, v3, v2
; CHECK-NEXT: blr
entry:
%0 = zext <2 x i64> %a to <2 x i128>
%1 = zext <2 x i64> %b to <2 x i128>
; The multiply takes %b's extension first, then %a's.
; NOTE(review): presumably this is why the expected operands are v3, v2
; (%a in v2, %b in v3) - confirm against the calling convention.
%mul = mul <2 x i128> %1, %0
; Shift the upper 64 bits of each 128-bit product down so the trunc below
; extracts them.
%shr = lshr <2 x i128> %mul, <i128 64, i128 64>
%tr = trunc <2 x i128> %shr to <2 x i64>
ret <2 x i64> %tr
}
; Signed high-order word multiply: both <4 x i32> inputs are sign-extended to
; i64, multiplied, and bits 32..63 of each product are returned. The expected
; output below is a single vmulhsw followed by blr.
define <4 x i32> @test_vmulhsw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmulhsw:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmulhsw v2, v3, v2
; CHECK-NEXT: blr
entry:
%0 = sext <4 x i32> %a to <4 x i64>
%1 = sext <4 x i32> %b to <4 x i64>
; The multiply takes %b's extension first, then %a's.
; NOTE(review): presumably this is why the expected operands are v3, v2
; (%a in v2, %b in v3) - confirm against the calling convention.
%mul = mul <4 x i64> %1, %0
; A logical shift is sufficient even in the signed case: the trunc below keeps
; only the low 32 bits of the shifted value, which are bits 32..63 of the
; product regardless of what is shifted in at the top.
%shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
%tr = trunc <4 x i64> %shr to <4 x i32>
ret <4 x i32> %tr
}
; Unsigned high-order word multiply: both <4 x i32> inputs are zero-extended to
; i64, multiplied, and bits 32..63 of each product are returned. The expected
; output below is a single vmulhuw followed by blr.
define <4 x i32> @test_vmulhuw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmulhuw:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmulhuw v2, v3, v2
; CHECK-NEXT: blr
entry:
%0 = zext <4 x i32> %a to <4 x i64>
%1 = zext <4 x i32> %b to <4 x i64>
; The multiply takes %b's extension first, then %a's.
; NOTE(review): presumably this is why the expected operands are v3, v2
; (%a in v2, %b in v3) - confirm against the calling convention.
%mul = mul <4 x i64> %1, %0
; Shift the upper 32 bits of each 64-bit product down so the trunc below
; extracts them.
%shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
%tr = trunc <4 x i64> %shr to <4 x i32>
ret <4 x i32> %tr
}