[PowerPC] Exploit the High Order Vector Multiply Instructions on Power10

This patch aims to exploit the following vector multiply high instructions on Power10:

  vmulhsw VRT, VRA, VRB
  vmulhsd VRT, VRA, VRB
  vmulhuw VRT, VRA, VRB
  vmulhud VRT, VRA, VRB

Differential Revision: https://reviews.llvm.org/D82584
2020-07-24 20:57:57 -05:00 · 2020-07-24 20:57:57 -05:00 · 739cd2638b
parent e937840dbd
commit 739cd2638b
3 changed files with 71 additions and 4 deletions
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -811,6 +811,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,

    if (Subtarget.isISA3_1()) {
      setOperationAction(ISD::MUL, MVT::v2i64, Legal);
+      setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
+      setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
+      setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
+      setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
      setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
      setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
      setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -976,13 +976,17 @@ let Predicates = [IsISA3_1] in {
                        "vmulld $vD, $vA, $vB", IIC_VecGeneral,
                        [(set v2i64:$vD, (mul v2i64:$vA, v2i64:$vB))]>;
  def VMULHSW : VXForm_1<905, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                         "vmulhsw $vD, $vA, $vB", IIC_VecGeneral, []>;
+                         "vmulhsw $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v4i32:$vD, (mulhs v4i32:$vA, v4i32:$vB))]>;
  def VMULHUW : VXForm_1<649, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                         "vmulhuw $vD, $vA, $vB", IIC_VecGeneral, []>;
+                         "vmulhuw $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v4i32:$vD, (mulhu v4i32:$vA, v4i32:$vB))]>;
  def VMULHSD : VXForm_1<969, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                         "vmulhsd $vD, $vA, $vB", IIC_VecGeneral, []>;
+                         "vmulhsd $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v2i64:$vD, (mulhs v2i64:$vA, v2i64:$vB))]>;
  def VMULHUD : VXForm_1<713, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                         "vmulhud $vD, $vA, $vB", IIC_VecGeneral, []>;
+                         "vmulhud $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v2i64:$vD, (mulhu v2i64:$vA, v2i64:$vB))]>;
  def VMODSW : VXForm_1<1931, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                        "vmodsw $vD, $vA, $vB", IIC_VecGeneral,
                        [(set v4i32:$vD, (srem v4i32:$vA, v4i32:$vB))]>;
--- a/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
@@ -7,6 +7,9 @@
 ; RUN:   FileCheck %s

 ; This test case aims to test the vector multiply instructions on Power10.
+; This includes the low order and high order versions of vector multiply.
+; The low order version operates on doublewords, whereas the high order version
+; operates on signed and unsigned words and doublewords.

 define <2 x i64> @test_vmulld(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vmulld:
@@ -17,3 +20,59 @@ entry:
  %mul = mul <2 x i64> %b, %a
  ret <2 x i64> %mul
 }
+
+define <2 x i64> @test_vmulhsd(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmulhsd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmulhsd v2, v3, v2
+; CHECK-NEXT:    blr
+entry:
+  %0 = sext <2 x i64> %a to <2 x i128>
+  %1 = sext <2 x i64> %b to <2 x i128>
+  %mul = mul <2 x i128> %1, %0
+  %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
+  %tr = trunc <2 x i128> %shr to <2 x i64>
+  ret <2 x i64> %tr
+}
+
+define <2 x i64> @test_vmulhud(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmulhud:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmulhud v2, v3, v2
+; CHECK-NEXT:    blr
+entry:
+  %0 = zext <2 x i64> %a to <2 x i128>
+  %1 = zext <2 x i64> %b to <2 x i128>
+  %mul = mul <2 x i128> %1, %0
+  %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
+  %tr = trunc <2 x i128> %shr to <2 x i64>
+  ret <2 x i64> %tr
+}
+
+define <4 x i32> @test_vmulhsw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmulhsw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmulhsw v2, v3, v2
+; CHECK-NEXT:    blr
+entry:
+  %0 = sext <4 x i32> %a to <4 x i64>
+  %1 = sext <4 x i32> %b to <4 x i64>
+  %mul = mul <4 x i64> %1, %0
+  %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
+  %tr = trunc <4 x i64> %shr to <4 x i32>
+  ret <4 x i32> %tr
+}
+
+define <4 x i32> @test_vmulhuw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmulhuw:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmulhuw v2, v3, v2
+; CHECK-NEXT:    blr
+entry:
+  %0 = zext <4 x i32> %a to <4 x i64>
+  %1 = zext <4 x i32> %b to <4 x i64>
+  %mul = mul <4 x i64> %1, %0
+  %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
+  %tr = trunc <4 x i64> %shr to <4 x i32>
+  ret <4 x i32> %tr
+}