[VE] MUL,SUB,OR,XOR v256i32|64 isel

v256i32|i64 isel patterns and tests. Reviewed By: kaz7 Differential Revision: https://reviews.llvm.org/D115643
2021-12-14 13:23:34 +01:00 · 2021-12-14 13:23:34 +01:00 · 6847379e89
parent 12d42653b3
commit 6847379e89
7 changed files with 120 additions and 40 deletions
--- a/llvm/lib/Target/VE/VVPInstrInfo.td
+++ b/llvm/lib/Target/VE/VVPInstrInfo.td
@ -40,7 +40,18 @@ class vvp_commutative<SDNode RootOp> :
 def vvp_add    : SDNode<"VEISD::VVP_ADD",  SDTIntBinOpVVP>;
 def c_vvp_add  : vvp_commutative<vvp_add>;

+def vvp_sub    : SDNode<"VEISD::VVP_SUB",  SDTIntBinOpVVP>;
+
+def vvp_mul    : SDNode<"VEISD::VVP_MUL",  SDTIntBinOpVVP>;
+def c_vvp_mul  : vvp_commutative<vvp_mul>;
+
 def vvp_and    : SDNode<"VEISD::VVP_AND",  SDTIntBinOpVVP>;
 def c_vvp_and  : vvp_commutative<vvp_and>;

+def vvp_or     : SDNode<"VEISD::VVP_OR",  SDTIntBinOpVVP>;
+def c_vvp_or   : vvp_commutative<vvp_or>;
+
+def vvp_xor    : SDNode<"VEISD::VVP_XOR",  SDTIntBinOpVVP>;
+def c_vvp_xor  : vvp_commutative<vvp_xor>;
+
 // } Binary Operators
--- a/llvm/lib/Target/VE/VVPInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@ -87,6 +87,18 @@ multiclass Binary_rv_vv_ShortLong<
 defm : Binary_rv_vv_ShortLong<c_vvp_add,
                              i64, v256i64, "VADDSL",
                              i32, v256i32, "VADDSWSX">;
+defm : Binary_rv_vv_ShortLong<vvp_sub,
+                              i64, v256i64, "VSUBSL",
+                              i32, v256i32, "VSUBSWSX">;
+defm : Binary_rv_vv_ShortLong<c_vvp_mul,
+                              i64, v256i64, "VMULSL",
+                              i32, v256i32, "VMULSWSX">;
 defm : Binary_rv_vv_ShortLong<c_vvp_and,
                              i64, v256i64, "VAND",
                              i32, v256i32, "PVANDLO">;
+defm : Binary_rv_vv_ShortLong<c_vvp_or,
+                              i64, v256i64, "VOR",
+                              i32, v256i32, "PVORLO">;
+defm : Binary_rv_vv_ShortLong<c_vvp_xor,
+                              i64, v256i64, "VXOR",
+                              i32, v256i32, "PVXORLO">;
--- a/llvm/lib/Target/VE/VVPNodes.def
+++ b/llvm/lib/Target/VE/VVPNodes.def
@ -33,8 +33,12 @@

 // Integer arithmetic.
 ADD_BINARY_VVP_OP(VVP_ADD,ADD)
+ADD_BINARY_VVP_OP(VVP_SUB,SUB)
+ADD_BINARY_VVP_OP(VVP_MUL,MUL)

 ADD_BINARY_VVP_OP(VVP_AND,AND)
+ADD_BINARY_VVP_OP(VVP_OR,OR)
+ADD_BINARY_VVP_OP(VVP_XOR,XOR)

 #undef HANDLE_VP_TO_VVP
 #undef ADD_BINARY_VVP_OP
--- a/llvm/test/CodeGen/VE/Vector/vp_mul.ll
+++ b/llvm/test/CodeGen/VE/Vector/vp_mul.ll
@ -1,16 +1,29 @@
-; REQUIRES: asserts
-; RUN: not --crash llc < %s -march=ve -mattr=+vpu -o /dev/null 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s

-; CHECK:  t{{[0-9]+}}: v256i32 = vp_mul [[A:t[0-9]+]], [[B:t[0-9]+]], [[MASK:t[0-9]+]], [[EVL:t[0-9]+]] 
-; CHECK:  [[A]]: v256i32
-; CHECK:  [[B]]: v256i32
-; CHECK:  [[MASK]]: v256i1
-; CHECK:  [[EVL]]: i32
+declare <256 x i32> @llvm.vp.mul.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)

-define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+define fastcc <256 x i32> @test_vp_v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmuls.w.sx %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
  %r0 = call <256 x i32> @llvm.vp.mul.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
  ret <256 x i32> %r0
 }

-; integer arith
-declare <256 x i32> @llvm.vp.mul.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+
+declare <256 x i64> @llvm.vp.mul.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmuls.l %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.mul.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}
--- a/llvm/test/CodeGen/VE/Vector/vp_or.ll
+++ b/llvm/test/CodeGen/VE/Vector/vp_or.ll
@ -1,16 +1,30 @@
-; REQUIRES: asserts
-; RUN: not --crash llc < %s -march=ve -mattr=+vpu -o /dev/null 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s

-; CHECK:  t{{[0-9]+}}: v256i32 = vp_or [[A:t[0-9]+]], [[B:t[0-9]+]], [[MASK:t[0-9]+]], [[EVL:t[0-9]+]] 
-; CHECK:  [[A]]: v256i32
-; CHECK:  [[B]]: v256i32
-; CHECK:  [[MASK]]: v256i1
-; CHECK:  [[EVL]]: i32
+declare <256 x i32> @llvm.vp.or.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)

-define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+define fastcc <256 x i32> @test_vp_v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvor.lo %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
  %r0 = call <256 x i32> @llvm.vp.or.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
  ret <256 x i32> %r0
 }

-; integer arith
-declare <256 x i32> @llvm.vp.or.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+
+declare <256 x i64> @llvm.vp.or.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vor %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.or.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}
+
--- a/llvm/test/CodeGen/VE/Vector/vp_sub.ll
+++ b/llvm/test/CodeGen/VE/Vector/vp_sub.ll
@ -1,16 +1,29 @@
-; REQUIRES: asserts
-; RUN: not --crash llc < %s -march=ve -mattr=+vpu -o /dev/null 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s

-; CHECK:  t{{[0-9]+}}: v256i32 = vp_sub [[A:t[0-9]+]], [[B:t[0-9]+]], [[MASK:t[0-9]+]], [[EVL:t[0-9]+]] 
-; CHECK:  [[A]]: v256i32
-; CHECK:  [[B]]: v256i32
-; CHECK:  [[MASK]]: v256i1
-; CHECK:  [[EVL]]: i32
+declare <256 x i32> @llvm.vp.sub.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)

-define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+define fastcc <256 x i32> @test_vp_v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vsubs.w.sx %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
  %r0 = call <256 x i32> @llvm.vp.sub.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
  ret <256 x i32> %r0
 }

-; integer arith
-declare <256 x i32> @llvm.vp.sub.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+
+declare <256 x i64> @llvm.vp.sub.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vsubs.l %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.sub.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}
--- a/llvm/test/CodeGen/VE/Vector/vp_xor.ll
+++ b/llvm/test/CodeGen/VE/Vector/vp_xor.ll
@ -1,16 +1,29 @@
-; REQUIRES: asserts
-; RUN: not --crash llc < %s -march=ve -mattr=+vpu -o /dev/null 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s

-; CHECK:  t{{[0-9]+}}: v256i32 = vp_xor [[A:t[0-9]+]], [[B:t[0-9]+]], [[MASK:t[0-9]+]], [[EVL:t[0-9]+]] 
-; CHECK:  [[A]]: v256i32
-; CHECK:  [[B]]: v256i32
-; CHECK:  [[MASK]]: v256i1
-; CHECK:  [[EVL]]: i32
+declare <256 x i32> @llvm.vp.xor.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)

-define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+define fastcc <256 x i32> @test_vp_v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvxor.lo %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
  %r0 = call <256 x i32> @llvm.vp.xor.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
  ret <256 x i32> %r0
 }

-; integer arith
-declare <256 x i32> @llvm.vp.xor.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+
+declare <256 x i64> @llvm.vp.xor.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vxor %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.xor.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}