[VP] ISD helper functions [VE] isel for vp_add, vp_and

This implements vp_add and vp_and for the VE target by lowering them to the
VVP_* layer. We also add helper functions for VP SDNodes (isVPOpcode,
getVPMaskIdx, getVPExplicitVectorLengthIdx).

Reviewed By: kaz7

Differential Revision: https://reviews.llvm.org/D93766
commit 611d3c63f3 (parent b0dc54e08a)
Author: Simon Moll
Date:   2021-01-08 13:53:18 +01:00

6 changed files with 136 additions and 36 deletions
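Taken together, the new helpers let a target recognize a VP node and pull out
its mask and EVL operands. A minimal sketch of such a caller (illustrative
only, not code from this patch):

  unsigned Opcode = Op.getOpcode();
  if (ISD::isVPOpcode(Opcode)) {
    // Every VP SDNode carries a mask and an explicit vector length, so
    // both Optionals are populated here and getValue() is safe.
    SDValue Mask = Op.getOperand(ISD::getVPMaskIdx(Opcode).getValue());
    SDValue EVL =
        Op.getOperand(ISD::getVPExplicitVectorLengthIdx(Opcode).getValue());
  }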

llvm/include/llvm/CodeGen/ISDOpcodes.h

@@ -1196,6 +1196,15 @@ static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END + 500;
 /// For example ISD::AND for ISD::VECREDUCE_AND.
 NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode);
 
+/// Whether this is a vector-predicated Opcode.
+bool isVPOpcode(unsigned Opcode);
+
+/// The operand position of the vector mask.
+Optional<unsigned> getVPMaskIdx(unsigned Opcode);
+
+/// The operand position of the explicit vector length parameter.
+Optional<unsigned> getVPExplicitVectorLengthIdx(unsigned Opcode);
+
 //===--------------------------------------------------------------------===//
 /// MemIndexedMode enum - This enum defines the load / store indexed
 /// addressing modes.

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

@@ -377,6 +377,41 @@ ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) {
   }
 }
 
+bool ISD::isVPOpcode(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    return false;
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...)                                   \
+  case ISD::SDOPC:                                                             \
+    return true;
+#include "llvm/IR/VPIntrinsics.def"
+  }
+}
+
+/// The operand position of the vector mask.
+Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    return None;
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, ...)        \
+  case ISD::SDOPC:                                                             \
+    return MASKPOS;
+#include "llvm/IR/VPIntrinsics.def"
+  }
+}
+
+/// The operand position of the explicit vector length parameter.
+Optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    return None;
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, EVLPOS)     \
+  case ISD::SDOPC:                                                             \
+    return EVLPOS;
+#include "llvm/IR/VPIntrinsics.def"
+  }
+}
+
 ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
   switch (ExtType) {
   case ISD::EXTLOAD:
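All three helpers generate their switch cases by re-including
VPIntrinsics.def with BEGIN_REGISTER_VP_SDNODE redefined. Assuming the .def
file registers vp_add with the mask at operand 2 and the vector length at
operand 3 (consistent with the vp_add test below), the include inside
getVPMaskIdx expands to roughly:

  case ISD::VP_ADD:
    return 2; // MASKPOS; getVPExplicitVectorLengthIdx returns 3 (EVLPOS)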

llvm/lib/Target/VE/VEISelLowering.cpp

@@ -301,6 +301,8 @@ void VETargetLowering::initVPUActions() {
     // TODO We will custom-widen into VVP_* nodes in the future. While we are
     // buildling the infrastructure for this, we only do this for legal vector
     // VTs.
+#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME)                                     \
+  setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
 #define ADD_VVP_OP(VVP_NAME, ISD_NAME)                                         \
   setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
 #include "VVPNodes.def"
@@ -1666,7 +1668,11 @@ SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
 }
 
 SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
-  switch (Op.getOpcode()) {
+  unsigned Opcode = Op.getOpcode();
+  if (ISD::isVPOpcode(Opcode))
+    return lowerToVVP(Op, DAG);
+
+  switch (Opcode) {
   default:
     llvm_unreachable("Should not custom lower this!");
   case ISD::ATOMIC_FENCE:
@@ -2664,8 +2670,11 @@ bool VETargetLowering::hasAndNot(SDValue Y) const {
 }
 
 /// \returns the VVP_* SDNode opcode corresponsing to \p OC.
-static Optional<unsigned> getVVPOpcode(unsigned OC) {
-  switch (OC) {
+static Optional<unsigned> getVVPOpcode(unsigned Opcode) {
+  switch (Opcode) {
+#define HANDLE_VP_TO_VVP(VPOPC, VVPNAME)                                       \
+  case ISD::VPOPC:                                                             \
+    return VEISD::VVPNAME;
 #define ADD_VVP_OP(VVPNAME, SDNAME)                                            \
   case VEISD::VVPNAME:                                                         \
   case ISD::SDNAME:                                                            \
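For the ADD entry of VVPNodes.def, the two macros make getVVPOpcode fold the
VP, VVP, and generic opcodes of an operation onto one VVP opcode:

  case ISD::VP_ADD:      // from HANDLE_VP_TO_VVP(VP_ADD, VVP_ADD)
    return VEISD::VVP_ADD;
  case VEISD::VVP_ADD:   // from ADD_VVP_OP(VVP_ADD, ADD)
  case ISD::ADD:
    return VEISD::VVP_ADD;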
@@ -2677,27 +2686,41 @@ static Optional<unsigned> getVVPOpcode(unsigned OC) {
 SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
   // Can we represent this as a VVP node.
-  auto OCOpt = getVVPOpcode(Op->getOpcode());
-  if (!OCOpt.hasValue())
+  const unsigned Opcode = Op->getOpcode();
+  auto VVPOpcodeOpt = getVVPOpcode(Opcode);
+  if (!VVPOpcodeOpt.hasValue())
     return SDValue();
-  unsigned VVPOC = OCOpt.getValue();
+  unsigned VVPOpcode = VVPOpcodeOpt.getValue();
 
+  const bool FromVP = ISD::isVPOpcode(Opcode);
+
   // The representative and legalized vector type of this operation.
+  SDLoc DL(Op);
+  MVT MaskVT = MVT::v256i1; // TODO: packed mode.
   EVT OpVecVT = Op.getValueType();
   EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT);
 
-  // Materialize the VL parameter.
-  SDLoc DL(Op);
-  SDValue AVL = DAG.getConstant(OpVecVT.getVectorNumElements(), DL, MVT::i32);
-  MVT MaskVT = MVT::v256i1;
-  SDValue ConstTrue = DAG.getConstant(1, DL, MVT::i32);
-  SDValue Mask = DAG.getNode(VEISD::VEC_BROADCAST, DL, MaskVT,
-                             ConstTrue); // emit a VEISD::VEC_BROADCAST here.
+  SDValue AVL;
+  SDValue Mask;
+  if (FromVP) {
+    // All upstream VP SDNodes always have a mask and avl.
+    auto MaskIdx = ISD::getVPMaskIdx(Opcode).getValue();
+    auto AVLIdx = ISD::getVPExplicitVectorLengthIdx(Opcode).getValue();
+    Mask = Op->getOperand(MaskIdx);
+    AVL = Op->getOperand(AVLIdx);
+  } else {
+    // Materialize the VL parameter.
+    AVL = DAG.getConstant(OpVecVT.getVectorNumElements(), DL, MVT::i32);
+    SDValue ConstTrue = DAG.getConstant(1, DL, MVT::i32);
+    Mask = DAG.getNode(VEISD::VEC_BROADCAST, DL, MaskVT,
+                       ConstTrue); // emit a VEISD::VEC_BROADCAST here.
+  }
 
   // Categories we are interested in.
   bool IsBinaryOp = false;
-  switch (VVPOC) {
+  switch (VVPOpcode) {
 #define ADD_BINARY_VVP_OP(VVPNAME, ...)                                        \
   case VEISD::VVPNAME:                                                         \
     IsBinaryOp = true;                                                         \
@@ -2707,7 +2730,7 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
   if (IsBinaryOp) {
     assert(LegalVecVT.isSimple());
-    return DAG.getNode(VVPOC, DL, LegalVecVT, Op->getOperand(0),
+    return DAG.getNode(VVPOpcode, DL, LegalVecVT, Op->getOperand(0),
                        Op->getOperand(1), Mask, AVL);
   }
   llvm_unreachable("lowerToVVP called for unexpected SDNode.");

llvm/lib/Target/VE/VVPNodes.def

@@ -10,6 +10,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+/// HANDLE_VP_TO_VVP(VPOPC, VVPOPC)
+/// \p VPOPC is the VP_* SDNode opcode.
+/// \p VVPOPC is the VVP_* SDNode opcode.
+#ifndef HANDLE_VP_TO_VVP
+#define HANDLE_VP_TO_VVP(VPOPC, VVPOPC)
+#endif
+
 /// ADD_VVP_OP(VVPNAME,SDNAME)
 /// \p VVPName is a VVP SDNode operator.
 /// \p SDNAME is the generic SD opcode corresponding to \p VVPName.

@@ -21,7 +28,7 @@
 /// \p VVPName is a VVP Binary operator.
 /// \p SDNAME is the generic SD opcode corresponding to \p VVPName.
 #ifndef ADD_BINARY_VVP_OP
-#define ADD_BINARY_VVP_OP(X,Y) ADD_VVP_OP(X,Y)
+#define ADD_BINARY_VVP_OP(X,Y) ADD_VVP_OP(X,Y) HANDLE_VP_TO_VVP(VP_##Y, X)
 #endif
 
 // Integer arithmetic.

@@ -29,5 +36,6 @@ ADD_BINARY_VVP_OP(VVP_ADD,ADD)
 ADD_BINARY_VVP_OP(VVP_AND,AND)
 
+#undef HANDLE_VP_TO_VVP
 #undef ADD_BINARY_VVP_OP
 #undef ADD_VVP_OP
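Each macro defaults to a no-op and is #undef'd at the end of the file, so an
include site defines only what it consumes. A hypothetical consumer
(VPToVVPTable is illustrative, not a name from this patch) that collects the
VP-to-VVP opcode pairs:

  #define HANDLE_VP_TO_VVP(VPOPC, VVPOPC) {ISD::VPOPC, VEISD::VVPOPC},
  static const std::pair<unsigned, unsigned> VPToVVPTable[] = {
  #include "VVPNodes.def"
  };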

llvm/test/CodeGen/VE/Vector/vp_add.ll

@@ -1,16 +1,29 @@
-; REQUIRES: asserts
-; RUN: not --crash llc %s -march=ve -mattr=+vpu -o /dev/null |& FileCheck %s
-; CHECK: t{{[0-9]+}}: v256i32 = vp_add [[A:t[0-9]+]], [[B:t[0-9]+]], [[MASK:t[0-9]+]], [[EVL:t[0-9]+]]
-; CHECK: [[A]]: v256i32
-; CHECK: [[B]]: v256i32
-; CHECK: [[MASK]]: v256i1
-; CHECK: [[EVL]]: i32
-declare <256 x i32> @llvm.vp.add.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
 
-define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+define fastcc <256 x i32> @test_vp_add_v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_add_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vadds.w.sx %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
   %r0 = call <256 x i32> @llvm.vp.add.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
   ret <256 x i32> %r0
 }
+
+; integer arith
+declare <256 x i32> @llvm.vp.add.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+declare <256 x i64> @llvm.vp.add.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_int_v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_int_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vadds.l %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.add.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}
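The checked sequence shows how the forwarded AVL is consumed on VE: the and
with (32)0 zero-extends the i32 %n, lvl moves it into the vector length
register, and the add then executes as a masked vadds under %vm1 (b.l.t is
the return branch).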

llvm/test/CodeGen/VE/Vector/vp_and.ll

@@ -1,16 +1,28 @@
-; REQUIRES: asserts
-; RUN: not --crash llc %s -march=ve -mattr=+vpu -o /dev/null |& FileCheck %s
-; CHECK: t{{[0-9]+}}: v256i32 = vp_and [[A:t[0-9]+]], [[B:t[0-9]+]], [[MASK:t[0-9]+]], [[EVL:t[0-9]+]]
-; CHECK: [[A]]: v256i32
-; CHECK: [[B]]: v256i32
-; CHECK: [[MASK]]: v256i1
-; CHECK: [[EVL]]: i32
-declare <256 x i32> @llvm.vp.and.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
 
-define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+define fastcc <256 x i32> @test_vp_and_v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_and_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvand.lo %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
   %r0 = call <256 x i32> @llvm.vp.and.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
   ret <256 x i32> %r0
 }
+
+; integer arith
+declare <256 x i32> @llvm.vp.and.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+declare <256 x i64> @llvm.vp.and.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_int_v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_int_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vand %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.and.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}