[VE] VE Vector Predicated SDNode, vector add isel and tests

VE Vector Predicated (VVP) SDNodes form an intermediate layer between VE
vector instructions and the initial SDNodes.

We introduce 'vvp_add' with isel and tests as the first of these VVP
nodes. VVP nodes have a mask and explicit vector length operand, which
we will make proper use of later.

Reviewed By: kaz7

Differential Revision: https://reviews.llvm.org/D91802
This commit is contained in:
Simon Moll 2020-11-23 15:33:10 +01:00
parent 619630f997
commit b955c7e630
8 changed files with 371 additions and 5 deletions

View File

@ -254,8 +254,17 @@ void VETargetLowering::initSPUActions() {
}
void VETargetLowering::initVPUActions() {
for (MVT LegalVecVT : AllVectorVTs)
for (MVT LegalVecVT : AllVectorVTs) {
setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
// Translate all vector instructions with legal element types to VVP_*
// nodes.
// TODO We will custom-widen into VVP_* nodes in the future. While we are
// building the infrastructure for this, we only do this for legal vector
// VTs.
#define ADD_VVP_OP(VVP_NAME, ISD_NAME) \
setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
#include "VVPNodes.def"
}
}
SDValue
@ -846,6 +855,10 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
TARGET_NODE_CASE(VEC_BROADCAST)
TARGET_NODE_CASE(RET_FLAG)
TARGET_NODE_CASE(GLOBAL_BASE_REG)
// Register the VVP_* SDNodes.
#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
#include "VVPNodes.def"
}
#undef TARGET_NODE_CASE
return nullptr;
@ -1403,6 +1416,10 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return lowerVASTART(Op, DAG);
case ISD::VAARG:
return lowerVAARG(Op, DAG);
#define ADD_BINARY_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
#include "VVPNodes.def"
return lowerToVVP(Op, DAG);
}
}
/// } Custom Lower
@ -1665,3 +1682,53 @@ bool VETargetLowering::hasAndNot(SDValue Y) const {
// It's ok for generic registers.
return true;
}
/// Map an SDNode opcode onto the VVP layer.
///
/// \returns the VVP_* SDNode opcode corresponding to \p OC. A VVP_* opcode
/// maps to itself; an opcode without an entry in VVPNodes.def yields None.
static Optional<unsigned> getVVPOpcode(unsigned OC) {
  Optional<unsigned> VVPOpcode = None;
  switch (OC) {
// Each VVPNodes.def entry contributes two case labels: the VVP opcode itself
// and the generic ISD opcode it stands for.
#define ADD_VVP_OP(VVP_NAME, ISD_NAME)                                         \
  case VEISD::VVP_NAME:                                                        \
  case ISD::ISD_NAME:                                                          \
    VVPOpcode = VEISD::VVP_NAME;                                               \
    break;
#include "VVPNodes.def"
  default:
    break;
  }
  return VVPOpcode;
}
/// Lower the generic vector operation \p Op to its VVP_* counterpart.
///
/// The resulting VVP node carries the operands of \p Op followed by a mask and
/// an explicit vector length. The mask is an all-true broadcast and the vector
/// length is the element count of the operation's vector type.
///
/// \returns the new VVP_* node, or an empty SDValue if \p Op has no VVP
/// equivalent in VVPNodes.def.
SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
  // Can we represent this as a VVP node?
  Optional<unsigned> OCOpt = getVVPOpcode(Op->getOpcode());
  if (!OCOpt.hasValue())
    return SDValue();
  const unsigned VVPOC = OCOpt.getValue();

  // The representative and legalized vector type of this operation.
  EVT OpVecVT = Op.getValueType();
  EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT);

  // Materialize the VL parameter.
  SDLoc DL(Op);
  SDValue AVL = DAG.getConstant(OpVecVT.getVectorNumElements(), DL, MVT::i32);

  // Materialize an all-true mask. VEC_BROADCAST is declared with two operands
  // (the scalar to splat and the vector length), so pass the AVL along instead
  // of building a malformed single-operand node.
  MVT MaskVT = MVT::v256i1;
  SDValue ConstTrue = DAG.getConstant(1, DL, MVT::i32);
  SDValue Mask =
      DAG.getNode(VEISD::VEC_BROADCAST, DL, MaskVT, ConstTrue, AVL);

  // Categories we are interested in.
  bool IsBinaryOp = false;
  switch (VVPOC) {
#define ADD_BINARY_VVP_OP(VVPNAME, ...)                                        \
  case VEISD::VVPNAME:                                                         \
    IsBinaryOp = true;                                                         \
    break;
#include "VVPNodes.def"
  }

  if (IsBinaryOp) {
    assert(LegalVecVT.isSimple() && "Expected a simple legalized vector type");
    // BinaryOp(x, y, mask, vl)
    return DAG.getNode(VVPOC, DL, LegalVecVT, Op->getOperand(0),
                       Op->getOperand(1), Mask, AVL);
  }
  llvm_unreachable("lowerToVVP called for unexpected SDNode.");
}

View File

@ -39,6 +39,10 @@ enum NodeType : unsigned {
CALL, // A call instruction.
RET_FLAG, // Return with a flag operand.
GLOBAL_BASE_REG, // Global base reg for PIC.
// VVP_* nodes.
#define ADD_VVP_OP(VVP_NAME, ...) VVP_NAME,
#include "VVPNodes.def"
};
}
@ -120,6 +124,10 @@ public:
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
/// } Custom Lower
/// VVP Lowering {
SDValue lowerToVVP(SDValue Op, SelectionDAG &DAG) const;
/// } VVPLowering
/// Custom DAGCombine {
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

View File

@ -245,6 +245,7 @@ def fplomsbzero : PatLeaf<(fpimm), [{ return (getFpImmVal(N) & 0x80000000)
== 0; }]>;
def fplozero : PatLeaf<(fpimm), [{ return (getFpImmVal(N) & 0xffffffff)
== 0; }]>;
def nonzero : PatLeaf<(imm), [{ return N->getSExtValue() !=0 ; }]>;
def CCSIOp : PatLeaf<(cond), [{
switch (N->get()) {
@ -2219,6 +2220,22 @@ def : Pat<(i32 (and i32:$val, 0xffff)),
def : Pat<(i64 (and i64:$val, 0xffffffff)),
(ANDrm $val, !add(32, 64))>;
//===----------------------------------------------------------------------===//
// Vector Instruction Pattern Stuff
//===----------------------------------------------------------------------===//
// Custom intermediate ISDs.
class IsVLVT<int OpIdx> : SDTCisVT<OpIdx,i32>;
def vec_broadcast : SDNode<"VEISD::VEC_BROADCAST", SDTypeProfile<1, 2,
[SDTCisVec<0>, IsVLVT<2>]>>;
// Whether this is an all-true mask (assuming undef-bits above VL are all-true).
def true_mask : PatLeaf<
(vec_broadcast (i32 nonzero), (i32 srcvalue))>;
// Match any broadcast (ignoring VL).
def any_broadcast : PatFrag<(ops node:$sx),
(vec_broadcast node:$sx, (i32 srcvalue))>;
// Vector instructions.
include "VEInstrVec.td"
@ -2227,3 +2244,6 @@ include "VEInstrIntrinsicVL.td"
// Patterns and intermediate SD nodes (VEC_*).
include "VEInstrPatternsVec.td"
// Patterns and intermediate SD nodes (VVP_*).
include "VVPInstrPatternsVec.td"

View File

@ -15,10 +15,6 @@
// Instruction format superclass
//===----------------------------------------------------------------------===//
// Custom intermediate ISDs.
class IsVLVT<int OpIdx> : SDTCisVT<OpIdx,i32>;
def vec_broadcast : SDNode<"VEISD::VEC_BROADCAST", SDTypeProfile<1, 2, [SDTCisVec<0>, IsVLVT<2>]>>;
multiclass vbrd_elem32<ValueType v32, ValueType s32, SDPatternOperator ImmOp, SDNodeXForm ImmCast, int SubRegIdx> {
// VBRDil
def : Pat<(v32 (vec_broadcast (s32 ImmOp:$sy), i32:$vl)),

View File

@ -0,0 +1,43 @@
//===------------- VVPInstrInfo.td - VVP_* SDNode definitions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the VE Vector Predicated SDNodes (VVP SDNodes). VVP
// SDNodes are an intermediate isel layer between the vector SDNodes emitted by
// LLVM and the actual VE vector instructions. For example:
//
// ADD(x,y) --> VVP_ADD(x,y,mask,evl) --> VADDSWSXrvml(x,y,mask,evl)
// ^ ^ ^
// The standard The VVP layer SDNode. The VE vector instruction.
// SDNode.
//
// TODO explain how VVP nodes relate to VP SDNodes once VP ISel is upstream.
//===----------------------------------------------------------------------===//
// Binary Operators {
// BinaryOp(x,y,mask,vl)
// Type profile of an integer binary VVP node: one result and four operands
// (x, y, mask, vl).
def SDTIntBinOpVVP : SDTypeProfile<1, 4, [ // Used by vvp_add.
SDTCisSameAs<0, 1>, // x has the result type.
SDTCisSameAs<0, 2>, // y has the result type.
SDTCisInt<0>, // The result (and thus x and y) is an integer type.
SDTCisSameNumEltsAs<0, 3>, // The mask has as many elements as the result.
IsVLVT<4> // vl is of the vector length type (i32).
]>;
// Binary operator commutative pattern.
// Matches RootOp with its two data operands in either order; the mask and
// vector length operands keep their positions in both alternatives.
class vvp_commutative<SDNode RootOp> :
PatFrags<
(ops node:$lhs, node:$rhs, node:$mask, node:$vlen),
// Alternative 1: operands as written.
[(RootOp node:$lhs, node:$rhs, node:$mask, node:$vlen),
// Alternative 2: $lhs and $rhs swapped.
(RootOp node:$rhs, node:$lhs, node:$mask, node:$vlen)]>;
// VVP node definitions.
def vvp_add : SDNode<"VEISD::VVP_ADD", SDTIntBinOpVVP>;
def c_vvp_add : vvp_commutative<vvp_add>;
// } Binary Operators

View File

@ -0,0 +1,68 @@
//===----------- VVPInstrPatternsVec.td - VVP_* SDNode patterns -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes how VVP_* SDNodes are lowered to machine instructions.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
//
// VVP SDNode definitions.
//
//===----------------------------------------------------------------------===//
include "VVPInstrInfo.td"
// Selection patterns for one binary VVP operator on one vector type.
//
// OpNode is matched with operands (lhs, rhs, mask, avl). Four patterns are
// emitted, each selecting the instruction named OpBaseName plus a suffix:
//   "rvl"  - broadcast scalar lhs, vector rhs, all-true mask
//   "vvl"  - vector lhs, vector rhs, all-true mask
//   "rvml" - broadcast scalar lhs, vector rhs, explicit mask
//   "vvml" - vector lhs, vector rhs, explicit mask
// ImmOp and ImmCast are not referenced by any pattern here (see the TODO on
// immediate variants below).
multiclass VectorBinaryArith<
SDPatternOperator OpNode,
ValueType ScalarVT, ValueType DataVT, ValueType MaskVT,
string OpBaseName,
SDPatternOperator ImmOp, SDNodeXForm ImmCast> {
// No mask.
// The mask operand is a true_mask, so the unmasked instruction forms are used
// and the mask is not passed on.
def : Pat<(OpNode
(any_broadcast ScalarVT:$sx),
DataVT:$vy, (MaskVT true_mask), i32:$avl),
(!cast<Instruction>(OpBaseName#"rvl")
ScalarVT:$sx, $vy, $avl)>;
def : Pat<(OpNode DataVT:$vx, DataVT:$vy, (MaskVT true_mask), i32:$avl),
(!cast<Instruction>(OpBaseName#"vvl")
$vx, $vy, $avl)>;
// Mask.
// Any other mask value is forwarded to the masked instruction forms.
def : Pat<(OpNode
(any_broadcast ScalarVT:$sx),
DataVT:$vy, MaskVT:$mask, i32:$avl),
(!cast<Instruction>(OpBaseName#"rvml")
ScalarVT:$sx, $vy, $mask, $avl)>;
def : Pat<(OpNode DataVT:$vx, DataVT:$vy, MaskVT:$mask, i32:$avl),
(!cast<Instruction>(OpBaseName#"vvml")
$vx, $vy, $mask, $avl)>;
// TODO We do not specify patterns for the immediate variants here. There
// will be an immediate folding pass that takes care of switching to the
// immediate variant where applicable.
// TODO Fold vvp_select into passthru.
}
// Expand both 64bit and 32 bit variant (256 elements)
// Instantiates VectorBinaryArith twice for OpNode: once with the Long*
// scalar/vector types and instruction base name, once with the Short* ones.
// Both instantiations use a v256i1 mask and pass simm7 / LO7 as the immediate
// operator and cast.
multiclass VectorBinaryArith_ShortLong<
SDPatternOperator OpNode,
ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName,
ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
// Long variant.
defm : VectorBinaryArith<OpNode,
LongScalarVT, LongDataVT, v256i1,
LongOpBaseName, simm7, LO7>;
// Short variant.
defm : VectorBinaryArith<OpNode,
ShortScalarVT, ShortDataVT, v256i1,
ShortOpBaseName, simm7, LO7>;
}
defm : VectorBinaryArith_ShortLong<c_vvp_add,
i64, v256i64, "VADDSL",
i32, v256i32, "VADDSWSX">;

View File

@ -0,0 +1,32 @@
//===-- VVPNodes.def - Lists & properties of VE Vector Predication Nodes --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines all VVP_* SDNodes and their properties
//
//===----------------------------------------------------------------------===//
/// ADD_VVP_OP(VVPNAME,SDNAME)
/// \p VVPNAME is a VVP SDNode operator.
/// \p SDNAME is the generic SD opcode corresponding to \p VVPNAME.
#ifndef ADD_VVP_OP
#define ADD_VVP_OP(X, Y)
#endif
/// ADD_BINARY_VVP_OP(VVPNAME,SDNAME)
/// \p VVPNAME is a VVP Binary operator.
/// \p SDNAME is the generic SD opcode corresponding to \p VVPNAME.
#ifndef ADD_BINARY_VVP_OP
#define ADD_BINARY_VVP_OP(X,Y) ADD_VVP_OP(X,Y)
#endif
// Integer arithmetic.
ADD_BINARY_VVP_OP(VVP_ADD,ADD)
#undef ADD_BINARY_VVP_OP
#undef ADD_VVP_OP

View File

@ -0,0 +1,132 @@
; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
; <256 x i32>
; Function Attrs: nounwind
define fastcc <256 x i32> @add_vv_v256i32(<256 x i32> %x, <256 x i32> %y) {
; CHECK-LABEL: add_vv_v256i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s0, 256
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vadds.w.sx %v0, %v0, %v1
; CHECK-NEXT: b.l.t (, %s10)
%z = add <256 x i32> %x, %y
ret <256 x i32> %z
}
; Function Attrs: nounwind
define fastcc <256 x i32> @add_sv_v256i32(i32 %x, <256 x i32> %y) {
; CHECK-LABEL: add_sv_v256i32:
; CHECK: # %bb.0:
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: lea %s1, 256
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: vadds.w.sx %v0, %s0, %v0
; CHECK-NEXT: b.l.t (, %s10)
%xins = insertelement <256 x i32> undef, i32 %x, i32 0
%vx = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
%z = add <256 x i32> %vx, %y
ret <256 x i32> %z
}
; Function Attrs: nounwind
define fastcc <256 x i32> @add_vs_v256i32(<256 x i32> %x, i32 %y) {
; CHECK-LABEL: add_vs_v256i32:
; CHECK: # %bb.0:
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: lea %s1, 256
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: vadds.w.sx %v0, %s0, %v0
; CHECK-NEXT: b.l.t (, %s10)
%yins = insertelement <256 x i32> undef, i32 %y, i32 0
%vy = shufflevector <256 x i32> %yins, <256 x i32> undef, <256 x i32> zeroinitializer
%z = add <256 x i32> %x, %vy
ret <256 x i32> %z
}
; <256 x i64>
; Function Attrs: nounwind
define fastcc <256 x i64> @add_vv_v256i64(<256 x i64> %x, <256 x i64> %y) {
; CHECK-LABEL: add_vv_v256i64:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s0, 256
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vadds.l %v0, %v0, %v1
; CHECK-NEXT: b.l.t (, %s10)
%z = add <256 x i64> %x, %y
ret <256 x i64> %z
}
; Function Attrs: nounwind
define fastcc <256 x i64> @add_sv_v256i64(i64 %x, <256 x i64> %y) {
; CHECK-LABEL: add_sv_v256i64:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: vadds.l %v0, %s0, %v0
; CHECK-NEXT: b.l.t (, %s10)
%xins = insertelement <256 x i64> undef, i64 %x, i32 0
%vx = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
%z = add <256 x i64> %vx, %y
ret <256 x i64> %z
}
; Function Attrs: nounwind
define fastcc <256 x i64> @add_vs_v256i64(<256 x i64> %x, i64 %y) {
; CHECK-LABEL: add_vs_v256i64:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s1, 256
; CHECK-NEXT: lvl %s1
; CHECK-NEXT: vadds.l %v0, %s0, %v0
; CHECK-NEXT: b.l.t (, %s10)
%yins = insertelement <256 x i64> undef, i64 %y, i32 0
%vy = shufflevector <256 x i64> %yins, <256 x i64> undef, <256 x i32> zeroinitializer
%z = add <256 x i64> %x, %vy
ret <256 x i64> %z
}
; <128 x i64>
; We expect this to be widened.
; Function Attrs: nounwind
define fastcc <128 x i64> @add_vv_v128i64(<128 x i64> %x, <128 x i64> %y) {
; CHECK-LABEL: add_vv_v128i64:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s0, 256
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vadds.l %v0, %v0, %v1
; CHECK-NEXT: b.l.t (, %s10)
%z = add <128 x i64> %x, %y
ret <128 x i64> %z
}
; <256 x i16>
; We expect promotion.
; Function Attrs: nounwind
define fastcc <256 x i16> @add_vv_v256i16(<256 x i16> %x, <256 x i16> %y) {
; CHECK-LABEL: add_vv_v256i16:
; CHECK: # %bb.0:
; CHECK-NEXT: lea %s0, 256
; CHECK-NEXT: lvl %s0
; CHECK-NEXT: vadds.w.sx %v0, %v0, %v1
; CHECK-NEXT: b.l.t (, %s10)
%z = add <256 x i16> %x, %y
ret <256 x i16> %z
}
; <128 x i16>
; We expect this to be scalarized (for now).
; Function Attrs: nounwind
define fastcc <128 x i16> @add_vv_v128i16(<128 x i16> %x, <128 x i16> %y) {
; CHECK-LABEL: add_vv_v128i16:
; CHECK: # %bb.0:
; CHECK-NOT: vadd
%z = add <128 x i16> %x, %y
ret <128 x i16> %z
}