forked from OSchip/llvm-project
[VE] VE Vector Predicated SDNode, vector add isel and tests
VE Vector Predicated (VVP) SDNodes form an intermediate layer between VE vector instructions and the initial SDNodes. We introduce 'vvp_add' with isel and tests as the first of these VVP nodes. VVP nodes have a mask and explicit vector length operand, which we will make proper use of later. Reviewed By: kaz7 Differential Revision: https://reviews.llvm.org/D91802
This commit is contained in:
parent
619630f997
commit
b955c7e630
|
@ -254,8 +254,17 @@ void VETargetLowering::initSPUActions() {
|
|||
}
|
||||
|
||||
void VETargetLowering::initVPUActions() {
|
||||
for (MVT LegalVecVT : AllVectorVTs)
|
||||
for (MVT LegalVecVT : AllVectorVTs) {
|
||||
setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
|
||||
// Translate all vector instructions with legal element types to VVP_*
|
||||
// nodes.
|
||||
// TODO We will custom-widen into VVP_* nodes in the future. While we are
|
||||
// building the infrastructure for this, we only do this for legal vector
|
||||
// VTs.
|
||||
#define ADD_VVP_OP(VVP_NAME, ISD_NAME) \
|
||||
setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
|
||||
#include "VVPNodes.def"
|
||||
}
|
||||
}
|
||||
|
||||
SDValue
|
||||
|
@ -846,6 +855,10 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
TARGET_NODE_CASE(VEC_BROADCAST)
|
||||
TARGET_NODE_CASE(RET_FLAG)
|
||||
TARGET_NODE_CASE(GLOBAL_BASE_REG)
|
||||
|
||||
// Register the VVP_* SDNodes.
|
||||
#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
|
||||
#include "VVPNodes.def"
|
||||
}
|
||||
#undef TARGET_NODE_CASE
|
||||
return nullptr;
|
||||
|
@ -1403,6 +1416,10 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
|||
return lowerVASTART(Op, DAG);
|
||||
case ISD::VAARG:
|
||||
return lowerVAARG(Op, DAG);
|
||||
|
||||
#define ADD_BINARY_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
|
||||
#include "VVPNodes.def"
|
||||
return lowerToVVP(Op, DAG);
|
||||
}
|
||||
}
|
||||
/// } Custom Lower
|
||||
|
@ -1665,3 +1682,53 @@ bool VETargetLowering::hasAndNot(SDValue Y) const {
|
|||
// It's ok for generic registers.
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Translate \p OC into its VVP_* SDNode opcode.
///
/// \returns the VEISD::VVP_* opcode when \p OC is either that VVP opcode
/// itself or the generic ISD opcode paired with it in VVPNodes.def, and None
/// for every other opcode.
static Optional<unsigned> getVVPOpcode(unsigned OC) {
  // Every ADD_VVP_OP entry of VVPNodes.def expands to two case labels (the VVP
  // opcode and its ISD counterpart) that share a single return statement.
#define ADD_VVP_OP(VVP_OPC, ISD_OPC)                                           \
  case VEISD::VVP_OPC:                                                         \
  case ISD::ISD_OPC:                                                           \
    return VEISD::VVP_OPC;

  switch (OC) {
#include "VVPNodes.def"
  default:
    break;
  }
  return None;
}
|
||||
|
||||
/// Lower the generic vector operation \p Op to its VVP_* counterpart.
///
/// The resulting VVP node takes the operands of \p Op followed by an all-true
/// mask and an explicit vector length that covers every element of the vector
/// type of \p Op.
///
/// \returns the new VVP_* node, or an empty SDValue when getVVPOpcode reports
/// no VVP opcode for \p Op.
SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
  // Can we represent this as a VVP node?
  auto OCOpt = getVVPOpcode(Op->getOpcode());
  if (!OCOpt.hasValue())
    return SDValue();
  unsigned VVPOC = OCOpt.getValue();

  // The representative and legalized vector type of this operation.
  EVT OpVecVT = Op.getValueType();
  EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT);

  // Materialize the VL parameter.
  SDLoc DL(Op);
  SDValue AVL = DAG.getConstant(OpVecVT.getVectorNumElements(), DL, MVT::i32);

  // Materialize an all-true mask. VEC_BROADCAST is declared with two operands
  // (the scalar to splat and the vector length), so pass the AVL as well
  // instead of building a node that is one operand short of its type profile.
  MVT MaskVT = MVT::v256i1;
  SDValue ConstTrue = DAG.getConstant(1, DL, MVT::i32);
  SDValue Mask = DAG.getNode(VEISD::VEC_BROADCAST, DL, MaskVT, ConstTrue, AVL);

  // Categories we are interested in.
  bool IsBinaryOp = false;

  // Only the binary entries of VVPNodes.def redefine the macro here; each one
  // flags its VVP opcode as a binary operator.
  switch (VVPOC) {
#define ADD_BINARY_VVP_OP(VVPNAME, ...)                                        \
  case VEISD::VVPNAME:                                                         \
    IsBinaryOp = true;                                                         \
    break;
#include "VVPNodes.def"
  }

  if (IsBinaryOp) {
    assert(LegalVecVT.isSimple() && "Expected a simple legalized vector type");
    // BinaryOp(x, y, mask, vl)
    return DAG.getNode(VVPOC, DL, LegalVecVT, Op->getOperand(0),
                       Op->getOperand(1), Mask, AVL);
  }
  llvm_unreachable("lowerToVVP called for unexpected SDNode.");
}
|
||||
|
|
|
@ -39,6 +39,10 @@ enum NodeType : unsigned {
|
|||
CALL, // A call instruction.
|
||||
RET_FLAG, // Return with a flag operand.
|
||||
GLOBAL_BASE_REG, // Global base reg for PIC.
|
||||
|
||||
// VVP_* nodes.
|
||||
#define ADD_VVP_OP(VVP_NAME, ...) VVP_NAME,
|
||||
#include "VVPNodes.def"
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -120,6 +124,10 @@ public:
|
|||
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
|
||||
/// } Custom Lower
|
||||
|
||||
/// VVP Lowering {
|
||||
SDValue lowerToVVP(SDValue Op, SelectionDAG &DAG) const;
|
||||
/// } VVP Lowering
|
||||
|
||||
/// Custom DAGCombine {
|
||||
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
|
||||
|
||||
|
|
|
@ -245,6 +245,7 @@ def fplomsbzero : PatLeaf<(fpimm), [{ return (getFpImmVal(N) & 0x80000000)
|
|||
== 0; }]>;
|
||||
def fplozero : PatLeaf<(fpimm), [{ return (getFpImmVal(N) & 0xffffffff)
|
||||
== 0; }]>;
|
||||
def nonzero : PatLeaf<(imm), [{ return N->getSExtValue() !=0 ; }]>;
|
||||
|
||||
def CCSIOp : PatLeaf<(cond), [{
|
||||
switch (N->get()) {
|
||||
|
@ -2219,6 +2220,22 @@ def : Pat<(i32 (and i32:$val, 0xffff)),
|
|||
def : Pat<(i64 (and i64:$val, 0xffffffff)),
|
||||
(ANDrm $val, !add(32, 64))>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Vector Instruction Pattern Stuff
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Custom intermediate ISDs.
|
||||
class IsVLVT<int OpIdx> : SDTCisVT<OpIdx,i32>;
|
||||
def vec_broadcast : SDNode<"VEISD::VEC_BROADCAST", SDTypeProfile<1, 2,
|
||||
[SDTCisVec<0>, IsVLVT<2>]>>;
|
||||
|
||||
// Whether this is an all-true mask (assuming undef-bits above VL are all-true).
|
||||
def true_mask : PatLeaf<
|
||||
(vec_broadcast (i32 nonzero), (i32 srcvalue))>;
|
||||
// Match any broadcast (ignoring VL).
|
||||
def any_broadcast : PatFrag<(ops node:$sx),
|
||||
(vec_broadcast node:$sx, (i32 srcvalue))>;
|
||||
|
||||
// Vector instructions.
|
||||
include "VEInstrVec.td"
|
||||
|
||||
|
@ -2227,3 +2244,6 @@ include "VEInstrIntrinsicVL.td"
|
|||
|
||||
// Patterns and intermediate SD nodes (VEC_*).
|
||||
include "VEInstrPatternsVec.td"
|
||||
|
||||
// Patterns and intermediate SD nodes (VVP_*).
|
||||
include "VVPInstrPatternsVec.td"
|
||||
|
|
|
@ -15,10 +15,6 @@
|
|||
// Instruction format superclass
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Custom intermediate ISDs.
|
||||
class IsVLVT<int OpIdx> : SDTCisVT<OpIdx,i32>;
|
||||
def vec_broadcast : SDNode<"VEISD::VEC_BROADCAST", SDTypeProfile<1, 2, [SDTCisVec<0>, IsVLVT<2>]>>;
|
||||
|
||||
multiclass vbrd_elem32<ValueType v32, ValueType s32, SDPatternOperator ImmOp, SDNodeXForm ImmCast, int SubRegIdx> {
|
||||
// VBRDil
|
||||
def : Pat<(v32 (vec_broadcast (s32 ImmOp:$sy), i32:$vl)),
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
//===-------------- VVPInstrInfo.td - VVP_* SDNode patterns ---------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the VE Vector Predicated SDNodes (VVP SDNodes). VVP
|
||||
// SDNodes are an intermediate isel layer between the vector SDNodes emitted by
|
||||
// LLVM and the actual VE vector instructions. For example:
|
||||
//
|
||||
// ADD(x,y) --> VVP_ADD(x,y,mask,evl) --> VADDSWSXrvml(x,y,mask,evl)
|
||||
// ^ ^ ^
|
||||
// The standard The VVP layer SDNode. The VE vector instruction.
|
||||
// SDNode.
|
||||
//
|
||||
// TODO explain how VVP nodes relate to VP SDNodes once VP ISel is upstream.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Binary Operators {
|
||||
|
||||
// BinaryOp(x,y,mask,vl)
|
||||
def SDTIntBinOpVVP : SDTypeProfile<1, 4, [ // vp_add, vp_and, etc.
|
||||
SDTCisSameAs<0, 1>,
|
||||
SDTCisSameAs<0, 2>,
|
||||
SDTCisInt<0>,
|
||||
SDTCisSameNumEltsAs<0, 3>,
|
||||
IsVLVT<4>
|
||||
]>;
|
||||
|
||||
// Binary operator commutative pattern.
|
||||
class vvp_commutative<SDNode RootOp> :
|
||||
PatFrags<
|
||||
(ops node:$lhs, node:$rhs, node:$mask, node:$vlen),
|
||||
[(RootOp node:$lhs, node:$rhs, node:$mask, node:$vlen),
|
||||
(RootOp node:$rhs, node:$lhs, node:$mask, node:$vlen)]>;
|
||||
|
||||
// VVP node definitions.
|
||||
def vvp_add : SDNode<"VEISD::VVP_ADD", SDTIntBinOpVVP>;
|
||||
def c_vvp_add : vvp_commutative<vvp_add>;
|
||||
|
||||
// } Binary Operators
|
|
@ -0,0 +1,68 @@
|
|||
//===----------- VVPInstrPatternsVec.td - VVP_* SDNode patterns -----------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file describes how VVP_* SDNodes are lowered to machine instructions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// VVP SDNode definitions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
include "VVPInstrInfo.td"
|
||||
|
||||
multiclass VectorBinaryArith<
|
||||
SDPatternOperator OpNode,
|
||||
ValueType ScalarVT, ValueType DataVT, ValueType MaskVT,
|
||||
string OpBaseName,
|
||||
SDPatternOperator ImmOp, SDNodeXForm ImmCast> {
|
||||
// No mask.
|
||||
def : Pat<(OpNode
|
||||
(any_broadcast ScalarVT:$sx),
|
||||
DataVT:$vy, (MaskVT true_mask), i32:$avl),
|
||||
(!cast<Instruction>(OpBaseName#"rvl")
|
||||
ScalarVT:$sx, $vy, $avl)>;
|
||||
def : Pat<(OpNode DataVT:$vx, DataVT:$vy, (MaskVT true_mask), i32:$avl),
|
||||
(!cast<Instruction>(OpBaseName#"vvl")
|
||||
$vx, $vy, $avl)>;
|
||||
|
||||
// Mask.
|
||||
def : Pat<(OpNode
|
||||
(any_broadcast ScalarVT:$sx),
|
||||
DataVT:$vy, MaskVT:$mask, i32:$avl),
|
||||
(!cast<Instruction>(OpBaseName#"rvml")
|
||||
ScalarVT:$sx, $vy, $mask, $avl)>;
|
||||
def : Pat<(OpNode DataVT:$vx, DataVT:$vy, MaskVT:$mask, i32:$avl),
|
||||
(!cast<Instruction>(OpBaseName#"vvml")
|
||||
$vx, $vy, $mask, $avl)>;
|
||||
|
||||
// TODO We do not specify patterns for the immediate variants here. There
|
||||
// will be an immediate folding pass that takes care of switching to the
|
||||
// immediate variant where applicable.
|
||||
|
||||
// TODO Fold vvp_select into passthru.
|
||||
}
|
||||
|
||||
// Expand both 64bit and 32 bit variant (256 elements)
|
||||
multiclass VectorBinaryArith_ShortLong<
|
||||
SDPatternOperator OpNode,
|
||||
ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName,
|
||||
ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
|
||||
defm : VectorBinaryArith<OpNode,
|
||||
LongScalarVT, LongDataVT, v256i1,
|
||||
LongOpBaseName, simm7, LO7>;
|
||||
defm : VectorBinaryArith<OpNode,
|
||||
ShortScalarVT, ShortDataVT, v256i1,
|
||||
ShortOpBaseName, simm7, LO7>;
|
||||
}
|
||||
|
||||
|
||||
defm : VectorBinaryArith_ShortLong<c_vvp_add,
|
||||
i64, v256i64, "VADDSL",
|
||||
i32, v256i32, "VADDSWSX">;
|
|
@ -0,0 +1,32 @@
|
|||
//===-- VVPNodes.def - Lists & properties of VE Vector Predication Nodes --===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines all VVP_* SDNodes and their properties
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// ADD_VVP_OP(VVPNAME,SDNAME)
|
||||
/// \p VVPNAME is a VVP SDNode operator.
|
||||
/// \p SDNAME is the generic SD opcode corresponding to \p VVPNAME.
|
||||
#ifndef ADD_VVP_OP
|
||||
#define ADD_VVP_OP(X, Y)
|
||||
#endif
|
||||
|
||||
/// ADD_BINARY_VVP_OP(VVPNAME,SDNAME)
|
||||
/// \p VVPNAME is a VVP Binary operator.
|
||||
/// \p SDNAME is the generic SD opcode corresponding to \p VVPNAME.
|
||||
#ifndef ADD_BINARY_VVP_OP
|
||||
#define ADD_BINARY_VVP_OP(X,Y) ADD_VVP_OP(X,Y)
|
||||
#endif
|
||||
|
||||
// Integer arithmetic.
|
||||
ADD_BINARY_VVP_OP(VVP_ADD,ADD)
|
||||
|
||||
|
||||
#undef ADD_BINARY_VVP_OP
|
||||
#undef ADD_VVP_OP
|
|
@ -0,0 +1,132 @@
|
|||
; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
|
||||
|
||||
; <256 x i32>
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define fastcc <256 x i32> @add_vv_v256i32(<256 x i32> %x, <256 x i32> %y) {
|
||||
; CHECK-LABEL: add_vv_v256i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lea %s0, 256
|
||||
; CHECK-NEXT: lvl %s0
|
||||
; CHECK-NEXT: vadds.w.sx %v0, %v0, %v1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%z = add <256 x i32> %x, %y
|
||||
ret <256 x i32> %z
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define fastcc <256 x i32> @add_sv_v256i32(i32 %x, <256 x i32> %y) {
|
||||
; CHECK-LABEL: add_sv_v256i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
|
||||
; CHECK-NEXT: lea %s1, 256
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: vadds.w.sx %v0, %s0, %v0
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%xins = insertelement <256 x i32> undef, i32 %x, i32 0
|
||||
%vx = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
|
||||
%z = add <256 x i32> %vx, %y
|
||||
ret <256 x i32> %z
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define fastcc <256 x i32> @add_vs_v256i32(<256 x i32> %x, i32 %y) {
|
||||
; CHECK-LABEL: add_vs_v256i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
|
||||
; CHECK-NEXT: lea %s1, 256
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: vadds.w.sx %v0, %s0, %v0
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%yins = insertelement <256 x i32> undef, i32 %y, i32 0
|
||||
%vy = shufflevector <256 x i32> %yins, <256 x i32> undef, <256 x i32> zeroinitializer
|
||||
%z = add <256 x i32> %x, %vy
|
||||
ret <256 x i32> %z
|
||||
}
|
||||
|
||||
|
||||
|
||||
; <256 x i64>
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define fastcc <256 x i64> @add_vv_v256i64(<256 x i64> %x, <256 x i64> %y) {
|
||||
; CHECK-LABEL: add_vv_v256i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lea %s0, 256
|
||||
; CHECK-NEXT: lvl %s0
|
||||
; CHECK-NEXT: vadds.l %v0, %v0, %v1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%z = add <256 x i64> %x, %y
|
||||
ret <256 x i64> %z
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define fastcc <256 x i64> @add_sv_v256i64(i64 %x, <256 x i64> %y) {
|
||||
; CHECK-LABEL: add_sv_v256i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lea %s1, 256
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: vadds.l %v0, %s0, %v0
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%xins = insertelement <256 x i64> undef, i64 %x, i32 0
|
||||
%vx = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
|
||||
%z = add <256 x i64> %vx, %y
|
||||
ret <256 x i64> %z
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define fastcc <256 x i64> @add_vs_v256i64(<256 x i64> %x, i64 %y) {
|
||||
; CHECK-LABEL: add_vs_v256i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lea %s1, 256
|
||||
; CHECK-NEXT: lvl %s1
|
||||
; CHECK-NEXT: vadds.l %v0, %s0, %v0
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%yins = insertelement <256 x i64> undef, i64 %y, i32 0
|
||||
%vy = shufflevector <256 x i64> %yins, <256 x i64> undef, <256 x i32> zeroinitializer
|
||||
%z = add <256 x i64> %x, %vy
|
||||
ret <256 x i64> %z
|
||||
}
|
||||
|
||||
; <128 x i64>
|
||||
; We expect this to be widened.
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define fastcc <128 x i64> @add_vv_v128i64(<128 x i64> %x, <128 x i64> %y) {
|
||||
; CHECK-LABEL: add_vv_v128i64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lea %s0, 256
|
||||
; CHECK-NEXT: lvl %s0
|
||||
; CHECK-NEXT: vadds.l %v0, %v0, %v1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%z = add <128 x i64> %x, %y
|
||||
ret <128 x i64> %z
|
||||
}
|
||||
|
||||
; <256 x i16>
|
||||
; We expect promotion.
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define fastcc <256 x i16> @add_vv_v256i16(<256 x i16> %x, <256 x i16> %y) {
|
||||
; CHECK-LABEL: add_vv_v256i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lea %s0, 256
|
||||
; CHECK-NEXT: lvl %s0
|
||||
; CHECK-NEXT: vadds.w.sx %v0, %v0, %v1
|
||||
; CHECK-NEXT: b.l.t (, %s10)
|
||||
%z = add <256 x i16> %x, %y
|
||||
ret <256 x i16> %z
|
||||
}
|
||||
|
||||
; <128 x i16>
|
||||
; We expect this to be scalarized (for now).
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define fastcc <128 x i16> @add_vv_v128i16(<128 x i16> %x, <128 x i16> %y) {
|
||||
; CHECK-LABEL: add_vv_v128i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NOT: vadd
|
||||
%z = add <128 x i16> %x, %y
|
||||
ret <128 x i16> %z
|
||||
}
|
||||
|
Loading…
Reference in New Issue