[VE] (masked) load|store v256.32|64 isel

Add `vvp_load|store` nodes. Lower to `vld`, `vst` where possible. Use `vgt` for masked loads for now.

Reviewed By: kaz7

Differential Revision: https://reviews.llvm.org/D120413
This commit is contained in:
parent 3ca1098557
commit 9ebaec461a
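For illustration, a minimal LLVM IR sketch of the kind of operation this patch handles, mirroring the vec_mload test added at the end of this diff (the function name @sketch_mload is hypothetical): with a non-trivial mask the masked load is lowered to VVP_LOAD and selected as a vgt (gather) sequence for now, while an unmasked or all-true-mask load selects a plain vld.

; Masked v256f64 load: lowered to VVP_LOAD, selected as a vgt sequence.
declare <256 x double> @llvm.masked.load.v256f64.p0v256f64(<256 x double>*, i32 immarg, <256 x i1>, <256 x double>)

define fastcc <256 x double> @sketch_mload(<256 x double>* %P, <256 x i1> %M) {
  %r = call <256 x double> @llvm.masked.load.v256f64.p0v256f64(<256 x double>* %P, i32 16, <256 x i1> %M, <256 x double> undef)
  ret <256 x double> %r
}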
@@ -61,6 +61,10 @@ bool isMaskArithmetic(SDValue Op) {
/// \returns the VVP_* SDNode opcode corresponding to \p OC.
Optional<unsigned> getVVPOpcode(unsigned Opcode) {
  switch (Opcode) {
  case ISD::MLOAD:
    return VEISD::VVP_LOAD;
  case ISD::MSTORE:
    return VEISD::VVP_STORE;
#define HANDLE_VP_TO_VVP(VPOPC, VVPNAME) \
  case ISD::VPOPC: \
    return VEISD::VVPNAME;

@@ -166,8 +170,12 @@ Optional<int> getMaskPos(unsigned Opc) {
  if (isVVPBinaryOp(Opc))
    return 2;

  // VM Opcodes.
  // Other opcodes.
  switch (Opc) {
  case ISD::MSTORE:
    return 4;
  case ISD::MLOAD:
    return 3;
  case VEISD::VVP_SELECT:
    return 2;
  }

@@ -177,6 +185,116 @@ Optional<int> getMaskPos(unsigned Opc) {

bool isLegalAVL(SDValue AVL) { return AVL->getOpcode() == VEISD::LEGALAVL; }

/// Node Properties {

SDValue getNodeChain(SDValue Op) {
  if (MemSDNode *MemN = dyn_cast<MemSDNode>(Op.getNode()))
    return MemN->getChain();

  switch (Op->getOpcode()) {
  case VEISD::VVP_LOAD:
  case VEISD::VVP_STORE:
    return Op->getOperand(0);
  }
  return SDValue();
}

SDValue getMemoryPtr(SDValue Op) {
  if (auto *MemN = dyn_cast<MemSDNode>(Op.getNode()))
    return MemN->getBasePtr();

  switch (Op->getOpcode()) {
  case VEISD::VVP_LOAD:
    return Op->getOperand(1);
  case VEISD::VVP_STORE:
    return Op->getOperand(2);
  }
  return SDValue();
}

Optional<EVT> getIdiomaticVectorType(SDNode *Op) {
  unsigned OC = Op->getOpcode();

  // For memory ops -> the transferred data type
  if (auto MemN = dyn_cast<MemSDNode>(Op))
    return MemN->getMemoryVT();

  switch (OC) {
  // Standard ISD.
  case ISD::SELECT: // not aliased with VVP_SELECT
  case ISD::CONCAT_VECTORS:
  case ISD::EXTRACT_SUBVECTOR:
  case ISD::VECTOR_SHUFFLE:
  case ISD::BUILD_VECTOR:
  case ISD::SCALAR_TO_VECTOR:
    return Op->getValueType(0);
  }

  // Translate to VVP where possible.
  if (auto VVPOpc = getVVPOpcode(OC))
    OC = *VVPOpc;

  switch (OC) {
  default:
  case VEISD::VVP_SETCC:
    return Op->getOperand(0).getValueType();

  case VEISD::VVP_SELECT:
#define ADD_BINARY_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
#include "VVPNodes.def"
    return Op->getValueType(0);

  case VEISD::VVP_LOAD:
    return Op->getValueType(0);

  case VEISD::VVP_STORE:
    return Op->getOperand(1)->getValueType(0);

  // VEC
  case VEISD::VEC_BROADCAST:
    return Op->getValueType(0);
  }
}

SDValue getLoadStoreStride(SDValue Op, VECustomDAG &CDAG) {
  if (Op->getOpcode() == VEISD::VVP_STORE)
    return Op->getOperand(3);
  if (Op->getOpcode() == VEISD::VVP_LOAD)
    return Op->getOperand(2);

  if (isa<MemSDNode>(Op.getNode())) {
    // Regular MLOAD/MSTORE/LOAD/STORE
    // No stride argument -> use the contiguous element size as stride.
    uint64_t ElemStride = getIdiomaticVectorType(Op.getNode())
                              ->getVectorElementType()
                              .getStoreSize();
    return CDAG.getConstant(ElemStride, MVT::i64);
  }
  return SDValue();
}

SDValue getStoredValue(SDValue Op) {
  switch (Op->getOpcode()) {
  case VEISD::VVP_STORE:
    return Op->getOperand(1);
  }
  if (auto *StoreN = dyn_cast<StoreSDNode>(Op.getNode()))
    return StoreN->getValue();
  if (auto *StoreN = dyn_cast<MaskedStoreSDNode>(Op.getNode()))
    return StoreN->getValue();
  if (auto *StoreN = dyn_cast<VPStoreSDNode>(Op.getNode()))
    return StoreN->getValue();
  return SDValue();
}

SDValue getNodePassthru(SDValue Op) {
  if (auto *N = dyn_cast<MaskedLoadSDNode>(Op.getNode()))
    return N->getPassThru();
  return SDValue();
}

/// } Node Properties

SDValue getNodeAVL(SDValue Op) {
  auto PosOpt = getAVLPos(Op->getOpcode());
  return PosOpt ? Op->getOperand(*PosOpt) : SDValue();

@@ -88,6 +88,22 @@ std::pair<SDValue, bool> getAnnotatedNodeAVL(SDValue);

/// } AVL Functions

/// Node Properties {

Optional<EVT> getIdiomaticVectorType(SDNode *Op);

SDValue getLoadStoreStride(SDValue Op, VECustomDAG &CDAG);

SDValue getMemoryPtr(SDValue Op);

SDValue getNodeChain(SDValue Op);

SDValue getStoredValue(SDValue Op);

SDValue getNodePassthru(SDValue Op);

/// } Node Properties

enum class Packing {
  Normal = 0, // 256 element standard mode.
  Dense = 1   // 512 element packed mode.

@@ -157,6 +173,10 @@ public:
  SDValue getPack(EVT DestVT, SDValue LoVec, SDValue HiVec, SDValue AVL) const;
  /// } Packing

  SDValue getMergeValues(ArrayRef<SDValue> Values) const {
    return DAG.getMergeValues(Values, DL);
  }

  SDValue getConstant(uint64_t Val, EVT VT, bool IsTarget = false,
                      bool IsOpaque = false) const;

@@ -322,6 +322,17 @@ void VETargetLowering::initVPUActions() {
    setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
  }

  // vNt32, vNt64 ops (legal element types)
  for (MVT VT : MVT::vector_valuetypes()) {
    MVT ElemVT = VT.getVectorElementType();
    unsigned ElemBits = ElemVT.getScalarSizeInBits();
    if (ElemBits != 32 && ElemBits != 64)
      continue;

    for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
      setOperationAction(MemOpc, VT, Custom);
  }
}

SDValue

@@ -1321,6 +1332,12 @@ static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());

  EVT MemVT = LdNode->getMemoryVT();

  // Dispatch to vector isel.
  if (MemVT.isVector() && !isMaskType(MemVT))
    return lowerToVVP(Op, DAG);

  SDValue BasePtr = LdNode->getBasePtr();
  if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
    // Do not expand store instruction with frame index here because of

@@ -1328,7 +1345,6 @@ SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
    return Op;
  }

  EVT MemVT = LdNode->getMemoryVT();
  if (MemVT == MVT::f128)
    return lowerLoadF128(Op, DAG);

@@ -1375,6 +1391,11 @@ SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
  assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");

  // Always expand non-mask vector stores to VVP.
  EVT MemVT = StNode->getMemoryVT();
  if (MemVT.isVector() && !isMaskType(MemVT))
    return lowerToVVP(Op, DAG);

  SDValue BasePtr = StNode->getBasePtr();
  if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
    // Do not expand store instruction with frame index here because of

@@ -1382,7 +1403,6 @@ SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
    return Op;
  }

  EVT MemVT = StNode->getMemoryVT();
  if (MemVT == MVT::f128)
    return lowerStoreF128(Op, DAG);

@@ -1699,12 +1719,9 @@ VETargetLowering::getCustomOperationAction(SDNode &Op) const {
SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  LLVM_DEBUG(dbgs() << "::LowerOperation"; Op->print(dbgs()););
  unsigned Opcode = Op.getOpcode();
  if (ISD::isVPOpcode(Opcode))
    return lowerToVVP(Op, DAG);

  /// Scalar isel.
  switch (Opcode) {
  default:
    llvm_unreachable("Should not custom lower this!");
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::ATOMIC_SWAP:

@@ -1748,6 +1765,16 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  }

  /// Vector isel.
  LLVM_DEBUG(dbgs() << "::LowerOperation_VVP"; Op->print(dbgs()););
  if (ISD::isVPOpcode(Opcode))
    return lowerToVVP(Op, DAG);

  switch (Opcode) {
  default:
    llvm_unreachable("Should not custom lower this!");

  // Legalize the AVL of this internal node.
  case VEISD::VEC_BROADCAST:

@@ -1759,6 +1786,8 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    return legalizeInternalVectorOp(Op, DAG);

  // Translate into a VEC_*/VVP_* layer operation.
  case ISD::MLOAD:
  case ISD::MSTORE:
#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
#include "VVPNodes.def"
    if (isMaskArithmetic(Op) && isPackedVectorType(Op.getValueType()))

@@ -186,6 +186,8 @@ public:

  /// VVP Lowering {
  SDValue lowerToVVP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVVP_LOAD_STORE(SDValue Op, VECustomDAG &) const;

  SDValue legalizeInternalVectorOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue splitVectorOp(SDValue Op, VECustomDAG &CDAG) const;
  SDValue legalizePackedAVL(SDValue Op, VECustomDAG &CDAG) const;

@@ -21,6 +21,32 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"

static llvm::Type *getVectorElementType(llvm::Type *Ty) {
  return llvm::cast<llvm::FixedVectorType>(Ty)->getElementType();
}

static llvm::Type *getLaneType(llvm::Type *Ty) {
  using namespace llvm;
  if (!isa<VectorType>(Ty))
    return Ty;
  return getVectorElementType(Ty);
}

static bool isVectorLaneType(llvm::Type &ElemTy) {
  // check element sizes for vregs
  if (ElemTy.isIntegerTy()) {
    unsigned ScaBits = ElemTy.getScalarSizeInBits();
    return ScaBits == 1 || ScaBits == 32 || ScaBits == 64;
  }
  if (ElemTy.isPointerTy()) {
    return true;
  }
  if (ElemTy.isFloatTy() || ElemTy.isDoubleTy()) {
    return true;
  }
  return false;
}

namespace llvm {

class VETTIImpl : public BasicTTIImplBase<VETTIImpl> {

@@ -86,6 +112,21 @@ public:
    // output
    return false;
  }

  // Load & Store {
  bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) {
    return isVectorLaneType(*getLaneType(DataType));
  }
  bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) {
    return isVectorLaneType(*getLaneType(DataType));
  }
  bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) {
    return isVectorLaneType(*getLaneType(DataType));
  }
  bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) {
    return isVectorLaneType(*getLaneType(DataType));
  }
  // } Load & Store
};

} // namespace llvm

@@ -46,6 +46,13 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {

  // The representative and legalized vector type of this operation.
  VECustomDAG CDAG(DAG, Op);

  // Dispatch to complex lowering functions.
  switch (VVPOpcode) {
  case VEISD::VVP_LOAD:
  case VEISD::VVP_STORE:
    return lowerVVP_LOAD_STORE(Op, CDAG);
  }

  EVT OpVecVT = Op.getValueType();
  EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT);
  auto Packing = getTypePacking(LegalVecVT.getSimpleVT());

@@ -89,6 +96,60 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
  llvm_unreachable("lowerToVVP called for unexpected SDNode.");
}

SDValue VETargetLowering::lowerVVP_LOAD_STORE(SDValue Op,
                                              VECustomDAG &CDAG) const {
  auto VVPOpc = *getVVPOpcode(Op->getOpcode());
  const bool IsLoad = (VVPOpc == VEISD::VVP_LOAD);

  // Shares.
  SDValue BasePtr = getMemoryPtr(Op);
  SDValue Mask = getNodeMask(Op);
  SDValue Chain = getNodeChain(Op);
  SDValue AVL = getNodeAVL(Op);
  // Store specific.
  SDValue Data = getStoredValue(Op);
  // Load specific.
  SDValue PassThru = getNodePassthru(Op);

  auto DataVT = *getIdiomaticVectorType(Op.getNode());
  auto Packing = getTypePacking(DataVT);

  assert(Packing == Packing::Normal && "TODO Packed load store isel");

  // TODO: Infer lower AVL from mask.
  if (!AVL)
    AVL = CDAG.getConstant(DataVT.getVectorNumElements(), MVT::i32);

  // Default to the all-true mask.
  if (!Mask)
    Mask = CDAG.getConstantMask(Packing, true);

  SDValue StrideV = getLoadStoreStride(Op, CDAG);
  if (IsLoad) {
    MVT LegalDataVT = getLegalVectorType(
        Packing, DataVT.getVectorElementType().getSimpleVT());

    auto NewLoadV = CDAG.getNode(VEISD::VVP_LOAD, {LegalDataVT, MVT::Other},
                                 {Chain, BasePtr, StrideV, Mask, AVL});

    if (!PassThru || PassThru->isUndef())
      return NewLoadV;

    // Convert passthru to an explicit select node.
    SDValue DataV = CDAG.getNode(VEISD::VVP_SELECT, DataVT,
                                 {NewLoadV, PassThru, Mask, AVL});
    SDValue NewLoadChainV = SDValue(NewLoadV.getNode(), 1);

    // Merge them back into one node.
    return CDAG.getMergeValues({DataV, NewLoadChainV});
  }

  // VVP_STORE
  assert(VVPOpc == VEISD::VVP_STORE);
  return CDAG.getNode(VEISD::VVP_STORE, Op.getNode()->getVTList(),
                      {Chain, Data, BasePtr, StrideV, Mask, AVL});
}

SDValue VETargetLowering::legalizeInternalVectorOp(SDValue Op,
                                                   SelectionDAG &DAG) const {
  VECustomDAG CDAG(DAG, Op);

@@ -18,6 +18,24 @@
// TODO explain how VVP nodes relate to VP SDNodes once VP ISel is upstream.
//===----------------------------------------------------------------------===//

// vvp_load(ptr, stride, mask, avl)
def SDTLoadVVP : SDTypeProfile<1, 4, [
  SDTCisVec<0>,
  SDTCisPtrTy<1>,
  SDTCisInt<2>,
  SDTCisVec<3>,
  IsVLVT<4>
]>;

// vvp_store(data, ptr, stride, mask, avl)
def SDTStoreVVP : SDTypeProfile<0, 5, [
  SDTCisVec<0>,
  SDTCisPtrTy<1>,
  SDTCisInt<2>,
  SDTCisVec<3>,
  IsVLVT<4>
]>;

// Binary Operators {

// BinaryOp(x,y,mask,vl)

@@ -102,6 +120,12 @@ def vvp_fdiv : SDNode<"VEISD::VVP_FDIV", SDTFPBinOpVVP>;

// } Binary Operators

def vvp_load : SDNode<"VEISD::VVP_LOAD", SDTLoadVVP,
                      [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def vvp_store : SDNode<"VEISD::VVP_STORE", SDTStoreVVP,
                       [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def vvp_select : SDNode<"VEISD::VVP_SELECT", SDTSelectVVP>;

// setcc (lhs, rhs, cc, mask, vl)

@@ -17,6 +17,85 @@
//===----------------------------------------------------------------------===//
include "VVPInstrInfo.td"

multiclass VectorStore<ValueType DataVT,
    ValueType PtrVT, ValueType MaskVT,
    string STWithMask, string STNoMask> {
  // Unmasked (imm stride).
  def : Pat<(vvp_store
                DataVT:$val, PtrVT:$addr,
                (i64 simm7:$stride), (MaskVT true_mask), i32:$avl),
            (!cast<Instruction>(STNoMask#"irvl")
                (LO7 $stride), $addr, $val, $avl)>;
  // Unmasked.
  def : Pat<(vvp_store
                DataVT:$val, PtrVT:$addr,
                i64:$stride, (MaskVT true_mask), i32:$avl),
            (!cast<Instruction>(STNoMask#"rrvl")
                $stride, $addr, $val, $avl)>;
  // Masked (imm stride).
  def : Pat<(vvp_store
                DataVT:$val, PtrVT:$addr,
                (i64 simm7:$stride), MaskVT:$mask, i32:$avl),
            (!cast<Instruction>(STWithMask#"irvml")
                (LO7 $stride), $addr, $val, $mask, $avl)>;
  // Masked.
  def : Pat<(vvp_store
                DataVT:$val, PtrVT:$addr,
                i64:$stride, MaskVT:$mask, i32:$avl),
            (!cast<Instruction>(STWithMask#"rrvml")
                $stride, $addr, $val, $mask, $avl)>;
}

defm : VectorStore<v256f64, i64, v256i1, "VST", "VST">;
defm : VectorStore<v256i64, i64, v256i1, "VST", "VST">;
defm : VectorStore<v256f32, i64, v256i1, "VSTU", "VSTU">;
defm : VectorStore<v256i32, i64, v256i1, "VSTL", "VSTL">;

multiclass VectorLoad<ValueType DataVT,
    ValueType PtrVT, ValueType MaskVT,
    string GTWithMask, string LDNoMask> {
  // Unmasked (imm stride).
  def : Pat<(DataVT (vvp_load
                PtrVT:$addr, (i64 simm7:$stride),
                (MaskVT true_mask), i32:$avl)),
            (!cast<Instruction>(LDNoMask#"irl")
                (LO7 $stride), $addr, $avl)>;
  // Unmasked.
  def : Pat<(DataVT (vvp_load
                PtrVT:$addr, i64:$stride,
                (MaskVT true_mask), i32:$avl)),
            (!cast<Instruction>(LDNoMask#"rrl")
                $stride, PtrVT:$addr, $avl)>;
  // Masked (imm stride).
  def : Pat<(DataVT (vvp_load
                PtrVT:$addr, (i64 simm7:$stride),
                MaskVT:$mask, i32:$avl)),
            (!cast<Instruction>(GTWithMask#"vizml")
                (VADDULrvml $addr,
                    (VMULULivml (LO7 $stride), (VSEQl $avl), $mask, $avl),
                    $mask, $avl),
                0, 0,
                $mask,
                $avl)>;
  // Masked.
  def : Pat<(DataVT (vvp_load
                PtrVT:$addr, i64:$stride, MaskVT:$mask, i32:$avl)),
            (!cast<Instruction>(GTWithMask#"vizml")
                (VADDULrvml $addr,
                    (VMULULrvml $stride, (VSEQl $avl), $mask, $avl),
                    $mask, $avl),
                0, 0,
                $mask,
                $avl)>;
}

defm : VectorLoad<v256f64, i64, v256i1, "VGT", "VLD">;
defm : VectorLoad<v256i64, i64, v256i1, "VGT", "VLD">;
defm : VectorLoad<v256f32, i64, v256i1, "VGTU", "VLDU">;
defm : VectorLoad<v256i32, i64, v256i1, "VGTLZX", "VLDLZX">;


multiclass Binary_rv<SDPatternOperator OpNode,
    ValueType ScalarVT, ValueType DataVT,
    ValueType MaskVT, string OpBaseName> {

@@ -44,6 +44,9 @@
#define REGISTER_PACKED(OPC)
#endif

ADD_VVP_OP(VVP_LOAD,LOAD) HANDLE_VP_TO_VVP(VP_LOAD, VVP_LOAD) REGISTER_PACKED(VVP_LOAD)
ADD_VVP_OP(VVP_STORE,STORE) HANDLE_VP_TO_VVP(VP_STORE, VVP_STORE) REGISTER_PACKED(VVP_STORE)

// Integer arithmetic.
ADD_BINARY_VVP_OP_COMPACT(ADD) REGISTER_PACKED(VVP_ADD)
ADD_BINARY_VVP_OP_COMPACT(SUB) REGISTER_PACKED(VVP_SUB)

@@ -0,0 +1,127 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s

declare <128 x double> @llvm.masked.load.v128f64.p0v128f64(<128 x double>* %0, i32 immarg %1, <128 x i1> %2, <128 x double> %3) #0

; TODO: Custom widen by lowering to vvp_load in ReplaceNodeResult
; Function Attrs: nounwind
; define fastcc <128 x double> @vec_mload_v128f64(<128 x double>* %P, <128 x i1> %M) {
;   %r = call <128 x double> @llvm.masked.load.v128f64.p0v128f64(<128 x double>* %P, i32 16, <128 x i1> %M, <128 x double> undef)
;   ret <128 x double> %r
; }


declare <256 x double> @llvm.masked.load.v256f64.p0v256f64(<256 x double>* %0, i32 immarg %1, <256 x i1> %2, <256 x double> %3) #0

; Function Attrs: nounwind
define fastcc <256 x double> @vec_mload_v256f64(<256 x double>* %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mload_v256f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s1, 256
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vseq %v0
; CHECK-NEXT:    vmulu.l %v0, 8, %v0, %vm1
; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT:    vgt %v0, %v0, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x double> @llvm.masked.load.v256f64.p0v256f64(<256 x double>* %P, i32 16, <256 x i1> %M, <256 x double> undef)
  ret <256 x double> %r
}

; Function Attrs: nounwind
define fastcc <256 x double> @vec_load_v256f64(<256 x double>* %P) {
; CHECK-LABEL: vec_load_v256f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s1, 256
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vld %v0, 8, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %r = load <256 x double>, <256 x double>* %P, align 4
  ret <256 x double> %r
}

; Function Attrs: nounwind
define fastcc <256 x double> @vec_mload_pt_v256f64(<256 x double>* %P, <256 x double> %PT, <256 x i1> %M) {
; CHECK-LABEL: vec_mload_pt_v256f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s1, 256
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vseq %v1
; CHECK-NEXT:    vmulu.l %v1, 8, %v1, %vm1
; CHECK-NEXT:    vaddu.l %v1, %s0, %v1, %vm1
; CHECK-NEXT:    vgt %v1, %v1, 0, 0, %vm1
; CHECK-NEXT:    vmrg %v0, %v0, %v1, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x double> @llvm.masked.load.v256f64.p0v256f64(<256 x double>* %P, i32 16, <256 x i1> %M, <256 x double> %PT)
  ret <256 x double> %r
}


declare <256 x float> @llvm.masked.load.v256f32.p0v256f32(<256 x float>* %0, i32 immarg %1, <256 x i1> %2, <256 x float> %3) #0

; Function Attrs: nounwind
define fastcc <256 x float> @vec_mload_v256f32(<256 x float>* %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mload_v256f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s1, 256
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vseq %v0
; CHECK-NEXT:    vmulu.l %v0, 4, %v0, %vm1
; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT:    vgtu %v0, %v0, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x float> @llvm.masked.load.v256f32.p0v256f32(<256 x float>* %P, i32 16, <256 x i1> %M, <256 x float> undef)
  ret <256 x float> %r
}

; Function Attrs: nounwind
define fastcc <256 x float> @vec_mload_pt_v256f32(<256 x float>* %P, <256 x float> %PT, <256 x i1> %M) {
; CHECK-LABEL: vec_mload_pt_v256f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s1, 256
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vseq %v1
; CHECK-NEXT:    vmulu.l %v1, 4, %v1, %vm1
; CHECK-NEXT:    vaddu.l %v1, %s0, %v1, %vm1
; CHECK-NEXT:    vgtu %v1, %v1, 0, 0, %vm1
; CHECK-NEXT:    vmrg %v0, %v0, %v1, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x float> @llvm.masked.load.v256f32.p0v256f32(<256 x float>* %P, i32 16, <256 x i1> %M, <256 x float> %PT)
  ret <256 x float> %r
}


declare <256 x i32> @llvm.masked.load.v256i32.p0v256i32(<256 x i32>* %0, i32 immarg %1, <256 x i1> %2, <256 x i32> %3) #0

; Function Attrs: nounwind
define fastcc <256 x i32> @vec_mload_v256i32(<256 x i32>* %P, <256 x i1> %M) {
; CHECK-LABEL: vec_mload_v256i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s1, 256
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vseq %v0
; CHECK-NEXT:    vmulu.l %v0, 4, %v0, %vm1
; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT:    vgtl.zx %v0, %v0, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x i32> @llvm.masked.load.v256i32.p0v256i32(<256 x i32>* %P, i32 16, <256 x i1> %M, <256 x i32> undef)
  ret <256 x i32> %r
}

; Function Attrs: nounwind
define fastcc <256 x i32> @vec_mload_pt_v256i32(<256 x i32>* %P, <256 x i32> %PT, <256 x i1> %M) {
; CHECK-LABEL: vec_mload_pt_v256i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s1, 256
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vseq %v1
; CHECK-NEXT:    vmulu.l %v1, 4, %v1, %vm1
; CHECK-NEXT:    vaddu.l %v1, %s0, %v1, %vm1
; CHECK-NEXT:    vgtl.zx %v1, %v1, 0, 0, %vm1
; CHECK-NEXT:    vmrg %v0, %v0, %v1, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x i32> @llvm.masked.load.v256i32.p0v256i32(<256 x i32>* %P, i32 16, <256 x i1> %M, <256 x i32> %PT)
  ret <256 x i32> %r
}

attributes #0 = { argmemonly nounwind readonly willreturn }

@@ -0,0 +1,43 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s

declare void @llvm.masked.store.v256f64.p0v256f64(<256 x double>, <256 x double>*, i32 immarg, <256 x i1>)

define fastcc void @vec_mstore_v256f64(<256 x double>* %P, <256 x double> %V, <256 x i1> %M) {
; CHECK-LABEL: vec_mstore_v256f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s1, 256
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vst %v0, 8, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  call void @llvm.masked.store.v256f64.p0v256f64(<256 x double> %V, <256 x double>* %P, i32 16, <256 x i1> %M)
  ret void
}


declare void @llvm.masked.store.v256f32.p0v256f32(<256 x float>, <256 x float>*, i32 immarg, <256 x i1>)

define fastcc void @vec_mstore_v256f32(<256 x float>* %P, <256 x float> %V, <256 x i1> %M) {
; CHECK-LABEL: vec_mstore_v256f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s1, 256
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vstu %v0, 4, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  call void @llvm.masked.store.v256f32.p0v256f32(<256 x float> %V, <256 x float>* %P, i32 16, <256 x i1> %M)
  ret void
}


declare void @llvm.masked.store.v256i32.p0v256i32(<256 x i32>, <256 x i32>*, i32 immarg, <256 x i1>)

define fastcc void @vec_mstore_v256i32(<256 x i32>* %P, <256 x i32> %V, <256 x i1> %M) {
; CHECK-LABEL: vec_mstore_v256i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lea %s1, 256
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vstl %v0, 4, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  call void @llvm.masked.store.v256i32.p0v256i32(<256 x i32> %V, <256 x i32>* %P, i32 16, <256 x i1> %M)
  ret void
}