forked from OSchip/llvm-project
[LegalizeTypes][VP] Add splitting support for vp.gather and vp.scatter
Split these nodes in a similar way as their masked versions. Reviewed By: frasercrmck, craig.topper Differential Revision: https://reviews.llvm.org/D117760
This commit is contained in:
parent
13252160c3
commit
2233befa5d
|
@ -821,6 +821,9 @@ private:
|
|||
/// Split mask operator of a VP intrinsic.
|
||||
std::pair<SDValue, SDValue> SplitMask(SDValue Mask);
|
||||
|
||||
/// Split mask operator of a VP intrinsic in a given location.
|
||||
std::pair<SDValue, SDValue> SplitMask(SDValue Mask, const SDLoc &DL);
|
||||
|
||||
// Helper function for incrementing the pointer when splitting
|
||||
// memory operations
|
||||
void IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI,
|
||||
|
@ -851,7 +854,8 @@ private:
|
|||
void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_Gather(MemSDNode *VPGT, SDValue &Lo, SDValue &Hi,
|
||||
bool SplitSETCC = false);
|
||||
void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
|
@ -879,8 +883,8 @@ private:
|
|||
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
|
||||
SDValue SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo);
|
||||
SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
|
||||
SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
|
||||
SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, unsigned OpNo);
|
||||
SDValue SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo);
|
||||
SDValue SplitVecOp_Gather(MemSDNode *MGT, unsigned OpNo);
|
||||
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
|
||||
SDValue SplitVecOp_VSETCC(SDNode *N);
|
||||
SDValue SplitVecOp_FP_ROUND(SDNode *N);
|
||||
|
|
|
@ -944,7 +944,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
|
|||
SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
|
||||
break;
|
||||
case ISD::MGATHER:
|
||||
SplitVecRes_MGATHER(cast<MaskedGatherSDNode>(N), Lo, Hi);
|
||||
case ISD::VP_GATHER:
|
||||
SplitVecRes_Gather(cast<MemSDNode>(N), Lo, Hi, /*SplitSETCC*/ true);
|
||||
break;
|
||||
case ISD::SETCC:
|
||||
SplitVecRes_SETCC(N, Lo, Hi);
|
||||
|
@ -1118,12 +1119,17 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
|
|||
}
|
||||
|
||||
std::pair<SDValue, SDValue> DAGTypeLegalizer::SplitMask(SDValue Mask) {
|
||||
return SplitMask(Mask, SDLoc(Mask));
|
||||
}
|
||||
|
||||
std::pair<SDValue, SDValue> DAGTypeLegalizer::SplitMask(SDValue Mask,
|
||||
const SDLoc &DL) {
|
||||
SDValue MaskLo, MaskHi;
|
||||
EVT MaskVT = Mask.getValueType();
|
||||
if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(Mask, MaskLo, MaskHi);
|
||||
else
|
||||
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, SDLoc(Mask));
|
||||
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
|
||||
return std::make_pair(MaskLo, MaskHi);
|
||||
}
|
||||
|
||||
|
@ -1923,61 +1929,85 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
|
|||
|
||||
}
|
||||
|
||||
void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
|
||||
SDValue &Lo, SDValue &Hi) {
|
||||
void DAGTypeLegalizer::SplitVecRes_Gather(MemSDNode *N, SDValue &Lo,
|
||||
SDValue &Hi, bool SplitSETCC) {
|
||||
EVT LoVT, HiVT;
|
||||
SDLoc dl(MGT);
|
||||
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0));
|
||||
SDLoc dl(N);
|
||||
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
|
||||
|
||||
SDValue Ch = MGT->getChain();
|
||||
SDValue Ptr = MGT->getBasePtr();
|
||||
SDValue Mask = MGT->getMask();
|
||||
SDValue PassThru = MGT->getPassThru();
|
||||
SDValue Index = MGT->getIndex();
|
||||
SDValue Scale = MGT->getScale();
|
||||
EVT MemoryVT = MGT->getMemoryVT();
|
||||
Align Alignment = MGT->getOriginalAlign();
|
||||
ISD::LoadExtType ExtType = MGT->getExtensionType();
|
||||
SDValue Ch = N->getChain();
|
||||
SDValue Ptr = N->getBasePtr();
|
||||
struct Operands {
|
||||
SDValue Mask;
|
||||
SDValue Index;
|
||||
SDValue Scale;
|
||||
} Ops = [&]() -> Operands {
|
||||
if (auto *MSC = dyn_cast<MaskedGatherSDNode>(N)) {
|
||||
return {MSC->getMask(), MSC->getIndex(), MSC->getScale()};
|
||||
}
|
||||
auto *VPSC = cast<VPGatherSDNode>(N);
|
||||
return {VPSC->getMask(), VPSC->getIndex(), VPSC->getScale()};
|
||||
}();
|
||||
|
||||
EVT MemoryVT = N->getMemoryVT();
|
||||
Align Alignment = N->getOriginalAlign();
|
||||
|
||||
// Split Mask operand
|
||||
SDValue MaskLo, MaskHi;
|
||||
if (Mask.getOpcode() == ISD::SETCC) {
|
||||
SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
|
||||
if (SplitSETCC && Ops.Mask.getOpcode() == ISD::SETCC) {
|
||||
SplitVecRes_SETCC(Ops.Mask.getNode(), MaskLo, MaskHi);
|
||||
} else {
|
||||
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(Mask, MaskLo, MaskHi);
|
||||
else
|
||||
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
|
||||
std::tie(MaskLo, MaskHi) = SplitMask(Ops.Mask, dl);
|
||||
}
|
||||
|
||||
EVT LoMemVT, HiMemVT;
|
||||
// Split MemoryVT
|
||||
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
|
||||
|
||||
SDValue PassThruLo, PassThruHi;
|
||||
if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(PassThru, PassThruLo, PassThruHi);
|
||||
else
|
||||
std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
|
||||
|
||||
SDValue IndexHi, IndexLo;
|
||||
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(Index, IndexLo, IndexHi);
|
||||
if (getTypeAction(Ops.Index.getValueType()) ==
|
||||
TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(Ops.Index, IndexLo, IndexHi);
|
||||
else
|
||||
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
|
||||
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, dl);
|
||||
|
||||
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
|
||||
MGT->getPointerInfo(), MachineMemOperand::MOLoad,
|
||||
MemoryLocation::UnknownSize, Alignment, MGT->getAAInfo(),
|
||||
MGT->getRanges());
|
||||
N->getPointerInfo(), MachineMemOperand::MOLoad,
|
||||
MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
|
||||
|
||||
SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
|
||||
Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, OpsLo,
|
||||
MMO, MGT->getIndexType(), ExtType);
|
||||
if (auto *MGT = dyn_cast<MaskedGatherSDNode>(N)) {
|
||||
SDValue PassThru = MGT->getPassThru();
|
||||
SDValue PassThruLo, PassThruHi;
|
||||
if (getTypeAction(PassThru.getValueType()) ==
|
||||
TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(PassThru, PassThruLo, PassThruHi);
|
||||
else
|
||||
std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
|
||||
|
||||
SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
|
||||
Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, OpsHi,
|
||||
MMO, MGT->getIndexType(), ExtType);
|
||||
ISD::LoadExtType ExtType = MGT->getExtensionType();
|
||||
ISD::MemIndexType IndexTy = MGT->getIndexType();
|
||||
|
||||
SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Ops.Scale};
|
||||
Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl,
|
||||
OpsLo, MMO, IndexTy, ExtType);
|
||||
|
||||
SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Ops.Scale};
|
||||
Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl,
|
||||
OpsHi, MMO, IndexTy, ExtType);
|
||||
} else {
|
||||
auto *VPGT = cast<VPGatherSDNode>(N);
|
||||
SDValue EVLLo, EVLHi;
|
||||
std::tie(EVLLo, EVLHi) =
|
||||
DAG.SplitEVL(VPGT->getVectorLength(), MemoryVT, dl);
|
||||
|
||||
SDValue OpsLo[] = {Ch, Ptr, IndexLo, Ops.Scale, MaskLo, EVLLo};
|
||||
Lo = DAG.getGatherVP(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, OpsLo,
|
||||
MMO, VPGT->getIndexType());
|
||||
|
||||
SDValue OpsHi[] = {Ch, Ptr, IndexHi, Ops.Scale, MaskHi, EVLHi};
|
||||
Hi = DAG.getGatherVP(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, OpsHi,
|
||||
MMO, VPGT->getIndexType());
|
||||
}
|
||||
|
||||
// Build a factor node to remember that this load is independent of the
|
||||
// other one.
|
||||
|
@ -1986,10 +2016,9 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
|
|||
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(MGT, 1), Ch);
|
||||
ReplaceValueWith(SDValue(N, 1), Ch);
|
||||
}
|
||||
|
||||
|
||||
void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
|
||||
assert(N->getValueType(0).isVector() &&
|
||||
N->getOperand(0).getValueType().isVector() &&
|
||||
|
@ -2286,10 +2315,12 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
|
|||
Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
|
||||
break;
|
||||
case ISD::MSCATTER:
|
||||
Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo);
|
||||
case ISD::VP_SCATTER:
|
||||
Res = SplitVecOp_Scatter(cast<MemSDNode>(N), OpNo);
|
||||
break;
|
||||
case ISD::MGATHER:
|
||||
Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo);
|
||||
case ISD::VP_GATHER:
|
||||
Res = SplitVecOp_Gather(cast<MemSDNode>(N), OpNo);
|
||||
break;
|
||||
case ISD::VSELECT:
|
||||
Res = SplitVecOp_VSELECT(N, OpNo);
|
||||
|
@ -2663,69 +2694,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
|
|||
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), N->getValueType(0), Lo, Hi);
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
|
||||
unsigned OpNo) {
|
||||
EVT LoVT, HiVT;
|
||||
SDLoc dl(MGT);
|
||||
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0));
|
||||
SDValue DAGTypeLegalizer::SplitVecOp_Gather(MemSDNode *N, unsigned OpNo) {
|
||||
(void)OpNo;
|
||||
SDValue Lo, Hi;
|
||||
SplitVecRes_Gather(N, Lo, Hi);
|
||||
|
||||
SDValue Ch = MGT->getChain();
|
||||
SDValue Ptr = MGT->getBasePtr();
|
||||
SDValue Index = MGT->getIndex();
|
||||
SDValue Scale = MGT->getScale();
|
||||
SDValue Mask = MGT->getMask();
|
||||
SDValue PassThru = MGT->getPassThru();
|
||||
Align Alignment = MGT->getOriginalAlign();
|
||||
ISD::LoadExtType ExtType = MGT->getExtensionType();
|
||||
|
||||
SDValue MaskLo, MaskHi;
|
||||
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
// Split Mask operand
|
||||
GetSplitVector(Mask, MaskLo, MaskHi);
|
||||
else
|
||||
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
|
||||
|
||||
EVT MemoryVT = MGT->getMemoryVT();
|
||||
EVT LoMemVT, HiMemVT;
|
||||
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
|
||||
|
||||
SDValue PassThruLo, PassThruHi;
|
||||
if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(PassThru, PassThruLo, PassThruHi);
|
||||
else
|
||||
std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
|
||||
|
||||
SDValue IndexHi, IndexLo;
|
||||
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(Index, IndexLo, IndexHi);
|
||||
else
|
||||
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
|
||||
|
||||
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
|
||||
MGT->getPointerInfo(), MachineMemOperand::MOLoad,
|
||||
MemoryLocation::UnknownSize, Alignment, MGT->getAAInfo(),
|
||||
MGT->getRanges());
|
||||
|
||||
SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
|
||||
SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl,
|
||||
OpsLo, MMO, MGT->getIndexType(), ExtType);
|
||||
|
||||
SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
|
||||
SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl,
|
||||
OpsHi, MMO, MGT->getIndexType(), ExtType);
|
||||
|
||||
// Build a factor node to remember that this load is independent of the
|
||||
// other one.
|
||||
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
|
||||
Hi.getValue(1));
|
||||
|
||||
// Legalize the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(MGT, 1), Ch);
|
||||
|
||||
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MGT->getValueType(0), Lo,
|
||||
Hi);
|
||||
ReplaceValueWith(SDValue(MGT, 0), Res);
|
||||
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, N, N->getValueType(0), Lo, Hi);
|
||||
ReplaceValueWith(SDValue(N, 0), Res);
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
@ -2886,64 +2861,87 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
|
|||
return Res;
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
|
||||
unsigned OpNo) {
|
||||
SDValue Ch = N->getChain();
|
||||
SDValue DAGTypeLegalizer::SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo) {
|
||||
SDValue Ch = N->getChain();
|
||||
SDValue Ptr = N->getBasePtr();
|
||||
SDValue Mask = N->getMask();
|
||||
SDValue Index = N->getIndex();
|
||||
SDValue Scale = N->getScale();
|
||||
SDValue Data = N->getValue();
|
||||
EVT MemoryVT = N->getMemoryVT();
|
||||
Align Alignment = N->getOriginalAlign();
|
||||
SDLoc DL(N);
|
||||
|
||||
struct Operands {
|
||||
SDValue Mask;
|
||||
SDValue Index;
|
||||
SDValue Scale;
|
||||
SDValue Data;
|
||||
} Ops = [&]() -> Operands {
|
||||
if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
|
||||
return {MSC->getMask(), MSC->getIndex(), MSC->getScale(),
|
||||
MSC->getValue()};
|
||||
}
|
||||
auto *VPSC = cast<VPScatterSDNode>(N);
|
||||
return {VPSC->getMask(), VPSC->getIndex(), VPSC->getScale(),
|
||||
VPSC->getValue()};
|
||||
}();
|
||||
// Split all operands
|
||||
|
||||
EVT LoMemVT, HiMemVT;
|
||||
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
|
||||
|
||||
SDValue DataLo, DataHi;
|
||||
if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
if (getTypeAction(Ops.Data.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
// Split Data operand
|
||||
GetSplitVector(Data, DataLo, DataHi);
|
||||
GetSplitVector(Ops.Data, DataLo, DataHi);
|
||||
else
|
||||
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
|
||||
std::tie(DataLo, DataHi) = DAG.SplitVector(Ops.Data, DL);
|
||||
|
||||
// Split Mask operand
|
||||
SDValue MaskLo, MaskHi;
|
||||
if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
|
||||
SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
|
||||
if (OpNo == 1 && Ops.Mask.getOpcode() == ISD::SETCC) {
|
||||
SplitVecRes_SETCC(Ops.Mask.getNode(), MaskLo, MaskHi);
|
||||
} else {
|
||||
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(Mask, MaskLo, MaskHi);
|
||||
else
|
||||
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
|
||||
std::tie(MaskLo, MaskHi) = SplitMask(Ops.Mask, DL);
|
||||
}
|
||||
|
||||
SDValue IndexHi, IndexLo;
|
||||
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(Index, IndexLo, IndexHi);
|
||||
if (getTypeAction(Ops.Index.getValueType()) ==
|
||||
TargetLowering::TypeSplitVector)
|
||||
GetSplitVector(Ops.Index, IndexLo, IndexHi);
|
||||
else
|
||||
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
|
||||
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, DL);
|
||||
|
||||
SDValue Lo;
|
||||
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
|
||||
N->getPointerInfo(), MachineMemOperand::MOStore,
|
||||
MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
|
||||
|
||||
SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale};
|
||||
Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), LoMemVT,
|
||||
DL, OpsLo, MMO, N->getIndexType(),
|
||||
N->isTruncatingStore());
|
||||
if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
|
||||
SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Ops.Scale};
|
||||
Lo =
|
||||
DAG.getMaskedScatter(DAG.getVTList(MVT::Other), LoMemVT, DL, OpsLo, MMO,
|
||||
MSC->getIndexType(), MSC->isTruncatingStore());
|
||||
|
||||
// The order of the Scatter operation after split is well defined. The "Hi"
|
||||
// part comes after the "Lo". So these two operations should be chained one
|
||||
// after another.
|
||||
SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Ops.Scale};
|
||||
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), HiMemVT, DL, OpsHi,
|
||||
MMO, MSC->getIndexType(),
|
||||
MSC->isTruncatingStore());
|
||||
}
|
||||
auto *VPSC = cast<VPScatterSDNode>(N);
|
||||
SDValue EVLLo, EVLHi;
|
||||
std::tie(EVLLo, EVLHi) =
|
||||
DAG.SplitEVL(VPSC->getVectorLength(), Ops.Data.getValueType(), DL);
|
||||
|
||||
SDValue OpsLo[] = {Ch, DataLo, Ptr, IndexLo, Ops.Scale, MaskLo, EVLLo};
|
||||
Lo = DAG.getScatterVP(DAG.getVTList(MVT::Other), LoMemVT, DL, OpsLo, MMO,
|
||||
VPSC->getIndexType());
|
||||
|
||||
// The order of the Scatter operation after split is well defined. The "Hi"
|
||||
// part comes after the "Lo". So these two operations should be chained one
|
||||
// after another.
|
||||
SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale};
|
||||
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), HiMemVT,
|
||||
DL, OpsHi, MMO, N->getIndexType(),
|
||||
N->isTruncatingStore());
|
||||
SDValue OpsHi[] = {Lo, DataHi, Ptr, IndexHi, Ops.Scale, MaskHi, EVLHi};
|
||||
return DAG.getScatterVP(DAG.getVTList(MVT::Other), HiMemVT, DL, OpsHi, MMO,
|
||||
VPSC->getIndexType());
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
|
||||
|
|
|
@ -271,6 +271,54 @@ define <8 x i8> @vpgather_baseidx_v8i8(i8* %base, <8 x i8> %idxs, <8 x i1> %m, i
|
|||
ret <8 x i8> %v
|
||||
}
|
||||
|
||||
declare <32 x i8> @llvm.vp.gather.v32i8.v32p0i8(<32 x i8*>, <32 x i1>, i32)
|
||||
|
||||
define <32 x i8> @vpgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
|
||||
; RV32-LABEL: vpgather_baseidx_v32i8:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: li a2, 32
|
||||
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
|
||||
; RV32-NEXT: vsext.vf4 v16, v8
|
||||
; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu
|
||||
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: vpgather_baseidx_v32i8:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: addi a3, a1, -16
|
||||
; RV64-NEXT: vmv1r.v v10, v0
|
||||
; RV64-NEXT: li a2, 0
|
||||
; RV64-NEXT: bltu a1, a3, .LBB13_2
|
||||
; RV64-NEXT: # %bb.1:
|
||||
; RV64-NEXT: mv a2, a3
|
||||
; RV64-NEXT: .LBB13_2:
|
||||
; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v12, v8, 16
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsext.vf8 v16, v12
|
||||
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v0, v10, 2
|
||||
; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, mu
|
||||
; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t
|
||||
; RV64-NEXT: li a2, 16
|
||||
; RV64-NEXT: bltu a1, a2, .LBB13_4
|
||||
; RV64-NEXT: # %bb.3:
|
||||
; RV64-NEXT: li a1, 16
|
||||
; RV64-NEXT: .LBB13_4:
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsext.vf8 v16, v8
|
||||
; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, mu
|
||||
; RV64-NEXT: vmv1r.v v0, v10
|
||||
; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t
|
||||
; RV64-NEXT: li a0, 32
|
||||
; RV64-NEXT: vsetvli zero, a0, e8, m2, tu, mu
|
||||
; RV64-NEXT: vslideup.vi v8, v12, 16
|
||||
; RV64-NEXT: ret
|
||||
%ptrs = getelementptr inbounds i8, i8* %base, <32 x i8> %idxs
|
||||
%v = call <32 x i8> @llvm.vp.gather.v32i8.v32p0i8(<32 x i8*> %ptrs, <32 x i1> %m, i32 %evl)
|
||||
ret <32 x i8> %v
|
||||
}
|
||||
|
||||
declare <2 x i16> @llvm.vp.gather.v2i16.v2p0i16(<2 x i16*>, <2 x i1>, i32)
|
||||
|
||||
define <2 x i16> @vpgather_v2i16(<2 x i16*> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
|
||||
|
@ -1870,3 +1918,756 @@ define <8 x double> @vpgather_baseidx_v8f64(double* %base, <8 x i64> %idxs, <8 x
|
|||
%v = call <8 x double> @llvm.vp.gather.v8f64.v8p0f64(<8 x double*> %ptrs, <8 x i1> %m, i32 %evl)
|
||||
ret <8 x double> %v
|
||||
}
|
||||
|
||||
declare <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*>, <32 x i1>, i32)
|
||||
|
||||
define <32 x double> @vpgather_v32f64(<32 x double*> %ptrs, <32 x i1> %m, i32 zeroext %evl) {
|
||||
; RV32-LABEL: vpgather_v32f64:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: addi a2, a0, -16
|
||||
; RV32-NEXT: vmv1r.v v1, v0
|
||||
; RV32-NEXT: li a1, 0
|
||||
; RV32-NEXT: bltu a0, a2, .LBB86_2
|
||||
; RV32-NEXT: # %bb.1:
|
||||
; RV32-NEXT: mv a1, a2
|
||||
; RV32-NEXT: .LBB86_2:
|
||||
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v24, v8, 16
|
||||
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v0, v1, 2
|
||||
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV32-NEXT: vluxei32.v v16, (zero), v24, v0.t
|
||||
; RV32-NEXT: li a1, 16
|
||||
; RV32-NEXT: bltu a0, a1, .LBB86_4
|
||||
; RV32-NEXT: # %bb.3:
|
||||
; RV32-NEXT: li a0, 16
|
||||
; RV32-NEXT: .LBB86_4:
|
||||
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu
|
||||
; RV32-NEXT: vmv1r.v v0, v1
|
||||
; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t
|
||||
; RV32-NEXT: vmv.v.v v8, v24
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: vpgather_v32f64:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: addi a2, a0, -16
|
||||
; RV64-NEXT: vmv1r.v v24, v0
|
||||
; RV64-NEXT: li a1, 0
|
||||
; RV64-NEXT: bltu a0, a2, .LBB86_2
|
||||
; RV64-NEXT: # %bb.1:
|
||||
; RV64-NEXT: mv a1, a2
|
||||
; RV64-NEXT: .LBB86_2:
|
||||
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v0, v24, 2
|
||||
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t
|
||||
; RV64-NEXT: li a1, 16
|
||||
; RV64-NEXT: bltu a0, a1, .LBB86_4
|
||||
; RV64-NEXT: # %bb.3:
|
||||
; RV64-NEXT: li a0, 16
|
||||
; RV64-NEXT: .LBB86_4:
|
||||
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu
|
||||
; RV64-NEXT: vmv1r.v v0, v24
|
||||
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
|
||||
; RV64-NEXT: ret
|
||||
%v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
|
||||
ret <32 x double> %v
|
||||
}
|
||||
|
||||
define <32 x double> @vpgather_baseidx_v32i8_v32f64(double* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
|
||||
; RV32-LABEL: vpgather_baseidx_v32i8_v32f64:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: li a3, 16
|
||||
; RV32-NEXT: mv a2, a1
|
||||
; RV32-NEXT: bltu a1, a3, .LBB87_2
|
||||
; RV32-NEXT: # %bb.1:
|
||||
; RV32-NEXT: li a2, 16
|
||||
; RV32-NEXT: .LBB87_2:
|
||||
; RV32-NEXT: li a3, 32
|
||||
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, mu
|
||||
; RV32-NEXT: vsext.vf4 v16, v8
|
||||
; RV32-NEXT: vsll.vi v16, v16, 3
|
||||
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
|
||||
; RV32-NEXT: addi a3, a1, -16
|
||||
; RV32-NEXT: li a2, 0
|
||||
; RV32-NEXT: bltu a1, a3, .LBB87_4
|
||||
; RV32-NEXT: # %bb.3:
|
||||
; RV32-NEXT: mv a2, a3
|
||||
; RV32-NEXT: .LBB87_4:
|
||||
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v0, v0, 2
|
||||
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v24, v16, 16
|
||||
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: vpgather_baseidx_v32i8_v32f64:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: addi a3, a1, -16
|
||||
; RV64-NEXT: vmv1r.v v10, v0
|
||||
; RV64-NEXT: li a2, 0
|
||||
; RV64-NEXT: bltu a1, a3, .LBB87_2
|
||||
; RV64-NEXT: # %bb.1:
|
||||
; RV64-NEXT: mv a2, a3
|
||||
; RV64-NEXT: .LBB87_2:
|
||||
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v0, v10, 2
|
||||
; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v12, v8, 16
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsext.vf8 v16, v12
|
||||
; RV64-NEXT: vsll.vi v16, v16, 3
|
||||
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
|
||||
; RV64-NEXT: li a2, 16
|
||||
; RV64-NEXT: bltu a1, a2, .LBB87_4
|
||||
; RV64-NEXT: # %bb.3:
|
||||
; RV64-NEXT: li a1, 16
|
||||
; RV64-NEXT: .LBB87_4:
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsext.vf8 v24, v8
|
||||
; RV64-NEXT: vsll.vi v24, v24, 3
|
||||
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV64-NEXT: vmv1r.v v0, v10
|
||||
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
|
||||
; RV64-NEXT: ret
|
||||
%ptrs = getelementptr inbounds double, double* %base, <32 x i8> %idxs
|
||||
%v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
|
||||
ret <32 x double> %v
|
||||
}
|
||||
|
||||
define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(double* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
|
||||
; RV32-LABEL: vpgather_baseidx_sext_v32i8_v32f64:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: vmv1r.v v10, v0
|
||||
; RV32-NEXT: li a2, 0
|
||||
; RV32-NEXT: vsetivli zero, 16, e8, m2, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v12, v8, 16
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: addi a3, a1, -16
|
||||
; RV32-NEXT: vsext.vf8 v16, v12
|
||||
; RV32-NEXT: bltu a1, a3, .LBB88_2
|
||||
; RV32-NEXT: # %bb.1:
|
||||
; RV32-NEXT: mv a2, a3
|
||||
; RV32-NEXT: .LBB88_2:
|
||||
; RV32-NEXT: vsext.vf8 v24, v8
|
||||
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v0, v10, 2
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: vsll.vi v16, v16, 3
|
||||
; RV32-NEXT: vsetvli a3, zero, e32, m4, ta, mu
|
||||
; RV32-NEXT: vnsrl.wi v12, v16, 0
|
||||
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV32-NEXT: vluxei32.v v16, (a0), v12, v0.t
|
||||
; RV32-NEXT: li a2, 16
|
||||
; RV32-NEXT: bltu a1, a2, .LBB88_4
|
||||
; RV32-NEXT: # %bb.3:
|
||||
; RV32-NEXT: li a1, 16
|
||||
; RV32-NEXT: .LBB88_4:
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: vsll.vi v24, v24, 3
|
||||
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
|
||||
; RV32-NEXT: vnsrl.wi v4, v24, 0
|
||||
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV32-NEXT: vmv1r.v v0, v10
|
||||
; RV32-NEXT: vluxei32.v v8, (a0), v4, v0.t
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: vpgather_baseidx_sext_v32i8_v32f64:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: vmv1r.v v10, v0
|
||||
; RV64-NEXT: li a2, 0
|
||||
; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v12, v8, 16
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: addi a3, a1, -16
|
||||
; RV64-NEXT: vsext.vf8 v16, v12
|
||||
; RV64-NEXT: bltu a1, a3, .LBB88_2
|
||||
; RV64-NEXT: # %bb.1:
|
||||
; RV64-NEXT: mv a2, a3
|
||||
; RV64-NEXT: .LBB88_2:
|
||||
; RV64-NEXT: vsext.vf8 v24, v8
|
||||
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v0, v10, 2
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsll.vi v16, v16, 3
|
||||
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
|
||||
; RV64-NEXT: li a2, 16
|
||||
; RV64-NEXT: bltu a1, a2, .LBB88_4
|
||||
; RV64-NEXT: # %bb.3:
|
||||
; RV64-NEXT: li a1, 16
|
||||
; RV64-NEXT: .LBB88_4:
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsll.vi v24, v24, 3
|
||||
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV64-NEXT: vmv1r.v v0, v10
|
||||
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
|
||||
; RV64-NEXT: ret
|
||||
%eidxs = sext <32 x i8> %idxs to <32 x i64>
|
||||
%ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
|
||||
%v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
|
||||
ret <32 x double> %v
|
||||
}
|
||||
|
||||
define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(double* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
|
||||
; RV32-LABEL: vpgather_baseidx_zext_v32i8_v32f64:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: vmv1r.v v10, v0
|
||||
; RV32-NEXT: li a2, 0
|
||||
; RV32-NEXT: vsetivli zero, 16, e8, m2, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v12, v8, 16
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: addi a3, a1, -16
|
||||
; RV32-NEXT: vzext.vf8 v16, v12
|
||||
; RV32-NEXT: bltu a1, a3, .LBB89_2
|
||||
; RV32-NEXT: # %bb.1:
|
||||
; RV32-NEXT: mv a2, a3
|
||||
; RV32-NEXT: .LBB89_2:
|
||||
; RV32-NEXT: vzext.vf8 v24, v8
|
||||
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v0, v10, 2
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: vsll.vi v16, v16, 3
|
||||
; RV32-NEXT: vsetvli a3, zero, e32, m4, ta, mu
|
||||
; RV32-NEXT: vnsrl.wi v12, v16, 0
|
||||
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV32-NEXT: vluxei32.v v16, (a0), v12, v0.t
|
||||
; RV32-NEXT: li a2, 16
|
||||
; RV32-NEXT: bltu a1, a2, .LBB89_4
|
||||
; RV32-NEXT: # %bb.3:
|
||||
; RV32-NEXT: li a1, 16
|
||||
; RV32-NEXT: .LBB89_4:
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: vsll.vi v24, v24, 3
|
||||
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
|
||||
; RV32-NEXT: vnsrl.wi v4, v24, 0
|
||||
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV32-NEXT: vmv1r.v v0, v10
|
||||
; RV32-NEXT: vluxei32.v v8, (a0), v4, v0.t
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: vpgather_baseidx_zext_v32i8_v32f64:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: vmv1r.v v10, v0
|
||||
; RV64-NEXT: li a2, 0
|
||||
; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v12, v8, 16
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: addi a3, a1, -16
|
||||
; RV64-NEXT: vzext.vf8 v16, v12
|
||||
; RV64-NEXT: bltu a1, a3, .LBB89_2
|
||||
; RV64-NEXT: # %bb.1:
|
||||
; RV64-NEXT: mv a2, a3
|
||||
; RV64-NEXT: .LBB89_2:
|
||||
; RV64-NEXT: vzext.vf8 v24, v8
|
||||
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v0, v10, 2
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsll.vi v16, v16, 3
|
||||
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
|
||||
; RV64-NEXT: li a2, 16
|
||||
; RV64-NEXT: bltu a1, a2, .LBB89_4
|
||||
; RV64-NEXT: # %bb.3:
|
||||
; RV64-NEXT: li a1, 16
|
||||
; RV64-NEXT: .LBB89_4:
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsll.vi v24, v24, 3
|
||||
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV64-NEXT: vmv1r.v v0, v10
|
||||
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
|
||||
; RV64-NEXT: ret
|
||||
%eidxs = zext <32 x i8> %idxs to <32 x i64>
|
||||
%ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
|
||||
%v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
|
||||
ret <32 x double> %v
|
||||
}
|
||||
|
||||
define <32 x double> @vpgather_baseidx_v32i16_v32f64(double* %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
|
||||
; RV32-LABEL: vpgather_baseidx_v32i16_v32f64:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: li a3, 16
|
||||
; RV32-NEXT: mv a2, a1
|
||||
; RV32-NEXT: bltu a1, a3, .LBB90_2
|
||||
; RV32-NEXT: # %bb.1:
|
||||
; RV32-NEXT: li a2, 16
|
||||
; RV32-NEXT: .LBB90_2:
|
||||
; RV32-NEXT: li a3, 32
|
||||
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, mu
|
||||
; RV32-NEXT: vsext.vf2 v16, v8
|
||||
; RV32-NEXT: vsll.vi v16, v16, 3
|
||||
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
|
||||
; RV32-NEXT: addi a3, a1, -16
|
||||
; RV32-NEXT: li a2, 0
|
||||
; RV32-NEXT: bltu a1, a3, .LBB90_4
|
||||
; RV32-NEXT: # %bb.3:
|
||||
; RV32-NEXT: mv a2, a3
|
||||
; RV32-NEXT: .LBB90_4:
|
||||
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v0, v0, 2
|
||||
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v24, v16, 16
|
||||
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: vpgather_baseidx_v32i16_v32f64:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: addi a3, a1, -16
|
||||
; RV64-NEXT: vmv1r.v v12, v0
|
||||
; RV64-NEXT: li a2, 0
|
||||
; RV64-NEXT: bltu a1, a3, .LBB90_2
|
||||
; RV64-NEXT: # %bb.1:
|
||||
; RV64-NEXT: mv a2, a3
|
||||
; RV64-NEXT: .LBB90_2:
|
||||
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v0, v12, 2
|
||||
; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v16, v8, 16
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsext.vf4 v24, v16
|
||||
; RV64-NEXT: vsll.vi v16, v24, 3
|
||||
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
|
||||
; RV64-NEXT: li a2, 16
|
||||
; RV64-NEXT: bltu a1, a2, .LBB90_4
|
||||
; RV64-NEXT: # %bb.3:
|
||||
; RV64-NEXT: li a1, 16
|
||||
; RV64-NEXT: .LBB90_4:
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsext.vf4 v24, v8
|
||||
; RV64-NEXT: vsll.vi v24, v24, 3
|
||||
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV64-NEXT: vmv1r.v v0, v12
|
||||
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
|
||||
; RV64-NEXT: ret
|
||||
%ptrs = getelementptr inbounds double, double* %base, <32 x i16> %idxs
|
||||
%v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
|
||||
ret <32 x double> %v
|
||||
}
|
||||
|
||||
define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(double* %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
|
||||
; RV32-LABEL: vpgather_baseidx_sext_v32i16_v32f64:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: vmv1r.v v12, v0
|
||||
; RV32-NEXT: li a2, 0
|
||||
; RV32-NEXT: vsetivli zero, 16, e16, m4, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v24, v8, 16
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: addi a3, a1, -16
|
||||
; RV32-NEXT: vsext.vf4 v16, v24
|
||||
; RV32-NEXT: bltu a1, a3, .LBB91_2
|
||||
; RV32-NEXT: # %bb.1:
|
||||
; RV32-NEXT: mv a2, a3
|
||||
; RV32-NEXT: .LBB91_2:
|
||||
; RV32-NEXT: vsext.vf4 v24, v8
|
||||
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v0, v12, 2
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: vsll.vi v16, v16, 3
|
||||
; RV32-NEXT: vsetvli a3, zero, e32, m4, ta, mu
|
||||
; RV32-NEXT: vnsrl.wi v8, v16, 0
|
||||
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
|
||||
; RV32-NEXT: li a2, 16
|
||||
; RV32-NEXT: bltu a1, a2, .LBB91_4
|
||||
; RV32-NEXT: # %bb.3:
|
||||
; RV32-NEXT: li a1, 16
|
||||
; RV32-NEXT: .LBB91_4:
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: vsll.vi v24, v24, 3
|
||||
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
|
||||
; RV32-NEXT: vnsrl.wi v4, v24, 0
|
||||
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV32-NEXT: vmv1r.v v0, v12
|
||||
; RV32-NEXT: vluxei32.v v8, (a0), v4, v0.t
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: vpgather_baseidx_sext_v32i16_v32f64:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: vmv1r.v v12, v0
|
||||
; RV64-NEXT: li a2, 0
|
||||
; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v24, v8, 16
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: addi a3, a1, -16
|
||||
; RV64-NEXT: vsext.vf4 v16, v24
|
||||
; RV64-NEXT: bltu a1, a3, .LBB91_2
|
||||
; RV64-NEXT: # %bb.1:
|
||||
; RV64-NEXT: mv a2, a3
|
||||
; RV64-NEXT: .LBB91_2:
|
||||
; RV64-NEXT: vsext.vf4 v24, v8
|
||||
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v0, v12, 2
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsll.vi v16, v16, 3
|
||||
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
|
||||
; RV64-NEXT: li a2, 16
|
||||
; RV64-NEXT: bltu a1, a2, .LBB91_4
|
||||
; RV64-NEXT: # %bb.3:
|
||||
; RV64-NEXT: li a1, 16
|
||||
; RV64-NEXT: .LBB91_4:
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsll.vi v24, v24, 3
|
||||
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV64-NEXT: vmv1r.v v0, v12
|
||||
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
|
||||
; RV64-NEXT: ret
|
||||
%eidxs = sext <32 x i16> %idxs to <32 x i64>
|
||||
%ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
|
||||
%v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
|
||||
ret <32 x double> %v
|
||||
}
|
||||
|
||||
define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(double* %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
|
||||
; RV32-LABEL: vpgather_baseidx_zext_v32i16_v32f64:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: vmv1r.v v12, v0
|
||||
; RV32-NEXT: li a2, 0
|
||||
; RV32-NEXT: vsetivli zero, 16, e16, m4, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v24, v8, 16
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: addi a3, a1, -16
|
||||
; RV32-NEXT: vzext.vf4 v16, v24
|
||||
; RV32-NEXT: bltu a1, a3, .LBB92_2
|
||||
; RV32-NEXT: # %bb.1:
|
||||
; RV32-NEXT: mv a2, a3
|
||||
; RV32-NEXT: .LBB92_2:
|
||||
; RV32-NEXT: vzext.vf4 v24, v8
|
||||
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v0, v12, 2
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: vsll.vi v16, v16, 3
|
||||
; RV32-NEXT: vsetvli a3, zero, e32, m4, ta, mu
|
||||
; RV32-NEXT: vnsrl.wi v8, v16, 0
|
||||
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
|
||||
; RV32-NEXT: li a2, 16
|
||||
; RV32-NEXT: bltu a1, a2, .LBB92_4
|
||||
; RV32-NEXT: # %bb.3:
|
||||
; RV32-NEXT: li a1, 16
|
||||
; RV32-NEXT: .LBB92_4:
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: vsll.vi v24, v24, 3
|
||||
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
|
||||
; RV32-NEXT: vnsrl.wi v4, v24, 0
|
||||
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV32-NEXT: vmv1r.v v0, v12
|
||||
; RV32-NEXT: vluxei32.v v8, (a0), v4, v0.t
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: vpgather_baseidx_zext_v32i16_v32f64:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: vmv1r.v v12, v0
|
||||
; RV64-NEXT: li a2, 0
|
||||
; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v24, v8, 16
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: addi a3, a1, -16
|
||||
; RV64-NEXT: vzext.vf4 v16, v24
|
||||
; RV64-NEXT: bltu a1, a3, .LBB92_2
|
||||
; RV64-NEXT: # %bb.1:
|
||||
; RV64-NEXT: mv a2, a3
|
||||
; RV64-NEXT: .LBB92_2:
|
||||
; RV64-NEXT: vzext.vf4 v24, v8
|
||||
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v0, v12, 2
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsll.vi v16, v16, 3
|
||||
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
|
||||
; RV64-NEXT: li a2, 16
|
||||
; RV64-NEXT: bltu a1, a2, .LBB92_4
|
||||
; RV64-NEXT: # %bb.3:
|
||||
; RV64-NEXT: li a1, 16
|
||||
; RV64-NEXT: .LBB92_4:
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsll.vi v24, v24, 3
|
||||
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV64-NEXT: vmv1r.v v0, v12
|
||||
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
|
||||
; RV64-NEXT: ret
|
||||
%eidxs = zext <32 x i16> %idxs to <32 x i64>
|
||||
%ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
|
||||
%v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
|
||||
ret <32 x double> %v
|
||||
}
|
||||
|
||||
define <32 x double> @vpgather_baseidx_v32i32_v32f64(double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
|
||||
; RV32-LABEL: vpgather_baseidx_v32i32_v32f64:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: li a3, 16
|
||||
; RV32-NEXT: mv a2, a1
|
||||
; RV32-NEXT: bltu a1, a3, .LBB93_2
|
||||
; RV32-NEXT: # %bb.1:
|
||||
; RV32-NEXT: li a2, 16
|
||||
; RV32-NEXT: .LBB93_2:
|
||||
; RV32-NEXT: li a3, 32
|
||||
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, mu
|
||||
; RV32-NEXT: vsll.vi v16, v8, 3
|
||||
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
|
||||
; RV32-NEXT: addi a3, a1, -16
|
||||
; RV32-NEXT: li a2, 0
|
||||
; RV32-NEXT: bltu a1, a3, .LBB93_4
|
||||
; RV32-NEXT: # %bb.3:
|
||||
; RV32-NEXT: mv a2, a3
|
||||
; RV32-NEXT: .LBB93_4:
|
||||
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v24, v16, 16
|
||||
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v0, v0, 2
|
||||
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: vpgather_baseidx_v32i32_v32f64:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: addi a3, a1, -16
|
||||
; RV64-NEXT: vmv1r.v v1, v0
|
||||
; RV64-NEXT: li a2, 0
|
||||
; RV64-NEXT: bltu a1, a3, .LBB93_2
|
||||
; RV64-NEXT: # %bb.1:
|
||||
; RV64-NEXT: mv a2, a3
|
||||
; RV64-NEXT: .LBB93_2:
|
||||
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v0, v1, 2
|
||||
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v16, v8, 16
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsext.vf2 v24, v16
|
||||
; RV64-NEXT: vsll.vi v16, v24, 3
|
||||
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
|
||||
; RV64-NEXT: li a2, 16
|
||||
; RV64-NEXT: bltu a1, a2, .LBB93_4
|
||||
; RV64-NEXT: # %bb.3:
|
||||
; RV64-NEXT: li a1, 16
|
||||
; RV64-NEXT: .LBB93_4:
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsext.vf2 v24, v8
|
||||
; RV64-NEXT: vsll.vi v8, v24, 3
|
||||
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV64-NEXT: vmv1r.v v0, v1
|
||||
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
|
||||
; RV64-NEXT: ret
|
||||
%ptrs = getelementptr inbounds double, double* %base, <32 x i32> %idxs
|
||||
%v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
|
||||
ret <32 x double> %v
|
||||
}
|
||||
|
||||
define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
|
||||
; RV32-LABEL: vpgather_baseidx_sext_v32i32_v32f64:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: vmv1r.v v1, v0
|
||||
; RV32-NEXT: li a2, 0
|
||||
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v24, v8, 16
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: addi a3, a1, -16
|
||||
; RV32-NEXT: vsext.vf2 v16, v24
|
||||
; RV32-NEXT: bltu a1, a3, .LBB94_2
|
||||
; RV32-NEXT: # %bb.1:
|
||||
; RV32-NEXT: mv a2, a3
|
||||
; RV32-NEXT: .LBB94_2:
|
||||
; RV32-NEXT: vsext.vf2 v24, v8
|
||||
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v0, v1, 2
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: vsll.vi v8, v16, 3
|
||||
; RV32-NEXT: vsetvli a3, zero, e32, m4, ta, mu
|
||||
; RV32-NEXT: vnsrl.wi v4, v8, 0
|
||||
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV32-NEXT: vluxei32.v v16, (a0), v4, v0.t
|
||||
; RV32-NEXT: li a2, 16
|
||||
; RV32-NEXT: bltu a1, a2, .LBB94_4
|
||||
; RV32-NEXT: # %bb.3:
|
||||
; RV32-NEXT: li a1, 16
|
||||
; RV32-NEXT: .LBB94_4:
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: vsll.vi v8, v24, 3
|
||||
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
|
||||
; RV32-NEXT: vnsrl.wi v24, v8, 0
|
||||
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV32-NEXT: vmv1r.v v0, v1
|
||||
; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: vpgather_baseidx_sext_v32i32_v32f64:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: vmv1r.v v1, v0
|
||||
; RV64-NEXT: li a2, 0
|
||||
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v24, v8, 16
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: addi a3, a1, -16
|
||||
; RV64-NEXT: vsext.vf2 v16, v24
|
||||
; RV64-NEXT: bltu a1, a3, .LBB94_2
|
||||
; RV64-NEXT: # %bb.1:
|
||||
; RV64-NEXT: mv a2, a3
|
||||
; RV64-NEXT: .LBB94_2:
|
||||
; RV64-NEXT: vsext.vf2 v24, v8
|
||||
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v0, v1, 2
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsll.vi v8, v16, 3
|
||||
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
|
||||
; RV64-NEXT: li a2, 16
|
||||
; RV64-NEXT: bltu a1, a2, .LBB94_4
|
||||
; RV64-NEXT: # %bb.3:
|
||||
; RV64-NEXT: li a1, 16
|
||||
; RV64-NEXT: .LBB94_4:
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsll.vi v8, v24, 3
|
||||
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV64-NEXT: vmv1r.v v0, v1
|
||||
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
|
||||
; RV64-NEXT: ret
|
||||
%eidxs = sext <32 x i32> %idxs to <32 x i64>
|
||||
%ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
|
||||
%v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
|
||||
ret <32 x double> %v
|
||||
}
|
||||
|
||||
define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
|
||||
; RV32-LABEL: vpgather_baseidx_zext_v32i32_v32f64:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: vmv1r.v v1, v0
|
||||
; RV32-NEXT: li a2, 0
|
||||
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v24, v8, 16
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: addi a3, a1, -16
|
||||
; RV32-NEXT: vzext.vf2 v16, v24
|
||||
; RV32-NEXT: bltu a1, a3, .LBB95_2
|
||||
; RV32-NEXT: # %bb.1:
|
||||
; RV32-NEXT: mv a2, a3
|
||||
; RV32-NEXT: .LBB95_2:
|
||||
; RV32-NEXT: vzext.vf2 v24, v8
|
||||
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v0, v1, 2
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: vsll.vi v8, v16, 3
|
||||
; RV32-NEXT: vsetvli a3, zero, e32, m4, ta, mu
|
||||
; RV32-NEXT: vnsrl.wi v4, v8, 0
|
||||
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV32-NEXT: vluxei32.v v16, (a0), v4, v0.t
|
||||
; RV32-NEXT: li a2, 16
|
||||
; RV32-NEXT: bltu a1, a2, .LBB95_4
|
||||
; RV32-NEXT: # %bb.3:
|
||||
; RV32-NEXT: li a1, 16
|
||||
; RV32-NEXT: .LBB95_4:
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: vsll.vi v8, v24, 3
|
||||
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
|
||||
; RV32-NEXT: vnsrl.wi v24, v8, 0
|
||||
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV32-NEXT: vmv1r.v v0, v1
|
||||
; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: vpgather_baseidx_zext_v32i32_v32f64:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: vmv1r.v v1, v0
|
||||
; RV64-NEXT: li a2, 0
|
||||
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v24, v8, 16
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: addi a3, a1, -16
|
||||
; RV64-NEXT: vzext.vf2 v16, v24
|
||||
; RV64-NEXT: bltu a1, a3, .LBB95_2
|
||||
; RV64-NEXT: # %bb.1:
|
||||
; RV64-NEXT: mv a2, a3
|
||||
; RV64-NEXT: .LBB95_2:
|
||||
; RV64-NEXT: vzext.vf2 v24, v8
|
||||
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v0, v1, 2
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsll.vi v8, v16, 3
|
||||
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
|
||||
; RV64-NEXT: li a2, 16
|
||||
; RV64-NEXT: bltu a1, a2, .LBB95_4
|
||||
; RV64-NEXT: # %bb.3:
|
||||
; RV64-NEXT: li a1, 16
|
||||
; RV64-NEXT: .LBB95_4:
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsll.vi v8, v24, 3
|
||||
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV64-NEXT: vmv1r.v v0, v1
|
||||
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
|
||||
; RV64-NEXT: ret
|
||||
%eidxs = zext <32 x i32> %idxs to <32 x i64>
|
||||
%ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
|
||||
%v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
|
||||
ret <32 x double> %v
|
||||
}
|
||||
|
||||
define <32 x double> @vpgather_baseidx_v32f64(double* %base, <32 x i64> %idxs, <32 x i1> %m, i32 zeroext %evl) {
|
||||
; RV32-LABEL: vpgather_baseidx_v32f64:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: addi a3, a1, -16
|
||||
; RV32-NEXT: vmv1r.v v24, v0
|
||||
; RV32-NEXT: li a2, 0
|
||||
; RV32-NEXT: bltu a1, a3, .LBB96_2
|
||||
; RV32-NEXT: # %bb.1:
|
||||
; RV32-NEXT: mv a2, a3
|
||||
; RV32-NEXT: .LBB96_2:
|
||||
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v0, v24, 2
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: vsll.vi v16, v16, 3
|
||||
; RV32-NEXT: vsetvli a3, zero, e32, m4, ta, mu
|
||||
; RV32-NEXT: vnsrl.wi v28, v16, 0
|
||||
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t
|
||||
; RV32-NEXT: li a2, 16
|
||||
; RV32-NEXT: bltu a1, a2, .LBB96_4
|
||||
; RV32-NEXT: # %bb.3:
|
||||
; RV32-NEXT: li a1, 16
|
||||
; RV32-NEXT: .LBB96_4:
|
||||
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV32-NEXT: vsll.vi v8, v8, 3
|
||||
; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu
|
||||
; RV32-NEXT: vnsrl.wi v28, v8, 0
|
||||
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV32-NEXT: vmv1r.v v0, v24
|
||||
; RV32-NEXT: vluxei32.v v8, (a0), v28, v0.t
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: vpgather_baseidx_v32f64:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: addi a3, a1, -16
|
||||
; RV64-NEXT: vmv1r.v v24, v0
|
||||
; RV64-NEXT: li a2, 0
|
||||
; RV64-NEXT: bltu a1, a3, .LBB96_2
|
||||
; RV64-NEXT: # %bb.1:
|
||||
; RV64-NEXT: mv a2, a3
|
||||
; RV64-NEXT: .LBB96_2:
|
||||
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v0, v24, 2
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsll.vi v16, v16, 3
|
||||
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
|
||||
; RV64-NEXT: li a2, 16
|
||||
; RV64-NEXT: bltu a1, a2, .LBB96_4
|
||||
; RV64-NEXT: # %bb.3:
|
||||
; RV64-NEXT: li a1, 16
|
||||
; RV64-NEXT: .LBB96_4:
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vsll.vi v8, v8, 3
|
||||
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV64-NEXT: vmv1r.v v0, v24
|
||||
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
|
||||
; RV64-NEXT: ret
|
||||
%ptrs = getelementptr inbounds double, double* %base, <32 x i64> %idxs
|
||||
%v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
|
||||
ret <32 x double> %v
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \
|
||||
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v,+m -riscv-v-vector-bits-min=128 \
|
||||
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v,+m -riscv-v-vector-bits-min=128 \
|
||||
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
|
||||
|
||||
declare void @llvm.vp.scatter.v2i8.v2p0i8(<2 x i8>, <2 x i8*>, <2 x i1>, i32)
|
||||
|
@ -1716,3 +1716,470 @@ define void @vpscatter_baseidx_v8f64(<8 x double> %val, double* %base, <8 x i64>
|
|||
call void @llvm.vp.scatter.v8f64.v8p0f64(<8 x double> %val, <8 x double*> %ptrs, <8 x i1> %m, i32 %evl)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.vp.scatter.v32f64.v32p0f64(<32 x double>, <32 x double*>, <32 x i1>, i32)
|
||||
|
||||
define void @vpscatter_v32f64(<32 x double> %val, <32 x double*> %ptrs, <32 x i1> %m, i32 zeroext %evl) {
|
||||
; RV32-LABEL: vpscatter_v32f64:
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: li a2, 32
|
||||
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
|
||||
; RV32-NEXT: vle32.v v24, (a0)
|
||||
; RV32-NEXT: li a0, 16
|
||||
; RV32-NEXT: mv a2, a1
|
||||
; RV32-NEXT: bltu a1, a0, .LBB79_2
|
||||
; RV32-NEXT: # %bb.1:
|
||||
; RV32-NEXT: li a2, 16
|
||||
; RV32-NEXT: .LBB79_2:
|
||||
; RV32-NEXT: li a0, 0
|
||||
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
|
||||
; RV32-NEXT: addi a2, a1, -16
|
||||
; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t
|
||||
; RV32-NEXT: bltu a1, a2, .LBB79_4
|
||||
; RV32-NEXT: # %bb.3:
|
||||
; RV32-NEXT: mv a0, a2
|
||||
; RV32-NEXT: .LBB79_4:
|
||||
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v8, v24, 16
|
||||
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV32-NEXT: vslidedown.vi v0, v0, 2
|
||||
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu
|
||||
; RV32-NEXT: vsoxei32.v v16, (zero), v8, v0.t
|
||||
; RV32-NEXT: ret
|
||||
;
|
||||
; RV64-LABEL: vpscatter_v32f64:
|
||||
; RV64: # %bb.0:
|
||||
; RV64-NEXT: addi sp, sp, -16
|
||||
; RV64-NEXT: .cfi_def_cfa_offset 16
|
||||
; RV64-NEXT: csrr a1, vlenb
|
||||
; RV64-NEXT: slli a1, a1, 4
|
||||
; RV64-NEXT: sub sp, sp, a1
|
||||
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
|
||||
; RV64-NEXT: vle64.v v24, (a0)
|
||||
; RV64-NEXT: csrr a1, vlenb
|
||||
; RV64-NEXT: slli a1, a1, 3
|
||||
; RV64-NEXT: add a1, sp, a1
|
||||
; RV64-NEXT: addi a1, a1, 16
|
||||
; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
|
||||
; RV64-NEXT: addi a1, sp, 16
|
||||
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
|
||||
; RV64-NEXT: li a3, 16
|
||||
; RV64-NEXT: addi a0, a0, 128
|
||||
; RV64-NEXT: mv a1, a2
|
||||
; RV64-NEXT: bltu a2, a3, .LBB79_2
|
||||
; RV64-NEXT: # %bb.1:
|
||||
; RV64-NEXT: li a1, 16
|
||||
; RV64-NEXT: .LBB79_2:
|
||||
; RV64-NEXT: li a3, 0
|
||||
; RV64-NEXT: vle64.v v16, (a0)
|
||||
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
|
||||
; RV64-NEXT: addi a0, a2, -16
|
||||
; RV64-NEXT: csrr a1, vlenb
|
||||
; RV64-NEXT: slli a1, a1, 3
|
||||
; RV64-NEXT: add a1, sp, a1
|
||||
; RV64-NEXT: addi a1, a1, 16
|
||||
; RV64-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
|
||||
; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t
|
||||
; RV64-NEXT: bltu a2, a0, .LBB79_4
|
||||
; RV64-NEXT: # %bb.3:
|
||||
; RV64-NEXT: mv a3, a0
|
||||
; RV64-NEXT: .LBB79_4:
|
||||
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
|
||||
; RV64-NEXT: vslidedown.vi v0, v0, 2
|
||||
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu
|
||||
; RV64-NEXT: addi a0, sp, 16
|
||||
; RV64-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
|
||||
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
|
||||
; RV64-NEXT: csrr a0, vlenb
|
||||
; RV64-NEXT: slli a0, a0, 4
|
||||
; RV64-NEXT: add sp, sp, a0
|
||||
; RV64-NEXT: addi sp, sp, 16
|
||||
; RV64-NEXT: ret
|
||||
call void @llvm.vp.scatter.v32f64.v32p0f64(<32 x double> %val, <32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v32i32_v32f64:
; RV32: # %bb.0:
; RV32-NEXT: li a3, 32
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, mu
; RV32-NEXT: vle32.v v24, (a1)
; RV32-NEXT: li a3, 16
; RV32-NEXT: mv a1, a2
; RV32-NEXT: bltu a2, a3, .LBB80_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB80_2:
; RV32-NEXT: li a3, 0
; RV32-NEXT: vsll.vi v24, v24, 3
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: addi a1, a2, -16
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: bltu a2, a1, .LBB80_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB80_4:
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu
; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_v32i32_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a4, a3, 3
; RV64-NEXT: add a3, a4, a3
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: li a3, 32
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, mu
; RV64-NEXT: vle32.v v24, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV64-NEXT: li a1, 16
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
; RV64-NEXT: mv a3, a2
; RV64-NEXT: bltu a2, a1, .LBB80_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a3, 16
; RV64-NEXT: .LBB80_2:
; RV64-NEXT: li a1, 0
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; RV64-NEXT: csrr a4, vlenb
; RV64-NEXT: add a4, sp, a4
; RV64-NEXT: addi a4, a4, 16
; RV64-NEXT: vl8re8.v v0, (a4) # Unknown-size Folded Reload
; RV64-NEXT: vsext.vf2 v24, v0
; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV64-NEXT: addi a3, a2, -16
; RV64-NEXT: addi a4, sp, 16
; RV64-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: bltu a2, a3, .LBB80_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: mv a1, a3
; RV64-NEXT: .LBB80_4:
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl8re8.v v8, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vslidedown.vi v8, v8, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; RV64-NEXT: vsext.vf2 v24, v8
; RV64-NEXT: vsll.vi v8, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a1, a0, 3
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, double* %base, <32 x i32> %idxs
call void @llvm.vp.scatter.v32f64.v32p0f64(<32 x double> %val, <32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v32i32_v32f64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 24
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: sub sp, sp, a3
; RV32-NEXT: li a3, 32
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, mu
; RV32-NEXT: vle32.v v24, (a1)
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu
; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; RV32-NEXT: li a3, 16
; RV32-NEXT: vsext.vf2 v8, v24
; RV32-NEXT: mv a1, a2
; RV32-NEXT: bltu a2, a3, .LBB81_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB81_2:
; RV32-NEXT: li a3, 0
; RV32-NEXT: addi a4, sp, 16
; RV32-NEXT: vl8re8.v v24, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vsext.vf2 v16, v24
; RV32-NEXT: vsll.vi v8, v8, 3
; RV32-NEXT: vsetvli a4, zero, e32, m4, ta, mu
; RV32-NEXT: vnsrl.wi v24, v8, 0
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: addi a1, a2, -16
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 3
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vl8re8.v v8, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: bltu a2, a1, .LBB81_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB81_4:
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; RV32-NEXT: vsll.vi v8, v16, 3
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT: vnsrl.wi v16, v8, 0
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v32i32_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: li a4, 24
; RV64-NEXT: mul a3, a3, a4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: li a3, 32
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, mu
; RV64-NEXT: vle32.v v24, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 4
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu
; RV64-NEXT: vslidedown.vi v8, v24, 16
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; RV64-NEXT: li a3, 16
; RV64-NEXT: vsext.vf2 v8, v24
; RV64-NEXT: mv a1, a2
; RV64-NEXT: bltu a2, a3, .LBB81_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB81_2:
; RV64-NEXT: li a3, 0
; RV64-NEXT: addi a4, sp, 16
; RV64-NEXT: vl8re8.v v24, (a4) # Unknown-size Folded Reload
; RV64-NEXT: vsext.vf2 v16, v24
; RV64-NEXT: vsll.vi v8, v8, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: addi a1, a2, -16
; RV64-NEXT: csrr a4, vlenb
; RV64-NEXT: slli a4, a4, 3
; RV64-NEXT: add a4, sp, a4
; RV64-NEXT: addi a4, a4, 16
; RV64-NEXT: vl8re8.v v24, (a4) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v24, (a0), v8, v0.t
; RV64-NEXT: bltu a2, a1, .LBB81_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB81_4:
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 4
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: li a1, 24
; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%eidxs = sext <32 x i32> %idxs to <32 x i64>
%ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
call void @llvm.vp.scatter.v32f64.v32p0f64(<32 x double> %val, <32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v32i32_v32f64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 24
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: sub sp, sp, a3
; RV32-NEXT: li a3, 32
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, mu
; RV32-NEXT: vle32.v v24, (a1)
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, mu
; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; RV32-NEXT: li a3, 16
; RV32-NEXT: vzext.vf2 v8, v24
; RV32-NEXT: mv a1, a2
; RV32-NEXT: bltu a2, a3, .LBB82_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB82_2:
; RV32-NEXT: li a3, 0
; RV32-NEXT: addi a4, sp, 16
; RV32-NEXT: vl8re8.v v24, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vzext.vf2 v16, v24
; RV32-NEXT: vsll.vi v8, v8, 3
; RV32-NEXT: vsetvli a4, zero, e32, m4, ta, mu
; RV32-NEXT: vnsrl.wi v24, v8, 0
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: addi a1, a2, -16
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 3
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vl8re8.v v8, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: bltu a2, a1, .LBB82_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB82_4:
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; RV32-NEXT: vsll.vi v8, v16, 3
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT: vnsrl.wi v16, v8, 0
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v32i32_v32f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: li a4, 24
; RV64-NEXT: mul a3, a3, a4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: li a3, 32
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, mu
; RV64-NEXT: vle32.v v24, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 4
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, mu
; RV64-NEXT: vslidedown.vi v8, v24, 16
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; RV64-NEXT: li a3, 16
; RV64-NEXT: vzext.vf2 v8, v24
; RV64-NEXT: mv a1, a2
; RV64-NEXT: bltu a2, a3, .LBB82_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB82_2:
; RV64-NEXT: li a3, 0
; RV64-NEXT: addi a4, sp, 16
; RV64-NEXT: vl8re8.v v24, (a4) # Unknown-size Folded Reload
; RV64-NEXT: vzext.vf2 v16, v24
; RV64-NEXT: vsll.vi v8, v8, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: addi a1, a2, -16
; RV64-NEXT: csrr a4, vlenb
; RV64-NEXT: slli a4, a4, 3
; RV64-NEXT: add a4, sp, a4
; RV64-NEXT: addi a4, a4, 16
; RV64-NEXT: vl8re8.v v24, (a4) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v24, (a0), v8, v0.t
; RV64-NEXT: bltu a2, a1, .LBB82_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB82_4:
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 4
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: li a1, 24
; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%eidxs = zext <32 x i32> %idxs to <32 x i64>
%ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
call void @llvm.vp.scatter.v32f64.v32p0f64(<32 x double> %val, <32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
ret void
}

@ -251,6 +251,107 @@ define <vscale x 8 x i8> @vpgather_baseidx_nxv8i8(i8* %base, <vscale x 8 x i8> %
ret <vscale x 8 x i8> %v
}

declare <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*>, <vscale x 32 x i1>, i32)

define <vscale x 32 x i8> @vpgather_baseidx_nxv32i8(i8* %base, <vscale x 32 x i8> %idxs, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv32i8:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v12, v0
; RV32-NEXT: li a3, 0
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: srli a5, a2, 2
; RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, mu
; RV32-NEXT: slli a2, a2, 1
; RV32-NEXT: sub a4, a1, a2
; RV32-NEXT: vslidedown.vx v0, v0, a5
; RV32-NEXT: bltu a1, a4, .LBB12_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a4
; RV32-NEXT: .LBB12_2:
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, mu
; RV32-NEXT: vsext.vf4 v24, v10
; RV32-NEXT: vsetvli zero, a3, e8, m2, ta, mu
; RV32-NEXT: vluxei32.v v18, (a0), v24, v0.t
; RV32-NEXT: bltu a1, a2, .LBB12_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: mv a1, a2
; RV32-NEXT: .LBB12_4:
; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, mu
; RV32-NEXT: vsext.vf4 v24, v8
; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu
; RV32-NEXT: vmv1r.v v0, v12
; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT: vmv4r.v v8, v16
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv32i8:
; RV64: # %bb.0:
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli t0, a3, 1
; RV64-NEXT: sub a4, a1, t0
; RV64-NEXT: vmv1r.v v12, v0
; RV64-NEXT: li t1, 0
; RV64-NEXT: li a7, 0
; RV64-NEXT: bltu a1, a4, .LBB12_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a7, a4
; RV64-NEXT: .LBB12_2:
; RV64-NEXT: sub a4, a7, a3
; RV64-NEXT: mv a2, t1
; RV64-NEXT: bltu a7, a4, .LBB12_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: mv a2, a4
; RV64-NEXT: .LBB12_4:
; RV64-NEXT: srli a4, a3, 2
; RV64-NEXT: vsetvli a5, zero, e8, mf2, ta, mu
; RV64-NEXT: vslidedown.vx v13, v12, a4
; RV64-NEXT: srli a6, a3, 3
; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, mu
; RV64-NEXT: vslidedown.vx v0, v13, a6
; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf8 v24, v11
; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, mu
; RV64-NEXT: vluxei64.v v19, (a0), v24, v0.t
; RV64-NEXT: bltu a1, t0, .LBB12_6
; RV64-NEXT: # %bb.5:
; RV64-NEXT: mv a1, t0
; RV64-NEXT: .LBB12_6:
; RV64-NEXT: sub a2, a1, a3
; RV64-NEXT: bltu a1, a2, .LBB12_8
; RV64-NEXT: # %bb.7:
; RV64-NEXT: mv t1, a2
; RV64-NEXT: .LBB12_8:
; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, mu
; RV64-NEXT: vslidedown.vx v0, v12, a6
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf8 v24, v9
; RV64-NEXT: vsetvli zero, t1, e8, m1, ta, mu
; RV64-NEXT: vluxei64.v v17, (a0), v24, v0.t
; RV64-NEXT: bltu a1, a3, .LBB12_10
; RV64-NEXT: # %bb.9:
; RV64-NEXT: mv a1, a3
; RV64-NEXT: .LBB12_10:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf8 v24, v8
; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, mu
; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vluxei64.v v16, (a0), v24, v0.t
; RV64-NEXT: bltu a7, a3, .LBB12_12
; RV64-NEXT: # %bb.11:
; RV64-NEXT: mv a7, a3
; RV64-NEXT: .LBB12_12:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf8 v24, v10
; RV64-NEXT: vsetvli zero, a7, e8, m1, ta, mu
; RV64-NEXT: vmv1r.v v0, v13
; RV64-NEXT: vluxei64.v v18, (a0), v24, v0.t
; RV64-NEXT: vmv4r.v v8, v16
; RV64-NEXT: ret
%ptrs = getelementptr inbounds i8, i8* %base, <vscale x 32 x i8> %idxs
%v = call <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*> %ptrs, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x i8> %v
}

declare <vscale x 1 x i16> @llvm.vp.gather.nxv1i16.nxv1p0i16(<vscale x 1 x i16*>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i16> @vpgather_nxv1i16(<vscale x 1 x i16*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {

@ -2232,3 +2333,264 @@ define <vscale x 8 x double> @vpgather_baseidx_nxv8f64(double* %base, <vscale x
%v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}

declare <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*>, <vscale x 16 x i1>, i32)

define <vscale x 16 x double> @vpgather_nxv16f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v24, v0
; RV32-NEXT: li a2, 0
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: srli a4, a1, 3
; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; RV32-NEXT: sub a3, a0, a1
; RV32-NEXT: vslidedown.vx v0, v0, a4
; RV32-NEXT: bltu a0, a3, .LBB102_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a2, a3
; RV32-NEXT: .LBB102_2:
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v16, (zero), v12, v0.t
; RV32-NEXT: bltu a0, a1, .LBB102_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: mv a0, a1
; RV32-NEXT: .LBB102_4:
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; RV32-NEXT: vmv1r.v v0, v24
; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v24
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v24, v0
; RV64-NEXT: li a2, 0
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: srli a4, a1, 3
; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; RV64-NEXT: sub a3, a0, a1
; RV64-NEXT: vslidedown.vx v0, v0, a4
; RV64-NEXT: bltu a0, a3, .LBB102_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a2, a3
; RV64-NEXT: .LBB102_2:
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t
; RV64-NEXT: bltu a0, a1, .LBB102_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: mv a0, a1
; RV64-NEXT: .LBB102_4:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT: ret
%v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x double> %v
}

define <vscale x 16 x double> @vpgather_baseidx_nxv16i16_nxv16f64(double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v12, v0
; RV32-NEXT: li a3, 0
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: srli a5, a2, 3
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, mu
; RV32-NEXT: sub a4, a1, a2
; RV32-NEXT: vslidedown.vx v0, v0, a5
; RV32-NEXT: bltu a1, a4, .LBB103_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a4
; RV32-NEXT: .LBB103_2:
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, mu
; RV32-NEXT: vsext.vf2 v16, v8
; RV32-NEXT: vsll.vi v24, v16, 3
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: bltu a1, a2, .LBB103_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: mv a1, a2
; RV32-NEXT: .LBB103_4:
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: vmv1r.v v0, v12
; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v12, v0
; RV64-NEXT: li a3, 0
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: srli a5, a2, 3
; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, mu
; RV64-NEXT: sub a4, a1, a2
; RV64-NEXT: vslidedown.vx v0, v0, a5
; RV64-NEXT: bltu a1, a4, .LBB103_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a4
; RV64-NEXT: .LBB103_2:
; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf4 v16, v10
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT: bltu a1, a2, .LBB103_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: mv a1, a2
; RV64-NEXT: .LBB103_4:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf4 v24, v8
; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i16> %idxs
%v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x double> %v
}

define <vscale x 16 x double> @vpgather_baseidx_sext_nxv16i16_nxv16f64(double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v12, v0
; RV32-NEXT: li a3, 0
; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT: vsext.vf4 v16, v10
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: srli a5, a2, 3
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, mu
; RV32-NEXT: sub a4, a1, a2
; RV32-NEXT: vslidedown.vx v0, v0, a5
; RV32-NEXT: bltu a1, a4, .LBB104_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a4
; RV32-NEXT: .LBB104_2:
; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, mu
; RV32-NEXT: vsext.vf4 v24, v8
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT: vnsrl.wi v8, v16, 0
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT: bltu a1, a2, .LBB104_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: mv a1, a2
; RV32-NEXT: .LBB104_4:
; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT: vsll.vi v24, v24, 3
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT: vnsrl.wi v4, v24, 0
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: vmv1r.v v0, v12
; RV32-NEXT: vluxei32.v v8, (a0), v4, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v12, v0
; RV64-NEXT: li a3, 0
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf4 v16, v10
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: srli a5, a2, 3
; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, mu
; RV64-NEXT: sub a4, a1, a2
; RV64-NEXT: vslidedown.vx v0, v0, a5
; RV64-NEXT: bltu a1, a4, .LBB104_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a4
; RV64-NEXT: .LBB104_2:
; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf4 v24, v8
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT: bltu a1, a2, .LBB104_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: mv a1, a2
; RV64-NEXT: .LBB104_4:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: ret
%eidxs = sext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
%ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i64> %eidxs
%v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x double> %v
}

define <vscale x 16 x double> @vpgather_baseidx_zext_nxv16i16_nxv16f64(double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v12, v0
; RV32-NEXT: li a3, 0
; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT: vzext.vf4 v16, v10
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: srli a5, a2, 3
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, mu
; RV32-NEXT: sub a4, a1, a2
; RV32-NEXT: vslidedown.vx v0, v0, a5
; RV32-NEXT: bltu a1, a4, .LBB105_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a4
; RV32-NEXT: .LBB105_2:
; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, mu
; RV32-NEXT: vzext.vf4 v24, v8
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT: vnsrl.wi v8, v16, 0
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT: bltu a1, a2, .LBB105_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: mv a1, a2
; RV32-NEXT: .LBB105_4:
; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT: vsll.vi v24, v24, 3
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT: vnsrl.wi v4, v24, 0
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT: vmv1r.v v0, v12
; RV32-NEXT: vluxei32.v v8, (a0), v4, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vmv1r.v v12, v0
; RV64-NEXT: li a3, 0
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT: vzext.vf4 v16, v10
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: srli a5, a2, 3
; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, mu
; RV64-NEXT: sub a4, a1, a2
; RV64-NEXT: vslidedown.vx v0, v0, a5
; RV64-NEXT: bltu a1, a4, .LBB105_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a4
; RV64-NEXT: .LBB105_2:
; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, mu
; RV64-NEXT: vzext.vf4 v24, v8
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT: bltu a1, a2, .LBB105_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: mv a1, a2
; RV64-NEXT: .LBB105_4:
; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: ret
%eidxs = zext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
%ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i64> %eidxs
%v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x double> %v
}

@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v \
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v,+m \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v \
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v,+m \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64

declare void @llvm.vp.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8>, <vscale x 1 x i8*>, <vscale x 1 x i1>, i32)

@ -2071,3 +2071,367 @@ define void @vpscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, double* %base
call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
ret void
}

declare void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double>, <vscale x 16 x double*>, <vscale x 16 x i1>, i32)

define void @vpscatter_nxv16f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vl8re32.v v24, (a0)
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: mv a2, a1
; RV32-NEXT: bltu a1, a0, .LBB95_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a2, a0
; RV32-NEXT: .LBB95_2:
; RV32-NEXT: li a3, 0
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t
; RV32-NEXT: srli a2, a0, 3
; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, mu
; RV32-NEXT: sub a0, a1, a0
; RV32-NEXT: vslidedown.vx v0, v0, a2
; RV32-NEXT: bltu a1, a0, .LBB95_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: mv a3, a0
; RV32-NEXT: .LBB95_4:
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vl8re64.v v16, (a0)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a3, a1, 3
; RV64-NEXT: add a0, a0, a3
; RV64-NEXT: mv a3, a2
; RV64-NEXT: bltu a2, a1, .LBB95_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB95_2:
; RV64-NEXT: li a4, 0
; RV64-NEXT: vl8re64.v v24, (a0)
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: srli a3, a1, 3
; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT: sub a0, a2, a1
; RV64-NEXT: vslidedown.vx v0, v0, a3
; RV64-NEXT: bltu a2, a0, .LBB95_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: mv a4, a0
; RV64-NEXT: .LBB95_4:
; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, mu
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
call void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_nxv16i16_nxv16f64(<vscale x 16 x double> %val, double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vl4re16.v v4, (a1)
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: mv a3, a2
; RV32-NEXT: bltu a2, a1, .LBB96_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB96_2:
; RV32-NEXT: li a4, 0
; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, mu
; RV32-NEXT: vsext.vf2 v24, v4
; RV32-NEXT: vsll.vi v24, v24, 3
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: srli a3, a1, 3
; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, mu
; RV32-NEXT: sub a1, a2, a1
; RV32-NEXT: vslidedown.vx v0, v0, a3
; RV32-NEXT: bltu a2, a1, .LBB96_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: mv a4, a1
; RV32-NEXT: .LBB96_4:
; RV32-NEXT: vsetvli zero, a4, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vl4re16.v v4, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: mv a3, a2
; RV64-NEXT: bltu a2, a1, .LBB96_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB96_2:
; RV64-NEXT: li a4, 0
; RV64-NEXT: vsetvli a5, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf4 v24, v4
; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: srli a3, a1, 3
; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, mu
; RV64-NEXT: sub a1, a2, a1
; RV64-NEXT: vslidedown.vx v0, v0, a3
; RV64-NEXT: bltu a2, a1, .LBB96_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: mv a4, a1
; RV64-NEXT: .LBB96_4:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf4 v8, v6
; RV64-NEXT: vsll.vi v8, v8, 3
; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, mu
; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i16> %idxs
call void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: sub sp, sp, a3
; RV32-NEXT: vl4re16.v v24, (a1)
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: vsext.vf4 v8, v24
; RV32-NEXT: mv a3, a2
; RV32-NEXT: bltu a2, a1, .LBB97_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB97_2:
; RV32-NEXT: li a4, 0
; RV32-NEXT: vsext.vf4 v16, v26
; RV32-NEXT: vsll.vi v8, v8, 3
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT: vnsrl.wi v24, v8, 0
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: vl8re8.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: srli a3, a1, 3
; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, mu
; RV32-NEXT: sub a1, a2, a1
; RV32-NEXT: vslidedown.vx v0, v0, a3
; RV32-NEXT: bltu a2, a1, .LBB97_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: mv a4, a1
; RV32-NEXT: .LBB97_4:
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vsll.vi v8, v16, 3
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT: vnsrl.wi v16, v8, 0
; RV32-NEXT: vsetvli zero, a4, e64, m8, ta, mu
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: vl4re16.v v24, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: vsext.vf4 v8, v24
; RV64-NEXT: mv a3, a2
; RV64-NEXT: bltu a2, a1, .LBB97_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB97_2:
; RV64-NEXT: li a4, 0
; RV64-NEXT: vsext.vf4 v16, v26
; RV64-NEXT: vsll.vi v8, v8, 3
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vl8re8.v v24, (a3) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v24, (a0), v8, v0.t
; RV64-NEXT: srli a3, a1, 3
; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, mu
; RV64-NEXT: sub a1, a2, a1
; RV64-NEXT: vslidedown.vx v0, v0, a3
; RV64-NEXT: bltu a2, a1, .LBB97_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: mv a4, a1
; RV64-NEXT: .LBB97_4:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, mu
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%eidxs = sext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
%ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i64> %eidxs
call void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret void
}

define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: sub sp, sp, a3
; RV32-NEXT: vl4re16.v v24, (a1)
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: vzext.vf4 v8, v24
; RV32-NEXT: mv a3, a2
; RV32-NEXT: bltu a2, a1, .LBB98_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB98_2:
; RV32-NEXT: li a4, 0
; RV32-NEXT: vzext.vf4 v16, v26
; RV32-NEXT: vsll.vi v8, v8, 3
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT: vnsrl.wi v24, v8, 0
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: vl8re8.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: srli a3, a1, 3
; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, mu
; RV32-NEXT: sub a1, a2, a1
; RV32-NEXT: vslidedown.vx v0, v0, a3
; RV32-NEXT: bltu a2, a1, .LBB98_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: mv a4, a1
; RV32-NEXT: .LBB98_4:
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vsll.vi v8, v16, 3
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT: vnsrl.wi v16, v8, 0
; RV32-NEXT: vsetvli zero, a4, e64, m8, ta, mu
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: vl4re16.v v24, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: vzext.vf4 v8, v24
; RV64-NEXT: mv a3, a2
; RV64-NEXT: bltu a2, a1, .LBB98_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB98_2:
; RV64-NEXT: li a4, 0
; RV64-NEXT: vzext.vf4 v16, v26
; RV64-NEXT: vsll.vi v8, v8, 3
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; RV64-NEXT: addi a3, sp, 16
; RV64-NEXT: vl8re8.v v24, (a3) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v24, (a0), v8, v0.t
; RV64-NEXT: srli a3, a1, 3
; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, mu
; RV64-NEXT: sub a1, a2, a1
; RV64-NEXT: vslidedown.vx v0, v0, a3
; RV64-NEXT: bltu a2, a1, .LBB98_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: mv a4, a1
; RV64-NEXT: .LBB98_4:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsll.vi v8, v16, 3
; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, mu
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%eidxs = zext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
%ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i64> %eidxs
call void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
ret void
}