[SVE][CodeGen] Lower scalable masked scatters
Lowers the llvm.masked.scatter intrinsics (scalar plus vector addressing mode only).

Changes included in this patch:
- Custom lowering for MSCATTER, which chooses the appropriate scatter store opcode to use.
  Floating-point scatters are cast to integer, with patterns added to match FP reinterpret_casts.
- Added the getCanonicalIndexType function to convert redundant addressing modes
  (e.g. scaling is redundant when accessing bytes).
- Tests with 32 & 64-bit scaled & unscaled offsets.

Reviewed By: sdesmalen

Differential Revision: https://reviews.llvm.org/D90941
commit 170947a5de (parent 98aa067109)
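Before the diff itself, a brief illustrative sketch (not part of the patch; all names are hypothetical) of the "scalar plus vector" addressing mode the lowering targets: each active lane stores its data element at the scalar base address plus that lane's offset, with the offset optionally scaled by the element size.

#include <cstddef>
#include <cstdint>

// Illustrative only: per-lane semantics of a masked scatter with a scalar
// base and a vector of offsets. A "scaled" scatter multiplies each offset by
// sizeof(T); an "unscaled" scatter treats the offsets as raw byte offsets.
template <typename T>
void maskedScatterScaled(T *Base, const int64_t *Offsets, const bool *Mask,
                         const T *Data, size_t NumLanes) {
  for (size_t I = 0; I < NumLanes; ++I)
    if (Mask[I])
      Base[Offsets[I]] = Data[I]; // byte address = Base + Offsets[I] * sizeof(T)
}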
@@ -2391,6 +2391,9 @@ public:
  ISD::MemIndexType getIndexType() const {
    return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
  }
  void setIndexType(ISD::MemIndexType IndexType) {
    LSBaseSDNodeBits.AddressingMode = IndexType;
  }
  bool isIndexScaled() const {
    return (getIndexType() == ISD::SIGNED_SCALED) ||
           (getIndexType() == ISD::UNSIGNED_SCALED);
@@ -4510,6 +4510,10 @@ public:
  // combiner can fold the new nodes.
  SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;

  /// Give targets the chance to reduce the number of distinct addressing modes.
  ISD::MemIndexType getCanonicalIndexType(ISD::MemIndexType IndexType,
                                          EVT MemVT, SDValue Offsets) const;

private:
  SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                           const SDLoc &DL, DAGCombinerInfo &DCI) const;
@@ -1865,6 +1865,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
    else
      NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));

    N->setIndexType(TLI.getCanonicalIndexType(N->getIndexType(),
                                              N->getMemoryVT(), NewOps[OpNo]));
  } else {
    NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
    TruncateStore = true;
@@ -7356,15 +7356,21 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
    return SDValue(E, 0);
  }

+  IndexType = TLI->getCanonicalIndexType(IndexType, VT, Ops[4]);
  auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(),
                                           VTs, VT, MMO, IndexType, IsTrunc);
  createOperands(N, Ops);

-  assert(N->getMask().getValueType().getVectorNumElements() ==
-             N->getValue().getValueType().getVectorNumElements() &&
+  assert(N->getMask().getValueType().getVectorElementCount() ==
+             N->getValue().getValueType().getVectorElementCount() &&
         "Vector width mismatch between mask and data");
-  assert(N->getIndex().getValueType().getVectorNumElements() >=
-             N->getValue().getValueType().getVectorNumElements() &&
+  assert(
+      N->getIndex().getValueType().getVectorElementCount().isScalable() ==
+          N->getValue().getValueType().getVectorElementCount().isScalable() &&
+      "Scalable flags of index and data do not match");
+  assert(ElementCount::isKnownGE(
+             N->getIndex().getValueType().getVectorElementCount(),
+             N->getValue().getValueType().getVectorElementCount()) &&
         "Vector width mismatch between index and data");
  assert(isa<ConstantSDNode>(N->getScale()) &&
         cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
@@ -4297,7 +4297,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
  if (!UniformBase) {
    Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
    Index = getValue(Ptr);
-    IndexType = ISD::SIGNED_SCALED;
+    IndexType = ISD::SIGNED_UNSCALED;
    Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
  }
  SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale };
@@ -7439,6 +7439,25 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
  return SDValue();
}

// Convert redundant addressing modes (e.g. scaling is redundant
// when accessing bytes).
ISD::MemIndexType
TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT,
                                      SDValue Offsets) const {
  bool IsScaledIndex =
      (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::UNSIGNED_SCALED);
  bool IsSignedIndex =
      (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED);

  // Scaling is unimportant for bytes, canonicalize to unscaled.
  if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) {
    IsScaledIndex = false;
    IndexType = IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
  }

  return IndexType;
}
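A hedged usage sketch of the hook above (illustrative only; TLI and ByteOffsets stand in for a real TargetLowering reference and an offsets node): scaling by a one-byte element size is a no-op, so a scaled index type over i8 memory canonicalizes to its unscaled counterpart while keeping its signedness.

// Sketch, not patch code: canonicalizing the index type of a byte-element
// scatter. A signed scaled index over MVT::i8 memory becomes signed unscaled.
ISD::MemIndexType IT =
    TLI.getCanonicalIndexType(ISD::SIGNED_SCALED, MVT::i8, ByteOffsets);
assert(IT == ISD::SIGNED_UNSCALED && "scaling is redundant for byte accesses");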

SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
@@ -1001,6 +1001,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
      setOperationAction(ISD::SINT_TO_FP, VT, Custom);
      setOperationAction(ISD::FP_TO_UINT, VT, Custom);
      setOperationAction(ISD::FP_TO_SINT, VT, Custom);
      setOperationAction(ISD::MSCATTER, VT, Custom);
      setOperationAction(ISD::MUL, VT, Custom);
      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
      setOperationAction(ISD::SELECT, VT, Custom);
@@ -1052,6 +1053,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
                    MVT::nxv4f32, MVT::nxv2f64}) {
      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::MSCATTER, VT, Custom);
      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::FADD, VT, Custom);
@@ -1073,6 +1075,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
      setOperationAction(ISD::FP_ROUND, VT, Custom);
    }

    for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16})
      setOperationAction(ISD::MSCATTER, VT, Custom);

    setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);

    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
@@ -3705,6 +3710,100 @@ bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
  return ExtVal.getValueType().isScalableVector();
}

unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
  std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
      {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
       AArch64ISD::SST1_PRED},
      {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
       AArch64ISD::SST1_UXTW_PRED},
      {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
       AArch64ISD::SST1_PRED},
      {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
       AArch64ISD::SST1_SXTW_PRED},
      {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
       AArch64ISD::SST1_SCALED_PRED},
      {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
       AArch64ISD::SST1_UXTW_SCALED_PRED},
      {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
       AArch64ISD::SST1_SCALED_PRED},
      {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
       AArch64ISD::SST1_SXTW_SCALED_PRED},
  };
  auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
  return AddrModes.find(Key)->second;
}
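To make the table above concrete, a hedged example of the lookup (it mirrors the AddrModes map and is not additional code from the patch): an unscaled scatter whose 32-bit offsets require zero-extension selects the UXTW form.

// Illustrative lookup against the AddrModes table defined above.
unsigned Opc = getScatterVecOpcode(/*IsScaled=*/false, /*IsSigned=*/false,
                                   /*NeedsExtend=*/true);
assert(Opc == AArch64ISD::SST1_UXTW_PRED && "unscaled + zero-extended offsets");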

bool getScatterIndexIsExtended(SDValue Index) {
  unsigned Opcode = Index.getOpcode();
  if (Opcode == ISD::SIGN_EXTEND_INREG)
    return true;

  if (Opcode == ISD::AND) {
    SDValue Splat = Index.getOperand(1);
    if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
      return false;
    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0));
    if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF)
      return false;
    return true;
  }

  return false;
}

SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
  assert(MSC && "Can only custom lower scatter store nodes");

  SDValue Index = MSC->getIndex();
  SDValue Chain = MSC->getChain();
  SDValue StoreVal = MSC->getValue();
  SDValue Mask = MSC->getMask();
  SDValue BasePtr = MSC->getBasePtr();

  ISD::MemIndexType IndexType = MSC->getIndexType();
  bool IsScaled =
      IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
  bool IsSigned =
      IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
  bool NeedsExtend =
      getScatterIndexIsExtended(Index) ||
      Index.getSimpleValueType().getVectorElementType() == MVT::i32;

  EVT VT = StoreVal.getSimpleValueType();
  SDVTList VTs = DAG.getVTList(MVT::Other);
  EVT MemVT = MSC->getMemoryVT();
  SDValue InputVT = DAG.getValueType(MemVT);

  if (VT.getVectorElementType() == MVT::bf16 &&
      !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
    return SDValue();

  // Handle FP data
  if (VT.isFloatingPoint()) {
    VT = VT.changeVectorElementTypeToInteger();
    ElementCount EC = VT.getVectorElementCount();
    auto ScalarIntVT =
        MVT::getIntegerVT(AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
    StoreVal = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL,
                           MVT::getVectorVT(ScalarIntVT, EC), StoreVal);

    InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
  }

  if (getScatterIndexIsExtended(Index)) {
    if (Index.getOpcode() == ISD::AND)
      IsSigned = false;
    Index = Index.getOperand(0);
  }

  SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
  return DAG.getNode(getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend), DL,
                     VTs, Ops);
}
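A short worked example of the floating-point path above, hedged and matching the scaled f32 tests later in the patch: nxv2f32 store data has a known-minimum element count of 2, so the container element type becomes MVT::getIntegerVT(128 / 2) = i64 and the value is reinterpreted in an nxv2i64-shaped register before the integer scatter opcode is chosen; the memory VT keeps its 32-bit elements, which is why the tests still emit st1w.

// Sketch of the container-width computation for nxv2f32 data (not patch code;
// the SVE bits-per-block value of 128 is restated locally as an assumption).
constexpr unsigned SVEBitsPerBlock = 128;
constexpr unsigned KnownMinEC = 2; // element count of nxv2f32
static_assert(SVEBitsPerBlock / KnownMinEC == 64,
              "nxv2f32 data is reinterpreted in a 64-bit-per-lane container");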

// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
                                        EVT VT, EVT MemVT,

@@ -3982,6 +4081,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::STORE:
    return LowerSTORE(Op, DAG);
  case ISD::MSCATTER:
    return LowerMSCATTER(Op, DAG);
  case ISD::VECREDUCE_SEQ_FADD:
    return LowerVECREDUCE_SEQ_FADD(Op, DAG);
  case ISD::VECREDUCE_ADD:
@@ -807,6 +807,8 @@ private:

  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

  bool isEligibleForTailCallOptimization(
@@ -1191,6 +1191,13 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
  def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 2))),
            (UUNPKHI_ZZ_D ZPR:$Zs)>;

  let Predicates = [HasSVE, HasBF16] in {
    def : Pat<(nxv2bf16 (extract_subvector (nxv4bf16 ZPR:$Zs), (i64 0))),
              (UUNPKLO_ZZ_D ZPR:$Zs)>;
    def : Pat<(nxv2bf16 (extract_subvector (nxv4bf16 ZPR:$Zs), (i64 2))),
              (UUNPKHI_ZZ_D ZPR:$Zs)>;
  }

  def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))),
            (UUNPKLO_ZZ_S ZPR:$Zs)>;
  def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))),
@@ -1769,6 +1776,16 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
  def : Pat<(nxv2i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
  def : Pat<(nxv2i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;

  def : Pat<(nxv2i64 (reinterpret_cast (nxv2f64 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
  def : Pat<(nxv2i64 (reinterpret_cast (nxv2f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
  def : Pat<(nxv2i64 (reinterpret_cast (nxv2f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
  def : Pat<(nxv4i32 (reinterpret_cast (nxv4f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
  def : Pat<(nxv4i32 (reinterpret_cast (nxv4f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
  let Predicates = [HasSVE, HasBF16] in {
    def : Pat<(nxv2i64 (reinterpret_cast (nxv2bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
    def : Pat<(nxv4i32 (reinterpret_cast (nxv4bf16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
  }

  def : Pat<(nxv16i1 (and PPR:$Ps1, PPR:$Ps2)),
            (AND_PPzPP (PTRUE_B 31), PPR:$Ps1, PPR:$Ps2)>;
  def : Pat<(nxv8i1 (and PPR:$Ps1, PPR:$Ps2)),
@@ -0,0 +1,370 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; scaled unpacked 32-bit offsets
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

define void @masked_scatter_nxv2i16_sext(<vscale x 2 x i16> %data, i16* %base, <vscale x 2 x i32> %indexes, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i16_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, sxtw #1]
; CHECK-NEXT: ret
  %ext = sext <vscale x 2 x i32> %indexes to <vscale x 2 x i64>
  %ptrs = getelementptr i16, i16* %base, <vscale x 2 x i64> %ext
  call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2i32_sext(<vscale x 2 x i32> %data, i32* %base, <vscale x 2 x i32> %indexes, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i32_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d, sxtw #2]
; CHECK-NEXT: ret
  %ext = sext <vscale x 2 x i32> %indexes to <vscale x 2 x i64>
  %ptrs = getelementptr i32, i32* %base, <vscale x 2 x i64> %ext
  call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2i64_sext(<vscale x 2 x i64> %data, i64* %base, <vscale x 2 x i32> %indexes, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i64_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d, sxtw #3]
; CHECK-NEXT: ret
  %ext = sext <vscale x 2 x i32> %indexes to <vscale x 2 x i64>
  %ptrs = getelementptr i64, i64* %base, <vscale x 2 x i64> %ext
  call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2f16_sext(<vscale x 2 x half> %data, half* %base, <vscale x 2 x i32> %indexes, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f16_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, sxtw #1]
; CHECK-NEXT: ret
  %ext = sext <vscale x 2 x i32> %indexes to <vscale x 2 x i64>
  %ptrs = getelementptr half, half* %base, <vscale x 2 x i64> %ext
  call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2bf16_sext(<vscale x 2 x bfloat> %data, bfloat* %base, <vscale x 2 x i32> %indexes, <vscale x 2 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv2bf16_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, sxtw #1]
; CHECK-NEXT: ret
  %ext = sext <vscale x 2 x i32> %indexes to <vscale x 2 x i64>
  %ptrs = getelementptr bfloat, bfloat* %base, <vscale x 2 x i64> %ext
  call void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2f32_sext(<vscale x 2 x float> %data, float* %base, <vscale x 2 x i32> %indexes, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f32_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d, sxtw #2]
; CHECK-NEXT: ret
  %ext = sext <vscale x 2 x i32> %indexes to <vscale x 2 x i64>
  %ptrs = getelementptr float, float* %base, <vscale x 2 x i64> %ext
  call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2f64_sext(<vscale x 2 x double> %data, double* %base, <vscale x 2 x i32> %indexes, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f64_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d, sxtw #3]
; CHECK-NEXT: ret
  %ext = sext <vscale x 2 x i32> %indexes to <vscale x 2 x i64>
  %ptrs = getelementptr double, double* %base, <vscale x 2 x i64> %ext
  call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2i16_zext(<vscale x 2 x i16> %data, i16* %base, <vscale x 2 x i32> %indexes, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i16_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, uxtw #1]
; CHECK-NEXT: ret
  %ext = zext <vscale x 2 x i32> %indexes to <vscale x 2 x i64>
  %ptrs = getelementptr i16, i16* %base, <vscale x 2 x i64> %ext
  call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2i32_zext(<vscale x 2 x i32> %data, i32* %base, <vscale x 2 x i32> %indexes, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i32_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d, uxtw #2]
; CHECK-NEXT: ret
  %ext = zext <vscale x 2 x i32> %indexes to <vscale x 2 x i64>
  %ptrs = getelementptr i32, i32* %base, <vscale x 2 x i64> %ext
  call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2i64_zext(<vscale x 2 x i64> %data, i64* %base, <vscale x 2 x i32> %indexes, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i64_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d, uxtw #3]
; CHECK-NEXT: ret
  %ext = zext <vscale x 2 x i32> %indexes to <vscale x 2 x i64>
  %ptrs = getelementptr i64, i64* %base, <vscale x 2 x i64> %ext
  call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2f16_zext(<vscale x 2 x half> %data, half* %base, <vscale x 2 x i32> %indexes, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f16_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, uxtw #1]
; CHECK-NEXT: ret
  %ext = zext <vscale x 2 x i32> %indexes to <vscale x 2 x i64>
  %ptrs = getelementptr half, half* %base, <vscale x 2 x i64> %ext
  call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2bf16_zext(<vscale x 2 x bfloat> %data, bfloat* %base, <vscale x 2 x i32> %indexes, <vscale x 2 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv2bf16_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, uxtw #1]
; CHECK-NEXT: ret
  %ext = zext <vscale x 2 x i32> %indexes to <vscale x 2 x i64>
  %ptrs = getelementptr bfloat, bfloat* %base, <vscale x 2 x i64> %ext
  call void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2f32_zext(<vscale x 2 x float> %data, float* %base, <vscale x 2 x i32> %indexes, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f32_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d, uxtw #2]
; CHECK-NEXT: ret
  %ext = zext <vscale x 2 x i32> %indexes to <vscale x 2 x i64>
  %ptrs = getelementptr float, float* %base, <vscale x 2 x i64> %ext
  call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2f64_zext(<vscale x 2 x double> %data, double* %base, <vscale x 2 x i32> %indexes, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f64_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d, uxtw #3]
; CHECK-NEXT: ret
  %ext = zext <vscale x 2 x i32> %indexes to <vscale x 2 x i64>
  %ptrs = getelementptr double, double* %base, <vscale x 2 x i64> %ext
  call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; scaled packed 32-bit offset
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

define void @masked_scatter_nxv4i16_sext(<vscale x 4 x i16> %data, i16* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i16_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x0, z1.d, sxtw #1]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z2.d, sxtw #1]
; CHECK-NEXT: ret
  %ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
  %ptrs = getelementptr i16, i16* %base, <vscale x 4 x i64> %ext
  call void @llvm.masked.scatter.nxv4i16(<vscale x 4 x i16> %data, <vscale x 4 x i16*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv4i32_sext(<vscale x 4 x i32> %data, i32* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i32_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1w { z3.d }, p2, [x0, z1.d, sxtw #2]
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z2.d, sxtw #2]
; CHECK-NEXT: ret
  %ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
  %ptrs = getelementptr i32, i32* %base, <vscale x 4 x i64> %ext
  call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv4f16_sext(<vscale x 4 x half> %data, half* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4f16_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x0, z1.d, sxtw #1]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z2.d, sxtw #1]
; CHECK-NEXT: ret
  %ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
  %ptrs = getelementptr half, half* %base, <vscale x 4 x i64> %ext
  call void @llvm.masked.scatter.nxv4f16(<vscale x 4 x half> %data, <vscale x 4 x half*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv4bf16_sext(<vscale x 4 x bfloat> %data, bfloat* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4bf16_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x0, z1.d, sxtw #1]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z2.d, sxtw #1]
; CHECK-NEXT: ret
  %ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
  %ptrs = getelementptr bfloat, bfloat* %base, <vscale x 4 x i64> %ext
  call void @llvm.masked.scatter.nxv4bf16(<vscale x 4 x bfloat> %data, <vscale x 4 x bfloat*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv4f32_sext(<vscale x 4 x float> %data, float* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4f32_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1w { z3.d }, p2, [x0, z1.d, sxtw #2]
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z2.d, sxtw #2]
; CHECK-NEXT: ret
  %ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
  %ptrs = getelementptr float, float* %base, <vscale x 4 x i64> %ext
  call void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x float*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv4i16_zext(<vscale x 4 x i16> %data, i16* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i16_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x0, z1.d, uxtw #1]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z2.d, uxtw #1]
; CHECK-NEXT: ret
  %ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
  %ptrs = getelementptr i16, i16* %base, <vscale x 4 x i64> %ext
  call void @llvm.masked.scatter.nxv4i16(<vscale x 4 x i16> %data, <vscale x 4 x i16*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv4i32_zext(<vscale x 4 x i32> %data, i32* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i32_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1w { z3.d }, p2, [x0, z1.d, uxtw #2]
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z2.d, uxtw #2]
; CHECK-NEXT: ret
  %ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
  %ptrs = getelementptr i32, i32* %base, <vscale x 4 x i64> %ext
  call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv4f16_zext(<vscale x 4 x half> %data, half* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4f16_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x0, z1.d, uxtw #1]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z2.d, uxtw #1]
; CHECK-NEXT: ret
  %ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
  %ptrs = getelementptr half, half* %base, <vscale x 4 x i64> %ext
  call void @llvm.masked.scatter.nxv4f16(<vscale x 4 x half> %data, <vscale x 4 x half*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv4bf16_zext(<vscale x 4 x bfloat> %data, bfloat* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4bf16_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x0, z1.d, uxtw #1]
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z2.d, uxtw #1]
; CHECK-NEXT: ret
  %ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
  %ptrs = getelementptr bfloat, bfloat* %base, <vscale x 4 x i64> %ext
  call void @llvm.masked.scatter.nxv4bf16(<vscale x 4 x bfloat> %data, <vscale x 4 x bfloat*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv4f32_zext(<vscale x 4 x float> %data, float* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4f32_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1w { z3.d }, p2, [x0, z1.d, uxtw #2]
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z2.d, uxtw #2]
; CHECK-NEXT: ret
  %ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
  %ptrs = getelementptr float, float* %base, <vscale x 4 x i64> %ext
  call void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x float*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
  ret void
}

declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half*>, i32, <vscale x 4 x i1>)
declare void @llvm.masked.scatter.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat*>, i32, <vscale x 4 x i1>)
declare void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float*>, i32, <vscale x 4 x i1>)
declare void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, i32, <vscale x 4 x i1>)
declare void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>)
declare void @llvm.masked.scatter.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8*>, i32, <vscale x 4 x i1>)
attributes #0 = { "target-features"="+sve,+bf16" }

@@ -0,0 +1,577 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; unscaled unpacked 32-bit offsets
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

define void @masked_scatter_nxv2i8_sext_offsets(<vscale x 2 x i8> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i8_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i8*>
  call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2i16_sext_offsets(<vscale x 2 x i16> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i16_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
  call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2i32_sext_offsets(<vscale x 2 x i32> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i32_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
  call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2i64_sext_offsets(<vscale x 2 x i64> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i64_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i64*>
  call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2f16_sext_offsets(<vscale x 2 x half> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f16_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x half*>
  call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2bf16_sext_offsets(<vscale x 2 x bfloat> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv2bf16_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x bfloat*>
  call void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2f32_sext_offsets(<vscale x 2 x float> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f32_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x float*>
  call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2f64_sext_offsets(<vscale x 2 x double> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f64_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x double*>
  call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2i8_zext_offsets(<vscale x 2 x i8> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i8_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i8*>
  call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2i16_zext_offsets(<vscale x 2 x i16> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i16_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
  call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2i32_zext_offsets(<vscale x 2 x i32> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i32_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
  call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2i64_zext_offsets(<vscale x 2 x i64> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i64_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i64*>
  call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2f16_zext_offsets(<vscale x 2 x half> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f16_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x half*>
  call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2bf16_zext_offsets(<vscale x 2 x bfloat> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv2bf16_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x bfloat*>
  call void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2f32_zext_offsets(<vscale x 2 x float> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f32_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x float*>
  call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

define void @masked_scatter_nxv2f64_zext_offsets(<vscale x 2 x double> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f64_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
  %offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x double*>
  call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
  ret void
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; unscaled packed 32-bit offsets
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
define void @masked_scatter_nxv4i8_sext_offsets(<vscale x 4 x i8> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
|
||||
; CHECK-LABEL: masked_scatter_nxv4i8_sext_offsets:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z2.d, x0
|
||||
; CHECK-NEXT: sunpklo z3.d, z1.s
|
||||
; CHECK-NEXT: sunpkhi z1.d, z1.s
|
||||
; CHECK-NEXT: pfalse p1.b
|
||||
; CHECK-NEXT: mov x8, xzr
|
||||
; CHECK-NEXT: add z1.d, z2.d, z1.d
|
||||
; CHECK-NEXT: add z2.d, z2.d, z3.d
|
||||
; CHECK-NEXT: uunpklo z3.d, z0.s
|
||||
; CHECK-NEXT: uunpkhi z0.d, z0.s
|
||||
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
|
||||
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
|
||||
; CHECK-NEXT: st1b { z3.d }, p2, [x8, z2.d]
|
||||
; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
|
||||
; CHECK-NEXT: ret
|
||||
%offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
|
||||
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
|
||||
%ptrs = bitcast <vscale x 4 x i8*> %byte_ptrs to <vscale x 4 x i8*>
|
||||
call void @llvm.masked.scatter.nxv4i8(<vscale x 4 x i8> %data, <vscale x 4 x i8*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @masked_scatter_nxv4i16_sext_offsets(<vscale x 4 x i16> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
|
||||
; CHECK-LABEL: masked_scatter_nxv4i16_sext_offsets:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z2.d, x0
|
||||
; CHECK-NEXT: sunpklo z3.d, z1.s
|
||||
; CHECK-NEXT: sunpkhi z1.d, z1.s
|
||||
; CHECK-NEXT: pfalse p1.b
|
||||
; CHECK-NEXT: mov x8, xzr
|
||||
; CHECK-NEXT: add z1.d, z2.d, z1.d
|
||||
; CHECK-NEXT: add z2.d, z2.d, z3.d
|
||||
; CHECK-NEXT: uunpklo z3.d, z0.s
|
||||
; CHECK-NEXT: uunpkhi z0.d, z0.s
|
||||
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
|
||||
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
|
||||
; CHECK-NEXT: st1h { z3.d }, p2, [x8, z2.d]
|
||||
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
|
||||
; CHECK-NEXT: ret
|
||||
%offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
|
||||
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
|
||||
%ptrs = bitcast <vscale x 4 x i8*> %byte_ptrs to <vscale x 4 x i16*>
|
||||
call void @llvm.masked.scatter.nxv4i16(<vscale x 4 x i16> %data, <vscale x 4 x i16*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @masked_scatter_nxv4i32_sext_offsets(<vscale x 4 x i32> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
|
||||
; CHECK-LABEL: masked_scatter_nxv4i32_sext_offsets:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z2.d, x0
|
||||
; CHECK-NEXT: sunpklo z3.d, z1.s
|
||||
; CHECK-NEXT: sunpkhi z1.d, z1.s
|
||||
; CHECK-NEXT: pfalse p1.b
|
||||
; CHECK-NEXT: mov x8, xzr
|
||||
; CHECK-NEXT: add z1.d, z2.d, z1.d
|
||||
; CHECK-NEXT: add z2.d, z2.d, z3.d
|
||||
; CHECK-NEXT: uunpklo z3.d, z0.s
|
||||
; CHECK-NEXT: uunpkhi z0.d, z0.s
|
||||
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
|
||||
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
|
||||
; CHECK-NEXT: st1w { z3.d }, p2, [x8, z2.d]
|
||||
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
|
||||
; CHECK-NEXT: ret
|
||||
%offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
|
||||
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
|
||||
%ptrs = bitcast <vscale x 4 x i8*> %byte_ptrs to <vscale x 4 x i32*>
|
||||
call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @masked_scatter_nxv4f16_sext_offsets(<vscale x 4 x half> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
|
||||
; CHECK-LABEL: masked_scatter_nxv4f16_sext_offsets:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z2.d, x0
|
||||
; CHECK-NEXT: sunpklo z3.d, z1.s
|
||||
; CHECK-NEXT: sunpkhi z1.d, z1.s
|
||||
; CHECK-NEXT: pfalse p1.b
|
||||
; CHECK-NEXT: mov x8, xzr
|
||||
; CHECK-NEXT: add z1.d, z2.d, z1.d
|
||||
; CHECK-NEXT: add z2.d, z2.d, z3.d
|
||||
; CHECK-NEXT: uunpklo z3.d, z0.s
|
||||
; CHECK-NEXT: uunpkhi z0.d, z0.s
|
||||
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
|
||||
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
|
||||
; CHECK-NEXT: st1h { z3.d }, p2, [x8, z2.d]
|
||||
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
|
||||
; CHECK-NEXT: ret
|
||||
%offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
%ptrs = bitcast <vscale x 4 x i8*> %byte_ptrs to <vscale x 4 x half*>
call void @llvm.masked.scatter.nxv4f16(<vscale x 4 x half> %data, <vscale x 4 x half*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
ret void
}

define void @masked_scatter_nxv4bf16_sext_offsets(<vscale x 4 x bfloat> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4bf16_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: sunpklo z3.d, z1.s
; CHECK-NEXT: sunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
%ptrs = bitcast <vscale x 4 x i8*> %byte_ptrs to <vscale x 4 x bfloat*>
call void @llvm.masked.scatter.nxv4bf16(<vscale x 4 x bfloat> %data, <vscale x 4 x bfloat*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
ret void
}

define void @masked_scatter_nxv4f32_sext_offsets(<vscale x 4 x float> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4f32_sext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: sunpklo z3.d, z1.s
; CHECK-NEXT: sunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1w { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
%ptrs = bitcast <vscale x 4 x i8*> %byte_ptrs to <vscale x 4 x float*>
call void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x float*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
ret void
}

define void @masked_scatter_nxv4i8_zext_offsets(<vscale x 4 x i8> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i8_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: uunpklo z3.d, z1.s
; CHECK-NEXT: uunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1b { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
%ptrs = bitcast <vscale x 4 x i8*> %byte_ptrs to <vscale x 4 x i8*>
call void @llvm.masked.scatter.nxv4i8(<vscale x 4 x i8> %data, <vscale x 4 x i8*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
ret void
}

define void @masked_scatter_nxv4i16_zext_offsets(<vscale x 4 x i16> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i16_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: uunpklo z3.d, z1.s
; CHECK-NEXT: uunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
%ptrs = bitcast <vscale x 4 x i8*> %byte_ptrs to <vscale x 4 x i16*>
call void @llvm.masked.scatter.nxv4i16(<vscale x 4 x i16> %data, <vscale x 4 x i16*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
ret void
}

define void @masked_scatter_nxv4i32_zext_offsets(<vscale x 4 x i32> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i32_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: uunpklo z3.d, z1.s
; CHECK-NEXT: uunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1w { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
%ptrs = bitcast <vscale x 4 x i8*> %byte_ptrs to <vscale x 4 x i32*>
call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
ret void
}

define void @masked_scatter_nxv4f16_zext_offsets(<vscale x 4 x half> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4f16_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: uunpklo z3.d, z1.s
; CHECK-NEXT: uunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
%ptrs = bitcast <vscale x 4 x i8*> %byte_ptrs to <vscale x 4 x half*>
call void @llvm.masked.scatter.nxv4f16(<vscale x 4 x half> %data, <vscale x 4 x half*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
ret void
}

define void @masked_scatter_nxv4bf16_zext_offsets(<vscale x 4 x bfloat> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4bf16_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: uunpklo z3.d, z1.s
; CHECK-NEXT: uunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1h { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
%ptrs = bitcast <vscale x 4 x i8*> %byte_ptrs to <vscale x 4 x bfloat*>
call void @llvm.masked.scatter.nxv4bf16(<vscale x 4 x bfloat> %data, <vscale x 4 x bfloat*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
ret void
}

define void @masked_scatter_nxv4f32_zext_offsets(<vscale x 4 x float> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4f32_zext_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: uunpklo z3.d, z1.s
; CHECK-NEXT: uunpkhi z1.d, z1.s
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: add z2.d, z2.d, z3.d
; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: st1w { z3.d }, p2, [x8, z2.d]
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
%ptrs = bitcast <vscale x 4 x i8*> %byte_ptrs to <vscale x 4 x float*>
call void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x float*> %ptrs, i32 0, <vscale x 4 x i1> %masks)
ret void
}

declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half*>, i32, <vscale x 4 x i1>)
declare void @llvm.masked.scatter.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat*>, i32, <vscale x 4 x i1>)
declare void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float*>, i32, <vscale x 4 x i1>)
declare void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, i32, <vscale x 4 x i1>)
declare void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>)
declare void @llvm.masked.scatter.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8*>, i32, <vscale x 4 x i1>)
attributes #0 = { "target-features"="+sve,+bf16" }
@@ -0,0 +1,73 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; scaled 64-bit offsets
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

define void @masked_scatter_nxv2i16(<vscale x 2 x i16> %data, i16* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, lsl #1]
; CHECK-NEXT: ret
%ptrs = getelementptr i16, i16* %base, <vscale x 2 x i64> %offsets
call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
ret void
}

define void @masked_scatter_nxv2i32(<vscale x 2 x i32> %data, i32* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d, lsl #2]
; CHECK-NEXT: ret
%ptrs = getelementptr i32, i32* %base, <vscale x 2 x i64> %offsets
call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
ret void
}

define void @masked_scatter_nxv2i64(<vscale x 2 x i64> %data, i64* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d, lsl #3]
; CHECK-NEXT: ret
%ptrs = getelementptr i64, i64* %base, <vscale x 2 x i64> %offsets
call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
ret void
}

define void @masked_scatter_nxv2f16(<vscale x 2 x half> %data, half* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, lsl #1]
; CHECK-NEXT: ret
%ptrs = getelementptr half, half* %base, <vscale x 2 x i64> %offsets
call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
ret void
}

define void @masked_scatter_nxv2f32(<vscale x 2 x float> %data, float* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d, lsl #2]
; CHECK-NEXT: ret
%ptrs = getelementptr float, float* %base, <vscale x 2 x i64> %offsets
call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
ret void
}

define void @masked_scatter_nxv2f64(<vscale x 2 x double> %data, double* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_scatter_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d, lsl #3]
; CHECK-NEXT: ret
%ptrs = getelementptr double, double* %base, <vscale x 2 x i64> %offsets
call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
ret void
}

declare void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
@@ -0,0 +1,132 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; unscaled 64-bit offsets
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

define void @masked_scatter_nxv2i8_unscaled_64bit_offsets(<vscale x 2 x i8> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i8_unscaled_64bit_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i8*>
call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
ret void
}

define void @masked_scatter_nxv2i16_unscaled_64bit_offsets(<vscale x 2 x i16> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i16_unscaled_64bit_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
ret void
}

define void @masked_scatter_nxv2i32_unscaled_64bit_offsets(<vscale x 2 x i32> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i32_unscaled_64bit_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
ret void
}

define void @masked_scatter_nxv2i64_unscaled_64bit_offsets(<vscale x 2 x i64> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i64_unscaled_64bit_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i64*>
call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
ret void
}

define void @masked_scatter_nxv2f16_unscaled_64bit_offsets(<vscale x 2 x half> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f16_unscaled_64bit_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x half*>
call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
ret void
}

define void @masked_scatter_nxv2bf16_unscaled_64bit_offsets(<vscale x 2 x bfloat> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv2bf16_unscaled_64bit_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x bfloat*>
call void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
ret void
}

define void @masked_scatter_nxv2f32_unscaled_64bit_offsets(<vscale x 2 x float> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv2f32_unscaled_64bit_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x float*>
call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
ret void
}

define void @masked_scatter_nxv2f64_unscaled_64bit_offsets(<vscale x 2 x double> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv2f64_unscaled_64bit_offsets:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, x0
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: add z1.d, z2.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x double*>
call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
ret void
}

declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half*>, i32, <vscale x 4 x i1>)
declare void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, i32, <vscale x 4 x i1>)
declare void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>)
declare void @llvm.masked.scatter.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8*>, i32, <vscale x 4 x i1>)
attributes #0 = { "target-features"="+sve,+bf16" }