[AArch64][SVE] Add intrinsics for scatter stores

Summary:
This patch adds the following SVE intrinsics for scatter stores (see the usage sketch after the list):
* 64-bit offsets:
  * @llvm.aarch64.sve.st1.scatter (unscaled)
  * @llvm.aarch64.sve.st1.scatter.index (scaled)
* 32-bit unscaled offsets:
  * @llvm.aarch64.sve.st1.scatter.uxtw (zero-extended offset)
  * @llvm.aarch64.sve.st1.scatter.sxtw (sign-extended offset)
* 32-bit scaled offsets:
  * @llvm.aarch64.sve.st1.scatter.uxtw.index (zero-extended offset)
  * @llvm.aarch64.sve.st1.scatter.sxtw.index (sign-extended offset)
* vector base + immediate:
  * @llvm.aarch64.sve.st1.scatter.imm
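
A minimal usage sketch of the scalar-base + 64-bit unscaled-offset form, mirroring
the tests added in this patch (the function name is illustrative; the other variants
differ only in the intrinsic suffix and the offset/base operand types):

  ; Scatter the active lanes of %data to %base + %offsets (unscaled, i.e. byte offsets).
  define void @example_st1d_scatter(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg,
                                    i64* %base, <vscale x 2 x i64> %offsets) {
    call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> %data,
                                                    <vscale x 2 x i1> %pg,
                                                    i64* %base,
                                                    <vscale x 2 x i64> %offsets)
    ret void
  }
  declare void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*, <vscale x 2 x i64>)

This selects to "st1d { z0.d }, p0, [x0, z1.d]", as checked by the new tests.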

Reviewers: rengolin, efriedma, sdesmalen

Reviewed By: efriedma, sdesmalen

Subscribers: kmclaughlin, eli.friedman, tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71074
Author: Andrzej Warzynski
Date:   2019-12-16 11:51:10 +00:00
Commit: 7e20c3a71d (parent: 22caa3cfbc)
11 changed files with 1057 additions and 60 deletions


@@ -1064,6 +1064,35 @@ class AdvSIMD_1VectorArg_Imm_Intrinsic
llvm_i32_ty],
[IntrNoMem, ImmArg<1>]>;
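// Scatter store intrinsics take: the data to store, a governing predicate, a
// base pointer and a vector of (64- or 32-bit) offsets. The VectorBase variant
// instead takes a vector of base addresses and an immediate index.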
class AdvSIMD_ScatterStore_64bitOffset_Intrinsic
: Intrinsic<[],
[
llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMPointerToElt<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i64_ty>
],
[IntrWriteMem, IntrArgMemOnly]>;
class AdvSIMD_ScatterStore_32bitOffset_Intrinsic
: Intrinsic<[],
[
llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMPointerToElt<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>
],
[IntrWriteMem, IntrArgMemOnly]>;
class AdvSIMD_ScatterStore_VectorBase_Intrinsic
: Intrinsic<[],
[
llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyvector_ty, llvm_i64_ty
],
[IntrWriteMem, IntrArgMemOnly, ImmArg<3>]>;
//
// Loads
//
@@ -1406,6 +1435,36 @@ def int_aarch64_sve_ld1_gather_uxtw_index : AdvSIMD_GatherLoad_32bitOffset_Intri
// vector base + immediate index
def int_aarch64_sve_ld1_gather_imm : AdvSIMD_GatherLoad_VecTorBase_Intrinsic;
//
// Scatter stores:
//
// scalar + vector, 64 bit unscaled offsets
def int_aarch64_sve_st1_scatter : AdvSIMD_ScatterStore_64bitOffset_Intrinsic;
// scalar + vector, 64 bit scaled offsets
def int_aarch64_sve_st1_scatter_index
: AdvSIMD_ScatterStore_64bitOffset_Intrinsic;
// scalar + vector, 32 bit unscaled offsets, sign (sxtw) or zero (uxtw)
// extended to 64 bits
def int_aarch64_sve_st1_scatter_sxtw
: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;
def int_aarch64_sve_st1_scatter_uxtw
: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;
// scalar + vector, 32 bit scaled offsets, sign (sxtw) or zero (uxtw) extended
// to 64 bits
def int_aarch64_sve_st1_scatter_sxtw_index
: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;
def int_aarch64_sve_st1_scatter_uxtw_index
: AdvSIMD_ScatterStore_32bitOffset_Intrinsic;
// vector base + immediate index
def int_aarch64_sve_st1_scatter_imm : AdvSIMD_ScatterStore_VectorBase_Intrinsic;
//
// SVE2 - Non-widening pairwise arithmetic
//


@@ -1357,6 +1357,13 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::GLD1S_SXTW_SCALED: return "AArch64ISD::GLD1S_SXTW_SCALED";
case AArch64ISD::GLD1S_UXTW_SCALED: return "AArch64ISD::GLD1S_UXTW_SCALED";
case AArch64ISD::GLD1S_IMM: return "AArch64ISD::GLD1S_IMM";
case AArch64ISD::SST1: return "AArch64ISD::SST1";
case AArch64ISD::SST1_SCALED: return "AArch64ISD::SST1_SCALED";
case AArch64ISD::SST1_SXTW: return "AArch64ISD::SST1_SXTW";
case AArch64ISD::SST1_UXTW: return "AArch64ISD::SST1_UXTW";
case AArch64ISD::SST1_SXTW_SCALED: return "AArch64ISD::SST1_SXTW_SCALED";
case AArch64ISD::SST1_UXTW_SCALED: return "AArch64ISD::SST1_UXTW_SCALED";
case AArch64ISD::SST1_IMM: return "AArch64ISD::SST1_IMM";
}
return nullptr;
}
@@ -12080,6 +12087,75 @@ static MVT getSVEContainerType(EVT ContentTy) {
}
}
static SDValue performST1ScatterCombine(SDNode *N, SelectionDAG &DAG,
unsigned Opcode,
bool OnlyPackedOffsets = true) {
const SDValue Src = N->getOperand(2);
const EVT SrcVT = Src->getValueType(0);
assert(SrcVT.isScalableVector() &&
"Scatter stores are only possible for SVE vectors");
SDLoc DL(N);
MVT SrcElVT = SrcVT.getVectorElementType().getSimpleVT();
// Make sure that source data will fit into an SVE register
if (SrcVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
return SDValue();
// For FPs, ACLE only supports _packed_ single and double precision types.
if (SrcElVT.isFloatingPoint())
if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))
return SDValue();
// Depending on the addressing mode, this is either a pointer or a vector of
// pointers (that fits into one register)
const SDValue Base = N->getOperand(4);
// Depending on the addressing mode, this is either a single offset or a
// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(5);
auto &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(Base.getValueType()))
return SDValue();
// Some scatter store variants allow unpacked offsets, but only as nxv2i32
// vectors. These are implicitly sign (sxtw) or zero (uxtw) extended to
// nxv2i64. Legalize accordingly.
if (!OnlyPackedOffsets &&
Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
if (!TLI.isTypeLegal(Offset.getValueType()))
return SDValue();
// Source value type that is representable in hardware
EVT HwSrcVt = getSVEContainerType(SrcVT);
// Keep the original type of the input data to store - this is needed to
// differentiate between ST1B, ST1H, ST1W and ST1D. For FP values we want the
// integer equivalent, so just use HwSrcVt.
SDValue InputVT = DAG.getValueType(SrcVT);
if (SrcVT.isFloatingPoint())
InputVT = DAG.getValueType(HwSrcVt);
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue SrcNew;
if (Src.getValueType().isFloatingPoint())
SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Src);
else
SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Src);
SDValue Ops[] = {N->getOperand(0), // Chain
SrcNew,
N->getOperand(3), // Pg
Base,
Offset,
InputVT};
return DAG.getNode(Opcode, DL, VTs, Ops);
}
static SDValue performLD1GatherCombine(SDNode *N, SelectionDAG &DAG,
unsigned Opcode) {
EVT RetVT = N->getValueType(0);
@@ -12300,6 +12376,24 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW_SCALED);
case Intrinsic::aarch64_sve_ld1_gather_imm:
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_IMM);
case Intrinsic::aarch64_sve_st1_scatter:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1);
case Intrinsic::aarch64_sve_st1_scatter_index:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SCALED);
case Intrinsic::aarch64_sve_st1_scatter_sxtw:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SXTW,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_UXTW,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_SXTW_SCALED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_UXTW_SCALED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_imm:
return performST1ScatterCombine(N, DAG, AArch64ISD::SST1_IMM);
default:
break;
}


@@ -223,6 +223,14 @@ enum NodeType : unsigned {
GLD1S_UXTW_SCALED,
GLD1S_SXTW_SCALED,
GLD1S_IMM,
// Scatter store
SST1,
SST1_SCALED,
SST1_UXTW,
SST1_SXTW,
SST1_UXTW_SCALED,
SST1_SXTW_SCALED,
SST1_IMM,
// NEON Load/Store with post-increment base updates
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,


@@ -393,6 +393,27 @@ def uimm5s8 : Operand<i64>, ImmLeaf<i64,
let PrintMethod = "printImmScale<8>";
}
// tuimm5sN predicate - similar to uimm5sN, but uses TImmLeaf (TargetConstant)
// instead of ImmLeaf (Constant)
def tuimm5s2 : Operand<i64>, TImmLeaf<i64,
[{ return Imm >= 0 && Imm < (32*2) && ((Imm % 2) == 0); }],
UImmS2XForm> {
let ParserMatchClass = UImm5s2Operand;
let PrintMethod = "printImmScale<2>";
}
def tuimm5s4 : Operand<i64>, TImmLeaf<i64,
[{ return Imm >= 0 && Imm < (32*4) && ((Imm % 4) == 0); }],
UImmS4XForm> {
let ParserMatchClass = UImm5s4Operand;
let PrintMethod = "printImmScale<4>";
}
def tuimm5s8 : Operand<i64>, TImmLeaf<i64,
[{ return Imm >= 0 && Imm < (32*8) && ((Imm % 8) == 0); }],
UImmS8XForm> {
let ParserMatchClass = UImm5s8Operand;
let PrintMethod = "printImmScale<8>";
}
// uimm6sN predicate - True if the immediate is a multiple of N in the range
// [0 * N, 64 * N].
def UImm6s1Operand : UImmScaledMemoryIndexed<6, 1>;
@@ -750,6 +771,14 @@ def imm0_31 : Operand<i64>, ImmLeaf<i64, [{
let ParserMatchClass = Imm0_31Operand;
}
// timm0_31 predicate - same as imm0_31, but uses TargetConstant (TImmLeaf)
// instead of Constant (ImmLeaf)
def timm0_31 : Operand<i64>, TImmLeaf<i64, [{
return ((uint64_t)Imm) < 32;
}]> {
let ParserMatchClass = Imm0_31Operand;
}
// True if the 32-bit immediate is in the range [0,31]
def imm32_0_31 : Operand<i32>, ImmLeaf<i32, [{
return ((uint64_t)Imm) < 32;


@@ -20,6 +20,24 @@ def SDT_AArch64_GLD1_IMM : SDTypeProfile<1, 4, [
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;
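// Scatter store nodes: operand 0 is the data to store, 1 the governing
// predicate, 2 the base (a scalar pointer, or a vector of bases for the _IMM
// form), 3 the vector of offsets (or the immediate index for _IMM) and 4 the
// memory VT of the data being stored.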
def SDT_AArch64_SST1 : SDTypeProfile<0, 5, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisVT<4, OtherVT>,
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;
def SDT_AArch64_SST1_IMM : SDTypeProfile<0, 5, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>, SDTCisVT<4, OtherVT>,
SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
]>;
def AArch64st1_scatter : SDNode<"AArch64ISD::SST1", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
def AArch64st1_scatter_scaled : SDNode<"AArch64ISD::SST1_SCALED", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
def AArch64st1_scatter_uxtw : SDNode<"AArch64ISD::SST1_UXTW", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
def AArch64st1_scatter_sxtw : SDNode<"AArch64ISD::SST1_SXTW", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
def AArch64st1_scatter_uxtw_scaled : SDNode<"AArch64ISD::SST1_UXTW_SCALED", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
def AArch64st1_scatter_sxtw_scaled : SDNode<"AArch64ISD::SST1_SXTW_SCALED", SDT_AArch64_SST1, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
def AArch64st1_scatter_imm : SDNode<"AArch64ISD::SST1_IMM", SDT_AArch64_SST1_IMM, [SDNPHasChain, SDNPMayStore, SDNPOptInGlue]>;
def AArch64ld1_gather : SDNode<"AArch64ISD::GLD1", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ld1_gather_scaled : SDNode<"AArch64ISD::GLD1_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ld1_gather_uxtw : SDNode<"AArch64ISD::GLD1_UXTW", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
@@ -584,51 +602,55 @@ let Predicates = [HasSVE] in {
defm ST1W_D : sve_mem_cst_ss<0b1011, "st1w", Z_d, ZPR64, GPR64NoXZRshifted32>;
defm ST1D : sve_mem_cst_ss<0b1111, "st1d", Z_d, ZPR64, GPR64NoXZRshifted64>;
// Scatters using unscaled 32-bit offsets, e.g.
// st1h z0.s, p0, [x0, z0.s, uxtw]
// and unpacked:
// Scatters using unpacked, unscaled 32-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d, uxtw]
defm SST1B_D : sve_mem_sst_sv_32_unscaled<0b000, "st1b", Z_d, ZPR64, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only>;
defm SST1B_S : sve_mem_sst_sv_32_unscaled<0b001, "st1b", Z_s, ZPR32, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only>;
defm SST1H_D : sve_mem_sst_sv_32_unscaled<0b010, "st1h", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
defm SST1H_S : sve_mem_sst_sv_32_unscaled<0b011, "st1h", Z_s, ZPR32, ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
defm SST1W_D : sve_mem_sst_sv_32_unscaled<0b100, "st1w", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
defm SST1W : sve_mem_sst_sv_32_unscaled<0b101, "st1w", Z_s, ZPR32, ZPR32ExtSXTW8, ZPR32ExtUXTW8>;
defm SST1D : sve_mem_sst_sv_32_unscaled<0b110, "st1d", Z_d, ZPR64, ZPR64ExtSXTW8, ZPR64ExtUXTW8>;
defm SST1B_D : sve_mem_64b_sst_sv_32_unscaled<0b000, "st1b", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
defm SST1H_D : sve_mem_64b_sst_sv_32_unscaled<0b010, "st1h", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
defm SST1W_D : sve_mem_64b_sst_sv_32_unscaled<0b100, "st1w", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
defm SST1D : sve_mem_64b_sst_sv_32_unscaled<0b110, "st1d", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>;
// Scatters using scaled 32-bit offsets, e.g.
// Scatters using packed, unscaled 32-bit offsets, e.g.
// st1h z0.s, p0, [x0, z0.s, uxtw]
defm SST1B_S : sve_mem_32b_sst_sv_32_unscaled<0b001, "st1b", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
defm SST1H_S : sve_mem_32b_sst_sv_32_unscaled<0b011, "st1h", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
defm SST1W : sve_mem_32b_sst_sv_32_unscaled<0b101, "st1w", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>;
// Scatters using packed, scaled 32-bit offsets, e.g.
// st1h z0.s, p0, [x0, z0.s, uxtw #1]
// and unpacked:
defm SST1H_S : sve_mem_32b_sst_sv_32_scaled<0b011, "st1h", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
defm SST1W : sve_mem_32b_sst_sv_32_scaled<0b101, "st1w", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>;
// Scatters using unpacked, scaled 32-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d, uxtw #1]
defm SST1H_D : sve_mem_sst_sv_32_scaled<0b010, "st1h", Z_d, ZPR64, ZPR64ExtSXTW16, ZPR64ExtUXTW16>;
defm SST1H_S : sve_mem_sst_sv_32_scaled<0b011, "st1h", Z_s, ZPR32, ZPR32ExtSXTW16, ZPR32ExtUXTW16>;
defm SST1W_D : sve_mem_sst_sv_32_scaled<0b100, "st1w", Z_d, ZPR64, ZPR64ExtSXTW32, ZPR64ExtUXTW32>;
defm SST1W : sve_mem_sst_sv_32_scaled<0b101, "st1w", Z_s, ZPR32, ZPR32ExtSXTW32, ZPR32ExtUXTW32>;
defm SST1D : sve_mem_sst_sv_32_scaled<0b110, "st1d", Z_d, ZPR64, ZPR64ExtSXTW64, ZPR64ExtUXTW64>;
defm SST1H_D : sve_mem_64b_sst_sv_32_scaled<0b010, "st1h", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
defm SST1W_D : sve_mem_64b_sst_sv_32_scaled<0b100, "st1w", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
defm SST1D : sve_mem_64b_sst_sv_32_scaled<0b110, "st1d", AArch64st1_scatter_sxtw_scaled, AArch64st1_scatter_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
// Scatters using 32/64-bit pointers with offset, e.g.
// st1h z0.s, p0, [z0.s, #16]
defm SST1B_S : sve_mem_32b_sst_vi_ptrs<0b001, "st1b", timm0_31, AArch64st1_scatter_imm, nxv4i8>;
defm SST1H_S : sve_mem_32b_sst_vi_ptrs<0b011, "st1h", tuimm5s2, AArch64st1_scatter_imm, nxv4i16>;
defm SST1W : sve_mem_32b_sst_vi_ptrs<0b101, "st1w", tuimm5s4, AArch64st1_scatter_imm, nxv4i32>;
// Scatters using 32/64-bit pointers with offset, e.g.
// st1h z0.d, p0, [z0.d, #16]
defm SST1B_D : sve_mem_sst_vi_ptrs<0b000, "st1b", Z_d, ZPR64, imm0_31>;
defm SST1B_S : sve_mem_sst_vi_ptrs<0b001, "st1b", Z_s, ZPR32, imm0_31>;
defm SST1H_D : sve_mem_sst_vi_ptrs<0b010, "st1h", Z_d, ZPR64, uimm5s2>;
defm SST1H_S : sve_mem_sst_vi_ptrs<0b011, "st1h", Z_s, ZPR32, uimm5s2>;
defm SST1W_D : sve_mem_sst_vi_ptrs<0b100, "st1w", Z_d, ZPR64, uimm5s4>;
defm SST1W : sve_mem_sst_vi_ptrs<0b101, "st1w", Z_s, ZPR32, uimm5s4>;
defm SST1D : sve_mem_sst_vi_ptrs<0b110, "st1d", Z_d, ZPR64, uimm5s8>;
defm SST1B_D : sve_mem_64b_sst_vi_ptrs<0b000, "st1b", timm0_31, AArch64st1_scatter_imm, nxv2i8>;
defm SST1H_D : sve_mem_64b_sst_vi_ptrs<0b010, "st1h", tuimm5s2, AArch64st1_scatter_imm, nxv2i16>;
defm SST1W_D : sve_mem_64b_sst_vi_ptrs<0b100, "st1w", tuimm5s4, AArch64st1_scatter_imm, nxv2i32>;
defm SST1D : sve_mem_64b_sst_vi_ptrs<0b110, "st1d", tuimm5s8, AArch64st1_scatter_imm, nxv2i64>;
// Scatters using unscaled 64-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d]
defm SST1B_D : sve_mem_sst_sv_64_unscaled<0b00, "st1b">;
defm SST1H_D : sve_mem_sst_sv_64_unscaled<0b01, "st1h">;
defm SST1W_D : sve_mem_sst_sv_64_unscaled<0b10, "st1w">;
defm SST1D : sve_mem_sst_sv_64_unscaled<0b11, "st1d">;
defm SST1B_D : sve_mem_sst_sv_64_unscaled<0b00, "st1b", AArch64st1_scatter, nxv2i8>;
defm SST1H_D : sve_mem_sst_sv_64_unscaled<0b01, "st1h", AArch64st1_scatter, nxv2i16>;
defm SST1W_D : sve_mem_sst_sv_64_unscaled<0b10, "st1w", AArch64st1_scatter, nxv2i32>;
defm SST1D : sve_mem_sst_sv_64_unscaled<0b11, "st1d", AArch64st1_scatter, nxv2i64>;
// Scatters using scaled 64-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d, lsl #1]
defm SST1H_D_SCALED : sve_mem_sst_sv_64_scaled<0b01, "st1h", ZPR64ExtLSL16>;
defm SST1W_D_SCALED : sve_mem_sst_sv_64_scaled<0b10, "st1w", ZPR64ExtLSL32>;
defm SST1D_SCALED : sve_mem_sst_sv_64_scaled<0b11, "st1d", ZPR64ExtLSL64>;
defm SST1H_D_SCALED : sve_mem_sst_sv_64_scaled<0b01, "st1h", AArch64st1_scatter_scaled, ZPR64ExtLSL16, nxv2i16>;
defm SST1W_D_SCALED : sve_mem_sst_sv_64_scaled<0b10, "st1w", AArch64st1_scatter_scaled, ZPR64ExtLSL32, nxv2i32>;
defm SST1D_SCALED : sve_mem_sst_sv_64_scaled<0b11, "st1d", AArch64st1_scatter_scaled, ZPR64ExtLSL64, nxv2i64>;
// ST(2|3|4) structured stores (register + immediate)
defm ST2B_IMM : sve_mem_est_si<0b00, 0b01, ZZ_b, "st2b", simm4s2>;


@@ -4564,32 +4564,84 @@ class sve_mem_sst_sv<bits<3> opc, bit xs, bit scaled, string asm,
let mayStore = 1;
}
multiclass sve_mem_sst_sv_32_scaled<bits<3> opc, string asm,
RegisterOperand listty,
ZPRRegOp zprty,
multiclass sve_mem_32b_sst_sv_32_scaled<bits<3> opc, string asm,
SDPatternOperator sxtw_op,
SDPatternOperator uxtw_op,
RegisterOperand sxtw_opnd,
RegisterOperand uxtw_opnd > {
def _UXTW_SCALED : sve_mem_sst_sv<opc, 0, 1, asm, listty, uxtw_opnd>;
def _SXTW_SCALED : sve_mem_sst_sv<opc, 1, 1, asm, listty, sxtw_opnd>;
RegisterOperand uxtw_opnd,
ValueType vt > {
def _UXTW_SCALED : sve_mem_sst_sv<opc, 0, 1, asm, Z_s, uxtw_opnd>;
def _SXTW_SCALED : sve_mem_sst_sv<opc, 1, 1, asm, Z_s, sxtw_opnd>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _UXTW_SCALED) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
(!cast<Instruction>(NAME # _UXTW_SCALED) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _SXTW_SCALED) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
(!cast<Instruction>(NAME # _SXTW_SCALED) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
def : Pat<(uxtw_op (nxv4i32 ZPR:$data), (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt),
(!cast<Instruction>(NAME # _UXTW_SCALED) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
def : Pat<(sxtw_op (nxv4i32 ZPR:$data), (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt),
(!cast<Instruction>(NAME # _SXTW_SCALED) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
}
multiclass sve_mem_sst_sv_32_unscaled<bits<3> opc, string asm,
RegisterOperand listty,
ZPRRegOp zprty,
RegisterOperand sxtw_opnd,
RegisterOperand uxtw_opnd> {
def _UXTW : sve_mem_sst_sv<opc, 0, 0, asm, listty, uxtw_opnd>;
def _SXTW : sve_mem_sst_sv<opc, 1, 0, asm, listty, sxtw_opnd>;
multiclass sve_mem_64b_sst_sv_32_scaled<bits<3> opc, string asm,
SDPatternOperator sxtw_op,
SDPatternOperator uxtw_op,
RegisterOperand sxtw_opnd,
RegisterOperand uxtw_opnd,
ValueType vt > {
def _UXTW_SCALED : sve_mem_sst_sv<opc, 0, 1, asm, Z_d, uxtw_opnd>;
def _SXTW_SCALED : sve_mem_sst_sv<opc, 1, 1, asm, Z_d, sxtw_opnd>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _UXTW) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
(!cast<Instruction>(NAME # _UXTW_SCALED) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _SXTW) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
(!cast<Instruction>(NAME # _SXTW_SCALED) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
def : Pat<(uxtw_op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt),
(!cast<Instruction>(NAME # _UXTW_SCALED) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
def : Pat<(sxtw_op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt),
(!cast<Instruction>(NAME # _SXTW_SCALED) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
}
multiclass sve_mem_64b_sst_sv_32_unscaled<bits<3> opc, string asm,
SDPatternOperator sxtw_op,
SDPatternOperator uxtw_op,
RegisterOperand sxtw_opnd,
RegisterOperand uxtw_opnd,
ValueType vt> {
def _UXTW : sve_mem_sst_sv<opc, 0, 0, asm, Z_d, uxtw_opnd>;
def _SXTW : sve_mem_sst_sv<opc, 1, 0, asm, Z_d, sxtw_opnd>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _UXTW) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _SXTW) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
def : Pat<(uxtw_op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt),
(!cast<Instruction>(NAME # _UXTW) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
def : Pat<(sxtw_op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt),
(!cast<Instruction>(NAME # _SXTW) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
}
multiclass sve_mem_32b_sst_sv_32_unscaled<bits<3> opc, string asm,
SDPatternOperator sxtw_op,
SDPatternOperator uxtw_op,
RegisterOperand sxtw_opnd,
RegisterOperand uxtw_opnd,
ValueType vt> {
def _UXTW : sve_mem_sst_sv<opc, 0, 0, asm, Z_s, uxtw_opnd>;
def _SXTW : sve_mem_sst_sv<opc, 1, 0, asm, Z_s, sxtw_opnd>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _UXTW) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
(!cast<Instruction>(NAME # _SXTW) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
def : Pat<(uxtw_op (nxv4i32 ZPR:$data), (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt),
(!cast<Instruction>(NAME # _UXTW) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
def : Pat<(sxtw_op (nxv4i32 ZPR:$data), (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt),
(!cast<Instruction>(NAME # _SXTW) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
}
class sve_mem_sst_sv2<bits<2> msz, bit scaled, string asm,
@@ -4616,19 +4668,28 @@ class sve_mem_sst_sv2<bits<2> msz, bit scaled, string asm,
}
multiclass sve_mem_sst_sv_64_scaled<bits<2> msz, string asm,
RegisterOperand zprext> {
def "" : sve_mem_sst_sv2<msz, 1, asm, zprext>;
SDPatternOperator op,
RegisterOperand zprext,
ValueType vt> {
def _SCALED_REAL : sve_mem_sst_sv2<msz, 1, asm, zprext>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
(!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>;
(!cast<Instruction>(NAME # _SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>;
def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt),
(!cast<Instruction>(NAME # _SCALED_REAL) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
}
multiclass sve_mem_sst_sv_64_unscaled<bits<2> msz, string asm> {
def "" : sve_mem_sst_sv2<msz, 0, asm, ZPR64ExtLSL8>;
multiclass sve_mem_sst_sv_64_unscaled<bits<2> msz, string asm,
SDPatternOperator op,
ValueType vt> {
def _REAL : sve_mem_sst_sv2<msz, 0, asm, ZPR64ExtLSL8>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Rn, $Zm]",
(!cast<Instruction>(NAME) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>;
(!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>;
def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt),
(!cast<Instruction>(NAME # _REAL) ZPR:$data, PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
}
class sve_mem_sst_vi<bits<3> opc, string asm, ZPRRegOp zprty,
@@ -4654,16 +4715,38 @@ class sve_mem_sst_vi<bits<3> opc, string asm, ZPRRegOp zprty,
let mayStore = 1;
}
multiclass sve_mem_sst_vi_ptrs<bits<3> opc, string asm, RegisterOperand listty,
ZPRRegOp zprty, Operand imm_ty> {
def _IMM : sve_mem_sst_vi<opc, asm, zprty, listty, imm_ty>;
multiclass sve_mem_32b_sst_vi_ptrs<bits<3> opc, string asm,
Operand imm_ty,
SDPatternOperator op,
ValueType vt> {
def _IMM : sve_mem_sst_vi<opc, asm, ZPR32, Z_s, imm_ty>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
(!cast<Instruction>(NAME # _IMM) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, 0), 0>;
(!cast<Instruction>(NAME # _IMM) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $imm5]",
(!cast<Instruction>(NAME # _IMM) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, imm_ty:$imm5), 0>;
(!cast<Instruction>(NAME # _IMM) ZPR32:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
(!cast<Instruction>(NAME # _IMM) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, 0), 1>;
(!cast<Instruction>(NAME # _IMM) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>;
def : Pat<(op (nxv4i32 ZPR:$data), (nxv4i1 PPR:$gp), (nxv4i32 ZPR:$ptrs), imm_ty:$index, vt),
(!cast<Instruction>(NAME # _IMM) ZPR:$data, PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
}
multiclass sve_mem_64b_sst_vi_ptrs<bits<3> opc, string asm,
Operand imm_ty,
SDPatternOperator op,
ValueType vt> {
def _IMM : sve_mem_sst_vi<opc, asm, ZPR64, Z_d, imm_ty>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
(!cast<Instruction>(NAME # _IMM) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $imm5]",
(!cast<Instruction>(NAME # _IMM) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
(!cast<Instruction>(NAME # _IMM) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>;
def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), (nxv2i64 ZPR:$ptrs), imm_ty:$index, vt),
(!cast<Instruction>(NAME # _IMM) ZPR:$data, PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
}
class sve_mem_z_spill<string asm>


@@ -0,0 +1,193 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
;
; ST1H, ST1W, ST1D: base + 32-bit scaled offset, sign (sxtw) or zero
; (uxtw) extended to 64 bits.
; e.g. st1h { z0.d }, p0, [x0, z1.d, uxtw #1]
;
; ST1H
define void @sst1h_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %indices) {
; CHECK-LABEL: sst1h_s_uxtw:
; CHECK: st1h { z0.s }, p0, [x0, z1.s, uxtw #1]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i16(<vscale x 4 x i16> %data_trunc,
<vscale x 4 x i1> %pg,
i16* %base,
<vscale x 4 x i32> %indices)
ret void
}
define void @sst1h_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %indices) {
; CHECK-LABEL: sst1h_s_sxtw:
; CHECK: st1h { z0.s }, p0, [x0, z1.s, sxtw #1]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i16(<vscale x 4 x i16> %data_trunc,
<vscale x 4 x i1> %pg,
i16* %base,
<vscale x 4 x i32> %indices)
ret void
}
define void @sst1h_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %indices) {
; CHECK-LABEL: sst1h_d_uxtw:
; CHECK: st1h { z0.d }, p0, [x0, z1.d, uxtw #1]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2i16(<vscale x 2 x i16> %data_trunc,
<vscale x 2 x i1> %pg,
i16* %base,
<vscale x 2 x i32> %indices)
ret void
}
define void @sst1h_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %indices) {
; CHECK-LABEL: sst1h_d_sxtw:
; CHECK: st1h { z0.d }, p0, [x0, z1.d, sxtw #1]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2i16(<vscale x 2 x i16> %data_trunc,
<vscale x 2 x i1> %pg,
i16* %base,
<vscale x 2 x i32> %indices)
ret void
}
; ST1W
define void @sst1w_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %indices) {
; CHECK-LABEL: sst1w_s_uxtw:
; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw #2]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i32(<vscale x 4 x i32> %data,
<vscale x 4 x i1> %pg,
i32* %base,
<vscale x 4 x i32> %indices)
ret void
}
define void @sst1w_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %indices) {
; CHECK-LABEL: sst1w_s_sxtw:
; CHECK: st1w { z0.s }, p0, [x0, z1.s, sxtw #2]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i32(<vscale x 4 x i32> %data,
<vscale x 4 x i1> %pg,
i32* %base,
<vscale x 4 x i32> %indices)
ret void
}
define void @sst1w_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %indices) {
; CHECK-LABEL: sst1w_d_uxtw:
; CHECK: st1w { z0.d }, p0, [x0, z1.d, uxtw #2]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2i32(<vscale x 2 x i32> %data_trunc,
<vscale x 2 x i1> %pg,
i32* %base,
<vscale x 2 x i32> %indices)
ret void
}
define void @sst1w_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %indices) {
; CHECK-LABEL: sst1w_d_sxtw:
; CHECK: st1w { z0.d }, p0, [x0, z1.d, sxtw #2]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2i32(<vscale x 2 x i32> %data_trunc,
<vscale x 2 x i1> %pg,
i32* %base,
<vscale x 2 x i32> %indices)
ret void
}
define void @sst1w_s_uxtw_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %indices) {
; CHECK-LABEL: sst1w_s_uxtw_float:
; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw #2]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4f32(<vscale x 4 x float> %data,
<vscale x 4 x i1> %pg,
float* %base,
<vscale x 4 x i32> %indices)
ret void
}
define void @sst1w_s_sxtw_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %indices) {
; CHECK-LABEL: sst1w_s_sxtw_float:
; CHECK: st1w { z0.s }, p0, [x0, z1.s, sxtw #2]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4f32(<vscale x 4 x float> %data,
<vscale x 4 x i1> %pg,
float* %base,
<vscale x 4 x i32> %indices)
ret void
}
; ST1D
define void @sst1d_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %indices) {
; CHECK-LABEL: sst1d_d_uxtw:
; CHECK: st1d { z0.d }, p0, [x0, z1.d, uxtw #3]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2i64(<vscale x 2 x i64> %data,
<vscale x 2 x i1> %pg,
i64* %base,
<vscale x 2 x i32> %indices)
ret void
}
define void @sst1d_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %indices) {
; CHECK-LABEL: sst1d_d_sxtw:
; CHECK: st1d { z0.d }, p0, [x0, z1.d, sxtw #3]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2i64(<vscale x 2 x i64> %data,
<vscale x 2 x i1> %pg,
i64* %base,
<vscale x 2 x i32> %indices)
ret void
}
define void @sst1d_d_uxtw_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %indices) {
; CHECK-LABEL: sst1d_d_uxtw_double:
; CHECK: st1d { z0.d }, p0, [x0, z1.d, uxtw #3]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2f64(<vscale x 2 x double> %data,
<vscale x 2 x i1> %pg,
double* %base,
<vscale x 2 x i32> %indices)
ret void
}
define void @sst1d_d_sxtw_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %indices) {
; CHECK-LABEL: sst1d_d_sxtw_double:
; CHECK: st1d { z0.d }, p0, [x0, z1.d, sxtw #3]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2f64(<vscale x 2 x double> %data,
<vscale x 2 x i1> %pg,
double* %base,
<vscale x 2 x i32> %indices)
ret void
}
; ST1H
declare void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i16*, <vscale x 2 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i16*, <vscale x 2 x i32>)
; ST1W
declare void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32*, <vscale x 2 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32*, <vscale x 2 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float*, <vscale x 4 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float*, <vscale x 4 x i32>)
; ST1D
declare void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*, <vscale x 2 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*, <vscale x 2 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*, <vscale x 2 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*, <vscale x 2 x i32>)


@@ -0,0 +1,248 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
;
; ST1B, ST1W, ST1H, ST1D: base + 32-bit unscaled offset, sign (sxtw) or zero
; (uxtw) extended to 64 bits.
; e.g. st1h { z0.d }, p0, [x0, z1.d, uxtw]
;
; ST1B
define void @sst1b_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %offsets) {
; CHECK-LABEL: sst1b_s_uxtw:
; CHECK: st1b { z0.s }, p0, [x0, z1.s, uxtw]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> %data_trunc,
<vscale x 4 x i1> %pg,
i8* %base,
<vscale x 4 x i32> %offsets)
ret void
}
define void @sst1b_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %offsets) {
; CHECK-LABEL: sst1b_s_sxtw:
; CHECK: st1b { z0.s }, p0, [x0, z1.s, sxtw]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i8(<vscale x 4 x i8> %data_trunc,
<vscale x 4 x i1> %pg,
i8* %base,
<vscale x 4 x i32> %offsets)
ret void
}
define void @sst1b_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %offsets) {
; CHECK-LABEL: sst1b_d_uxtw:
; CHECK: st1b { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i8(<vscale x 2 x i8> %data_trunc,
<vscale x 2 x i1> %pg,
i8* %base,
<vscale x 2 x i32> %offsets)
ret void
}
define void @sst1b_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %offsets) {
; CHECK-LABEL: sst1b_d_sxtw:
; CHECK: st1b { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i8(<vscale x 2 x i8> %data_trunc,
<vscale x 2 x i1> %pg,
i8* %base,
<vscale x 2 x i32> %offsets)
ret void
}
; ST1H
define void @sst1h_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %offsets) {
; CHECK-LABEL: sst1h_s_uxtw:
; CHECK: st1h { z0.s }, p0, [x0, z1.s, uxtw]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i16(<vscale x 4 x i16> %data_trunc,
<vscale x 4 x i1> %pg,
i16* %base,
<vscale x 4 x i32> %offsets)
ret void
}
define void @sst1h_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %offsets) {
; CHECK-LABEL: sst1h_s_sxtw:
; CHECK: st1h { z0.s }, p0, [x0, z1.s, sxtw]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i16(<vscale x 4 x i16> %data_trunc,
<vscale x 4 x i1> %pg,
i16* %base,
<vscale x 4 x i32> %offsets)
ret void
}
define void @sst1h_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %offsets) {
; CHECK-LABEL: sst1h_d_uxtw:
; CHECK: st1h { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i16(<vscale x 2 x i16> %data_trunc,
<vscale x 2 x i1> %pg,
i16* %base,
<vscale x 2 x i32> %offsets)
ret void
}
define void @sst1h_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %offsets) {
; CHECK-LABEL: sst1h_d_sxtw:
; CHECK: st1h { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i16(<vscale x 2 x i16> %data_trunc,
<vscale x 2 x i1> %pg,
i16* %base,
<vscale x 2 x i32> %offsets)
ret void
}
; ST1W
define void @sst1w_s_uxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %offsets) {
; CHECK-LABEL: sst1w_s_uxtw:
; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i32(<vscale x 4 x i32> %data,
<vscale x 4 x i1> %pg,
i32* %base,
<vscale x 4 x i32> %offsets)
ret void
}
define void @sst1w_s_sxtw(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %offsets) {
; CHECK-LABEL: sst1w_s_sxtw:
; CHECK: st1w { z0.s }, p0, [x0, z1.s, sxtw]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i32(<vscale x 4 x i32> %data,
<vscale x 4 x i1> %pg,
i32* %base,
<vscale x 4 x i32> %offsets)
ret void
}
define void @sst1w_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %offsets) {
; CHECK-LABEL: sst1w_d_uxtw:
; CHECK: st1w { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i32(<vscale x 2 x i32> %data_trunc,
<vscale x 2 x i1> %pg,
i32* %base,
<vscale x 2 x i32> %offsets)
ret void
}
define void @sst1w_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %offsets) {
; CHECK-LABEL: sst1w_d_sxtw:
; CHECK: st1w { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i32(<vscale x 2 x i32> %data_trunc,
<vscale x 2 x i1> %pg,
i32* %base,
<vscale x 2 x i32> %offsets)
ret void
}
define void @sst1w_s_uxtw_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %offsets) {
; CHECK-LABEL: sst1w_s_uxtw_float:
; CHECK: st1w { z0.s }, p0, [x0, z1.s, uxtw]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4f32(<vscale x 4 x float> %data,
<vscale x 4 x i1> %pg,
float* %base,
<vscale x 4 x i32> %offsets)
ret void
}
define void @sst1w_s_sxtw_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %offsets) {
; CHECK-LABEL: sst1w_s_sxtw_float:
; CHECK: st1w { z0.s }, p0, [x0, z1.s, sxtw]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4f32(<vscale x 4 x float> %data,
<vscale x 4 x i1> %pg,
float* %base,
<vscale x 4 x i32> %offsets)
ret void
}
; ST1D
define void @sst1d_d_uxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %offsets) {
; CHECK-LABEL: sst1d_d_uxtw:
; CHECK: st1d { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i64(<vscale x 2 x i64> %data,
<vscale x 2 x i1> %pg,
i64* %base,
<vscale x 2 x i32> %offsets)
ret void
}
define void @sst1d_d_sxtw(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %offsets) {
; CHECK-LABEL: sst1d_d_sxtw:
; CHECK: st1d { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i64(<vscale x 2 x i64> %data,
<vscale x 2 x i1> %pg,
i64* %base,
<vscale x 2 x i32> %offsets)
ret void
}
define void @sst1d_d_uxtw_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %offsets) {
; CHECK-LABEL: sst1d_d_uxtw_double:
; CHECK: st1d { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2f64(<vscale x 2 x double> %data,
<vscale x 2 x i1> %pg,
double* %base,
<vscale x 2 x i32> %offsets)
ret void
}
define void @sst1d_d_sxtw_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %offsets) {
; CHECK-LABEL: sst1d_d_sxtw_double:
; CHECK: st1d { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2f64(<vscale x 2 x double> %data,
<vscale x 2 x i1> %pg,
double* %base,
<vscale x 2 x i32> %offsets)
ret void
}
; ST1B
declare void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i8*, <vscale x 4 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i8*, <vscale x 2 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i8*, <vscale x 4 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i8*, <vscale x 2 x i32>)
; ST1H
declare void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i16*, <vscale x 2 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i16*, <vscale x 2 x i32>)
; ST1W
declare void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32*, <vscale x 2 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32*, <vscale x 2 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float*, <vscale x 4 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float*, <vscale x 4 x i32>)
; ST1D
declare void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*, <vscale x 2 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*, <vscale x 2 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.sxtw.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*, <vscale x 2 x i32>)
declare void @llvm.aarch64.sve.st1.scatter.uxtw.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*, <vscale x 2 x i32>)


@@ -0,0 +1,58 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
;
; ST1H, ST1W, ST1D: base + 64-bit scaled offset
; e.g. st1h { z0.d }, p0, [x0, z0.d, lsl #1]
;
define void @sst1h_index(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %offsets) {
; CHECK-LABEL: sst1h_index
; CHECK: st1h { z0.d }, p0, [x0, z1.d, lsl #1]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
call void @llvm.aarch64.sve.st1.scatter.index.nxv2i16(<vscale x 2 x i16> %data_trunc,
<vscale x 2 x i1> %pg,
i16* %base,
<vscale x 2 x i64> %offsets)
ret void
}
define void @sst1w_index(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets) {
; CHECK-LABEL: sst1w_index
; CHECK: st1w { z0.d }, p0, [x0, z1.d, lsl #2]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
call void @llvm.aarch64.sve.st1.scatter.index.nxv2i32(<vscale x 2 x i32> %data_trunc,
<vscale x 2 x i1> %pg,
i32* %base,
<vscale x 2 x i64> %offsets)
ret void
}
define void @sst1d_index(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %offsets) {
; CHECK-LABEL: sst1d_index
; CHECK: st1d { z0.d }, p0, [x0, z1.d, lsl #3]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64> %data,
<vscale x 2 x i1> %pg,
i64* %base,
<vscale x 2 x i64> %offsets)
ret void
}
define void @sst1d_index_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %offsets) {
; CHECK-LABEL: sst1d_index_double
; CHECK: st1d { z0.d }, p0, [x0, z1.d, lsl #3]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> %data,
<vscale x 2 x i1> %pg,
double* %base,
<vscale x 2 x i64> %offsets)
ret void
}
declare void @llvm.aarch64.sve.st1.scatter.index.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i16*, <vscale x 2 x i64>)
declare void @llvm.aarch64.sve.st1.scatter.index.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32*, <vscale x 2 x i64>)
declare void @llvm.aarch64.sve.st1.scatter.index.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*, <vscale x 2 x i64>)
declare void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*, <vscale x 2 x i64>)


@@ -0,0 +1,70 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
;
; ST1B, ST1W, ST1H, ST1D: base + 64-bit unscaled offset
; e.g. st1h { z0.d }, p0, [x0, z1.d]
;
define void @sst1b_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sst1b_d:
; CHECK: st1b { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
call void @llvm.aarch64.sve.st1.scatter.nxv2i8(<vscale x 2 x i8> %data_trunc,
<vscale x 2 x i1> %pg,
i8* %base,
<vscale x 2 x i64> %b)
ret void
}
define void @sst1h_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sst1h_d:
; CHECK: st1h { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
call void @llvm.aarch64.sve.st1.scatter.nxv2i16(<vscale x 2 x i16> %data_trunc,
<vscale x 2 x i1> %pg,
i16* %base,
<vscale x 2 x i64> %b)
ret void
}
define void @sst1w_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sst1w_d:
; CHECK: st1w { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
call void @llvm.aarch64.sve.st1.scatter.nxv2i32(<vscale x 2 x i32> %data_trunc,
<vscale x 2 x i1> %pg,
i32* %base,
<vscale x 2 x i64> %b)
ret void
}
define void @sst1d_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sst1d_d:
; CHECK: st1d { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64> %data,
<vscale x 2 x i1> %pg,
i64* %base,
<vscale x 2 x i64> %b)
ret void
}
define void @sst1d_d_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sst1d_d_double:
; CHECK: st1d { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.nxv2f64(<vscale x 2 x double> %data,
<vscale x 2 x i1> %pg,
double* %base,
<vscale x 2 x i64> %b)
ret void
}
declare void @llvm.aarch64.sve.st1.scatter.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i8*, <vscale x 2 x i64>)
declare void @llvm.aarch64.sve.st1.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i16*, <vscale x 2 x i64>)
declare void @llvm.aarch64.sve.st1.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32*, <vscale x 2 x i64>)
declare void @llvm.aarch64.sve.st1.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*, <vscale x 2 x i64>)
declare void @llvm.aarch64.sve.st1.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*, <vscale x 2 x i64>)


@@ -0,0 +1,133 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
;
; ST1B, ST1W, ST1H, ST1D: vector + immediate (index)
; e.g. st1h { z0.s }, p0, [z1.s, #16]
;
; ST1B
define void @sst1b_s_imm(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: sst1b_s_imm:
; CHECK: st1b { z0.s }, p0, [z1.s, #16]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
call void @llvm.aarch64.sve.st1.scatter.imm.nxv4i8.nxv4i32(<vscale x 4 x i8> %data_trunc,
<vscale x 4 x i1> %pg,
<vscale x 4 x i32> %base,
i64 16)
ret void
}
define void @sst1b_d_imm(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: sst1b_d_imm:
; CHECK: st1b { z0.d }, p0, [z1.d, #16]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i8.nxv2i64(<vscale x 2 x i8> %data_trunc,
<vscale x 2 x i1> %pg,
<vscale x 2 x i64> %base,
i64 16)
ret void
}
; ST1H
define void @sst1h_s_imm(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: sst1h_s_imm:
; CHECK: st1h { z0.s }, p0, [z1.s, #16]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
call void @llvm.aarch64.sve.st1.scatter.imm.nxv4i16.nxv4i32(<vscale x 4 x i16> %data_trunc,
<vscale x 4 x i1> %pg,
<vscale x 4 x i32> %base,
i64 16)
ret void
}
define void @sst1h_d_imm(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: sst1h_d_imm:
; CHECK: st1h { z0.d }, p0, [z1.d, #16]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i16.nxv2i64(<vscale x 2 x i16> %data_trunc,
<vscale x 2 x i1> %pg,
<vscale x 2 x i64> %base,
i64 16)
ret void
}
; ST1W
define void @sst1w_s_imm(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: sst1w_s_imm:
; CHECK: st1w { z0.s }, p0, [z1.s, #16]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.imm.nxv4i32.nxv4i32(<vscale x 4 x i32> %data,
<vscale x 4 x i1> %pg,
<vscale x 4 x i32> %base,
i64 16)
ret void
}
define void @sst1w_d_imm(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: sst1w_d_imm:
; CHECK: st1w { z0.d }, p0, [z1.d, #16]
; CHECK-NEXT: ret
%data_trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i32.nxv2i64(<vscale x 2 x i32> %data_trunc,
<vscale x 2 x i1> %pg,
<vscale x 2 x i64> %base,
i64 16)
ret void
}
define void @sst1w_s_imm_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: sst1w_s_imm_float:
; CHECK: st1w { z0.s }, p0, [z1.s, #16]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.imm.nxv4f32.nxv4i32(<vscale x 4 x float> %data,
<vscale x 4 x i1> %pg,
<vscale x 4 x i32> %base,
i64 16)
ret void
}
; ST1D
define void @sst1d_d_imm(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: sst1d_d_imm:
; CHECK: st1d { z0.d }, p0, [z1.d, #16]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.imm.nxv2i64.nxv2i64(<vscale x 2 x i64> %data,
<vscale x 2 x i1> %pg,
<vscale x 2 x i64> %base,
i64 16)
ret void
}
define void @sst1d_d_imm_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: sst1d_d_imm_double:
; CHECK: st1d { z0.d }, p0, [z1.d, #16]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st1.scatter.imm.nxv2f64.nxv2i64(<vscale x 2 x double> %data,
<vscale x 2 x i1> %pg,
<vscale x 2 x i64> %base,
i64 16)
ret void
}
; ST1B
declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4i8.nxv4i32(<vscale x 4 x i8>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i8.nxv2i64(<vscale x 2 x i8>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)
; ST1H
declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4i16.nxv4i32(<vscale x 4 x i16>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i16.nxv2i64(<vscale x 2 x i16>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)
; ST1W
declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i32.nxv2i64(<vscale x 2 x i32>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)
declare void @llvm.aarch64.sve.st1.scatter.imm.nxv4f32.nxv4i32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x i32>, i64)
; ST1D
declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)
declare void @llvm.aarch64.sve.st1.scatter.imm.nxv2f64.nxv2i64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x i64>, i64)