forked from OSchip/llvm-project
[AArch64][SVE] Gather loads: pass 32 bit unpacked offsets as nxv2i32
Summary: Currently, 32-bit unpacked offsets are passed as nxv2i64. However, as pointed out in https://reviews.llvm.org/D71074, using nxv2i32 instead would improve consistency with: * how other arguments are treated; * how scatter stores are implemented. This patch makes sure that 32-bit unpacked offsets are passed as nxv2i32 instead of nxv2i64. Reviewers: sdesmalen, efriedma Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D71724
This commit is contained in:
parent
535b3c6b2f
commit
404da13e1e
|
@ -1114,7 +1114,8 @@ class AdvSIMD_GatherLoad_32bitOffset_Intrinsic
|
|||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[
|
||||
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
|
||||
LLVMPointerToElt<0>, llvm_anyvector_ty
|
||||
LLVMPointerToElt<0>,
|
||||
LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>
|
||||
],
|
||||
[IntrReadMem, IntrArgMemOnly]>;
|
||||
|
||||
|
|
|
@ -12231,18 +12231,14 @@ static SDValue performST1ScatterCombine(SDNode *N, SelectionDAG &DAG,
|
|||
}
|
||||
|
||||
static SDValue performLD1GatherCombine(SDNode *N, SelectionDAG &DAG,
|
||||
unsigned Opcode) {
|
||||
unsigned Opcode,
|
||||
bool OnlyPackedOffsets = true) {
|
||||
EVT RetVT = N->getValueType(0);
|
||||
assert(RetVT.isScalableVector() &&
|
||||
"Gather loads are only possible for SVE vectors");
|
||||
|
||||
SDLoc DL(N);
|
||||
MVT RetElVT = RetVT.getVectorElementType().getSimpleVT();
|
||||
unsigned NumElements = AArch64::SVEBitsPerBlock / RetElVT.getSizeInBits();
|
||||
|
||||
EVT MaxVT = llvm::MVT::getScalableVectorVT(RetElVT, NumElements);
|
||||
if (RetVT.getSizeInBits().getKnownMinSize() >
|
||||
MaxVT.getSizeInBits().getKnownMinSize())
|
||||
if (RetVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
|
||||
return SDValue();
|
||||
|
||||
// Depending on the addressing mode, this is either a pointer or a vector of
|
||||
|
@ -12250,12 +12246,19 @@ static SDValue performLD1GatherCombine(SDNode *N, SelectionDAG &DAG,
|
|||
const SDValue Base = N->getOperand(3);
|
||||
// Depending on the addressing mode, this is either a single offset or a
|
||||
// vector of offsets (that fits into one register)
|
||||
const SDValue Offset = N->getOperand(4);
|
||||
SDValue Offset = N->getOperand(4);
|
||||
|
||||
if (!DAG.getTargetLoweringInfo().isTypeLegal(Base.getValueType()) ||
|
||||
!DAG.getTargetLoweringInfo().isTypeLegal(Offset.getValueType()))
|
||||
auto &TLI = DAG.getTargetLoweringInfo();
|
||||
if (!TLI.isTypeLegal(Base.getValueType()))
|
||||
return SDValue();
|
||||
|
||||
// Some gather load variants allow unpacked offsets, but only as nxv2i32
|
||||
// vectors. These are implicitly sign (sxtw) or zero (uxtw) extended to
|
||||
// nxv2i64. Legalize accordingly.
|
||||
if (!OnlyPackedOffsets &&
|
||||
Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
|
||||
Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
|
||||
|
||||
// Return value type that is representable in hardware
|
||||
EVT HwRetVt = getSVEContainerType(RetVT);
|
||||
|
||||
|
@ -12439,13 +12442,17 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
|
|||
case Intrinsic::aarch64_sve_ld1_gather_index:
|
||||
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SCALED);
|
||||
case Intrinsic::aarch64_sve_ld1_gather_sxtw:
|
||||
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW);
|
||||
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW,
|
||||
/*OnlyPackedOffsets=*/false);
|
||||
case Intrinsic::aarch64_sve_ld1_gather_uxtw:
|
||||
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW);
|
||||
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW,
|
||||
/*OnlyPackedOffsets=*/false);
|
||||
case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
|
||||
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW_SCALED);
|
||||
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_SXTW_SCALED,
|
||||
/*OnlyPackedOffsets=*/false);
|
||||
case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
|
||||
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW_SCALED);
|
||||
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_UXTW_SCALED,
|
||||
/*OnlyPackedOffsets=*/false);
|
||||
case Intrinsic::aarch64_sve_ld1_gather_imm:
|
||||
return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1_IMM);
|
||||
case Intrinsic::aarch64_sve_st1_scatter:
|
||||
|
|
|
@ -11,7 +11,7 @@ define <vscale x 4 x i32> @gld1h_s_uxtw_index(<vscale x 4 x i1> %pg, i16* %base,
|
|||
; CHECK-LABEL: gld1h_s_uxtw_index:
|
||||
; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw #1]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
%res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
|
||||
|
@ -22,31 +22,31 @@ define <vscale x 4 x i32> @gld1h_s_sxtw_index(<vscale x 4 x i1> %pg, i16* %base,
|
|||
; CHECK-LABEL: gld1h_s_sxtw_index:
|
||||
; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, sxtw #1]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
%res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1h_d_uxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1h_d_uxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1h_d_uxtw_index:
|
||||
; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1h_d_sxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1h_d_sxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1h_d_sxtw_index:
|
||||
; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
@ -56,7 +56,7 @@ define <vscale x 4 x i32> @gld1w_s_uxtw_index(<vscale x 4 x i1> %pg, i32* %base,
|
|||
; CHECK-LABEL: gld1w_s_uxtw_index:
|
||||
; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
i32* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
ret <vscale x 4 x i32> %load
|
||||
|
@ -66,30 +66,30 @@ define <vscale x 4 x i32> @gld1w_s_sxtw_index(<vscale x 4 x i1> %pg, i32* %base,
|
|||
; CHECK-LABEL: gld1w_s_sxtw_index:
|
||||
; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
i32* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
ret <vscale x 4 x i32> %load
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1w_d_uxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1w_d_uxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1w_d_uxtw_index:
|
||||
; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
|
||||
i32* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1w_d_sxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1w_d_sxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1w_d_sxtw_index:
|
||||
; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
|
||||
i32* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
@ -98,7 +98,7 @@ define <vscale x 4 x float> @gld1w_s_uxtw_index_float(<vscale x 4 x i1> %pg, flo
|
|||
; CHECK-LABEL: gld1w_s_uxtw_index_float:
|
||||
; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1> %pg,
|
||||
float* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
ret <vscale x 4 x float> %load
|
||||
|
@ -108,50 +108,50 @@ define <vscale x 4 x float> @gld1w_s_sxtw_index_float(<vscale x 4 x i1> %pg, flo
|
|||
; CHECK-LABEL: gld1w_s_sxtw_index_float:
|
||||
; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1> %pg,
|
||||
float* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
ret <vscale x 4 x float> %load
|
||||
}
|
||||
|
||||
; LD1D
|
||||
define <vscale x 2 x i64> @gld1d_s_uxtw_index(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1d_s_uxtw_index(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1d_s_uxtw_index:
|
||||
; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
i64* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
ret <vscale x 2 x i64> %load
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1d_sxtw_index(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1d_sxtw_index(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1d_sxtw_index:
|
||||
; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
i64* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
ret <vscale x 2 x i64> %load
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @gld1d_uxtw_index_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x double> @gld1d_uxtw_index_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1d_uxtw_index_double:
|
||||
; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
double* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
ret <vscale x 2 x double> %load
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @gld1d_sxtw_index_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x double> @gld1d_sxtw_index_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1d_sxtw_index_double:
|
||||
; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
double* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
ret <vscale x 2 x double> %load
|
||||
}
|
||||
|
||||
|
@ -166,7 +166,7 @@ define <vscale x 4 x i32> @gld1sh_s_uxtw_index(<vscale x 4 x i1> %pg, i16* %base
|
|||
; CHECK-LABEL: gld1sh_s_uxtw_index:
|
||||
; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw #1]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
%res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
|
||||
|
@ -177,79 +177,79 @@ define <vscale x 4 x i32> @gld1sh_s_sxtw_index(<vscale x 4 x i1> %pg, i16* %base
|
|||
; CHECK-LABEL: gld1sh_s_sxtw_index:
|
||||
; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw #1]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
%res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1sh_d_uxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1sh_d_uxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1sh_d_uxtw_index:
|
||||
; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1sh_d_sxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1sh_d_sxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1sh_d_sxtw_index:
|
||||
; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
; LD1SW
|
||||
define <vscale x 2 x i64> @gld1sw_d_uxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1sw_d_uxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1sw_d_uxtw_index:
|
||||
; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
|
||||
i32* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1sw_d_sxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1sw_d_sxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1sw_d_sxtw_index:
|
||||
; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
|
||||
i32* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
|
||||
; LD1H/LD1SH
|
||||
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16.nxv4i32(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
|
||||
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16.nxv4i32(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
|
||||
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
|
||||
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
|
||||
|
||||
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16.nxv2i64(<vscale x 2 x i1>, i16*, <vscale x 2 x i64>)
|
||||
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16.nxv2i64(<vscale x 2 x i1>, i16*, <vscale x 2 x i64>)
|
||||
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1>, i16*, <vscale x 2 x i32>)
|
||||
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1>, i16*, <vscale x 2 x i32>)
|
||||
|
||||
; LD1W/LD1SW
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
|
||||
|
||||
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32.nxv2i64(<vscale x 2 x i1>, i32*, <vscale x 2 x i64>)
|
||||
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32.nxv2i64(<vscale x 2 x i1>, i32*, <vscale x 2 x i64>)
|
||||
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1>, i32*, <vscale x 2 x i32>)
|
||||
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1>, i32*, <vscale x 2 x i32>)
|
||||
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32.nxv4i32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32.nxv4i32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)
|
||||
|
||||
; LD1D
|
||||
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i64.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i64>)
|
||||
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i64.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i64>)
|
||||
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i32>)
|
||||
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i32>)
|
||||
|
||||
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2f64.nxv2i64(<vscale x 2 x i1>, double*, <vscale x 2 x i64>)
|
||||
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2f64.nxv2i64(<vscale x 2 x i1>, double*, <vscale x 2 x i64>)
|
||||
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2f64(<vscale x 2 x i1>, double*, <vscale x 2 x i32>)
|
||||
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2f64(<vscale x 2 x i1>, double*, <vscale x 2 x i32>)
|
||||
|
|
|
@ -11,7 +11,7 @@ define <vscale x 4 x i32> @gld1b_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vscal
|
|||
; CHECK-LABEL: gld1b_s_uxtw:
|
||||
; CHECK: ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg,
|
||||
i8* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
%res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
|
||||
|
@ -22,31 +22,31 @@ define <vscale x 4 x i32> @gld1b_s_sxtw(<vscale x 4 x i1> %pg, i8* %base, <vscal
|
|||
; CHECK-LABEL: gld1b_s_sxtw:
|
||||
; CHECK: ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> %pg,
|
||||
i8* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
%res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1b_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1b_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1b_d_uxtw:
|
||||
; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d, uxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i8(<vscale x 2 x i1> %pg,
|
||||
i8* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1b_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1b_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1b_d_sxtw:
|
||||
; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d, sxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i8(<vscale x 2 x i1> %pg,
|
||||
i8* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
@ -56,7 +56,7 @@ define <vscale x 4 x i32> @gld1h_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vsca
|
|||
; CHECK-LABEL: gld1h_s_uxtw:
|
||||
; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
%res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
|
||||
|
@ -67,31 +67,31 @@ define <vscale x 4 x i32> @gld1h_s_sxtw(<vscale x 4 x i1> %pg, i16* %base, <vsca
|
|||
; CHECK-LABEL: gld1h_s_sxtw:
|
||||
; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
%res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1h_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1h_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1h_d_uxtw:
|
||||
; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i16(<vscale x 2 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1h_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1h_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1h_d_sxtw:
|
||||
; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i16(<vscale x 2 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
@ -101,7 +101,7 @@ define <vscale x 4 x i32> @gld1w_s_uxtw(<vscale x 4 x i1> %pg, i32* %base, <vsca
|
|||
; CHECK-LABEL: gld1w_s_uxtw:
|
||||
; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
i32* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
ret <vscale x 4 x i32> %load
|
||||
|
@ -111,30 +111,30 @@ define <vscale x 4 x i32> @gld1w_s_sxtw(<vscale x 4 x i1> %pg, i32* %base, <vsca
|
|||
; CHECK-LABEL: gld1w_s_sxtw:
|
||||
; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
i32* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
ret <vscale x 4 x i32> %load
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1w_d_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1w_d_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1w_d_uxtw:
|
||||
; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i32(<vscale x 2 x i1> %pg,
|
||||
i32* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1w_d_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1w_d_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1w_d_sxtw:
|
||||
; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i32(<vscale x 2 x i1> %pg,
|
||||
i32* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
@ -143,7 +143,7 @@ define <vscale x 4 x float> @gld1w_s_uxtw_float(<vscale x 4 x i1> %pg, float* %b
|
|||
; CHECK-LABEL: gld1w_s_uxtw_float:
|
||||
; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4f32(<vscale x 4 x i1> %pg,
|
||||
float* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
ret <vscale x 4 x float> %load
|
||||
|
@ -153,50 +153,50 @@ define <vscale x 4 x float> @gld1w_s_sxtw_float(<vscale x 4 x i1> %pg, float* %b
|
|||
; CHECK-LABEL: gld1w_s_sxtw_float:
|
||||
; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4f32(<vscale x 4 x i1> %pg,
|
||||
float* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
ret <vscale x 4 x float> %load
|
||||
}
|
||||
|
||||
; LD1D
|
||||
define <vscale x 2 x i64> @gld1d_d_uxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1d_d_uxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1d_d_uxtw:
|
||||
; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
i64* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
ret <vscale x 2 x i64> %load
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1d_d_sxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1d_d_sxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1d_d_sxtw:
|
||||
; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
i64* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
ret <vscale x 2 x i64> %load
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @gld1d_d_uxtw_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x double> @gld1d_d_uxtw_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1d_d_uxtw_double:
|
||||
; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
double* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
ret <vscale x 2 x double> %load
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @gld1d_d_sxtw_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x double> @gld1d_d_sxtw_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1d_d_sxtw_double:
|
||||
; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
double* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
ret <vscale x 2 x double> %load
|
||||
}
|
||||
|
||||
|
@ -211,7 +211,7 @@ define <vscale x 4 x i32> @gld1sb_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vsca
|
|||
; CHECK-LABEL: gld1sb_s_uxtw:
|
||||
; CHECK: ld1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg,
|
||||
i8* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
%res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
|
||||
|
@ -222,31 +222,31 @@ define <vscale x 4 x i32> @gld1sb_s_sxtw(<vscale x 4 x i1> %pg, i8* %base, <vsca
|
|||
; CHECK-LABEL: gld1sb_s_sxtw:
|
||||
; CHECK: ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> %pg,
|
||||
i8* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
%res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1sb_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1sb_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1sb_d_uxtw:
|
||||
; CHECK: ld1sb { z0.d }, p0/z, [x0, z0.d, uxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i8(<vscale x 2 x i1> %pg,
|
||||
i8* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1sb_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1sb_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1sb_d_sxtw:
|
||||
; CHECK: ld1sb { z0.d }, p0/z, [x0, z0.d, sxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i8(<vscale x 2 x i1> %pg,
|
||||
i8* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
@ -256,7 +256,7 @@ define <vscale x 4 x i32> @gld1sh_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vsc
|
|||
; CHECK-LABEL: gld1sh_s_uxtw:
|
||||
; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
%res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
|
||||
|
@ -267,82 +267,82 @@ define <vscale x 4 x i32> @gld1sh_s_sxtw(<vscale x 4 x i1> %pg, i16* %base, <vsc
|
|||
; CHECK-LABEL: gld1sh_s_sxtw:
|
||||
; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
%res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1sh_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1sh_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1sh_d_uxtw:
|
||||
; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i16(<vscale x 2 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1sh_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1sh_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1sh_d_sxtw:
|
||||
; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i16(<vscale x 2 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
; LD1SW
|
||||
define <vscale x 2 x i64> @gld1sw_d_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1sw_d_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1sw_d_uxtw:
|
||||
; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i32(<vscale x 2 x i1> %pg,
|
||||
i32* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gld1sw_d_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
|
||||
define <vscale x 2 x i64> @gld1sw_d_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gld1sw_d_sxtw:
|
||||
; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i32(<vscale x 2 x i1> %pg,
|
||||
i32* %base,
|
||||
<vscale x 2 x i64> %b)
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
; LD1B/LD1SB
|
||||
declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8.nxv4i32(<vscale x 4 x i1>, i8*, <vscale x 4 x i32>)
|
||||
declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i8.nxv2i64(<vscale x 2 x i1>, i8*, <vscale x 2 x i64>)
|
||||
declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8.nxv4i32(<vscale x 4 x i1>, i8*, <vscale x 4 x i32>)
|
||||
declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i8.nxv2i64(<vscale x 2 x i1>, i8*, <vscale x 2 x i64>)
|
||||
declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1>, i8*, <vscale x 4 x i32>)
|
||||
declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i8(<vscale x 2 x i1>, i8*, <vscale x 2 x i32>)
|
||||
declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1>, i8*, <vscale x 4 x i32>)
|
||||
declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i8(<vscale x 2 x i1>, i8*, <vscale x 2 x i32>)
|
||||
|
||||
; LD1H/LD1SH
|
||||
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16.nxv4i32(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
|
||||
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i16.nxv2i64(<vscale x 2 x i1>, i16*, <vscale x 2 x i64>)
|
||||
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16.nxv4i32(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
|
||||
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i16.nxv2i64(<vscale x 2 x i1>, i16*, <vscale x 2 x i64>)
|
||||
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
|
||||
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i16(<vscale x 2 x i1>, i16*, <vscale x 2 x i32>)
|
||||
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
|
||||
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i16(<vscale x 2 x i1>, i16*, <vscale x 2 x i32>)
|
||||
|
||||
; LD1W/LD1SW
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
|
||||
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i32.nxv2i64(<vscale x 2 x i1>, i32*, <vscale x 2 x i64>)
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
|
||||
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i32.nxv2i64(<vscale x 2 x i1>, i32*, <vscale x 2 x i64>)
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
|
||||
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i32(<vscale x 2 x i1>, i32*, <vscale x 2 x i32>)
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
|
||||
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i32(<vscale x 2 x i1>, i32*, <vscale x 2 x i32>)
|
||||
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4f32.nxv4i32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4f32.nxv4i32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4f32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4f32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)
|
||||
|
||||
; LD1D
|
||||
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i64.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i64>)
|
||||
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i64.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i64>)
|
||||
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i32>)
|
||||
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i32>)
|
||||
|
||||
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2f64.nxv2i64(<vscale x 2 x i1>, double*, <vscale x 2 x i64>)
|
||||
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2f64.nxv2i64(<vscale x 2 x i1>, double*, <vscale x 2 x i64>)
|
||||
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2f64(<vscale x 2 x i1>, double*, <vscale x 2 x i32>)
|
||||
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2f64(<vscale x 2 x i1>, double*, <vscale x 2 x i32>)
|
||||
|
|
Loading…
Reference in New Issue