[AArch64][SVE] Add intrinsics for first-faulting gather loads

Summary:
The following intrinsics are added:

* @llvm.aarch64.sve.ldff1.gather
* @llvm.aarch64.sve.ldff1.gather.index
* @llvm.aarch64.sve.ldff1.gather.sxtw
* @llvm.aarch64.sve.ldff1.gather.uxtw
* @llvm.aarch64.sve.ldff1.gather.sxtw.index
* @llvm.aarch64.sve.ldff1.gather.uxtw.index
* @llvm.aarch64.sve.ldff1.gather.scalar.offset

Although this patch is quite substantial, the vast majority of the
implementation is just a 'copy & paste' of the implementation of regular
gather loads, including tests. There's only a handful of new definitions:

* AArch64ISD nodes defined in AArch64ISelLowering.h (e.g. GLDFF1)
* Selection DAG Types in AArch64SVEInstrInfo.td (e.g. AArch64ldff1_gather)
* intrinsics in IntrinsicsAArch64.td (e.g. aarch64_sve_ldff1_gather)
* Pseudo instructions in SVEInstrFormats.td to work around the issue of
  use-before-def for the FFR register.

Reviewed By: sdesmalen

Differential Revision: https://reviews.llvm.org/D75128
parent 5122e82870
commit fa9439fac8
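From IR, the new intrinsics are used just like the existing non-faulting gathers. The sketch below mirrors one of the ldff1h tests added by this patch; the function name is illustrative only.

; Minimal usage sketch, modelled on the tests added by this patch:
; a first-faulting gather of i16 elements using a scalar base plus
; 32-bit offsets zero-extended (uxtw) and scaled by the element size.
define <vscale x 4 x i32> @ldff1h_sketch(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %offsets) {
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
                                                                                    i16* %base,
                                                                                    <vscale x 4 x i32> %offsets)
  ; zero-extending the result keeps the unsigned form:
  ;   ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw #1]
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

declare <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)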
@@ -1734,6 +1734,32 @@ def int_aarch64_sve_ld1_gather_uxtw_index : AdvSIMD_GatherLoad_SV_32b_Offsets_In

def int_aarch64_sve_ld1_gather_scalar_offset : AdvSIMD_GatherLoad_VS_Intrinsic;

//
// First-faulting gather loads: scalar base + vector offsets
//

// 64 bit unscaled offsets
def int_aarch64_sve_ldff1_gather : AdvSIMD_GatherLoad_SV_64b_Offsets_Intrinsic;

// 64 bit scaled offsets
def int_aarch64_sve_ldff1_gather_index : AdvSIMD_GatherLoad_SV_64b_Offsets_Intrinsic;

// 32 bit unscaled offsets, sign (sxtw) or zero (uxtw) extended to 64 bits
def int_aarch64_sve_ldff1_gather_sxtw : AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsic;
def int_aarch64_sve_ldff1_gather_uxtw : AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsic;

// 32 bit scaled offsets, sign (sxtw) or zero (uxtw) extended to 64 bits
def int_aarch64_sve_ldff1_gather_sxtw_index : AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsic;
def int_aarch64_sve_ldff1_gather_uxtw_index : AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsic;

//
// First-faulting gather loads: vector base + scalar offset
//

def int_aarch64_sve_ldff1_gather_scalar_offset : AdvSIMD_GatherLoad_VS_Intrinsic;

//
// Scatter stores: scalar base + vector offsets
//
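The scalar.offset variant differs from the others in that the addresses live in a vector register and the offset is a plain i64. A sketch of its IR-level shape follows; the overload suffixes are assumed to match the convention used by the non-faulting ld1.gather.scalar.offset intrinsic.

; Sketch only: the .nxv4i32.nxv4i32 overload suffixes are an assumption,
; not taken verbatim from this patch's tests.
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)

define <vscale x 4 x i32> @ldff1w_imm_sketch(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases) {
  ; vector of 32-bit base addresses plus a scalar byte offset
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases, i64 16)
  ret <vscale x 4 x i32> %load
}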
@@ -1422,6 +1422,22 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
  case AArch64ISD::GLD1S_SXTW_SCALED:   return "AArch64ISD::GLD1S_SXTW_SCALED";
  case AArch64ISD::GLD1S_UXTW_SCALED:   return "AArch64ISD::GLD1S_UXTW_SCALED";
  case AArch64ISD::GLD1S_IMM:           return "AArch64ISD::GLD1S_IMM";
  case AArch64ISD::GLDFF1:              return "AArch64ISD::GLDFF1";
  case AArch64ISD::GLDFF1_SCALED:       return "AArch64ISD::GLDFF1_SCALED";
  case AArch64ISD::GLDFF1_SXTW:         return "AArch64ISD::GLDFF1_SXTW";
  case AArch64ISD::GLDFF1_UXTW:         return "AArch64ISD::GLDFF1_UXTW";
  case AArch64ISD::GLDFF1_SXTW_SCALED:  return "AArch64ISD::GLDFF1_SXTW_SCALED";
  case AArch64ISD::GLDFF1_UXTW_SCALED:  return "AArch64ISD::GLDFF1_UXTW_SCALED";
  case AArch64ISD::GLDFF1_IMM:          return "AArch64ISD::GLDFF1_IMM";
  case AArch64ISD::GLDFF1S:             return "AArch64ISD::GLDFF1S";
  case AArch64ISD::GLDFF1S_SCALED:      return "AArch64ISD::GLDFF1S_SCALED";
  case AArch64ISD::GLDFF1S_SXTW:        return "AArch64ISD::GLDFF1S_SXTW";
  case AArch64ISD::GLDFF1S_UXTW:        return "AArch64ISD::GLDFF1S_UXTW";
  case AArch64ISD::GLDFF1S_SXTW_SCALED:
    return "AArch64ISD::GLDFF1S_SXTW_SCALED";
  case AArch64ISD::GLDFF1S_UXTW_SCALED:
    return "AArch64ISD::GLDFF1S_UXTW_SCALED";
  case AArch64ISD::GLDFF1S_IMM:         return "AArch64ISD::GLDFF1S_IMM";
  case AArch64ISD::SST1:                return "AArch64ISD::SST1";
  case AArch64ISD::SST1_SCALED:         return "AArch64ISD::SST1_SCALED";
  case AArch64ISD::SST1_SXTW:           return "AArch64ISD::SST1_SXTW";
@@ -10434,6 +10450,13 @@ static SDValue performSVEAndCombine(SDNode *N,
  case AArch64ISD::GLD1_UXTW:
  case AArch64ISD::GLD1_UXTW_SCALED:
  case AArch64ISD::GLD1_IMM:
  case AArch64ISD::GLDFF1:
  case AArch64ISD::GLDFF1_SCALED:
  case AArch64ISD::GLDFF1_SXTW:
  case AArch64ISD::GLDFF1_SXTW_SCALED:
  case AArch64ISD::GLDFF1_UXTW:
  case AArch64ISD::GLDFF1_UXTW_SCALED:
  case AArch64ISD::GLDFF1_IMM:
    MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
    break;
  default:
@@ -12707,13 +12730,13 @@ static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
  // vector of offsets (that fits into one register)
  SDValue Offset = N->getOperand(4);

  // GLD1_IMM requires that the offset is an immediate that is:
  // GLD{FF}1_IMM requires that the offset is an immediate that is:
  // * a multiple of #SizeInBytes,
  // * in the range [0, 31 x #SizeInBytes],
  // where #SizeInBytes is the size in bytes of the loaded items. For
  // immediates outside that range and non-immediate scalar offsets use GLD1 or
  // GLD1_UXTW instead.
  if (Opcode == AArch64ISD::GLD1_IMM) {
  if (Opcode == AArch64ISD::GLD1_IMM || Opcode == AArch64ISD::GLDFF1_IMM) {
    uint64_t MaxIndex = 31;
    uint64_t RetElSize = RetElVT.getStoreSize().getKnownMinSize();

@@ -12722,9 +12745,11 @@ static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
        OffsetConst->getZExtValue() > MaxIndex * RetElSize ||
        OffsetConst->getZExtValue() % RetElSize) {
      if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
        Opcode = AArch64ISD::GLD1_UXTW;
        Opcode = (Opcode == AArch64ISD::GLD1_IMM) ? AArch64ISD::GLD1_UXTW
                                                  : AArch64ISD::GLDFF1_UXTW;
      else
        Opcode = AArch64ISD::GLD1;
        Opcode = (Opcode == AArch64ISD::GLD1_IMM) ? AArch64ISD::GLD1
                                                  : AArch64ISD::GLDFF1;

      std::swap(Base, Offset);
    }
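A worked example of the immediate rule above, with illustrative values:

; GLD{FF}1_IMM constraint, worked through for nxv4i32 elements (#SizeInBytes = 4),
; so the valid immediates are 0, 4, ..., 31 * 4 = 124:
;   offset 20  -> multiple of 4 and within range  -> stays GLDFF1_IMM
;                 (selects e.g. ldff1w { z0.s }, p0/z, [z0.s, #20])
;   offset 128 -> exceeds 31 * #SizeInBytes       -> rewritten to GLDFF1_UXTW (packed 32-bit bases)
;                 or GLDFF1 (64-bit bases), with Base and Offset swapped
;   offset 6   -> not a multiple of #SizeInBytes  -> falls back to the register form in the same way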
@@ -12813,6 +12838,27 @@ performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
  case AArch64ISD::GLD1_IMM:
    NewOpc = AArch64ISD::GLD1S_IMM;
    break;
  case AArch64ISD::GLDFF1:
    NewOpc = AArch64ISD::GLDFF1S;
    break;
  case AArch64ISD::GLDFF1_SCALED:
    NewOpc = AArch64ISD::GLDFF1S_SCALED;
    break;
  case AArch64ISD::GLDFF1_SXTW:
    NewOpc = AArch64ISD::GLDFF1S_SXTW;
    break;
  case AArch64ISD::GLDFF1_SXTW_SCALED:
    NewOpc = AArch64ISD::GLDFF1S_SXTW_SCALED;
    break;
  case AArch64ISD::GLDFF1_UXTW:
    NewOpc = AArch64ISD::GLDFF1S_UXTW;
    break;
  case AArch64ISD::GLDFF1_UXTW_SCALED:
    NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED;
    break;
  case AArch64ISD::GLDFF1_IMM:
    NewOpc = AArch64ISD::GLDFF1S_IMM;
    break;
  default:
    return SDValue();
  }
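At the IR level this combine fires whenever the gathered value is sign-extended, turning the unsigned first-faulting gather into its signed counterpart. A sketch modelled on the ldff1sh tests added by this patch (function name illustrative):

; Sign-extending the gathered elements selects the signed node (GLDFF1S_*),
; i.e. ldff1sh instead of ldff1h; compare the zext variant shown earlier.
define <vscale x 4 x i32> @ldff1sh_sketch(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %offsets) {
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
                                                                                    i16* %base,
                                                                                    <vscale x 4 x i32> %offsets)
  ; expected selection: ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw #1]
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

declare <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)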
@@ -12950,6 +12996,24 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                    /*OnlyPackedOffsets=*/false);
  case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
    return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_IMM);
  case Intrinsic::aarch64_sve_ldff1_gather:
    return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1);
  case Intrinsic::aarch64_sve_ldff1_gather_index:
    return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_SCALED);
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
    return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_SXTW,
                                    /*OnlyPackedOffsets=*/false);
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
    return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_UXTW,
                                    /*OnlyPackedOffsets=*/false);
  case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
    return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_SXTW_SCALED,
                                    /*OnlyPackedOffsets=*/false);
  case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
    return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_UXTW_SCALED,
                                    /*OnlyPackedOffsets=*/false);
  case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
    return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_IMM);
  case Intrinsic::aarch64_sve_st1_scatter:
    return performScatterStoreCombine(N, DAG, AArch64ISD::SST1);
  case Intrinsic::aarch64_sve_st1_scatter_index:
@@ -242,6 +242,25 @@ enum NodeType : unsigned {
  GLD1S_UXTW_SCALED,
  GLD1S_SXTW_SCALED,
  GLD1S_IMM,

  // Unsigned gather loads.
  GLDFF1,
  GLDFF1_SCALED,
  GLDFF1_UXTW,
  GLDFF1_SXTW,
  GLDFF1_UXTW_SCALED,
  GLDFF1_SXTW_SCALED,
  GLDFF1_IMM,

  // Signed gather loads.
  GLDFF1S,
  GLDFF1S_SCALED,
  GLDFF1S_UXTW,
  GLDFF1S_SXTW,
  GLDFF1S_UXTW_SCALED,
  GLDFF1S_SXTW_SCALED,
  GLDFF1S_IMM,

  // Scatter store
  SST1,
  SST1_SCALED,
@@ -53,6 +53,22 @@ def AArch64ld1s_gather_uxtw_scaled : SDNode<"AArch64ISD::GLD1S_UXTW_SCALED", SDT
def AArch64ld1s_gather_sxtw_scaled : SDNode<"AArch64ISD::GLD1S_SXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ld1s_gather_imm : SDNode<"AArch64ISD::GLD1S_IMM", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;

def AArch64ldff1_gather : SDNode<"AArch64ISD::GLDFF1", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1_gather_scaled : SDNode<"AArch64ISD::GLDFF1_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1_gather_uxtw : SDNode<"AArch64ISD::GLDFF1_UXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1_gather_sxtw : SDNode<"AArch64ISD::GLDFF1_SXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1_gather_uxtw_scaled : SDNode<"AArch64ISD::GLDFF1_UXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1_gather_sxtw_scaled : SDNode<"AArch64ISD::GLDFF1_SXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1_gather_imm : SDNode<"AArch64ISD::GLDFF1_IMM", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;

def AArch64ldff1s_gather : SDNode<"AArch64ISD::GLDFF1S", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1s_gather_scaled : SDNode<"AArch64ISD::GLDFF1S_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1s_gather_uxtw : SDNode<"AArch64ISD::GLDFF1S_UXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1s_gather_sxtw : SDNode<"AArch64ISD::GLDFF1S_SXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1s_gather_uxtw_scaled : SDNode<"AArch64ISD::GLDFF1S_UXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1s_gather_sxtw_scaled : SDNode<"AArch64ISD::GLDFF1S_SXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
def AArch64ldff1s_gather_imm : SDNode<"AArch64ISD::GLDFF1S_IMM", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;

// Scatter stores - node definitions
//
def SDT_AArch64_SCATTER_SV : SDTypeProfile<0, 5, [
@ -581,114 +597,114 @@ let Predicates = [HasSVE] in {
|
|||
// Gathers using unscaled 32-bit offsets, e.g.
|
||||
// ld1h z0.s, p0/z, [x0, z0.s, uxtw]
|
||||
defm GLD1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
|
||||
defm GLDFF1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0001, "ldff1sb", null_frag, null_frag, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
|
||||
defm GLDFF1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_sxtw, AArch64ldff1s_gather_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
|
||||
defm GLD1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
|
||||
defm GLDFF1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0011, "ldff1b", null_frag, null_frag, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
|
||||
defm GLD1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
|
||||
defm GLDFF1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0101, "ldff1sh", null_frag, null_frag, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
|
||||
defm GLD1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
|
||||
defm GLDFF1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0111, "ldff1h", null_frag, null_frag, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
|
||||
defm GLD1W : sve_mem_32b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>;
|
||||
defm GLDFF1W : sve_mem_32b_gld_vs_32_unscaled<0b1011, "ldff1w", null_frag, null_frag, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>;
|
||||
defm GLDFF1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
|
||||
defm GLD1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
|
||||
defm GLDFF1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw, AArch64ldff1s_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
|
||||
defm GLD1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
|
||||
defm GLDFF1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>;
|
||||
defm GLD1W : sve_mem_32b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>;
|
||||
defm GLDFF1W : sve_mem_32b_gld_vs_32_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>;
|
||||
|
||||
// Gathers using scaled 32-bit offsets, e.g.
|
||||
// ld1h z0.s, p0/z, [x0, z0.s, uxtw #1]
|
||||
defm GLD1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled, AArch64ld1s_gather_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
|
||||
defm GLDFF1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0101, "ldff1sh", null_frag, null_frag, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
|
||||
defm GLD1H_S : sve_mem_32b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
|
||||
defm GLDFF1H_S : sve_mem_32b_gld_sv_32_scaled<0b0111, "ldff1h", null_frag, null_frag, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
|
||||
defm GLD1W : sve_mem_32b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>;
|
||||
defm GLDFF1W : sve_mem_32b_gld_sv_32_scaled<0b1011, "ldff1w", null_frag, null_frag, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>;
|
||||
defm GLD1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled, AArch64ld1s_gather_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
|
||||
defm GLDFF1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_scaled, AArch64ldff1s_gather_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
|
||||
defm GLD1H_S : sve_mem_32b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
|
||||
defm GLDFF1H_S : sve_mem_32b_gld_sv_32_scaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_scaled, AArch64ldff1_gather_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>;
|
||||
defm GLD1W : sve_mem_32b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>;
|
||||
defm GLDFF1W : sve_mem_32b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled, AArch64ldff1_gather_uxtw_scaled, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>;
|
||||
|
||||
// Gathers using 32-bit pointers with scaled offset, e.g.
|
||||
// ld1h z0.s, p0/z, [z0.s, #16]
|
||||
defm GLD1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm, nxv4i8>;
|
||||
defm GLDFF1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0001, "ldff1sb", imm0_31, null_frag, nxv4i8>;
|
||||
defm GLD1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm, nxv4i8>;
|
||||
defm GLDFF1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0011, "ldff1b", imm0_31, null_frag, nxv4i8>;
|
||||
defm GLD1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm, nxv4i16>;
|
||||
defm GLDFF1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0101, "ldff1sh", uimm5s2, null_frag, nxv4i16>;
|
||||
defm GLD1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm, nxv4i16>;
|
||||
defm GLDFF1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0111, "ldff1h", uimm5s2, null_frag, nxv4i16>;
|
||||
defm GLD1W : sve_mem_32b_gld_vi_32_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm, nxv4i32>;
|
||||
defm GLDFF1W : sve_mem_32b_gld_vi_32_ptrs<0b1011, "ldff1w", uimm5s4, null_frag, nxv4i32>;
|
||||
defm GLD1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm, nxv4i8>;
|
||||
defm GLDFF1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0001, "ldff1sb", imm0_31, AArch64ldff1s_gather_imm, nxv4i8>;
|
||||
defm GLD1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm, nxv4i8>;
|
||||
defm GLDFF1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0011, "ldff1b", imm0_31, AArch64ldff1_gather_imm, nxv4i8>;
|
||||
defm GLD1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm, nxv4i16>;
|
||||
defm GLDFF1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0101, "ldff1sh", uimm5s2, AArch64ldff1s_gather_imm, nxv4i16>;
|
||||
defm GLD1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm, nxv4i16>;
|
||||
defm GLDFF1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0111, "ldff1h", uimm5s2, AArch64ldff1_gather_imm, nxv4i16>;
|
||||
defm GLD1W : sve_mem_32b_gld_vi_32_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm, nxv4i32>;
|
||||
defm GLDFF1W : sve_mem_32b_gld_vi_32_ptrs<0b1011, "ldff1w", uimm5s4, AArch64ldff1_gather_imm, nxv4i32>;
|
||||
|
||||
// Gathers using 64-bit pointers with scaled offset, e.g.
|
||||
// ld1h z0.d, p0/z, [z0.d, #16]
|
||||
defm GLD1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm, nxv2i8>;
|
||||
defm GLDFF1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0001, "ldff1sb", imm0_31, null_frag, nxv2i8>;
|
||||
defm GLD1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm, nxv2i8>;
|
||||
defm GLDFF1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0011, "ldff1b", imm0_31, null_frag, nxv2i8>;
|
||||
defm GLD1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm, nxv2i16>;
|
||||
defm GLDFF1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0101, "ldff1sh", uimm5s2, null_frag, nxv2i16>;
|
||||
defm GLD1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm, nxv2i16>;
|
||||
defm GLDFF1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0111, "ldff1h", uimm5s2, null_frag, nxv2i16>;
|
||||
defm GLD1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1000, "ld1sw", uimm5s4, AArch64ld1s_gather_imm, nxv2i32>;
|
||||
defm GLDFF1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1001, "ldff1sw", uimm5s4, null_frag, nxv2i32>;
|
||||
defm GLD1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm, nxv2i32>;
|
||||
defm GLDFF1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1011, "ldff1w", uimm5s4, null_frag, nxv2i32>;
|
||||
defm GLD1D : sve_mem_64b_gld_vi_64_ptrs<0b1110, "ld1d", uimm5s8, AArch64ld1_gather_imm, nxv2i64>;
|
||||
defm GLDFF1D : sve_mem_64b_gld_vi_64_ptrs<0b1111, "ldff1d", uimm5s8, null_frag, nxv2i64>;
|
||||
defm GLD1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm, nxv2i8>;
|
||||
defm GLDFF1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0001, "ldff1sb", imm0_31, AArch64ldff1s_gather_imm, nxv2i8>;
|
||||
defm GLD1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm, nxv2i8>;
|
||||
defm GLDFF1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0011, "ldff1b", imm0_31, AArch64ldff1_gather_imm, nxv2i8>;
|
||||
defm GLD1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm, nxv2i16>;
|
||||
defm GLDFF1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0101, "ldff1sh", uimm5s2, AArch64ldff1s_gather_imm, nxv2i16>;
|
||||
defm GLD1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm, nxv2i16>;
|
||||
defm GLDFF1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0111, "ldff1h", uimm5s2, AArch64ldff1_gather_imm, nxv2i16>;
|
||||
defm GLD1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1000, "ld1sw", uimm5s4, AArch64ld1s_gather_imm, nxv2i32>;
|
||||
defm GLDFF1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1001, "ldff1sw", uimm5s4, AArch64ldff1s_gather_imm, nxv2i32>;
|
||||
defm GLD1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm, nxv2i32>;
|
||||
defm GLDFF1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1011, "ldff1w", uimm5s4, AArch64ldff1_gather_imm, nxv2i32>;
|
||||
defm GLD1D : sve_mem_64b_gld_vi_64_ptrs<0b1110, "ld1d", uimm5s8, AArch64ld1_gather_imm, nxv2i64>;
|
||||
defm GLDFF1D : sve_mem_64b_gld_vi_64_ptrs<0b1111, "ldff1d", uimm5s8, AArch64ldff1_gather_imm, nxv2i64>;
|
||||
|
||||
// Gathers using unscaled 64-bit offsets, e.g.
|
||||
// ld1h z0.d, p0/z, [x0, z0.d]
|
||||
defm GLD1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0000, "ld1sb", AArch64ld1s_gather, nxv2i8>;
|
||||
defm GLDFF1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0001, "ldff1sb", null_frag, nxv2i8>;
|
||||
defm GLD1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0010, "ld1b", AArch64ld1_gather, nxv2i8>;
|
||||
defm GLDFF1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0011, "ldff1b", null_frag, nxv2i8>;
|
||||
defm GLD1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0100, "ld1sh", AArch64ld1s_gather, nxv2i16>;
|
||||
defm GLDFF1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0101, "ldff1sh", null_frag, nxv2i16>;
|
||||
defm GLD1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0110, "ld1h", AArch64ld1_gather, nxv2i16>;
|
||||
defm GLDFF1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0111, "ldff1h", null_frag, nxv2i16>;
|
||||
defm GLD1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1000, "ld1sw", AArch64ld1s_gather, nxv2i32>;
|
||||
defm GLDFF1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1001, "ldff1sw", null_frag, nxv2i32>;
|
||||
defm GLD1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1010, "ld1w", AArch64ld1_gather, nxv2i32>;
|
||||
defm GLDFF1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1011, "ldff1w", null_frag, nxv2i32>;
|
||||
defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d", AArch64ld1_gather, nxv2i64>;
|
||||
defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d", null_frag, nxv2i64>;
|
||||
defm GLD1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0000, "ld1sb", AArch64ld1s_gather, nxv2i8>;
|
||||
defm GLDFF1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather, nxv2i8>;
|
||||
defm GLD1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0010, "ld1b", AArch64ld1_gather, nxv2i8>;
|
||||
defm GLDFF1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0011, "ldff1b", AArch64ldff1_gather, nxv2i8>;
|
||||
defm GLD1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0100, "ld1sh", AArch64ld1s_gather, nxv2i16>;
|
||||
defm GLDFF1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather, nxv2i16>;
|
||||
defm GLD1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0110, "ld1h", AArch64ld1_gather, nxv2i16>;
|
||||
defm GLDFF1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0111, "ldff1h", AArch64ldff1_gather, nxv2i16>;
|
||||
defm GLD1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1000, "ld1sw", AArch64ld1s_gather, nxv2i32>;
|
||||
defm GLDFF1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1001, "ldff1sw", AArch64ldff1s_gather, nxv2i32>;
|
||||
defm GLD1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1010, "ld1w", AArch64ld1_gather, nxv2i32>;
|
||||
defm GLDFF1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1011, "ldff1w", AArch64ldff1_gather, nxv2i32>;
|
||||
defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d", AArch64ld1_gather, nxv2i64>;
|
||||
defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d", AArch64ldff1_gather, nxv2i64>;
|
||||
|
||||
// Gathers using scaled 64-bit offsets, e.g.
|
||||
// ld1h z0.d, p0/z, [x0, z0.d, lsl #1]
|
||||
defm GLD1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0100, "ld1sh", AArch64ld1s_gather_scaled, ZPR64ExtLSL16, nxv2i16>;
|
||||
defm GLDFF1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0101, "ldff1sh", null_frag, ZPR64ExtLSL16, nxv2i16>;
|
||||
defm GLD1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0110, "ld1h", AArch64ld1_gather_scaled, ZPR64ExtLSL16, nxv2i16>;
|
||||
defm GLDFF1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0111, "ldff1h", null_frag, ZPR64ExtLSL16, nxv2i16>;
|
||||
defm GLD1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1000, "ld1sw", AArch64ld1s_gather_scaled, ZPR64ExtLSL32, nxv2i32>;
|
||||
defm GLDFF1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1001, "ldff1sw", null_frag, ZPR64ExtLSL32, nxv2i32>;
|
||||
defm GLD1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1010, "ld1w", AArch64ld1_gather_scaled, ZPR64ExtLSL32, nxv2i32>;
|
||||
defm GLDFF1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1011, "ldff1w", null_frag, ZPR64ExtLSL32, nxv2i32>;
|
||||
defm GLD1D : sve_mem_64b_gld_sv2_64_scaled<0b1110, "ld1d", AArch64ld1_gather_scaled, ZPR64ExtLSL64, nxv2i64>;
|
||||
defm GLDFF1D : sve_mem_64b_gld_sv2_64_scaled<0b1111, "ldff1d", null_frag, ZPR64ExtLSL64, nxv2i64>;
|
||||
defm GLD1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0100, "ld1sh", AArch64ld1s_gather_scaled, ZPR64ExtLSL16, nxv2i16>;
|
||||
defm GLDFF1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_scaled, ZPR64ExtLSL16, nxv2i16>;
|
||||
defm GLD1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0110, "ld1h", AArch64ld1_gather_scaled, ZPR64ExtLSL16, nxv2i16>;
|
||||
defm GLDFF1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0111, "ldff1h", AArch64ldff1_gather_scaled, ZPR64ExtLSL16, nxv2i16>;
|
||||
defm GLD1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1000, "ld1sw", AArch64ld1s_gather_scaled, ZPR64ExtLSL32, nxv2i32>;
|
||||
defm GLDFF1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1001, "ldff1sw", AArch64ldff1s_gather_scaled, ZPR64ExtLSL32, nxv2i32>;
|
||||
defm GLD1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1010, "ld1w", AArch64ld1_gather_scaled, ZPR64ExtLSL32, nxv2i32>;
|
||||
defm GLDFF1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1011, "ldff1w", AArch64ldff1_gather_scaled, ZPR64ExtLSL32, nxv2i32>;
|
||||
defm GLD1D : sve_mem_64b_gld_sv2_64_scaled<0b1110, "ld1d", AArch64ld1_gather_scaled, ZPR64ExtLSL64, nxv2i64>;
|
||||
defm GLDFF1D : sve_mem_64b_gld_sv2_64_scaled<0b1111, "ldff1d", AArch64ldff1_gather_scaled, ZPR64ExtLSL64, nxv2i64>;
|
||||
|
||||
// Gathers using unscaled 32-bit offsets unpacked in 64-bits elements, e.g.
|
||||
// ld1h z0.d, p0/z, [x0, z0.d, uxtw]
|
||||
defm GLD1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
|
||||
defm GLDFF1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0001, "ldff1sb", null_frag, null_frag, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
|
||||
defm GLDFF1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_sxtw, AArch64ldff1s_gather_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
|
||||
defm GLD1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
|
||||
defm GLDFF1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0011, "ldff1b", null_frag, null_frag, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
|
||||
defm GLDFF1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
|
||||
defm GLD1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
|
||||
defm GLDFF1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0101, "ldff1sh", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
|
||||
defm GLDFF1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw, AArch64ldff1s_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
|
||||
defm GLD1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
|
||||
defm GLDFF1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0111, "ldff1h", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
|
||||
defm GLDFF1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>;
|
||||
defm GLD1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
|
||||
defm GLDFF1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1001, "ldff1sw", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
|
||||
defm GLDFF1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1001, "ldff1sw", AArch64ldff1s_gather_sxtw, AArch64ldff1s_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
|
||||
defm GLD1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
|
||||
defm GLDFF1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1011, "ldff1w", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
|
||||
defm GLDFF1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>;
|
||||
defm GLD1D : sve_mem_64b_gld_vs_32_unscaled<0b1110, "ld1d", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>;
|
||||
defm GLDFF1D : sve_mem_64b_gld_vs_32_unscaled<0b1111, "ldff1d", null_frag, null_frag, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>;
|
||||
defm GLDFF1D : sve_mem_64b_gld_vs_32_unscaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>;
|
||||
|
||||
// Gathers using scaled 32-bit offsets unpacked in 64-bits elements, e.g.
|
||||
// ld1h z0.d, p0/z, [x0, z0.d, uxtw #1]
|
||||
defm GLD1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled, AArch64ld1s_gather_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
|
||||
defm GLDFF1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0101, "ldff1sh", null_frag, null_frag, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
|
||||
defm GLD1H_D : sve_mem_64b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
|
||||
defm GLDFF1H_D : sve_mem_64b_gld_sv_32_scaled<0b0111, "ldff1h", null_frag, null_frag, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
|
||||
defm GLD1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw_scaled, AArch64ld1s_gather_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
|
||||
defm GLDFF1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1001, "ldff1sw", null_frag, null_frag, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
|
||||
defm GLD1W_D : sve_mem_64b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
|
||||
defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", null_frag, null_frag, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
|
||||
defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
|
||||
defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", null_frag, null_frag, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
|
||||
defm GLD1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled, AArch64ld1s_gather_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
|
||||
defm GLDFF1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_scaled, AArch64ldff1s_gather_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
|
||||
defm GLD1H_D : sve_mem_64b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
|
||||
defm GLDFF1H_D : sve_mem_64b_gld_sv_32_scaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_scaled, AArch64ldff1_gather_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>;
|
||||
defm GLD1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw_scaled, AArch64ld1s_gather_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
|
||||
defm GLDFF1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1001, "ldff1sw", AArch64ldff1s_gather_sxtw_scaled, AArch64ldff1s_gather_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
|
||||
defm GLD1W_D : sve_mem_64b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
|
||||
defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled, AArch64ldff1_gather_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
|
||||
defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
|
||||
defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw_scaled, AArch64ldff1_gather_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
|
||||
|
||||
// Non-temporal contiguous loads (register + immediate)
|
||||
defm LDNT1B_ZRI : sve_mem_cldnt_si<0b00, "ldnt1b", Z_b, ZPR8>;
|
||||
|
|
|
@ -6252,10 +6252,19 @@ multiclass sve_mem_32b_gld_sv_32_scaled<bits<4> opc, string asm,
|
|||
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
|
||||
(!cast<Instruction>(NAME # _SXTW_SCALED_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
|
||||
|
||||
// We need a layer of indirection because early machine code passes balk at
|
||||
// physical register (i.e. FFR) uses that have no previous definition.
|
||||
let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
|
||||
def _UXTW_SCALED : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>,
|
||||
PseudoInstExpansion<(!cast<Instruction>(NAME # _UXTW_SCALED_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>;
|
||||
def _SXTW_SCALED : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>,
|
||||
PseudoInstExpansion<(!cast<Instruction>(NAME # _SXTW_SCALED_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>;
|
||||
}
|
||||
|
||||
def : Pat<(nxv4i32 (uxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$indices), vt)),
|
||||
(!cast<Instruction>(NAME # _UXTW_SCALED_REAL) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
|
||||
(!cast<Instruction>(NAME # _UXTW_SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
|
||||
def : Pat<(nxv4i32 (sxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$indices), vt)),
|
||||
(!cast<Instruction>(NAME # _SXTW_SCALED_REAL) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
|
||||
(!cast<Instruction>(NAME # _SXTW_SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
|
||||
}
|
||||
|
||||
multiclass sve_mem_32b_gld_vs_32_unscaled<bits<4> opc, string asm,
|
||||
|
@ -6272,10 +6281,19 @@ multiclass sve_mem_32b_gld_vs_32_unscaled<bits<4> opc, string asm,
|
|||
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
|
||||
(!cast<Instruction>(NAME # _SXTW_REAL) ZPR32:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
|
||||
|
||||
// We need a layer of indirection because early machine code passes balk at
|
||||
// physical register (i.e. FFR) uses that have no previous definition.
|
||||
let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
|
||||
def _UXTW : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>,
|
||||
PseudoInstExpansion<(!cast<Instruction>(NAME # _UXTW_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>;
|
||||
def _SXTW : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>,
|
||||
PseudoInstExpansion<(!cast<Instruction>(NAME # _SXTW_REAL) Z_s:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>;
|
||||
}
|
||||
|
||||
def : Pat<(nxv4i32 (uxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt)),
|
||||
(!cast<Instruction>(NAME # _UXTW_REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
|
||||
(!cast<Instruction>(NAME # _UXTW) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
|
||||
def : Pat<(nxv4i32 (sxtw_op (nxv4i1 PPR:$gp), GPR64sp:$base, (nxv4i32 ZPR:$offsets), vt)),
|
||||
(!cast<Instruction>(NAME # _SXTW_REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
|
||||
(!cast<Instruction>(NAME # _SXTW) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
|
||||
}
|
||||
|
||||
|
||||
|
@ -6314,8 +6332,15 @@ multiclass sve_mem_32b_gld_vi_32_ptrs<bits<4> opc, string asm, Operand imm_ty,
|
|||
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
|
||||
(!cast<Instruction>(NAME # _IMM_REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, 0), 1>;
|
||||
|
||||
// We need a layer of indirection because early machine code passes balk at
|
||||
// physical register (i.e. FFR) uses that have no previous definition.
|
||||
let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
|
||||
def _IMM : Pseudo<(outs Z_s:$Zt), (ins PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5), []>,
|
||||
PseudoInstExpansion<(!cast<Instruction>(NAME # _IMM_REAL) Z_s:$Zt, PPR3bAny:$Pg, ZPR32:$Zn, imm_ty:$imm5)>;
|
||||
}
|
||||
|
||||
def : Pat<(nxv4i32 (op (nxv4i1 PPR:$gp), (nxv4i32 ZPR:$ptrs), imm_ty:$index, vt)),
|
||||
(!cast<Instruction>(NAME # _IMM_REAL) PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
|
||||
(!cast<Instruction>(NAME # _IMM) PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
|
||||
}
|
||||
|
||||
class sve_mem_prfm_si<bits<2> msz, string asm>
|
||||
|
@ -6564,10 +6589,19 @@ multiclass sve_mem_64b_gld_sv_32_scaled<bits<4> opc, string asm,
|
|||
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
|
||||
(!cast<Instruction>(NAME # _SXTW_SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
|
||||
|
||||
// We need a layer of indirection because early machine code passes balk at
|
||||
// physical register (i.e. FFR) uses that have no previous definition.
|
||||
let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
|
||||
def _UXTW_SCALED : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>,
|
||||
PseudoInstExpansion<(!cast<Instruction>(NAME # _UXTW_SCALED_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>;
|
||||
def _SXTW_SCALED : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>,
|
||||
PseudoInstExpansion<(!cast<Instruction>(NAME # _SXTW_SCALED_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>;
|
||||
}
|
||||
|
||||
def : Pat<(nxv2i64 (uxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt)),
|
||||
(!cast<Instruction>(NAME # _UXTW_SCALED_REAL) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
|
||||
(!cast<Instruction>(NAME # _UXTW_SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
|
||||
def : Pat<(nxv2i64 (sxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt)),
|
||||
(!cast<Instruction>(NAME # _SXTW_SCALED_REAL) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
|
||||
(!cast<Instruction>(NAME # _SXTW_SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
|
||||
}
|
||||
|
||||
multiclass sve_mem_64b_gld_vs_32_unscaled<bits<4> opc, string asm,
|
||||
|
@ -6584,10 +6618,19 @@ multiclass sve_mem_64b_gld_vs_32_unscaled<bits<4> opc, string asm,
|
|||
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
|
||||
(!cast<Instruction>(NAME # _SXTW_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), 0>;
|
||||
|
||||
// We need a layer of indirection because early machine code passes balk at
|
||||
// physical register (i.e. FFR) uses that have no previous definition.
|
||||
let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
|
||||
def _UXTW : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm), []>,
|
||||
PseudoInstExpansion<(!cast<Instruction>(NAME # _UXTW_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, uxtw_opnd:$Zm)>;
|
||||
def _SXTW : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm), []>,
|
||||
PseudoInstExpansion<(!cast<Instruction>(NAME # _SXTW_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, sxtw_opnd:$Zm)>;
|
||||
}
|
||||
|
||||
def : Pat<(nxv2i64 (uxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt)),
|
||||
(!cast<Instruction>(NAME # _UXTW_REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
|
||||
(!cast<Instruction>(NAME # _UXTW) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
|
||||
def : Pat<(nxv2i64 (sxtw_op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt)),
|
||||
(!cast<Instruction>(NAME # _SXTW_REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
|
||||
(!cast<Instruction>(NAME # _SXTW) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
|
||||
}
|
||||
|
||||
multiclass sve_mem_64b_gld_sv2_64_scaled<bits<4> opc, string asm,
|
||||
|
@ -6598,8 +6641,15 @@ multiclass sve_mem_64b_gld_sv2_64_scaled<bits<4> opc, string asm,
|
|||
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
|
||||
(!cast<Instruction>(NAME # _SCALED_REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), 0>;
|
||||
|
||||
// We need a layer of indirection because early machine code passes balk at
|
||||
// physical register (i.e. FFR) uses that have no previous definition.
|
||||
let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
|
||||
def _SCALED : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm), []>,
|
||||
PseudoInstExpansion<(!cast<Instruction>(NAME # _SCALED_REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm)>;
|
||||
}
|
||||
|
||||
def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$indices), vt)),
|
||||
(!cast<Instruction>(NAME # _SCALED_REAL) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
|
||||
(!cast<Instruction>(NAME # _SCALED) PPR:$gp, GPR64sp:$base, ZPR:$indices)>;
|
||||
}
|
||||
|
||||
multiclass sve_mem_64b_gld_vs2_64_unscaled<bits<4> opc, string asm,
|
||||
|
@ -6609,8 +6659,15 @@ multiclass sve_mem_64b_gld_vs2_64_unscaled<bits<4> opc, string asm,
|
|||
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Zm]",
|
||||
(!cast<Instruction>(NAME # _REAL) ZPR64:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), 0>;
|
||||
|
||||
// We need a layer of indirection because early machine code passes balk at
|
||||
// physical register (i.e. FFR) uses that have no previous definition.
|
||||
let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
|
||||
def "" : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm), []>,
|
||||
PseudoInstExpansion<(!cast<Instruction>(NAME # _REAL) Z_d:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, ZPR64ExtLSL8:$Zm)>;
|
||||
}
|
||||
|
||||
def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), GPR64sp:$base, (nxv2i64 ZPR:$offsets), vt)),
|
||||
(!cast<Instruction>(NAME # _REAL) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
|
||||
(!cast<Instruction>(NAME) PPR:$gp, GPR64sp:$base, ZPR:$offsets)>;
|
||||
}
|
||||
|
||||
class sve_mem_64b_gld_vi<bits<4> opc, string asm, Operand imm_ty>
|
||||
|
@ -6648,8 +6705,15 @@ multiclass sve_mem_64b_gld_vi_64_ptrs<bits<4> opc, string asm, Operand imm_ty,
|
|||
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
|
||||
(!cast<Instruction>(NAME # _IMM_REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>;
|
||||
|
||||
// We need a layer of indirection because early machine code passes balk at
|
||||
// physical register (i.e. FFR) uses that have no previous definition.
|
||||
let hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
|
||||
def _IMM : Pseudo<(outs Z_d:$Zt), (ins PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), []>,
|
||||
PseudoInstExpansion<(!cast<Instruction>(NAME # _IMM_REAL) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5)>;
|
||||
}
|
||||
|
||||
def : Pat<(nxv2i64 (op (nxv2i1 PPR:$gp), (nxv2i64 ZPR:$ptrs), imm_ty:$index, vt)),
|
||||
(!cast<Instruction>(NAME # _IMM_REAL) PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
|
||||
(!cast<Instruction>(NAME # _IMM) PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
|
||||
}
|
||||
|
||||
// bit lsl is '0' if the offsets are extended (uxtw/sxtw), '1' if shifted (lsl)
|
||||
|
|
|
@ -0,0 +1,255 @@
|
|||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
|
||||
|
||||
;
|
||||
; LDFF1H, LDFF1W, LDFF1D: base + 32-bit scaled offset, sign (sxtw) or zero (uxtw)
|
||||
; extended to 64 bits
|
||||
; e.g. ldff1h z0.d, p0/z, [x0, z0.d, uxtw #1]
|
||||
;
|
||||
|
||||
; LDFF1H
|
||||
define <vscale x 4 x i32> @gldff1h_s_uxtw_index(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
|
||||
; CHECK-LABEL: gldff1h_s_uxtw_index:
|
||||
; CHECK: ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw #1]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
%res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @gldff1h_s_sxtw_index(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
|
||||
; CHECK-LABEL: gldff1h_s_sxtw_index:
|
||||
; CHECK: ldff1h { z0.s }, p0/z, [x0, z0.s, sxtw #1]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
%res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gldff1h_d_uxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gldff1h_d_uxtw_index:
|
||||
; CHECK: ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gldff1h_d_sxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gldff1h_d_sxtw_index:
|
||||
; CHECK: ldff1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
; LDFF1W
|
||||
define <vscale x 4 x i32> @gldff1w_s_uxtw_index(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
|
||||
; CHECK-LABEL: gldff1w_s_uxtw_index:
|
||||
; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
i32* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
ret <vscale x 4 x i32> %load
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @gldff1w_s_sxtw_index(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
|
||||
; CHECK-LABEL: gldff1w_s_sxtw_index:
|
||||
; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %pg,
|
||||
i32* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
ret <vscale x 4 x i32> %load
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gldff1w_d_uxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gldff1w_d_uxtw_index:
|
||||
; CHECK: ldff1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
|
||||
i32* %base,
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gldff1w_d_sxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gldff1w_d_sxtw_index:
|
||||
; CHECK: ldff1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
|
||||
i32* %base,
|
||||
<vscale x 2 x i32> %b)
|
||||
%res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @gldff1w_s_uxtw_index_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
|
||||
; CHECK-LABEL: gldff1w_s_uxtw_index_float:
|
||||
; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1> %pg,
|
||||
float* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
ret <vscale x 4 x float> %load
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @gldff1w_s_sxtw_index_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
|
||||
; CHECK-LABEL: gldff1w_s_sxtw_index_float:
|
||||
; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1> %pg,
|
||||
float* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
ret <vscale x 4 x float> %load
|
||||
}
|
||||
|
||||
; LDFF1D
|
||||
define <vscale x 2 x i64> @gldff1d_s_uxtw_index(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gldff1d_s_uxtw_index:
|
||||
; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
i64* %base,
|
||||
<vscale x 2 x i32> %b)
|
||||
ret <vscale x 2 x i64> %load
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gldff1d_sxtw_index(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gldff1d_sxtw_index:
|
||||
; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv2i64(<vscale x 2 x i1> %pg,
|
||||
i64* %base,
|
||||
<vscale x 2 x i32> %b)
|
||||
ret <vscale x 2 x i64> %load
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @gldff1d_uxtw_index_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gldff1d_uxtw_index_double:
|
||||
; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
double* %base,
|
||||
<vscale x 2 x i32> %b)
|
||||
ret <vscale x 2 x double> %load
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @gldff1d_sxtw_index_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
|
||||
; CHECK-LABEL: gldff1d_sxtw_index_double:
|
||||
; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv2f64(<vscale x 2 x i1> %pg,
|
||||
double* %base,
|
||||
<vscale x 2 x i32> %b)
|
||||
ret <vscale x 2 x double> %load
|
||||
}
|
||||
|
||||
;
|
||||
; LDFF1SH, LDFF1SW, LDFF1SD: base + 32-bit scaled offset, sign (sxtw) or zero (uxtw)
|
||||
; extended to 64 bits
|
||||
; e.g. ldff1sh z0.d, p0/z, [x0, z0.d, uxtw #1]
|
||||
;
|
||||
|
||||
; LDFF1SH
|
||||
define <vscale x 4 x i32> @gldff1sh_s_uxtw_index(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
|
||||
; CHECK-LABEL: gldff1sh_s_uxtw_index:
|
||||
; CHECK: ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw #1]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
%res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @gldff1sh_s_sxtw_index(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
|
||||
; CHECK-LABEL: gldff1sh_s_sxtw_index:
|
||||
; CHECK: ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw #1]
|
||||
; CHECK-NEXT: ret
|
||||
%load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
|
||||
i16* %base,
|
||||
<vscale x 4 x i32> %b)
|
||||
%res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @gldff1sh_d_uxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1sh_d_uxtw_index:
; CHECK: ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gldff1sh_d_sxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1sh_d_sxtw_index:
; CHECK: ldff1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1SW
define <vscale x 2 x i64> @gldff1sw_d_uxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1sw_d_uxtw_index:
; CHECK: ldff1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gldff1sw_d_sxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1sw_d_sxtw_index:
; CHECK: ldff1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1H/LDFF1SH
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)

declare <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1>, i16*, <vscale x 2 x i32>)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1>, i16*, <vscale x 2 x i32>)

; LDFF1W/LDFF1SW
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)

declare <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1>, i32*, <vscale x 2 x i32>)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1>, i32*, <vscale x 2 x i32>)

declare <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)
declare <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)

; LDFF1D
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i32>)

declare <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv2f64(<vscale x 2 x i1>, double*, <vscale x 2 x i32>)
declare <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv2f64(<vscale x 2 x i1>, double*, <vscale x 2 x i32>)
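For context, the sketch below shows how a caller would typically pair these first-faulting gathers with the FFR: clear the register, gather, then read back which lanes completed before trusting the loaded data. It is not part of this patch; the setffr/rdffr.z/convert.to.svbool intrinsic names are assumed from the existing SVE support and the function is purely illustrative.

define <vscale x 2 x i64> @ffr_checked_gather(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %offs) {
  call void @llvm.aarch64.sve.setffr()                                 ; set all FFR bits before the speculative load
  %ld = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %offs)
  %pg.b = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
  %ffr = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> %pg.b)   ; lanes that did not fault; a real caller would test %ffr
  %res = zext <vscale x 2 x i16> %ld to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

declare void @llvm.aarch64.sve.setffr()
declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)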
@ -0,0 +1,348 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; LDFF1B, LDFF1W, LDFF1H, LDFF1D: base + 32-bit unscaled offset, sign (sxtw) or zero
; (uxtw) extended to 64 bits.
; e.g. ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw]
;

; LDFF1B
define <vscale x 4 x i32> @gldff1b_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldff1b_s_uxtw:
; CHECK: ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @gldff1b_s_sxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldff1b_s_sxtw:
; CHECK: ldff1b { z0.s }, p0/z, [x0, z0.s, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1b_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1b_d_uxtw:
; CHECK: ldff1b { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2i8(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gldff1b_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1b_d_sxtw:
; CHECK: ldff1b { z0.d }, p0/z, [x0, z0.d, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2i8(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1H
define <vscale x 4 x i32> @gldff1h_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldff1h_s_uxtw:
; CHECK: ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @gldff1h_s_sxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldff1h_s_sxtw:
; CHECK: ldff1h { z0.s }, p0/z, [x0, z0.s, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1h_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1h_d_uxtw:
; CHECK: ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2i16(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gldff1h_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1h_d_sxtw:
; CHECK: ldff1h { z0.d }, p0/z, [x0, z0.d, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2i16(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1W
define <vscale x 4 x i32> @gldff1w_s_uxtw(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldff1w_s_uxtw:
; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i32(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i32> @gldff1w_s_sxtw(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldff1w_s_sxtw:
; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i32(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %load
}

define <vscale x 2 x i64> @gldff1w_d_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1w_d_uxtw:
; CHECK: ldff1w { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2i32(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gldff1w_d_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1w_d_sxtw:
; CHECK: ldff1w { z0.d }, p0/z, [x0, z0.d, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2i32(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x float> @gldff1w_s_uxtw_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldff1w_s_uxtw_float:
; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4f32(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b)
  ret <vscale x 4 x float> %load
}

define <vscale x 4 x float> @gldff1w_s_sxtw_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldff1w_s_sxtw_float:
; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4f32(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b)
  ret <vscale x 4 x float> %load
}

; LDFF1D
define <vscale x 2 x i64> @gldff1d_d_uxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1d_d_uxtw:
; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2i64(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x i64> @gldff1d_d_sxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1d_d_sxtw:
; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2i64(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gldff1d_d_uxtw_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1d_d_uxtw_double:
; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2f64(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b)
  ret <vscale x 2 x double> %load
}

define <vscale x 2 x double> @gldff1d_d_sxtw_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1d_d_sxtw_double:
; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2f64(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b)
  ret <vscale x 2 x double> %load
}

;
; LDFF1SB, LDFF1SW, LDFF1SH: base + 32-bit unscaled offset, sign (sxtw) or zero
; (uxtw) extended to 64 bits.
; e.g. ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw]
;

; LDFF1SB
define <vscale x 4 x i32> @gldff1sb_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldff1sb_s_uxtw:
; CHECK: ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @gldff1sb_s_sxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldff1sb_s_sxtw:
; CHECK: ldff1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1sb_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1sb_d_uxtw:
; CHECK: ldff1sb { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2i8(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gldff1sb_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1sb_d_sxtw:
; CHECK: ldff1sb { z0.d }, p0/z, [x0, z0.d, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2i8(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1SH
define <vscale x 4 x i32> @gldff1sh_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldff1sh_s_uxtw:
; CHECK: ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @gldff1sh_s_sxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldff1sh_s_sxtw:
; CHECK: ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1sh_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1sh_d_uxtw:
; CHECK: ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2i16(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gldff1sh_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1sh_d_sxtw:
; CHECK: ldff1sh { z0.d }, p0/z, [x0, z0.d, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2i16(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1SW
define <vscale x 2 x i64> @gldff1sw_d_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1sw_d_uxtw:
; CHECK: ldff1sw { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2i32(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gldff1sw_d_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gldff1sw_d_sxtw:
; CHECK: ldff1sw { z0.d }, p0/z, [x0, z0.d, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2i32(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1B/LDFF1SB
declare <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1>, i8*, <vscale x 4 x i32>)
declare <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2i8(<vscale x 2 x i1>, i8*, <vscale x 2 x i32>)
declare <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1>, i8*, <vscale x 4 x i32>)
declare <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2i8(<vscale x 2 x i1>, i8*, <vscale x 2 x i32>)

; LDFF1H/LDFF1SH
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2i16(<vscale x 2 x i1>, i16*, <vscale x 2 x i32>)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2i16(<vscale x 2 x i1>, i16*, <vscale x 2 x i32>)

; LDFF1W/LDFF1SW
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2i32(<vscale x 2 x i1>, i32*, <vscale x 2 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2i32(<vscale x 2 x i1>, i32*, <vscale x 2 x i32>)

declare <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4f32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)
declare <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4f32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)

; LDFF1D
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i32>)

declare <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv2f64(<vscale x 2 x i1>, double*, <vscale x 2 x i32>)
declare <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv2f64(<vscale x 2 x i1>, double*, <vscale x 2 x i32>)
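A usage note, not part of the patch: the sxtw and uxtw variants differ only in how the 32-bit offsets are widened to 64 bits, so the choice should follow the signedness of the offset computation. A minimal, hypothetical sketch (declarations as in the test above):

define <vscale x 4 x i8> @pick_extension(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %uoffs, <vscale x 4 x i32> %soffs) {
  ; unsigned 32-bit byte offsets use the zero-extending form
  %a = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %uoffs)
  ; signed deltas (possibly negative) use the sign-extending form
  %b = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %soffs)
  %r = add <vscale x 4 x i8> %a, %b
  ret <vscale x 4 x i8> %r
}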
@ -0,0 +1,80 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; LDFF1H, LDFF1W, LDFF1D: base + 64-bit scaled offset
; e.g. ldff1h z0.d, p0/z, [x0, z0.d, lsl #1]
;

define <vscale x 2 x i64> @gldff1h_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gldff1h_index
; CHECK: ldff1h { z0.d }, p0/z, [x0, z0.d, lsl #1]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gldff1w_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gldff1w_index
; CHECK: ldff1w { z0.d }, p0/z, [x0, z0.d, lsl #2]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gldff1d_index(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gldff1d_index
; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d, lsl #3]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gldff1d_index_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gldff1d_index_double
; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d, lsl #3]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.index.nxv2f64(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b)
  ret <vscale x 2 x double> %load
}

;
; LDFF1SH, LDFF1SW: base + 64-bit scaled offset
; e.g. ldff1sh z0.d, p0/z, [x0, z0.d, lsl #1]
;

define <vscale x 2 x i64> @gldff1sh_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gldff1sh_index
; CHECK: ldff1sh { z0.d }, p0/z, [x0, z0.d, lsl #1]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gldff1sw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gldff1sw_index
; CHECK: ldff1sw { z0.d }, p0/z, [x0, z0.d, lsl #2]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

declare <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1>, i16*, <vscale x 2 x i64>)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.index.nxv2i32(<vscale x 2 x i1>, i32*, <vscale x 2 x i64>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.index.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i64>)
declare <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.index.nxv2f64(<vscale x 2 x i1>, double*, <vscale x 2 x i64>)
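Worth spelling out, as an illustrative note rather than part of the patch: the .index variants scale the 64-bit vector operand by the element size, so lane i reads from base + idx[i] * sizeof(element), which is why the checks above expect lsl #1/#2/#3. A hypothetical sketch using the declaration above:

define <vscale x 2 x i64> @halfword_index_gather(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %idx) {
  ; lane i reads the halfword at %base + %idx[i] * 2 (hence "lsl #1" in the check lines)
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.index.nxv2i16(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %idx)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}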
@ -0,0 +1,103 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; LDFF1B, LDFF1W, LDFF1H, LDFF1D: base + 64-bit unscaled offset
; e.g. ldff1h { z0.d }, p0/z, [x0, z0.d]
;

define <vscale x 2 x i64> @gldff1b_d(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gldff1b_d:
; CHECK: ldff1b { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gldff1h_d(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gldff1h_d:
; CHECK: ldff1h { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gldff1w_d(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets) {
; CHECK-LABEL: gldff1w_d:
; CHECK: ldff1w { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gldff1d_d(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gldff1d_d:
; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gldff1d_d_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gldff1d_d_double:
; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.nxv2f64(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b)
  ret <vscale x 2 x double> %load
}

;
; LDFF1SB, LDFF1SW, LDFF1SH: base + 64-bit unscaled offset
; e.g. ldff1sh { z0.d }, p0/z, [x0, z0.d]
;

define <vscale x 2 x i64> @gldff1sb_d(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gldff1sb_d:
; CHECK: ldff1sb { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i64> %b)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gldff1sh_d(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gldff1sh_d:
; CHECK: ldff1sh { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gldff1sw_d(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets) {
; CHECK-LABEL: gldff1sw_d:
; CHECK: ldff1sw { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offsets)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

declare <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.nxv2i8(<vscale x 2 x i1>, i8*, <vscale x 2 x i64>)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.nxv2i16(<vscale x 2 x i1>, i16*, <vscale x 2 x i64>)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.nxv2i32(<vscale x 2 x i1>, i32*, <vscale x 2 x i64>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i64>)
declare <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.nxv2f64(<vscale x 2 x i1>, double*, <vscale x 2 x i64>)
@ -0,0 +1,368 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; LDFF1B, LDFF1W, LDFF1H, LDFF1D: vector base + immediate offset (index)
; e.g. ldff1h { z0.s }, p0/z, [z0.s, #16]
;

; LDFF1B
define <vscale x 4 x i32> @gldff1b_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1b_s_imm_offset:
; CHECK: ldff1b { z0.s }, p0/z, [z0.s, #16]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1b_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1b_d_imm_offset:
; CHECK: ldff1b { z0.d }, p0/z, [z0.d, #16]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1H
define <vscale x 4 x i32> @gldff1h_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1h_s_imm_offset:
; CHECK: ldff1h { z0.s }, p0/z, [z0.s, #16]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1h_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1h_d_imm_offset:
; CHECK: ldff1h { z0.d }, p0/z, [z0.d, #16]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1W
define <vscale x 4 x i32> @gldff1w_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1w_s_imm_offset:
; CHECK: ldff1w { z0.s }, p0/z, [z0.s, #16]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
  ret <vscale x 4 x i32> %load
}

define <vscale x 2 x i64> @gldff1w_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1w_d_imm_offset:
; CHECK: ldff1w { z0.d }, p0/z, [z0.d, #16]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x float> @gldff1w_s_imm_offset_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1w_s_imm_offset_float:
; CHECK: ldff1w { z0.s }, p0/z, [z0.s, #16]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
  ret <vscale x 4 x float> %load
}

; LDFF1D
define <vscale x 2 x i64> @gldff1d_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1d_d_imm_offset:
; CHECK: ldff1d { z0.d }, p0/z, [z0.d, #16]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gldff1d_d_imm_offset_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1d_d_imm_offset_double:
; CHECK: ldff1d { z0.d }, p0/z, [z0.d, #16]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
  ret <vscale x 2 x double> %load
}

;
; LDFF1SB, LDFF1SW, LDFF1SH: vector base + immediate offset (index)
; e.g. ldff1sh { z0.s }, p0/z, [z0.s, #16]
;

; LDFF1SB
define <vscale x 4 x i32> @gldff1sb_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1sb_s_imm_offset:
; CHECK: ldff1sb { z0.s }, p0/z, [z0.s, #16]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1sb_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1sb_d_imm_offset:
; CHECK: ldff1sb { z0.d }, p0/z, [z0.d, #16]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1SH
define <vscale x 4 x i32> @gldff1sh_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1sh_s_imm_offset:
; CHECK: ldff1sh { z0.s }, p0/z, [z0.s, #16]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1sh_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1sh_d_imm_offset:
; CHECK: ldff1sh { z0.d }, p0/z, [z0.d, #16]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1SW
define <vscale x 2 x i64> @gldff1sw_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1sw_d_imm_offset:
; CHECK: ldff1sw { z0.d }, p0/z, [z0.d, #16]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LDFF1B, LDFF1W, LDFF1H, LDFF1D: vector base + out of range immediate offset
; e.g. ldff1b { z0.d }, p0/z, [x0, z0.d]
;

; LDFF1B
define <vscale x 4 x i32> @gldff1b_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1b_s_imm_offset_out_of_range:
; CHECK: mov w8, #32
; CHECK-NEXT: ldff1b { z0.s }, p0/z, [x8, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 32)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1b_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1b_d_imm_offset_out_of_range:
; CHECK: mov w8, #32
; CHECK-NEXT: ldff1b { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 32)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1H
define <vscale x 4 x i32> @gldff1h_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1h_s_imm_offset_out_of_range:
; CHECK: mov w8, #63
; CHECK-NEXT: ldff1h { z0.s }, p0/z, [x8, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 63)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1h_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1h_d_imm_offset_out_of_range:
; CHECK: mov w8, #63
; CHECK-NEXT: ldff1h { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 63)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1W
define <vscale x 4 x i32> @gldff1w_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1w_s_imm_offset_out_of_range:
; CHECK: mov w8, #125
; CHECK-NEXT: ldff1w { z0.s }, p0/z, [x8, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 125)
  ret <vscale x 4 x i32> %load
}

define <vscale x 2 x i64> @gldff1w_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1w_d_imm_offset_out_of_range:
; CHECK: mov w8, #125
; CHECK-NEXT: ldff1w { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 125)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x float> @gldff1w_s_imm_offset_out_of_range_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1w_s_imm_offset_out_of_range_float:
; CHECK: mov w8, #125
; CHECK-NEXT: ldff1w { z0.s }, p0/z, [x8, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 125)
  ret <vscale x 4 x float> %load
}

; LDFF1D
define <vscale x 2 x i64> @gldff1d_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1d_d_imm_offset_out_of_range:
; CHECK: mov w8, #249
; CHECK-NEXT: ldff1d { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 249)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gldff1d_d_imm_offset_out_of_range_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1d_d_imm_offset_out_of_range_double:
; CHECK: mov w8, #249
; CHECK-NEXT: ldff1d { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 249)
  ret <vscale x 2 x double> %load
}

;
; LDFF1SB, LDFF1SW, LDFF1SH: vector base + out of range immediate offset
; e.g. ldff1sb { z0.s }, p0/z, [x8, z0.s, uxtw]
;

; LDFF1SB
define <vscale x 4 x i32> @gldff1sb_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1sb_s_imm_offset_out_of_range:
; CHECK: mov w8, #32
; CHECK-NEXT: ldff1sb { z0.s }, p0/z, [x8, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 32)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1sb_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1sb_d_imm_offset_out_of_range:
; CHECK: mov w8, #32
; CHECK-NEXT: ldff1sb { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 32)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1SH
define <vscale x 4 x i32> @gldff1sh_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1sh_s_imm_offset_out_of_range:
; CHECK: mov w8, #63
; CHECK-NEXT: ldff1sh { z0.s }, p0/z, [x8, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 63)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1sh_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1sh_d_imm_offset_out_of_range:
; CHECK: mov w8, #63
; CHECK-NEXT: ldff1sh { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 63)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1SW
define <vscale x 2 x i64> @gldff1sw_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1sw_d_imm_offset_out_of_range:
; CHECK: mov w8, #125
; CHECK-NEXT: ldff1sw { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 125)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1B/LDFF1SB
declare <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

; LDFF1H/LDFF1SH
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

; LDFF1W/LDFF1SW
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

declare <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)

; LDFF1D
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

declare <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)
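One observation on the out-of-range cases above (illustrative, not part of the patch): the vector-plus-immediate form encodes an offset that is a multiple of the element size in the range [0, 31 * element size], so i64 16 selects the [z0.s, #16] form for byte elements while i64 32 is materialised in a register and selected as the scalar-base form, as the checks expect. A hypothetical sketch using the declarations above:

define <vscale x 4 x i32> @imm_in_and_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
  ; #16 is encodable for byte elements: expect [z0.s, #16]
  %a = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
  ; #32 is not: expect mov w8, #32 followed by [x8, z0.s, uxtw]
  %b = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 32)
  %za = zext <vscale x 4 x i8> %a to <vscale x 4 x i32>
  %zb = zext <vscale x 4 x i8> %b to <vscale x 4 x i32>
  %r = add <vscale x 4 x i32> %za, %zb
  ret <vscale x 4 x i32> %r
}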
@ -0,0 +1,186 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; LDFF1B, LDFF1W, LDFF1H, LDFF1D: vector base + scalar offset (index)
; e.g. ldff1b { z0.d }, p0/z, [x0, z0.d]
;

; LDFF1B
define <vscale x 4 x i32> @gldff1b_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldff1b_s_scalar_offset:
; CHECK: ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1b_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1b_d_scalar_offset:
; CHECK: ldff1b { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1H
define <vscale x 4 x i32> @gldff1h_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldff1h_s_scalar_offset:
; CHECK: ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1h_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1h_d_scalar_offset:
; CHECK: ldff1h { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1W
define <vscale x 4 x i32> @gldff1w_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldff1w_s_scalar_offset:
; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
  ret <vscale x 4 x i32> %load
}

define <vscale x 2 x i64> @gldff1w_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1w_d_scalar_offset:
; CHECK: ldff1w { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x float> @gldff1w_s_scalar_offset_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldff1w_s_scalar_offset_float:
; CHECK: ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
  ret <vscale x 4 x float> %load
}

; LDFF1D
define <vscale x 2 x i64> @gldff1d_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1d_d_scalar_offset:
; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gldff1d_d_scalar_offset_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1d_d_scalar_offset_double:
; CHECK: ldff1d { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
  ret <vscale x 2 x double> %load
}

;
; LDFF1SB, LDFF1SW, LDFF1SH: vector base + scalar offset (index)
; e.g. ldff1b { z0.d }, p0/z, [x0, z0.d]
;

; LDFF1SB
define <vscale x 4 x i32> @gldff1sb_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldff1sb_s_scalar_offset:
; CHECK: ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1sb_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1sb_d_scalar_offset:
; CHECK: ldff1sb { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1SH
define <vscale x 4 x i32> @gldff1sh_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldff1sh_s_scalar_offset:
; CHECK: ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1sh_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1sh_d_scalar_offset:
; CHECK: ldff1sh { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1SW
define <vscale x 2 x i64> @gldff1sw_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1sw_d_scalar_offset:
; CHECK: ldff1sw { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1B/LDFF1SB
declare <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

; LDFF1H/LDFF1SH
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

; LDFF1W/LDFF1SW
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

declare <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)

; LDFF1D
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

declare <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)