[llvm][CodeGen] Fix issue for SVE gather prefetch.
Summary: This change fixes an issue where the dagcombine incorrectly used an addressing mode with scaled offsets (indices) instead of one with unscaled offsets. Those addressing modes do not exist for `prfh`, `prfw` and `prfd`, so we can reuse `prfb`, which does have unscaled offsets; moreover, the pseudo-code in the XML spec suggests that the element size is not used for the amount of data that is prefetched by the instruction. FWIW, GCC also emits a `prfb` for these cases.

Reviewers: sdesmalen, andwar, rengolin

Reviewed By: sdesmalen

Subscribers: tschuett, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78069
parent 681466f5e6
commit 48879c02bf
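As a minimal illustration of the behavior being fixed, here is a standalone sketch reconstructed from the test updates below (the RUN line and function name are illustrative assumptions; per the tests, the prfop argument `i32 1` selects `pldl1strm`):

; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s

declare void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64, i32)

define void @prfh_gather_runtime_offset(<vscale x 4 x i32> %bases, i64 %offset, <vscale x 4 x i1> %pg) nounwind {
; A run-time scalar offset cannot use the vector-plus-immediate form, so the
; DAG combine rewrites the node to the register-offset (uxtw) form.
; Before this patch: prfh pldl1strm, p0, [x0, z0.s, uxtw #1]  (offset wrongly scaled by 2)
; After this patch:  prfb pldl1strm, p0, [x0, z0.s, uxtw]     (unscaled byte offset)
  call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %bases, i64 %offset, i32 1)
  ret void
}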
@@ -13032,13 +13032,12 @@ static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG) {
   return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
 }
 
-/// Combines a node carrying the intrinsic `aarch64_sve_prf<T>_gather` into a
-/// node that uses `aarch64_sve_prf<T>_gather_scaled_uxtw` when the scalar
-/// offset passed to `aarch64_sve_prf<T>_gather` is not a valid immediate for
-/// the sve gather prefetch instruction with vector plus immediate addressing
-/// mode.
+/// Combines a node carrying the intrinsic
+/// `aarch64_sve_prf<T>_gather_scalar_offset` into a node that uses
+/// `aarch64_sve_prfb_gather_uxtw_index` when the scalar offset passed to
+/// `aarch64_sve_prf<T>_gather_scalar_offset` is not a valid immediate for the
+/// sve gather prefetch instruction with vector plus immediate addressing mode.
 static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG,
-                                               unsigned NewIID,
                                                unsigned ScalarSizeInBytes) {
   const unsigned ImmPos = 4, OffsetPos = 3;
   // No need to combine the node if the immediate is valid...
@@ -13048,10 +13047,11 @@ static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG,
   // ...otherwise swap the offset base with the offset...
   SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
   std::swap(Ops[ImmPos], Ops[OffsetPos]);
-  // ...and remap the intrinsic `aarch64_sve_prf_gather<T>` to
-  // `aarch64_sve_prf_gather<T>_scaled_uxtw`.
+  // ...and remap the intrinsic `aarch64_sve_prf<T>_gather_scalar_offset` to
+  // `aarch64_sve_prfb_gather_uxtw_index`.
   SDLoc DL(N);
-  Ops[1] = DAG.getConstant(NewIID, DL, MVT::i64);
+  Ops[1] = DAG.getConstant(Intrinsic::aarch64_sve_prfb_gather_uxtw_index, DL,
+                           MVT::i64);
 
   return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
 }
@@ -13121,21 +13121,13 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::INTRINSIC_W_CHAIN:
     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
-      return combineSVEPrefetchVecBaseImmOff(
-          N, DAG, Intrinsic::aarch64_sve_prfb_gather_uxtw_index,
-          1 /*=ScalarSizeInBytes*/);
+      return combineSVEPrefetchVecBaseImmOff(N, DAG, 1 /*=ScalarSizeInBytes*/);
    case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
-      return combineSVEPrefetchVecBaseImmOff(
-          N, DAG, Intrinsic::aarch64_sve_prfh_gather_uxtw_index,
-          2 /*=ScalarSizeInBytes*/);
+      return combineSVEPrefetchVecBaseImmOff(N, DAG, 2 /*=ScalarSizeInBytes*/);
    case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
-      return combineSVEPrefetchVecBaseImmOff(
-          N, DAG, Intrinsic::aarch64_sve_prfw_gather_uxtw_index,
-          4 /*=ScalarSizeInBytes*/);
+      return combineSVEPrefetchVecBaseImmOff(N, DAG, 4 /*=ScalarSizeInBytes*/);
    case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
-      return combineSVEPrefetchVecBaseImmOff(
-          N, DAG, Intrinsic::aarch64_sve_prfd_gather_uxtw_index,
-          8 /*=ScalarSizeInBytes*/);
+      return combineSVEPrefetchVecBaseImmOff(N, DAG, 8 /*=ScalarSizeInBytes*/);
    case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
    case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
    case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
@@ -59,7 +59,7 @@ define void @llvm_aarch64_sve_prfb_gather_scalar_offset_nx2vi64_invalid_immediat
 ; PRFH <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element, imm = 0, 2, ..., 62
 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_runtime_offset(<vscale x 4 x i32> %bases, i64 %offset, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_runtime_offset:
-; CHECK-NEXT: prfh pldl1strm, p0, [x0, z0.s, uxtw #1]
+; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.s, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 %offset, i32 1)
   ret void
@@ -68,7 +68,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_runtime_offset(<
 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound:
 ; CHECK-NEXT: mov w[[N:[0-9]+]], #63
-; CHECK-NEXT: prfh pldl1strm, p0, [x[[N]], z0.s, uxtw #1]
+; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.s, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 63, i32 1)
   ret void
@@ -77,7 +77,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediat
 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound:
 ; CHECK-NEXT: mov x[[N:[0-9]+]], #-1
-; CHECK-NEXT: prfh pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #1]
+; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 -1, i32 1)
   ret void
@@ -86,7 +86,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediat
 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_2(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_2:
 ; CHECK-NEXT: mov w[[N:[0-9]+]], #33
-; CHECK-NEXT: prfh pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #1]
+; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 33, i32 1)
   ret void
@@ -95,8 +95,8 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediat
 ; PRFH <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element, imm = 0, 2, ..., 62
 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_runtime_offset(<vscale x 2 x i64> %bases, i64 %offset, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_runtime_offset:
-; CHECK-NEXT: prfh pldl1strm, p0, [x0, z0.d, uxtw #1]
-; CHECK-NEXT: ret
+; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.d, uxtw]
+; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 %offset, i32 1)
   ret void
 }
@@ -104,7 +104,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_runtime_offset(<
 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound:
 ; CHECK-NEXT: mov w[[N:[0-9]+]], #63
-; CHECK-NEXT: prfh pldl1strm, p0, [x[[N]], z0.d, uxtw #1]
+; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.d, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 63, i32 1)
   ret void
@@ -113,7 +113,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediat
 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound:
 ; CHECK-NEXT: mov x[[N:[0-9]+]], #-1
-; CHECK-NEXT: prfh pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #1]
+; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 -1, i32 1)
   ret void
@@ -122,7 +122,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediat
 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_2(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_2:
 ; CHECK-NEXT: mov w[[N:[0-9]+]], #33
-; CHECK-NEXT: prfh pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #1]
+; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 33, i32 1)
   ret void
@@ -133,7 +133,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediat
 ; PRFW <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element, imm = 0, 4, ..., 124
 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_runtime_offset(<vscale x 4 x i32> %bases, i64 %offset, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_runtime_offset:
-; CHECK-NEXT: prfw pldl1strm, p0, [x0, z0.s, uxtw #2]
+; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.s, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 %offset, i32 1)
   ret void
@@ -142,7 +142,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_runtime_offset(<
 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound:
 ; CHECK-NEXT: mov w[[N:[0-9]+]], #125
-; CHECK-NEXT: prfw pldl1strm, p0, [x[[N]], z0.s, uxtw #2]
+; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.s, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 125, i32 1)
   ret void
@@ -151,7 +151,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediat
 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound:
 ; CHECK-NEXT: mov x[[N:[0-9]+]], #-1
-; CHECK-NEXT: prfw pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #2]
+; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 -1, i32 1)
   ret void
@@ -160,7 +160,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediat
 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_4(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_4:
 ; CHECK-NEXT: mov w[[N:[0-9]+]], #33
-; CHECK-NEXT: prfw pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #2]
+; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 33, i32 1)
   ret void
@@ -169,7 +169,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediat
 ; PRFW <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element, imm = 0, 4, ..., 124
 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_runtime_offset(<vscale x 2 x i64> %bases, i64 %offset, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_runtime_offset:
-; CHECK-NEXT: prfw pldl1strm, p0, [x0, z0.d, uxtw #2]
+; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.d, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 %offset, i32 1)
   ret void
@@ -178,7 +178,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_runtime_offset(<
 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound:
 ; CHECK-NEXT: mov w[[N:[0-9]+]], #125
-; CHECK-NEXT: prfw pldl1strm, p0, [x[[N]], z0.d, uxtw #2]
+; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.d, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 125, i32 1)
   ret void
@@ -187,7 +187,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediat
 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound:
 ; CHECK-NEXT: mov x[[N:[0-9]+]], #-1
-; CHECK-NEXT: prfw pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #2]
+; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 -1, i32 1)
   ret void
@@ -196,7 +196,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediat
 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_4(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_4:
 ; CHECK-NEXT: mov w[[N:[0-9]+]], #33
-; CHECK-NEXT: prfw pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #2]
+; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 33, i32 1)
   ret void
@@ -207,7 +207,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediat
 ; PRFD <prfop>, <Pg>, [<Zn>.S{, #<imm>}] -> 32-bit element, imm = 0, 8, ..., 248
 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_runtime_offset(<vscale x 4 x i32> %bases, i64 %offset, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_runtime_offset:
-; CHECK-NEXT: prfd pldl1strm, p0, [x0, z0.s, uxtw #3]
+; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.s, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 %offset, i32 1)
   ret void
@@ -216,7 +216,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_runtime_offset(<
 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound:
 ; CHECK-NEXT: mov w[[N:[0-9]+]], #125
-; CHECK-NEXT: prfd pldl1strm, p0, [x[[N]], z0.s, uxtw #3]
+; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.s, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 125, i32 1)
   ret void
@@ -225,7 +225,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediat
 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound:
 ; CHECK-NEXT: mov x[[N:[0-9]+]], #-1
-; CHECK-NEXT: prfd pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #3]
+; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 -1, i32 1)
   ret void
@@ -234,7 +234,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediat
 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_8(<vscale x 4 x i32> %bases, <vscale x 4 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_8:
 ; CHECK-NEXT: mov w[[N:[0-9]+]], #33
-; CHECK-NEXT: prfd pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #3]
+; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx4vi32(<vscale x 4 x i1> %Pg, <vscale x 4 x i32> %bases, i64 33, i32 1)
   ret void
@@ -243,7 +243,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediat
 ; PRFD <prfop>, <Pg>, [<Zn>.D{, #<imm>}] -> 64-bit element, imm = 0, 8, ..., 248
 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_runtime_offset(<vscale x 2 x i64> %bases, i64 %offset, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_runtime_offset:
-; CHECK-NEXT: prfd pldl1strm, p0, [x0, z0.d, uxtw #3]
+; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.d, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 %offset, i32 1)
   ret void
@@ -252,7 +252,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_runtime_offset(<
 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound:
 ; CHECK-NEXT: mov w[[N:[0-9]+]], #125
-; CHECK-NEXT: prfd pldl1strm, p0, [x[[N]], z0.d, uxtw #3]
+; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.d, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 125, i32 1)
   ret void
@@ -261,7 +261,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediat
 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound:
 ; CHECK-NEXT: mov x[[N:[0-9]+]], #-1
-; CHECK-NEXT: prfd pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #3]
+; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 -1, i32 1)
   ret void
@@ -270,7 +270,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediat
 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_8(<vscale x 2 x i64> %bases, <vscale x 2 x i1> %Pg) nounwind {
 ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_8:
 ; CHECK-NEXT: mov w[[N:[0-9]+]], #33
-; CHECK-NEXT: prfd pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #3]
+; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw]
 ; CHECK-NEXT: ret
   call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx2vi64(<vscale x 2 x i1> %Pg, <vscale x 2 x i64> %bases, i64 33, i32 1)
   ret void