[AArch64][SVE] Implement intrinsics for non-temporal loads & stores
Summary:
Adds the following intrinsics:
  - llvm.aarch64.sve.ldnt1
  - llvm.aarch64.sve.stnt1

This patch creates masked loads and stores with the MONonTemporal flag set
when used with the intrinsics above.

Reviewers: sdesmalen, paulwalker-arm, dancgr, mgudim, efriedma, rengolin

Reviewed By: efriedma

Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cfe-commits, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71000
parent bf4580b7e7
commit 3f5bf35f86
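For context, a minimal sketch of how a frontend could emit the new intrinsics with IRBuilder; the helper names are hypothetical and not part of this patch, and on newer trees the target intrinsic IDs live in llvm/IR/IntrinsicsAArch64.h rather than llvm/IR/Intrinsics.h:

  // Illustrative only; helper names are hypothetical.
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Intrinsics.h"

  using namespace llvm;

  // Predicated non-temporal load: VecTy is the scalable result type
  // (e.g. <vscale x 16 x i8>); Pred must have the matching element count
  // (e.g. <vscale x 16 x i1>).
  static Value *emitNonTemporalLoad(IRBuilder<> &B, Value *Pred, Value *Ptr,
                                    Type *VecTy) {
    // llvm.aarch64.sve.ldnt1 is overloaded on its result vector type.
    return B.CreateIntrinsic(Intrinsic::aarch64_sve_ldnt1, {VecTy}, {Pred, Ptr});
  }

  // Predicated non-temporal store, overloaded on the stored vector type.
  static void emitNonTemporalStore(IRBuilder<> &B, Value *Data, Value *Pred,
                                   Value *Ptr) {
    B.CreateIntrinsic(Intrinsic::aarch64_sve_stnt1, {Data->getType()},
                      {Data, Pred, Ptr});
  }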
@@ -768,6 +768,20 @@ def llvm_nxv4f32_ty : LLVMType<nxv4f32>;
 def llvm_nxv2f64_ty : LLVMType<nxv2f64>;
 
 let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
 
+  class AdvSIMD_1Vec_PredLoad_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                 LLVMPointerTo<0>],
+                [IntrReadMem, IntrArgMemOnly]>;
+
+  class AdvSIMD_1Vec_PredStore_Intrinsic
+    : Intrinsic<[],
+                [llvm_anyvector_ty,
+                 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                 LLVMPointerTo<0>],
+                [IntrArgMemOnly, NoCapture<2>]>;
+
   class AdvSIMD_Merged1VectorArg_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
                 [LLVMMatchType<0>,
@@ -1033,6 +1047,18 @@ class AdvSIMD_GatherLoad_VecTorBase_Intrinsic
                 ],
                 [IntrReadMem, IntrArgMemOnly]>;
 
+//
+// Loads
+//
+
+def int_aarch64_sve_ldnt1 : AdvSIMD_1Vec_PredLoad_Intrinsic;
+
+//
+// Stores
+//
+
+def int_aarch64_sve_stnt1 : AdvSIMD_1Vec_PredStore_Intrinsic;
+
 //
 // Integer arithmetic
 //
@@ -25,6 +25,7 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
@@ -6589,7 +6590,9 @@ SDValue SelectionDAG::getMemIntrinsicNode(
   if (Align == 0)  // Ensure that codegen never sees alignment 0
     Align = getEVTAlignment(MemVT);
 
-  if (!Size)
+  if (!Size && MemVT.isScalableVector())
+    Size = MemoryLocation::UnknownSize;
+  else if (!Size)
     Size = MemVT.getStoreSize();
 
   MachineFunction &MF = getMachineFunction();
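The new guard exists because the store size of a scalable vector is vscale times its minimum size, which is unknown at compile time; recording MemoryLocation::UnknownSize keeps alias analysis conservative for these memory operands. The same rule restated in isolation (a sketch, not patch code):

  #include "llvm/Analysis/MemoryLocation.h"
  #include "llvm/CodeGen/ValueTypes.h"

  using namespace llvm;

  // Mirrors the sizing rule in the hunk above.
  static uint64_t memIntrinsicSize(EVT MemVT) {
    if (MemVT.isScalableVector())
      return MemoryLocation::UnknownSize; // vscale-dependent, not known statically
    return MemVT.getStoreSize();          // fixed-size types keep an exact byte count
  }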
@@ -8506,6 +8506,26 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.align = Align(16);
     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
     return true;
+  case Intrinsic::aarch64_sve_ldnt1: {
+    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::getVT(PtrTy->getElementType());
+    Info.ptrVal = I.getArgOperand(1);
+    Info.offset = 0;
+    Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
+    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal;
+    return true;
+  }
+  case Intrinsic::aarch64_sve_stnt1: {
+    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType());
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::getVT(PtrTy->getElementType());
+    Info.ptrVal = I.getArgOperand(2);
+    Info.offset = 0;
+    Info.align = MaybeAlign(DL.getABITypeAlignment(PtrTy->getElementType()));
+    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
+    return true;
+  }
   default:
     break;
   }
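Info.flags becomes the flags of the MachineMemOperand attached to the resulting INTRINSIC_W_CHAIN node, which is how the non-temporal hint survives until instruction selection; the PatFrag predicates later in this diff read it back. A sketch of that consumer side (illustrative name, not patch code):

  #include "llvm/CodeGen/SelectionDAGNodes.h"

  using namespace llvm;

  // MemSDNode::getMemOperand() returns the MachineMemOperand built from the
  // IntrinsicInfo populated above; isNonTemporal() tests MONonTemporal.
  static bool shouldSelectNonTemporalForm(const MemSDNode *N) {
    return N->getMemOperand()->isNonTemporal();
  }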
@@ -10871,6 +10891,48 @@ static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
   return NewST1;
 }
 
+static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  EVT PtrTy = N->getOperand(3).getValueType();
+
+  EVT LoadVT = VT;
+  if (VT.isFloatingPoint())
+    LoadVT = VT.changeTypeToInteger();
+
+  auto *MINode = cast<MemIntrinsicSDNode>(N);
+  SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
+  SDValue L = DAG.getMaskedLoad(VT, DL, MINode->getChain(),
+                                MINode->getOperand(3), DAG.getUNDEF(PtrTy),
+                                MINode->getOperand(2), PassThru,
+                                MINode->getMemoryVT(), MINode->getMemOperand(),
+                                ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
+
+  if (VT.isFloatingPoint()) {
+    SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
+    return DAG.getMergeValues(Ops, DL);
+  }
+
+  return L;
+}
+
+static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
+  SDLoc DL(N);
+
+  SDValue Data = N->getOperand(2);
+  EVT DataVT = Data.getValueType();
+  EVT PtrTy = N->getOperand(4).getValueType();
+
+  if (DataVT.isFloatingPoint())
+    Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);
+
+  auto *MINode = cast<MemIntrinsicSDNode>(N);
+  return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
+                            DAG.getUNDEF(PtrTy), MINode->getOperand(3),
+                            MINode->getMemoryVT(), MINode->getMemOperand(),
+                            ISD::UNINDEXED, false, false);
+}
+
 /// Replace a splat of zeros to a vector store by scalar stores of WZR/XZR. The
 /// load store optimizer pass will merge them to store pair stores. This should
 /// be better than a movi to create the vector zero followed by a vector store
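The getOperand() indices in the two combines follow the operand layout of an INTRINSIC_W_CHAIN node: operand 0 is the chain, operand 1 the intrinsic ID, and the intrinsic's IR arguments start at operand 2 (so for ldnt1 the predicate is operand 2 and the pointer operand 3; for stnt1 the data is operand 2, the predicate operand 3, and the pointer operand 4). A sketch of that mapping (hypothetical helper, not patch code):

  #include "llvm/CodeGen/SelectionDAGNodes.h"

  using namespace llvm;

  // Returns the SDValue for the ArgIdx'th IR-level argument of a target
  // memory intrinsic node, skipping the chain and intrinsic-ID operands.
  static SDValue intrinsicArg(const SDNode *N, unsigned ArgIdx) {
    return N->getOperand(2 + ArgIdx);
  }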
@@ -12087,6 +12149,10 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     case Intrinsic::aarch64_neon_st3lane:
     case Intrinsic::aarch64_neon_st4lane:
       return performNEONPostLDSTCombine(N, DCI, DAG);
+    case Intrinsic::aarch64_sve_ldnt1:
+      return performLDNT1Combine(N, DAG);
+    case Intrinsic::aarch64_sve_stnt1:
+      return performSTNT1Combine(N, DAG);
     case Intrinsic::aarch64_sve_ld1_gather:
       return performLD1GatherCombine(N, DAG, AArch64ISD::GLD1);
     case Intrinsic::aarch64_sve_ld1_gather_index:
@@ -265,7 +265,8 @@ def nonext_masked_load :
   PatFrag<(ops node:$ptr, node:$pred, node:$def),
           (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
   return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
-         cast<MaskedLoadSDNode>(N)->isUnindexed();
+         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
+         !cast<MaskedLoadSDNode>(N)->isNonTemporal();
 }]>;
 // sign extending masked load fragments.
 def asext_masked_load :
@@ -313,12 +314,21 @@ def zext_masked_load_i32 :
   return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
 }]>;
 
+def non_temporal_load :
+  PatFrag<(ops node:$ptr, node:$pred, node:$def),
+          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
+  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
+         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
+         cast<MaskedLoadSDNode>(N)->isNonTemporal();
+}]>;
+
 // non-truncating masked store fragment.
 def nontrunc_masked_store :
   PatFrag<(ops node:$val, node:$ptr, node:$pred),
           (masked_st node:$val, node:$ptr, undef, node:$pred), [{
   return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
-         cast<MaskedStoreSDNode>(N)->isUnindexed();
+         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
+         !cast<MaskedStoreSDNode>(N)->isNonTemporal();
 }]>;
 // truncating masked store fragments.
 def trunc_masked_store :
@@ -343,6 +353,14 @@ def trunc_masked_store_i32 :
   return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
 }]>;
 
+def non_temporal_store :
+  PatFrag<(ops node:$val, node:$ptr, node:$pred),
+          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
+  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
+         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
+         cast<MaskedStoreSDNode>(N)->isNonTemporal();
+}]>;
+
 // Node definitions.
 def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
 def AArch64adr  : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
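With the added isNonTemporal() tests, the temporal and non-temporal fragments are mutually exclusive, so a given masked load or store matches exactly one of them. The load-side PatFrag bodies, restated as standalone C++ predicates (a sketch for readability, not patch code):

  #include "llvm/CodeGen/SelectionDAGNodes.h"

  using namespace llvm;

  static bool matchesNonextMaskedLoad(const MaskedLoadSDNode *N) {
    return N->getExtensionType() == ISD::NON_EXTLOAD && N->isUnindexed() &&
           !N->isNonTemporal(); // temporal masked loads only
  }

  static bool matchesNonTemporalLoad(const MaskedLoadSDNode *N) {
    return N->getExtensionType() == ISD::NON_EXTLOAD && N->isUnindexed() &&
           N->isNonTemporal(); // exactly the complement
  }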
@@ -1164,6 +1164,18 @@ let Predicates = [HasSVE] in {
   // 16-element contiguous stores
   defm : pred_store<nxv16i8, nxv16i1, nontrunc_masked_store, ST1B_IMM>;
 
+  defm : pred_load<nxv16i8, nxv16i1, non_temporal_load, LDNT1B_ZRR>;
+  defm : pred_load<nxv8i16, nxv8i1,  non_temporal_load, LDNT1H_ZRR>;
+  defm : pred_load<nxv4i32, nxv4i1,  non_temporal_load, LDNT1W_ZRR>;
+  defm : pred_load<nxv2i64, nxv2i1,  non_temporal_load, LDNT1D_ZRR>;
+  defm : pred_load<nxv8f16, nxv8i1,  non_temporal_load, LDNT1H_ZRR>;
+  defm : pred_load<nxv4f32, nxv4i1,  non_temporal_load, LDNT1W_ZRR>;
+  defm : pred_load<nxv2f64, nxv2i1,  non_temporal_load, LDNT1D_ZRR>;
+
+  defm : pred_store<nxv16i8, nxv16i1, non_temporal_store, STNT1B_ZRR>;
+  defm : pred_store<nxv8i16, nxv8i1,  non_temporal_store, STNT1H_ZRR>;
+  defm : pred_store<nxv4i32, nxv4i1,  non_temporal_store, STNT1W_ZRR>;
+  defm : pred_store<nxv2i64, nxv2i1,  non_temporal_store, STNT1D_ZRR>;
 }
 
 let Predicates = [HasSVE2] in {
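Note that only integer-typed non_temporal_store patterns are instantiated: performSTNT1Combine above already bitcasts floating-point data to the equivalent integer type before the masked store is created, so FP stores reach the integer patterns, while the load side keeps the FP result type on the masked load, which is why the nxv8f16/nxv4f32/nxv2f64 pred_load entries exist. The type mapping the store side relies on, as a sketch (not patch code):

  #include "llvm/CodeGen/ValueTypes.h"

  using namespace llvm;

  // e.g. nxv8f16 -> nxv8i16, nxv4f32 -> nxv4i32, nxv2f64 -> nxv2i64.
  static EVT storePatternVT(EVT DataVT) {
    return DataVT.isFloatingPoint() ? DataVT.changeTypeToInteger() : DataVT;
  }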
@@ -0,0 +1,88 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; LDNT1B
+;
+
+define <vscale x 16 x i8> @ldnt1b_i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
+; CHECK-LABEL: ldnt1b_i8:
+; CHECK: ldnt1b { z0.b }, p0/z, [x0, #0]
+; CHECK-NEXT: ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %pred,
+                                                                 <vscale x 16 x i8>* %addr)
+  ret <vscale x 16 x i8> %res
+}
+
+;
+; LDNT1H
+;
+
+define <vscale x 8 x i16> @ldnt1h_i16(<vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
+; CHECK-LABEL: ldnt1h_i16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> %pred,
+                                                                 <vscale x 8 x i16>* %addr)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 8 x half> @ldnt1h_f16(<vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
+; CHECK-LABEL: ldnt1h_f16:
+; CHECK: ldnt1h { z0.h }, p0/z, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> %pred,
+                                                                  <vscale x 8 x half>* %addr)
+  ret <vscale x 8 x half> %res
+}
+
+;
+; LDNT1W
+;
+
+define <vscale x 4 x i32> @ldnt1w_i32(<vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
+; CHECK-LABEL: ldnt1w_i32:
+; CHECK: ldnt1w { z0.s }, p0/z, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %pred,
+                                                                 <vscale x 4 x i32>* %addr)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 4 x float> @ldnt1w_f32(<vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
+; CHECK-LABEL: ldnt1w_f32:
+; CHECK: ldnt1w { z0.s }, p0/z, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> %pred,
+                                                                   <vscale x 4 x float>* %addr)
+  ret <vscale x 4 x float> %res
+}
+
+;
+; LDNT1D
+;
+
+define <vscale x 2 x i64> @ldnt1d_i64(<vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
+; CHECK-LABEL: ldnt1d_i64:
+; CHECK: ldnt1d { z0.d }, p0/z, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %pred,
+                                                                 <vscale x 2 x i64>* %addr)
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 2 x double> @ldnt1d_f64(<vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
+; CHECK-LABEL: ldnt1d_f64:
+; CHECK: ldnt1d { z0.d }, p0/z, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> %pred,
+                                                                    <vscale x 2 x double>* %addr)
+  ret <vscale x 2 x double> %res
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>*)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>*)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>*)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>*)
+declare <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>*)
+declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>*)
+declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>*)
@@ -0,0 +1,95 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; STNT1B
+;
+
+define void @stnt1b_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
+; CHECK-LABEL: stnt1b_i8:
+; CHECK: stnt1b { z0.b }, p0, [x0, #0]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data,
+                                            <vscale x 16 x i1> %pred,
+                                            <vscale x 16 x i8>* %addr)
+  ret void
+}
+
+;
+; STNT1H
+;
+
+define void @stnt1h_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
+; CHECK-LABEL: stnt1h_i16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %data,
+                                            <vscale x 8 x i1> %pred,
+                                            <vscale x 8 x i16>* %addr)
+  ret void
+}
+
+define void @stnt1h_f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
+; CHECK-LABEL: stnt1h_f16:
+; CHECK: stnt1h { z0.h }, p0, [x0, #0, lsl #1]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> %data,
+                                            <vscale x 8 x i1> %pred,
+                                            <vscale x 8 x half>* %addr)
+  ret void
+}
+
+;
+; STNT1W
+;
+
+define void @stnt1w_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
+; CHECK-LABEL: stnt1w_i32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data,
+                                            <vscale x 4 x i1> %pred,
+                                            <vscale x 4 x i32>* %addr)
+  ret void
+}
+
+define void @stnt1w_f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
+; CHECK-LABEL: stnt1w_f32:
+; CHECK: stnt1w { z0.s }, p0, [x0, #0, lsl #2]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> %data,
+                                            <vscale x 4 x i1> %pred,
+                                            <vscale x 4 x float>* %addr)
+  ret void
+}
+
+;
+; STNT1D
+;
+
+define void @stnt1d_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
+; CHECK-LABEL: stnt1d_i64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data,
+                                            <vscale x 2 x i1> %pred,
+                                            <vscale x 2 x i64>* %addr)
+  ret void
+}
+
+define void @stnt1d_f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
+; CHECK-LABEL: stnt1d_f64:
+; CHECK: stnt1d { z0.d }, p0, [x0, #0, lsl #3]
+; CHECK-NEXT: ret
+  call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> %data,
+                                            <vscale x 2 x i1> %pred,
+                                            <vscale x 2 x double>* %addr)
+  ret void
+}
+
+declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>*)
+declare void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>*)
+declare void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>*)
+declare void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>*)
+declare void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>*)
+declare void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>*)
+declare void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>*)