[AArch64][SVE] Implement masked store intrinsics
Summary: Adds support for codegen of masked stores, with non-truncating
and truncating variants.

Reviewers: huntergr, greened, dmgreen, rovka, sdesmalen
Reviewed By: dmgreen, sdesmalen
Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cfe-commits, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69378
commit 5c2c94648e (parent 81399002ae)
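To make the change concrete, here is a minimal sketch of the kind of IR this patch now selects, distilled from the tests in the diff below (the function name @example is illustrative and not part of the commit; the expected instruction is taken from the masked_trunc_store_nxv2i32 test):

; Illustrative sketch, not part of the diff below.
define void @example(<vscale x 2 x i64> %val, <vscale x 2 x i32> *%dst, <vscale x 2 x i1> %mask) nounwind {
  ; The truncating masked store below is matched by the new
  ; trunc_masked_store_i32 fragment and selected to ST1W_D_IMM,
  ; i.e. "st1w { z0.d }, p0, [x0]".
  %trunc = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
  call void @llvm.masked.store.nxv2i32(<vscale x 2 x i32> %trunc, <vscale x 2 x i32> *%dst, i32 8, <vscale x 2 x i1> %mask)
  ret void
}

declare void @llvm.masked.store.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>*, i32, <vscale x 2 x i1>)

Non-truncating stores are handled analogously via the nontrunc_masked_store fragment (e.g. an nxv2i64 store selects ST1D_IMM).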
@@ -309,6 +309,34 @@ def zext_masked_load_i32 :
   return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
 }]>;
 
+// non-truncating masked store fragment.
+def nontrunc_masked_store :
+  PatFrag<(ops node:$val, node:$ptr, node:$pred),
+          (masked_st node:$val, node:$ptr, node:$pred), [{
+  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+}]>;
+// truncating masked store fragments.
+def trunc_masked_store :
+  PatFrag<(ops node:$val, node:$ptr, node:$pred),
+          (masked_st node:$val, node:$ptr, node:$pred), [{
+  return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+}]>;
+def trunc_masked_store_i8 :
+  PatFrag<(ops node:$val, node:$ptr, node:$pred),
+          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
+  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def trunc_masked_store_i16 :
+  PatFrag<(ops node:$val, node:$ptr, node:$pred),
+          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
+  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+def trunc_masked_store_i32 :
+  PatFrag<(ops node:$val, node:$ptr, node:$pred),
+          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
+  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
 // Node definitions.
 def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
 def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
@@ -1108,6 +1108,36 @@ let Predicates = [HasSVE] in {
   // 16-element contiguous loads
   defm : pred_load<nxv16i8, nxv16i1, nonext_masked_load, LD1B_IMM>;
 
+  multiclass pred_store<ValueType Ty, ValueType PredTy, SDPatternOperator Store,
+                        Instruction RegImmInst> {
+    def _default : Pat<(Store (Ty ZPR:$vec), GPR64:$base, (PredTy PPR:$gp)),
+                       (RegImmInst ZPR:$vec, PPR:$gp, GPR64:$base, (i64 0))>;
+  }
+
+  // 2-element contiguous stores
+  defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i8, ST1B_D_IMM>;
+  defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i16, ST1H_D_IMM>;
+  defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i32, ST1W_D_IMM>;
+  defm : pred_store<nxv2i64, nxv2i1, nontrunc_masked_store, ST1D_IMM>;
+  defm : pred_store<nxv2f16, nxv2i1, nontrunc_masked_store, ST1H_D_IMM>;
+  defm : pred_store<nxv2f32, nxv2i1, nontrunc_masked_store, ST1W_D_IMM>;
+  defm : pred_store<nxv2f64, nxv2i1, nontrunc_masked_store, ST1D_IMM>;
+
+  // 4-element contiguous stores
+  defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i8, ST1B_S_IMM>;
+  defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i16, ST1H_S_IMM>;
+  defm : pred_store<nxv4i32, nxv4i1, nontrunc_masked_store, ST1W_IMM>;
+  defm : pred_store<nxv4f16, nxv4i1, nontrunc_masked_store, ST1H_S_IMM>;
+  defm : pred_store<nxv4f32, nxv4i1, nontrunc_masked_store, ST1W_IMM>;
+
+  // 8-element contiguous stores
+  defm : pred_store<nxv8i16, nxv8i1, trunc_masked_store_i8, ST1B_H_IMM>;
+  defm : pred_store<nxv8i16, nxv8i1, nontrunc_masked_store, ST1H_IMM>;
+  defm : pred_store<nxv8f16, nxv8i1, nontrunc_masked_store, ST1H_IMM>;
+
+  // 16-element contiguous stores
+  defm : pred_store<nxv16i8, nxv16i1, nontrunc_masked_store, ST1B_IMM>;
+
 }
 
 let Predicates = [HasSVE2] in {
@@ -147,7 +147,7 @@ public:
   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
 
-  bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) {
+  bool isLegalMaskedLoadStore(Type *DataType, MaybeAlign Alignment) {
     if (!isa<VectorType>(DataType) || !ST->hasSVE())
       return false;
@@ -162,6 +162,14 @@ public:
     return false;
   }
 
+  bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) {
+    return isLegalMaskedLoadStore(DataType, Alignment);
+  }
+
+  bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) {
+    return isLegalMaskedLoadStore(DataType, Alignment);
+  }
+
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                  ArrayRef<unsigned> Indices, unsigned Alignment,
                                  unsigned AddressSpace,
@@ -1,79 +1,173 @@
-; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
 
 ;
 ; Masked Loads
 ;
 
-define <vscale x 2 x i64> @masked_load_nxv2i64(<vscale x 2 x i64> *%a, <vscale x 2 x i1> %mask) {
+define <vscale x 2 x i64> @masked_load_nxv2i64(<vscale x 2 x i64> *%a, <vscale x 2 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv2i64:
-; CHECK: ld1d { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
   %load = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64> *%a, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
   ret <vscale x 2 x i64> %load
 }
 
-define <vscale x 4 x i32> @masked_load_nxv4i32(<vscale x 4 x i32> *%a, <vscale x 4 x i1> %mask) {
+define <vscale x 4 x i32> @masked_load_nxv4i32(<vscale x 4 x i32> *%a, <vscale x 4 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv4i32:
-; CHECK: ld1w { [[IN:z[0-9]+]].s }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
   %load = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32> *%a, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
   ret <vscale x 4 x i32> %load
 }
 
-define <vscale x 8 x i16> @masked_load_nxv8i16(<vscale x 8 x i16> *%a, <vscale x 8 x i1> %mask) {
+define <vscale x 8 x i16> @masked_load_nxv8i16(<vscale x 8 x i16> *%a, <vscale x 8 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv8i16:
-; CHECK: ld1h { [[IN:z[0-9]+]].h }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
   %load = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16(<vscale x 8 x i16> *%a, i32 2, <vscale x 8 x i1> %mask, <vscale x 8 x i16> undef)
   ret <vscale x 8 x i16> %load
 }
 
-define <vscale x 16 x i8> @masked_load_nxv16i8(<vscale x 16 x i8> *%a, <vscale x 16 x i1> %mask) {
+define <vscale x 16 x i8> @masked_load_nxv16i8(<vscale x 16 x i8> *%a, <vscale x 16 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv16i8:
-; CHECK: ld1b { [[IN:z[0-9]+]].b }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: ret
   %load = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8(<vscale x 16 x i8> *%a, i32 1, <vscale x 16 x i1> %mask, <vscale x 16 x i8> undef)
   ret <vscale x 16 x i8> %load
 }
 
-define <vscale x 2 x double> @masked_load_nxv2f64(<vscale x 2 x double> *%a, <vscale x 2 x i1> %mask) {
+define <vscale x 2 x double> @masked_load_nxv2f64(<vscale x 2 x double> *%a, <vscale x 2 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv2f64:
-; CHECK: ld1d { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
   %load = call <vscale x 2 x double> @llvm.masked.load.nxv2f64(<vscale x 2 x double> *%a, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
   ret <vscale x 2 x double> %load
 }
 
-define <vscale x 2 x float> @masked_load_nxv2f32(<vscale x 2 x float> *%a, <vscale x 2 x i1> %mask) {
+define <vscale x 2 x float> @masked_load_nxv2f32(<vscale x 2 x float> *%a, <vscale x 2 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv2f32:
-; CHECK: ld1w { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
   %load = call <vscale x 2 x float> @llvm.masked.load.nxv2f32(<vscale x 2 x float> *%a, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
   ret <vscale x 2 x float> %load
 }
 
-define <vscale x 2 x half> @masked_load_nxv2f16(<vscale x 2 x half> *%a, <vscale x 2 x i1> %mask) {
+define <vscale x 2 x half> @masked_load_nxv2f16(<vscale x 2 x half> *%a, <vscale x 2 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv2f16:
-; CHECK: ld1h { [[IN:z[0-9]+]].d }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ret
   %load = call <vscale x 2 x half> @llvm.masked.load.nxv2f16(<vscale x 2 x half> *%a, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
   ret <vscale x 2 x half> %load
 }
 
-define <vscale x 4 x float> @masked_load_nxv4f32(<vscale x 4 x float> *%a, <vscale x 4 x i1> %mask) {
+define <vscale x 4 x float> @masked_load_nxv4f32(<vscale x 4 x float> *%a, <vscale x 4 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv4f32:
-; CHECK: ld1w { [[IN:z[0-9]+]].s }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
   %load = call <vscale x 4 x float> @llvm.masked.load.nxv4f32(<vscale x 4 x float> *%a, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> undef)
   ret <vscale x 4 x float> %load
 }
 
-define <vscale x 4 x half> @masked_load_nxv4f16(<vscale x 4 x half> *%a, <vscale x 4 x i1> %mask) {
+define <vscale x 4 x half> @masked_load_nxv4f16(<vscale x 4 x half> *%a, <vscale x 4 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv4f16:
-; CHECK: ld1h { [[IN:z[0-9]+]].s }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
   %load = call <vscale x 4 x half> @llvm.masked.load.nxv4f16(<vscale x 4 x half> *%a, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x half> undef)
   ret <vscale x 4 x half> %load
 }
 
-define <vscale x 8 x half> @masked_load_nxv8f16(<vscale x 8 x half> *%a, <vscale x 8 x i1> %mask) {
+define <vscale x 8 x half> @masked_load_nxv8f16(<vscale x 8 x half> *%a, <vscale x 8 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv8f16:
-; CHECK: ld1h { [[IN:z[0-9]+]].h }, [[PG:p[0-9]+]]/z, [x0]
+; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ret
   %load = call <vscale x 8 x half> @llvm.masked.load.nxv8f16(<vscale x 8 x half> *%a, i32 2, <vscale x 8 x i1> %mask, <vscale x 8 x half> undef)
   ret <vscale x 8 x half> %load
 }
 
+;
+; Masked Stores
+;
+
+define void @masked_store_nxv2i64(<vscale x 2 x i64> *%a, <vscale x 2 x i64> %val, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_store_nxv2i64:
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+  call void @llvm.masked.store.nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x i64> *%a, i32 8, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_nxv4i32(<vscale x 4 x i32> *%a, <vscale x 4 x i32> %val, <vscale x 4 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_store_nxv4i32:
+; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+  call void @llvm.masked.store.nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32> *%a, i32 4, <vscale x 4 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_nxv8i16(<vscale x 8 x i16> *%a, <vscale x 8 x i16> %val, <vscale x 8 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_store_nxv8i16:
+; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: ret
+  call void @llvm.masked.store.nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x i16> *%a, i32 2, <vscale x 8 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_nxv16i8(<vscale x 16 x i8> *%a, <vscale x 16 x i8> %val, <vscale x 16 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_store_nxv16i8:
+; CHECK-NEXT: st1b { z0.b }, p0, [x0]
+; CHECK-NEXT: ret
+  call void @llvm.masked.store.nxv16i8(<vscale x 16 x i8> %val, <vscale x 16 x i8> *%a, i32 1, <vscale x 16 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_nxv2f64(<vscale x 2 x double> *%a, <vscale x 2 x double> %val, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_store_nxv2f64:
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+  call void @llvm.masked.store.nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x double> *%a, i32 8, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_nxv2f32(<vscale x 2 x float> *%a, <vscale x 2 x float> %val, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_store_nxv2f32:
+; CHECK-NEXT: st1w { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+  call void @llvm.masked.store.nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x float> *%a, i32 4, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_nxv2f16(<vscale x 2 x half> *%a, <vscale x 2 x half> %val, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_store_nxv2f16:
+; CHECK-NEXT: st1h { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+  call void @llvm.masked.store.nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x half> *%a, i32 4, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_nxv4f32(<vscale x 4 x float> *%a, <vscale x 4 x float> %val, <vscale x 4 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_store_nxv4f32:
+; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+  call void @llvm.masked.store.nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float> *%a, i32 4, <vscale x 4 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_nxv4f16(<vscale x 4 x half> *%a, <vscale x 4 x half> %val, <vscale x 4 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_store_nxv4f16:
+; CHECK-NEXT: st1h { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+  call void @llvm.masked.store.nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half> *%a, i32 2, <vscale x 4 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_nxv8f16(<vscale x 8 x half> *%a, <vscale x 8 x half> %val, <vscale x 8 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_store_nxv8f16:
+; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: ret
+  call void @llvm.masked.store.nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x half> *%a, i32 2, <vscale x 8 x i1> %mask)
+  ret void
+}
+
 declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
 declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32>*, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
 declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16(<vscale x 8 x i16>*, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
@@ -85,3 +179,15 @@ declare <vscale x 2 x half> @llvm.masked.load.nxv2f16(<vscale x 2 x half>*, i32,
 declare <vscale x 4 x float> @llvm.masked.load.nxv4f32(<vscale x 4 x float>*, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
 declare <vscale x 4 x half> @llvm.masked.load.nxv4f16(<vscale x 4 x half>*, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
 declare <vscale x 8 x half> @llvm.masked.load.nxv8f16(<vscale x 8 x half>*, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
+
+declare void @llvm.masked.store.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>*, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.store.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>*, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.store.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>*, i32, <vscale x 8 x i1>)
+declare void @llvm.masked.store.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32, <vscale x 16 x i1>)
+
+declare void @llvm.masked.store.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>*, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.store.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>*, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.store.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>*, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.store.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>*, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.store.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>*, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.store.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>*, i32, <vscale x 8 x i1>)
@@ -0,0 +1,66 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
+
+;
+; Masked Stores
+;
+
+define void @masked_trunc_store_nxv2i8(<vscale x 2 x i64> *%a, <vscale x 2 x i64> %val, <vscale x 2 x i8> *%b, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_trunc_store_nxv2i8:
+; CHECK-NEXT: st1b { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+  %trunc = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
+  call void @llvm.masked.store.nxv2i8(<vscale x 2 x i8> %trunc, <vscale x 2 x i8> *%b, i32 8, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv2i16(<vscale x 2 x i64> *%a, <vscale x 2 x i64> %val, <vscale x 2 x i16> *%b, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_trunc_store_nxv2i16:
+; CHECK-NEXT: st1h { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+  %trunc = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
+  call void @llvm.masked.store.nxv2i16(<vscale x 2 x i16> %trunc, <vscale x 2 x i16> *%b, i32 8, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv2i32(<vscale x 2 x i64> *%a, <vscale x 2 x i64> %val, <vscale x 2 x i32> *%b, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_trunc_store_nxv2i32:
+; CHECK-NEXT: st1w { z0.d }, p0, [x1]
+; CHECK-NEXT: ret
+  %trunc = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
+  call void @llvm.masked.store.nxv2i32(<vscale x 2 x i32> %trunc, <vscale x 2 x i32> *%b, i32 8, <vscale x 2 x i1> %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv4i8(<vscale x 4 x i32> *%a, <vscale x 4 x i32> %val, <vscale x 4 x i8> *%b, <vscale x 4 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_trunc_store_nxv4i8:
+; CHECK-NEXT: st1b { z0.s }, p0, [x1]
+; CHECK-NEXT: ret
+  %trunc = trunc <vscale x 4 x i32> %val to <vscale x 4 x i8>
+  call void @llvm.masked.store.nxv4i8(<vscale x 4 x i8> %trunc, <vscale x 4 x i8> *%b, i32 4, <vscale x 4 x i1> %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv4i16(<vscale x 4 x i32> *%a, <vscale x 4 x i32> %val, <vscale x 4 x i16> *%b, <vscale x 4 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_trunc_store_nxv4i16:
+; CHECK-NEXT: st1h { z0.s }, p0, [x1]
+; CHECK-NEXT: ret
+  %trunc = trunc <vscale x 4 x i32> %val to <vscale x 4 x i16>
+  call void @llvm.masked.store.nxv4i16(<vscale x 4 x i16> %trunc, <vscale x 4 x i16> *%b, i32 4, <vscale x 4 x i1> %mask)
+  ret void
+}
+
+define void @masked_trunc_store_nxv8i8(<vscale x 8 x i16> *%a, <vscale x 8 x i16> %val, <vscale x 8 x i8> *%b, <vscale x 8 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_trunc_store_nxv8i8:
+; CHECK-NEXT: st1b { z0.h }, p0, [x1]
+; CHECK-NEXT: ret
+  %trunc = trunc <vscale x 8 x i16> %val to <vscale x 8 x i8>
+  call void @llvm.masked.store.nxv8i8(<vscale x 8 x i8> %trunc, <vscale x 8 x i8> *%b, i32 2, <vscale x 8 x i1> %mask)
+  ret void
+}
+
+declare void @llvm.masked.store.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>*, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.store.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>*, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.store.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>*, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.store.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>*, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.store.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>*, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.store.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>*, i32, <vscale x 8 x i1>)