[VE] strided v256.32 isel and tests
ISel for experimental.vp.strided.load|store for v256.32 types via lowering to vvp_load|store SDNodes.

Reviewed By: kaz7

Differential Revision: https://reviews.llvm.org/D121616
parent 541d89b02c
commit 6ac3d8ef9c
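A note for readers of this page: the intrinsics being selected here address lane i at base + i * stride (stride in bytes), under a per-lane mask and an explicit vector length (EVL). The standalone C++ sketch below models that contract; it is illustrative only and not part of the patch, and the zero it writes to inactive lanes stands in for lanes the intrinsic actually leaves undefined.

#include <cstdint>
#include <vector>

// Scalar model of llvm.experimental.vp.strided.load: read EVL elements that
// sit StrideBytes apart, skipping lanes whose mask bit is off.
std::vector<float> stridedLoadModel(const float *Base, int64_t StrideBytes,
                                    const std::vector<bool> &Mask,
                                    uint32_t EVL) {
  std::vector<float> Result(EVL, 0.0f); // 0.0f stands in for "undefined"
  const auto *Raw = reinterpret_cast<const uint8_t *>(Base);
  for (uint32_t I = 0; I < EVL; ++I) // lanes at or past EVL are never touched
    if (Mask[I])
      Result[I] = *reinterpret_cast<const float *>(Raw + I * StrideBytes);
  return Result;
}

int main() {
  float Data[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  // A stride of 8 bytes over float data selects every second element: 0, 2, 4.
  auto R = stridedLoadModel(Data, 8, {true, true, true}, 3);
  return R[2] == 4.0f ? 0 : 1;
}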
@@ -73,6 +73,11 @@ Optional<unsigned> getVVPOpcode(unsigned Opcode) {
   case ISD::SDNAME:                                                            \
     return VEISD::VVPNAME;
 #include "VVPNodes.def"
+  // TODO: Map those in VVPNodes.def too
+  case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+    return VEISD::VVP_LOAD;
+  case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+    return VEISD::VVP_STORE;
   }
   return None;
 }
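The `#include "VVPNodes.def"` above is an X-macro include: the .def file re-expands under whatever `ADD_VVP_OP`/`HANDLE_VP_TO_VVP` definitions are currently in scope, which is why the strided opcodes, not yet listed in that file, need the hand-written cases under the TODO. Here is a compilable toy version of the pattern, with made-up opcode names standing in for the real tables:

#include <cstdio>
#include <optional>

// Toy opcode tables; the real ones live in ISDOpcodes.h and VVPNodes.def.
enum class ISD { LOAD, STORE, FADD };
enum class VEISD { VVP_LOAD, VVP_STORE, VVP_FADD };

// Stand-in for VVPNodes.def: one entry per generic->VVP opcode mapping.
#define VVP_NODES(X)                                                           \
  X(VVP_LOAD, LOAD)                                                            \
  X(VVP_STORE, STORE)                                                          \
  X(VVP_FADD, FADD)

std::optional<VEISD> getVVPOpcode(ISD Opcode) {
  switch (Opcode) {
#define ADD_VVP_OP(VVPNAME, SDNAME)                                            \
  case ISD::SDNAME:                                                            \
    return VEISD::VVPNAME;
    VVP_NODES(ADD_VVP_OP)
#undef ADD_VVP_OP
  }
  return std::nullopt;
}

int main() { std::printf("%d\n", (int)*getVVPOpcode(ISD::STORE)); }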
@@ -275,10 +280,17 @@ Optional<EVT> getIdiomaticVectorType(SDNode *Op) {
 }
 
 SDValue getLoadStoreStride(SDValue Op, VECustomDAG &CDAG) {
-  if (Op->getOpcode() == VEISD::VVP_STORE)
+  switch (Op->getOpcode()) {
+  case VEISD::VVP_STORE:
     return Op->getOperand(3);
-  if (Op->getOpcode() == VEISD::VVP_LOAD)
+  case VEISD::VVP_LOAD:
     return Op->getOperand(2);
+  }
 
+  if (auto *StoreN = dyn_cast<VPStridedStoreSDNode>(Op.getNode()))
+    return StoreN->getStride();
+  if (auto *StoreN = dyn_cast<VPStridedLoadSDNode>(Op.getNode()))
+    return StoreN->getStride();
+
   if (isa<MemSDNode>(Op.getNode())) {
     // Regular MLOAD/MSTORE/LOAD/STORE
@@ -309,6 +321,7 @@ SDValue getGatherScatterScale(SDValue Op) {
 
 SDValue getStoredValue(SDValue Op) {
   switch (Op->getOpcode()) {
+  case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
   case VEISD::VVP_STORE:
     return Op->getOperand(1);
   }
@@ -316,6 +329,8 @@ SDValue getStoredValue(SDValue Op) {
     return StoreN->getValue();
   if (auto *StoreN = dyn_cast<MaskedStoreSDNode>(Op.getNode()))
     return StoreN->getValue();
+  if (auto *StoreN = dyn_cast<VPStridedStoreSDNode>(Op.getNode()))
+    return StoreN->getValue();
   if (auto *StoreN = dyn_cast<VPStoreSDNode>(Op.getNode()))
     return StoreN->getValue();
   if (auto *StoreN = dyn_cast<MaskedScatterSDNode>(Op.getNode()))
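Taken together, these accessors pin down where the custom nodes keep their operands: the stored value is operand 1 and the stride operand 3 on VVP_STORE, the stride is operand 2 on VVP_LOAD, and the VP strided SDNodes expose the stride through getStride() instead of a raw index. The full operand orders in the sketch below (chain, pointer, mask, AVL) are my inference from those indices, not something this diff states:

#include <cstddef>

// Presumed operand layouts behind getLoadStoreStride/getStoredValue. Only
// the indices the accessors use (value = 1, store stride = 3, load
// stride = 2) are confirmed by the diff; the remaining positions are inferred.
enum VVPStoreOperand : size_t { SChain, SValue, SPtr, SStride, SMask, SAvl };
enum VVPLoadOperand : size_t { LChain, LPtr, LStride, LMask, LAvl };

static_assert(SValue == 1 && SStride == 3, "matches getOperand(1)/(3)");
static_assert(LStride == 2, "matches getOperand(2)");

int main() { return 0; }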
@@ -315,6 +315,8 @@ void VETargetLowering::initVPUActions() {
     setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
 #define ADD_VVP_OP(VVP_NAME, ISD_NAME)                                         \
     setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
+    setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, LegalVecVT, Custom);
+    setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_STORE, LegalVecVT, Custom);
 #include "VVPNodes.def"
   }
 
@@ -134,6 +134,8 @@ SDValue VETargetLowering::lowerVVP_LOAD_STORE(SDValue Op,
   // Load specific.
   SDValue PassThru = getNodePassthru(Op);
 
+  SDValue StrideV = getLoadStoreStride(Op, CDAG);
+
   auto DataVT = *getIdiomaticVectorType(Op.getNode());
   auto Packing = getTypePacking(DataVT);
 
@@ -145,7 +147,6 @@ SDValue VETargetLowering::lowerVVP_LOAD_STORE(SDValue Op,
   if (!Mask)
     Mask = CDAG.getConstantMask(Packing, true);
 
-  SDValue StrideV = getLoadStoreStride(Op, CDAG);
   if (IsLoad) {
     MVT LegalDataVT = getLegalVectorType(
         Packing, DataVT.getVectorElementType().getSimpleVT());
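Two output shapes appear in the new tests below. When the mask is provably all-ones, the operation stays a single native strided instruction; a masked store also stays native, since VE vector stores accept a %vm mask operand, but a masked load has to be expanded into an index sequence (vseq), a stride multiply/add to build per-lane addresses, and a masked gather (vgt family). Here is a compile-time caricature of that split, illustrative only; the real check is whether the mask is a known all-ones constant, which this stand-in approximates by inspecting runtime values:

#include <cstdio>
#include <vector>

// Whether a strided *load* must be expanded to a gather: only a mask that is
// known to be all ones can use the unmasked vld/vldu/vldl.zx form.
bool needsGatherExpansion(const std::vector<bool> &Mask) {
  for (bool Lane : Mask)
    if (!Lane)
      return true; // vseq; vmulu.l; vaddu.l; vgt/vgtu/vgtl.zx
  return false;    // single vld/vldu/vldl.zx with scalar base and stride
}

int main() {
  std::printf("%s\n", needsGatherExpansion({true, false}) ? "gather" : "native");
}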
@@ -0,0 +1,170 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s

declare <256 x float> @llvm.experimental.vp.strided.load.v256f32.i64(float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc <256 x float> @vp_strided_load_v256f32_rrm(float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f32_rrm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vseq %v0
; CHECK-NEXT:    vmulu.l %v0, %s1, %v0, %vm1
; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT:    vgtu %v0, %v0, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.i64(float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
  ret <256 x float> %r
}

define fastcc <256 x float> @vp_strided_load_v256f32_rr(float* %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f32_rr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vldu %v0, %s1, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.i64(float* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
  ret <256 x float> %r
}

define fastcc <256 x float> @vp_strided_load_v256f32_ri(float* %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f32_ri:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vldu %v0, 24, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.i64(float* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
  ret <256 x float> %r
}

declare <256 x i32> @llvm.experimental.vp.strided.load.v256i32.i64(i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc <256 x i32> @vp_strided_load_v256i32_rrm(i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i32_rrm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vseq %v0
; CHECK-NEXT:    vmulu.l %v0, %s1, %v0, %vm1
; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT:    vgtl.zx %v0, %v0, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.i64(i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
  ret <256 x i32> %r
}

define fastcc <256 x i32> @vp_strided_load_v256i32_rr(i32* %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i32_rr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vldl.zx %v0, %s1, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.i64(i32* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
  ret <256 x i32> %r
}

define fastcc <256 x i32> @vp_strided_load_v256i32_ri(i32* %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i32_ri:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vldl.zx %v0, 24, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.i64(i32* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
  ret <256 x i32> %r
}

declare <256 x double> @llvm.experimental.vp.strided.load.v256f64.i64(double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc <256 x double> @vp_strided_load_v256f64_rrm(double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f64_rrm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vseq %v0
; CHECK-NEXT:    vmulu.l %v0, %s1, %v0, %vm1
; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT:    vgt %v0, %v0, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.i64(double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
  ret <256 x double> %r
}

define fastcc <256 x double> @vp_strided_load_v256f64_rr(double* %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f64_rr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vld %v0, %s1, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.i64(double* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
  ret <256 x double> %r
}

define fastcc <256 x double> @vp_strided_load_v256f64_ri(double* %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256f64_ri:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vld %v0, 24, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.i64(double* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
  ret <256 x double> %r
}

declare <256 x i64> @llvm.experimental.vp.strided.load.v256i64.i64(i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc <256 x i64> @vp_strided_load_v256i64_rrm(i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i64_rrm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vseq %v0
; CHECK-NEXT:    vmulu.l %v0, %s1, %v0, %vm1
; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
; CHECK-NEXT:    vgt %v0, %v0, 0, 0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  %r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.i64(i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
  ret <256 x i64> %r
}

define fastcc <256 x i64> @vp_strided_load_v256i64_rr(i64* %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i64_rr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vld %v0, %s1, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.i64(i64* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
  ret <256 x i64> %r
}

define fastcc <256 x i64> @vp_strided_load_v256i64_ri(i64* %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_load_v256i64_ri:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vld %v0, 24, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  %r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.i64(i64* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
  ret <256 x i64> %r
}
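As a summary of the load tests above, the scalar-stride instruction tracks the element type; this mapping is read off the CHECK lines, not from an ISA manual:

#include <cstdint>
#include <cstdio>

// VE strided-load mnemonic per element type, as exercised by the tests
// above: 64-bit data uses vld, f32 uses vldu, i32 uses vldl.zx
// (zero-extending the 32-bit value into the 64-bit lane).
const char *stridedLoadMnemonic(bool IsFloat, uint32_t ElemBits) {
  if (ElemBits == 64)
    return "vld"; // covers both <256 x i64> and <256 x double>
  return IsFloat ? "vldu" : "vldl.zx";
}

int main() { std::printf("%s\n", stridedLoadMnemonic(false, 32)); }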
@@ -0,0 +1,158 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s

declare void @llvm.experimental.vp.strided.store.v256f32.i64(<256 x float> %val, float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc void @vp_strided_store_v256f32_rrm(<256 x float> %val, float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256f32_rrm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vstu %v0, %s1, %s0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  call void @llvm.experimental.vp.strided.store.v256f32.i64(<256 x float> %val, float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
  ret void
}

define fastcc void @vp_strided_store_v256f32_rr(<256 x float> %val, float* %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256f32_rr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vstu %v0, %s1, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  call void @llvm.experimental.vp.strided.store.v256f32.i64(<256 x float> %val, float* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
  ret void
}

define fastcc void @vp_strided_store_v256f32_ri(<256 x float> %val, float* %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256f32_ri:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vstu %v0, 24, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  call void @llvm.experimental.vp.strided.store.v256f32.i64(<256 x float> %val, float* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v256i32.i64(<256 x i32> %val, i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc void @vp_strided_store_v256i32_rrm(<256 x i32> %val, i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256i32_rrm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vstl %v0, %s1, %s0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  call void @llvm.experimental.vp.strided.store.v256i32.i64(<256 x i32> %val, i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
  ret void
}

define fastcc void @vp_strided_store_v256i32_rr(<256 x i32> %val, i32* %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256i32_rr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vstl %v0, %s1, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  call void @llvm.experimental.vp.strided.store.v256i32.i64(<256 x i32> %val, i32* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
  ret void
}

define fastcc void @vp_strided_store_v256i32_ri(<256 x i32> %val, i32* %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256i32_ri:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vstl %v0, 24, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  call void @llvm.experimental.vp.strided.store.v256i32.i64(<256 x i32> %val, i32* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v256f64.i64(<256 x double> %val, double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc void @vp_strided_store_v256f64_rrm(<256 x double> %val, double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256f64_rrm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vst %v0, %s1, %s0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  call void @llvm.experimental.vp.strided.store.v256f64.i64(<256 x double> %val, double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
  ret void
}

define fastcc void @vp_strided_store_v256f64_rr(<256 x double> %val, double* %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256f64_rr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vst %v0, %s1, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  call void @llvm.experimental.vp.strided.store.v256f64.i64(<256 x double> %val, double* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
  ret void
}

define fastcc void @vp_strided_store_v256f64_ri(<256 x double> %val, double* %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256f64_ri:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vst %v0, 24, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  call void @llvm.experimental.vp.strided.store.v256f64.i64(<256 x double> %val, double* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v256i64.i64(<256 x i64> %val, i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)

define fastcc void @vp_strided_store_v256i64_rrm(<256 x i64> %val, i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256i64_rrm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vst %v0, %s1, %s0, %vm1
; CHECK-NEXT:    b.l.t (, %s10)
  call void @llvm.experimental.vp.strided.store.v256i64.i64(<256 x i64> %val, i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
  ret void
}

define fastcc void @vp_strided_store_v256i64_rr(<256 x i64> %val, i64* %ptr, i64 %stride, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256i64_rr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s2, %s2, (32)0
; CHECK-NEXT:    lvl %s2
; CHECK-NEXT:    vst %v0, %s1, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  call void @llvm.experimental.vp.strided.store.v256i64.i64(<256 x i64> %val, i64* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
  ret void
}

define fastcc void @vp_strided_store_v256i64_ri(<256 x i64> %val, i64* %ptr, i32 %evl) {
; CHECK-LABEL: vp_strided_store_v256i64_ri:
; CHECK:       # %bb.0:
; CHECK-NEXT:    and %s1, %s1, (32)0
; CHECK-NEXT:    lvl %s1
; CHECK-NEXT:    vst %v0, 24, %s0
; CHECK-NEXT:    b.l.t (, %s10)
  %one = insertelement <256 x i1> undef, i1 1, i32 0
  %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
  call void @llvm.experimental.vp.strided.store.v256i64.i64(<256 x i64> %val, i64* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
  ret void
}