From a823bdf3ab78748c9ff19ef5c55681af79785c65 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 31 Aug 2021 15:29:47 +0100 Subject: [PATCH] [RISCV][VP] Custom lower VP_STORE and VP_LOAD This patch adds support for the vector-predicated `VP_STORE` and `VP_LOAD` nodes. We do this in the same way we lower `MSTORE` and `MLOAD`: to regular load/store instructions via intrinsics. One necessary change was made to `SelectionDAGLegalize` so that `VP_STORE` nodes' operation actions are taken from the stored "value" operands, in the same vein as `STORE` or `MSTORE`. Reviewed By: craig.topper, rogfer01 Differential Revision: https://reviews.llvm.org/D108999 --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 5 + llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 92 +++-- llvm/lib/Target/RISCV/RISCVISelLowering.h | 4 +- .../CodeGen/RISCV/rvv/fixed-vectors-vpload.ll | 257 +++++++++++++ .../RISCV/rvv/fixed-vectors-vpstore.ll | 257 +++++++++++++ llvm/test/CodeGen/RISCV/rvv/vpload.ll | 341 ++++++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vpstore.ll | 341 ++++++++++++++++++ 7 files changed, 1267 insertions(+), 30 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vpload.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vpstore.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 9518fef286e1..9df9e53fe716 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1169,6 +1169,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Node->getOpcode(), cast(Node)->getValue().getValueType()); break; + case ISD::VP_STORE: + Action = TLI.getOperationAction( + Node->getOpcode(), + cast<VPStoreSDNode>(Node)->getValue().getValueType()); + break; case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_ADD: diff --git 
a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index a1e191291359..843849176496 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -557,6 +557,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::MGATHER, VT, Custom); setOperationAction(ISD::MSCATTER, VT, Custom); + setOperationAction(ISD::VP_LOAD, VT, Custom); + setOperationAction(ISD::VP_STORE, VT, Custom); setOperationAction(ISD::VP_GATHER, VT, Custom); setOperationAction(ISD::VP_SCATTER, VT, Custom); @@ -624,6 +626,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::MGATHER, VT, Custom); setOperationAction(ISD::MSCATTER, VT, Custom); + setOperationAction(ISD::VP_LOAD, VT, Custom); + setOperationAction(ISD::VP_STORE, VT, Custom); setOperationAction(ISD::VP_GATHER, VT, Custom); setOperationAction(ISD::VP_SCATTER, VT, Custom); @@ -737,6 +741,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::MGATHER, VT, Custom); setOperationAction(ISD::MSCATTER, VT, Custom); + setOperationAction(ISD::VP_LOAD, VT, Custom); + setOperationAction(ISD::VP_STORE, VT, Custom); setOperationAction(ISD::VP_GATHER, VT, Custom); setOperationAction(ISD::VP_SCATTER, VT, Custom); @@ -816,6 +822,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::MGATHER, VT, Custom); setOperationAction(ISD::MSCATTER, VT, Custom); + setOperationAction(ISD::VP_LOAD, VT, Custom); + setOperationAction(ISD::VP_STORE, VT, Custom); setOperationAction(ISD::VP_GATHER, VT, Custom); setOperationAction(ISD::VP_SCATTER, VT, Custom); @@ -2646,9 +2654,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerFixedLengthVectorStoreToRVV(Op, DAG); return Op; case ISD::MLOAD: - return lowerMLOAD(Op, DAG); + case ISD::VP_LOAD: + return lowerMaskedLoad(Op, DAG); case ISD::MSTORE: - return 
lowerMSTORE(Op, DAG); + case ISD::VP_STORE: + return lowerMaskedStore(Op, DAG); case ISD::SETCC: return lowerFixedLengthVectorSetccToRVV(Op, DAG); case ISD::ADD: @@ -4448,16 +4458,29 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op, Store->getMemoryVT(), Store->getMemOperand()); } -SDValue RISCVTargetLowering::lowerMLOAD(SDValue Op, SelectionDAG &DAG) const { - auto *Load = cast<MaskedLoadSDNode>(Op); - +SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op, + SelectionDAG &DAG) const { SDLoc DL(Op); MVT VT = Op.getSimpleValueType(); - MVT XLenVT = Subtarget.getXLenVT(); - SDValue Mask = Load->getMask(); - SDValue PassThru = Load->getPassThru(); - SDValue VL; + const auto *MemSD = cast<MemSDNode>(Op); + EVT MemVT = MemSD->getMemoryVT(); + MachineMemOperand *MMO = MemSD->getMemOperand(); + SDValue Chain = MemSD->getChain(); + SDValue BasePtr = MemSD->getBasePtr(); + + SDValue Mask, PassThru, VL; + if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) { + Mask = VPLoad->getMask(); + PassThru = DAG.getUNDEF(VT); + VL = VPLoad->getVectorLength(); + } else { + const auto *MLoad = cast<MaskedLoadSDNode>(Op); + Mask = MLoad->getMask(); + PassThru = MLoad->getPassThru(); + } + + MVT XLenVT = Subtarget.getXLenVT(); MVT ContainerVT = VT; if (VT.isFixedLengthVector()) { @@ -4466,18 +4489,17 @@ SDValue RISCVTargetLowering::lowerMLOAD(SDValue Op, SelectionDAG &DAG) const { Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); - VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT); - } else - VL = DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT); + } + + if (!VL) + VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vle_mask, DL, XLenVT); - SDValue Ops[] = {Load->getChain(), IntID, PassThru, - Load->getBasePtr(), Mask, VL}; + SDValue Ops[] = {Chain, IntID, PassThru, BasePtr, Mask, VL}; SDValue Result 
= - DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, - Load->getMemoryVT(), Load->getMemOperand()); - SDValue Chain = Result.getValue(1); + DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO); + Chain = Result.getValue(1); if (VT.isFixedLengthVector()) Result = convertFromScalableVector(VT, Result, DAG, Subtarget); @@ -4485,15 +4507,29 @@ SDValue RISCVTargetLowering::lowerMLOAD(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues({Result, Chain}, DL); } -SDValue RISCVTargetLowering::lowerMSTORE(SDValue Op, SelectionDAG &DAG) const { - auto *Store = cast<MaskedStoreSDNode>(Op); - +SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op, + SelectionDAG &DAG) const { SDLoc DL(Op); - SDValue Val = Store->getValue(); - SDValue Mask = Store->getMask(); + + const auto *MemSD = cast<MemSDNode>(Op); + EVT MemVT = MemSD->getMemoryVT(); + MachineMemOperand *MMO = MemSD->getMemOperand(); + SDValue Chain = MemSD->getChain(); + SDValue BasePtr = MemSD->getBasePtr(); + SDValue Val, Mask, VL; + + if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) { + Val = VPStore->getValue(); + Mask = VPStore->getMask(); + VL = VPStore->getVectorLength(); + } else { + const auto *MStore = cast<MaskedStoreSDNode>(Op); + Val = MStore->getValue(); + Mask = MStore->getMask(); + } + MVT VT = Val.getSimpleValueType(); MVT XLenVT = Subtarget.getXLenVT(); - SDValue VL; MVT ContainerVT = VT; if (VT.isFixedLengthVector()) { @@ -4502,15 +4538,15 @@ SDValue RISCVTargetLowering::lowerMSTORE(SDValue Op, SelectionDAG &DAG) const { Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget); Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); - VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT); - } else - VL = DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT); + } + + if (!VL) + VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vse_mask, DL, XLenVT); return DAG.getMemIntrinsicNode( ISD::INTRINSIC_VOID, DL, 
DAG.getVTList(MVT::Other), - {Store->getChain(), IntID, Val, Store->getBasePtr(), Mask, VL}, - Store->getMemoryVT(), Store->getMemOperand()); + {Chain, IntID, Val, BasePtr, Mask, VL}, MemVT, MMO); } SDValue diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index e3b616b2335d..5c0b0c360f3f 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -564,8 +564,8 @@ private: SDValue lowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerMLOAD(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerMSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op, SelectionDAG &DAG) const; SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll new file mode 100644 index 000000000000..562d75e52677 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -0,0 +1,257 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +declare <2 x i8> @llvm.vp.load.v2i8(<2 x i8>*, <2 x i1>, i32) + +define <2 x i8> @vpload_v2i8(<2 x i8>* %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vle8.v v8, (a0), v0.t +; CHECK-NEXT: ret 
+ %load = call <2 x i8> @llvm.vp.load.v2i8(<2 x i8>* %ptr, <2 x i1> %m, i32 %evl) + ret <2 x i8> %load +} + +declare <4 x i8> @llvm.vp.load.v4i8(<4 x i8>*, <4 x i1>, i32) + +define <4 x i8> @vpload_v4i8(<4 x i8>* %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vle8.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call <4 x i8> @llvm.vp.load.v4i8(<4 x i8>* %ptr, <4 x i1> %m, i32 %evl) + ret <4 x i8> %load +} + +declare <8 x i8> @llvm.vp.load.v8i8(<8 x i8>*, <8 x i1>, i32) + +define <8 x i8> @vpload_v8i8(<8 x i8>* %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vle8.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call <8 x i8> @llvm.vp.load.v8i8(<8 x i8>* %ptr, <8 x i1> %m, i32 %evl) + ret <8 x i8> %load +} + +declare <2 x i16> @llvm.vp.load.v2i16(<2 x i16>*, <2 x i1>, i32) + +define <2 x i16> @vpload_v2i16(<2 x i16>* %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call <2 x i16> @llvm.vp.load.v2i16(<2 x i16>* %ptr, <2 x i1> %m, i32 %evl) + ret <2 x i16> %load +} + +declare <4 x i16> @llvm.vp.load.v4i16(<4 x i16>*, <4 x i1>, i32) + +define <4 x i16> @vpload_v4i16(<4 x i16>* %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call <4 x i16> @llvm.vp.load.v4i16(<4 x i16>* %ptr, <4 x i1> %m, i32 %evl) + ret <4 x i16> %load +} + +declare <8 x i16> @llvm.vp.load.v8i16(<8 x i16>*, <8 x i1>, i32) + +define <8 x i16> @vpload_v8i16(<8 x i16>* %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, 
mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call <8 x i16> @llvm.vp.load.v8i16(<8 x i16>* %ptr, <8 x i1> %m, i32 %evl) + ret <8 x i16> %load +} + +declare <2 x i32> @llvm.vp.load.v2i32(<2 x i32>*, <2 x i1>, i32) + +define <2 x i32> @vpload_v2i32(<2 x i32>* %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call <2 x i32> @llvm.vp.load.v2i32(<2 x i32>* %ptr, <2 x i1> %m, i32 %evl) + ret <2 x i32> %load +} + +declare <4 x i32> @llvm.vp.load.v4i32(<4 x i32>*, <4 x i1>, i32) + +define <4 x i32> @vpload_v4i32(<4 x i32>* %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call <4 x i32> @llvm.vp.load.v4i32(<4 x i32>* %ptr, <4 x i1> %m, i32 %evl) + ret <4 x i32> %load +} + +declare <8 x i32> @llvm.vp.load.v8i32(<8 x i32>*, <8 x i1>, i32) + +define <8 x i32> @vpload_v8i32(<8 x i32>* %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call <8 x i32> @llvm.vp.load.v8i32(<8 x i32>* %ptr, <8 x i1> %m, i32 %evl) + ret <8 x i32> %load +} + +declare <2 x i64> @llvm.vp.load.v2i64(<2 x i64>*, <2 x i1>, i32) + +define <2 x i64> @vpload_v2i64(<2 x i64>* %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call <2 x i64> @llvm.vp.load.v2i64(<2 x i64>* %ptr, <2 x i1> %m, i32 %evl) + ret <2 x i64> %load +} + +declare <4 x i64> @llvm.vp.load.v4i64(<4 x i64>*, <4 x i1>, i32) + +define <4 x i64> @vpload_v4i64(<4 x i64>* %ptr, <4 x i1> %m, i32 zeroext %evl) { +; 
CHECK-LABEL: vpload_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call <4 x i64> @llvm.vp.load.v4i64(<4 x i64>* %ptr, <4 x i1> %m, i32 %evl) + ret <4 x i64> %load +} + +declare <8 x i64> @llvm.vp.load.v8i64(<8 x i64>*, <8 x i1>, i32) + +define <8 x i64> @vpload_v8i64(<8 x i64>* %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call <8 x i64> @llvm.vp.load.v8i64(<8 x i64>* %ptr, <8 x i1> %m, i32 %evl) + ret <8 x i64> %load +} + +declare <2 x half> @llvm.vp.load.v2f16(<2 x half>*, <2 x i1>, i32) + +define <2 x half> @vpload_v2f16(<2 x half>* %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call <2 x half> @llvm.vp.load.v2f16(<2 x half>* %ptr, <2 x i1> %m, i32 %evl) + ret <2 x half> %load +} + +declare <4 x half> @llvm.vp.load.v4f16(<4 x half>*, <4 x i1>, i32) + +define <4 x half> @vpload_v4f16(<4 x half>* %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call <4 x half> @llvm.vp.load.v4f16(<4 x half>* %ptr, <4 x i1> %m, i32 %evl) + ret <4 x half> %load +} + +declare <8 x half> @llvm.vp.load.v8f16(<8 x half>*, <8 x i1>, i32) + +define <8 x half> @vpload_v8f16(<8 x half>* %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call <8 x half> @llvm.vp.load.v8f16(<8 x half>* %ptr, <8 x i1> %m, i32 %evl) + ret <8 x half> %load +} + +declare <2 x float> @llvm.vp.load.v2f32(<2 
x float>*, <2 x i1>, i32) + +define <2 x float> @vpload_v2f32(<2 x float>* %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call <2 x float> @llvm.vp.load.v2f32(<2 x float>* %ptr, <2 x i1> %m, i32 %evl) + ret <2 x float> %load +} + +declare <4 x float> @llvm.vp.load.v4f32(<4 x float>*, <4 x i1>, i32) + +define <4 x float> @vpload_v4f32(<4 x float>* %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call <4 x float> @llvm.vp.load.v4f32(<4 x float>* %ptr, <4 x i1> %m, i32 %evl) + ret <4 x float> %load +} + +declare <8 x float> @llvm.vp.load.v8f32(<8 x float>*, <8 x i1>, i32) + +define <8 x float> @vpload_v8f32(<8 x float>* %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call <8 x float> @llvm.vp.load.v8f32(<8 x float>* %ptr, <8 x i1> %m, i32 %evl) + ret <8 x float> %load +} + +declare <2 x double> @llvm.vp.load.v2f64(<2 x double>*, <2 x i1>, i32) + +define <2 x double> @vpload_v2f64(<2 x double>* %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call <2 x double> @llvm.vp.load.v2f64(<2 x double>* %ptr, <2 x i1> %m, i32 %evl) + ret <2 x double> %load +} + +declare <4 x double> @llvm.vp.load.v4f64(<4 x double>*, <4 x i1>, i32) + +define <4 x double> @vpload_v4f64(<4 x double>* %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT: vle64.v v8, (a0), v0.t +; 
CHECK-NEXT: ret + %load = call <4 x double> @llvm.vp.load.v4f64(<4 x double>* %ptr, <4 x i1> %m, i32 %evl) + ret <4 x double> %load +} + +declare <8 x double> @llvm.vp.load.v8f64(<8 x double>*, <8 x i1>, i32) + +define <8 x double> @vpload_v8f64(<8 x double>* %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call <8 x double> @llvm.vp.load.v8f64(<8 x double>* %ptr, <8 x i1> %m, i32 %evl) + ret <8 x double> %load +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll new file mode 100644 index 000000000000..2a9889c0bbe1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll @@ -0,0 +1,257 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +declare void @llvm.vp.store.v2i8(<2 x i8>, <2 x i8>*, <2 x i1>, i32) + +define void @vpstore_v2i8(<2 x i8> %val, <2 x i8>* %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vse8.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v2i8(<2 x i8> %val, <2 x i8>* %ptr, <2 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v4i8(<4 x i8>, <4 x i8>*, <4 x i1>, i32) + +define void @vpstore_v4i8(<4 x i8> %val, <4 x i8>* %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vse8.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v4i8(<4 
x i8> %val, <4 x i8>* %ptr, <4 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v8i8(<8 x i8>, <8 x i8>*, <8 x i1>, i32) + +define void @vpstore_v8i8(<8 x i8> %val, <8 x i8>* %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vse8.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v8i8(<8 x i8> %val, <8 x i8>* %ptr, <8 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v2i16(<2 x i16>, <2 x i16>*, <2 x i1>, i32) + +define void @vpstore_v2i16(<2 x i16> %val, <2 x i16>* %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v2i16(<2 x i16> %val, <2 x i16>* %ptr, <2 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v4i16(<4 x i16>, <4 x i16>*, <4 x i1>, i32) + +define void @vpstore_v4i16(<4 x i16> %val, <4 x i16>* %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v4i16(<4 x i16> %val, <4 x i16>* %ptr, <4 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v8i16(<8 x i16>, <8 x i16>*, <8 x i1>, i32) + +define void @vpstore_v8i16(<8 x i16> %val, <8 x i16>* %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v8i16(<8 x i16> %val, <8 x i16>* %ptr, <8 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v2i32(<2 x i32>, <2 x i32>*, <2 x i1>, i32) + +define void @vpstore_v2i32(<2 x i32> %val, <2 x i32>* %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v2i32: +; CHECK: # %bb.0: +; 
CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vse32.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v2i32(<2 x i32> %val, <2 x i32>* %ptr, <2 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v4i32(<4 x i32>, <4 x i32>*, <4 x i1>, i32) + +define void @vpstore_v4i32(<4 x i32> %val, <4 x i32>* %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v4i32(<4 x i32> %val, <4 x i32>* %ptr, <4 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v8i32(<8 x i32>, <8 x i32>*, <8 x i1>, i32) + +define void @vpstore_v8i32(<8 x i32> %val, <8 x i32>* %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vse32.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v8i32(<8 x i32> %val, <8 x i32>* %ptr, <8 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v2i64(<2 x i64>, <2 x i64>*, <2 x i1>, i32) + +define void @vpstore_v2i64(<2 x i64> %val, <2 x i64>* %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT: vse64.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v2i64(<2 x i64> %val, <2 x i64>* %ptr, <2 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v4i64(<4 x i64>, <4 x i64>*, <4 x i1>, i32) + +define void @vpstore_v4i64(<4 x i64> %val, <4 x i64>* %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT: vse64.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v4i64(<4 x i64> %val, <4 x i64>* %ptr, <4 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v8i64(<8 x i64>, <8 x i64>*, <8 x i1>, 
i32) + +define void @vpstore_v8i64(<8 x i64> %val, <8 x i64>* %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT: vse64.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v8i64(<8 x i64> %val, <8 x i64>* %ptr, <8 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v2f16(<2 x half>, <2 x half>*, <2 x i1>, i32) + +define void @vpstore_v2f16(<2 x half> %val, <2 x half>* %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v2f16(<2 x half> %val, <2 x half>* %ptr, <2 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v4f16(<4 x half>, <4 x half>*, <4 x i1>, i32) + +define void @vpstore_v4f16(<4 x half> %val, <4 x half>* %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v4f16(<4 x half> %val, <4 x half>* %ptr, <4 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v8f16(<8 x half>, <8 x half>*, <8 x i1>, i32) + +define void @vpstore_v8f16(<8 x half> %val, <8 x half>* %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v8f16(<8 x half> %val, <8 x half>* %ptr, <8 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v2f32(<2 x float>, <2 x float>*, <2 x i1>, i32) + +define void @vpstore_v2f32(<2 x float> %val, <2 x float>* %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vse32.v v8, (a0), v0.t +; 
CHECK-NEXT: ret + call void @llvm.vp.store.v2f32(<2 x float> %val, <2 x float>* %ptr, <2 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v4f32(<4 x float>, <4 x float>*, <4 x i1>, i32) + +define void @vpstore_v4f32(<4 x float> %val, <4 x float>* %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v4f32(<4 x float> %val, <4 x float>* %ptr, <4 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v8f32(<8 x float>, <8 x float>*, <8 x i1>, i32) + +define void @vpstore_v8f32(<8 x float> %val, <8 x float>* %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vse32.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v8f32(<8 x float> %val, <8 x float>* %ptr, <8 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v2f64(<2 x double>, <2 x double>*, <2 x i1>, i32) + +define void @vpstore_v2f64(<2 x double> %val, <2 x double>* %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT: vse64.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v2f64(<2 x double> %val, <2 x double>* %ptr, <2 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v4f64(<4 x double>, <4 x double>*, <4 x i1>, i32) + +define void @vpstore_v4f64(<4 x double> %val, <4 x double>* %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT: vse64.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v4f64(<4 x double> %val, <4 x double>* %ptr, <4 x i1> %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.v8f64(<8 x double>, <8 x double>*, <8 x i1>, i32) + +define void 
@vpstore_v8f64(<8 x double> %val, <8 x double>* %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT: vse64.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.v8f64(<8 x double> %val, <8 x double>* %ptr, <8 x i1> %m, i32 %evl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll new file mode 100644 index 000000000000..24c31ccae122 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll @@ -0,0 +1,341 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +declare @llvm.vp.load.nxv1i8(*, , i32) + +define @vpload_nxv1i8(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vle8.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv1i8(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv2i8(*, , i32) + +define @vpload_nxv2i8(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vle8.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv2i8(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv4i8(*, , i32) + +define @vpload_nxv4i8(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vle8.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv4i8(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv8i8(*, , i32) + +define @vpload_nxv8i8(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: 
vpload_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT: vle8.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv8i8(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv1i16(*, , i32) + +define @vpload_nxv1i16(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv1i16(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv2i16(*, , i32) + +define @vpload_nxv2i16(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv2i16(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv4i16(*, , i32) + +define @vpload_nxv4i16(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv4i16(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv8i16(*, , i32) + +define @vpload_nxv8i16(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv8i16(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv1i32(*, , i32) + +define @vpload_nxv1i32(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv1i32(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv2i32(*, , i32) + +define @vpload_nxv2i32(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: 
vpload_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv2i32(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv4i32(*, , i32) + +define @vpload_nxv4i32(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv4i32(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv8i32(*, , i32) + +define @vpload_nxv8i32(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv8i32(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv1i64(*, , i32) + +define @vpload_nxv1i64(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv1i64(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv2i64(*, , i32) + +define @vpload_nxv2i64(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv2i64(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv4i64(*, , i32) + +define @vpload_nxv4i64(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv4i64(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv8i64(*, , i32) + +define @vpload_nxv8i64(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: 
vpload_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv8i64(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv1f16(*, , i32) + +define @vpload_nxv1f16(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv1f16(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv2f16(*, , i32) + +define @vpload_nxv2f16(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv2f16(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv4f16(*, , i32) + +define @vpload_nxv4f16(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv4f16(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv8f16(*, , i32) + +define @vpload_nxv8f16(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv8f16(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv1f32(*, , i32) + +define @vpload_nxv1f32(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv1f32(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv2f32(*, , i32) + +define @vpload_nxv2f32(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: 
vpload_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv2f32(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv4f32(*, , i32) + +define @vpload_nxv4f32(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv4f32(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv8f32(*, , i32) + +define @vpload_nxv8f32(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv8f32(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv1f64(*, , i32) + +define @vpload_nxv1f64(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv1f64(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv2f64(*, , i32) + +define @vpload_nxv2f64(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv2f64(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv4f64(*, , i32) + +define @vpload_nxv4f64(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv4f64(* %ptr, %m, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv8f64(*, , i32) + +define @vpload_nxv8f64(* %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: 
vpload_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: ret + %load = call @llvm.vp.load.nxv8f64(* %ptr, %m, i32 %evl) + ret %load +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll new file mode 100644 index 000000000000..de402428f31c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll @@ -0,0 +1,341 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +declare void @llvm.vp.store.nxv1i8(, *, , i32) + +define void @vpstore_nxv1i8( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vse8.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv1i8( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv2i8(, *, , i32) + +define void @vpstore_nxv2i8( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vse8.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv2i8( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv4i8(, *, , i32) + +define void @vpstore_nxv4i8( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vse8.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv4i8( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv8i8(, *, , i32) + +define void @vpstore_nxv8i8( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv8i8: +; CHECK: # %bb.0: 
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT: vse8.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv8i8( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv1i16(, *, , i32) + +define void @vpstore_nxv1i16( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv1i16( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv2i16(, *, , i32) + +define void @vpstore_nxv2i16( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv2i16( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv4i16(, *, , i32) + +define void @vpstore_nxv4i16( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv4i16( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv8i16(, *, , i32) + +define void @vpstore_nxv8i16( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv8i16( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv1i32(, *, , i32) + +define void @vpstore_nxv1i32( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vse32.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv1i32( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void 
@llvm.vp.store.nxv2i32(, *, , i32) + +define void @vpstore_nxv2i32( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv2i32( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv4i32(, *, , i32) + +define void @vpstore_nxv4i32( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vse32.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv4i32( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv8i32(, *, , i32) + +define void @vpstore_nxv8i32( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: vse32.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv8i32( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv1i64(, *, , i32) + +define void @vpstore_nxv1i64( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT: vse64.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv1i64( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv2i64(, *, , i32) + +define void @vpstore_nxv2i64( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT: vse64.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv2i64( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv4i64(, *, , i32) + +define void @vpstore_nxv4i64( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; 
CHECK-NEXT: vse64.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv4i64( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv8i64(, *, , i32) + +define void @vpstore_nxv8i64( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vse64.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv8i64( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv1f16(, *, , i32) + +define void @vpstore_nxv1f16( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv1f16( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv2f16(, *, , i32) + +define void @vpstore_nxv2f16( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv2f16( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv4f16(, *, , i32) + +define void @vpstore_nxv4f16( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv4f16( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv8f16(, *, , i32) + +define void @vpstore_nxv8f16( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv8f16( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv1f32(, *, , i32) + +define void 
@vpstore_nxv1f32( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vse32.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv1f32( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv2f32(, *, , i32) + +define void @vpstore_nxv2f32( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vse32.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv2f32( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv4f32(, *, , i32) + +define void @vpstore_nxv4f32( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vse32.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv4f32( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv8f32(, *, , i32) + +define void @vpstore_nxv8f32( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: vse32.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv8f32( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv1f64(, *, , i32) + +define void @vpstore_nxv1f64( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT: vse64.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv1f64( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv2f64(, *, , i32) + +define void @vpstore_nxv2f64( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT: vse64.v v8, (a0), v0.t +; 
CHECK-NEXT: ret + call void @llvm.vp.store.nxv2f64( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv4f64(, *, , i32) + +define void @vpstore_nxv4f64( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT: vse64.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv4f64( %val, * %ptr, %m, i32 %evl) + ret void +} + +declare void @llvm.vp.store.nxv8f64(, *, , i32) + +define void @vpstore_nxv8f64( %val, * %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpstore_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vse64.v v8, (a0), v0.t +; CHECK-NEXT: ret + call void @llvm.vp.store.nxv8f64( %val, * %ptr, %m, i32 %evl) + ret void +}