[RISCV] Begin to support more subvector inserts/extracts
This patch adds support for INSERT_SUBVECTOR and EXTRACT_SUBVECTOR (nominally where both operands are scalable vector types), where the vector, subvector, and index align sufficiently to allow decomposition to subregister manipulation:

* For extracts, the extracted subvector must correctly align with the lower elements of a vector register.
* For inserts, the inserted subvector must be at least one full vector register, and correctly align as above.

This approach should work for fixed-length vector insertion/extraction too, but that will come later.

Reviewed By: craig.topper, khchen, arcbbb

Differential Revision: https://reviews.llvm.org/D96873
Parent: 0176fecfbc
Commit: d876214990
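The core of the patch is the index-halving decomposition in the first hunk below. As a concrete illustration, here is a small standalone C++ model of that loop for SEW=32 types, where one vector register holds a known minimum of two i32 elements. It is illustration only, not code from the patch: the function name and path string are made up, and the element-count-based loop condition is a simplification of the register-class walk the real code performs.

  #include <cstdio>
  #include <string>

  // Keep halving the vector's LMUL group while it is larger than both the
  // subvector and a single vector register, recording which half the element
  // index falls into. Returns the element index left over afterwards.
  static unsigned decompose(unsigned VecElts, unsigned SubElts, unsigned Idx,
                            std::string &Path) {
    const unsigned EltsPerVReg = 2; // one VR holds nxv2i32
    while (VecElts > SubElts && VecElts > EltsPerVReg) {
      VecElts /= 2;
      bool IsHi = Idx >= VecElts;
      Path += IsHi ? "_hi" : "_lo";
      if (IsHi)
        Idx -= VecElts; // rebase the index into the high half
    }
    return Idx;
  }

  int main() {
    std::string Path;
    // nxv16i32 @ 12 -> nxv2i32: halve 16->8 (hi), 8->4 (hi), 4->2 (lo),
    // mirroring sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 in the hunk below.
    unsigned Rem = decompose(16, 2, 12, Path);
    std::printf("path%s, leftover index %u\n", Path.c_str(), Rem);
    // Prints "path_hi_hi_lo, leftover index 0": the extract is
    // register-aligned, so subregister manipulation alone suffices.
  }

A nonzero leftover index means the subvector does not start at a vector register boundary, and the patch bails out of this selection path in that case.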
@@ -382,6 +382,48 @@ void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned IntNo,
   ReplaceNode(Node, Store);
 }
 
+static unsigned getRegClassIDForVecVT(MVT VT) {
+  if (VT.getVectorElementType() == MVT::i1)
+    return RISCV::VRRegClassID;
+  return getRegClassIDForLMUL(getLMUL(VT));
+}
+
+// Attempt to decompose a subvector insert/extract between VecVT and
+// SubVecVT via subregister indices. Returns the subregister index that
+// can perform the subvector insert/extract with the given element index, as
+// well as the index corresponding to any leftover subvectors that must be
+// further inserted/extracted within the register class for SubVecVT.
+static std::pair<unsigned, unsigned>
+decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT,
+                                         unsigned InsertExtractIdx,
+                                         const RISCVRegisterInfo *TRI) {
+  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
+                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
+                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
+                "Register classes not ordered");
+  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
+  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
+  // Try to compose a subregister index that takes us from the incoming
+  // LMUL>1 register class down to the outgoing one. At each step we halve
+  // the LMUL:
+  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
+  // Note that this is not guaranteed to find a subregister index, such as
+  // when we are extracting from one VR type to another.
+  unsigned SubRegIdx = RISCV::NoSubRegister;
+  for (const unsigned RCID :
+       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
+    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
+      VecVT = VecVT.getHalfNumVectorElementsVT();
+      bool IsHi =
+          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
+      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
+                                            getSubregIndexByMVT(VecVT, IsHi));
+      if (IsHi)
+        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
+    }
+  return {SubRegIdx, InsertExtractIdx};
+}
+
 void RISCVDAGToDAGISel::Select(SDNode *Node) {
   // If we have a custom node, we have already selected.
   if (Node->isMachineOpcode()) {
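A quick reference for the register classes involved: getRegClassIDForVecVT above maps mask (i1) types to VR and everything else to the class of its LMUL group. A hypothetical helper, written only to spell out that mapping for the i32 types exercised by the tests (not part of the patch):

  // Illustration only: the register class an nxv<N>i32 type selects into;
  // one vector register holds nxv2i32.
  static const char *regClassForNxvI32(unsigned KnownMinElts) {
    switch (KnownMinElts) {
    case 16: return "VRM8"; // nxv16i32, LMUL=8
    case 8:  return "VRM4"; // nxv8i32,  LMUL=4
    case 4:  return "VRM2"; // nxv4i32,  LMUL=2
    default: return "VR";   // nxv2i32 (LMUL=1) and fractional-LMUL types
    }
  }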
@@ -704,56 +746,127 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     break;
   }
   case ISD::INSERT_SUBVECTOR: {
-    // Bail when not a "cast" like insert_subvector.
-    if (Node->getConstantOperandVal(2) != 0)
-      break;
-    if (!Node->getOperand(0).isUndef())
-      break;
+    SDValue V = Node->getOperand(0);
+    SDValue SubV = Node->getOperand(1);
+    SDLoc DL(SubV);
+    auto Idx = Node->getConstantOperandVal(2);
+    MVT SubVecVT = Node->getOperand(1).getSimpleValueType();
 
-    // Bail when normal isel should do the job.
-    MVT InVT = Node->getOperand(1).getSimpleValueType();
-    if (VT.isFixedLengthVector() || InVT.isScalableVector())
-      break;
+    // TODO: This method of selecting INSERT_SUBVECTOR should work
+    // with any type of insertion (fixed <-> scalable) but we don't yet
+    // correctly identify the canonical register class for fixed-length types.
+    // For now, keep the two paths separate.
+    if (VT.isScalableVector() && SubVecVT.isScalableVector()) {
+      bool IsFullVecReg = false;
+      switch (getLMUL(SubVecVT)) {
+      default:
+        break;
+      case RISCVVLMUL::LMUL_1:
+      case RISCVVLMUL::LMUL_2:
+      case RISCVVLMUL::LMUL_4:
+      case RISCVVLMUL::LMUL_8:
+        IsFullVecReg = true;
+        break;
+      }
 
-    unsigned RegClassID;
-    if (VT.getVectorElementType() == MVT::i1)
-      RegClassID = RISCV::VRRegClassID;
-    else
-      RegClassID = getRegClassIDForLMUL(getLMUL(VT));
+      // If the subvector doesn't occupy a full vector register then we can't
+      // insert it purely using subregister manipulation. We must not clobber
+      // the untouched elements (say, in the upper half of the VR register).
+      if (!IsFullVecReg)
+        break;
 
-    SDValue V = Node->getOperand(1);
-    SDLoc DL(V);
-    SDValue RC =
-        CurDAG->getTargetConstant(RegClassID, DL, Subtarget->getXLenVT());
-    SDNode *NewNode =
-        CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
-    ReplaceNode(Node, NewNode);
-    return;
+      const auto *TRI = Subtarget->getRegisterInfo();
+      unsigned SubRegIdx;
+      std::tie(SubRegIdx, Idx) =
+          decomposeSubvectorInsertExtractToSubRegs(VT, SubVecVT, Idx, TRI);
+
+      // If the Idx hasn't been completely eliminated then this is a subvector
+      // insert which doesn't naturally align to a vector register. These must
+      // be handled using instructions to manipulate the vector registers.
+      if (Idx != 0)
+        break;
+
+      SDNode *NewNode = CurDAG->getMachineNode(
+          TargetOpcode::INSERT_SUBREG, DL, VT, V, SubV,
+          CurDAG->getTargetConstant(SubRegIdx, DL, Subtarget->getXLenVT()));
+      return ReplaceNode(Node, NewNode);
+    }
+
+    if (VT.isScalableVector() && SubVecVT.isFixedLengthVector()) {
+      // Bail when not a "cast" like insert_subvector.
+      if (Idx != 0)
+        break;
+      if (!Node->getOperand(0).isUndef())
+        break;
+
+      unsigned RegClassID = getRegClassIDForVecVT(VT);
+
+      SDValue RC =
+          CurDAG->getTargetConstant(RegClassID, DL, Subtarget->getXLenVT());
+      SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+                                               DL, VT, SubV, RC);
+      ReplaceNode(Node, NewNode);
+      return;
+    }
+    break;
   }
   case ISD::EXTRACT_SUBVECTOR: {
-    // Bail when not a "cast" like extract_subvector.
-    if (Node->getConstantOperandVal(1) != 0)
-      break;
-
-    // Bail when normal isel can do the job.
-    MVT InVT = Node->getOperand(0).getSimpleValueType();
-    if (VT.isScalableVector() || InVT.isFixedLengthVector())
-      break;
-
-    unsigned RegClassID;
-    if (InVT.getVectorElementType() == MVT::i1)
-      RegClassID = RISCV::VRRegClassID;
-    else
-      RegClassID = getRegClassIDForLMUL(getLMUL(InVT));
-
     SDValue V = Node->getOperand(0);
+    auto Idx = Node->getConstantOperandVal(1);
+    MVT InVT = Node->getOperand(0).getSimpleValueType();
     SDLoc DL(V);
-    SDValue RC =
-        CurDAG->getTargetConstant(RegClassID, DL, Subtarget->getXLenVT());
-    SDNode *NewNode =
-        CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
-    ReplaceNode(Node, NewNode);
-    return;
+
+    // TODO: This method of selecting EXTRACT_SUBVECTOR should work
+    // with any type of extraction (fixed <-> scalable) but we don't yet
+    // correctly identify the canonical register class for fixed-length types.
+    // For now, keep the two paths separate.
+    if (VT.isScalableVector() && InVT.isScalableVector()) {
+      const auto *TRI = Subtarget->getRegisterInfo();
+      unsigned SubRegIdx;
+      std::tie(SubRegIdx, Idx) =
+          decomposeSubvectorInsertExtractToSubRegs(InVT, VT, Idx, TRI);
+
+      // If the Idx hasn't been completely eliminated then this is a subvector
+      // extract which doesn't naturally align to a vector register. These must
+      // be handled using instructions to manipulate the vector registers.
+      if (Idx != 0)
+        break;
+
+      // If we haven't set a SubRegIdx, then we must be going between LMUL<=1
+      // types (VR -> VR). This can be done as a copy.
+      if (SubRegIdx == RISCV::NoSubRegister) {
+        unsigned RegClassID = getRegClassIDForVecVT(VT);
+        unsigned InRegClassID = getRegClassIDForVecVT(InVT);
+        assert(RegClassID == InRegClassID &&
+               RegClassID == RISCV::VRRegClassID &&
+               "Unexpected subvector extraction");
+        SDValue RC =
+            CurDAG->getTargetConstant(InRegClassID, DL, Subtarget->getXLenVT());
+        SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+                                                 DL, VT, V, RC);
+        return ReplaceNode(Node, NewNode);
+      }
+      SDNode *NewNode = CurDAG->getMachineNode(
+          TargetOpcode::EXTRACT_SUBREG, DL, VT, V,
+          CurDAG->getTargetConstant(SubRegIdx, DL, Subtarget->getXLenVT()));
+      return ReplaceNode(Node, NewNode);
+    }
+
+    if (VT.isFixedLengthVector() && InVT.isScalableVector()) {
+      // Bail when not a "cast" like extract_subvector.
+      if (Idx != 0)
+        break;
+
+      unsigned InRegClassID = getRegClassIDForVecVT(InVT);
+
+      SDValue RC =
+          CurDAG->getTargetConstant(InRegClassID, DL, Subtarget->getXLenVT());
+      SDNode *NewNode =
+          CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
+      ReplaceNode(Node, NewNode);
+      return;
+    }
+    break;
   }
   }
 
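In the tests that follow, each register-aligned extract lowers either to a whole-register move (vmv1r.v/vmv2r.v/vmv4r.v) from the right register of the incoming LMUL group, or to no instruction at all when the result aliases the group's low subregister (the "# kill" comments). A rough rule for the SEW=32 extract tests below, stated as a hypothetical helper rather than anything in the patch:

  // The vector argument arrives in the LMUL group starting at v8, and one
  // VR holds two i32 elements, so an aligned extract at element Idx reads
  // from v(8 + Idx / 2). Illustration only.
  static unsigned extractSrcVReg(unsigned Idx) { return 8 + Idx / 2; }
  // e.g. extract_nxv16i32_nxv2i32_14: 8 + 14 / 2 == 15 -> vmv1r.v v8, v15

The insert tests further down are the mirror image: the subvector arrives in v12 or v16 and is copied into register v(8 + Idx / 2) of the destination group.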
@@ -0,0 +1,226 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 4 x i32> @extract_nxv8i32_nxv4i32_0(<vscale x 8 x i32> %vec) {
+; CHECK-LABEL: extract_nxv8i32_nxv4i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $v8m2 killed $v8m2 killed $v8m4
+; CHECK-NEXT:    ret
+  %c = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, i64 0)
+  ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 4 x i32> @extract_nxv8i32_nxv4i32_4(<vscale x 8 x i32> %vec) {
+; CHECK-LABEL: extract_nxv8i32_nxv4i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %c = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, i64 4)
+  ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_0(<vscale x 8 x i32> %vec) {
+; CHECK-LABEL: extract_nxv8i32_nxv2i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $v8 killed $v8 killed $v8m4
+; CHECK-NEXT:    ret
+  %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, i64 0)
+  ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_2(<vscale x 8 x i32> %vec) {
+; CHECK-LABEL: extract_nxv8i32_nxv2i32_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, i64 2)
+  ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_4(<vscale x 8 x i32> %vec) {
+; CHECK-LABEL: extract_nxv8i32_nxv2i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, i64 4)
+  ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv8i32_nxv2i32_6(<vscale x 8 x i32> %vec) {
+; CHECK-LABEL: extract_nxv8i32_nxv2i32_6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v8, v11
+; CHECK-NEXT:    ret
+  %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, i64 6)
+  ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 8 x i32> @extract_nxv16i32_nxv8i32_0(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv8i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $v8m4 killed $v8m4 killed $v8m8
+; CHECK-NEXT:    ret
+  %c = call <vscale x 8 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
+  ret <vscale x 8 x i32> %c
+}
+
+define <vscale x 8 x i32> @extract_nxv16i32_nxv8i32_8(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv8i32_8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %c = call <vscale x 8 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, i64 8)
+  ret <vscale x 8 x i32> %c
+}
+
+define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_0(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv4i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $v8m2 killed $v8m2 killed $v8m8
+; CHECK-NEXT:    ret
+  %c = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
+  ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_4(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv4i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %c = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 4)
+  ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_8(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv4i32_8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv2r.v v8, v12
+; CHECK-NEXT:    ret
+  %c = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 8)
+  ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 4 x i32> @extract_nxv16i32_nxv4i32_12(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv4i32_12:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv2r.v v8, v14
+; CHECK-NEXT:    ret
+  %c = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 12)
+  ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_0(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv2i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $v8 killed $v8 killed $v8m8
+; CHECK-NEXT:    ret
+  %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
+  ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_2(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv2i32_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 2)
+  ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_4(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv2i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 4)
+  ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_6(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv2i32_6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v8, v11
+; CHECK-NEXT:    ret
+  %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 6)
+  ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_8(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv2i32_8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v8, v12
+; CHECK-NEXT:    ret
+  %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 8)
+  ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_10(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv2i32_10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v8, v13
+; CHECK-NEXT:    ret
+  %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 10)
+  ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_12(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv2i32_12:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v8, v14
+; CHECK-NEXT:    ret
+  %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 12)
+  ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 2 x i32> @extract_nxv16i32_nxv2i32_14(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv2i32_14:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v8, v15
+; CHECK-NEXT:    ret
+  %c = call <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 14)
+  ret <vscale x 2 x i32> %c
+}
+
+define <vscale x 1 x i32> @extract_nxv16i32_nxv1i32_0(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv1i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $v8 killed $v8 killed $v8m8
+; CHECK-NEXT:    ret
+  %c = call <vscale x 1 x i32> @llvm.experimental.vector.extract.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
+  ret <vscale x 1 x i32> %c
+}
+
+; TODO: Extracts that don't align to a vector register are not yet supported.
+; In this case we want to extract the upper half of the lowest VR subregister
+; in the LMUL group.
+; define <vscale x 1 x i32> @extract_nxv16i32_nxv1i32_1(<vscale x 16 x i32> %vec) {
+;   %c = call <vscale x 1 x i32> @llvm.experimental.vector.extract.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, i64 1)
+;   ret <vscale x 1 x i32> %c
+; }
+
+define <vscale x 1 x i32> @extract_nxv16i32_nxv1i32_2(<vscale x 16 x i32> %vec) {
+; CHECK-LABEL: extract_nxv16i32_nxv1i32_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %c = call <vscale x 1 x i32> @llvm.experimental.vector.extract.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, i64 2)
+  ret <vscale x 1 x i32> %c
+}
+
+define <vscale x 1 x i32> @extract_nxv2i32_nxv1i32_0(<vscale x 2 x i32> %vec) {
+; CHECK-LABEL: extract_nxv2i32_nxv1i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  %c = call <vscale x 1 x i32> @llvm.experimental.vector.extract.nxv1i32.nxv2i32(<vscale x 2 x i32> %vec, i64 0)
+  ret <vscale x 1 x i32> %c
+}
+
+declare <vscale x 1 x i32> @llvm.experimental.vector.extract.nxv1i32.nxv2i32(<vscale x 2 x i32> %vec, i64 %idx)
+
+declare <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, i64 %idx)
+declare <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, i64 %idx)
+
+declare <vscale x 1 x i32> @llvm.experimental.vector.extract.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, i64 %idx)
+declare <vscale x 2 x i32> @llvm.experimental.vector.extract.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, i64 %idx)
+declare <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 %idx)
+declare <vscale x 8 x i32> @llvm.experimental.vector.extract.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, i64 %idx)
@@ -0,0 +1,206 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv4i32_0(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv4i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv2r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec, i64 0)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv4i32_4(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv4i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv2r.v v10, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec, i64 4)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_0(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 0)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_2(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v9, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 2)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_4(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v10, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 4)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_6(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v11, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 6)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv8i32_0(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv8i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv4r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec, i64 0)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv8i32_8(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv8i32_8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv4r.v v12, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec, i64 8)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_0(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv2r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 0)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_4(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv2r.v v10, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 4)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_8(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv2r.v v12, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 8)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_12(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_12:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv2r.v v14, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 12)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_0(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 0)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_2(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v9, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 2)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_4(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v10, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 4)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_6(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v11, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 6)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_8(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v12, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 8)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_10(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_10:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v13, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 10)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_12(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_12:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v14, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 12)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_14(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_14:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v15, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 14)
+  ret <vscale x 16 x i32> %v
+}
+
+; TODO: Inserts that are less than LMUL=1 are not yet supported. In this case
+; we need to mask out the unaffected elements (top half of the VR %subvec
+; register).
+;define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_0(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) {
+;  %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 0)
+;  ret <vscale x 16 x i32> %v
+;}
+
+; TODO: Inserts that don't align to a vector register are not yet supported.
+; In this case we want to insert the subvector into the upper half of the
+; lowest VR subregister in the LMUL group.
+;define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_1(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) {
+;  %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 1)
+;  ret <vscale x 16 x i32> %v
+;}
+
+declare <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32>, <vscale x 2 x i32>, i64 %idx)
+declare <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32>, <vscale x 4 x i32>, i64 %idx)
+
+declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32>, <vscale x 1 x i32>, i64 %idx)
+declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32>, <vscale x 2 x i32>, i64 %idx)
+declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32>, <vscale x 4 x i32>, i64 %idx)
+declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32>, <vscale x 8 x i32>, i64 %idx)