[RISCV] Add side-effect-free vsetvli intrinsics

This patch introduces new intrinsics that enable the use of vsetvli in
contexts where only the returned vector length is of interest. The
pre-existing intrinsics are marked as having side effects, which prevents
even trivial optimizations on or across them.
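
For illustration, a minimal IR sketch (not part of this patch) of what this
means for dead-code elimination:

  declare i64 @llvm.riscv.vsetvli.i64(i64, i64, i64)
  declare i64 @llvm.riscv.vsetvli.opt.i64(i64, i64, i64)

  ; The existing intrinsic is modelled with side effects, so this otherwise
  ; dead call is kept and a vsetvli is still emitted.
  define void @dead_vsetvli(i64 %avl) {
    %vl = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 0, i64 0)
    ret void
  }

  ; The new intrinsic is IntrNoMem, so the same unused call can simply be
  ; dropped (see the *_nouse tests below).
  define void @dead_vsetvli_opt(i64 %avl) {
    %vl = call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 0, i64 0)
    ret void
  }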

These intrinsics are intended for situations where the vector length is then
fed to RVV intrinsics or to vector-predication intrinsics, for example during
loop vectorization. Those codegen paths ensure that the generated instructions
carry their own implicit vsetvli, so the vector length and vtype can be relied
upon to be correct.
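
For instance, the returned length can be passed straight through as the
explicit vector length of a vector-predicated operation. A hypothetical
sketch; the surrounding VP code is illustrative and not part of this change:

  declare i32 @llvm.riscv.vsetvli.opt.i32(i32, i32, i32)
  declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)

  define <vscale x 2 x i32> @vp_add_with_opt_vl(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 %avl) {
    ; Query the vector length for SEW=32, LMUL=1 without pinning vl/vtype.
    %evl = call i32 @llvm.riscv.vsetvli.opt.i32(i32 %avl, i32 2, i32 0)
    ; The VP operation's codegen emits its own implicit vsetvli, so only the
    ; value of %evl matters here, not the vsetvli's side effects.
    %r = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 %evl)
    ret <vscale x 2 x i32> %r
  }

Since the intrinsic has no side effects, later passes are free to CSE, hoist,
or delete such calls based purely on their returned value.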

No corresponding C builtins are planned at this stage, though that is a
possibility for the future if the need arises.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117910
Author: Fraser Cormack
Date:   2022-01-21 17:58:39 +00:00
Parent: ada6d78a78
Commit: d42678b453

6 changed files with 226 additions and 49 deletions


@@ -159,6 +159,22 @@ let TargetPrefix = "riscv" in {
                                                    ImmArg<ArgIndex<0>>,
                                                    ImmArg<ArgIndex<1>>]>;

  // Versions without side effects: better optimizable and usable if only the
  // returned vector length is important.
  def int_riscv_vsetvli_opt : Intrinsic<[llvm_anyint_ty],
                               /* AVL */ [LLVMMatchType<0>,
                               /* VSEW */  LLVMMatchType<0>,
                               /* VLMUL */ LLVMMatchType<0>],
                                          [IntrNoMem,
                                           ImmArg<ArgIndex<1>>,
                                           ImmArg<ArgIndex<2>>]>;
  def int_riscv_vsetvlimax_opt : Intrinsic<[llvm_anyint_ty],
                                /* VSEW */ [LLVMMatchType<0>,
                                /* VLMUL */ LLVMMatchType<0>],
                                           [IntrNoMem,
                                            ImmArg<ArgIndex<0>>,
                                            ImmArg<ArgIndex<1>>]>;

  // For unit stride load
  // Input: (pointer, vl)
  class RISCVUSLoad


@@ -495,6 +495,75 @@ void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
  ReplaceNode(Node, Store);
}

void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
  if (!Subtarget->hasVInstructions())
    return;

  assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) &&
         "Unexpected opcode");

  SDLoc DL(Node);
  MVT XLenVT = Subtarget->getXLenVT();

  bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNoOffset = HasChain ? 1 : 0;
  unsigned IntNo = Node->getConstantOperandVal(IntNoOffset);

  assert((IntNo == Intrinsic::riscv_vsetvli ||
          IntNo == Intrinsic::riscv_vsetvlimax ||
          IntNo == Intrinsic::riscv_vsetvli_opt ||
          IntNo == Intrinsic::riscv_vsetvlimax_opt) &&
         "Unexpected vsetvli intrinsic");

  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax ||
               IntNo == Intrinsic::riscv_vsetvlimax_opt;
  unsigned Offset = IntNoOffset + (VLMax ? 1 : 2);

  assert(Node->getNumOperands() == Offset + 2 &&
         "Unexpected number of operands");

  unsigned SEW =
      RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
  RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
      Node->getConstantOperandVal(Offset + 1) & 0x7);

  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
                                            /*MaskAgnostic*/ false);
  SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);

  SmallVector<EVT, 2> VTs = {XLenVT};
  if (HasChain)
    VTs.push_back(MVT::Other);

  SDValue VLOperand;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (VLMax) {
    VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
    Opcode = RISCV::PseudoVSETVLIX0;
  } else {
    VLOperand = Node->getOperand(IntNoOffset + 1);

    if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
      uint64_t AVL = C->getZExtValue();
      if (isUInt<5>(AVL)) {
        SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
        SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp};
        if (HasChain)
          Ops.push_back(Node->getOperand(0));
        ReplaceNode(
            Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops));
        return;
      }
    }
  }

  SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp};
  if (HasChain)
    Ops.push_back(Node->getOperand(0));

  ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops));
}

void RISCVDAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we have already selected.
@@ -1017,6 +1086,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
                                               {Cmp, Mask, VL, MaskSEW}));
      return;
    }
    case Intrinsic::riscv_vsetvli_opt:
    case Intrinsic::riscv_vsetvlimax_opt:
      return selectVSETVLI(Node);
    }
    break;
  }
@@ -1026,54 +1098,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
    // By default we do not custom select any intrinsic.
    default:
      break;
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax: {
      if (!Subtarget->hasVInstructions())
        break;

      bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
      unsigned Offset = VLMax ? 2 : 3;

      assert(Node->getNumOperands() == Offset + 2 &&
             "Unexpected number of operands");

      unsigned SEW =
          RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
      RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
          Node->getConstantOperandVal(Offset + 1) & 0x7);

      unsigned VTypeI = RISCVVType::encodeVTYPE(
          VLMul, SEW, /*TailAgnostic*/ true, /*MaskAgnostic*/ false);
      SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);

      SDValue VLOperand;
      unsigned Opcode = RISCV::PseudoVSETVLI;
      if (VLMax) {
        VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
        Opcode = RISCV::PseudoVSETVLIX0;
      } else {
        VLOperand = Node->getOperand(2);

        if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
          uint64_t AVL = C->getZExtValue();
          if (isUInt<5>(AVL)) {
            SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
            ReplaceNode(
                Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, XLenVT,
                                             MVT::Other, VLImm, VTypeIOp,
                                             /* Chain */ Node->getOperand(0)));
            return;
          }
        }
      }

      ReplaceNode(Node,
                  CurDAG->getMachineNode(Opcode, DL, XLenVT,
                                         MVT::Other, VLOperand, VTypeIOp,
                                         /* Chain */ Node->getOperand(0)));
      return;
    }
    case Intrinsic::riscv_vsetvlimax:
      return selectVSETVLI(Node);
    case Intrinsic::riscv_vlseg2:
    case Intrinsic::riscv_vlseg3:
    case Intrinsic::riscv_vlseg4:


@@ -87,6 +87,8 @@ public:
  void selectVSSEG(SDNode *Node, bool IsMasked, bool IsStrided);
  void selectVSXSEG(SDNode *Node, bool IsMasked, bool IsOrdered);

  void selectVSETVLI(SDNode *Node);

  // Return the RISC-V condition code that matches the given DAG integer
  // condition code. The CondCode must be one of those supported by the RISC-V
  // ISA (see translateSetCCForBranch).


@@ -8162,14 +8162,18 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
    // We assume VLENB is no more than 65536 / 8 bytes.
    Known.Zero.setBitsFrom(14);
    break;
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = Op.getConstantOperandVal(1);
  case ISD::INTRINSIC_W_CHAIN:
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo =
        Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
    switch (IntNo) {
    default:
      // We can't do anything for most intrinsics.
      break;
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax:
    case Intrinsic::riscv_vsetvli_opt:
    case Intrinsic::riscv_vsetvlimax_opt:
      // Assume that VL output is positive and would fit in an int32_t.
      // TODO: VLEN might be capped at 16 bits in a future V spec update.
      if (BitWidth >= 32)


@@ -3,6 +3,8 @@
declare i32 @llvm.riscv.vsetvli.i32(i32, i32, i32)
declare i32 @llvm.riscv.vsetvlimax.i32(i32, i32)
declare i32 @llvm.riscv.vsetvli.opt.i32(i32, i32, i32)
declare i32 @llvm.riscv.vsetvlimax.opt.i32(i32, i32)
define void @test_vsetvli_e64mf8(i32 %avl) nounwind {
; CHECK-LABEL: test_vsetvli_e64mf8:
@@ -31,6 +33,68 @@ define void @test_vsetvlimax_e64m8() nounwind {
ret void
}
define i32 @test_vsetvli_opt_e8m1(i32 %avl) nounwind {
; CHECK-LABEL: test_vsetvli_opt_e8m1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, a0, e8, m1, ta, mu
; CHECK-NEXT: ret
%vl = call i32 @llvm.riscv.vsetvli.opt.i32(i32 %avl, i32 0, i32 0)
ret i32 %vl
}
; Check that we remove the intrinsic if it's unused.
define void @test_vsetvli_opt_e8m1_nouse(i32 %avl) nounwind {
; CHECK-LABEL: test_vsetvli_opt_e8m1_nouse:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
call i32 @llvm.riscv.vsetvli.opt.i32(i32 %avl, i32 0, i32 0)
ret void
}
define i32 @test_vsetvli_opt_e16mf4(i32 %avl) nounwind {
; CHECK-LABEL: test_vsetvli_opt_e16mf4:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, a0, e16, mf4, ta, mu
; CHECK-NEXT: ret
%vl = call i32 @llvm.riscv.vsetvli.opt.i32(i32 %avl, i32 1, i32 6)
ret i32 %vl
}
define i32 @test_vsetvli_opt_e32mf8_zero_avl() nounwind {
; CHECK-LABEL: test_vsetvli_opt_e32mf8_zero_avl:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a0, 0, e16, mf4, ta, mu
; CHECK-NEXT: ret
%vl = call i32 @llvm.riscv.vsetvli.opt.i32(i32 0, i32 1, i32 6)
ret i32 %vl
}
define i32 @test_vsetvlimax_opt_e32m2() nounwind {
; CHECK-LABEL: test_vsetvlimax_opt_e32m2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
; CHECK-NEXT: ret
%vl = call i32 @llvm.riscv.vsetvlimax.opt.i32(i32 2, i32 1)
ret i32 %vl
}
define void @test_vsetvlimax_opt_e32m2_nouse() nounwind {
; CHECK-LABEL: test_vsetvlimax_opt_e32m2_nouse:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
call i32 @llvm.riscv.vsetvlimax.opt.i32(i32 2, i32 1)
ret void
}
define i32 @test_vsetvlimax_opt_e64m4() nounwind {
; CHECK-LABEL: test_vsetvlimax_opt_e64m4:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu
; CHECK-NEXT: ret
%vl = call i32 @llvm.riscv.vsetvlimax.opt.i32(i32 3, i32 2)
ret i32 %vl
}
declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i32(<vscale x 4 x i32>*, i32)
; Check that we remove the redundant vsetvli when followed by another operation


@@ -3,6 +3,8 @@
declare i64 @llvm.riscv.vsetvli.i64(i64, i64, i64)
declare i64 @llvm.riscv.vsetvlimax.i64(i64, i64)
declare i64 @llvm.riscv.vsetvli.opt.i64(i64, i64, i64)
declare i64 @llvm.riscv.vsetvlimax.opt.i64(i64, i64)
define void @test_vsetvli_e8m1(i64 %avl) nounwind {
; CHECK-LABEL: test_vsetvli_e8m1:
@@ -49,6 +51,68 @@ define void @test_vsetvlimax_e64m4() nounwind {
ret void
}
define i64 @test_vsetvli_opt_e8m1(i64 %avl) nounwind {
; CHECK-LABEL: test_vsetvli_opt_e8m1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, a0, e8, m1, ta, mu
; CHECK-NEXT: ret
%vl = call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 0, i64 0)
ret i64 %vl
}
; Check that we remove the intrinsic if it's unused.
define void @test_vsetvli_opt_e8m1_nouse(i64 %avl) nounwind {
; CHECK-LABEL: test_vsetvli_opt_e8m1_nouse:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 0, i64 0)
ret void
}
define i64 @test_vsetvli_opt_e16mf4(i64 %avl) nounwind {
; CHECK-LABEL: test_vsetvli_opt_e16mf4:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, a0, e16, mf4, ta, mu
; CHECK-NEXT: ret
%vl = call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 1, i64 6)
ret i64 %vl
}
define i64 @test_vsetvli_opt_e32mf8_zero_avl() nounwind {
; CHECK-LABEL: test_vsetvli_opt_e32mf8_zero_avl:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a0, 0, e16, mf4, ta, mu
; CHECK-NEXT: ret
%vl = call i64 @llvm.riscv.vsetvli.opt.i64(i64 0, i64 1, i64 6)
ret i64 %vl
}
define i64 @test_vsetvlimax_opt_e32m2() nounwind {
; CHECK-LABEL: test_vsetvlimax_opt_e32m2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
; CHECK-NEXT: ret
%vl = call i64 @llvm.riscv.vsetvlimax.opt.i64(i64 2, i64 1)
ret i64 %vl
}
define void @test_vsetvlimax_opt_e32m2_nouse() nounwind {
; CHECK-LABEL: test_vsetvlimax_opt_e32m2_nouse:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
call i64 @llvm.riscv.vsetvlimax.opt.i64(i64 2, i64 1)
ret void
}
define i64 @test_vsetvlimax_opt_e64m4() nounwind {
; CHECK-LABEL: test_vsetvlimax_opt_e64m4:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu
; CHECK-NEXT: ret
%vl = call i64 @llvm.riscv.vsetvlimax.opt.i64(i64 3, i64 2)
ret i64 %vl
}
declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>*, i64)
; Check that we remove the redundant vsetvli when followed by another operation