[RISCV] Add a VL output to vleff intrinsics.

The fault-only-first load instructions can reduce VL if an element
other than element 0 triggers a memory fault. This can be used to
vectorize loops with data-dependent exit conditions like strcmp or
strlen.

This patch adds a VL output to these intrinsics so that the new
VL value can be captured by software. This will be expanded to
'csrr gpr, vl' after the vleff instruction during SelectionDAG.

By doing this with one intrinsic we are able to guarantee that the
csrr reads the VL value produced by the vleff instruction. Having
it as a separate intrinsic would make it impossible to guarantee
ordering without making every other vector intrinsic have side
effects.

The intrinsics are expanded during lowering into two ISD nodes
that are glued together. These ISD nodes will go
through isel separately, but should maintain the glue so that they
get emitted adjacently by InstrEmitter.

I've only run the chain through the vleff instruction, allowing
the READ_VL to be deleted if it is unused.

Reviewed By: HsiangKai

Differential Revision: https://reviews.llvm.org/D94286
This commit is contained in:
Craig Topper 2021-01-21 17:08:41 -08:00
parent 8120cfedf5
commit 3b5430eb0d
7 changed files with 1716 additions and 623 deletions

View File

@ -101,6 +101,16 @@ let TargetPrefix = "riscv" in {
[LLVMPointerType<LLVMMatchType<0>>,
llvm_anyint_ty],
[NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic;
// For unit stride fault-only-first load
// Input: (pointer, vl)
// Output: (data, vl)
// The pointer operand points to the type of the data result, and the vl
// input is constrained to the same integer type as the vl output.
// NOTE: We model this with default memory properties since we model writing
// VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
// The only attribute specified is therefore NoCapture on the pointer
// (argument 0).
class RISCVUSLoadFF
: Intrinsic<[llvm_anyvector_ty, llvm_anyint_ty],
[LLVMPointerType<LLVMMatchType<0>>, LLVMMatchType<1>],
[NoCapture<ArgIndex<0>>]>,
RISCVVIntrinsic;
// For unit stride load with mask
// Input: (maskedoff, pointer, mask, vl)
class RISCVUSLoadMask
@ -110,6 +120,18 @@ let TargetPrefix = "riscv" in {
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyint_ty],
[NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic;
// For unit stride fault-only-first load with mask
// Input: (maskedoff, pointer, mask, vl)
// Output: (data, vl)
// maskedoff has the same type as the data result, the pointer points to that
// type, the mask is an i1 type with the same vector width as the data, and
// the vl input is constrained to the same integer type as the vl output.
// NOTE: We model this with default memory properties since we model writing
// VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
// The only attribute specified is therefore NoCapture on the pointer
// (argument 1).
class RISCVUSLoadFFMask
: Intrinsic<[llvm_anyvector_ty, llvm_anyint_ty],
[LLVMMatchType<0>,
LLVMPointerType<LLVMMatchType<0>>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<1>],
[NoCapture<ArgIndex<1>>]>, RISCVVIntrinsic;
// For strided load
// Input: (pointer, stride, vl)
class RISCVSLoad
@ -564,6 +586,10 @@ let TargetPrefix = "riscv" in {
def "int_riscv_" # NAME : RISCVUSLoad;
def "int_riscv_" # NAME # "_mask" : RISCVUSLoadMask;
}
// Defines the pair of fault-only-first load intrinsics for NAME:
// int_riscv_<NAME> (unmasked) and int_riscv_<NAME>_mask (masked). Both
// return the loaded data together with a vl value.
multiclass RISCVUSLoadFF {
def "int_riscv_" # NAME : RISCVUSLoadFF;
def "int_riscv_" # NAME # "_mask" : RISCVUSLoadFFMask;
}
multiclass RISCVSLoad {
def "int_riscv_" # NAME : RISCVSLoad;
def "int_riscv_" # NAME # "_mask" : RISCVSLoadMask;
@ -680,7 +706,7 @@ let TargetPrefix = "riscv" in {
}
defm vle : RISCVUSLoad;
defm vleff : RISCVUSLoad;
defm vleff : RISCVUSLoadFF;
defm vse : RISCVUSStore;
defm vlse: RISCVSLoad;
defm vsse: RISCVSStore;

View File

@ -362,6 +362,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
@ -1367,7 +1369,29 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
}
}
return SDValue();
// Custom lowering for the fault-only-first load intrinsics. Each one is split
// into a load node and a READ_VL node that consumes the load's glue, so the
// vl read stays attached to the load that produced it.
switch (IntNo) {
default:
return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::riscv_vleff: {
SDLoc DL(Op);
// The load produces the data result, a chain and an output glue.
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue);
// Operand 1 of Op is skipped here (presumably the intrinsic ID -- confirm);
// operands 0, 2 and 3 are forwarded to the VLEFF node.
SDValue Load = DAG.getNode(RISCVISD::VLEFF, DL, VTs, Op.getOperand(0),
Op.getOperand(2), Op.getOperand(3));
// READ_VL yields the intrinsic's second result type and takes the load's
// glue (value 2) as its only operand.
// NOTE(review): this VT list also contains MVT::Other, but the merged
// result below uses the chain from the load, not from READ_VL -- confirm
// this is intended.
VTs = DAG.getVTList(Op->getValueType(1), MVT::Other);
SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, Load.getValue(2));
// Results in order: loaded data, vl value, chain of the load.
return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL);
}
case Intrinsic::riscv_vleff_mask: {
SDLoc DL(Op);
// Same structure as the unmasked case, with two additional operands
// forwarded to the VLEFF_MASK node (operands 0 and 2..5 of Op).
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue);
SDValue Load = DAG.getNode(RISCVISD::VLEFF_MASK, DL, VTs, Op.getOperand(0),
Op.getOperand(2), Op.getOperand(3),
Op.getOperand(4), Op.getOperand(5));
// READ_VL is glued to the masked load via the load's glue result.
VTs = DAG.getVTList(Op->getValueType(1), MVT::Other);
SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, Load.getValue(2));
// Results in order: loaded data, vl value, chain of the load.
return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL);
}
}
}
// Returns the opcode of the target-specific SDNode that implements the 32-bit
@ -3815,6 +3839,9 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SPLAT_VECTOR_I64)
NODE_NAME_CASE(READ_VLENB)
NODE_NAME_CASE(TRUNCATE_VECTOR)
NODE_NAME_CASE(VLEFF)
NODE_NAME_CASE(VLEFF_MASK)
NODE_NAME_CASE(READ_VL)
}
// clang-format on
return nullptr;

View File

@ -95,6 +95,11 @@ enum NodeType : unsigned {
READ_VLENB,
// Truncates a RVV integer vector by one power-of-two.
TRUNCATE_VECTOR,
// Unit-stride fault-only-first load
VLEFF,
VLEFF_MASK,
// read vl CSR
READ_VL,
};
} // namespace RISCVISD

View File

@ -20,6 +20,23 @@ def riscv_vmv_x_s : SDNode<"RISCVISD::VMV_X_S",
def riscv_read_vlenb : SDNode<"RISCVISD::READ_VLENB",
SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>>;
// Unmasked unit-stride fault-only-first load node (RISCVISD::VLEFF).
// One vector result; operands are a pointer and an XLenVT value (used as the
// vl by the selection patterns). The node has a chain, produces an output
// glue, may load, and is marked as having side effects.
def riscv_vleff : SDNode<"RISCVISD::VLEFF",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>,
SDTCisVT<2, XLenVT>]>,
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
SDNPSideEffect]>;
// Masked unit-stride fault-only-first load node (RISCVISD::VLEFF_MASK).
// One vector result; operands are: a vector of the same type as the result
// (used as the merge value by the selection patterns), a pointer, a vector
// with i1 elements (the mask), and an XLenVT value (the vl). The node has a
// chain, produces an output glue, may load, and is marked as having side
// effects.
def riscv_vleff_mask : SDNode<"RISCVISD::VLEFF_MASK",
SDTypeProfile<1, 4, [SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisPtrTy<2>,
SDTCVecEltisVT<3, i1>,
SDTCisVT<4, XLenVT>]>,
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
SDNPSideEffect]>;
// Node that reads vl (RISCVISD::READ_VL). It has a single XLenVT result and no
// value operands; it only takes an input glue, which ties it to the node that
// produced the glue.
def riscv_read_vl : SDNode<"RISCVISD::READ_VL",
SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>,
[SDNPInGlue]>;
// X0 has special meaning for vsetvl/vsetvli.
// rd | rs1 | AVL value | Effect on vl
//--------------------------------------------------------------
@ -1903,6 +1920,23 @@ multiclass VPatUSLoad<string intrinsic,
$rs1, (mask_type V0), (NoX0 GPR:$vl), sew)>;
}
// Selection patterns for the unit-stride fault-only-first load nodes.
//   inst      - base name of the pseudo instruction; "_V_" # vlmul.MX (and
//               "_MASK" for the masked form) is appended to pick the pseudo.
//   type      - vector type of the loaded data.
//   mask_type - type of the mask, which is matched in V0.
//   sew       - element width passed through to the pseudo.
//   vlmul     - LMUL info supplying the MX suffix.
//   reg_class - register class of the data; the merge operand uses its
//               GetVRegNoV0 variant.
// Unlike VPatUSLoad, these patterns match the riscv_vleff / riscv_vleff_mask
// SDNodes rather than an intrinsic named by a string parameter.
multiclass VPatUSLoadFF<string inst,
LLVMType type,
LLVMType mask_type,
int sew,
LMULInfo vlmul,
VReg reg_class>
{
// Unmasked form: (pointer, vl) -> Pseudo pointer, vl, sew.
defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
def : Pat<(type (riscv_vleff GPR:$rs1, GPR:$vl)),
(Pseudo $rs1, (NoX0 GPR:$vl), sew)>;
// Masked form: (merge, pointer, mask in V0, vl) -> PseudoMask with the same
// operands plus sew.
defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
def : Pat<(type (riscv_vleff_mask (type GetVRegNoV0<reg_class>.R:$merge),
GPR:$rs1, (mask_type V0), GPR:$vl)),
(PseudoMask $merge,
$rs1, (mask_type V0), (NoX0 GPR:$vl), sew)>;
}
multiclass VPatSLoad<string intrinsic,
string inst,
LLVMType type,
@ -2817,6 +2851,11 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1 in {
[(set GPR:$rd, (riscv_read_vlenb))]>;
}
// Codegen-only pseudo selected for riscv_read_vl. It writes a GPR, takes no
// explicit inputs, and declares an implicit use of VL. It is marked as having
// no side effects and as neither loading nor storing.
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1,
Uses = [VL] in
def PseudoReadVL : Pseudo<(outs GPR:$rd), (ins),
[(set GPR:$rd, (riscv_read_vl))]>;
//===----------------------------------------------------------------------===//
// 6. Configuration-Setting Instructions
//===----------------------------------------------------------------------===//
@ -3388,9 +3427,8 @@ foreach vti = AllVectors in
defm : VPatUSLoad<"int_riscv_vle",
"PseudoVLE" # vti.SEW,
vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;
defm : VPatUSLoad<"int_riscv_vleff",
"PseudoVLE" # vti.SEW # "FF",
vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;
defm : VPatUSLoadFF<"PseudoVLE" # vti.SEW # "FF",
vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;
defm : VPatUSStore<"int_riscv_vse",
"PseudoVSE" # vti.SEW,
vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;

View File

@ -219,4 +219,11 @@ void llvm::LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
return;
}
// Expand PseudoReadVL into a CSRRS instruction that reads the VL system
// register: the opcode is replaced, then the VL sysreg encoding is appended as
// an immediate followed by X0 as the source register.
// NOTE(review): this relies on the destination register operand already being
// present in OutMI from earlier in this function (not visible here) -- confirm.
if (OutMI.getOpcode() == RISCV::PseudoReadVL) {
OutMI.setOpcode(RISCV::CSRRS);
OutMI.addOperand(MCOperand::createImm(
RISCVSysReg::lookupSysRegByName("VL")->Encoding));
OutMI.addOperand(MCOperand::createReg(RISCV::X0));
return;
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff