forked from OSchip/llvm-project
[RISCV] Add a VL output to vleff intrinsics.
The fault-only-first-load instructions can reduce VL if an element other than element 0 triggers a memory fault. This can be used to vectorize loops with data-dependent exit conditions, such as those in strcmp or strlen. This patch adds a VL output to these intrinsics so that the new VL value can be captured by software. The VL output will be expanded to 'csrr gpr, vl' after the vleff instruction during SelectionDAG. By doing this with one intrinsic, we are able to guarantee that the csrr reads the VL value produced by the vleff instruction. Having it as a separate intrinsic would make it impossible to guarantee ordering without making every other vector intrinsic have side effects. The intrinsics are expanded during lowering into two ISD nodes that are glued together. These ISD nodes will go through isel separately, but should maintain the glue so that they get emitted adjacently by InstrEmitter. I've only run the chain through the vleff instruction, allowing the READ_VL to be deleted if it is unused. Reviewed By: HsiangKai Differential Revision: https://reviews.llvm.org/D94286
This commit is contained in:
parent
8120cfedf5
commit
3b5430eb0d
|
@ -101,6 +101,16 @@ let TargetPrefix = "riscv" in {
|
|||
[LLVMPointerType<LLVMMatchType<0>>,
|
||||
llvm_anyint_ty],
|
||||
[NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic;
|
||||
// For unit stride fault-only-first load
|
||||
// Input: (pointer, vl)
|
||||
// Output: (data, vl)
|
||||
// NOTE: We model this with default memory properties since we model writing
|
||||
// VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
|
||||
class RISCVUSLoadFF
|
||||
: Intrinsic<[llvm_anyvector_ty, llvm_anyint_ty],
|
||||
[LLVMPointerType<LLVMMatchType<0>>, LLVMMatchType<1>],
|
||||
[NoCapture<ArgIndex<0>>]>,
|
||||
RISCVVIntrinsic;
|
||||
// For unit stride load with mask
|
||||
// Input: (maskedoff, pointer, mask, vl)
|
||||
class RISCVUSLoadMask
|
||||
|
@ -110,6 +120,18 @@ let TargetPrefix = "riscv" in {
|
|||
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
|
||||
llvm_anyint_ty],
|
||||
[NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic;
|
||||
// For unit stride fault-only-first load with mask
|
||||
// Input: (maskedoff, pointer, mask, vl)
|
||||
// Output: (data, vl)
|
||||
// NOTE: We model this with default memory properties since we model writing
|
||||
// VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
|
||||
class RISCVUSLoadFFMask
|
||||
: Intrinsic<[llvm_anyvector_ty, llvm_anyint_ty],
|
||||
[LLVMMatchType<0>,
|
||||
LLVMPointerType<LLVMMatchType<0>>,
|
||||
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
|
||||
LLVMMatchType<1>],
|
||||
[NoCapture<ArgIndex<1>>]>, RISCVVIntrinsic;
|
||||
// For strided load
|
||||
// Input: (pointer, stride, vl)
|
||||
class RISCVSLoad
|
||||
|
@ -564,6 +586,10 @@ let TargetPrefix = "riscv" in {
|
|||
def "int_riscv_" # NAME : RISCVUSLoad;
|
||||
def "int_riscv_" # NAME # "_mask" : RISCVUSLoadMask;
|
||||
}
|
||||
multiclass RISCVUSLoadFF {
|
||||
def "int_riscv_" # NAME : RISCVUSLoadFF;
|
||||
def "int_riscv_" # NAME # "_mask" : RISCVUSLoadFFMask;
|
||||
}
|
||||
multiclass RISCVSLoad {
|
||||
def "int_riscv_" # NAME : RISCVSLoad;
|
||||
def "int_riscv_" # NAME # "_mask" : RISCVSLoadMask;
|
||||
|
@ -680,7 +706,7 @@ let TargetPrefix = "riscv" in {
|
|||
}
|
||||
|
||||
defm vle : RISCVUSLoad;
|
||||
defm vleff : RISCVUSLoad;
|
||||
defm vleff : RISCVUSLoadFF;
|
||||
defm vse : RISCVUSStore;
|
||||
defm vlse: RISCVSLoad;
|
||||
defm vsse: RISCVSStore;
|
||||
|
|
|
@ -362,6 +362,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
|
|||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
|
||||
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
|
||||
|
||||
if (Subtarget.is64Bit()) {
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
|
||||
|
@ -1367,7 +1369,29 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
switch (IntNo) {
|
||||
default:
|
||||
return SDValue(); // Don't custom lower most intrinsics.
|
||||
case Intrinsic::riscv_vleff: {
|
||||
SDLoc DL(Op);
|
||||
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue);
|
||||
SDValue Load = DAG.getNode(RISCVISD::VLEFF, DL, VTs, Op.getOperand(0),
|
||||
Op.getOperand(2), Op.getOperand(3));
|
||||
VTs = DAG.getVTList(Op->getValueType(1), MVT::Other);
|
||||
SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, Load.getValue(2));
|
||||
return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL);
|
||||
}
|
||||
case Intrinsic::riscv_vleff_mask: {
|
||||
SDLoc DL(Op);
|
||||
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue);
|
||||
SDValue Load = DAG.getNode(RISCVISD::VLEFF_MASK, DL, VTs, Op.getOperand(0),
|
||||
Op.getOperand(2), Op.getOperand(3),
|
||||
Op.getOperand(4), Op.getOperand(5));
|
||||
VTs = DAG.getVTList(Op->getValueType(1), MVT::Other);
|
||||
SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, Load.getValue(2));
|
||||
return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the opcode of the target-specific SDNode that implements the 32-bit
|
||||
|
@ -3815,6 +3839,9 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
NODE_NAME_CASE(SPLAT_VECTOR_I64)
|
||||
NODE_NAME_CASE(READ_VLENB)
|
||||
NODE_NAME_CASE(TRUNCATE_VECTOR)
|
||||
NODE_NAME_CASE(VLEFF)
|
||||
NODE_NAME_CASE(VLEFF_MASK)
|
||||
NODE_NAME_CASE(READ_VL)
|
||||
}
|
||||
// clang-format on
|
||||
return nullptr;
|
||||
|
|
|
@ -95,6 +95,11 @@ enum NodeType : unsigned {
|
|||
READ_VLENB,
|
||||
// Truncates a RVV integer vector by one power-of-two.
|
||||
TRUNCATE_VECTOR,
|
||||
// Unit-stride fault-only-first load
|
||||
VLEFF,
|
||||
VLEFF_MASK,
|
||||
// read vl CSR
|
||||
READ_VL,
|
||||
};
|
||||
} // namespace RISCVISD
|
||||
|
||||
|
|
|
@ -20,6 +20,23 @@ def riscv_vmv_x_s : SDNode<"RISCVISD::VMV_X_S",
|
|||
def riscv_read_vlenb : SDNode<"RISCVISD::READ_VLENB",
|
||||
SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>>;
|
||||
|
||||
def riscv_vleff : SDNode<"RISCVISD::VLEFF",
|
||||
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>,
|
||||
SDTCisVT<2, XLenVT>]>,
|
||||
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
|
||||
SDNPSideEffect]>;
|
||||
def riscv_vleff_mask : SDNode<"RISCVISD::VLEFF_MASK",
|
||||
SDTypeProfile<1, 4, [SDTCisVec<0>,
|
||||
SDTCisSameAs<0, 1>,
|
||||
SDTCisPtrTy<2>,
|
||||
SDTCVecEltisVT<3, i1>,
|
||||
SDTCisVT<4, XLenVT>]>,
|
||||
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
|
||||
SDNPSideEffect]>;
|
||||
def riscv_read_vl : SDNode<"RISCVISD::READ_VL",
|
||||
SDTypeProfile<1, 0, [SDTCisVT<0, XLenVT>]>,
|
||||
[SDNPInGlue]>;
|
||||
|
||||
// X0 has special meaning for vsetvl/vsetvli.
|
||||
// rd | rs1 | AVL value | Effect on vl
|
||||
//--------------------------------------------------------------
|
||||
|
@ -1903,6 +1920,23 @@ multiclass VPatUSLoad<string intrinsic,
|
|||
$rs1, (mask_type V0), (NoX0 GPR:$vl), sew)>;
|
||||
}
|
||||
|
||||
multiclass VPatUSLoadFF<string inst,
|
||||
LLVMType type,
|
||||
LLVMType mask_type,
|
||||
int sew,
|
||||
LMULInfo vlmul,
|
||||
VReg reg_class>
|
||||
{
|
||||
defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
|
||||
def : Pat<(type (riscv_vleff GPR:$rs1, GPR:$vl)),
|
||||
(Pseudo $rs1, (NoX0 GPR:$vl), sew)>;
|
||||
defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
|
||||
def : Pat<(type (riscv_vleff_mask (type GetVRegNoV0<reg_class>.R:$merge),
|
||||
GPR:$rs1, (mask_type V0), GPR:$vl)),
|
||||
(PseudoMask $merge,
|
||||
$rs1, (mask_type V0), (NoX0 GPR:$vl), sew)>;
|
||||
}
|
||||
|
||||
multiclass VPatSLoad<string intrinsic,
|
||||
string inst,
|
||||
LLVMType type,
|
||||
|
@ -2817,6 +2851,11 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1 in {
|
|||
[(set GPR:$rd, (riscv_read_vlenb))]>;
|
||||
}
|
||||
|
||||
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1,
|
||||
Uses = [VL] in
|
||||
def PseudoReadVL : Pseudo<(outs GPR:$rd), (ins),
|
||||
[(set GPR:$rd, (riscv_read_vl))]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// 6. Configuration-Setting Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -3388,9 +3427,8 @@ foreach vti = AllVectors in
|
|||
defm : VPatUSLoad<"int_riscv_vle",
|
||||
"PseudoVLE" # vti.SEW,
|
||||
vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;
|
||||
defm : VPatUSLoad<"int_riscv_vleff",
|
||||
"PseudoVLE" # vti.SEW # "FF",
|
||||
vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;
|
||||
defm : VPatUSLoadFF<"PseudoVLE" # vti.SEW # "FF",
|
||||
vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;
|
||||
defm : VPatUSStore<"int_riscv_vse",
|
||||
"PseudoVSE" # vti.SEW,
|
||||
vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;
|
||||
|
|
|
@ -219,4 +219,11 @@ void llvm::LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
|
|||
return;
|
||||
}
|
||||
|
||||
if (OutMI.getOpcode() == RISCV::PseudoReadVL) {
|
||||
OutMI.setOpcode(RISCV::CSRRS);
|
||||
OutMI.addOperand(MCOperand::createImm(
|
||||
RISCVSysReg::lookupSysRegByName("VL")->Encoding));
|
||||
OutMI.addOperand(MCOperand::createReg(RISCV::X0));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue