[RISCV] Add initial support for converting fixed vectors to scalable vectors during lowering to use RVV instructions.

This is an alternative to D95563. It is modeled after a similar feature for AArch64's SVE that uses predicated scalable vector instructions. Rather than use predication, this patch uses an explicit VL operand. I've limited it to always use LMUL=1 for now, but we can improve this in the future.

This requires a bunch of new ISD opcodes to carry the VL operand. I think we can probably lower intrinsics to these ISD opcodes to cut down on the size of the isel table, which is why I've added patterns for all integer/float types and not just LMUL=1.

I'm only testing one vector width right now, but the width is programmable via the command line.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D95705
parent eea34aae2e
commit a719b667a9
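To illustrate the approach (a sketch, not part of the diff): a fixed-length operation such as

  %c = fadd <4 x float> %a, %b

is lowered by "casting" the operands into an LMUL=1 scalable container, emitting the new VL-carrying node with VL equal to the fixed element count, and casting the result back out, roughly:

  t1 = insert_subvector undef:nxv2f32, %a, 0
  t2 = insert_subvector undef:nxv2f32, %b, 0
  t3 = FADD_VL t1, t2, (VMSET_VL 4), 4
  %c = extract_subvector t3, 0

The node and container names follow the code added below; the nxv2f32 container assumes riscv-v-vector-bits-min=128.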
@@ -140,6 +140,32 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
        const TargetRegisterClass *RC;
        if (LMul == 1)
          RC = &RISCV::VRRegClass;
        else if (LMul == 2)
          RC = &RISCV::VRM2RegClass;
        else if (LMul == 4)
          RC = &RISCV::VRM4RegClass;
        else if (LMul == 8)
          RC = &RISCV::VRM8RegClass;
        else
          llvm_unreachable("Unexpected LMul!");

        addRegisterClass(VT, RC);
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
@@ -484,6 +510,56 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        SetCommonVFPActions(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);
        setOperationAction(ISD::ADD, VT, Custom);
        setOperationAction(ISD::MUL, VT, Custom);
        setOperationAction(ISD::SUB, VT, Custom);
        setOperationAction(ISD::AND, VT, Custom);
        setOperationAction(ISD::OR, VT, Custom);
        setOperationAction(ISD::XOR, VT, Custom);
        setOperationAction(ISD::SDIV, VT, Custom);
        setOperationAction(ISD::SREM, VT, Custom);
        setOperationAction(ISD::UDIV, VT, Custom);
        setOperationAction(ISD::UREM, VT, Custom);
        setOperationAction(ISD::SHL, VT, Custom);
        setOperationAction(ISD::SRA, VT, Custom);
        setOperationAction(ISD::SRL, VT, Custom);
      }

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);
        setOperationAction(ISD::FADD, VT, Custom);
        setOperationAction(ISD::FSUB, VT, Custom);
        setOperationAction(ISD::FMUL, VT, Custom);
        setOperationAction(ISD::FDIV, VT, Custom);
        setOperationAction(ISD::FNEG, VT, Custom);
      }
    }
  }

  // Function alignments.
@@ -928,6 +1004,46 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_SEQ_FADD:
    return lowerFPVECREDUCE(Op, DAG);
  case ISD::LOAD:
    return lowerFixedLengthVectorLoadToRVV(Op, DAG);
  case ISD::STORE:
    return lowerFixedLengthVectorStoreToRVV(Op, DAG);
  case ISD::ADD:
    return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
  case ISD::SUB:
    return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
  case ISD::MUL:
    return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
  case ISD::AND:
    return lowerToScalableOp(Op, DAG, RISCVISD::AND_VL);
  case ISD::OR:
    return lowerToScalableOp(Op, DAG, RISCVISD::OR_VL);
  case ISD::XOR:
    return lowerToScalableOp(Op, DAG, RISCVISD::XOR_VL);
  case ISD::SDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
  case ISD::SREM:
    return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
  case ISD::UDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
  case ISD::UREM:
    return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
  case ISD::SHL:
    return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL);
  case ISD::SRA:
    return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL);
  case ISD::SRL:
    return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL);
  case ISD::FADD:
    return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
  case ISD::FSUB:
    return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
  case ISD::FMUL:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
  case ISD::FDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
  case ISD::FNEG:
    return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
  }
}
@@ -1742,6 +1858,137 @@ SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
                     DAG.getConstant(0, DL, Subtarget.getXLenVT()));
}

// Return the largest legal scalable vector type that matches VT's element type.
static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() &&
         DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
         "Expected legal fixed length vector!");

  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  assert(LMul <= 8 && isPowerOf2_32(LMul) && "Unexpected LMUL!");

  switch (VT.getVectorElementType().SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i8:
    return MVT::getScalableVectorVT(MVT::i8, LMul * 8);
  case MVT::i16:
    return MVT::getScalableVectorVT(MVT::i16, LMul * 4);
  case MVT::i32:
    return MVT::getScalableVectorVT(MVT::i32, LMul * 2);
  case MVT::i64:
    return MVT::getScalableVectorVT(MVT::i64, LMul);
  case MVT::f16:
    return MVT::getScalableVectorVT(MVT::f16, LMul * 4);
  case MVT::f32:
    return MVT::getScalableVectorVT(MVT::f32, LMul * 2);
  case MVT::f64:
    return MVT::getScalableVectorVT(MVT::f64, LMul);
  }
}

// Grow V to consume an entire RVV register.
static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                       const RISCVSubtarget &Subtarget) {
  assert(VT.isScalableVector() &&
         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}

// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() &&
         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");
  SDLoc DL(V);
  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}

SDValue
RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
                                                     SelectionDAG &DAG) const {
  auto *Load = cast<LoadSDNode>(Op);

  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDValue VL =
      DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());

  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
  SDValue NewLoad = DAG.getMemIntrinsicNode(
      RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL},
      Load->getMemoryVT(), Load->getMemOperand());

  SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
  return DAG.getMergeValues({Result, Load->getChain()}, DL);
}

SDValue
RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
                                                      SelectionDAG &DAG) const {
  auto *Store = cast<StoreSDNode>(Op);

  SDLoc DL(Op);
  MVT VT = Store->getValue().getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  SDValue VL =
      DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());

  SDValue NewValue =
      convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget);
  return DAG.getMemIntrinsicNode(
      RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
      {Store->getChain(), NewValue, Store->getBasePtr(), VL},
      Store->getMemoryVT(), Store->getMemOperand());
}

SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
                                               unsigned NewOpc) const {
  MVT VT = Op.getSimpleValueType();
  assert(useRVVForFixedLengthVectorVT(VT) &&
         "Only expected to lower fixed length vector operation!");
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);

  // Create list of operands by converting existing ones to scalable types.
  SmallVector<SDValue, 6> Ops;
  for (const SDValue &V : Op->op_values()) {
    assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");

    // Pass through non-vector operands.
    if (!V.getValueType().isVector()) {
      Ops.push_back(V);
      continue;
    }

    // "cast" fixed length vector to a scalable vector.
    assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
           "Only fixed length vectors are supported!");
    Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
  }

  SDLoc DL(Op);
  SDValue VL =
      DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
  Ops.push_back(Mask);
  Ops.push_back(VL);

  SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
  return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
}

// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
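A worked example of the container computation above (illustrative, not part of the diff): with riscv-v-vector-bits-min=128, a v4i32 is 128 bits, so getLMULForFixedLengthVector returns 1 and the i32 case yields

  MVT::getScalableVectorVT(MVT::i32, 1 * 2)  // nxv2i32, one LMUL=1 register's worth of i32

while a v16f16 (256 bits) gives LMul = 2 and hence an nxv8f16 container.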
@@ -4310,6 +4557,28 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
  NODE_NAME_CASE(VECREDUCE_XOR)
  NODE_NAME_CASE(VECREDUCE_FADD)
  NODE_NAME_CASE(VECREDUCE_SEQ_FADD)
  NODE_NAME_CASE(ADD_VL)
  NODE_NAME_CASE(AND_VL)
  NODE_NAME_CASE(MUL_VL)
  NODE_NAME_CASE(OR_VL)
  NODE_NAME_CASE(SDIV_VL)
  NODE_NAME_CASE(SHL_VL)
  NODE_NAME_CASE(SREM_VL)
  NODE_NAME_CASE(SRA_VL)
  NODE_NAME_CASE(SRL_VL)
  NODE_NAME_CASE(SUB_VL)
  NODE_NAME_CASE(UDIV_VL)
  NODE_NAME_CASE(UREM_VL)
  NODE_NAME_CASE(XOR_VL)
  NODE_NAME_CASE(FADD_VL)
  NODE_NAME_CASE(FSUB_VL)
  NODE_NAME_CASE(FMUL_VL)
  NODE_NAME_CASE(FDIV_VL)
  NODE_NAME_CASE(FNEG_VL)
  NODE_NAME_CASE(VMCLR_VL)
  NODE_NAME_CASE(VMSET_VL)
  NODE_NAME_CASE(VLE_VL)
  NODE_NAME_CASE(VSE_VL)
  }
  // clang-format on
  return nullptr;
@@ -4747,6 +5016,50 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
  return false;
}

bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  if (!VT.isFixedLengthVector())
    return false;

  // Don't use RVV for vectors we cannot scalarize if required.
  switch (VT.getVectorElementType().SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    break;
  case MVT::f16:
    if (!Subtarget.hasStdExtZfh())
      return false;
    break;
  case MVT::f32:
    if (!Subtarget.hasStdExtF())
      return false;
    break;
  case MVT::f64:
    if (!Subtarget.hasStdExtD())
      return false;
    break;
  }

  unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
  // Don't use RVV for types that don't fit.
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
  if (!VT.isPow2VectorType())
    return false;

  return true;
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"
@@ -133,6 +133,39 @@ enum NodeType : unsigned {
  VECREDUCE_XOR,
  VECREDUCE_FADD,
  VECREDUCE_SEQ_FADD,

  // Vector binary and unary ops with VL as a third operand.
  // FIXME: Can we replace these with ISD::VP_*?
  ADD_VL,
  AND_VL,
  MUL_VL,
  OR_VL,
  SDIV_VL,
  SHL_VL,
  SREM_VL,
  SRA_VL,
  SRL_VL,
  SUB_VL,
  UDIV_VL,
  UREM_VL,
  XOR_VL,
  FADD_VL,
  FSUB_VL,
  FMUL_VL,
  FDIV_VL,
  FNEG_VL,

  // Set mask vector to all zeros or ones.
  VMCLR_VL,
  VMSET_VL,

  // Memory opcodes start here.
  VLE_VL = ISD::FIRST_TARGET_MEMORY_OPCODE,
  VSE_VL,

  // WARNING: Do not add anything in the end unless you want the node to
  // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
  // opcodes will be thought as target memory ops!
};
} // namespace RISCVISD
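For reference, the operand layout of these nodes (a sketch following the SDTypeProfiles added in RISCVInstrInfoVVLPatterns.td below) is

  (ADD_VL lhs, rhs, mask, vl)   ; binary ops: mask is an i1 vector matching the result's element count, vl is XLenVT
  (FNEG_VL src, mask, vl)       ; unary ops

so VL rides along as an explicit trailing operand rather than being implied by predication.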
@@ -336,6 +369,10 @@ private:
  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
                            unsigned NewOpc) const;

  bool isEligibleForTailCallOptimization(
      CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
@@ -346,6 +383,8 @@ private:
  void validateCCReservedRegs(
      const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
      MachineFunction &MF) const;

  bool useRVVForFixedLengthVectorVT(MVT VT) const;
};

namespace RISCVVIntrinsicsTable {
@@ -4435,3 +4435,4 @@ let Predicates = [HasStdExtV, HasStdExtF] in {

// Include the non-intrinsic ISel patterns
include "RISCVInstrInfoVSDPatterns.td"
include "RISCVInstrInfoVVLPatterns.td"
@@ -0,0 +1,190 @@
//===- RISCVInstrInfoVVLPatterns.td - RVV VL patterns ------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// This file contains the required infrastructure and VL patterns to
/// support code generation for the standard 'V' (Vector) extension, version
/// 0.10. This version is still experimental as the 'V' extension hasn't been
/// ratified yet.
///
/// This file is included from and depends upon RISCVInstrInfoVPseudos.td
///
/// Note: the patterns for RVV intrinsics are found in
/// RISCVInstrInfoVPseudos.td.
///
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Helpers to define the VL patterns.
//===----------------------------------------------------------------------===//

def SDT_RISCVVLE_VL : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>,
                                           SDTCisVT<2, XLenVT>]>;
def SDT_RISCVVSE_VL : SDTypeProfile<0, 3, [SDTCisVec<0>, SDTCisPtrTy<1>,
                                           SDTCisVT<2, XLenVT>]>;

def SDT_RISCVIntBinOp_VL : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
                                                SDTCisSameAs<0, 2>,
                                                SDTCisVec<0>, SDTCisInt<0>,
                                                SDTCVecEltisVT<3, i1>,
                                                SDTCisSameNumEltsAs<0, 3>,
                                                SDTCisVT<4, XLenVT>]>;

def SDT_RISCVFPUnOp_VL : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
                                              SDTCisVec<0>, SDTCisFP<0>,
                                              SDTCVecEltisVT<2, i1>,
                                              SDTCisSameNumEltsAs<0, 2>,
                                              SDTCisVT<3, XLenVT>]>;
def SDT_RISCVFPBinOp_VL : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
                                               SDTCisSameAs<0, 2>,
                                               SDTCisVec<0>, SDTCisFP<0>,
                                               SDTCVecEltisVT<3, i1>,
                                               SDTCisSameNumEltsAs<0, 3>,
                                               SDTCisVT<4, XLenVT>]>;

def riscv_vle_vl : SDNode<"RISCVISD::VLE_VL", SDT_RISCVVLE_VL,
                          [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def riscv_vse_vl : SDNode<"RISCVISD::VSE_VL", SDT_RISCVVSE_VL,
                          [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def riscv_add_vl : SDNode<"RISCVISD::ADD_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_sub_vl : SDNode<"RISCVISD::SUB_VL", SDT_RISCVIntBinOp_VL>;
def riscv_mul_vl : SDNode<"RISCVISD::MUL_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_and_vl : SDNode<"RISCVISD::AND_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_or_vl : SDNode<"RISCVISD::OR_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_xor_vl : SDNode<"RISCVISD::XOR_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_sdiv_vl : SDNode<"RISCVISD::SDIV_VL", SDT_RISCVIntBinOp_VL>;
def riscv_srem_vl : SDNode<"RISCVISD::SREM_VL", SDT_RISCVIntBinOp_VL>;
def riscv_udiv_vl : SDNode<"RISCVISD::UDIV_VL", SDT_RISCVIntBinOp_VL>;
def riscv_urem_vl : SDNode<"RISCVISD::UREM_VL", SDT_RISCVIntBinOp_VL>;
def riscv_shl_vl : SDNode<"RISCVISD::SHL_VL", SDT_RISCVIntBinOp_VL>;
def riscv_sra_vl : SDNode<"RISCVISD::SRA_VL", SDT_RISCVIntBinOp_VL>;
def riscv_srl_vl : SDNode<"RISCVISD::SRL_VL", SDT_RISCVIntBinOp_VL>;
def riscv_fadd_vl : SDNode<"RISCVISD::FADD_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
def riscv_fsub_vl : SDNode<"RISCVISD::FSUB_VL", SDT_RISCVFPBinOp_VL>;
def riscv_fmul_vl : SDNode<"RISCVISD::FMUL_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
def riscv_fdiv_vl : SDNode<"RISCVISD::FDIV_VL", SDT_RISCVFPBinOp_VL>;
def riscv_fneg_vl : SDNode<"RISCVISD::FNEG_VL", SDT_RISCVFPUnOp_VL>;

def SDT_RISCVVMSETCLR_VL : SDTypeProfile<1, 1, [SDTCisVec<0>,
                                                SDTCVecEltisVT<0, i1>,
                                                SDTCisVT<1, XLenVT>]>;
def riscv_vmclr_vl : SDNode<"RISCVISD::VMCLR_VL", SDT_RISCVVMSETCLR_VL>;
def riscv_vmset_vl : SDNode<"RISCVISD::VMSET_VL", SDT_RISCVVMSETCLR_VL>;

def true_mask : PatLeaf<(riscv_vmset_vl (XLenVT srcvalue))>;

class VPatBinaryVL_VV<SDNode vop,
                      string instruction_name,
                      ValueType result_type,
                      ValueType op_type,
                      ValueType mask_type,
                      int sew,
                      LMULInfo vlmul,
                      VReg RetClass,
                      VReg op_reg_class> :
    Pat<(result_type (vop
                      (op_type op_reg_class:$rs1),
                      (op_type op_reg_class:$rs2),
                      (mask_type true_mask),
                      (XLenVT (VLOp GPR:$vl)))),
        (!cast<Instruction>(instruction_name#"_VV_"# vlmul.MX)
                      op_reg_class:$rs1,
                      op_reg_class:$rs2,
                      GPR:$vl, sew)>;

multiclass VPatBinaryVL_VV_VX<SDNode vop, string instruction_name> {
  foreach vti = AllIntegerVectors in {
    def : VPatBinaryVL_VV<vop, instruction_name,
                          vti.Vector, vti.Vector, vti.Mask, vti.SEW,
                          vti.LMul, vti.RegClass, vti.RegClass>;
    // FIXME: Support splats.
  }
}

multiclass VPatBinaryVL_VV_VX_VI<SDNode vop, string instruction_name,
                                 Operand ImmType = simm5> {
  foreach vti = AllIntegerVectors in {
    def : VPatBinaryVL_VV<vop, instruction_name,
                          vti.Vector, vti.Vector, vti.Mask, vti.SEW,
                          vti.LMul, vti.RegClass, vti.RegClass>;
    // FIXME: Support splats.
  }
}

multiclass VPatBinaryFPVL_VV_VF<SDNode vop, string instruction_name> {
  foreach vti = AllFloatVectors in {
    def : VPatBinaryVL_VV<vop, instruction_name,
                          vti.Vector, vti.Vector, vti.Mask, vti.SEW,
                          vti.LMul, vti.RegClass, vti.RegClass>;
    // FIXME: Support splats.
  }
}

//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//

let Predicates = [HasStdExtV] in {

// 7.4. Vector Unit-Stride Instructions
foreach vti = AllVectors in {
  defvar load_instr = !cast<Instruction>("PseudoVLE"#vti.SEW#"_V_"#vti.LMul.MX);
  defvar store_instr = !cast<Instruction>("PseudoVSE"#vti.SEW#"_V_"#vti.LMul.MX);
  // Load
  def : Pat<(vti.Vector (riscv_vle_vl RVVBaseAddr:$rs1, (XLenVT (VLOp GPR:$vl)))),
            (load_instr RVVBaseAddr:$rs1, GPR:$vl, vti.SEW)>;
  // Store
  def : Pat<(riscv_vse_vl (vti.Vector vti.RegClass:$rs2), RVVBaseAddr:$rs1, (XLenVT (VLOp GPR:$vl))),
            (store_instr vti.RegClass:$rs2, RVVBaseAddr:$rs1, GPR:$vl, vti.SEW)>;
}

// 12.1. Vector Single-Width Integer Add and Subtract
defm "" : VPatBinaryVL_VV_VX_VI<riscv_add_vl, "PseudoVADD">;
defm "" : VPatBinaryVL_VV_VX<riscv_sub_vl, "PseudoVSUB">;

// 12.5. Vector Bitwise Logical Instructions
defm "" : VPatBinaryVL_VV_VX_VI<riscv_and_vl, "PseudoVAND">;
defm "" : VPatBinaryVL_VV_VX_VI<riscv_or_vl, "PseudoVOR">;
defm "" : VPatBinaryVL_VV_VX_VI<riscv_xor_vl, "PseudoVXOR">;

// 12.6. Vector Single-Width Bit Shift Instructions
defm "" : VPatBinaryVL_VV_VX_VI<riscv_shl_vl, "PseudoVSLL", uimm5>;
defm "" : VPatBinaryVL_VV_VX_VI<riscv_srl_vl, "PseudoVSRL", uimm5>;
defm "" : VPatBinaryVL_VV_VX_VI<riscv_sra_vl, "PseudoVSRA", uimm5>;

// 12.10. Vector Single-Width Integer Multiply Instructions
defm "" : VPatBinaryVL_VV_VX<riscv_mul_vl, "PseudoVMUL">;

// 12.11. Vector Integer Divide Instructions
defm "" : VPatBinaryVL_VV_VX<riscv_udiv_vl, "PseudoVDIVU">;
defm "" : VPatBinaryVL_VV_VX<riscv_sdiv_vl, "PseudoVDIV">;
defm "" : VPatBinaryVL_VV_VX<riscv_urem_vl, "PseudoVREMU">;
defm "" : VPatBinaryVL_VV_VX<riscv_srem_vl, "PseudoVREM">;

} // Predicates = [HasStdExtV]

let Predicates = [HasStdExtV, HasStdExtF] in {

// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
defm "" : VPatBinaryFPVL_VV_VF<riscv_fadd_vl, "PseudoVFADD">;
defm "" : VPatBinaryFPVL_VV_VF<riscv_fsub_vl, "PseudoVFSUB">;

// 14.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
defm "" : VPatBinaryFPVL_VV_VF<riscv_fmul_vl, "PseudoVFMUL">;
defm "" : VPatBinaryFPVL_VV_VF<riscv_fdiv_vl, "PseudoVFDIV">;

// 14.10. Vector Floating-Point Sign-Injection Instructions
// Handle fneg with VFSGNJN using the same input for both operands.
foreach vti = AllFloatVectors in {
  def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask true_mask),
                           (XLenVT (VLOp GPR:$vl))),
            (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
                 vti.RegClass:$rs, vti.RegClass:$rs, GPR:$vl, vti.SEW)>;
}

} // Predicates = [HasStdExtV, HasStdExtF]
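As a concrete instance of the multiclasses above (an illustration using the existing AllIntegerVectors records, not text from the patch): for the LMUL=1 i32 type, where vti.SEW = 32 and vti.LMul.MX = "M1", VPatBinaryVL_VV expands riscv_add_vl to roughly

  def : Pat<(nxv2i32 (riscv_add_vl VR:$rs1, VR:$rs2, (nxv2i1 true_mask),
                                   (XLenVT (VLOp GPR:$vl)))),
            (PseudoVADD_VV_M1 VR:$rs1, VR:$rs2, GPR:$vl, 32)>;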
@@ -27,6 +27,18 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "RISCVGenSubtargetInfo.inc"

static cl::opt<unsigned> RVVVectorBitsMin(
    "riscv-v-vector-bits-min",
    cl::desc("Assume V extension vector registers are at least this big, "
             "with zero meaning no minimum size is assumed."),
    cl::init(0), cl::Hidden);

static cl::opt<unsigned> RVVVectorLMULMax(
    "riscv-v-fixed-length-vector-lmul-max",
    cl::desc("The maximum LMUL value to use for fixed length vectors. "
             "Fractional LMUL values are not supported."),
    cl::init(8), cl::Hidden);

void RISCVSubtarget::anchor() {}

RISCVSubtarget &RISCVSubtarget::initializeSubtargetDependencies(
@@ -81,3 +93,30 @@ const LegalizerInfo *RISCVSubtarget::getLegalizerInfo() const {
const RegisterBankInfo *RISCVSubtarget::getRegBankInfo() const {
  return RegBankInfo.get();
}

unsigned RISCVSubtarget::getMinRVVVectorSizeInBits() const {
  assert(hasStdExtV() &&
         "Tried to get vector length without V extension support!");
  assert((RVVVectorBitsMin == 0 ||
          (RVVVectorBitsMin >= 128 && isPowerOf2_32(RVVVectorBitsMin))) &&
         "V extension requires vector length to be at least 128 and a power of "
         "2!");
  return PowerOf2Floor(RVVVectorBitsMin < 128 ? 0 : RVVVectorBitsMin);
}

unsigned RISCVSubtarget::getMaxLMULForFixedLengthVectors() const {
  assert(hasStdExtV() &&
         "Tried to get maximum LMUL without V extension support!");
  assert(RVVVectorLMULMax <= 8 && isPowerOf2_32(RVVVectorLMULMax) &&
         "V extension requires a LMUL to be at most 8 and a power of 2!");
  return PowerOf2Floor(std::max<unsigned>(RVVVectorLMULMax, 1));
}

bool RISCVSubtarget::useRVVForFixedLengthVectors() const {
  return hasStdExtV() && getMinRVVVectorSizeInBits() != 0;
}

unsigned RISCVSubtarget::getLMULForFixedLengthVector(MVT VT) const {
  unsigned MinVLen = getMinRVVVectorSizeInBits();
  return divideCeil(VT.getSizeInBits(), MinVLen);
}
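A quick worked example of how the two options interact (illustration only): with -riscv-v-vector-bits-min=128, a v16f16 is 16 * 16 = 256 bits, so getLMULForFixedLengthVector returns divideCeil(256, 128) = 2. Under -riscv-v-fixed-length-vector-lmul-max=2 the type is handled as a single m2 register group; under -riscv-v-fixed-length-vector-lmul-max=1 it exceeds the cap, useRVVForFixedLengthVectorVT rejects it, and type legalization instead splits it into two m1 halves. This is exactly the LMULMAX2/LMULMAX1 split exercised by the RUN lines in the test below.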
@@ -147,6 +147,14 @@ public:
  InstructionSelector *getInstructionSelector() const override;
  const LegalizerInfo *getLegalizerInfo() const override;
  const RegisterBankInfo *getRegBankInfo() const override;

  // Return the known range for the bit length of RVV data registers. A value
  // of 0 means nothing is known about that particular limit beyond what's
  // implied by the architecture.
  unsigned getMinRVVVectorSizeInBits() const;
  unsigned getLMULForFixedLengthVector(MVT VT) const;
  unsigned getMaxLMULForFixedLengthVectors() const;
  bool useRVVForFixedLengthVectors() const;
};
} // End llvm namespace
@ -0,0 +1,926 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
|
||||
; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
|
||||
|
||||
define void @fadd_v8f16(<8 x half>* %x, <8 x half>* %y) {
|
||||
; CHECK-LABEL: fadd_v8f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a2, zero, 8
|
||||
; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
|
||||
; CHECK-NEXT: vle16.v v25, (a0)
|
||||
; CHECK-NEXT: vle16.v v26, (a1)
|
||||
; CHECK-NEXT: vfadd.vv v25, v25, v26
|
||||
; CHECK-NEXT: vse16.v v25, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <8 x half>, <8 x half>* %x
|
||||
%b = load <8 x half>, <8 x half>* %y
|
||||
%c = fadd <8 x half> %a, %b
|
||||
store <8 x half> %c, <8 x half>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fadd_v4f32(<4 x float>* %x, <4 x float>* %y) {
|
||||
; CHECK-LABEL: fadd_v4f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a2, zero, 4
|
||||
; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
|
||||
; CHECK-NEXT: vle32.v v25, (a0)
|
||||
; CHECK-NEXT: vle32.v v26, (a1)
|
||||
; CHECK-NEXT: vfadd.vv v25, v25, v26
|
||||
; CHECK-NEXT: vse32.v v25, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <4 x float>, <4 x float>* %x
|
||||
%b = load <4 x float>, <4 x float>* %y
|
||||
%c = fadd <4 x float> %a, %b
|
||||
store <4 x float> %c, <4 x float>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fadd_v2f64(<2 x double>* %x, <2 x double>* %y) {
|
||||
; CHECK-LABEL: fadd_v2f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a2, zero, 2
|
||||
; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
|
||||
; CHECK-NEXT: vle64.v v25, (a0)
|
||||
; CHECK-NEXT: vle64.v v26, (a1)
|
||||
; CHECK-NEXT: vfadd.vv v25, v25, v26
|
||||
; CHECK-NEXT: vse64.v v25, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <2 x double>, <2 x double>* %x
|
||||
%b = load <2 x double>, <2 x double>* %y
|
||||
%c = fadd <2 x double> %a, %b
|
||||
store <2 x double> %c, <2 x double>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fsub_v8f16(<8 x half>* %x, <8 x half>* %y) {
|
||||
; CHECK-LABEL: fsub_v8f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a2, zero, 8
|
||||
; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
|
||||
; CHECK-NEXT: vle16.v v25, (a0)
|
||||
; CHECK-NEXT: vle16.v v26, (a1)
|
||||
; CHECK-NEXT: vfsub.vv v25, v25, v26
|
||||
; CHECK-NEXT: vse16.v v25, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <8 x half>, <8 x half>* %x
|
||||
%b = load <8 x half>, <8 x half>* %y
|
||||
%c = fsub <8 x half> %a, %b
|
||||
store <8 x half> %c, <8 x half>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fsub_v4f32(<4 x float>* %x, <4 x float>* %y) {
|
||||
; CHECK-LABEL: fsub_v4f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a2, zero, 4
|
||||
; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
|
||||
; CHECK-NEXT: vle32.v v25, (a0)
|
||||
; CHECK-NEXT: vle32.v v26, (a1)
|
||||
; CHECK-NEXT: vfsub.vv v25, v25, v26
|
||||
; CHECK-NEXT: vse32.v v25, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <4 x float>, <4 x float>* %x
|
||||
%b = load <4 x float>, <4 x float>* %y
|
||||
%c = fsub <4 x float> %a, %b
|
||||
store <4 x float> %c, <4 x float>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fsub_v2f64(<2 x double>* %x, <2 x double>* %y) {
|
||||
; CHECK-LABEL: fsub_v2f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a2, zero, 2
|
||||
; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
|
||||
; CHECK-NEXT: vle64.v v25, (a0)
|
||||
; CHECK-NEXT: vle64.v v26, (a1)
|
||||
; CHECK-NEXT: vfsub.vv v25, v25, v26
|
||||
; CHECK-NEXT: vse64.v v25, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <2 x double>, <2 x double>* %x
|
||||
%b = load <2 x double>, <2 x double>* %y
|
||||
%c = fsub <2 x double> %a, %b
|
||||
store <2 x double> %c, <2 x double>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fmul_v8f16(<8 x half>* %x, <8 x half>* %y) {
|
||||
; CHECK-LABEL: fmul_v8f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a2, zero, 8
|
||||
; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
|
||||
; CHECK-NEXT: vle16.v v25, (a0)
|
||||
; CHECK-NEXT: vle16.v v26, (a1)
|
||||
; CHECK-NEXT: vfmul.vv v25, v25, v26
|
||||
; CHECK-NEXT: vse16.v v25, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <8 x half>, <8 x half>* %x
|
||||
%b = load <8 x half>, <8 x half>* %y
|
||||
%c = fmul <8 x half> %a, %b
|
||||
store <8 x half> %c, <8 x half>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fmul_v4f32(<4 x float>* %x, <4 x float>* %y) {
|
||||
; CHECK-LABEL: fmul_v4f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a2, zero, 4
|
||||
; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
|
||||
; CHECK-NEXT: vle32.v v25, (a0)
|
||||
; CHECK-NEXT: vle32.v v26, (a1)
|
||||
; CHECK-NEXT: vfmul.vv v25, v25, v26
|
||||
; CHECK-NEXT: vse32.v v25, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <4 x float>, <4 x float>* %x
|
||||
%b = load <4 x float>, <4 x float>* %y
|
||||
%c = fmul <4 x float> %a, %b
|
||||
store <4 x float> %c, <4 x float>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fmul_v2f64(<2 x double>* %x, <2 x double>* %y) {
|
||||
; CHECK-LABEL: fmul_v2f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a2, zero, 2
|
||||
; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
|
||||
; CHECK-NEXT: vle64.v v25, (a0)
|
||||
; CHECK-NEXT: vle64.v v26, (a1)
|
||||
; CHECK-NEXT: vfmul.vv v25, v25, v26
|
||||
; CHECK-NEXT: vse64.v v25, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <2 x double>, <2 x double>* %x
|
||||
%b = load <2 x double>, <2 x double>* %y
|
||||
%c = fmul <2 x double> %a, %b
|
||||
store <2 x double> %c, <2 x double>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fdiv_v8f16(<8 x half>* %x, <8 x half>* %y) {
|
||||
; CHECK-LABEL: fdiv_v8f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a2, zero, 8
|
||||
; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
|
||||
; CHECK-NEXT: vle16.v v25, (a0)
|
||||
; CHECK-NEXT: vle16.v v26, (a1)
|
||||
; CHECK-NEXT: vfdiv.vv v25, v25, v26
|
||||
; CHECK-NEXT: vse16.v v25, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <8 x half>, <8 x half>* %x
|
||||
%b = load <8 x half>, <8 x half>* %y
|
||||
%c = fdiv <8 x half> %a, %b
|
||||
store <8 x half> %c, <8 x half>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fdiv_v4f32(<4 x float>* %x, <4 x float>* %y) {
|
||||
; CHECK-LABEL: fdiv_v4f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a2, zero, 4
|
||||
; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
|
||||
; CHECK-NEXT: vle32.v v25, (a0)
|
||||
; CHECK-NEXT: vle32.v v26, (a1)
|
||||
; CHECK-NEXT: vfdiv.vv v25, v25, v26
|
||||
; CHECK-NEXT: vse32.v v25, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <4 x float>, <4 x float>* %x
|
||||
%b = load <4 x float>, <4 x float>* %y
|
||||
%c = fdiv <4 x float> %a, %b
|
||||
store <4 x float> %c, <4 x float>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fdiv_v2f64(<2 x double>* %x, <2 x double>* %y) {
|
||||
; CHECK-LABEL: fdiv_v2f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a2, zero, 2
|
||||
; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu
|
||||
; CHECK-NEXT: vle64.v v25, (a0)
|
||||
; CHECK-NEXT: vle64.v v26, (a1)
|
||||
; CHECK-NEXT: vfdiv.vv v25, v25, v26
|
||||
; CHECK-NEXT: vse64.v v25, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <2 x double>, <2 x double>* %x
|
||||
%b = load <2 x double>, <2 x double>* %y
|
||||
%c = fdiv <2 x double> %a, %b
|
||||
store <2 x double> %c, <2 x double>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fneg_v8f16(<8 x half>* %x) {
|
||||
; CHECK-LABEL: fneg_v8f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a1, zero, 8
|
||||
; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
|
||||
; CHECK-NEXT: vle16.v v25, (a0)
|
||||
; CHECK-NEXT: vfsgnjn.vv v25, v25, v25
|
||||
; CHECK-NEXT: vse16.v v25, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <8 x half>, <8 x half>* %x
|
||||
%b = fneg <8 x half> %a
|
||||
store <8 x half> %b, <8 x half>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fneg_v4f32(<4 x float>* %x) {
|
||||
; CHECK-LABEL: fneg_v4f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a1, zero, 4
|
||||
; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
|
||||
; CHECK-NEXT: vle32.v v25, (a0)
|
||||
; CHECK-NEXT: vfsgnjn.vv v25, v25, v25
|
||||
; CHECK-NEXT: vse32.v v25, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <4 x float>, <4 x float>* %x
|
||||
%b = fneg <4 x float> %a
|
||||
store <4 x float> %b, <4 x float>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fneg_v2f64(<2 x double>* %x) {
|
||||
; CHECK-LABEL: fneg_v2f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi a1, zero, 2
|
||||
; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu
|
||||
; CHECK-NEXT: vle64.v v25, (a0)
|
||||
; CHECK-NEXT: vfsgnjn.vv v25, v25, v25
|
||||
; CHECK-NEXT: vse64.v v25, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <2 x double>, <2 x double>* %x
|
||||
%b = fneg <2 x double> %a
|
||||
store <2 x double> %b, <2 x double>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fadd_v16f16(<16 x half>* %x, <16 x half>* %y) {
|
||||
; LMULMAX2-LABEL: fadd_v16f16:
|
||||
; LMULMAX2: # %bb.0:
|
||||
; LMULMAX2-NEXT: addi a2, zero, 16
|
||||
; LMULMAX2-NEXT: vsetvli a2, a2, e16,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vle16.v v26, (a0)
|
||||
; LMULMAX2-NEXT: vle16.v v28, (a1)
|
||||
; LMULMAX2-NEXT: vfadd.vv v26, v26, v28
|
||||
; LMULMAX2-NEXT: vse16.v v26, (a0)
|
||||
; LMULMAX2-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-RV32-LABEL: fadd_v16f16:
|
||||
; LMULMAX1-RV32: # %bb.0:
|
||||
; LMULMAX1-RV32-NEXT: addi a2, zero, 8
|
||||
; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e16,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vle16.v v25, (a0)
|
||||
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
|
||||
; LMULMAX1-RV32-NEXT: vle16.v v26, (a2)
|
||||
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
|
||||
; LMULMAX1-RV32-NEXT: vle16.v v27, (a3)
|
||||
; LMULMAX1-RV32-NEXT: vle16.v v28, (a1)
|
||||
; LMULMAX1-RV32-NEXT: vfadd.vv v26, v26, v27
|
||||
; LMULMAX1-RV32-NEXT: vfadd.vv v25, v25, v28
|
||||
; LMULMAX1-RV32-NEXT: vse16.v v25, (a0)
|
||||
; LMULMAX1-RV32-NEXT: vse16.v v26, (a2)
|
||||
; LMULMAX1-RV32-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-RV64-LABEL: fadd_v16f16:
|
||||
; LMULMAX1-RV64: # %bb.0:
|
||||
; LMULMAX1-RV64-NEXT: addi a2, zero, 8
|
||||
; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e16,m1,ta,mu
|
||||
; LMULMAX1-RV64-NEXT: vle16.v v25, (a0)
|
||||
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
|
||||
; LMULMAX1-RV64-NEXT: vle16.v v26, (a2)
|
||||
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
|
||||
; LMULMAX1-RV64-NEXT: vle16.v v27, (a2)
|
||||
; LMULMAX1-RV64-NEXT: vle16.v v28, (a1)
|
||||
; LMULMAX1-RV64-NEXT: vfadd.vv v26, v27, v26
|
||||
; LMULMAX1-RV64-NEXT: vfadd.vv v25, v25, v28
|
||||
; LMULMAX1-RV64-NEXT: vse16.v v25, (a0)
|
||||
; LMULMAX1-RV64-NEXT: vse16.v v26, (a2)
|
||||
; LMULMAX1-RV64-NEXT: ret
|
||||
%a = load <16 x half>, <16 x half>* %x
|
||||
%b = load <16 x half>, <16 x half>* %y
|
||||
%c = fadd <16 x half> %a, %b
|
||||
store <16 x half> %c, <16 x half>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fadd_v8f32(<8 x float>* %x, <8 x float>* %y) {
|
||||
; LMULMAX2-LABEL: fadd_v8f32:
|
||||
; LMULMAX2: # %bb.0:
|
||||
; LMULMAX2-NEXT: addi a2, zero, 8
|
||||
; LMULMAX2-NEXT: vsetvli a2, a2, e32,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vle32.v v26, (a0)
|
||||
; LMULMAX2-NEXT: vle32.v v28, (a1)
|
||||
; LMULMAX2-NEXT: vfadd.vv v26, v26, v28
|
||||
; LMULMAX2-NEXT: vse32.v v26, (a0)
|
||||
; LMULMAX2-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-RV32-LABEL: fadd_v8f32:
|
||||
; LMULMAX1-RV32: # %bb.0:
|
||||
; LMULMAX1-RV32-NEXT: addi a2, zero, 4
|
||||
; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e32,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vle32.v v25, (a0)
|
||||
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
|
||||
; LMULMAX1-RV32-NEXT: vle32.v v26, (a2)
|
||||
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
|
||||
; LMULMAX1-RV32-NEXT: vle32.v v27, (a3)
|
||||
; LMULMAX1-RV32-NEXT: vle32.v v28, (a1)
|
||||
; LMULMAX1-RV32-NEXT: vfadd.vv v26, v26, v27
|
||||
; LMULMAX1-RV32-NEXT: vfadd.vv v25, v25, v28
|
||||
; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
|
||||
; LMULMAX1-RV32-NEXT: vse32.v v26, (a2)
|
||||
; LMULMAX1-RV32-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-RV64-LABEL: fadd_v8f32:
|
||||
; LMULMAX1-RV64: # %bb.0:
|
||||
; LMULMAX1-RV64-NEXT: addi a2, zero, 4
|
||||
; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e32,m1,ta,mu
|
||||
; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
|
||||
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
|
||||
; LMULMAX1-RV64-NEXT: vle32.v v26, (a2)
|
||||
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
|
||||
; LMULMAX1-RV64-NEXT: vle32.v v27, (a2)
|
||||
; LMULMAX1-RV64-NEXT: vle32.v v28, (a1)
|
||||
; LMULMAX1-RV64-NEXT: vfadd.vv v26, v27, v26
|
||||
; LMULMAX1-RV64-NEXT: vfadd.vv v25, v25, v28
|
||||
; LMULMAX1-RV64-NEXT: vse32.v v25, (a0)
|
||||
; LMULMAX1-RV64-NEXT: vse32.v v26, (a2)
|
||||
; LMULMAX1-RV64-NEXT: ret
|
||||
%a = load <8 x float>, <8 x float>* %x
|
||||
%b = load <8 x float>, <8 x float>* %y
|
||||
%c = fadd <8 x float> %a, %b
|
||||
store <8 x float> %c, <8 x float>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fadd_v4f64(<4 x double>* %x, <4 x double>* %y) {
|
||||
; LMULMAX2-LABEL: fadd_v4f64:
|
||||
; LMULMAX2: # %bb.0:
|
||||
; LMULMAX2-NEXT: addi a2, zero, 4
|
||||
; LMULMAX2-NEXT: vsetvli a2, a2, e64,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vle64.v v26, (a0)
|
||||
; LMULMAX2-NEXT: vle64.v v28, (a1)
|
||||
; LMULMAX2-NEXT: vfadd.vv v26, v26, v28
|
||||
; LMULMAX2-NEXT: vse64.v v26, (a0)
|
||||
; LMULMAX2-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-RV32-LABEL: fadd_v4f64:
|
||||
; LMULMAX1-RV32: # %bb.0:
|
||||
; LMULMAX1-RV32-NEXT: addi a2, zero, 2
|
||||
; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e64,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
|
||||
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
|
||||
; LMULMAX1-RV32-NEXT: vle64.v v26, (a2)
|
||||
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
|
||||
; LMULMAX1-RV32-NEXT: vle64.v v27, (a3)
|
||||
; LMULMAX1-RV32-NEXT: vle64.v v28, (a1)
|
||||
; LMULMAX1-RV32-NEXT: vfadd.vv v26, v26, v27
|
||||
; LMULMAX1-RV32-NEXT: vfadd.vv v25, v25, v28
|
||||
; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
|
||||
; LMULMAX1-RV32-NEXT: vse64.v v26, (a2)
|
||||
; LMULMAX1-RV32-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-RV64-LABEL: fadd_v4f64:
|
||||
; LMULMAX1-RV64: # %bb.0:
|
||||
; LMULMAX1-RV64-NEXT: addi a2, zero, 2
|
||||
; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e64,m1,ta,mu
|
||||
; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
|
||||
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
|
||||
; LMULMAX1-RV64-NEXT: vle64.v v26, (a2)
|
||||
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
|
||||
; LMULMAX1-RV64-NEXT: vle64.v v27, (a2)
|
||||
; LMULMAX1-RV64-NEXT: vle64.v v28, (a1)
|
||||
; LMULMAX1-RV64-NEXT: vfadd.vv v26, v27, v26
|
||||
; LMULMAX1-RV64-NEXT: vfadd.vv v25, v25, v28
|
||||
; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
|
||||
; LMULMAX1-RV64-NEXT: vse64.v v26, (a2)
|
||||
; LMULMAX1-RV64-NEXT: ret
|
||||
%a = load <4 x double>, <4 x double>* %x
|
||||
%b = load <4 x double>, <4 x double>* %y
|
||||
%c = fadd <4 x double> %a, %b
|
||||
store <4 x double> %c, <4 x double>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fsub_v16f16(<16 x half>* %x, <16 x half>* %y) {
|
||||
; LMULMAX2-LABEL: fsub_v16f16:
|
||||
; LMULMAX2: # %bb.0:
|
||||
; LMULMAX2-NEXT: addi a2, zero, 16
|
||||
; LMULMAX2-NEXT: vsetvli a2, a2, e16,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vle16.v v26, (a0)
|
||||
; LMULMAX2-NEXT: vle16.v v28, (a1)
|
||||
; LMULMAX2-NEXT: vfsub.vv v26, v26, v28
|
||||
; LMULMAX2-NEXT: vse16.v v26, (a0)
|
||||
; LMULMAX2-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-RV32-LABEL: fsub_v16f16:
|
||||
; LMULMAX1-RV32: # %bb.0:
|
||||
; LMULMAX1-RV32-NEXT: addi a2, zero, 8
|
||||
; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e16,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vle16.v v25, (a0)
|
||||
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
|
||||
; LMULMAX1-RV32-NEXT: vle16.v v26, (a2)
|
||||
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
|
||||
; LMULMAX1-RV32-NEXT: vle16.v v27, (a3)
|
||||
; LMULMAX1-RV32-NEXT: vle16.v v28, (a1)
|
||||
; LMULMAX1-RV32-NEXT: vfsub.vv v26, v26, v27
|
||||
; LMULMAX1-RV32-NEXT: vfsub.vv v25, v25, v28
|
||||
; LMULMAX1-RV32-NEXT: vse16.v v25, (a0)
|
||||
; LMULMAX1-RV32-NEXT: vse16.v v26, (a2)
|
||||
; LMULMAX1-RV32-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-RV64-LABEL: fsub_v16f16:
|
||||
; LMULMAX1-RV64: # %bb.0:
|
||||
; LMULMAX1-RV64-NEXT: addi a2, zero, 8
|
||||
; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e16,m1,ta,mu
|
||||
; LMULMAX1-RV64-NEXT: vle16.v v25, (a0)
|
||||
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
|
||||
; LMULMAX1-RV64-NEXT: vle16.v v26, (a2)
|
||||
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
|
||||
; LMULMAX1-RV64-NEXT: vle16.v v27, (a2)
|
||||
; LMULMAX1-RV64-NEXT: vle16.v v28, (a1)
|
||||
; LMULMAX1-RV64-NEXT: vfsub.vv v26, v27, v26
|
||||
; LMULMAX1-RV64-NEXT: vfsub.vv v25, v25, v28
|
||||
; LMULMAX1-RV64-NEXT: vse16.v v25, (a0)
|
||||
; LMULMAX1-RV64-NEXT: vse16.v v26, (a2)
|
||||
; LMULMAX1-RV64-NEXT: ret
|
||||
%a = load <16 x half>, <16 x half>* %x
|
||||
%b = load <16 x half>, <16 x half>* %y
|
||||
%c = fsub <16 x half> %a, %b
|
||||
store <16 x half> %c, <16 x half>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fsub_v8f32(<8 x float>* %x, <8 x float>* %y) {
|
||||
; LMULMAX2-LABEL: fsub_v8f32:
|
||||
; LMULMAX2: # %bb.0:
|
||||
; LMULMAX2-NEXT: addi a2, zero, 8
|
||||
; LMULMAX2-NEXT: vsetvli a2, a2, e32,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vle32.v v26, (a0)
|
||||
; LMULMAX2-NEXT: vle32.v v28, (a1)
|
||||
; LMULMAX2-NEXT: vfsub.vv v26, v26, v28
|
||||
; LMULMAX2-NEXT: vse32.v v26, (a0)
|
||||
; LMULMAX2-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-RV32-LABEL: fsub_v8f32:
|
||||
; LMULMAX1-RV32: # %bb.0:
|
||||
; LMULMAX1-RV32-NEXT: addi a2, zero, 4
|
||||
; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e32,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vle32.v v25, (a0)
|
||||
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
|
||||
; LMULMAX1-RV32-NEXT: vle32.v v26, (a2)
|
||||
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
|
||||
; LMULMAX1-RV32-NEXT: vle32.v v27, (a3)
|
||||
; LMULMAX1-RV32-NEXT: vle32.v v28, (a1)
|
||||
; LMULMAX1-RV32-NEXT: vfsub.vv v26, v26, v27
|
||||
; LMULMAX1-RV32-NEXT: vfsub.vv v25, v25, v28
|
||||
; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
|
||||
; LMULMAX1-RV32-NEXT: vse32.v v26, (a2)
|
||||
; LMULMAX1-RV32-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-RV64-LABEL: fsub_v8f32:
|
||||
; LMULMAX1-RV64: # %bb.0:
|
||||
; LMULMAX1-RV64-NEXT: addi a2, zero, 4
|
||||
; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e32,m1,ta,mu
|
||||
; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
|
||||
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
|
||||
; LMULMAX1-RV64-NEXT: vle32.v v26, (a2)
|
||||
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
|
||||
; LMULMAX1-RV64-NEXT: vle32.v v27, (a2)
|
||||
; LMULMAX1-RV64-NEXT: vle32.v v28, (a1)
|
||||
; LMULMAX1-RV64-NEXT: vfsub.vv v26, v27, v26
|
||||
; LMULMAX1-RV64-NEXT: vfsub.vv v25, v25, v28
|
||||
; LMULMAX1-RV64-NEXT: vse32.v v25, (a0)
|
||||
; LMULMAX1-RV64-NEXT: vse32.v v26, (a2)
|
||||
; LMULMAX1-RV64-NEXT: ret
|
||||
%a = load <8 x float>, <8 x float>* %x
|
||||
%b = load <8 x float>, <8 x float>* %y
|
||||
%c = fsub <8 x float> %a, %b
|
||||
store <8 x float> %c, <8 x float>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fsub_v4f64(<4 x double>* %x, <4 x double>* %y) {
|
||||
; LMULMAX2-LABEL: fsub_v4f64:
|
||||
; LMULMAX2: # %bb.0:
|
||||
; LMULMAX2-NEXT: addi a2, zero, 4
|
||||
; LMULMAX2-NEXT: vsetvli a2, a2, e64,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vle64.v v26, (a0)
|
||||
; LMULMAX2-NEXT: vle64.v v28, (a1)
|
||||
; LMULMAX2-NEXT: vfsub.vv v26, v26, v28
|
||||
; LMULMAX2-NEXT: vse64.v v26, (a0)
|
||||
; LMULMAX2-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-RV32-LABEL: fsub_v4f64:
|
||||
; LMULMAX1-RV32: # %bb.0:
|
||||
; LMULMAX1-RV32-NEXT: addi a2, zero, 2
|
||||
; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e64,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
|
||||
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
|
||||
; LMULMAX1-RV32-NEXT: vle64.v v26, (a2)
|
||||
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
|
||||
; LMULMAX1-RV32-NEXT: vle64.v v27, (a3)
|
||||
; LMULMAX1-RV32-NEXT: vle64.v v28, (a1)
|
||||
; LMULMAX1-RV32-NEXT: vfsub.vv v26, v26, v27
|
||||
; LMULMAX1-RV32-NEXT: vfsub.vv v25, v25, v28
|
||||
; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
|
||||
; LMULMAX1-RV32-NEXT: vse64.v v26, (a2)
|
||||
; LMULMAX1-RV32-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-RV64-LABEL: fsub_v4f64:
|
||||
; LMULMAX1-RV64: # %bb.0:
|
||||
; LMULMAX1-RV64-NEXT: addi a2, zero, 2
|
||||
; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e64,m1,ta,mu
|
||||
; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
|
||||
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
|
||||
; LMULMAX1-RV64-NEXT: vle64.v v26, (a2)
|
||||
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
|
||||
; LMULMAX1-RV64-NEXT: vle64.v v27, (a2)
|
||||
; LMULMAX1-RV64-NEXT: vle64.v v28, (a1)
|
||||
; LMULMAX1-RV64-NEXT: vfsub.vv v26, v27, v26
|
||||
; LMULMAX1-RV64-NEXT: vfsub.vv v25, v25, v28
|
||||
; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
|
||||
; LMULMAX1-RV64-NEXT: vse64.v v26, (a2)
|
||||
; LMULMAX1-RV64-NEXT: ret
|
||||
%a = load <4 x double>, <4 x double>* %x
|
||||
%b = load <4 x double>, <4 x double>* %y
|
||||
%c = fsub <4 x double> %a, %b
|
||||
store <4 x double> %c, <4 x double>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fmul_v16f16(<16 x half>* %x, <16 x half>* %y) {
|
||||
; LMULMAX2-LABEL: fmul_v16f16:
|
||||
; LMULMAX2: # %bb.0:
|
||||
; LMULMAX2-NEXT: addi a2, zero, 16
|
||||
; LMULMAX2-NEXT: vsetvli a2, a2, e16,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vle16.v v26, (a0)
|
||||
; LMULMAX2-NEXT: vle16.v v28, (a1)
|
||||
; LMULMAX2-NEXT: vfmul.vv v26, v26, v28
|
||||
; LMULMAX2-NEXT: vse16.v v26, (a0)
|
||||
; LMULMAX2-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-RV32-LABEL: fmul_v16f16:
|
||||
; LMULMAX1-RV32: # %bb.0:
|
||||
; LMULMAX1-RV32-NEXT: addi a2, zero, 8
|
||||
; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e16,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vle16.v v25, (a0)
|
||||
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
|
||||
; LMULMAX1-RV32-NEXT: vle16.v v26, (a2)
|
||||
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
|
||||
; LMULMAX1-RV32-NEXT: vle16.v v27, (a3)
|
||||
; LMULMAX1-RV32-NEXT: vle16.v v28, (a1)
|
||||
; LMULMAX1-RV32-NEXT: vfmul.vv v26, v26, v27
|
||||
; LMULMAX1-RV32-NEXT: vfmul.vv v25, v25, v28
|
||||
; LMULMAX1-RV32-NEXT: vse16.v v25, (a0)
|
||||
; LMULMAX1-RV32-NEXT: vse16.v v26, (a2)
|
||||
; LMULMAX1-RV32-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-RV64-LABEL: fmul_v16f16:
|
||||
; LMULMAX1-RV64: # %bb.0:
|
||||
; LMULMAX1-RV64-NEXT: addi a2, zero, 8
|
||||
; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e16,m1,ta,mu
|
||||
; LMULMAX1-RV64-NEXT: vle16.v v25, (a0)
|
||||
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
|
||||
; LMULMAX1-RV64-NEXT: vle16.v v26, (a2)
|
||||
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
|
||||
; LMULMAX1-RV64-NEXT: vle16.v v27, (a2)
|
||||
; LMULMAX1-RV64-NEXT: vle16.v v28, (a1)
|
||||
; LMULMAX1-RV64-NEXT: vfmul.vv v26, v27, v26
|
||||
; LMULMAX1-RV64-NEXT: vfmul.vv v25, v25, v28
|
||||
; LMULMAX1-RV64-NEXT: vse16.v v25, (a0)
|
||||
; LMULMAX1-RV64-NEXT: vse16.v v26, (a2)
|
||||
; LMULMAX1-RV64-NEXT: ret
|
||||
%a = load <16 x half>, <16 x half>* %x
|
||||
%b = load <16 x half>, <16 x half>* %y
|
||||
%c = fmul <16 x half> %a, %b
|
||||
store <16 x half> %c, <16 x half>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fmul_v8f32(<8 x float>* %x, <8 x float>* %y) {
|
||||
; LMULMAX2-LABEL: fmul_v8f32:
|
||||
; LMULMAX2: # %bb.0:
|
||||
; LMULMAX2-NEXT: addi a2, zero, 8
|
||||
; LMULMAX2-NEXT: vsetvli a2, a2, e32,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vle32.v v26, (a0)
|
||||
; LMULMAX2-NEXT: vle32.v v28, (a1)
|
||||
; LMULMAX2-NEXT: vfmul.vv v26, v26, v28
|
||||
; LMULMAX2-NEXT: vse32.v v26, (a0)
|
||||
; LMULMAX2-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-RV32-LABEL: fmul_v8f32:
|
||||
; LMULMAX1-RV32: # %bb.0:
|
||||
; LMULMAX1-RV32-NEXT: addi a2, zero, 4
|
||||
; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e32,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vle32.v v25, (a0)
|
||||
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
|
||||
; LMULMAX1-RV32-NEXT: vle32.v v26, (a2)
|
||||
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
|
||||
; LMULMAX1-RV32-NEXT: vle32.v v27, (a3)
|
||||
; LMULMAX1-RV32-NEXT: vle32.v v28, (a1)
|
||||
; LMULMAX1-RV32-NEXT: vfmul.vv v26, v26, v27
|
||||
; LMULMAX1-RV32-NEXT: vfmul.vv v25, v25, v28
|
||||
; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
|
||||
; LMULMAX1-RV32-NEXT: vse32.v v26, (a2)
|
||||
; LMULMAX1-RV32-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-RV64-LABEL: fmul_v8f32:
|
||||
; LMULMAX1-RV64: # %bb.0:
|
||||
; LMULMAX1-RV64-NEXT: addi a2, zero, 4
|
||||
; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e32,m1,ta,mu
|
||||
; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
|
||||
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
|
||||
; LMULMAX1-RV64-NEXT: vle32.v v26, (a2)
|
||||
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
|
||||
; LMULMAX1-RV64-NEXT: vle32.v v27, (a2)
|
||||
; LMULMAX1-RV64-NEXT: vle32.v v28, (a1)
|
||||
; LMULMAX1-RV64-NEXT: vfmul.vv v26, v27, v26
|
||||
; LMULMAX1-RV64-NEXT: vfmul.vv v25, v25, v28
|
||||
; LMULMAX1-RV64-NEXT: vse32.v v25, (a0)
|
||||
; LMULMAX1-RV64-NEXT: vse32.v v26, (a2)
|
||||
; LMULMAX1-RV64-NEXT: ret
|
||||
%a = load <8 x float>, <8 x float>* %x
|
||||
%b = load <8 x float>, <8 x float>* %y
|
||||
%c = fmul <8 x float> %a, %b
|
||||
store <8 x float> %c, <8 x float>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fmul_v4f64(<4 x double>* %x, <4 x double>* %y) {
|
||||
; LMULMAX2-LABEL: fmul_v4f64:
|
||||
; LMULMAX2: # %bb.0:
|
||||
; LMULMAX2-NEXT: addi a2, zero, 4
|
||||
; LMULMAX2-NEXT: vsetvli a2, a2, e64,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vle64.v v26, (a0)
|
||||
; LMULMAX2-NEXT: vle64.v v28, (a1)
|
||||
; LMULMAX2-NEXT: vfmul.vv v26, v26, v28
|
||||
; LMULMAX2-NEXT: vse64.v v26, (a0)
|
||||
; LMULMAX2-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-RV32-LABEL: fmul_v4f64:
|
||||
; LMULMAX1-RV32: # %bb.0:
|
||||
; LMULMAX1-RV32-NEXT: addi a2, zero, 2
|
||||
; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e64,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
|
||||
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
|
||||
; LMULMAX1-RV32-NEXT: vle64.v v26, (a2)
|
||||
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
|
||||
; LMULMAX1-RV32-NEXT: vle64.v v27, (a3)
|
||||
; LMULMAX1-RV32-NEXT: vle64.v v28, (a1)
|
||||
; LMULMAX1-RV32-NEXT: vfmul.vv v26, v26, v27
|
||||
; LMULMAX1-RV32-NEXT: vfmul.vv v25, v25, v28
|
||||
; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
|
||||
; LMULMAX1-RV32-NEXT: vse64.v v26, (a2)
|
||||
; LMULMAX1-RV32-NEXT: ret
|
||||
;
|
||||
; LMULMAX1-RV64-LABEL: fmul_v4f64:
|
||||
; LMULMAX1-RV64: # %bb.0:
|
||||
; LMULMAX1-RV64-NEXT: addi a2, zero, 2
|
||||
; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e64,m1,ta,mu
|
||||
; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
|
||||
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
|
||||
; LMULMAX1-RV64-NEXT: vle64.v v26, (a2)
|
||||
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
|
||||
; LMULMAX1-RV64-NEXT: vle64.v v27, (a2)
|
||||
; LMULMAX1-RV64-NEXT: vle64.v v28, (a1)
|
||||
; LMULMAX1-RV64-NEXT: vfmul.vv v26, v27, v26
|
||||
; LMULMAX1-RV64-NEXT: vfmul.vv v25, v25, v28
|
||||
; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
|
||||
; LMULMAX1-RV64-NEXT: vse64.v v26, (a2)
|
||||
; LMULMAX1-RV64-NEXT: ret
|
||||
%a = load <4 x double>, <4 x double>* %x
|
||||
%b = load <4 x double>, <4 x double>* %y
|
||||
%c = fmul <4 x double> %a, %b
|
||||
store <4 x double> %c, <4 x double>* %x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fdiv_v16f16(<16 x half>* %x, <16 x half>* %y) {
; LMULMAX2-LABEL: fdiv_v16f16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a2, zero, 16
; LMULMAX2-NEXT: vsetvli a2, a2, e16,m2,ta,mu
; LMULMAX2-NEXT: vle16.v v26, (a0)
; LMULMAX2-NEXT: vle16.v v28, (a1)
; LMULMAX2-NEXT: vfdiv.vv v26, v26, v28
; LMULMAX2-NEXT: vse16.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: fdiv_v16f16:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: addi a2, zero, 8
; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e16,m1,ta,mu
; LMULMAX1-RV32-NEXT: vle16.v v25, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle16.v v26, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle16.v v27, (a3)
; LMULMAX1-RV32-NEXT: vle16.v v28, (a1)
; LMULMAX1-RV32-NEXT: vfdiv.vv v26, v26, v27
; LMULMAX1-RV32-NEXT: vfdiv.vv v25, v25, v28
; LMULMAX1-RV32-NEXT: vse16.v v25, (a0)
; LMULMAX1-RV32-NEXT: vse16.v v26, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: fdiv_v16f16:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: addi a2, zero, 8
; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e16,m1,ta,mu
; LMULMAX1-RV64-NEXT: vle16.v v25, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle16.v v26, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle16.v v27, (a2)
; LMULMAX1-RV64-NEXT: vle16.v v28, (a1)
; LMULMAX1-RV64-NEXT: vfdiv.vv v26, v27, v26
; LMULMAX1-RV64-NEXT: vfdiv.vv v25, v25, v28
; LMULMAX1-RV64-NEXT: vse16.v v25, (a0)
; LMULMAX1-RV64-NEXT: vse16.v v26, (a2)
; LMULMAX1-RV64-NEXT: ret
  %a = load <16 x half>, <16 x half>* %x
  %b = load <16 x half>, <16 x half>* %y
  %c = fdiv <16 x half> %a, %b
  store <16 x half> %c, <16 x half>* %x
  ret void
}

define void @fdiv_v8f32(<8 x float>* %x, <8 x float>* %y) {
; LMULMAX2-LABEL: fdiv_v8f32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a2, zero, 8
; LMULMAX2-NEXT: vsetvli a2, a2, e32,m2,ta,mu
; LMULMAX2-NEXT: vle32.v v26, (a0)
; LMULMAX2-NEXT: vle32.v v28, (a1)
; LMULMAX2-NEXT: vfdiv.vv v26, v26, v28
; LMULMAX2-NEXT: vse32.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: fdiv_v8f32:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: addi a2, zero, 4
; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e32,m1,ta,mu
; LMULMAX1-RV32-NEXT: vle32.v v25, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle32.v v26, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle32.v v27, (a3)
; LMULMAX1-RV32-NEXT: vle32.v v28, (a1)
; LMULMAX1-RV32-NEXT: vfdiv.vv v26, v26, v27
; LMULMAX1-RV32-NEXT: vfdiv.vv v25, v25, v28
; LMULMAX1-RV32-NEXT: vse32.v v25, (a0)
; LMULMAX1-RV32-NEXT: vse32.v v26, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: fdiv_v8f32:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: addi a2, zero, 4
; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e32,m1,ta,mu
; LMULMAX1-RV64-NEXT: vle32.v v25, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle32.v v26, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle32.v v27, (a2)
; LMULMAX1-RV64-NEXT: vle32.v v28, (a1)
; LMULMAX1-RV64-NEXT: vfdiv.vv v26, v27, v26
; LMULMAX1-RV64-NEXT: vfdiv.vv v25, v25, v28
; LMULMAX1-RV64-NEXT: vse32.v v25, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v26, (a2)
; LMULMAX1-RV64-NEXT: ret
  %a = load <8 x float>, <8 x float>* %x
  %b = load <8 x float>, <8 x float>* %y
  %c = fdiv <8 x float> %a, %b
  store <8 x float> %c, <8 x float>* %x
  ret void
}

define void @fdiv_v4f64(<4 x double>* %x, <4 x double>* %y) {
; LMULMAX2-LABEL: fdiv_v4f64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a2, zero, 4
; LMULMAX2-NEXT: vsetvli a2, a2, e64,m2,ta,mu
; LMULMAX2-NEXT: vle64.v v26, (a0)
; LMULMAX2-NEXT: vle64.v v28, (a1)
; LMULMAX2-NEXT: vfdiv.vv v26, v26, v28
; LMULMAX2-NEXT: vse64.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: fdiv_v4f64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: addi a2, zero, 2
; LMULMAX1-RV32-NEXT: vsetvli a2, a2, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vle64.v v25, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v26, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle64.v v27, (a3)
; LMULMAX1-RV32-NEXT: vle64.v v28, (a1)
; LMULMAX1-RV32-NEXT: vfdiv.vv v26, v26, v27
; LMULMAX1-RV32-NEXT: vfdiv.vv v25, v25, v28
; LMULMAX1-RV32-NEXT: vse64.v v25, (a0)
; LMULMAX1-RV32-NEXT: vse64.v v26, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: fdiv_v4f64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: addi a2, zero, 2
; LMULMAX1-RV64-NEXT: vsetvli a2, a2, e64,m1,ta,mu
; LMULMAX1-RV64-NEXT: vle64.v v25, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle64.v v26, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle64.v v27, (a2)
; LMULMAX1-RV64-NEXT: vle64.v v28, (a1)
; LMULMAX1-RV64-NEXT: vfdiv.vv v26, v27, v26
; LMULMAX1-RV64-NEXT: vfdiv.vv v25, v25, v28
; LMULMAX1-RV64-NEXT: vse64.v v25, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v26, (a2)
; LMULMAX1-RV64-NEXT: ret
  %a = load <4 x double>, <4 x double>* %x
  %b = load <4 x double>, <4 x double>* %y
  %c = fdiv <4 x double> %a, %b
  store <4 x double> %c, <4 x double>* %x
  ret void
}

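; Annotation (not part of the generated checks): fneg has no dedicated RVV
; instruction; it is lowered to vfsgnjn.vv with the same register for all
; operands (the vfneg.v idiom), which flips the sign bit in place. The
; LMULMAX1 configuration again processes the vector in two m1 halves, so it
; needs no separate -RV32/-RV64 prefixes here since the code is identical.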
define void @fneg_v16f16(<16 x half>* %x) {
; LMULMAX2-LABEL: fneg_v16f16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 16
; LMULMAX2-NEXT: vsetvli a1, a1, e16,m2,ta,mu
; LMULMAX2-NEXT: vle16.v v26, (a0)
; LMULMAX2-NEXT: vfsgnjn.vv v26, v26, v26
; LMULMAX2-NEXT: vse16.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: fneg_v16f16:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 8
; LMULMAX1-NEXT: vsetvli a1, a1, e16,m1,ta,mu
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vle16.v v25, (a1)
; LMULMAX1-NEXT: vle16.v v26, (a0)
; LMULMAX1-NEXT: vfsgnjn.vv v25, v25, v25
; LMULMAX1-NEXT: vfsgnjn.vv v26, v26, v26
; LMULMAX1-NEXT: vse16.v v26, (a0)
; LMULMAX1-NEXT: vse16.v v25, (a1)
; LMULMAX1-NEXT: ret
  %a = load <16 x half>, <16 x half>* %x
  %b = fneg <16 x half> %a
  store <16 x half> %b, <16 x half>* %x
  ret void
}

define void @fneg_v8f32(<8 x float>* %x) {
; LMULMAX2-LABEL: fneg_v8f32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 8
; LMULMAX2-NEXT: vsetvli a1, a1, e32,m2,ta,mu
; LMULMAX2-NEXT: vle32.v v26, (a0)
; LMULMAX2-NEXT: vfsgnjn.vv v26, v26, v26
; LMULMAX2-NEXT: vse32.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: fneg_v8f32:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 4
; LMULMAX1-NEXT: vsetvli a1, a1, e32,m1,ta,mu
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vle32.v v25, (a1)
; LMULMAX1-NEXT: vle32.v v26, (a0)
; LMULMAX1-NEXT: vfsgnjn.vv v25, v25, v25
; LMULMAX1-NEXT: vfsgnjn.vv v26, v26, v26
; LMULMAX1-NEXT: vse32.v v26, (a0)
; LMULMAX1-NEXT: vse32.v v25, (a1)
; LMULMAX1-NEXT: ret
  %a = load <8 x float>, <8 x float>* %x
  %b = fneg <8 x float> %a
  store <8 x float> %b, <8 x float>* %x
  ret void
}

define void @fneg_v4f64(<4 x double>* %x) {
; LMULMAX2-LABEL: fneg_v4f64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a1, zero, 4
; LMULMAX2-NEXT: vsetvli a1, a1, e64,m2,ta,mu
; LMULMAX2-NEXT: vle64.v v26, (a0)
; LMULMAX2-NEXT: vfsgnjn.vv v26, v26, v26
; LMULMAX2-NEXT: vse64.v v26, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: fneg_v4f64:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi a1, zero, 2
; LMULMAX1-NEXT: vsetvli a1, a1, e64,m1,ta,mu
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vle64.v v25, (a1)
; LMULMAX1-NEXT: vle64.v v26, (a0)
; LMULMAX1-NEXT: vfsgnjn.vv v25, v25, v25
; LMULMAX1-NEXT: vfsgnjn.vv v26, v26, v26
; LMULMAX1-NEXT: vse64.v v26, (a0)
; LMULMAX1-NEXT: vse64.v v25, (a1)
; LMULMAX1-NEXT: ret
  %a = load <4 x double>, <4 x double>* %x
  %b = fneg <4 x double> %a
  store <4 x double> %b, <4 x double>* %x
  ret void
}