forked from OSchip/llvm-project
[RISCV] Enable cross basic block aware vsetvli insertion
This patch extends D102737 to allow VL/VTYPE changes to be taken into account before adding an explicit vsetvli. We do this by using a data flow analysis to propagate VL/VTYPE information from predecessors until we've determined a value for every block in the function. We use this information to decide whether a vsetvli needs to be inserted before the first vector instruction in each block. Differential Revision: https://reviews.llvm.org/D102739
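As a rough illustration of the cross-block case this enables, consider a minimal sketch (a reduced variant of the test1 case added by this commit; the function name and labels are illustrative only): the vsetvli in the entry block establishes VL/VTYPE for both successors, so the pass no longer has to insert a vsetvli before the vector instruction in either branch.

declare i64 @llvm.riscv.vsetvli(i64, i64, i64)
declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, i64)
declare <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, i64)

define <vscale x 1 x double> @diamond(i64 %avl, i1 %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
entry:
  ; Sets VL from %avl with SEW=64, LMUL=1; both successors inherit this state.
  %vl = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  br i1 %cond, label %if.then, label %if.else

if.then:                                          ; no vsetvli needed here
  %add = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %vl)
  ret <vscale x 1 x double> %add

if.else:                                          ; no vsetvli needed here
  %sub = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %vl)
  ret <vscale x 1 x double> %sub
}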
This commit is contained in:
parent
ea91a8cbab
commit
1b47a3de48
@ -9,13 +9,17 @@
// This file implements a function pass that inserts VSETVLI instructions where
// needed.
//
-// The pass consists of a single pass over each basic block looking for changes
-// in VL/VTYPE usage that requires a vsetvli to be inserted. We assume the
-// VL/VTYPE values are unknown from predecessors so the first vector instruction
-// will always require a new VSETVLI.
-//
-// TODO: Future enhancements to this pass will take into account VL/VTYPE from
-// predecessors.
+// This pass consists of 3 phases:
+//
+// Phase 1 collects how each basic block affects VL/VTYPE.
+//
+// Phase 2 uses the information from phase 1 to do a data flow analysis to
+// propagate the VL/VTYPE changes through the function. This gives us the
+// VL/VTYPE at the start of each basic block.
+//
+// Phase 3 inserts VSETVLI instructions in each basic block. Information from
+// phase 2 is used to prevent inserting a VSETVLI before the first vector
+// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//
@ -23,6 +27,7 @@
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
@ -52,6 +57,12 @@ class VSETVLIInfo {
public:
  VSETVLIInfo() : AVLImm(0) {}

+  static VSETVLIInfo getUnknown() {
+    VSETVLIInfo Info;
+    Info.setUnknown();
+    return Info;
+  }
+
  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }
@ -148,12 +159,89 @@ public:

    return getAVLReg() == Other.getAVLReg();
  }

+  bool operator==(const VSETVLIInfo &Other) const {
+    // Uninitialized is only equal to another Uninitialized.
+    if (!isValid())
+      return !Other.isValid();
+    if (!Other.isValid())
+      return !isValid();
+
+    // Unknown is only equal to another Unknown.
+    if (isUnknown())
+      return Other.isUnknown();
+    if (Other.isUnknown())
+      return isUnknown();
+
+    // Otherwise compare the VTYPE and AVL.
+    return hasSameVTYPE(Other) && hasSameAVL(Other);
+  }
+
+  bool operator!=(const VSETVLIInfo &Other) const { return !(*this == Other); }
+
+  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
+  // both predecessors.
+  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
+    // If the new value isn't valid, ignore it.
+    if (!Other.isValid())
+      return *this;
+
+    // If this value isn't valid, this must be the first predecessor, use it.
+    if (!isValid())
+      return Other;
+
+    if (*this == Other)
+      return *this;
+
+    // If the configurations don't match, assume unknown.
+    return VSETVLIInfo::getUnknown();
+  }
+
+  // Calculate the VSETVLIInfo visible at the end of the block assuming this
+  // is the predecessor value, and Other is change for this block.
+  VSETVLIInfo merge(const VSETVLIInfo &Other) const {
+    assert(isValid() && "Can only merge with a valid VSETVLInfo");
+
+    // Nothing changed from the predecessor, keep it.
+    if (!Other.isValid())
+      return *this;
+
+    // If the change is compatible with the input, we won't create a VSETVLI
+    // and should keep the predecessor.
+    if (isCompatible(Other))
+      return *this;
+
+    // Otherwise just use whatever is in this block.
+    return Other;
+  }
};

+struct BlockData {
+  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
+  // made by this block. Calculated in Phase 1.
+  VSETVLIInfo Change;
+
+  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
+  // block. Calculated in Phase 2.
+  VSETVLIInfo Exit;
+
+  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
+  // blocks. Calculated in Phase 2, and used by Phase 3.
+  VSETVLIInfo Pred;
+
+  // Keeps track of whether the block is already in the queue.
+  bool InQueue = false;
+
+  BlockData() {}
+};
+
class RISCVInsertVSETVLI : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

+  std::vector<BlockData> BlockInfo;
+  std::queue<const MachineBasicBlock *> WorkList;
+
public:
  static char ID;
@ -170,10 +258,13 @@ public:
  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
+  bool needVSETVLI(const VSETVLIInfo &Require, const VSETVLIInfo &CurInfo);
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info);

-  bool emitVSETVLIs(MachineBasicBlock &MBB);
+  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
+  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
+  void emitVSETVLIs(MachineBasicBlock &MBB);
};

} // end anonymous namespace
@ -276,7 +367,7 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
-VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
+static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETVLI) {
    Register AVLReg = MI.getOperand(1).getReg();
@ -292,12 +383,111 @@ VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  return NewInfo;
}

-bool RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
-  bool MadeChange = false;
-
-  // Assume predecessor state is unknown.
+bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require,
+                                     const VSETVLIInfo &CurInfo) {
+  if (CurInfo.isCompatible(Require))
+    return false;
+
+  // We didn't find a compatible value. If our AVL is a virtual register,
+  // it might be defined by a VSET(I)VLI. If it has the same VTYPE we need
+  // and the last VL/VTYPE we observed is the same, we don't need a
+  // VSETVLI here.
+  if (!CurInfo.isUnknown() && Require.hasAVLReg() &&
+      Require.getAVLReg().isVirtual() && Require.hasSameVTYPE(CurInfo)) {
+    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
+      if (DefMI->getOpcode() == RISCV::PseudoVSETVLI ||
+          DefMI->getOpcode() == RISCV::PseudoVSETIVLI) {
+        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
+        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVTYPE(CurInfo))
+          return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
+  bool HadVectorOp = false;
+
+  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+  for (const MachineInstr &MI : MBB) {
+    // If this is an explicit VSETVLI or VSETIVLI, update our state.
+    if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
+        MI.getOpcode() == RISCV::PseudoVSETIVLI) {
+      HadVectorOp = true;
+      BBInfo.Change = getInfoForVSETVLI(MI);
+      continue;
+    }
+
+    uint64_t TSFlags = MI.getDesc().TSFlags;
+    if (RISCVII::hasSEWOp(TSFlags)) {
+      HadVectorOp = true;
+
+      VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
+
+      if (!BBInfo.Change.isValid()) {
+        BBInfo.Change = NewInfo;
+      } else {
+        // If this instruction isn't compatible with the previous VL/VTYPE
+        // we need to insert a VSETVLI.
+        if (needVSETVLI(NewInfo, BBInfo.Change))
+          BBInfo.Change = NewInfo;
+      }
+    }
+
+    // If this is something that updates VL/VTYPE that we don't know about, set
+    // the state to unknown.
+    if (MI.isCall() || MI.modifiesRegister(RISCV::VL) ||
+        MI.modifiesRegister(RISCV::VTYPE)) {
+      BBInfo.Change = VSETVLIInfo::getUnknown();
+    }
+  }
+
+  // Initial exit state is whatever change we found in the block.
+  BBInfo.Exit = BBInfo.Change;
+
+  return HadVectorOp;
+}
+
+void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
+  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+  BBInfo.InQueue = false;
+
+  VSETVLIInfo InInfo;
+  if (MBB.pred_empty()) {
+    // There are no predecessors, so use the default starting status.
+    InInfo.setUnknown();
+  } else {
+    for (MachineBasicBlock *P : MBB.predecessors())
+      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
+  }
+
+  // If we don't have any valid predecessor value, wait until we do.
+  if (!InInfo.isValid())
+    return;
+
+  BBInfo.Pred = InInfo;
+
+  VSETVLIInfo TmpStatus = BBInfo.Pred.merge(BBInfo.Change);
+
+  // If the new exit value matches the old exit value, we don't need to revisit
+  // any blocks.
+  if (BBInfo.Exit == TmpStatus)
+    return;
+
+  BBInfo.Exit = TmpStatus;
+
+  // Add the successors to the work list so we can propagate the changed exit
+  // status.
+  for (MachineBasicBlock *S : MBB.successors())
+    if (!BlockInfo[S->getNumber()].InQueue)
+      WorkList.push(S);
+}
+
+void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo;
-  CurInfo.setUnknown();

  for (MachineInstr &MI : MBB) {
    // If this is an explicit VSETVLI or VSETIVLI, update our state.
@ -309,7 +499,6 @@ bool RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
-      MadeChange = true;
      CurInfo = getInfoForVSETVLI(MI);
      continue;
    }
@ -330,47 +519,32 @@ bool RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));

-      bool NeedVSETVLI = true;
-      if (CurInfo.isValid() && CurInfo.isCompatible(NewInfo))
-        NeedVSETVLI = false;
-
-      // We didn't find a compatible value. If our AVL is a virtual register,
-      // it might be defined by a VSET(I)VLI. If it has the same VTYPE we need
-      // and the last VL/VTYPE we observed is the same, we don't need a
-      // VSETVLI here.
-      if (NeedVSETVLI && !CurInfo.isUnknown() && NewInfo.hasAVLReg() &&
-          NewInfo.getAVLReg().isVirtual() && NewInfo.hasSameVTYPE(CurInfo)) {
-        if (MachineInstr *DefMI = MRI->getVRegDef(NewInfo.getAVLReg())) {
-          if (DefMI->getOpcode() == RISCV::PseudoVSETVLI ||
-              DefMI->getOpcode() == RISCV::PseudoVSETIVLI) {
-            VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
-            if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVTYPE(CurInfo))
-              NeedVSETVLI = false;
-          }
-        }
-      }
-
-      // If this instruction isn't compatible with the previous VL/VTYPE
-      // we need to insert a VSETVLI.
-      if (NeedVSETVLI) {
-        insertVSETVLI(MBB, MI, NewInfo);
-        CurInfo = NewInfo;
-      }
-
-      // If we find an instruction we at least changed the operands.
-      MadeChange = true;
+      if (!CurInfo.isValid()) {
+        // We haven't found any vector instructions or VL/VTYPE changes yet,
+        // use the predecessor information.
+        assert(BlockInfo[MBB.getNumber()].Pred.isValid() &&
+               "Expected a valid predecessor state.");
+        if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred)) {
+          insertVSETVLI(MBB, MI, NewInfo);
+          CurInfo = NewInfo;
+        }
+      } else {
+        // If this instruction isn't compatible with the previous VL/VTYPE
+        // we need to insert a VSETVLI.
+        if (needVSETVLI(NewInfo, CurInfo)) {
+          insertVSETVLI(MBB, MI, NewInfo);
+          CurInfo = NewInfo;
+        }
+      }
    }

    // If this is something updates VL/VTYPE that we don't know about, set
    // the state to unknown.
    if (MI.isCall() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE)) {
-      VSETVLIInfo NewInfo;
-      NewInfo.setUnknown();
-      CurInfo = NewInfo;
+      CurInfo = VSETVLIInfo::getUnknown();
    }
  }

-  return MadeChange;
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
@ -382,12 +556,41 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();

-  bool Changed = false;
+  assert(BlockInfo.empty() && "Expect empty block infos");
+  BlockInfo.resize(MF.getNumBlockIDs());

-  for (MachineBasicBlock &MBB : MF)
-    Changed |= emitVSETVLIs(MBB);
+  bool HaveVectorOp = false;

-  return Changed;
+  // Phase 1 - determine how VL/VTYPE are affected by the each block.
+  for (const MachineBasicBlock &MBB : MF)
+    HaveVectorOp |= computeVLVTYPEChanges(MBB);
+
+  // If we didn't find any instructions that need VSETVLI, we're done.
+  if (HaveVectorOp) {
+    // Phase 2 - determine the exit VL/VTYPE from each block. We add all
+    // blocks to the list here, but will also add any that need to be revisited
+    // during Phase 2 processing.
+    for (const MachineBasicBlock &MBB : MF) {
+      WorkList.push(&MBB);
+      BlockInfo[MBB.getNumber()].InQueue = true;
+    }
+    while (!WorkList.empty()) {
+      const MachineBasicBlock &MBB = *WorkList.front();
+      WorkList.pop();
+      computeIncomingVLVTYPE(MBB);
+    }
+
+    // Phase 3 - add any vsetvli instructions needed in the block. Use the
+    // Phase 2 information to avoid adding vsetvlis before the first vector
+    // instruction in the block if the VL/VTYPE is satisfied by its
+    // predecessors.
+    for (MachineBasicBlock &MBB : MF)
+      emitVSETVLIs(MBB);
+  }
+
+  BlockInfo.clear();
+
+  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
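As a further illustration of the unknown-state handling above (a reduced, hypothetical variant of the test8 case added later in this diff; the function name is illustrative): a call may clobber VL and VTYPE, so both computeVLVTYPEChanges and emitVSETVLIs force the tracked state to unknown, and vector code that runs after the call still requires a vsetvli.

declare void @foo()
declare i64 @llvm.riscv.vsetvli(i64, i64, i64)
declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, i64)

define <vscale x 1 x double> @call_barrier(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
entry:
  %vl = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  ; The call may change VL/VTYPE, so the pass must emit another vsetvli
  ; (restoring the saved VL) before the vfadd that follows it.
  call void @foo()
  %sum = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %vl)
  ret <vscale x 1 x double> %sum
}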
@ -3667,11 +3667,10 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
|
|||
; LMULMAX2-RV32-NEXT: addi a3, a1, 819
|
||||
; LMULMAX2-RV32-NEXT: lui a1, 61681
|
||||
; LMULMAX2-RV32-NEXT: addi a7, a1, -241
|
||||
; LMULMAX2-RV32-NEXT: lui a2, 4112
|
||||
; LMULMAX2-RV32-NEXT: addi a2, a2, 257
|
||||
; LMULMAX2-RV32-NEXT: lui a1, 4112
|
||||
; LMULMAX2-RV32-NEXT: addi a2, a1, 257
|
||||
; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_2
|
||||
; LMULMAX2-RV32-NEXT: # %bb.1:
|
||||
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
|
||||
; LMULMAX2-RV32-NEXT: or a1, a1, a5
|
||||
|
@ -3723,14 +3722,12 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
|
|||
; LMULMAX2-RV32-NEXT: mul a1, a1, a2
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 24
|
||||
; LMULMAX2-RV32-NEXT: .LBB3_3:
|
||||
; LMULMAX2-RV32-NEXT: sw a5, 0(sp)
|
||||
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
|
||||
; LMULMAX2-RV32-NEXT: vslidedown.vi v25, v25, 1
|
||||
; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26
|
||||
; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_5
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
|
||||
; LMULMAX2-RV32-NEXT: sw a5, 0(sp)
|
||||
; LMULMAX2-RV32-NEXT: bnez a1, .LBB3_5
|
||||
; LMULMAX2-RV32-NEXT: # %bb.4:
|
||||
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
|
||||
; LMULMAX2-RV32-NEXT: or a1, a1, a5
|
||||
|
@ -3758,8 +3755,8 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
|
|||
; LMULMAX2-RV32-NEXT: addi a1, a1, 32
|
||||
; LMULMAX2-RV32-NEXT: j .LBB3_6
|
||||
; LMULMAX2-RV32-NEXT: .LBB3_5:
|
||||
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
|
||||
; LMULMAX2-RV32-NEXT: or a1, a5, a1
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
|
||||
; LMULMAX2-RV32-NEXT: or a1, a1, a5
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
|
||||
; LMULMAX2-RV32-NEXT: or a1, a1, a5
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
|
||||
|
@ -3902,11 +3899,10 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
|
|||
; LMULMAX1-RV32-NEXT: addi a3, a1, 819
|
||||
; LMULMAX1-RV32-NEXT: lui a1, 61681
|
||||
; LMULMAX1-RV32-NEXT: addi a7, a1, -241
|
||||
; LMULMAX1-RV32-NEXT: lui a2, 4112
|
||||
; LMULMAX1-RV32-NEXT: addi a2, a2, 257
|
||||
; LMULMAX1-RV32-NEXT: lui a1, 4112
|
||||
; LMULMAX1-RV32-NEXT: addi a2, a1, 257
|
||||
; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_2
|
||||
; LMULMAX1-RV32-NEXT: # %bb.1:
|
||||
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
|
||||
; LMULMAX1-RV32-NEXT: srli a5, a1, 1
|
||||
; LMULMAX1-RV32-NEXT: or a1, a1, a5
|
||||
|
@ -3958,14 +3954,12 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
|
|||
; LMULMAX1-RV32-NEXT: mul a1, a1, a2
|
||||
; LMULMAX1-RV32-NEXT: srli a5, a1, 24
|
||||
; LMULMAX1-RV32-NEXT: .LBB3_3:
|
||||
; LMULMAX1-RV32-NEXT: sw a5, 0(sp)
|
||||
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1
|
||||
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a5, v26
|
||||
; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_5
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
|
||||
; LMULMAX1-RV32-NEXT: sw a5, 0(sp)
|
||||
; LMULMAX1-RV32-NEXT: bnez a1, .LBB3_5
|
||||
; LMULMAX1-RV32-NEXT: # %bb.4:
|
||||
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
|
||||
; LMULMAX1-RV32-NEXT: srli a5, a1, 1
|
||||
; LMULMAX1-RV32-NEXT: or a1, a1, a5
|
||||
|
@ -3993,8 +3987,8 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
|
|||
; LMULMAX1-RV32-NEXT: addi a1, a1, 32
|
||||
; LMULMAX1-RV32-NEXT: j .LBB3_6
|
||||
; LMULMAX1-RV32-NEXT: .LBB3_5:
|
||||
; LMULMAX1-RV32-NEXT: srli a1, a5, 1
|
||||
; LMULMAX1-RV32-NEXT: or a1, a5, a1
|
||||
; LMULMAX1-RV32-NEXT: srli a5, a1, 1
|
||||
; LMULMAX1-RV32-NEXT: or a1, a1, a5
|
||||
; LMULMAX1-RV32-NEXT: srli a5, a1, 2
|
||||
; LMULMAX1-RV32-NEXT: or a1, a1, a5
|
||||
; LMULMAX1-RV32-NEXT: srli a5, a1, 4
|
||||
|
@ -11124,11 +11118,10 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX2-RV32-NEXT: addi a3, a1, 819
|
||||
; LMULMAX2-RV32-NEXT: lui a1, 61681
|
||||
; LMULMAX2-RV32-NEXT: addi a7, a1, -241
|
||||
; LMULMAX2-RV32-NEXT: lui a2, 4112
|
||||
; LMULMAX2-RV32-NEXT: addi a2, a2, 257
|
||||
; LMULMAX2-RV32-NEXT: lui a1, 4112
|
||||
; LMULMAX2-RV32-NEXT: addi a2, a1, 257
|
||||
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_2
|
||||
; LMULMAX2-RV32-NEXT: # %bb.1:
|
||||
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
|
||||
; LMULMAX2-RV32-NEXT: or a1, a1, a5
|
||||
|
@ -11180,14 +11173,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX2-RV32-NEXT: mul a1, a1, a2
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 24
|
||||
; LMULMAX2-RV32-NEXT: .LBB7_3:
|
||||
; LMULMAX2-RV32-NEXT: sw a5, 0(sp)
|
||||
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
|
||||
; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 3
|
||||
; LMULMAX2-RV32-NEXT: vsrl.vx v30, v28, a6
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v30
|
||||
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_5
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v30
|
||||
; LMULMAX2-RV32-NEXT: sw a5, 0(sp)
|
||||
; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_5
|
||||
; LMULMAX2-RV32-NEXT: # %bb.4:
|
||||
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
|
||||
; LMULMAX2-RV32-NEXT: or a1, a1, a5
|
||||
|
@ -11215,8 +11206,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX2-RV32-NEXT: addi a5, a1, 32
|
||||
; LMULMAX2-RV32-NEXT: j .LBB7_6
|
||||
; LMULMAX2-RV32-NEXT: .LBB7_5:
|
||||
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
|
||||
; LMULMAX2-RV32-NEXT: or a1, a5, a1
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
|
||||
; LMULMAX2-RV32-NEXT: or a1, a1, a5
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
|
||||
; LMULMAX2-RV32-NEXT: or a1, a1, a5
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
|
||||
|
@ -11239,14 +11230,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX2-RV32-NEXT: mul a1, a1, a2
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 24
|
||||
; LMULMAX2-RV32-NEXT: .LBB7_6:
|
||||
; LMULMAX2-RV32-NEXT: sw a5, 24(sp)
|
||||
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
|
||||
; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 2
|
||||
; LMULMAX2-RV32-NEXT: vsrl.vx v30, v28, a6
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v30
|
||||
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_8
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v30
|
||||
; LMULMAX2-RV32-NEXT: sw a5, 24(sp)
|
||||
; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_8
|
||||
; LMULMAX2-RV32-NEXT: # %bb.7:
|
||||
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
|
||||
; LMULMAX2-RV32-NEXT: or a1, a1, a5
|
||||
|
@ -11274,8 +11263,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX2-RV32-NEXT: addi a5, a1, 32
|
||||
; LMULMAX2-RV32-NEXT: j .LBB7_9
|
||||
; LMULMAX2-RV32-NEXT: .LBB7_8:
|
||||
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
|
||||
; LMULMAX2-RV32-NEXT: or a1, a5, a1
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
|
||||
; LMULMAX2-RV32-NEXT: or a1, a1, a5
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
|
||||
; LMULMAX2-RV32-NEXT: or a1, a1, a5
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
|
||||
|
@ -11298,14 +11287,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX2-RV32-NEXT: mul a1, a1, a2
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 24
|
||||
; LMULMAX2-RV32-NEXT: .LBB7_9:
|
||||
; LMULMAX2-RV32-NEXT: sw a5, 16(sp)
|
||||
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
|
||||
; LMULMAX2-RV32-NEXT: vslidedown.vi v26, v26, 1
|
||||
; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28
|
||||
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_11
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
|
||||
; LMULMAX2-RV32-NEXT: sw a5, 16(sp)
|
||||
; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_11
|
||||
; LMULMAX2-RV32-NEXT: # %bb.10:
|
||||
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
|
||||
; LMULMAX2-RV32-NEXT: or a1, a1, a5
|
||||
|
@ -11333,8 +11320,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX2-RV32-NEXT: addi a1, a1, 32
|
||||
; LMULMAX2-RV32-NEXT: j .LBB7_12
|
||||
; LMULMAX2-RV32-NEXT: .LBB7_11:
|
||||
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
|
||||
; LMULMAX2-RV32-NEXT: or a1, a5, a1
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
|
||||
; LMULMAX2-RV32-NEXT: or a1, a1, a5
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
|
||||
; LMULMAX2-RV32-NEXT: or a1, a1, a5
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
|
||||
|
@ -11552,11 +11539,10 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX1-RV32-NEXT: addi a4, a2, 819
|
||||
; LMULMAX1-RV32-NEXT: lui a2, 61681
|
||||
; LMULMAX1-RV32-NEXT: addi t0, a2, -241
|
||||
; LMULMAX1-RV32-NEXT: lui a3, 4112
|
||||
; LMULMAX1-RV32-NEXT: addi a3, a3, 257
|
||||
; LMULMAX1-RV32-NEXT: lui a2, 4112
|
||||
; LMULMAX1-RV32-NEXT: addi a3, a2, 257
|
||||
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_2
|
||||
; LMULMAX1-RV32-NEXT: # %bb.1:
|
||||
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
|
||||
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
|
||||
; LMULMAX1-RV32-NEXT: or a1, a1, a2
|
||||
|
@ -11608,14 +11594,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX1-RV32-NEXT: mul a1, a1, a3
|
||||
; LMULMAX1-RV32-NEXT: srli a1, a1, 24
|
||||
; LMULMAX1-RV32-NEXT: .LBB7_3:
|
||||
; LMULMAX1-RV32-NEXT: sw a1, 16(sp)
|
||||
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vslidedown.vi v26, v26, 1
|
||||
; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a7
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v27
|
||||
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_5
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v27
|
||||
; LMULMAX1-RV32-NEXT: sw a1, 16(sp)
|
||||
; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_5
|
||||
; LMULMAX1-RV32-NEXT: # %bb.4:
|
||||
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
|
||||
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
|
||||
; LMULMAX1-RV32-NEXT: or a1, a1, a2
|
||||
|
@ -11643,8 +11627,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX1-RV32-NEXT: addi a1, a1, 32
|
||||
; LMULMAX1-RV32-NEXT: j .LBB7_6
|
||||
; LMULMAX1-RV32-NEXT: .LBB7_5:
|
||||
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
|
||||
; LMULMAX1-RV32-NEXT: or a1, a1, a2
|
||||
; LMULMAX1-RV32-NEXT: srli a1, a2, 1
|
||||
; LMULMAX1-RV32-NEXT: or a1, a2, a1
|
||||
; LMULMAX1-RV32-NEXT: srli a2, a1, 2
|
||||
; LMULMAX1-RV32-NEXT: or a1, a1, a2
|
||||
; LMULMAX1-RV32-NEXT: srli a2, a1, 4
|
||||
|
@ -11669,13 +11653,11 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX1-RV32-NEXT: .LBB7_6:
|
||||
; LMULMAX1-RV32-NEXT: sw a1, 24(sp)
|
||||
; LMULMAX1-RV32-NEXT: sw zero, 12(sp)
|
||||
; LMULMAX1-RV32-NEXT: sw zero, 4(sp)
|
||||
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
|
||||
; LMULMAX1-RV32-NEXT: sw zero, 4(sp)
|
||||
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_8
|
||||
; LMULMAX1-RV32-NEXT: # %bb.7:
|
||||
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
|
||||
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
|
||||
; LMULMAX1-RV32-NEXT: or a1, a1, a2
|
||||
|
@ -11727,14 +11709,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX1-RV32-NEXT: mul a1, a1, a3
|
||||
; LMULMAX1-RV32-NEXT: srli a1, a1, 24
|
||||
; LMULMAX1-RV32-NEXT: .LBB7_9:
|
||||
; LMULMAX1-RV32-NEXT: sw a1, 0(sp)
|
||||
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1
|
||||
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
|
||||
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_11
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26
|
||||
; LMULMAX1-RV32-NEXT: sw a1, 0(sp)
|
||||
; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_11
|
||||
; LMULMAX1-RV32-NEXT: # %bb.10:
|
||||
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
|
||||
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
|
||||
; LMULMAX1-RV32-NEXT: or a1, a1, a2
|
||||
|
@ -11762,8 +11742,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX1-RV32-NEXT: addi a1, a1, 32
|
||||
; LMULMAX1-RV32-NEXT: j .LBB7_12
|
||||
; LMULMAX1-RV32-NEXT: .LBB7_11:
|
||||
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
|
||||
; LMULMAX1-RV32-NEXT: or a1, a1, a2
|
||||
; LMULMAX1-RV32-NEXT: srli a1, a2, 1
|
||||
; LMULMAX1-RV32-NEXT: or a1, a2, a1
|
||||
; LMULMAX1-RV32-NEXT: srli a2, a1, 2
|
||||
; LMULMAX1-RV32-NEXT: or a1, a1, a2
|
||||
; LMULMAX1-RV32-NEXT: srli a2, a1, 4
|
||||
|
|
|
@ -2592,7 +2592,6 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
|
|||
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v25
|
||||
; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_5
|
||||
; LMULMAX2-RV32-NEXT: # %bb.4:
|
||||
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
|
||||
; LMULMAX2-RV32-NEXT: vsrl.vx v25, v25, a6
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25
|
||||
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
|
||||
|
@ -2774,7 +2773,6 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
|
|||
; LMULMAX1-RV32-NEXT: vmv.x.s a5, v25
|
||||
; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_5
|
||||
; LMULMAX1-RV32-NEXT: # %bb.4:
|
||||
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a6
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
|
||||
; LMULMAX1-RV32-NEXT: addi a5, a1, -1
|
||||
|
@ -7655,9 +7653,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX2-RV32-NEXT: addi a3, a1, 819
|
||||
; LMULMAX2-RV32-NEXT: lui a1, 61681
|
||||
; LMULMAX2-RV32-NEXT: addi a7, a1, -241
|
||||
; LMULMAX2-RV32-NEXT: lui a2, 4112
|
||||
; LMULMAX2-RV32-NEXT: lui a1, 4112
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26
|
||||
; LMULMAX2-RV32-NEXT: addi a2, a2, 257
|
||||
; LMULMAX2-RV32-NEXT: addi a2, a1, 257
|
||||
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_2
|
||||
; LMULMAX2-RV32-NEXT: # %bb.1:
|
||||
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
|
||||
|
@ -7703,7 +7701,6 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28
|
||||
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_5
|
||||
; LMULMAX2-RV32-NEXT: # %bb.4:
|
||||
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
|
||||
; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
|
||||
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
|
||||
|
@ -7740,13 +7737,11 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX2-RV32-NEXT: mul a1, a1, a2
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 24
|
||||
; LMULMAX2-RV32-NEXT: .LBB7_6:
|
||||
; LMULMAX2-RV32-NEXT: sw a5, 24(sp)
|
||||
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
|
||||
; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 2
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28
|
||||
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_8
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
|
||||
; LMULMAX2-RV32-NEXT: sw a5, 24(sp)
|
||||
; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_8
|
||||
; LMULMAX2-RV32-NEXT: # %bb.7:
|
||||
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
|
||||
; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
|
||||
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
|
||||
|
@ -7767,9 +7762,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX2-RV32-NEXT: addi a5, a1, 32
|
||||
; LMULMAX2-RV32-NEXT: j .LBB7_9
|
||||
; LMULMAX2-RV32-NEXT: .LBB7_8:
|
||||
; LMULMAX2-RV32-NEXT: addi a1, a5, -1
|
||||
; LMULMAX2-RV32-NEXT: not a5, a5
|
||||
; LMULMAX2-RV32-NEXT: and a1, a5, a1
|
||||
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
|
||||
; LMULMAX2-RV32-NEXT: not a1, a1
|
||||
; LMULMAX2-RV32-NEXT: and a1, a1, a5
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
|
||||
; LMULMAX2-RV32-NEXT: and a5, a5, a4
|
||||
; LMULMAX2-RV32-NEXT: sub a1, a1, a5
|
||||
|
@ -7783,13 +7778,11 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX2-RV32-NEXT: mul a1, a1, a2
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 24
|
||||
; LMULMAX2-RV32-NEXT: .LBB7_9:
|
||||
; LMULMAX2-RV32-NEXT: sw a5, 16(sp)
|
||||
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
|
||||
; LMULMAX2-RV32-NEXT: vslidedown.vi v26, v26, 1
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26
|
||||
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_11
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
|
||||
; LMULMAX2-RV32-NEXT: sw a5, 16(sp)
|
||||
; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_11
|
||||
; LMULMAX2-RV32-NEXT: # %bb.10:
|
||||
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
|
||||
; LMULMAX2-RV32-NEXT: vsrl.vx v26, v26, a6
|
||||
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
|
||||
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
|
||||
|
@ -7810,9 +7803,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX2-RV32-NEXT: addi a1, a1, 32
|
||||
; LMULMAX2-RV32-NEXT: j .LBB7_12
|
||||
; LMULMAX2-RV32-NEXT: .LBB7_11:
|
||||
; LMULMAX2-RV32-NEXT: addi a1, a5, -1
|
||||
; LMULMAX2-RV32-NEXT: not a5, a5
|
||||
; LMULMAX2-RV32-NEXT: and a1, a5, a1
|
||||
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
|
||||
; LMULMAX2-RV32-NEXT: not a1, a1
|
||||
; LMULMAX2-RV32-NEXT: and a1, a1, a5
|
||||
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
|
||||
; LMULMAX2-RV32-NEXT: and a4, a5, a4
|
||||
; LMULMAX2-RV32-NEXT: sub a1, a1, a4
|
||||
|
@ -7978,9 +7971,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX1-RV32-NEXT: addi a4, a1, 819
|
||||
; LMULMAX1-RV32-NEXT: lui a1, 61681
|
||||
; LMULMAX1-RV32-NEXT: addi t0, a1, -241
|
||||
; LMULMAX1-RV32-NEXT: lui a3, 4112
|
||||
; LMULMAX1-RV32-NEXT: lui a2, 4112
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
|
||||
; LMULMAX1-RV32-NEXT: addi a3, a3, 257
|
||||
; LMULMAX1-RV32-NEXT: addi a3, a2, 257
|
||||
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_2
|
||||
; LMULMAX1-RV32-NEXT: # %bb.1:
|
||||
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
|
||||
|
@ -8026,7 +8019,6 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
|
||||
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_5
|
||||
; LMULMAX1-RV32-NEXT: # %bb.4:
|
||||
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v26, a6
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
|
||||
; LMULMAX1-RV32-NEXT: addi a2, a1, -1
|
||||
|
@ -8065,12 +8057,10 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX1-RV32-NEXT: .LBB7_6:
|
||||
; LMULMAX1-RV32-NEXT: sw a1, 24(sp)
|
||||
; LMULMAX1-RV32-NEXT: sw zero, 12(sp)
|
||||
; LMULMAX1-RV32-NEXT: sw zero, 4(sp)
|
||||
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
|
||||
; LMULMAX1-RV32-NEXT: sw zero, 4(sp)
|
||||
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_8
|
||||
; LMULMAX1-RV32-NEXT: # %bb.7:
|
||||
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
|
||||
; LMULMAX1-RV32-NEXT: addi a2, a1, -1
|
||||
|
@ -8107,13 +8097,11 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX1-RV32-NEXT: mul a1, a1, a3
|
||||
; LMULMAX1-RV32-NEXT: srli a1, a1, 24
|
||||
; LMULMAX1-RV32-NEXT: .LBB7_9:
|
||||
; LMULMAX1-RV32-NEXT: sw a1, 0(sp)
|
||||
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
|
||||
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_11
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v25
|
||||
; LMULMAX1-RV32-NEXT: sw a1, 0(sp)
|
||||
; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_11
|
||||
; LMULMAX1-RV32-NEXT: # %bb.10:
|
||||
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
|
||||
; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a6
|
||||
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
|
||||
; LMULMAX1-RV32-NEXT: addi a2, a1, -1
|
||||
|
@ -8134,9 +8122,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX1-RV32-NEXT: addi a1, a1, 32
|
||||
; LMULMAX1-RV32-NEXT: j .LBB7_12
|
||||
; LMULMAX1-RV32-NEXT: .LBB7_11:
|
||||
; LMULMAX1-RV32-NEXT: addi a2, a1, -1
|
||||
; LMULMAX1-RV32-NEXT: not a1, a1
|
||||
; LMULMAX1-RV32-NEXT: and a1, a1, a2
|
||||
; LMULMAX1-RV32-NEXT: addi a1, a2, -1
|
||||
; LMULMAX1-RV32-NEXT: not a2, a2
|
||||
; LMULMAX1-RV32-NEXT: and a1, a2, a1
|
||||
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
|
||||
; LMULMAX1-RV32-NEXT: and a2, a2, a5
|
||||
; LMULMAX1-RV32-NEXT: sub a1, a1, a2
|
||||
|
|
|
@ -3959,7 +3959,6 @@ define void @extract_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX2-NEXT: vsetivli a2, 4, e64,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vle64.v v26, (a0)
|
||||
; LMULMAX2-NEXT: vle64.v v28, (a1)
|
||||
; LMULMAX2-NEXT: vsetivli a1, 4, e64,m2,ta,mu
|
||||
; LMULMAX2-NEXT: vadd.vv v26, v26, v28
|
||||
; LMULMAX2-NEXT: vse64.v v26, (a0)
|
||||
; LMULMAX2-NEXT: ret
|
||||
|
@ -3973,7 +3972,6 @@ define void @extract_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
|
|||
; LMULMAX1-NEXT: vle64.v v27, (a1)
|
||||
; LMULMAX1-NEXT: addi a1, a1, 16
|
||||
; LMULMAX1-NEXT: vle64.v v28, (a1)
|
||||
; LMULMAX1-NEXT: vsetivli a1, 2, e64,m1,ta,mu
|
||||
; LMULMAX1-NEXT: vadd.vv v26, v26, v28
|
||||
; LMULMAX1-NEXT: vadd.vv v25, v25, v27
|
||||
; LMULMAX1-NEXT: vse64.v v25, (a0)
|
||||
|
|
File diff suppressed because it is too large
File diff suppressed because it is too large
|
@ -0,0 +1,447 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+experimental-v \
|
||||
; RUN: -verify-machineinstrs -O2 < %s | FileCheck %s
|
||||
|
||||
; The following tests check whether inserting VSETVLI avoids inserting
|
||||
; unneeded vsetvlis across basic blocks.
|
||||
|
||||
declare i64 @llvm.riscv.vsetvli(i64, i64, i64)
|
||||
|
||||
declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, i64)
|
||||
declare <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i64)
|
||||
|
||||
declare <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, i64)
|
||||
|
||||
declare <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, i64)
|
||||
|
||||
declare <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(double, i64)
|
||||
declare <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(float, i64)
|
||||
|
||||
declare void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>* nocapture, i64)
|
||||
declare void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>* nocapture, i64)
|
||||
|
||||
define <vscale x 1 x double> @test1(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
|
||||
; CHECK-NEXT: beqz a1, .LBB0_2
|
||||
; CHECK-NEXT: # %bb.1: # %if.then
|
||||
; CHECK-NEXT: vfadd.vv v8, v8, v9
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB0_2: # %if.else
|
||||
; CHECK-NEXT: vfsub.vv v8, v8, v9
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
|
||||
%tobool = icmp eq i8 %cond, 0
|
||||
br i1 %tobool, label %if.else, label %if.then
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
|
||||
br label %if.end
|
||||
|
||||
if.else: ; preds = %entry
|
||||
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.else, %if.then
|
||||
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
|
||||
ret <vscale x 1 x double> %c.0
|
||||
}
|
||||
|
||||
@scratch = global i8 0, align 16
|
||||
|
||||
define <vscale x 1 x double> @test2(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
|
||||
; CHECK-NEXT: beqz a1, .LBB1_2
|
||||
; CHECK-NEXT: # %bb.1: # %if.then
|
||||
; CHECK-NEXT: vfadd.vv v25, v8, v9
|
||||
; CHECK-NEXT: vfmul.vv v8, v25, v8
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB1_2: # %if.else
|
||||
; CHECK-NEXT: vfsub.vv v25, v8, v9
|
||||
; CHECK-NEXT: vfmul.vv v8, v25, v8
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
|
||||
%tobool = icmp eq i8 %cond, 0
|
||||
br i1 %tobool, label %if.else, label %if.then
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
|
||||
br label %if.end
|
||||
|
||||
if.else: ; preds = %entry
|
||||
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.else, %if.then
|
||||
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
|
||||
%3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 %0)
|
||||
ret <vscale x 1 x double> %3
|
||||
}
|
||||
|
||||
; FIXME: The last vsetvli is redundant, but we need to look through a phi to
|
||||
; prove it.
|
||||
define <vscale x 1 x double> @test3(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: beqz a1, .LBB2_2
|
||||
; CHECK-NEXT: # %bb.1: # %if.then
|
||||
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
|
||||
; CHECK-NEXT: vfadd.vv v25, v8, v9
|
||||
; CHECK-NEXT: j .LBB2_3
|
||||
; CHECK-NEXT: .LBB2_2: # %if.else
|
||||
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
|
||||
; CHECK-NEXT: vfsub.vv v25, v8, v9
|
||||
; CHECK-NEXT: .LBB2_3: # %if.end
|
||||
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
|
||||
; CHECK-NEXT: vfmul.vv v8, v25, v8
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%tobool = icmp eq i8 %cond, 0
|
||||
br i1 %tobool, label %if.else, label %if.then
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
|
||||
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
|
||||
br label %if.end
|
||||
|
||||
if.else: ; preds = %entry
|
||||
%2 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
|
||||
%3 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %2)
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.else, %if.then
|
||||
%vl.0 = phi i64 [ %0, %if.then], [ %2, %if.else ]
|
||||
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %3, %if.else ]
|
||||
%4 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 %vl.0)
|
||||
ret <vscale x 1 x double> %4
|
||||
}
|
||||
|
||||
define <vscale x 1 x double> @test4(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %l, <vscale x 1 x double> %r) nounwind {
|
||||
; CHECK-LABEL: test4:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: beqz a1, .LBB3_2
|
||||
; CHECK-NEXT: # %bb.1: # %if.then
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI3_0)
|
||||
; CHECK-NEXT: addi a1, a1, %lo(.LCPI3_0)
|
||||
; CHECK-NEXT: vsetvli a2, a0, e64,m1,ta,mu
|
||||
; CHECK-NEXT: vlse64.v v25, (a1), zero
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI3_1)
|
||||
; CHECK-NEXT: addi a1, a1, %lo(.LCPI3_1)
|
||||
; CHECK-NEXT: vlse64.v v26, (a1), zero
|
||||
; CHECK-NEXT: vfadd.vv v25, v25, v26
|
||||
; CHECK-NEXT: lui a1, %hi(scratch)
|
||||
; CHECK-NEXT: addi a1, a1, %lo(scratch)
|
||||
; CHECK-NEXT: vse64.v v25, (a1)
|
||||
; CHECK-NEXT: j .LBB3_3
|
||||
; CHECK-NEXT: .LBB3_2: # %if.else
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI3_2)
|
||||
; CHECK-NEXT: addi a1, a1, %lo(.LCPI3_2)
|
||||
; CHECK-NEXT: vsetvli a2, a0, e32,m1,ta,mu
|
||||
; CHECK-NEXT: vlse32.v v25, (a1), zero
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI3_3)
|
||||
; CHECK-NEXT: addi a1, a1, %lo(.LCPI3_3)
|
||||
; CHECK-NEXT: vlse32.v v26, (a1), zero
|
||||
; CHECK-NEXT: vfadd.vv v25, v25, v26
|
||||
; CHECK-NEXT: lui a1, %hi(scratch)
|
||||
; CHECK-NEXT: addi a1, a1, %lo(scratch)
|
||||
; CHECK-NEXT: vse32.v v25, (a1)
|
||||
; CHECK-NEXT: .LBB3_3: # %if.end
|
||||
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
|
||||
; CHECK-NEXT: vfmul.vv v8, v8, v9
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%tobool = icmp eq i8 %cond, 0
|
||||
br i1 %tobool, label %if.else, label %if.then
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%0 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(double 1.000000e+00, i64 %avl)
|
||||
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(double 2.000000e+00, i64 %avl)
|
||||
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i64 %avl)
|
||||
%3 = bitcast i8* @scratch to <vscale x 1 x double>*
|
||||
tail call void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double> %2, <vscale x 1 x double>* %3, i64 %avl)
|
||||
br label %if.end
|
||||
|
||||
if.else: ; preds = %entry
|
||||
%4 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(float 1.000000e+00, i64 %avl)
|
||||
%5 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(float 2.000000e+00, i64 %avl)
|
||||
%6 = tail call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float> %4, <vscale x 2 x float> %5, i64 %avl)
|
||||
%7 = bitcast i8* @scratch to <vscale x 2 x float>*
|
||||
tail call void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float> %6, <vscale x 2 x float>* %7, i64 %avl)
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.else, %if.then
|
||||
%8 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %l, <vscale x 1 x double> %r, i64 %avl)
|
||||
ret <vscale x 1 x double> %8
|
||||
}
|
||||
|
||||
define <vscale x 1 x double> @test5(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
|
||||
; CHECK-LABEL: test5:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: andi a2, a1, 1
|
||||
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
|
||||
; CHECK-NEXT: bnez a2, .LBB4_3
|
||||
; CHECK-NEXT: # %bb.1: # %if.else
|
||||
; CHECK-NEXT: vfsub.vv v25, v8, v9
|
||||
; CHECK-NEXT: andi a0, a1, 2
|
||||
; CHECK-NEXT: beqz a0, .LBB4_4
|
||||
; CHECK-NEXT: .LBB4_2: # %if.then4
|
||||
; CHECK-NEXT: vfmul.vv v8, v25, v8
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB4_3: # %if.then
|
||||
; CHECK-NEXT: vfadd.vv v25, v8, v9
|
||||
; CHECK-NEXT: andi a0, a1, 2
|
||||
; CHECK-NEXT: bnez a0, .LBB4_2
|
||||
; CHECK-NEXT: .LBB4_4: # %if.else5
|
||||
; CHECK-NEXT: vfmul.vv v8, v8, v25
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
|
||||
%conv = zext i8 %cond to i32
|
||||
%and = and i32 %conv, 1
|
||||
%tobool = icmp eq i32 %and, 0
|
||||
br i1 %tobool, label %if.else, label %if.then
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
|
||||
br label %if.end
|
||||
|
||||
if.else: ; preds = %entry
|
||||
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.else, %if.then
|
||||
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
|
||||
%and2 = and i32 %conv, 2
|
||||
%tobool3 = icmp eq i32 %and2, 0
|
||||
br i1 %tobool3, label %if.else5, label %if.then4
|
||||
|
||||
if.then4: ; preds = %if.end
|
||||
%3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 %0)
|
||||
br label %if.end6
|
||||
|
||||
if.else5: ; preds = %if.end
|
||||
%4 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %c.0, i64 %0)
|
||||
br label %if.end6
|
||||
|
||||
if.end6: ; preds = %if.else5, %if.then4
|
||||
%c.1 = phi <vscale x 1 x double> [ %3, %if.then4 ], [ %4, %if.else5 ]
|
||||
ret <vscale x 1 x double> %c.1
|
||||
}
|
||||
|
||||
; FIXME: The explicit vsetvli in if.then4 could be removed as it is redundant
|
||||
; with the one in the entry, but we lack the ability to remove explicit
|
||||
; vsetvli instructions.
|
||||
define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
|
||||
; CHECK-LABEL: test6:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: andi a3, a1, 1
|
||||
; CHECK-NEXT: vsetvli a2, a0, e64,m1,ta,mu
|
||||
; CHECK-NEXT: bnez a3, .LBB5_3
|
||||
; CHECK-NEXT: # %bb.1: # %if.else
|
||||
; CHECK-NEXT: vfsub.vv v25, v8, v9
|
||||
; CHECK-NEXT: andi a1, a1, 2
|
||||
; CHECK-NEXT: beqz a1, .LBB5_4
|
||||
; CHECK-NEXT: .LBB5_2: # %if.then4
|
||||
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
|
||||
; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0)
|
||||
; CHECK-NEXT: vlse64.v v26, (a0), zero
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI5_1)
|
||||
; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_1)
|
||||
; CHECK-NEXT: vlse64.v v27, (a0), zero
|
||||
; CHECK-NEXT: vfadd.vv v26, v26, v27
|
||||
; CHECK-NEXT: lui a0, %hi(scratch)
|
||||
; CHECK-NEXT: addi a0, a0, %lo(scratch)
|
||||
; CHECK-NEXT: vse64.v v26, (a0)
|
||||
; CHECK-NEXT: j .LBB5_5
|
||||
; CHECK-NEXT: .LBB5_3: # %if.then
|
||||
; CHECK-NEXT: vfadd.vv v25, v8, v9
|
||||
; CHECK-NEXT: andi a1, a1, 2
|
||||
; CHECK-NEXT: bnez a1, .LBB5_2
|
||||
; CHECK-NEXT: .LBB5_4: # %if.else5
|
||||
; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI5_2)
|
||||
; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_2)
|
||||
; CHECK-NEXT: vlse32.v v26, (a0), zero
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI5_3)
|
||||
; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_3)
|
||||
; CHECK-NEXT: vlse32.v v27, (a0), zero
|
||||
; CHECK-NEXT: vfadd.vv v26, v26, v27
|
||||
; CHECK-NEXT: lui a0, %hi(scratch)
|
||||
; CHECK-NEXT: addi a0, a0, %lo(scratch)
|
||||
; CHECK-NEXT: vse32.v v26, (a0)
|
||||
; CHECK-NEXT: .LBB5_5: # %if.end10
|
||||
; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
|
||||
; CHECK-NEXT: vfmul.vv v8, v25, v25
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
|
||||
%conv = zext i8 %cond to i32
|
||||
%and = and i32 %conv, 1
|
||||
%tobool = icmp eq i32 %and, 0
|
||||
br i1 %tobool, label %if.else, label %if.then
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
|
||||
br label %if.end
|
||||
|
||||
if.else: ; preds = %entry
|
||||
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.else, %if.then
|
||||
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
|
||||
%and2 = and i32 %conv, 2
|
||||
%tobool3 = icmp eq i32 %and2, 0
|
||||
br i1 %tobool3, label %if.else5, label %if.then4
|
||||
|
||||
if.then4: ; preds = %if.end
|
||||
%3 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
|
||||
%4 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(double 1.000000e+00, i64 %3)
|
||||
%5 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(double 2.000000e+00, i64 %3)
|
||||
%6 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %4, <vscale x 1 x double> %5, i64 %3)
|
||||
%7 = bitcast i8* @scratch to <vscale x 1 x double>*
|
||||
tail call void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double> %6, <vscale x 1 x double>* %7, i64 %3)
|
||||
br label %if.end10
|
||||
|
||||
if.else5: ; preds = %if.end
|
||||
%8 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 2, i64 0)
|
||||
%9 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(float 1.000000e+00, i64 %8)
|
||||
%10 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(float 2.000000e+00, i64 %8)
|
||||
%11 = tail call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float> %9, <vscale x 2 x float> %10, i64 %8)
|
||||
%12 = bitcast i8* @scratch to <vscale x 2 x float>*
|
||||
tail call void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float> %11, <vscale x 2 x float>* %12, i64 %8)
|
||||
br label %if.end10
|
||||
|
||||
if.end10: ; preds = %if.else5, %if.then4
|
||||
%13 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %c.0, <vscale x 1 x double> %c.0, i64 %0)
|
||||
ret <vscale x 1 x double> %13
|
||||
}
|
||||
|
||||
declare void @foo()
|
||||
|
||||
; Similar to test1, but contains a call to @foo to act as barrier to analyzing
|
||||
; VL/VTYPE.
|
||||
define <vscale x 1 x double> @test8(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
|
||||
; CHECK-LABEL: test8:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: addi sp, sp, -32
|
||||
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
|
||||
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
|
||||
; CHECK-NEXT: csrr a2, vlenb
|
||||
; CHECK-NEXT: slli a2, a2, 1
|
||||
; CHECK-NEXT: sub sp, sp, a2
|
||||
; CHECK-NEXT: vsetvli s0, a0, e64,m1,ta,mu
|
||||
; CHECK-NEXT: beqz a1, .LBB6_2
|
||||
; CHECK-NEXT: # %bb.1: # %if.then
|
||||
; CHECK-NEXT: vfadd.vv v8, v8, v9
|
||||
; CHECK-NEXT: j .LBB6_3
|
||||
; CHECK-NEXT: .LBB6_2: # %if.else
|
||||
; CHECK-NEXT: csrr a0, vlenb
|
||||
; CHECK-NEXT: add a0, a0, sp
|
||||
; CHECK-NEXT: addi a0, a0, 16
|
||||
; CHECK-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
|
||||
; CHECK-NEXT: addi a0, sp, 16
|
||||
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
|
||||
; CHECK-NEXT: call foo@plt
|
||||
; CHECK-NEXT: vsetvli a0, s0, e64,m1,ta,mu
|
||||
; CHECK-NEXT: csrr a0, vlenb
|
||||
; CHECK-NEXT: add a0, a0, sp
|
||||
; CHECK-NEXT: addi a0, a0, 16
|
||||
; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
|
||||
; CHECK-NEXT: addi a0, sp, 16
|
||||
; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
|
||||
; CHECK-NEXT: vfsub.vv v8, v26, v25
|
||||
; CHECK-NEXT: .LBB6_3: # %if.then
|
||||
; CHECK-NEXT: csrr a0, vlenb
|
||||
; CHECK-NEXT: slli a0, a0, 1
|
||||
; CHECK-NEXT: add sp, sp, a0
|
||||
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
|
||||
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
|
||||
; CHECK-NEXT: addi sp, sp, 32
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
|
||||
%tobool = icmp eq i8 %cond, 0
|
||||
br i1 %tobool, label %if.else, label %if.then
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
|
||||
br label %if.end
|
||||
|
||||
if.else: ; preds = %entry
|
||||
call void @foo()
|
||||
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.else, %if.then
|
||||
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
|
||||
ret <vscale x 1 x double> %c.0
|
||||
}
|
||||
|
||||
; Similar to test2, but contains a call to @foo to act as barrier to analyzing
|
||||
; VL/VTYPE.
|
||||
define <vscale x 1 x double> @test9(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
|
||||
; CHECK-LABEL: test9:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -32
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 1
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: vsetvli s0, a0, e64,m1,ta,mu
; CHECK-NEXT: beqz a1, .LBB7_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vfadd.vv v25, v8, v9
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add a0, a0, sp
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: call foo@plt
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add a0, a0, sp
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: j .LBB7_3
; CHECK-NEXT: .LBB7_2: # %if.else
; CHECK-NEXT: vfsub.vv v25, v8, v9
; CHECK-NEXT: .LBB7_3: # %if.end
; CHECK-NEXT: vsetvli a0, s0, e64,m1,ta,mu
; CHECK-NEXT: vfmul.vv v8, v25, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
entry:
  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
  %tobool = icmp eq i8 %cond, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:                                          ; preds = %entry
  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
  call void @foo()
  br label %if.end

if.else:                                          ; preds = %entry
  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
  %3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 %0)
  ret <vscale x 1 x double> %3
}
@ -0,0 +1,415 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc %s -o - -mtriple=riscv64 -mattr=experimental-v \
# RUN:     -run-pass=riscv-insert-vsetvli | FileCheck %s
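# This test runs only the riscv-insert-vsetvli pass on pre-regalloc MIR and
# checks where PseudoVSETVLI instructions are inserted across basic blocks.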
--- |
  ; ModuleID = 'vsetvli-insert.ll'
  source_filename = "vsetvli-insert.ll"
  target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
  target triple = "riscv64"
  define <vscale x 1 x i64> @load_add_or_sub(i8 zeroext %cond, <vscale x 1 x i64>* %0, <vscale x 1 x i64> %1, i64 %2) #0 {
  entry:
    %a = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* %0, i64 %2)
    %tobool = icmp eq i8 %cond, 0
    br i1 %tobool, label %if.else, label %if.then

  if.then:                                        ; preds = %entry
    %b = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %1, i64 %2)
    br label %if.end

  if.else:                                        ; preds = %entry
    %c = call <vscale x 1 x i64> @llvm.riscv.vsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %1, i64 %2)
    br label %if.end

  if.end:                                         ; preds = %if.else, %if.then
    %d = phi <vscale x 1 x i64> [ %b, %if.then ], [ %c, %if.else ]
    ret <vscale x 1 x i64> %d
  }
  define void @load_zext_or_sext(i8 zeroext %cond, <vscale x 1 x i32>* %0, <vscale x 1 x i64>* %1, i64 %2) #0 {
  entry:
    %a = call <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>* %0, i64 %2)
    %tobool = icmp eq i8 %cond, 0
    br i1 %tobool, label %if.else, label %if.then

  if.then:                                        ; preds = %entry
    %b = call <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32> %a, i64 %2)
    br label %if.end

  if.else:                                        ; preds = %entry
    %c = call <vscale x 1 x i64> @llvm.riscv.vsext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32> %a, i64 %2)
    br label %if.end

  if.end:                                         ; preds = %if.else, %if.then
    %d = phi <vscale x 1 x i64> [ %b, %if.then ], [ %c, %if.else ]
    call void @llvm.riscv.vse.nxv1i64.i64(<vscale x 1 x i64> %d, <vscale x 1 x i64>* %1, i64 %2)
    ret void
  }
  ; Function Attrs: nounwind readnone
  declare i64 @llvm.riscv.vmv.x.s.nxv1i64(<vscale x 1 x i64>) #1

  define i64 @vmv_x_s(i8 zeroext %cond, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2) #0 {
  entry:
    %tobool = icmp eq i8 %cond, 0
    br i1 %tobool, label %if.else, label %if.then

  if.then:                                        ; preds = %entry
    %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2)
    br label %if.end

  if.else:                                        ; preds = %entry
    %b = call <vscale x 1 x i64> @llvm.riscv.vsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %1, <vscale x 1 x i64> %1, i64 %2)
    br label %if.end

  if.end:                                         ; preds = %if.else, %if.then
    %c = phi <vscale x 1 x i64> [ %a, %if.then ], [ %b, %if.else ]
    %d = call i64 @llvm.riscv.vmv.x.s.nxv1i64(<vscale x 1 x i64> %c)
    ret i64 %d
  }

  ; Function Attrs: nounwind
  declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) #2
  define <vscale x 1 x i64> @vsetvli_add_or_sub(i8 zeroext %cond, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %avl) #0 {
  entry:
    %vl = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 3, i64 0)
    %tobool = icmp eq i8 %cond, 0
    br i1 %tobool, label %if.else, label %if.then

  if.then:                                        ; preds = %entry
    %b = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %vl)
    br label %if.end

  if.else:                                        ; preds = %entry
    %c = call <vscale x 1 x i64> @llvm.riscv.vsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %vl)
    br label %if.end

  if.end:                                         ; preds = %if.else, %if.then
    %d = phi <vscale x 1 x i64> [ %b, %if.then ], [ %c, %if.else ]
    ret <vscale x 1 x i64> %d
  }
  ; Function Attrs: nounwind readnone
  declare <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, i64) #1

  ; Function Attrs: nounwind readnone
  declare <vscale x 1 x i64> @llvm.riscv.vsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, i64) #1

  ; Function Attrs: nounwind readonly
  declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* nocapture, i64) #3

  ; Function Attrs: nounwind readonly
  declare <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>* nocapture, i64) #3

  ; Function Attrs: nounwind writeonly
  declare void @llvm.riscv.vse.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>* nocapture, i64) #4

  ; Function Attrs: nounwind readnone
  declare <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32>, i64) #1

  ; Function Attrs: nounwind readnone
  declare <vscale x 1 x i64> @llvm.riscv.vsext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32>, i64) #1

  attributes #0 = { "target-features"="+experimental-v" }
  attributes #1 = { nounwind readnone }
  attributes #2 = { nounwind }
  attributes #3 = { nounwind readonly }
  attributes #4 = { nounwind writeonly }

...
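# The vsetvli inserted for the load in the entry block dominates both
# successors, so no additional vsetvli is expected before the add or the sub.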
---
name: load_add_or_sub
alignment: 4
tracksRegLiveness: true
registers:
  - { id: 0, class: vr }
  - { id: 1, class: vr }
  - { id: 2, class: vr }
  - { id: 3, class: vr }
  - { id: 4, class: gpr }
  - { id: 5, class: gpr }
  - { id: 6, class: vr }
  - { id: 7, class: gpr }
  - { id: 8, class: gpr }
liveins:
  - { reg: '$x10', virtual-reg: '%4' }
  - { reg: '$x11', virtual-reg: '%5' }
  - { reg: '$v8', virtual-reg: '%6' }
  - { reg: '$x12', virtual-reg: '%7' }
frameInfo:
  maxAlignment: 1
machineFunctionInfo: {}
body: |
  ; CHECK-LABEL: name: load_add_or_sub
  ; CHECK: bb.0.entry:
  ; CHECK: successors: %bb.2(0x30000000), %bb.1(0x50000000)
  ; CHECK: liveins: $x10, $x11, $v8, $x12
  ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x12
  ; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v8
  ; CHECK: [[COPY2:%[0-9]+]]:gpr = COPY $x11
  ; CHECK: [[COPY3:%[0-9]+]]:gpr = COPY $x10
  ; CHECK: dead %9:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
  ; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY2]], $noreg, 6, implicit $vl, implicit $vtype
  ; CHECK: [[COPY4:%[0-9]+]]:gpr = COPY $x0
  ; CHECK: BEQ [[COPY3]], [[COPY4]], %bb.2
  ; CHECK: PseudoBR %bb.1
  ; CHECK: bb.1.if.then:
  ; CHECK: successors: %bb.3(0x80000000)
  ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
  ; CHECK: PseudoBR %bb.3
  ; CHECK: bb.2.if.else:
  ; CHECK: successors: %bb.3(0x80000000)
  ; CHECK: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
  ; CHECK: bb.3.if.end:
  ; CHECK: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2
  ; CHECK: $v8 = COPY [[PHI]]
  ; CHECK: PseudoRET implicit $v8
  bb.0.entry:
    successors: %bb.2(0x30000000), %bb.1(0x50000000)
    liveins: $x10, $x11, $v8, $x12

    %7:gpr = COPY $x12
    %6:vr = COPY $v8
    %5:gpr = COPY $x11
    %4:gpr = COPY $x10
    %0:vr = PseudoVLE64_V_M1 %5, %7, 6
    %8:gpr = COPY $x0
    BEQ %4, %8, %bb.2
    PseudoBR %bb.1

  bb.1.if.then:
    %1:vr = PseudoVADD_VV_M1 %0, %6, %7, 6
    PseudoBR %bb.3

  bb.2.if.else:
    %2:vr = PseudoVSUB_VV_M1 %0, %6, %7, 6

  bb.3.if.end:
    %3:vr = PHI %1, %bb.1, %2, %bb.2
    $v8 = COPY %3
    PseudoRET implicit $v8

...
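# The widening zext/sext need a different SEW than the e32 load in the entry
# block, so a vsetvli is expected at the start of each successor.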
---
name: load_zext_or_sext
alignment: 4
tracksRegLiveness: true
registers:
  - { id: 0, class: vr }
  - { id: 1, class: vr }
  - { id: 2, class: vr }
  - { id: 3, class: vr }
  - { id: 4, class: gpr }
  - { id: 5, class: gpr }
  - { id: 6, class: gpr }
  - { id: 7, class: gpr }
  - { id: 8, class: gpr }
liveins:
  - { reg: '$x10', virtual-reg: '%4' }
  - { reg: '$x11', virtual-reg: '%5' }
  - { reg: '$x12', virtual-reg: '%6' }
  - { reg: '$x13', virtual-reg: '%7' }
frameInfo:
  maxAlignment: 1
machineFunctionInfo: {}
body: |
  ; CHECK-LABEL: name: load_zext_or_sext
  ; CHECK: bb.0.entry:
  ; CHECK: successors: %bb.2(0x30000000), %bb.1(0x50000000)
  ; CHECK: liveins: $x10, $x11, $x12, $x13
  ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x13
  ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $x12
  ; CHECK: [[COPY2:%[0-9]+]]:gpr = COPY $x11
  ; CHECK: [[COPY3:%[0-9]+]]:gpr = COPY $x10
  ; CHECK: dead %9:gpr = PseudoVSETVLI [[COPY]], 87, implicit-def $vl, implicit-def $vtype
  ; CHECK: [[PseudoVLE32_V_MF2_:%[0-9]+]]:vr = PseudoVLE32_V_MF2 [[COPY2]], $noreg, 5, implicit $vl, implicit $vtype
  ; CHECK: [[COPY4:%[0-9]+]]:gpr = COPY $x0
  ; CHECK: BEQ [[COPY3]], [[COPY4]], %bb.2
  ; CHECK: PseudoBR %bb.1
  ; CHECK: bb.1.if.then:
  ; CHECK: successors: %bb.3(0x80000000)
  ; CHECK: dead %10:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
  ; CHECK: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[PseudoVLE32_V_MF2_]], $noreg, 6, implicit $vl, implicit $vtype
  ; CHECK: PseudoBR %bb.3
  ; CHECK: bb.2.if.else:
  ; CHECK: successors: %bb.3(0x80000000)
  ; CHECK: dead %11:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
  ; CHECK: early-clobber %2:vr = PseudoVSEXT_VF2_M1 [[PseudoVLE32_V_MF2_]], $noreg, 6, implicit $vl, implicit $vtype
  ; CHECK: bb.3.if.end:
  ; CHECK: [[PHI:%[0-9]+]]:vr = PHI %1, %bb.1, %2, %bb.2
  ; CHECK: PseudoVSE64_V_M1 [[PHI]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
  ; CHECK: PseudoRET
  bb.0.entry:
    successors: %bb.2(0x30000000), %bb.1(0x50000000)
    liveins: $x10, $x11, $x12, $x13

    %7:gpr = COPY $x13
    %6:gpr = COPY $x12
    %5:gpr = COPY $x11
    %4:gpr = COPY $x10
    %0:vr = PseudoVLE32_V_MF2 %5, %7, 5
    %8:gpr = COPY $x0
    BEQ %4, %8, %bb.2
    PseudoBR %bb.1

  bb.1.if.then:
    early-clobber %1:vr = PseudoVZEXT_VF2_M1 %0, %7, 6
    PseudoBR %bb.3

  bb.2.if.else:
    early-clobber %2:vr = PseudoVSEXT_VF2_M1 %0, %7, 6

  bb.3.if.end:
    %3:vr = PHI %1, %bb.1, %2, %bb.2
    PseudoVSE64_V_M1 %3, %6, %7, 6
    PseudoRET

...
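# The entry block has no vector instruction, so each successor needs its own
# vsetvli; the vmv.x.s in the join block can reuse the vtype set on both paths.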
---
name: vmv_x_s
alignment: 4
tracksRegLiveness: true
registers:
  - { id: 0, class: vr }
  - { id: 1, class: vr }
  - { id: 2, class: vr }
  - { id: 3, class: gpr }
  - { id: 4, class: vr }
  - { id: 5, class: vr }
  - { id: 6, class: gpr }
  - { id: 7, class: gpr }
  - { id: 8, class: gpr }
liveins:
  - { reg: '$x10', virtual-reg: '%3' }
  - { reg: '$v8', virtual-reg: '%4' }
  - { reg: '$v9', virtual-reg: '%5' }
  - { reg: '$x11', virtual-reg: '%6' }
frameInfo:
  maxAlignment: 1
machineFunctionInfo: {}
body: |
  ; CHECK-LABEL: name: vmv_x_s
  ; CHECK: bb.0.entry:
  ; CHECK: successors: %bb.2(0x30000000), %bb.1(0x50000000)
  ; CHECK: liveins: $x10, $v8, $v9, $x11
  ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11
  ; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v9
  ; CHECK: [[COPY2:%[0-9]+]]:vr = COPY $v8
  ; CHECK: [[COPY3:%[0-9]+]]:gpr = COPY $x10
  ; CHECK: [[COPY4:%[0-9]+]]:gpr = COPY $x0
  ; CHECK: BEQ [[COPY3]], [[COPY4]], %bb.2
  ; CHECK: PseudoBR %bb.1
  ; CHECK: bb.1.if.then:
  ; CHECK: successors: %bb.3(0x80000000)
  ; CHECK: dead %9:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
  ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
  ; CHECK: PseudoBR %bb.3
  ; CHECK: bb.2.if.else:
  ; CHECK: successors: %bb.3(0x80000000)
  ; CHECK: dead %10:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
  ; CHECK: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 [[COPY1]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
  ; CHECK: bb.3.if.end:
  ; CHECK: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2
  ; CHECK: [[PseudoVMV_X_S_M1_:%[0-9]+]]:gpr = PseudoVMV_X_S_M1 [[PHI]], 6, implicit $vtype
  ; CHECK: $x10 = COPY [[PseudoVMV_X_S_M1_]]
  ; CHECK: PseudoRET implicit $x10
  bb.0.entry:
    successors: %bb.2(0x30000000), %bb.1(0x50000000)
    liveins: $x10, $v8, $v9, $x11

    %6:gpr = COPY $x11
    %5:vr = COPY $v9
    %4:vr = COPY $v8
    %3:gpr = COPY $x10
    %7:gpr = COPY $x0
    BEQ %3, %7, %bb.2
    PseudoBR %bb.1

  bb.1.if.then:
    %0:vr = PseudoVADD_VV_M1 %4, %5, %6, 6
    PseudoBR %bb.3

  bb.2.if.else:
    %1:vr = PseudoVSUB_VV_M1 %5, %5, %6, 6

  bb.3.if.end:
    %2:vr = PHI %0, %bb.1, %1, %bb.2
    %8:gpr = PseudoVMV_X_S_M1 %2, 6
    $x10 = COPY %8
    PseudoRET implicit $x10

...
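# The explicit vsetvli in the entry block already defines VL/VTYPE for both
# successors, so no additional vsetvli is expected before the add or the sub.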
---
name: vsetvli_add_or_sub
alignment: 4
tracksRegLiveness: true
registers:
  - { id: 0, class: gpr }
  - { id: 1, class: vr }
  - { id: 2, class: vr }
  - { id: 3, class: vr }
  - { id: 4, class: gpr }
  - { id: 5, class: vr }
  - { id: 6, class: vr }
  - { id: 7, class: gpr }
  - { id: 8, class: gpr }
liveins:
  - { reg: '$x10', virtual-reg: '%4' }
  - { reg: '$v8', virtual-reg: '%5' }
  - { reg: '$v9', virtual-reg: '%6' }
  - { reg: '$x11', virtual-reg: '%7' }
frameInfo:
  maxAlignment: 1
machineFunctionInfo: {}
body: |
  ; CHECK-LABEL: name: vsetvli_add_or_sub
  ; CHECK: bb.0.entry:
  ; CHECK: successors: %bb.2(0x30000000), %bb.1(0x50000000)
  ; CHECK: liveins: $x10, $v8, $v9, $x11
  ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11
  ; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v9
  ; CHECK: [[COPY2:%[0-9]+]]:vr = COPY $v8
  ; CHECK: [[COPY3:%[0-9]+]]:gpr = COPY $x10
  ; CHECK: [[PseudoVSETVLI:%[0-9]+]]:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
  ; CHECK: [[COPY4:%[0-9]+]]:gpr = COPY $x0
  ; CHECK: BEQ [[COPY3]], [[COPY4]], %bb.2
  ; CHECK: PseudoBR %bb.1
  ; CHECK: bb.1.if.then:
  ; CHECK: successors: %bb.3(0x80000000)
  ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
  ; CHECK: PseudoBR %bb.3
  ; CHECK: bb.2.if.else:
  ; CHECK: successors: %bb.3(0x80000000)
  ; CHECK: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
  ; CHECK: bb.3.if.end:
  ; CHECK: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2
  ; CHECK: $v8 = COPY [[PHI]]
  ; CHECK: PseudoRET implicit $v8
  bb.0.entry:
    successors: %bb.2(0x30000000), %bb.1(0x50000000)
    liveins: $x10, $v8, $v9, $x11

    %7:gpr = COPY $x11
    %6:vr = COPY $v9
    %5:vr = COPY $v8
    %4:gpr = COPY $x10
    %0:gpr = PseudoVSETVLI %7, 88, implicit-def dead $vl, implicit-def dead $vtype
    %8:gpr = COPY $x0
    BEQ %4, %8, %bb.2
    PseudoBR %bb.1

  bb.1.if.then:
    %1:vr = PseudoVADD_VV_M1 %5, %6, %0, 6
    PseudoBR %bb.3

  bb.2.if.else:
    %2:vr = PseudoVSUB_VV_M1 %5, %6, %0, 6

  bb.3.if.end:
    %3:vr = PHI %1, %bb.1, %2, %bb.2
    $v8 = COPY %3
    PseudoRET implicit $v8

...