[RISCV] Enable cross basic block aware vsetvli insertion

This patch extends D102737 to allow VL/VTYPE changes to be taken
into account before adding an explicit vsetvli.

We do this by using a data flow analysis to propagate VL/VTYPE
information from predecessors until we've determined a value for
every block in the function.

We use this information to determine if a vsetvli needs to be
inserted before the first vector instruction in the block.
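
The propagation is a standard forward data-flow problem over the CFG. A rough, self-contained sketch of the idea follows; the State, Block and propagate names are illustrative stand-ins (a toy three-state lattice and a hand-built diamond CFG), not the pass's actual VSETVLIInfo/BlockData types.

#include <cassert>
#include <cstdio>
#include <queue>
#include <vector>

// Toy stand-in for VSETVLIInfo: nothing computed yet, a known VL/VTYPE
// configuration, or "could be anything".
struct State {
  enum Kind { Uninitialized, Known, Unknown };
  Kind K = Uninitialized;
  int Config = 0; // pretend this encodes a VL/VTYPE setting

  static State known(int C) { State S; S.K = Known; S.Config = C; return S; }
  static State unknown() { State S; S.K = Unknown; return S; }
  bool operator==(const State &O) const {
    return K == O.K && (K != Known || Config == O.Config);
  }
  // Meet at a join point: agreement is kept, disagreement decays to Unknown.
  State intersect(const State &O) const {
    if (O.K == Uninitialized) return *this;
    if (K == Uninitialized) return O;
    return *this == O ? *this : unknown();
  }
};

struct Block {
  std::vector<int> Preds, Succs;
  State Change; // net effect of the block's own instructions (phase 1)
  State Entry;  // state on entry, i.e. what all predecessors agree on (phase 2)
  State Exit;   // state on exit (phase 2)
  bool InQueue = false;
};

// Phase 2: push exit states around the CFG until nothing changes.
void propagate(std::vector<Block> &CFG) {
  std::queue<int> Work;
  for (int I = 0, E = (int)CFG.size(); I != E; ++I) {
    Work.push(I);
    CFG[I].InQueue = true;
  }
  while (!Work.empty()) {
    Block &B = CFG[Work.front()];
    Work.pop();
    B.InQueue = false;

    State In;
    if (B.Preds.empty()) {
      In = State::unknown(); // nothing is known on function entry
    } else {
      for (int P : B.Preds)
        In = In.intersect(CFG[P].Exit);
    }
    if (In.K == State::Uninitialized)
      continue; // no predecessor has been processed yet; revisit later

    B.Entry = In;
    // The exit state is the block's own change if it has one, otherwise
    // whatever flowed in from the predecessors.
    State NewExit = B.Change.K == State::Uninitialized ? In : B.Change;
    if (NewExit == B.Exit)
      continue; // fixed point for this block
    B.Exit = NewExit;
    for (int S : B.Succs)
      if (!CFG[S].InQueue) {
        Work.push(S);
        CFG[S].InQueue = true;
      }
  }
}

int main() {
  // Diamond CFG: 0 -> {1, 2} -> 3. Both branches establish configuration 7,
  // so the join block starts with 7 known and would not need its own vsetvli.
  std::vector<Block> CFG(4);
  CFG[0].Succs = {1, 2};
  CFG[1].Preds = {0}; CFG[1].Succs = {3}; CFG[1].Change = State::known(7);
  CFG[2].Preds = {0}; CFG[2].Succs = {3}; CFG[2].Change = State::known(7);
  CFG[3].Preds = {1, 2};
  propagate(CFG);
  assert(CFG[3].Entry.K == State::Known && CFG[3].Entry.Config == 7);
  std::printf("join block enters with configuration %d\n", CFG[3].Entry.Config);
  return 0;
}

Because the meet only moves down the three-state lattice and a block's exit only changes when something new is learned, the worklist reaches a fixed point; the pass below follows the same shape, with VSETVLIInfo as the lattice value and block numbers indexing BlockData.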

Differential Revision: https://reviews.llvm.org/D102739
Craig Topper 2021-05-25 16:28:34 -07:00
parent ea91a8cbab
commit 1b47a3de48
8 changed files with 2191 additions and 2307 deletions


@ -9,13 +9,17 @@
// This file implements a function pass that inserts VSETVLI instructions where
// needed.
//
// The pass consists of a single pass over each basic block looking for changes
// in VL/VTYPE usage that requires a vsetvli to be inserted. We assume the
// VL/VTYPE values are unknown from predecessors so the first vector instruction
// will always require a new VSETVLI.
// This pass consists of 3 phases:
//
// TODO: Future enhancements to this pass will take into account VL/VTYPE from
// predecessors.
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//
@ -23,6 +27,7 @@
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;
#define DEBUG_TYPE "riscv-insert-vsetvli"
@ -52,6 +57,12 @@ class VSETVLIInfo {
public:
VSETVLIInfo() : AVLImm(0) {}
static VSETVLIInfo getUnknown() {
VSETVLIInfo Info;
Info.setUnknown();
return Info;
}
bool isValid() const { return State != Uninitialized; }
void setUnknown() { State = Unknown; }
bool isUnknown() const { return State == Unknown; }
@ -148,12 +159,89 @@ public:
return getAVLReg() == Other.getAVLReg();
}
bool operator==(const VSETVLIInfo &Other) const {
// Uninitialized is only equal to another Uninitialized.
if (!isValid())
return !Other.isValid();
if (!Other.isValid())
return !isValid();
// Unknown is only equal to another Unknown.
if (isUnknown())
return Other.isUnknown();
if (Other.isUnknown())
return isUnknown();
// Otherwise compare the VTYPE and AVL.
return hasSameVTYPE(Other) && hasSameAVL(Other);
}
bool operator!=(const VSETVLIInfo &Other) const { return !(*this == Other); }
// Calculate the VSETVLIInfo visible to a block assuming this and Other are
// both predecessors.
VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
// If the new value isn't valid, ignore it.
if (!Other.isValid())
return *this;
// If this value isn't valid, this must be the first predecessor, use it.
if (!isValid())
return Other;
if (*this == Other)
return *this;
// If the configurations don't match, assume unknown.
return VSETVLIInfo::getUnknown();
}
// Calculate the VSETVLIInfo visible at the end of the block assuming this
// is the predecessor value, and Other is the change for this block.
VSETVLIInfo merge(const VSETVLIInfo &Other) const {
assert(isValid() && "Can only merge with a valid VSETVLInfo");
// Nothing changed from the predecessor, keep it.
if (!Other.isValid())
return *this;
// If the change is compatible with the input, we won't create a VSETVLI
// and should keep the predecessor.
if (isCompatible(Other))
return *this;
// Otherwise just use whatever is in this block.
return Other;
}
};
struct BlockData {
// The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
// made by this block. Calculated in Phase 1.
VSETVLIInfo Change;
// The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
// block. Calculated in Phase 2.
VSETVLIInfo Exit;
// The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
// blocks. Calculated in Phase 2, and used by Phase 3.
VSETVLIInfo Pred;
// Keeps track of whether the block is already in the queue.
bool InQueue = false;
BlockData() {}
};
class RISCVInsertVSETVLI : public MachineFunctionPass {
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
std::vector<BlockData> BlockInfo;
std::queue<const MachineBasicBlock *> WorkList;
public:
static char ID;
@ -170,10 +258,13 @@ public:
StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }
private:
bool needVSETVLI(const VSETVLIInfo &Require, const VSETVLIInfo &CurInfo);
void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
const VSETVLIInfo &Info);
bool emitVSETVLIs(MachineBasicBlock &MBB);
bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
void emitVSETVLIs(MachineBasicBlock &MBB);
};
} // end anonymous namespace
@ -276,7 +367,7 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
VSETVLIInfo NewInfo;
if (MI.getOpcode() == RISCV::PseudoVSETVLI) {
Register AVLReg = MI.getOperand(1).getReg();
@ -292,12 +383,111 @@ VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
return NewInfo;
}
bool RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
bool MadeChange = false;
bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require,
const VSETVLIInfo &CurInfo) {
if (CurInfo.isCompatible(Require))
return false;
// Assume predecessor state is unknown.
// We didn't find a compatible value. If our AVL is a virtual register,
// it might be defined by a VSET(I)VLI. If it has the same VTYPE we need
// and the last VL/VTYPE we observed is the same, we don't need a
// VSETVLI here.
if (!CurInfo.isUnknown() && Require.hasAVLReg() &&
Require.getAVLReg().isVirtual() && Require.hasSameVTYPE(CurInfo)) {
if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
if (DefMI->getOpcode() == RISCV::PseudoVSETVLI ||
DefMI->getOpcode() == RISCV::PseudoVSETIVLI) {
VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVTYPE(CurInfo))
return false;
}
}
}
return true;
}
bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
bool HadVectorOp = false;
BlockData &BBInfo = BlockInfo[MBB.getNumber()];
for (const MachineInstr &MI : MBB) {
// If this is an explicit VSETVLI or VSETIVLI, update our state.
if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
MI.getOpcode() == RISCV::PseudoVSETIVLI) {
HadVectorOp = true;
BBInfo.Change = getInfoForVSETVLI(MI);
continue;
}
uint64_t TSFlags = MI.getDesc().TSFlags;
if (RISCVII::hasSEWOp(TSFlags)) {
HadVectorOp = true;
VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
if (!BBInfo.Change.isValid()) {
BBInfo.Change = NewInfo;
} else {
// If this instruction isn't compatible with the previous VL/VTYPE
// we need to insert a VSETVLI.
if (needVSETVLI(NewInfo, BBInfo.Change))
BBInfo.Change = NewInfo;
}
}
// If this is something that updates VL/VTYPE that we don't know about, set
// the state to unknown.
if (MI.isCall() || MI.modifiesRegister(RISCV::VL) ||
MI.modifiesRegister(RISCV::VTYPE)) {
BBInfo.Change = VSETVLIInfo::getUnknown();
}
}
// Initial exit state is whatever change we found in the block.
BBInfo.Exit = BBInfo.Change;
return HadVectorOp;
}
void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
BlockData &BBInfo = BlockInfo[MBB.getNumber()];
BBInfo.InQueue = false;
VSETVLIInfo InInfo;
if (MBB.pred_empty()) {
// There are no predecessors, so use the default starting status.
InInfo.setUnknown();
} else {
for (MachineBasicBlock *P : MBB.predecessors())
InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
}
// If we don't have any valid predecessor value, wait until we do.
if (!InInfo.isValid())
return;
BBInfo.Pred = InInfo;
VSETVLIInfo TmpStatus = BBInfo.Pred.merge(BBInfo.Change);
// If the new exit value matches the old exit value, we don't need to revisit
// any blocks.
if (BBInfo.Exit == TmpStatus)
return;
BBInfo.Exit = TmpStatus;
// Add the successors to the work list so we can propagate the changed exit
// status.
for (MachineBasicBlock *S : MBB.successors())
if (!BlockInfo[S->getNumber()].InQueue)
WorkList.push(S);
}
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
VSETVLIInfo CurInfo;
CurInfo.setUnknown();
for (MachineInstr &MI : MBB) {
// If this is an explicit VSETVLI or VSETIVLI, update our state.
@ -309,7 +499,6 @@ bool RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
"Unexpected operands where VL and VTYPE should be");
MI.getOperand(3).setIsDead(false);
MI.getOperand(4).setIsDead(false);
MadeChange = true;
CurInfo = getInfoForVSETVLI(MI);
continue;
}
@ -330,47 +519,32 @@ bool RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
/*isImp*/ true));
bool NeedVSETVLI = true;
if (CurInfo.isValid() && CurInfo.isCompatible(NewInfo))
NeedVSETVLI = false;
// We didn't find a compatible value. If our AVL is a virtual register,
// it might be defined by a VSET(I)VLI. If it has the same VTYPE we need
// and the last VL/VTYPE we observed is the same, we don't need a
// VSETVLI here.
if (NeedVSETVLI && !CurInfo.isUnknown() && NewInfo.hasAVLReg() &&
NewInfo.getAVLReg().isVirtual() && NewInfo.hasSameVTYPE(CurInfo)) {
if (MachineInstr *DefMI = MRI->getVRegDef(NewInfo.getAVLReg())) {
if (DefMI->getOpcode() == RISCV::PseudoVSETVLI ||
DefMI->getOpcode() == RISCV::PseudoVSETIVLI) {
VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVTYPE(CurInfo))
NeedVSETVLI = false;
}
if (!CurInfo.isValid()) {
// We haven't found any vector instructions or VL/VTYPE changes yet,
// use the predecessor information.
assert(BlockInfo[MBB.getNumber()].Pred.isValid() &&
"Expected a valid predecessor state.");
if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred)) {
insertVSETVLI(MBB, MI, NewInfo);
CurInfo = NewInfo;
}
} else {
// If this instruction isn't compatible with the previous VL/VTYPE
// we need to insert a VSETVLI.
if (needVSETVLI(NewInfo, CurInfo)) {
insertVSETVLI(MBB, MI, NewInfo);
CurInfo = NewInfo;
}
}
// If this instruction isn't compatible with the previous VL/VTYPE
// we need to insert a VSETVLI.
if (NeedVSETVLI) {
insertVSETVLI(MBB, MI, NewInfo);
CurInfo = NewInfo;
}
// If we find an instruction we at least changed the operands.
MadeChange = true;
}
// If this is something that updates VL/VTYPE that we don't know about, set
// the state to unknown.
if (MI.isCall() || MI.modifiesRegister(RISCV::VL) ||
MI.modifiesRegister(RISCV::VTYPE)) {
VSETVLIInfo NewInfo;
NewInfo.setUnknown();
CurInfo = NewInfo;
CurInfo = VSETVLIInfo::getUnknown();
}
}
return MadeChange;
}
bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
@ -382,12 +556,41 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
TII = ST.getInstrInfo();
MRI = &MF.getRegInfo();
bool Changed = false;
assert(BlockInfo.empty() && "Expect empty block infos");
BlockInfo.resize(MF.getNumBlockIDs());
for (MachineBasicBlock &MBB : MF)
Changed |= emitVSETVLIs(MBB);
bool HaveVectorOp = false;
return Changed;
// Phase 1 - determine how VL/VTYPE are affected by each block.
for (const MachineBasicBlock &MBB : MF)
HaveVectorOp |= computeVLVTYPEChanges(MBB);
// If we didn't find any instructions that need VSETVLI, we're done.
if (HaveVectorOp) {
// Phase 2 - determine the exit VL/VTYPE from each block. We add all
// blocks to the list here, but will also add any that need to be revisited
// during Phase 2 processing.
for (const MachineBasicBlock &MBB : MF) {
WorkList.push(&MBB);
BlockInfo[MBB.getNumber()].InQueue = true;
}
while (!WorkList.empty()) {
const MachineBasicBlock &MBB = *WorkList.front();
WorkList.pop();
computeIncomingVLVTYPE(MBB);
}
// Phase 3 - add any vsetvli instructions needed in the block. Use the
// Phase 2 information to avoid adding vsetvlis before the first vector
// instruction in the block if the VL/VTYPE is satisfied by its
// predecessors.
for (MachineBasicBlock &MBB : MF)
emitVSETVLIs(MBB);
}
BlockInfo.clear();
return HaveVectorOp;
}
/// Returns an instance of the Insert VSETVLI pass.
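
As a rough illustration of the Phase 3 decision the tests below exercise: at the start of a block, a vsetvli is only needed when the state handed over by the predecessors fails to satisfy the first vector instruction. This is a minimal sketch assuming a simplified Config pair in place of the real VSETVLIInfo and ignoring the AVL-register special case; the function name is hypothetical.

#include <cassert>
#include <optional>

// Simplified stand-in for a VL/VTYPE requirement.
struct Config {
  unsigned SEW;  // element width
  unsigned LMUL; // register grouping
  bool operator==(const Config &O) const {
    return SEW == O.SEW && LMUL == O.LMUL;
  }
};

// std::nullopt plays the role of an "unknown" predecessor state.
bool needVSETVLIAtBlockEntry(const Config &Require,
                             const std::optional<Config> &PredState) {
  // Unknown predecessor state: must emit a vsetvli (the old behaviour).
  if (!PredState)
    return true;
  // A compatible state reaches us from every predecessor: skip the vsetvli.
  return !(*PredState == Require);
}

int main() {
  Config Need{64, 1};                                     // e64,m1
  assert(!needVSETVLIAtBlockEntry(Need, Config{64, 1}));  // satisfied by preds
  assert(needVSETVLIAtBlockEntry(Need, Config{32, 1}));   // mismatch
  assert(needVSETVLIAtBlockEntry(Need, std::nullopt));    // unknown, e.g. after a call
  return 0;
}

In the pass above, this corresponds to running the needVSETVLI check against BlockInfo[MBB.getNumber()].Pred while CurInfo is still invalid at the top of emitVSETVLIs.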


@ -3667,11 +3667,10 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a3, a1, 819
; LMULMAX2-RV32-NEXT: lui a1, 61681
; LMULMAX2-RV32-NEXT: addi a7, a1, -241
; LMULMAX2-RV32-NEXT: lui a2, 4112
; LMULMAX2-RV32-NEXT: addi a2, a2, 257
; LMULMAX2-RV32-NEXT: lui a1, 4112
; LMULMAX2-RV32-NEXT: addi a2, a1, 257
; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_2
; LMULMAX2-RV32-NEXT: # %bb.1:
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
@ -3723,14 +3722,12 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV32-NEXT: mul a1, a1, a2
; LMULMAX2-RV32-NEXT: srli a5, a1, 24
; LMULMAX2-RV32-NEXT: .LBB3_3:
; LMULMAX2-RV32-NEXT: sw a5, 0(sp)
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX2-RV32-NEXT: vslidedown.vi v25, v25, 1
; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26
; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_5
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV32-NEXT: sw a5, 0(sp)
; LMULMAX2-RV32-NEXT: bnez a1, .LBB3_5
; LMULMAX2-RV32-NEXT: # %bb.4:
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
@ -3758,8 +3755,8 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a1, a1, 32
; LMULMAX2-RV32-NEXT: j .LBB3_6
; LMULMAX2-RV32-NEXT: .LBB3_5:
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
; LMULMAX2-RV32-NEXT: or a1, a5, a1
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
@ -3902,11 +3899,10 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi a3, a1, 819
; LMULMAX1-RV32-NEXT: lui a1, 61681
; LMULMAX1-RV32-NEXT: addi a7, a1, -241
; LMULMAX1-RV32-NEXT: lui a2, 4112
; LMULMAX1-RV32-NEXT: addi a2, a2, 257
; LMULMAX1-RV32-NEXT: lui a1, 4112
; LMULMAX1-RV32-NEXT: addi a2, a1, 257
; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_2
; LMULMAX1-RV32-NEXT: # %bb.1:
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: srli a5, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a5
@ -3958,14 +3954,12 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV32-NEXT: mul a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a5, a1, 24
; LMULMAX1-RV32-NEXT: .LBB3_3:
; LMULMAX1-RV32-NEXT: sw a5, 0(sp)
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6
; LMULMAX1-RV32-NEXT: vmv.x.s a5, v26
; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_5
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: sw a5, 0(sp)
; LMULMAX1-RV32-NEXT: bnez a1, .LBB3_5
; LMULMAX1-RV32-NEXT: # %bb.4:
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: srli a5, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a5
@ -3993,8 +3987,8 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi a1, a1, 32
; LMULMAX1-RV32-NEXT: j .LBB3_6
; LMULMAX1-RV32-NEXT: .LBB3_5:
; LMULMAX1-RV32-NEXT: srli a1, a5, 1
; LMULMAX1-RV32-NEXT: or a1, a5, a1
; LMULMAX1-RV32-NEXT: srli a5, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a5
; LMULMAX1-RV32-NEXT: srli a5, a1, 2
; LMULMAX1-RV32-NEXT: or a1, a1, a5
; LMULMAX1-RV32-NEXT: srli a5, a1, 4
@ -11124,11 +11118,10 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a3, a1, 819
; LMULMAX2-RV32-NEXT: lui a1, 61681
; LMULMAX2-RV32-NEXT: addi a7, a1, -241
; LMULMAX2-RV32-NEXT: lui a2, 4112
; LMULMAX2-RV32-NEXT: addi a2, a2, 257
; LMULMAX2-RV32-NEXT: lui a1, 4112
; LMULMAX2-RV32-NEXT: addi a2, a1, 257
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_2
; LMULMAX2-RV32-NEXT: # %bb.1:
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
@ -11180,14 +11173,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: mul a1, a1, a2
; LMULMAX2-RV32-NEXT: srli a5, a1, 24
; LMULMAX2-RV32-NEXT: .LBB7_3:
; LMULMAX2-RV32-NEXT: sw a5, 0(sp)
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 3
; LMULMAX2-RV32-NEXT: vsrl.vx v30, v28, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v30
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_5
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v30
; LMULMAX2-RV32-NEXT: sw a5, 0(sp)
; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_5
; LMULMAX2-RV32-NEXT: # %bb.4:
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
@ -11215,8 +11206,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a5, a1, 32
; LMULMAX2-RV32-NEXT: j .LBB7_6
; LMULMAX2-RV32-NEXT: .LBB7_5:
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
; LMULMAX2-RV32-NEXT: or a1, a5, a1
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
@ -11239,14 +11230,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: mul a1, a1, a2
; LMULMAX2-RV32-NEXT: srli a5, a1, 24
; LMULMAX2-RV32-NEXT: .LBB7_6:
; LMULMAX2-RV32-NEXT: sw a5, 24(sp)
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 2
; LMULMAX2-RV32-NEXT: vsrl.vx v30, v28, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v30
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_8
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v30
; LMULMAX2-RV32-NEXT: sw a5, 24(sp)
; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_8
; LMULMAX2-RV32-NEXT: # %bb.7:
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
@ -11274,8 +11263,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a5, a1, 32
; LMULMAX2-RV32-NEXT: j .LBB7_9
; LMULMAX2-RV32-NEXT: .LBB7_8:
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
; LMULMAX2-RV32-NEXT: or a1, a5, a1
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
@ -11298,14 +11287,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: mul a1, a1, a2
; LMULMAX2-RV32-NEXT: srli a5, a1, 24
; LMULMAX2-RV32-NEXT: .LBB7_9:
; LMULMAX2-RV32-NEXT: sw a5, 16(sp)
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vslidedown.vi v26, v26, 1
; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_11
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV32-NEXT: sw a5, 16(sp)
; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_11
; LMULMAX2-RV32-NEXT: # %bb.10:
; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
@ -11333,8 +11320,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a1, a1, 32
; LMULMAX2-RV32-NEXT: j .LBB7_12
; LMULMAX2-RV32-NEXT: .LBB7_11:
; LMULMAX2-RV32-NEXT: srli a1, a5, 1
; LMULMAX2-RV32-NEXT: or a1, a5, a1
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 2
; LMULMAX2-RV32-NEXT: or a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 4
@ -11552,11 +11539,10 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi a4, a2, 819
; LMULMAX1-RV32-NEXT: lui a2, 61681
; LMULMAX1-RV32-NEXT: addi t0, a2, -241
; LMULMAX1-RV32-NEXT: lui a3, 4112
; LMULMAX1-RV32-NEXT: addi a3, a3, 257
; LMULMAX1-RV32-NEXT: lui a2, 4112
; LMULMAX1-RV32-NEXT: addi a3, a2, 257
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_2
; LMULMAX1-RV32-NEXT: # %bb.1:
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a2
@ -11608,14 +11594,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: mul a1, a1, a3
; LMULMAX1-RV32-NEXT: srli a1, a1, 24
; LMULMAX1-RV32-NEXT: .LBB7_3:
; LMULMAX1-RV32-NEXT: sw a1, 16(sp)
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vslidedown.vi v26, v26, 1
; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a7
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v27
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_5
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v27
; LMULMAX1-RV32-NEXT: sw a1, 16(sp)
; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_5
; LMULMAX1-RV32-NEXT: # %bb.4:
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a2
@ -11643,8 +11627,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi a1, a1, 32
; LMULMAX1-RV32-NEXT: j .LBB7_6
; LMULMAX1-RV32-NEXT: .LBB7_5:
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a1, a2, 1
; LMULMAX1-RV32-NEXT: or a1, a2, a1
; LMULMAX1-RV32-NEXT: srli a2, a1, 2
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 4
@ -11669,13 +11653,11 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: .LBB7_6:
; LMULMAX1-RV32-NEXT: sw a1, 24(sp)
; LMULMAX1-RV32-NEXT: sw zero, 12(sp)
; LMULMAX1-RV32-NEXT: sw zero, 4(sp)
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: sw zero, 4(sp)
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_8
; LMULMAX1-RV32-NEXT: # %bb.7:
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a2
@ -11727,14 +11709,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: mul a1, a1, a3
; LMULMAX1-RV32-NEXT: srli a1, a1, 24
; LMULMAX1-RV32-NEXT: .LBB7_9:
; LMULMAX1-RV32-NEXT: sw a1, 0(sp)
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_11
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26
; LMULMAX1-RV32-NEXT: sw a1, 0(sp)
; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_11
; LMULMAX1-RV32-NEXT: # %bb.10:
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a2
@ -11762,8 +11742,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi a1, a1, 32
; LMULMAX1-RV32-NEXT: j .LBB7_12
; LMULMAX1-RV32-NEXT: .LBB7_11:
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a1, a2, 1
; LMULMAX1-RV32-NEXT: or a1, a2, a1
; LMULMAX1-RV32-NEXT: srli a2, a1, 2
; LMULMAX1-RV32-NEXT: or a1, a1, a2
; LMULMAX1-RV32-NEXT: srli a2, a1, 4


@ -2592,7 +2592,6 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v25
; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_5
; LMULMAX2-RV32-NEXT: # %bb.4:
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v25, v25, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
@ -2774,7 +2773,6 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; LMULMAX1-RV32-NEXT: vmv.x.s a5, v25
; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_5
; LMULMAX1-RV32-NEXT: # %bb.4:
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a6
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: addi a5, a1, -1
@ -7655,9 +7653,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a3, a1, 819
; LMULMAX2-RV32-NEXT: lui a1, 61681
; LMULMAX2-RV32-NEXT: addi a7, a1, -241
; LMULMAX2-RV32-NEXT: lui a2, 4112
; LMULMAX2-RV32-NEXT: lui a1, 4112
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26
; LMULMAX2-RV32-NEXT: addi a2, a2, 257
; LMULMAX2-RV32-NEXT: addi a2, a1, 257
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_2
; LMULMAX2-RV32-NEXT: # %bb.1:
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
@ -7703,7 +7701,6 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_5
; LMULMAX2-RV32-NEXT: # %bb.4:
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
@ -7740,13 +7737,11 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: mul a1, a1, a2
; LMULMAX2-RV32-NEXT: srli a5, a1, 24
; LMULMAX2-RV32-NEXT: .LBB7_6:
; LMULMAX2-RV32-NEXT: sw a5, 24(sp)
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 2
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_8
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV32-NEXT: sw a5, 24(sp)
; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_8
; LMULMAX2-RV32-NEXT: # %bb.7:
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
@ -7767,9 +7762,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a5, a1, 32
; LMULMAX2-RV32-NEXT: j .LBB7_9
; LMULMAX2-RV32-NEXT: .LBB7_8:
; LMULMAX2-RV32-NEXT: addi a1, a5, -1
; LMULMAX2-RV32-NEXT: not a5, a5
; LMULMAX2-RV32-NEXT: and a1, a5, a1
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
; LMULMAX2-RV32-NEXT: not a1, a1
; LMULMAX2-RV32-NEXT: and a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: and a5, a5, a4
; LMULMAX2-RV32-NEXT: sub a1, a1, a5
@ -7783,13 +7778,11 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: mul a1, a1, a2
; LMULMAX2-RV32-NEXT: srli a5, a1, 24
; LMULMAX2-RV32-NEXT: .LBB7_9:
; LMULMAX2-RV32-NEXT: sw a5, 16(sp)
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vslidedown.vi v26, v26, 1
; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26
; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_11
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV32-NEXT: sw a5, 16(sp)
; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_11
; LMULMAX2-RV32-NEXT: # %bb.10:
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu
; LMULMAX2-RV32-NEXT: vsrl.vx v26, v26, a6
; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
@ -7810,9 +7803,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-RV32-NEXT: addi a1, a1, 32
; LMULMAX2-RV32-NEXT: j .LBB7_12
; LMULMAX2-RV32-NEXT: .LBB7_11:
; LMULMAX2-RV32-NEXT: addi a1, a5, -1
; LMULMAX2-RV32-NEXT: not a5, a5
; LMULMAX2-RV32-NEXT: and a1, a5, a1
; LMULMAX2-RV32-NEXT: addi a5, a1, -1
; LMULMAX2-RV32-NEXT: not a1, a1
; LMULMAX2-RV32-NEXT: and a1, a1, a5
; LMULMAX2-RV32-NEXT: srli a5, a1, 1
; LMULMAX2-RV32-NEXT: and a4, a5, a4
; LMULMAX2-RV32-NEXT: sub a1, a1, a4
@ -7978,9 +7971,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi a4, a1, 819
; LMULMAX1-RV32-NEXT: lui a1, 61681
; LMULMAX1-RV32-NEXT: addi t0, a1, -241
; LMULMAX1-RV32-NEXT: lui a3, 4112
; LMULMAX1-RV32-NEXT: lui a2, 4112
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: addi a3, a3, 257
; LMULMAX1-RV32-NEXT: addi a3, a2, 257
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_2
; LMULMAX1-RV32-NEXT: # %bb.1:
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
@ -8026,7 +8019,6 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_5
; LMULMAX1-RV32-NEXT: # %bb.4:
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v26, a6
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: addi a2, a1, -1
@ -8065,12 +8057,10 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: .LBB7_6:
; LMULMAX1-RV32-NEXT: sw a1, 24(sp)
; LMULMAX1-RV32-NEXT: sw zero, 12(sp)
; LMULMAX1-RV32-NEXT: sw zero, 4(sp)
; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: sw zero, 4(sp)
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_8
; LMULMAX1-RV32-NEXT: # %bb.7:
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26
; LMULMAX1-RV32-NEXT: addi a2, a1, -1
@ -8107,13 +8097,11 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: mul a1, a1, a3
; LMULMAX1-RV32-NEXT: srli a1, a1, 24
; LMULMAX1-RV32-NEXT: .LBB7_9:
; LMULMAX1-RV32-NEXT: sw a1, 0(sp)
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_11
; LMULMAX1-RV32-NEXT: vmv.x.s a2, v25
; LMULMAX1-RV32-NEXT: sw a1, 0(sp)
; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_11
; LMULMAX1-RV32-NEXT: # %bb.10:
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu
; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a6
; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25
; LMULMAX1-RV32-NEXT: addi a2, a1, -1
@ -8134,9 +8122,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-RV32-NEXT: addi a1, a1, 32
; LMULMAX1-RV32-NEXT: j .LBB7_12
; LMULMAX1-RV32-NEXT: .LBB7_11:
; LMULMAX1-RV32-NEXT: addi a2, a1, -1
; LMULMAX1-RV32-NEXT: not a1, a1
; LMULMAX1-RV32-NEXT: and a1, a1, a2
; LMULMAX1-RV32-NEXT: addi a1, a2, -1
; LMULMAX1-RV32-NEXT: not a2, a2
; LMULMAX1-RV32-NEXT: and a1, a2, a1
; LMULMAX1-RV32-NEXT: srli a2, a1, 1
; LMULMAX1-RV32-NEXT: and a2, a2, a5
; LMULMAX1-RV32-NEXT: sub a1, a1, a2


@ -3959,7 +3959,6 @@ define void @extract_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-NEXT: vsetivli a2, 4, e64,m2,ta,mu
; LMULMAX2-NEXT: vle64.v v26, (a0)
; LMULMAX2-NEXT: vle64.v v28, (a1)
; LMULMAX2-NEXT: vsetivli a1, 4, e64,m2,ta,mu
; LMULMAX2-NEXT: vadd.vv v26, v26, v28
; LMULMAX2-NEXT: vse64.v v26, (a0)
; LMULMAX2-NEXT: ret
@ -3973,7 +3972,6 @@ define void @extract_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX1-NEXT: vle64.v v27, (a1)
; LMULMAX1-NEXT: addi a1, a1, 16
; LMULMAX1-NEXT: vle64.v v28, (a1)
; LMULMAX1-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; LMULMAX1-NEXT: vadd.vv v26, v26, v28
; LMULMAX1-NEXT: vadd.vv v25, v25, v27
; LMULMAX1-NEXT: vse64.v v25, (a0)

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -0,0 +1,447 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+experimental-v \
; RUN: -verify-machineinstrs -O2 < %s | FileCheck %s
; The following tests check whether inserting VSETVLI avoids inserting
; unneeded vsetvlis across basic blocks.
declare i64 @llvm.riscv.vsetvli(i64, i64, i64)
declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, i64)
declare <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i64)
declare <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, i64)
declare <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, i64)
declare <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(double, i64)
declare <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(float, i64)
declare void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>* nocapture, i64)
declare void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>* nocapture, i64)
define <vscale x 1 x double> @test1(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
; CHECK-NEXT: beqz a1, .LBB0_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_2: # %if.else
; CHECK-NEXT: vfsub.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
%tobool = icmp eq i8 %cond, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
br label %if.end
if.else: ; preds = %entry
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
br label %if.end
if.end: ; preds = %if.else, %if.then
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
ret <vscale x 1 x double> %c.0
}
@scratch = global i8 0, align 16
define <vscale x 1 x double> @test2(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
; CHECK-NEXT: beqz a1, .LBB1_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vfadd.vv v25, v8, v9
; CHECK-NEXT: vfmul.vv v8, v25, v8
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB1_2: # %if.else
; CHECK-NEXT: vfsub.vv v25, v8, v9
; CHECK-NEXT: vfmul.vv v8, v25, v8
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
%tobool = icmp eq i8 %cond, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
br label %if.end
if.else: ; preds = %entry
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
br label %if.end
if.end: ; preds = %if.else, %if.then
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
%3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 %0)
ret <vscale x 1 x double> %3
}
; FIXME: The last vsetvli is redundant, but we need to look through a phi to
; prove it.
define <vscale x 1 x double> @test3(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: beqz a1, .LBB2_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
; CHECK-NEXT: vfadd.vv v25, v8, v9
; CHECK-NEXT: j .LBB2_3
; CHECK-NEXT: .LBB2_2: # %if.else
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
; CHECK-NEXT: vfsub.vv v25, v8, v9
; CHECK-NEXT: .LBB2_3: # %if.end
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
; CHECK-NEXT: vfmul.vv v8, v25, v8
; CHECK-NEXT: ret
entry:
%tobool = icmp eq i8 %cond, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
br label %if.end
if.else: ; preds = %entry
%2 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
%3 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %2)
br label %if.end
if.end: ; preds = %if.else, %if.then
%vl.0 = phi i64 [ %0, %if.then], [ %2, %if.else ]
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %3, %if.else ]
%4 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 %vl.0)
ret <vscale x 1 x double> %4
}
define <vscale x 1 x double> @test4(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %l, <vscale x 1 x double> %r) nounwind {
; CHECK-LABEL: test4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: beqz a1, .LBB3_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: lui a1, %hi(.LCPI3_0)
; CHECK-NEXT: addi a1, a1, %lo(.LCPI3_0)
; CHECK-NEXT: vsetvli a2, a0, e64,m1,ta,mu
; CHECK-NEXT: vlse64.v v25, (a1), zero
; CHECK-NEXT: lui a1, %hi(.LCPI3_1)
; CHECK-NEXT: addi a1, a1, %lo(.LCPI3_1)
; CHECK-NEXT: vlse64.v v26, (a1), zero
; CHECK-NEXT: vfadd.vv v25, v25, v26
; CHECK-NEXT: lui a1, %hi(scratch)
; CHECK-NEXT: addi a1, a1, %lo(scratch)
; CHECK-NEXT: vse64.v v25, (a1)
; CHECK-NEXT: j .LBB3_3
; CHECK-NEXT: .LBB3_2: # %if.else
; CHECK-NEXT: lui a1, %hi(.LCPI3_2)
; CHECK-NEXT: addi a1, a1, %lo(.LCPI3_2)
; CHECK-NEXT: vsetvli a2, a0, e32,m1,ta,mu
; CHECK-NEXT: vlse32.v v25, (a1), zero
; CHECK-NEXT: lui a1, %hi(.LCPI3_3)
; CHECK-NEXT: addi a1, a1, %lo(.LCPI3_3)
; CHECK-NEXT: vlse32.v v26, (a1), zero
; CHECK-NEXT: vfadd.vv v25, v25, v26
; CHECK-NEXT: lui a1, %hi(scratch)
; CHECK-NEXT: addi a1, a1, %lo(scratch)
; CHECK-NEXT: vse32.v v25, (a1)
; CHECK-NEXT: .LBB3_3: # %if.end
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
; CHECK-NEXT: vfmul.vv v8, v8, v9
; CHECK-NEXT: ret
entry:
%tobool = icmp eq i8 %cond, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%0 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(double 1.000000e+00, i64 %avl)
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(double 2.000000e+00, i64 %avl)
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i64 %avl)
%3 = bitcast i8* @scratch to <vscale x 1 x double>*
tail call void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double> %2, <vscale x 1 x double>* %3, i64 %avl)
br label %if.end
if.else: ; preds = %entry
%4 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(float 1.000000e+00, i64 %avl)
%5 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(float 2.000000e+00, i64 %avl)
%6 = tail call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float> %4, <vscale x 2 x float> %5, i64 %avl)
%7 = bitcast i8* @scratch to <vscale x 2 x float>*
tail call void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float> %6, <vscale x 2 x float>* %7, i64 %avl)
br label %if.end
if.end: ; preds = %if.else, %if.then
%8 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %l, <vscale x 1 x double> %r, i64 %avl)
ret <vscale x 1 x double> %8
}
define <vscale x 1 x double> @test5(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a2, a1, 1
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
; CHECK-NEXT: bnez a2, .LBB4_3
; CHECK-NEXT: # %bb.1: # %if.else
; CHECK-NEXT: vfsub.vv v25, v8, v9
; CHECK-NEXT: andi a0, a1, 2
; CHECK-NEXT: beqz a0, .LBB4_4
; CHECK-NEXT: .LBB4_2: # %if.then4
; CHECK-NEXT: vfmul.vv v8, v25, v8
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB4_3: # %if.then
; CHECK-NEXT: vfadd.vv v25, v8, v9
; CHECK-NEXT: andi a0, a1, 2
; CHECK-NEXT: bnez a0, .LBB4_2
; CHECK-NEXT: .LBB4_4: # %if.else5
; CHECK-NEXT: vfmul.vv v8, v8, v25
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
%conv = zext i8 %cond to i32
%and = and i32 %conv, 1
%tobool = icmp eq i32 %and, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
br label %if.end
if.else: ; preds = %entry
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
br label %if.end
if.end: ; preds = %if.else, %if.then
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
%and2 = and i32 %conv, 2
%tobool3 = icmp eq i32 %and2, 0
br i1 %tobool3, label %if.else5, label %if.then4
if.then4: ; preds = %if.end
%3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 %0)
br label %if.end6
if.else5: ; preds = %if.end
%4 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %c.0, i64 %0)
br label %if.end6
if.end6: ; preds = %if.else5, %if.then4
%c.1 = phi <vscale x 1 x double> [ %3, %if.then4 ], [ %4, %if.else5 ]
ret <vscale x 1 x double> %c.1
}
; FIXME: The explicit vsetvli in if.then4 could be removed as it is redundant
; with the one in the entry, but we lack the ability to remove explicit
; vsetvli instructions.
define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a3, a1, 1
; CHECK-NEXT: vsetvli a2, a0, e64,m1,ta,mu
; CHECK-NEXT: bnez a3, .LBB5_3
; CHECK-NEXT: # %bb.1: # %if.else
; CHECK-NEXT: vfsub.vv v25, v8, v9
; CHECK-NEXT: andi a1, a1, 2
; CHECK-NEXT: beqz a1, .LBB5_4
; CHECK-NEXT: .LBB5_2: # %if.then4
; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0)
; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: lui a0, %hi(.LCPI5_1)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_1)
; CHECK-NEXT: vlse64.v v27, (a0), zero
; CHECK-NEXT: vfadd.vv v26, v26, v27
; CHECK-NEXT: lui a0, %hi(scratch)
; CHECK-NEXT: addi a0, a0, %lo(scratch)
; CHECK-NEXT: vse64.v v26, (a0)
; CHECK-NEXT: j .LBB5_5
; CHECK-NEXT: .LBB5_3: # %if.then
; CHECK-NEXT: vfadd.vv v25, v8, v9
; CHECK-NEXT: andi a1, a1, 2
; CHECK-NEXT: bnez a1, .LBB5_2
; CHECK-NEXT: .LBB5_4: # %if.else5
; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
; CHECK-NEXT: lui a0, %hi(.LCPI5_2)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_2)
; CHECK-NEXT: vlse32.v v26, (a0), zero
; CHECK-NEXT: lui a0, %hi(.LCPI5_3)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_3)
; CHECK-NEXT: vlse32.v v27, (a0), zero
; CHECK-NEXT: vfadd.vv v26, v26, v27
; CHECK-NEXT: lui a0, %hi(scratch)
; CHECK-NEXT: addi a0, a0, %lo(scratch)
; CHECK-NEXT: vse32.v v26, (a0)
; CHECK-NEXT: .LBB5_5: # %if.end10
; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
; CHECK-NEXT: vfmul.vv v8, v25, v25
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
%conv = zext i8 %cond to i32
%and = and i32 %conv, 1
%tobool = icmp eq i32 %and, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
br label %if.end
if.else: ; preds = %entry
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
br label %if.end
if.end: ; preds = %if.else, %if.then
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
%and2 = and i32 %conv, 2
%tobool3 = icmp eq i32 %and2, 0
br i1 %tobool3, label %if.else5, label %if.then4
if.then4: ; preds = %if.end
%3 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
%4 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(double 1.000000e+00, i64 %3)
%5 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(double 2.000000e+00, i64 %3)
%6 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %4, <vscale x 1 x double> %5, i64 %3)
%7 = bitcast i8* @scratch to <vscale x 1 x double>*
tail call void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double> %6, <vscale x 1 x double>* %7, i64 %3)
br label %if.end10
if.else5: ; preds = %if.end
%8 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 2, i64 0)
%9 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(float 1.000000e+00, i64 %8)
%10 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(float 2.000000e+00, i64 %8)
%11 = tail call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float> %9, <vscale x 2 x float> %10, i64 %8)
%12 = bitcast i8* @scratch to <vscale x 2 x float>*
tail call void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float> %11, <vscale x 2 x float>* %12, i64 %8)
br label %if.end10
if.end10: ; preds = %if.else5, %if.then4
%13 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %c.0, <vscale x 1 x double> %c.0, i64 %0)
ret <vscale x 1 x double> %13
}
declare void @foo()
; Similar to test1, but contains a call to @foo to act as a barrier to analyzing
; VL/VTYPE.
define <vscale x 1 x double> @test8(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -32
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 1
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: vsetvli s0, a0, e64,m1,ta,mu
; CHECK-NEXT: beqz a1, .LBB6_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: j .LBB6_3
; CHECK-NEXT: .LBB6_2: # %if.else
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add a0, a0, sp
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: call foo@plt
; CHECK-NEXT: vsetvli a0, s0, e64,m1,ta,mu
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add a0, a0, sp
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfsub.vv v8, v26, v25
; CHECK-NEXT: .LBB6_3: # %if.then
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
%tobool = icmp eq i8 %cond, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
br label %if.end
if.else: ; preds = %entry
call void @foo()
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
br label %if.end
if.end: ; preds = %if.else, %if.then
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
ret <vscale x 1 x double> %c.0
}
; Similar to test2, but contains a call to @foo to act as a barrier to analyzing
; VL/VTYPE.
define <vscale x 1 x double> @test9(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test9:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -32
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 1
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: vsetvli s0, a0, e64,m1,ta,mu
; CHECK-NEXT: beqz a1, .LBB7_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vfadd.vv v25, v8, v9
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add a0, a0, sp
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: call foo@plt
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add a0, a0, sp
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: j .LBB7_3
; CHECK-NEXT: .LBB7_2: # %if.else
; CHECK-NEXT: vfsub.vv v25, v8, v9
; CHECK-NEXT: .LBB7_3: # %if.end
; CHECK-NEXT: vsetvli a0, s0, e64,m1,ta,mu
; CHECK-NEXT: vfmul.vv v8, v25, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
entry:
%0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
%tobool = icmp eq i8 %cond, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
call void @foo()
br label %if.end
if.else: ; preds = %entry
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
br label %if.end
if.end: ; preds = %if.else, %if.then
%c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
%3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 %0)
ret <vscale x 1 x double> %3
}


@ -0,0 +1,415 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc %s -o - -mtriple=riscv64 -mattr=experimental-v \
# RUN: -run-pass=riscv-insert-vsetvli | FileCheck %s
--- |
; ModuleID = 'vsetvli-insert.ll'
source_filename = "vsetvli-insert.ll"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"
define <vscale x 1 x i64> @load_add_or_sub(i8 zeroext %cond, <vscale x 1 x i64>* %0, <vscale x 1 x i64> %1, i64 %2) #0 {
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* %0, i64 %2)
%tobool = icmp eq i8 %cond, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%b = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %1, i64 %2)
br label %if.end
if.else: ; preds = %entry
%c = call <vscale x 1 x i64> @llvm.riscv.vsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %1, i64 %2)
br label %if.end
if.end: ; preds = %if.else, %if.then
%d = phi <vscale x 1 x i64> [ %b, %if.then ], [ %c, %if.else ]
ret <vscale x 1 x i64> %d
}
define void @load_zext_or_sext(i8 zeroext %cond, <vscale x 1 x i32>* %0, <vscale x 1 x i64>* %1, i64 %2) #0 {
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>* %0, i64 %2)
%tobool = icmp eq i8 %cond, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%b = call <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32> %a, i64 %2)
br label %if.end
if.else: ; preds = %entry
%c = call <vscale x 1 x i64> @llvm.riscv.vsext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32> %a, i64 %2)
br label %if.end
if.end: ; preds = %if.else, %if.then
%d = phi <vscale x 1 x i64> [ %b, %if.then ], [ %c, %if.else ]
call void @llvm.riscv.vse.nxv1i64.i64(<vscale x 1 x i64> %d, <vscale x 1 x i64>* %1, i64 %2)
ret void
}
; Function Attrs: nounwind readnone
declare i64 @llvm.riscv.vmv.x.s.nxv1i64(<vscale x 1 x i64>) #1
define i64 @vmv_x_s(i8 zeroext %cond, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2) #0 {
entry:
%tobool = icmp eq i8 %cond, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2)
br label %if.end
if.else: ; preds = %entry
%b = call <vscale x 1 x i64> @llvm.riscv.vsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %1, <vscale x 1 x i64> %1, i64 %2)
br label %if.end
if.end: ; preds = %if.else, %if.then
%c = phi <vscale x 1 x i64> [ %a, %if.then ], [ %b, %if.else ]
%d = call i64 @llvm.riscv.vmv.x.s.nxv1i64(<vscale x 1 x i64> %c)
ret i64 %d
}
; Function Attrs: nounwind
declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) #2
define <vscale x 1 x i64> @vsetvli_add_or_sub(i8 zeroext %cond, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %avl) #0 {
entry:
%vl = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 3, i64 0)
%tobool = icmp eq i8 %cond, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%b = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %vl)
br label %if.end
if.else: ; preds = %entry
%c = call <vscale x 1 x i64> @llvm.riscv.vsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %vl)
br label %if.end
if.end: ; preds = %if.else, %if.then
%d = phi <vscale x 1 x i64> [ %b, %if.then ], [ %c, %if.else ]
ret <vscale x 1 x i64> %d
}
; Function Attrs: nounwind readnone
declare <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, i64) #1
; Function Attrs: nounwind readnone
declare <vscale x 1 x i64> @llvm.riscv.vsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, i64) #1
; Function Attrs: nounwind readonly
declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* nocapture, i64) #3
; Function Attrs: nounwind readonly
declare <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>* nocapture, i64) #3
; Function Attrs: nounwind writeonly
declare void @llvm.riscv.vse.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>* nocapture, i64) #4
; Function Attrs: nounwind readnone
declare <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32>, i64) #1
; Function Attrs: nounwind readnone
declare <vscale x 1 x i64> @llvm.riscv.vsext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32>, i64) #1
attributes #0 = { "target-features"="+experimental-v" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind }
attributes #3 = { nounwind readonly }
attributes #4 = { nounwind writeonly }
...
---
name: load_add_or_sub
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: vr }
- { id: 1, class: vr }
- { id: 2, class: vr }
- { id: 3, class: vr }
- { id: 4, class: gpr }
- { id: 5, class: gpr }
- { id: 6, class: vr }
- { id: 7, class: gpr }
- { id: 8, class: gpr }
liveins:
- { reg: '$x10', virtual-reg: '%4' }
- { reg: '$x11', virtual-reg: '%5' }
- { reg: '$v8', virtual-reg: '%6' }
- { reg: '$x12', virtual-reg: '%7' }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
; CHECK-LABEL: name: load_add_or_sub
; CHECK: bb.0.entry:
; CHECK: successors: %bb.2(0x30000000), %bb.1(0x50000000)
; CHECK: liveins: $x10, $x11, $v8, $x12
; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x12
; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v8
; CHECK: [[COPY2:%[0-9]+]]:gpr = COPY $x11
; CHECK: [[COPY3:%[0-9]+]]:gpr = COPY $x10
; CHECK: dead %9:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY2]], $noreg, 6, implicit $vl, implicit $vtype
; CHECK: [[COPY4:%[0-9]+]]:gpr = COPY $x0
; CHECK: BEQ [[COPY3]], [[COPY4]], %bb.2
; CHECK: PseudoBR %bb.1
; CHECK: bb.1.if.then:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
; CHECK: PseudoBR %bb.3
; CHECK: bb.2.if.else:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
; CHECK: bb.3.if.end:
; CHECK: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2
; CHECK: $v8 = COPY [[PHI]]
; CHECK: PseudoRET implicit $v8
bb.0.entry:
successors: %bb.2(0x30000000), %bb.1(0x50000000)
liveins: $x10, $x11, $v8, $x12
%7:gpr = COPY $x12
%6:vr = COPY $v8
%5:gpr = COPY $x11
%4:gpr = COPY $x10
%0:vr = PseudoVLE64_V_M1 %5, %7, 6
%8:gpr = COPY $x0
BEQ %4, %8, %bb.2
PseudoBR %bb.1
bb.1.if.then:
%1:vr = PseudoVADD_VV_M1 %0, %6, %7, 6
PseudoBR %bb.3
bb.2.if.else:
%2:vr = PseudoVSUB_VV_M1 %0, %6, %7, 6
bb.3.if.end:
%3:vr = PHI %1, %bb.1, %2, %bb.2
$v8 = COPY %3
PseudoRET implicit $v8
...
---
name: load_zext_or_sext
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: vr }
- { id: 1, class: vr }
- { id: 2, class: vr }
- { id: 3, class: vr }
- { id: 4, class: gpr }
- { id: 5, class: gpr }
- { id: 6, class: gpr }
- { id: 7, class: gpr }
- { id: 8, class: gpr }
liveins:
- { reg: '$x10', virtual-reg: '%4' }
- { reg: '$x11', virtual-reg: '%5' }
- { reg: '$x12', virtual-reg: '%6' }
- { reg: '$x13', virtual-reg: '%7' }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
; CHECK-LABEL: name: load_zext_or_sext
; CHECK: bb.0.entry:
; CHECK: successors: %bb.2(0x30000000), %bb.1(0x50000000)
; CHECK: liveins: $x10, $x11, $x12, $x13
; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x13
; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $x12
; CHECK: [[COPY2:%[0-9]+]]:gpr = COPY $x11
; CHECK: [[COPY3:%[0-9]+]]:gpr = COPY $x10
; CHECK: dead %9:gpr = PseudoVSETVLI [[COPY]], 87, implicit-def $vl, implicit-def $vtype
; CHECK: [[PseudoVLE32_V_MF2_:%[0-9]+]]:vr = PseudoVLE32_V_MF2 [[COPY2]], $noreg, 5, implicit $vl, implicit $vtype
; CHECK: [[COPY4:%[0-9]+]]:gpr = COPY $x0
; CHECK: BEQ [[COPY3]], [[COPY4]], %bb.2
; CHECK: PseudoBR %bb.1
; CHECK: bb.1.if.then:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: dead %10:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
; CHECK: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[PseudoVLE32_V_MF2_]], $noreg, 6, implicit $vl, implicit $vtype
; CHECK: PseudoBR %bb.3
; CHECK: bb.2.if.else:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: dead %11:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
; CHECK: early-clobber %2:vr = PseudoVSEXT_VF2_M1 [[PseudoVLE32_V_MF2_]], $noreg, 6, implicit $vl, implicit $vtype
; CHECK: bb.3.if.end:
; CHECK: [[PHI:%[0-9]+]]:vr = PHI %1, %bb.1, %2, %bb.2
; CHECK: PseudoVSE64_V_M1 [[PHI]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
; CHECK: PseudoRET
bb.0.entry:
successors: %bb.2(0x30000000), %bb.1(0x50000000)
liveins: $x10, $x11, $x12, $x13
%7:gpr = COPY $x13
%6:gpr = COPY $x12
%5:gpr = COPY $x11
%4:gpr = COPY $x10
%0:vr = PseudoVLE32_V_MF2 %5, %7, 5
%8:gpr = COPY $x0
BEQ %4, %8, %bb.2
PseudoBR %bb.1
bb.1.if.then:
early-clobber %1:vr = PseudoVZEXT_VF2_M1 %0, %7, 6
PseudoBR %bb.3
bb.2.if.else:
early-clobber %2:vr = PseudoVSEXT_VF2_M1 %0, %7, 6
bb.3.if.end:
%3:vr = PHI %1, %bb.1, %2, %bb.2
PseudoVSE64_V_M1 %3, %6, %7, 6
PseudoRET
...
---
name: vmv_x_s
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: vr }
- { id: 1, class: vr }
- { id: 2, class: vr }
- { id: 3, class: gpr }
- { id: 4, class: vr }
- { id: 5, class: vr }
- { id: 6, class: gpr }
- { id: 7, class: gpr }
- { id: 8, class: gpr }
liveins:
- { reg: '$x10', virtual-reg: '%3' }
- { reg: '$v8', virtual-reg: '%4' }
- { reg: '$v9', virtual-reg: '%5' }
- { reg: '$x11', virtual-reg: '%6' }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
; CHECK-LABEL: name: vmv_x_s
; CHECK: bb.0.entry:
; CHECK: successors: %bb.2(0x30000000), %bb.1(0x50000000)
; CHECK: liveins: $x10, $v8, $v9, $x11
; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11
; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v9
; CHECK: [[COPY2:%[0-9]+]]:vr = COPY $v8
; CHECK: [[COPY3:%[0-9]+]]:gpr = COPY $x10
; CHECK: [[COPY4:%[0-9]+]]:gpr = COPY $x0
; CHECK: BEQ [[COPY3]], [[COPY4]], %bb.2
; CHECK: PseudoBR %bb.1
; CHECK: bb.1.if.then:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: dead %9:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
; CHECK: PseudoBR %bb.3
; CHECK: bb.2.if.else:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: dead %10:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
; CHECK: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 [[COPY1]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
; CHECK: bb.3.if.end:
; CHECK: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2
; CHECK: [[PseudoVMV_X_S_M1_:%[0-9]+]]:gpr = PseudoVMV_X_S_M1 [[PHI]], 6, implicit $vtype
; CHECK: $x10 = COPY [[PseudoVMV_X_S_M1_]]
; CHECK: PseudoRET implicit $x10
bb.0.entry:
successors: %bb.2(0x30000000), %bb.1(0x50000000)
liveins: $x10, $v8, $v9, $x11
%6:gpr = COPY $x11
%5:vr = COPY $v9
%4:vr = COPY $v8
%3:gpr = COPY $x10
%7:gpr = COPY $x0
BEQ %3, %7, %bb.2
PseudoBR %bb.1
bb.1.if.then:
%0:vr = PseudoVADD_VV_M1 %4, %5, %6, 6
PseudoBR %bb.3
bb.2.if.else:
%1:vr = PseudoVSUB_VV_M1 %5, %5, %6, 6
bb.3.if.end:
%2:vr = PHI %0, %bb.1, %1, %bb.2
%8:gpr = PseudoVMV_X_S_M1 %2, 6
$x10 = COPY %8
PseudoRET implicit $x10
...
---
name: vsetvli_add_or_sub
alignment: 4
tracksRegLiveness: true
registers:
- { id: 0, class: gpr }
- { id: 1, class: vr }
- { id: 2, class: vr }
- { id: 3, class: vr }
- { id: 4, class: gpr }
- { id: 5, class: vr }
- { id: 6, class: vr }
- { id: 7, class: gpr }
- { id: 8, class: gpr }
liveins:
- { reg: '$x10', virtual-reg: '%4' }
- { reg: '$v8', virtual-reg: '%5' }
- { reg: '$v9', virtual-reg: '%6' }
- { reg: '$x11', virtual-reg: '%7' }
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
body: |
; CHECK-LABEL: name: vsetvli_add_or_sub
; CHECK: bb.0.entry:
; CHECK: successors: %bb.2(0x30000000), %bb.1(0x50000000)
; CHECK: liveins: $x10, $v8, $v9, $x11
; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11
; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v9
; CHECK: [[COPY2:%[0-9]+]]:vr = COPY $v8
; CHECK: [[COPY3:%[0-9]+]]:gpr = COPY $x10
; CHECK: [[PseudoVSETVLI:%[0-9]+]]:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype
; CHECK: [[COPY4:%[0-9]+]]:gpr = COPY $x0
; CHECK: BEQ [[COPY3]], [[COPY4]], %bb.2
; CHECK: PseudoBR %bb.1
; CHECK: bb.1.if.then:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
; CHECK: PseudoBR %bb.3
; CHECK: bb.2.if.else:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype
; CHECK: bb.3.if.end:
; CHECK: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2
; CHECK: $v8 = COPY [[PHI]]
; CHECK: PseudoRET implicit $v8
bb.0.entry:
successors: %bb.2(0x30000000), %bb.1(0x50000000)
liveins: $x10, $v8, $v9, $x11
%7:gpr = COPY $x11
%6:vr = COPY $v9
%5:vr = COPY $v8
%4:gpr = COPY $x10
%0:gpr = PseudoVSETVLI %7, 88, implicit-def dead $vl, implicit-def dead $vtype
%8:gpr = COPY $x0
BEQ %4, %8, %bb.2
PseudoBR %bb.1
bb.1.if.then:
%1:vr = PseudoVADD_VV_M1 %5, %6, %0, 6
PseudoBR %bb.3
bb.2.if.else:
%2:vr = PseudoVSUB_VV_M1 %5, %6, %0, 6
bb.3.if.end:
%3:vr = PHI %1, %bb.1, %2, %bb.2
$v8 = COPY %3
PseudoRET implicit $v8
...