From 1b47a3de48d2ac3ee4420209ab5d191f66849979 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 25 May 2021 16:28:34 -0700 Subject: [PATCH] [RISCV] Enable cross basic block aware vsetvli insertion This patch extends D102737 to allow VL/VTYPE changes to be taken into account before adding an explicit vsetvli. We do this by using a data flow analysis to propagate VL/VTYPE information from predecessors until we've determined a value for every block in the function. We use this information to determine if a vsetvli needs to be inserted before the first vector instruction in the block. Differential Revision: https://reviews.llvm.org/D102739 --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 301 +- .../CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll | 108 +- .../CodeGen/RISCV/rvv/fixed-vectors-cttz.ll | 58 +- .../CodeGen/RISCV/rvv/fixed-vectors-int.ll | 2 - .../RISCV/rvv/fixed-vectors-select-fp.ll | 497 --- .../RISCV/rvv/fixed-vectors-vselect.ll | 2670 +++++++---------- .../RISCV/rvv/vsetvli-insert-crossbb.ll | 447 +++ .../RISCV/rvv/vsetvli-insert-crossbb.mir | 415 +++ 8 files changed, 2191 insertions(+), 2307 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index c3ddd57d5caa..94b5d91f304c 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -9,13 +9,17 @@ // This file implements a function pass that inserts VSETVLI instructions where // needed. // -// The pass consists of a single pass over each basic block looking for changes -// in VL/VTYPE usage that requires a vsetvli to be inserted. We assume the -// VL/VTYPE values are unknown from predecessors so the first vector instruction -// will always require a new VSETVLI. 
+// This pass consists of 3 phases: // -// TODO: Future enhancements to this pass will take into account VL/VTYPE from -// predecessors. +// Phase 1 collects how each basic block affects VL/VTYPE. +// +// Phase 2 uses the information from phase 1 to do a data flow analysis to +// propagate the VL/VTYPE changes through the function. This gives us the +// VL/VTYPE at the start of each basic block. +// +// Phase 3 inserts VSETVLI instructions in each basic block. Information from +// phase 2 is used to prevent inserting a VSETVLI before the first vector +// instruction in the block if possible. // //===----------------------------------------------------------------------===// @@ -23,6 +27,7 @@ #include "RISCVSubtarget.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include using namespace llvm; #define DEBUG_TYPE "riscv-insert-vsetvli" @@ -52,6 +57,12 @@ class VSETVLIInfo { public: VSETVLIInfo() : AVLImm(0) {} + static VSETVLIInfo getUnknown() { + VSETVLIInfo Info; + Info.setUnknown(); + return Info; + } + bool isValid() const { return State != Uninitialized; } void setUnknown() { State = Unknown; } bool isUnknown() const { return State == Unknown; } @@ -148,12 +159,89 @@ public: return getAVLReg() == Other.getAVLReg(); } + + bool operator==(const VSETVLIInfo &Other) const { + // Uninitialized is only equal to another Uninitialized. + if (!isValid()) + return !Other.isValid(); + if (!Other.isValid()) + return !isValid(); + + // Unknown is only equal to another Unknown. + if (isUnknown()) + return Other.isUnknown(); + if (Other.isUnknown()) + return isUnknown(); + + // Otherwise compare the VTYPE and AVL. + return hasSameVTYPE(Other) && hasSameAVL(Other); + } + + bool operator!=(const VSETVLIInfo &Other) const { return !(*this == Other); } + + // Calculate the VSETVLIInfo visible to a block assuming this and Other are + // both predecessors. 
+ VSETVLIInfo intersect(const VSETVLIInfo &Other) const { + // If the new value isn't valid, ignore it. + if (!Other.isValid()) + return *this; + + // If this value isn't valid, this must be the first predecessor, use it. + if (!isValid()) + return Other; + + if (*this == Other) + return *this; + + // If the configurations don't match, assume unknown. + return VSETVLIInfo::getUnknown(); + } + + // Calculate the VSETVLIInfo visible at the end of the block assuming this + // is the predecessor value, and Other is the change for this block. + VSETVLIInfo merge(const VSETVLIInfo &Other) const { + assert(isValid() && "Can only merge with a valid VSETVLInfo"); + + // Nothing changed from the predecessor, keep it. + if (!Other.isValid()) + return *this; + + // If the change is compatible with the input, we won't create a VSETVLI + // and should keep the predecessor. + if (isCompatible(Other)) + return *this; + + // Otherwise just use whatever is in this block. + return Other; + } +}; + +struct BlockData { + // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers + // made by this block. Calculated in Phase 1. + VSETVLIInfo Change; + + // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this + // block. Calculated in Phase 2. + VSETVLIInfo Exit; + + // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor + // blocks. Calculated in Phase 2, and used by Phase 3. + VSETVLIInfo Pred; + + // Keeps track of whether the block is already in the queue. 
+ bool InQueue = false; + + BlockData() {} }; class RISCVInsertVSETVLI : public MachineFunctionPass { const TargetInstrInfo *TII; MachineRegisterInfo *MRI; + std::vector BlockInfo; + std::queue WorkList; + public: static char ID; @@ -170,10 +258,13 @@ public: StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; } private: + bool needVSETVLI(const VSETVLIInfo &Require, const VSETVLIInfo &CurInfo); void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI, const VSETVLIInfo &Info); - bool emitVSETVLIs(MachineBasicBlock &MBB); + bool computeVLVTYPEChanges(const MachineBasicBlock &MBB); + void computeIncomingVLVTYPE(const MachineBasicBlock &MBB); + void emitVSETVLIs(MachineBasicBlock &MBB); }; } // end anonymous namespace @@ -276,7 +367,7 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI, // Return a VSETVLIInfo representing the changes made by this VSETVLI or // VSETIVLI instruction. -VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) { +static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) { VSETVLIInfo NewInfo; if (MI.getOpcode() == RISCV::PseudoVSETVLI) { Register AVLReg = MI.getOperand(1).getReg(); @@ -292,12 +383,111 @@ VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) { return NewInfo; } -bool RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { - bool MadeChange = false; +bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require, + const VSETVLIInfo &CurInfo) { + if (CurInfo.isCompatible(Require)) + return false; - // Assume predecessor state is unknown. + // We didn't find a compatible value. If our AVL is a virtual register, + // it might be defined by a VSET(I)VLI. If it has the same VTYPE we need + // and the last VL/VTYPE we observed is the same, we don't need a + // VSETVLI here. 
+ if (!CurInfo.isUnknown() && Require.hasAVLReg() && + Require.getAVLReg().isVirtual() && Require.hasSameVTYPE(CurInfo)) { + if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) { + if (DefMI->getOpcode() == RISCV::PseudoVSETVLI || + DefMI->getOpcode() == RISCV::PseudoVSETIVLI) { + VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); + if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVTYPE(CurInfo)) + return false; + } + } + } + + return true; +} + +bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) { + bool HadVectorOp = false; + + BlockData &BBInfo = BlockInfo[MBB.getNumber()]; + for (const MachineInstr &MI : MBB) { + // If this is an explicit VSETVLI or VSETIVLI, update our state. + if (MI.getOpcode() == RISCV::PseudoVSETVLI || + MI.getOpcode() == RISCV::PseudoVSETIVLI) { + HadVectorOp = true; + BBInfo.Change = getInfoForVSETVLI(MI); + continue; + } + + uint64_t TSFlags = MI.getDesc().TSFlags; + if (RISCVII::hasSEWOp(TSFlags)) { + HadVectorOp = true; + + VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI); + + if (!BBInfo.Change.isValid()) { + BBInfo.Change = NewInfo; + } else { + // If this instruction isn't compatible with the previous VL/VTYPE + // we need to insert a VSETVLI. + if (needVSETVLI(NewInfo, BBInfo.Change)) + BBInfo.Change = NewInfo; + } + } + + // If this is something that updates VL/VTYPE that we don't know about, set + // the state to unknown. + if (MI.isCall() || MI.modifiesRegister(RISCV::VL) || + MI.modifiesRegister(RISCV::VTYPE)) { + BBInfo.Change = VSETVLIInfo::getUnknown(); + } + } + + // Initial exit state is whatever change we found in the block. + BBInfo.Exit = BBInfo.Change; + + return HadVectorOp; +} + +void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) { + BlockData &BBInfo = BlockInfo[MBB.getNumber()]; + + BBInfo.InQueue = false; + + VSETVLIInfo InInfo; + if (MBB.pred_empty()) { + // There are no predecessors, so use the default starting status. 
+ InInfo.setUnknown(); + } else { + for (MachineBasicBlock *P : MBB.predecessors()) + InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit); + } + + // If we don't have any valid predecessor value, wait until we do. + if (!InInfo.isValid()) + return; + + BBInfo.Pred = InInfo; + + VSETVLIInfo TmpStatus = BBInfo.Pred.merge(BBInfo.Change); + + // If the new exit value matches the old exit value, we don't need to revisit + // any blocks. + if (BBInfo.Exit == TmpStatus) + return; + + BBInfo.Exit = TmpStatus; + + // Add the successors to the work list so we can propagate the changed exit + // status. + for (MachineBasicBlock *S : MBB.successors()) + if (!BlockInfo[S->getNumber()].InQueue) + WorkList.push(S); +} + +void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { VSETVLIInfo CurInfo; - CurInfo.setUnknown(); for (MachineInstr &MI : MBB) { // If this is an explicit VSETVLI or VSETIVLI, update our state. @@ -309,7 +499,6 @@ bool RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { "Unexpected operands where VL and VTYPE should be"); MI.getOperand(3).setIsDead(false); MI.getOperand(4).setIsDead(false); - MadeChange = true; CurInfo = getInfoForVSETVLI(MI); continue; } @@ -330,47 +519,32 @@ bool RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) { MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false, /*isImp*/ true)); - bool NeedVSETVLI = true; - if (CurInfo.isValid() && CurInfo.isCompatible(NewInfo)) - NeedVSETVLI = false; - - // We didn't find a compatible value. If our AVL is a virtual register, - // it might be defined by a VSET(I)VLI. If it has the same VTYPE we need - // and the last VL/VTYPE we observed is the same, we don't need a - // VSETVLI here. 
- if (NeedVSETVLI && !CurInfo.isUnknown() && NewInfo.hasAVLReg() && - NewInfo.getAVLReg().isVirtual() && NewInfo.hasSameVTYPE(CurInfo)) { - if (MachineInstr *DefMI = MRI->getVRegDef(NewInfo.getAVLReg())) { - if (DefMI->getOpcode() == RISCV::PseudoVSETVLI || - DefMI->getOpcode() == RISCV::PseudoVSETIVLI) { - VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); - if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVTYPE(CurInfo)) - NeedVSETVLI = false; - } + if (!CurInfo.isValid()) { + // We haven't found any vector instructions or VL/VTYPE changes yet, + // use the predecessor information. + assert(BlockInfo[MBB.getNumber()].Pred.isValid() && + "Expected a valid predecessor state."); + if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred)) { + insertVSETVLI(MBB, MI, NewInfo); + CurInfo = NewInfo; + } + } else { + // If this instruction isn't compatible with the previous VL/VTYPE + // we need to insert a VSETVLI. + if (needVSETVLI(NewInfo, CurInfo)) { + insertVSETVLI(MBB, MI, NewInfo); + CurInfo = NewInfo; } } - - // If this instruction isn't compatible with the previous VL/VTYPE - // we need to insert a VSETVLI. - if (NeedVSETVLI) { - insertVSETVLI(MBB, MI, NewInfo); - CurInfo = NewInfo; - } - - // If we find an instruction we at least changed the operands. - MadeChange = true; } + // If this is something updates VL/VTYPE that we don't know about, set // the state to unknown. 
if (MI.isCall() || MI.modifiesRegister(RISCV::VL) || MI.modifiesRegister(RISCV::VTYPE)) { - VSETVLIInfo NewInfo; - NewInfo.setUnknown(); - CurInfo = NewInfo; + CurInfo = VSETVLIInfo::getUnknown(); } } - - return MadeChange; } bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { @@ -382,12 +556,41 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) { TII = ST.getInstrInfo(); MRI = &MF.getRegInfo(); - bool Changed = false; + assert(BlockInfo.empty() && "Expect empty block infos"); + BlockInfo.resize(MF.getNumBlockIDs()); - for (MachineBasicBlock &MBB : MF) - Changed |= emitVSETVLIs(MBB); + bool HaveVectorOp = false; - return Changed; + // Phase 1 - determine how VL/VTYPE are affected by the each block. + for (const MachineBasicBlock &MBB : MF) + HaveVectorOp |= computeVLVTYPEChanges(MBB); + + // If we didn't find any instructions that need VSETVLI, we're done. + if (HaveVectorOp) { + // Phase 2 - determine the exit VL/VTYPE from each block. We add all + // blocks to the list here, but will also add any that need to be revisited + // during Phase 2 processing. + for (const MachineBasicBlock &MBB : MF) { + WorkList.push(&MBB); + BlockInfo[MBB.getNumber()].InQueue = true; + } + while (!WorkList.empty()) { + const MachineBasicBlock &MBB = *WorkList.front(); + WorkList.pop(); + computeIncomingVLVTYPE(MBB); + } + + // Phase 3 - add any vsetvli instructions needed in the block. Use the + // Phase 2 information to avoid adding vsetvlis before the first vector + // instruction in the block if the VL/VTYPE is satisfied by its + // predecessors. + for (MachineBasicBlock &MBB : MF) + emitVSETVLIs(MBB); + } + + BlockInfo.clear(); + + return HaveVectorOp; } /// Returns an instance of the Insert VSETVLI pass. 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll index 89efeb392c4a..7f22289350e4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -3667,11 +3667,10 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; LMULMAX2-RV32-NEXT: addi a3, a1, 819 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a7, a1, -241 -; LMULMAX2-RV32-NEXT: lui a2, 4112 -; LMULMAX2-RV32-NEXT: addi a2, a2, 257 +; LMULMAX2-RV32-NEXT: lui a1, 4112 +; LMULMAX2-RV32-NEXT: addi a2, a1, 257 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_2 ; LMULMAX2-RV32-NEXT: # %bb.1: -; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 @@ -3723,14 +3722,12 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; LMULMAX2-RV32-NEXT: mul a1, a1, a2 ; LMULMAX2-RV32-NEXT: srli a5, a1, 24 ; LMULMAX2-RV32-NEXT: .LBB3_3: -; LMULMAX2-RV32-NEXT: sw a5, 0(sp) -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v25, v25, 1 ; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26 -; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_5 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX2-RV32-NEXT: sw a5, 0(sp) +; LMULMAX2-RV32-NEXT: bnez a1, .LBB3_5 ; LMULMAX2-RV32-NEXT: # %bb.4: -; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 @@ -3758,8 +3755,8 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; LMULMAX2-RV32-NEXT: addi a1, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB3_6 ; LMULMAX2-RV32-NEXT: .LBB3_5: -; LMULMAX2-RV32-NEXT: srli a1, a5, 1 -; LMULMAX2-RV32-NEXT: or a1, a5, a1 +; LMULMAX2-RV32-NEXT: srli a5, a1, 1 +; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or 
a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -3902,11 +3899,10 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; LMULMAX1-RV32-NEXT: addi a3, a1, 819 ; LMULMAX1-RV32-NEXT: lui a1, 61681 ; LMULMAX1-RV32-NEXT: addi a7, a1, -241 -; LMULMAX1-RV32-NEXT: lui a2, 4112 -; LMULMAX1-RV32-NEXT: addi a2, a2, 257 +; LMULMAX1-RV32-NEXT: lui a1, 4112 +; LMULMAX1-RV32-NEXT: addi a2, a1, 257 ; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_2 ; LMULMAX1-RV32-NEXT: # %bb.1: -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX1-RV32-NEXT: srli a5, a1, 1 ; LMULMAX1-RV32-NEXT: or a1, a1, a5 @@ -3958,14 +3954,12 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; LMULMAX1-RV32-NEXT: mul a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a5, a1, 24 ; LMULMAX1-RV32-NEXT: .LBB3_3: -; LMULMAX1-RV32-NEXT: sw a5, 0(sp) -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1 ; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6 -; LMULMAX1-RV32-NEXT: vmv.x.s a5, v26 -; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_5 +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX1-RV32-NEXT: sw a5, 0(sp) +; LMULMAX1-RV32-NEXT: bnez a1, .LBB3_5 ; LMULMAX1-RV32-NEXT: # %bb.4: -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX1-RV32-NEXT: srli a5, a1, 1 ; LMULMAX1-RV32-NEXT: or a1, a1, a5 @@ -3993,8 +3987,8 @@ define void @ctlz_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; LMULMAX1-RV32-NEXT: addi a1, a1, 32 ; LMULMAX1-RV32-NEXT: j .LBB3_6 ; LMULMAX1-RV32-NEXT: .LBB3_5: -; LMULMAX1-RV32-NEXT: srli a1, a5, 1 -; LMULMAX1-RV32-NEXT: or a1, a5, a1 +; LMULMAX1-RV32-NEXT: srli a5, a1, 1 +; LMULMAX1-RV32-NEXT: or a1, a1, a5 ; LMULMAX1-RV32-NEXT: srli a5, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a5 ; LMULMAX1-RV32-NEXT: srli a5, a1, 4 @@ -11124,11 +11118,10 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-RV32-NEXT: addi a3, a1, 819 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; 
LMULMAX2-RV32-NEXT: addi a7, a1, -241 -; LMULMAX2-RV32-NEXT: lui a2, 4112 -; LMULMAX2-RV32-NEXT: addi a2, a2, 257 +; LMULMAX2-RV32-NEXT: lui a1, 4112 +; LMULMAX2-RV32-NEXT: addi a2, a1, 257 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_2 ; LMULMAX2-RV32-NEXT: # %bb.1: -; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 @@ -11180,14 +11173,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-RV32-NEXT: mul a1, a1, a2 ; LMULMAX2-RV32-NEXT: srli a5, a1, 24 ; LMULMAX2-RV32-NEXT: .LBB7_3: -; LMULMAX2-RV32-NEXT: sw a5, 0(sp) -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 3 ; LMULMAX2-RV32-NEXT: vsrl.vx v30, v28, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v30 -; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_5 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v30 +; LMULMAX2-RV32-NEXT: sw a5, 0(sp) +; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_5 ; LMULMAX2-RV32-NEXT: # %bb.4: -; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 @@ -11215,8 +11206,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-RV32-NEXT: addi a5, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB7_6 ; LMULMAX2-RV32-NEXT: .LBB7_5: -; LMULMAX2-RV32-NEXT: srli a1, a5, 1 -; LMULMAX2-RV32-NEXT: or a1, a5, a1 +; LMULMAX2-RV32-NEXT: srli a5, a1, 1 +; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -11239,14 +11230,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-RV32-NEXT: mul a1, a1, a2 ; LMULMAX2-RV32-NEXT: srli a5, a1, 24 ; LMULMAX2-RV32-NEXT: .LBB7_6: -; LMULMAX2-RV32-NEXT: sw a5, 24(sp) -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 2 ; LMULMAX2-RV32-NEXT: vsrl.vx v30, v28, a6 -; 
LMULMAX2-RV32-NEXT: vmv.x.s a5, v30 -; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_8 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v30 +; LMULMAX2-RV32-NEXT: sw a5, 24(sp) +; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_8 ; LMULMAX2-RV32-NEXT: # %bb.7: -; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 @@ -11274,8 +11263,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-RV32-NEXT: addi a5, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB7_9 ; LMULMAX2-RV32-NEXT: .LBB7_8: -; LMULMAX2-RV32-NEXT: srli a1, a5, 1 -; LMULMAX2-RV32-NEXT: or a1, a5, a1 +; LMULMAX2-RV32-NEXT: srli a5, a1, 1 +; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -11298,14 +11287,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-RV32-NEXT: mul a1, a1, a2 ; LMULMAX2-RV32-NEXT: srli a5, a1, 24 ; LMULMAX2-RV32-NEXT: .LBB7_9: -; LMULMAX2-RV32-NEXT: sw a5, 16(sp) -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v26, v26, 1 ; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28 -; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_11 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 +; LMULMAX2-RV32-NEXT: sw a5, 16(sp) +; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_11 ; LMULMAX2-RV32-NEXT: # %bb.10: -; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 @@ -11333,8 +11320,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-RV32-NEXT: addi a1, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB7_12 ; LMULMAX2-RV32-NEXT: .LBB7_11: -; LMULMAX2-RV32-NEXT: srli a1, a5, 1 -; LMULMAX2-RV32-NEXT: or a1, a5, a1 +; LMULMAX2-RV32-NEXT: srli a5, a1, 1 +; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, 
a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -11552,11 +11539,10 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV32-NEXT: addi a4, a2, 819 ; LMULMAX1-RV32-NEXT: lui a2, 61681 ; LMULMAX1-RV32-NEXT: addi t0, a2, -241 -; LMULMAX1-RV32-NEXT: lui a3, 4112 -; LMULMAX1-RV32-NEXT: addi a3, a3, 257 +; LMULMAX1-RV32-NEXT: lui a2, 4112 +; LMULMAX1-RV32-NEXT: addi a3, a2, 257 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_2 ; LMULMAX1-RV32-NEXT: # %bb.1: -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 @@ -11608,14 +11594,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV32-NEXT: mul a1, a1, a3 ; LMULMAX1-RV32-NEXT: srli a1, a1, 24 ; LMULMAX1-RV32-NEXT: .LBB7_3: -; LMULMAX1-RV32-NEXT: sw a1, 16(sp) -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vslidedown.vi v26, v26, 1 ; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a7 -; LMULMAX1-RV32-NEXT: vmv.x.s a1, v27 -; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_5 +; LMULMAX1-RV32-NEXT: vmv.x.s a2, v27 +; LMULMAX1-RV32-NEXT: sw a1, 16(sp) +; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_5 ; LMULMAX1-RV32-NEXT: # %bb.4: -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 @@ -11643,8 +11627,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV32-NEXT: addi a1, a1, 32 ; LMULMAX1-RV32-NEXT: j .LBB7_6 ; LMULMAX1-RV32-NEXT: .LBB7_5: -; LMULMAX1-RV32-NEXT: srli a2, a1, 1 -; LMULMAX1-RV32-NEXT: or a1, a1, a2 +; LMULMAX1-RV32-NEXT: srli a1, a2, 1 +; LMULMAX1-RV32-NEXT: or a1, a2, a1 ; LMULMAX1-RV32-NEXT: srli a2, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 4 @@ -11669,13 +11653,11 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV32-NEXT: .LBB7_6: ; LMULMAX1-RV32-NEXT: sw a1, 24(sp) ; LMULMAX1-RV32-NEXT: 
sw zero, 12(sp) -; LMULMAX1-RV32-NEXT: sw zero, 4(sp) -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX1-RV32-NEXT: sw zero, 4(sp) ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_8 ; LMULMAX1-RV32-NEXT: # %bb.7: -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 @@ -11727,14 +11709,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV32-NEXT: mul a1, a1, a3 ; LMULMAX1-RV32-NEXT: srli a1, a1, 24 ; LMULMAX1-RV32-NEXT: .LBB7_9: -; LMULMAX1-RV32-NEXT: sw a1, 0(sp) -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1 ; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7 -; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 -; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_11 +; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26 +; LMULMAX1-RV32-NEXT: sw a1, 0(sp) +; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_11 ; LMULMAX1-RV32-NEXT: # %bb.10: -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 @@ -11762,8 +11742,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV32-NEXT: addi a1, a1, 32 ; LMULMAX1-RV32-NEXT: j .LBB7_12 ; LMULMAX1-RV32-NEXT: .LBB7_11: -; LMULMAX1-RV32-NEXT: srli a2, a1, 1 -; LMULMAX1-RV32-NEXT: or a1, a1, a2 +; LMULMAX1-RV32-NEXT: srli a1, a2, 1 +; LMULMAX1-RV32-NEXT: or a1, a2, a1 ; LMULMAX1-RV32-NEXT: srli a2, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll index 85ad0aee92db..02788c91bc45 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -2592,7 +2592,6 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x 
i64>* %y) { ; LMULMAX2-RV32-NEXT: vmv.x.s a5, v25 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_5 ; LMULMAX2-RV32-NEXT: # %bb.4: -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX2-RV32-NEXT: vsrl.vx v25, v25, a6 ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX2-RV32-NEXT: addi a5, a1, -1 @@ -2774,7 +2773,6 @@ define void @cttz_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; LMULMAX1-RV32-NEXT: vmv.x.s a5, v25 ; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_5 ; LMULMAX1-RV32-NEXT: # %bb.4: -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a6 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX1-RV32-NEXT: addi a5, a1, -1 @@ -7655,9 +7653,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-RV32-NEXT: addi a3, a1, 819 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a7, a1, -241 -; LMULMAX2-RV32-NEXT: lui a2, 4112 +; LMULMAX2-RV32-NEXT: lui a1, 4112 ; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26 -; LMULMAX2-RV32-NEXT: addi a2, a2, 257 +; LMULMAX2-RV32-NEXT: addi a2, a1, 257 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_2 ; LMULMAX2-RV32-NEXT: # %bb.1: ; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu @@ -7703,7 +7701,6 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_5 ; LMULMAX2-RV32-NEXT: # %bb.4: -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6 ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 ; LMULMAX2-RV32-NEXT: addi a5, a1, -1 @@ -7740,13 +7737,11 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-RV32-NEXT: mul a1, a1, a2 ; LMULMAX2-RV32-NEXT: srli a5, a1, 24 ; LMULMAX2-RV32-NEXT: .LBB7_6: -; LMULMAX2-RV32-NEXT: sw a5, 24(sp) -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 2 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28 -; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_8 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 +; LMULMAX2-RV32-NEXT: sw a5, 24(sp) +; 
LMULMAX2-RV32-NEXT: bnez a1, .LBB7_8 ; LMULMAX2-RV32-NEXT: # %bb.7: -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6 ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 ; LMULMAX2-RV32-NEXT: addi a5, a1, -1 @@ -7767,9 +7762,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-RV32-NEXT: addi a5, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB7_9 ; LMULMAX2-RV32-NEXT: .LBB7_8: -; LMULMAX2-RV32-NEXT: addi a1, a5, -1 -; LMULMAX2-RV32-NEXT: not a5, a5 -; LMULMAX2-RV32-NEXT: and a1, a5, a1 +; LMULMAX2-RV32-NEXT: addi a5, a1, -1 +; LMULMAX2-RV32-NEXT: not a1, a1 +; LMULMAX2-RV32-NEXT: and a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: and a5, a5, a4 ; LMULMAX2-RV32-NEXT: sub a1, a1, a5 @@ -7783,13 +7778,11 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-RV32-NEXT: mul a1, a1, a2 ; LMULMAX2-RV32-NEXT: srli a5, a1, 24 ; LMULMAX2-RV32-NEXT: .LBB7_9: -; LMULMAX2-RV32-NEXT: sw a5, 16(sp) -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v26, v26, 1 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26 -; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_11 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX2-RV32-NEXT: sw a5, 16(sp) +; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_11 ; LMULMAX2-RV32-NEXT: # %bb.10: -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vsrl.vx v26, v26, a6 ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX2-RV32-NEXT: addi a5, a1, -1 @@ -7810,9 +7803,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-RV32-NEXT: addi a1, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB7_12 ; LMULMAX2-RV32-NEXT: .LBB7_11: -; LMULMAX2-RV32-NEXT: addi a1, a5, -1 -; LMULMAX2-RV32-NEXT: not a5, a5 -; LMULMAX2-RV32-NEXT: and a1, a5, a1 +; LMULMAX2-RV32-NEXT: addi a5, a1, -1 +; LMULMAX2-RV32-NEXT: not a1, a1 +; LMULMAX2-RV32-NEXT: and a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: and a4, a5, a4 ; LMULMAX2-RV32-NEXT: sub a1, a1, a4 @@ -7978,9 
+7971,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV32-NEXT: addi a4, a1, 819 ; LMULMAX1-RV32-NEXT: lui a1, 61681 ; LMULMAX1-RV32-NEXT: addi t0, a1, -241 -; LMULMAX1-RV32-NEXT: lui a3, 4112 +; LMULMAX1-RV32-NEXT: lui a2, 4112 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 -; LMULMAX1-RV32-NEXT: addi a3, a3, 257 +; LMULMAX1-RV32-NEXT: addi a3, a2, 257 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_2 ; LMULMAX1-RV32-NEXT: # %bb.1: ; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu @@ -8026,7 +8019,6 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_5 ; LMULMAX1-RV32-NEXT: # %bb.4: -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vsrl.vx v26, v26, a6 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX1-RV32-NEXT: addi a2, a1, -1 @@ -8065,12 +8057,10 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV32-NEXT: .LBB7_6: ; LMULMAX1-RV32-NEXT: sw a1, 24(sp) ; LMULMAX1-RV32-NEXT: sw zero, 12(sp) -; LMULMAX1-RV32-NEXT: sw zero, 4(sp) -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX1-RV32-NEXT: sw zero, 4(sp) ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_8 ; LMULMAX1-RV32-NEXT: # %bb.7: -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX1-RV32-NEXT: addi a2, a1, -1 @@ -8107,13 +8097,11 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV32-NEXT: mul a1, a1, a3 ; LMULMAX1-RV32-NEXT: srli a1, a1, 24 ; LMULMAX1-RV32-NEXT: .LBB7_9: -; LMULMAX1-RV32-NEXT: sw a1, 0(sp) -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1 -; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 -; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_11 +; LMULMAX1-RV32-NEXT: vmv.x.s a2, v25 +; LMULMAX1-RV32-NEXT: sw a1, 0(sp) +; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_11 ; LMULMAX1-RV32-NEXT: # %bb.10: -; 
LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a6 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX1-RV32-NEXT: addi a2, a1, -1 @@ -8134,9 +8122,9 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-RV32-NEXT: addi a1, a1, 32 ; LMULMAX1-RV32-NEXT: j .LBB7_12 ; LMULMAX1-RV32-NEXT: .LBB7_11: -; LMULMAX1-RV32-NEXT: addi a2, a1, -1 -; LMULMAX1-RV32-NEXT: not a1, a1 -; LMULMAX1-RV32-NEXT: and a1, a1, a2 +; LMULMAX1-RV32-NEXT: addi a1, a2, -1 +; LMULMAX1-RV32-NEXT: not a2, a2 +; LMULMAX1-RV32-NEXT: and a1, a2, a1 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1 ; LMULMAX1-RV32-NEXT: and a2, a2, a5 ; LMULMAX1-RV32-NEXT: sub a1, a1, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index e24e3b554ba9..9924c82f24c6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -3959,7 +3959,6 @@ define void @extract_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-NEXT: vsetivli a2, 4, e64,m2,ta,mu ; LMULMAX2-NEXT: vle64.v v26, (a0) ; LMULMAX2-NEXT: vle64.v v28, (a1) -; LMULMAX2-NEXT: vsetivli a1, 4, e64,m2,ta,mu ; LMULMAX2-NEXT: vadd.vv v26, v26, v28 ; LMULMAX2-NEXT: vse64.v v26, (a0) ; LMULMAX2-NEXT: ret @@ -3973,7 +3972,6 @@ define void @extract_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX1-NEXT: vle64.v v27, (a1) ; LMULMAX1-NEXT: addi a1, a1, 16 ; LMULMAX1-NEXT: vle64.v v28, (a1) -; LMULMAX1-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; LMULMAX1-NEXT: vadd.vv v26, v26, v28 ; LMULMAX1-NEXT: vadd.vv v25, v25, v27 ; LMULMAX1-NEXT: vse64.v v25, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll index e494151ce693..381b2373a236 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll @@ -47,14 +47,11 @@ define <2 x half> @selectcc_v2f16(half %a, half %b, <2 x half> 
%c, <2 x half> %d ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: bnez a0, .LBB1_5 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v9 ; CHECK-NEXT: j .LBB1_6 ; CHECK-NEXT: .LBB1_5: -; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: .LBB1_6: -; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; CHECK-NEXT: vfmv.s.f v25, ft0 ; CHECK-NEXT: vmv1r.v v8, v25 ; CHECK-NEXT: ret @@ -91,22 +88,18 @@ define <4 x half> @select_v4f16(i1 zeroext %c, <4 x half> %a, <4 x half> %b) { ; CHECK-NEXT: fsh ft0, 14(sp) ; CHECK-NEXT: bnez a0, .LBB2_7 ; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 ; CHECK-NEXT: j .LBB2_8 ; CHECK-NEXT: .LBB2_7: -; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 ; CHECK-NEXT: .LBB2_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 12(sp) ; CHECK-NEXT: bnez a0, .LBB2_10 ; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: vsetivli a0, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 1 ; CHECK-NEXT: j .LBB2_11 ; CHECK-NEXT: .LBB2_10: -; CHECK-NEXT: vsetivli a0, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 ; CHECK-NEXT: .LBB2_11: ; CHECK-NEXT: vfmv.f.s ft0, v25 @@ -149,22 +142,18 @@ define <4 x half> @selectcc_v4f16(half %a, half %b, <4 x half> %c, <4 x half> %d ; CHECK-NEXT: fsh ft0, 14(sp) ; CHECK-NEXT: bnez a0, .LBB3_7 ; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 ; CHECK-NEXT: j .LBB3_8 ; CHECK-NEXT: .LBB3_7: -; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 ; CHECK-NEXT: .LBB3_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 12(sp) ; CHECK-NEXT: bnez a0, .LBB3_10 ; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: vsetivli a0, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 1 ; CHECK-NEXT: j .LBB3_11 ; CHECK-NEXT: .LBB3_10: -; CHECK-NEXT: vsetivli a0, 1, e16,mf2,ta,mu ; 
CHECK-NEXT: vslidedown.vi v25, v8, 1 ; CHECK-NEXT: .LBB3_11: ; CHECK-NEXT: vfmv.f.s ft0, v25 @@ -207,66 +196,54 @@ define <8 x half> @select_v8f16(i1 zeroext %c, <8 x half> %a, <8 x half> %b) { ; CHECK-NEXT: fsh ft0, 14(sp) ; CHECK-NEXT: bnez a0, .LBB4_7 ; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 6 ; CHECK-NEXT: j .LBB4_8 ; CHECK-NEXT: .LBB4_7: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 6 ; CHECK-NEXT: .LBB4_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 12(sp) ; CHECK-NEXT: bnez a0, .LBB4_10 ; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 5 ; CHECK-NEXT: j .LBB4_11 ; CHECK-NEXT: .LBB4_10: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 5 ; CHECK-NEXT: .LBB4_11: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 10(sp) ; CHECK-NEXT: bnez a0, .LBB4_13 ; CHECK-NEXT: # %bb.12: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 4 ; CHECK-NEXT: j .LBB4_14 ; CHECK-NEXT: .LBB4_13: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 4 ; CHECK-NEXT: .LBB4_14: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 8(sp) ; CHECK-NEXT: bnez a0, .LBB4_16 ; CHECK-NEXT: # %bb.15: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 3 ; CHECK-NEXT: j .LBB4_17 ; CHECK-NEXT: .LBB4_16: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: .LBB4_17: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 6(sp) ; CHECK-NEXT: bnez a0, .LBB4_19 ; CHECK-NEXT: # %bb.18: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 ; CHECK-NEXT: j .LBB4_20 ; CHECK-NEXT: .LBB4_19: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 ; CHECK-NEXT: .LBB4_20: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 4(sp) 
; CHECK-NEXT: bnez a0, .LBB4_22 ; CHECK-NEXT: # %bb.21: -; CHECK-NEXT: vsetivli a0, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 1 ; CHECK-NEXT: j .LBB4_23 ; CHECK-NEXT: .LBB4_22: -; CHECK-NEXT: vsetivli a0, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 ; CHECK-NEXT: .LBB4_23: ; CHECK-NEXT: vfmv.f.s ft0, v25 @@ -308,66 +285,54 @@ define <8 x half> @selectcc_v8f16(half %a, half %b, <8 x half> %c, <8 x half> %d ; CHECK-NEXT: fsh ft0, 14(sp) ; CHECK-NEXT: bnez a0, .LBB5_7 ; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 6 ; CHECK-NEXT: j .LBB5_8 ; CHECK-NEXT: .LBB5_7: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 6 ; CHECK-NEXT: .LBB5_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 12(sp) ; CHECK-NEXT: bnez a0, .LBB5_10 ; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 5 ; CHECK-NEXT: j .LBB5_11 ; CHECK-NEXT: .LBB5_10: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 5 ; CHECK-NEXT: .LBB5_11: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 10(sp) ; CHECK-NEXT: bnez a0, .LBB5_13 ; CHECK-NEXT: # %bb.12: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 4 ; CHECK-NEXT: j .LBB5_14 ; CHECK-NEXT: .LBB5_13: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 4 ; CHECK-NEXT: .LBB5_14: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 8(sp) ; CHECK-NEXT: bnez a0, .LBB5_16 ; CHECK-NEXT: # %bb.15: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 3 ; CHECK-NEXT: j .LBB5_17 ; CHECK-NEXT: .LBB5_16: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: .LBB5_17: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 6(sp) ; CHECK-NEXT: bnez a0, .LBB5_19 ; CHECK-NEXT: # %bb.18: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: 
vslidedown.vi v25, v9, 2 ; CHECK-NEXT: j .LBB5_20 ; CHECK-NEXT: .LBB5_19: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 ; CHECK-NEXT: .LBB5_20: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 4(sp) ; CHECK-NEXT: bnez a0, .LBB5_22 ; CHECK-NEXT: # %bb.21: -; CHECK-NEXT: vsetivli a0, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 1 ; CHECK-NEXT: j .LBB5_23 ; CHECK-NEXT: .LBB5_22: -; CHECK-NEXT: vsetivli a0, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 ; CHECK-NEXT: .LBB5_23: ; CHECK-NEXT: vfmv.f.s ft0, v25 @@ -416,154 +381,126 @@ define <16 x half> @select_v16f16(i1 zeroext %c, <16 x half> %a, <16 x half> %b) ; RV32-NEXT: fsh ft0, 30(sp) ; RV32-NEXT: bnez a0, .LBB6_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 14 ; RV32-NEXT: j .LBB6_8 ; RV32-NEXT: .LBB6_7: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 14 ; RV32-NEXT: .LBB6_8: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 28(sp) ; RV32-NEXT: bnez a0, .LBB6_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 13 ; RV32-NEXT: j .LBB6_11 ; RV32-NEXT: .LBB6_10: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 13 ; RV32-NEXT: .LBB6_11: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 26(sp) ; RV32-NEXT: bnez a0, .LBB6_13 ; RV32-NEXT: # %bb.12: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 12 ; RV32-NEXT: j .LBB6_14 ; RV32-NEXT: .LBB6_13: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 12 ; RV32-NEXT: .LBB6_14: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB6_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 11 ; RV32-NEXT: j .LBB6_17 ; RV32-NEXT: .LBB6_16: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi 
v26, v8, 11 ; RV32-NEXT: .LBB6_17: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 22(sp) ; RV32-NEXT: bnez a0, .LBB6_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 10 ; RV32-NEXT: j .LBB6_20 ; RV32-NEXT: .LBB6_19: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 10 ; RV32-NEXT: .LBB6_20: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 20(sp) ; RV32-NEXT: bnez a0, .LBB6_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 9 ; RV32-NEXT: j .LBB6_23 ; RV32-NEXT: .LBB6_22: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 9 ; RV32-NEXT: .LBB6_23: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 18(sp) ; RV32-NEXT: bnez a0, .LBB6_25 ; RV32-NEXT: # %bb.24: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 8 ; RV32-NEXT: j .LBB6_26 ; RV32-NEXT: .LBB6_25: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 8 ; RV32-NEXT: .LBB6_26: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB6_28 ; RV32-NEXT: # %bb.27: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 7 ; RV32-NEXT: j .LBB6_29 ; RV32-NEXT: .LBB6_28: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 7 ; RV32-NEXT: .LBB6_29: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 14(sp) ; RV32-NEXT: bnez a0, .LBB6_31 ; RV32-NEXT: # %bb.30: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 6 ; RV32-NEXT: j .LBB6_32 ; RV32-NEXT: .LBB6_31: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 6 ; RV32-NEXT: .LBB6_32: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 12(sp) ; RV32-NEXT: bnez a0, .LBB6_34 ; RV32-NEXT: # %bb.33: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 5 ; RV32-NEXT: j .LBB6_35 ; 
RV32-NEXT: .LBB6_34: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 5 ; RV32-NEXT: .LBB6_35: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 10(sp) ; RV32-NEXT: bnez a0, .LBB6_37 ; RV32-NEXT: # %bb.36: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 4 ; RV32-NEXT: j .LBB6_38 ; RV32-NEXT: .LBB6_37: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 4 ; RV32-NEXT: .LBB6_38: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 8(sp) ; RV32-NEXT: bnez a0, .LBB6_40 ; RV32-NEXT: # %bb.39: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 3 ; RV32-NEXT: j .LBB6_41 ; RV32-NEXT: .LBB6_40: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 3 ; RV32-NEXT: .LBB6_41: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 6(sp) ; RV32-NEXT: bnez a0, .LBB6_43 ; RV32-NEXT: # %bb.42: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 ; RV32-NEXT: j .LBB6_44 ; RV32-NEXT: .LBB6_43: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 2 ; RV32-NEXT: .LBB6_44: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 4(sp) ; RV32-NEXT: bnez a0, .LBB6_46 ; RV32-NEXT: # %bb.45: -; RV32-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 1 ; RV32-NEXT: j .LBB6_47 ; RV32-NEXT: .LBB6_46: -; RV32-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 ; RV32-NEXT: .LBB6_47: ; RV32-NEXT: vfmv.f.s ft0, v26 @@ -610,154 +547,126 @@ define <16 x half> @select_v16f16(i1 zeroext %c, <16 x half> %a, <16 x half> %b) ; RV64-NEXT: fsh ft0, 30(sp) ; RV64-NEXT: bnez a0, .LBB6_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 14 ; RV64-NEXT: j .LBB6_8 ; RV64-NEXT: .LBB6_7: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 14 ; RV64-NEXT: .LBB6_8: ; RV64-NEXT: vfmv.f.s ft0, v26 ; 
RV64-NEXT: fsh ft0, 28(sp) ; RV64-NEXT: bnez a0, .LBB6_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 13 ; RV64-NEXT: j .LBB6_11 ; RV64-NEXT: .LBB6_10: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 13 ; RV64-NEXT: .LBB6_11: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 26(sp) ; RV64-NEXT: bnez a0, .LBB6_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 12 ; RV64-NEXT: j .LBB6_14 ; RV64-NEXT: .LBB6_13: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 12 ; RV64-NEXT: .LBB6_14: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB6_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 11 ; RV64-NEXT: j .LBB6_17 ; RV64-NEXT: .LBB6_16: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 11 ; RV64-NEXT: .LBB6_17: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 22(sp) ; RV64-NEXT: bnez a0, .LBB6_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 10 ; RV64-NEXT: j .LBB6_20 ; RV64-NEXT: .LBB6_19: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 10 ; RV64-NEXT: .LBB6_20: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 20(sp) ; RV64-NEXT: bnez a0, .LBB6_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 9 ; RV64-NEXT: j .LBB6_23 ; RV64-NEXT: .LBB6_22: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 9 ; RV64-NEXT: .LBB6_23: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 18(sp) ; RV64-NEXT: bnez a0, .LBB6_25 ; RV64-NEXT: # %bb.24: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 8 ; RV64-NEXT: j .LBB6_26 ; RV64-NEXT: .LBB6_25: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; 
RV64-NEXT: vslidedown.vi v26, v8, 8 ; RV64-NEXT: .LBB6_26: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB6_28 ; RV64-NEXT: # %bb.27: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 7 ; RV64-NEXT: j .LBB6_29 ; RV64-NEXT: .LBB6_28: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 7 ; RV64-NEXT: .LBB6_29: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 14(sp) ; RV64-NEXT: bnez a0, .LBB6_31 ; RV64-NEXT: # %bb.30: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 6 ; RV64-NEXT: j .LBB6_32 ; RV64-NEXT: .LBB6_31: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 6 ; RV64-NEXT: .LBB6_32: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 12(sp) ; RV64-NEXT: bnez a0, .LBB6_34 ; RV64-NEXT: # %bb.33: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 5 ; RV64-NEXT: j .LBB6_35 ; RV64-NEXT: .LBB6_34: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 5 ; RV64-NEXT: .LBB6_35: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 10(sp) ; RV64-NEXT: bnez a0, .LBB6_37 ; RV64-NEXT: # %bb.36: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 4 ; RV64-NEXT: j .LBB6_38 ; RV64-NEXT: .LBB6_37: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 4 ; RV64-NEXT: .LBB6_38: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 8(sp) ; RV64-NEXT: bnez a0, .LBB6_40 ; RV64-NEXT: # %bb.39: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 ; RV64-NEXT: j .LBB6_41 ; RV64-NEXT: .LBB6_40: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 3 ; RV64-NEXT: .LBB6_41: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 6(sp) ; RV64-NEXT: bnez a0, .LBB6_43 ; RV64-NEXT: # %bb.42: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 ; 
RV64-NEXT: j .LBB6_44 ; RV64-NEXT: .LBB6_43: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 2 ; RV64-NEXT: .LBB6_44: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 4(sp) ; RV64-NEXT: bnez a0, .LBB6_46 ; RV64-NEXT: # %bb.45: -; RV64-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 1 ; RV64-NEXT: j .LBB6_47 ; RV64-NEXT: .LBB6_46: -; RV64-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 ; RV64-NEXT: .LBB6_47: ; RV64-NEXT: vfmv.f.s ft0, v26 @@ -809,154 +718,126 @@ define <16 x half> @selectcc_v16f16(half %a, half %b, <16 x half> %c, <16 x half ; RV32-NEXT: fsh ft0, 30(sp) ; RV32-NEXT: bnez a0, .LBB7_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 14 ; RV32-NEXT: j .LBB7_8 ; RV32-NEXT: .LBB7_7: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 14 ; RV32-NEXT: .LBB7_8: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 28(sp) ; RV32-NEXT: bnez a0, .LBB7_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 13 ; RV32-NEXT: j .LBB7_11 ; RV32-NEXT: .LBB7_10: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 13 ; RV32-NEXT: .LBB7_11: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 26(sp) ; RV32-NEXT: bnez a0, .LBB7_13 ; RV32-NEXT: # %bb.12: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 12 ; RV32-NEXT: j .LBB7_14 ; RV32-NEXT: .LBB7_13: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 12 ; RV32-NEXT: .LBB7_14: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB7_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 11 ; RV32-NEXT: j .LBB7_17 ; RV32-NEXT: .LBB7_16: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 11 ; RV32-NEXT: .LBB7_17: ; 
RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 22(sp) ; RV32-NEXT: bnez a0, .LBB7_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 10 ; RV32-NEXT: j .LBB7_20 ; RV32-NEXT: .LBB7_19: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 10 ; RV32-NEXT: .LBB7_20: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 20(sp) ; RV32-NEXT: bnez a0, .LBB7_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 9 ; RV32-NEXT: j .LBB7_23 ; RV32-NEXT: .LBB7_22: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 9 ; RV32-NEXT: .LBB7_23: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 18(sp) ; RV32-NEXT: bnez a0, .LBB7_25 ; RV32-NEXT: # %bb.24: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 8 ; RV32-NEXT: j .LBB7_26 ; RV32-NEXT: .LBB7_25: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 8 ; RV32-NEXT: .LBB7_26: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB7_28 ; RV32-NEXT: # %bb.27: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 7 ; RV32-NEXT: j .LBB7_29 ; RV32-NEXT: .LBB7_28: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 7 ; RV32-NEXT: .LBB7_29: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 14(sp) ; RV32-NEXT: bnez a0, .LBB7_31 ; RV32-NEXT: # %bb.30: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 6 ; RV32-NEXT: j .LBB7_32 ; RV32-NEXT: .LBB7_31: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 6 ; RV32-NEXT: .LBB7_32: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 12(sp) ; RV32-NEXT: bnez a0, .LBB7_34 ; RV32-NEXT: # %bb.33: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 5 ; RV32-NEXT: j .LBB7_35 ; RV32-NEXT: .LBB7_34: -; RV32-NEXT: 
vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 5 ; RV32-NEXT: .LBB7_35: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 10(sp) ; RV32-NEXT: bnez a0, .LBB7_37 ; RV32-NEXT: # %bb.36: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 4 ; RV32-NEXT: j .LBB7_38 ; RV32-NEXT: .LBB7_37: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 4 ; RV32-NEXT: .LBB7_38: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 8(sp) ; RV32-NEXT: bnez a0, .LBB7_40 ; RV32-NEXT: # %bb.39: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 3 ; RV32-NEXT: j .LBB7_41 ; RV32-NEXT: .LBB7_40: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 3 ; RV32-NEXT: .LBB7_41: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 6(sp) ; RV32-NEXT: bnez a0, .LBB7_43 ; RV32-NEXT: # %bb.42: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 ; RV32-NEXT: j .LBB7_44 ; RV32-NEXT: .LBB7_43: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 2 ; RV32-NEXT: .LBB7_44: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 4(sp) ; RV32-NEXT: bnez a0, .LBB7_46 ; RV32-NEXT: # %bb.45: -; RV32-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 1 ; RV32-NEXT: j .LBB7_47 ; RV32-NEXT: .LBB7_46: -; RV32-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 ; RV32-NEXT: .LBB7_47: ; RV32-NEXT: vfmv.f.s ft0, v26 @@ -1004,154 +885,126 @@ define <16 x half> @selectcc_v16f16(half %a, half %b, <16 x half> %c, <16 x half ; RV64-NEXT: fsh ft0, 30(sp) ; RV64-NEXT: bnez a0, .LBB7_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 14 ; RV64-NEXT: j .LBB7_8 ; RV64-NEXT: .LBB7_7: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 14 ; RV64-NEXT: .LBB7_8: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 28(sp) ; RV64-NEXT: 
bnez a0, .LBB7_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 13 ; RV64-NEXT: j .LBB7_11 ; RV64-NEXT: .LBB7_10: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 13 ; RV64-NEXT: .LBB7_11: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 26(sp) ; RV64-NEXT: bnez a0, .LBB7_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 12 ; RV64-NEXT: j .LBB7_14 ; RV64-NEXT: .LBB7_13: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 12 ; RV64-NEXT: .LBB7_14: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB7_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 11 ; RV64-NEXT: j .LBB7_17 ; RV64-NEXT: .LBB7_16: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 11 ; RV64-NEXT: .LBB7_17: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 22(sp) ; RV64-NEXT: bnez a0, .LBB7_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 10 ; RV64-NEXT: j .LBB7_20 ; RV64-NEXT: .LBB7_19: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 10 ; RV64-NEXT: .LBB7_20: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 20(sp) ; RV64-NEXT: bnez a0, .LBB7_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 9 ; RV64-NEXT: j .LBB7_23 ; RV64-NEXT: .LBB7_22: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 9 ; RV64-NEXT: .LBB7_23: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 18(sp) ; RV64-NEXT: bnez a0, .LBB7_25 ; RV64-NEXT: # %bb.24: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 8 ; RV64-NEXT: j .LBB7_26 ; RV64-NEXT: .LBB7_25: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 8 ; 
RV64-NEXT: .LBB7_26: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB7_28 ; RV64-NEXT: # %bb.27: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 7 ; RV64-NEXT: j .LBB7_29 ; RV64-NEXT: .LBB7_28: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 7 ; RV64-NEXT: .LBB7_29: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 14(sp) ; RV64-NEXT: bnez a0, .LBB7_31 ; RV64-NEXT: # %bb.30: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 6 ; RV64-NEXT: j .LBB7_32 ; RV64-NEXT: .LBB7_31: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 6 ; RV64-NEXT: .LBB7_32: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 12(sp) ; RV64-NEXT: bnez a0, .LBB7_34 ; RV64-NEXT: # %bb.33: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 5 ; RV64-NEXT: j .LBB7_35 ; RV64-NEXT: .LBB7_34: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 5 ; RV64-NEXT: .LBB7_35: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 10(sp) ; RV64-NEXT: bnez a0, .LBB7_37 ; RV64-NEXT: # %bb.36: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 4 ; RV64-NEXT: j .LBB7_38 ; RV64-NEXT: .LBB7_37: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 4 ; RV64-NEXT: .LBB7_38: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 8(sp) ; RV64-NEXT: bnez a0, .LBB7_40 ; RV64-NEXT: # %bb.39: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 ; RV64-NEXT: j .LBB7_41 ; RV64-NEXT: .LBB7_40: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 3 ; RV64-NEXT: .LBB7_41: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 6(sp) ; RV64-NEXT: bnez a0, .LBB7_43 ; RV64-NEXT: # %bb.42: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 ; RV64-NEXT: j .LBB7_44 ; RV64-NEXT: .LBB7_43: -; 
RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 2 ; RV64-NEXT: .LBB7_44: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 4(sp) ; RV64-NEXT: bnez a0, .LBB7_46 ; RV64-NEXT: # %bb.45: -; RV64-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 1 ; RV64-NEXT: j .LBB7_47 ; RV64-NEXT: .LBB7_46: -; RV64-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 ; RV64-NEXT: .LBB7_47: ; RV64-NEXT: vfmv.f.s ft0, v26 @@ -1211,14 +1064,11 @@ define <2 x float> @selectcc_v2f32(float %a, float %b, <2 x float> %c, <2 x floa ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: bnez a0, .LBB9_5 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v9 ; CHECK-NEXT: j .LBB9_6 ; CHECK-NEXT: .LBB9_5: -; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: .LBB9_6: -; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu ; CHECK-NEXT: vfmv.s.f v25, ft0 ; CHECK-NEXT: vmv1r.v v8, v25 ; CHECK-NEXT: ret @@ -1255,22 +1105,18 @@ define <4 x float> @select_v4f32(i1 zeroext %c, <4 x float> %a, <4 x float> %b) ; CHECK-NEXT: fsw ft0, 12(sp) ; CHECK-NEXT: bnez a0, .LBB10_7 ; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 ; CHECK-NEXT: j .LBB10_8 ; CHECK-NEXT: .LBB10_7: -; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 ; CHECK-NEXT: .LBB10_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsw ft0, 8(sp) ; CHECK-NEXT: bnez a0, .LBB10_10 ; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: vsetivli a0, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 1 ; CHECK-NEXT: j .LBB10_11 ; CHECK-NEXT: .LBB10_10: -; CHECK-NEXT: vsetivli a0, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 ; CHECK-NEXT: .LBB10_11: ; CHECK-NEXT: vfmv.f.s ft0, v25 @@ -1312,22 +1158,18 @@ define <4 x float> @selectcc_v4f32(float %a, float %b, <4 x float> %c, <4 x floa ; CHECK-NEXT: fsw ft0, 12(sp) ; CHECK-NEXT: bnez 
a0, .LBB11_7 ; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 ; CHECK-NEXT: j .LBB11_8 ; CHECK-NEXT: .LBB11_7: -; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 ; CHECK-NEXT: .LBB11_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsw ft0, 8(sp) ; CHECK-NEXT: bnez a0, .LBB11_10 ; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: vsetivli a0, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 1 ; CHECK-NEXT: j .LBB11_11 ; CHECK-NEXT: .LBB11_10: -; CHECK-NEXT: vsetivli a0, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 ; CHECK-NEXT: .LBB11_11: ; CHECK-NEXT: vfmv.f.s ft0, v25 @@ -1376,66 +1218,54 @@ define <8 x float> @select_v8f32(i1 zeroext %c, <8 x float> %a, <8 x float> %b) ; RV32-NEXT: fsw ft0, 28(sp) ; RV32-NEXT: bnez a0, .LBB12_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 6 ; RV32-NEXT: j .LBB12_8 ; RV32-NEXT: .LBB12_7: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 6 ; RV32-NEXT: .LBB12_8: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB12_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 5 ; RV32-NEXT: j .LBB12_11 ; RV32-NEXT: .LBB12_10: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 5 ; RV32-NEXT: .LBB12_11: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 20(sp) ; RV32-NEXT: bnez a0, .LBB12_13 ; RV32-NEXT: # %bb.12: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 4 ; RV32-NEXT: j .LBB12_14 ; RV32-NEXT: .LBB12_13: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 4 ; RV32-NEXT: .LBB12_14: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB12_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 3 ; RV32-NEXT: j 
.LBB12_17 ; RV32-NEXT: .LBB12_16: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 3 ; RV32-NEXT: .LBB12_17: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 12(sp) ; RV32-NEXT: bnez a0, .LBB12_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 ; RV32-NEXT: j .LBB12_20 ; RV32-NEXT: .LBB12_19: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 2 ; RV32-NEXT: .LBB12_20: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 8(sp) ; RV32-NEXT: bnez a0, .LBB12_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 1 ; RV32-NEXT: j .LBB12_23 ; RV32-NEXT: .LBB12_22: -; RV32-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 ; RV32-NEXT: .LBB12_23: ; RV32-NEXT: vfmv.f.s ft0, v26 @@ -1482,66 +1312,54 @@ define <8 x float> @select_v8f32(i1 zeroext %c, <8 x float> %a, <8 x float> %b) ; RV64-NEXT: fsw ft0, 28(sp) ; RV64-NEXT: bnez a0, .LBB12_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 6 ; RV64-NEXT: j .LBB12_8 ; RV64-NEXT: .LBB12_7: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 6 ; RV64-NEXT: .LBB12_8: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB12_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 5 ; RV64-NEXT: j .LBB12_11 ; RV64-NEXT: .LBB12_10: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 5 ; RV64-NEXT: .LBB12_11: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 20(sp) ; RV64-NEXT: bnez a0, .LBB12_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 4 ; RV64-NEXT: j .LBB12_14 ; RV64-NEXT: .LBB12_13: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 4 ; RV64-NEXT: .LBB12_14: ; 
RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB12_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 ; RV64-NEXT: j .LBB12_17 ; RV64-NEXT: .LBB12_16: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 3 ; RV64-NEXT: .LBB12_17: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 12(sp) ; RV64-NEXT: bnez a0, .LBB12_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 ; RV64-NEXT: j .LBB12_20 ; RV64-NEXT: .LBB12_19: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 2 ; RV64-NEXT: .LBB12_20: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 8(sp) ; RV64-NEXT: bnez a0, .LBB12_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 1 ; RV64-NEXT: j .LBB12_23 ; RV64-NEXT: .LBB12_22: -; RV64-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 ; RV64-NEXT: .LBB12_23: ; RV64-NEXT: vfmv.f.s ft0, v26 @@ -1593,66 +1411,54 @@ define <8 x float> @selectcc_v8f32(float %a, float %b, <8 x float> %c, <8 x floa ; RV32-NEXT: fsw ft0, 28(sp) ; RV32-NEXT: bnez a0, .LBB13_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 6 ; RV32-NEXT: j .LBB13_8 ; RV32-NEXT: .LBB13_7: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 6 ; RV32-NEXT: .LBB13_8: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB13_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 5 ; RV32-NEXT: j .LBB13_11 ; RV32-NEXT: .LBB13_10: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 5 ; RV32-NEXT: .LBB13_11: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 20(sp) ; RV32-NEXT: bnez a0, .LBB13_13 ; RV32-NEXT: # %bb.12: -; RV32-NEXT: vsetivli a1, 1, 
e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 4 ; RV32-NEXT: j .LBB13_14 ; RV32-NEXT: .LBB13_13: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 4 ; RV32-NEXT: .LBB13_14: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB13_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 3 ; RV32-NEXT: j .LBB13_17 ; RV32-NEXT: .LBB13_16: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 3 ; RV32-NEXT: .LBB13_17: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 12(sp) ; RV32-NEXT: bnez a0, .LBB13_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 ; RV32-NEXT: j .LBB13_20 ; RV32-NEXT: .LBB13_19: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 2 ; RV32-NEXT: .LBB13_20: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 8(sp) ; RV32-NEXT: bnez a0, .LBB13_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 1 ; RV32-NEXT: j .LBB13_23 ; RV32-NEXT: .LBB13_22: -; RV32-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 ; RV32-NEXT: .LBB13_23: ; RV32-NEXT: vfmv.f.s ft0, v26 @@ -1700,66 +1506,54 @@ define <8 x float> @selectcc_v8f32(float %a, float %b, <8 x float> %c, <8 x floa ; RV64-NEXT: fsw ft0, 28(sp) ; RV64-NEXT: bnez a0, .LBB13_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 6 ; RV64-NEXT: j .LBB13_8 ; RV64-NEXT: .LBB13_7: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 6 ; RV64-NEXT: .LBB13_8: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB13_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 5 ; RV64-NEXT: j .LBB13_11 ; RV64-NEXT: .LBB13_10: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; 
RV64-NEXT: vslidedown.vi v26, v8, 5 ; RV64-NEXT: .LBB13_11: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 20(sp) ; RV64-NEXT: bnez a0, .LBB13_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 4 ; RV64-NEXT: j .LBB13_14 ; RV64-NEXT: .LBB13_13: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 4 ; RV64-NEXT: .LBB13_14: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB13_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 ; RV64-NEXT: j .LBB13_17 ; RV64-NEXT: .LBB13_16: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 3 ; RV64-NEXT: .LBB13_17: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 12(sp) ; RV64-NEXT: bnez a0, .LBB13_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 ; RV64-NEXT: j .LBB13_20 ; RV64-NEXT: .LBB13_19: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 2 ; RV64-NEXT: .LBB13_20: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 8(sp) ; RV64-NEXT: bnez a0, .LBB13_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 1 ; RV64-NEXT: j .LBB13_23 ; RV64-NEXT: .LBB13_22: -; RV64-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 ; RV64-NEXT: .LBB13_23: ; RV64-NEXT: vfmv.f.s ft0, v26 @@ -1811,154 +1605,126 @@ define <16 x float> @select_v16f32(i1 zeroext %c, <16 x float> %a, <16 x float> ; RV32-NEXT: fsw ft0, 60(sp) ; RV32-NEXT: bnez a0, .LBB14_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 14 ; RV32-NEXT: j .LBB14_8 ; RV32-NEXT: .LBB14_7: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 14 ; RV32-NEXT: .LBB14_8: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 56(sp) ; RV32-NEXT: bnez a0, 
.LBB14_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 13 ; RV32-NEXT: j .LBB14_11 ; RV32-NEXT: .LBB14_10: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 13 ; RV32-NEXT: .LBB14_11: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 52(sp) ; RV32-NEXT: bnez a0, .LBB14_13 ; RV32-NEXT: # %bb.12: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 12 ; RV32-NEXT: j .LBB14_14 ; RV32-NEXT: .LBB14_13: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 12 ; RV32-NEXT: .LBB14_14: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 48(sp) ; RV32-NEXT: bnez a0, .LBB14_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 11 ; RV32-NEXT: j .LBB14_17 ; RV32-NEXT: .LBB14_16: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 11 ; RV32-NEXT: .LBB14_17: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 44(sp) ; RV32-NEXT: bnez a0, .LBB14_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 10 ; RV32-NEXT: j .LBB14_20 ; RV32-NEXT: .LBB14_19: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 10 ; RV32-NEXT: .LBB14_20: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 40(sp) ; RV32-NEXT: bnez a0, .LBB14_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 9 ; RV32-NEXT: j .LBB14_23 ; RV32-NEXT: .LBB14_22: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 9 ; RV32-NEXT: .LBB14_23: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 36(sp) ; RV32-NEXT: bnez a0, .LBB14_25 ; RV32-NEXT: # %bb.24: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 8 ; RV32-NEXT: j .LBB14_26 ; RV32-NEXT: .LBB14_25: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi 
v28, v8, 8 ; RV32-NEXT: .LBB14_26: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 32(sp) ; RV32-NEXT: bnez a0, .LBB14_28 ; RV32-NEXT: # %bb.27: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 7 ; RV32-NEXT: j .LBB14_29 ; RV32-NEXT: .LBB14_28: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 7 ; RV32-NEXT: .LBB14_29: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 28(sp) ; RV32-NEXT: bnez a0, .LBB14_31 ; RV32-NEXT: # %bb.30: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 6 ; RV32-NEXT: j .LBB14_32 ; RV32-NEXT: .LBB14_31: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 6 ; RV32-NEXT: .LBB14_32: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB14_34 ; RV32-NEXT: # %bb.33: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 5 ; RV32-NEXT: j .LBB14_35 ; RV32-NEXT: .LBB14_34: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 5 ; RV32-NEXT: .LBB14_35: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 20(sp) ; RV32-NEXT: bnez a0, .LBB14_37 ; RV32-NEXT: # %bb.36: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 4 ; RV32-NEXT: j .LBB14_38 ; RV32-NEXT: .LBB14_37: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 4 ; RV32-NEXT: .LBB14_38: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB14_40 ; RV32-NEXT: # %bb.39: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 3 ; RV32-NEXT: j .LBB14_41 ; RV32-NEXT: .LBB14_40: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 3 ; RV32-NEXT: .LBB14_41: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 12(sp) ; RV32-NEXT: bnez a0, .LBB14_43 ; RV32-NEXT: # %bb.42: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 2 ; RV32-NEXT: 
j .LBB14_44 ; RV32-NEXT: .LBB14_43: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 2 ; RV32-NEXT: .LBB14_44: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 8(sp) ; RV32-NEXT: bnez a0, .LBB14_46 ; RV32-NEXT: # %bb.45: -; RV32-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 1 ; RV32-NEXT: j .LBB14_47 ; RV32-NEXT: .LBB14_46: -; RV32-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 1 ; RV32-NEXT: .LBB14_47: ; RV32-NEXT: vfmv.f.s ft0, v28 @@ -2005,154 +1771,126 @@ define <16 x float> @select_v16f32(i1 zeroext %c, <16 x float> %a, <16 x float> ; RV64-NEXT: fsw ft0, 60(sp) ; RV64-NEXT: bnez a0, .LBB14_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 14 ; RV64-NEXT: j .LBB14_8 ; RV64-NEXT: .LBB14_7: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 14 ; RV64-NEXT: .LBB14_8: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 56(sp) ; RV64-NEXT: bnez a0, .LBB14_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 13 ; RV64-NEXT: j .LBB14_11 ; RV64-NEXT: .LBB14_10: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 13 ; RV64-NEXT: .LBB14_11: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 52(sp) ; RV64-NEXT: bnez a0, .LBB14_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 12 ; RV64-NEXT: j .LBB14_14 ; RV64-NEXT: .LBB14_13: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 12 ; RV64-NEXT: .LBB14_14: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 48(sp) ; RV64-NEXT: bnez a0, .LBB14_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 11 ; RV64-NEXT: j .LBB14_17 ; RV64-NEXT: .LBB14_16: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 11 ; RV64-NEXT: 
.LBB14_17: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 44(sp) ; RV64-NEXT: bnez a0, .LBB14_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 10 ; RV64-NEXT: j .LBB14_20 ; RV64-NEXT: .LBB14_19: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 10 ; RV64-NEXT: .LBB14_20: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 40(sp) ; RV64-NEXT: bnez a0, .LBB14_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 9 ; RV64-NEXT: j .LBB14_23 ; RV64-NEXT: .LBB14_22: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 9 ; RV64-NEXT: .LBB14_23: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 36(sp) ; RV64-NEXT: bnez a0, .LBB14_25 ; RV64-NEXT: # %bb.24: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 8 ; RV64-NEXT: j .LBB14_26 ; RV64-NEXT: .LBB14_25: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 8 ; RV64-NEXT: .LBB14_26: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 32(sp) ; RV64-NEXT: bnez a0, .LBB14_28 ; RV64-NEXT: # %bb.27: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 7 ; RV64-NEXT: j .LBB14_29 ; RV64-NEXT: .LBB14_28: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 7 ; RV64-NEXT: .LBB14_29: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 28(sp) ; RV64-NEXT: bnez a0, .LBB14_31 ; RV64-NEXT: # %bb.30: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 6 ; RV64-NEXT: j .LBB14_32 ; RV64-NEXT: .LBB14_31: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 6 ; RV64-NEXT: .LBB14_32: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB14_34 ; RV64-NEXT: # %bb.33: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 5 ; RV64-NEXT: j .LBB14_35 ; 
RV64-NEXT: .LBB14_34: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 5 ; RV64-NEXT: .LBB14_35: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 20(sp) ; RV64-NEXT: bnez a0, .LBB14_37 ; RV64-NEXT: # %bb.36: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 4 ; RV64-NEXT: j .LBB14_38 ; RV64-NEXT: .LBB14_37: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 4 ; RV64-NEXT: .LBB14_38: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB14_40 ; RV64-NEXT: # %bb.39: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 3 ; RV64-NEXT: j .LBB14_41 ; RV64-NEXT: .LBB14_40: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 3 ; RV64-NEXT: .LBB14_41: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 12(sp) ; RV64-NEXT: bnez a0, .LBB14_43 ; RV64-NEXT: # %bb.42: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 2 ; RV64-NEXT: j .LBB14_44 ; RV64-NEXT: .LBB14_43: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 2 ; RV64-NEXT: .LBB14_44: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 8(sp) ; RV64-NEXT: bnez a0, .LBB14_46 ; RV64-NEXT: # %bb.45: -; RV64-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 1 ; RV64-NEXT: j .LBB14_47 ; RV64-NEXT: .LBB14_46: -; RV64-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 1 ; RV64-NEXT: .LBB14_47: ; RV64-NEXT: vfmv.f.s ft0, v28 @@ -2204,154 +1942,126 @@ define <16 x float> @selectcc_v16f32(float %a, float %b, <16 x float> %c, <16 x ; RV32-NEXT: fsw ft0, 60(sp) ; RV32-NEXT: bnez a0, .LBB15_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 14 ; RV32-NEXT: j .LBB15_8 ; RV32-NEXT: .LBB15_7: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 14 ; RV32-NEXT: .LBB15_8: ; RV32-NEXT: 
vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 56(sp) ; RV32-NEXT: bnez a0, .LBB15_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 13 ; RV32-NEXT: j .LBB15_11 ; RV32-NEXT: .LBB15_10: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 13 ; RV32-NEXT: .LBB15_11: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 52(sp) ; RV32-NEXT: bnez a0, .LBB15_13 ; RV32-NEXT: # %bb.12: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 12 ; RV32-NEXT: j .LBB15_14 ; RV32-NEXT: .LBB15_13: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 12 ; RV32-NEXT: .LBB15_14: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 48(sp) ; RV32-NEXT: bnez a0, .LBB15_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 11 ; RV32-NEXT: j .LBB15_17 ; RV32-NEXT: .LBB15_16: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 11 ; RV32-NEXT: .LBB15_17: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 44(sp) ; RV32-NEXT: bnez a0, .LBB15_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 10 ; RV32-NEXT: j .LBB15_20 ; RV32-NEXT: .LBB15_19: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 10 ; RV32-NEXT: .LBB15_20: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 40(sp) ; RV32-NEXT: bnez a0, .LBB15_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 9 ; RV32-NEXT: j .LBB15_23 ; RV32-NEXT: .LBB15_22: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 9 ; RV32-NEXT: .LBB15_23: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 36(sp) ; RV32-NEXT: bnez a0, .LBB15_25 ; RV32-NEXT: # %bb.24: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 8 ; RV32-NEXT: j .LBB15_26 ; RV32-NEXT: .LBB15_25: -; 
RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 8 ; RV32-NEXT: .LBB15_26: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 32(sp) ; RV32-NEXT: bnez a0, .LBB15_28 ; RV32-NEXT: # %bb.27: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 7 ; RV32-NEXT: j .LBB15_29 ; RV32-NEXT: .LBB15_28: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 7 ; RV32-NEXT: .LBB15_29: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 28(sp) ; RV32-NEXT: bnez a0, .LBB15_31 ; RV32-NEXT: # %bb.30: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 6 ; RV32-NEXT: j .LBB15_32 ; RV32-NEXT: .LBB15_31: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 6 ; RV32-NEXT: .LBB15_32: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB15_34 ; RV32-NEXT: # %bb.33: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 5 ; RV32-NEXT: j .LBB15_35 ; RV32-NEXT: .LBB15_34: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 5 ; RV32-NEXT: .LBB15_35: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 20(sp) ; RV32-NEXT: bnez a0, .LBB15_37 ; RV32-NEXT: # %bb.36: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 4 ; RV32-NEXT: j .LBB15_38 ; RV32-NEXT: .LBB15_37: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 4 ; RV32-NEXT: .LBB15_38: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB15_40 ; RV32-NEXT: # %bb.39: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 3 ; RV32-NEXT: j .LBB15_41 ; RV32-NEXT: .LBB15_40: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 3 ; RV32-NEXT: .LBB15_41: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 12(sp) ; RV32-NEXT: bnez a0, .LBB15_43 ; RV32-NEXT: # %bb.42: -; RV32-NEXT: vsetivli a1, 
1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 2 ; RV32-NEXT: j .LBB15_44 ; RV32-NEXT: .LBB15_43: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 2 ; RV32-NEXT: .LBB15_44: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 8(sp) ; RV32-NEXT: bnez a0, .LBB15_46 ; RV32-NEXT: # %bb.45: -; RV32-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 1 ; RV32-NEXT: j .LBB15_47 ; RV32-NEXT: .LBB15_46: -; RV32-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 1 ; RV32-NEXT: .LBB15_47: ; RV32-NEXT: vfmv.f.s ft0, v28 @@ -2399,154 +2109,126 @@ define <16 x float> @selectcc_v16f32(float %a, float %b, <16 x float> %c, <16 x ; RV64-NEXT: fsw ft0, 60(sp) ; RV64-NEXT: bnez a0, .LBB15_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 14 ; RV64-NEXT: j .LBB15_8 ; RV64-NEXT: .LBB15_7: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 14 ; RV64-NEXT: .LBB15_8: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 56(sp) ; RV64-NEXT: bnez a0, .LBB15_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 13 ; RV64-NEXT: j .LBB15_11 ; RV64-NEXT: .LBB15_10: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 13 ; RV64-NEXT: .LBB15_11: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 52(sp) ; RV64-NEXT: bnez a0, .LBB15_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 12 ; RV64-NEXT: j .LBB15_14 ; RV64-NEXT: .LBB15_13: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 12 ; RV64-NEXT: .LBB15_14: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 48(sp) ; RV64-NEXT: bnez a0, .LBB15_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 11 ; RV64-NEXT: j .LBB15_17 ; RV64-NEXT: .LBB15_16: -; RV64-NEXT: vsetivli a1, 1, 
e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 11 ; RV64-NEXT: .LBB15_17: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 44(sp) ; RV64-NEXT: bnez a0, .LBB15_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 10 ; RV64-NEXT: j .LBB15_20 ; RV64-NEXT: .LBB15_19: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 10 ; RV64-NEXT: .LBB15_20: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 40(sp) ; RV64-NEXT: bnez a0, .LBB15_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 9 ; RV64-NEXT: j .LBB15_23 ; RV64-NEXT: .LBB15_22: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 9 ; RV64-NEXT: .LBB15_23: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 36(sp) ; RV64-NEXT: bnez a0, .LBB15_25 ; RV64-NEXT: # %bb.24: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 8 ; RV64-NEXT: j .LBB15_26 ; RV64-NEXT: .LBB15_25: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 8 ; RV64-NEXT: .LBB15_26: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 32(sp) ; RV64-NEXT: bnez a0, .LBB15_28 ; RV64-NEXT: # %bb.27: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 7 ; RV64-NEXT: j .LBB15_29 ; RV64-NEXT: .LBB15_28: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 7 ; RV64-NEXT: .LBB15_29: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 28(sp) ; RV64-NEXT: bnez a0, .LBB15_31 ; RV64-NEXT: # %bb.30: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 6 ; RV64-NEXT: j .LBB15_32 ; RV64-NEXT: .LBB15_31: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 6 ; RV64-NEXT: .LBB15_32: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB15_34 ; RV64-NEXT: # %bb.33: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; 
RV64-NEXT: vslidedown.vi v28, v12, 5 ; RV64-NEXT: j .LBB15_35 ; RV64-NEXT: .LBB15_34: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 5 ; RV64-NEXT: .LBB15_35: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 20(sp) ; RV64-NEXT: bnez a0, .LBB15_37 ; RV64-NEXT: # %bb.36: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 4 ; RV64-NEXT: j .LBB15_38 ; RV64-NEXT: .LBB15_37: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 4 ; RV64-NEXT: .LBB15_38: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB15_40 ; RV64-NEXT: # %bb.39: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 3 ; RV64-NEXT: j .LBB15_41 ; RV64-NEXT: .LBB15_40: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 3 ; RV64-NEXT: .LBB15_41: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 12(sp) ; RV64-NEXT: bnez a0, .LBB15_43 ; RV64-NEXT: # %bb.42: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 2 ; RV64-NEXT: j .LBB15_44 ; RV64-NEXT: .LBB15_43: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 2 ; RV64-NEXT: .LBB15_44: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 8(sp) ; RV64-NEXT: bnez a0, .LBB15_46 ; RV64-NEXT: # %bb.45: -; RV64-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 1 ; RV64-NEXT: j .LBB15_47 ; RV64-NEXT: .LBB15_46: -; RV64-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 1 ; RV64-NEXT: .LBB15_47: ; RV64-NEXT: vfmv.f.s ft0, v28 @@ -2606,14 +2288,11 @@ define <2 x double> @selectcc_v2f64(double %a, double %b, <2 x double> %c, <2 x ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: bnez a0, .LBB17_5 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v9 ; CHECK-NEXT: j .LBB17_6 ; CHECK-NEXT: .LBB17_5: -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; 
CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: .LBB17_6: -; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu ; CHECK-NEXT: vfmv.s.f v25, ft0 ; CHECK-NEXT: vmv1r.v v8, v25 ; CHECK-NEXT: ret @@ -2657,22 +2336,18 @@ define <4 x double> @select_v4f64(i1 zeroext %c, <4 x double> %a, <4 x double> % ; RV32-NEXT: fsd ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB18_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 ; RV32-NEXT: j .LBB18_8 ; RV32-NEXT: .LBB18_7: -; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 2 ; RV32-NEXT: .LBB18_8: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsd ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB18_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 1 ; RV32-NEXT: j .LBB18_11 ; RV32-NEXT: .LBB18_10: -; RV32-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 ; RV32-NEXT: .LBB18_11: ; RV32-NEXT: vfmv.f.s ft0, v26 @@ -2719,22 +2394,18 @@ define <4 x double> @select_v4f64(i1 zeroext %c, <4 x double> %a, <4 x double> % ; RV64-NEXT: fsd ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB18_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 ; RV64-NEXT: j .LBB18_8 ; RV64-NEXT: .LBB18_7: -; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 2 ; RV64-NEXT: .LBB18_8: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsd ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB18_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 1 ; RV64-NEXT: j .LBB18_11 ; RV64-NEXT: .LBB18_10: -; RV64-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 ; RV64-NEXT: .LBB18_11: ; RV64-NEXT: vfmv.f.s ft0, v26 @@ -2786,22 +2457,18 @@ define <4 x double> @selectcc_v4f64(double %a, double %b, <4 x double> %c, <4 x ; RV32-NEXT: fsd ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB19_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, 
e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 ; RV32-NEXT: j .LBB19_8 ; RV32-NEXT: .LBB19_7: -; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 2 ; RV32-NEXT: .LBB19_8: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsd ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB19_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 1 ; RV32-NEXT: j .LBB19_11 ; RV32-NEXT: .LBB19_10: -; RV32-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 ; RV32-NEXT: .LBB19_11: ; RV32-NEXT: vfmv.f.s ft0, v26 @@ -2849,22 +2516,18 @@ define <4 x double> @selectcc_v4f64(double %a, double %b, <4 x double> %c, <4 x ; RV64-NEXT: fsd ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB19_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 ; RV64-NEXT: j .LBB19_8 ; RV64-NEXT: .LBB19_7: -; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 2 ; RV64-NEXT: .LBB19_8: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsd ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB19_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 1 ; RV64-NEXT: j .LBB19_11 ; RV64-NEXT: .LBB19_10: -; RV64-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 ; RV64-NEXT: .LBB19_11: ; RV64-NEXT: vfmv.f.s ft0, v26 @@ -2916,66 +2579,54 @@ define <8 x double> @select_v8f64(i1 zeroext %c, <8 x double> %a, <8 x double> % ; RV32-NEXT: fsd ft0, 56(sp) ; RV32-NEXT: bnez a0, .LBB20_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 6 ; RV32-NEXT: j .LBB20_8 ; RV32-NEXT: .LBB20_7: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 6 ; RV32-NEXT: .LBB20_8: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 48(sp) ; RV32-NEXT: bnez a0, .LBB20_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 5 
; RV32-NEXT: j .LBB20_11 ; RV32-NEXT: .LBB20_10: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 5 ; RV32-NEXT: .LBB20_11: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 40(sp) ; RV32-NEXT: bnez a0, .LBB20_13 ; RV32-NEXT: # %bb.12: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 4 ; RV32-NEXT: j .LBB20_14 ; RV32-NEXT: .LBB20_13: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 4 ; RV32-NEXT: .LBB20_14: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 32(sp) ; RV32-NEXT: bnez a0, .LBB20_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 3 ; RV32-NEXT: j .LBB20_17 ; RV32-NEXT: .LBB20_16: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 3 ; RV32-NEXT: .LBB20_17: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB20_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 2 ; RV32-NEXT: j .LBB20_20 ; RV32-NEXT: .LBB20_19: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 2 ; RV32-NEXT: .LBB20_20: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB20_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 1 ; RV32-NEXT: j .LBB20_23 ; RV32-NEXT: .LBB20_22: -; RV32-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 1 ; RV32-NEXT: .LBB20_23: ; RV32-NEXT: vfmv.f.s ft0, v28 @@ -3022,66 +2673,54 @@ define <8 x double> @select_v8f64(i1 zeroext %c, <8 x double> %a, <8 x double> % ; RV64-NEXT: fsd ft0, 56(sp) ; RV64-NEXT: bnez a0, .LBB20_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 6 ; RV64-NEXT: j .LBB20_8 ; RV64-NEXT: .LBB20_7: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 6 ; RV64-NEXT: 
.LBB20_8: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 48(sp) ; RV64-NEXT: bnez a0, .LBB20_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 5 ; RV64-NEXT: j .LBB20_11 ; RV64-NEXT: .LBB20_10: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 5 ; RV64-NEXT: .LBB20_11: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 40(sp) ; RV64-NEXT: bnez a0, .LBB20_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 4 ; RV64-NEXT: j .LBB20_14 ; RV64-NEXT: .LBB20_13: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 4 ; RV64-NEXT: .LBB20_14: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 32(sp) ; RV64-NEXT: bnez a0, .LBB20_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 3 ; RV64-NEXT: j .LBB20_17 ; RV64-NEXT: .LBB20_16: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 3 ; RV64-NEXT: .LBB20_17: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB20_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 2 ; RV64-NEXT: j .LBB20_20 ; RV64-NEXT: .LBB20_19: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 2 ; RV64-NEXT: .LBB20_20: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB20_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 1 ; RV64-NEXT: j .LBB20_23 ; RV64-NEXT: .LBB20_22: -; RV64-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 1 ; RV64-NEXT: .LBB20_23: ; RV64-NEXT: vfmv.f.s ft0, v28 @@ -3133,66 +2772,54 @@ define <8 x double> @selectcc_v8f64(double %a, double %b, <8 x double> %c, <8 x ; RV32-NEXT: fsd ft0, 56(sp) ; RV32-NEXT: bnez a0, .LBB21_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: 
vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 6 ; RV32-NEXT: j .LBB21_8 ; RV32-NEXT: .LBB21_7: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 6 ; RV32-NEXT: .LBB21_8: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 48(sp) ; RV32-NEXT: bnez a0, .LBB21_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 5 ; RV32-NEXT: j .LBB21_11 ; RV32-NEXT: .LBB21_10: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 5 ; RV32-NEXT: .LBB21_11: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 40(sp) ; RV32-NEXT: bnez a0, .LBB21_13 ; RV32-NEXT: # %bb.12: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 4 ; RV32-NEXT: j .LBB21_14 ; RV32-NEXT: .LBB21_13: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 4 ; RV32-NEXT: .LBB21_14: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 32(sp) ; RV32-NEXT: bnez a0, .LBB21_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 3 ; RV32-NEXT: j .LBB21_17 ; RV32-NEXT: .LBB21_16: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 3 ; RV32-NEXT: .LBB21_17: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB21_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 2 ; RV32-NEXT: j .LBB21_20 ; RV32-NEXT: .LBB21_19: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 2 ; RV32-NEXT: .LBB21_20: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB21_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 1 ; RV32-NEXT: j .LBB21_23 ; RV32-NEXT: .LBB21_22: -; RV32-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 1 ; RV32-NEXT: .LBB21_23: ; RV32-NEXT: vfmv.f.s ft0, 
v28 @@ -3240,66 +2867,54 @@ define <8 x double> @selectcc_v8f64(double %a, double %b, <8 x double> %c, <8 x ; RV64-NEXT: fsd ft0, 56(sp) ; RV64-NEXT: bnez a0, .LBB21_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 6 ; RV64-NEXT: j .LBB21_8 ; RV64-NEXT: .LBB21_7: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 6 ; RV64-NEXT: .LBB21_8: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 48(sp) ; RV64-NEXT: bnez a0, .LBB21_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 5 ; RV64-NEXT: j .LBB21_11 ; RV64-NEXT: .LBB21_10: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 5 ; RV64-NEXT: .LBB21_11: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 40(sp) ; RV64-NEXT: bnez a0, .LBB21_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 4 ; RV64-NEXT: j .LBB21_14 ; RV64-NEXT: .LBB21_13: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 4 ; RV64-NEXT: .LBB21_14: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 32(sp) ; RV64-NEXT: bnez a0, .LBB21_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 3 ; RV64-NEXT: j .LBB21_17 ; RV64-NEXT: .LBB21_16: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 3 ; RV64-NEXT: .LBB21_17: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB21_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 2 ; RV64-NEXT: j .LBB21_20 ; RV64-NEXT: .LBB21_19: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 2 ; RV64-NEXT: .LBB21_20: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB21_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV64-NEXT: 
vslidedown.vi v28, v12, 1 ; RV64-NEXT: j .LBB21_23 ; RV64-NEXT: .LBB21_22: -; RV64-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 1 ; RV64-NEXT: .LBB21_23: ; RV64-NEXT: vfmv.f.s ft0, v28 @@ -3351,154 +2966,126 @@ define <16 x double> @select_v16f64(i1 zeroext %c, <16 x double> %a, <16 x doubl ; RV32-NEXT: fsd ft0, 120(sp) ; RV32-NEXT: bnez a0, .LBB22_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 14 ; RV32-NEXT: j .LBB22_8 ; RV32-NEXT: .LBB22_7: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 14 ; RV32-NEXT: .LBB22_8: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 112(sp) ; RV32-NEXT: bnez a0, .LBB22_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 13 ; RV32-NEXT: j .LBB22_11 ; RV32-NEXT: .LBB22_10: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 13 ; RV32-NEXT: .LBB22_11: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 104(sp) ; RV32-NEXT: bnez a0, .LBB22_13 ; RV32-NEXT: # %bb.12: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 12 ; RV32-NEXT: j .LBB22_14 ; RV32-NEXT: .LBB22_13: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 12 ; RV32-NEXT: .LBB22_14: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 96(sp) ; RV32-NEXT: bnez a0, .LBB22_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 11 ; RV32-NEXT: j .LBB22_17 ; RV32-NEXT: .LBB22_16: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 11 ; RV32-NEXT: .LBB22_17: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 88(sp) ; RV32-NEXT: bnez a0, .LBB22_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 10 ; RV32-NEXT: j .LBB22_20 ; RV32-NEXT: .LBB22_19: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: 
vslidedown.vi v24, v8, 10 ; RV32-NEXT: .LBB22_20: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 80(sp) ; RV32-NEXT: bnez a0, .LBB22_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 9 ; RV32-NEXT: j .LBB22_23 ; RV32-NEXT: .LBB22_22: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 9 ; RV32-NEXT: .LBB22_23: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 72(sp) ; RV32-NEXT: bnez a0, .LBB22_25 ; RV32-NEXT: # %bb.24: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 8 ; RV32-NEXT: j .LBB22_26 ; RV32-NEXT: .LBB22_25: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 8 ; RV32-NEXT: .LBB22_26: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 64(sp) ; RV32-NEXT: bnez a0, .LBB22_28 ; RV32-NEXT: # %bb.27: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 7 ; RV32-NEXT: j .LBB22_29 ; RV32-NEXT: .LBB22_28: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 7 ; RV32-NEXT: .LBB22_29: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 56(sp) ; RV32-NEXT: bnez a0, .LBB22_31 ; RV32-NEXT: # %bb.30: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 6 ; RV32-NEXT: j .LBB22_32 ; RV32-NEXT: .LBB22_31: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 6 ; RV32-NEXT: .LBB22_32: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 48(sp) ; RV32-NEXT: bnez a0, .LBB22_34 ; RV32-NEXT: # %bb.33: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 5 ; RV32-NEXT: j .LBB22_35 ; RV32-NEXT: .LBB22_34: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 5 ; RV32-NEXT: .LBB22_35: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 40(sp) ; RV32-NEXT: bnez a0, .LBB22_37 ; RV32-NEXT: # %bb.36: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 
4 ; RV32-NEXT: j .LBB22_38 ; RV32-NEXT: .LBB22_37: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 4 ; RV32-NEXT: .LBB22_38: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 32(sp) ; RV32-NEXT: bnez a0, .LBB22_40 ; RV32-NEXT: # %bb.39: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 3 ; RV32-NEXT: j .LBB22_41 ; RV32-NEXT: .LBB22_40: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 3 ; RV32-NEXT: .LBB22_41: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB22_43 ; RV32-NEXT: # %bb.42: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 2 ; RV32-NEXT: j .LBB22_44 ; RV32-NEXT: .LBB22_43: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 2 ; RV32-NEXT: .LBB22_44: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB22_46 ; RV32-NEXT: # %bb.45: -; RV32-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v8, v16, 1 ; RV32-NEXT: j .LBB22_47 ; RV32-NEXT: .LBB22_46: -; RV32-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: .LBB22_47: ; RV32-NEXT: vfmv.f.s ft0, v8 @@ -3545,154 +3132,126 @@ define <16 x double> @select_v16f64(i1 zeroext %c, <16 x double> %a, <16 x doubl ; RV64-NEXT: fsd ft0, 120(sp) ; RV64-NEXT: bnez a0, .LBB22_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 14 ; RV64-NEXT: j .LBB22_8 ; RV64-NEXT: .LBB22_7: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 14 ; RV64-NEXT: .LBB22_8: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 112(sp) ; RV64-NEXT: bnez a0, .LBB22_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 13 ; RV64-NEXT: j .LBB22_11 ; RV64-NEXT: .LBB22_10: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 13 ; 
RV64-NEXT: .LBB22_11: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 104(sp) ; RV64-NEXT: bnez a0, .LBB22_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 12 ; RV64-NEXT: j .LBB22_14 ; RV64-NEXT: .LBB22_13: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 12 ; RV64-NEXT: .LBB22_14: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 96(sp) ; RV64-NEXT: bnez a0, .LBB22_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 11 ; RV64-NEXT: j .LBB22_17 ; RV64-NEXT: .LBB22_16: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 11 ; RV64-NEXT: .LBB22_17: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 88(sp) ; RV64-NEXT: bnez a0, .LBB22_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 10 ; RV64-NEXT: j .LBB22_20 ; RV64-NEXT: .LBB22_19: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 10 ; RV64-NEXT: .LBB22_20: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 80(sp) ; RV64-NEXT: bnez a0, .LBB22_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 9 ; RV64-NEXT: j .LBB22_23 ; RV64-NEXT: .LBB22_22: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 9 ; RV64-NEXT: .LBB22_23: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 72(sp) ; RV64-NEXT: bnez a0, .LBB22_25 ; RV64-NEXT: # %bb.24: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 8 ; RV64-NEXT: j .LBB22_26 ; RV64-NEXT: .LBB22_25: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 8 ; RV64-NEXT: .LBB22_26: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 64(sp) ; RV64-NEXT: bnez a0, .LBB22_28 ; RV64-NEXT: # %bb.27: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 7 ; RV64-NEXT: j 
.LBB22_29 ; RV64-NEXT: .LBB22_28: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 7 ; RV64-NEXT: .LBB22_29: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 56(sp) ; RV64-NEXT: bnez a0, .LBB22_31 ; RV64-NEXT: # %bb.30: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 6 ; RV64-NEXT: j .LBB22_32 ; RV64-NEXT: .LBB22_31: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 6 ; RV64-NEXT: .LBB22_32: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 48(sp) ; RV64-NEXT: bnez a0, .LBB22_34 ; RV64-NEXT: # %bb.33: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 5 ; RV64-NEXT: j .LBB22_35 ; RV64-NEXT: .LBB22_34: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 5 ; RV64-NEXT: .LBB22_35: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 40(sp) ; RV64-NEXT: bnez a0, .LBB22_37 ; RV64-NEXT: # %bb.36: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 4 ; RV64-NEXT: j .LBB22_38 ; RV64-NEXT: .LBB22_37: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 4 ; RV64-NEXT: .LBB22_38: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 32(sp) ; RV64-NEXT: bnez a0, .LBB22_40 ; RV64-NEXT: # %bb.39: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 3 ; RV64-NEXT: j .LBB22_41 ; RV64-NEXT: .LBB22_40: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 3 ; RV64-NEXT: .LBB22_41: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB22_43 ; RV64-NEXT: # %bb.42: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 2 ; RV64-NEXT: j .LBB22_44 ; RV64-NEXT: .LBB22_43: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 2 ; RV64-NEXT: .LBB22_44: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB22_46 ; RV64-NEXT: 
# %bb.45: -; RV64-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v8, v16, 1 ; RV64-NEXT: j .LBB22_47 ; RV64-NEXT: .LBB22_46: -; RV64-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v8, v8, 1 ; RV64-NEXT: .LBB22_47: ; RV64-NEXT: vfmv.f.s ft0, v8 @@ -3744,154 +3303,126 @@ define <16 x double> @selectcc_v16f64(double %a, double %b, <16 x double> %c, <1 ; RV32-NEXT: fsd ft0, 120(sp) ; RV32-NEXT: bnez a0, .LBB23_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 14 ; RV32-NEXT: j .LBB23_8 ; RV32-NEXT: .LBB23_7: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 14 ; RV32-NEXT: .LBB23_8: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 112(sp) ; RV32-NEXT: bnez a0, .LBB23_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 13 ; RV32-NEXT: j .LBB23_11 ; RV32-NEXT: .LBB23_10: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 13 ; RV32-NEXT: .LBB23_11: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 104(sp) ; RV32-NEXT: bnez a0, .LBB23_13 ; RV32-NEXT: # %bb.12: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 12 ; RV32-NEXT: j .LBB23_14 ; RV32-NEXT: .LBB23_13: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 12 ; RV32-NEXT: .LBB23_14: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 96(sp) ; RV32-NEXT: bnez a0, .LBB23_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 11 ; RV32-NEXT: j .LBB23_17 ; RV32-NEXT: .LBB23_16: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 11 ; RV32-NEXT: .LBB23_17: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 88(sp) ; RV32-NEXT: bnez a0, .LBB23_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 10 ; RV32-NEXT: j .LBB23_20 ; RV32-NEXT: 
.LBB23_19: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 10 ; RV32-NEXT: .LBB23_20: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 80(sp) ; RV32-NEXT: bnez a0, .LBB23_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 9 ; RV32-NEXT: j .LBB23_23 ; RV32-NEXT: .LBB23_22: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 9 ; RV32-NEXT: .LBB23_23: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 72(sp) ; RV32-NEXT: bnez a0, .LBB23_25 ; RV32-NEXT: # %bb.24: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 8 ; RV32-NEXT: j .LBB23_26 ; RV32-NEXT: .LBB23_25: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 8 ; RV32-NEXT: .LBB23_26: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 64(sp) ; RV32-NEXT: bnez a0, .LBB23_28 ; RV32-NEXT: # %bb.27: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 7 ; RV32-NEXT: j .LBB23_29 ; RV32-NEXT: .LBB23_28: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 7 ; RV32-NEXT: .LBB23_29: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 56(sp) ; RV32-NEXT: bnez a0, .LBB23_31 ; RV32-NEXT: # %bb.30: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 6 ; RV32-NEXT: j .LBB23_32 ; RV32-NEXT: .LBB23_31: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 6 ; RV32-NEXT: .LBB23_32: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 48(sp) ; RV32-NEXT: bnez a0, .LBB23_34 ; RV32-NEXT: # %bb.33: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 5 ; RV32-NEXT: j .LBB23_35 ; RV32-NEXT: .LBB23_34: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 5 ; RV32-NEXT: .LBB23_35: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 40(sp) ; RV32-NEXT: bnez a0, .LBB23_37 ; RV32-NEXT: # %bb.36: -; 
RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 4 ; RV32-NEXT: j .LBB23_38 ; RV32-NEXT: .LBB23_37: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 4 ; RV32-NEXT: .LBB23_38: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 32(sp) ; RV32-NEXT: bnez a0, .LBB23_40 ; RV32-NEXT: # %bb.39: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 3 ; RV32-NEXT: j .LBB23_41 ; RV32-NEXT: .LBB23_40: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 3 ; RV32-NEXT: .LBB23_41: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB23_43 ; RV32-NEXT: # %bb.42: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 2 ; RV32-NEXT: j .LBB23_44 ; RV32-NEXT: .LBB23_43: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 2 ; RV32-NEXT: .LBB23_44: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB23_46 ; RV32-NEXT: # %bb.45: -; RV32-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v8, v16, 1 ; RV32-NEXT: j .LBB23_47 ; RV32-NEXT: .LBB23_46: -; RV32-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: .LBB23_47: ; RV32-NEXT: vfmv.f.s ft0, v8 @@ -3939,154 +3470,126 @@ define <16 x double> @selectcc_v16f64(double %a, double %b, <16 x double> %c, <1 ; RV64-NEXT: fsd ft0, 120(sp) ; RV64-NEXT: bnez a0, .LBB23_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 14 ; RV64-NEXT: j .LBB23_8 ; RV64-NEXT: .LBB23_7: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 14 ; RV64-NEXT: .LBB23_8: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 112(sp) ; RV64-NEXT: bnez a0, .LBB23_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 13 ; RV64-NEXT: j .LBB23_11 ; RV64-NEXT: .LBB23_10: -; RV64-NEXT: 
vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 13 ; RV64-NEXT: .LBB23_11: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 104(sp) ; RV64-NEXT: bnez a0, .LBB23_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 12 ; RV64-NEXT: j .LBB23_14 ; RV64-NEXT: .LBB23_13: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 12 ; RV64-NEXT: .LBB23_14: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 96(sp) ; RV64-NEXT: bnez a0, .LBB23_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 11 ; RV64-NEXT: j .LBB23_17 ; RV64-NEXT: .LBB23_16: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 11 ; RV64-NEXT: .LBB23_17: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 88(sp) ; RV64-NEXT: bnez a0, .LBB23_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 10 ; RV64-NEXT: j .LBB23_20 ; RV64-NEXT: .LBB23_19: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 10 ; RV64-NEXT: .LBB23_20: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 80(sp) ; RV64-NEXT: bnez a0, .LBB23_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 9 ; RV64-NEXT: j .LBB23_23 ; RV64-NEXT: .LBB23_22: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 9 ; RV64-NEXT: .LBB23_23: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 72(sp) ; RV64-NEXT: bnez a0, .LBB23_25 ; RV64-NEXT: # %bb.24: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 8 ; RV64-NEXT: j .LBB23_26 ; RV64-NEXT: .LBB23_25: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 8 ; RV64-NEXT: .LBB23_26: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 64(sp) ; RV64-NEXT: bnez a0, .LBB23_28 ; RV64-NEXT: # %bb.27: -; RV64-NEXT: vsetivli a1, 1, 
e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 7 ; RV64-NEXT: j .LBB23_29 ; RV64-NEXT: .LBB23_28: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 7 ; RV64-NEXT: .LBB23_29: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 56(sp) ; RV64-NEXT: bnez a0, .LBB23_31 ; RV64-NEXT: # %bb.30: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 6 ; RV64-NEXT: j .LBB23_32 ; RV64-NEXT: .LBB23_31: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 6 ; RV64-NEXT: .LBB23_32: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 48(sp) ; RV64-NEXT: bnez a0, .LBB23_34 ; RV64-NEXT: # %bb.33: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 5 ; RV64-NEXT: j .LBB23_35 ; RV64-NEXT: .LBB23_34: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 5 ; RV64-NEXT: .LBB23_35: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 40(sp) ; RV64-NEXT: bnez a0, .LBB23_37 ; RV64-NEXT: # %bb.36: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 4 ; RV64-NEXT: j .LBB23_38 ; RV64-NEXT: .LBB23_37: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 4 ; RV64-NEXT: .LBB23_38: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 32(sp) ; RV64-NEXT: bnez a0, .LBB23_40 ; RV64-NEXT: # %bb.39: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 3 ; RV64-NEXT: j .LBB23_41 ; RV64-NEXT: .LBB23_40: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 3 ; RV64-NEXT: .LBB23_41: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB23_43 ; RV64-NEXT: # %bb.42: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 2 ; RV64-NEXT: j .LBB23_44 ; RV64-NEXT: .LBB23_43: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 2 ; RV64-NEXT: .LBB23_44: ; RV64-NEXT: vfmv.f.s ft0, v24 ; 
RV64-NEXT: fsd ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB23_46 ; RV64-NEXT: # %bb.45: -; RV64-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v8, v16, 1 ; RV64-NEXT: j .LBB23_47 ; RV64-NEXT: .LBB23_46: -; RV64-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v8, v8, 1 ; RV64-NEXT: .LBB23_47: ; RV64-NEXT: vfmv.f.s ft0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll index 1dadd1217b83..b1cb89025cf5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll @@ -261,11 +261,9 @@ define <2 x i1> @vselect_v2i1(<2 x i1> %a, <2 x i1> %b, <2 x i1> %cc) { ; CHECK-NEXT: vmerge.vim v26, v26, 1, v0 ; CHECK-NEXT: bnez a0, .LBB12_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vsetvli zero, zero, e8,mf8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: j .LBB12_3 ; CHECK-NEXT: .LBB12_2: -; CHECK-NEXT: vsetvli zero, zero, e8,mf8,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 ; CHECK-NEXT: .LBB12_3: ; CHECK-NEXT: vsetivli a1, 1, e8,mf8,ta,mu @@ -274,11 +272,9 @@ define <2 x i1> @vselect_v2i1(<2 x i1> %a, <2 x i1> %b, <2 x i1> %cc) { ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: bnez a1, .LBB12_5 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: vsetivli a1, 1, e8,mf8,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v25, 1 ; CHECK-NEXT: j .LBB12_6 ; CHECK-NEXT: .LBB12_5: -; CHECK-NEXT: vsetivli a1, 1, e8,mf8,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v26, 1 ; CHECK-NEXT: .LBB12_6: ; CHECK-NEXT: vmv.x.s a1, v25 @@ -310,11 +306,9 @@ define <4 x i1> @vselect_v4i1(<4 x i1> %a, <4 x i1> %b, <4 x i1> %cc) { ; CHECK-NEXT: vmerge.vim v26, v26, 1, v0 ; CHECK-NEXT: bnez a0, .LBB13_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vsetvli zero, zero, e8,mf4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: j .LBB13_3 ; CHECK-NEXT: .LBB13_2: -; CHECK-NEXT: vsetvli zero, zero, e8,mf4,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 ; CHECK-NEXT: .LBB13_3: ; CHECK-NEXT: sb a0, 12(sp) @@ -324,41 +318,33 
@@ define <4 x i1> @vselect_v4i1(<4 x i1> %a, <4 x i1> %b, <4 x i1> %cc) { ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: bnez a0, .LBB13_5 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: vsetivli a0, 1, e8,mf4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 3 ; CHECK-NEXT: j .LBB13_6 ; CHECK-NEXT: .LBB13_5: -; CHECK-NEXT: vsetivli a0, 1, e8,mf4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 3 ; CHECK-NEXT: .LBB13_6: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 15(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,mf4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v27, 2 -; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB13_8 +; CHECK-NEXT: vmv.x.s a1, v28 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 15(sp) +; CHECK-NEXT: bnez a1, .LBB13_8 ; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: vsetivli a0, 1, e8,mf4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 2 ; CHECK-NEXT: j .LBB13_9 ; CHECK-NEXT: .LBB13_8: -; CHECK-NEXT: vsetivli a0, 1, e8,mf4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 2 ; CHECK-NEXT: .LBB13_9: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 14(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,mf4,ta,mu ; CHECK-NEXT: vslidedown.vi v27, v27, 1 -; CHECK-NEXT: vmv.x.s a0, v27 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB13_11 +; CHECK-NEXT: vmv.x.s a1, v27 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 14(sp) +; CHECK-NEXT: bnez a1, .LBB13_11 ; CHECK-NEXT: # %bb.10: -; CHECK-NEXT: vsetivli a0, 1, e8,mf4,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v25, 1 ; CHECK-NEXT: j .LBB13_12 ; CHECK-NEXT: .LBB13_11: -; CHECK-NEXT: vsetivli a0, 1, e8,mf4,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v26, 1 ; CHECK-NEXT: .LBB13_12: ; CHECK-NEXT: vmv.x.s a0, v25 @@ -392,11 +378,9 @@ define <8 x i1> @vselect_v8i1(<8 x i1> %a, <8 x i1> %b, <8 x i1> %cc) { ; CHECK-NEXT: vmerge.vim v26, v26, 1, v0 ; CHECK-NEXT: bnez a0, .LBB14_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vsetvli zero, zero, e8,mf2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: j .LBB14_3 ; CHECK-NEXT: .LBB14_2: 
-; CHECK-NEXT: vsetvli zero, zero, e8,mf2,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 ; CHECK-NEXT: .LBB14_3: ; CHECK-NEXT: sb a0, 8(sp) @@ -406,101 +390,81 @@ define <8 x i1> @vselect_v8i1(<8 x i1> %a, <8 x i1> %b, <8 x i1> %cc) { ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: bnez a0, .LBB14_5 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 7 ; CHECK-NEXT: j .LBB14_6 ; CHECK-NEXT: .LBB14_5: -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 7 ; CHECK-NEXT: .LBB14_6: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 15(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v27, 6 -; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB14_8 +; CHECK-NEXT: vmv.x.s a1, v28 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 15(sp) +; CHECK-NEXT: bnez a1, .LBB14_8 ; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 6 ; CHECK-NEXT: j .LBB14_9 ; CHECK-NEXT: .LBB14_8: -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 6 ; CHECK-NEXT: .LBB14_9: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 14(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v27, 5 -; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB14_11 +; CHECK-NEXT: vmv.x.s a1, v28 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 14(sp) +; CHECK-NEXT: bnez a1, .LBB14_11 ; CHECK-NEXT: # %bb.10: -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 5 ; CHECK-NEXT: j .LBB14_12 ; CHECK-NEXT: .LBB14_11: -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 5 ; CHECK-NEXT: .LBB14_12: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 13(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v27, 4 -; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: andi 
a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB14_14 +; CHECK-NEXT: vmv.x.s a1, v28 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 13(sp) +; CHECK-NEXT: bnez a1, .LBB14_14 ; CHECK-NEXT: # %bb.13: -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 4 ; CHECK-NEXT: j .LBB14_15 ; CHECK-NEXT: .LBB14_14: -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 4 ; CHECK-NEXT: .LBB14_15: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 12(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v27, 3 -; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB14_17 +; CHECK-NEXT: vmv.x.s a1, v28 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 12(sp) +; CHECK-NEXT: bnez a1, .LBB14_17 ; CHECK-NEXT: # %bb.16: -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 3 ; CHECK-NEXT: j .LBB14_18 ; CHECK-NEXT: .LBB14_17: -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 3 ; CHECK-NEXT: .LBB14_18: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 11(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v27, 2 -; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB14_20 +; CHECK-NEXT: vmv.x.s a1, v28 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 11(sp) +; CHECK-NEXT: bnez a1, .LBB14_20 ; CHECK-NEXT: # %bb.19: -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 2 ; CHECK-NEXT: j .LBB14_21 ; CHECK-NEXT: .LBB14_20: -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 2 ; CHECK-NEXT: .LBB14_21: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 10(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v27, v27, 1 -; CHECK-NEXT: vmv.x.s a0, v27 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB14_23 +; CHECK-NEXT: vmv.x.s a1, v27 +; CHECK-NEXT: andi a1, a1, 1 +; 
CHECK-NEXT: sb a0, 10(sp) +; CHECK-NEXT: bnez a1, .LBB14_23 ; CHECK-NEXT: # %bb.22: -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v25, 1 ; CHECK-NEXT: j .LBB14_24 ; CHECK-NEXT: .LBB14_23: -; CHECK-NEXT: vsetivli a0, 1, e8,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v26, 1 ; CHECK-NEXT: .LBB14_24: ; CHECK-NEXT: vmv.x.s a0, v25 @@ -534,11 +498,9 @@ define <16 x i1> @vselect_v16i1(<16 x i1> %a, <16 x i1> %b, <16 x i1> %cc) { ; CHECK-NEXT: vmerge.vim v26, v26, 1, v0 ; CHECK-NEXT: bnez a0, .LBB15_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: j .LBB15_3 ; CHECK-NEXT: .LBB15_2: -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v26 ; CHECK-NEXT: .LBB15_3: ; CHECK-NEXT: sb a0, 0(sp) @@ -548,221 +510,177 @@ define <16 x i1> @vselect_v16i1(<16 x i1> %a, <16 x i1> %b, <16 x i1> %cc) { ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: bnez a0, .LBB15_5 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 15 ; CHECK-NEXT: j .LBB15_6 ; CHECK-NEXT: .LBB15_5: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 15 ; CHECK-NEXT: .LBB15_6: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 15(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v27, 14 -; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB15_8 +; CHECK-NEXT: vmv.x.s a1, v28 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 15(sp) +; CHECK-NEXT: bnez a1, .LBB15_8 ; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 14 ; CHECK-NEXT: j .LBB15_9 ; CHECK-NEXT: .LBB15_8: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 14 ; CHECK-NEXT: .LBB15_9: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 14(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v27, 13 
-; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB15_11 +; CHECK-NEXT: vmv.x.s a1, v28 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 14(sp) +; CHECK-NEXT: bnez a1, .LBB15_11 ; CHECK-NEXT: # %bb.10: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 13 ; CHECK-NEXT: j .LBB15_12 ; CHECK-NEXT: .LBB15_11: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 13 ; CHECK-NEXT: .LBB15_12: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 13(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v27, 12 -; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB15_14 +; CHECK-NEXT: vmv.x.s a1, v28 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 13(sp) +; CHECK-NEXT: bnez a1, .LBB15_14 ; CHECK-NEXT: # %bb.13: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 12 ; CHECK-NEXT: j .LBB15_15 ; CHECK-NEXT: .LBB15_14: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 12 ; CHECK-NEXT: .LBB15_15: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 12(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v27, 11 -; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB15_17 +; CHECK-NEXT: vmv.x.s a1, v28 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 12(sp) +; CHECK-NEXT: bnez a1, .LBB15_17 ; CHECK-NEXT: # %bb.16: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 11 ; CHECK-NEXT: j .LBB15_18 ; CHECK-NEXT: .LBB15_17: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 11 ; CHECK-NEXT: .LBB15_18: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 11(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v27, 10 -; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB15_20 +; CHECK-NEXT: 
vmv.x.s a1, v28 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 11(sp) +; CHECK-NEXT: bnez a1, .LBB15_20 ; CHECK-NEXT: # %bb.19: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 10 ; CHECK-NEXT: j .LBB15_21 ; CHECK-NEXT: .LBB15_20: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 10 ; CHECK-NEXT: .LBB15_21: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 10(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v27, 9 -; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB15_23 +; CHECK-NEXT: vmv.x.s a1, v28 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 10(sp) +; CHECK-NEXT: bnez a1, .LBB15_23 ; CHECK-NEXT: # %bb.22: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 9 ; CHECK-NEXT: j .LBB15_24 ; CHECK-NEXT: .LBB15_23: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 9 ; CHECK-NEXT: .LBB15_24: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 9(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v27, 8 -; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB15_26 +; CHECK-NEXT: vmv.x.s a1, v28 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 9(sp) +; CHECK-NEXT: bnez a1, .LBB15_26 ; CHECK-NEXT: # %bb.25: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 8 ; CHECK-NEXT: j .LBB15_27 ; CHECK-NEXT: .LBB15_26: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 8 ; CHECK-NEXT: .LBB15_27: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 8(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v27, 7 -; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB15_29 +; CHECK-NEXT: vmv.x.s a1, v28 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 8(sp) +; CHECK-NEXT: bnez a1, .LBB15_29 ; 
CHECK-NEXT: # %bb.28: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 7 ; CHECK-NEXT: j .LBB15_30 ; CHECK-NEXT: .LBB15_29: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 7 ; CHECK-NEXT: .LBB15_30: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 7(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v27, 6 -; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB15_32 +; CHECK-NEXT: vmv.x.s a1, v28 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 7(sp) +; CHECK-NEXT: bnez a1, .LBB15_32 ; CHECK-NEXT: # %bb.31: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 6 ; CHECK-NEXT: j .LBB15_33 ; CHECK-NEXT: .LBB15_32: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 6 ; CHECK-NEXT: .LBB15_33: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 6(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v27, 5 -; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB15_35 +; CHECK-NEXT: vmv.x.s a1, v28 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 6(sp) +; CHECK-NEXT: bnez a1, .LBB15_35 ; CHECK-NEXT: # %bb.34: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 5 ; CHECK-NEXT: j .LBB15_36 ; CHECK-NEXT: .LBB15_35: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 5 ; CHECK-NEXT: .LBB15_36: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 5(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v27, 4 -; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB15_38 +; CHECK-NEXT: vmv.x.s a1, v28 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 5(sp) +; CHECK-NEXT: bnez a1, .LBB15_38 ; CHECK-NEXT: # %bb.37: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 4 ; CHECK-NEXT: j 
.LBB15_39 ; CHECK-NEXT: .LBB15_38: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 4 ; CHECK-NEXT: .LBB15_39: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 4(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v27, 3 -; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB15_41 +; CHECK-NEXT: vmv.x.s a1, v28 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 4(sp) +; CHECK-NEXT: bnez a1, .LBB15_41 ; CHECK-NEXT: # %bb.40: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 3 ; CHECK-NEXT: j .LBB15_42 ; CHECK-NEXT: .LBB15_41: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 3 ; CHECK-NEXT: .LBB15_42: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 3(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v27, 2 -; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB15_44 +; CHECK-NEXT: vmv.x.s a1, v28 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 3(sp) +; CHECK-NEXT: bnez a1, .LBB15_44 ; CHECK-NEXT: # %bb.43: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v25, 2 ; CHECK-NEXT: j .LBB15_45 ; CHECK-NEXT: .LBB15_44: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v26, 2 ; CHECK-NEXT: .LBB15_45: ; CHECK-NEXT: vmv.x.s a0, v28 -; CHECK-NEXT: sb a0, 2(sp) -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v27, v27, 1 -; CHECK-NEXT: vmv.x.s a0, v27 -; CHECK-NEXT: andi a0, a0, 1 -; CHECK-NEXT: bnez a0, .LBB15_47 +; CHECK-NEXT: vmv.x.s a1, v27 +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: sb a0, 2(sp) +; CHECK-NEXT: bnez a1, .LBB15_47 ; CHECK-NEXT: # %bb.46: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v25, 1 ; CHECK-NEXT: j .LBB15_48 ; CHECK-NEXT: .LBB15_47: -; CHECK-NEXT: vsetivli a0, 1, e8,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v26, 1 ; 
CHECK-NEXT: .LBB15_48: ; CHECK-NEXT: vmv.x.s a0, v25 @@ -803,11 +721,9 @@ define <32 x i1> @vselect_v32i1(<32 x i1> %a, <32 x i1> %b, <32 x i1> %cc) { ; RV32-NEXT: vmerge.vim v28, v28, 1, v0 ; RV32-NEXT: bnez a1, .LBB16_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: vsetvli zero, zero, e8,m2,ta,mu ; RV32-NEXT: vmv.x.s a1, v26 ; RV32-NEXT: j .LBB16_3 ; RV32-NEXT: .LBB16_2: -; RV32-NEXT: vsetvli zero, zero, e8,m2,ta,mu ; RV32-NEXT: vmv.x.s a1, v28 ; RV32-NEXT: .LBB16_3: ; RV32-NEXT: sb a1, 0(sp) @@ -817,461 +733,369 @@ define <32 x i1> @vselect_v32i1(<32 x i1> %a, <32 x i1> %b, <32 x i1> %cc) { ; RV32-NEXT: andi a1, a1, 1 ; RV32-NEXT: bnez a1, .LBB16_5 ; RV32-NEXT: # %bb.4: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 31 ; RV32-NEXT: j .LBB16_6 ; RV32-NEXT: .LBB16_5: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 31 ; RV32-NEXT: .LBB16_6: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 31(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 30 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_8 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 31(sp) +; RV32-NEXT: bnez a2, .LBB16_8 ; RV32-NEXT: # %bb.7: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 30 ; RV32-NEXT: j .LBB16_9 ; RV32-NEXT: .LBB16_8: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 30 ; RV32-NEXT: .LBB16_9: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 30(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 29 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_11 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 30(sp) +; RV32-NEXT: bnez a2, .LBB16_11 ; RV32-NEXT: # %bb.10: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 29 ; RV32-NEXT: j .LBB16_12 ; RV32-NEXT: 
.LBB16_11: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 29 ; RV32-NEXT: .LBB16_12: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 29(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 28 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_14 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 29(sp) +; RV32-NEXT: bnez a2, .LBB16_14 ; RV32-NEXT: # %bb.13: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 28 ; RV32-NEXT: j .LBB16_15 ; RV32-NEXT: .LBB16_14: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 28 ; RV32-NEXT: .LBB16_15: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 28(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 27 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_17 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 28(sp) +; RV32-NEXT: bnez a2, .LBB16_17 ; RV32-NEXT: # %bb.16: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 27 ; RV32-NEXT: j .LBB16_18 ; RV32-NEXT: .LBB16_17: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 27 ; RV32-NEXT: .LBB16_18: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 27(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 26 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_20 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 27(sp) +; RV32-NEXT: bnez a2, .LBB16_20 ; RV32-NEXT: # %bb.19: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 26 ; RV32-NEXT: j .LBB16_21 ; RV32-NEXT: .LBB16_20: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 26 ; RV32-NEXT: .LBB16_21: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 26(sp) -; 
RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 25 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_23 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 26(sp) +; RV32-NEXT: bnez a2, .LBB16_23 ; RV32-NEXT: # %bb.22: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 25 ; RV32-NEXT: j .LBB16_24 ; RV32-NEXT: .LBB16_23: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 25 ; RV32-NEXT: .LBB16_24: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 25(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 24 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_26 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 25(sp) +; RV32-NEXT: bnez a2, .LBB16_26 ; RV32-NEXT: # %bb.25: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 24 ; RV32-NEXT: j .LBB16_27 ; RV32-NEXT: .LBB16_26: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 24 ; RV32-NEXT: .LBB16_27: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 24(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 23 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_29 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 24(sp) +; RV32-NEXT: bnez a2, .LBB16_29 ; RV32-NEXT: # %bb.28: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 23 ; RV32-NEXT: j .LBB16_30 ; RV32-NEXT: .LBB16_29: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 23 ; RV32-NEXT: .LBB16_30: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 23(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 22 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_32 +; RV32-NEXT: 
vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 23(sp) +; RV32-NEXT: bnez a2, .LBB16_32 ; RV32-NEXT: # %bb.31: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 22 ; RV32-NEXT: j .LBB16_33 ; RV32-NEXT: .LBB16_32: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 22 ; RV32-NEXT: .LBB16_33: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 22(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 21 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_35 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 22(sp) +; RV32-NEXT: bnez a2, .LBB16_35 ; RV32-NEXT: # %bb.34: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 21 ; RV32-NEXT: j .LBB16_36 ; RV32-NEXT: .LBB16_35: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 21 ; RV32-NEXT: .LBB16_36: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 21(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 20 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_38 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 21(sp) +; RV32-NEXT: bnez a2, .LBB16_38 ; RV32-NEXT: # %bb.37: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 20 ; RV32-NEXT: j .LBB16_39 ; RV32-NEXT: .LBB16_38: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 20 ; RV32-NEXT: .LBB16_39: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 20(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 19 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_41 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 20(sp) +; RV32-NEXT: bnez a2, .LBB16_41 ; RV32-NEXT: # %bb.40: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: 
vslidedown.vi v8, v26, 19 ; RV32-NEXT: j .LBB16_42 ; RV32-NEXT: .LBB16_41: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 19 ; RV32-NEXT: .LBB16_42: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 19(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 18 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_44 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 19(sp) +; RV32-NEXT: bnez a2, .LBB16_44 ; RV32-NEXT: # %bb.43: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 18 ; RV32-NEXT: j .LBB16_45 ; RV32-NEXT: .LBB16_44: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 18 ; RV32-NEXT: .LBB16_45: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 18(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 17 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_47 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 18(sp) +; RV32-NEXT: bnez a2, .LBB16_47 ; RV32-NEXT: # %bb.46: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 17 ; RV32-NEXT: j .LBB16_48 ; RV32-NEXT: .LBB16_47: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 17 ; RV32-NEXT: .LBB16_48: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 17(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 16 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_50 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 17(sp) +; RV32-NEXT: bnez a2, .LBB16_50 ; RV32-NEXT: # %bb.49: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 16 ; RV32-NEXT: j .LBB16_51 ; RV32-NEXT: .LBB16_50: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 16 ; RV32-NEXT: .LBB16_51: ; 
RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 16(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 15 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_53 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 16(sp) +; RV32-NEXT: bnez a2, .LBB16_53 ; RV32-NEXT: # %bb.52: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 15 ; RV32-NEXT: j .LBB16_54 ; RV32-NEXT: .LBB16_53: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 15 ; RV32-NEXT: .LBB16_54: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 15(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 14 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_56 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 15(sp) +; RV32-NEXT: bnez a2, .LBB16_56 ; RV32-NEXT: # %bb.55: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 14 ; RV32-NEXT: j .LBB16_57 ; RV32-NEXT: .LBB16_56: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 14 ; RV32-NEXT: .LBB16_57: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 14(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 13 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_59 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 14(sp) +; RV32-NEXT: bnez a2, .LBB16_59 ; RV32-NEXT: # %bb.58: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 13 ; RV32-NEXT: j .LBB16_60 ; RV32-NEXT: .LBB16_59: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 13 ; RV32-NEXT: .LBB16_60: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 13(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 12 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi 
a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_62 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 13(sp) +; RV32-NEXT: bnez a2, .LBB16_62 ; RV32-NEXT: # %bb.61: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 12 ; RV32-NEXT: j .LBB16_63 ; RV32-NEXT: .LBB16_62: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 12 ; RV32-NEXT: .LBB16_63: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 12(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 11 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_65 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 12(sp) +; RV32-NEXT: bnez a2, .LBB16_65 ; RV32-NEXT: # %bb.64: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 11 ; RV32-NEXT: j .LBB16_66 ; RV32-NEXT: .LBB16_65: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 11 ; RV32-NEXT: .LBB16_66: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 11(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 10 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_68 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 11(sp) +; RV32-NEXT: bnez a2, .LBB16_68 ; RV32-NEXT: # %bb.67: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 10 ; RV32-NEXT: j .LBB16_69 ; RV32-NEXT: .LBB16_68: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 10 ; RV32-NEXT: .LBB16_69: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 10(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 9 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_71 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 10(sp) +; RV32-NEXT: bnez a2, .LBB16_71 ; RV32-NEXT: # %bb.70: 
-; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 9 ; RV32-NEXT: j .LBB16_72 ; RV32-NEXT: .LBB16_71: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 9 ; RV32-NEXT: .LBB16_72: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 9(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 8 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_74 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 9(sp) +; RV32-NEXT: bnez a2, .LBB16_74 ; RV32-NEXT: # %bb.73: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 8 ; RV32-NEXT: j .LBB16_75 ; RV32-NEXT: .LBB16_74: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 8 ; RV32-NEXT: .LBB16_75: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 8(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 7 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_77 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 8(sp) +; RV32-NEXT: bnez a2, .LBB16_77 ; RV32-NEXT: # %bb.76: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 7 ; RV32-NEXT: j .LBB16_78 ; RV32-NEXT: .LBB16_77: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 7 ; RV32-NEXT: .LBB16_78: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 7(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 6 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_80 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 7(sp) +; RV32-NEXT: bnez a2, .LBB16_80 ; RV32-NEXT: # %bb.79: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 6 ; RV32-NEXT: j .LBB16_81 ; RV32-NEXT: .LBB16_80: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi 
v8, v28, 6 ; RV32-NEXT: .LBB16_81: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 6(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 5 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_83 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 6(sp) +; RV32-NEXT: bnez a2, .LBB16_83 ; RV32-NEXT: # %bb.82: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 5 ; RV32-NEXT: j .LBB16_84 ; RV32-NEXT: .LBB16_83: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 5 ; RV32-NEXT: .LBB16_84: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 5(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 4 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_86 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 5(sp) +; RV32-NEXT: bnez a2, .LBB16_86 ; RV32-NEXT: # %bb.85: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 4 ; RV32-NEXT: j .LBB16_87 ; RV32-NEXT: .LBB16_86: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 4 ; RV32-NEXT: .LBB16_87: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 4(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 3 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_89 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 4(sp) +; RV32-NEXT: bnez a2, .LBB16_89 ; RV32-NEXT: # %bb.88: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 3 ; RV32-NEXT: j .LBB16_90 ; RV32-NEXT: .LBB16_89: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 3 ; RV32-NEXT: .LBB16_90: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 3(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v30, 2 -; RV32-NEXT: vmv.x.s a1, v8 
-; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_92 +; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 3(sp) +; RV32-NEXT: bnez a2, .LBB16_92 ; RV32-NEXT: # %bb.91: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v26, 2 ; RV32-NEXT: j .LBB16_93 ; RV32-NEXT: .LBB16_92: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v8, v28, 2 ; RV32-NEXT: .LBB16_93: ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sb a1, 2(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v30, v30, 1 -; RV32-NEXT: vmv.x.s a1, v30 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB16_95 +; RV32-NEXT: vmv.x.s a2, v30 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 2(sp) +; RV32-NEXT: bnez a2, .LBB16_95 ; RV32-NEXT: # %bb.94: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v26, 1 ; RV32-NEXT: j .LBB16_96 ; RV32-NEXT: .LBB16_95: -; RV32-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v28, 1 ; RV32-NEXT: .LBB16_96: ; RV32-NEXT: vmv.x.s a1, v26 @@ -1311,11 +1135,9 @@ define <32 x i1> @vselect_v32i1(<32 x i1> %a, <32 x i1> %b, <32 x i1> %cc) { ; RV64-NEXT: vmerge.vim v28, v28, 1, v0 ; RV64-NEXT: bnez a1, .LBB16_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: vsetvli zero, zero, e8,m2,ta,mu ; RV64-NEXT: vmv.x.s a1, v26 ; RV64-NEXT: j .LBB16_3 ; RV64-NEXT: .LBB16_2: -; RV64-NEXT: vsetvli zero, zero, e8,m2,ta,mu ; RV64-NEXT: vmv.x.s a1, v28 ; RV64-NEXT: .LBB16_3: ; RV64-NEXT: sb a1, 0(sp) @@ -1325,461 +1147,369 @@ define <32 x i1> @vselect_v32i1(<32 x i1> %a, <32 x i1> %b, <32 x i1> %cc) { ; RV64-NEXT: andi a1, a1, 1 ; RV64-NEXT: bnez a1, .LBB16_5 ; RV64-NEXT: # %bb.4: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 31 ; RV64-NEXT: j .LBB16_6 ; RV64-NEXT: .LBB16_5: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 31 ; RV64-NEXT: .LBB16_6: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 31(sp) -; RV64-NEXT: 
vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 30 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_8 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 31(sp) +; RV64-NEXT: bnez a2, .LBB16_8 ; RV64-NEXT: # %bb.7: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 30 ; RV64-NEXT: j .LBB16_9 ; RV64-NEXT: .LBB16_8: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 30 ; RV64-NEXT: .LBB16_9: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 30(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 29 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_11 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 30(sp) +; RV64-NEXT: bnez a2, .LBB16_11 ; RV64-NEXT: # %bb.10: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 29 ; RV64-NEXT: j .LBB16_12 ; RV64-NEXT: .LBB16_11: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 29 ; RV64-NEXT: .LBB16_12: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 29(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 28 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_14 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 29(sp) +; RV64-NEXT: bnez a2, .LBB16_14 ; RV64-NEXT: # %bb.13: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 28 ; RV64-NEXT: j .LBB16_15 ; RV64-NEXT: .LBB16_14: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 28 ; RV64-NEXT: .LBB16_15: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 28(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 27 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_17 +; RV64-NEXT: vmv.x.s a2, v8 +; 
RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 28(sp) +; RV64-NEXT: bnez a2, .LBB16_17 ; RV64-NEXT: # %bb.16: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 27 ; RV64-NEXT: j .LBB16_18 ; RV64-NEXT: .LBB16_17: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 27 ; RV64-NEXT: .LBB16_18: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 27(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 26 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_20 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 27(sp) +; RV64-NEXT: bnez a2, .LBB16_20 ; RV64-NEXT: # %bb.19: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 26 ; RV64-NEXT: j .LBB16_21 ; RV64-NEXT: .LBB16_20: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 26 ; RV64-NEXT: .LBB16_21: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 26(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 25 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_23 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 26(sp) +; RV64-NEXT: bnez a2, .LBB16_23 ; RV64-NEXT: # %bb.22: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 25 ; RV64-NEXT: j .LBB16_24 ; RV64-NEXT: .LBB16_23: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 25 ; RV64-NEXT: .LBB16_24: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 25(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 24 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_26 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 25(sp) +; RV64-NEXT: bnez a2, .LBB16_26 ; RV64-NEXT: # %bb.25: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, 
v26, 24 ; RV64-NEXT: j .LBB16_27 ; RV64-NEXT: .LBB16_26: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 24 ; RV64-NEXT: .LBB16_27: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 24(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 23 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_29 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 24(sp) +; RV64-NEXT: bnez a2, .LBB16_29 ; RV64-NEXT: # %bb.28: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 23 ; RV64-NEXT: j .LBB16_30 ; RV64-NEXT: .LBB16_29: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 23 ; RV64-NEXT: .LBB16_30: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 23(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 22 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_32 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 23(sp) +; RV64-NEXT: bnez a2, .LBB16_32 ; RV64-NEXT: # %bb.31: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 22 ; RV64-NEXT: j .LBB16_33 ; RV64-NEXT: .LBB16_32: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 22 ; RV64-NEXT: .LBB16_33: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 22(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 21 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_35 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 22(sp) +; RV64-NEXT: bnez a2, .LBB16_35 ; RV64-NEXT: # %bb.34: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 21 ; RV64-NEXT: j .LBB16_36 ; RV64-NEXT: .LBB16_35: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 21 ; RV64-NEXT: .LBB16_36: ; RV64-NEXT: vmv.x.s 
a1, v8 -; RV64-NEXT: sb a1, 21(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 20 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_38 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 21(sp) +; RV64-NEXT: bnez a2, .LBB16_38 ; RV64-NEXT: # %bb.37: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 20 ; RV64-NEXT: j .LBB16_39 ; RV64-NEXT: .LBB16_38: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 20 ; RV64-NEXT: .LBB16_39: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 20(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 19 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_41 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 20(sp) +; RV64-NEXT: bnez a2, .LBB16_41 ; RV64-NEXT: # %bb.40: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 19 ; RV64-NEXT: j .LBB16_42 ; RV64-NEXT: .LBB16_41: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 19 ; RV64-NEXT: .LBB16_42: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 19(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 18 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_44 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 19(sp) +; RV64-NEXT: bnez a2, .LBB16_44 ; RV64-NEXT: # %bb.43: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 18 ; RV64-NEXT: j .LBB16_45 ; RV64-NEXT: .LBB16_44: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 18 ; RV64-NEXT: .LBB16_45: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 18(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 17 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; 
RV64-NEXT: bnez a1, .LBB16_47 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 18(sp) +; RV64-NEXT: bnez a2, .LBB16_47 ; RV64-NEXT: # %bb.46: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 17 ; RV64-NEXT: j .LBB16_48 ; RV64-NEXT: .LBB16_47: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 17 ; RV64-NEXT: .LBB16_48: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 17(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 16 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_50 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 17(sp) +; RV64-NEXT: bnez a2, .LBB16_50 ; RV64-NEXT: # %bb.49: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 16 ; RV64-NEXT: j .LBB16_51 ; RV64-NEXT: .LBB16_50: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 16 ; RV64-NEXT: .LBB16_51: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 16(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 15 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_53 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 16(sp) +; RV64-NEXT: bnez a2, .LBB16_53 ; RV64-NEXT: # %bb.52: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 15 ; RV64-NEXT: j .LBB16_54 ; RV64-NEXT: .LBB16_53: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 15 ; RV64-NEXT: .LBB16_54: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 15(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 14 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_56 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 15(sp) +; RV64-NEXT: bnez a2, .LBB16_56 ; RV64-NEXT: # %bb.55: -; 
RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 14 ; RV64-NEXT: j .LBB16_57 ; RV64-NEXT: .LBB16_56: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 14 ; RV64-NEXT: .LBB16_57: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 14(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 13 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_59 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 14(sp) +; RV64-NEXT: bnez a2, .LBB16_59 ; RV64-NEXT: # %bb.58: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 13 ; RV64-NEXT: j .LBB16_60 ; RV64-NEXT: .LBB16_59: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 13 ; RV64-NEXT: .LBB16_60: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 13(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 12 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_62 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 13(sp) +; RV64-NEXT: bnez a2, .LBB16_62 ; RV64-NEXT: # %bb.61: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 12 ; RV64-NEXT: j .LBB16_63 ; RV64-NEXT: .LBB16_62: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 12 ; RV64-NEXT: .LBB16_63: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 12(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 11 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_65 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 12(sp) +; RV64-NEXT: bnez a2, .LBB16_65 ; RV64-NEXT: # %bb.64: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 11 ; RV64-NEXT: j .LBB16_66 ; RV64-NEXT: .LBB16_65: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: 
vslidedown.vi v8, v28, 11 ; RV64-NEXT: .LBB16_66: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 11(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 10 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_68 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 11(sp) +; RV64-NEXT: bnez a2, .LBB16_68 ; RV64-NEXT: # %bb.67: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 10 ; RV64-NEXT: j .LBB16_69 ; RV64-NEXT: .LBB16_68: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 10 ; RV64-NEXT: .LBB16_69: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 10(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 9 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_71 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 10(sp) +; RV64-NEXT: bnez a2, .LBB16_71 ; RV64-NEXT: # %bb.70: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 9 ; RV64-NEXT: j .LBB16_72 ; RV64-NEXT: .LBB16_71: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 9 ; RV64-NEXT: .LBB16_72: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 9(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 8 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_74 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 9(sp) +; RV64-NEXT: bnez a2, .LBB16_74 ; RV64-NEXT: # %bb.73: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 8 ; RV64-NEXT: j .LBB16_75 ; RV64-NEXT: .LBB16_74: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 8 ; RV64-NEXT: .LBB16_75: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 8(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 7 -; 
RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_77 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 8(sp) +; RV64-NEXT: bnez a2, .LBB16_77 ; RV64-NEXT: # %bb.76: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 7 ; RV64-NEXT: j .LBB16_78 ; RV64-NEXT: .LBB16_77: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 7 ; RV64-NEXT: .LBB16_78: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 7(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 6 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_80 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 7(sp) +; RV64-NEXT: bnez a2, .LBB16_80 ; RV64-NEXT: # %bb.79: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 6 ; RV64-NEXT: j .LBB16_81 ; RV64-NEXT: .LBB16_80: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 6 ; RV64-NEXT: .LBB16_81: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 6(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 5 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_83 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 6(sp) +; RV64-NEXT: bnez a2, .LBB16_83 ; RV64-NEXT: # %bb.82: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 5 ; RV64-NEXT: j .LBB16_84 ; RV64-NEXT: .LBB16_83: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 5 ; RV64-NEXT: .LBB16_84: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 5(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 4 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_86 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 5(sp) +; RV64-NEXT: bnez a2, 
.LBB16_86 ; RV64-NEXT: # %bb.85: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 4 ; RV64-NEXT: j .LBB16_87 ; RV64-NEXT: .LBB16_86: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 4 ; RV64-NEXT: .LBB16_87: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 4(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 3 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_89 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 4(sp) +; RV64-NEXT: bnez a2, .LBB16_89 ; RV64-NEXT: # %bb.88: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 3 ; RV64-NEXT: j .LBB16_90 ; RV64-NEXT: .LBB16_89: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 3 ; RV64-NEXT: .LBB16_90: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 3(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v30, 2 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_92 +; RV64-NEXT: vmv.x.s a2, v8 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 3(sp) +; RV64-NEXT: bnez a2, .LBB16_92 ; RV64-NEXT: # %bb.91: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v26, 2 ; RV64-NEXT: j .LBB16_93 ; RV64-NEXT: .LBB16_92: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v8, v28, 2 ; RV64-NEXT: .LBB16_93: ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sb a1, 2(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v30, v30, 1 -; RV64-NEXT: vmv.x.s a1, v30 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB16_95 +; RV64-NEXT: vmv.x.s a2, v30 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 2(sp) +; RV64-NEXT: bnez a2, .LBB16_95 ; RV64-NEXT: # %bb.94: -; RV64-NEXT: vsetivli a1, 1, e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v26, 1 ; RV64-NEXT: j .LBB16_96 ; RV64-NEXT: .LBB16_95: -; RV64-NEXT: vsetivli a1, 1, 
e8,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v28, 1 ; RV64-NEXT: .LBB16_96: ; RV64-NEXT: vmv.x.s a1, v26 @@ -1823,11 +1553,9 @@ define <64 x i1> @vselect_v64i1(<64 x i1> %a, <64 x i1> %b, <64 x i1> %cc) { ; RV32-NEXT: vmerge.vim v8, v16, 1, v0 ; RV32-NEXT: bnez a1, .LBB17_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: vsetvli zero, zero, e8,m4,ta,mu ; RV32-NEXT: vmv.x.s a1, v28 ; RV32-NEXT: j .LBB17_3 ; RV32-NEXT: .LBB17_2: -; RV32-NEXT: vsetvli zero, zero, e8,m4,ta,mu ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: .LBB17_3: ; RV32-NEXT: sb a1, 0(sp) @@ -1838,972 +1566,784 @@ define <64 x i1> @vselect_v64i1(<64 x i1> %a, <64 x i1> %b, <64 x i1> %cc) { ; RV32-NEXT: andi a2, a2, 1 ; RV32-NEXT: bnez a2, .LBB17_5 ; RV32-NEXT: # %bb.4: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vx v16, v28, a1 ; RV32-NEXT: j .LBB17_6 ; RV32-NEXT: .LBB17_5: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vx v16, v8, a1 ; RV32-NEXT: .LBB17_6: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 62 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 63(sp) -; RV32-NEXT: addi a1, zero, 62 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_8 +; RV32-NEXT: bnez a3, .LBB17_8 ; RV32-NEXT: # %bb.7: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_9 ; RV32-NEXT: .LBB17_8: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_9: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 61 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 62(sp) -; RV32-NEXT: addi a1, zero, 61 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: 
vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_11 +; RV32-NEXT: bnez a3, .LBB17_11 ; RV32-NEXT: # %bb.10: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_12 ; RV32-NEXT: .LBB17_11: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_12: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 60 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 61(sp) -; RV32-NEXT: addi a1, zero, 60 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_14 +; RV32-NEXT: bnez a3, .LBB17_14 ; RV32-NEXT: # %bb.13: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_15 ; RV32-NEXT: .LBB17_14: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_15: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 59 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 60(sp) -; RV32-NEXT: addi a1, zero, 59 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_17 +; RV32-NEXT: bnez a3, .LBB17_17 ; RV32-NEXT: # %bb.16: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_18 ; RV32-NEXT: .LBB17_17: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx 
v16, v8, a2 ; RV32-NEXT: .LBB17_18: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 58 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 59(sp) -; RV32-NEXT: addi a1, zero, 58 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_20 +; RV32-NEXT: bnez a3, .LBB17_20 ; RV32-NEXT: # %bb.19: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_21 ; RV32-NEXT: .LBB17_20: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_21: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 57 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 58(sp) -; RV32-NEXT: addi a1, zero, 57 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_23 +; RV32-NEXT: bnez a3, .LBB17_23 ; RV32-NEXT: # %bb.22: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_24 ; RV32-NEXT: .LBB17_23: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_24: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 56 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 57(sp) -; RV32-NEXT: addi a1, zero, 56 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_26 +; RV32-NEXT: bnez a3, 
.LBB17_26 ; RV32-NEXT: # %bb.25: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_27 ; RV32-NEXT: .LBB17_26: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_27: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 55 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 56(sp) -; RV32-NEXT: addi a1, zero, 55 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_29 +; RV32-NEXT: bnez a3, .LBB17_29 ; RV32-NEXT: # %bb.28: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_30 ; RV32-NEXT: .LBB17_29: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_30: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 54 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 55(sp) -; RV32-NEXT: addi a1, zero, 54 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_32 +; RV32-NEXT: bnez a3, .LBB17_32 ; RV32-NEXT: # %bb.31: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_33 ; RV32-NEXT: .LBB17_32: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_33: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 53 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; 
RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 54(sp) -; RV32-NEXT: addi a1, zero, 53 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_35 +; RV32-NEXT: bnez a3, .LBB17_35 ; RV32-NEXT: # %bb.34: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_36 ; RV32-NEXT: .LBB17_35: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_36: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 52 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 53(sp) -; RV32-NEXT: addi a1, zero, 52 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_38 +; RV32-NEXT: bnez a3, .LBB17_38 ; RV32-NEXT: # %bb.37: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_39 ; RV32-NEXT: .LBB17_38: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_39: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 51 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 52(sp) -; RV32-NEXT: addi a1, zero, 51 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_41 +; RV32-NEXT: bnez a3, .LBB17_41 ; RV32-NEXT: # %bb.40: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx 
v16, v28, a2 ; RV32-NEXT: j .LBB17_42 ; RV32-NEXT: .LBB17_41: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_42: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 50 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 51(sp) -; RV32-NEXT: addi a1, zero, 50 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_44 +; RV32-NEXT: bnez a3, .LBB17_44 ; RV32-NEXT: # %bb.43: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_45 ; RV32-NEXT: .LBB17_44: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_45: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 49 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 50(sp) -; RV32-NEXT: addi a1, zero, 49 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_47 +; RV32-NEXT: bnez a3, .LBB17_47 ; RV32-NEXT: # %bb.46: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_48 ; RV32-NEXT: .LBB17_47: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_48: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 48 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 49(sp) -; RV32-NEXT: addi a1, zero, 48 -; RV32-NEXT: vsetivli a2, 1, 
e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_50 +; RV32-NEXT: bnez a3, .LBB17_50 ; RV32-NEXT: # %bb.49: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_51 ; RV32-NEXT: .LBB17_50: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_51: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 47 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 48(sp) -; RV32-NEXT: addi a1, zero, 47 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_53 +; RV32-NEXT: bnez a3, .LBB17_53 ; RV32-NEXT: # %bb.52: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_54 ; RV32-NEXT: .LBB17_53: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_54: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 46 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 47(sp) -; RV32-NEXT: addi a1, zero, 46 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_56 +; RV32-NEXT: bnez a3, .LBB17_56 ; RV32-NEXT: # %bb.55: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_57 ; RV32-NEXT: .LBB17_56: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; 
RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_57: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 45 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 46(sp) -; RV32-NEXT: addi a1, zero, 45 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_59 +; RV32-NEXT: bnez a3, .LBB17_59 ; RV32-NEXT: # %bb.58: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_60 ; RV32-NEXT: .LBB17_59: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_60: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 44 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 45(sp) -; RV32-NEXT: addi a1, zero, 44 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_62 +; RV32-NEXT: bnez a3, .LBB17_62 ; RV32-NEXT: # %bb.61: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_63 ; RV32-NEXT: .LBB17_62: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_63: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 43 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 44(sp) -; RV32-NEXT: addi a1, zero, 43 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_65 +; 
RV32-NEXT: bnez a3, .LBB17_65 ; RV32-NEXT: # %bb.64: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_66 ; RV32-NEXT: .LBB17_65: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_66: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 42 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 43(sp) -; RV32-NEXT: addi a1, zero, 42 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_68 +; RV32-NEXT: bnez a3, .LBB17_68 ; RV32-NEXT: # %bb.67: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_69 ; RV32-NEXT: .LBB17_68: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_69: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 41 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 42(sp) -; RV32-NEXT: addi a1, zero, 41 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_71 +; RV32-NEXT: bnez a3, .LBB17_71 ; RV32-NEXT: # %bb.70: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_72 ; RV32-NEXT: .LBB17_71: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_72: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 40 +; RV32-NEXT: 
vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 41(sp) -; RV32-NEXT: addi a1, zero, 40 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_74 +; RV32-NEXT: bnez a3, .LBB17_74 ; RV32-NEXT: # %bb.73: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_75 ; RV32-NEXT: .LBB17_74: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_75: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 39 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 40(sp) -; RV32-NEXT: addi a1, zero, 39 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_77 +; RV32-NEXT: bnez a3, .LBB17_77 ; RV32-NEXT: # %bb.76: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_78 ; RV32-NEXT: .LBB17_77: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_78: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 38 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 39(sp) -; RV32-NEXT: addi a1, zero, 38 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_80 +; RV32-NEXT: bnez a3, .LBB17_80 ; RV32-NEXT: # %bb.79: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 
+; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_81 ; RV32-NEXT: .LBB17_80: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_81: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 37 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 38(sp) -; RV32-NEXT: addi a1, zero, 37 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_83 +; RV32-NEXT: bnez a3, .LBB17_83 ; RV32-NEXT: # %bb.82: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_84 ; RV32-NEXT: .LBB17_83: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_84: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 36 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 37(sp) -; RV32-NEXT: addi a1, zero, 36 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_86 +; RV32-NEXT: bnez a3, .LBB17_86 ; RV32-NEXT: # %bb.85: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_87 ; RV32-NEXT: .LBB17_86: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_87: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 35 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 36(sp) -; RV32-NEXT: addi a1, zero, 35 -; 
RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_89 +; RV32-NEXT: bnez a3, .LBB17_89 ; RV32-NEXT: # %bb.88: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_90 ; RV32-NEXT: .LBB17_89: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_90: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 34 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 35(sp) -; RV32-NEXT: addi a1, zero, 34 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_92 +; RV32-NEXT: bnez a3, .LBB17_92 ; RV32-NEXT: # %bb.91: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_93 ; RV32-NEXT: .LBB17_92: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_93: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 33 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 34(sp) -; RV32-NEXT: addi a1, zero, 33 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_95 +; RV32-NEXT: bnez a3, .LBB17_95 ; RV32-NEXT: # %bb.94: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_96 ; RV32-NEXT: .LBB17_95: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: 
vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_96: ; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: addi a2, zero, 32 +; RV32-NEXT: vslidedown.vx v16, v12, a2 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: andi a3, a3, 1 ; RV32-NEXT: sb a1, 33(sp) -; RV32-NEXT: addi a1, zero, 32 -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v12, a1 -; RV32-NEXT: vmv.x.s a2, v16 -; RV32-NEXT: andi a2, a2, 1 -; RV32-NEXT: bnez a2, .LBB17_98 +; RV32-NEXT: bnez a3, .LBB17_98 ; RV32-NEXT: # %bb.97: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v28, a1 +; RV32-NEXT: vslidedown.vx v16, v28, a2 ; RV32-NEXT: j .LBB17_99 ; RV32-NEXT: .LBB17_98: -; RV32-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV32-NEXT: vslidedown.vx v16, v8, a1 +; RV32-NEXT: vslidedown.vx v16, v8, a2 ; RV32-NEXT: .LBB17_99: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 32(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 31 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_101 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 32(sp) +; RV32-NEXT: bnez a2, .LBB17_101 ; RV32-NEXT: # %bb.100: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 31 ; RV32-NEXT: j .LBB17_102 ; RV32-NEXT: .LBB17_101: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 31 ; RV32-NEXT: .LBB17_102: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 31(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 30 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_104 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 31(sp) +; RV32-NEXT: bnez a2, .LBB17_104 ; RV32-NEXT: # %bb.103: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 30 ; RV32-NEXT: j .LBB17_105 ; RV32-NEXT: .LBB17_104: -; RV32-NEXT: 
vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 30 ; RV32-NEXT: .LBB17_105: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 30(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 29 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_107 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 30(sp) +; RV32-NEXT: bnez a2, .LBB17_107 ; RV32-NEXT: # %bb.106: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 29 ; RV32-NEXT: j .LBB17_108 ; RV32-NEXT: .LBB17_107: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 29 ; RV32-NEXT: .LBB17_108: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 29(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 28 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_110 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 29(sp) +; RV32-NEXT: bnez a2, .LBB17_110 ; RV32-NEXT: # %bb.109: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 28 ; RV32-NEXT: j .LBB17_111 ; RV32-NEXT: .LBB17_110: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 28 ; RV32-NEXT: .LBB17_111: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 28(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 27 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_113 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 28(sp) +; RV32-NEXT: bnez a2, .LBB17_113 ; RV32-NEXT: # %bb.112: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 27 ; RV32-NEXT: j .LBB17_114 ; RV32-NEXT: .LBB17_113: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 27 ; RV32-NEXT: .LBB17_114: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 27(sp) -; 
RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 26 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_116 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 27(sp) +; RV32-NEXT: bnez a2, .LBB17_116 ; RV32-NEXT: # %bb.115: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 26 ; RV32-NEXT: j .LBB17_117 ; RV32-NEXT: .LBB17_116: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 26 ; RV32-NEXT: .LBB17_117: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 26(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 25 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_119 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 26(sp) +; RV32-NEXT: bnez a2, .LBB17_119 ; RV32-NEXT: # %bb.118: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 25 ; RV32-NEXT: j .LBB17_120 ; RV32-NEXT: .LBB17_119: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 25 ; RV32-NEXT: .LBB17_120: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 25(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 24 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_122 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 25(sp) +; RV32-NEXT: bnez a2, .LBB17_122 ; RV32-NEXT: # %bb.121: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 24 ; RV32-NEXT: j .LBB17_123 ; RV32-NEXT: .LBB17_122: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 24 ; RV32-NEXT: .LBB17_123: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 24(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 23 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: 
bnez a1, .LBB17_125 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 24(sp) +; RV32-NEXT: bnez a2, .LBB17_125 ; RV32-NEXT: # %bb.124: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 23 ; RV32-NEXT: j .LBB17_126 ; RV32-NEXT: .LBB17_125: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 23 ; RV32-NEXT: .LBB17_126: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 23(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 22 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_128 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 23(sp) +; RV32-NEXT: bnez a2, .LBB17_128 ; RV32-NEXT: # %bb.127: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 22 ; RV32-NEXT: j .LBB17_129 ; RV32-NEXT: .LBB17_128: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 22 ; RV32-NEXT: .LBB17_129: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 22(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 21 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_131 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 22(sp) +; RV32-NEXT: bnez a2, .LBB17_131 ; RV32-NEXT: # %bb.130: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 21 ; RV32-NEXT: j .LBB17_132 ; RV32-NEXT: .LBB17_131: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 21 ; RV32-NEXT: .LBB17_132: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 21(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 20 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_134 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 21(sp) +; RV32-NEXT: bnez a2, .LBB17_134 ; 
RV32-NEXT: # %bb.133: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 20 ; RV32-NEXT: j .LBB17_135 ; RV32-NEXT: .LBB17_134: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 20 ; RV32-NEXT: .LBB17_135: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 20(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 19 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_137 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 20(sp) +; RV32-NEXT: bnez a2, .LBB17_137 ; RV32-NEXT: # %bb.136: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 19 ; RV32-NEXT: j .LBB17_138 ; RV32-NEXT: .LBB17_137: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 19 ; RV32-NEXT: .LBB17_138: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 19(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 18 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_140 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 19(sp) +; RV32-NEXT: bnez a2, .LBB17_140 ; RV32-NEXT: # %bb.139: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 18 ; RV32-NEXT: j .LBB17_141 ; RV32-NEXT: .LBB17_140: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 18 ; RV32-NEXT: .LBB17_141: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 18(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 17 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_143 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 18(sp) +; RV32-NEXT: bnez a2, .LBB17_143 ; RV32-NEXT: # %bb.142: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 17 ; RV32-NEXT: j .LBB17_144 ; RV32-NEXT: 
.LBB17_143: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 17 ; RV32-NEXT: .LBB17_144: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 17(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 16 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_146 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 17(sp) +; RV32-NEXT: bnez a2, .LBB17_146 ; RV32-NEXT: # %bb.145: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 16 ; RV32-NEXT: j .LBB17_147 ; RV32-NEXT: .LBB17_146: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 16 ; RV32-NEXT: .LBB17_147: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 16(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 15 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_149 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 16(sp) +; RV32-NEXT: bnez a2, .LBB17_149 ; RV32-NEXT: # %bb.148: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 15 ; RV32-NEXT: j .LBB17_150 ; RV32-NEXT: .LBB17_149: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 15 ; RV32-NEXT: .LBB17_150: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 15(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 14 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_152 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 15(sp) +; RV32-NEXT: bnez a2, .LBB17_152 ; RV32-NEXT: # %bb.151: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 14 ; RV32-NEXT: j .LBB17_153 ; RV32-NEXT: .LBB17_152: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 14 ; RV32-NEXT: .LBB17_153: ; RV32-NEXT: vmv.x.s a1, v16 -; 
RV32-NEXT: sb a1, 14(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 13 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_155 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 14(sp) +; RV32-NEXT: bnez a2, .LBB17_155 ; RV32-NEXT: # %bb.154: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 13 ; RV32-NEXT: j .LBB17_156 ; RV32-NEXT: .LBB17_155: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 13 ; RV32-NEXT: .LBB17_156: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 13(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 12 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_158 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 13(sp) +; RV32-NEXT: bnez a2, .LBB17_158 ; RV32-NEXT: # %bb.157: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 12 ; RV32-NEXT: j .LBB17_159 ; RV32-NEXT: .LBB17_158: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 12 ; RV32-NEXT: .LBB17_159: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 12(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 11 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_161 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 12(sp) +; RV32-NEXT: bnez a2, .LBB17_161 ; RV32-NEXT: # %bb.160: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 11 ; RV32-NEXT: j .LBB17_162 ; RV32-NEXT: .LBB17_161: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 11 ; RV32-NEXT: .LBB17_162: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 11(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 10 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: 
andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_164 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 11(sp) +; RV32-NEXT: bnez a2, .LBB17_164 ; RV32-NEXT: # %bb.163: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 10 ; RV32-NEXT: j .LBB17_165 ; RV32-NEXT: .LBB17_164: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 10 ; RV32-NEXT: .LBB17_165: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 10(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 9 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_167 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 10(sp) +; RV32-NEXT: bnez a2, .LBB17_167 ; RV32-NEXT: # %bb.166: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 9 ; RV32-NEXT: j .LBB17_168 ; RV32-NEXT: .LBB17_167: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 9 ; RV32-NEXT: .LBB17_168: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 9(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 8 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_170 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 9(sp) +; RV32-NEXT: bnez a2, .LBB17_170 ; RV32-NEXT: # %bb.169: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 8 ; RV32-NEXT: j .LBB17_171 ; RV32-NEXT: .LBB17_170: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 8 ; RV32-NEXT: .LBB17_171: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 8(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 7 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_173 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 8(sp) +; RV32-NEXT: bnez a2, 
.LBB17_173 ; RV32-NEXT: # %bb.172: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 7 ; RV32-NEXT: j .LBB17_174 ; RV32-NEXT: .LBB17_173: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 7 ; RV32-NEXT: .LBB17_174: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 7(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 6 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_176 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 7(sp) +; RV32-NEXT: bnez a2, .LBB17_176 ; RV32-NEXT: # %bb.175: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 6 ; RV32-NEXT: j .LBB17_177 ; RV32-NEXT: .LBB17_176: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 6 ; RV32-NEXT: .LBB17_177: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 6(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 5 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_179 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 6(sp) +; RV32-NEXT: bnez a2, .LBB17_179 ; RV32-NEXT: # %bb.178: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 5 ; RV32-NEXT: j .LBB17_180 ; RV32-NEXT: .LBB17_179: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 5 ; RV32-NEXT: .LBB17_180: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 5(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 4 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_182 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 5(sp) +; RV32-NEXT: bnez a2, .LBB17_182 ; RV32-NEXT: # %bb.181: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 4 ; RV32-NEXT: j .LBB17_183 ; RV32-NEXT: .LBB17_182: 
-; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 4 ; RV32-NEXT: .LBB17_183: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 4(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 3 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_185 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 4(sp) +; RV32-NEXT: bnez a2, .LBB17_185 ; RV32-NEXT: # %bb.184: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 3 ; RV32-NEXT: j .LBB17_186 ; RV32-NEXT: .LBB17_185: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 3 ; RV32-NEXT: .LBB17_186: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 3(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v12, 2 -; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_188 +; RV32-NEXT: vmv.x.s a2, v16 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 3(sp) +; RV32-NEXT: bnez a2, .LBB17_188 ; RV32-NEXT: # %bb.187: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v28, 2 ; RV32-NEXT: j .LBB17_189 ; RV32-NEXT: .LBB17_188: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v16, v8, 2 ; RV32-NEXT: .LBB17_189: ; RV32-NEXT: vmv.x.s a1, v16 -; RV32-NEXT: sb a1, 2(sp) -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v12, v12, 1 -; RV32-NEXT: vmv.x.s a1, v12 -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: bnez a1, .LBB17_191 +; RV32-NEXT: vmv.x.s a2, v12 +; RV32-NEXT: andi a2, a2, 1 +; RV32-NEXT: sb a1, 2(sp) +; RV32-NEXT: bnez a2, .LBB17_191 ; RV32-NEXT: # %bb.190: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v28, 1 ; RV32-NEXT: j .LBB17_192 ; RV32-NEXT: .LBB17_191: -; RV32-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 1 ; RV32-NEXT: .LBB17_192: ; RV32-NEXT: vmv.x.s a1, v28 @@ -2843,11 +2383,9 @@ define 
<64 x i1> @vselect_v64i1(<64 x i1> %a, <64 x i1> %b, <64 x i1> %cc) { ; RV64-NEXT: vmerge.vim v8, v16, 1, v0 ; RV64-NEXT: bnez a1, .LBB17_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: vsetvli zero, zero, e8,m4,ta,mu ; RV64-NEXT: vmv.x.s a1, v28 ; RV64-NEXT: j .LBB17_3 ; RV64-NEXT: .LBB17_2: -; RV64-NEXT: vsetvli zero, zero, e8,m4,ta,mu ; RV64-NEXT: vmv.x.s a1, v8 ; RV64-NEXT: .LBB17_3: ; RV64-NEXT: sb a1, 0(sp) @@ -2858,972 +2396,784 @@ define <64 x i1> @vselect_v64i1(<64 x i1> %a, <64 x i1> %b, <64 x i1> %cc) { ; RV64-NEXT: andi a2, a2, 1 ; RV64-NEXT: bnez a2, .LBB17_5 ; RV64-NEXT: # %bb.4: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vx v16, v28, a1 ; RV64-NEXT: j .LBB17_6 ; RV64-NEXT: .LBB17_5: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vx v16, v8, a1 ; RV64-NEXT: .LBB17_6: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 62 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 63(sp) -; RV64-NEXT: addi a1, zero, 62 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_8 +; RV64-NEXT: bnez a3, .LBB17_8 ; RV64-NEXT: # %bb.7: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_9 ; RV64-NEXT: .LBB17_8: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_9: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 61 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 62(sp) -; RV64-NEXT: addi a1, zero, 61 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_11 +; RV64-NEXT: 
bnez a3, .LBB17_11 ; RV64-NEXT: # %bb.10: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_12 ; RV64-NEXT: .LBB17_11: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_12: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 60 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 61(sp) -; RV64-NEXT: addi a1, zero, 60 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_14 +; RV64-NEXT: bnez a3, .LBB17_14 ; RV64-NEXT: # %bb.13: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_15 ; RV64-NEXT: .LBB17_14: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_15: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 59 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 60(sp) -; RV64-NEXT: addi a1, zero, 59 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_17 +; RV64-NEXT: bnez a3, .LBB17_17 ; RV64-NEXT: # %bb.16: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_18 ; RV64-NEXT: .LBB17_17: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_18: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 58 +; RV64-NEXT: vslidedown.vx v16, v12, 
a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 59(sp) -; RV64-NEXT: addi a1, zero, 58 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_20 +; RV64-NEXT: bnez a3, .LBB17_20 ; RV64-NEXT: # %bb.19: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_21 ; RV64-NEXT: .LBB17_20: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_21: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 57 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 58(sp) -; RV64-NEXT: addi a1, zero, 57 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_23 +; RV64-NEXT: bnez a3, .LBB17_23 ; RV64-NEXT: # %bb.22: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_24 ; RV64-NEXT: .LBB17_23: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_24: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 56 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 57(sp) -; RV64-NEXT: addi a1, zero, 56 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_26 +; RV64-NEXT: bnez a3, .LBB17_26 ; RV64-NEXT: # %bb.25: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: 
vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_27 ; RV64-NEXT: .LBB17_26: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_27: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 55 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 56(sp) -; RV64-NEXT: addi a1, zero, 55 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_29 +; RV64-NEXT: bnez a3, .LBB17_29 ; RV64-NEXT: # %bb.28: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_30 ; RV64-NEXT: .LBB17_29: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_30: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 54 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 55(sp) -; RV64-NEXT: addi a1, zero, 54 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_32 +; RV64-NEXT: bnez a3, .LBB17_32 ; RV64-NEXT: # %bb.31: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_33 ; RV64-NEXT: .LBB17_32: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_33: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 53 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 54(sp) -; RV64-NEXT: addi a1, zero, 53 -; RV64-NEXT: 
vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_35 +; RV64-NEXT: bnez a3, .LBB17_35 ; RV64-NEXT: # %bb.34: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_36 ; RV64-NEXT: .LBB17_35: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_36: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 52 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 53(sp) -; RV64-NEXT: addi a1, zero, 52 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_38 +; RV64-NEXT: bnez a3, .LBB17_38 ; RV64-NEXT: # %bb.37: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_39 ; RV64-NEXT: .LBB17_38: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_39: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 51 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 52(sp) -; RV64-NEXT: addi a1, zero, 51 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_41 +; RV64-NEXT: bnez a3, .LBB17_41 ; RV64-NEXT: # %bb.40: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_42 ; RV64-NEXT: .LBB17_41: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx 
v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_42: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 50 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 51(sp) -; RV64-NEXT: addi a1, zero, 50 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_44 +; RV64-NEXT: bnez a3, .LBB17_44 ; RV64-NEXT: # %bb.43: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_45 ; RV64-NEXT: .LBB17_44: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_45: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 49 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 50(sp) -; RV64-NEXT: addi a1, zero, 49 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_47 +; RV64-NEXT: bnez a3, .LBB17_47 ; RV64-NEXT: # %bb.46: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_48 ; RV64-NEXT: .LBB17_47: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_48: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 48 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 49(sp) -; RV64-NEXT: addi a1, zero, 48 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez 
a2, .LBB17_50 +; RV64-NEXT: bnez a3, .LBB17_50 ; RV64-NEXT: # %bb.49: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_51 ; RV64-NEXT: .LBB17_50: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_51: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 47 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 48(sp) -; RV64-NEXT: addi a1, zero, 47 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_53 +; RV64-NEXT: bnez a3, .LBB17_53 ; RV64-NEXT: # %bb.52: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_54 ; RV64-NEXT: .LBB17_53: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_54: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 46 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 47(sp) -; RV64-NEXT: addi a1, zero, 46 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_56 +; RV64-NEXT: bnez a3, .LBB17_56 ; RV64-NEXT: # %bb.55: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_57 ; RV64-NEXT: .LBB17_56: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_57: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 45 +; 
RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 46(sp) -; RV64-NEXT: addi a1, zero, 45 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_59 +; RV64-NEXT: bnez a3, .LBB17_59 ; RV64-NEXT: # %bb.58: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_60 ; RV64-NEXT: .LBB17_59: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_60: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 44 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 45(sp) -; RV64-NEXT: addi a1, zero, 44 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_62 +; RV64-NEXT: bnez a3, .LBB17_62 ; RV64-NEXT: # %bb.61: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_63 ; RV64-NEXT: .LBB17_62: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_63: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 43 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 44(sp) -; RV64-NEXT: addi a1, zero, 43 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_65 +; RV64-NEXT: bnez a3, .LBB17_65 ; RV64-NEXT: # %bb.64: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx 
v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_66 ; RV64-NEXT: .LBB17_65: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_66: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 42 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 43(sp) -; RV64-NEXT: addi a1, zero, 42 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_68 +; RV64-NEXT: bnez a3, .LBB17_68 ; RV64-NEXT: # %bb.67: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_69 ; RV64-NEXT: .LBB17_68: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_69: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 41 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 42(sp) -; RV64-NEXT: addi a1, zero, 41 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_71 +; RV64-NEXT: bnez a3, .LBB17_71 ; RV64-NEXT: # %bb.70: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_72 ; RV64-NEXT: .LBB17_71: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_72: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 40 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 41(sp) -; RV64-NEXT: addi a1, 
zero, 40 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_74 +; RV64-NEXT: bnez a3, .LBB17_74 ; RV64-NEXT: # %bb.73: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_75 ; RV64-NEXT: .LBB17_74: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_75: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 39 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 40(sp) -; RV64-NEXT: addi a1, zero, 39 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_77 +; RV64-NEXT: bnez a3, .LBB17_77 ; RV64-NEXT: # %bb.76: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_78 ; RV64-NEXT: .LBB17_77: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_78: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 38 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 39(sp) -; RV64-NEXT: addi a1, zero, 38 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_80 +; RV64-NEXT: bnez a3, .LBB17_80 ; RV64-NEXT: # %bb.79: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_81 ; RV64-NEXT: .LBB17_80: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; 
RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_81: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 37 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 38(sp) -; RV64-NEXT: addi a1, zero, 37 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_83 +; RV64-NEXT: bnez a3, .LBB17_83 ; RV64-NEXT: # %bb.82: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_84 ; RV64-NEXT: .LBB17_83: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_84: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 36 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 37(sp) -; RV64-NEXT: addi a1, zero, 36 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_86 +; RV64-NEXT: bnez a3, .LBB17_86 ; RV64-NEXT: # %bb.85: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_87 ; RV64-NEXT: .LBB17_86: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_87: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 35 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 36(sp) -; RV64-NEXT: addi a1, zero, 35 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, 
a2, 1 -; RV64-NEXT: bnez a2, .LBB17_89 +; RV64-NEXT: bnez a3, .LBB17_89 ; RV64-NEXT: # %bb.88: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_90 ; RV64-NEXT: .LBB17_89: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_90: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 34 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 35(sp) -; RV64-NEXT: addi a1, zero, 34 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_92 +; RV64-NEXT: bnez a3, .LBB17_92 ; RV64-NEXT: # %bb.91: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_93 ; RV64-NEXT: .LBB17_92: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_93: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: addi a2, zero, 33 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 34(sp) -; RV64-NEXT: addi a1, zero, 33 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_95 +; RV64-NEXT: bnez a3, .LBB17_95 ; RV64-NEXT: # %bb.94: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_96 ; RV64-NEXT: .LBB17_95: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_96: ; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: 
addi a2, zero, 32 +; RV64-NEXT: vslidedown.vx v16, v12, a2 +; RV64-NEXT: vmv.x.s a3, v16 +; RV64-NEXT: andi a3, a3, 1 ; RV64-NEXT: sb a1, 33(sp) -; RV64-NEXT: addi a1, zero, 32 -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v12, a1 -; RV64-NEXT: vmv.x.s a2, v16 -; RV64-NEXT: andi a2, a2, 1 -; RV64-NEXT: bnez a2, .LBB17_98 +; RV64-NEXT: bnez a3, .LBB17_98 ; RV64-NEXT: # %bb.97: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v28, a1 +; RV64-NEXT: vslidedown.vx v16, v28, a2 ; RV64-NEXT: j .LBB17_99 ; RV64-NEXT: .LBB17_98: -; RV64-NEXT: vsetivli a2, 1, e8,m4,ta,mu -; RV64-NEXT: vslidedown.vx v16, v8, a1 +; RV64-NEXT: vslidedown.vx v16, v8, a2 ; RV64-NEXT: .LBB17_99: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 32(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 31 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_101 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 32(sp) +; RV64-NEXT: bnez a2, .LBB17_101 ; RV64-NEXT: # %bb.100: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 31 ; RV64-NEXT: j .LBB17_102 ; RV64-NEXT: .LBB17_101: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 31 ; RV64-NEXT: .LBB17_102: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 31(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 30 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_104 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 31(sp) +; RV64-NEXT: bnez a2, .LBB17_104 ; RV64-NEXT: # %bb.103: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 30 ; RV64-NEXT: j .LBB17_105 ; RV64-NEXT: .LBB17_104: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 30 ; RV64-NEXT: .LBB17_105: ; RV64-NEXT: vmv.x.s a1, v16 -; 
RV64-NEXT: sb a1, 30(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 29 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_107 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 30(sp) +; RV64-NEXT: bnez a2, .LBB17_107 ; RV64-NEXT: # %bb.106: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 29 ; RV64-NEXT: j .LBB17_108 ; RV64-NEXT: .LBB17_107: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 29 ; RV64-NEXT: .LBB17_108: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 29(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 28 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_110 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 29(sp) +; RV64-NEXT: bnez a2, .LBB17_110 ; RV64-NEXT: # %bb.109: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 28 ; RV64-NEXT: j .LBB17_111 ; RV64-NEXT: .LBB17_110: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 28 ; RV64-NEXT: .LBB17_111: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 28(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 27 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_113 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 28(sp) +; RV64-NEXT: bnez a2, .LBB17_113 ; RV64-NEXT: # %bb.112: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 27 ; RV64-NEXT: j .LBB17_114 ; RV64-NEXT: .LBB17_113: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 27 ; RV64-NEXT: .LBB17_114: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 27(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 26 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: 
andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_116 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 27(sp) +; RV64-NEXT: bnez a2, .LBB17_116 ; RV64-NEXT: # %bb.115: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 26 ; RV64-NEXT: j .LBB17_117 ; RV64-NEXT: .LBB17_116: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 26 ; RV64-NEXT: .LBB17_117: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 26(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 25 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_119 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 26(sp) +; RV64-NEXT: bnez a2, .LBB17_119 ; RV64-NEXT: # %bb.118: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 25 ; RV64-NEXT: j .LBB17_120 ; RV64-NEXT: .LBB17_119: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 25 ; RV64-NEXT: .LBB17_120: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 25(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 24 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_122 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 25(sp) +; RV64-NEXT: bnez a2, .LBB17_122 ; RV64-NEXT: # %bb.121: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 24 ; RV64-NEXT: j .LBB17_123 ; RV64-NEXT: .LBB17_122: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 24 ; RV64-NEXT: .LBB17_123: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 24(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 23 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_125 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 24(sp) +; RV64-NEXT: 
bnez a2, .LBB17_125 ; RV64-NEXT: # %bb.124: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 23 ; RV64-NEXT: j .LBB17_126 ; RV64-NEXT: .LBB17_125: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 23 ; RV64-NEXT: .LBB17_126: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 23(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 22 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_128 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 23(sp) +; RV64-NEXT: bnez a2, .LBB17_128 ; RV64-NEXT: # %bb.127: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 22 ; RV64-NEXT: j .LBB17_129 ; RV64-NEXT: .LBB17_128: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 22 ; RV64-NEXT: .LBB17_129: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 22(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 21 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_131 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 22(sp) +; RV64-NEXT: bnez a2, .LBB17_131 ; RV64-NEXT: # %bb.130: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 21 ; RV64-NEXT: j .LBB17_132 ; RV64-NEXT: .LBB17_131: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 21 ; RV64-NEXT: .LBB17_132: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 21(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 20 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_134 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 21(sp) +; RV64-NEXT: bnez a2, .LBB17_134 ; RV64-NEXT: # %bb.133: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 20 ; RV64-NEXT: j .LBB17_135 
; RV64-NEXT: .LBB17_134: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 20 ; RV64-NEXT: .LBB17_135: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 20(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 19 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_137 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 20(sp) +; RV64-NEXT: bnez a2, .LBB17_137 ; RV64-NEXT: # %bb.136: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 19 ; RV64-NEXT: j .LBB17_138 ; RV64-NEXT: .LBB17_137: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 19 ; RV64-NEXT: .LBB17_138: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 19(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 18 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_140 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 19(sp) +; RV64-NEXT: bnez a2, .LBB17_140 ; RV64-NEXT: # %bb.139: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 18 ; RV64-NEXT: j .LBB17_141 ; RV64-NEXT: .LBB17_140: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 18 ; RV64-NEXT: .LBB17_141: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 18(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 17 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_143 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 18(sp) +; RV64-NEXT: bnez a2, .LBB17_143 ; RV64-NEXT: # %bb.142: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 17 ; RV64-NEXT: j .LBB17_144 ; RV64-NEXT: .LBB17_143: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 17 ; RV64-NEXT: .LBB17_144: ; RV64-NEXT: vmv.x.s 
a1, v16 -; RV64-NEXT: sb a1, 17(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 16 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_146 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 17(sp) +; RV64-NEXT: bnez a2, .LBB17_146 ; RV64-NEXT: # %bb.145: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 16 ; RV64-NEXT: j .LBB17_147 ; RV64-NEXT: .LBB17_146: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: .LBB17_147: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 16(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 15 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_149 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 16(sp) +; RV64-NEXT: bnez a2, .LBB17_149 ; RV64-NEXT: # %bb.148: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 15 ; RV64-NEXT: j .LBB17_150 ; RV64-NEXT: .LBB17_149: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 15 ; RV64-NEXT: .LBB17_150: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 15(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 14 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_152 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 15(sp) +; RV64-NEXT: bnez a2, .LBB17_152 ; RV64-NEXT: # %bb.151: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 14 ; RV64-NEXT: j .LBB17_153 ; RV64-NEXT: .LBB17_152: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 14 ; RV64-NEXT: .LBB17_153: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 14(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 13 -; RV64-NEXT: vmv.x.s a1, v16 -; 
RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_155 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 14(sp) +; RV64-NEXT: bnez a2, .LBB17_155 ; RV64-NEXT: # %bb.154: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 13 ; RV64-NEXT: j .LBB17_156 ; RV64-NEXT: .LBB17_155: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 13 ; RV64-NEXT: .LBB17_156: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 13(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 12 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_158 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 13(sp) +; RV64-NEXT: bnez a2, .LBB17_158 ; RV64-NEXT: # %bb.157: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 12 ; RV64-NEXT: j .LBB17_159 ; RV64-NEXT: .LBB17_158: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 12 ; RV64-NEXT: .LBB17_159: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 12(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 11 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_161 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 12(sp) +; RV64-NEXT: bnez a2, .LBB17_161 ; RV64-NEXT: # %bb.160: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 11 ; RV64-NEXT: j .LBB17_162 ; RV64-NEXT: .LBB17_161: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 11 ; RV64-NEXT: .LBB17_162: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 11(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 10 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_164 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 11(sp) +; 
RV64-NEXT: bnez a2, .LBB17_164 ; RV64-NEXT: # %bb.163: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 10 ; RV64-NEXT: j .LBB17_165 ; RV64-NEXT: .LBB17_164: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 10 ; RV64-NEXT: .LBB17_165: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 10(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 9 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_167 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 10(sp) +; RV64-NEXT: bnez a2, .LBB17_167 ; RV64-NEXT: # %bb.166: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 9 ; RV64-NEXT: j .LBB17_168 ; RV64-NEXT: .LBB17_167: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 9 ; RV64-NEXT: .LBB17_168: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 9(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 8 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_170 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 9(sp) +; RV64-NEXT: bnez a2, .LBB17_170 ; RV64-NEXT: # %bb.169: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 8 ; RV64-NEXT: j .LBB17_171 ; RV64-NEXT: .LBB17_170: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 8 ; RV64-NEXT: .LBB17_171: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 8(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 7 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_173 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 8(sp) +; RV64-NEXT: bnez a2, .LBB17_173 ; RV64-NEXT: # %bb.172: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 7 ; RV64-NEXT: j .LBB17_174 
; RV64-NEXT: .LBB17_173: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 7 ; RV64-NEXT: .LBB17_174: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 7(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 6 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_176 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 7(sp) +; RV64-NEXT: bnez a2, .LBB17_176 ; RV64-NEXT: # %bb.175: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 6 ; RV64-NEXT: j .LBB17_177 ; RV64-NEXT: .LBB17_176: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 6 ; RV64-NEXT: .LBB17_177: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 6(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 5 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_179 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 6(sp) +; RV64-NEXT: bnez a2, .LBB17_179 ; RV64-NEXT: # %bb.178: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 5 ; RV64-NEXT: j .LBB17_180 ; RV64-NEXT: .LBB17_179: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 5 ; RV64-NEXT: .LBB17_180: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 5(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 4 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_182 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 5(sp) +; RV64-NEXT: bnez a2, .LBB17_182 ; RV64-NEXT: # %bb.181: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 4 ; RV64-NEXT: j .LBB17_183 ; RV64-NEXT: .LBB17_182: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 4 ; RV64-NEXT: .LBB17_183: ; RV64-NEXT: vmv.x.s a1, v16 -; 
RV64-NEXT: sb a1, 4(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 3 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_185 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 4(sp) +; RV64-NEXT: bnez a2, .LBB17_185 ; RV64-NEXT: # %bb.184: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 3 ; RV64-NEXT: j .LBB17_186 ; RV64-NEXT: .LBB17_185: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 3 ; RV64-NEXT: .LBB17_186: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 3(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v12, 2 -; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_188 +; RV64-NEXT: vmv.x.s a2, v16 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 3(sp) +; RV64-NEXT: bnez a2, .LBB17_188 ; RV64-NEXT: # %bb.187: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v28, 2 ; RV64-NEXT: j .LBB17_189 ; RV64-NEXT: .LBB17_188: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v16, v8, 2 ; RV64-NEXT: .LBB17_189: ; RV64-NEXT: vmv.x.s a1, v16 -; RV64-NEXT: sb a1, 2(sp) -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v12, v12, 1 -; RV64-NEXT: vmv.x.s a1, v12 -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: bnez a1, .LBB17_191 +; RV64-NEXT: vmv.x.s a2, v12 +; RV64-NEXT: andi a2, a2, 1 +; RV64-NEXT: sb a1, 2(sp) +; RV64-NEXT: bnez a2, .LBB17_191 ; RV64-NEXT: # %bb.190: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v28, 1 ; RV64-NEXT: j .LBB17_192 ; RV64-NEXT: .LBB17_191: -; RV64-NEXT: vsetivli a1, 1, e8,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 1 ; RV64-NEXT: .LBB17_192: ; RV64-NEXT: vmv.x.s a1, v28 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll new file mode 100644 index 
000000000000..b526a3688637 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -0,0 +1,447 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+experimental-v \ +; RUN: -verify-machineinstrs -O2 < %s | FileCheck %s + +; The following tests check whether inserting VSETVLI avoids inserting +; unneeded vsetvlis across basic blocks. + +declare i64 @llvm.riscv.vsetvli(i64, i64, i64) + +declare @llvm.riscv.vfadd.nxv1f64.nxv1f64(, , i64) +declare @llvm.riscv.vfadd.nxv2f32.nxv2f32(, , i64) + +declare @llvm.riscv.vfsub.nxv1f64.nxv1f64(, , i64) + +declare @llvm.riscv.vfmul.nxv1f64.nxv1f64(, , i64) + +declare @llvm.riscv.vfmv.v.f.nxv1f64.f64(double, i64) +declare @llvm.riscv.vfmv.v.f.nxv2f32.f32(float, i64) + +declare void @llvm.riscv.vse.nxv1f64(, * nocapture, i64) +declare void @llvm.riscv.vse.nxv2f32(, * nocapture, i64) + +define @test1(i64 %avl, i8 zeroext %cond, %a, %b) nounwind { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: beqz a1, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: # %if.else +; CHECK-NEXT: vfsub.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0) + %tobool = icmp eq i8 %cond, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %1 = tail call @llvm.riscv.vfadd.nxv1f64.nxv1f64( %a, %b, i64 %0) + br label %if.end + +if.else: ; preds = %entry + %2 = tail call @llvm.riscv.vfsub.nxv1f64.nxv1f64( %a, %b, i64 %0) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %c.0 = phi [ %1, %if.then ], [ %2, %if.else ] + ret %c.0 +} + +@scratch = global i8 0, align 16 + +define @test2(i64 %avl, i8 zeroext %cond, %a, %b) nounwind { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: 
beqz a1, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: vfadd.vv v25, v8, v9 +; CHECK-NEXT: vfmul.vv v8, v25, v8 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_2: # %if.else +; CHECK-NEXT: vfsub.vv v25, v8, v9 +; CHECK-NEXT: vfmul.vv v8, v25, v8 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0) + %tobool = icmp eq i8 %cond, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %1 = tail call @llvm.riscv.vfadd.nxv1f64.nxv1f64( %a, %b, i64 %0) + br label %if.end + +if.else: ; preds = %entry + %2 = tail call @llvm.riscv.vfsub.nxv1f64.nxv1f64( %a, %b, i64 %0) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %c.0 = phi [ %1, %if.then ], [ %2, %if.else ] + %3 = tail call @llvm.riscv.vfmul.nxv1f64.nxv1f64( %c.0, %a, i64 %0) + ret %3 +} + +; FIXME: The last vsetvli is redundant, but we need to look through a phi to +; prove it. +define @test3(i64 %avl, i8 zeroext %cond, %a, %b) nounwind { +; CHECK-LABEL: test3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: beqz a1, .LBB2_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfadd.vv v25, v8, v9 +; CHECK-NEXT: j .LBB2_3 +; CHECK-NEXT: .LBB2_2: # %if.else +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfsub.vv v25, v8, v9 +; CHECK-NEXT: .LBB2_3: # %if.end +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfmul.vv v8, v25, v8 +; CHECK-NEXT: ret +entry: + %tobool = icmp eq i8 %cond, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0) + %1 = tail call @llvm.riscv.vfadd.nxv1f64.nxv1f64( %a, %b, i64 %0) + br label %if.end + +if.else: ; preds = %entry + %2 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0) + %3 = tail call @llvm.riscv.vfsub.nxv1f64.nxv1f64( %a, %b, i64 %2) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %vl.0 = phi i64 [ %0, %if.then], [ %2, %if.else ] + %c.0 
= phi [ %1, %if.then ], [ %3, %if.else ] + %4 = tail call @llvm.riscv.vfmul.nxv1f64.nxv1f64( %c.0, %a, i64 %vl.0) + ret %4 +} + +define @test4(i64 %avl, i8 zeroext %cond, %l, %r) nounwind { +; CHECK-LABEL: test4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: beqz a1, .LBB3_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: lui a1, %hi(.LCPI3_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI3_0) +; CHECK-NEXT: vsetvli a2, a0, e64,m1,ta,mu +; CHECK-NEXT: vlse64.v v25, (a1), zero +; CHECK-NEXT: lui a1, %hi(.LCPI3_1) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI3_1) +; CHECK-NEXT: vlse64.v v26, (a1), zero +; CHECK-NEXT: vfadd.vv v25, v25, v26 +; CHECK-NEXT: lui a1, %hi(scratch) +; CHECK-NEXT: addi a1, a1, %lo(scratch) +; CHECK-NEXT: vse64.v v25, (a1) +; CHECK-NEXT: j .LBB3_3 +; CHECK-NEXT: .LBB3_2: # %if.else +; CHECK-NEXT: lui a1, %hi(.LCPI3_2) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI3_2) +; CHECK-NEXT: vsetvli a2, a0, e32,m1,ta,mu +; CHECK-NEXT: vlse32.v v25, (a1), zero +; CHECK-NEXT: lui a1, %hi(.LCPI3_3) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI3_3) +; CHECK-NEXT: vlse32.v v26, (a1), zero +; CHECK-NEXT: vfadd.vv v25, v25, v26 +; CHECK-NEXT: lui a1, %hi(scratch) +; CHECK-NEXT: addi a1, a1, %lo(scratch) +; CHECK-NEXT: vse32.v v25, (a1) +; CHECK-NEXT: .LBB3_3: # %if.end +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: vfmul.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %tobool = icmp eq i8 %cond, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %0 = tail call @llvm.riscv.vfmv.v.f.nxv1f64.f64(double 1.000000e+00, i64 %avl) + %1 = tail call @llvm.riscv.vfmv.v.f.nxv1f64.f64(double 2.000000e+00, i64 %avl) + %2 = tail call @llvm.riscv.vfadd.nxv1f64.nxv1f64( %0, %1, i64 %avl) + %3 = bitcast i8* @scratch to * + tail call void @llvm.riscv.vse.nxv1f64( %2, * %3, i64 %avl) + br label %if.end + +if.else: ; preds = %entry + %4 = tail call @llvm.riscv.vfmv.v.f.nxv2f32.f32(float 1.000000e+00, i64 %avl) + %5 = tail call @llvm.riscv.vfmv.v.f.nxv2f32.f32(float 
2.000000e+00, i64 %avl) + %6 = tail call @llvm.riscv.vfadd.nxv2f32.nxv2f32( %4, %5, i64 %avl) + %7 = bitcast i8* @scratch to * + tail call void @llvm.riscv.vse.nxv2f32( %6, * %7, i64 %avl) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %8 = tail call @llvm.riscv.vfmul.nxv1f64.nxv1f64( %l, %r, i64 %avl) + ret %8 +} + +define @test5(i64 %avl, i8 zeroext %cond, %a, %b) nounwind { +; CHECK-LABEL: test5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi a2, a1, 1 +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: bnez a2, .LBB4_3 +; CHECK-NEXT: # %bb.1: # %if.else +; CHECK-NEXT: vfsub.vv v25, v8, v9 +; CHECK-NEXT: andi a0, a1, 2 +; CHECK-NEXT: beqz a0, .LBB4_4 +; CHECK-NEXT: .LBB4_2: # %if.then4 +; CHECK-NEXT: vfmul.vv v8, v25, v8 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB4_3: # %if.then +; CHECK-NEXT: vfadd.vv v25, v8, v9 +; CHECK-NEXT: andi a0, a1, 2 +; CHECK-NEXT: bnez a0, .LBB4_2 +; CHECK-NEXT: .LBB4_4: # %if.else5 +; CHECK-NEXT: vfmul.vv v8, v8, v25 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0) + %conv = zext i8 %cond to i32 + %and = and i32 %conv, 1 + %tobool = icmp eq i32 %and, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %1 = tail call @llvm.riscv.vfadd.nxv1f64.nxv1f64( %a, %b, i64 %0) + br label %if.end + +if.else: ; preds = %entry + %2 = tail call @llvm.riscv.vfsub.nxv1f64.nxv1f64( %a, %b, i64 %0) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %c.0 = phi [ %1, %if.then ], [ %2, %if.else ] + %and2 = and i32 %conv, 2 + %tobool3 = icmp eq i32 %and2, 0 + br i1 %tobool3, label %if.else5, label %if.then4 + +if.then4: ; preds = %if.end + %3 = tail call @llvm.riscv.vfmul.nxv1f64.nxv1f64( %c.0, %a, i64 %0) + br label %if.end6 + +if.else5: ; preds = %if.end + %4 = tail call @llvm.riscv.vfmul.nxv1f64.nxv1f64( %a, %c.0, i64 %0) + br label %if.end6 + +if.end6: ; preds = %if.else5, %if.then4 + %c.1 = phi [ %3, %if.then4 ], [ %4, %if.else5 ] + ret %c.1 +} + +; 
FIXME: The explicit vsetvli in if.then4 could be removed as it is redundant +; with the one in the entry, but we lack the ability to remove explicit +; vsetvli instructions. +define @test6(i64 %avl, i8 zeroext %cond, %a, %b) nounwind { +; CHECK-LABEL: test6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi a3, a1, 1 +; CHECK-NEXT: vsetvli a2, a0, e64,m1,ta,mu +; CHECK-NEXT: bnez a3, .LBB5_3 +; CHECK-NEXT: # %bb.1: # %if.else +; CHECK-NEXT: vfsub.vv v25, v8, v9 +; CHECK-NEXT: andi a1, a1, 2 +; CHECK-NEXT: beqz a1, .LBB5_4 +; CHECK-NEXT: .LBB5_2: # %if.then4 +; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; CHECK-NEXT: lui a0, %hi(.LCPI5_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0) +; CHECK-NEXT: vlse64.v v26, (a0), zero +; CHECK-NEXT: lui a0, %hi(.LCPI5_1) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_1) +; CHECK-NEXT: vlse64.v v27, (a0), zero +; CHECK-NEXT: vfadd.vv v26, v26, v27 +; CHECK-NEXT: lui a0, %hi(scratch) +; CHECK-NEXT: addi a0, a0, %lo(scratch) +; CHECK-NEXT: vse64.v v26, (a0) +; CHECK-NEXT: j .LBB5_5 +; CHECK-NEXT: .LBB5_3: # %if.then +; CHECK-NEXT: vfadd.vv v25, v8, v9 +; CHECK-NEXT: andi a1, a1, 2 +; CHECK-NEXT: bnez a1, .LBB5_2 +; CHECK-NEXT: .LBB5_4: # %if.else5 +; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu +; CHECK-NEXT: lui a0, %hi(.LCPI5_2) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_2) +; CHECK-NEXT: vlse32.v v26, (a0), zero +; CHECK-NEXT: lui a0, %hi(.LCPI5_3) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_3) +; CHECK-NEXT: vlse32.v v27, (a0), zero +; CHECK-NEXT: vfadd.vv v26, v26, v27 +; CHECK-NEXT: lui a0, %hi(scratch) +; CHECK-NEXT: addi a0, a0, %lo(scratch) +; CHECK-NEXT: vse32.v v26, (a0) +; CHECK-NEXT: .LBB5_5: # %if.end10 +; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu +; CHECK-NEXT: vfmul.vv v8, v25, v25 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0) + %conv = zext i8 %cond to i32 + %and = and i32 %conv, 1 + %tobool = icmp eq i32 %and, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %1 = 
tail call @llvm.riscv.vfadd.nxv1f64.nxv1f64( %a, %b, i64 %0) + br label %if.end + +if.else: ; preds = %entry + %2 = tail call @llvm.riscv.vfsub.nxv1f64.nxv1f64( %a, %b, i64 %0) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %c.0 = phi [ %1, %if.then ], [ %2, %if.else ] + %and2 = and i32 %conv, 2 + %tobool3 = icmp eq i32 %and2, 0 + br i1 %tobool3, label %if.else5, label %if.then4 + +if.then4: ; preds = %if.end + %3 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0) + %4 = tail call @llvm.riscv.vfmv.v.f.nxv1f64.f64(double 1.000000e+00, i64 %3) + %5 = tail call @llvm.riscv.vfmv.v.f.nxv1f64.f64(double 2.000000e+00, i64 %3) + %6 = tail call @llvm.riscv.vfadd.nxv1f64.nxv1f64( %4, %5, i64 %3) + %7 = bitcast i8* @scratch to * + tail call void @llvm.riscv.vse.nxv1f64( %6, * %7, i64 %3) + br label %if.end10 + +if.else5: ; preds = %if.end + %8 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 2, i64 0) + %9 = tail call @llvm.riscv.vfmv.v.f.nxv2f32.f32(float 1.000000e+00, i64 %8) + %10 = tail call @llvm.riscv.vfmv.v.f.nxv2f32.f32(float 2.000000e+00, i64 %8) + %11 = tail call @llvm.riscv.vfadd.nxv2f32.nxv2f32( %9, %10, i64 %8) + %12 = bitcast i8* @scratch to * + tail call void @llvm.riscv.vse.nxv2f32( %11, * %12, i64 %8) + br label %if.end10 + +if.end10: ; preds = %if.else5, %if.then4 + %13 = tail call @llvm.riscv.vfmul.nxv1f64.nxv1f64( %c.0, %c.0, i64 %0) + ret %13 +} + +declare void @foo() + +; Similar to test1, but contains a call to @foo to act as barrier to analyzing +; VL/VTYPE. 
+define @test8(i64 %avl, i8 zeroext %cond, %a, %b) nounwind { +; CHECK-LABEL: test8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: vsetvli s0, a0, e64,m1,ta,mu +; CHECK-NEXT: beqz a1, .LBB6_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: j .LBB6_3 +; CHECK-NEXT: .LBB6_2: # %if.else +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: call foo@plt +; CHECK-NEXT: vsetvli a0, s0, e64,m1,ta,mu +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsub.vv v8, v26, v25 +; CHECK-NEXT: .LBB6_3: # %if.then +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0) + %tobool = icmp eq i8 %cond, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %1 = tail call @llvm.riscv.vfadd.nxv1f64.nxv1f64( %a, %b, i64 %0) + br label %if.end + +if.else: ; preds = %entry + call void @foo() + %2 = tail call @llvm.riscv.vfsub.nxv1f64.nxv1f64( %a, %b, i64 %0) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %c.0 = phi [ %1, %if.then ], [ %2, %if.else ] + ret %c.0 +} + +; Similar to test2, but contains a call to @foo to act as barrier to 
analyzing +; VL/VTYPE. +define @test9(i64 %avl, i8 zeroext %cond, %a, %b) nounwind { +; CHECK-LABEL: test9: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: vsetvli s0, a0, e64,m1,ta,mu +; CHECK-NEXT: beqz a1, .LBB7_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: vfadd.vv v25, v8, v9 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: call foo@plt +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: j .LBB7_3 +; CHECK-NEXT: .LBB7_2: # %if.else +; CHECK-NEXT: vfsub.vv v25, v8, v9 +; CHECK-NEXT: .LBB7_3: # %if.end +; CHECK-NEXT: vsetvli a0, s0, e64,m1,ta,mu +; CHECK-NEXT: vfmul.vv v8, v25, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0) + %tobool = icmp eq i8 %cond, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %1 = tail call @llvm.riscv.vfadd.nxv1f64.nxv1f64( %a, %b, i64 %0) + call void @foo() + br label %if.end + +if.else: ; preds = %entry + %2 = tail call @llvm.riscv.vfsub.nxv1f64.nxv1f64( %a, %b, i64 %0) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %c.0 = phi [ %1, %if.then ], [ %2, %if.else ] + %3 = tail call 
@llvm.riscv.vfmul.nxv1f64.nxv1f64( %c.0, %a, i64 %0) + ret %3 +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir new file mode 100644 index 000000000000..f734b94841a7 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir @@ -0,0 +1,415 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -o - -mtriple=riscv64 -mattr=experimental-v \ +# RUN: -run-pass=riscv-insert-vsetvli | FileCheck %s + +--- | + ; ModuleID = 'vsetvli-insert.ll' + source_filename = "vsetvli-insert.ll" + target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" + target triple = "riscv64" + + define @load_add_or_sub(i8 zeroext %cond, * %0, %1, i64 %2) #0 { + entry: + %a = call @llvm.riscv.vle.nxv1i64.i64(* %0, i64 %2) + %tobool = icmp eq i8 %cond, 0 + br i1 %tobool, label %if.else, label %if.then + + if.then: ; preds = %entry + %b = call @llvm.riscv.vadd.nxv1i64.nxv1i64.i64( %a, %1, i64 %2) + br label %if.end + + if.else: ; preds = %entry + %c = call @llvm.riscv.vsub.nxv1i64.nxv1i64.i64( %a, %1, i64 %2) + br label %if.end + + if.end: ; preds = %if.else, %if.then + %d = phi [ %b, %if.then ], [ %c, %if.else ] + ret %d + } + + define void @load_zext_or_sext(i8 zeroext %cond, * %0, * %1, i64 %2) #0 { + entry: + %a = call @llvm.riscv.vle.nxv1i32.i64(* %0, i64 %2) + %tobool = icmp eq i8 %cond, 0 + br i1 %tobool, label %if.else, label %if.then + + if.then: ; preds = %entry + %b = call @llvm.riscv.vzext.nxv1i64.nxv1i32.i64( %a, i64 %2) + br label %if.end + + if.else: ; preds = %entry + %c = call @llvm.riscv.vsext.nxv1i64.nxv1i32.i64( %a, i64 %2) + br label %if.end + + if.end: ; preds = %if.else, %if.then + %d = phi [ %b, %if.then ], [ %c, %if.else ] + call void @llvm.riscv.vse.nxv1i64.i64( %d, * %1, i64 %2) + ret void + } + + ; Function Attrs: nounwind readnone + declare i64 @llvm.riscv.vmv.x.s.nxv1i64() #1 + + define i64 @vmv_x_s(i8 zeroext %cond, %0, %1, i64 
%2) #0 { + entry: + %tobool = icmp eq i8 %cond, 0 + br i1 %tobool, label %if.else, label %if.then + + if.then: ; preds = %entry + %a = call @llvm.riscv.vadd.nxv1i64.nxv1i64.i64( %0, %1, i64 %2) + br label %if.end + + if.else: ; preds = %entry + %b = call @llvm.riscv.vsub.nxv1i64.nxv1i64.i64( %1, %1, i64 %2) + br label %if.end + + if.end: ; preds = %if.else, %if.then + %c = phi [ %a, %if.then ], [ %b, %if.else ] + %d = call i64 @llvm.riscv.vmv.x.s.nxv1i64( %c) + ret i64 %d + } + + ; Function Attrs: nounwind + declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) #2 + + define @vsetvli_add_or_sub(i8 zeroext %cond, %0, %1, i64 %avl) #0 { + entry: + %vl = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 3, i64 0) + %tobool = icmp eq i8 %cond, 0 + br i1 %tobool, label %if.else, label %if.then + + if.then: ; preds = %entry + %b = call @llvm.riscv.vadd.nxv1i64.nxv1i64.i64( %0, %1, i64 %vl) + br label %if.end + + if.else: ; preds = %entry + %c = call @llvm.riscv.vsub.nxv1i64.nxv1i64.i64( %0, %1, i64 %vl) + br label %if.end + + if.end: ; preds = %if.else, %if.then + %d = phi [ %b, %if.then ], [ %c, %if.else ] + ret %d + } + + ; Function Attrs: nounwind readnone + declare @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(, , i64) #1 + + ; Function Attrs: nounwind readnone + declare @llvm.riscv.vsub.nxv1i64.nxv1i64.i64(, , i64) #1 + + ; Function Attrs: nounwind readonly + declare @llvm.riscv.vle.nxv1i64.i64(* nocapture, i64) #3 + + ; Function Attrs: nounwind readonly + declare @llvm.riscv.vle.nxv1i32.i64(* nocapture, i64) #3 + + ; Function Attrs: nounwind writeonly + declare void @llvm.riscv.vse.nxv1i64.i64(, * nocapture, i64) #4 + + ; Function Attrs: nounwind readnone + declare @llvm.riscv.vzext.nxv1i64.nxv1i32.i64(, i64) #1 + + ; Function Attrs: nounwind readnone + declare @llvm.riscv.vsext.nxv1i64.nxv1i32.i64(, i64) #1 + + attributes #0 = { "target-features"="+experimental-v" } + attributes #1 = { nounwind readnone } + attributes #2 = { nounwind } + attributes #3 = { nounwind 
readonly } + attributes #4 = { nounwind writeonly } + +... +--- +name: load_add_or_sub +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vr } + - { id: 1, class: vr } + - { id: 2, class: vr } + - { id: 3, class: vr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } + - { id: 6, class: vr } + - { id: 7, class: gpr } + - { id: 8, class: gpr } +liveins: + - { reg: '$x10', virtual-reg: '%4' } + - { reg: '$x11', virtual-reg: '%5' } + - { reg: '$v8', virtual-reg: '%6' } + - { reg: '$x12', virtual-reg: '%7' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: load_add_or_sub + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.2(0x30000000), %bb.1(0x50000000) + ; CHECK: liveins: $x10, $x11, $v8, $x12 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x12 + ; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v8 + ; CHECK: [[COPY2:%[0-9]+]]:gpr = COPY $x11 + ; CHECK: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: dead %9:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY2]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: [[COPY4:%[0-9]+]]:gpr = COPY $x0 + ; CHECK: BEQ [[COPY3]], [[COPY4]], %bb.2 + ; CHECK: PseudoBR %bb.1 + ; CHECK: bb.1.if.then: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: PseudoBR %bb.3 + ; CHECK: bb.2.if.else: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: bb.3.if.end: + ; CHECK: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2 + ; CHECK: $v8 = COPY [[PHI]] + ; CHECK: PseudoRET implicit $v8 + bb.0.entry: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $x10, $x11, $v8, $x12 + + %7:gpr = COPY $x12 + 
%6:vr = COPY $v8 + %5:gpr = COPY $x11 + %4:gpr = COPY $x10 + %0:vr = PseudoVLE64_V_M1 %5, %7, 6 + %8:gpr = COPY $x0 + BEQ %4, %8, %bb.2 + PseudoBR %bb.1 + + bb.1.if.then: + %1:vr = PseudoVADD_VV_M1 %0, %6, %7, 6 + PseudoBR %bb.3 + + bb.2.if.else: + %2:vr = PseudoVSUB_VV_M1 %0, %6, %7, 6 + + bb.3.if.end: + %3:vr = PHI %1, %bb.1, %2, %bb.2 + $v8 = COPY %3 + PseudoRET implicit $v8 + +... +--- +name: load_zext_or_sext +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vr } + - { id: 1, class: vr } + - { id: 2, class: vr } + - { id: 3, class: vr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } + - { id: 6, class: gpr } + - { id: 7, class: gpr } + - { id: 8, class: gpr } +liveins: + - { reg: '$x10', virtual-reg: '%4' } + - { reg: '$x11', virtual-reg: '%5' } + - { reg: '$x12', virtual-reg: '%6' } + - { reg: '$x13', virtual-reg: '%7' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: load_zext_or_sext + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.2(0x30000000), %bb.1(0x50000000) + ; CHECK: liveins: $x10, $x11, $x12, $x13 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x13 + ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $x12 + ; CHECK: [[COPY2:%[0-9]+]]:gpr = COPY $x11 + ; CHECK: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: dead %9:gpr = PseudoVSETVLI [[COPY]], 87, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVLE32_V_MF2_:%[0-9]+]]:vr = PseudoVLE32_V_MF2 [[COPY2]], $noreg, 5, implicit $vl, implicit $vtype + ; CHECK: [[COPY4:%[0-9]+]]:gpr = COPY $x0 + ; CHECK: BEQ [[COPY3]], [[COPY4]], %bb.2 + ; CHECK: PseudoBR %bb.1 + ; CHECK: bb.1.if.then: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: dead %10:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[PseudoVLE32_V_MF2_]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: PseudoBR %bb.3 + ; CHECK: bb.2.if.else: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: dead %11:gpr = 
PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK: early-clobber %2:vr = PseudoVSEXT_VF2_M1 [[PseudoVLE32_V_MF2_]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: bb.3.if.end: + ; CHECK: [[PHI:%[0-9]+]]:vr = PHI %1, %bb.1, %2, %bb.2 + ; CHECK: PseudoVSE64_V_M1 [[PHI]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: PseudoRET + bb.0.entry: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $x10, $x11, $x12, $x13 + + %7:gpr = COPY $x13 + %6:gpr = COPY $x12 + %5:gpr = COPY $x11 + %4:gpr = COPY $x10 + %0:vr = PseudoVLE32_V_MF2 %5, %7, 5 + %8:gpr = COPY $x0 + BEQ %4, %8, %bb.2 + PseudoBR %bb.1 + + bb.1.if.then: + early-clobber %1:vr = PseudoVZEXT_VF2_M1 %0, %7, 6 + PseudoBR %bb.3 + + bb.2.if.else: + early-clobber %2:vr = PseudoVSEXT_VF2_M1 %0, %7, 6 + + bb.3.if.end: + %3:vr = PHI %1, %bb.1, %2, %bb.2 + PseudoVSE64_V_M1 %3, %6, %7, 6 + PseudoRET + +... +--- +name: vmv_x_s +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vr } + - { id: 1, class: vr } + - { id: 2, class: vr } + - { id: 3, class: gpr } + - { id: 4, class: vr } + - { id: 5, class: vr } + - { id: 6, class: gpr } + - { id: 7, class: gpr } + - { id: 8, class: gpr } +liveins: + - { reg: '$x10', virtual-reg: '%3' } + - { reg: '$v8', virtual-reg: '%4' } + - { reg: '$v9', virtual-reg: '%5' } + - { reg: '$x11', virtual-reg: '%6' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: vmv_x_s + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.2(0x30000000), %bb.1(0x50000000) + ; CHECK: liveins: $x10, $v8, $v9, $x11 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11 + ; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v9 + ; CHECK: [[COPY2:%[0-9]+]]:vr = COPY $v8 + ; CHECK: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: [[COPY4:%[0-9]+]]:gpr = COPY $x0 + ; CHECK: BEQ [[COPY3]], [[COPY4]], %bb.2 + ; CHECK: PseudoBR %bb.1 + ; CHECK: bb.1.if.then: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: dead %9:gpr = PseudoVSETVLI 
[[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: PseudoBR %bb.3 + ; CHECK: bb.2.if.else: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: dead %10:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 [[COPY1]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: bb.3.if.end: + ; CHECK: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2 + ; CHECK: [[PseudoVMV_X_S_M1_:%[0-9]+]]:gpr = PseudoVMV_X_S_M1 [[PHI]], 6, implicit $vtype + ; CHECK: $x10 = COPY [[PseudoVMV_X_S_M1_]] + ; CHECK: PseudoRET implicit $x10 + bb.0.entry: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $x10, $v8, $v9, $x11 + + %6:gpr = COPY $x11 + %5:vr = COPY $v9 + %4:vr = COPY $v8 + %3:gpr = COPY $x10 + %7:gpr = COPY $x0 + BEQ %3, %7, %bb.2 + PseudoBR %bb.1 + + bb.1.if.then: + %0:vr = PseudoVADD_VV_M1 %4, %5, %6, 6 + PseudoBR %bb.3 + + bb.2.if.else: + %1:vr = PseudoVSUB_VV_M1 %5, %5, %6, 6 + + bb.3.if.end: + %2:vr = PHI %0, %bb.1, %1, %bb.2 + %8:gpr = PseudoVMV_X_S_M1 %2, 6 + $x10 = COPY %8 + PseudoRET implicit $x10 + +... 
+--- +name: vsetvli_add_or_sub +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: vr } + - { id: 2, class: vr } + - { id: 3, class: vr } + - { id: 4, class: gpr } + - { id: 5, class: vr } + - { id: 6, class: vr } + - { id: 7, class: gpr } + - { id: 8, class: gpr } +liveins: + - { reg: '$x10', virtual-reg: '%4' } + - { reg: '$v8', virtual-reg: '%5' } + - { reg: '$v9', virtual-reg: '%6' } + - { reg: '$x11', virtual-reg: '%7' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: vsetvli_add_or_sub + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.2(0x30000000), %bb.1(0x50000000) + ; CHECK: liveins: $x10, $v8, $v9, $x11 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11 + ; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v9 + ; CHECK: [[COPY2:%[0-9]+]]:vr = COPY $v8 + ; CHECK: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: [[PseudoVSETVLI:%[0-9]+]]:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[COPY4:%[0-9]+]]:gpr = COPY $x0 + ; CHECK: BEQ [[COPY3]], [[COPY4]], %bb.2 + ; CHECK: PseudoBR %bb.1 + ; CHECK: bb.1.if.then: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: PseudoBR %bb.3 + ; CHECK: bb.2.if.else: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: bb.3.if.end: + ; CHECK: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2 + ; CHECK: $v8 = COPY [[PHI]] + ; CHECK: PseudoRET implicit $v8 + bb.0.entry: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $x10, $v8, $v9, $x11 + + %7:gpr = COPY $x11 + %6:vr = COPY $v9 + %5:vr = COPY $v8 + %4:gpr = COPY $x10 + %0:gpr = PseudoVSETVLI %7, 88, implicit-def dead $vl, implicit-def dead $vtype + %8:gpr = COPY $x0 + BEQ %4, %8, %bb.2 + 
PseudoBR %bb.1 + + bb.1.if.then: + %1:vr = PseudoVADD_VV_M1 %5, %6, %0, 6 + PseudoBR %bb.3 + + bb.2.if.else: + %2:vr = PseudoVSUB_VV_M1 %5, %6, %0, 6 + + bb.3.if.end: + %3:vr = PHI %1, %bb.1, %2, %bb.2 + $v8 = COPY %3 + PseudoRET implicit $v8 + +...